def test_no_results_get_mRNA(self, mock_get_data):
    """Check that get_mRNA raises NoResultError for the stubbed payload.

    The payload handed back by the mocked fetcher does not contain the
    "mRNA" token.
    """
    mock_get_data.return_value = "ptaki.lataja.kluczem.GACTGACTG"

    self.assertRaises(
        errors.NoResultError, ncbi_api.get_mRNA, self.GOOD_MRNA
    )

    # The fetcher must still have been called exactly once, with the
    # argument that was given to get_mRNA.
    mock_get_data.assert_called_once_with(self.GOOD_MRNA)
def test_no_results_get_mRNA(self, mock_get_data):
    """Expect NoResultError from get_mRNA when the payload lacks 'mRNA'."""
    payload_without_marker = 'ptaki.lataja.kluczem.GACTGACTG'
    mock_get_data.return_value = payload_without_marker

    self.assertRaises(
        errors.NoResultError, ncbi_api.get_mRNA, self.GOOD_MRNA
    )

    # exactly one fetch, made with the same argument
    mock_get_data.assert_called_once_with(self.GOOD_MRNA)
def test_get_mRNA(self, mock_get_data):
    """Check that get_mRNA returns the payload text following "mRNA"."""
    payload = "ptaki.lataja.kluczem.mRNAGACTGACTG"
    mock_get_data.return_value = payload
    # Expected value is derived from the stubbed payload itself.
    expected = payload.split("mRNA")[1]

    actual = ncbi_api.get_mRNA(self.GOOD_MRNA)

    mock_get_data.assert_called_once_with(self.GOOD_MRNA)
    self.assertEqual(expected, actual)
def test_get_mRNA(self, mock_get_data):
    """get_mRNA should hand back what comes after 'mRNA' in the payload."""
    stub_payload = 'ptaki.lataja.kluczem.mRNAGACTGACTG'
    wanted = stub_payload.split('mRNA')[1]
    mock_get_data.return_value = stub_payload

    got = ncbi_api.get_mRNA(self.GOOD_MRNA)

    # one fetch only, made with the argument passed to get_mRNA
    mock_get_data.assert_called_once_with(self.GOOD_MRNA)
    self.assertEqual(wanted, got)
def test_incorrect_get_mRNA(self, mock_get_data):
    """Check that get_mRNA rejects BAD_MNRA without calling the fetcher."""
    self.assertRaises(
        errors.IncorrectDataError, ncbi_api.get_mRNA, self.BAD_MNRA
    )

    # No call of any kind may have been recorded on the mocked fetcher.
    recorded_calls = mock_get_data.mock_calls
    self.assertEqual(len(recorded_calls), 0)
def shmir_from_transcript_sequence(
    transcript_name, minimum_CG, maximum_CG, maximum_offtarget, scaffold,
    stimulatory_sequences
):
    """Generate sh-miR(s) from a transcript sequence.

    If a result set for the same input is already stored (string fields are
    compared case-insensitively), it is returned as-is. Otherwise the mRNA of
    the transcript is fetched, candidate sequences are matched against the
    backbone patterns, validated and scored through asynchronous tasks, and
    the ten best-scoring results are stored together with the input.

    Args:
        transcript_name(str): Name of transcript.
        minimum_CG(int): Minimum number of 'C' and 'G' nucleotide in sequence.
        maximum_CG(int): Maximum number of 'C' and 'G' nucleotide in sequence.
        maximum_offtarget(int): Maximum offtarget.
        scaffold(str): Name of frame of miRNA or 'all'.
        stimulatory_sequences(str): One of 'yes', 'no', 'no_difference'.

    Returns:
        list: ``as_json()`` of every result, either the stored ones or the
        newly created ones.

    Exceptions raised by ``ncbi_api.get_mRNA`` are not caught here.
    """
    # check if results are in database
    try:
        stored_input = db_session.query(InputData).filter(
            func.lower(InputData.transcript_name) == transcript_name.lower(),
            InputData.minimum_CG == minimum_CG,
            InputData.maximum_CG == maximum_CG,
            InputData.maximum_offtarget == maximum_offtarget,
            func.lower(InputData.scaffold) == scaffold.lower(),
            func.lower(
                InputData.stimulatory_sequences
            ) == stimulatory_sequences.lower()
        ).outerjoin(InputData.results).one()
    except NoResultFound:
        pass
    else:
        return [result.as_json() for result in stored_input.results]

    # create path string
    path = "_".join(
        map(
            str,
            [transcript_name, minimum_CG, maximum_CG, maximum_offtarget,
             scaffold, stimulatory_sequences]
        )
    )

    mRNA = ncbi_api.get_mRNA(transcript_name)

    if scaffold == 'all':
        original_frames = db_session.query(Backbone).all()
    else:
        original_frames = db_session.query(Backbone).filter(
            func.lower(Backbone.name) == scaffold.lower()
        ).all()

    frames_by_name = {frame.name: frame for frame in original_frames}

    # per frame: regexp entries ordered by key, highest key first
    patterns = {
        frame.name: OrderedDict(
            sorted(
                json.loads(frame.regexp).items(),
                reverse=True
            )
        ) for frame in original_frames
    }

    best_sequences = defaultdict(list)

    for name, patterns_dict in patterns.iteritems():
        found = find_by_patterns(patterns_dict, mRNA)
        for regexp_type, sequences in found.iteritems():
            with allow_join_result():
                is_empty, sequences = generator_is_empty(sequences)
                if not is_empty:
                    # NOTE(review): this assignment replaces whatever an
                    # earlier regexp_type stored for the same frame; confirm
                    # that overwriting (not accumulating) is intended.
                    best_sequences[name] = remove_none(
                        group(
                            validate_and_offtarget.s(
                                sequence,
                                maximum_offtarget,
                                minimum_CG,
                                maximum_CG,
                                stimulatory_sequences,
                                int(regexp_type)
                            ).set(queue="blast")
                            for sequence in sequences
                        ).apply_async().get()
                    )

    results = []
    for name, seq_dict in unpack_dict_to_list(best_sequences):
        # `>=` rather than `==`: extend() below may add several results at
        # once, so the count can step over exactly 20 and never stop.
        if len(results) >= 20:
            break
        with allow_join_result():
            shmir_result = shmir_from_fasta_string.s(
                seq_dict['sequence'],
                [frames_by_name[name]],
                seq_dict['offtarget'],
                seq_dict['regexp'],
                path
            ).set(queue="score").apply_async().get()

            if shmir_result:
                results.extend(shmir_result)

    # fallback: nothing came out of the pattern search, so try every
    # sequence produced by all_possible_sequences against all chosen frames
    if not results:
        best_sequences = []
        sequences = all_possible_sequences(mRNA, 19, 21)

        with allow_join_result():
            is_empty, sequences = generator_is_empty(sequences)
            if not is_empty:
                best_sequences = remove_none(
                    group(
                        validate_and_offtarget.s(
                            sequence,
                            maximum_offtarget,
                            minimum_CG,
                            maximum_CG,
                            stimulatory_sequences,
                            0
                        ).set(queue="blast")
                        for sequence in sequences
                    ).apply_async().get()
                )

        if best_sequences:
            with allow_join_result():
                results = chain(*remove_none(
                    group(
                        shmir_from_fasta_string.s(
                            seq_dict['sequence'],
                            original_frames,
                            seq_dict['offtarget'],
                            seq_dict['regexp'],
                            path
                        ).set(queue="score")
                        for seq_dict in best_sequences
                    ).apply_async().get()
                ))

    # each result is a tuple whose first item is the score
    sorted_results = sorted(
        results,
        key=operator.itemgetter(0),
        reverse=True
    )[:10]

    db_results = [
        Result(
            score=score,
            sh_mir=shmir,
            pdf=path_id,
            backbone=frames_by_name[frame_name].id,
            sequence=found_sequences[0],
        )
        for score, shmir, frame_name, path_id, found_sequences
        in sorted_results
    ]

    remove_bad_foldings(path, (result.get_task_id() for result in db_results))

    db_input = InputData(
        transcript_name=transcript_name,
        minimum_CG=minimum_CG,
        maximum_CG=maximum_CG,
        maximum_offtarget=maximum_offtarget,
        scaffold=scaffold,
        stimulatory_sequences=stimulatory_sequences,
        results=db_results
    )
    db_session.add(db_input)
    db_session.add_all(db_results)
    db_session.commit()

    return [result.as_json() for result in db_results]
def shmir_from_transcript_sequence(
    transcript_name, minimum_CG, maximum_CG, maximum_offtarget, scaffold,
    immunostimulatory
):
    """Build sh-miR results for a transcript, reusing stored ones if present.

    Args:
        transcript_name(str): Name of transcript.
        minimum_CG(int): Minimum number of 'C' and 'G' nucleotide in sequence.
        maximum_CG(int): Maximum number of 'C' and 'G' nucleotide in sequence.
        maximum_offtarget(int): Maximum offtarget.
        scaffold(str): Name of frame of miRNA or 'all'.
        immunostimulatory(str): One of 'yes', 'no', 'no_difference'.

    Returns:
        Whatever ``get_results`` returned when it is not None; otherwise a
        list with ``as_json()`` of every newly stored result.
    """
    query = (
        transcript_name, minimum_CG, maximum_CG, maximum_offtarget, scaffold,
        immunostimulatory,
    )

    # Look in the database first. An empty list is a valid stored answer,
    # so only None counts as "nothing stored".
    stored = get_results(*query)
    if stored is not None:
        return stored

    path = create_path_string(*query)

    mRNA = ncbi_api.get_mRNA(transcript_name)
    reversed_mRNA = reverse_complement(mRNA)

    original_frames = frames_by_scaffold(scaffold)
    frames_by_name = dict((frame.name, frame) for frame in original_frames)

    # Best patterns should be chosen first, hence the reverse ordering.
    patterns = {}
    for frame in original_frames:
        ordered_regexps = sorted(
            json.loads(frame.regexp).items(), reverse=True
        )
        patterns[frame.name] = OrderedDict(ordered_regexps)

    with allow_join_result():
        validation_jobs = group(
            validate_sequences.s(
                list(sequences),  # generators are not serializable
                regexp_type,
                name,
                minimum_CG,
                maximum_CG,
                maximum_offtarget,
                immunostimulatory,
            ).set(queue="score")
            for name, patterns_dict in patterns.iteritems()
            for regexp_type, sequences
            in find_by_patterns(patterns_dict, reversed_mRNA).iteritems()
        )
        validated = validation_jobs.apply_async().get()

    best_sequences = merge_results(validated)

    with allow_join_result():
        scoring_jobs = group(
            shmir_from_fasta.s(
                siRNA["sequence"],
                siRNA["offtarget"],
                siRNA["regexp"],
                [frames_by_name[name]],
                path
            ).set(queue="score")
            for name, siRNA in unpack_dict_to_list(best_sequences)
        )
        per_task_results = scoring_jobs.apply_async().get()

    # flatten the per-task lists into a single list
    results = list(chain.from_iterable(per_task_results))

    if not results:
        # Fallback: validate every sequence from all_possible_sequences and
        # score the survivors against all the chosen frames.
        with allow_join_result():
            fallback_job = validate_sequences.s(
                list(all_possible_sequences(reversed_mRNA, 21)),  # not serializable
                0,
                "all",
                minimum_CG,
                maximum_CG,
                maximum_offtarget,
                immunostimulatory,
            )
            validated = fallback_job.apply_async(queue="subtasks").get()

        best_sequences = merge_results([validated])

        with allow_join_result():
            scoring_jobs = group(
                shmir_from_fasta.s(
                    siRNA["sequence"],
                    siRNA["offtarget"],
                    siRNA["regexp"],
                    original_frames,
                    path
                ).set(queue="score")
                for _name, siRNA in unpack_dict_to_list(best_sequences)
            )
            per_task_results = scoring_jobs.apply_async().get()

        # flatten
        results = chain.from_iterable(per_task_results)

    def overall_score(result):
        return result["score"]["all"]

    ranked = sorted(results, key=overall_score, reverse=True)
    sorted_results = ranked[:TRANSCRIPT_RESULT_LIMIT]

    db_results = store_results(
        transcript_name,
        minimum_CG,
        maximum_CG,
        maximum_offtarget,
        scaffold,
        immunostimulatory,
        sorted_results
    )

    task_ids = [result.get_task_id() for result in db_results]
    remove_bad_foldings(path, task_ids)

    return [result.as_json() for result in db_results]