Exemple #1
0
    def test_no_results_get_mRNA(self, mock_get_data):
        """get_mRNA must raise NoResultError when the data has no "mRNA" part.

        The mocked payload deliberately contains no "mRNA" substring; the
        mock must still have been called exactly once with GOOD_MRNA.
        """
        mock_get_data.return_value = "ptaki.lataja.kluczem.GACTGACTG"

        # callable form of assertRaises: invokes get_mRNA(self.GOOD_MRNA)
        self.assertRaises(
            errors.NoResultError, ncbi_api.get_mRNA, self.GOOD_MRNA
        )

        mock_get_data.assert_called_once_with(self.GOOD_MRNA)
Exemple #2
0
    def test_no_results_get_mRNA(self, mock_get_data):
        """Expect NoResultError when the mocked data lacks an 'mRNA' substring.

        Also checks that the mock was called exactly once with GOOD_MRNA.
        """
        payload_without_marker = 'ptaki.lataja.kluczem.GACTGACTG'
        mock_get_data.return_value = payload_without_marker

        expect_no_result = self.assertRaises(errors.NoResultError)
        with expect_no_result:
            ncbi_api.get_mRNA(self.GOOD_MRNA)

        mock_get_data.assert_called_once_with(self.GOOD_MRNA)
Exemple #3
0
    def test_get_mRNA(self, mock_get_data):
        """get_mRNA must return the text that follows "mRNA" in the data.

        The expected value is the second piece of the mocked payload when
        split on "mRNA"; the mock must be called once with GOOD_MRNA.
        """
        raw_transcript = "ptaki.lataja.kluczem.mRNAGACTGACTG"
        mock_get_data.return_value = raw_transcript

        fetched = ncbi_api.get_mRNA(self.GOOD_MRNA)

        mock_get_data.assert_called_once_with(self.GOOD_MRNA)
        self.assertEqual(raw_transcript.split("mRNA")[1], fetched)
Exemple #4
0
    def test_get_mRNA(self, mock_get_data):
        """Check that get_mRNA yields the part of the data after 'mRNA'.

        The mock supplies the payload and must be called once with GOOD_MRNA.
        """
        payload = 'ptaki.lataja.kluczem.mRNAGACTGACTG'
        mock_get_data.return_value = payload

        # second piece of the payload once it is split on the 'mRNA' marker
        pieces = payload.split('mRNA')
        expected_tail = pieces[1]

        actual = ncbi_api.get_mRNA(self.GOOD_MRNA)

        mock_get_data.assert_called_once_with(self.GOOD_MRNA)
        self.assertEqual(expected_tail, actual)
Exemple #5
0
    def test_incorrect_get_mRNA(self, mock_get_data):
        """get_mRNA must raise IncorrectDataError for BAD_MNRA.

        The mock must not have recorded any call afterwards.
        """
        # callable form of assertRaises: invokes get_mRNA(self.BAD_MNRA)
        self.assertRaises(
            errors.IncorrectDataError, ncbi_api.get_mRNA, self.BAD_MNRA
        )

        recorded_calls = mock_get_data.mock_calls
        self.assertEqual(len(recorded_calls), 0)
Exemple #6
0
    def test_incorrect_get_mRNA(self, mock_get_data):
        """Expect IncorrectDataError for BAD_MNRA and no recorded mock calls."""
        expect_incorrect_data = self.assertRaises(errors.IncorrectDataError)
        with expect_incorrect_data:
            ncbi_api.get_mRNA(self.BAD_MNRA)

        call_count = len(mock_get_data.mock_calls)
        self.assertEqual(call_count, 0)
Exemple #7
0
def shmir_from_transcript_sequence(
    transcript_name, minimum_CG, maximum_CG, maximum_offtarget, scaffold,
    stimulatory_sequences
):
    """Generate sh-miRs from a transcript sequence.

    Results already stored for an identical request are returned straight
    from the database. Otherwise the transcript mRNA is fetched, candidate
    sequences are searched with the regexp patterns of each selected frame,
    validated and scored through Celery tasks, and the best results are
    stored and returned. When the pattern search gives no results, every
    sequence from ``all_possible_sequences(mRNA, 19, 21)`` is tried instead.

    Args:
        transcript_name(str): Name of transcript.
        minimum_CG(int): Minimum number of 'C' and 'G' nucleotide in sequence.
        maximum_CG(int): Maximum number of 'C' and 'G' nucleotide in sequence.
        maximum_offtarget(int): Maximum offtarget.
        scaffold(str): Name of frame of miRNA or 'all' (case-insensitive).
        stimulatory_sequences(str): One of 'yes', 'no', 'no_difference'.

    Returns:
        list of sh-miR(s), each as produced by ``Result.as_json()``.
    """
    # check if results are in database; text fields are compared
    # case-insensitively
    try:
        stored_input = db_session.query(InputData).filter(
            func.lower(InputData.transcript_name) == transcript_name.lower(),
            InputData.minimum_CG == minimum_CG,
            InputData.maximum_CG == maximum_CG,
            InputData.maximum_offtarget == maximum_offtarget,
            func.lower(InputData.scaffold) == scaffold.lower(),
            func.lower(
                InputData.stimulatory_sequences
            ) == stimulatory_sequences.lower()
        ).outerjoin(InputData.results).one()
    except NoResultFound:
        pass
    else:
        return [result.as_json() for result in stored_input.results]

    # create path string: the request parameters joined with underscores
    path = "_".join(
        map(
            str,
            [transcript_name, minimum_CG, maximum_CG, maximum_offtarget,
             scaffold, stimulatory_sequences]
        )
    )

    mRNA = ncbi_api.get_mRNA(transcript_name)

    # Compare case-insensitively: every other scaffold comparison in this
    # function (cache lookup above, Backbone filter below) ignores case, so
    # 'ALL' must not be treated as the name of a single frame.
    if scaffold.lower() == 'all':
        original_frames = db_session.query(Backbone).all()
    else:
        original_frames = db_session.query(Backbone).filter(
            func.lower(Backbone.name) == scaffold.lower()
        ).all()

    frames_by_name = {frame.name: frame for frame in original_frames}

    # per frame: its JSON-encoded regexp mapping, ordered by key descending
    patterns = {
        frame.name: OrderedDict(
            sorted(
                json.loads(frame.regexp).items(),
                reverse=True
            )
        ) for frame in original_frames
    }

    best_sequences = defaultdict(list)

    for name, patterns_dict in patterns.iteritems():
        for regexp_type, sequences in find_by_patterns(patterns_dict, mRNA).iteritems():
            with allow_join_result():
                is_empty, sequences = generator_is_empty(sequences)
                if not is_empty:
                    # NOTE(review): this assignment replaces whatever an
                    # earlier regexp type stored for the same frame name --
                    # confirm that overwriting (not accumulating) is intended.
                    best_sequences[name] = remove_none(
                        group(
                            validate_and_offtarget.s(
                                sequence,
                                maximum_offtarget,
                                minimum_CG,
                                maximum_CG,
                                stimulatory_sequences,
                                int(regexp_type)
                            ).set(queue="blast")
                            for sequence in sequences
                        ).apply_async().get()
                    )

    results = []
    for name, seq_dict in unpack_dict_to_list(best_sequences):
        # `extend` below may add several items at once, so an equality test
        # could step over the cap and never stop the loop.
        if len(results) >= 20:
            break
        with allow_join_result():
            shmir_result = shmir_from_fasta_string.s(
                seq_dict['sequence'],
                [frames_by_name[name]],
                seq_dict['offtarget'],
                seq_dict['regexp'],
                path
            ).set(queue="score").apply_async().get()

            if shmir_result:
                results.extend(shmir_result)

    # fallback: nothing came out of the pattern search, so validate every
    # sequence from all_possible_sequences with regexp type 0
    if not results:
        best_sequences = []
        sequences = all_possible_sequences(mRNA, 19, 21)

        with allow_join_result():
            is_empty, sequences = generator_is_empty(sequences)
            if not is_empty:
                best_sequences = remove_none(
                    group(
                        validate_and_offtarget.s(
                            sequence,
                            maximum_offtarget,
                            minimum_CG,
                            maximum_CG,
                            stimulatory_sequences,
                            0
                        ).set(queue="blast")
                        for sequence in sequences
                    ).apply_async().get()
                )

        if best_sequences:
            with allow_join_result():
                # here every selected frame is passed to each scoring task
                results = chain(*remove_none(
                    group(
                        shmir_from_fasta_string.s(
                            seq_dict['sequence'], original_frames,
                            seq_dict['offtarget'], seq_dict['regexp'], path
                        ).set(queue="score")
                        for seq_dict in best_sequences
                    ).apply_async().get()
                ))

    # keep the ten entries with the highest first element (the score)
    sorted_results = sorted(
        results,
        key=operator.itemgetter(0),
        reverse=True
    )[:10]
    db_results = [Result(
        score=score,
        sh_mir=shmir,
        pdf=path_id,
        backbone=frames_by_name[frame_name].id,
        sequence=found_sequences[0],
    ) for score, shmir, frame_name, path_id, found_sequences in sorted_results]

    remove_bad_foldings(path, (result.get_task_id() for result in db_results))

    # store the request together with its results so that the lookup at the
    # top of this function can answer the same request next time
    db_input = InputData(
        transcript_name=transcript_name,
        minimum_CG=minimum_CG,
        maximum_CG=maximum_CG,
        maximum_offtarget=maximum_offtarget,
        scaffold=scaffold,
        stimulatory_sequences=stimulatory_sequences,
        results=db_results
    )
    db_session.add(db_input)
    db_session.add_all(db_results)
    db_session.commit()

    return [result.as_json() for result in db_results]
Exemple #8
0
def shmir_from_transcript_sequence(
    transcript_name, minimum_CG, maximum_CG, maximum_offtarget, scaffold, immunostimulatory
):
    """Create sh-miRs for a transcript, reusing stored results when present.

    Args:
        transcript_name(str): Name of transcript.
        minimum_CG(int): Minimum number of 'C' and 'G' nucleotide in sequence.
        maximum_CG(int): Maximum number of 'C' and 'G' nucleotide in sequence.
        maximum_offtarget(int): Maximum offtarget.
        scaffold(str): Name of frame of miRNA or 'all'.
        immunostimulatory(str): Forwarded unchanged to the lookup, validation
            and storage helpers (presumably one of 'yes', 'no',
            'no_difference' -- TODO confirm against the validators).

    Returns:
        list of sh-miR(s).
    """
    # a stored answer may legitimately be an empty list, so only None means
    # that this request has not been computed yet
    stored = get_results(
        transcript_name, minimum_CG, maximum_CG, maximum_offtarget, scaffold, immunostimulatory
    )
    if stored is not None:
        return stored

    path = create_path_string(
        transcript_name, minimum_CG, maximum_CG, maximum_offtarget, scaffold, immunostimulatory
    )

    # all searches below run on the output of reverse_complement()
    reversed_mRNA = reverse_complement(ncbi_api.get_mRNA(transcript_name))

    original_frames = frames_by_scaffold(scaffold)

    frames_by_name = dict((frame.name, frame) for frame in original_frames)

    # best patterns should be chosen first, hence the descending sort
    patterns = {}
    for frame in original_frames:
        regexp_items = json.loads(frame.regexp).items()
        patterns[frame.name] = OrderedDict(sorted(regexp_items, reverse=True))

    def validation_signatures():
        # one validation task per (frame name, regexp type) pair
        for frame_name, patterns_dict in patterns.iteritems():
            found = find_by_patterns(patterns_dict, reversed_mRNA)
            for regexp_type, sequences in found.iteritems():
                yield validate_sequences.s(
                    list(sequences),  # generators are not serializable
                    regexp_type,
                    frame_name,
                    minimum_CG,
                    maximum_CG,
                    maximum_offtarget,
                    immunostimulatory,
                ).set(queue="score")

    with allow_join_result():
        validation_job = group(validation_signatures())
        validated = validation_job.apply_async().get()

    best_sequences = merge_results(validated)

    with allow_join_result():
        scoring_job = group(
            shmir_from_fasta.s(
                candidate["sequence"],
                candidate["offtarget"],
                candidate["regexp"],
                [frames_by_name[frame_name]],
                path,
            ).set(queue="score")
            for frame_name, candidate in unpack_dict_to_list(best_sequences)
        )
        per_task_results = scoring_job.apply_async().get()

    # merge the per-task result lists into one flat list
    results = list(chain(*per_task_results))

    if not results:
        # fallback: validate every sequence from all_possible_sequences
        # with regexp type 0 under the name "all"
        with allow_join_result():
            fallback_signature = validate_sequences.s(
                list(all_possible_sequences(reversed_mRNA, 21)),  # not serializable
                0,
                "all",
                minimum_CG,
                maximum_CG,
                maximum_offtarget,
                immunostimulatory,
            )
            validated = fallback_signature.apply_async(queue="subtasks").get()
        best_sequences = merge_results([validated])

        with allow_join_result():
            # here every selected frame is handed to each scoring task
            fallback_job = group(
                shmir_from_fasta.s(
                    candidate["sequence"],
                    candidate["offtarget"],
                    candidate["regexp"],
                    original_frames,
                    path,
                ).set(queue="score")
                for _, candidate in unpack_dict_to_list(best_sequences)
            )
            per_task_results = fallback_job.apply_async().get()

        # merge
        results = chain(*per_task_results)

    def overall_score(entry):
        return entry["score"]["all"]

    ranked = sorted(results, key=overall_score, reverse=True)
    top_results = ranked[:TRANSCRIPT_RESULT_LIMIT]

    db_results = store_results(
        transcript_name, minimum_CG, maximum_CG, maximum_offtarget, scaffold, immunostimulatory, top_results
    )

    task_ids = [stored_result.get_task_id() for stored_result in db_results]
    remove_bad_foldings(path, task_ids)

    return [stored_result.as_json() for stored_result in db_results]