Example #1
0
    def test_find_by_patterns(self):
        # For every pattern key, the matches returned by find_by_patterns for
        # the text 'abcd' must list out to the characters of that pattern.
        expected = {3: 'a', 2: 'bc'}
        found = find_by_patterns(expected, 'abcd')

        # Keys are checked in the fixed order 3, then 2.
        for key in (3, 2):
            self.assertEqual(list(found[key]), list(expected[key]))
Example #2
0
def shmir_from_transcript_sequence(
    transcript_name, minimum_CG, maximum_CG, maximum_offtarget, scaffold, immunostimulatory
):
    """Generate sh-miR(s) from a transcript sequence.

    Results already stored for the same parameters are returned as-is.
    Otherwise the mRNA is fetched with ``ncbi_api.get_mRNA``, reverse
    complemented, and searched with the patterns of every frame selected by
    ``scaffold``. The candidates are validated, turned into sh-miRs, sorted by
    ``result["score"]["all"]`` (descending), truncated to
    ``TRANSCRIPT_RESULT_LIMIT`` and stored. When the pattern-based search
    yields nothing, the candidates produced by
    ``all_possible_sequences(reversed_mRNA, 21)`` are tried against all frames.

    Args:
        transcript_name(str): Name of transcript.
        minimum_CG(int): Minimum number of 'C' and 'G' nucleotide in sequence.
        maximum_CG(int): Maximum number of 'C' and 'G' nucleotide in sequence.
        maximum_offtarget(int): Maximum offtarget.
        scaffold(str): Name of frame of miRNA or 'all'.
        immunostimulatory(str): One of 'yes', 'no', 'no_difference'.

    Returns:
        list of sh-miR(s): whatever ``get_results`` returned when a stored
        entry exists, otherwise the freshly stored results serialized with
        ``as_json()``.
    """
    # Check whether results for these parameters are already in the database.
    results = get_results(transcript_name, minimum_CG, maximum_CG, maximum_offtarget, scaffold, immunostimulatory)

    # Stored results are sometimes an empty list, so compare against None
    # instead of relying on truthiness.
    if results is not None:
        return results

    path = create_path_string(transcript_name, minimum_CG, maximum_CG, maximum_offtarget, scaffold, immunostimulatory)

    mRNA = ncbi_api.get_mRNA(transcript_name)
    reversed_mRNA = reverse_complement(mRNA)

    original_frames = frames_by_scaffold(scaffold)

    frames_by_name = {frame.name: frame for frame in original_frames}

    # Best patterns should be chosen first, hence the descending key order.
    patterns = {
        frame.name: OrderedDict(sorted(json.loads(frame.regexp).items(), reverse=True)) for frame in original_frames
    }

    # One validation task per (frame, pattern type) pair, run as a group.
    with allow_join_result():
        validated = (
            group(
                validate_sequences.s(
                    list(sequences),  # generators are not serializable
                    regexp_type,
                    name,
                    minimum_CG,
                    maximum_CG,
                    maximum_offtarget,
                    immunostimulatory,
                ).set(queue="score")
                # .items() instead of the Python 2 only .iteritems()
                for name, patterns_dict in patterns.items()
                for regexp_type, sequences in find_by_patterns(patterns_dict, reversed_mRNA).items()
            )
            .apply_async()
            .get()
        )

    best_sequences = merge_results(validated)

    # Build sh-miRs only against the frame each siRNA was found for.
    with allow_join_result():
        results = (
            group(
                shmir_from_fasta.s(
                    siRNA["sequence"], siRNA["offtarget"], siRNA["regexp"], [frames_by_name[name]], path
                ).set(queue="score")
                for name, siRNA in unpack_dict_to_list(best_sequences)
            )
            .apply_async()
            .get()
        )

    # Merge the per-task result lists into one flat list.
    results = list(chain.from_iterable(results))

    if not results:
        # Fallback: nothing came out of the pattern-based search, so validate
        # the candidates of length 21 under the pseudo frame name "all".
        with allow_join_result():
            validated = (
                validate_sequences.s(
                    list(all_possible_sequences(reversed_mRNA, 21)),  # not serializable
                    0,
                    "all",
                    minimum_CG,
                    maximum_CG,
                    maximum_offtarget,
                    immunostimulatory,
                )
                .apply_async(queue="subtasks")
                .get()
            )
        best_sequences = merge_results([validated])

        # Here every frame is tried for each siRNA; the name is not needed.
        with allow_join_result():
            results = (
                group(
                    shmir_from_fasta.s(
                        siRNA["sequence"], siRNA["offtarget"], siRNA["regexp"], original_frames, path
                    ).set(queue="score")
                    for _, siRNA in unpack_dict_to_list(best_sequences)
                )
                .apply_async()
                .get()
            )

        # Merge, materialized as a list to match the main path above.
        results = list(chain.from_iterable(results))

    # Keep only the highest scoring results.
    sorted_results = sorted(results, key=lambda result: result["score"]["all"], reverse=True)[:TRANSCRIPT_RESULT_LIMIT]

    db_results = store_results(
        transcript_name, minimum_CG, maximum_CG, maximum_offtarget, scaffold, immunostimulatory, sorted_results
    )

    # NOTE(review): presumably deletes folding files under `path` that do not
    # belong to the stored results -- confirm against remove_bad_foldings.
    remove_bad_foldings(path, [result.get_task_id() for result in db_results])

    return [result.as_json() for result in db_results]