예제 #1
0
파일: worker.py 프로젝트: sh-miR/designer
def shmir_from_sirna_score(seq1, seq2, shift_left, shift_right):
    """Build sh-miR candidates for the given sequences and rank them by score.

    Every frame returned by ``frames_by_scaffold('all')`` is adjusted with the
    input sequences, rendered with ``frame.template()``, folded and scored;
    folding and scoring jobs are sent to the ``subtasks`` queue.

    Args:
        seq1: First sequence, forwarded to ``adjusted_frames``.
        seq2: Second sequence, forwarded to ``adjusted_frames``.
        shift_left: Left shift, forwarded to ``adjusted_frames``.
        shift_right: Right shift, forwarded to ``adjusted_frames``.

    Returns:
        List of dicts with the keys ``score``, ``shmir``, ``scaffold_name``,
        ``pdf_reference`` and ``sequences``. Only entries whose
        ``score['all']`` is greater than 60 are kept; they are sorted by that
        value in descending order and cut to ``SIRNA_RESULT_LIMIT`` items.
    """
    base_frames = frames_by_scaffold('all')

    # a deep copy is handed to adjusted_frames; base_frames is reused below
    # as the "original" counterpart when scoring
    frames = adjusted_frames(
        seq1, seq2, shift_left, shift_right, deepcopy(base_frames)
    )

    shmirs = [frame.template() for frame in frames]

    # fold every candidate (via mfold, per the original note)
    with allow_join_result():
        fold_jobs = group(
            fold.s(shmir).set(queue="subtasks") for shmir in shmirs
        )
        foldings = fold_jobs.apply_async().get()

    # score each adjusted frame together with its original and its folding
    with allow_join_result():
        score_jobs = group(
            score_from_sirna.s(frame, original, folding['ss']).set(queue="subtasks")
            for frame, original, folding in izip(frames, base_frames, foldings)
        )
        scores = score_jobs.apply_async().get()

    # keep only the candidates scoring above the threshold
    accepted = []
    for score, shmir, frame, folding in izip(scores, shmirs, frames, foldings):
        if score['all'] > 60:
            accepted.append({
                'score': score,
                'shmir': shmir,
                'scaffold_name': frame.name,
                'pdf_reference': folding['path_id'],
                'sequences': (frame.siRNA1, frame.siRNA2),
            })

    # best first, then truncate
    accepted.sort(key=lambda entry: entry['score']['all'], reverse=True)
    return accepted[:SIRNA_RESULT_LIMIT]
예제 #2
0
파일: worker.py 프로젝트: sh-miR/designer
def shmir_from_transcript_sequence(
    transcript_name, minimum_CG, maximum_CG, maximum_offtarget, scaffold, immunostimulatory
):
    """Generate sh-miR(s) for a transcript, reusing stored results when present.

    The stored results are looked up first. On a miss the mRNA is fetched,
    reverse-complemented and searched with the per-frame patterns; if that
    yields nothing, all sequences from ``all_possible_sequences`` are
    validated instead. The best results are stored and returned.

    Args:
        transcript_name(str): Name of transcript.
        minimum_CG(int): Minimum number of 'C' and 'G' nucleotide in sequence.
        maximum_CG(int): Maximum number of 'C' and 'G' nucleotide in sequence.
        maximum_offtarget(int): Maximum offtarget.
        scaffold(str): Name of frame of miRNA or 'all'.
        immunostimulatory(str): Presumably one of 'yes', 'no',
            'no_difference' (the original docstring listed these values
            under the name ``stimulatory_sequences``).

    Returns:
        Whatever ``get_results`` returned when it is not ``None``; otherwise
        a list with ``as_json()`` of every stored result.
    """
    query = (transcript_name, minimum_CG, maximum_CG, maximum_offtarget, scaffold, immunostimulatory)

    # an empty list is a valid stored result, hence the explicit None check
    stored = get_results(*query)
    if stored is not None:
        return stored

    path = create_path_string(*query)

    mRNA = ncbi_api.get_mRNA(transcript_name)
    reversed_mRNA = reverse_complement(mRNA)

    original_frames = frames_by_scaffold(scaffold)

    frames_by_name = dict((frame.name, frame) for frame in original_frames)

    # per-frame patterns with items in descending order
    # (best patterns should be chosen first, per the original note)
    patterns = {}
    for frame in original_frames:
        ordered_items = sorted(json.loads(frame.regexp).items(), reverse=True)
        patterns[frame.name] = OrderedDict(ordered_items)

    # validate every pattern match of every frame
    with allow_join_result():
        validation_jobs = group(
            validate_sequences.s(
                list(matches),  # generators are not serializable
                regexp_type,
                name,
                minimum_CG,
                maximum_CG,
                maximum_offtarget,
                immunostimulatory,
            ).set(queue="score")
            for name, frame_patterns in patterns.iteritems()
            for regexp_type, matches in find_by_patterns(frame_patterns, reversed_mRNA).iteritems()
        )
        validated = validation_jobs.apply_async().get()

    best_sequences = merge_results(validated)

    # build sh-miRs from each selected siRNA using only its own frame
    with allow_join_result():
        shmir_jobs = group(
            shmir_from_fasta.s(
                siRNA["sequence"], siRNA["offtarget"], siRNA["regexp"], [frames_by_name[name]], path
            ).set(queue="score")
            for name, siRNA in unpack_dict_to_list(best_sequences)
        )
        per_sirna = shmir_jobs.apply_async().get()

    # flatten the per-siRNA result lists
    candidates = list(chain.from_iterable(per_sirna))

    if not candidates:
        # fallback: validate all possible sequences
        # (21 is presumably the sequence length -- TODO confirm)
        with allow_join_result():
            fallback_job = validate_sequences.s(
                list(all_possible_sequences(reversed_mRNA, 21)),  # not serializable
                0,
                "all",
                minimum_CG,
                maximum_CG,
                maximum_offtarget,
                immunostimulatory,
            )
            validated = fallback_job.apply_async(queue="subtasks").get()
        best_sequences = merge_results([validated])

        # this time every frame is offered to each siRNA
        with allow_join_result():
            shmir_jobs = group(
                shmir_from_fasta.s(
                    siRNA["sequence"], siRNA["offtarget"], siRNA["regexp"], original_frames, path
                ).set(queue="score")
                for _name, siRNA in unpack_dict_to_list(best_sequences)
            )
            per_sirna = shmir_jobs.apply_async().get()

        # flatten the per-siRNA result lists
        candidates = chain.from_iterable(per_sirna)

    # best score first, limited to the configured number of results
    ranked = sorted(candidates, key=lambda item: item["score"]["all"], reverse=True)
    top_results = ranked[:TRANSCRIPT_RESULT_LIMIT]

    db_results = store_results(
        transcript_name, minimum_CG, maximum_CG, maximum_offtarget, scaffold, immunostimulatory, top_results
    )

    # task ids of the stored results are passed on to the folding cleanup
    kept_task_ids = [result.get_task_id() for result in db_results]
    remove_bad_foldings(path, kept_task_ids)

    return [result.as_json() for result in db_results]