Example #1
0
def shmir_from_fasta(siRNA, offtarget, regexp, original_frames, prefix):
    """Build, fold and score sh-miR candidates for one siRNA sequence.

    The siRNA is paired with its reverse complement and placed (with zero
    shifts) into a deep copy of ``original_frames``. The template of every
    adjusted frame is folded by a ``fold`` task on the "subtasks" queue, then
    scored with ``score_from_transcript``; only candidates accepted by
    ``validate_transcript_by_score`` are returned.

    Args:
        siRNA: siRNA sequence the candidates are built from.
        offtarget: offtarget value forwarded to ``score_from_transcript``.
        regexp: regexp identifier forwarded to ``score_from_transcript``.
        original_frames: frames to use; they are deep-copied before adjusting.
        prefix: forwarded to the ``fold`` task as its ``prefix`` keyword.

    Returns:
        list of dicts with the keys "score", "frame", "folding" and
        "found_sequence" (the input ``siRNA``).
    """
    complement = reverse_complement(siRNA)

    # both shifts are zero for this entry point
    adjusted = adjusted_frames(siRNA, complement, 0, 0, deepcopy(original_frames))
    templates = [frame.template() for frame in adjusted]

    with allow_join_result():
        fold_jobs = group(
            fold.s(template, prefix=prefix).set(queue="subtasks")
            for template in templates
        )
        foldings = fold_jobs.apply_async().get()

    accepted = []
    for frame, original_frame, folding in izip(adjusted, original_frames, foldings):
        score = score_from_transcript(frame, original_frame, folding["ss"], offtarget, regexp)
        if not validate_transcript_by_score(score):
            continue
        accepted.append({"score": score, "frame": frame, "folding": folding, "found_sequence": siRNA})
    return accepted
Example #2
0
def parse_input(sirna):
    """Check one or two siRNA strands and raise an error on wrong input.

    Input limitations: possible letters are {ACTGUactgu}, every 'u' is changed
    to 't', length is 19-21, and the input is either one strand or two strands
    separated by a space. If two strands are given they are checked for the
    correct 5'-3' orientation, allowing |_20%_| mismatches.

    If the input is correct, the result is 'first sequence' (19-21nt),
    'second sequence' (19-21nt), left_end{-4,-3,-2,-1,0,1,2,3,4},
    right_end{-4,-3,-2,-1,0,1,2,3,4}.

    Messages:
    * "correct sequence"
    * "changed 'u' to 't'"
    * "cut 'uu' or 'tt' ends"

    Errors:
    * "too short"
    * "insert your siRNA sequence"
    * "too long"
    * "insert only one siRNA sequence or both strands of one siRNA at a time;
      check if both stands are in 5'-3' orientation"
    * "sequence can contain only {actgu} letters"

    Args:
        sirna: sequence (str) which will be checked. When it contains no
            space, the second strand is its reverse complement.

    Returns:
        tuple from best_complementarity

    Raises:
        ValidationError
    """
    if " " in sirna:
        # split only on the first space: at most two strands are considered
        strands = sirna.split(" ", 1)
    else:
        strands = [sirna, reverse_complement(sirna)]

    # Build a real list: on Python 3 ``map`` returns a one-shot iterator that
    # the validation loop below would exhaust, leaving nothing to unpack into
    # ``best_complementarity``.
    sequences = [replace_mocules(strand) for strand in strands]

    for sequence in sequences:
        validate_sirna(sequence)

    return best_complementarity(*sequences)
Example #3
0
def shmir_from_sirna_score(input_str):
    """Design sh-miRs from siRNA input and return the best-scoring ones.

    The input is parsed with ``check_input``; when no second strand comes
    back, the reverse complement of the first one is used. Every ``Backbone``
    stored in the database is combined with the strands, and each combination
    is folded and scored by a ``fold_and_score`` task on the "subtasks" queue.

    Args:
        input_str(str): Input string containing one or two sequences.

    Returns:
        At most three results, ordered by their first element (the score)
        from highest to lowest and limited to scores above 60. The last
        element of every task result is dropped.
    """
    seq1, seq2, shift_left, shift_right = check_input(input_str)
    seq2 = seq2 or reverse_complement(seq1)

    original_frames = db_session.query(Backbone).all()

    # work on a copy so the backbones loaded from the database stay untouched
    frames = get_frames(seq1, seq2, shift_left, shift_right, deepcopy(original_frames))

    with allow_join_result():
        scoring_jobs = group(
            fold_and_score.s(
                seq1, seq2, frame_tuple, original, score_from_sirna, (seq1,)
            ).set(queue="subtasks")
            for frame_tuple, original in zip(frames, original_frames)
        )
        frames_with_score = scoring_jobs.apply_async().get()

    ranked = sorted(frames_with_score, key=operator.itemgetter(0), reverse=True)
    good_enough = [entry[:-1] for entry in ranked if entry[0] > 60]

    return good_enough[:3]
Example #4
0
def shmir_from_fasta_string(fasta_string, original_frames,
                            actual_offtarget, regexp_type, path):
    """Generate sh-miRs from a fasta string.

    The sequence is paired with its reverse complement and combined (with zero
    shifts) with a deep copy of ``original_frames``. Each combination is
    folded and scored with ``score_from_transcript`` by a ``fold_and_score``
    task on the "subtasks" queue.

    Args:
        fasta_string(str): Sequence.
        original_frames: original Backbone objects, iterated alongside the
            generated frames.
        actual_offtarget(int): offtarget value.
        regexp_type(int): Number of a regex from database.
        path: extra argument forwarded to ``fold_and_score``.

    Returns:
        Results whose 'frame' score is above 60 and whose 'all' score is above
        100, ordered by the 'all' score from highest to lowest; the first
        element of each result is replaced by that 'all' score. ``None`` when
        no result qualifies.
    """
    complement = reverse_complement(fasta_string)

    frames = get_frames(fasta_string, complement, 0, 0, deepcopy(original_frames))

    with allow_join_result():
        scoring_jobs = group(
            fold_and_score.s(
                fasta_string,
                complement,
                frame_tuple,
                original,
                score_from_transcript,
                (actual_offtarget, regexp_type),
                path
            ).set(queue="subtasks")
            for frame_tuple, original in zip(frames, original_frames)
        )
        frames_with_score = scoring_jobs.apply_async().get()

    accepted = []
    for scored in frames_with_score:
        notes = scored[0]
        if not (notes['frame'] > 60 and notes['all'] > 100):
            continue
        # collapse the score details to the overall score used for ordering
        scored[0] = notes['all']
        accepted.append(scored)

    ranked = sorted(accepted, key=operator.itemgetter(0), reverse=True)
    return ranked if ranked else None
Example #5
0
 def test_reverse_complement(self):
     """reverse_complement("atcgatcg") must give "cgatcgat"."""
     expected = "cgatcgat"
     actual = reverse_complement("atcgatcg")
     self.assertEqual(actual, expected)
Example #6
0
def shmir_from_transcript_sequence(
    transcript_name, minimum_CG, maximum_CG, maximum_offtarget, scaffold, immunostimulatory
):
    """Generating function of shmir from transcript sequence.

    Previously stored results are returned straight away when available.
    Otherwise the mRNA of the transcript is fetched, its reverse complement is
    searched for sequences matching the regexp patterns of the selected
    frames, and the validated sequences are turned into sh-miRs by
    ``shmir_from_fasta`` tasks. When that yields nothing, every sequence from
    ``all_possible_sequences(reversed_mRNA, 21)`` is validated and tried with
    all selected frames instead. The best results are stored and returned.

    Args:
        transcript_name(str): Name of transcript.
        minimum_CG(int): Minimum number of 'C' and 'G' nucleotide in sequence.
        maximum_CG(int): Maximum number of 'C' and 'G' nucleotide in sequence.
        maximum_offtarget(int): Maximum offtarget.
        scaffold(str): Name of frame of miRNA or 'all'.
        immunostimulatory(str): One of 'yes', 'no', 'no_difference'.

    Returns:
        list of sh-miR(s): the stored results when they already exist,
        otherwise the ``as_json()`` form of each newly stored result.
    """
    # check if results are in database
    results = get_results(transcript_name, minimum_CG, maximum_CG, maximum_offtarget, scaffold, immunostimulatory)

    # results may be an empty list, which is still returned as-is;
    # only None falls through to a fresh computation
    if results is not None:
        return results

    path = create_path_string(transcript_name, minimum_CG, maximum_CG, maximum_offtarget, scaffold, immunostimulatory)

    mRNA = ncbi_api.get_mRNA(transcript_name)
    reversed_mRNA = reverse_complement(mRNA)

    original_frames = frames_by_scaffold(scaffold)

    frames_by_name = {frame.name: frame for frame in original_frames}

    # best patterns should be chosen first
    patterns = {
        frame.name: OrderedDict(sorted(json.loads(frame.regexp).items(), reverse=True)) for frame in original_frames
    }

    # one validate_sequences task per (frame name, regexp type) pair
    with allow_join_result():
        validated = (
            group(
                validate_sequences.s(
                    list(sequences),  # generators are not serializable
                    regexp_type,
                    name,
                    minimum_CG,
                    maximum_CG,
                    maximum_offtarget,
                    immunostimulatory,
                ).set(queue="score")
                for name, patterns_dict in patterns.iteritems()
                for regexp_type, sequences in find_by_patterns(patterns_dict, reversed_mRNA).iteritems()
            )
            .apply_async()
            .get()
        )

    best_sequences = merge_results(validated)

    # each siRNA is only combined with the frame it was found for
    with allow_join_result():
        results = (
            group(
                shmir_from_fasta.s(
                    siRNA["sequence"], siRNA["offtarget"], siRNA["regexp"], [frames_by_name[name]], path
                ).set(queue="score")
                for name, siRNA in unpack_dict_to_list(best_sequences)
            )
            .apply_async()
            .get()
        )

    # flatten the per-task results into a single list
    results = list(chain(*results))

    # fallback: nothing came out of the pattern-based search
    if not results:
        with allow_join_result():
            validated = (
                validate_sequences.s(
                    list(all_possible_sequences(reversed_mRNA, 21)),  # not serializable
                    0,
                    "all",
                    minimum_CG,
                    maximum_CG,
                    maximum_offtarget,
                    immunostimulatory,
                )
                .apply_async(queue="subtasks")
                .get()
            )
        # merge_results is given a list of task results, so wrap the single one
        best_sequences = merge_results([validated])

        # here every siRNA is tried with all selected frames
        with allow_join_result():
            results = (
                group(
                    shmir_from_fasta.s(
                        siRNA["sequence"], siRNA["offtarget"], siRNA["regexp"], original_frames, path
                    ).set(queue="score")
                    for name, siRNA in unpack_dict_to_list(best_sequences)
                )
                .apply_async()
                .get()
            )

        # flatten the per-task results; left as a lazy chain that sorted() below consumes
        results = chain(*results)

    # keep only the highest overall scores, up to TRANSCRIPT_RESULT_LIMIT
    sorted_results = sorted(results, key=lambda result: result["score"]["all"], reverse=True)[:TRANSCRIPT_RESULT_LIMIT]

    db_results = store_results(
        transcript_name, minimum_CG, maximum_CG, maximum_offtarget, scaffold, immunostimulatory, sorted_results
    )

    # the task ids of the stored results are handed over together with the path
    remove_bad_foldings(path, [result.get_task_id() for result in db_results])

    return [result.as_json() for result in db_results]