Example #1
0
def shmir_from_sirna_score(seq1, seq2, shift_left, shift_right):
    """Build an sh-miR candidate for every scaffold and return the best-scoring ones.

    Every frame returned by ``frames_by_scaffold('all')`` is adjusted with the
    given sequences and shifts, rendered to a template, folded and scored.
    Folding and scoring are dispatched as task groups on the ``"subtasks"``
    queue and this function blocks until each group has finished.

    Args:
        seq1: First sequence, forwarded to ``adjusted_frames``.
        seq2: Second sequence, forwarded to ``adjusted_frames``.
        shift_left: Left shift, forwarded to ``adjusted_frames``.
        shift_right: Right shift, forwarded to ``adjusted_frames``.

    Returns:
        A list of dicts with the keys ``score``, ``shmir``, ``scaffold_name``,
        ``pdf_reference`` and ``sequences``. Only candidates whose
        ``score['all']`` is greater than 60 are kept; the list is ordered by
        ``score['all']`` from highest to lowest and holds at most
        ``SIRNA_RESULT_LIMIT`` entries.
    """
    scaffold_frames = frames_by_scaffold('all')

    # Adjust a deep copy so the unmodified scaffold frames stay available for
    # the scoring step below.
    candidates = adjusted_frames(
        seq1, seq2, shift_left, shift_right, deepcopy(scaffold_frames)
    )
    templates = [candidate.template() for candidate in candidates]

    # Fold every template (via mfold) as one group of subtasks.
    fold_jobs = group(
        fold.s(template).set(queue="subtasks") for template in templates
    )
    # allow_join_result is needed to wait on subtask results from here
    # (presumably because this function itself runs inside a task).
    with allow_join_result():
        fold_results = fold_jobs.apply_async().get()

    # Score each candidate against its original scaffold frame.
    # NOTE(review): pairing by position assumes adjusted_frames keeps the
    # order and length of the frames it was given - confirm.
    score_jobs = group(
        score_from_sirna.s(candidate, scaffold, folded['ss']).set(queue="subtasks")
        for candidate, scaffold, folded
        in izip(candidates, scaffold_frames, fold_results)
    )
    with allow_join_result():
        scores = score_jobs.apply_async().get()

    accepted = []
    for score, template, candidate, folded in izip(
            scores, templates, candidates, fold_results):
        # Keep only candidates whose overall score exceeds the cut-off of 60.
        if score['all'] > 60:
            accepted.append({
                'score': score,
                'shmir': template,
                'scaffold_name': candidate.name,
                'pdf_reference': folded['path_id'],
                'sequences': (candidate.siRNA1, candidate.siRNA2),
            })

    # Best overall score first, then cap the number of returned results.
    accepted.sort(key=lambda entry: entry['score']['all'], reverse=True)
    return accepted[:SIRNA_RESULT_LIMIT]
Example #2
0
def shmir_from_fasta(siRNA, offtarget, regexp, original_frames, prefix):
    """Build, fold and score sh-miR candidates for a single siRNA.

    The second strand is obtained with ``reverse_complement(siRNA)``. A deep
    copy of ``original_frames`` is adjusted with both strands (no shifts), each
    adjusted frame is rendered to a template and folded as a group of subtasks
    on the ``"subtasks"`` queue, and every frame is then scored with
    ``score_from_transcript``.

    Args:
        siRNA: Sequence the candidates are built from.
        offtarget: Forwarded unchanged to ``score_from_transcript``.
        regexp: Forwarded unchanged to ``score_from_transcript``.
        original_frames: Frames to adjust; they are deep-copied first and the
            originals are passed to the scoring step.
        prefix: Forwarded to the ``fold`` subtask as its ``prefix`` keyword.

    Returns:
        A list of dicts with the keys ``score``, ``frame``, ``folding`` and
        ``found_sequence``, one per frame whose score is accepted by
        ``validate_transcript_by_score``, in frame order.
    """
    complement = reverse_complement(siRNA)

    # There are no shifts in this flow, hence the two zeros.
    candidates = adjusted_frames(
        siRNA, complement, 0, 0, deepcopy(original_frames)
    )
    templates = [candidate.template() for candidate in candidates]

    fold_jobs = group(
        fold.s(template, prefix=prefix).set(queue="subtasks")
        for template in templates
    )
    # allow_join_result is needed to wait on subtask results from here
    # (presumably because this function itself runs inside a task).
    with allow_join_result():
        fold_results = fold_jobs.apply_async().get()

    # Lazily score each candidate so scoring and validation stay interleaved
    # frame by frame.
    # NOTE(review): pairing by position assumes adjusted_frames keeps the
    # order and length of original_frames - confirm.
    scored = (
        (
            score_from_transcript(
                candidate, original, folded["ss"], offtarget, regexp
            ),
            candidate,
            folded,
        )
        for candidate, original, folded
        in izip(candidates, original_frames, fold_results)
    )

    return [
        {
            "score": score,
            "frame": candidate,
            "folding": folded,
            "found_sequence": siRNA,
        }
        for score, candidate, folded in scored
        if validate_transcript_by_score(score)
    ]