Beispiel #1
0
def store_results(
    transcript_name,
    minimum_CG,
    maximum_CG,
    maximum_offtarget,
    scaffold,
    immuno,
    results
):
    """Persist one set of input parameters together with its results.

    Args:
        transcript_name: Saved as ``InputData.transcript_name``.
        minimum_CG: Saved as ``InputData.minimum_CG``.
        maximum_CG: Saved as ``InputData.maximum_CG``.
        maximum_offtarget: Saved as ``InputData.maximum_offtarget``.
        scaffold: Saved as ``InputData.scaffold``.
        immuno: Saved as ``InputData.immunostimulatory``.
        results: Iterable of dicts. Each dict must provide the keys
            ``'score'``, ``'frame'`` (an object with ``template()`` and
            ``id``), ``'folding'`` (a mapping with ``'path_id'``) and
            ``'found_sequence'``.

    Returns:
        list: The ``Result`` objects created from ``results``, in the same
        order, after the session has been committed.
    """
    stored_results = []
    for entry in results:
        # Values are read in the same order as the Result fields below.
        score = entry['score']
        shmir_template = entry['frame'].template()
        pdf_id = entry['folding']['path_id']
        backbone_id = entry['frame'].id
        sequence = entry['found_sequence']
        stored_results.append(
            Result(
                score=score,
                shmir=shmir_template,
                pdf=pdf_id,
                backbone=backbone_id,
                sequence=sequence,
            )
        )

    stored_input = InputData(
        transcript_name=transcript_name,
        minimum_CG=minimum_CG,
        maximum_CG=maximum_CG,
        maximum_offtarget=maximum_offtarget,
        scaffold=scaffold,
        immunostimulatory=immuno,
        results=stored_results
    )

    # The input row and its result rows are written in a single commit.
    db_session.add(stored_input)
    db_session.add_all(stored_results)
    db_session.commit()

    return stored_results
Beispiel #2
0
def seed_initial_data():
    """Populate every reference table that is currently empty.

    Four tables are checked, in this order: ``Utr``, ``HumanmRNA``,
    ``Backbone`` and ``Immuno``. A table is only filled when its row count
    is zero:

    * ``Utr`` rows come from the file returned by
      ``download.download_utr_database()``, parsed with
      ``parse_utr_database`` into ``(sequence, reference)`` pairs.
    * ``HumanmRNA`` rows come from the file returned by
      ``download.download_human_all_database()``, parsed with
      ``parse_mRNA_database`` into ``(sequence, reference)`` pairs.
    * ``Backbone`` and ``Immuno`` rows come from the literal data below.

    All additions are committed once, at the end.
    """
    # Keyword arguments for each Backbone row.
    backbone_specs = (
        dict(
            name='miR-30a',
            flanks3_s='TGCCTACTGCCTCGGACTTCAAGGGGCTACTTTAGGAGCA',
            flanks3_a='TGCTCCTAAAGTAGCCCCTTGAAGTCCGAGGCAGTAGGCA',
            flanks5_s='CTAAAGAAGGTATATTGCTGTTGACAGTGAGCGAC',
            flanks5_a='GTCGCTCACTGTCAACAGCAATATACCTTCTTTAG',
            loop_s='CTGTGAAGCCACAGATGGG',
            loop_a='CCCATCTGTGGCTTCACAG',
            miRNA_s='UGUAAACAUCCUCGACUGGAAG',
            miRNA_a='CTTCCAGTCGAGGATGTTTGCAGC',
            miRNA_length=22,
            miRNA_min=19,
            miRNA_max=25,
            miRNA_end_5=-2,
            miRNA_end_3=0,
            structure='./data/structures/miR-30a',
            homogeneity=4,
            miRBase_link=('http://www.mirbase.org/cgi-bin/mirna_entry.pl'
                          '?acc=MI0000088'),
            active_strand=3,
        ),
        dict(
            name='miR-155',
            flanks3_s=('GTGTATGATGCCTGTTACTAGCATTCACATGGAACAAATTGCTGCTGCCGTGGG'
                       'AGGATGACAAAGA'),
            flanks3_a=('TCTTTGTCATCCTCCCACGGCAGCAGCAATTTGTTCCATGTGAATGCTAGTAAC'
                       'AGGCATCATACAC'),
            flanks5_s='AGGCTTGCTGTAGGCTGTATGCTG',
            flanks5_a='CAGCATACCTACAGCAAGCCT',
            loop_s='TTTTGCCTCCAACTGA',
            loop_a='TCAGTTGGAGGCAAAA',
            miRNA_s='UUAAUGCUAAUCGUGAUAGGGGU',
            miRNA_a='CUCCUACAUAUUAGCAUUAACA',
            miRNA_length=23,
            miRNA_min=20,
            miRNA_max=26,
            miRNA_end_5=-2,
            miRNA_end_3=1,
            structure='./data/structures/miR-155',
            homogeneity=5,
            miRBase_link=('http://www.mirbase.org/cgi-bin/mirna_entry.pl'
                          '?acc=MIMAT0000646'),
            active_strand=5,
        ),
        dict(
            name='miR-21',
            flanks3_s='CTGACATTTTGGTATCTTTCATCTGACCATCCATATCCAATGTTCTCATT',
            flanks3_a='AATGAGAACATTGGATATGGATGGTCAGATGAAAGATACCAAAATGTCAG',
            flanks5_s='TACCATCGTGACATCTCCATGGCTGTACCACCTTGTCGGG',
            flanks5_a='CCCGACAAGGTGGTACAGCCATGGAGATGTCACGATGGTA',
            loop_s='CTGTTGAATCTCATGG',
            loop_a='CCATGAGATTCAACAG',
            miRNA_s='UAGCUUAUCAGACUGAUGUUGA',
            miRNA_a='CAACACCAGUCGAUGGGCUGU',
            miRNA_length=22,
            miRNA_min=19,
            miRNA_max=24,
            miRNA_end_5=-1,
            miRNA_end_3=1,
            structure='./data/structures/miR-21',
            homogeneity=4,
            miRBase_link=('http://www.mirbase.org/cgi-bin/mirna_entry.pl'
                          '?acc=MI0000077'),
            active_strand=5,
        ),
        dict(
            name='miR-122',
            flanks3_s='GCTACTGCTAGGCAATCCTTCCCTCGATAAATGTCTTGGCATCGTTTGCTT',
            flanks3_a='AAGCAAACGATGCCAAGACATTTATCGAGGGAAGGATTGCCTAGCAGTAGC',
            flanks5_s='TGGAGGTGAAGTTAACACCTTCGTGGCTACAGAGTTTCCTTAGCAGAGCTG',
            flanks5_a='CAGCTCTGCTAAGGAAACTCTGTAGCCACGAAGGTGTTAACTTCACCTCCA',
            loop_s='TGTCTAAACTATCA',
            loop_a='TGATAGTTTAGACA',
            miRNA_s='UGGAGUGUGACAAUGGUGUUUG',
            miRNA_a='AACGCCAUUAUCACACUAAAUA',
            miRNA_length=22,
            miRNA_min=21,
            miRNA_max=23,
            miRNA_end_5=-2,
            miRNA_end_3=2,
            structure='./data/structures/miR-122',
            homogeneity=5,
            miRBase_link=('http://www.mirbase.org/cgi-bin/mirna_entry.pl'
                          '?acc=MI0000442'),
            active_strand=5,
        ),
        dict(
            name='miR-31',
            flanks3_s='CTTTCCTGTCTGACAGCAGCTTGGCTACCTCCGTCCTGTTCCTCCTTGTCTT',
            flanks3_a='AAGACAAGGAGGAACAGGACGGAGGTAGCCAAGCTGCTGTCAGACAGGAAAG',
            flanks5_s='CATAACAACGAAGAGGGATGGTATTGCTCCTGTAACTCGGAACTGGAGAGG',
            flanks5_a='CCTCTCCAGTTCCGAGTTACAGGAGCAATACCATCCCTCTTCGTTGTTATG',
            loop_s='GTTGAACTGGGAACC',
            loop_a='GGTTCCCAGTTCAAC',
            miRNA_s='AGGCAAGAUGCUGGCAUAGCU',
            miRNA_a='UGCUAUGCCAACAUAUUGCCAU',
            miRNA_length=21,
            miRNA_min=19,
            miRNA_max=23,
            miRNA_end_5=-1,
            miRNA_end_3=1,
            structure='./data/structures/miR-31',
            homogeneity=4,
            miRBase_link=('http://www.mirbase.org/cgi-bin/mirna_entry.pl'
                          '?acc=MI0000089'),
            active_strand=5,
        ),
        dict(
            name='miR-26a',
            flanks3_s='GGGACGC',
            flanks3_a='GCGTCCC',
            flanks5_s='GTGGCCTCG',
            flanks5_a='CGAGGCCAC',
            loop_s='GTGCAGGTCCCAATGGG',
            loop_a='CCCATTGGGACCTGCAC',
            miRNA_s='UUCAAGUAAUCCAGGAUAGGCU',
            miRNA_a='CCUAUUCUUGGUUACUUGCACG',
            miRNA_length=22,
            miRNA_min=21,
            miRNA_max=23,
            miRNA_end_5=-2,
            miRNA_end_3=2,
            structure='./data/structures/miR-26a',
            homogeneity=4,
            miRBase_link=('http://www.mirbase.org/cgi-bin/mirna_entry.pl'
                          '?acc=MI0000083'),
            active_strand=5,
        ),
        dict(
            name='miR-106b',
            flanks3_s='TCCAGCAGG',
            flanks3_a='CCTGCTGGA',
            flanks5_s='CCTGCCGGGGC',
            flanks5_a='GCCCCGGCAGG',
            loop_s='AGTGGTCCTCTCCGTGCTA',
            loop_a='TAGCACGGAGAGGACCACT',
            miRNA_s='UAAAGUGCUGACAGUGCAGAU',
            miRNA_a='CCGCACUGUGGGUACUUGCUGC',
            miRNA_length=21,
            miRNA_min=20,
            miRNA_max=22,
            miRNA_end_5=-3,
            miRNA_end_3=2,
            structure='./data/structures/miR-106b',
            homogeneity=2,
            miRBase_link=('http://www.mirbase.org/cgi-bin/mirna_entry.pl'
                          '?acc=MI0000734'),
            active_strand=0,
        ),
    )
    backbones = [Backbone(**spec) for spec in backbone_specs]

    # (sequence, receptor, link) for each Immuno row.
    immuno_rows = (
        ('UGUGU', 'TLR7 and TLR8',
         'http://www.ncbi.nlm.nih.gov/pubmed/16609928'),
        ('GUCCUUCAA', 'TLR7 and TLR8',
         'http://www.ncbi.nlm.nih.gov/pubmed/15723075'),
        ('GU', 'TLR7 and TLR8',
         'http://www.ncbi.nlm.nih.gov/pubmed/16609928'),
        ('AU', 'TLR8',
         'http://www.ncbi.nlm.nih.gov/pubmed/18322178'),
        ('UGGC', '',
         'http://www.ncbi.nlm.nih.gov/pubmed/16682561'),
        ('UUUUU', '',
         'http://www.ncbi.nlm.nih.gov/pubmed/15778705'),
    )
    immunos = [
        Immuno(sequence=motif, receptor=receptor, link=link)
        for motif, receptor, link in immuno_rows
    ]

    # Downloaded datasets: only fetched when the target table has no rows.
    if not db_session.query(Utr).count():
        utr_path = download.download_utr_database()
        for sequence, reference in parse_utr_database(utr_path):
            db_session.add(Utr(sequence=sequence, reference=reference))

    if not db_session.query(HumanmRNA).count():
        mrna_path = download.download_human_all_database()
        for sequence, reference in parse_mRNA_database(mrna_path):
            db_session.add(HumanmRNA(sequence=sequence, reference=reference))

    # Literal datasets defined above.
    if not db_session.query(Backbone).count():
        db_session.add_all(backbones)
    if not db_session.query(Immuno).count():
        db_session.add_all(immunos)

    db_session.commit()
Beispiel #3
0
def shmir_from_transcript_sequence(
    transcript_name, minimum_CG, maximum_CG, maximum_offtarget, scaffold,
    stimulatory_sequences
):
    """Generate sh-miR(s) from a transcript sequence.

    If an ``InputData`` row with the same parameters already exists (string
    parameters are compared case-insensitively), its stored results are
    returned without recomputation. Otherwise the mRNA is fetched, candidate
    sequences are validated on the "blast" queue, scored on the "score"
    queue, and the ten best-scoring results are stored and returned.

    Args:
        transcript_name(str): Name of transcript.
        minimum_CG(int): Minimum number of 'C' and 'G' nucleotide in sequence.
        maximum_CG(int): Maximum number of 'C' and 'G' nucleotide in sequence.
        maximum_offtarget(int): Maximum offtarget.
        scaffold(str): Name of frame of miRNA or 'all' (case-insensitive).
        stimulatory_sequences(str): One of 'yes', 'no', 'no_difference'.

    Returns:
        list of sh-miR(s), each as returned by ``Result.as_json()``.
    """
    # check if results are in database
    # NOTE(review): Query.one() also raises MultipleResultsFound when more
    # than one matching InputData row exists; that case is not handled here.
    try:
        stored_input = db_session.query(InputData).filter(
            func.lower(InputData.transcript_name) == transcript_name.lower(),
            InputData.minimum_CG == minimum_CG,
            InputData.maximum_CG == maximum_CG,
            InputData.maximum_offtarget == maximum_offtarget,
            func.lower(InputData.scaffold) == scaffold.lower(),
            func.lower(
                InputData.stimulatory_sequences
            ) == stimulatory_sequences.lower()
        ).outerjoin(InputData.results).one()
    except NoResultFound:
        pass
    else:
        return [result.as_json() for result in stored_input.results]

    # create path string
    path = "_".join(
        map(
            str,
            [transcript_name, minimum_CG, maximum_CG, maximum_offtarget,
             scaffold, stimulatory_sequences]
        )
    )

    mRNA = ncbi_api.get_mRNA(transcript_name)

    # Compare case-insensitively, consistent with the cache lookup above and
    # the backbone name filter below; otherwise e.g. 'ALL' would be looked up
    # as a backbone name and match nothing.
    if scaffold.lower() == 'all':
        original_frames = db_session.query(Backbone).all()
    else:
        original_frames = db_session.query(Backbone).filter(
            func.lower(Backbone.name) == scaffold.lower()
        ).all()

    frames_by_name = {frame.name: frame for frame in original_frames}

    # Per frame: its regexp patterns (stored as JSON), ordered by key in
    # descending order.
    patterns = {
        frame.name: OrderedDict(
            sorted(
                json.loads(frame.regexp).items(),
                reverse=True
            )
        ) for frame in original_frames
    }

    best_sequences = defaultdict(list)

    for name, patterns_dict in patterns.iteritems():
        found_by_type = find_by_patterns(patterns_dict, mRNA)
        for regexp_type, sequences in found_by_type.iteritems():
            with allow_join_result():
                is_empty, sequences = generator_is_empty(sequences)
                if not is_empty:
                    # NOTE(review): this assignment replaces whatever an
                    # earlier regexp type stored for the same frame, so only
                    # the last non-empty type is kept. Confirm whether
                    # accumulating (extend) was intended.
                    best_sequences[name] = remove_none(
                        group(
                            validate_and_offtarget.s(
                                sequence,
                                maximum_offtarget,
                                minimum_CG,
                                maximum_CG,
                                stimulatory_sequences,
                                int(regexp_type)
                            ).set(queue="blast")
                            for sequence in sequences
                        ).apply_async().get()
                    )

    results = []
    for name, seq_dict in unpack_dict_to_list(best_sequences):
        # Stop once enough candidates were collected. ``>=`` is required
        # because ``extend`` below may add several items at once and step
        # over exactly 20.
        if len(results) >= 20:
            break
        with allow_join_result():
            shmir_result = shmir_from_fasta_string.s(
                seq_dict['sequence'],
                [frames_by_name[name]],
                seq_dict['offtarget'],
                seq_dict['regexp'],
                path
            ).set(queue="score").apply_async().get()

            if shmir_result:
                results.extend(shmir_result)

    # Fallback when the pattern-based search produced nothing: validate the
    # sequences from all_possible_sequences(mRNA, 19, 21) with regexp type 0
    # and score them against every selected frame.
    if not results:
        best_sequences = []
        sequences = all_possible_sequences(mRNA, 19, 21)

        with allow_join_result():
            is_empty, sequences = generator_is_empty(sequences)
            if not is_empty:
                best_sequences = remove_none(
                    group(
                        validate_and_offtarget.s(
                            sequence,
                            maximum_offtarget,
                            minimum_CG,
                            maximum_CG,
                            stimulatory_sequences,
                            0
                        ).set(queue="blast")
                        for sequence in sequences
                    ).apply_async().get()
                )

        if best_sequences:
            with allow_join_result():
                # Flattens the per-sequence result lists; the resulting
                # iterator is consumed once by sorted() below.
                results = chain(*remove_none(
                    group(
                        shmir_from_fasta_string.s(
                            seq_dict['sequence'], original_frames,
                            seq_dict['offtarget'], seq_dict['regexp'], path
                        ).set(queue="score")
                        for seq_dict in best_sequences
                    ).apply_async().get()
                ))

    # Keep the ten highest-scoring results (score is the first tuple item).
    sorted_results = sorted(
        results,
        key=operator.itemgetter(0),
        reverse=True
    )[:10]
    db_results = [Result(
        score=score,
        sh_mir=shmir,
        pdf=path_id,
        backbone=frames_by_name[frame_name].id,
        sequence=found_sequences[0],
    ) for score, shmir, frame_name, path_id, found_sequences in sorted_results]

    # Passes the task ids of the kept results; presumably foldings under
    # ``path`` that belong to other tasks are removed (verify in helper).
    remove_bad_foldings(path, (result.get_task_id() for result in db_results))

    db_input = InputData(
        transcript_name=transcript_name,
        minimum_CG=minimum_CG,
        maximum_CG=maximum_CG,
        maximum_offtarget=maximum_offtarget,
        scaffold=scaffold,
        stimulatory_sequences=stimulatory_sequences,
        results=db_results
    )
    db_session.add(db_input)
    db_session.add_all(db_results)
    db_session.commit()

    return [result.as_json() for result in db_results]