Esempio n. 1
0
def run():
    """BLAST the step-6 sequences against the known sequences and record the hits.

    Steps, in order:
      1. Obtain the known-sequence file from ``known()``.
      2. Hand it to ``blast.check`` to get the BLAST database to search.
      3. Run ``blast.blastp`` on ``./02-pipeline/step6.fna`` against that
         database, writing ``./02-pipeline/step7_blastp.csv``.
      4. Parse that file, pass every hit to ``mysqlpop.annotated`` and write
         all hits to ``./02-pipeline/step7.csv``.

    Progress messages are printed along the way; nothing is returned.
    """
    file_step1 = './02-pipeline/step6.fna'
    blastoutfile = './02-pipeline/step7_blastp.csv'
    filename = './02-pipeline/step7.csv'
    header = ['cds_id', 'knownNP id', 'PID', 'length', 'evalue']

    # Retrieve known sequences.
    knownseqfile = known()
    # Per the original note: checks whether a BLAST DB of the known sequences
    # already exists and makes one if it doesn't.
    blastdb = blast.check(knownseqfile)
    # BLAST unique mature peptides against known sequences.
    blast.blastp(blastdb, file_step1, blastoutfile)

    # Parse the BLASTp output file.
    print('Parsing BLASTp output...\n')
    # Each hit is [cds_id, known sequence, %ID, length, evalue]; only the
    # cds_id is coerced to a string.
    blastout = [[str(hit[0]), hit[1], hit[2], hit[3], hit[4]]
                for hit in blast.parse(blastoutfile)]

    # Populate the annotated table with the BLASTp results.
    # NOTE(review): the messages below mention both SQLite and MySQL; they are
    # left as-is because the backing store is not visible from here.
    print('Populating the Annotated table in the SQLite DB.')
    # ``mysqlpop.annotated`` returns a number that is added to the hit count.
    count = sum(mysqlpop.annotated(hit) for hit in blastout)
    print('{} hits have been entered in the SQLite annotated table.'.format(
        count))
    print('DATA ENTRY INTO MYSQL ANNOTATED TABLE IS COMPLETE')

    output.csv(filename, header, blastout)
    # Message grammar fixed (was "have were written").
    print('The BLASTp results were written to {}.\n'.format(filename))
Esempio n. 2
0
def run():
    """Export the seqreads entries to ``./02-pipeline/step2.csv``.

    Every row yielded by ``mysqlout.seqreads()`` has its first three fields
    converted to strings; the rows are then written through ``output.csv``
    under the header ``seqreads id``, ``sequence``, ``profile-HMM name``.
    A confirmation message is printed; nothing is returned.
    """
    # Retrieve seqreads table entries from the database.
    seqreads = [[str(row[0]), str(row[1]), str(row[2])]
                for row in mysqlout.seqreads()]

    # Output to csv file. (The previously assigned but never-read step1.csv
    # path was dropped: this step reads from the database only.)
    filename = './02-pipeline/step2.csv'
    header = ['seqreads id', 'sequence', 'profile-HMM name']
    output.csv(filename, header, seqreads)
    print('The hits (MySQL id and sequence) have been written to the file {}.\n'.format(
        filename))
Esempio n. 3
0
def run():
    """Run SignalP over the step-3 entries and save the results as step 4.

    Each item produced by ``CDS('./02-pipeline/step3.csv')`` is passed to
    ``signal`` together with the ``sp_cutoff`` and ``sp_min_length`` settings
    from ``config.C``. The first element of every ``signal`` result is
    collected and written to ``./02-pipeline/step4.csv`` via ``output.csv``.
    Progress messages are printed; nothing is returned.
    """
    source_csv = './02-pipeline/step3.csv'
    target_csv = './02-pipeline/step4.csv'
    columns = ['cds_id', 'signal peptide position', 'nosignalpeptide']

    print('Running SignalP...\n')

    # Settings are looked up per item, exactly as the loop form did.
    results = [
        signal(cds, config.C['sp_cutoff'], config.C['sp_min_length'])[0]
        for cds in CDS(source_csv)
    ]

    output.csv(target_csv, columns, results)
    print('\nSignalP results have been written to the file {}.\n'.format(
        target_csv))
Esempio n. 4
0
def output_data(stuff):
    """Dump the collected tweets: always as a pickle, optionally in another format.

    A pickle named ``OUTPUT_TWEETS_<session_id>.pkl`` is always written as a
    backup, where ``session_id`` is the current timestamp with the space
    replaced by a dot. Depending on ``config["output"]`` the data is then also
    handed to ``output.json`` or ``output.csv``; ``"pickle"`` needs nothing
    further, and any other value prints a notice that only the pickle exists.

    Args:
        stuff: Currently unused; the function reads the name ``tweets`` from
            the surrounding scope instead.
    """
    # NOTE(review): ``stuff`` is never read while ``tweets`` is a free name.
    # Presumably callers pass the same object -- confirm before switching the
    # body over to the parameter.

    # pickle is always output as a backup
    session_id = str(datetime.datetime.now()).replace(" ", ".")
    # Use a context manager so the backup file is flushed and closed
    # deterministically (the handle used to be left open).
    with open("OUTPUT_TWEETS_%s.pkl" % session_id, "wb") as backup:
        dump(tweets, backup)

    output_format = config["output"]
    if output_format == "json":
        output.json(tweets, session_id)
    elif output_format == "csv":
        output.csv(tweets, session_id)
    elif output_format == "pickle":
        # The backup written above already is the requested output.
        pass
    else:
        print("Probably should have told you this before, but your output")
        print("format is unrecognized. Nevertheless, a serialized python pickle")
        print("has been dumped in its stead.")
        # Trailing blank line, as before.
        print("")
Esempio n. 5
0
def run(evaluecutoff):
    """Find mature sequences for the step-4 entries and write them to step 5.

    Each item produced by ``sp('./02-pipeline/step4.csv')`` is passed to
    ``mature.findmat`` along with ``path_fasta()``, the cutoff converted to
    ``float`` and the ``mature_min_length`` / ``mature_max_length`` settings
    from ``config.C``. ``None`` results are skipped; the remaining results are
    flattened by one level and written to ``./02-pipeline/step5.csv``.

    Args:
        evaluecutoff: Cutoff value; converted with ``float`` before use.
    """
    source_csv = './02-pipeline/step4.csv'
    target_csv = './02-pipeline/step5.csv'
    columns = ['cdsid_mature#', 'mature sequence']

    # Phase 1: collect every non-None result of the mature-sequence search.
    groups = []
    for entry in sp(source_csv):
        found = mature.findmat(entry, path_fasta(), float(evaluecutoff),
                               config.C['mature_min_length'],
                               config.C['mature_max_length'])
        if found is None:
            continue
        groups.append(found)

    # Phase 2: flatten the per-entry groups into a single list of rows.
    rows = [item for group in groups for item in group]

    output.csv(target_csv, columns, rows)
    print('The mature peptides were written to the file {}.\n'.format(target_csv))
Esempio n. 6
0
def run():
    """Write the final table to ``./02-pipeline/step8.csv`` and run the analysis.

    ``mysqlout.final`` is called with the ``neuropeptide_family`` setting from
    ``config.C``; the first element of its result is used as the CSV header
    and the remaining elements as the rows. ``analysis()`` is invoked
    afterwards and a completion message is printed.
    """
    target_csv = './02-pipeline/step8.csv'
    table = mysqlout.final(config.C['neuropeptide_family'])
    header_row, data_rows = table[0], table[1:]
    output.csv(target_csv, header_row, data_rows)
    analysis()
    print('The output files have been written.')