def run():
    """Annotate the step 6 peptides with BLASTp hits against known sequences.

    The query file ``./02-pipeline/step6.fna`` is searched against the BLAST
    database obtained for the known sequences. Every parsed hit is handed to
    ``mysqlpop.annotated`` and the full hit list is passed to ``output.csv``
    for ``./02-pipeline/step7.csv``.
    """
    query_fasta = './02-pipeline/step6.fna'

    # Retrieve known sequences.
    knownseqfile = known()

    # Check if a BLAST DB of the known sequences already exists; make it if
    # it doesn't.
    blastdb = blast.check(knownseqfile)

    # BLAST unique mature peptides against known sequences.
    blastoutfile = './02-pipeline/step7_blastp.csv'
    blast.blastp(blastdb, query_fasta, blastoutfile)

    # Parse the BLASTp output file.
    # Row layout: [cds_id, known sequence, %ID, length, evalue]; only the
    # cds_id is coerced to a string.
    print('Parsing BLASTp output...\n')
    blastout = [
        [str(hit[0]), hit[1], hit[2], hit[3], hit[4]]
        for hit in blast.parse(blastoutfile)
    ]

    # Populate the SQLite Annotated table with BLASTp results. The values
    # returned by mysqlpop.annotated() are summed to report the entry count.
    print('Populating the Annotated table in the SQLite DB.')
    count = sum(mysqlpop.annotated(hit) for hit in blastout)
    print('{} hits have been entered in the SQLite annotated table.'.format(
        count))
    print('DATA ENTRY INTO MYSQL ANNOTATED TABLE IS COMPLETE')

    # Output to csv file.
    filename = './02-pipeline/step7.csv'
    header = ['cds_id', 'knownNP id', 'PID', 'length', 'evalue']
    output.csv(filename, header, blastout)
    # Message grammar fixed: it previously read "have were written".
    print('The BLASTp results have been written to {}.\n'.format(filename))
def run():
    """Export the seqreads table entries for ``./02-pipeline/step2.csv``.

    The first three fields of every row yielded by ``mysqlout.seqreads()``
    are converted to strings and passed to ``output.csv`` together with the
    header ``seqreads id, sequence, profile-HMM name``.
    """
    # Retrieve seqreads table entries from the SQLite DB.
    seqreads = [
        [str(row[0]), str(row[1]), str(row[2])]
        for row in mysqlout.seqreads()
    ]

    # Output to csv file.
    filename = './02-pipeline/step2.csv'
    header = ['seqreads id', 'sequence', 'profile-HMM name']
    output.csv(filename, header, seqreads)
    print('The hits (MySQL id and sequence) have been written to the file {}.\n'.format(
        filename))
def run():
    """Run SignalP over the step 3 sequences and hand the results to step 4.

    For every item yielded by ``CDS('./02-pipeline/step3.csv')`` the first
    element of the ``signal()`` result is collected; the collected rows are
    passed to ``output.csv`` for ``./02-pipeline/step4.csv``.
    """
    source_csv = './02-pipeline/step3.csv'
    print('Running SignalP...\n')

    # Retrieve sequences from the step 3 output and run SignalP on each one,
    # using the cutoff and minimum length from the configuration.
    results = [
        signal(cds, config.C['sp_cutoff'], config.C['sp_min_length'])[0]
        for cds in CDS(source_csv)
    ]

    # Output to csv file.
    target_csv = './02-pipeline/step4.csv'
    columns = ['cds_id', 'signal peptide position', 'nosignalpeptide']
    output.csv(target_csv, columns, results)
    print('\nSignalP results have been written to the file {}.\n'.format(
        target_csv))
def output_data(stuff):
    """Dump the data appropriately.

    A pickle backup is always written to ``OUTPUT_TWEETS_<session_id>.pkl``.
    ``config["output"]`` then selects an additional export: ``"json"`` or
    ``"csv"`` call the matching ``output`` function, ``"pickle"`` does nothing
    more, and any other value prints a notice that only the pickle exists.

    NOTE(review): ``stuff`` is accepted but never read; the function
    serializes the ``tweets`` name from the enclosing scope instead. Confirm
    whether the argument was meant to be the data being dumped.
    """
    # The session id is the current timestamp with its space replaced by a dot.
    session_id = str(datetime.datetime.now()).replace(" ", ".")

    # pickle is always output as a backup; the context manager closes the
    # file handle explicitly instead of leaving it to garbage collection.
    with open("OUTPUT_TWEETS_%s.pkl" % session_id, "wb") as backup:
        dump(tweets, backup)

    output_format = config["output"]
    if output_format == "json":
        output.json(tweets, session_id)
    elif output_format == "csv":
        output.csv(tweets, session_id)
    elif output_format == "pickle":
        # Nothing further to do: the backup above is the requested output.
        pass
    else:
        print("Probably should have told you this before, but your output")
        print("format is unrecognized. Nevertheless, a serialized python pickle")
        print("has been dumped in its stead.")
        print("")
def run(evaluecutoff):
    """Find mature peptides in the step 4 results and hand them to step 5.

    evaluecutoff -- converted with ``float()`` and forwarded to
                    ``mature.findmat`` for every step 4 entry.

    Each non-None ``findmat`` result is iterated and its items are collected
    into one flat list, which is passed to ``output.csv`` for
    ``./02-pipeline/step5.csv``.
    """
    source_csv = './02-pipeline/step4.csv'

    # Find mature sequences; entries for which findmat() gives None are
    # skipped.
    found = []
    for entry in sp(source_csv):
        result = mature.findmat(
            entry,
            path_fasta(),
            float(evaluecutoff),
            config.C['mature_min_length'],
            config.C['mature_max_length'])
        if result is None:
            continue
        found.append(result)

    # Output to csv file: flatten the per-entry groups into a single row list.
    target_csv = './02-pipeline/step5.csv'
    columns = ['cdsid_mature#', 'mature sequence']
    rows = [item for group in found for item in group]
    output.csv(target_csv, columns, rows)
    print('The mature peptides were written to the file {}.\n'.format(target_csv))
def run():
    """Hand the final table to ``./02-pipeline/step8.csv`` and run the analysis.

    The first element returned by ``mysqlout.final`` for the configured
    neuropeptide family is used as the header and the remaining elements as
    the rows passed to ``output.csv``; ``analysis()`` is called afterwards.
    """
    target_csv = './02-pipeline/step8.csv'
    table = mysqlout.final(config.C['neuropeptide_family'])

    # Split the result into its leading header entry and the rows after it.
    header, rows = table[0], table[1:]
    output.csv(target_csv, header, rows)

    analysis()
    print('The output files have been written.')