def do_work(asm_tuple):
    """Collect extracted gene features for one assembly into a DataFrame.

    ``asm_tuple`` is unpacked as ``(asm_suffix, asm_access)``. Spaces in the
    suffix are replaced with underscores, and the GenBank file is looked up at
    ``<path>/<access>_<suffix>/<access>_<suffix>_genomic.gbff``, where ``path``
    is a module-level name.

    Every record parsed from that file is passed, together with its ``seq``,
    to ``cairi.extract_genes_features``; each result is wrapped in a
    DataFrame and all of them are concatenated with a fresh integer index.
    An ``assembly_accession`` column holding ``asm_access`` on every row is
    added before the frame is returned.

    NOTE(review): the original comments say the GenBank files are already
    known to exist and that the ``*.fna`` files are not needed because the
    nucleotides are present in the GenBank records - confirm with the caller.
    ``pd.concat`` raises ``ValueError`` if the file yields no records.
    """
    suffix, accession = asm_tuple
    root_name = '_'.join((accession, suffix.replace(' ', '_')))
    genbank_file = os.path.join(path, root_name, root_name + '_genomic.gbff')

    frames = []
    for record in SeqIO.parse(genbank_file, format='genbank'):
        features = cairi.extract_genes_features(record, record.seq)
        frames.append(pd.DataFrame(features))

    combined = pd.concat(frames, ignore_index=True)
    # Same accession on every row, so rows stay traceable after merging.
    combined['assembly_accession'] = accession
    return combined
Ejemplo n.º 2
0
 def do_work(asm_tuple):
     """Return a DataFrame of gene features for the assembly in ``asm_tuple``.

     The tuple is unpacked as ``(asm_suffix, asm_access)``. The GenBank file
     read is ``<path>/<root>/<root>_genomic.gbff``, where ``<root>`` is
     ``asm_access`` joined by an underscore to ``asm_suffix`` (spaces
     replaced by underscores) and ``path`` is a module-level name.

     Each parsed record and its ``seq`` go through
     ``cairi.extract_genes_features``; the per-record results become
     DataFrames that are concatenated with ``ignore_index=True``. The
     returned frame gets an ``assembly_accession`` column filled with
     ``asm_access``.

     NOTE(review): per the original comments the files are assumed to exist
     already, and the ``*.fna`` files are skipped because the GenBank
     records carry the nucleotides - confirm against the caller.
     """
     asm_suffix, asm_access = asm_tuple
     root = '_'.join([asm_access, asm_suffix.replace(' ', '_')])
     genbank_path = os.path.join(path, root, root + '_genomic.gbff')

     per_record = [
         pd.DataFrame(cairi.extract_genes_features(rec, rec.seq))
         for rec in SeqIO.parse(genbank_path, format='genbank')
     ]

     table = pd.concat(per_record, ignore_index=True)
     # One identical value per row: tags every feature with its assembly.
     table['assembly_accession'] = asm_access
     return table
 def do_work(seqrecid):
     """Extract gene features for the record stored under ``seqrecid``.

     The record is fetched once from the module-level ``dbx`` mapping and
     handed, together with its ``seq`` attribute, to
     ``cairi.extract_genes_features``, whose result is returned unchanged.
     """
     record = dbx[seqrecid]
     return cairi.extract_genes_features(record, record.seq)
 def do_work(seqrecid):
     """Look up ``seqrecid`` in ``dbx`` and return its extracted features.

     ``dbx`` is a module-level name indexed by record id. The fetched record
     and its ``seq`` are passed to ``cairi.extract_genes_features``, and that
     call's return value is passed straight back to the caller.
     """
     # Single lookup: reuse the same object for both arguments.
     entry = dbx[seqrecid]
     sequence = entry.seq
     return cairi.extract_genes_features(entry, sequence)