def do_work(asm_tuple):
    """Gather the gene features of one assembly into a single DataFrame.

    ``asm_tuple`` is unpacked as ``(asm_suffix, asm_access)``.  The GenBank
    file that gets parsed is
    ``<path>/<asm_access>_<asm_suffix>/<asm_access>_<asm_suffix>_genomic.gbff``,
    where ``path`` is a name defined outside this function.

    Each parsed record is handed, together with its ``seq``, to
    ``cairi.extract_genes_features``; every result is wrapped in a
    ``pd.DataFrame`` and the frames are concatenated with a fresh index.

    Returns the concatenated frame with an added ``assembly_accession``
    column holding ``asm_access`` in every row.
    """
    suffix, accession = asm_tuple
    root_name = '_'.join((accession, suffix))
    assembly_dir = os.path.join(path, root_name)
    # Original author's note: the file is already known to exist, and the
    # matching *.fna is not needed because the GenBank file already carries
    # the nucleotide sequences.
    genbank_path = os.path.join(
        assembly_dir, '_'.join((root_name, 'genomic.gbff')))

    # Lazy pipeline: one record is parsed, processed and framed at a time
    # while pd.concat drains the generator.
    frames = (
        pd.DataFrame(cairi.extract_genes_features(record, record.seq))
        for record in SeqIO.parse(genbank_path, format='genbank')
    )
    combined = pd.concat(frames, ignore_index=True)

    # A scalar assignment fills the whole column with the same accession.
    combined['assembly_accession'] = accession
    return combined
def do_work(asm_tuple):
    """Build one feature table for the assembly described by ``asm_tuple``.

    The tuple is unpacked as ``(asm_suffix, asm_access)``; the two parts are
    joined as ``<asm_access>_<asm_suffix>`` to name both the assembly
    directory (under the externally defined ``path``) and the
    ``..._genomic.gbff`` file inside it.

    Every record read from that file with ``SeqIO.parse(..., 'genbank')`` is
    passed with its ``seq`` to ``cairi.extract_genes_features``.  The
    per-record results are turned into DataFrames and concatenated with
    ``ignore_index=True``.

    Returns the concatenated DataFrame, extended with an
    ``assembly_accession`` column set to ``asm_access``.
    """
    suffix, accession = asm_tuple

    assembly_name = '_'.join([accession, suffix])
    gbff_name = '_'.join([assembly_name, 'genomic.gbff'])
    assembly_dir = os.path.join(path, assembly_name)
    # Per the original author's note, the file is known to exist and the
    # separate *.fna is unnecessary since the GenBank file holds the
    # nucleotides.
    gbff_path = os.path.join(assembly_dir, gbff_name)

    records = SeqIO.parse(gbff_path, format='genbank')
    per_record_frames = [
        pd.DataFrame(cairi.extract_genes_features(rec, rec.seq))
        for rec in records
    ]
    table = pd.concat(per_record_frames, ignore_index=True)

    # Same accession value in every row of the new column.
    table['assembly_accession'] = accession
    return table
def do_work(seqrecid):
    """Extract gene features for the record stored under ``seqrecid``.

    Looks the record up in ``dbx`` (defined outside this function) and
    returns whatever ``cairi.extract_genes_features`` yields for that record
    and its ``seq`` attribute.
    """
    record = dbx[seqrecid]
    sequence = record.seq
    return cairi.extract_genes_features(record, sequence)
def do_work(seqrecid):
    """Run feature extraction on a single record identified by ``seqrecid``.

    The record is fetched from the externally defined ``dbx`` mapping-like
    object by subscript; it is then passed along with its ``seq`` to
    ``cairi.extract_genes_features``, whose result is returned unchanged.
    """
    entry = dbx[seqrecid]
    return cairi.extract_genes_features(entry, entry.seq)