## Convert a set of FASTA files to GenBank format.
##
## Command-line arguments:
##   argv[1]  name of the input directory under "data/"
##   argv[2]  name of the output directory under "data/"
##
## One "<name>.gbk" file is written to the output directory for every input
## file returned by from_dir(), where <name> is the part of the input file
## name that precedes ".fas".

import re
from sys import argv

from libs.common import from_dir, ensure_dir, load_fasta, write_genbank
from Bio.Alphabet import generic_dna

origin_dir = "data/" + argv[1]
destin_dir = "data/" + argv[2] + "/"

# create the output directory before anything is written into it
ensure_dir([destin_dir])

# pattern handed to from_dir(); it matches any name that contains ".fas"
fasta_pattern = re.compile(r'.*\.fas.*')

for filename in from_dir(origin_dir, fasta_pattern):
    # record name = file name truncated at the first ".fas"
    cut = filename.find('.fas')
    rec_name = filename[:cut]

    record = load_fasta("/".join([origin_dir, filename]))

    # label the record and mark its sequence as DNA before export
    record.name = rec_name
    record.id = rec_name
    record.seq.alphabet = generic_dna

    # make a genbank file of the contig
    gbk_file = destin_dir + rec_name + ".gbk"
    write_genbank(gbk_file, record)
# NOTE(review): this is the tail of a script whose line breaks were lost and
# whose beginning (imports, and the definitions of order, base_name,
# origin_dir, destin_file, quals, space_loc, c_note and the initial record)
# is outside this chunk.  The nesting below was reconstructed from the
# statement order -- confirm against the original file.

# annotate the first contig; its location, qualifiers and note suffix were
# prepared by the code that precedes this chunk
feature = SeqFeature(location=space_loc, type='contig',
                     id='ctg_' + str(order[0][0]) + c_note,
                     qualifiers=quals)
record.features.append(feature)
c_note = ''

# run of N's appended after each contig
spacer_seq = "NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN"

# every remaining entry of `order`: entry[0] is used to build the contig file
# name, a truthy entry[1] means the contig is reverse-complemented
for entry in order[1:]:
    ctg_num = str(entry[0])
    contig = load_fasta(origin_dir + base_name + ctg_num + ".fas")

    c_note = '_RC' if entry[1] else ''
    if c_note:
        contig = contig.reverse_complement()

    ctg_tag = 'ctg_' + ctg_num + c_note
    quals = {'locus_tag': ctg_tag}

    # the new contig occupies the span directly after the current record end
    start = len(record.seq)
    space_loc = FeatureLocation(start, start + len(contig.seq))
    feature = SeqFeature(location=space_loc, type='contig',
                         id=ctg_tag, qualifiers=quals)
    record.features.append(feature)

    record += contig
    # NOTE(review): the spacer is assumed to be appended once per contig,
    # i.e. inside this loop -- confirm
    record += spacer_seq

# label the concatenated record and write it out
record.id = record.name = argv[2]
record.seq.alphabet = generic_dna
write_genbank(destin_file, record)
# NOTE(review): the line below is the tail of a script whose line breaks were
# lost.  Its beginning (imports, loop headers, and the definitions of feat_loc,
# strand_pos, quals, counter, record, rec_name and annot_gbk) is not visible
# here, so the original nesting has to be restored from the real file.  As
# written, everything after the first '#' on the line is parsed as a comment.
#
# Visible statements, in order:
#   1. build a 'CDS' SeqFeature with id 'cds_<counter>', append it to
#      record.features and increment counter (presumably the end of a
#      per-ORF loop body -- confirm);
#   2. compile the N-run separator literal (the original comment calls it
#      "Nx100") as a case-insensitive regex, collect the start offset of every
#      match in str(record.seq), and append a 'spacer' SeqFeature spanning
#      FeatureLocation(start, start+100) for each one; the `sequence`
#      variable is assigned but not used in the visible code;
#   3. set record.description to rec_name+"_with_ORFs", record.name to
#      rec_name, record.dbxrefs to a one-element "Project: ..." list, and the
#      sequence alphabet to generic_dna, then call
#      write_genbank(annot_gbk, record);
#   4. print "OK" using the Python 2 print statement.
feature = SeqFeature(location=feat_loc, strand=strand_pos, id='cds_'+str(counter), type='CDS', qualifiers=quals) record.features.append(feature) counter +=1 # add annotations for Nx100 spacers sequence = str(record.seq) separator = "NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN" regex = re.compile(separator, re.IGNORECASE) spacers = [match.start() for match in regex.finditer(str(record.seq))] #print spacers for spacer in spacers: space_loc = FeatureLocation(spacer, spacer+100) feature = SeqFeature(location=space_loc, type='spacer') record.features.append(feature) # save record with annotations record.description = rec_name+"_with_ORFs" record.name = rec_name record.dbxrefs = ["Project: "+argv[1]+"/"+rec_name] record.seq.alphabet = generic_dna write_genbank(annot_gbk, record) print "OK"