Esempio n. 1
0
## script to convert sets of fasta files to genbank format

import re
from sys import argv
from libs.common import from_dir, ensure_dir, load_fasta, write_genbank
from Bio.Alphabet import generic_dna

# Both folders live under data/; their names are taken from the first and
# second command-line arguments respectively.
origin_dir = "data/" + argv[1]
destin_dir = "data/" + argv[2] + "/"

# hand the output folder to ensure_dir before anything is written into it
ensure_dir([destin_dir])

# file names are selected by from_dir using a pattern that requires ".fas"
# somewhere in the name
fasta_pattern = re.compile(r'.*\.fas.*')
filenames = from_dir(origin_dir, fasta_pattern)

for filename in filenames:
    # record name = the part of the file name preceding the first ".fas"
    rec_name = filename[:filename.find('.fas')]
    record = load_fasta(origin_dir + "/" + filename)

    # label the record after its source file and mark the sequence as DNA
    record.id = rec_name
    record.name = rec_name
    record.seq.alphabet = generic_dna

    # write the record out as <destination>/<record name>.gbk
    gbk_file = destin_dir + rec_name + ".gbk"
    write_genbank(gbk_file, record)

Esempio n. 2
0
# register a 'contig' feature for the first entry of order; space_loc, quals
# and c_note are expected to have been set earlier in the script
feature = SeqFeature(
    location=space_loc,
    type='contig',
    id='ctg_' + str(order[0][0]) + c_note,
    qualifiers=quals)
record.features.append(feature)

c_note = ''

# append each remaining contig to the record, followed by a run of Ns
for index in order[1:]:
    filename = origin_dir + base_name + str(index[0]) + ".fas"
    new_rec = load_fasta(filename)

    # a truthy second field asks for the reverse complement, tagged '_RC'
    c_note = '_RC' if index[1] else ''
    if c_note:
        new_rec = new_rec.reverse_complement()
    ctg_tag = 'ctg_' + str(index[0]) + c_note

    # the incoming contig starts at the current end of the record
    ctg_start = len(record.seq)
    quals = {'locus_tag': ctg_tag}
    space_loc = FeatureLocation(ctg_start, ctg_start + len(new_rec.seq))
    feature = SeqFeature(location=space_loc, type='contig',
                         id=ctg_tag, qualifiers=quals)
    record.features.append(feature)

    record += new_rec
    record += "NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN"

# name the assembled record after the second command-line argument
record.id = record.name = argv[2]
record.seq.alphabet = generic_dna

write_genbank(destin_file, record)

Esempio n. 3
0
        # build a 'CDS' feature at feat_loc on the given strand, with an id
        # derived from the running counter, and attach it to the record
        feature = SeqFeature(location=feat_loc,
                             strand=strand_pos,
                             id='cds_'+str(counter),
                             type='CDS',
                             qualifiers=quals)
        record.features.append(feature)
        # advance the counter so the next feature gets a different id
        counter +=1

    # add annotations for Nx100 spacers: every run of 100 Ns (upper or lower
    # case) found in the sequence gets its own 'spacer' feature
    sequence = str(record.seq)
    separator = "N" * 100
    regex = re.compile(separator, re.IGNORECASE)
    # finditer reports non-overlapping matches, scanning left to right
    spacers = [match.start() for match in regex.finditer(sequence)]
    for spacer in spacers:
        # derive the feature width from the separator itself so the two
        # cannot drift apart
        space_loc = FeatureLocation(spacer, spacer + len(separator))
        feature = SeqFeature(location=space_loc,
                             type='spacer')
        record.features.append(feature)

    # save record with annotations
    record.description = rec_name+"_with_ORFs"
    record.name = rec_name
    record.dbxrefs = ["Project: "+argv[1]+"/"+rec_name]
    record.seq.alphabet = generic_dna
    write_genbank(annot_gbk, record)

    # parenthesised form prints the same text under Python 2 and is also
    # valid Python 3 syntax
    print("OK")