예제 #1
0

### MAIN ###
### fill tables by org
# For every organism: register it in table `org`, load its genomic contigs
# into table `genomic`, then read its GFF file and build the CDS inserts.
# Values are passed as bound parameters ('?' placeholders) instead of being
# %-interpolated into the SQL text, so a name containing a double quote can
# no longer break (or inject into) the statement.
# NOTE(review): '?' is the sqlite3 placeholder style; the chained
# c.execute(...).fetchone() suggests `c` is a sqlite3 cursor -- confirm.
for org in orgs:
    c.execute('insert into org (short) values (?)', (org,))
    con.commit()
    # Read back the id of the row that was just inserted for this organism.
    orgid = c.execute('select id from org where short=?',
                      (org,)).fetchone()[0]

    print('%i %s' % (orgid, org))

    # genomic contig sequences
    gen = '%s.genome.fa' % org
    # No genome file: skip the rest of this organism, including its GFF.
    if not os.path.exists(gen):
        continue
    gs = Fasta.FastaSeqs()
    gs.loadseqs([gen])
    print('%i genomic contig seqs' % len(gs.seqs))
    for s in gs.seqs.values():
        # str() mirrors what the former "%s" formatting did to these values.
        c.execute('insert into genomic(org,name,seq) values(?,?,?)',
                  (orgid, str(s.name), str(s.seq)))
    con.commit()

    gff = '%s.gff' % org
    features = read_gff(gff)

    # CDS models (seems to be present and usable in all gffs)
    # NOTE(review): the visible code ends before `ex` is executed, so this
    # statement is kept unchanged; it should get the same bound-parameter
    # treatment at the point where it is run.
    for cds in features['CDS'].values():
        ex = 'insert into cds (org,seq,pid,start,end,strand) values (%i,"%s","%s","%s","%s","%s")' % (
            orgid, cds.seq, cds.fid, cds.start, cds.end, cds.strand)
예제 #2
0
# Path of the sequence file to scan, taken from the second command-line
# argument; it is loaded with Fasta.FastaSeqs().loadseqs() further down.
f = sys.argv[2]


def expandprimer(primer):
    """Return the list of sequences produced by expanding *primer*.

    Each character of *primer* is looked up in ``degen_nucs`` to get the
    choices for that position. ``all_combinations_gen`` is then called with
    an empty prefix and the result list; the list is returned afterwards
    (presumably filled in place by that helper -- it is defined elsewhere).

    Raises KeyError if a character of *primer* is not a key of
    ``degen_nucs``.
    """
    expanded = []
    per_position = []
    for nuc in primer:
        per_position.append(degen_nucs[nuc])
    all_combinations_gen(0, len(primer), [], expanded, per_position)
    return expanded


# Expand the primer into its list of concrete sequences and list them.
primers = expandprimer(primer)
print('%i primers' % len(primers))
for p in primers:
    # Call form of print: the former `print p` statement is a SyntaxError on
    # Python 3, while print(p) gives the same output on Python 2 and 3.
    print(p)

# Load the sequences to be searched from the file given on the command line.
fs = Fasta.FastaSeqs()
fs.loadseqs([f])

#primers.append('A')

# Maps each key of fs.seqs to a list of [primer, match start, ''] entries.
seqmatches = {}

for s in fs.seqs:
    seqmatches[s] = []
    for p in primers:

        # Forward hits: every case-insensitive match of the primer, used as
        # a regular expression, within this sequence.
        prgx = re.compile(p, re.IGNORECASE)
        for match in prgx.finditer(fs.seqs[s].seq):
            seqmatches[s].append([p, match.start(), ''])

        # Pattern built from rvs_comp_str(p); the visible code ends before
        # it is used, so names and statement order above are kept as-is.
        rvscmp = re.compile(rvs_comp_str(p), re.IGNORECASE)
예제 #3
0
if __name__ == "__main__":
	# Command-line entry point.
	# argv[1]: BLAST table, argv[2]: minimum percent identity,
	# argv[3]: minimum alignment length, argv[4]: sequence file.
	# Writes '<blasttable>.lng.<sfx>.fa' with one record per key of `app`,
	# and one 'BLASTn_grps/<key>.grp.<sfx>.fa' file per key holding that
	# key's record followed by the records of its mates.
	import sys
	app = BLAST_nr()
	# args are blasttable, minpident, minalen
	bfnm = sys.argv[1]
	minpident = float(sys.argv[2])
	minalnlen = int(sys.argv[3])
	app.parseBLAST(bfnm,minpident,minalnlen)
#	print(app)
#	print(app.non_uni_clusters())
#	print('\n'.join(app.keys()))

	grp_path = 'BLASTn_grps'
	if not os.path.exists(grp_path): os.mkdir(grp_path)
	import Fasta
	fa = Fasta.FastaSeqs()
	fa.loadseqs([sys.argv[4]])
	# Suffix shared by all output file names: '<int(minpident)>.<minalnlen>'.
	sfx = '%i.%i' %(int(minpident),minalnlen)
	# `with` closes both files even if a key is missing from fa.seqs or a
	# write fails; the manual open()/close() pairs leaked the handles then.
	with open('%s.lng.%s.fa' %(bfnm,sfx),'w') as lngf:
		for k,mates in app.items():
			# The same record goes to the combined file and the group file.
			rec = '>%s\n%s\n' %(k,fa.seqs[k].seq)
			lngf.write(rec)
			with open('%s/%s.grp.%s.fa' %(grp_path,k,sfx),'w') as f:
				f.write(rec)
				for m in mates:
					f.write('>%s\n%s\n' %(m,fa.seqs[m].seq))
	exf = open('%s.extras.%s.fa' %(bfnm,sfx),'w')
	incids = app.allids()
	for k in fa.seqs:
		if not k in incids: