# For every taxon name in ``taxalist``: resolve its taxid, BLAST ``qseqid``
# against it, and gather what can be learned about the top hit into one dict
# that is appended to ``taxarecords``.
#
# Keys always present in each dict: 'novel', 'taxa', 'taxid', 'blast_record'.
# Keys present only when the corresponding step succeeded: 'sbjctseq', 'GI',
# 'seqdef', 'lineage', 'sequence'. Every step after the BLAST call is
# best-effort: a failure is reported on stdout and the loop moves on.
#
# Handlers catch ``Exception`` rather than using a bare ``except:`` so that
# Ctrl-C (KeyboardInterrupt) and SystemExit still stop the run instead of
# being reported as a failed lookup.
for taxa in taxalist:
    taxainfo = {'novel': record, 'taxa': taxa}
    taxainfo['taxid'] = ncbi.get_taxid(taxa)
    taxainfo['blast_record'] = get_BLAST(taxainfo['taxid'], qseqid)

    try:
        # Only the first alignment / first HSP is inspected; indexing an
        # empty result raises and lands in the "No match found" handler.
        top_hit = taxainfo['blast_record'].alignments[0]
        taxainfo['sbjctseq'] = top_hit.hsps[0].sbjct[:50]
        print(taxainfo['sbjctseq'])

        # The GI is taken from the second '|'-separated field of the hit
        # title. The field is extracted up front so that a title without
        # that field is reported as an invalid GI here, instead of raising
        # a second time inside the handler and being mis-reported as
        # "No match found" (which also skipped the lineage lookup below).
        title_fields = top_hit.title.split('|')
        gi_text = title_fields[1] if len(title_fields) > 1 else top_hit.title
        try:
            taxainfo['GI'] = int(gi_text)
            print("GI is: " + str(taxainfo['GI']))
        except ValueError:
            print("GI not valid for " + taxa + " : " + gi_text)

        # When no GI was stored, the lookups that need it fail on the
        # missing key and are reported by their own handlers.
        try:
            taxainfo['seqdef'] = ncbi.get_gene_data(taxainfo['GI'])[0]['GBSeq_definition']
            print('GBSeq_definition is ' + taxainfo['seqdef'])
        except Exception:
            print("Unable to get GBSeq_definition")

        try:
            taxainfo['lineage'] = ncbi.get_lineage(taxainfo['taxid'])
        except Exception:
            print("Unable to get Lineage")

        try:
            taxainfo['sequence'] = ncbi.get_gene_seq(taxainfo['GI'])
        except Exception:
            print("unable to get sequence")
    except Exception:
        print("No match found for " + taxa)

    # Appended even when nothing matched, so there is one entry per taxon.
    taxarecords.append(taxainfo)

# NOTE(review): the code that uses ``handle`` is not visible here, so the
# open mode is left unchanged. If the file receives pickle.dump() output under
# Python 3 the mode must be 'wb' - confirm against the consumer. The handle
# also has to be closed by that later code (or be moved into a ``with``).
handle = open('temp/' + record + '.pickle', 'w')
GROUP BY qseqid, genus ORDER BY i.taxid, pident, coverage""") #GROUP BY qseqid, i.taxid results = dbconnection.cursor() results.execute(query, (novelseq1, novelseq2, novelseq1, novelseq2)) results = list(results) # get homolog data homologs = () for homolog in results: taxid = homolog[1] sgi = homolog[4] print(taxid) lineage = ncbi.get_lineage(taxid) sequence = ncbi.get_gene_seq(sgi) seqdef = ncbi.get_gene_data(sgi)[0]['GBSeq_definition'] homologs = homologs + (homolog + (seqdef, lineage, sequence),) # Build a fasta file from homolog seq homologsRec1=[] homologsRec2=[] # Add novel sequences handle = open("notes/transcriptome/BothNyAd.fa", "rU") record_dict = SeqIO.to_dict(SeqIO.parse(handle, "fasta")) handle.close() sid = 'Diaphorina citri' name = 'Diaphorina citri' # Rename the novel sequence record_dict[novelseq2].name = name
record_dict[novelseq2].id = sid record_dict[novelseq1].description = novelseq1 record_dict[novelseq2].description = novelseq2 homologs = () for ataxa in list(alltaxa)[:4]: print("lookup " + ataxa) taxid = ncbi.get_taxid(ataxa) print(ataxa + " tax id is " + str(taxid)) qseq1 = str(record_dict[novelseq2].seq) blast_record = get_BLAST(taxid, qseq1) try: sbjctseq1 = blast_record.alignments[0].hsps[0].sbjct[:50] sgi1 = blast_record.alignments[0].title.split('|')[1] print("1", sbjctseq1) seqdef1 = ncbi.get_gene_data(sgi1)[0]['GBSeq_definition'] lineage1 = ncbi.get_lineage(taxid) sequence1 = ncbi.get_gene_seq(sgi1) except: print("No match") sgi1 = 0 qseq2 = str(record_dict[novelseq1].seq) blast_record = get_BLAST(taxid, qseq2) try: sbjctseq2 = blast_record.alignments[0].hsps[0].sbjct[:50] sgi2 = blast_record.alignments[0].title.split('|')[1] print("2", sbjctseq2) seqdef2 = ncbi.get_gene_data(sgi2)[0]['GBSeq_definition'] lineage2 = ncbi.get_lineage(taxid)