try: taxainfo['GI'] = int(taxainfo['blast_record'].alignments[0].title.split('|')[1]) print("GI is: " + str(taxainfo['GI'])) except: print("GI not valid for " + taxa + " : " + taxainfo['blast_record'].alignments[0].title.split('|')[1]) try: taxainfo['seqdef'] = ncbi.get_gene_data(taxainfo['GI'])[0]['GBSeq_definition'] print('GBSeq_definition is ' + taxainfo['seqdef']) except: print("Unable to get GBSeq_definition") try: taxainfo['lineage'] = ncbi.get_lineage(taxainfo['taxid']) except: print("Unable to get Lineage") try: taxainfo['sequence'] = ncbi.get_gene_seq(taxainfo['GI']) except: print("unable to get sequence") except: print("No match found for " + taxa) taxarecords.append(taxainfo) handle = open('temp/' + record + '.pickle', 'w') pickle.dump(taxarecords, handle) handle.close() homologrec = [] for trecord in taxarecords: try: seq = Seq(str(trecord['sequence']), generic_dna) except: print('probably no sequence data') tid = trecord['taxa'].replace(" ", "_")
WHERE qseqid in (%s, %s) GROUP BY qseqid, genus ORDER BY i.taxid, pident, coverage""") #GROUP BY qseqid, i.taxid results = dbconnection.cursor() results.execute(query, (novelseq1, novelseq2, novelseq1, novelseq2)) results = list(results) # get homolog data homologs = () for homolog in results: taxid = homolog[1] sgi = homolog[4] print(taxid) lineage = ncbi.get_lineage(taxid) sequence = ncbi.get_gene_seq(sgi) seqdef = ncbi.get_gene_data(sgi)[0]['GBSeq_definition'] homologs = homologs + (homolog + (seqdef, lineage, sequence),) # Build a fasta file from homolog seq homologsRec1=[] homologsRec2=[] # Add novel sequences handle = open("notes/transcriptome/BothNyAd.fa", "rU") record_dict = SeqIO.to_dict(SeqIO.parse(handle, "fasta")) handle.close() sid = 'Diaphorina citri' name = 'Diaphorina citri' # Rename the novel sequence
record_dict[novelseq2].description = novelseq2 homologs = () for ataxa in list(alltaxa)[:4]: print("lookup " + ataxa) taxid = ncbi.get_taxid(ataxa) print(ataxa + " tax id is " + str(taxid)) qseq1 = str(record_dict[novelseq2].seq) blast_record = get_BLAST(taxid, qseq1) try: sbjctseq1 = blast_record.alignments[0].hsps[0].sbjct[:50] sgi1 = blast_record.alignments[0].title.split('|')[1] print("1", sbjctseq1) seqdef1 = ncbi.get_gene_data(sgi1)[0]['GBSeq_definition'] lineage1 = ncbi.get_lineage(taxid) sequence1 = ncbi.get_gene_seq(sgi1) except: print("No match") sgi1 = 0 qseq2 = str(record_dict[novelseq1].seq) blast_record = get_BLAST(taxid, qseq2) try: sbjctseq2 = blast_record.alignments[0].hsps[0].sbjct[:50] sgi2 = blast_record.alignments[0].title.split('|')[1] print("2", sbjctseq2) seqdef2 = ncbi.get_gene_data(sgi2)[0]['GBSeq_definition'] lineage2 = ncbi.get_lineage(taxid) sequence2 = ncbi.get_gene_seq(sgi2) except: