# get the sequences from the sequence names novelseq = {} for sequence in seqnames: novelseq[sequence] = get_seqs(sequence) # Get homolog data for each sequence for record in novelseq: qseqid =novelseq[record].seq taxarecords = [] print('Getting homolog data for qseqid ' + record) for taxa in taxalist: taxainfo = {} taxainfo['novel'] = record taxainfo['taxa'] = taxa taxainfo['taxid'] = ncbi.get_taxid(taxa) taxainfo['blast_record'] = get_BLAST(taxainfo['taxid'], qseqid) try: taxainfo['sbjctseq'] = taxainfo['blast_record'].alignments[0].hsps[0].sbjct[:50] print(taxainfo['sbjctseq']) try: taxainfo['GI'] = int(taxainfo['blast_record'].alignments[0].title.split('|')[1]) print("GI is: " + str(taxainfo['GI'])) except: print("GI not valid for " + taxa + " : " + taxainfo['blast_record'].alignments[0].title.split('|')[1]) try: taxainfo['seqdef'] = ncbi.get_gene_data(taxainfo['GI'])[0]['GBSeq_definition'] print('GBSeq_definition is ' + taxainfo['seqdef']) except: print("Unable to get GBSeq_definition") try:
# Load every FASTA record of the novel-transcript file into a dict keyed by
# record id.  The context manager closes the handle even if parsing raises.
# Plain text mode replaces the old "rU": the "U" flag was removed in
# Python 3.11 (open() raises ValueError), and Python 3 text mode already
# performs universal-newline translation.
with open(noveltrans) as handle:
    record_dict = SeqIO.to_dict(SeqIO.parse(handle, "fasta"))

# Both novel records receive the same "<genus>_<species>" label as id and
# name; each record's original key is preserved in its description.
sid = genus + "_" + species
name = genus + "_" + species
for novelkey in (novelseq1, novelseq2):
    novelrecord = record_dict[novelkey]
    novelrecord.name = name
    novelrecord.id = sid
    novelrecord.description = novelkey

# Starts out as an empty tuple; nothing in the visible code fills it.
homologs = ()

# Only the first four entries of alltaxa are looked up.
for ataxa in list(alltaxa)[:4]:
    print("lookup " + ataxa)
    taxid = ncbi.get_taxid(ataxa)
    print(ataxa + " tax id is " + str(taxid))

    # NOTE(review): qseq1 is built from novelseq2, not novelseq1 -- confirm
    # that this pairing is intended.
    qseq1 = str(record_dict[novelseq2].seq)
    blast_record = get_BLAST(taxid, qseq1)

    try:
        # Use the best hit only: the first 50 characters of the subject
        # sequence from the first HSP of the first alignment.
        best_hit = blast_record.alignments[0]
        sbjctseq1 = best_hit.hsps[0].sbjct[:50]
        # Second '|'-separated field of the hit title (presumably the GI
        # number -- verify against the BLAST database title format).
        sgi1 = best_hit.title.split('|')[1]
        print("1", sbjctseq1)
        seqdef1 = ncbi.get_gene_data(sgi1)[0]['GBSeq_definition']
        lineage1 = ncbi.get_lineage(taxid)
        sequence1 = ncbi.get_gene_seq(sgi1)
    except Exception:
        # Best-effort lookup: any failure (no alignments, malformed title,
        # NCBI lookup error) is reported as "No match".  Catching Exception
        # rather than using a bare except lets Ctrl-C and SystemExit still
        # stop the script.
        # NOTE(review): only sgi1 is reset here; sbjctseq1, seqdef1,
        # lineage1 and sequence1 keep whatever an earlier iteration set.
        print("No match")
        sgi1 = 0