for taxa in taxalist:
     # Build one record per taxon: BLAST the query against the taxon, then
     # enrich the top hit with its GI, sequence definition, lineage and
     # sequence.  Every lookup is best-effort; whatever could be collected is
     # appended to taxarecords even when later steps fail.
     taxainfo = {}
     taxainfo['novel'] = record
     taxainfo['taxa'] = taxa
     taxainfo['taxid'] = ncbi.get_taxid(taxa)
     taxainfo['blast_record'] = get_BLAST(taxainfo['taxid'], qseqid)
     try:
         # Only the first HSP of the first alignment is used; keep its first
         # 50 subject characters.
         top_hit = taxainfo['blast_record'].alignments[0]
         taxainfo['sbjctseq'] = top_hit.hsps[0].sbjct[:50]
     except (AttributeError, IndexError, TypeError):
         # Missing record, no alignments/HSPs, or no subject string.
         print("No match found for " + taxa)
     else:
         print(taxainfo['sbjctseq'])

         # The GI is read from the second '|'-separated field of the hit
         # title.  Extract the field on its own so that a title without that
         # field is reported as an invalid GI instead of raising again inside
         # the error handler and aborting the remaining lookups.
         try:
             gi_field = top_hit.title.split('|')[1]
         except (AttributeError, IndexError):
             gi_field = None
         try:
             taxainfo['GI'] = int(gi_field)
         except (TypeError, ValueError):
             print("GI not valid for " + taxa + " : " + str(gi_field))
         else:
             print("GI is: " + str(taxainfo['GI']))

         # The lookups below catch Exception (not a bare except) so that
         # KeyboardInterrupt / SystemExit still stop the script.  When no GI
         # was stored, taxainfo['GI'] raises KeyError and the failure message
         # is printed, as before.
         try:
             taxainfo['seqdef'] = ncbi.get_gene_data(taxainfo['GI'])[0]['GBSeq_definition']
             print('GBSeq_definition is ' + taxainfo['seqdef'])
         except Exception:
             print("Unable to get GBSeq_definition")
         try:
             taxainfo['lineage'] = ncbi.get_lineage(taxainfo['taxid'])
         except Exception:
             print("Unable to get Lineage")
         try:
             taxainfo['sequence'] = ncbi.get_gene_seq(taxainfo['GI'])
         except Exception:
             print("unable to get sequence")
     taxarecords.append(taxainfo)
 # Open temp/<record>.pickle for writing in text mode.
 # NOTE(review): the ".pickle" suffix suggests this handle is given to the
 # pickle module; on Python 3 that requires binary mode ('wb') - confirm
 # against the code that writes to and closes this handle.
 handle = open('temp/' + record + '.pickle', 'w')
		  GROUP BY qseqid, genus
		  ORDER BY i.taxid, pident, coverage""")
#GROUP BY qseqid, i.taxid
# Execute the homolog query.  The two novel sequence ids are bound twice, in
# the same order both times.
query_args = (novelseq1, novelseq2, novelseq1, novelseq2)
homolog_cursor = dbconnection.cursor()
homolog_cursor.execute(query, query_args)
# Drain the cursor into a list so the rows can be iterated afterwards.
results = [row for row in homolog_cursor]

# get homolog data
# Each query row is extended with three values fetched through ncbi: the
# GBSeq_definition of the hit, the lineage of its taxid and the hit sequence.
# Rows are collected in a list and converted to a tuple once at the end;
# concatenating onto a tuple inside the loop re-copied every earlier row on
# each iteration.
# NOTE(review): homologs is reassigned to () further down without being read
# in between - confirm these lookups are still needed.
homologs = []
for homolog in results:
    taxid = homolog[1]  # row column 1 is used as the taxonomy id
    sgi = homolog[4]    # row column 4 is used as the subject gene id
    print(taxid)
    lineage = ncbi.get_lineage(taxid)
    sequence = ncbi.get_gene_seq(sgi)
    seqdef = ncbi.get_gene_data(sgi)[0]['GBSeq_definition']
    homologs.append(tuple(homolog) + (seqdef, lineage, sequence))
homologs = tuple(homologs)


# Build a fasta file from homolog seq
homologsRec1 = []
homologsRec2 = []
# Add novel sequences: load every record of the FASTA file into a dict.
# Mode "r" replaces "rU": the "U" flag was removed in Python 3.11 (open()
# raises ValueError), and Python 3 text mode already translates newlines.
# The with-block closes the file even if parsing raises.
with open("notes/transcriptome/BothNyAd.fa", "r") as handle:
    record_dict = SeqIO.to_dict(SeqIO.parse(handle, "fasta"))
sid = 'Diaphorina citri'
name = 'Diaphorina citri'

# Rename the novel sequence
# Only the novelseq2 record receives the new name/id; both records get their
# own key as description.
# NOTE(review): novelseq1 keeps its original name/id - confirm that is intended.
record_dict[novelseq2].name = name
record_dict[novelseq2].id = sid
record_dict[novelseq1].description = novelseq1
record_dict[novelseq2].description = novelseq2

homologs = ()
for ataxa in list(alltaxa)[:4]:
    print("lookup " + ataxa)
    taxid = ncbi.get_taxid(ataxa)
    print(ataxa + " tax id is " + str(taxid))
    qseq1 = str(record_dict[novelseq2].seq)
    blast_record = get_BLAST(taxid, qseq1)
    try:
        sbjctseq1 = blast_record.alignments[0].hsps[0].sbjct[:50]
        sgi1 = blast_record.alignments[0].title.split('|')[1]
        print("1", sbjctseq1)
        seqdef1 = ncbi.get_gene_data(sgi1)[0]['GBSeq_definition']
        lineage1 = ncbi.get_lineage(taxid)
        sequence1 = ncbi.get_gene_seq(sgi1)
    except:
        print("No match")
        sgi1 = 0


    qseq2 = str(record_dict[novelseq1].seq)
    blast_record = get_BLAST(taxid, qseq2)
    try:
        sbjctseq2 = blast_record.alignments[0].hsps[0].sbjct[:50]
        sgi2 = blast_record.alignments[0].title.split('|')[1]
        print("2", sbjctseq2)
        seqdef2 = ncbi.get_gene_data(sgi2)[0]['GBSeq_definition']
        lineage2 = ncbi.get_lineage(taxid)