def other_blast():
    """Blast each core HGT group against the 'other' database and store the hits.

    The groups returned by core_hgt_groups() are sorted in place, largest
    first. For every group, its members are passed to
    kv.make_id_list_fasta(), 'tmp.fna' is blasted against
    'blast_databases/other', and one document of the form
    {'group': <1-based rank>, 'group_hits': [...]} is inserted into the
    'hits' collection.
    """
    ranked_groups = core_hgt_groups()
    ranked_groups.sort(key=len, reverse=True)

    for rank, members in enumerate(ranked_groups, start=1):
        # presumably this writes the 'tmp.fna' file blasted below -- TODO confirm
        kv.make_id_list_fasta(members, 'core')
        blast_results = blast_vs_db('tmp.fna', 'blast_databases/other')

        collection = kv.get_collection('hits')
        # A falsy result (None or empty) is stored as an empty hit list, so
        # every group gets a document even when nothing was found.
        group_hits = list(blast_results) if blast_results else []
        collection.insert_one({'group': rank, 'group_hits': group_hits})
def get_links(group=None, perc_identity='99'):
    """Write one link line per stored core hit to a file under circos/links/.

    Every document in the 'hits' collection is read. For each gene that has
    at least one hit under the document's 'core_hits_<perc_identity>' key,
    the records of the gene and of each hit are fetched with
    kv.get_mongo_record() and a line of the form

        <strain>kvc_<contig> <start> <end> <strain>kvc_<contig> <start> <end>

    is written (source gene first, hit second). <strain> is element 2 of
    whatever kv.parse_species_name() returns.

    Args:
        group: 1-based rank of a group in core_hgt_groups() ordered by size,
            largest first. When given, only genes whose (species, gene) pair
            is in that group are written, and the output file is
            'circos/links/group<group>_links_<perc_identity>.txt'. When
            falsy (None or 0), all hits are written to
            'circos/links/all_links_<perc_identity>.txt'.
        perc_identity: Value used to build both the 'core_hits_<...>'
            document key and the output file name.

    Returns:
        None. The result is the file written to disk; the directory
        'circos/links/' is created if it does not exist.
    """
    hits_collection = kv.get_collection('hits')

    if not os.path.isdir('circos/links/'):
        os.makedirs('circos/links/')

    if group:
        ranked_groups = sorted(core_hgt_groups(), key=len, reverse=True)
        group_hits = ranked_groups[group - 1]
        out_name = 'circos/links/group{}_links_{}.txt'.format(group, perc_identity)
    else:
        group_hits = None
        out_name = 'circos/links/all_links_{}.txt'.format(perc_identity)

    # Loop-invariant values, built once instead of once per document/link.
    hits_key = 'core_hits_{}'.format(perc_identity)
    link_line = '{0}kvc_{1} {2} {3} {4}kvc_{5} {6} {7}\n'

    with open(out_name, 'w+') as out_handle:
        for species in hits_collection.find():
            # Function-call form prints the same thing on Python 2 and is
            # also valid on Python 3 (the old statement form is not).
            print(species)
            try:
                all_hits = species[hits_key]
                if group:
                    species_name = species['species']
                    hits_to_write = {
                        gene: gene_hits
                        for gene, gene_hits in all_hits.items()
                        if (species_name, gene) in group_hits
                    }
                else:
                    hits_to_write = all_hits

                for gene, gene_hits in hits_to_write.items():
                    if not gene_hits:
                        continue
                    s1_record = kv.get_mongo_record(species['species'], gene)
                    s1_strain = kv.parse_species_name(species['species'])
                    for hit in gene_hits:
                        # Each hit is indexed as (species, gene id), mirroring
                        # the arguments used for the source gene above.
                        s2_record = kv.get_mongo_record(hit[0], hit[1])
                        s2_strain = kv.parse_species_name(hit[0])
                        out_handle.write(link_line.format(
                            s1_strain[2],
                            s1_record['location']['contig'],
                            s1_record['location']['start'],
                            s1_record['location']['end'],
                            s2_strain[2],
                            s2_record['location']['contig'],
                            s2_record['location']['start'],
                            s2_record['location']['end'],
                        ))
            except KeyError:
                # Best effort: documents without this identity's hits key are
                # skipped (e.g. the {'group', 'group_hits'} documents that
                # other_blast() inserts into the same collection).
                # NOTE(review): a KeyError from a malformed record (missing
                # 'location', 'contig', ...) also lands here and silently
                # drops the remaining links for this document -- confirm
                # that is intended.
                continue