# For each region present in the first glfo, report the genes that are in only one of the two germline sets.
regions_to_compare = [r for r in utils.regions if r in glfos[0]['seqs']]
for region in regions_to_compare:
    aset, bset = [set(g['seqs'][region]) for g in glfos]  # unpacking means there have to be exactly two glfos
    only_in_a = aset - bset
    only_in_b = bset - aset

    tmpfo = glutils.get_empty_glfo(args.locus)  # make a new glfo that will only have non-shared genes
    for glabel, gset, gfo in zip(args.names, [only_in_a, only_in_b], glfos):  # <gset> is the genes that're only in <glabel>
        for ogene in gset:
            # the gene gets tagged with the label of the set it came from, e.g. <ogene>+<glabel>
            labeled_allele = {
                'gene': '+'.join([ogene, glabel]),
                'seq': gfo['seqs'][region][ogene],
                'cpos': utils.cdn_pos(gfo, region, ogene),
            }
            glutils.add_new_allele(tmpfo, labeled_allele, use_template_for_codon_info=False)

    # eh, maybe this doesn't really add anything?
    # # add the nearest genes that they both have for comparison NOTE this gives one comparison gene for *each* gene, so usually you get a bunch of comparison/'both' genes in each block in the ascii output
    # for bgene in aset & bset:
    #     _, nearest_gene, _ = glutils.find_nearest_gene_with_same_cpos(glfos[0], glfos[0]['seqs'][region][bgene], new_cpos=utils.cdn_pos(glfos[0], region, bgene))  # i think it doesn't matter which glfo we get it from, so arbitrarily choose the first one
    #     glutils.add_new_allele(tmpfo, {'gene' : '+'.join([nearest_gene, 'both']), 'seq' : glfos[0]['seqs'][region][nearest_gene], 'cpos' : utils.cdn_pos(glfos[0], region, bgene)}, use_template_for_codon_info=False)

    summary_fmt = '%s: only in:\n %12s: %2d %s\n %12s: %2d %s'
    print(summary_fmt % (
        utils.color('green', region),
        args.names[0], len(only_in_a), utils.color_genes(sorted(only_in_a)),
        args.names[1], len(only_in_b), utils.color_genes(sorted(only_in_b)),
    ))
    if len(tmpfo['seqs'][region]) > 0:
        # NOTE(review): the block that adds the 'both' genes is commented out above, so this message may be stale -- confirm
        print(' comparing to nearest genes that were in both (labeled \'both\'):')
        glutils.print_glfo(tmpfo, only_region=region)
# ----------------------------------------------------------------------------------------
def get_genes(base, alleles=None):
    """Return one full gene name per entry in <alleles> for <base>.

    Each name is the upper-cased locus and region (from args), then <base>, a '*', and the allele.
    If <alleles> is None, use the allele of every gene in glfo['seqs'][args.region] whose get_base() equals <base>.
    """
    if alleles is None:  # take all of 'em
        alleles = []
        for gene in glfo['seqs'][args.region]:
            if base == get_base(gene):
                alleles.append(utils.allele(gene))
    return [args.locus.upper() + args.region.upper() + base + '*' + al for al in alleles]


# special case: just print the whole germline set and stop
if args.bases == 'all':
    glutils.print_glfo(glfo)
    sys.exit(0)

args.bases = utils.get_arg_list(args.bases)
args.allele_numbers = utils.get_arg_list(args.allele_numbers)

# collect the genes for every requested base (same allele numbers for each base)
genes = []
for base in args.bases:
    genes.extend(get_genes(base, args.allele_numbers))

if not genes:
    requested_str = ' '.join(args.bases)
    choices_str = ' '.join(sorted(set([get_base(g) for g in glfo['seqs'][args.region]])))
    raise Exception('couldn\'t find any genes for the specified --bases %s\n choices:\n %s' % (requested_str, choices_str))

# any explicitly specified extra genes get tacked on the end
args.other_genes = utils.get_arg_list(args.other_genes)
if args.other_genes is not None:
    genes += args.other_genes
if alleles is None: # take all of 'em alleles = [ utils.allele(g) for g in glfo['seqs'][args.region] if base == get_base(g) ] return [ args.locus.upper() + args.region.upper() + base + '*' + al for al in alleles ] if args.bases == 'all': input_groupfcn = None # lambda g: str(utils.primary_version(g) in ['4', '5']) # this example puts all the 4 and 5 primary versions in one group, and everybody else in another glutils.print_glfo( glfo, only_region=(args.region if args.region != 'v' else None), input_groupfcn=input_groupfcn ) # not much point in doing only v, since it's the one that takes most of the time sys.exit(0) args.bases = utils.get_arg_list(args.bases) args.allele_numbers = utils.get_arg_list(args.allele_numbers) genes = [ g for base in args.bases for g in get_genes(base, args.allele_numbers) ] if len(genes) == 0: raise Exception( 'couldn\'t find any genes for the specified --bases %s\n choices:\n %s' % (' '.join(args.bases), ' '.join( sorted(set([get_base(g) for g in glfo['seqs'][args.region]]))))) args.other_genes = utils.get_arg_list(args.other_genes)