def get_single_performance(region, outdir, method, debug=False):
    """Count the genes on which the inferred and simulated germline sets disagree.

    Reads one germline set from <outdir>/germlines/simulation and another from
    <outdir>/<method>/sw/germline-sets (both for args.locus), passes them to
    glutils.synchronize_glfos() with the simulation set as the reference, and
    then compares the gene names present for <region>.

    Returns a dict with three integer entries:
        'missing'  -- genes in the simulation set but not in the inferred set
        'spurious' -- genes in the inferred set but not in the simulation set
        'total'    -- genes in the simulation set whose name contains a '+'

    If <debug> is true, a short summary of the discrepancies is printed.
    """
    true_glfo = glutils.read_glfo(outdir + '/germlines/simulation', locus=args.locus)
    inferred_glfo = glutils.read_glfo(outdir + '/' + method + '/sw/germline-sets', locus=args.locus)
    glutils.synchronize_glfos(ref_glfo=true_glfo, new_glfo=inferred_glfo, region=region)

    # compare by gene name only, after the synchronization call above
    true_genes = set(true_glfo['seqs'][region])
    inferred_genes = set(inferred_glfo['seqs'][region])
    missing_alleles = true_genes - inferred_genes
    spurious_alleles = inferred_genes - true_genes

    if debug:
        if missing_alleles:
            colored = [utils.color_gene(g) for g in missing_alleles]
            print(' %2d missing %s' % (len(missing_alleles), ' '.join(colored)))
        if spurious_alleles:
            colored = [utils.color_gene(g) for g in spurious_alleles]
            print(' %2d spurious %s' % (len(spurious_alleles), ' '.join(colored)))
        if not missing_alleles and not spurious_alleles:
            print(' none missing')

    # anybody with a '+' should be a new allele
    n_new_alleles = sum(1 for gene in true_glfo['seqs'][region] if '+' in gene)

    return {
        'missing': len(missing_alleles),
        'spurious': len(spurious_alleles),
        'total': n_new_alleles,
    }
def get_gene_sets(glsfnames, glslabels, ref_label=None):
    """Read one germline set per label and collect the sequences for args.region.

    <glsfnames> and <glslabels> are paired up positionally. For each pair the
    germline directory is taken to be the directory of the file name with
    '/' + args.locus removed, and is read with glutils.read_glfo().

    If <ref_label> is given, every other labelled set is passed to
    glutils.synchronize_glfos() with the <ref_label> set as the reference.

    Returns a tuple (all_genes, gl_sets):
        gl_sets   -- {label: {gene: seq}} for args.region
        all_genes -- {gene: seq} merged over every label (for a gene name that
                     occurs in several sets, the set visited last wins)
    """
    glfos = {}
    for this_label, this_fname in zip(glslabels, glsfnames):
        # strip the '/<locus>' component to get the directory read_glfo() is given
        gldir = os.path.dirname(this_fname).replace('/' + args.locus, '')
        # this is gonna fail for tigger since you only have the .fa
        glfos[this_label] = glutils.read_glfo(gldir, args.locus)

    if ref_label is not None:
        for this_label in glslabels:
            if this_label == ref_label:
                continue
            print(' syncronizing %s names to match %s' % (this_label, ref_label))
            glutils.synchronize_glfos(ref_glfo=glfos[ref_label], new_glfo=glfos[this_label], region=args.region)

    # per-label copies of the {gene: seq} mapping for the requested region
    gl_sets = {}
    for this_label in glfos:
        gl_sets[this_label] = dict(glfos[this_label]['seqs'][args.region].items())

    # union over all labels
    all_genes = {}
    for gene_seqs in gl_sets.values():
        for gene, seq in gene_seqs.items():
            all_genes[gene] = seq

    return all_genes, gl_sets