def upton(iterable, truth_set, cutoff=1e-3, vmin=1/(3*60706.)):
    """Score chunks of variants, evaluate them against a truth set, print metrics.

    Each element of ``iterable`` is a chunk: a sized sequence of objects that
    expose ``mafs`` (convertible to float) and, for the progress line,
    ``chrom`` and ``start``.  A chunk's score is::

        sum((1 - maf) ** 2 for maf in chunk if maf < cutoff) / len(chunk)

    Chunks with fewer than 8 elements, or whose score is exactly 1, are
    dropped before evaluation.  The surviving ``(chunk, score)`` pairs are
    handed to ``evaluate`` and the result of
    ``metrics(res[True], res[False], "upton.auc.png")`` is printed.

    Args:
        iterable: iterable of chunks (see above).
        truth_set: passed through to ``evaluate``.  When it contains the
            substring/element ``"humvar"``, records are treated as pathogenic
            only if their ``'class'`` field equals ``"deleterious"``.
        cutoff: only values of ``mafs`` strictly below this contribute to
            the score.
        vmin: currently unused; kept so existing callers keep working.
    """
    def genchunks():
        # Lazily yield (chunk, score); the summary lines below are written to
        # stderr only once the consumer has exhausted this generator.
        nsmall, ones, total = 0, 0, 0
        for i, chunk in enumerate(iterable):
            # Track the real number of chunks seen.  The last enumerate index
            # is one less than the count and is unbound for an empty input.
            total = i + 1

            # Progress report every 100000 chunks (skipping the very first).
            if i > 0 and i % 100000 == 0 and len(chunk) > 0:
                print("%s %s %s" % (i, chunk[0].chrom, chunk[0].start))

            if len(chunk) < 8:
                nsmall += 1
                continue

            mafs = (float(x.mafs) for x in chunk)
            score = sum((1.0 - m) ** 2.0 for m in mafs if m < cutoff) / float(len(chunk))
            if score == 1:
                ones += 1
                continue
            yield chunk, score

        # Avoid dividing by zero when no chunks were seen; both counters are
        # zero in that case, so the reported percentages are 0.00%.
        denom = float(total) if total else 1.0
        sys.stderr.write("%d (%.2f%%) removed for being too short\n" % (nsmall, 100.0 * nsmall / denom))
        sys.stderr.write("%d (%.2f%%) removed for having score of 1\n" % (ones, 100.0 * ones / denom))
        sys.stderr.write("%d total chunks\n" % total)

    # NOTE: this is for humvar only. not needed for clinvar.
    def humvar_pathogenic(d):
        return d['class'] == "deleterious"

    if "humvar" in truth_set:
        res = evaluate(genchunks(), truth_set, is_pathogenic=humvar_pathogenic)
    else:
        res = evaluate(genchunks(), truth_set)
    # Single-argument print(...) behaves identically under Python 2 and 3.
    print(metrics(res[True], res[False], "upton.auc.png"))
def rvis(truth_set):
    """Evaluate the scores stored in ``rvis.bed`` against ``truth_set``.

    Every record read from ``rvis.bed`` becomes a one-element chunk holding a
    ``(chrom, start, end)`` interval, paired with the negated ``pct`` value of
    that record.  The pairs are passed to ``evaluate`` and the result of
    ``metrics(res[True], res[False], "rvis.auc.png")`` is printed.

    Args:
        truth_set: passed through unchanged to ``evaluate``.
    """
    region = namedtuple('interval', 'chrom start end')

    def scored_regions():
        for row in ts.reader("rvis.bed"):
            pct = float(row['pct'])
            span = region(row['chrom'], int(row['start']), int(row['end']))
            # The score is the negated pct value.
            yield [span], -pct

    outcome = evaluate(scored_regions(), truth_set)
    # Single-argument print(...) behaves identically under Python 2 and 3.
    print(metrics(outcome[True], outcome[False], "rvis.auc.png"))