예제 #1
0
파일: upton.py 프로젝트: jimhavrilla/pmodel
def upton(iterable, truth_set, cutoff=1e-3, vmin=1/(3*60706.)):
    """Score chunks by their rare-variant content and print evaluation metrics.

    Each chunk from ``iterable`` is scored as the sum of ``(1 - maf)**2`` over
    the chunk members whose ``mafs`` value is below ``cutoff``, divided by the
    number of members in the chunk. Scored chunks are handed to ``evaluate``
    together with ``truth_set``; the ``True``/``False`` entries of its result
    are passed to ``metrics`` (with the output name ``"upton.auc.png"``) and
    the value returned by ``metrics`` is printed.

    Args:
        iterable: yields chunks; each chunk must support ``len()`` and
            indexing, and its members must expose ``mafs`` (convertible to
            float) plus ``chrom``/``start`` on the first member (used only
            for progress output).
        truth_set: forwarded to ``evaluate``. When ``"humvar" in truth_set``
            holds, records are classed as pathogenic via
            ``d['class'] == "deleterious"``.
            NOTE(review): presumably a file path -- confirm against callers.
        cutoff: only members with ``maf < cutoff`` contribute to the score.
        vmin: unused by this function; kept so existing callers keep working.
    """

    def genchunks():
        # Yield (chunk, score) pairs, skipping chunks that are too short or
        # whose score is exactly 1, and report the skip counts on stderr once
        # the input is exhausted.
        nsmall, ones, total = 0, 0, 0
        for i, chunk in enumerate(iterable):
            # Track the number of chunks seen (i is a zero-based index), so
            # the summary below is correct and defined even for empty input.
            total = i + 1
            # Progress line every 100000 chunks; guard against an empty chunk
            # so the progress output itself can never raise IndexError.
            if i > 0 and i % 100000 == 0 and len(chunk) > 0:
                print("%s %s %s" % (i, chunk[0].chrom, chunk[0].start))
            if len(chunk) < 8:
                nsmall += 1
                continue
            mafs = (float(x.mafs) for x in chunk)
            score = sum((1.0 - m) ** 2.0 for m in mafs if m < cutoff) / float(len(chunk))
            # Exact comparison is intentional: the score is exactly 1.0 when
            # every member has maf == 0 (and 0 < cutoff).
            if score == 1:
                ones += 1
                continue
            yield chunk, score

        def percent(count):
            # Share of all chunks seen; 0.0 when nothing was seen at all.
            return 100.0 * count / total if total else 0.0

        sys.stderr.write("%d (%.2f%%) removed for being too short\n" % (
            nsmall, percent(nsmall)))
        sys.stderr.write("%d (%.2f%%) removed for having score of 1\n" % (
            ones, percent(ones)))
        sys.stderr.write("%d total chunks\n" % total)

    # NOTE: this is for humvar only. not needed for clinvar.
    def humvar_pathogenic(d):
        return d['class'] == "deleterious"

    if "humvar" in truth_set:
        res = evaluate(genchunks(), truth_set, is_pathogenic=humvar_pathogenic)
    else:
        res = evaluate(genchunks(), truth_set)

    # Single-argument print(...) prints the same on Python 2 and Python 3.
    print(metrics(res[True], res[False], "upton.auc.png"))
예제 #2
0
파일: upton.py 프로젝트: jimhavrilla/pmodel
def rvis(truth_set):
    """Evaluate the scores stored in ``rvis.bed`` against ``truth_set``.

    Every record read from ``rvis.bed`` becomes a one-element chunk holding
    an ``interval(chrom, start, end)`` and is paired with the negated ``pct``
    value of that record. The pairs are passed to ``evaluate``; the
    ``True``/``False`` entries of its result go to ``metrics`` (with the
    output name ``"rvis.auc.png"``) and the value it returns is printed.

    Args:
        truth_set: forwarded unchanged to ``evaluate``.
    """
    interval = namedtuple('interval', ['chrom', 'start', 'end'])

    def genregions():
        # Records are accessed by key: 'chrom', 'start', 'end' and 'pct'.
        for d in ts.reader("rvis.bed"):
            score = float(d['pct'])
            chunk = [interval(d['chrom'], int(d['start']), int(d['end']))]
            # The score is negated before being handed to evaluate().
            # NOTE(review): presumably because a lower pct should rank
            # higher -- confirm against evaluate().
            yield chunk, -score

    res = evaluate(genregions(), truth_set)
    # Single-argument print(...) prints the same on Python 2 and Python 3.
    print(metrics(res[True], res[False], "rvis.auc.png"))