Esempio n. 1
0
def main(argv):
    """Group per-drug signature records by target and pretty-print them.

    For every row of the excerpt table whose drug has an entry in the
    mapping returned by ``gettargets()``, one ``si.SignatureData`` record
    is built and appended to the list of each of that drug's targets.
    The resulting target -> [records] dict is printed with ``pp.pprint``.

    ``argv`` is accepted but never read in this function.
    """
    d2l = getlims()      # indexed by drug below to obtain ``rangetested``
    d2t = gettargets()   # indexed by drug below to obtain its targets
    excerpt = '/Users/berriz/Work/scratch/excerpt.tsv'
    header, rows = rt.readtable(excerpt, headerrow=0)
    # NOTE(review): ``sigclass`` is not referenced by name again in this
    # function; it is, however, one of the locals seen by ``locals()`` below.
    sigclass = co.namedtuple('sigclass', header[1:], rename=True)
    flds = set(si.SignatureData._fields)
    t2s = dict()
    # Fixed seed before the ``rn.random()`` draws below -- presumably ``rn``
    # is the ``random`` module and this makes the flags reproducible; confirm.
    rn.seed(0)
    for row in sorted(rows):
        # First field is the drug; the remaining fields are its values.
        drug, vv = row[0], row[1:]
        if drug not in d2t:
            # Drugs without a targets entry are skipped.
            continue
        rangetested = d2l[drug]
        signature = tuple(convert_conc_row(vv))
        # The three flags are random draws compared against module-level
        # cutoffs -- presumably placeholder data; verify with the author.
        isclinical = rn.random() < ISCLINICAL_CUTOFF
        isselective = rn.random() < ISSELECTIVE_CUTOFF
        isprimary = rn.random() < ISPRIMARY_CUTOFF

        # The keyword arguments are taken from this function's local
        # variables, restricted (presumably by key -- confirm ``subdict``)
        # to the SignatureData field names.  The local variable names above
        # therefore matter: renaming one changes what is passed here.
        data = si.SignatureData(**(sd.superdict(locals()).subdict(flds)))
        for t in d2t[drug]:
            t2s.setdefault(t, []).append(data)

    # Sanity check: within one target's list, no drug occurs twice.
    for vv in t2s.values():
        drugs = set(v.drug for v in vv)
        assert len(drugs) == len(vv)

    # Python 2 print statement; the trailing comma suppresses the newline so
    # the pretty-printed dict follows the label.
    print '    LATEST =',
    pp.pprint(t2s)
Esempio n. 2
0
def main(argv):
    """Collect signature records per target and pretty-print the result.

    Each row of the excerpt table whose drug is a key of the
    ``gettargets()`` mapping yields one ``si.SignatureData`` record; that
    record is appended under every target listed for the drug.  The final
    target -> [records] dict is printed with ``pp.pprint``.

    ``argv`` is accepted but never read in this function.
    """
    d2l = getlims()      # looked up by drug to fill ``rangetested``
    d2t = gettargets()   # looked up by drug to get the drug's targets
    excerpt = '/Users/berriz/Work/scratch/excerpt.tsv'
    header, rows = rt.readtable(excerpt, headerrow=0)
    # NOTE(review): ``sigclass`` is never used by name afterwards, but it is
    # a local and hence visible to the ``locals()`` call further down.
    sigclass = co.namedtuple('sigclass', header[1:], rename=True)
    flds = set(si.SignatureData._fields)
    t2s = dict()
    # Seeded once before any ``rn.random()`` call -- presumably ``rn`` is the
    # ``random`` module and the intent is repeatable output; confirm.
    rn.seed(0)
    for row in sorted(rows):
        # row[0] is the drug, row[1:] the values passed to convert_conc_row.
        drug, vv = row[0], row[1:]
        if drug not in d2t:
            # No targets entry for this drug: nothing to record.
            continue
        rangetested = d2l[drug]
        signature = tuple(convert_conc_row(vv))
        # Flags come from random draws against module-level cutoffs --
        # presumably stand-in values; verify with the author.
        isclinical = rn.random() < ISCLINICAL_CUTOFF
        isselective = rn.random() < ISSELECTIVE_CUTOFF
        isprimary = rn.random() < ISPRIMARY_CUTOFF

        # Keyword arguments are pulled out of ``locals()``, narrowed
        # (presumably by key -- confirm ``subdict``) to SignatureData's
        # fields, so the spelling of the local names above is significant.
        data = si.SignatureData(**(sd.superdict(locals()).subdict(flds)))
        for t in d2t[drug]:
            t2s.setdefault(t, []).append(data)

    # Sanity check: a target's record list must not repeat a drug.
    for vv in t2s.values():
        drugs = set(v.drug for v in vv)
        assert len(drugs) == len(vv)

    # Python 2 print statement; the trailing comma keeps the cursor on the
    # same line so the pprint output follows the label.
    print '    LATEST =',
    pp.pprint(t2s)
Esempio n. 3
0
def gettargets():
    """Return a dict mapping each drug to a list of its targets.

    The drug-target table is loaded with ``rt.readtable(..., headerrow=0)``;
    the header it returns is ignored.  Every remaining row must unpack into
    exactly two fields, ``(drug, info)``.  ``info`` is split on commas (any
    whitespace around a comma is consumed by the separator) and the pieces
    are appended to the drug's list, so repeated drugs accumulate targets.
    """
    source = '/Users/berriz/Work/attachments/MF/LBNL_Gray_drug_targets_encoded_10Feb.txt'
    _header, records = rt.readtable(source, headerrow=0)
    separator = re.compile(r'\s*,\s*')

    targets_by_drug = {}
    for drug, info in records:
        if drug not in targets_by_drug:
            targets_by_drug[drug] = []
        targets_by_drug[drug].extend(separator.split(info))
    return targets_by_drug
Esempio n. 4
0
def gettargets():
    """Build the drug -> [targets] lookup from the drug-target table.

    The table is read via ``rt.readtable(..., headerrow=0)`` and its
    returned header is discarded.  Each row is unpacked as ``(drug, info)``;
    ``info`` is split on commas, with whitespace adjacent to a comma treated
    as part of the separator.  A drug that appears on several rows ends up
    with the concatenation of all its rows' targets.
    """
    table_path = '/Users/berriz/Work/attachments/MF/LBNL_Gray_drug_targets_encoded_10Feb.txt'
    _unused_header, body = rt.readtable(table_path, headerrow=0)
    on_commas = re.compile(r'\s*,\s*')

    lookup = {}
    for drug, info in body:
        if drug not in lookup:
            lookup[drug] = []
        pieces = on_commas.split(info)
        lookup[drug].extend(pieces)
    return lookup
Esempio n. 5
0
def getlims():
    gi50 = '/Users/berriz/Work/scratch/gi50.tsv'
    header, namedrows = rt.readtable(gi50, headerrow=0)
    rows = tuple(convert_conc_row(nr[1:]) for nr in namedrows)
    cols = zip(*rows)
    errors = []

    def resolve(rng, candidates):
        assert rng[0] <= rng[1]

        found = []
        garbage = []
        puramierda = []

        for pl in candidates:
            if pl[0] <= rng[0] and rng[1] <= pl[1]:
                found.append(pl)
            elif pl[0] == rng[0] or rng[1] == pl[1]:
                garbage.append(pl)
            elif pl[0] < rng[0] or rng[1] < pl[1]:
                puramierda.append(pl)

        lfound = len(found)
        if lfound == 1:
            return found[0]

        if lfound == 0:
            lgarbage = len(garbage)
            if lgarbage == 1:
                errors.append('found garbage for %s %s' % (drug, rng))
                toshow = garbage
                ret = garbage[0]
            else:
                if lgarbage > 1 or len(puramierda):
                    errors.append('found PURA MIERDA for %s %s' % (drug, rng))
                    toshow = garbage + puramierda
                else:
                    errors.append('no dice for %s %s' % (drug, rng))
                    toshow = candidates
                ret = None
        elif lfound > 1:
            found2 = []
            for pl in found:
                if ((pl[0] == rng[0] <= rng[1])
                        or (rng[0] <= rng[1] == pl[1])):
                    found2.append(pl)

            if len(found2) == 1:
                ret = found2[0]
            else:
                (_, imin) = mm.minmax((t[1] - t[0] for t in found),
                                      warg=True)[0]
                ret = found[imin]

            errors.append('too many dice for %s %s:' % (drug, rng))
            toshow = found

        for t in toshow:
            errors.append('  %s' % (t, ))
        errors.append('')

        return ret

    d2l = dict()
    pls = possible_lims()
    for drug, col in zip(header[1:], cols):
        rng = lims(col)
        d2l[drug] = resolve(rng, pls[drug])

    if errors:
        print
        for e in errors:
            print e

    return d2l
Esempio n. 6
0
def getlims():
    gi50 = '/Users/berriz/Work/scratch/gi50.tsv'
    header, namedrows = rt.readtable(gi50, headerrow=0)
    rows = tuple(convert_conc_row(nr[1:]) for nr in namedrows)
    cols = zip(*rows)
    errors = []
    def resolve(rng, candidates):
        assert rng[0] <= rng[1]

        found = []
        garbage = []
        puramierda = []

        for pl in candidates:
            if pl[0] <= rng[0] and rng[1] <= pl[1]:
                found.append(pl)
            elif pl[0] == rng[0] or rng[1] == pl[1]:
                garbage.append(pl)
            elif pl[0] < rng[0] or rng[1] < pl[1]:
                puramierda.append(pl)

        lfound = len(found)
        if lfound == 1:
            return found[0]

        if lfound == 0:
            lgarbage = len(garbage)
            if lgarbage == 1:
                errors.append('found garbage for %s %s' % (drug, rng))
                toshow = garbage
                ret = garbage[0]
            else:
                if lgarbage > 1 or len(puramierda):
                    errors.append('found PURA MIERDA for %s %s' % (drug, rng))
                    toshow = garbage + puramierda
                else:
                    errors.append('no dice for %s %s' % (drug, rng))
                    toshow = candidates
                ret = None
        elif lfound > 1:
            found2 = []
            for pl in found:
                if ((pl[0] == rng[0] <= rng[1]) or
                    (rng[0] <= rng[1] == pl[1])):
                    found2.append(pl)

            if len(found2) == 1:
                ret = found2[0]
            else:
                (_, imin) = mm.minmax((t[1]-t[0] for t in found), warg=True)[0]
                ret = found[imin]

            errors.append('too many dice for %s %s:' % (drug, rng))
            toshow = found

        for t in toshow:
            errors.append('  %s' % (t,))
        errors.append('')

        return ret

    d2l = dict()
    pls = possible_lims()
    for drug, col in zip(header[1:], cols):
        rng = lims(col)
        d2l[drug] = resolve(rng, pls[drug])

    if errors:
        print
        for e in errors:
            print e

    return d2l