def main(argv):
    """Group per-drug signature records by drug target and pretty-print them.

    For every row of the excerpt table whose drug has an entry in the
    drug -> targets mapping, one ``si.SignatureData`` record is built and
    appended to the list kept for each of that drug's targets.  The
    resulting target -> records dict is pretty-printed; nothing is returned.

    Args:
        argv: accepted but not used in this function.
    """
    d2l = getlims()     # drug -> limits selected by getlims()
    d2t = gettargets()  # drug -> list of targets

    excerpt = '/Users/berriz/Work/scratch/excerpt.tsv'
    header, rows = rt.readtable(excerpt, headerrow=0)
    # NOTE(review): sigclass is not referenced by name again in this
    # function; it is only reachable through the locals() snapshot below.
    sigclass = co.namedtuple('sigclass', header[1:], rename=True)
    flds = set(si.SignatureData._fields)

    t2s = dict()  # target -> list of SignatureData records
    # Fixed seed, so the draws below repeat from run to run
    # (assumes rn is the stdlib random module -- TODO confirm).
    rn.seed(0)
    for row in sorted(rows):
        # First cell is the drug name, the remaining cells are its values.
        drug, vv = row[0], row[1:]
        if drug not in d2t:
            # No target information for this drug: skip the row.
            continue

        # The local names below matter: the record is assembled from
        # locals(), so each name is presumably expected to match a
        # SignatureData field -- verify against si.SignatureData._fields.
        rangetested = d2l[drug]
        signature = tuple(convert_conc_row(vv))
        # The three flags are random draws compared against cutoffs defined
        # outside this function; they are not read from the input tables.
        isclinical = rn.random() < ISCLINICAL_CUTOFF
        isselective = rn.random() < ISSELECTIVE_CUTOFF
        isprimary = rn.random() < ISPRIMARY_CUTOFF
        # Pass the locals selected by flds as keyword arguments (presumably
        # subdict keeps only the keys listed in flds -- TODO confirm).
        # Renaming or removing a local above changes what the record gets.
        data = si.SignatureData(**(sd.superdict(locals()).subdict(flds)))
        # The same record object is filed under every target of the drug.
        for t in d2t[drug]:
            t2s.setdefault(t, []).append(data)

    # Sanity check: within one target's list, every record has a distinct
    # `drug` attribute.
    for vv in t2s.values():
        drugs = set(v.drug for v in vv)
        assert len(drugs) == len(vv)

    # The trailing comma suppresses print's newline, so whatever pp.pprint
    # writes to stdout next follows the label on the same line.
    print ' LATEST =',
    pp.pprint(t2s)
def gettargets():
    """Return a dict mapping each drug to the list of its targets.

    The targets table is read with ``rt.readtable`` (header row skipped).
    Each data row must unpack into exactly two cells: the drug and a
    comma-separated string of targets.  Whitespace around the commas is
    discarded.  When a drug appears on several rows its targets are
    concatenated in row order; duplicates are not removed.
    """
    targets_path = '/Users/berriz/Work/attachments/MF/LBNL_Gray_drug_targets_encoded_10Feb.txt'
    _header, records = rt.readtable(targets_path, headerrow=0)

    # A comma together with any whitespace on either side of it.
    separator = re.compile(r'\s*,\s*')

    targets_by_drug = {}
    for name, target_field in records:
        # Fetch (creating on first sight) this drug's list, then grow it.
        bucket = targets_by_drug.setdefault(name, [])
        bucket.extend(separator.split(target_field))
    return targets_by_drug
# NOTE(review): this file appears to define getlims() a second time directly
# after this definition, with the same logic.  The later definition rebinds
# the name, so this copy would never be the one called -- confirm and remove
# one of the two.
def getlims():
    """Return a dict mapping each drug to the limits selected for it.

    The GI50 table is read, its data cells are converted with
    ``convert_conc_row`` and transposed into one column per drug (drug names
    come from the header, first header cell skipped).  For each drug,
    ``lims(col)`` gives the observed range and the nested ``resolve`` picks
    one entry from that drug's candidates in ``possible_lims()``.

    A drug maps to ``None`` when no candidate could be selected.  Diagnostic
    messages collected while resolving are printed before returning.
    """
    gi50 = '/Users/berriz/Work/scratch/gi50.tsv'
    header, namedrows = rt.readtable(gi50, headerrow=0)
    # Drop each row's first cell, convert the rest, then transpose rows into
    # per-drug columns.
    rows = tuple(convert_conc_row(nr[1:]) for nr in namedrows)
    cols = zip(*rows)
    errors = []  # diagnostic lines, filled in by resolve()

    def resolve(rng, candidates):
        """Choose the candidate (lo, hi) pair that best matches ``rng``.

        Returns the chosen candidate, or None if none is acceptable.  Every
        outcome other than "exactly one candidate contains rng" appends a
        report to the enclosing ``errors`` list.  Reads ``drug`` from the
        enclosing scope (the loop variable below) for those messages.
        """
        assert rng[0] <= rng[1]
        found = []       # candidates that contain rng entirely
        garbage = []     # do not contain rng but share one endpoint with it
        puramierda = []  # neither of the above, yet extend past rng on a side
        # Candidates matching none of the three tests are silently ignored.
        for pl in candidates:
            if pl[0] <= rng[0] and rng[1] <= pl[1]:
                found.append(pl)
            elif pl[0] == rng[0] or rng[1] == pl[1]:
                garbage.append(pl)
            elif pl[0] < rng[0] or rng[1] < pl[1]:
                puramierda.append(pl)

        lfound = len(found)
        if lfound == 1:
            # Unambiguous match: nothing to report.
            return found[0]

        if lfound == 0:
            lgarbage = len(garbage)
            if lgarbage == 1:
                # Fall back on the single endpoint-sharing candidate.
                errors.append('found garbage for %s %s' % (drug, rng))
                toshow = garbage
                ret = garbage[0]
            else:
                if lgarbage > 1 or len(puramierda):
                    errors.append('found PURA MIERDA for %s %s' % (drug, rng))
                    toshow = garbage + puramierda
                else:
                    errors.append('no dice for %s %s' % (drug, rng))
                    toshow = candidates
                ret = None
        elif lfound > 1:
            # Several containing candidates: prefer the only one that shares
            # an endpoint with rng, if there is exactly one such candidate.
            found2 = []
            for pl in found:
                if ((pl[0] == rng[0] <= rng[1]) or
                        (rng[0] <= rng[1] == pl[1])):
                    found2.append(pl)
            if len(found2) == 1:
                ret = found2[0]
            else:
                # Otherwise take the candidate at the index reported by
                # mm.minmax over the candidate widths (presumably the index
                # of the narrowest one -- TODO confirm minmax's return shape).
                (_, imin) = mm.minmax((t[1] - t[0] for t in found),
                                      warg=True)[0]
                ret = found[imin]
            errors.append('too many dice for %s %s:' % (drug, rng))
            toshow = found

        # List the candidates relevant to the report, then a separator line.
        for t in toshow:
            errors.append(' %s' % (t, ))
        errors.append('')
        return ret

    d2l = dict()
    pls = possible_lims()  # indexed by drug below; yields that drug's candidates
    for drug, col in zip(header[1:], cols):
        rng = lims(col)
        d2l[drug] = resolve(rng, pls[drug])

    if errors:
        # Blank line first, then one diagnostic per line.
        print
        for e in errors:
            print e
    return d2l
def getlims():
    """Select, for every drug in the GI50 table, the limits that fit its data.

    The table's data cells are converted with ``convert_conc_row`` and
    transposed so that each column belongs to one drug (named by the header,
    first header cell skipped).  ``lims(column)`` yields the observed range,
    and ``resolve`` picks one of the drug's candidates from
    ``possible_lims()``.

    Returns a dict drug -> chosen candidate, with ``None`` where nothing
    could be chosen.  Any diagnostics gathered along the way are printed.
    """
    gi50_path = '/Users/berriz/Work/scratch/gi50.tsv'
    header, namedrows = rt.readtable(gi50_path, headerrow=0)
    # Strip the leading cell of every row, convert, then turn rows into
    # per-drug columns.
    converted = tuple(convert_conc_row(record[1:]) for record in namedrows)
    columns = zip(*converted)
    errors = []  # diagnostic lines appended by resolve()

    def resolve(rng, candidates):
        """Return the candidate pair best matching ``rng``, or None.

        Anything other than a single containing candidate is reported in the
        enclosing ``errors`` list; ``drug`` is read from the enclosing loop.
        """
        low, high = rng[0], rng[1]
        assert low <= high

        enclosing = []   # contain the whole observed range
        touching = []    # share exactly the tests' endpoint equality
        straddling = []  # reach past the observed range on some side
        for cand in candidates:
            if cand[0] <= low and high <= cand[1]:
                bucket = enclosing
            elif cand[0] == low or high == cand[1]:
                bucket = touching
            elif cand[0] < low or high < cand[1]:
                bucket = straddling
            else:
                # Matches none of the tests: not considered at all.
                continue
            bucket.append(cand)

        if len(enclosing) == 1:
            # Clean, unambiguous hit: no report needed.
            return enclosing[0]

        if enclosing:
            # More than one containing candidate.  Prefer the sole one that
            # shares an endpoint with the observed range, when unique.
            edge_sharing = [c for c in enclosing
                            if (c[0] == low <= high) or (low <= high == c[1])]
            if len(edge_sharing) == 1:
                chosen = edge_sharing[0]
            else:
                # Index comes from mm.minmax over the candidate widths
                # (presumably that of the narrowest one -- TODO confirm).
                _, pick = mm.minmax((w[1] - w[0] for w in enclosing),
                                    warg=True)[0]
                chosen = enclosing[pick]
            errors.append('too many dice for %s %s:' % (drug, rng))
            shown = enclosing
        elif len(touching) == 1:
            errors.append('found garbage for %s %s' % (drug, rng))
            shown = touching
            chosen = touching[0]
        elif touching or straddling:
            errors.append('found PURA MIERDA for %s %s' % (drug, rng))
            shown = touching + straddling
            chosen = None
        else:
            errors.append('no dice for %s %s' % (drug, rng))
            shown = candidates
            chosen = None

        # Candidates relevant to the report, followed by a separator line.
        errors.extend(' %s' % (item,) for item in shown)
        errors.append('')
        return chosen

    limits_by_drug = {}
    candidates_by_drug = possible_lims()
    for drug, column in zip(header[1:], columns):
        observed = lims(column)
        limits_by_drug[drug] = resolve(observed, candidates_by_drug[drug])

    if errors:
        # Bare Python 2 print statement: emits an empty line before the report.
        print
        for message in errors:
            print(message)
    return limits_by_drug