def _save(cube, key, path): # 'r+' apparently does not create the file if it doesn't # already exist, so... with open(path, "a"): pass with open(path, "r+") as fh: try: flock(fh, LOCK_EX | LOCK_NB) except IOError, e: warnings.warn("can't immediately write-lock " "the file (%s), blocking ..." % e) flock(fh, LOCK_EX) fh.seek(0, 0) try: cubedict = pickle.load(fh) except EOFError: cubedict = mkd() try: cubedict.set(key, cube) except Exception, e: import traceback as tb tb.print_exc() print "type:", type(e) print "str:", str(e) print "message: <<%s>>" % e.message cubedict.delete(key) cubedict.set(key, cube)
def main(argv):
    """Entry point: read the experiment-map header and set up the
    coordinate namedtuples and index bookkeeping used downstream.

    NOTE(review): within this view the body ends right after the output
    header record is printed; presumably the per-record processing
    follows elsewhere in the file.
    """
    _parseargs(argv)
    path = PARAM.path_to_expmap
    with open(path) as fh:
        # The first line of the experiment map names the key and value
        # coordinate columns; build one namedtuple class for each group.
        KeyCoords, ValCoords = [namedtuple(n, c)
                                for n, c in zip(('KeyCoords', 'ValCoords'),
                                                parse_line(fh.next()))]
        # Indices of the two ligand columns; _is collects every *other*
        # key-field index (i.e. the non-ligand key coordinates).
        _ii = set([KeyCoords._fields.index('ligand_name'),
                   KeyCoords._fields.index('ligand_concentration')])
        _is = [i for i in range(len(KeyCoords._fields)) if i not in _ii]
        _j = ValCoords._fields.index('plate')
        _k = ValCoords._fields.index('well')
        del _ii

        def _reduced_kv(key, val):
            # Project (key, val) down to the non-ligand key fields plus
            # plate and well, dropping the well's first character
            # (presumably a row letter -- TODO confirm against the data).
            return tuple([key[i] for i in _is] + [val[_j], val[_k][1:]])

        def _delete_field(tuple_, _i=ValCoords._fields.index('field')):
            # Return tuple_ with the 'field' coordinate removed.
            return tuple_[:_i] + tuple_[_i + 1:]

        # Multi-key dict keyed by the reduced key; noclobber guards
        # against duplicate entries.
        control_conc = mkd(len(_reduced_kv(KeyCoords._fields,
                                           ValCoords._fields)),
                           noclobber=True)
        OutputValCoords = namedtuple('OutputValCoords',
                                     _delete_field(ValCoords._fields))
        # Emit the output header record (Python 2 tuple-in-for syntax).
        print_record([nt._fields for nt in KeyCoords, OutputValCoords])
def regroup0(div, sz, group): lkp = defaultdict(set) newgroup = mkd(sz + 1, list) j = Coords._fields.index(div) first = True for ck, rs in group.iteritemsmk(): c, k = tuple(ck[:-1]), (ck[-1],) if div == 'assay' or div == 'plate': if first: first = False print c # prints ((),) for assay u = set() for r in rs: f = r[j] u.add(f) newgroup.get(c + (f,) + k).append(r) if len(u) > 1: lkp[c].add(tuple(sorted(u))) return lkp, newgroup
def main(argv):
    """Entry point: extract per-well summary statistics for one
    (subassay, assay) pair from the experiment map and save them as a
    pickled multi-key cube.

    Returns 0 on success.
    """
    _parseargs(argv)
    path = PARAM.path_to_expmap
    # The (subassay, assay) pair this run is restricted to.
    _basekey = (PARAM.subassay, PARAM.assay)
    with open(path) as fh:
        # Header line defines the key/value coordinate namedtuples.
        KeyCoords, ValCoords = [namedtuple(n, c)
                                for n, c in zip(("KeyCoords", "ValCoords"),
                                                parse_line(fh.next()))]
        assert "field" not in ValCoords._fields
        # One cube level per key coordinate; noclobber rejects duplicates.
        cube = mkd(len(KeyCoords._fields), noclobber=True)
        buf = []  # NOTE(review): appended to but never read in this view
        for line in fh:
            key, val = [clas(*tpl) for clas, tpl in
                        zip((KeyCoords, ValCoords), parse_line(line))]
            # Skip records outside the requested (subassay, assay).
            if _skip(key, val, *_basekey):
                continue
            data_path = get_data_path(val)
            sdc_paths = get_sdc_paths(data_path)
            wanted_features = get_wanted_features(val.channel)
            rawdata = get_rawdata(sdc_paths, wanted_features)
            assert rawdata.size
            # One column per requested feature.
            assert len(wanted_features) == rawdata.shape[1]
            target = get_target(val)
            signal = get_signal(rawdata, target)
            data = mean_and_stddev(signal)
            # Cube keys are stored as unicode strings.
            ukey = tuple(unicode(k) for k in key)
            cube.set(ukey, data)
            buf.append(data)
    assert cube, "empty cube"
    _save(cube, _basekey, PARAM.output_path)
    return 0
def regroup(div, sz, group):
    """Regroup *group* one level deeper on the coordinate named *div*,
    assigning replicate numbers where a key saw multiple div-values.

    Returns ``(lkps, newgroup)``: *newgroup* nests records under
    ``c + (div-value,)`` with each record's ``repno`` extended by its
    replicate number; *lkps* maps each prefix ``c`` to the
    value-to-repno function used for it (0 when there was no ambiguity).
    """
    lkps = defaultdict(set)
    j = Coords._fields.index(div)
    newgroup = mkd(sz, lambda: defaultdict(list))
    for c, krs in group.iteritemsmk():
        # Collect the "signatures": sorted tuples of div-values for any
        # key that carried more than one distinct value.
        sigs = set()
        for k, rs in krs.items():
            u = set([r[j] for r in rs])
            if len(u) > 1:
                sigs.add(tuple(sorted(u)))
        if sigs:
            # All signatures must have the same arity.
            assert len(set(map(len, sigs))) == 1
            # Position of a value within the column-wise sorted
            # signatures determines its replicate number (1-based).
            lkp = dict(sum([[(v, i + 1) for v in vs]
                            for i, vs in enumerate(zip(*sorted(sigs)))], []))
            # BUG FIX: bind lkp as a default argument.  The original
            # closure captured the loop-local name late, so every
            # function stored in lkps referred to the lkp from the
            # *last* iteration of the outer loop.
            torepno = lambda s, _lkp=lkp: _lkp.get(s, 0)
        else:
            torepno = lambda s: 0
        lkps[c] = torepno
        for k, rs in krs.items():
            for r in rs:
                f = r[j]
                newgroup.get(c + (f,))[k].append(
                    r._replace(repno=r.repno + (torepno(f),)))
    return lkps, newgroup
# --- script setup: CLI arguments, coordinate namedtuples, grouping state ---
path = sys.argv[1]
# Optional second CLI arg (0/1) selects the alternative grouping mode.
ALT = bool(int(sys.argv[2])) if len(sys.argv) > 2 else False
if ALT:
    print 'running under ALT: %s' % str(ALT)
KeyCoords = namedtuple('KeyCoords', 'cell_line ligand_name ligand_concentration time signal')
ValCoords = namedtuple('ValCoords', 'assay plate well field channel antibody')
# Full coordinate record: key fields + replicate number + value fields.
Coords = namedtuple('Coords', KeyCoords._fields + ('repno',) + ValCoords._fields)
if ALT:
    BYSTUDY = mkd(1, list)
else:
    pre = defaultdict(list)

def convert(s):
    # Parse numeric-looking strings to int/float; anything else is
    # decoded from UTF-8 (Python 2 str -> unicode).
    try:
        return float(s) if '.' in s else int(s)
    except ValueError:
        return s.decode('utf-8')

with open(path) as fh:
    # Sanity check: the first line must be the header row.
    assert 'cell_line' in fh.next()
    # print '\t'.join((','.join(KeyCoords._fields),
    #                  ','.join(ValCoords._fields)))
    # print ','.join(KeyCoords._fields + ('\t',) + ValCoords._fields)
def main(argv):
    """Build a synthetic multi-key data cube and write it out as an HDF5
    hyperbrick; the file basename comes from argv[1] or is derived from
    the dimension lengths.  Returns 0 on success."""
    from itertools import product
    from string import ascii_lowercase as lc, ascii_uppercase as uc
    from multikeydict import MultiKeyDict as mkd

    # nd0 "outer" key dimensions (named A, B, ...) plus nd1 trailing
    # data dimensions (named with lowercase letters continuing after).
    nd0 = 4
    nd1 = 1
    nd = nd0 + nd1
    dimnames0 = uc[:nd0]
    dimnames1 = lc[nd0:nd]
    dimnames = dimnames0 + dimnames1
    range_nd0 = range(nd0)
    #range_nd0 = (3, 5, 7, 11)
    dimlengths0 = tuple([nd0 + i for i in range_nd0])
    dimlengths1 = tuple([2] * nd1)
    dimlengths = dimlengths0 + dimlengths1
    assert len(dimnames) == len(dimlengths)

    def mk_dimvals(names, lengths):
        # Values for a dim named 'A' of length 3 are ['A0', 'A1', 'A2'].
        def fmt(p, i):
            return '%s%d' % (p, i)
        return [[fmt(c, k) for k in range(j)]
                for c, j in zip(names, lengths)]

    # (A commented-out offset-generating variant of mk_dimvals, plus
    # idx/to_rep index helpers and a valsets cross-check experiment,
    # were removed for readability; see VCS history.)
    dimvals0 = mk_dimvals(dimnames0, dimlengths0)
    dimvals1 = mk_dimvals(dimnames1, dimlengths1)
    dimspec = mk_dimspec(dimnames, dimvals0 + dimvals1)
    data0 = range(prod(dimlengths0))
    # NOTE(review): arange yields an integer array, so under Python 2 /
    # old NumPy `1/(2 + arange(...))` is integer division and produces
    # zeros -- confirm whether a float literal (1.0) was intended here.
    data1 = array([(-1)**i * x
                   for i, x in enumerate(1/(2 + arange(prod(dimlengths1))))])
    data_mkd = mkd(maxdepth=nd0, noclobber=True)
    # Populate the cube: one vector (data1 offset by data0[i]) per
    # combination of outer-dimension values.
    for i, ks in enumerate(product(*dimvals0)):
        data_mkd.set(ks, data1 + data0[i])

    # Stack the cube's values back into a dense ndarray in key order.
    data = np.vstack(data_mkd.itervaluesmk()).reshape(dimlengths)
    if len(argv) > 1:
        bn = argv[1]
    else:
        # e.g. 'q_4x5x6x7-x-2' for dimlengths0=(4,5,6,7), dimlengths1=(2,).
        bn = 'q_' + '-x-'.join(('x'.join(map(str, dimlengths0)),
                                'x'.join(map(str, dimlengths1))))
    h5h = createh5h(bn)[0]
    add(h5h, dimspec, data)
    return 0
def main(argv):
    """Read the experiment map, encode each record's value coordinates
    as integer ids, assemble one hyperbrick per subassay, and write the
    bricks (plus empty same-shaped placeholders and the id keymap) to
    an HDF5 file.  Returns 0 on success."""
    _parseargs(argv)
    outpath = PARAM.path_to_outfile
    if os.path.exists(outpath):
        import sys
        print >> sys.stderr, 'warning: clobbering an existing %s' % outpath
    with open(PARAM.path_to_expmap) as fh:
        # Header line defines the key/value coordinate namedtuples.
        KeyCoords, ValCoords = [namedtuple(n, c)
                                for n, c in zip((u'KeyCoords', u'ValCoords'),
                                                parse_line(fh.next()))]
        # Output keys carry an extra per-(cell_line, assay) replicate no.
        OutputKeyCoords = namedtuple(u'OutputKeyCoords',
                                     KeyCoords._fields + (u'repno',))
        global Cube # required for pickling
        class Cube(mkd):
            # A multi-key dict with one level per output key coordinate
            # (unless overridden) that refuses to clobber entries.
            def __init__(self, *args, **kwargs):
                maxd = kwargs.get('maxdepth', len(OutputKeyCoords._fields))
                super(Cube, self).__init__(maxdepth=maxd, noclobber=True)
        cubes = mkd(1, Cube)
        nvals = len(ValCoords._fields)
        # All ids start above the mask value so maskval never collides.
        start = PARAM.maskval + 1
        vcmapper = KeyMapper(*([count(start)] * nvals)) # Sic! We want a
                                                        # single counter shared
                                                        # by all the component
                                                        # keymappers
        del nvals
        maxid = start
        del start
        debug = PARAM.debug
        recordcount = 0
        for line in fh:
            key, val = [clas(*tpl) for clas, tpl in
                        zip((KeyCoords, ValCoords), parse_line(line))]
            subassay = get_subassay(val)
            repno = get_repno(key, val)
            newkey = tuple(map(unicode, key + (repno,)))
            newval = vcmapper.getid(val)
            # Track the largest id handed out so far (for dtype sizing).
            maxid = max(maxid, *newval)
            cubes.get((subassay,)).set(newkey, newval)
            if not debug:
                continue
            # Debug mode: stop after the first 10 records.
            recordcount += 1
            if recordcount >= 10:
                break
    # Smallest unsigned integer dtype that can hold every id.
    dtype = 'uint%d' % needed_bits(maxid)
    del maxid
    kcoords = tuple(map(unicode, OutputKeyCoords._fields))
    vcoords = tuple(map(unicode, ValCoords._fields))
    # Key positions that do not vary factorially across the cubes get
    # permuted to the front and collapsed into one compound level.
    nonfactorial = set()
    for subassay, cube in cubes.items():
        keys_tuple = list(cube.sortedkeysmk())
        nonfactorial.update(get_feasible(keys_tuple)[0])
    if nonfactorial:
        subperms = map(tuple, (sorted(nonfactorial),
                               [i for i in range(len(kcoords))
                                if i not in nonfactorial]))
        del nonfactorial
        height = len(subperms[0])
        assert height > 1
        perm = sum(subperms, ())
        predn = [tuple([kcoords[i] for i in s]) for s in subperms]
        # First coordinate becomes a compound (tuple-valued) label.
        kcoords = (predn[0],) + predn[1]
        del predn
        for subassay, cube in cubes.items():
            cubes[subassay] = cube.permutekeys(perm).collapsekeys(height)
        del perm, height
    bricks = dict()
    for subassay, cube in cubes.items():
        keys_tuple = list(cube.sortedkeysmk())
        labels = get_labels(kcoords, keys_tuple) + \
                 ((PARAM.extra_dim_name, vcoords),)
        factors = tuple(kv[1] for kv in labels)
        shape = tuple(map(len, factors))
        # Cells with no corresponding record keep PARAM.maskval.
        npcube = np.ones(shape=shape, dtype=dtype) * PARAM.maskval
        for key in keys_tuple:
            npcube[cube.index(key)] = cube.get(key)
        bricks[subassay] = hb.HyperBrick(npcube, labels)
    with h5h.Hdf5File(outpath, 'w') as h5:
        dir0 = h5.require_group('confounders')
        dir1 = h5.require_group('from_IR')
        keymap = vcmapper.mappers
        h5h.force_create_dataset(dir0, 'keymap', data=dump(keymap))
        # reconstitute the above with:
        #     keymap = yaml.load(<H5>['confounders/keymap'].value)
        # ...where <H5> stands for some h5py.File instance
        for subassay, hyperbrick in bricks.items():
            empty_datacube = np.ndarray(hyperbrick.data.shape,
                                        dtype=PARAM.float)
            # the next step is not essential; also, there may be a better
            # choice of fillvalue than the current one (NaN)
            empty_datacube.fill(PARAM.fillvalue)
            empty_hyperbrick = hb.HyperBrick(empty_datacube,
                                             hyperbrick.labels)
            for d, b in ((dir0, hyperbrick), (dir1, empty_hyperbrick)):
                h5h.write_hyperbrick(d.require_group(subassay), b)
    return 0
def get_repno(key, val, _lookup=mkd(1, IdSeq)):
    """Return the replicate number for this (cell_line, assay) pair as a
    singleton tuple.

    The mutable default ``_lookup`` is intentional: it acts as a
    process-wide memo, so repeated (cell_line, assay) pairs map to a
    stable id across calls.
    """
    # NOTE: returns a singleton tuple (in the future, this repno
    # parameter may be a k-tuple for some k > 1)
    memo_key = (key.cell_line, val.assay)
    return (_lookup.get(memo_key),)