Ejemplo n.º 1
0
def _save(cube, key, path):
    """Insert *cube* under *key* into the pickled cube dictionary at *path*.

    The file is created if it does not exist, opened for update, and
    exclusively locked with ``flock`` before its current contents are
    unpickled.  An empty file yields a fresh ``mkd()``.  If the first
    ``set`` fails for any reason, diagnostics are printed, the existing
    entry for *key* is deleted, and the ``set`` is retried once.

    NOTE(review): the code visible here never writes ``cubedict`` back to
    the file; presumably that happens in code following this point --
    confirm.
    """
    # 'r+' does not create a missing file, so touch it in append mode first.
    with open(path, "a"):
        pass

    with open(path, "r+") as fh:
        # Try for the lock without blocking so that contention gets
        # reported; fall back to a blocking request if it is already held.
        try:
            flock(fh, LOCK_EX | LOCK_NB)
        except IOError as e:
            warnings.warn("can't immediately write-lock the file (%s), blocking ..." % (e,))
            flock(fh, LOCK_EX)

        fh.seek(0, 0)

        try:
            cubedict = pickle.load(fh)
        except EOFError:
            # nothing has been pickled to this file yet
            cubedict = mkd()

        try:
            cubedict.set(key, cube)
        except Exception as e:
            import traceback as tb

            tb.print_exc()
            # Arguments are tuple-wrapped so that a tuple-valued message
            # cannot make the formatting itself fail inside this handler.
            print("type: %s" % (type(e),))
            print("str: %s" % (str(e),))
            print("message: <<%s>>" % (e.message,))
            # Drop whatever is stored under key and try once more.
            cubedict.delete(key)
            cubedict.set(key, cube)
Ejemplo n.º 2
0
def main(argv):
    """Read the experiment-map header and print the output header record.

    After parsing *argv*, the first line of the file named by
    ``PARAM.path_to_expmap`` is used to define the ``KeyCoords`` and
    ``ValCoords`` namedtuples.  From these the helpers ``_reduced_kv`` and
    ``_delete_field`` are built, a ``control_conc`` multikey dict is
    created, and the field names of ``KeyCoords`` and ``OutputValCoords``
    are handed to ``print_record``.
    """
    _parseargs(argv)

    path = PARAM.path_to_expmap

    with open(path) as fh:
        header = parse_line(fh.next())
        KeyCoords, ValCoords = [
            namedtuple(name, fields)
            for name, fields in zip(('KeyCoords', 'ValCoords'), header)
        ]

        key_fields = KeyCoords._fields
        val_fields = ValCoords._fields

        # Key positions that survive the reduction: everything except the
        # ligand name and the ligand concentration.
        dropped = frozenset([key_fields.index('ligand_name'),
                             key_fields.index('ligand_concentration')])
        kept = [pos for pos, _ in enumerate(key_fields) if pos not in dropped]
        plate_pos = val_fields.index('plate')
        well_pos = val_fields.index('well')

        def _reduced_kv(key, val):
            # kept key components, then the plate, then the well minus its
            # first character
            reduced = [key[pos] for pos in kept]
            reduced.append(val[plate_pos])
            reduced.append(val[well_pos][1:])
            return tuple(reduced)

        def _delete_field(tuple_, _i=val_fields.index('field')):
            # tuple_ without the element at the position of 'field'
            return tuple_[:_i] + tuple_[_i + 1:]

        # Only the length of the reduced key matters here: it fixes the
        # depth of the multikey dict.
        depth = len(_reduced_kv(key_fields, val_fields))
        control_conc = mkd(depth, noclobber=True)

        OutputValCoords = namedtuple('OutputValCoords',
                                     _delete_field(val_fields))

        print_record([KeyCoords._fields, OutputValCoords._fields])
Ejemplo n.º 3
0
def regroup0(div, sz, group):
    """Split every entry of *group* further by the ``Coords`` field *div*.

    Each multikey of *group* is treated as ``prefix + (last,)``.  Every
    record stored under it is re-filed in a new ``mkd(sz + 1, list)`` under
    ``prefix + (value_of_div,) + (last,)``.  Whenever the records under one
    multikey carry more than one distinct *div* value, the sorted tuple of
    those values is added to ``lkp[prefix]``.

    Returns the pair ``(lkp, newgroup)``.
    """
    lkp = defaultdict(set)
    newgroup = mkd(sz + 1, list)
    field_pos = Coords._fields.index(div)

    # Debugging aid: the prefix of the first multikey is printed once, and
    # only when dividing by assay or plate.
    debug_print_pending = True
    for multikey, records in group.iteritemsmk():
        prefix = tuple(multikey[:-1])
        suffix = (multikey[-1],)

        if debug_print_pending and div in ('assay', 'plate'):
            debug_print_pending = False
            print(prefix)  # prints ((),) for assay

        distinct = set()
        for record in records:
            value = record[field_pos]
            distinct.add(value)
            newgroup.get(prefix + (value,) + suffix).append(record)

        if len(distinct) > 1:
            lkp[prefix].add(tuple(sorted(distinct)))

    return lkp, newgroup
Ejemplo n.º 4
0
def main(argv):
    """Build the data cube for one (subassay, assay) pair and save it.

    The header line of the experiment map at ``PARAM.path_to_expmap``
    defines the ``KeyCoords`` / ``ValCoords`` record types.  For every
    remaining line that ``_skip`` does not reject, the raw data is located
    and loaded, the signal for the record's target is extracted, and the
    result of ``mean_and_stddev`` is stored in the cube under the
    unicode-converted key.  The finished cube is passed to ``_save`` under
    the key ``(PARAM.subassay, PARAM.assay)``.

    Returns 0.  An ``AssertionError`` is raised if the header contains a
    'field' column, if a record yields no raw data or a feature-count
    mismatch, or if the resulting cube is empty.
    """
    _parseargs(argv)

    path = PARAM.path_to_expmap

    _basekey = (PARAM.subassay, PARAM.assay)

    with open(path) as fh:
        KeyCoords, ValCoords = [namedtuple(n, c) for n, c in zip(("KeyCoords", "ValCoords"), parse_line(fh.next()))]
        assert "field" not in ValCoords._fields

        cube = mkd(len(KeyCoords._fields), noclobber=True)
        for line in fh:
            key, val = [clas(*tpl) for clas, tpl in zip((KeyCoords, ValCoords), parse_line(line))]

            if _skip(key, val, *_basekey):
                continue

            data_path = get_data_path(val)
            sdc_paths = get_sdc_paths(data_path)

            wanted_features = get_wanted_features(val.channel)
            rawdata = get_rawdata(sdc_paths, wanted_features)
            assert rawdata.size
            # one column of raw data per wanted feature
            assert len(wanted_features) == rawdata.shape[1]

            target = get_target(val)
            signal = get_signal(rawdata, target)
            data = mean_and_stddev(signal)
            ukey = tuple(unicode(k) for k in key)
            # The cube is the only consumer of data; no separate list of
            # the results is kept.
            cube.set(ukey, data)

    assert cube, "empty cube"
    _save(cube, _basekey, PARAM.output_path)

    return 0
Ejemplo n.º 5
0
def _make_torepno(sigs):
    """Return the value -> replicate-number function for one set of signatures.

    *sigs* is a set of equal-length sorted tuples of *div* values.  The
    values found at position ``i`` across all signatures map to ``i + 1``;
    every other value maps to 0.  An empty *sigs* yields a function that
    always returns 0.
    """
    if not sigs:
        return lambda s: 0

    assert len(set(map(len, sigs))) == 1
    table = dict((v, i + 1)
                 for i, vs in enumerate(zip(*sorted(sigs)))
                 for v in vs)
    # table is local to this call, so the returned function keeps its own
    # table no matter how many times the factory is called afterwards.
    return lambda s: table.get(s, 0)


def regroup(div, sz, group):
    """Split every entry of *group* by the ``Coords`` field *div*.

    *group* yields ``(c, krs)`` pairs from ``iteritemsmk()``, where ``krs``
    maps a key to a list of records.  For each ``c`` the distinct *div*
    values occurring together under one key form a "signature"; from the
    signatures a function mapping a *div* value to a replicate number is
    derived.  Each record is then re-filed under ``c + (div_value,)`` in a
    new ``mkd(sz, ...)``, with that replicate number appended to its
    ``repno`` tuple.

    Returns ``(lkps, newgroup)``, where ``lkps[c]`` is the replicate-number
    function for ``c``.  Each stored function has its own lookup table; a
    function stored for one ``c`` does not change when later groups are
    processed.
    """
    lkps = defaultdict(set)
    j = Coords._fields.index(div)
    newgroup = mkd(sz, lambda: defaultdict(list))

    for c, krs in group.iteritemsmk():
        sigs = set()
        for k, rs in krs.items():
            u = set(r[j] for r in rs)
            if len(u) > 1:
                sigs.add(tuple(sorted(u)))

        torepno = _make_torepno(sigs)
        lkps[c] = torepno

        for k, rs in krs.items():
            for r in rs:
                f = r[j]
                newgroup.get(c + (f,))[k].append(
                    r._replace(repno=r.repno + (torepno(f),)))

    return lkps, newgroup
Ejemplo n.º 6
0
# Command line: the first argument is the input path; an optional second
# argument is an integer whose truth value switches on the ALT code path.
path = sys.argv[1]
ALT = len(sys.argv) > 2 and bool(int(sys.argv[2]))
if ALT:
    print('running under ALT: %s' % str(ALT))

# Record types: the key part, the value part, and the combined record with
# a 'repno' field placed between the two.
KeyCoords = namedtuple(
    'KeyCoords',
    ['cell_line', 'ligand_name', 'ligand_concentration', 'time', 'signal'])

ValCoords = namedtuple(
    'ValCoords',
    ['assay', 'plate', 'well', 'field', 'channel', 'antibody'])

Coords = namedtuple('Coords',
                    KeyCoords._fields + ('repno',) + ValCoords._fields)

# The accumulator depends on the mode: a depth-1 multikey dict of lists
# under ALT, a plain defaultdict of lists otherwise.
if ALT:
    BYSTUDY = mkd(1, list)
else:
    pre = defaultdict(list)

def convert(s):
    """Turn the string *s* into a number where possible.

    A string containing a '.' is parsed as a float, any other string as an
    int.  If that parse raises ValueError, *s* is returned decoded from
    UTF-8 instead.
    """
    parse = float if '.' in s else int
    try:
        return parse(s)
    except ValueError:
        return s.decode('utf-8')

with open(path) as fh:
    # Read the header line outside the assert: an assert statement is
    # removed entirely under ``python -O``, and the header must be consumed
    # either way so that it is not mistaken for a data line.
    _header = fh.next()
    assert 'cell_line' in _header
Ejemplo n.º 7
0
def main(argv):
    """Generate a small synthetic hyperbrick and add it to an HDF5 file.

    Four upper-case-named dimensions of lengths 4, 5, 6 and 7 are combined
    with one lower-case-named dimension of length 2.  Every combination of
    values of the first four dimensions gets a length-2 vector: a running
    integer plus a fixed pair of fractional offsets of alternating sign.
    The vectors are stacked, reshaped to the full dimension lengths, and
    passed to ``add`` together with the dimension spec.

    The base name given to ``createh5h`` is ``argv[1]`` when present;
    otherwise it is derived from the dimension lengths.  Returns 0.
    """
    from itertools import product
    from string import ascii_lowercase as lc, ascii_uppercase as uc

    from multikeydict import MultiKeyDict as mkd

    nd0 = 4
    nd1 = 1
    nd = nd0 + nd1
    dimnames0 = uc[:nd0]
    dimnames1 = lc[nd0:nd]
    dimnames = dimnames0 + dimnames1

    dimlengths0 = tuple(nd0 + i for i in range(nd0))
    dimlengths1 = (2,) * nd1
    dimlengths = dimlengths0 + dimlengths1

    assert len(dimnames) == len(dimlengths)

    def mk_dimvals(names, lengths):
        # For each dimension, the list of its values: the dimension's name
        # followed by a running index.
        return [['%s%d' % (c, k) for k in range(j)]
                for c, j in zip(names, lengths)]

    dimvals0 = mk_dimvals(dimnames0, dimlengths0)
    dimvals1 = mk_dimvals(dimnames1, dimlengths1)
    dimspec = mk_dimspec(dimnames, dimvals0 + dimvals1)

    data0 = range(prod(dimlengths0))
    # 1.0 forces true division.  With an integer numerator, classic
    # Python 2 division of the integer array floors every term to 0, which
    # would wipe out the alternating-sign offsets.
    data1 = array([(-1) ** i * x for i, x in
                   enumerate(1.0 / (2 + arange(prod(dimlengths1))))])
    data_mkd = mkd(maxdepth=nd0, noclobber=True)

    for i, ks in enumerate(product(*dimvals0)):
        data_mkd.set(ks, data1 + data0[i])

    data = np.vstack(data_mkd.itervaluesmk()).reshape(dimlengths)

    if len(argv) > 1:
        bn = argv[1]
    else:
        bn = 'q_' + '-x-'.join(('x'.join(map(str, dimlengths0)),
                                'x'.join(map(str, dimlengths1))))
    h5h = createh5h(bn)[0]
    add(h5h, dimspec, data)
    return 0
def main(argv):
    """Convert the experiment map into per-subassay hyperbricks in HDF5.

    Steps, in order:

    1. Parse *argv*; warn on stderr if ``PARAM.path_to_outfile`` exists.
    2. Read the experiment map at ``PARAM.path_to_expmap``.  Each record's
       key (extended with a replicate number) is stored, in the cube of
       the record's subassay, against the integer ids that a shared
       ``KeyMapper`` assigns to the record's value fields.
    3. Pick an unsigned integer dtype from ``needed_bits`` of the largest
       id seen.
    4. If ``get_feasible`` reports any key positions (collected in
       ``nonfactorial``), move those positions to the front of every
       cube's keys and collapse them into a single leading key.
    5. Turn every cube into a ``HyperBrick`` whose array is pre-filled
       with ``PARAM.maskval``.
    6. Write the key map and the bricks under the HDF5 group
       'confounders', plus a same-shaped brick filled with
       ``PARAM.fillvalue`` under 'from_IR'.

    Returns 0.
    """
    _parseargs(argv)
    outpath = PARAM.path_to_outfile
    if os.path.exists(outpath):
        import sys
        print >> sys.stderr, 'warning: clobbering an existing %s' % outpath

    with open(PARAM.path_to_expmap) as fh:
        # The first line of the experiment map supplies the field names of
        # the key and value record types.
        KeyCoords, ValCoords = [namedtuple(n, c)
                                for n, c in zip((u'KeyCoords', u'ValCoords'),
                                                parse_line(fh.next()))]

        # Output keys are the input keys plus a trailing replicate number.
        OutputKeyCoords = namedtuple(u'OutputKeyCoords',
                                     KeyCoords._fields + (u'repno',))

        global Cube  # required for pickling
        # A multikey dict whose depth defaults to the number of output key
        # fields; positional arguments are accepted but ignored.
        class Cube(mkd):
            def __init__(self, *args, **kwargs):
                maxd = kwargs.get('maxdepth', len(OutputKeyCoords._fields))
                super(Cube, self).__init__(maxdepth=maxd, noclobber=True)
        # one Cube per subassay
        cubes = mkd(1, Cube)

        nvals = len(ValCoords._fields)
        # ids start just above the mask value
        start = PARAM.maskval + 1
        vcmapper = KeyMapper(*([count(start)] * nvals)) # Sic! We want a
                                                        # single counter shared
                                                        # by all the component
                                                        # keymappers
        del nvals
        maxid = start
        del start

        debug = PARAM.debug
        recordcount = 0
        for line in fh:
            key, val = [clas(*tpl) for clas, tpl in
                        zip((KeyCoords, ValCoords), parse_line(line))]
            subassay = get_subassay(val)
            repno = get_repno(key, val)
            newkey = tuple(map(unicode, key + (repno,)))
            newval = vcmapper.getid(val)
            # track the largest id handed out so far; it determines the
            # integer width chosen below
            maxid = max(maxid, *newval)
            cubes.get((subassay,)).set(newkey, newval)
            # in debug mode only the first 10 records are processed
            if not debug:
                continue
            recordcount += 1
            if recordcount >= 10:
                break

    dtype = 'uint%d' % needed_bits(maxid)
    del maxid

    kcoords = tuple(map(unicode, OutputKeyCoords._fields))
    vcoords = tuple(map(unicode, ValCoords._fields))

    # Union, over all cubes, of the key positions reported by the first
    # element of get_feasible's result.
    # NOTE(review): presumably these are the key dimensions that do not
    # vary factorially -- confirm against get_feasible.
    nonfactorial = set()

    for subassay, cube in cubes.items():
        keys_tuple = list(cube.sortedkeysmk())
        nonfactorial.update(get_feasible(keys_tuple)[0])

    if nonfactorial:
        # subperms[0]: the nonfactorial positions, sorted;
        # subperms[1]: all remaining positions, in their original order.
        subperms = map(tuple, (sorted(nonfactorial),
                               [i for i in range(len(kcoords))
                                if i not in nonfactorial]))
        del nonfactorial
        height = len(subperms[0])
        assert height > 1
        # the full permutation: nonfactorial positions first
        perm = sum(subperms, ())

        # The names of the nonfactorial coordinates become one tuple-valued
        # leading coordinate; the other names follow unchanged.
        predn = [tuple([kcoords[i] for i in s]) for s in subperms]
        kcoords = (predn[0],) + predn[1]
        del predn
        for subassay, cube in cubes.items():
            cubes[subassay] = cube.permutekeys(perm).collapsekeys(height)
        del perm, height

    bricks = dict()
    for subassay, cube in cubes.items():
        keys_tuple = list(cube.sortedkeysmk())
        # labels for the key dimensions, plus one extra trailing dimension
        # that enumerates the value coordinates
        labels = get_labels(kcoords, keys_tuple) + \
                 ((PARAM.extra_dim_name, vcoords),)

        factors = tuple(kv[1] for kv in labels)
        shape = tuple(map(len, factors))
        # start fully masked, then fill in the cells that have data
        npcube = np.ones(shape=shape, dtype=dtype) * PARAM.maskval
        for key in keys_tuple:
            npcube[cube.index(key)] = cube.get(key)

        bricks[subassay] = hb.HyperBrick(npcube, labels)

    with h5h.Hdf5File(outpath, 'w') as h5:
        dir0 = h5.require_group('confounders')
        dir1 = h5.require_group('from_IR')

        keymap = vcmapper.mappers
        h5h.force_create_dataset(dir0, 'keymap', data=dump(keymap))
        # reconstitute the above with:
        #     keymap = yaml.load(<H5>['confounders/keymap'].value)
        # ...where <H5> stands for some h5py.File instance

        for subassay, hyperbrick in bricks.items():
            empty_datacube = np.ndarray(hyperbrick.data.shape,
                                        dtype=PARAM.float)

            # the next step is not essential; also, there may be a
            # better choice of fillvalue than the current one (NaN)
            empty_datacube.fill(PARAM.fillvalue)

            empty_hyperbrick = hb.HyperBrick(empty_datacube,
                                             hyperbrick.labels)

            # the id brick goes under 'confounders', the same-shaped
            # placeholder brick under 'from_IR'
            for d, b in ((dir0, hyperbrick), (dir1, empty_hyperbrick)):
                h5h.write_hyperbrick(d.require_group(subassay), b)

    return 0
def get_repno(key, val, _lookup=mkd(1, IdSeq)):
    """Return the replicate number for *key* / *val* as a singleton tuple.

    The number is looked up in ``_lookup`` under the pair
    ``(key.cell_line, val.assay)``.  ``_lookup`` is created once, when the
    function is defined, and is therefore shared by all calls that do not
    pass their own.  The result is a 1-tuple because, in the future, this
    repno parameter may become a k-tuple for some k > 1.
    """
    lookup_key = (key.cell_line, val.assay)
    repno = _lookup.get(lookup_key)
    return (repno,)