def main(argv):
    _parseargs(argv)

    precube = unpickle_precube(PARAM.path_to_precube)

    output = dict()

    # layout of the HDF5 output written below:
    #
    # └── from_IR
    #     │
    #     ├── GF
    #     │   ├── data
    #     │   └── labels
    #     │
    #     └── CK
    #         ├── data
    #         └── labels

    with h5h.Hdf5File(PARAM.path_to_h5, 'r+') as h5:
        # invert each id-assignment table stored under confounders/keymap,
        # so that stored integer ids can be mapped back to their values
        keymap = [dict((v, k) for k, v in d.items())
                  for d in load(h5['confounders/keymap'].value)]

        from_IR = h5.require_group('from_IR')

        for subassay, grp in h5['confounders'].items():
            if subassay not in precube:
                continue

            subassay_dir = from_IR.require_group(subassay)

            dcube = precube[subassay]
            confounders = grp['data']
            labels = orddict(load(grp['labels'].value))
            fnames, factors = labels.keys(), labels.values()

            confounder_index = fnames.index('confounder')

            # swap the confounder dimension for the extra output dimension
            dlabels = list(labels.items())
            dlabels[confounder_index] = PARAM.extra_dim

            shape = map(len, [kv[1] for kv in dlabels])
            output[subassay] = output_datacube = np.ndarray(shape)
            del shape

            assay_index = factors[confounder_index].index('assay')
            del confounder_index

            assay_dict = keymap[assay_index]

            # fill the output datacube one cell (over all but the last
            # dimension) at a time
            for ii in product(*map(range, confounders.shape[:-1])):
                assay = assay_dict[confounders[ii][assay_index]]
                kk = tuple([f[i] for f, i in zip(labels.values(), ii)])
                output_datacube[ii] = dcube.get((assay, kk[0][0]) + kk[1:])

            brick = hb.HyperBrick(output_datacube, dlabels)
            h5h.write_hyperbrick(subassay_dir, brick)

    return 0
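# The bricks written above can be reconstituted with the same helper module;
# a minimal read-back sketch (it assumes only the h5h calls already used by
# these scripts, with 'GF' standing in for either subassay group):
#
#     with h5h.Hdf5File(PARAM.path_to_h5, 'r') as h5:
#         brick = h5h.read_hyperbrick(h5['from_IR/GF'])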
def main(argv):
    _parseargs(argv)

    openh5 = h5h.Hdf5File
    with openh5(PARAM.h5path, 'r+') as h5:
        source = 'from_IR'
        target = h5.require_group('from_IR_w_zeros')
        for subassay in 'GF', 'CK':
            # read each subassay's brick, fill in the control data, and
            # write the result under from_IR_w_zeros
            brick = h5h.read_hyperbrick(h5['/'.join((source, subassay))])
            fullbrick = propagate_controls(brick)
            h5h.write_hyperbrick(target.require_group(subassay), fullbrick)

    return 0
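# Each main(argv) above appears to be a standalone script driver; a minimal
# entry-point sketch for invoking one directly (an assumption: it presumes
# _parseargs expects the full argv, program name included):
if __name__ == '__main__':
    import sys
    sys.exit(main(sys.argv))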
def main(argv):
    _parseargs(argv)

    outpath = PARAM.path_to_outfile
    if os.path.exists(outpath):
        import sys
        print >> sys.stderr, 'warning: clobbering an existing %s' % outpath

    with open(PARAM.path_to_expmap) as fh:
        KeyCoords, ValCoords = [namedtuple(n, c)
                                for n, c in zip((u'KeyCoords', u'ValCoords'),
                                                parse_line(fh.next()))]

        OutputKeyCoords = namedtuple(u'OutputKeyCoords',
                                     KeyCoords._fields + (u'repno',))

        global Cube  # required for pickling
        class Cube(mkd):
            def __init__(self, *args, **kwargs):
                maxd = kwargs.get('maxdepth', len(OutputKeyCoords._fields))
                super(Cube, self).__init__(maxdepth=maxd, noclobber=True)

        cubes = mkd(1, Cube)

        nvals = len(ValCoords._fields)
        start = PARAM.maskval + 1
        # Sic!  We want a single counter shared by all the component
        # keymappers.
        vcmapper = KeyMapper(*([count(start)] * nvals))
        del nvals

        maxid = start
        del start

        debug = PARAM.debug
        recordcount = 0
        for line in fh:
            key, val = [clas(*tpl) for clas, tpl in
                        zip((KeyCoords, ValCoords), parse_line(line))]
            subassay = get_subassay(val)
            repno = get_repno(key, val)
            newkey = tuple(map(unicode, key + (repno,)))
            newval = vcmapper.getid(val)
            maxid = max(maxid, *newval)
            cubes.get((subassay,)).set(newkey, newval)

            if not debug:
                continue
            recordcount += 1
            if recordcount >= 10:
                break

    dtype = 'uint%d' % needed_bits(maxid)
    del maxid

    kcoords = tuple(map(unicode, OutputKeyCoords._fields))
    vcoords = tuple(map(unicode, ValCoords._fields))

    nonfactorial = set()
    for subassay, cube in cubes.items():
        keys_tuple = list(cube.sortedkeysmk())
        nonfactorial.update(get_feasible(keys_tuple)[0])

    if nonfactorial:
        # permute the non-factorial key dimensions to the front, then
        # collapse them into a single composite dimension
        subperms = map(tuple, (sorted(nonfactorial),
                               [i for i in range(len(kcoords))
                                if i not in nonfactorial]))
        del nonfactorial
        height = len(subperms[0])
        assert height > 1
        perm = sum(subperms, ())
        predn = [tuple([kcoords[i] for i in s]) for s in subperms]
        kcoords = (predn[0],) + predn[1]
        del predn
        for subassay, cube in cubes.items():
            cubes[subassay] = cube.permutekeys(perm).collapsekeys(height)
        del perm, height

    bricks = dict()
    for subassay, cube in cubes.items():
        keys_tuple = list(cube.sortedkeysmk())
        labels = (get_labels(kcoords, keys_tuple) +
                  ((PARAM.extra_dim_name, vcoords),))
        factors = tuple(kv[1] for kv in labels)
        shape = tuple(map(len, factors))

        npcube = np.ones(shape=shape, dtype=dtype) * PARAM.maskval
        for key in keys_tuple:
            npcube[cube.index(key)] = cube.get(key)

        bricks[subassay] = hb.HyperBrick(npcube, labels)

    with h5h.Hdf5File(outpath, 'w') as h5:
        dir0 = h5.require_group('confounders')
        dir1 = h5.require_group('from_IR')

        keymap = vcmapper.mappers
        h5h.force_create_dataset(dir0, 'keymap', data=dump(keymap))
        # reconstitute the above with:
        #
        #     keymap = yaml.load(<H5>['confounders/keymap'].value)
        #
        # ...where <H5> stands for some h5py.File instance

        for subassay, hyperbrick in bricks.items():
            empty_datacube = np.ndarray(hyperbrick.data.shape,
                                        dtype=PARAM.float)

            # the next step is not essential; also, there may be a better
            # choice of fillvalue than the current one (NaN)
            empty_datacube.fill(PARAM.fillvalue)

            empty_hyperbrick = hb.HyperBrick(empty_datacube,
                                             hyperbrick.labels)

            for d, b in ((dir0, hyperbrick), (dir1, empty_hyperbrick)):
                h5h.write_hyperbrick(d.require_group(subassay), b)

    return 0
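# `needed_bits` is defined elsewhere in this codebase; the sketch below is a
# hypothetical stand-in inferred purely from its use above, where
# 'uint%d' % needed_bits(maxid) must name a valid numpy dtype: it returns the
# narrowest standard unsigned-integer width able to represent `maxval`.
def needed_bits(maxval):
    # try the standard numpy widths from narrowest to widest
    for width in (8, 16, 32, 64):
        if maxval < (1 << width):
            return width
    raise ValueError('%r does not fit in a 64-bit unsigned integer'
                     % (maxval,))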