def get_yvalues(ole: olefile.OleFileIO, ydata: tuple, header: tuple) -> tuple:
    """Decode the raw Y buffer into groups of ``header.N`` floats.

    ``ydata`` is unpacked with one struct ``'f'`` code per expected value
    (``header.N * header.channels`` of them) and the flat result is cut into
    consecutive tuples of ``header.N`` values.  A falsy ``ydata`` produces an
    empty tuple.  ``ole`` is accepted but not used by this function.
    """
    if ydata:
        flat_values = unpack('f' * header.N * header.channels, ydata)
    else:
        flat_values = ()
    return tuple(partition(header.N, flat_values))
def atop(func, out, out_ind, *args, **kwargs):
    """Array object version of dask.array.top.

    ``args`` alternates array objects and their index labels
    (``x, 'ij', y, 'jk'``).  The graph is built by ``top`` from the arrays'
    names and ``numblocks``; the output shape and block dimensions are
    obtained by broadcasting the inputs' ``shape`` and ``blockdims`` over the
    index labels and reading them off in ``out_ind`` order.

    The optional ``dtype`` keyword is forwarded to the resulting ``Array``.
    """
    dtype = kwargs.get('dtype')

    # [x, ij, y, jk] -> [(x, ij), (y, jk)]
    pairs = list(partition(2, args))
    name_index_pairs = [(arr.name, ind) for arr, ind in pairs]

    graph = top(
        func, out, out_ind,
        *concat(name_index_pairs),
        numblocks={arr.name: arr.numblocks for arr, _ in pairs}
    )

    # Dictionary mapping {i: 3, j: 4, ...} for i, j, ... the dimensions
    dim_sizes = broadcast_dimensions(
        name_index_pairs, {arr.name: arr.shape for arr, _ in pairs})
    shape = tuple(dim_sizes[label] for label in out_ind)

    # Same broadcast, applied to the per-dimension block sizes
    dim_blocks = broadcast_dimensions(
        name_index_pairs, {arr.name: arr.blockdims for arr, _ in pairs})
    blockdims = tuple(dim_blocks[label] for label in out_ind)

    input_graphs = [arr.dask for arr, _ in pairs]
    return Array(merge(graph, *input_graphs), out, shape,
                 blockdims=blockdims, dtype=dtype)
def streaming_pca(samples, n_components=2, batch_size=50):
    """Fit an ``IncrementalPCA`` on a stream of samples, batch by batch.

    ``samples`` is consumed in consecutive groups of ``batch_size``; each
    group is converted with ``np.array`` and handed to ``partial_fit``.
    Assuming ``curried`` is ``toolz.curried``, a trailing group shorter than
    ``batch_size`` is discarded by ``partition``.

    Returns the estimator after all batches have been consumed.
    """
    ipca = decomposition.IncrementalPCA(n_components=n_components,
                                        batch_size=batch_size)
    for batch in curried.partition(batch_size, samples):
        ipca.partial_fit(np.array(batch))
    return ipca
def non_yaml_lines(lines):
    """Return the lines lying between consecutive pairs of boundary indices.

    The boundaries are ``-1``, every index reported by ``all_indices(lines)``
    (presumably the positions of YAML delimiter lines -- confirm against that
    helper) and ``len(lines)``.  They are taken two at a time as
    ``(start, end)`` and the slices ``lines[start + 1:end]`` are concatenated
    in order.
    """
    boundaries = [-1] + all_indices(lines) + [len(lines)]

    def between(start, end):
        # Exclusive of both boundary lines themselves.
        return lines[start + 1:end]

    def joined(left, right):
        return left + right

    spans = partition(2)(boundaries)
    chunks = vmap(between)(spans)
    return reduce(joined)(chunks)
def _build_facets(plat_df, A, n_cols=2, suites=None):
    """Arrange one plot per suite in a grid that is ``n_cols`` wide.

    ``suites`` is walked in groups of ``n_cols`` (the last group is padded
    with ``None``).  For each truthy suite name the matching rows of
    ``plat_df`` are selected; suites with no rows are skipped.  The plots of
    a group are combined with ``A.hconcat`` and the non-empty groups are
    stacked with ``A.vconcat``.

    ``suites`` must be supplied even though it has a ``None`` default.
    """
    assert suites is not None, "Pass `suites` as kw arg in platforms2json"

    grid = []
    for group in z.partition(n_cols, suites, pad=None):
        cells = []
        # The loop variable must stay named ``sname``: the query string below
        # refers to it as ``@sname``.
        for sname in group:
            if not sname:
                continue  # padding or empty name
            gdf = plat_df.query("sname == @sname")
            if len(gdf) == 0:
                continue
            cells.append(pl_suite_modes(agg_n_modes(gdf), sname, A))
        if cells:
            grid.append(A.hconcat(*cells))
    return A.vconcat(*grid)
def parsebook(fn="src/txt/hp1.txt", vb=False):
    """Read a book text file and split it into numbered chapters.

    ``fn`` is either a path or an integer book number, which is expanded to
    ``src/txt/hp<number>.txt``.  The file is decoded as UTF-8 (a leading BOM
    is stripped), matched against ``bookpat_re`` for its ``title`` and
    ``body`` groups, and the body is split with ``chapsep_re``.  The pieces
    after the first are consumed three at a time as
    ``(chapter number, title, text)``.

    With ``vb`` true the path being read is printed.

    Returns a ``Book`` holding the title and a dict of ``Chapter`` objects
    keyed by integer chapter number.
    """
    def log(*args, **kwargs):
        if vb:
            print(*args, **kwargs)

    if isinstance(fn, int):
        fn = "src/txt/hp{}.txt".format(fn)
    log("Reading {}".format(fn))

    with open(fn, "rb") as handle:
        raw = handle.read()
    txt = raw.decode("utf-8-sig")

    groups = bookpat_re.search(txt).groupdict()
    booktitle = groups["title"]
    # Element 0 is whatever precedes the first chapter separator.
    pieces = chapsep_re.split(groups["body"])[1:]

    chapters = {}
    for chnum, title, text in z.partition(3, pieces):
        number = int(chnum)
        chapters[number] = Chapter(number, title, text)
    return Book(booktitle, chapters)
def yaml_data(lines):
    """Extract, template-expand and parse the YAML sections of ``lines``.

    The indices from ``all_indices(lines)`` are paired up, the lines strictly
    between each pair are joined into one text, and that text is run twice
    through a "parse as YAML, then render as a Jinja2 template using the
    parsed data" step.  The final step returns element ``1`` of the last
    result.  Exact short-circuit behaviour depends on the ``maybe_pipe`` and
    ``short_circuit`` helpers, which are defined elsewhere.
    """
    def render(raw, data):
        return jinja2.Template(raw).render(**data)

    def between(start, end):
        return lines[start + 1:end]

    def join_sections(sections):
        return '\n'.join(sections)

    def with_parsed(raw):
        # NOTE(review): yaml.load is called without an explicit Loader. That
        # is unsafe on untrusted input and is rejected by recent PyYAML
        # releases -- confirm the intended loader before changing it.
        return (raw, yaml.load(raw))

    def second(data):
        return data[1]

    return maybe_pipe(
        all_indices(lines),
        short_circuit(bool),  # catch null YAML early
        partition(2),
        vmap(between),
        map('\n'.join),
        join_sections,
        with_parsed,
        vcall(render),
        with_parsed,
        vcall(render),
        second,
    )
def _make_samples(meta, shuffle):
    """Build the list of samples from ``meta["persons"]``.

    Each person's images (shuffled first when ``shuffle`` is true) are cut
    into groups of ``args.N_images`` and every group becomes one sample.
    When the outer ``limit`` is not ``None`` it caps both the number of
    persons read and the number of samples produced.  With ``shuffle`` true
    the finished list is shuffled in place before being returned.
    """
    def _to_sample(person, images):
        # Random images needed for representation interpolation (3.5)
        x1 = _get_random_image()
        x2 = _get_random_image()
        return m(id=person["id_class"] - 1,
                 images=freeze(list(images)),
                 x1=freeze(x1),
                 x2=freeze(x2))

    def _group_images(person):
        group_size = args.N_images
        pool = _shuffled(person["images"]) if shuffle else person["images"]
        return m(p=person, i=tz.partition(group_size, pool))

    def _expand(grouped):
        # Materialise all samples of one person at once.
        return [_to_sample(grouped.p, group) for group in grouped.i]

    cap = tz.identity if limit is None else tz.take(limit)

    samples = pipe(
        meta["persons"],
        cap,
        tz.map(_group_images),
        tz.mapcat(_expand),
        cap,
        list)

    if shuffle:
        random.shuffle(samples)
    return samples
def run(mode, modelname, forcenew, epochs):
    """Train and/or evaluate the upscaler model on S&P 500 daily samples.

    Parameters
    ----------
    mode : str
        ``"train"``, ``"eval"`` or ``"both"``; any other value runs neither
        phase.
    modelname : str
        Name used to load/save the model and to name the output plots.  A
        model named ``"tmp"`` is never saved.
    forcenew : bool
        Create a fresh model even if one called ``modelname`` exists.
    epochs : int
        Number of training epochs.

    Side effects: prints progress, writes plots under ``./output/``, reads
    ``data/gspc.csv`` in the evaluation phase and may save the model.
    """
    allow_train = mode in ("train", "both")
    allow_generate = mode in ("eval", "both")

    # Get the samples and 'work it'
    sample_batches = list(
        tz.pipe(
            data.get_samples(
                "s&p500",
                "1d",
                datetime(1980, 1, 1),
                datetime(2018, 1, 1),
                # randint excludes its upper bound, hence the + 1 to keep the
                # same inclusive range the deprecated random_integers had.
                random_state=np.random.randint(0, 234234 + 1)),
            data.samples_to_batch_generator(128)))

    # Load or create the model
    if forcenew or not UpscalerModel.exists(modelname):
        upscaler_model = UpscalerModel.create_model(5)
    else:
        upscaler_model = UpscalerModel.load_model(modelname)

    if allow_train:
        for epoch in range(epochs):
            print("Starting epoch: {}".format(epoch))
            critic_generator_advantage = 1.0
            batch_idx = 0
            for y_time, x, y, batch_unscaler in sample_batches:
                # Generate "fake" data; the input noise shrinks linearly as
                # training progresses.
                noise_mod = (1.0 - epoch / epochs) * 0.2
                x = [
                    np.random.normal(scale=noise_mod, size=k.shape) + k
                    for k in x
                ]
                generated_y = upscaler_model.generate_output(x)
                assert len(generated_y) == 3, (
                    "Expected exactly 3 output vector. Got {}".format(
                        len(generated_y)))

                # Train the critic
                real_samples = y
                fake_samples = generated_y
                critic_eval_result = upscaler_model.train_critic(
                    real_samples, fake_samples,
                    1.0 / critic_generator_advantage)

                # Train the generator (adversarial)
                generator_eval_result = upscaler_model.train_generator(
                    x, critic_generator_advantage)

                # Print the current results
                print("Epoch: {}, BatchIdx={} results:".format(
                    epoch, batch_idx + 1))
                print("\t Critic: {}".format(critic_eval_result))
                print("\t Generator: {}".format(generator_eval_result))
                ohlc = batch_unscaler(y=generated_y)[0]
                # "{:.2%}" already appends the percent sign.
                print("\t Valid/Invalid: {} vs {} => {:.2%}".format(
                    *analysis.calculate_ohlc_stats(ohlc)))

                # Apply another level of training to the critic to deter
                # invalid OHLC
                ohlc_validvec = tz.pipe(
                    ohlc, tz.map(analysis.is_valid_ohlc), list)
                invalid_ohlc_samples_x = tz.pipe(
                    zip(*generated_y + [ohlc_validvec]),
                    # Explicit comparison kept so that only values equal to
                    # False are selected.
                    tz.filter(lambda t: t[-1] == False),
                    tz.map(lambda t: t[:-1]),
                    unzip,
                    tz.map(np.array),
                    list)
                inv_loss = upscaler_model.train_critic_invalid(
                    invalid_ohlc_samples_x)
                print("\t Invalid loss: {} ({}# samples)".format(
                    inv_loss, len(invalid_ohlc_samples_x[0])))

                # Alternative considered upstream:
                # generator_eval_result[0] / critic_eval_result[1][0]
                critic_generator_advantage = critic_eval_result[1][1]
                batch_idx = batch_idx + 1

            if epoch % 10 == 0:
                # Save the last generated sample (values left over from the
                # final batch of this epoch).
                y_time = y_time.reshape(y[1].shape[:-1] + (1, ))
                ohlc = batch_unscaler(y=generated_y)[1]
                ohlc = np.concatenate([y_time, ohlc], axis=2)[-1]
                last_ohlc_df = pd.DataFrame(
                    ohlc,
                    columns=["date", "open", "high", "low", "close",
                             "volume"])
                analysis.plot_ohlc_tofile(
                    last_ohlc_df, "./output/e{}.png".format(epoch))
                if modelname != "tmp":
                    print("Model saved as: {}".format(
                        upscaler_model.save_model(modelname)))

    if allow_generate:

        def best_of_group(ohlc_group):
            """Pick the first valid OHLC row of a candidate group.

            ``ohlc_group`` holds ``(is_valid, row)`` pairs; when none is
            valid the first row is returned anyway.
            """
            valid_rows = [row for is_valid, row in ohlc_group if is_valid]
            return valid_rows[0] if valid_rows else ohlc_group[0][1]

        NUM_CANDIDATES = 25
        # NOTE(review): vohlc accumulates every batch but is not read below;
        # the statistics and plots use only the last batch's ``ohlc``.
        # Confirm whether the full series was intended.
        vohlc = []
        for y_time, x, y, batch_unscaler in sample_batches:
            # Produce NUM_CANDIDATES slightly perturbed copies of each input
            x = [np.repeat(tx, NUM_CANDIDATES, axis=0) for tx in x]
            x = [
                tx + np.random.normal(size=tx.shape, scale=0.05) for tx in x
            ]
            ohlc = upscaler_model.generate_output(x)
            ohlc_candidate_vecs = [
                batch_unscaler(y=[ox[k::NUM_CANDIDATES] for ox in ohlc])[1]
                for k in range(NUM_CANDIDATES)
            ]
            ohlc = ohlc[1]
            for k in range(NUM_CANDIDATES):
                ohlc[k::NUM_CANDIDATES] = ohlc_candidate_vecs[k]
            ohlc = [item for sublist in ohlc for item in sublist]

            # OHLC contains NUM_CANDIDATES per day - rebuild the series by
            # picking the first valid candidate per day
            ohlc = tz.pipe(
                ohlc,
                tz.map(lambda ohlc_row:
                       (analysis.is_valid_ohlc(ohlc_row), ohlc_row)),
                tz.partition(NUM_CANDIDATES),
                tz.map(best_of_group),
                list,
                np.array)

            # Re-apply the time axis
            ohlc = np.concatenate([y_time.reshape(-1, 1), ohlc], axis=1)
            vohlc.extend(ohlc)

        # high[2], low[3]
        print("\n\nValid/Invalid: {} / {} => {:.2%}".format(
            *analysis.calculate_ohlc_stats(ohlc[:, 1:])))

        # Build a dataframe from the ohlc data and resample to 1w resolution
        # for comparison with the original
        last_ohlc_df = pd.DataFrame(
            ohlc,
            columns=["date", "open", "high", "low", "close", "volume"])
        last_ohlc_df = last_ohlc_df.set_index(last_ohlc_df["date"])
        last_ohlc_df.sort_index(inplace=True)
        # .loc is the label-based replacement for the removed .ix indexer
        last_ohlc_df = last_ohlc_df.loc[
            datetime(2017, 1, 1):datetime(2018, 1, 1)]
        last_ohlc_df = analysis.resample_ohlc(
            last_ohlc_df, "1w").dropna(how='any')
        analysis.plot_ohlc_tofile(
            last_ohlc_df, "./output/{}_1w.png".format(modelname))
        analysis.plot_ohlc(last_ohlc_df)

        # Plot the original data in a week resolution
        df = pd.read_csv("data/gspc.csv", parse_dates=["Date"])
        df = df.rename(
            columns={
                "Date": "date",
                "Open": "open",
                "High": "high",
                "Low": "low",
                "Close": "close",
                "Adj Close": "adjclose",
                "Volume": "volume"
            })
        df = df.set_index(df["date"])
        df = df.loc[datetime(2017, 1, 1):datetime(2018, 1, 1)]
        df = analysis.resample_ohlc(df, "1w")
        analysis.plot_ohlc_tofile(
            df, "./output/{}_1w_original.png".format(modelname))
        analysis.plot_ohlc(df)

    print("Program complete")
def streaming_PCA(samples, n_components=2, batch_size=100):
    """Fit an ``IncrementalPCA`` on a stream of samples, batch by batch.

    Parameters
    ----------
    samples : iterable
        Stream of individual samples.
    n_components : int
        Number of components passed to ``IncrementalPCA``.
    batch_size : int
        Number of samples grouped into each ``partial_fit`` call.

    Returns
    -------
    The ``IncrementalPCA`` instance.  Assuming ``cur`` is ``toolz.curried``,
    a trailing group shorter than ``batch_size`` is dropped by ``partition``;
    if no complete batch can be formed the estimator is returned unfitted
    rather than failing while draining an empty pipeline.
    """
    ipca = decomposition.IncrementalPCA(n_components=n_components,
                                        batch_size=batch_size)
    # A plain loop consumes the stream without needing a "last element",
    # so an input with fewer than batch_size samples is not an error here.
    for batch in cur.partition(batch_size, samples):
        ipca.partial_fit(np.array(batch))
    return ipca
def top(func, output, out_indices, *arrind_pairs, **kwargs):
    """ Tensor operation

    Builds the task dictionary that applies ``func`` across blocks of one or
    more named inputs.  Index labels describe which blocks meet in each call.
    E.g.

        top(func, 'z', 'i', 'x', 'i', 'y', 'i')

    is an embarrassingly parallel pattern and reads as

        z_i = func(x_i, y_i)

    Several indices may be used, and inputs may order them differently:

        top(func, 'z', 'ij', 'x', 'ij', 'y', 'ji')

    $$ z_{ij} = func(x_{ij}, y_{ji}) $$

    An index that appears on an input but not on the output is a dummy
    index: all blocks along it are passed to ``func`` together as a list.

    ``arrind_pairs`` alternates input names and their indices.  The keyword
    ``numblocks`` is required and maps every input name to its number of
    blocks per dimension; its keys must match the input names exactly.

    Examples
    --------

    Simple embarrassing map operation

    >>> inc = lambda x: x + 1
    >>> top(inc, 'z', 'ij', 'x', 'ij', numblocks={'x': (2, 2)})  # doctest: +SKIP
    {('z', 0, 0): (inc, ('x', 0, 0)),
     ('z', 0, 1): (inc, ('x', 0, 1)),
     ('z', 1, 0): (inc, ('x', 1, 0)),
     ('z', 1, 1): (inc, ('x', 1, 1))}

    Simple operation on two datasets

    >>> add = lambda x, y: x + y
    >>> top(add, 'z', 'ij', 'x', 'ij', 'y', 'ij', numblocks={'x': (2, 2),
    ...                                                      'y': (2, 2)})  # doctest: +SKIP
    {('z', 0, 0): (add, ('x', 0, 0), ('y', 0, 0)),
     ('z', 0, 1): (add, ('x', 0, 1), ('y', 0, 1)),
     ('z', 1, 0): (add, ('x', 1, 0), ('y', 1, 0)),
     ('z', 1, 1): (add, ('x', 1, 1), ('y', 1, 1))}

    Operation that flips one of the datasets

    >>> addT = lambda x, y: x + y.T  # Transpose each chunk
    >>> #                                        z_ij ~ x_ij y_ji
    >>> #               ..         ..         .. notice swap
    >>> top(addT, 'z', 'ij', 'x', 'ij', 'y', 'ji', numblocks={'x': (2, 2),
    ...                                                       'y': (2, 2)})  # doctest: +SKIP
    {('z', 0, 0): (addT, ('x', 0, 0), ('y', 0, 0)),
     ('z', 0, 1): (addT, ('x', 0, 1), ('y', 1, 0)),
     ('z', 1, 0): (addT, ('x', 1, 0), ('y', 0, 1)),
     ('z', 1, 1): (addT, ('x', 1, 1), ('y', 1, 1))}

    Dot product with contraction over ``j`` index.  Yields list arguments

    >>> top(dotmany, 'z', 'ik', 'x', 'ij', 'y', 'jk', numblocks={'x': (2, 2),
    ...                                                          'y': (2, 2)})  # doctest: +SKIP
    {('z', 0, 0): (dotmany, [('x', 0, 0), ('x', 0, 1)],
                            [('y', 0, 0), ('y', 1, 0)]),
     ('z', 0, 1): (dotmany, [('x', 0, 0), ('x', 0, 1)],
                            [('y', 0, 1), ('y', 1, 1)]),
     ('z', 1, 0): (dotmany, [('x', 1, 0), ('x', 1, 1)],
                            [('y', 0, 0), ('y', 1, 0)]),
     ('z', 1, 1): (dotmany, [('x', 1, 0), ('x', 1, 1)],
                            [('y', 0, 1), ('y', 1, 1)])}

    Supports Broadcasting rules

    >>> top(add, 'z', 'ij', 'x', 'ij', 'y', 'ij', numblocks={'x': (1, 2),
    ...                                                      'y': (2, 2)})  # doctest: +SKIP
    {('z', 0, 0): (add, ('x', 0, 0), ('y', 0, 0)),
     ('z', 0, 1): (add, ('x', 0, 1), ('y', 0, 1)),
     ('z', 1, 0): (add, ('x', 0, 0), ('y', 1, 0)),
     ('z', 1, 1): (add, ('x', 0, 1), ('y', 1, 1))}
    """
    numblocks = kwargs['numblocks']
    argpairs = list(partition(2, arrind_pairs))

    assert set(numblocks) == {name for name, _ in argpairs}

    # Every index label used by any input; those absent from the output are
    # the dummy (contracted) indices.
    input_labels = {label for _, ind in argpairs for label in ind}
    dummy_labels = input_labels - set(out_indices)

    # Dictionary mapping {i: 3, j: 4, ...} for i, j, ... the dimensions
    dims = broadcast_dimensions(argpairs, numblocks)

    # (0, 0), (0, 1), (0, 2), (1, 0), ...
    keytups = list(product(*[range(dims[label]) for label in out_indices]))

    # {j: [1, 2, 3], ...}  For j a dummy index of dimension 3
    dummies = {label: list(range(dims[label])) for label in dummy_labels}

    graph = {}
    for keytup in keytups:
        # {i: 0, j: 0}, {i: 0, j: 1}, ...
        coords = dict(zip(out_indices, keytup))
        call_args = tuple(
            zero_broadcast_dimensions(
                lol_tuples((arg,), ind, coords, dummies), numblocks[arg])
            for arg, ind in argpairs
        )
        # Add heads to tuples
        graph[(output,) + keytup] = (func,) + call_args
    return graph
def top(func, output, out_indices, *arrind_pairs, **kwargs):
    """ Tensor operation

    Construct the dictionary of tasks that applies ``func`` block-by-block
    over several named inputs, pairing blocks according to matching index
    labels.  E.g.

        top(func, 'z', 'i', 'x', 'i', 'y', 'i')

    gives an embarrassingly parallel pattern, read as

        z_i = func(x_i, y_i)

    More complex patterns use several indices:

        top(func, 'z', 'ij', 'x', 'ij', 'y', 'ji')

    $$ z_{ij} = func(x_{ij}, y_{ji}) $$

    Indices present on an input but missing from the output cause all the
    blocks along that index to be sent to a single call as a list.

    ``arrind_pairs`` is a flat sequence ``name, indices, name, indices, ...``.
    The required keyword ``numblocks`` maps each input name to its block
    counts; the set of its keys must equal the set of input names.

    Examples
    --------

    Simple embarrassing map operation

    >>> inc = lambda x: x + 1
    >>> top(inc, 'z', 'ij', 'x', 'ij', numblocks={'x': (2, 2)})  # doctest: +SKIP
    {('z', 0, 0): (inc, ('x', 0, 0)),
     ('z', 0, 1): (inc, ('x', 0, 1)),
     ('z', 1, 0): (inc, ('x', 1, 0)),
     ('z', 1, 1): (inc, ('x', 1, 1))}

    Simple operation on two datasets

    >>> add = lambda x, y: x + y
    >>> top(add, 'z', 'ij', 'x', 'ij', 'y', 'ij', numblocks={'x': (2, 2),
    ...                                                      'y': (2, 2)})  # doctest: +SKIP
    {('z', 0, 0): (add, ('x', 0, 0), ('y', 0, 0)),
     ('z', 0, 1): (add, ('x', 0, 1), ('y', 0, 1)),
     ('z', 1, 0): (add, ('x', 1, 0), ('y', 1, 0)),
     ('z', 1, 1): (add, ('x', 1, 1), ('y', 1, 1))}

    Operation that flips one of the datasets

    >>> addT = lambda x, y: x + y.T  # Transpose each chunk
    >>> #                                        z_ij ~ x_ij y_ji
    >>> #               ..         ..         .. notice swap
    >>> top(addT, 'z', 'ij', 'x', 'ij', 'y', 'ji', numblocks={'x': (2, 2),
    ...                                                       'y': (2, 2)})  # doctest: +SKIP
    {('z', 0, 0): (addT, ('x', 0, 0), ('y', 0, 0)),
     ('z', 0, 1): (addT, ('x', 0, 1), ('y', 1, 0)),
     ('z', 1, 0): (addT, ('x', 1, 0), ('y', 0, 1)),
     ('z', 1, 1): (addT, ('x', 1, 1), ('y', 1, 1))}

    Dot product with contraction over ``j`` index.  Yields list arguments

    >>> top(dotmany, 'z', 'ik', 'x', 'ij', 'y', 'jk', numblocks={'x': (2, 2),
    ...                                                          'y': (2, 2)})  # doctest: +SKIP
    {('z', 0, 0): (dotmany, [('x', 0, 0), ('x', 0, 1)],
                            [('y', 0, 0), ('y', 1, 0)]),
     ('z', 0, 1): (dotmany, [('x', 0, 0), ('x', 0, 1)],
                            [('y', 0, 1), ('y', 1, 1)]),
     ('z', 1, 0): (dotmany, [('x', 1, 0), ('x', 1, 1)],
                            [('y', 0, 0), ('y', 1, 0)]),
     ('z', 1, 1): (dotmany, [('x', 1, 0), ('x', 1, 1)],
                            [('y', 0, 1), ('y', 1, 1)])}

    Supports Broadcasting rules

    >>> top(add, 'z', 'ij', 'x', 'ij', 'y', 'ij', numblocks={'x': (1, 2),
    ...                                                      'y': (2, 2)})  # doctest: +SKIP
    {('z', 0, 0): (add, ('x', 0, 0), ('y', 0, 0)),
     ('z', 0, 1): (add, ('x', 0, 1), ('y', 0, 1)),
     ('z', 1, 0): (add, ('x', 0, 0), ('y', 1, 0)),
     ('z', 1, 1): (add, ('x', 0, 1), ('y', 1, 1))}
    """
    numblocks = kwargs['numblocks']
    argpairs = list(partition(2, arrind_pairs))

    input_names = set(pluck(0, argpairs))
    assert set(numblocks) == input_names

    used_indices = set(concat(pluck(1, argpairs)))
    dummy_indices = used_indices - set(out_indices)

    # Dictionary mapping {i: 3, j: 4, ...} for i, j, ... the dimensions
    dims = broadcast_dimensions(argpairs, numblocks)

    # (0, 0), (0, 1), (0, 2), (1, 0), ...
    extents = [range(dims[idx]) for idx in out_indices]
    keytups = list(product(*extents))

    # {j: [1, 2, 3], ...}  For j a dummy index of dimension 3
    dummies = {idx: list(range(dims[idx])) for idx in dummy_indices}

    def block_args(position):
        """Arguments of ``func`` for the output block at ``position``."""
        # {i: 0, j: 0}, {i: 0, j: 1}, ...
        fixed = dict(zip(out_indices, position))
        return tuple(
            zero_broadcast_dimensions(
                lol_tuples((arg, ), ind, fixed, dummies), numblocks[arg])
            for arg, ind in argpairs)

    # Add heads to tuples
    return {
        (output, ) + position: (func, ) + block_args(position)
        for position in keytups
    }