Example #1
import olefile
from struct import unpack
from toolz import pipe
from toolz.curried import partition

def get_yvalues(ole: olefile.OleFileIO, ydata: bytes,
                header: tuple) -> tuple:
    # Regroup the flat unpacked floats into one length-N tuple per channel.
    return pipe(
        unpack('f' * header.N * header.channels, ydata) if ydata else (),
        partition(header.N),
        tuple,
    )
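A minimal sketch of the regrouping step above, assuming the curried partition from toolz.curried (the snippet calls partition with a single argument, which only the curried variant supports); the 2-channel, 3-sample layout is invented for illustration:

from toolz import pipe
from toolz.curried import partition

flat = (1.0, 2.0, 3.0, 4.0, 5.0, 6.0)  # N=3 samples for each of 2 channels
pipe(flat, partition(3), tuple)
# -> ((1.0, 2.0, 3.0), (4.0, 5.0, 6.0)), one tuple per channel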
Example #2
def atop(func, out, out_ind, *args, **kwargs):
    """ Array object version of dask.array.top """
    dtype = kwargs.get('dtype', None)
    arginds = list(partition(2, args))  # [x, ij, y, jk] -> [(x, ij), (y, jk)]
    numblocks = {a.name: a.numblocks for a, _ in arginds}
    argindsstr = list(concat([(a.name, ind) for a, ind in arginds]))

    dsk = top(func, out, out_ind, *argindsstr, numblocks=numblocks)

    # Dictionary mapping each dimension name to its size, e.g. {i: 3, j: 4}
    shapes = {a.name: a.shape for a, _ in arginds}
    nameinds = [(a.name, i) for a, i in arginds]
    dims = broadcast_dimensions(nameinds, shapes)
    shape = tuple(dims[i] for i in out_ind)

    blockdim_dict = {a.name: a.blockdims for a, _ in arginds}
    blockdimss = broadcast_dimensions(nameinds, blockdim_dict)
    blockdims = tuple(blockdimss[i] for i in out_ind)

    dsks = [a.dask for a, _ in arginds]
    return Array(merge(dsk, *dsks),
                 out,
                 shape,
                 blockdims=blockdims,
                 dtype=dtype)
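The pairing step at the top of atop is plain toolz.partition; a hedged sketch with stand-in strings in place of the dask array objects:

from toolz import concat, partition

args = ('x', 'ij', 'y', 'jk')
pairs = list(partition(2, args))   # [('x', 'ij'), ('y', 'jk')]
list(concat(pairs))                # ['x', 'ij', 'y', 'jk'] -- the round trip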
Example #3
import numpy as np
import toolz as tz
from sklearn import decomposition
from toolz import curried

def streaming_pca(samples, n_components=2, batch_size=50):
    ipca = decomposition.IncrementalPCA(n_components=n_components,
                                        batch_size=batch_size)
    # list() consumes the lazy pipeline: each batch of `batch_size` samples
    # is stacked into an array and fed to partial_fit.
    _ = list(tz.pipe(samples, curried.partition(batch_size),
                     curried.map(np.array),
                     curried.map(ipca.partial_fit)))
    return ipca
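A hypothetical usage sketch (synthetic data, made-up shapes): the generator is consumed batch by batch, so the full sample set never has to fit in memory.

import numpy as np

samples = (np.random.rand(10) for _ in range(500))  # any iterable of rows
ipca = streaming_pca(samples, n_components=2, batch_size=50)
reduced = ipca.transform(np.random.rand(5, 10))     # shape (5, 2)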
Example #4
def non_yaml_lines(lines):
    # Pair up the YAML delimiter indices (with sentinels at both ends),
    # slice out the lines between each pair, and concatenate the slices.
    return pipe(
        [-1] + all_indices(lines) + [len(lines)],
        partition(2),
        vmap(lambda s, e: lines[s + 1:e]),
        reduce(lambda a, b: a + b),
    )
Example #5
def _build_facets(plat_df, A, n_cols=2, suites=None):
    assert (suites is not None), "Pass `suites` as kw arg in platforms2json"
    rows = []
    # Lay the suites out in rows of n_cols; pad=None fills the last row.
    for srow in z.partition(n_cols, suites, pad=None):
        row = []
        for sname in (s for s in srow if s):
            gdf = plat_df.query("sname == @sname")
            if not len(gdf):
                continue
            pdf = agg_n_modes(gdf)
            row.append(pl_suite_modes(pdf, sname, A))
        if len(row):
            row_plot = A.hconcat(*row)
            rows.append(row_plot)

    return A.vconcat(*rows)
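The pad=None keyword is what keeps a final, partially filled row of suites; without it toolz.partition silently drops the leftover, and the padding is why the inner loop filters with (s for s in srow if s). A small sketch:

from toolz import partition

suites = ['a', 'b', 'c', 'd', 'e']
list(partition(2, suites))            # [('a', 'b'), ('c', 'd')] -- 'e' lost
list(partition(2, suites, pad=None))  # [('a', 'b'), ('c', 'd'), ('e', None)]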
Example #6
def parsebook(fn="src/txt/hp1.txt", vb=False):
    p = print if vb else (lambda *x, **y: None)
    if isinstance(fn, int):
        fn = "src/txt/hp{}.txt".format(fn)
    p("Reading {}".format(fn))
    with open(fn, "rb") as f:
        txt = f.read().decode("utf-8-sig")

    gd = bookpat_re.search(txt).groupdict()

    booktitle = gd["title"]
    body = gd["body"]

    chs = chapsep_re.split(body)[1:]
    # chapsep_re's capture groups make split() return a flat
    # [chnum, title, text, ...] sequence; partition(3, ...) regroups it.
    book = {int(chnum): Chapter(int(chnum), title, text)
            for chnum, title, text in z.partition(3, chs)}
    return Book(booktitle, book)
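An illustrative sketch with placeholder strings, assuming chapsep_re captures the chapter number and title so that re.split yields the flat sequence described above:

from toolz import partition

chs = ['1', 'Title One', 'text one...', '2', 'Title Two', 'text two...']
list(partition(3, chs))
# [('1', 'Title One', 'text one...'), ('2', 'Title Two', 'text two...')]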
Example #7
def yaml_data(lines):
    def render(raw, data):
        return jinja2.Template(raw).render(**data)

    return maybe_pipe(
        all_indices(lines),
        short_circuit(bool),  # catch null YAML early
        partition(2),
        vmap(lambda s, e: lines[s + 1:e]),
        map('\n'.join),
        lambda lines: '\n'.join(lines),
        # Two load/render passes, presumably so values defined in the YAML
        # can feed Jinja expressions elsewhere in the same document.
        lambda raw: (raw, yaml.load(raw)),
        vcall(render),
        lambda raw: (raw, yaml.load(raw)),
        vcall(render),
        lambda raw: yaml.load(raw),  # parse the fully rendered text
    )
Example #8
    def _make_samples(meta, shuffle):
        def _to_sample(person, images):
            # Random images needed for representation interpolation (3.5)
            x1 = _get_random_image()
            x2 = _get_random_image()
            return m(id=person["id_class"] - 1,
                     images=freeze(list(images)),
                     x1=freeze(x1),
                     x2=freeze(x2))

        samples = pipe(
            meta["persons"],
            tz.take(limit) if limit is not None else tz.identity,
            # Group each person's (optionally shuffled) images into
            # fixed-size chunks of args.N_images.
            tz.map(lambda p: m(p=p,
                               i=tz.partition(
                                   args.N_images,
                                   _shuffled(p["images"])
                                   if shuffle else p["images"]))),
            tz.mapcat(lambda s: [_to_sample(s.p, i) for i in s.i]),
            tz.take(limit) if limit is not None else tz.identity, list)
        if shuffle:
            random.shuffle(samples)
        return samples
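Note that tz.partition drops any trailing group smaller than args.N_images, so a person's leftover images are silently discarded; partition_all would keep them. A quick comparison:

from toolz import partition, partition_all

list(partition(3, range(8)))      # [(0, 1, 2), (3, 4, 5)] -- 6, 7 dropped
list(partition_all(3, range(8)))  # [(0, 1, 2), (3, 4, 5), (6, 7)]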
Example #9
def run(mode, modelname, forcenew, epochs):
    allow_train = mode in ("train", "both")
    allow_generate = mode in ("eval", "both")

    # Get the samples and 'work it'
    sample_batches = list(
        tz.pipe(
            data.get_samples("s&p500",
                             "1d",
                             datetime(1980, 1, 1),
                             datetime(2018, 1, 1),
                             random_state=np.random.randint(0, 234234)),
            data.samples_to_batch_generator(128)))

    # Load or create the model
    if forcenew or not UpscalerModel.exists(modelname):
        upscaler_model = UpscalerModel.create_model(5)
    else:
        upscaler_model = UpscalerModel.load_model(modelname)

    if allow_train:
        for epoch in range(epochs):
            print("Starting epoch: {}".format(epoch))
            critic_generator_advantage = 1.0

            batch_idx = 0
            for y_time, x, y, batch_unscaler in sample_batches:
                # Generate "fake" data
                noise_mod = (1.0 - epoch / epochs) * 0.2
                noised = lambda a: np.random.normal(scale=noise_mod,
                                                    size=a.shape) + a
                x = [noised(k) for k in x]
                generated_y = upscaler_model.generate_output(x)
                assert len(generated_y) == 3, \
                    "Expected exactly 3 output vectors. Got {}".format(
                        len(generated_y))

                # Train the critic
                real_samples = y
                fake_samples = generated_y
                critic_eval_result = upscaler_model.train_critic(
                    real_samples, fake_samples,
                    1.0 / critic_generator_advantage)

                # Train the generator (adversarial)
                generator_eval_result = upscaler_model.train_generator(
                    x, critic_generator_advantage)

                # Print the current results
                print("Epoch: {}, BatchIdx={} results:".format(
                    epoch, batch_idx + 1))
                print("\t Critic: {}".format(critic_eval_result))
                print("\t Generator: {}".format(generator_eval_result))
                ohlc = batch_unscaler(y=generated_y)[0]
                print("\t Valid/Invalid: {} vs {} => {:.2%}%".format(
                    *analysis.calculate_ohlc_stats(ohlc)))

                # Apply another level of training to the critic to deter invalid OHLC
                ohlc_validvec = tz.pipe(ohlc, tz.map(analysis.is_valid_ohlc),
                                        list)
                invalid_ohlc_samples_x = tz.pipe(
                    zip(*generated_y + [ohlc_validvec]),
                    tz.filter(lambda t: not t[-1]),
                    tz.map(lambda t: t[:-1]), unzip, tz.map(np.array), list)
                inv_loss = upscaler_model.train_critic_invalid(
                    invalid_ohlc_samples_x)
                print("\t Invalid loss: {} ({}# samples)".format(
                    inv_loss, len(invalid_ohlc_samples_x[0])))

                # Alternative: generator_eval_result[0] / critic_eval_result[1][0]
                critic_generator_advantage = critic_eval_result[1][1]
                batch_idx += 1

            if epoch % 10 == 0:
                # Save the last generated sample
                y_time = y_time.reshape(y[1].shape[:-1] + (1, ))
                ohlc = batch_unscaler(y=generated_y)[1]
                ohlc = np.concatenate([y_time, ohlc], axis=2)[-1]
                last_ohlc_df = pd.DataFrame(
                    ohlc,
                    columns=["date", "open", "high", "low", "close", "volume"])
                analysis.plot_ohlc_tofile(last_ohlc_df,
                                          "./output/e{}.png".format(epoch))

        if modelname != "tmp":
            print("Model saved as: {}".format(
                upscaler_model.save_model(modelname)))

    if allow_generate:

        def best_of_group(ohlc_group):
            """ Select the first valid OHLC row per candidate group """
            valid_ohlc_rows = tz.pipe(ohlc_group, tz.filter(lambda p: p[0]),
                                      list)
            if valid_ohlc_rows:
                return valid_ohlc_rows[0][1]
            return ohlc_group[0][1]

        NUM_CANDIDATES = 25
        vohlc = []
        for y_time, x, y, batch_unscaler in sample_batches:
            x = [np.repeat(tx, NUM_CANDIDATES, axis=0) for tx in x]
            x = [tx + np.random.normal(size=tx.shape, scale=0.05) for tx in x]
            ohlc = upscaler_model.generate_output(x)

            ohlc_candidate_vecs = [
                batch_unscaler(y=[ox[k::NUM_CANDIDATES] for ox in ohlc])[1]
                for k in range(NUM_CANDIDATES)
            ]
            ohlc = ohlc[1]
            for k in range(NUM_CANDIDATES):
                ohlc[k::NUM_CANDIDATES] = ohlc_candidate_vecs[k]
            ohlc = [item for sublist in ohlc for item in sublist]

            # ohlc holds NUM_CANDIDATES rows per day; rebuild the series by
            # picking the first valid candidate per day
            ohlc = tz.pipe(
                ohlc,
                tz.map(lambda ohlc_row:
                       (analysis.is_valid_ohlc(ohlc_row), ohlc_row)),
                tz.partition(NUM_CANDIDATES), tz.map(best_of_group), list,
                np.array)
            # Re-apply the time axis
            ohlc = np.concatenate([y_time.reshape(-1, 1), ohlc], axis=1)
            vohlc.extend(ohlc)

        # high[2], low[3]
        print("\n\nValid/Invalid: {} / {} => {:.2%}%".format(
            *analysis.calculate_ohlc_stats(ohlc[:, 1:])))

        # Build a dataframe from the ohlc data and resample to 1w resolution for comparison with the original
        last_ohlc_df = pd.DataFrame(
            ohlc, columns=["date", "open", "high", "low", "close", "volume"])
        last_ohlc_df = last_ohlc_df.set_index(last_ohlc_df["date"])
        last_ohlc_df.sort_index(inplace=True)
        last_ohlc_df = last_ohlc_df.loc[datetime(2017, 1, 1):
                                        datetime(2018, 1, 1)]
        last_ohlc_df = analysis.resample_ohlc(last_ohlc_df,
                                              "1w").dropna(how='any')
        analysis.plot_ohlc_tofile(last_ohlc_df,
                                  "./output/{}_1w.png".format(modelname))
        analysis.plot_ohlc(last_ohlc_df)

        # Plot the original data in a week resolution
        df = pd.read_csv("data/gspc.csv", parse_dates=["Date"])
        df = df.rename(
            columns={
                "Date": "date",
                "Open": "open",
                "High": "high",
                "Low": "low",
                "Close": "close",
                "Adj Close": "adjclose",
                "Volume": "volume"
            })
        df = df.set_index(df["date"])
        df = df.loc[datetime(2017, 1, 1):datetime(2018, 1, 1)]
        df = analysis.resample_ohlc(df, "1w")
        analysis.plot_ohlc_tofile(
            df, "./output/{}_1w_original.png".format(modelname))
        analysis.plot_ohlc(df)

    print("Program complete")
Example #10
import numpy as np
import toolz as tz
from sklearn import decomposition
from toolz import curried as cur

def streaming_PCA(samples, n_components=2, batch_size=100):
    ipca = decomposition.IncrementalPCA(n_components=n_components,
                                        batch_size=batch_size)
    # tz.last consumes the lazy pipeline, fitting one batch at a time.
    tz.pipe(samples, cur.partition(batch_size), cur.map(np.array),
            cur.map(ipca.partial_fit), tz.last)
    return ipca
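Compared with streaming_pca in Example #3, which forces the lazy pipeline with list, this variant uses tz.last; both merely consume the iterator, since toolz's map and partition are lazy and nothing runs until something pulls on them:

import toolz as tz

seen = []
lazy = map(seen.append, range(3))  # nothing has executed yet
tz.last(lazy)                      # consuming the iterator triggers the calls
seen                               # [0, 1, 2]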
Example #11
def top(func, output, out_indices, *arrind_pairs, **kwargs):
    """ Tensor operation

    Applies a function, ``func``, across blocks from many different input
    dasks.  We arrange the pattern with which those blocks interact with sets
    of matching indices.  E.g.

        top(func, 'z', 'i', 'x', 'i', 'y', 'i')

    yields an embarrassingly parallel communication pattern and is read as

        z_i = func(x_i, y_i)

    More complex patterns may emerge, including multiple indices

        top(func, 'z', 'ij', 'x', 'ij', 'y', 'ji')

        $$ z_{ij} = func(x_{ij}, y_{ji}) $$

    Indices missing in the output but present in the inputs result in many
    inputs being sent to one function (see examples).

    Examples
    --------

    Simple embarrassingly parallel map operation

    >>> inc = lambda x: x + 1
    >>> top(inc, 'z', 'ij', 'x', 'ij', numblocks={'x': (2, 2)})  # doctest: +SKIP
    {('z', 0, 0): (inc, ('x', 0, 0)),
     ('z', 0, 1): (inc, ('x', 0, 1)),
     ('z', 1, 0): (inc, ('x', 1, 0)),
     ('z', 1, 1): (inc, ('x', 1, 1))}

    Simple operation on two datasets

    >>> add = lambda x, y: x + y
    >>> top(add, 'z', 'ij', 'x', 'ij', 'y', 'ij', numblocks={'x': (2, 2),
    ...                                                      'y': (2, 2)})  # doctest: +SKIP
    {('z', 0, 0): (add, ('x', 0, 0), ('y', 0, 0)),
     ('z', 0, 1): (add, ('x', 0, 1), ('y', 0, 1)),
     ('z', 1, 0): (add, ('x', 1, 0), ('y', 1, 0)),
     ('z', 1, 1): (add, ('x', 1, 1), ('y', 1, 1))}

    Operation that flips one of the datasets

    >>> addT = lambda x, y: x + y.T  # Transpose each chunk
    >>> #                                        z_ij ~ x_ij y_ji
    >>> #               ..         ..         .. notice swap
    >>> top(addT, 'z', 'ij', 'x', 'ij', 'y', 'ji', numblocks={'x': (2, 2),
    ...                                                       'y': (2, 2)})  # doctest: +SKIP
    {('z', 0, 0): (addT, ('x', 0, 0), ('y', 0, 0)),
     ('z', 0, 1): (addT, ('x', 0, 1), ('y', 1, 0)),
     ('z', 1, 0): (addT, ('x', 1, 0), ('y', 0, 1)),
     ('z', 1, 1): (addT, ('x', 1, 1), ('y', 1, 1))}

    Dot product with contraction over ``j`` index.  Yields list arguments

    >>> top(dotmany, 'z', 'ik', 'x', 'ij', 'y', 'jk', numblocks={'x': (2, 2),
    ...                                                          'y': (2, 2)})  # doctest: +SKIP
    {('z', 0, 0): (dotmany, [('x', 0, 0), ('x', 0, 1)],
                            [('y', 0, 0), ('y', 1, 0)]),
     ('z', 0, 1): (dotmany, [('x', 0, 0), ('x', 0, 1)],
                            [('y', 0, 1), ('y', 1, 1)]),
     ('z', 1, 0): (dotmany, [('x', 1, 0), ('x', 1, 1)],
                            [('y', 0, 0), ('y', 1, 0)]),
     ('z', 1, 1): (dotmany, [('x', 1, 0), ('x', 1, 1)],
                            [('y', 0, 1), ('y', 1, 1)])}

    Supports broadcasting rules

    >>> top(add, 'z', 'ij', 'x', 'ij', 'y', 'ij', numblocks={'x': (1, 2),
    ...                                                      'y': (2, 2)})  # doctest: +SKIP
    {('z', 0, 0): (add, ('x', 0, 0), ('y', 0, 0)),
     ('z', 0, 1): (add, ('x', 0, 1), ('y', 0, 1)),
     ('z', 1, 0): (add, ('x', 0, 0), ('y', 1, 0)),
     ('z', 1, 1): (add, ('x', 0, 1), ('y', 1, 1))}
    """
    numblocks = kwargs['numblocks']
    argpairs = list(partition(2, arrind_pairs))

    assert set(numblocks) == set(pluck(0, argpairs))

    all_indices = pipe(argpairs, pluck(1), concat, set)
    dummy_indices = all_indices - set(out_indices)

    # Dictionary mapping {i: 3, j: 4, ...} for i, j, ... the dimensions
    dims = broadcast_dimensions(argpairs, numblocks)

    # (0, 0), (0, 1), (0, 2), (1, 0), ...
    keytups = list(product(*[range(dims[i]) for i in out_indices]))
    # {i: 0, j: 0}, {i: 0, j: 1}, ...
    keydicts = [dict(zip(out_indices, tup)) for tup in keytups]

    # {j: [0, 1, 2], ...}  For j a dummy index of dimension 3
    dummies = dict((i, list(range(dims[i]))) for i in dummy_indices)

    # Create argument lists
    valtups = []
    for kd in keydicts:
        args = []
        for arg, ind in argpairs:
            tups = lol_tuples((arg,), ind, kd, dummies)
            tups2 = zero_broadcast_dimensions(tups, numblocks[arg])
            args.append(tups2)
        valtups.append(tuple(args))

    # Add heads to tuples
    keys = [(output,) + kt for kt in keytups]
    vals = [(func,) + vt for vt in valtups]

    return dict(zip(keys, vals))
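The index bookkeeping at the start of the function body is itself a small toolz pipeline. A sketch of how the dummy (contracted) indices fall out for the 'ik' = 'ij' x 'jk' dot-product case; pluck must be the curried variant for pluck(1) to work standalone:

from toolz import concat, pipe
from toolz.curried import pluck

argpairs = [('x', 'ij'), ('y', 'jk')]
all_indices = pipe(argpairs, pluck(1), concat, set)  # {'i', 'j', 'k'}
dummy_indices = all_indices - set('ik')              # {'j'}: contracted over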