Example 1
def test_normalize_function():
    def f1(a, b, c=1):
        pass
    def f2(a, b=1, c=2):
        pass
    def f3(a):
        pass

    assert normalize_function(f2)

    f = lambda a: a
    assert normalize_function(f)

    assert (normalize_function(partial(f2, b=2)) ==
            normalize_function(partial(f2, b=2)))

    assert (normalize_function(partial(f2, b=2)) !=
            normalize_function(partial(f2, b=3)))

    assert (normalize_function(partial(f1, b=2)) !=
            normalize_function(partial(f2, b=2)))

    assert (normalize_function(compose(f2, f3)) ==
            normalize_function(compose(f2, f3)))

    assert (normalize_function(compose(f2, f3)) !=
            normalize_function(compose(f2, f1)))

    assert normalize_function(curry(f2)) == normalize_function(curry(f2))
    assert normalize_function(curry(f2)) != normalize_function(curry(f1))
    assert (normalize_function(curry(f2, b=1)) ==
            normalize_function(curry(f2, b=1)))
    assert (normalize_function(curry(f2, b=1)) !=
            normalize_function(curry(f2, b=2)))
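
Note: the assertions above rely on the standard semantics of compose, curry and
partial. A minimal, illustrative sketch of those semantics (toy function f2,
independent of normalize_function):

from functools import partial
from toolz import compose, curry

def f2(a, b=1, c=2):
    return a + b + c

# compose applies right to left: compose(g, h)(x) == g(h(x))
assert compose(str, len)("abc") == "3"
# curry and partial both pre-bind keyword arguments in the same way
assert curry(f2, b=5)(0) == partial(f2, b=5)(0) == 7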
Example 2
def _tree_reduce(x, aggregate, axis, keepdims, dtype, split_every=None,
                 combine=None):
    """Perform the tree reduction step of a reduction.

    Lower level, users should use ``reduction`` or ``arg_reduction`` directly.
    """
    # Normalize split_every
    split_every = split_every or _globals.get('split_every', 4)
    if isinstance(split_every, dict):
        split_every = dict((k, split_every.get(k, 2)) for k in axis)
    elif isinstance(split_every, int):
        n = builtins.max(int(split_every ** (1/(len(axis) or 1))), 2)
        split_every = dict.fromkeys(axis, n)
    else:
        split_every = dict((k, v) for (k, v) in enumerate(x.numblocks) if k in axis)

    # Reduce across intermediates
    depth = 1
    for i, n in enumerate(x.numblocks):
        if i in split_every and split_every[i] != 1:
            depth = int(builtins.max(depth, ceil(log(n, split_every[i]))))
    func = compose(partial(combine or aggregate, axis=axis, keepdims=True),
                   partial(_concatenate2, axes=axis))
    for i in range(depth - 1):
        x = partial_reduce(func, x, split_every, True, None)
    func = compose(partial(aggregate, axis=axis, keepdims=keepdims),
                   partial(_concatenate2, axes=axis))
    return partial_reduce(func, x, split_every, keepdims=keepdims,
                          dtype=dtype)
Example 3
def _tree_reduce(x, aggregate, axis, keepdims, dtype, split_every=None,
                 combine=None, name=None, concatenate=True):
    """ Perform the tree reduction step of a reduction.

    Lower level, users should use ``reduction`` or ``arg_reduction`` directly.
    """
    # Normalize split_every
    split_every = split_every or config.get('split_every', 4)
    if isinstance(split_every, dict):
        split_every = dict((k, split_every.get(k, 2)) for k in axis)
    elif isinstance(split_every, Integral):
        n = builtins.max(int(split_every ** (1 / (len(axis) or 1))), 2)
        split_every = dict.fromkeys(axis, n)
    else:
        raise ValueError("split_every must be an int or a dict")

    # Reduce across intermediates
    depth = 1
    for i, n in enumerate(x.numblocks):
        if i in split_every and split_every[i] != 1:
            depth = int(builtins.max(depth, ceil(log(n, split_every[i]))))
    func = partial(combine or aggregate, axis=axis, keepdims=True)
    if concatenate:
        func = compose(func, partial(_concatenate2, axes=axis))
    for i in range(depth - 1):
        x = partial_reduce(func, x, split_every, True, dtype=dtype,
                           name=(name or funcname(combine or aggregate)) + '-partial')
    func = partial(aggregate, axis=axis, keepdims=keepdims)
    if concatenate:
        func = compose(func, partial(_concatenate2, axes=axis))
    return partial_reduce(func, x, split_every, keepdims=keepdims, dtype=dtype,
                          name=(name or funcname(aggregate)) + '-aggregate')
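
A worked instance of the split_every normalization and depth computation above
(assumed numbers: split_every=4, a reduction over two axes, 16 blocks along one
of them; not part of the original source):

from math import ceil, log

split_every, axis, nblocks = 4, (0, 1), 16
n = max(int(split_every ** (1 / (len(axis) or 1))), 2)   # blocks folded together per step -> 2
depth = max(1, int(ceil(log(nblocks, n))))                # rounds of partial_reduce -> 4
assert (n, depth) == (2, 4)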
Example 4
def get_service_step(service_recipe):
    """
    Get step timedelta: the smallest duration among service_recipe's periods.
    """
    def diff(start, end):
        return end - start
    res_delta_diffs = compose(map(lambda p: diff(*p)), get('delta_periods'))
    return compose(min, map(min), map(res_delta_diffs))(service_recipe)
Example 5
    def _common(self, Z, y):
        scale = Scaler(Z)
        transform = compose(prepend_x0, Scaler.normalize)
        X = transform(scale)
        data = zip(X, y)
        h_theta0 = [0.] * len(X[0])
        coeff = compose(scale.denormalize,
                        get(0),
                        lin_reg(J, gradJ, h_theta0, it_max=2000))
        h_thetad = coeff(data)
        return h_thetad
Example 6
File: Huawei.py Project: sjava/olt
def zhongji(ip='', username='', password=''):
    try:
        result = []
        child = telnet(ip, username, password)
        child.sendline(
            "display cu section bbs-config | in link-aggregation")
        while True:
            index = child.expect([hw_prompt, hw_pager], timeout=120)
            if index == 0:
                result.append(child.before)
                child.sendline('quit')
                child.expect(':')
                child.sendline('y')
                child.close()
                break
            else:
                result.append(child.before)
                child.send(" ")
                continue
    except (pexpect.EOF, pexpect.TIMEOUT) as e:
        return ['fail', None, ip]
    rslt = ''.join(result).split('\r\n')[1:-1]
    rec = [x.replace('\x1b[37D', '').strip().split()[2:]
           for x in rslt if 'add-member' in x]

    def port(x):
        p = x[2].split(',')
        p1 = ['/'.join((x[1], y)) for y in p]
        return list(cons(x[0], p1))

    ff = lambda x, y: merge_with(compose(unique, concat), x, y)
    rec1 = [port(x) for x in rec]
    rec2 = [{x[0]: x} for x in rec1]
    rec3 = reduce(ff, rec2, dict())
    return ['success', rec3, ip]
Example 7
def parse_people(do_request):
    logger.info('Parsing people')

    def parse_representative(doc):
        doc = doc('div.wpsPortletBody')
        raw_birth_date = doc('fieldset table').eq(0).find('td').eq(1).text().replace(' ', '')
        return {
            'name': doc.find('h3').eq(0).text(),
            'birthDate': arrow.get(raw_birth_date, 'D.M.YYYY') if raw_birth_date else None,
            'image': DZ_RS_URL + doc.find('img').eq(0).attr('src'),
            'group': doc('.panelBox100 a').attr('href'),
            'location': doc(u'*:contains("Volilno okro")').parent().text().split(':')[1].strip(),
            'gender': "F" if 'Poslanka' in str(doc) else "M",
        }

    # get all people
    return toolz.compose(
        # get back metadata
        curried.map(parse_representative),
        # visit person's link
        curried.map(do_request),
        # get a link for each person
        lambda doc: doc("p.podnaslovOsebaLI a").map(lambda i, r: pq(r).attr('href')),
        # get page with a list of people
        do_request,
    )(DZ_RS_PEOPLE_URL)
Example 8
def doctable(ctx):
    df = pd.read_csv('./docs/flight-options.csv')

    # open an existing document
    doc = docx.Document('./docs/style-reference.docx')

    as_int = partial(format_decimal, format='#')
    as_usd = partial(format_currency, currency='USD')

    s = doc.sections[0]
    width = s.page_width - s.left_margin - s.right_margin

    doc.add_picture('./docs/diagrams_002.png', width=width)

    formatters = {
        'ticket_price': as_usd,
        'total_hours': as_int,
        'trip': as_int,
        'airline': partial(shorten_long_name, width=20),
        'selected': compose({0: 'No', 1: 'Yes'}.get, int)
    }
    add_table(df, doc, table_style='Plain Table 3', formatters=formatters)

    # save the doc
    doc.save('./docs/test.docx')
Example 9
    def create_store(reducer, initial_state=None):
        store = yield from next_handler(reducer, initial_state)
        dispatch = store.dispatch
        middleware_api = dict(dispatch=lambda action: store.dispatch(action), state_func=lambda: store.state)
        chain = map(lambda middleware: middleware(**middleware_api), middlewares)
        store.dispatch = toolz.compose(*chain)(dispatch)
        return store
Example 10
def reduction(x, chunk, aggregate, axis=None, keepdims=None, dtype=None):
    """ General version of reductions

    >>> reduction(my_array, np.sum, np.sum, axis=0, keepdims=False)  # doctest: +SKIP
    """
    if axis is None:
        axis = tuple(range(x.ndim))
    if isinstance(axis, int):
        axis = (axis,)

    chunk2 = partial(chunk, axis=axis, keepdims=True)
    aggregate2 = partial(aggregate, axis=axis, keepdims=keepdims)

    inds = tuple(range(x.ndim))
    tmp = atop(chunk2, next(names), inds, x, inds)

    inds2 = tuple(i for i in inds if i not in axis)

    result = atop(compose(aggregate2, curry(_concatenate2, axes=axis)),
                  next(names), inds2, tmp, inds, dtype=dtype)

    if keepdims:
        dsk = result.dask.copy()
        for k in flatten(result._keys()):
            k2 = (k[0],) + insert_many(k[1:], axis, 0)
            dsk[k2] = dsk.pop(k)
        blockdims = insert_many(result.blockdims, axis, [1])
        return Array(dsk, result.name, blockdims=blockdims, dtype=dtype)
    else:
        return result
Example 11
File: switch.py Project: sjava/olt
def interface_check():
    clear_log()
    cmd = "match(s:Switch) where s.model='T64G' or s.model='S9306' or s.model='S9303' or s.model='S8905' return s.ip,s.model"
    #  cmd = "match(s:Switch) where s.model='S9306' or s.model='s9303' return s.ip,s.model limit 2"
    nodes = graph.cypher.execute(cmd)
    switchs = [(x[0], x[1]) for x in nodes]
    list(map(compose(output_interface, get_interface), switchs))
Example 12
    def test_quantiles_uneven_buckets(self):
        permute = partial(permute_rows, 5)
        shape = (5, 5)

        factor_data = permute(log1p(arange(25, dtype=float).reshape(shape)))
        mask_data = permute(self.eye_mask(shape=shape))

        f = F()
        m = Mask()

        permuted_array = compose(permute, partial(array, dtype=int64_dtype))
        self.check_terms(
            terms={
                '3_masked': f.quantiles(bins=3, mask=m),
                '7_masked': f.quantiles(bins=7, mask=m),
            },
            initial_workspace={
                f: factor_data,
                m: mask_data,
            },
            expected={
                '3_masked': permuted_array([[-1, 0,  0,  1,  2],
                                            [0, -1,  0,  1,  2],
                                            [0,  0, -1,  1,  2],
                                            [0,  0,  1, -1,  2],
                                            [0,  0,  1,  2, -1]]),
                '7_masked': permuted_array([[-1, 0,  2,  4,  6],
                                            [0, -1,  2,  4,  6],
                                            [0,  2, -1,  4,  6],
                                            [0,  2,  4, -1,  6],
                                            [0,  2,  4,  6, -1]]),
            },
            mask=self.build_mask(self.ones_mask(shape=shape)),
        )
Example 13
def cli(board_source, key, token, to, output, board):
    """Hi, I'm TrelloScribe. I take Trello boards and turn them into documents!"""
    # Compose a sequence of functions based on the options chosen
    # Note toolz.compose() works right to left
    read_phase = {
        'id': download_board(key, token),
        'name': toolz.compose(download_board(key, token), search_boards(key, token)),
        'file': read_board
    }
    convert_phase = {
        'raw': partial(json.dumps, indent=2),
        'md': ast_to_md,
        'html': toolz.compose(md_to_html, ast_to_md)
    }
    toolz.pipe(board, read_phase[board_source], trello_to_ast,
               convert_phase[to], partial(click.echo, file=output))
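
As the comment says, toolz.compose applies its arguments right to left, so
convert_phase['html'] runs ast_to_md before md_to_html, and the 'name' read
phase searches boards before downloading. A generic illustration of the
ordering (toy functions, not the project's own):

from toolz import compose, pipe

add_one = lambda x: x + 1
double = lambda x: x * 2
assert compose(double, add_one)(3) == 8    # add_one first, then double
assert pipe(3, add_one, double) == 8       # pipe reads left to right, like toolz.pipe above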
Example 14
def input2output(query: Dict[str, Any], fields: List[str], options: Dict[str, int]) -> None:
    inout = compose(formated_output(fields),
                    get_output(fields),
                    get_info,
                    make_query(query, fields))
    html_text = inout(options)
    viewoutput(html_text)
Example 15
def build_task_nodes(files=None, select=None, task_uuid=None,
                     human_readable=True):
    """
    Build the task nodes given some input data, query criteria and formatting
    options.
    """
    def task_transformers():
        if human_readable:
            yield _convert_timestamp
        yield json.loads

    def filter_funcs():
        if select is not None:
            for query in select:
                yield filter_by_jmespath(query)

        if task_uuid is not None:
            yield filter_by_uuid(task_uuid)

    if files is None:
        files = [sys.stdin]

    tree = Tree()
    tasks = imap(compose(*task_transformers()),
                 chain.from_iterable(files))
    return tree.nodes(tree.merge_tasks(tasks, filter_funcs()))
Example 16
    def _lookup_symbol_strict(self, symbol, as_of_date):
        # split the symbol into the components, if there are no
        # company/share class parts then share_class_symbol will be empty
        company_symbol, share_class_symbol = split_delimited_symbol(symbol)
        try:
            owners = self.symbol_ownership_map[company_symbol, share_class_symbol]
            assert owners, "empty owners list for %r" % symbol
        except KeyError:
            # no equity has ever held this symbol
            raise SymbolNotFound(symbol=symbol)

        if not as_of_date:
            if len(owners) > 1:
                # more than one equity has held this ticker, this is ambiguous
                # without the date
                raise MultipleSymbolsFound(
                    symbol=symbol, options=set(map(compose(self.retrieve_asset, attrgetter("sid")), owners))
                )

            # exactly one equity has ever held this symbol, we may resolve
            # without the date
            return self.retrieve_asset(owners[0].sid)

        for start, end, sid, _ in owners:
            if start <= as_of_date < end:
                # find the equity that owned it on the given asof date
                return self.retrieve_asset(sid)

        # no equity held the ticker on the given asof date
        raise SymbolNotFound(symbol=symbol)
Example 17
def display_task_tree(args):
    """
    Read the input files, apply any command-line-specified behaviour and
    display the task tree.
    """

    def task_transformers():
        if args.human_readable:
            yield _convert_timestamp
        yield json.loads

    def filter_funcs():
        if args.select:
            for query in args.select:
                yield filter_by_jmespath(query)

        if args.task_uuid:
            yield filter_by_uuid(args.task_uuid)

    if not args.files:
        args.files.append(sys.stdin)

    tree = Tree()
    tasks = imap(compose(*task_transformers()), chain.from_iterable(args.files))
    render_task_nodes(
        write=sys.stdout.write,
        nodes=tree.nodes(tree.merge_tasks(tasks, filter_funcs())),
        ignored_task_keys=set(args.ignored_task_keys) or None,
        field_limit=args.field_limit,
    )
Example 18
def ref_with_vcf_dicts_strategy_factory(draw):
    '''
    Generate vcf records for randomish locations along a randomishly generated
    reference sequence. Each vcf record generator will have a randomish sized
    "chunk" of the reference to use

    Returns (reference sequence(str), iterable(vcf dicts))
    '''
    seq = draw(st.text(alphabet='ACGT', min_size=10, max_size=20))
    size = len(seq)
    # This gets you a list of numbers that are randomish and increasing
    ranges = draw(rolling_sum(1, 3, int(size/2)).map(lambda xs: ifilter(lambda x: x < size, xs)) )#.filter(_not(bool)))
    # Stream lets you re-use a generator without draining it.
    # Pairs will hold start/stop values for each part of sequence
    pairs = Stream() << partition(2, ranges)
    # POSs will contain the start position of each vcf row
    POSs = Stream() << imap(operator.itemgetter(0), pairs)
    # VCF files start at index 1; python starts at 0
    pairs_offset_1 = imap(lambda x: (x[0] - 1, x[1] - 1), pairs)
    #grab the pieces of the reference to build our elts from
    chunks = map(lambda x: seq[x[0]:x[1]], pairs_offset_1)
    #random chromosome name
    chrom = draw(st.text(string.ascii_letters))
    # Draw a new record for each of the Positions we have made
    vcfs = map(compose(draw, partial(vcf_dict_strategy_factory, chrom)), POSs, chunks)
    #TODO: ranges must be non-empty. Assuming vcfs for now.
    # vcfs can be a generator
    #assume(len(vcfs) > 0)
    return (seq, vcfs)
Example 19
def ngram_tuples(n, string, minlen=3, maxlen=25):
    """
    Creates ngram tuples of size 'n' from 'string'.
    Also changes the string to lowercase, removes generic stopwords, and splits on all non-alphanumeric characters.

    Ex:
        In [2]: list(ngram_tuples(n=1, string='Just another example text.'))
        Out[2]: [('another',), ('example',), ('text',)]

        In [2]: list(ngram_tuples(n=2, string='Just another example text.'))
        Out[2]: [('another', 'example'), ('example', 'text')]

        In [11]: list(ngram_tuples(3, 'I needed a longer example text for this example.'))
        Out[11]:
            [('needed', 'longer', 'example'),
             ('longer', 'example', 'text'),
             ('example', 'text', 'example')]


    minlen - filter out words that have fewer characters than 'minlen'.
    maxlen - filter out words that have more characters than 'maxlen'.
    """
    return tlz.pipe(string,
                    lower,
                    simple_split,
                    filter_longer_than(maxlen),
                    tlz.compose(tlz.concat, map_c(splitter_of_words)),
                    filter_shorter_than(minlen),
                    filter_stopwords,
                    sliding_window_c(n))
Example 20
    def lookup_by_supplementary_field(self, field_name, value, as_of_date):
        try:
            owners = self.equity_supplementary_map[
                field_name,
                value,
            ]
            assert owners, 'empty owners list for (%r, %r)' % (field_name, value)
        except KeyError:
            # no equity has ever held this value
            raise ValueNotFoundForField(field=field_name, value=value)

        if not as_of_date:
            if len(owners) > 1:
                # more than one equity has held this value, this is ambiguous
                # without the date
                raise MultipleValuesFoundForField(
                    field=field_name,
                    value=value,
                    options=set(map(
                        compose(self.retrieve_asset, attrgetter('sid')),
                        owners,
                    )),
                )
            # exactly one equity has ever held this value, we may resolve
            # without the date
            return self.retrieve_asset(owners[0].sid)

        for start, end, sid, _ in owners:
            if start <= as_of_date < end:
                # find the equity that owned it on the given asof date
                return self.retrieve_asset(sid)

        # no equity held the value on the given asof date
        raise ValueNotFoundForField(field=field_name, value=value)
Example 21
def format_results(terminal_width, key_list, separator, text_list,
                   left_align=True, min_factor=3, **kwargs):
    """Returns formatted results in two columns.
    """
    key_width = max(map(len, key_list))
    separator_length = len(separator)
    desc_wrap = toolz.identity
    if terminal_width:
        if key_width / terminal_width > .5:
            key_width = terminal_width // 2 - 3
        text_width = terminal_width - key_width - separator_length
        if text_width * min_factor > terminal_width:
            desc_wrap = toolz.compose(
                ('\n' + ' ' * (key_width + separator_length)).join,
                toolz.partial(textwrap.wrap, width=text_width, **kwargs),
            )

    if left_align:
        fmt = '%-*s%s%s'
    else:
        fmt = '%*s%s%s'

    for key, text in zip(key_list, text_list):
        text = desc_wrap(text)
        if len(key) > key_width:
            yield fmt % (key_width, key, separator, '')
            yield fmt % (key_width, '', ' ' * separator_length, text)
        else:
            yield fmt % (key_width, key, separator, text)
Example 22
File: dask.py Project: blaze/blaze
def compute_up(expr, data, **kwargs):
    leaf = expr._leaves()[0]
    chunk = symbol('chunk', DataShape(*(tuple(map(first, data.chunks)) +
                                        (leaf.dshape.measure,))))
    (chunk, chunk_expr), (agg, agg_expr) = split(expr._child, expr,
                                                 chunk=chunk)

    inds = tuple(range(ndim(leaf)))
    dtype = expr.dshape.measure.to_numpy_dtype()
    tmp = atop(
        curry(compute_it, chunk_expr, [chunk], **kwargs),
        inds,
        data,
        inds,
        dtype=dtype,
    )

    return atop(
        compose(
            curry(compute_it, agg_expr, [agg], **kwargs),
            curry(_concatenate2, axes=expr.axis),
        ),
        tuple(i for i in inds if i not in expr.axis),
        tmp,
        inds,
        dtype=dtype,
    )
Example 23
def forcastall(intid):
    data = map(int, read_artist(intid)["action_1"])
    sun = training(data, 4)
    fun = toolz.compose(str, int)

    predictdata = map(fun, toolz.take(60, sun))  # forecast 60 days
    with open("./past_forcast/{aid}.csv".format(aid=intid), "wt") as f:
        f.write(",".join(predictdata))
Example 24
def reduction(x, chunk, aggregate, axis=None, keepdims=None, dtype=None,
              split_every=None, combine=None):
    """ General version of reductions

    >>> reduction(my_array, np.sum, np.sum, axis=0, keepdims=False)  # doctest: +SKIP
    """
    if axis is None:
        axis = tuple(range(x.ndim))
    if isinstance(axis, int):
        axis = (axis,)
    axis = tuple(i if i >= 0 else x.ndim + i for i in axis)

    if dtype and 'dtype' in getargspec(chunk).args:
        chunk = partial(chunk, dtype=dtype)
    if dtype and 'dtype' in getargspec(aggregate).args:
        aggregate = partial(aggregate, dtype=dtype)

    # Normalize split_every
    split_every = split_every or _globals.get('split_every', 4)
    if isinstance(split_every, dict):
        split_every = dict((k, split_every.get(k, 2)) for k in axis)
    elif isinstance(split_every, int):
        n = builtins.max(int(split_every ** (1/(len(axis) or 1))), 2)
        split_every = dict.fromkeys(axis, n)
    else:
        split_every = dict((k, v) for (k, v) in enumerate(x.numblocks) if k in axis)

    # Map chunk across all blocks
    inds = tuple(range(x.ndim))
    tmp = atop(partial(chunk, axis=axis, keepdims=True), inds, x, inds)
    tmp._chunks = tuple((1,)*len(c) if i in axis else c for (i, c)
                        in enumerate(tmp.chunks))

    # Reduce across intermediates
    depth = 1
    for i, n in enumerate(tmp.numblocks):
        if i in split_every and split_every[i] != 1:
            depth = int(builtins.max(depth, ceil(log(n, split_every[i]))))
    func = compose(partial(combine or aggregate, axis=axis, keepdims=True),
                   partial(_concatenate2, axes=axis))
    for i in range(depth - 1):
        tmp = partial_reduce(func, tmp, split_every, True, None)
    func = compose(partial(aggregate, axis=axis, keepdims=keepdims),
                   partial(_concatenate2, axes=axis))
    return partial_reduce(func, tmp, split_every, keepdims=keepdims,
                          dtype=dtype)
Example 25
        def minimize(self, f_df, x0, display=sys.stdout, maxiter=1e3):

            self.display = display
            self.theta = x0

            # setup
            xk = self.algorithm.send(destruct(x0).copy())
            store = defaultdict(list)
            runtimes = []
            if len(self.operators) == 0:
                self.operators = [proxops.identity()]

            # setup
            obj, grad = wrap(f_df, x0)
            transform = compose(destruct, *reversed(self.operators), self.restruct)

            self.optional_print(tp.header(['Iteration', 'Objective', '||Grad||', 'Runtime']))
            try:
                for k in count():

                    # setup
                    tstart = perf_counter()
                    f = obj(xk)
                    df = grad(xk)
                    xk = transform(self.algorithm.send(df))
                    runtimes.append(perf_counter() - tstart)
                    store['f'].append(f)

                    # Update display
                    self.optional_print(tp.row([k,
                                                f,
                                                np.linalg.norm(destruct(df)),
                                                tp.humantime(runtimes[-1])]))

                    if k >= maxiter:
                        break

            except KeyboardInterrupt:
                pass

            self.optional_print(tp.bottom(4))

            # cleanup
            self.optional_print(u'\u279b Final objective: {}'.format(store['f'][-1]))
            self.optional_print(u'\u279b Total runtime: {}'.format(tp.humantime(sum(runtimes))))
            self.optional_print(u'\u279b Per iteration runtime: {} +/- {}'.format(
                tp.humantime(np.mean(runtimes)),
                tp.humantime(np.std(runtimes)),
            ))

            # result
            return OptimizeResult({
                'x': self.restruct(xk),
                'f': f,
                'df': self.restruct(df),
                'k': k,
                'obj': np.array(store['f']),
            })
Example 26
def common_subexpression(*tables):
    """ Common sub expression between subtables

    >>> t = TableSymbol('t', '{x: int, y: int}')
    >>> common_subexpression(t['x'], t['y'])
    t
    """
    sets = [set(t.subterms()) for t in tables]
    return builtins.max(set.intersection(*sets),
                        key=compose(len, str))
Example 27
def wrap(f_df, xref, size=1):
    """
    Memoizes an objective + gradient function, and splits it into
    two functions that return just the objective and gradient, respectively.

    Parameters
    ----------
    f_df : function
        Must be unary (takes a single argument)

    size : int, optional
        Size of the cache (Default=1)

    """

    memoized_f_df = lrucache(lambda x: f_df(restruct(x, xref)), size)
    objective = compose(first, memoized_f_df)
    gradient = compose(destruct, second, memoized_f_df)
    return objective, gradient
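
The memoize-then-split pattern in wrap can be sketched with functools.lru_cache
and operator.itemgetter standing in for the library's lrucache, first and
second helpers (toy f_df, purely illustrative):

from functools import lru_cache
from operator import itemgetter
from toolz import compose

@lru_cache(maxsize=1)
def toy_f_df(x):
    return x ** 2, 2 * x              # (objective, gradient), computed once per x

objective = compose(itemgetter(0), toy_f_df)
gradient = compose(itemgetter(1), toy_f_df)
assert objective(3.0) == 9.0 and gradient(3.0) == 6.0   # second call is served from the cache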
Example 28
    def test_top_and_bottom_with_groupby_and_mask(self, dtype, seed):
        permute = partial(permute_rows, seed)
        permuted_array = compose(permute, partial(array, dtype=int64_dtype))

        shape = (8, 8)

        # Shuffle the input rows to verify that we correctly pick out the top
        # values independently of order.
        factor_data = permute(arange(0, 64, dtype=dtype).reshape(shape))
        classifier_data = permuted_array([[0, 0, 1, 1, 2, 2, 0, 0],
                                          [0, 0, 1, 1, 2, 2, 0, 0],
                                          [0, 1, 2, 3, 0, 1, 2, 3],
                                          [0, 1, 2, 3, 0, 1, 2, 3],
                                          [0, 0, 0, 0, 1, 1, 1, 1],
                                          [0, 0, 0, 0, 1, 1, 1, 1],
                                          [0, 0, 0, 0, 0, 0, 0, 0],
                                          [0, 0, 0, 0, 0, 0, 0, 0]])

        f = self.f
        c = self.c

        self.check_terms(
            terms={
                'top2': f.top(2, groupby=c),
                'bottom2': f.bottom(2, groupby=c),
            },
            initial_workspace={
                f: factor_data,
                c: classifier_data,
            },
            expected={
                # Should be the rightmost two entries in classifier_data,
                # ignoring the off-diagonal.
                'top2': permuted_array([[0, 1, 1, 1, 1, 1, 1, 0],
                                        [0, 1, 1, 1, 1, 1, 0, 1],
                                        [1, 1, 1, 1, 1, 0, 1, 1],
                                        [1, 1, 1, 1, 0, 1, 1, 1],
                                        [0, 1, 1, 0, 0, 0, 1, 1],
                                        [0, 1, 0, 1, 0, 0, 1, 1],
                                        [0, 0, 0, 0, 0, 0, 1, 1],
                                        [0, 0, 0, 0, 0, 0, 1, 1]], dtype=bool),
                # Should be the rightmost two entries in classifier_data,
                # ignoring the off-diagonal.
                'bottom2': permuted_array([[1, 1, 1, 1, 1, 1, 0, 0],
                                           [1, 1, 1, 1, 1, 1, 0, 0],
                                           [1, 1, 1, 1, 1, 0, 1, 1],
                                           [1, 1, 1, 1, 0, 1, 1, 1],
                                           [1, 1, 0, 0, 1, 1, 0, 0],
                                           [1, 1, 0, 0, 1, 1, 0, 0],
                                           [1, 0, 1, 0, 0, 0, 0, 0],
                                           [0, 1, 1, 0, 0, 0, 0, 0]],
                                          dtype=bool),
            },
            mask=self.build_mask(permute(rot90(self.eye_mask(shape=shape)))),
        )
Example 29
def scale_data(train_data, test_data):
    Z_train, y_train = zip(*train_data)
    scale = Scaler()
    scale.fit(Z_train)
    transform = compose(prepend_x0, scale.transform)
    scaledX_train = transform(Z_train)
    scaled_train = list(zip(scaledX_train, y_train))
    Z_test, y_test = zip(*test_data)
    scaledX_test = transform(Z_test)
    scaled_test = list(zip(scaledX_test, y_test))
    return scaled_train, scaled_test
Example 30
    def words():
        yield instructions.LOAD_CONST(compose(
            pprint,
            partial(sorted, key=op.attrgetter('name')),
            dict.values,
        ))
        yield instructions.LOAD_CONST(globals)
        yield instructions.CALL_FUNCTION(0)
        yield instructions.CALL_FUNCTION(1)
        yield instructions.POP_TOP()
        yield next_instruction()
Example 31
def _get_data(args, columns):
    warehouse_conditions = ("warehouse = %(warehouse)s" if args.get(
        "warehouse"
    ) else (
        "warehouse IN (SELECT name FROM `tabWarehouse` WHERE company = %(company)s)"
    ))
    items = frappe.db.sql(
        """
            SELECT
                i.item_code AS item_code,
                i.brand AS brand,
                i.item_name AS item_name,
                id.default_supplier AS supplier,
                p.price_list_rate AS price,
                b.actual_qty AS stock
            FROM `tabItem` AS i
            LEFT JOIN `tabItem Price` AS p
                ON p.item_code = i.item_code AND p.price_list = %(price_list)s
            LEFT JOIN (
                SELECT
                    item_code, SUM(actual_qty) AS actual_qty
                FROM `tabBin`
                WHERE {warehouse_conditions}
                GROUP BY item_code
            ) AS b
                ON b.item_code = i.item_code
            LEFT JOIN `tabItem Default` AS id
                ON id.parent = i.name AND id.company = %(company)s
        """.format(warehouse_conditions=warehouse_conditions),
        values={
            "price_list": args.get("price_list"),
            "company": args.get("company"),
            "warehouse": args.get("warehouse"),
        },
        as_dict=1,
    )
    sles = frappe.db.sql(
        """
            SELECT item_code, posting_date, actual_qty
            FROM `tabStock Ledger Entry`
            WHERE docstatus < 2 AND
                voucher_type = 'Sales Invoice' AND
                company = %(company)s AND
                {warehouse_conditions} AND
                posting_date BETWEEN %(start_date)s AND %(end_date)s
        """.format(warehouse_conditions=warehouse_conditions),
        values={
            "company": args.get("company"),
            "warehouse": args.get("warehouse"),
            "start_date": args.get("start_date"),
            "end_date": args.get("end_date"),
        },
        as_dict=1,
    )
    keys = compose(list, partial(pluck, "fieldname"))(columns)
    periods = filter(lambda x: x.get("start_date") and x.get("end_date"),
                     columns)

    set_consumption = _set_consumption(sles, periods)

    make_row = compose(partial(keyfilter, lambda k: k in keys),
                       set_consumption)

    return map(make_row, items)
Example 32
def validator(train_data: pd.DataFrame,
              split_fn: SplitterFnType,
              train_fn: LearnerFnType,
              eval_fn: EvalFnType,
              perturb_fn_train: PerturbFnType = identity,
              perturb_fn_test: PerturbFnType = identity,
              predict_oof: bool = False) -> ValidatorReturnType:
    """
    Splits the training data into folds given by the split function and
    performs a train-evaluation sequence on each fold by calling
    ``validator_iteration``.

    Parameters
    ----------
    train_data : pandas.DataFrame
        A Pandas' DataFrame with training data

    split_fn : function pandas.DataFrame ->  list of tuple
        Partially defined split function that takes a dataset and returns
        a list of folds. Each fold is a Tuple of arrays. The first array in
        each tuple contains training indexes while the second array
        contains validation indexes.

    train_fn : function pandas.DataFrame -> prediction_function, predictions_dataset, logs
        A partially defined learning function that takes a training set and
        returns a predict function, a dataset with training predictions and training
        logs.

    eval_fn : function pandas.DataFrame -> dict
        A partially defined evaluation function that takes a dataset with prediction and
        returns the evaluation logs.

    perturb_fn_train : PerturbFnType
        A partially defined corruption function that takes a dataset and returns
        a corrupted dataset. Perturbation applied at train-time.

    perturb_fn_test : PerturbFnType
        A partially defined corruption function that takes a dataset and returns
        a corrupted dataset. Perturbation applied at test-time.

    predict_oof : bool
        Whether to return out of fold predictions on the logs

    Returns
    ----------
    A list of log-like dictionary evaluations.
    """

    folds, logs = split_fn(train_data)

    train_fn = compose(train_fn, perturb_fn_train)
    eval_fn = compose(eval_fn, perturb_fn_test)

    def fold_iter(fold: Tuple[int, Tuple[pd.Index, pd.Index]]) -> LogType:
        (fold_num, (train_index, test_indexes)) = fold
        return validator_iteration(train_data, train_index, test_indexes,
                                   fold_num, train_fn, eval_fn, predict_oof)

    zipped_logs = pipe(folds, enumerate, map(fold_iter), partial(zip, logs))

    def _join_split_log(
            log_tuple: Tuple[LogType, LogType]) -> Tuple[LogType, LogType]:
        train_log = {}
        split_log, validator_log = log_tuple
        train_log["train_log"] = validator_log["train_log"]
        return train_log, assoc(dissoc(validator_log, "train_log"),
                                "split_log", split_log)

    def get_perturbed_columns(perturbator: PerturbFnType) -> List[str]:
        args = inspect.getfullargspec(perturbator).kwonlydefaults
        return args['cols']

    train_logs, validator_logs = zip(*map(_join_split_log, zipped_logs))
    first_train_log = first(train_logs)

    perturbator_log = {
        'perturbated_train': [],
        'perturbated_test': []
    }  # type: LogType
    if perturb_fn_train != identity:
        perturbator_log['perturbated_train'] = get_perturbed_columns(
            perturb_fn_train)
    if perturb_fn_test != identity:
        perturbator_log['perturbated_test'] = get_perturbed_columns(
            perturb_fn_test)
    first_train_log = assoc(first_train_log, "perturbator_log",
                            perturbator_log)

    return assoc(first_train_log, "validator_log", list(validator_logs))
Example 33
    def test_quantiles_masked(self, seed):
        permute = partial(permute_rows, seed)

        # 7 x 7 so that we divide evenly into 2/3/6-tiles after including the
        # nan value in each row.
        shape = (7, 7)

        # Shuffle the input rows to verify that we don't depend on the order.
        # Take the log to ensure that we don't depend on linear scaling or
        # integrality of inputs
        factor_data = permute(log1p(arange(49, dtype=float).reshape(shape)))
        factor_data_w_nans = where(
            permute(rot90(self.eye_mask(shape=shape))),
            factor_data,
            nan,
        )
        mask_data = permute(self.eye_mask(shape=shape))

        f = F()
        f_nans = OtherF()
        m = Mask()

        # Apply the same shuffle we applied to the input rows to our
        # expectations. Doing it this way makes it obvious that our
        # expectation corresponds to our input, while still testing against
        # a range of input orderings.
        permuted_array = compose(permute, partial(array, dtype=int64_dtype))

        self.check_terms(
            terms={
                '2_masked': f.quantiles(bins=2, mask=m),
                '3_masked': f.quantiles(bins=3, mask=m),
                '6_masked': f.quantiles(bins=6, mask=m),
                '2_nans': f_nans.quantiles(bins=2),
                '3_nans': f_nans.quantiles(bins=3),
                '6_nans': f_nans.quantiles(bins=6),
            },
            initial_workspace={
                f: factor_data,
                f_nans: factor_data_w_nans,
                m: mask_data,
            },
            expected={
                # Expected results here are the same as in
                # test_quantiles_unmasked, except with diagonals of -1s
                # interpolated to match the effects of masking and/or input
                # nans.
                '2_masked':
                permuted_array([[-1, 0, 0, 0, 1, 1, 1], [0, -1, 0, 0, 1, 1, 1],
                                [0, 0, -1, 0, 1, 1, 1], [0, 0, 0, -1, 1, 1, 1],
                                [0, 0, 0, 1, -1, 1, 1], [0, 0, 0, 1, 1, -1, 1],
                                [0, 0, 0, 1, 1, 1, -1]]),
                '3_masked':
                permuted_array([[-1, 0, 0, 1, 1, 2, 2], [0, -1, 0, 1, 1, 2, 2],
                                [0, 0, -1, 1, 1, 2, 2], [0, 0, 1, -1, 1, 2, 2],
                                [0, 0, 1, 1, -1, 2, 2], [0, 0, 1, 1, 2, -1, 2],
                                [0, 0, 1, 1, 2, 2, -1]]),
                '6_masked':
                permuted_array([[-1, 0, 1, 2, 3, 4, 5], [0, -1, 1, 2, 3, 4, 5],
                                [0, 1, -1, 2, 3, 4, 5], [0, 1, 2, -1, 3, 4, 5],
                                [0, 1, 2, 3, -1, 4, 5], [0, 1, 2, 3, 4, -1, 5],
                                [0, 1, 2, 3, 4, 5, -1]]),
                '2_nans':
                permuted_array([[0, 0, 0, 1, 1, 1, -1], [0, 0, 0, 1, 1, -1, 1],
                                [0, 0, 0, 1, -1, 1, 1], [0, 0, 0, -1, 1, 1, 1],
                                [0, 0, -1, 0, 1, 1, 1], [0, -1, 0, 0, 1, 1, 1],
                                [-1, 0, 0, 0, 1, 1, 1]]),
                '3_nans':
                permuted_array([[0, 0, 1, 1, 2, 2, -1], [0, 0, 1, 1, 2, -1, 2],
                                [0, 0, 1, 1, -1, 2, 2], [0, 0, 1, -1, 1, 2, 2],
                                [0, 0, -1, 1, 1, 2, 2], [0, -1, 0, 1, 1, 2, 2],
                                [-1, 0, 0, 1, 1, 2, 2]]),
                '6_nans':
                permuted_array([[0, 1, 2, 3, 4, 5, -1], [0, 1, 2, 3, 4, -1, 5],
                                [0, 1, 2, 3, -1, 4, 5], [0, 1, 2, -1, 3, 4, 5],
                                [0, 1, -1, 2, 3, 4, 5], [0, -1, 1, 2, 3, 4, 5],
                                [-1, 0, 1, 2, 3, 4, 5]]),
            },
            mask=self.build_mask(self.ones_mask(shape=shape)),
        )
Example 34
    "voucher_no",
    "batch_no",
]


def execute(filters=None):
    columns, data = stock_ledger(filters)
    return _get_columns(columns), _get_data(data, filters)


_get_columns = compose(
    list,
    partial(filter, lambda x: x.get("fieldname") in _fields),
    lambda x: x[:5] + [{
        "label": _("Default Supplier"),
        "fieldname": "default_supplier",
        "fieldtype": "Link",
        "options": "Supplier",
        "width": 100,
    }] + x[5:],
)


def _get_data(data, filters):
    item_codes = compose(list, unique, partial(pluck, "item_code"))(data)
    if not item_codes:
        return data
    query = frappe.db.sql(
        """
            SELECT
                i.item_code AS item_code,
Example 35
    def lookup_symbol(self, symbol, as_of_date, fuzzy=False):
        """
        Return matching Equity of name symbol in database.

        If multiple Equities are found and as_of_date is not set,
        raises MultipleSymbolsFound.

        If no Equity was active at as_of_date raises SymbolNotFound.
        """

        # Format inputs
        if as_of_date is not None:
            as_of_date = pd.Timestamp(normalize_date(as_of_date))

        company_symbol, share_class_symbol, fuzzy_symbol = \
            split_delimited_symbol(symbol)

        equities_cols = self.equities.c
        if as_of_date:
            ad_value = as_of_date.value

            if fuzzy:
                # Search for a single exact match on the fuzzy column
                fuzzy_candidates = sa.select((equities_cols.sid,)).where(
                    (equities_cols.fuzzy_symbol == fuzzy_symbol) &
                    (equities_cols.start_date <= ad_value) &
                    (equities_cols.end_date >= ad_value),
                ).execute().fetchall()

                # If exactly one SID exists for fuzzy_symbol, return that sid
                if len(fuzzy_candidates) == 1:
                    return self._retrieve_equity(fuzzy_candidates[0]['sid'])

            # Search for exact matches of the split-up company_symbol and
            # share_class_symbol
            candidates = sa.select((equities_cols.sid,)).where(
                (equities_cols.company_symbol == company_symbol) &
                (equities_cols.share_class_symbol == share_class_symbol) &
                (equities_cols.start_date <= ad_value) &
                (equities_cols.end_date >= ad_value),
            ).execute().fetchall()

            # If exactly one SID exists for symbol, return that symbol
            if len(candidates) == 1:
                return self._retrieve_equity(candidates[0]['sid'])

            # If no SID exists for symbol, return SID with the
            # highest-but-not-over end_date
            elif not candidates:
                sid = sa.select((equities_cols.sid,)).where(
                    (equities_cols.company_symbol == company_symbol) &
                    (equities_cols.share_class_symbol == share_class_symbol) &
                    (equities_cols.start_date <= ad_value),
                ).order_by(
                    equities_cols.end_date.desc(),
                ).scalar()
                if sid is not None:
                    return self._retrieve_equity(sid)

            # If multiple SIDs exist for symbol, return latest start_date with
            # end_date as a tie-breaker
            elif len(candidates) > 1:
                sid = sa.select((equities_cols.sid,)).where(
                    (equities_cols.company_symbol == company_symbol) &
                    (equities_cols.share_class_symbol == share_class_symbol) &
                    (equities_cols.start_date <= ad_value),
                ).order_by(
                    equities_cols.start_date.desc(),
                    equities_cols.end_date.desc(),
                ).scalar()
                if sid is not None:
                    return self._retrieve_equity(sid)

            raise SymbolNotFound(symbol=symbol)

        else:
            # If this is a fuzzy look-up, check if there is exactly one match
            # for the fuzzy symbol
            if fuzzy:
                fuzzy_sids = sa.select((equities_cols.sid,)).where(
                    (equities_cols.fuzzy_symbol == fuzzy_symbol)
                ).execute().fetchall()
                if len(fuzzy_sids) == 1:
                    return self._retrieve_equity(fuzzy_sids[0]['sid'])

            sids = sa.select((equities_cols.sid,)).where(
                (equities_cols.company_symbol == company_symbol) &
                (equities_cols.share_class_symbol == share_class_symbol)
            ).execute().fetchall()
            if len(sids) == 1:
                return self._retrieve_equity(sids[0]['sid'])
            elif not sids:
                raise SymbolNotFound(symbol=symbol)
            else:
                raise MultipleSymbolsFound(
                    symbol=symbol,
                    options=list(map(
                        compose(self._retrieve_equity, itemgetter('sid')),
                        sids,
                    ))
                )
Example 36
    return keyfilter(lambda k: k in whitelist, d)


def sum_by(key):
    return compose(sum, partial(map, lambda x: x.get(key)))


def key_by(key, items):
    return reduceby(key, lambda a, x: merge(a, x), items, {})


split_to_list = excepts(
    AttributeError,
    compose(
        list,
        partial(filter, lambda x: x),
        partial(map, lambda x: x.strip()),
        lambda x: x.split(","),
    ),
    lambda x: None,
)


def with_report_error_check(data_fn):
    def fn(*args, **kwargs):
        try:
            return data_fn(*args, **kwargs)
        except ProgrammingError:
            return []

    return fn
Example 37
def get_pet_relations(pet):
    return compose(list,
                   partial(pluck,
                           'customer'))(frappe.get_all('Pet Relation',
                                                       filters={'parent': pet},
                                                       fields=['customer']))
Example 38
from skimage import color
from funcy import iffy, constantly, tap, rpartial
from toolz import memoize, curry, compose, pipe
from toolz.curried import map, juxt, mapcat, concatv
from toolz.sandbox.core import unzip
from geopandas import gpd
from osgeo import ogr, gdal, osr
from lenses import lens

from abfs.path import *
from abfs.constants import *
from abfs.group_data_split import GroupDataSplit, DEFAULT_SPLIT_CONFIG
from abfs.conversions import area_in_square_feet
from abfs.segmentation_augmentation import SegmentationAugmentation, MOVE_SCALE_ROTATE

list_unzip = compose(map(list), unzip)
list_concatv = compose(list, concatv)

BLACK = 0
BINARY_WHITE = 1
ALWAYS_TRUE = lambda df: df.index != -1


class Data():
    def __init__(self,
                 config,
                 split_config=DEFAULT_SPLIT_CONFIG,
                 seg_aug_config=MOVE_SCALE_ROTATE,
                 batch_size=16,
                 override_df=None,
                 aug_random_seed=None,
Example 39
@dispatch(Selection, RDD)
def compute_up(t, rdd, **kwargs):
    predicate = optimize(t.predicate, rdd)
    predicate = rrowfunc(predicate, t._child)
    return rdd.filter(predicate)


rdd_reductions = {
    reductions.sum: RDD.sum,
    reductions.min: RDD.min,
    reductions.max: RDD.max,
    reductions.count: RDD.count,
    reductions.mean: RDD.mean,
    reductions.var: RDD.variance,
    reductions.std: RDD.stdev,
    reductions.nunique: compose(RDD.count, RDD.distinct)
}


@dispatch(tuple(rdd_reductions), RDD)
def compute_up(t, rdd, **kwargs):
    return rdd_reductions[type(t)](rdd)


def istruthy(x):
    return not not x


@dispatch(reductions.any, RDD)
def compute_up(t, rdd, **kwargs):
    return istruthy(rdd.filter(identity).take(1))
Example 40
def encode_coord_to_str(coord):
    return compose(convert_to_str, fix_integers, split_str_to_int,
                   invert_if_negative(coord), left_shift, to_binary,
                   multiply_round)(coord)
Example 41
def truncate_categorical(df: pd.DataFrame,
                         columns_to_truncate: List[str],
                         percentile: float,
                         replacement: Union[str, float] = -9999,
                         replace_unseen: Union[str, float] = -9999,
                         store_mapping: bool = False) -> LearnerReturnType:
    """
    Truncate infrequent categories and replace them by a single one.
    You can think of it like "others" category.

    The default behaviour is to replace the original values. To store
    the transformed values in a new column, specify `prefix` or `suffix`
    in the parameters, or specify a dictionary with the desired column
    mapping using the `columns_mapping` parameter.

    Parameters
    ----------
    df : pandas.DataFrame
        A Pandas' DataFrame that must contain the `columns_to_truncate` columns.

    columns_to_truncate : list of str
        The df columns names to perform the truncation.

    percentile : float
        Categories less frequent than the percentile will be replaced by the
        same one.

    replacement: int, str, float or nan
        The value to use when a category is less frequent than the percentile
        variable.

    replace_unseen : int, str, float, or nan
        The value to impute unseen categories.

    store_mapping : bool (default: False)
        Whether to store the feature value -> integer dictionary in the log.
    """
    get_categs = lambda col: (df[col].value_counts() / len(df)).to_dict()
    update = lambda d: map(
        lambda kv: (kv[0], replacement)
        if kv[1] <= percentile else (kv[0], kv[0]), d.items())
    categs_to_dict = lambda categ_dict: dict(categ_dict)

    vec = {
        column: compose(categs_to_dict, update, get_categs)(column)
        for column in columns_to_truncate
    }

    def p(new_df: pd.DataFrame) -> pd.DataFrame:
        return apply_replacements(new_df, columns_to_truncate, vec,
                                  replace_unseen)

    p.__doc__ = learner_pred_fn_docstring("truncate_categorical")

    log: LearnerLogType = {
        'truncate_categorical': {
            'transformed_column': columns_to_truncate,
            'replace_unseen': replace_unseen
        }
    }

    if store_mapping:
        log["truncate_categorical"]["mapping"] = vec

    return p, p(df), log
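
A toy illustration of the category mapping built by
compose(categs_to_dict, update, get_categs) above (assumed frame,
percentile=0.25 and replacement=-9999; not from the original source):

import pandas as pd

toy = pd.DataFrame({"cat": ["a", "a", "a", "b", "c"]})
freqs = (toy["cat"].value_counts() / len(toy)).to_dict()   # {'a': 0.6, 'b': 0.2, 'c': 0.2}
mapping = {k: (-9999 if v <= 0.25 else k) for k, v in freqs.items()}
assert mapping == {"a": "a", "b": -9999, "c": -9999}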
Example 42
    Check if the word is in the vocab of the model

    Parameters
    ----------
    word : str

    Returns
    -------
    str
        it returns the original word or an empty string
        if the word is not in the vocabulary
    """
    return word if word in nlp.vocab else ""


parse_ = fp.compose(vocab, lemma, keyword, nlp)


def parse(texts):
    """
    Map the parsing function across the list of texts.

    The parsing function currently is:
    - pass through a spacy model (nlp)
    - get the keyword of the sentence (keyword)
    - lemma the resulting word (lemma)
    - vocab check that the word is in the vocabulary (vocab)

    For the single document version use: ``parse_``

    Parameters
Example 43
def sumDigits(values: Iterable[int]) -> int:
    curryMap = curry(map)
    return compose(sum, concat, curryMap(toDigits))(values)
Example 44
    def _write_internal(self, iterator, assets):
        """
        Internal implementation of write.

        `iterator` should be an iterator yielding pairs of (asset, ctable).
        """
        total_rows = 0
        first_row = {}
        last_row = {}
        calendar_offset = {}

        # Maps column name -> output carray.
        columns = {
            k: carray(array([], dtype=uint32))
            for k in US_EQUITY_PRICING_BCOLZ_COLUMNS
        }

        earliest_date = None
        sessions = self._calendar.sessions_in_range(self._start_session,
                                                    self._end_session)

        if assets is not None:

            @apply
            def iterator(iterator=iterator, assets=set(assets)):
                for asset_id, table in iterator:
                    if asset_id not in assets:
                        raise ValueError('unknown asset id %r' % asset_id)
                    yield asset_id, table

        for asset_id, table in iterator:
            nrows = len(table)
            for column_name in columns:
                if column_name == 'id':
                    # We know what the content of this column is, so don't
                    # bother reading it.
                    columns['id'].append(
                        full((nrows, ), asset_id, dtype='uint32'), )
                    continue

                columns[column_name].append(table[column_name])

            if earliest_date is None:
                earliest_date = table["day"][0]
            else:
                earliest_date = min(earliest_date, table["day"][0])

            # Bcolz doesn't support ints as keys in `attrs`, so convert
            # assets to strings for use as attr keys.
            asset_key = str(asset_id)

            # Calculate the index into the array of the first and last row
            # for this asset. This allows us to efficiently load single
            # assets when querying the data back out of the table.
            first_row[asset_key] = total_rows
            last_row[asset_key] = total_rows + nrows - 1
            total_rows += nrows

            table_day_to_session = compose(
                self._calendar.minute_to_session_label,
                partial(Timestamp, unit='s', tz='UTC'),
            )
            asset_first_day = table_day_to_session(table['day'][0])
            asset_last_day = table_day_to_session(table['day'][-1])

            asset_sessions = sessions[sessions.slice_indexer(
                asset_first_day, asset_last_day)]
            assert len(table) == len(asset_sessions), (
                'Got {} rows for daily bars table with first day={}, last '
                'day={}, expected {} rows.\n'
                'Missing sessions: {}\n'
                'Extra sessions: {}'.format(
                    len(table),
                    asset_first_day.date(),
                    asset_last_day.date(),
                    len(asset_sessions),
                    asset_sessions.difference(
                        to_datetime(
                            np.array(table['day']),
                            unit='s',
                            utc=True,
                        )).tolist(),
                    to_datetime(
                        np.array(table['day']),
                        unit='s',
                        utc=True,
                    ).difference(asset_sessions).tolist(),
                ))

            # Calculate the number of trading days between the first date
            # in the stored data and the first date of **this** asset. This
            # offset used for output alignment by the reader.
            calendar_offset[asset_key] = sessions.get_loc(asset_first_day)

        # This writes the table to disk.
        full_table = ctable(
            columns=[
                columns[colname] for colname in US_EQUITY_PRICING_BCOLZ_COLUMNS
            ],
            names=US_EQUITY_PRICING_BCOLZ_COLUMNS,
            rootdir=self._filename,
            mode='w',
        )

        full_table.attrs['first_trading_day'] = (earliest_date if earliest_date
                                                 is not None else iNaT)

        full_table.attrs['first_row'] = first_row
        full_table.attrs['last_row'] = last_row
        full_table.attrs['calendar_offset'] = calendar_offset
        full_table.attrs['calendar_name'] = self._calendar.name
        full_table.attrs['start_session_ns'] = self._start_session.value
        full_table.attrs['end_session_ns'] = self._end_session.value
        full_table.flush()
        return full_table
Example 45
def _get_keys(args):
    return compose(list, partial(pluck, "fieldname"), _get_columns)(args)
Example 46
    def visitor(self):
        return compose(reversed, list)
Example 47
def sum_by(key):
    return compose(sum, partial(map, lambda x: x.get(key)))
Example 48
def checkSum(n: int) -> int:
    return compose(sumDigits, doubleEveryOther, toDigits)(n)
Example 49
    def __zip_candles_data_frame(self, df):
        return compose(zip)(
            *map(df.get, candles_utils.columns_with_ticker.keys())
        )
Esempio n. 50
0
f1(2)(3)

from functools import partial
 
f3 = partial(f, 2)
f3(3)

#From toolz

def stem(word):
	""" Stem word to primitive form """
	return word.lower().rstrip(",.!:;'-\"").lstrip("'\"")

from toolz import compose, frequencies, partial
wordcount = compose(frequencies, partial(map, stem), str.split)

sentence = "This cat jumped over this other cat!"
wordcount(sentence)
# -> {'this': 2, 'cat': 2, 'jumped': 1, 'over': 1, 'other': 1}


#####################################################################
# Python precompiled modules: http://www.lfd.uci.edu/~gohlke/pythonlibs/
#####################################################################

# Gibbons, "Unbounded Spigot Algorithms for the Digits of Pi": http://web.comlab.ox.ac.uk/oucl/work/jeremy.gibbons/publications/spigot.pdf
 
 
def pi_digits():
    """generator for digits of pi"""
Esempio n. 51
0
def isValid(n: int) -> bool:
    curryEquals = curry(operator.eq)
    curryMod = curry(operator.mod)
    return compose(curryEquals(0), flip(curryMod, 10), checkSum)(n)
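
Assuming the toDigits/sumDigits helpers behave as in the standard Luhn algorithm, a Luhn-valid number passes and a corrupted check digit fails:

assert isValid(79927398713)       # classic Luhn-valid example number
assert not isValid(79927398714)   # same number with a wrong check digit
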
Esempio n. 52
0
    def _lookup_symbol_fuzzy(self, symbol, as_of_date):
        symbol = symbol.upper()
        company_symbol, share_class_symbol = split_delimited_symbol(symbol)
        try:
            owners = self.fuzzy_symbol_ownership_map[
                company_symbol + share_class_symbol
            ]
            assert owners, 'empty owners list for %r' % symbol
        except KeyError:
            # no equity has ever held a symbol matching the fuzzy symbol
            raise SymbolNotFound(symbol=symbol)

        if not as_of_date:
            if len(owners) == 1:
                # only one valid match
                return self.retrieve_asset(owners[0].sid)

            options = []
            for _, _, sid, sym in owners:
                if sym == symbol:
                    # there are multiple options, look for exact matches
                    options.append(self.retrieve_asset(sid))

            if len(options) == 1:
                # there was only one exact match
                return options[0]

            # there are more than one exact match for this fuzzy symbol
            raise MultipleSymbolsFound(
                symbol=symbol,
                options=set(options),
            )

        options = []
        for start, end, sid, sym in owners:
            if start <= as_of_date < end:
                # see which fuzzy symbols were owned on the asof date.
                options.append((sid, sym))

        if not options:
            # no equity owned the fuzzy symbol on the date requested
            raise SymbolNotFound(symbol=symbol)

        if len(options) == 1:
            # there was only one owner, return it
            return self.retrieve_asset(options[0][0])

        for sid, sym in options:
            if sym == symbol:
                # look for an exact match on the asof date
                return self.retrieve_asset(sid)

        # multiple equities held tickers matching the fuzzy ticker but
        # there are no exact matches
        raise MultipleSymbolsFound(
            symbol=symbol,
            options=set(map(
                compose(self.retrieve_asset, itemgetter(0)),
                options,
            )),
        )
Esempio n. 53
0
def doubleEveryOther(values: Iterable[int]) -> Iterable[int]:
    return compose(myZipWith(operator.mul, cycle([1, 2])), reverse)(values)
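
myZipWith and reverse are not defined in this listing; hypothetical toolz-style definitions (plus the imports doubleEveryOther itself assumes) that make it work as written:

import operator                 # assumed by doubleEveryOther above
from itertools import cycle     # assumed by doubleEveryOther above
from toolz import curry

myZipWith = curry(lambda f, xs, ys: list(map(f, xs, ys)))   # curried zipWith
reverse = lambda xs: list(reversed(xs))

# e.g. doubleEveryOther([1, 2, 3, 4]) -> [4, 6, 2, 2]
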
def decode(coord_str):
    return compose(fix_coords, group_pairs, maybe_invert_shift,
                   coords_arr_to_bin, split_str)(coord_str)
Esempio n. 55
0
    Parameters
    ----------
    query
        the querylike object to evaluate
    api
        the API to handle the request
    loaders
        The registry of object loaders
    auth
        The authentication object
    sender
        The request sender
    """
    return thread_last(query, attrgetter('__req__'), api.prepare,
                       (flip(api.add_auth), auth), sender, api.parse,
                       loaders(query.__rtype__))
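
thread_last threads the value through each step as the last positional argument, which is why (flip(api.add_auth), auth) above ends up calling api.add_auth(request, auth) (assuming a toolz-style flip). A standalone illustration:

from operator import add
from toolz import thread_last

assert thread_last(5, (add, 10), str) == '15'   # str(add(10, 5))
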


_simple_json_api = Api(prepare=methodcaller('add_prefix', 'https://'),
                       parse=compose(json.loads, methodcaller('decode'),
                                     attrgetter('content')),
                       add_auth=lambda req, auth:
                       (req if auth is None else req.add_basic_auth(auth)))
simple_resolve = partial(resolve,
                         api=_simple_json_api,
                         loaders=load.simple_registry,
                         auth=None,
                         sender=http.urllib_sender())
"""a basic resolver"""
Esempio n. 56
0
def execute(filters=None):
    columns = _get_columns()
    keys = compose(list, partial(pluck, "fieldname"))(columns)
    clauses, values = _get_filters(filters)
    data = _get_data(clauses, values, keys)
    return columns, data
Esempio n. 57
0
def filter_whitespace(tokenset):
    """
    Filters out tokens that are only whitespace.
    """
    return tlz.filter(tlz.compose(bool, str.strip), tokenset)
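
For example:

tokens = ["alpha", "   ", "\t\n", " beta "]
assert list(filter_whitespace(tokens)) == ["alpha", " beta "]
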
Esempio n. 58
0
        (
            # different Python class, right side is param
            dt.date,
            ibis.param(dt.timestamp),
            False,
        ),
    ],
)
def test_scalar_parameter_compare(left, right, expected):
    assert left.equals(right) == expected


@pytest.mark.parametrize(
    ('case', 'creator'),
    [
        (datetime.now(), toolz.compose(methodcaller('time'), ibis.timestamp)),
        ('now', toolz.compose(methodcaller('time'), ibis.timestamp)),
        (datetime.now().time(), ibis.time),
        ('10:37', ibis.time),
    ],
)
@pytest.mark.parametrize(('left', 'right'), [(1, 'a'), ('a', 1), (1.0, 2.0),
                                             (['a'], [1])])
def test_between_time_failure_time(case, creator, left, right):
    value = creator(case)
    with pytest.raises(TypeError):
        value.between(left, right)


def test_custom_type_binary_operations():
    class Foo(ir.ValueExpr):
Esempio n. 59
0
def from_blaze(
    expr,
    deltas='auto',
    checkpoints='auto',
    loader=None,
    resources=None,
    odo_kwargs=None,
    missing_values=None,
    no_deltas_rule='warn',
    no_checkpoints_rule='warn',
    apply_deltas_adjustments=True,
):
    """Create a Pipeline API object from a blaze expression.

    Parameters
    ----------
    expr : Expr
        The blaze expression to use.
    deltas : Expr, 'auto' or None, optional
        The expression to use for the point in time adjustments.
        If the string 'auto' is passed, a deltas expr will be looked up
        by stepping up the expression tree and looking for another field
        with the name of ``expr._name`` + '_deltas'. If None is passed, no
        deltas will be used.
    checkpoints : Expr, 'auto' or None, optional
        The expression to use for the forward fill checkpoints.
        If the string 'auto' is passed, a checkpoints expr will be looked up
        by stepping up the expression tree and looking for another field
        with the name of ``expr._name`` + '_checkpoints'. If None is passed,
        no checkpoints will be used.
    loader : BlazeLoader, optional
        The blaze loader to attach this pipeline dataset to. If None is passed,
        the global blaze loader is used.
    resources : dict or any, optional
        The data to execute the blaze expressions against. This is used as the
        scope for ``bz.compute``.
    odo_kwargs : dict, optional
        The keyword arguments to pass to odo when evaluating the expressions.
    missing_values : dict[str -> any], optional
        A dict mapping column names to missing values for those columns.
        Missing values are required for integral columns.
    no_deltas_rule : {'warn', 'raise', 'ignore'}, optional
        What should happen if ``deltas='auto'`` but no deltas can be found.
        'warn' says to raise a warning but continue.
        'raise' says to raise an exception if no deltas can be found.
        'ignore' says take no action and proceed with no deltas.
    no_checkpoints_rule : {'warn', 'raise', 'ignore'}, optional
        What should happen if ``checkpoints='auto'`` but no checkpoints can be
        found. 'warn' says to raise a warning but continue.
        'raise' says to raise an exception if no checkpoints can be found.
        'ignore' says take no action and proceed with no checkpoints.
    apply_deltas_adjustments : bool, optional
        Whether or not deltas adjustments should be applied for this dataset.
        True by default because not applying deltas adjustments is an exception
        rather than the rule.

    Returns
    -------
    pipeline_api_obj : DataSet or BoundColumn
        Either a new dataset or bound column based on the shape of the expr
        passed in. If a table shaped expression is passed, this will return
        a ``DataSet`` that represents the whole table. If an array-like shape
        is passed, a ``BoundColumn`` on the dataset that would be constructed
        from passing the parent is returned.
    """
    if 'auto' in {deltas, checkpoints}:
        invalid_nodes = tuple(filter(is_invalid_deltas_node, expr._subterms()))
        if invalid_nodes:
            raise TypeError(
                'expression with auto %s may only contain (%s) nodes,'
                " found: %s" % (
                    ' or '.join(
                        (['deltas'] if deltas is not None else []) +
                        (['checkpoints'] if checkpoints is not None else []),
                    ),
                    ', '.join(map(get__name__, valid_deltas_node_types)),
                    ', '.join(
                        set(map(compose(get__name__, type), invalid_nodes)), ),
                ), )
    deltas = _get_metadata(
        'deltas',
        expr,
        deltas,
        no_deltas_rule,
    )
    checkpoints = _get_metadata(
        'checkpoints',
        expr,
        checkpoints,
        no_checkpoints_rule,
    )

    # Check if this is a single column out of a dataset.
    if bz.ndim(expr) != 1:
        raise TypeError(
            'expression was not tabular or array-like,'
            ' %s dimensions: %d' % (
                'too many' if bz.ndim(expr) > 1 else 'not enough',
                bz.ndim(expr),
            ), )

    single_column = None
    if isscalar(expr.dshape.measure):
        # This is a single column. Record which column we are to return
        # but create the entire dataset.
        single_column = rename = expr._name
        field_hit = False
        if not isinstance(expr, traversable_nodes):
            raise TypeError(
                "expression '%s' was array-like but not a simple field of"
                " some larger table" % str(expr), )
        while isinstance(expr, traversable_nodes):
            if isinstance(expr, bz.expr.Field):
                if not field_hit:
                    field_hit = True
                else:
                    break
            rename = expr._name
            expr = expr._child
        dataset_expr = expr.relabel({rename: single_column})
    else:
        dataset_expr = expr

    measure = dataset_expr.dshape.measure
    if not isrecord(measure) or AD_FIELD_NAME not in measure.names:
        raise TypeError(
            "The dataset must be a collection of records with at least an"
            " '{ad}' field. Fields provided: '{fields}'\nhint: maybe you need"
            " to use `relabel` to change your field names".format(
                ad=AD_FIELD_NAME,
                fields=measure,
            ), )
    _check_datetime_field(AD_FIELD_NAME, measure)
    dataset_expr, deltas, checkpoints = _ensure_timestamp_field(
        dataset_expr,
        deltas,
        checkpoints,
    )

    if deltas is not None and (sorted(deltas.dshape.measure.fields) != sorted(
            measure.fields)):
        raise TypeError(
            'baseline measure != deltas measure:\n%s != %s' % (
                measure,
                deltas.dshape.measure,
            ), )
    if (checkpoints is not None and
        (sorted(checkpoints.dshape.measure.fields) != sorted(measure.fields))):
        raise TypeError(
            'baseline measure != checkpoints measure:\n%s != %s' % (
                measure,
                checkpoints.dshape.measure,
            ), )

    # Ensure that we have a data resource to execute the query against.
    _check_resources('expr', dataset_expr, resources)
    _check_resources('deltas', deltas, resources)
    _check_resources('checkpoints', checkpoints, resources)

    # Create or retrieve the Pipeline API dataset.
    if missing_values is None:
        missing_values = {}
    ds = new_dataset(dataset_expr, deltas, frozenset(missing_values.items()))

    # Register our new dataset with the loader.
    (loader if loader is not None else global_loader)[ds] = ExprData(
        bind_expression_to_resources(dataset_expr, resources),
        bind_expression_to_resources(deltas, resources)
        if deltas is not None else None,
        bind_expression_to_resources(checkpoints, resources)
        if checkpoints is not None else None,
        odo_kwargs=odo_kwargs,
        apply_deltas_adjustments=apply_deltas_adjustments)
    if single_column is not None:
        # We were passed a single column, extract and return it.
        return getattr(ds, single_column)
    return ds
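
A minimal, hypothetical usage sketch (the frame and its column names are made up; only the 'asof_date' column is required by the checks above, a 'timestamp' column is filled in when missing, and integral data columns would also need entries in missing_values):

import blaze as bz
import pandas as pd

frame = pd.DataFrame({
    'sid': [1, 1],
    'asof_date': pd.to_datetime(['2014-01-01', '2014-01-02']),
    'timestamp': pd.to_datetime(['2014-01-01', '2014-01-02']),
    'value': [1.0, 2.0],
})
expr = bz.data(frame)
ds = from_blaze(expr, deltas=None, checkpoints=None)
value_column = ds.value   # a BoundColumn usable in a Pipeline
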
Esempio n. 60
0
import requests
from toolz import compose

fetch_rates = lambda: requests.get('https://api.gemini.com/v1/pricefeed')
parse_json = lambda x: x.json()
pred = lambda x: x['pair'] == 'ETHUSD'
filter_on_ethusd_pair = lambda x: filter(pred, x)
get_only_element = lambda x: x[0]
get_price = lambda x: x.get('price')
fetch_exchange_rate = compose(
    float,
    get_price,
    get_only_element,
    list,
    filter_on_ethusd_pair,
    parse_json,
    fetch_rates,
)
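
compose runs right to left, so fetch_rates executes first and float last; calling the composed function performs a live request against the public Gemini price feed:

# rate = fetch_exchange_rate()   # -> current ETHUSD price as a float
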