Example 1
def write_tables(fname, models, year):
    """
    Write all tables injected into `models` to a pandas.HDFStore file.
    If year is not None it will be used to prefix the table names so that
    multiple years can go in the same file.

    Parameters
    ----------
    fname : str
        File name for HDFStore. Will be opened in append mode and closed
        at the end of this function.
    models : list of str
        Models from which to gather injected tables for saving.
    year : int or None
        If an integer, used as a prefix along with table names for
        labeling DataFrames in the HDFStore.

    """
    models = (get_model(m) for m in toolz.unique(models))
    table_names = toolz.unique(toolz.concat(m._tables_used() for m in models))
    tables = (get_table(t) for t in table_names)

    key_template = '{}/{{}}'.format(year) if year is not None else '{}'

    with pd.get_store(fname, mode='a') as store:
        for t in tables:
            store[key_template.format(t.name)] = t.to_frame()
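A minimal sketch (model and table names invented) of the toolz idiom used above: toolz.concat flattens the per-model table lists and toolz.unique keeps each table name once, in first-seen order.

import toolz

tables_per_model = [['households', 'buildings'], ['buildings', 'parcels']]
table_names = list(toolz.unique(toolz.concat(tables_per_model)))
assert table_names == ['households', 'buildings', 'parcels']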
Example 2
def render_tabular(api, options=None):
  """Entry point for the tabular reporter interface."""
  # determine separator
  separator = options.get('report.separator', '\t')
  human = options.get('report.human')
  panel = options.get('report.panel')
  samples = options.get('report.samples')
  group = options.get('report.group')

  # read gene panel file if it has been set
  if panel:
    superblock_ids = [line.rstrip() for line in panel]
  else:
    superblock_ids = None

  # get sample ID, group and cutoff from metadata
  sample_query = limit_query(api.samples(), group=group, samples=samples)
  metadata = ((sample.id, sample.group_id, sample.cutoff)
              for sample in sample_query)

  # get the data
  base_query = limit_query(api.average_metrics(superblock_ids=superblock_ids),
                           group=group,
                           samples=samples)

  queries = [metadata,
             base_query,
             api.diagnostic_yield(superblock_ids=superblock_ids,
                                  group_id=group, sample_ids=samples),
             api.sex_checker(group_id=group, sample_ids=samples)]

  # group multiple queries by sample ID (first column)
  key_metrics = groupby(get(0), concat(queries))

  # get the column names dynamically from the query
  headers = concatv(['sample_id', 'group_id', 'cutoff'],
                    (column['name'] for column
                     in base_query.column_descriptions),
                    ['diagnostic yield', 'gender'])

  unique_headers = unique(headers)

  # iterate over all values, concat different query results, and keep
  # only the unique values (excluding second sample_id)
  data = (unique(concat(values)) for values in itervalues(key_metrics))

  if human:
    # export key_metrics in a more human friendly format
    return tabulate(data, unique_headers)

  # yield headers
  return '\n'.join(cons('#' + separator.join(unique_headers),
                        stringify_list(data, separator=separator)))
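A hedged sketch of the grouping step above, with made-up rows: groupby(get(0), ...) buckets rows from the separate queries by sample ID, and unique(concat(values)) merges each bucket while dropping the repeated sample_id. It uses the curried get from toolz.curried (an assumption; the original imports are not shown).

from toolz import concat, groupby, unique
from toolz.curried import get

metadata = [('sample1', 'groupA', 10)]   # (sample_id, group_id, cutoff)
metrics = [('sample1', 99.2)]            # (sample_id, average metric)
merged = groupby(get(0), concat([metadata, metrics]))
row = list(unique(concat(merged['sample1'])))
assert row == ['sample1', 'groupA', 10, 99.2]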
Example 3
File: sql.py Project: blaze/blaze
def compute_up(expr, args, **kwargs):
    from_objs = list(unique(concat(map(get_all_froms, args))))
    if len(from_objs) > 1:
        # TODO: how do you do this in sql? please send help
        raise ValueError('only columns from the same table can be merged')

    cols = list(unique(concat(map(get_unsafe_inner_columns, args, expr.args))))
    sel = sa.select(cols, from_obj=from_objs[0])
    where = unify_wheres(args)
    if where is not None:
        sel = sel.where(where)
    return sel
Example 4
def compile_components(summary, schema):
    """Given a ``Summary`` object and a table schema, returning 5 sub-functions.

    Parameters
    ----------
    summary : Summary
        The expression describing the aggregations to be computed.

    Returns
    -------
    A tuple of the following functions:

    ``create(shape)``
        Takes the aggregate shape, and returns a tuple of initialized numpy
        arrays.

    ``info(df)``
        Takes a dataframe, and returns preprocessed 1D numpy arrays of the
        needed columns.

    ``append(i, x, y, *aggs_and_cols)``
        Appends the ``i``th row of the table to the ``(x, y)`` bin, given the
        base arrays and columns in ``aggs_and_cols``. This does the bulk of the
        work.

    ``combine(base_tuples)``
        Combine a list of base tuples into a single base tuple. This forms the
        reducing step in a reduction tree.

    ``finalize(aggs)``
        Given a tuple of base numpy arrays, returns the finalized
        ``dynd`` array.
    """
    paths, reds = zip(*preorder_traversal(summary))

    # List of base reductions (actually computed)
    bases = list(unique(concat(r._bases for r in reds)))
    dshapes = [b.out_dshape(schema) for b in bases]
    # List of tuples of (append, base, input columns, temps)
    calls = [_get_call_tuples(b, d) for (b, d) in zip(bases, dshapes)]
    # List of unique column names needed
    cols = list(unique(concat(pluck(2, calls))))
    # List of temps needed
    temps = list(pluck(3, calls))

    create = make_create(bases, dshapes)
    info = make_info(cols)
    append = make_append(bases, cols, calls)
    combine = make_combine(bases, dshapes, temps)
    finalize = make_finalize(bases, summary, schema)

    return create, info, append, combine, finalize
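A small sketch with invented call tuples showing how the column list above is derived: pluck(2, calls) selects the input-columns slot of each tuple, and concat plus unique flattens them into one ordered, de-duplicated list.

from toolz import concat, pluck, unique

calls = [('append_count', 'count', ('x',), ()),
         ('append_sum', 'sum_y', ('x', 'y'), ())]
cols = list(unique(concat(pluck(2, calls))))
assert cols == ['x', 'y']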
Example 5
def columns_in_filters(filters):
    """
    Returns a list of the columns used in a set of query filters.

    Parameters
    ----------
    filters : list of str or str
        List of the filters as passed to ``apply_filter_query``.

    Returns
    -------
    columns : list of str
        List of all the strings mentioned in the filters.

    """
    if not filters:
        return []

    if not isinstance(filters, str):
        filters = ' '.join(filters)

    columns = []
    reserved = {'and', 'or', 'in', 'not'}

    for toknum, tokval, _, _, _ in generate_tokens(StringIO(filters).readline):
        if toknum == NAME and tokval not in reserved:
            columns.append(tokval)

    return list(toolz.unique(columns))
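A self-contained sketch of the same tokenizer pattern (the helper name and filter string are invented): every NAME token that is not a reserved query word is collected, and toolz.unique removes repeats while preserving order.

from io import StringIO
from tokenize import NAME, generate_tokens
import toolz

def _filter_columns(filters):
    reserved = {'and', 'or', 'in', 'not'}
    tokens = generate_tokens(StringIO(filters).readline)
    names = (tokval for toknum, tokval, _, _, _ in tokens
             if toknum == NAME and tokval not in reserved)
    return list(toolz.unique(names))

assert _filter_columns("income > 50000 and income < 90000") == ['income']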
Example 6
    def schema(self):
        group = self.grouper.schema[0].parameters[0]
        reduction_name = type(self.apply).__name__
        apply = self.apply.dshape[0].parameters[0]
        params = unique(group + apply, key=lambda x: x[0])

        return dshape(Record(list(params)))
Example 7
def test_multi_column_join():
    metadata = sa.MetaData()
    lhs = sa.Table('aaa', metadata,
                   sa.Column('x', sa.Integer),
                   sa.Column('y', sa.Integer),
                   sa.Column('z', sa.Integer))

    rhs = sa.Table('bbb', metadata,
                   sa.Column('w', sa.Integer),
                   sa.Column('x', sa.Integer),
                   sa.Column('y', sa.Integer))

    L = symbol('L', 'var * {x: int, y: int, z: int}')
    R = symbol('R', 'var * {w: int, x: int, y: int}')
    joined = join(L, R, ['x', 'y'])

    expected = lhs.join(rhs, (lhs.c.x == rhs.c.x)
                           & (lhs.c.y == rhs.c.y))
    expected = select(list(unique(expected.columns, key=lambda c:
        c.name))).select_from(expected)

    result = compute(joined, {L: lhs, R: rhs})

    assert str(result) == str(expected)

    assert str(select(result)) == str(select(expected))

    # Schemas match
    print(result.c.keys())
    print(joined.fields)
    assert list(result.c.keys()) == list(joined.fields)
Example 8
File: sql.py Project: earney/blaze
def compute_up(expr, data, **kwargs):
    if not valid_grouper(expr):
        raise TypeError("Grouper must have a non-nested record or one "
                        "dimensional collection datashape, "
                        "got %s of type %r with dshape %s" %
                        (expr.grouper, type(expr.grouper).__name__, expr.dshape))

    s = alias_it(data)

    if valid_reducer(expr.apply):
        reduction = compute(expr.apply, s, post_compute=False)
    else:
        raise TypeError('apply must be a Summary expression')

    grouper = get_inner_columns(compute(expr.grouper, s, post_compute=False))
    reduction_columns = pipe(reduction.inner_columns,
                             map(get_inner_columns),
                             concat)
    columns = list(unique(chain(grouper, reduction_columns)))
    if (not isinstance(s, sa.sql.selectable.Alias) or
            (hasattr(s, 'froms') and isinstance(s.froms[0],
                                                sa.sql.selectable.Join))):
        assert len(s.froms) == 1, 'only a single FROM clause supported for now'
        from_obj, = s.froms
    else:
        from_obj = None

    return reconstruct_select(columns,
                              getattr(s, 'element', s),
                              from_obj=from_obj,
                              group_by=grouper)
Example 9
def find_names(node):
    """Return the unique :class:`ast.Name` instances in an AST.

    Parameters
    ----------
    node : ast.AST

    Returns
    -------
    unique_names : List[ast.Name]

    Examples
    --------
    >>> import ast
    >>> node = ast.parse('a + b')
    >>> names = find_names(node)
    >>> names  # doctest: +ELLIPSIS
    [<_ast.Name object at 0x...>, <_ast.Name object at 0x...>]
    >>> names[0].id
    'a'
    >>> names[1].id
    'b'
    """
    return list(
        toolz.unique(
            filter(None, NameFinder().find(node)),
            key=lambda node: (node.id, type(node.ctx)),
        )
    )
Example 10
def compute_up(expr, data, scope=None, **kwargs):
    data = lower_column(data)
    grouper = compute(
        expr.grouper,
        scope,
        post_compute=False,
        return_type='native',
        **kwargs
    )

    app = expr.apply
    reductions = [
        compute(
            val,
            data,
            post_compute=None,
            return_type='native',
        ).label(name)
        for val, name in zip(app.values, app.fields)
    ]

    froms = list(unique(chain(get_all_froms(grouper),
                              concat(map(get_all_froms, reductions)))))
    inner_cols = list(getattr(grouper, 'inner_columns', [grouper]))
    grouper_cols = inner_cols[:]
    inner_cols.extend(concat(
        getattr(getattr(r, 'element', None), 'inner_columns', [r])
        for r in reductions
    ))
    wheres = unify_wheres([grouper] + reductions)
    sel = unify_froms(sa.select(inner_cols, whereclause=wheres), froms)
    return sel.group_by(*grouper_cols)
Example 11
    def _get_variables(self):
        """Collect variables, updates and auxiliary variables.

        In addition collects all :class:`.Scan` ops and recurses in the
        respective inner Theano graphs.

        """
        updates = OrderedDict()

        shared_outputs = [o for o in self.outputs if is_shared_variable(o)]
        usual_outputs = [o for o in self.outputs if not is_shared_variable(o)]
        variables = shared_outputs

        if usual_outputs:
            # Sort apply nodes topologically, get variables and remove
            # duplicates
            inputs = graph.inputs(self.outputs)
            sorted_apply_nodes = graph.io_toposort(inputs, usual_outputs)
            self.scans = list(unique([node.op for node in sorted_apply_nodes
                                     if isinstance(node.op, Scan)],
                                     key=lambda op: id(op)))
            self._scan_graphs = [ComputationGraph(scan.outputs)
                                 for scan in self.scans]

            seen = set()
            main_vars = (
                [var for var in list(chain(
                    *[apply_node.inputs for apply_node in sorted_apply_nodes]))
                 if not (var in seen or seen.add(var))] +
                [var for var in self.outputs if var not in seen])

            # While preserving order add auxiliary variables, and collect
            # updates
            seen = set()
            # Intermediate variables could be auxiliary
            seen_avs = set(main_vars)
            variables = []
            for var in main_vars:
                variables.append(var)
                for annotation in getattr(var.tag, 'annotations', []):
                    if annotation not in seen:
                        seen.add(annotation)
                        new_avs = [
                            av for av in annotation.auxiliary_variables
                            if not (av in seen_avs or seen_avs.add(av))]
                        variables.extend(new_avs)
                        updates = dict_union(updates, annotation.updates)

        # If a shared variable is assigned a default_update (cloned), we cannot
        # eval() it to get the real numpy array value; hence, try to trace back
        # to the original shared variable
        def shared_variable_filter(var):
            if is_shared_variable(var) and hasattr(var, 'default_update'):
                for annotation in var.tag.annotations:
                    if hasattr(annotation, var.name) and \
                       is_shared_variable(getattr(annotation, var.name)):
                        return getattr(annotation, var.name)
            return var
        self.variables = map(shared_variable_filter, variables)
        self.updates = updates
Example 12
    def all_subaccounts(self):
        """
        Returns an iterator of all subaccounts that have a recorded transaction
        with the account.

        """
        return toolz.unique(t.subaccount for t in self.transactions)
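A tiny sketch (the Transaction shape is invented) of what the generator expression above feeds to toolz.unique: each subaccount is yielded the first time it appears, lazily.

from collections import namedtuple
import toolz

Transaction = namedtuple('Transaction', ['subaccount', 'amount'])
transactions = [Transaction('rent', 100), Transaction('food', 20), Transaction('rent', 50)]
assert list(toolz.unique(t.subaccount for t in transactions)) == ['rent', 'food']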
Example 13
    def choosers_columns_used(self):
        """
        Columns from the choosers table that are used for filtering.

        """
        return list(toolz.unique(toolz.concat(
            m.choosers_columns_used() for m in self.models.values())))
Example 14
  def diagnostic_yield(self, metric='completeness', cutoff=1,
                       superblock_ids=None, group_id=None, sample_ids=None):
    """Calculate diagnostic yield."""
    # extract column to filter on
    metric_column = getattr(BlockData, metric)

    # set up the base query for all blocks
    total_query = self.total_count(BlockData)

    if superblock_ids:
      # apply the superblock filter on the Block class level
      total_query = total_query.join(BlockData.parent)\
                               .filter(Block.superblock_id.in_(superblock_ids))

    # extend base query to include only passed blocks
    pass_query = total_query.filter(metric_column >= cutoff)

    # optionally limit query
    queries = [limit_query(query, group=group_id, samples=sample_ids)
               for query in (total_query, pass_query)]

    # group multiple queries by sample ID (first column)
    metrics = groupby(get(0), concat(queries))

    # iterate over all values, concat different query results, and keep
    # only the unique values (excluding second sample_id)
    combined = (unique(concat(values)) for values in itervalues(metrics))

    # calculate diagnostic yield by simple division
    for sample_id, group_id, total, covered in combined:
      yield sample_id, group_id, (covered / total)
Example 15
File: Zte.py Project: sjava/olt
def gpon_svlan(ip='', username='', password='', slots=None):
    ports = product(slots, range(1, 9))
    cmds = map(
        lambda x: "show service-port interface gpon-olt_1/{0}/{1}".format(x[0], x[1]), ports)
    try:
        svlan = []
        child = telnet(ip, username, password)
        for cmd in cmds:
            result = []
            child.sendline(cmd)
            while True:
                index = child.expect(
                    [zte_prompt, zte_pager], timeout=120)
                if index == 0:
                    result.append(child.before)
                    break
                else:
                    result.append(child.before)
                    child.send(' ')
                    continue
            r = ''.join(result).split('\r\n')[1:-1]
            v = [x.replace('\x08', '').strip().split()[1]
                 for x in r if 'OK' in x and 'YES' in x]
            v1 = [x for x in v if x.isdigit()]
            p = re.findall(r'\d/\d{1,2}/\d', cmd)
            svlan += product(p, unique(v1))
        child.sendline('exit')
        child.close()
    except (pexpect.EOF, pexpect.TIMEOUT) as e:
        return ['fail', None, ip]

    return ['success', svlan, ip]
Example 16
    def alts_columns_used(self):
        """
        Columns from the alternatives table that are used for filtering.

        """
        return list(toolz.unique(toolz.concat(
            m.alts_columns_used() for m in self.models.values())))
Example 17
 def __repr__(self) -> str:
     return '{}({})'.format(
         self.name,
         ', '.join(
             '{}={!r}'.format(slot, getattr(self, slot))
             for slot in toolz.unique(self.__slots__ + ('nullable',))
         ),
     )
Example 18
    def columns_used(self):
        """
        Columns from any table used in the model. May come from either
        the choosers or alternatives tables.

        """
        return list(toolz.unique(toolz.concat(
            m.columns_used() for m in self.models.values())))
Example 19
    def choosers_columns_used(self):
        """
        Columns from the choosers table that are used for filtering.

        """
        return list(toolz.unique(toolz.concatv(
            util.columns_in_filters(self.choosers_predict_filters),
            util.columns_in_filters(self.choosers_fit_filters))))
Example 20
    def columns_used(self):
        """
        Returns all the columns used across all models in the group
        for filtering and in the model expression.

        """
        return list(toolz.unique(toolz.concat(
            m.columns_used() for m in self.models.values())))
Example 21
    def interaction_columns_used(self):
        """
        Columns from the interaction dataset used for filtering and in
        the model. These may come originally from either the choosers or
        alternatives tables.

        """
        return list(toolz.unique(toolz.concat(
            m.interaction_columns_used() for m in self.models.values())))
Example 22
def test_EqualityHashKey_index_key():
    d1 = {'firstname': 'Alice', 'age': 21, 'data': {}}
    d2 = {'firstname': 'Alice', 'age': 34, 'data': {}}
    d3a = {'firstname': 'Bob', 'age': 56, 'data': {}}
    d3b = {'firstname': 'Bob', 'age': 56, 'data': {}}
    EqualityHashFirstname = curry(EqualityHashKey, 'firstname')
    assert list(unique(3*[d1, d2, d3a, d3b],
                       key=EqualityHashFirstname)) == [d1, d2, d3a]
    EqualityHashFirstnameAge = curry(EqualityHashKey, ['firstname', 'age'])
    assert list(unique(3*[d1, d2, d3a, d3b],
                       key=EqualityHashFirstnameAge)) == [d1, d2, d3a]
    list1 = [0] * 10
    list2 = [0] * 100
    list3a = [1] * 10
    list3b = [1] * 10
    EqualityHash0 = curry(EqualityHashKey, 0)
    assert list(unique(3*[list1, list2, list3a, list3b],
                       key=EqualityHash0)) == [list1, list2, list3a]
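For context, a hedged sketch of what EqualityHashKey (from toolz.sandbox) does in the test above: it gives otherwise unhashable dicts a hash derived from the chosen key while still comparing full equality, so unique() drops exact duplicates without conflating merely similar records.

from toolz import curry, unique
from toolz.sandbox import EqualityHashKey

rows = [{'firstname': 'Alice', 'age': 21},
        {'firstname': 'Alice', 'age': 21},   # exact duplicate: dropped
        {'firstname': 'Alice', 'age': 34}]   # same hash key, different item: kept
hash_by_name = curry(EqualityHashKey, 'firstname')
assert list(unique(rows, key=hash_by_name)) == [{'firstname': 'Alice', 'age': 21},
                                                {'firstname': 'Alice', 'age': 34}]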
Example 23
def get_users_list(user, repo_name_list):
    github = Github(login_or_token=user.github_access_token)
    git_user = github.get_user()

    user_list = set()
    for repo_name in repo_name_list:
        repo = get_a_repo(git_user, repo_name)
        for g_user in repo.get_collaborators():
            user_list.add(g_user)
    return sorted([user for user in toolz.unique(user_list, key=lambda x: x.login)], key=lambda x: x.login)
Example 24
    def columns_used(self):
        """
        Columns from any table used in the model. May come from either
        the choosers or alternatives tables.

        """
        return list(toolz.unique(toolz.concatv(
            self.choosers_columns_used(),
            self.alts_columns_used(),
            self.interaction_columns_used())))
Example 25
    def schema(self):
        group = self.grouper.schema[0].parameters[0]
        if isinstance(self.apply.dshape[0], Record):
            apply = self.apply.dshape[0].parameters[0]
        else:
            apply = (('0', self.apply.dshape),)

        params = unique(group + apply, key=lambda x: x[0])

        return dshape(Record(list(params)))
Example 26
    def columns_used(self):
        """
        Returns all the columns used in this model for filtering
        and in the model expression.

        """
        return list(toolz.unique(toolz.concatv(
            util.columns_in_filters(self.fit_filters),
            util.columns_in_filters(self.predict_filters),
            util.columns_in_formula(self.model_expression))))
Example 27
    def interaction_columns_used(self):
        """
        Columns from the interaction dataset used for filtering and in
        the model. These may come originally from either the choosers or
        alternatives tables.

        """
        return list(toolz.unique(toolz.concatv(
            util.columns_in_filters(self.interaction_predict_filters),
            util.columns_in_formula(self.model_expression))))
Example 28
    def columns_used(self):
        """
        Returns all the columns used across all models in the group
        for filtering and in the model expression.

        """
        return list(toolz.unique(toolz.concatv(
            util.columns_in_filters(self.fit_filters),
            util.columns_in_filters(self.predict_filters),
            self._group.columns_used())))
Example 29
def align_partitions(*dfs):
    """ Mutually partition and align DataFrame blocks

    This serves as a precursor to multi-dataframe operations like join, concat,
    or merge.

    Parameters
    ----------
    dfs: sequence of dd.DataFrame, dd.Series and dd.base.Scalar
        Sequence of dataframes to be aligned on their index

    Returns
    -------
    dfs: sequence of dd.DataFrame, dd.Series and dd.base.Scalar
        These must have consistent divisions with each other
    divisions: tuple
        Full divisions sequence of the entire result
    result: list
        A list of lists of keys that show which data exist on which
        divisions
    """
    _is_broadcastable = partial(is_broadcastable, dfs)
    dfs1 = [df for df in dfs
            if isinstance(df, _Frame) and
            not _is_broadcastable(df)]
    if len(dfs) == 0:
        raise ValueError("dfs contains no DataFrame and Series")
    if not all(df.known_divisions for df in dfs1):
        raise ValueError("Not all divisions are known, can't align "
                         "partitions. Please use `set_index` "
                         "to set the index.")

    divisions = list(unique(merge_sorted(*[df.divisions for df in dfs1])))
    if len(divisions) == 1:  # single value for index
        divisions = (divisions[0], divisions[0])
    dfs2 = [df.repartition(divisions, force=True)
            if isinstance(df, _Frame) else df for df in dfs]

    result = list()
    inds = [0 for df in dfs]
    for d in divisions[:-1]:
        L = list()
        for i, df in enumerate(dfs2):
            if isinstance(df, _Frame):
                j = inds[i]
                divs = df.divisions
                if j < len(divs) - 1 and divs[j] == d:
                    L.append((df._name, inds[i]))
                    inds[i] += 1
                else:
                    L.append(None)
            else:    # Scalar has no divisions
                L.append(None)
        result.append(L)
    return dfs2, tuple(divisions), result
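A toy sketch of the division merge above: merge_sorted interleaves the sorted division tuples and unique drops repeated boundary values, producing the full divisions used for repartitioning.

from toolz import merge_sorted, unique

divisions_a = (0, 5, 10)
divisions_b = (0, 3, 10)
assert list(unique(merge_sorted(divisions_a, divisions_b))) == [0, 3, 5, 10]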
Example 30
    def from_collections(cls, name, layer, dependencies=()):
        """ Construct a HighLevelGraph from a new layer and a set of collections

        This constructs a HighLevelGraph in the common case where we have a single
        new layer and a set of old collections on which we want to depend.

        This pulls out the ``__dask_layers__()`` method of the collections if
        they exist, and adds them to the dependencies for this new layer.  It
        also merges all of the layers from all of the dependent collections
        together into the new layers for this graph.

        Parameters
        ----------
        name : str
            The name of the new layer
        layer : Mapping
            The graph layer itself
        dependencies : List of Dask collections
            A list of other dask collections (like arrays or dataframes) that
            have graphs themselves

        Examples
        --------

        In typical usage we make a new task layer, and then pass that layer
        along with all dependent collections to this method.

        >>> def add(self, other):
        ...     name = 'add-' + tokenize(self, other)
        ...     layer = {(name, i): (add, input_key, other)
        ...              for i, input_key in enumerate(self.__dask_keys__())}
        ...     graph = HighLevelGraph.from_collections(name, layer, dependencies=[self])
        ...     return new_collection(name, graph)
        """
        layers = {name: layer}
        deps = {}
        deps[name] = set()
        for collection in toolz.unique(dependencies, key=id):
            if is_dask_collection(collection):
                graph = collection.__dask_graph__()
                if isinstance(graph, HighLevelGraph):
                    layers.update(graph.layers)
                    deps.update(graph.dependencies)
                    with ignoring(AttributeError):
                        deps[name] |= set(collection.__dask_layers__())
                else:
                    key = id(graph)
                    layers[key] = graph
                    deps[name].add(key)
                    deps[key] = set()
            else:
                raise TypeError(type(collection))

        return cls(layers, deps)
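A minimal sketch of the key=id idiom above: deduplicating by object identity means a collection passed twice is only processed once, while distinct (even equal) objects are all kept.

import toolz

a = [1, 2]
b = [1, 2]          # equal to a, but a different object
assert list(toolz.unique([a, a, b], key=id)) == [a, b]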
Example 31
 def __hash__(self):
     custom_parts = tuple(
         getattr(self, slot)
         for slot in toolz.unique(self.__slots__ + ('nullable', )))
     return hash((type(self), ) + custom_parts)
Example 32
File: types.py Project: shshe/ibis
def distinct_roots(*expressions):
    roots = toolz.concat(
        expression._root_tables() for expression in expressions
    )
    return list(toolz.unique(roots, key=id))
Example 33
def physical_tables_node(node):
    # Iterative case. Any other Node's physical roots are the unique physical
    # roots of that Node's root tables.
    return list(unique(concat(map(physical_tables, node.root_tables()))))
Example 34
def physical_tables_node(node):
    # Iterative case. Any other Node's physical roots are the unique physical
    # roots of that Node's root tables.
    tables = toolz.concat(map(physical_tables, node.root_tables()))
    return list(toolz.unique(tables, key=id))
Example 35
 def __iter__(self):
     return unique(concat(self.dicts.values()))
Example 36
def find_source_table(expr):
    """Find the first table expression observed for each argument that the
    expression depends on

    Parameters
    ----------
    expr : ir.Expr

    Returns
    -------
    table_expr : ir.TableExpr

    Examples
    --------
    >>> import ibis
    >>> t = ibis.table([('a', 'double'), ('b', 'string')], name='t')
    >>> expr = t.mutate(c=t.a + 42.0)
    >>> expr  # doctest: +NORMALIZE_WHITESPACE
    ref_0
    UnboundTable[table]
      name: t
      schema:
        a : double
        b : string
    Selection[table]
      table:
        Table: ref_0
      selections:
        Table: ref_0
        c = Add[double*]
          left:
            a = Column[double*] 'a' from table
              ref_0
          right:
            Literal[double]
              42.0
    >>> find_source_table(expr)
    UnboundTable[table]
      name: t
      schema:
        a : double
        b : string
    >>> left = ibis.table([('a', 'int64'), ('b', 'string')])
    >>> right = ibis.table([('c', 'int64'), ('d', 'string')])
    >>> result = left.inner_join(right, left.a == right.c)
    >>> find_source_table(result)  # doctest: +ELLIPSIS
    Traceback (most recent call last):
    ...
    NotImplementedError: More than one base table not implemented
    """
    first_tables = []

    stack = [expr]
    seen = set()

    while stack:
        e = stack.pop()
        op = e.op()

        if op not in seen:
            seen.add(op)

            arguments = [
                arg for arg in reversed(list(op.flat_args()))
                if isinstance(arg, ir.Expr)
            ]
            first_tables.extend(arg for arg in arguments
                                if isinstance(arg, ir.TableExpr))
            stack.extend(arg for arg in arguments
                         if not isinstance(arg, ir.TableExpr))

    options = list(toolz.unique(first_tables, key=id))

    if len(options) > 1:
        raise NotImplementedError('More than one base table not implemented')

    return options[0]
Example 37
 def inputs(self):
     return tuple(unique(concat(v.inputs for v in self.values)))
Example 38
 def _leaves(self):
     return list(unique(tconcat(i._leaves() for i in self.children)))
Example 39
def distinct_roots(*expressions):
    # TODO: move to analysis
    roots = toolz.concat(expr.op().root_tables() for expr in expressions)
    return list(toolz.unique(roots))
Example 40
def flat_unique(ls):
    """Flatten ``ls``, filter by unique id, and return a list"""
    return list(unique(chain.from_iterable(ls), key=id))
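A self-contained restatement of flat_unique with toy data (the imports are assumed, since the excerpt does not show them): chain.from_iterable flattens the nested lists and unique(..., key=id) keeps one entry per object identity.

from itertools import chain
from toolz import unique

x, y = object(), object()
flattened = list(unique(chain.from_iterable([[x, y], [x]]), key=id))
assert flattened == [x, y]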
Example 41
def find_source_table(expr):
    """Find the first table expression observed for each argument that the
    expression depends on

    Parameters
    ----------
    expr : ir.Expr

    Returns
    -------
    table_expr : ir.TableExpr

    Examples
    --------
    >>> import ibis
    >>> t = ibis.table([('a', 'double'), ('b', 'string')], name='t')
    >>> expr = t.mutate(c=t.a + 42.0)
    >>> expr  # doctest: +NORMALIZE_WHITESPACE
    ref_0
    UnboundTable[table]
      name: t
      schema:
        a : float64
        b : string
    Selection[table]
      table:
        Table: ref_0
      selections:
        Table: ref_0
        c = Add[float64*]
          left:
            a = Column[float64*] 'a' from table
              ref_0
          right:
            Literal[float64]
              42.0
    >>> find_source_table(expr)
    UnboundTable[table]
      name: t
      schema:
        a : float64
        b : string
    >>> left = ibis.table([('a', 'int64'), ('b', 'string')])
    >>> right = ibis.table([('c', 'int64'), ('d', 'string')])
    >>> result = left.inner_join(right, left.a == right.c)
    >>> find_source_table(result)  # doctest: +ELLIPSIS
    Traceback (most recent call last):
    ...
    NotImplementedError: More than one base table not implemented
    """
    def finder(expr):
        if isinstance(expr, ir.TableExpr):
            return lin.halt, expr
        else:
            return lin.proceed, None

    first_tables = lin.traverse(finder, expr.op().flat_args())
    options = list(toolz.unique(first_tables, key=operator.methodcaller('op')))

    if len(options) > 1:
        raise NotImplementedError('More than one base table not implemented')

    return options[0]
Example 42
def main():
    yearn = Yearn(load_strategies=False)
    excluded = {
        "0xBa37B002AbaFDd8E89a1995dA52740bbC013D992",
        "0xe2F6b9773BF3A015E2aA70741Bde1498bdB9425b",
        "0xBFa4D8AA6d8a379aBFe7793399D3DdaCC5bBECBB",
    }
    resp = requests.get("https://raw.githubusercontent.com/iearn-finance/yearn-assets/master/icons/aliases.json").json()
    aliases = {item["address"]: item for item in resp}
    tokens = []

    # Token derived by products
    for product in yearn.registries:
        vaults = [item.vault for item in yearn.registries[product].vaults if str(item.vault) not in excluded]
        metadata = multicall_matrix(vaults, ["name", "symbol", "decimals"])
        for vault in vaults:
            tokens.append(
                TokenInfo(
                    chainId=1,
                    address=str(vault),
                    name=aliases.get(str(vault), metadata[vault])["name"],
                    decimals=metadata[vault]["decimals"],
                    symbol=aliases.get(str(vault), metadata[vault])["symbol"],
                    logoURI=f"https://raw.githubusercontent.com/yearn/yearn-assets/master/icons/tokens/{vault}/logo.svg",
                    tags=[product],
                )
            )

    # Token from special / side projects
    special = [
        Contract("0xD0660cD418a64a1d44E9214ad8e459324D8157f1") # WOOFY
    ]
    metadata = multicall_matrix(special, ["name", "symbol", "decimals"])
    for token in special:
        tokens.append(
            TokenInfo(
                chainId=1,
                address=str(token),
                name=aliases.get(str(token), metadata[token])["name"],
                decimals=metadata[token]["decimals"],
                symbol=aliases.get(str(token), metadata[token])["symbol"],
                logoURI=f"https://raw.githubusercontent.com/yearn/yearn-assets/master/icons/tokens/{token}/logo.svg",
                tags=["special"],
            )
        )

    deploy_blocks = {token.address: contract_creation_block(token.address) for token in tokens}
    tokens = unique(tokens, key=lambda token: token.address)
    tokens = sorted(tokens, key=lambda token: deploy_blocks[token.address])
    version = Version(major=1, minor=len(tokens), patch=0)
    timestamp = datetime.fromtimestamp(get_block_timestamp(max(deploy_blocks.values())), timezone.utc).isoformat()
    logo = "https://raw.githubusercontent.com/yearn/yearn-assets/master/icons/tokens/0x0bc529c00C6401aEF6D220BE8C6Ea1667F6Ad93e/logo.svg"

    print(f"{version=}\n{timestamp=}")
    tokenlist = TokenList("Yearn", timestamp, version, tokens, logoURI=logo)
    for token in tokenlist.tokens:
        assert len(token.symbol) <= 20, f"{token.symbol} > 20 chars, uniswap is unhappy"

    path = Path("static/tokenlist.json")
    path.parent.mkdir(exist_ok=True)
    path.write_text(json.dumps(tokenlist.to_dict(), separators=(",", ":")))
    print(f"saved to {path}")
Example 43
 def __repr__(self):
     return '{}({})'.format(
         self.name, ', '.join('{}={!r}'.format(slot, getattr(self, slot))
                              for slot in toolz.unique(self.__slots__ +
                                                       ('nullable', ))))
Example 44
def unify_wheres(selectables):
    clauses = list(
        unique((s._whereclause
                for s in selectables if hasattr(s, '_whereclause')),
               key=str))
    return reduce(and_, clauses) if clauses else None
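A toy sketch of the key=str trick above: two clause objects that render to the same string (and may themselves be unhashable) count as one, which is what lets unify_wheres AND each distinct where-clause only once.

from toolz import unique

clauses = [['amount', '>', 100],
           ['amount', '>', 100],   # same rendering: deduplicated
           ['name', '==', 'Alice']]
assert list(unique(clauses, key=str)) == [['amount', '>', 100], ['name', '==', 'Alice']]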
Example 45
def get_all_froms(function):
    return list(unique(concat(map(get_all_froms, function.clauses.clauses))))
Example 46
def bottom_up_until_type_break(expr, scope, **kwargs):
    """ Traverse bottom up until data changes significantly

    Parameters
    ----------

    expr: Expression
        Expression to compute
    scope: dict
        namespace matching leaves of expression to data

    Returns
    -------

    expr: Expression
        New expression with lower subtrees replaced with leaves
    scope: dict
        New scope with entries for those leaves

    Examples
    --------

    >>> import numpy as np

    >>> s = symbol('s', 'var * {name: string, amount: int}')
    >>> data = np.array([('Alice', 100), ('Bob', 200), ('Charlie', 300)],
    ...                 dtype=[('name', 'S7'), ('amount', 'i8')])

    This computation completes without changing type.  We get back a leaf
    symbol and a computational result

    >>> e = (s.amount + 1).distinct()
    >>> bottom_up_until_type_break(e, {s: data}) # doctest: +SKIP
    (amount, {amount: array([101, 201, 301])})

    This computation has a type change midstream (``list`` to ``int``), so we
    stop and get the unfinished computation.

    >>> e = s.amount.sum() + 1
    >>> bottom_up_until_type_break(e, {s: data})
    (amount_sum + 1, {<`amount_sum` symbol; dshape='int64'>: 600})
    """
    # 0. Base case.  Return if expression is in scope
    if expr in scope:
        leaf = makeleaf(expr)
        return leaf, {leaf: scope[expr]}

    inputs = list(unique(expr._inputs))

    # 1. Recurse down the tree, calling this function on children
    #    (this is the bottom part of bottom up)
    exprs, new_scopes = zip(
        *[bottom_up_until_type_break(i, scope, **kwargs) for i in inputs])

    # 2. Form new (much shallower) expression and new (more computed) scope
    new_scope = toolz.merge(new_scopes)
    new_expr = expr._subs(
        {i: e
         for i, e in zip(inputs, exprs) if not i.isidentical(e)})

    old_expr_leaves = expr._leaves()
    old_data_leaves = [scope.get(leaf) for leaf in old_expr_leaves]

    # 3. If the leaves have changed substantially then stop
    key = lambda x: str(type(x))
    if type_change(sorted(new_scope.values(), key=key),
                   sorted(old_data_leaves, key=key)):
        return new_expr, new_scope
    # 4. Otherwise try to do some actual work
    try:
        leaf = makeleaf(expr)
        _data = [new_scope[i] for i in new_expr._inputs]
    except KeyError:
        return new_expr, new_scope
    try:
        return leaf, {
            leaf: compute_up(new_expr, *_data, scope=new_scope, **kwargs)
        }
    except NotImplementedError:
        return new_expr, new_scope
Example 47
def get_all_froms(colelement):
    return list(unique(concat(map(get_all_froms, colelement.get_children()))))
Example 48
def plot_cache(results,
               dsk,
               start_time,
               metric_name,
               palette='GnBu',
               label_size=60,
               **kwargs):
    """Visualize the results of profiling in a bokeh plot.

    Parameters
    ----------
    results : sequence
        Output of CacheProfiler.results
    dsk : dict
        The dask graph being profiled.
    start_time : float
        Start time of the profile.
    metric_name : string
        Metric used to measure cache size
    palette : string, optional
        Name of the bokeh palette to use, must be key in bokeh.palettes.brewer.
    label_size: int (optional)
        Maximum size of output labels in plot, defaults to 60
    **kwargs
        Other keyword arguments, passed to bokeh.figure. These will override
        all defaults set by visualize.

    Returns
    -------
    The completed bokeh plot object.
    """

    defaults = dict(title="Profile Results",
                    tools="hover,save,reset,resize,wheel_zoom,xpan",
                    plot_width=800,
                    plot_height=300)
    defaults.update(
        (k, v) for (k, v) in kwargs.items() if k in bp.Figure.properties())

    if results:
        starts, ends = list(zip(*results))[3:]
        tics = list(sorted(unique(starts + ends)))
        groups = groupby(lambda d: pprint_task(d[1], dsk, label_size), results)
        data = {}
        for k, vals in groups.items():
            cnts = dict.fromkeys(tics, 0)
            for v in vals:
                cnts[v.cache_time] += v.metric
                cnts[v.free_time] -= v.metric
            data[k] = list(accumulate(add, pluck(1, sorted(cnts.items()))))

        tics = [i - start_time for i in tics]
        p = bp.figure(x_range=[0, max(tics)], **defaults)

        for (key, val), color in zip(data.items(),
                                     get_colors(palette, data.keys())):
            p.line('x',
                   'y',
                   line_color=color,
                   line_width=3,
                   source=bp.ColumnDataSource({
                       'x': tics,
                       'y': val,
                       'label': [key for i in val]
                   }))

    else:
        p = bp.figure(y_range=[0, 10], x_range=[0, 10], **defaults)
    p.grid.grid_line_color = None
    p.axis.axis_line_color = None
    p.axis.major_tick_line_color = None
    p.yaxis.axis_label = "Cache Size ({0})".format(metric_name)
    p.xaxis.axis_label = "Time (s)"

    hover = p.select(HoverTool)
    hover.tooltips = """
    <div>
        <span style="font-size: 14px; font-weight: bold;">Task:</span>&nbsp;
        <span style="font-size: 10px; font-family: Monaco, monospace;">@label</span>
    </div>
    """
    return p
Example 49
def get_all_froms(sel):
    return list(unique(sel.locate_all_froms()))
Example 50
def physical_tables_join(join):
    # Physical roots of Join nodes are the unique physical roots of their
    # left and right TableNodes.
    func = compose(physical_tables, methodcaller('op'))
    return list(unique(concat(map(func, (join.left, join.right)))))
Example 51
def get_inner_columns(f):
    unique_columns = unique(concat(map(get_inner_columns, f.clauses)))
    lowered = [x.label(getattr(x, 'name', None)) for x in unique_columns]
    return [getattr(sa.func, f.name)(*lowered)]
Example 52
 def __getstate__(self):
     return {
         slot: getattr(self, slot)
         for slot in toolz.unique(self.__slots__ + ('nullable', ))
     }
Example 53
 def __iter__(self):
     return toolz.unique(toolz.concat(self.layers.values()))