Example #1
File: core.py Project: dnil/chanjo
def gender_from_bam(bam_path, prefix=''):
  """Predict the gender from a BAM alignment file.

  Args:
    bam_path (path): path to a BAM alignment file
    prefix (str, optional): string to prefix to 'X', 'Y'

  Returns:
    Gender: tuple of X coverage, Y coverage, and sex prediction

  Examples:
    >>> gender_from_bam('alignment.bam', prefix='chr')
    Gender(x_coverage=123.31, y_coverage=0.13, sex='female')
  """
  # setup: connect to a BAM file
  bam = BamFile(bam_path)

  # step 0: fake some BED interval rows (already 1,1-based!)
  fake_bed_rows = [("%sX" % prefix, 1, 59373566),
                   ("%sY" % prefix, 69362, 11375310)]

  # step 1: run the pipeline
  sequence = pipe(
    fake_bed_rows,
    map(lambda interval: bam(*interval)),
    map(average)
  )

  # step 2: make the prediction
  x_coverage, y_coverage = list(sequence)
  sex = predict_gender(x_coverage, y_coverage)
  return Gender(x_coverage, y_coverage, sex)
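The Gender tuple and predict_gender helper used above live elsewhere in the project; a minimal sketch of what they plausibly look like (hypothetical names and threshold, not chanjo's actual code):

from collections import namedtuple

Gender = namedtuple('Gender', ['x_coverage', 'y_coverage', 'sex'])

def predict_gender(x_coverage, y_coverage, ratio_threshold=10.):
  """Illustrative heuristic: very low Y coverage relative to X suggests female."""
  if y_coverage == 0 or (x_coverage / y_coverage) > ratio_threshold:
    return 'female'
  return 'male'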
Example #2
def parser(filename, *args, **kwargs):
    g = nx.DiGraph()
    tz.pipe(filename, c_open(mode='r'),
            c.map(str.strip),
            c.map(c_split(sep=',')),
            g.add_edges_from)
    return g
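c_open and c_split above are presumably curried wrappers around open and str.split; a self-contained sketch of the same edge-list parser written against toolz.curried directly:

import networkx as nx
import toolz as tz
from toolz import curried as c

def parse_edges(filename):
    """Read lines like 'a,b' into a directed graph (sketch of the parser above)."""
    g = nx.DiGraph()
    tz.pipe(filename,
            lambda name: open(name, mode='r'),            # open the file lazily
            c.map(str.strip),                              # drop trailing newlines
            c.map(lambda line: line.split(',')),           # 'a,b' -> ['a', 'b']
            g.add_edges_from)                              # consume the stream into the graph
    return g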
Example #3
def streaming_pca(samples, n_components=2, batch_size=50):
    ipca = decomposition.IncrementalPCA(n_components=n_components,
                                        batch_size=batch_size)
    _ = list(tz.pipe(samples, curried.partition(batch_size),
                     curried.map(np.array),
                     curried.map(ipca.partial_fit)))
    return ipca
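A hypothetical usage of streaming_pca, assuming the decomposition (scikit-learn), np, tz, and curried imports the function relies on: the row stream is consumed in batches of 50 and never materialized in full.

import numpy as np

rng = np.random.default_rng(0)
samples = (rng.normal(size=10) for _ in range(500))  # lazy stream of 10-feature rows
ipca = streaming_pca(samples, n_components=2, batch_size=50)
print(ipca.components_.shape)  # (2, 10)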
Example #4
 def map(self, func, data):  # pylint: disable=no-self-use
     return pipe(
         data,
         map(func),
         map(DummyResult),
         list
     )
Example #5
def ghost_internal(x, axes):
    """ Share boundaries between neighboring blocks

    Parameters
    ----------

    x: da.Array
        A dask array
    axes: dict
        The size of the shared boundary per axis

    The axes dict informs how many cells to overlap between neighboring blocks
    {0: 2, 2: 5} means share two cells in 0 axis, 5 cells in 2 axis
    """
    dims = list(map(len, x.blockdims))
    expand_key2 = partial(expand_key, dims=dims)
    interior_keys = pipe(x._keys(), flatten,
                                    map(expand_key2), map(flatten),
                                    concat, list)
    interior_slices = dict((k, fractional_slice(k, axes))
                            for k in interior_keys)

    shape = (3,) * x.ndim
    name = next(ghost_names)
    ghost_blocks = dict(((name,) + k[1:],
                         (rec_concatenate, (concrete, expand_key2(k))))
                        for k in interior_keys)

    blockdims = [  [bds[0] + axes.get(i, 0)]
                 + [bd + axes.get(i, 0) * 2 for bd in bds[1:-1]]
                 + [bds[-1] + axes.get(i, 0)]
                 for i, bds in enumerate(x.blockdims)]

    return Array(merge(interior_slices, ghost_blocks, x.dask),
                 name, blockdims=blockdims)
Example #6
 def __str__(self):
     labels = self.labels
     if all(map(isvalid_identifier, map(first, labels))):
         rest = ', '.join('%s=%r' % l for l in labels)
     else:
         rest = '{%s}' % ', '.join('%r: %r' % l for l in labels)
     return '%s.relabel(%s)' % (self._child, rest)
Example #7
def outer_dict(dict_in):
    """Outer product of dictionary values

    Args:
      dict_in: a dictionary with iterable values

    Returns:
      a list of dictionaries

    >>> assert pipe(
    ...     dict(a=[1], b=[2, 3]),
    ...     curry(outer_dict),
    ...     lambda x: x == [dict(a=1, b=2), dict(a=1, b=3)]
    ... )
    """
    return pipe(
        dict_in.items(),
        lambda x: zip(*x),
        list,
        lambda x: (x[0], product(*x[1])),
        tlam(lambda x, y: zip(repeat(x), y)),
        map(lambda x: zip(*x)),
        map(dict),
        list
    )
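For comparison, the same outer product with plain itertools (a sketch, not part of the original module):

from itertools import product

def outer_dict_simple(dict_in):
    """Expand {key: iterable} into a list of dicts, one per combination."""
    keys, values = zip(*dict_in.items())
    return [dict(zip(keys, combo)) for combo in product(*values)]

assert outer_dict_simple(dict(a=[1], b=[2, 3])) == [dict(a=1, b=2), dict(a=1, b=3)]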
Example #8
def parse_people(do_request):
    logger.info('Parsing people')

    def parse_representative(doc):
        doc = doc('div.wpsPortletBody')
        raw_birth_date = doc('fieldset table').eq(0).find('td').eq(1).text().replace(' ', '')
        return {
            'name': doc.find('h3').eq(0).text(),
            'birthDate': arrow.get(raw_birth_date, 'D.M.YYYY') if raw_birth_date else None,
            'image': DZ_RS_URL + doc.find('img').eq(0).attr('src'),
            'group': doc('.panelBox100 a').attr('href'),
            'location': doc(u'*:contains("Volilno okro")').parent().text().split(':')[1].strip(),
            'gender': "F" if 'Poslanka' in str(doc) else "M",
        }

    # get all people
    return toolz.compose(
        # get back metadata
        curried.map(parse_representative),
        # visit person's link
        curried.map(do_request),
        # get a link for each person
        lambda doc: doc("p.podnaslovOsebaLI a").map(lambda i, r: pq(r).attr('href')),
        # get page with a list of people
        do_request,
    )(DZ_RS_PEOPLE_URL)
Example #9
 def __str__(self):
     labels = self.labels
     if all(map(isvalid_identifier, map(first, labels))):
         rest = ", ".join("%s=%r" % l for l in labels)
     else:
         rest = "{%s}" % ", ".join("%r: %r" % l for l in labels)
     return "%s.relabel(%s)" % (self._child, rest)
Example #10
def get_service_step(service_recipe):
    """
    Get step timedelta: the smallest duration among service_recipe's periods.
    """
    def diff(start, end):
        return end - start
    res_delta_diffs = compose(map(lambda p: diff(*p)), get('delta_periods'))
    return compose(min, map(min), map(res_delta_diffs))(service_recipe)
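A hypothetical input for get_service_step, assuming compose, get, and map are the curried toolz versions: each recipe entry carries 'delta_periods', a list of (start, end) pairs, and the step is the shortest of those spans.

from datetime import datetime, timedelta

service_recipe = [
    {'delta_periods': [(datetime(2024, 1, 1, 9), datetime(2024, 1, 1, 10)),
                       (datetime(2024, 1, 1, 10), datetime(2024, 1, 1, 10, 30))]},
    {'delta_periods': [(datetime(2024, 1, 2, 9), datetime(2024, 1, 2, 11))]},
]
assert get_service_step(service_recipe) == timedelta(minutes=30)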
Example #11
def functional():
  return count_by(itemgetter('hour'),
                  map(json.loads,
                      filter(None,
                             mapcat(lambda output: output.strip().split('\n'),
                                    map(lambda date: logs[date.strftime('%Y/%m/%d')],
                                        map(lambda days_ago: today - timedelta(days=days_ago),
                                            range(1, days_of_logs + 1)))))))
Example #12
def piped():
  return (_| range(1, days_of_logs + 1)
           | map(lambda days_ago: today - timedelta(days=days_ago))
           | map(lambda date: logs[date.strftime('%Y/%m/%d')])
           | mapcat(lambda output: output.strip().split('\n'))
           | filter(None)
           | map(json.loads)
           | count_by(itemgetter('hour'))
           |_)
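Examples #11 and #12 are the same log-counting pipeline in nested-call style and in fn.py's `_ | ...` pipe style. For reference, a toolz.pipe version might look like this (a sketch assuming the same logs, today, and days_of_logs plus toolz.curried imports; toolz spells the final step countby):

def toolz_piped():
  return pipe(range(1, days_of_logs + 1),
              map(lambda days_ago: today - timedelta(days=days_ago)),
              map(lambda date: logs[date.strftime('%Y/%m/%d')]),
              mapcat(lambda output: output.strip().split('\n')),
              filter(None),
              map(json.loads),
              countby(itemgetter('hour')))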
Example #13
File: csv.py Project: Casolt/blaze
 def _iter(self, usecols=None):
     from blaze.api.into import into
     dfs = self.pandas_read_csv(usecols=usecols,
                                chunksize=self.chunksize,
                                dtype='O',
                                parse_dates=[])
     return pipe(dfs, map(partial(pd.DataFrame.fillna, value='')),
                      map(partial(into, list)),
                      concat)
Example #14
def opt_weight_ir_grid(df, alphas, look_ahead_pers, long_only=True, tilt_weights=None):
    """exhaustive grid search over alphas, look_ahead_per, norm_types 
    returning dataframe of cumulative returns for each optimal portfolio construction"""
    norm_types = [2,]
    end_date = df.index[-(look_ahead_pers[-1] + 1)]
    p = pipe(product(alphas, norm_types, look_ahead_pers),
             map(lambda x: list(x) + [calc_opt_weight_portfolio_ir(df, x[0], x[1], x[2], long_only, tilt_weights)]),
             map(lambda x: dict(zip(['alpha', 'norm_type', 'look_ahead_per', 'ir'], x))))
    return pd.DataFrame(list(p))
Example #15
 def __calculate_max_column_length(column_key):
     max_value_length = pipe(
         data,
         iterkeys,
         map(lambda key: data[key][column_key]),
         pvector,
         map(str),
         map(len),
         max
     )
     return max(max_value_length, len(str(column_key)))
Example #16
def discover_sqlcontext(ctx):
    try:
        table_names = list(map(str, ctx.tableNames()))
    except AttributeError:
        java_names = ctx._ssql_ctx.catalog().tables().keySet()
        table_names = list(scala_set_to_set(ctx, java_names))

    table_names.sort()

    dshapes = zip(table_names, map(discover, map(ctx.table, table_names)))
    return datashape.DataShape(datashape.Record(dshapes))
Example #17
def destruct(x):
    """
    Deconstructs a data structure into a 1-D np.ndarray (via multiple dispatch)
    Converts a list of numpy arrays to a single array
    """

    # make sure the values are all numpy arrays
    list(map(enforce(np.ndarray), x))

    # unravel each array, concatenate, and collect into a single 1-D np.array
    return pipe(x, map(np.ravel), concat, list, np.array)
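A hypothetical call of destruct, assuming the curried map, concat, and pipe imports plus the enforce validator used above: arrays of any shape are raveled and joined into a single 1-D vector.

import numpy as np

flat = destruct([np.ones((2, 2)), np.arange(3)])
assert flat.shape == (7,)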
Example #18
File: sql.py Project: blaze/blaze
def compute_up(expr, args, **kwargs):
    from_objs = list(unique(concat(map(get_all_froms, args))))
    if len(from_objs) > 1:
        # TODO: how do you do this in sql? please send help
        raise ValueError('only columns from the same table can be merged')

    cols = list(unique(concat(map(get_unsafe_inner_columns, args, expr.args))))
    sel = sa.select(cols, from_obj=from_objs[0])
    where = unify_wheres(args)
    if where is not None:
        sel = sel.where(where)
    return sel
Example #19
def export_intervals(chanjo_db, include_header=True, bed_score=0):
  r"""Return BED-formatted interval lines from existing ``chanjo_db``.

  BED lines are ready to be printed or written to a file.

  Args:
    chanjo_db (session): ``sqlalchemy.orm.session`` object with a
      ``.query``-method
    include_header (bool, optional): whether to include BED header
    bed_score (int, optional): dummy score (0-1000) to insert at field 5
      to complete the BED format

  Yields:
    str: stringified and tab-delimited interval

  Examples:
    >>> from chanjo import export_intervals, Store
    ... # instantiate a new connection to a Chanjo database
    >>> db = Store('./coverage.sqlite')
    >>> with open('intervals.sorted.bed', 'w') as stream:
    ...   # write intervals in BED-format with appropriate headers
    ...   for bed_line in export_intervals(db):
    ...     stream.write(bed_line + '\n')
  """
  if include_header:
    yield '#chrom\tchromStart\tchromEnd\tname\tscore\tstrand'

  # set up which columns to fetch to build the BED file
  # column 5 is just a silly default for the "score" field in BED
  i = Interval  # alias
  columns = (i.contig, i.start - 1, i.end, i.id, i.strand)

  # BED files are tab-delimited
  delimiter = '\t'

  # 1. fetch interval tuples from the database (producer)
  # 2. stringify each item in each subsequence (interval tuple)
  # 3. join lines on tab-character
  # 4. prepend the header
  bed_lines = pipe(
    fetch_records(chanjo_db, columns),
    map(map(str)),                        # convert fields to strings
    map(juxt(compose(list, take(4)),      # keep first 4 fields
             lambda _: [str(bed_score)],  # insert BED score
             compose(list, last))),       # keep last field
    map(concat),                          # flatten each item
    map(delimiter.join)                   # join on \t
  )

  for bed_line in bed_lines:
    yield bed_line
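To make the nested curried steps above concrete, this is how one interval tuple flows through them (illustrative values; assumes the same toolz.curried imports as the function):

row = ('1', 100, 200, 'interval-1', '+')
fields = list(map(str, row))               # step 2: stringify every field
parts = [fields[:4], ['0'], [fields[-1]]]  # step 3: first four fields, dummy score, strand
line = '\t'.join(concat(parts))            # steps 3-4: flatten and join on tabs
assert line == '1\t100\t200\tinterval-1\t0\t+'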
Example #20
	def __init__(self, bamfile, outdir):
		self.bamfile = bamfile
		stat = self.indexbamfile()
		self.outdir = outdir
		assert self.bamfile and self.outdir and stat, "Input error"
		self._bam = pysam.Samfile(bamfile)
		self._prealloc_func = partial(np.zeros, dtype=np.int)
		self.fake_bed_rows = [("chrX", 1, 59373566), ("chrY", 69362, 11375310)]
		self.sequence = pipe(self.fake_bed_rows,
		                     map(lambda interval: self.depthreader(*interval)),
		                     map(average)
		                     )
		self.x_coverage, self.y_coverage = list(self.sequence)
		self.sex = self.predict_gender()
Example #21
def ipython_display(specs):
    """Run publish_display_data for the JS and HTML

    Args:
      specs: a list of Vega specs
    """
    pipe(
        specs,
        map(lambda x: (uuid.uuid4(), vega.Vega(x))),
        list,
        do(html_publish_map),
        map(tlam(js_publish)),
        list
    )
Example #22
def overlap_internal(x, axes):
    """ Share boundaries between neighboring blocks

    Parameters
    ----------

    x: da.Array
        A dask array
    axes: dict
        The size of the shared boundary per axis

    The axes input informs how many cells to overlap between neighboring blocks
    {0: 2, 2: 5} means share two cells in 0 axis, 5 cells in 2 axis
    """
    dims = list(map(len, x.chunks))
    expand_key2 = partial(expand_key, dims=dims, axes=axes)

    # Make keys for each of the surrounding sub-arrays
    interior_keys = pipe(x.__dask_keys__(), flatten, map(expand_key2),
                         map(flatten), concat, list)

    name = 'overlap-' + tokenize(x, axes)
    getitem_name = 'getitem-' + tokenize(x, axes)
    interior_slices = {}
    overlap_blocks = {}
    for k in interior_keys:
        frac_slice = fractional_slice((x.name,) + k, axes)
        if (x.name,) + k != frac_slice:
            interior_slices[(getitem_name,) + k] = frac_slice
        else:
            interior_slices[(getitem_name,) + k] = (x.name,) + k
            overlap_blocks[(name,) + k] = (concatenate3,
                                           (concrete, expand_key2((None,) + k, name=getitem_name)))

    chunks = []
    for i, bds in enumerate(x.chunks):
        if len(bds) == 1:
            chunks.append(bds)
        else:
            left = [bds[0] + axes.get(i, 0)]
            right = [bds[-1] + axes.get(i, 0)]
            mid = []
            for bd in bds[1:-1]:
                mid.append(bd + axes.get(i, 0) * 2)
            chunks.append(left + mid + right)

    dsk = merge(interior_slices, overlap_blocks)
    dsk = sharedict.merge(x.dask, (name, dsk))

    return Array(dsk, name, chunks, dtype=x.dtype)
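overlap_internal is the internal building block behind dask's public overlap API, which additionally fills the outer boundaries. A small hypothetical usage (API names from dask.array, parameter values illustrative):

import dask.array as da
from dask.array.overlap import overlap

x = da.arange(12, chunks=4)                            # chunks: ((4, 4, 4),)
g = overlap(x, depth={0: 1}, boundary={0: 'reflect'})  # each block grows by the shared cells
print(g.chunks)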
Example #23
File: core.py Project: dnil/chanjo
def ccds_to_bed(ccds_stream):
  """Convert CCDS dump to Chanjo-style BED stream.

  Main entry point for default Chanjo converter (ccds). It converts
  a sorted (start, chrom) CCDS database to the Chanjo BED-format.

  Args:
    ccds_stream (file): file handle to read CCDS lines from

  Yields:
    Interval: interval with merged block and superblock ids
  """
  return pipe(
    ccds_stream,
    filter(grep('Public')),                    # filter out Public tx
    map(text_type.rstrip),                     # strip \n and spaces
    map(split(sep='\t')),                      # split into list
    map(extract_intervals),                    # convert to Interval
    concat,                                    # flatten
    map(rename_sex_interval),                  # rename sex contigs
    partial(lazy_groupby, key=attrgetter('contig')),  # group by contig
    pluck(1),                                  # extract second item
    map(groupby(attrgetter('name'))),          # non-lazy group by id
    map(valmap(merge_related_elements)),       # group intervals
    map(itervalues),                           # extract values
    map(partial(sorted, key=attrgetter('start'))),  # sort by start pos
    concat                                     # flatten
  )
Example #24
def save_reviews(product_id, tag, reviews):
    f = open("data/{}.{}.csv".format(product_id, tag), "w")
    for review in reviews:
        print(review)
        f.write("{},{}\n".format(
            tag, 
            pipe(
                review, 
                _.split("\n"),
                map(_.strip()),
                map(_.encode("utf-8")),
                SF(" ".join)(_)
            )
        ))
    f.close()
Example #25
def compute_up(expr, data, scope=None, **kwargs):
    data = lower_column(data)
    grouper = compute(
        expr.grouper,
        scope,
        post_compute=False,
        return_type='native',
        **kwargs
    )

    app = expr.apply
    reductions = [
        compute(
            val,
            data,
            post_compute=None,
            return_type='native',
        ).label(name)
        for val, name in zip(app.values, app.fields)
    ]

    froms = list(unique(chain(get_all_froms(grouper),
                              concat(map(get_all_froms, reductions)))))
    inner_cols = list(getattr(grouper, 'inner_columns', [grouper]))
    grouper_cols = inner_cols[:]
    inner_cols.extend(concat(
        getattr(getattr(r, 'element', None), 'inner_columns', [r])
        for r in reductions
    ))
    wheres = unify_wheres([grouper] + reductions)
    sel = unify_froms(sa.select(inner_cols, whereclause=wheres), froms)
    return sel.group_by(*grouper_cols)
Example #26
def select_or_selectable_to_frame(el, **kwargs):
    columns, rows = batch(el)
    row = next(rows, None)
    if row is None:
        return pd.DataFrame(columns=columns)
    return pd.DataFrame(list(chain([tuple(row)], map(tuple, rows))),
                        columns=columns)
Example #27
 def __getattr__(self, key):
     if key == '_hash':
         raise AttributeError()
     try:
         return _attr_cache[(self, key)]
     except:
         pass
     try:
         result = object.__getattribute__(self, key)
     except AttributeError:
         fields = dict(zip(map(valid_identifier, self.fields),
                           self.fields))
         if self.fields and key in fields:
             if isscalar(self.dshape.measure):  # t.foo.foo is t.foo
                 result = self
             else:
                 result = self[fields[key]]
         else:
             d = toolz.merge(schema_methods(self.dshape.measure),
                             dshape_methods(self.dshape))
             if key in d:
                 func = d[key]
                 if func in method_properties:
                     result = func(self)
                 else:
                     result = boundmethod(func, self)
             else:
                 raise
     _attr_cache[(self, key)] = result
     return result
Example #28
def fractional_slice(task, axes):
    """

    >>> fractional_slice(('x', 5.1), {0: 2})  # doctest: +SKIP
    (getitem, ('x', 6), (slice(0, 2),))

    >>> fractional_slice(('x', 3, 5.1), {0: 2, 1: 3})  # doctest: +SKIP
    (getitem, ('x', 3, 5), (slice(None, None, None), slice(-3, None)))

    >>> fractional_slice(('x', 2.9, 5.1), {0: 2, 1: 3})  # doctest: +SKIP
    (getitem, ('x', 3, 5), (slice(0, 2), slice(-3, None)))
    """
    rounded = (task[0],) + tuple(map(round, task[1:]))

    index = []
    for i, (t, r) in enumerate(zip(task[1:], rounded[1:])):
        depth = axes.get(i, 0)
        if t == r:
            index.append(slice(None, None, None))
        elif t < r:
            index.append(slice(0, depth))
        elif t > r and depth == 0:
            index.append(slice(0, 0))
        else:
            index.append(slice(-depth, None))

    index = tuple(index)

    if all(ind == slice(None, None, None) for ind in index):
        return task
    else:
        return (getitem, rounded, index)
Example #29
File: sql.py Project: earney/blaze
def compute_up(expr, data, **kwargs):
    if not valid_grouper(expr):
        raise TypeError("Grouper must have a non-nested record or one "
                        "dimensional collection datashape, "
                        "got %s of type %r with dshape %s" %
                        (expr.grouper, type(expr.grouper).__name__, expr.dshape))

    s = alias_it(data)

    if valid_reducer(expr.apply):
        reduction = compute(expr.apply, s, post_compute=False)
    else:
        raise TypeError('apply must be a Summary expression')

    grouper = get_inner_columns(compute(expr.grouper, s, post_compute=False))
    reduction_columns = pipe(reduction.inner_columns,
                             map(get_inner_columns),
                             concat)
    columns = list(unique(chain(grouper, reduction_columns)))
    if (not isinstance(s, sa.sql.selectable.Alias) or
            (hasattr(s, 'froms') and isinstance(s.froms[0],
                                                sa.sql.selectable.Join))):
        assert len(s.froms) == 1, 'only a single FROM clause supported for now'
        from_obj, = s.froms
    else:
        from_obj = None

    return reconstruct_select(columns,
                              getattr(s, 'element', s),
                              from_obj=from_obj,
                              group_by=grouper)
Example #30
def compute_down(expr, data, **kwargs):
    """ Compile a blaze expression to a sparksql expression"""
    leaves = expr._leaves()

    # make sure we only have a single leaf node
    if len(leaves) != 1:
        raise ValueError('Must compile from exactly one root database')

    leaf, = leaves

    # field expressions on the database are Field instances with a record
    # measure whose immediate child is the database leaf
    tables = pipe(expr._subterms(), filter(istable(leaf)), list)

    # raise if we don't have tables in our database
    if not tables:
        raise ValueError('Expressions not referencing a table cannot be '
                         'compiled')

    # make new symbols for each table
    new_leaves = [symbol(t._name, t.dshape) for t in tables]

    # sub them in the expression
    expr = expr._subs(dict(zip(tables, new_leaves)))

    # compute using sqlalchemy
    scope = dict(zip(new_leaves, map(make_sqlalchemy_table, tables)))
    query = compute(expr, scope)

    # interpolate params
    compiled = literalquery(query, dialect=HiveDialect())
    return data.sql(str(compiled))
Example #31
 def is_invalid(value):
     return not any(map(lambda r: value in r, valid_ranges))
Example #32
def statements_individual_creator(rules: List[Rule]):
    statements = get_rules_statements(rules)

    statements_vector = pipe(statements, map(lambda s: s.threshold), list,
                             np.array)
    return creator.Individual(statements_vector)
Example #33
def cli(dry_run, input_dir, ignore_refresh, output_dir, pattern, verbose, transaction):

    dag = {}

    file_paths = walk_directory_recursively(input_dir)

    if verbose:
        print('Found %d Scripts in %s' % (len(file_paths), input_dir))

    entities = pipe(file_paths,
        map(lambda file_path: process_file(file_path, pattern)),
        list
    )

    if verbose:
        total_views = pipe(entities,
            map(get('views')),
            map(count),
            sum
        )
        total_deps = pipe(entities,
            map(get('view_dependencies')),
            map(count),
            sum
        )
        print('Identified %d Materialized Views, Containing %d View Dependencies' % (total_views, total_deps))

    view_content = {}
    dag = {}
    for entity in entities:
        view_content.update(
            {view: format_content(entity) for view in entity['views']}
        )
        dag.update(
            {view: entity['view_dependencies'] for view in entity['views']}
        )

    sorted_views = toposort_flatten(dag)

    if verbose:
        print("\nMaterialized View Dependencies:")
        pprint_color(
            valmap(lambda val: list(val), valfilter(lambda val: val, dag))
        )

    create_views = pipe(sorted_views,
        map(lambda view: view_content[view]),
        unique,
        list
    )

    create_script = generate_script(create_views, transaction)

    refresh_prefix = 'REFRESH MATERIALIZED VIEW CONCURRENTLY '
    if transaction:
        refresh_prefix = '  ' + refresh_prefix
    refresh_views = pipe(sorted_views,
        filter(lambda view: re.search(pattern, view) and not (ignore_refresh and re.search(ignore_refresh , view))),
        map(lambda view: refresh_prefix + view + ';'),
        list
    )

    if verbose:
        print('Selecting %d Materialized Views for Refresh' % len(refresh_views))

    refresh_script = generate_script(refresh_views, transaction, "\n\n")

    if dry_run:
        print('Dry Run Option Enabled - Skipping Script Generation')
        return

    timestr = time.strftime("%Y%m%d-%H%M%S")

    serialize_script('create', timestr, create_script, output_dir, verbose)
    serialize_script('refresh', timestr, refresh_script, output_dir, verbose)
Example #34
def sql_to_iterator(t, **kwargs):
    _, rows = batch(sa.select([t]))
    return map(tuple, rows)
Example #35
def deep_map_f(keys, f, dictionary):
    return deep_transform(keys, comp(list, map(f)), dictionary)
Example #36
def validate(doc, method):
    get_so_count = compose(len, list, unique,
                           map(lambda x: x.against_sales_order))
    if get_so_count(doc.items) != 1:
        frappe.throw(
            frappe._("Cannot create document with multiple Sales Orders"))
Example #37

def _create_sales_invoice(doc):
    invoice = frappe.new_doc("Sales Invoice")
    invoice.flags.ignore_permissions = True
    make_sales_invoice(doc.name, target_doc=invoice)
    invoice.is_pos = 1
    invoice.payments = []
    invoice.append("payments", {
        "mode_of_payment": "Cash",
        "amount": invoice.rounded_total
    })
    invoice.save()
    invoice.submit()


_get_so = compose(first, filter(None), map(lambda x: x.against_sales_order),
                  lambda x: x.items)


def _get_item_description(items):
    item_names = [x.item_name for x in items]
    return ("{}".format(item_names[0]) if len(item_names) == 1 else
            "{} +{} more item(s)".format(item_names[0],
                                         len(item_names) - 1))


def _format_datetime(dt_str):
    return "{0:%a} {0:%b} {0.day}, {0.year} {0:%H}:{0:%M} {0:%p}".format(
        frappe.utils.get_datetime(dt_str))
Example #38
    compose_left(*funcs)(data)
    map(accept_one(compose), zip(repeat(map), funcs))


    
def modafinil() -> pd.Series:
    dates, _numbers, treatments, _times = pipe(
        open("data/modafinil-data"),
        map(curry(str.split)(maxsplit=3)),  # list of rows
        map(curry(zip, row_format)),  # Iterable[Tuple[Callable, str]]
        map(map(variadic(apply))),  # Iterable[Iterable[apply(*Tuple[Callable, str])]]
        # that is, Iterable[Iterable[X]]
    )
    dates, _numbers, treatments, _times = pipe(
        open("data/modafinil-data"),
        map(curry(str.split)(maxsplit=3)),
        map(parsed_row),
        transpose,
    )
    return pd.Series(treatments, index=pd.DatetimeIndex(dates, name="dates"))


if __name__ == "__main__":
    sleep = fitbit.get_data("2019-03-22", "2019-04-27")
    modafinil_treatments = modafinil()
    summary = test(sleep.efficiency, modafinil_treatments)
    print("; ".join(map("{}={}".format, summary.items())))
Example #39
def sparksql_dataframe_to_list(df, dshape=None, **kwargs):
    result = df.collect()
    if (dshape is not None and iscollection(dshape) and
            not isrecord(dshape.measure)):
        return list(map(get(0), result))
    return result
Example #40
def get_history(name):
    booking_logs = frappe.get_all(
        "Booking Log",
        filters={"booking_order": name},
        fields=[
            "'Booking Log' as doctype",
            "posting_datetime",
            "booking_order",
            "shipping_order",
            "station",
            "activity",
            "loading_operation",
            "loading_unit",
            "sum(no_of_packages) as no_of_packages",
            "sum(weight_actual) as weight_actual",
        ],
        order_by="posting_datetime",
        group_by="posting_datetime,activity",
    )

    get_shipping_logs = compose(
        concat,
        map(lambda x: frappe.get_all(
            "Shipping Log",
            filters={
                "shipping_order":
                x[0].get("shipping_order"),
                "activity": ("in", ["Stopped", "Moving"]),
                "posting_datetime": (
                    "between",
                    [
                        x[0].get("posting_datetime"), x[1].get(
                            "posting_datetime")
                    ],
                ),
            },
            fields=[
                "'Shipping Log' as doctype",
                "posting_datetime",
                "shipping_order",
                "station",
                "activity",
            ],
            order_by="posting_datetime",
        ) if x[0].get("shipping_order") else []),
        sliding_window(2),
    )

    shipping_logs = get_shipping_logs(
        booking_logs + [{
            "posting_datetime": frappe.utils.now()
        }])

    def get_message(log):
        if log.get("doctype") == "Booking Log":
            if log.get("loading_unit") == "Weight":
                return "{} {} units by weight at {}".format(
                    log.get("activity"),
                    abs(log.get("weight_actual")),
                    log.get("station"),
                )
            return "{} {} packages at {}".format(
                log.get("activity"), abs(log.get("no_of_packages")),
                log.get("station"))

        if log.get("doctype") == "Shipping Log":
            prepo = "to" if log.get("activity") == "Moving" else "at"
            return "{} {} {}".format(log.get("activity"), prepo,
                                     log.get("station"))

        return ""

    def get_link(log):
        if log.get("doctype") == "Shipping Log":
            return "#Form/Shipping Order/{}".format(log.get("shipping_order"))

        if log.get("doctype") == "Booking Log" and log.get(
                "loading_operation"):
            return "#Form/Loading Operation/{}".format(
                log.get("loading_operation"))

        return ""

    def get_event(log):
        return {
            "datetime": log.get("posting_datetime"),
            "status": log.get("activity"),
            "message": get_message(log),
            "link": get_link(log),
        }

    return sorted(
        [get_event(x) for x in concat([booking_logs, shipping_logs])],
        key=lambda x: frappe.utils.get_datetime(x.get("datetime")),
    )
Example #41
def get_rules_statements(rules):
    return pipe(
        rules,
        map(lambda r: list(r.statements)),
        reduce(list.__add__),
    )
Example #42
 def _condition(self, types, *args, **kwargs) -> bool:
     if not isinstance(types, Iterable):
         types = tuple([types])
     return pipe(zip(args, types), map(lambda arg: isinstance(*arg)), all)
Example #43
                    return f(*args, **kwargs)
                except:
                    if i >= n - 1:
                        raise
                    time.sleep(timeout)

        return helper2

    return helper1


def bottom(x):
    pass


force = compose(any, map(bottom))


def my_fun(x):
    return x + 1


def url_join(*args_):
    args = [x for x in args_ if x]

    assert ''.join(args).count('?') <= 1
    assert ''.join(args).count('?') == 0 or '?' in args[-1]

    a1, params = partition(lambda x: '?' not in x, args)

    if params:
Example #44
def sql_to_iterator(t, bind=None, **kwargs):
    _, rows = batch(sa.select([t]), bind=bind)
    return map(tuple, rows)
Example #45
def discover_row_proxy(rp):
    return Record(list(zip(rp.keys(), map(discover, rp.values()))))
Example #46
def compute_up(t, lhs, rhs, **kwargs):
    if isinstance(lhs, ColumnElement):
        lhs = select(lhs)
    if isinstance(rhs, ColumnElement):
        rhs = select(rhs)
    if name(lhs) == name(rhs):
        left_suffix, right_suffix = t.suffixes
        lhs = lhs.alias('%s%s' % (name(lhs), left_suffix))
        rhs = rhs.alias('%s%s' % (name(rhs), right_suffix))

    lhs = alias_it(lhs)
    rhs = alias_it(rhs)

    if isinstance(lhs, Select):
        lhs = lhs.alias(next(aliases))
        left_conds = [lhs.c.get(c) for c in listpack(t.on_left)]
    else:
        ldict = dict((c.name, c) for c in inner_columns(lhs))
        left_conds = [ldict.get(c) for c in listpack(t.on_left)]

    if isinstance(rhs, Select):
        rhs = rhs.alias(next(aliases))
        right_conds = [rhs.c.get(c) for c in listpack(t.on_right)]
    else:
        rdict = dict((c.name, c) for c in inner_columns(rhs))
        right_conds = [rdict.get(c) for c in listpack(t.on_right)]

    condition = reduce(and_, map(eq, left_conds, right_conds))

    # Perform join
    if t.how == 'inner':
        join = _join_selectables(lhs, rhs, condition=condition)
        main = lhs
    elif t.how == 'left':
        main, other = lhs, rhs
        join = _join_selectables(lhs, rhs, condition=condition, isouter=True)
    elif t.how == 'right':
        join = _join_selectables(rhs, lhs, condition=condition, isouter=True)
        main = rhs
    else:
        # http://stackoverflow.com/questions/20361017/sqlalchemy-full-outer-join
        raise ValueError("SQLAlchemy doesn't support full outer Join")
    """
    We now need to arrange the columns in the join to match the columns in
    the expression.  We care about order and don't want repeats
    """
    if isinstance(join, Select):

        def cols(x):
            if isinstance(x, Select):
                return list(x.inner_columns)
            else:
                return list(x.columns)
    else:
        cols = lambda x: list(x.columns)

    main_cols = cols(main)
    left_cols = cols(lhs)
    left_names = set(map(_getname, left_cols))
    right_cols = cols(rhs)
    right_names = set(map(_getname, right_cols))

    left_suffix, right_suffix = t.suffixes
    fields = [
        f.replace(left_suffix, '').replace(right_suffix, '') for f in t.fields
    ]
    columns = [c for c in main_cols if c.name in t._on_left]
    columns += [
        _clean_join_name(right_names, left_suffix, c) for c in left_cols
        if c.name in fields and c.name not in t._on_left
    ]
    columns += [
        _clean_join_name(left_names, right_suffix, c) for c in right_cols
        if c.name in fields and c.name not in t._on_right
    ]

    if isinstance(join, Select):
        return join.with_only_columns(columns)
    else:
        return sa.select(columns, from_obj=join)
Example #47
def select_to_iterator(sel, dshape=None, **kwargs):
    func = pluck(0) if dshape and isscalar(dshape.measure) else map(tuple)
    _, rows = batch(sel)
    return func(rows)
Example #48
def get_inner_columns(sel):
    try:
        return list(sel.inner_columns)
    except AttributeError:
        return list(map(lower_column, sel.c.values()))
Example #49
File: sql.py Project: kwin-wang/odo
    'string': sa.Text,
    'date': sa.Date,
    'time': sa.Time,
    'datetime': sa.DateTime,
    'bool': sa.Boolean,
    "timedelta[unit='D']": sa.Interval(second_precision=0, day_precision=9),
    "timedelta[unit='h']": sa.Interval(second_precision=0, day_precision=0),
    "timedelta[unit='m']": sa.Interval(second_precision=0, day_precision=0),
    "timedelta[unit='s']": sa.Interval(second_precision=0, day_precision=0),
    "timedelta[unit='ms']": sa.Interval(second_precision=3, day_precision=0),
    "timedelta[unit='us']": sa.Interval(second_precision=6, day_precision=0),
    "timedelta[unit='ns']": sa.Interval(second_precision=9, day_precision=0),
    # ??: sa.types.LargeBinary,
}

revtypes = dict(map(reversed, types.items()))

revtypes.update({
    sa.DATETIME: datetime_,
    sa.TIMESTAMP: datetime_,
    sa.FLOAT: float64,
    sa.DATE: date_,
    sa.BIGINT: int64,
    sa.INTEGER: int_,
    sa.types.NullType: string,
    sa.REAL: float32,
    sa.Float: float64,
    sa.Float(precision=24): float32,
    sa.Float(precision=53): float64,
})
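The dict(map(reversed, ...)) line above is the usual idiom for inverting a mapping:

d = {'a': 1, 'b': 2}
assert dict(map(reversed, d.items())) == {1: 'a', 2: 'b'}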
Example #50
def filter_memory_data(yaml_data):
    """Filter the memory time data from the meta.yaml's

    Args:
      yaml_data: the benchmark YAML data

    Returns:
      memory versus time data
    """

    def time_ratio(data):
        """Calcuate the sim_time over wall_time ration
        """

        def not0(value):
            """Set to 1e-10 if 0
            """
            if value == 0:
                return 1e-10
            return value

        return pipe(
            data[-1],
            juxt(
                lambda x: x.get("sim_time", x.get("time")),
                lambda x: x.get("wall_time", x.get("time")),
            ),
            lambda x: float(x[1]) / not0(float(x[0])),
        )

    def memory_usage(data):
        """Calculate the memory usage in KB
        """
        unit_map = dict(GB=1048576., KB=1., MB=1024., B=1. / 1024.)
        if isinstance(data, dict):
            data_ = data
        else:
            data_ = data[-1]
        key = next(k for k in data_.keys() if "value" in k)
        return float(data_[key]) * unit_map[data_.get("unit", "KB")]

    def make_datum(data):
        """Build an item in the data list for one simulation
        """
        return dict(
            name="efficiency",
            values=[
                dict(
                    time_ratio=time_ratio(data["run_time"]),
                    memory_usage=memory_usage(data["memory_usage"]),
                )
            ],
        )

    return pipe(
        yaml_data,
        dict,
        valmap(lambda x: x["data"]),
        valmap(
            filter(lambda item: item["name"].lower() in ("memory_usage", "run_time"))
        ),
        valmap(map(lambda x: (x["name"], x["values"]))),
        valmap(dict),
        valmap(make_datum),
        itemmap(lambda item: (item[0], update_dict(item[1], name=item[0]))),
        lambda dict_: sorted(list(dict_.values()), key=lambda item: item["name"]),
    )
Example #51
def get_inner_columns(sel):
    inner_columns = list(sel.inner_columns)
    assert len(inner_columns) == 1, 'ScalarSelect should have only ONE column'
    return list(map(lower_column, inner_columns))
Example #52
 def __call__(self, *args, **kwargs):
     values = map(lambda x: x(*args, **kwargs), self)
     return OrderedDict(zip(self, list(values)))
Example #53
def get_inner_columns(f):
    unique_columns = unique(concat(map(get_inner_columns, f.clauses)))
    lowered = [x.label(getattr(x, 'name', None)) for x in unique_columns]
    return [getattr(sa.func, f.name)(*lowered)]
Example #54
        proc(e)


if __name__ == "__main__":

    if len(sys.argv) != 1:
        print("USAGE: python3 generate_dd_txt.py ")
        sys.exit(1)

    fname, output_dir = sys.argv[0], "zrm_phone_xhe_shape"

    if not Path(output_dir).exists():
        os.makedirs(output_dir)

    char_to_shape = pipe(CharShapeTable.select(),
                         map(lambda e: (e.char, e.shapes)),
                         reduceby(lambda e: e[0], lambda e1, e2: e1),
                         valmap(lambda e: e[1]),
                         dict
                         )
    print(f"total {len(char_to_shape)} char shapes")

    char_to_phones = pipe(CharPhoneTable.select(),
                          map(lambda e: (e.char, e.zrm)),
                          groupby(lambda e: e[0]),
                          valmap(lambda phones: [e[1] for e in phones]),
                          dict
                          )
    print(f"total {len(char_to_phones)} char phones")

    one_hit_char_items = generate_one_hit_char(60000)
Example #55
 def is_valid(field, value):
     return any(map(lambda _range: value in _range, rules[field]))
Example #56
            current_and_distance: Tuple[Text,
                                        int]) -> Iterable[Tuple[Text, int]]:
        current, distance = current_and_distance
        if distance < radius:
            yield from map(lambda neighbor: (neighbor, distance + 1),
                           get_neighbors(current))

    return map(
        toolz.first,
        graph_traverse(source=(source, 0),
                       get_neighbors=get_neighbors_limiting_radius),
    )


edges_to_graph = toolz.compose(
    curried.valmap(toolz.compose(frozenset, curried.map(toolz.second))),
    curried.groupby(toolz.first),
)

graph_to_edges = toolz.compose_left(
    curried.keymap(lambda x: (x, )),
    dict.items,
    curried.mapcat(functional.star(itertools.product)),
)

reverse_graph = toolz.compose_left(
    graph_to_edges, curried.map(toolz.compose_left(reversed, tuple)),
    edges_to_graph)

cliques_to_graph = toolz.compose_left(
    curried.mapcat(lambda clique: itertools.permutations(clique, r=2)),
Example #57
def parse_ticket(ticket):
    return list(map(int, ticket.split(",")))
Example #58
def json_lines_to_iterator(j, encoding='utf-8', **kwargs):
    with json_lines(j.path, encoding=encoding) as lines:
        for item in pipe(lines, filter(nonempty), map(json.loads)):
            yield item
Example #59
def discover_sqlcontext(ctx):
    table_names = sorted(map(str, ctx.tableNames()))
    dshapes = zip(table_names, map(discover, map(ctx.table, table_names)))
    return datashape.DataShape(datashape.Record(dshapes))
import pytesseract
import PIL.Image
import numpy
from toolz.curried import map, pipe, compose, get, do, curry, count, pluck, juxt, flip
import pandas
import skimage
import skimage.measure
import skimage.filters
import skimage.morphology

import json
import sys
import pickle

fcompose = lambda *args: compose(*args[::-1])

mapdict = lambda **kwargs: map(lambda data: dict(
    dict((k, f(data)) for k, f in kwargs.items()), **data))
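A hypothetical use of mapdict: derive extra keys for every record in a stream while keeping the originals (existing values win on clashes because of the trailing **data).

rows = [{'a': 1}, {'a': 2}]
doubled = pipe(rows, mapdict(b=lambda d: d['a'] * 2), list)
assert doubled == [{'a': 1, 'b': 2}, {'a': 2, 'b': 4}]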


## Helper functions
@curry
def dfassign(df, **kwargs):
    return df.assign(**dict(((k, f(df)) for k, f in kwargs.items())))


## View the images
reshape = lambda arr: arr if len(arr.shape) == 2 else arr[..., 0]
to_array = lambda image: reshape(numpy.asarray(image.convert("L")))


def plt_arrays(arrs):
    """Plot a set of (n, n) arrays as row column sub plots.