def pandas_read_csv(self, usecols=None, **kwargs):
    """ Use pandas.read_csv with the right keyword arguments

    In particular we know what dtypes should be, which columns are dates,
    etc...
    """
    dtypes, dates = dshape_to_pandas(self.schema)

    if usecols:
        if builtins.all(isinstance(c, int) for c in usecols):
            usecols = get(usecols, self.columns)
        dates = [name for name in dates if name in usecols]

    result = pd.read_csv(self.path,
                         names=kwargs.pop('names', self.columns),
                         usecols=usecols,
                         compression={'gz': 'gzip', 'bz2': 'bz2'}.get(ext(self.path)),
                         dtype=kwargs.pop('dtype', dtypes),
                         parse_dates=kwargs.pop('parse_dates', dates),
                         encoding=kwargs.pop('encoding', self.encoding),
                         header=0 if self.header else None,
                         **merge(kwargs, clean_dialect(self.dialect)))

    reorder = get(list(usecols)) if usecols and len(usecols) > 1 else identity

    if isinstance(result, (pd.Series, pd.DataFrame)):
        return reorder(result)
    else:
        return map(reorder, result)
def test_markov_tables():
    markov_features = ['markov_N', 'markov_R', 'markov_NR',
                       'markov_RN', 'markov_NN', 'markov_RR']
    assert (t.get(markov_features, f.markov_tables(markovtesttree), str(0.0)) ==
            ('0.809523809524', '0.190476190476', '0.285714285714',
             '0.285714285714', '0.428571428571', '0.0'))
    assert (t.get(markov_features, f.markov_tables(smalltree), str(0.0)) ==
            ('1.0', '0.0', '0.0', '0.0', '1.0', '0.0'))
    assert (t.get(markov_features, f.markov_tables(f.compress(smalltree)), str(0.0)) ==
            ('1.0', '0.0', '0.0', '0.0', '0.0', '0.0'))
def select_permits():
    types = get(permit_type_checkbox.active, permit_types)
    res_non = get(res_non_checkbox.active, res_non_types)
    selected = df[
        (df.year >= min_year.value) &
        (df.year <= max_year.value) &
        (df.permit_value >= min_permit_cost.value) &
        (df.permit_value <= max_permit_cost.value) &
        (df.type.isin(types)) &
        (df.res_non.isin(res_non))
    ]
    return selected
def records_to_tuples(ds, data):
    """ Transform records into tuples

    Examples
    --------
    >>> seq = [{'a': 1, 'b': 10}, {'a': 2, 'b': 20}]
    >>> list(records_to_tuples('var * {a: int, b: int}', seq))
    [(1, 10), (2, 20)]

    >>> records_to_tuples('{a: int, b: int}', seq[0])  # single elements
    (1, 10)

    >>> records_to_tuples('var * int', [1, 2, 3])  # pass through on non-records
    [1, 2, 3]

    See Also
    --------
    tuples_to_records
    """
    if isinstance(ds, (str, unicode)):
        ds = dshape(ds)
    if isinstance(ds.measure, Record) and len(ds.shape) == 1:
        return pluck(ds.measure.names, data, default=None)
    if isinstance(ds.measure, Record) and len(ds.shape) == 0:
        return get(ds.measure.names, data)
    if not isinstance(ds.measure, Record):
        return data
    raise NotImplementedError()
def partial_reduce(func, x, split_every, keepdims=False, dtype=None, name=None):
    """Partial reduction across multiple axes.

    Parameters
    ----------
    func : function
    x : Array
    split_every : dict
        Maximum reduction block sizes in each dimension.

    Examples
    --------
    Reduce across axis 0 and 2, merging a maximum of 1 block in the 0th
    dimension, and 3 blocks in the 2nd dimension:

    >>> partial_reduce(np.min, x, {0: 1, 2: 3})    # doctest: +SKIP
    """
    name = name or 'p_reduce-' + tokenize(func, x, split_every, keepdims, dtype)
    parts = [list(partition_all(split_every.get(i, 1), range(n)))
             for (i, n) in enumerate(x.numblocks)]
    keys = product(*map(range, map(len, parts)))
    out_chunks = [tuple(1 for p in partition_all(split_every[i], c)) if i in split_every else c
                  for (i, c) in enumerate(x.chunks)]
    if not keepdims:
        out_axis = [i for i in range(x.ndim) if i not in split_every]
        getter = lambda k: get(out_axis, k)
        keys = map(getter, keys)
        out_chunks = list(getter(out_chunks))
    dsk = {}
    for k, p in zip(keys, product(*parts)):
        decided = dict((i, j[0]) for (i, j) in enumerate(p) if len(j) == 1)
        dummy = dict(i for i in enumerate(p) if i[0] not in decided)
        g = lol_tuples((x.name,), range(x.ndim), decided, dummy)
        dsk[(name,) + k] = (func, g)
    return Array(merge(dsk, x.dask), name, out_chunks, dtype=dtype)
def post_compute(e, q, d):
    """ Execute a query using MongoDB's aggregation pipeline

    The compute_up functions operate on Mongo Collection / list-of-dict
    queries.  Once they're done we need to actually execute the query on
    MongoDB.  We do this using the aggregation pipeline framework.

    http://docs.mongodb.org/manual/core/aggregation-pipeline/
    """
    d = {'$project': toolz.merge({'_id': 0},  # remove mongo identifier
                                 dict((col, 1) for col in e.fields))}
    q = q.append(d)

    if not e.dshape.shape:  # not a collection
        result = q.coll.aggregate(list(q.query))['result'][0]
        if isscalar(e.dshape.measure):
            return result[e._name]
        else:
            return get(e.fields, result)

    dicts = q.coll.aggregate(list(q.query))['result']

    if isscalar(e.dshape.measure):
        return list(pluck(e.fields[0], dicts, default=None))  # dicts -> values
    else:
        return list(pluck(e.fields, dicts, default=None))  # dicts -> tuples
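# Hedged illustration of the final pluck step above (pure toolz, no MongoDB
# required; the sample documents are made up for this sketch):
from toolz import pluck

docs = [{'name': 'Alice', 'amount': 100}, {'name': 'Bob', 'amount': 200}]
assert list(pluck('amount', docs)) == [100, 200]                    # dicts -> values
assert list(pluck(['name', 'amount'], docs)) == [('Alice', 100),
                                                 ('Bob', 200)]      # dicts -> tuples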
def finalize(bases):
    shape = bases[0].shape[:2]
    out = nd.empty(shape, dshape)
    for path, finalizer, inds in zip(paths, finalizers, indices):
        arr = reduce(getattr, path, out)
        np_arr = nd.as_numpy(arr.view_scalars(arr.dtype.value_type))
        np_arr[:] = finalizer(*get(inds, bases))
    return out
def __init__(self, lhs, rhs, on_left=None, on_right=None):
    self.lhs = lhs
    self.rhs = rhs
    if not on_left and not on_right:
        on_left = on_right = unpack(list(sorted(
            set(lhs.columns) & set(rhs.columns),
            key=lhs.columns.index)))
    if not on_right:
        on_right = on_left
    if isinstance(on_left, tuple):
        on_left = list(on_left)
    if isinstance(on_right, tuple):
        on_right = list(on_right)
    self._on_left = tuple(on_left) if isinstance(on_left, list) else on_left
    self._on_right = (tuple(on_right) if isinstance(on_right, list)
                      else on_right)
    if get(on_left, lhs.schema[0]) != get(on_right, rhs.schema[0]):
        raise TypeError("Schemas of joining columns do not match")
def func(scheduler):
    """ Get CPU and memory usage on each worker """
    workers = [k for k, v in sorted(scheduler.ncores.items(),
                                    key=lambda x: x[0], reverse=True)]
    nannies = [(ip, scheduler.nannies[(ip, port)]) for ip, port in workers]
    dicts = [get(-1, scheduler.resource_logs[w], dict()) for w in nannies]
    return {'workers': workers,
            'cpu': [d.get('cpu_percent', -1) for d in dicts],
            'memory': [d.get('memory_percent', -1) for d in dicts]}
def rget(sequence, key, default=None):
    """Get element in a sequence or dict.

    Like toolz.get but with parameters in reverse order.

    Args:
        sequence (sequence or dict): sequence or dict
        key (str or int): key to access in sequence
        default (object, optional): value returned when the key is missing

    Returns:
        object: value behind the key
    """
    return get(key, sequence, default=default)
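# Minimal usage sketch for rget (assumes toolz is installed and rget, defined
# above, is in scope):
from toolz import get

record = {'a': 1, 'b': 2}
assert get('a', record) == 1                      # toolz.get: key first, then sequence
assert rget(record, 'a') == 1                     # rget: sequence first, then key
assert rget(record, 'missing', default=0) == 0    # default for absent keys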
def join(lhs, rhs, on_left=None, on_right=None, how='inner'):
    if not on_left and not on_right:
        on_left = on_right = unpack(list(sorted(
            set(lhs.columns) & set(rhs.columns),
            key=lhs.columns.index)))
    if not on_right:
        on_right = on_left
    if isinstance(on_left, tuple):
        on_left = list(on_left)
    if isinstance(on_right, tuple):
        on_right = list(on_right)
    if get(on_left, lhs.schema[0]) != get(on_right, rhs.schema[0]):
        raise TypeError("Schemas of joining columns do not match")
    _on_left = tuple(on_left) if isinstance(on_left, list) else on_left
    _on_right = (tuple(on_right) if isinstance(on_right, list)
                 else on_right)
    how = how.lower()
    if how not in ('inner', 'outer', 'left', 'right'):
        raise ValueError("How parameter should be one of "
                         "\n\tinner, outer, left, right."
                         "\nGot: %s" % how)
    return Join(lhs, rhs, _on_left, _on_right, how)
def get_prox_key(infos, key, default=None, reduce=None, array=True):
    """ Build array of prox output for each operator and iteration.

    Return an {#iterations} by {#prox operators} array (unless a reduction
    is performed).  `reduce` is usually np.mean, np.min, or np.max.

    Some prox outputs may be `None` or `{}`.  In that case, return the
    `default` value.
    """
    g = ([get(key, p, default=default) for p in info['prox_infos']]
         for info in infos)
    if reduce:
        g = (reduce(row) for row in g)
    g = list(g)
    if array:
        g = np.array(g)
    return g
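# Hedged sketch of the `infos` layout that get_prox_key expects (field names
# and values here are illustrative only; assumes numpy and the function above
# are in scope):
import numpy as np

infos = [
    {'prox_infos': [{'iterations': 3}, {}]},                  # second prox returned {}
    {'prox_infos': [{'iterations': 5}, {'iterations': 2}]},
]
get_prox_key(infos, 'iterations', default=0)
# -> array([[3, 0], [5, 2]])   one row per iteration, one column per prox operator
get_prox_key(infos, 'iterations', default=0, reduce=np.max, array=False)
# -> [3, 5]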
def _make_segregator(sles, groupby_filter, partitions):
    groupby_fn = compose(
        partial(get, "key", default=None),
        excepts(StopIteration, first, lambda __: {}),
        partial(flip, filter, partitions),
        groupby_filter,
    )
    sles_grouped = groupby(groupby_fn, sles)

    def seg_filter(x):
        return lambda sl: sl.get("item_code") == x

    summer = compose(operator.neg, sum, partial(pluck, "actual_qty"))

    def seg_reducer(item_code):
        def fn(a, p):
            key = get("key", p, None)
            seger = get("seger", p, lambda __: None)
            return merge(a, {key: seger(item_code)})

        return fn

    segregator_fns = [
        merge(
            x,
            {
                "seger": compose(
                    summer,
                    partial(flip, filter, get(x.get("key"), sles_grouped, [])),
                    seg_filter,
                )
            },
        )
        for x in partitions
    ]

    def fn(item_code):
        return reduce(seg_reducer(item_code), segregator_fns, {})

    return fn
def main(input_file, output_file):
    reader = csv.reader(input_file)
    writer = csv.DictWriter(
        output_file,
        fieldnames=["date", "latitude", "longitude"]
        + listmap(second, WEATHER_FIELDS),
    )
    writer.writeheader()
    for row in reader:
        date, latitude, longitude, _, payload = row
        weather = get_in(["daily", "data", 0], json.loads(payload), {})
        writer.writerow({
            "date": date,
            "latitude": latitude,
            "longitude": longitude,
            **{
                field_name: get(field, weather, None)
                for field, field_name in WEATHER_FIELDS
            },
        })
def transform(example, output_size=(HEIGHT, WIDTH), training=True):
    image, objects, image_id = decode(example)

    # if training:
    #     image = random_resize(image, output_size, ratio_range=(0.8, 1.2))
    #     image, objects = random_crop(image, objects, output_size)
    #     image, objects = random_hflip(image, objects, 0.5)
    # else:
    image = resize(image, output_size)

    image = normalize(image, [123.68, 116.779, 103.939], [58.393, 57.12, 57.375])
    image, objects = pad_to(image, objects, output_size)

    gt_bboxes, gt_labels = get(['gt_bbox', 'gt_label'], objects)
    gt_bboxes = coords_to_absolute(gt_bboxes, tf.shape(image)[:2])

    bbox_targets, labels, centerness = atss_match(
        gt_bboxes, gt_labels, anchors, num_level_bboxes, topk=9, centerness=True)

    return image, {'bbox_target': bbox_targets, 'label': labels,
                   'centerness': centerness, 'image_id': image_id}
def Slideshow(text):
    '''
    Shows an image with arrows to go to next/previous images.

    ---image-slider
    url1::caption1
    url2::caption2
    ...

    Will make an image slider with those images in that order.
    '''
    lines = list(filter(lambda x: x.strip() != "", text.split('\n')))
    name = f'ss-{int(datetime.now().timestamp())}-{re.sub(r"[^a-z]", "", lines[0])}'
    return ['div.slideshow', {'data-transition': 'fade'}, *[
        [
            ['input.slideshow--bullet',
             {'type': 'radio', 'name': name,
              'id': f'{name}-item-{i}', 'checked': i == 0}],
            ['div.slideshow--item', {'data-pos': f"{i+1}/{len(lines)}"},
             ['img', {'src': line.split('::')[0]}],
             ['div.slideshow--caption', t.get(1, line.split('::'), '')],
             ['label.slideshow--nav.slideshow--nav-previous',
              {'for': f'{name}-item-{(i-1)%len(lines)}'},
              f'Go to slide {(i-1)%len(lines) + 1}'],
             ['label.slideshow--nav.slideshow--nav-next',
              {'for': f'{name}-item-{(i+1)%len(lines)}'},
              f'Go to slide {(i+1)%len(lines) + 1}'],
            ],
        ]
        for (i, line) in enumerate(lines)]]
def partial_reduce(func, x, split_every, keepdims=False, dtype=None, name=None):
    """ Partial reduction across multiple axes.

    Parameters
    ----------
    func : function
    x : Array
    split_every : dict
        Maximum reduction block sizes in each dimension.

    Examples
    --------
    Reduce across axis 0 and 2, merging a maximum of 1 block in the 0th
    dimension, and 3 blocks in the 2nd dimension:

    >>> partial_reduce(np.min, x, {0: 1, 2: 3})    # doctest: +SKIP
    """
    name = (name or funcname(func)) + '-' + tokenize(func, x, split_every,
                                                     keepdims, dtype)
    parts = [list(partition_all(split_every.get(i, 1), range(n)))
             for (i, n) in enumerate(x.numblocks)]
    keys = product(*map(range, map(len, parts)))
    out_chunks = [tuple(1 for p in partition_all(split_every[i], c)) if i in split_every else c
                  for (i, c) in enumerate(x.chunks)]
    if not keepdims:
        out_axis = [i for i in range(x.ndim) if i not in split_every]
        getter = lambda k: get(out_axis, k)
        keys = map(getter, keys)
        out_chunks = list(getter(out_chunks))
    dsk = {}
    for k, p in zip(keys, product(*parts)):
        decided = dict((i, j[0]) for (i, j) in enumerate(p) if len(j) == 1)
        dummy = dict(i for i in enumerate(p) if i[0] not in decided)
        g = lol_tuples((x.name,), range(x.ndim), decided, dummy)
        dsk[(name,) + k] = (func, g)
    return Array(sharedict.merge(x.dask, (name, dsk), dependencies={name: {x.name}}),
                 name, out_chunks, dtype=dtype)
def make_envs(cost=1.00, n=100, seed=None, variance_structure="constant_high"):
    if seed is not None:
        np.random.seed(seed)

    sigmas = get(
        variance_structure,
        {
            "constant_high": [0, 20, 20, 20],
            "increasing": [0, 2, 4, 20],
            "decreasing": [0, 20, 10, 5],
            "constant_low": [0, 1, 1, 1],
        })

    def reward(depth):
        if depth > 0:
            return Normal(0, sigmas[depth]).to_discrete(6)
        return 0.

    branching = [4, 1, 2]
    envs = [
        MouselabEnv.new_symmetric(branching, reward, cost=cost)
        for _ in range(n)
    ]
    return envs
def types_of_fields(fields, expr):
    """ Get the types of fields in an expression

    Examples
    --------
    >>> from blaze import symbol
    >>> expr = symbol('e', 'var * {x: int64, y: float32}')
    >>> types_of_fields('y', expr)
    ctype("float32")

    >>> types_of_fields(['y', 'x'], expr)
    (ctype("float32"), ctype("int64"))

    >>> types_of_fields('x', expr.x)
    ctype("int64")
    """
    if isinstance(expr.dshape.measure, Record):
        return get(fields, expr.dshape.measure)
    else:
        if isinstance(fields, (tuple, list, set)):
            assert len(fields) == 1
            fields, = fields
        assert fields == expr._name
        return expr.dshape.measure
def onload(self):
    all_fees = frappe.db.sql(
        """
            SELECT
                si.rounded_total AS amount,
                fee.status AS status,
                fee.to_date AS end_date
            FROM `tabGym Fee` AS fee, `tabSales Invoice` AS si
            WHERE fee.docstatus = 1
                AND fee.membership = '{membership}'
                AND fee.reference_invoice = si.name
            ORDER BY fee.to_date DESC
        """.format(membership=self.name),
        as_dict=True,
    )
    # Materialize the filters so they can be counted and then iterated again
    # (under Python 3, `filter` returns a one-shot iterator).
    unpaid_fees = list(filter(lambda x: x.get('status') == 'Unpaid', all_fees))
    self.set_onload('total_invoices', count(all_fees))
    self.set_onload('unpaid_invoices', count(unpaid_fees))
    outstanding = reduce(operator.add, pluck('amount', unpaid_fees), 0)
    self.set_onload('outstanding', outstanding)
    paid_fees = list(filter(lambda x: x.get('status') == 'Paid', all_fees))
    end_date = get('end_date', first(paid_fees)) if paid_fees else None
    self.set_onload('end_date', end_date)
def preprocess(example, output_size=(HEIGHT, WIDTH), max_objects=50,
               training=True):
    image, objects, image_id = decode(example)

    # if training:
    #     image = random_resize(image, output_size, ratio_range=(0.8, 1.2))
    #     image, objects = random_crop(image, objects, output_size)
    #     image, objects = random_hflip(image, objects, 0.5)
    # else:
    image = resize(image, output_size)

    image = normalize(image, [123.68, 116.779, 103.939], [58.393, 57.12, 57.375])
    image, objects = pad_to(image, objects, output_size)

    gt_bboxes, gt_labels = get(['gt_bbox', 'gt_label'], objects)
    gt_bboxes = coords_to_absolute(gt_bboxes, tf.shape(image)[:2])
    objects = {**objects, 'gt_bbox': gt_bboxes}

    bbox_targets, labels, ignore = max_iou_match(
        gt_bboxes, gt_labels, bbox_coder, pos_iou_thr=0.5, neg_iou_thr=0.4,
        encode_bbox=False)

    objects = pad_objects(objects, max_objects)

    return image, {
        'bbox_target': bbox_targets,
        'label': labels,
        'ignore': ignore,
        **objects,
        'image_id': image_id,
    }
def get_pos_data():
    from erpnext.accounts.doctype.sales_invoice.pos import get_pos_data

    data = get_pos_data()
    allowed_items = get("bin_data", data, {}).keys()
    prices = _get_item_prices(allowed_items)

    def set_prices(item):
        get_price = compose(
            partial(get, seq=prices, default={}),
            partial(get, "item_code"),
        )
        return merge(item, get_price(item))

    trans_items = compose(
        partial(map, set_prices),
        partial(filter, lambda x: x.get("name") in allowed_items),
        partial(get, "items", default=[]),
    )
    add_branch = compose(
        flip(merge, {"os_branch": get_user_branch()}),
        lambda x: x.as_dict(),
        partial(get, "doc", default={}),
    )
    return merge(data, {"items": trans_items(data), "doc": add_branch(data)})
def preprocess(example, output_size=(HEIGHT, WIDTH), max_objects=50,
               training=True):
    image, objects, image_id = decode(example)

    # if training:
    #     image = random_resize(image, output_size, ratio_range=(0.8, 1.2))
    #     image, objects = random_crop(image, objects, output_size)
    #     image, objects = random_hflip(image, objects, 0.5)
    # else:
    image = resize(image, output_size)

    image = normalize(image, [123.68, 116.779, 103.939], [58.393, 57.12, 57.375])
    image, objects = pad_to(image, objects, output_size)

    gt_bboxes, gt_labels = get(['gt_bbox', 'gt_label'], objects)
    gt_bboxes = coords_to_absolute(gt_bboxes, tf.shape(image)[:2])
    objects = {**objects, 'gt_bbox': gt_bboxes}

    bbox_targets, labels, centerness = fcos_match(
        gt_bboxes, gt_labels, points, num_level_points, strides=strides,
        radius=0.5)

    objects = pad_objects(objects, max_objects)

    return image, {
        'bbox_target': bbox_targets,
        'label': labels,
        'centerness': centerness,
        **objects,
        'image_id': image_id,
    }
def partial_reduce(func, x, split_every, keepdims=False, dtype=None, name=None,
                   reduced_meta=None):
    """ Partial reduction across multiple axes.

    Parameters
    ----------
    func : function
    x : Array
    split_every : dict
        Maximum reduction block sizes in each dimension.

    Examples
    --------
    Reduce across axis 0 and 2, merging a maximum of 1 block in the 0th
    dimension, and 3 blocks in the 2nd dimension:

    >>> partial_reduce(np.min, x, {0: 1, 2: 3})    # doctest: +SKIP
    """
    name = (name or funcname(func)) + '-' + tokenize(func, x, split_every,
                                                     keepdims, dtype)
    parts = [
        list(partition_all(split_every.get(i, 1), range(n)))
        for (i, n) in enumerate(x.numblocks)
    ]
    keys = product(*map(range, map(len, parts)))
    out_chunks = [
        tuple(1 for p in partition_all(split_every[i], c)) if i in split_every else c
        for (i, c) in enumerate(x.chunks)
    ]
    if not keepdims:
        out_axis = [i for i in range(x.ndim) if i not in split_every]
        getter = lambda k: get(out_axis, k)
        keys = map(getter, keys)
        out_chunks = list(getter(out_chunks))
    dsk = {}
    for k, p in zip(keys, product(*parts)):
        decided = dict((i, j[0]) for (i, j) in enumerate(p) if len(j) == 1)
        dummy = dict(i for i in enumerate(p) if i[0] not in decided)
        g = lol_tuples((x.name,), range(x.ndim), decided, dummy)
        dsk[(name,) + k] = (func, g)
    graph = HighLevelGraph.from_collections(name, dsk, dependencies=[x])

    meta = x._meta
    if reduced_meta is not None:
        try:
            meta = func(reduced_meta, meta=True)
        # no meta keyword argument exists for func, and it isn't required
        except TypeError:
            meta = func(reduced_meta)
        # when no work can be computed on the empty array (e.g., func is a ufunc)
        except ValueError:
            pass

    # some functions can't compute empty arrays (those for which reduced_meta
    # fall into the ValueError exception) and we have to rely on reshaping
    # the array according to len(out_chunks)
    if not np.isscalar(meta) and meta.ndim != len(out_chunks):
        if len(out_chunks) == 0:
            meta = meta.sum()
        else:
            meta = meta.reshape((0,) * len(out_chunks))

    if np.isscalar(meta):
        return Array(graph, name, out_chunks, dtype=dtype)
    else:
        return Array(graph, name, out_chunks, meta=meta.astype(dtype))
    skip_prefetch=True))
it = iter(ds_train)
example = next(it)

image, objects, image_id = decode(example)

# if training:
#     image = random_resize(image, output_size, ratio_range=(0.8, 1.2))
#     image, objects = random_crop(image, objects, output_size)
#     image, objects = random_hflip(image, objects, 0.5)
# else:
image = resize(image, output_size)

image = normalize(image, [123.68, 116.779, 103.939], [58.393, 57.12, 57.375])
image, objects = pad_to(image, objects, output_size)

gt_bboxes, gt_labels = get(['gt_bbox', 'gt_label'], objects)
gt_bboxes = coords_to_absolute(gt_bboxes, tf.shape(image)[:2])
objects = {**objects, 'gt_bbox': gt_bboxes}

bbox_targets, labels, centerness = atss_match(
    gt_bboxes, gt_labels, anchors, num_level_bboxes, topk=9, centerness=True)

objects = pad_objects(objects, max_objects)
def combine(base_tuples):
    bases = tuple(np.stack(bs) for bs in zip(*base_tuples))
    return tuple(f(*get(inds, bases)) for (f, inds) in calls)
def _devices(attrs):
    return [
        '{}:{}'.format(*get(['PathOnHost', 'PathInContainer'], dev))
        for dev in attrs
    ]
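# Hedged usage sketch for _devices; the entry mirrors the shape of Docker API
# "Devices" records, but the values are made up for illustration:
attrs = [{'PathOnHost': '/dev/ttyUSB0',
          'PathInContainer': '/dev/ttyUSB0',
          'CgroupPermissions': 'rwm'}]
assert _devices(attrs) == ['/dev/ttyUSB0:/dev/ttyUSB0']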
def __getitem__(self, key):
    if isinstance(key, list):
        return RecordAggregate(dict(zip(key, get(key, self._data))),
                               self.x_axis, self.y_axis)
    return self._data[key]
def fn(row):
    mops = get(row.get("sales_invoice"), payments_grouped, {})
    return merge(row, mops, {"total_collected": sum(mops.values())})
def seger(sum_fn, x):
    return compose(
        sum_fn,
        partial(flip, filter, get(x.get("key"), sales_grouped, [])),
        seg_filter,
    )
def finalize(bases, **kwargs):
    data = {key: finalizer(get(inds, bases), **kwargs)
            for (key, finalizer, inds) in calls}
    return RecordAggregate(data, **kwargs)
def test_get():
    for tup in tuples:
        get([1, 2], tup)
def finalize(bases, **kwargs):
    data = {key: finalizer(get(inds, bases), **kwargs)
            for (key, finalizer, inds) in calls}
    return xr.Dataset(data)
def test_get():
    for tup in tuples:
        get(1, tup)
def output_transform(output):
    bbox_preds, cls_scores, centerness = get(
        ['bbox_pred', 'cls_score', 'centerness'], output, default=None)
    return postprocess(bbox_preds, cls_scores, bbox_coder, centerness,
                       topk=100, iou_threshold=0.6, score_threshold=0.05,
                       use_sigmoid=True)
def fn(a, p):
    key = get("key", p, None)
    seger = get("seger", p, lambda __: None)
    return merge(a, {key: seger(item_code)})
        lambda x: ("x" not in x) or ("y" not in x),
        "XY plot does not have X and Y.",
    ),
    (
        {"histogram"},
        lambda x: ("step" in x) and ("bins" in x),
        "Histogram cannot have STEP and BINS.",
    ),
    (
        {"line", "scatter", "bar"},
        lambda x: ("agg" in x["x"]) and ("agg" in x["y"]),
        "XY plot cannot have an aggregation on X and Y.",
    ),
    (
        {"histogram", "pie"},
        lambda x: ("agg" in get("x", x, {}))
        or ("agg" in get("y", x, {}))
        or ("agg" in get("axis", x, {})),
        "Histograms and pie charts cannot have aggregations.",
    ),
    (
        {"histogram", "pie"},
        lambda x: ("temporal" in get("x", x, {}))
        or ("temporal" in get("y", x, {}))
        or ("temporal" in get("axis", x, {})),
        "Histograms and pie charts cannot have temporal axes.",
    ),
    (
        {"histogram"},
        lambda x: ("x" in x) and ("y" in x),
        "Histograms can have X or Y, not both.",
    ),
    (
          list(t.get(markov_features, markov_tables(tree), str(0.0))) +
          list(t.get(markov_features, markov_tables(compress(tree)), str(0.0)))))


if __name__ == '__main__':
    markov_features = ['markov_N', 'markov_R', 'markov_NR',
                       'markov_RN', 'markov_NN', 'markov_RR']

    opts, args = getopt.getopt(sys.argv[1:], 'l:n', ['--language', '--normalize'])
    language = None
    normalize = False
    for o, a in opts:
        if o == '-l' or o == '--language':
            language = a + ','
        if o == '-n' or o == '--normalize':
            normalize = True

    # Generate header for feature values
    print(('language,' if language is not None else '') +
          (','.join(','.join(x[0] + (lambda s: '' if s[0] == '<' else '__' + s)(nf.__name__)
                             for nf in x[2])
                    for x in features)
           if normalize else ','.join(x[0] for x in features)) +
          ',' + ','.join(markov_features) +
          ',' + ','.join([('compressed_' + f) for f in markov_features]))

    # For each tree, compute its associated feature values
    for line in fileinput.input(args):
        tree = parse_sexp(line)[0]
        print((language or '') +
              ','.join([str(','.join(str(float(f[1](tree) / float(nf(tree)))) for nf in f[2])
                            if normalize else f[1](tree))
                        for f in features] +
                       list(t.get(markov_features, markov_tables(tree), str(0.0))) +
                       list(t.get(markov_features, markov_tables(compress(tree)), str(0.0)))))
def make_price(item_code):
    return compose(
        excepts(StopIteration, first, lambda x: {}),
        partial(get, item_code, default=[]),
        lambda x: get(x, prices, {}),
    )
def main(us_hexagons, historical_sightings, model_file, debug, output_file):
    logger.info(f"Reading hexagons from {us_hexagons.name}.")
    squatchcast_locations = pd.read_csv(us_hexagons)
    logger.info(f"Read {squatchcast_locations.shape[0]} hexagons.")

    logger.info(
        f"Reading historical sightings from {historical_sightings.name}.")
    historical_sightings_frame = pd.read_csv(historical_sightings).query(
        "~latitude.isnull()")
    logger.info(
        f"Read {historical_sightings_frame.shape[0]} historical_sightings.")

    if debug:
        logger.warning("Debug selected, pulling top five records.")
        squatchcast_locations = squatchcast_locations.head()

    num_locations = squatchcast_locations.shape[0]

    lats = []
    lons = []
    logger.info("Extracting hexagon lat / lon values.")
    for _, row in tqdm(squatchcast_locations.iterrows(), total=num_locations):
        lat, lon = h3.h3_to_geo(row.hex_address)
        lats.append(lat)
        lons.append(lon)
    squatchcast_locations.loc[:, "latitude"] = lats
    squatchcast_locations.loc[:, "longitude"] = lons

    session = requests.Session()
    logger.info(f"Retrieving the weather for {num_locations} locations.")
    weather_conditions = []
    failed = 0
    for _, row in tqdm(squatchcast_locations.iterrows(), total=num_locations):
        request = create_weather_request(row.latitude, row.longitude,
                                         DARK_SKY_KEY)
        try:
            weather_response = session.get(request)
            # Make sure the response worked.
            weather_response.raise_for_status()
            # Now parse the json.
            weather_conditions.append(weather_response.json())
        except requests.HTTPError:
            failed += 1
    logger.info(f"{failed} requests to Dark Sky failed.")

    # Extract the features into a list of dicts. Plan is to turn that into a
    # data frame and concatenate them to the squatchcast_locations.
    logger.info("Unpacking weather results.")
    squatchcast_features = []
    for weather in tqdm(weather_conditions, total=num_locations):
        # Append the current features.
        daily = get_in(["daily", "data"], weather, [])
        latitude = get("latitude", weather, np.nan)
        longitude = get("longitude", weather, np.nan)
        for conditions in daily:
            get_condition = curry(get)(seq=conditions, default=np.nan)
            squatchcast_features.append({
                "date": datetime.utcfromtimestamp(
                    get_condition("time")).strftime("%Y-%m-%d"),
                "latitude": latitude,
                "longitude": longitude,
                "precip_type": get("precipType", conditions,
                                   "no_precipitation"),
                "temperature_high": get_condition("temperatureHigh"),
                "temperature_low": get_condition("temperatureLow"),
                "dew_point": get_condition("dewPoint"),
                "humidity": get_condition("humidity"),
                "cloud_cover": get_condition("cloudCover"),
                "moon_phase": get_condition("moonPhase"),
                "precip_intensity": get_condition("precipIntensity"),
                "precip_probability": get_condition("precipProbability"),
                "pressure": get_condition("pressure"),
                "uv_index": get_condition("uvIndex"),
                "visibility": get_condition("visibility"),
                "wind_bearing": get_condition("windBearing"),
                "wind_speed": get_condition("windSpeed"),
            })

    squatchcast_frame = pd.DataFrame.from_records(squatchcast_features)

    logger.info(f"Loading model from {model_file}.")
    model = load(model_file)
    logger.info(
        f"Getting predictions for {squatchcast_frame.shape[0]} locations.")
    with yaspin(text="👣 Calculating squatchcast. 👣", color="cyan"):
        squatchcast_frame.loc[:, "squatchcast"] = model.predict_proba(
            squatchcast_frame[RAW_FEATURES])[:, 1]

    # Get the resolution the US hexagon file is at and index the squatchcast
    # results by that resolution.
    us_resolution = h3.h3_get_resolution(
        squatchcast_locations.head(1).hex_address[0])
    squatchcast_frame.loc[:, "hex_address"] = np.apply_along_axis(
        lambda x: h3.geo_to_h3(x[0], x[1], us_resolution),
        axis=1,
        arr=squatchcast_frame[["latitude", "longitude"]].values,
    )
    historical_sightings_frame.loc[:, "hex_address"] = np.apply_along_axis(
        lambda x: h3.geo_to_h3(x[0], x[1], us_resolution),
        axis=1,
        arr=historical_sightings_frame[["latitude", "longitude"]].values,
    )

    historical_sightings_agg = (
        historical_sightings_frame.groupby("hex_address")
        .agg({"number": "count"})
        .reset_index())

    # Now we need, for each day, a complete hexagonification of the US. We'll
    # do this in a groupby and concatenate.
    visualization_frames = []
    for date, frame in squatchcast_frame.groupby("date"):
        # Merge weather and US hexagons.
        weather_location_merge = pd.merge(
            squatchcast_locations.drop(columns=["latitude", "longitude"]),
            frame,
            on="hex_address",
            how="left",
        )
        # Merge historical sightings.
        visualization_frames.append(
            pd.merge(
                weather_location_merge,
                historical_sightings_agg,
                on="hex_address",
                how="left",
            ).fillna(0)
            .astype({"number": "int"})
            .rename(columns={"number": "historical_sightings"}))

    pd.concat(visualization_frames).to_csv(output_file, index=False)
def get_embedded(sentences):
    data = []
    labels = []

    def item(seq, idx):
        """Return seq[idx] if that (possibly negative) index exists, else None."""
        if seq is None:
            return None
        n = len(seq)
        return seq[idx] if -n <= idx < n else None

    def child(node, idx):
        """Return node.children[idx] if it exists, else None."""
        return item(node.children, idx) if node is not None else None

    def embed(node, attr, table, dim):
        """Look up the embedding of node.<attr>, falling back to a zero vector."""
        if node is None:
            return np.zeros(dim)
        return toolz.get(getattr(node, attr), table, np.zeros(dim))

    for buffer in sentences:
        if len(buffer) < 2:
            continue
        stack = []
        stack.append(buffer.pop(0))
        stack.append(buffer.pop(0))
        while True:
            s1, s2, s3 = item(stack, -1), item(stack, -2), item(stack, -3)
            b1, b2, b3 = item(buffer, 0), item(buffer, 1), item(buffer, 2)

            # Children and grandchildren of the two topmost stack items.
            child_nodes = [
                child(s1, 0), child(s1, 1), child(s1, -1), child(s1, -2),
                child(s2, 0), child(s2, 1), child(s2, -1), child(s2, -2),
                child(child(s1, 0), 0), child(child(s1, -1), -1),
                child(child(s2, 0), 0), child(child(s2, -1), -1),
            ]
            # Sw contains 18 elements
            word_nodes = [s1, s2, s3, b1, b2, b3] + child_nodes
            concat = [embed(n, 'word', word2vec, 300) for n in word_nodes]
            # St contains 18 elements
            concat += [embed(n, 'tag', tag2vec, 100) for n in word_nodes]
            # Sl contains 12 elements
            concat += [embed(n, 'label', label2vec, 100) for n in child_nodes]

            if len(stack) >= 2 and stack[-1].parent == stack[-2].index:
                # RIGHT-ARC  (stack[-2] => stack[-1])
                data.append(np.concatenate(concat))
                labels.append(label_id[stack[-1].label] + len(label_id) * 0 + 1)
                stack[-2].children.append(stack.pop(-1))
            elif len(stack) >= 2 and stack[-2].parent == stack[-1].index:
                # LEFT-ARC  (stack[-2] <= stack[-1])
                data.append(np.concatenate(concat))
                labels.append(label_id[stack[-2].label] + len(label_id) * 1 + 1)
                stack[-1].children.append(stack.pop(-2))
            else:
                # SHIFT
                if not buffer:
                    break
                data.append(np.concatenate(concat))
                labels.append(0)
                stack.append(buffer.pop(0))

    return np.array(data), np.array(labels)
def finalize(bases, **kwargs):
    data = {
        key: finalizer(get(inds, bases), **kwargs)
        for (key, finalizer, inds) in calls
    }
    return xr.Dataset(data)
def fn(a, p):
    key = get("key", p, None)
    seger = get("seger", p, lambda __: None)
    return merge(a, {key: seger(branch)})
def all_productions():
    return {k: t.get(['id', 'name'], db[k]) for k in db}
def directeur(self) -> Profile | None:
    direction = self.get_directeurs()
    direction = [d for d in direction if d.is_directeur]
    return toolz.get(0, direction, None)
def main(report_file, weather_file, weather_join_file):
    weather_reader = csv.reader(weather_file)
    # Load the weather into a dictionary.
    weather_cache = {
        # Extract the dict with the weather information.
        (r[0], r[1]): get_in(["daily", "data", 0], json.loads(r[-1]), {})
        for r in weather_reader
    }

    report_reader = csv.DictReader(report_file)
    fieldnames = report_reader.fieldnames + [
        "temperature_high", "temperature_mid", "temperature_low", "dew_point",
        "humidity", "cloud_cover", "moon_phase", "precip_intensity",
        "precip_probability", "precip_type", "pressure", "summary",
        "uv_index", "visibility", "wind_bearing", "wind_speed"
    ]
    writer = csv.DictWriter(weather_join_file, fieldnames=fieldnames)
    writer.writeheader()

    for line in report_reader:
        weather = get((line["geohash"], line["date"]), weather_cache, {})
        temperature_high = get("temperatureHigh", weather, None)
        temperature_low = get("temperatureLow", weather, None)
        line["temperature_high"] = temperature_high
        line["temperature_mid"] = (
            temperature_low + (temperature_high - temperature_low) / 2
        ) if temperature_high and temperature_low else None
        line["temperature_low"] = temperature_low
        line["dew_point"] = get("dewPoint", weather, None)
        line["humidity"] = get("humidity", weather, None)
        line["cloud_cover"] = get("cloudCover", weather, None)
        line["moon_phase"] = get("moonPhase", weather, None)
        line["precip_intensity"] = get("precipIntensity", weather, None)
        line["precip_probability"] = get("precipProbability", weather, None)
        line["precip_type"] = get("precipType", weather, None)
        line["pressure"] = get("pressure", weather, None)
        line["summary"] = get("summary", weather, None)
        line["uv_index"] = get("uvIndex", weather, None)
        line["visibility"] = get("visibility", weather, None)
        line["wind_bearing"] = get("windBearing", weather, None)
        line["wind_speed"] = get("windSpeed", weather, None)
        writer.writerow(line)
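# Note on the cache lookup above: a *tuple* index is a single dict key for
# toolz.get (only a list triggers the multi-key form). Small hedged sketch
# with made-up geohash/date values:
from toolz import get

cache = {('9q8y', '2019-03-01'): {'temperatureHigh': 60.0}}
assert get(('9q8y', '2019-03-01'), cache, {}) == {'temperatureHigh': 60.0}
assert get(('missing', 'key'), cache, {}) == {}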
def featurize(tree, normalize=True):
    markov_features = ['markov_N', 'markov_R', 'markov_NR',
                       'markov_RN', 'markov_NN', 'markov_RR']
    tree = parse_sexp(tree)[0]
    return (','.join([str(','.join(str(float(f[1](tree) / float(nf(tree)))) for nf in f[2])
                          if normalize else f[1](tree))
                      for f in features] +
                     list(t.get(markov_features, markov_tables(tree), str(0.0))) +
                     list(t.get(markov_features, markov_tables(compress(tree)), str(0.0)))))