def to_nn(self, shape, scale_pixels=False):
    """Build neural-network arrays for every unique image id in ``self.df``.

    Each unique ``ImageId`` is converted by ``self._to_single_nn(shape)``,
    the per-image results are regrouped with ``list_unzip``, optionally
    augmented, turned into numpy arrays and finally handed to
    ``self._reshape_output``.

    :param shape: forwarded unchanged to ``self._to_single_nn``.
    :param scale_pixels: when truthy, the array at index 0 is divided by 255
        (presumably the image/pixel array -- confirm against
        ``_to_single_nn``).
    :returns: whatever ``self._reshape_output`` returns.
    """
    image_ids = self.df.ImageId.unique()
    convert_each = map(self._to_single_nn(shape))
    # Augmentation only runs when the instance has ``augment`` switched on.
    maybe_augment = iffy(constantly(self.augment), self._augment_nn)
    # Scaling touches only the element at index 0 of the array list.
    maybe_scale = iffy(
        constantly(scale_pixels),
        lens[0].modify(lambda values: values / 255),
    )
    return pipe(
        image_ids,
        convert_each,
        list_unzip,
        maybe_augment,
        map(np.array),
        list,
        maybe_scale,
        self._reshape_output,
    )
def by_topic_nodes(references=False, use_set=False, simple_label=False):
    """Group the init() frame by topic and return one row per topic.

    Resulting columns, in order: ``id``, ``label``, ``reference_count``,
    then ``references`` (only when ``references`` is truthy) and
    ``simple_label`` (only when ``simple_label`` is truthy).

    :param references: include a ``references`` column built with
        ``list(map(first, group))``.
    :param use_set: together with ``references``, convert each
        ``references`` entry to a ``frozenset``.
    :param simple_label: include a ``simple_label`` column: the topic text
        with ``(`` and ``)`` removed when that text is a ``str``; other
        values are passed through unchanged.
    """

    def is_text(value):
        return isinstance(value, str)

    def strip_parens(text):
        return text.replace('(', '').replace(')', '')

    group_key = compose(topic_id, second)

    # Insertion order is kept on purpose: it is the order handed to
    # _by_group_transform.
    columns = {
        'id': group_attr(topic_id),
        'label': group_attr(topic_text),
        'reference_count': len,
    }
    if references:
        columns['references'] = compose(list, map(first))
    if simple_label:
        columns['simple_label'] = compose(
            iffy(is_text, strip_parens),
            group_attr(topic_text),
        )

    frame = _by_group_transform(group_key, columns.items())

    if references and use_set:
        frame.references = frame.references.apply(frozenset)
    return frame
def representation(text_list, translation=esv, include_svd=True):
    """Vectorize a list of sentences and optionally reduce the result with SVD.

    :param text_list: sentences passed to the vocabulary vectorizer's
        ``transform``.
    :param translation: forwarded to both ``vocabulary_vectorizer`` and
        ``svd`` to select which fitted objects are used.
    :param include_svd: when truthy, the vectorized output is additionally
        passed through ``svd(...).transform``; otherwise it is returned
        as is.
    """
    # Both transformers are looked up up front, whether or not the SVD step
    # ends up being applied.
    vectorize = vocabulary_vectorizer(translation=translation).transform
    reduce_dimensions = svd(translation=translation).transform

    vectors = vectorize(text_list)
    if include_svd:
        return reduce_dimensions(vectors)
    return vectors
def default(self, obj):
    """Serialize ``Context`` and ``Binding`` objects as ordered mappings.

    For those two types a fixed list of attribute names is read from
    ``obj`` and passed through a small pipeline (helpers are defined
    elsewhere; the descriptions follow their names):

    1. pair every attribute name with its value,
    2. remove entries whose value satisfies ``isnone``,
    3. remove entries whose value is a list/dict satisfying ``isempty``,
    4. apply ``sort_dict`` to every remaining dict value,
    5. wrap the result in an ``OrderedDict``.

    Any other object is delegated to the parent class's ``default``.
    """

    def name_and_value(attr):
        return (attr, getattr(obj, attr))

    ordered_attrs = pipe(
        partial(map, name_and_value),
        partial(remove_values, isnone),
        partial(remove_values, all_fn(isa(list, dict), isempty)),
        partial(walk_values, iffy(isa(dict), sort_dict)),
        OrderedDict,
    )

    if isinstance(obj, Context):
        return ordered_attrs(['key', 'operator', 'operand', 'match_all'])
    if isinstance(obj, Binding):
        return ordered_attrs(['keys', 'command', 'args', 'context'])
    return super().default(obj)
def _read_sheet(sheet: Worksheet):
    """Read a worksheet into a list of dicts keyed by the first row's values.

    The first row supplies the field names. Every cell of each following
    row is stringified, has typographic quotes replaced by plain ASCII
    quotes, and is parsed with ``safe_loads``. A parsed result that is not
    ``None`` is kept; otherwise the fallback built from
    ``identity(cell.value)`` is used (presumably the raw cell value --
    confirm against the ``identity`` helper).
    """
    # Typographic quote characters and the plain quote each one becomes;
    # applied in this order.
    quote_fixes = (
        ("‘", "'"),
        ("’", "'"),
        ("´", "'"),
        ("“", '"'),
        ("”", '"'),
    )

    def parse_cell(cell):
        keep_parsed_or_fallback = F.iffy(F.notnone, F.identity,
                                         identity(cell.value))
        text = str(cell.value) or ""
        for fancy, plain in quote_fixes:
            text = text.replace(fancy, plain)
        return keep_parsed_or_fallback(safe_loads(text))

    rows = sheet.rows
    fields = [cell.value for cell in next(rows)]

    records = []
    for row in rows:
        values = [parse_cell(cell) for cell in row]
        records.append(dict(zip(fields, values)))
    return records
def get_data_frame(document_id: str,
                   raw: bool = False,
                   auth_args: Auth = Auth.shared()):
    """Download a document's OCR block file and load it into a data frame.

    The document's ``content`` entries are searched for the one whose
    ``format.code`` is ``"ocr-text-file-id"``; the last path segment of its
    ``attachment.url`` is used as the file id. The file is downloaded to
    ``/tmp/``, read as JSON lines and then deleted.

    :param document_id: id passed to ``Document.get``.
    :param raw: when true, return the frame exactly as read from the file.
    :param auth_args: authentication used for the document lookup and the
        file download.
    :returns: the raw frame when ``raw`` is true or the file has no rows;
        otherwise the frame with ``Geometry`` flattened into columns, the
        ``Polygon`` column expanded, indexed by ``Id`` and ordered by
        ``Block.sort``.
    :raises ValueError: when the file id resolves to ``None``.
    """
    auth = Auth(auth_args)
    document = Document.get(document_id, auth_args=auth_args)

    # NOTE(review): if ``c`` is toolz.curried, ``c.first`` raises
    # StopIteration when no content entry matches, so the ValueError below
    # would not be reached in that case -- confirm.
    file_id = pipe(
        document.get("content", []),
        c.filter(lambda item: item.get("format", {}).get("code") ==
                 "ocr-text-file-id"),
        c.first,
        c.get("attachment", default={}),
        c.get("url"),
        iffy(isa(str), lambda url: url.split("/")[-1]),
    )

    if file_id is None:
        raise ValueError(
            f"No block file found for document: '{document_id}'")

    files = Files(auth.session())
    filename = files.download(file_id, "/tmp/")

    # Always remove the downloaded file, even when parsing fails, so that
    # broken downloads do not pile up in /tmp.
    try:
        frame = pd.read_json(filename, lines=True)
    finally:
        os.remove(filename)

    if raw or len(frame) == 0:
        return frame

    return Block.sort(
        frame.drop(["Geometry"], axis=1).join(
            pd.json_normalize(frame.Geometry)).pipe(
                partial(
                    Frame.expand,
                    custom_columns=[
                        Frame.codeable_like_column_expander("Polygon")
                    ],
                )).set_index("Id"))
# Substrings of exception type names that must never be sent to Sentry.
NON_REPORTED_EXCEPTIONS = ["QueryExecutionError"]


def before_send(event, hint):
    """Sentry ``before_send`` hook: drop events for non-reported exceptions.

    Returns ``None`` (event discarded) when the hint carries ``exc_info``
    and the string form of the exception value's type contains any entry of
    ``NON_REPORTED_EXCEPTIONS``; otherwise returns ``event`` unchanged.
    """
    if "exc_info" not in hint:
        return event

    _, exc_value, _ = hint["exc_info"]
    type_repr = str(type(exc_value))
    for ignored in NON_REPORTED_EXCEPTIONS:
        if ignored in type_repr:
            return None
    return event


def init():
    """Initialise the Sentry SDK; a no-op when ``settings.SENTRY_DSN`` is falsy."""
    if not settings.SENTRY_DSN:
        return

    sentry_sdk.init(
        dsn=settings.SENTRY_DSN,
        environment=settings.SENTRY_ENVIRONMENT,
        release=__version__,
        before_send=before_send,
        send_default_pii=True,
        integrations=[
            FlaskIntegration(),
            SqlalchemyIntegration(),
            RedisIntegration(),
            RqIntegration(),
        ],
    )


# Forwards to sentry_sdk.capture_message only while a DSN is configured;
# without one the argument is handed back untouched.
capture_message = iffy(lambda _message: settings.SENTRY_DSN,
                       sentry_sdk.capture_message)
def parse_grid(data=None,
               sep=None,
               strip='udr',
               comment='#',
               empty='',
               ignore_blank=True,
               dtype=None,
               on_empty=None,
               quiet=False):
    """Parse a block of text into a rectangular 2-D numpy array.

    :param data: text to parse; when ``None`` it is read with
        ``clipboard_get()``.
    :param sep: regular expression handed to ``re.split`` for every line.
        When ``None`` it is chosen by ``guess_splitter``; if that yields
        ``None`` or ``''`` each line is split into single characters.
    :param strip: passed as ``dirs`` to ``subrect``; a falsy value skips
        that step.
    :param comment: comment marker; everything from its first match to the
        end of the line is removed. It is used as a regular-expression
        fragment, so special characters are not escaped.
    :param empty: cell text that counts as an empty cell.
    :param ignore_blank: drop lines that are empty (after comment removal).
    :param dtype: element type; when ``None`` it is inferred as ``int`` if
        every non-empty cell is all digits, ``float`` if every non-empty
        cell is digits with an optional fractional part, else ``object``.
    :param on_empty: value placed in empty cells when the element type is
        not ``object``.
    :param quiet: suppress the progress messages normally printed.
    :returns: the parsed array; a ``(0, 0)`` array when there is nothing to
        parse.
    """
    msg = fn.identity if quiet else print

    def nothing_to_parse():
        msg("No data to parse.")
        return np.empty((0, 0), dtype=dtype)

    if data is None:
        data = clipboard_get()
    lines = data.splitlines()

    # Strip leading blank line (generally from pasting something into
    # triple quotes)
    if lines and lines[0] == '':
        lines = lines[1:]
    if not lines:
        return nothing_to_parse()

    # Strip comments and blank lines
    if comment:
        lines = [re.sub(comment + '.*$', '', line) for line in lines]
    if ignore_blank:
        lines = [line for line in lines if line]

    # Input made up only of comments/blank lines leaves nothing behind;
    # without this guard the width computation below fails on max([]).
    if not lines:
        return nothing_to_parse()

    # Determine splitter and split lines into items
    if sep is None:
        sep = guess_splitter([line for line in lines if line])
    if sep is not None and sep != '':
        msg(f"Separating by {sep!r}")
        lines = [re.split(sep, line) for line in lines]
    else:
        msg("Separating by character")
        lines = [list(line) for line in lines]

    # Pad out shape to rectangular
    width = max(len(line) for line in lines)
    for line in lines:
        if len(line) < width:
            line.extend([''] * (width - len(line)))

    grid = np.array(lines, dtype=str)
    grid[grid == empty] = ''
    if strip:
        grid = subrect(grid, dirs=strip)

    # Determine dtype
    if dtype is None:
        dtype = object
        if all(re.match(r'^\d+$', d) for d in grid.flat if d):
            dtype = int
        elif all(re.match(r'^\d+\.?\d*$', d) for d in grid.flat if d):
            dtype = float

    # xform coerces items to type
    if dtype is object:
        # NOTE(review): cells equal to `empty` were rewritten to '' above,
        # so with a non-default `empty` this comparison no longer matches
        # them -- confirm whether comparing against '' was intended.
        def xform(x):
            return x if x != empty else None
    else:
        # Non-empty cells are converted with dtype; empty ones get on_empty.
        xform = fn.iffy(dtype, default=on_empty)
        # A None placeholder cannot live in a numeric array, so fall back
        # to an object array when empty cells remain and no on_empty value
        # was supplied.
        dtype = object if '' in grid.flat and on_empty is None else dtype

    h, w = grid.shape
    result = np.array([[xform(val) for val in line] for line in grid],
                      dtype=dtype)
    msg(f"Array is {h} rows x {w} cols of type {result.dtype}")
    return result
def passages_by_uuid(uuid, include_text=False):
    """Return the passages for the rows found for ``uuid``.

    Every row returned by ``find_by_uuid`` is converted with
    ``reference.init_raw_row``; the resulting list is handed to
    ``passage.init``.

    :param uuid: identifier passed to ``find_by_uuid``.
    :param include_text: when truthy, the result of ``passage.init`` is
        additionally passed through ``passage.text``.
    """
    build_passages = passage.init
    add_text = passage.text

    rows = find_by_uuid(uuid)
    references = rows.apply(reference.init_raw_row, axis=1).tolist()

    passages = build_passages(references)
    if include_text:
        return add_text(passages)
    return passages
# Substrings of exception type names that must never be sent to Sentry.
NON_REPORTED_EXCEPTIONS = ["QueryExecutionError"]


def before_send(event, hint):
    """Sentry ``before_send`` hook: drop events for non-reported exceptions.

    Returns ``None`` (event discarded) when the hint carries ``exc_info``
    and the string form of the exception value's type contains any entry of
    ``NON_REPORTED_EXCEPTIONS``; otherwise returns ``event`` unchanged.
    """
    if "exc_info" not in hint:
        return event

    _, exc_value, _ = hint["exc_info"]
    type_repr = str(type(exc_value))
    for ignored in NON_REPORTED_EXCEPTIONS:
        if ignored in type_repr:
            return None
    return event


def init():
    """Initialise the Sentry SDK; a no-op when ``settings.SENTRY_DSN`` is falsy."""
    if not settings.SENTRY_DSN:
        return

    sentry_sdk.init(
        dsn=settings.SENTRY_DSN,
        environment=settings.SENTRY_ENVIRONMENT,
        release=__version__,
        before_send=before_send,
        send_default_pii=True,
        integrations=[
            FlaskIntegration(),
            SqlalchemyIntegration(),
            RedisIntegration(),
            RqIntegration(),
        ],
    )


# Forwards to sentry_sdk.capture_exception only while a DSN is configured;
# without one the argument is handed back untouched.
capture_exception = iffy(lambda _error: settings.SENTRY_DSN,
                         sentry_sdk.capture_exception)
def map_types(self, types, func):
    """Apply ``func`` to every mapping value that is an instance of ``types``.

    The work is delegated to ``self.map``, which receives a callable that
    walks a mapping's values, replacing each value matching ``types`` with
    ``func(value)`` and leaving the others untouched.

    :param types: type (or types) checked via ``isa``.
    :param func: conversion applied to matching values.
    :returns: whatever ``self.map`` returns.
    """
    convert_matching = iffy(isa(types), func)
    walk_and_convert = partial(walk_values, convert_matching)
    return self.map(walk_and_convert)
def import_stage(self, harvest_object):
    '''
    The import stage will receive a HarvestObject object and will be
    responsible for:
        - performing any necessary action with the fetched object (e.g
          create a CKAN package).
          Note: if this stage creates or updates a package, a reference
          to the package must be added to the HarvestObject.
          Additionally, the HarvestObject must be flagged as current.
        - creating the HarvestObject - Package relation (if necessary)
        - creating and storing any suitable HarvestObjectErrors that may
          occur.
        - returning True if everything went as expected, False otherwise.

    The package is built from the JSON in ``harvest_object.content``; its
    ``id`` and ``name`` come from the munged guid and its ``owner_org`` from
    the harvest source dataset. If the object with the same guid in the
    source's second-to-last job carries the same ``integrity`` value, the
    update is skipped and False is returned.

    :param harvest_object: HarvestObject object
    :returns: True if everything went right, False if errors were found
              or the package was left unchanged
    '''
    # Validate before touching any attribute: logging the guid first would
    # raise AttributeError on a missing object and bypass this error path.
    if not harvest_object:
        logger.error('No harvest object received')
        self._save_object_error('No harvest object received')
        return False

    logger.debug("in import stage: %s", harvest_object.guid)

    try:
        self._set_config(harvest_object.job.source.config)
        # NOTE(review): `context` is not used further down in this method.
        context = {'model': model, 'session': Session, 'user': self.user}

        package_dict = json.loads(harvest_object.content)
        package_dict['id'] = munge_title_to_name(harvest_object.guid)
        package_dict['name'] = package_dict['id']

        # add owner_org
        source_dataset = get_action('package_show')(
            {'ignore_auth': True},
            {'id': harvest_object.source.id})
        owner_org = source_dataset.get('owner_org')
        package_dict['owner_org'] = owner_org

        try:
            # Find the object with the same guid in the previous job
            # (jobs[-2]; raises IndexError when there is none) and decode
            # its content when it has any.
            same_guid = compose(partial(eq, package_dict['id']),
                                attrgetter('guid'))
            prev_dict = iffy(json.loads)(_get_content(
                some(same_guid, harvest_object.source.jobs[-2].objects)))
            if prev_dict and prev_dict.get(
                    'integrity') == package_dict['integrity']:
                logger.info('Package not changed. Skip update')
                return False
        except IndexError:
            logger.debug('Skip integrity check. No previous data.')

        self._create_or_update_package(package_dict, harvest_object,
                                       'package_show')
        Session.commit()
        logger.debug("Finished record")
    except Exception:
        # Record any ordinary failure against the harvest object; unlike a
        # bare ``except`` this lets KeyboardInterrupt/SystemExit propagate.
        logger.exception('Something went wrong!')
        self._save_object_error('Exception in import stage',
                                harvest_object)
        return False
    return True
def show_experiments(all_experiments,
                     pager=True,
                     no_timestamp=False,
                     **kwargs):
    """Render the experiments table.

    :param all_experiments: experiment data handed to ``_collect_names``
        and ``experiments_table``.
    :param pager: forwarded to ``td.render``.
    :param no_timestamp: when true, the ``Created`` column is dropped.
    :param kwargs: ``include_metrics`` / ``exclude_metrics`` /
        ``include_params`` / ``exclude_params`` filter lists (consumed
        here), plus ``sort_by``, ``sort_order`` and ``precision`` which are
        forwarded to ``experiments_table``.
    """
    include_metrics = _parse_filter_list(kwargs.pop("include_metrics", []))
    exclude_metrics = _parse_filter_list(kwargs.pop("exclude_metrics", []))
    include_params = _parse_filter_list(kwargs.pop("include_params", []))
    exclude_params = _parse_filter_list(kwargs.pop("exclude_params", []))

    metric_names, param_names = _collect_names(
        all_experiments,
        include_metrics=include_metrics,
        exclude_metrics=exclude_metrics,
        include_params=include_params,
        exclude_params=exclude_params,
    )
    metric_headers = _normalize_headers(metric_names)
    param_headers = _normalize_headers(param_names)

    td = experiments_table(
        all_experiments,
        metric_headers,
        metric_names,
        param_headers,
        param_names,
        kwargs.get("sort_by"),
        kwargs.get("sort_order"),
        kwargs.get("precision"),
    )

    if no_timestamp:
        td.drop("Created")

    # Rows whose is_baseline value is truthy are rendered bold; the helper
    # column itself is not displayed.
    baseline_styler = iffy(constantly({"style": "bold"}), default={})
    row_styles = lmap(baseline_styler, td.column("is_baseline"))
    td.drop("is_baseline")

    # Fold the helper columns into the "Experiment" column, then drop them.
    merge_headers = ["Experiment", "queued", "ident_guide", "parent"]
    td.column("Experiment")[:] = map(prepare_exp_id,
                                     td.as_dict(merge_headers))
    td.drop(*merge_headers[1:])

    headers = {"metrics": metric_headers, "params": param_headers}
    styles = {
        "Experiment": {
            "no_wrap": True,
            "header_style": "black on grey93"
        },
        "Created": {
            "header_style": "black on grey93"
        },
    }
    header_bg_colors = {"metrics": "cornsilk1", "params": "light_cyan1"}
    styles.update({
        header: {
            # Metric columns are right-aligned and param columns
            # left-aligned. The previous value for params was the string
            # "params", which is not a justify setting.
            "justify": "right" if typ == "metrics" else "left",
            "header_style": f"black on {header_bg_colors[typ]}",
            # Every column of a group except the first is collapsible.
            "collapse": idx != 0,
            "no_wrap": typ == "metrics",
        }
        for typ, hs in headers.items() for idx, header in enumerate(hs)
    })

    td.render(
        pager=pager,
        borders=True,
        rich_table=True,
        header_styles=styles,
        row_styles=row_styles,
    )
# NOTE(review): the statement below is the tail of a definition that begins
# before this excerpt.
return fn


def mkclass(name: str, bases: Tuple = (), **clsattrs: Any) -> Any:
    "Creates a class called `name` deriving from `Base` followed by `bases`, with `clsattrs` as its class attributes."
    Gen = type(name, (Base, ) + bases, clsattrs)
    return Gen


def arity(fn: Callable) -> int:
    "Returns the number of parameters in the signature of `fn` (all declared parameters are counted, not only required ones)."
    return len(inspect.signature(fn).parameters)


# Wraps a value in a 1-tuple unless funcy.is_seqcont accepts it, in which
# case the value is returned unchanged.
always_tup = funcy.iffy(funcy.complement(funcy.is_seqcont), lambda x: (x, ))


class Piping(pipelib.BasePiping):
    """Piping objects are for (ab)using Python operator overloading to
    build small pipeline-DSL's.

    The most basic one will simply refuse to do anything - you have to give
    it instructions/permissions on everything it's made for ;-).
    """
    class Fresh(object):
        "Marker for Piping instances that have never been run"
        pass

    class Executed(object):
def show_experiments(all_experiments,
                     pager=True,
                     no_timestamp=False,
                     **kwargs):
    """Render the experiments table.

    :param all_experiments: experiment data handed to ``_collect_names``
        and ``experiments_table``.
    :param pager: forwarded to ``td.render``.
    :param no_timestamp: when true, the ``Created`` column and its header
        style are left out.
    :param kwargs: ``include_metrics`` / ``exclude_metrics`` /
        ``include_params`` / ``exclude_params`` filter lists (consumed
        here), plus ``sort_by``, ``sort_order`` and ``precision`` which are
        forwarded to ``experiments_table``.
    """
    filters = {
        key: _parse_filter_list(kwargs.pop(key, []))
        for key in (
            "include_metrics",
            "exclude_metrics",
            "include_params",
            "exclude_params",
        )
    }
    metric_names, param_names = _collect_names(all_experiments, **filters)

    metric_headers = _normalize_headers(metric_names)
    param_headers = _normalize_headers(param_names)

    td = experiments_table(
        all_experiments,
        metric_headers,
        metric_names,
        param_headers,
        param_names,
        kwargs.get("sort_by"),
        kwargs.get("sort_order"),
        kwargs.get("precision"),
    )

    # Header styles are positional: experiment column, created column, then
    # one entry per metric column and one per param column.
    experiment_style = {"no_wrap": True, "header_style": "black on grey93"}
    created_style = {"header_style": "black on grey93"}
    metric_styles = [{
        "justify": "right",
        "header_style": "black on cornsilk1",
        "no_wrap": True,
        "collapse": position != 0,
    } for position, _ in enumerate(metric_headers)]
    param_styles = [{
        "justify": "left",
        "header_style": "black on light_cyan1",
        "collapse": position != 0,
    } for position, _ in enumerate(param_headers)]

    if no_timestamp:
        td.drop("Created")
        styles = [experiment_style, *metric_styles, *param_styles]
    else:
        styles = [
            experiment_style, created_style, *metric_styles, *param_styles
        ]

    # Rows whose is_baseline value is truthy are rendered bold; the helper
    # column itself is not displayed.
    bold_row = {"style": "bold"}
    plain_row = {}
    row_styles = [
        bold_row if is_baseline else plain_row
        for is_baseline in td.column("is_baseline")
    ]
    td.drop("is_baseline")

    td.render(
        pager=pager,
        borders=True,
        rich_table=True,
        header_styles=styles,
        row_styles=row_styles,
    )