def bind_layout(self, layout, errors_missing=lib_errors.RAISE):
    """Returns a clone of this How with its target bound to a specific layout.

    The target of a How instance may point to a string label column name, a
    concrete LabelColumn instance, a generic GType class, or a GTypeLabeler
    instance, so some work may be necessary to map self.target onto a layout:

    1. If self.target is GTypeTarget:
       - target.gtype matches one column: new LabelColumnTarget(layout.label_column(gtype))
       - target.gtype matches multiple columns: raise MultipleLabelColumnsError (always)
       - target.gtype matches no columns: raise NoLabelColumnsError (RAISE) or None (DROP)
    2. If self.target is GTypeLabelerTarget:
       - target.labeler matches one column:
         new LabelColumnTarget(layout.label_column(gtype, label_kwargs))
       - target.labeler matches multiple columns: raise MultipleLabelColumnsError (always)
       - target.labeler matches no columns: raise NoLabelColumnsError (RAISE) or None (DROP)
    3. If self.target is LabelColumnNameTarget:
       - target.name in layout: new LabelColumnTarget(layout.label_column(name))
       - target.name not in layout: raise NoLabelColumnsError (RAISE) or None (DROP)
    4. If self.target is LabelColumnTarget:
       - target.label_column in layout: target unchanged
       - target.label_column not in layout: raise NoLabelColumnsError (RAISE) or None (DROP)

    Args:
      layout: the layout that self.target is bound to.
      errors_missing: lib_errors.RAISE to propagate NoLabelColumnsError, or
        lib_errors.DROP to return None instead.

    Returns:
      A clone of this How whose target is bound to `layout`, or None when
      nothing matched and errors_missing is DROP.
    """
    lib_errors.validate_errors(errors=errors_missing, drop_ok=True, raise_ok=True)
    try:
        bound_target = self.target.bind_layout(layout=layout)
        bound_how = self.clone(target=bound_target)
    except lib_errors.NoLabelColumnsError:
        if errors_missing == lib_errors.DROP:
            # Caller asked for unmatched hows to be dropped rather than to fail.
            bound_how = None
        else:
            raise
    return bound_how
def parse_hows(hows, require=(), bind_layout=None, errors=lib_errors.RAISE):
    """Normalizes `hows` into a sequence of How clauses that each own a via_column.

    Accepted shapes for `hows`:
      - dict: a mapping of {via_column: target-or-How (or a tuple/list of them)}.
      - tuple/list: How instances that already belong to some via_column.
      - anything else: a single How that already belongs to some via_column.

    Args:
      hows: the how clause(s) to normalize, in one of the shapes above.
      require: attribute names that must be non-None on every how.target.
      bind_layout: optional layout; when truthy, every how is bound to it with
        How.bind_layout(layout=bind_layout, errors_missing=errors).
      errors: lib_errors.RAISE or lib_errors.DROP. With DROP, hows that cannot
        be bound or that lack a required attribute are removed from the result.

    Returns:
      A tuple when binding and/or `require` checking ran. Otherwise the list
      built from a dict or single how, or the very tuple/list passed in.

    Raises:
      TypeError: a how is None, a bare target was given without a via_column,
        or (with RAISE) a required attribute is missing from a target.
      ValueError: a How has no via_column, or its via_column conflicts with
        the dict key it was supplied under.
    """
    # TODO(rob): Add support for binding to tables, datasets, and users?

    def as_hows(item, column=None):
        """Coerces one how-like item into a list of Hows belonging to `column`."""
        if item is None:
            raise TypeError('how is None')

        if isinstance(item, (tuple, list)):
            # Every member of the sequence must/will belong to the same provided column.
            flat = []
            for member in item:
                flat.append(more_itertools.one(as_hows(item=member, column=column)))
            return flat

        if not isinstance(item, How):
            if column:
                # A bare target: wrap it in a How that belongs to the provided column.
                return [How(target=parse_target(item), via_column=column)]
            raise TypeError('how has bare target; try how={col_name: target}: %r' % (item,))

        if column is None:
            # No column was given, so the How must already belong to *some* via_column.
            if item.via_column is None:
                raise ValueError('how.via_column is empty: %r' % (item,))
            return [item]
        if item.via_column is None:
            # Unowned How: adopt the provided column on a clone.
            return [item.clone(via_column=column)]
        if item.via_column != column:
            # Owned How: its via_column must agree with the provided column.
            raise ValueError('how.via_column does not match %s: %r' % (column, item.via_column))
        return [item]

    lib_errors.validate_errors(errors=errors, drop_ok=True, raise_ok=True)

    # 1. Provided hows may be specified as {via_column: target/How}, [How, ...], or How.
    if isinstance(hows, dict):
        ret = [
            parsed
            for column, item in hows.items()
            for parsed in as_hows(item=item, column=column)
        ]
    elif isinstance(hows, (tuple, list)):
        # Sequences are only validated; the caller's own object is passed through as-is.
        for candidate in hows:
            if not isinstance(candidate, How):
                raise TypeError('tuple how has bare target; try how={col_name: target}: %r' % (candidate,))
            if candidate.via_column is None:
                raise ValueError('how.via_column is empty: %r' % (candidate,))
        ret = hows
    else:
        ret = as_hows(item=hows)

    # 2. If provided, bind all how clauses to the given layout; unbound (None) hows are dropped.
    if bind_layout:
        bound = (h.bind_layout(layout=bind_layout, errors_missing=errors) for h in ret)
        ret = tuple(b for b in bound if b is not None)

    # 3. Assert that all required attributes are present on all how clauses.
    if require:
        kept = []
        for candidate in ret:
            dropped = False
            for attr in require:
                if getattr(candidate.target, attr, None) is not None:
                    continue
                if errors == lib_errors.RAISE:
                    raise TypeError('cannot extract %s: %r' % (attr, candidate.target))
                if errors == lib_errors.DROP:
                    dropped = True
                    break
            if not dropped:
                kept.append(candidate)
        ret = tuple(kept)

    return ret
def right_index_of(self, value, errors=lib_errors.RAISE):
    """Returns ceil((value - self.head) / self.step) for `value`.

    Args:
      value: the value to locate; membership is tested with `value in self`.
      errors: how to treat a value that is not in self. lib_errors.RAISE raises
        ValueError. lib_errors.COERCE swaps the value for self.head when it is
        below self.head, and for self.bomb otherwise. Any other mode accepted
        by validate_errors leaves the value untouched.

    Raises:
      ValueError: value is not in self and errors is lib_errors.RAISE.
    """
    lib_errors.validate_errors(errors, coerce_ok=True, ignore_ok=True, raise_ok=True)

    out_of_sequence = value not in self
    if out_of_sequence:
        if errors == lib_errors.RAISE:
            raise ValueError('value out of sequence: %r' % (value,))
        if errors == lib_errors.COERCE:
            if value < self.head:
                value = self.head
            else:
                value = self.bomb

    offset = value - self.head
    return math.ceil(offset / self.step)
def fit_snug_hows(hows, layouts, errors_missing=lib_errors.RAISE):
    """Returns Hows bound as tightly as possible to a set of layouts.

    A how whose target already has a labeler is returned unchanged. Any other
    how is cloned with a new labeler as its target:
      - with layouts: the how is bound to each layout in turn (layouts it
        cannot bind to are skipped), and the labeler uses the smallest step
        and smallest head found among the bound targets' labelers;
      - without layouts: the labeler uses the target gtype's SANE_STEP and
        HEAD (0 when HEAD is None).

    Args:
      hows: how clause(s) in any shape accepted by parse_hows().
      layouts: iterable of layouts, or a falsy value for "no layouts". It is
        materialized once, so a one-shot iterable (e.g. a generator) is seen
        in full by every how rather than only by the first.
      errors_missing: lib_errors.RAISE to propagate NoLabelColumnsError, or
        lib_errors.DROP to omit hows that match no layout.

    Returns:
      A tuple of fitted Hows.

    Raises:
      lib_errors.MultipleLabelColumnsError: a how binds to more than one gtype
        across the layouts.
      lib_errors.NoLabelColumnsError: a how binds to none of the layouts and
        errors_missing is RAISE.
    """
    lib_errors.validate_errors(errors=errors_missing, drop_ok=True, raise_ok=True)
    hows = parse_hows(hows=hows)

    # Capture truthiness before materializing so that a truthy-but-empty iterable is
    # still treated as "layouts were provided", exactly as it was before.
    has_layouts = bool(layouts)
    layouts = tuple(layouts) if has_layouts else ()

    def snug_labeler(how):
        """Builds the snug labeler for a how whose target has no labeler of its own."""
        if not has_layouts:
            # With no provided layouts, how.target *must* have a gtype or we can't create a labeler.
            gtype = how.target.gtype
            return gtype.labeler(step=gtype.SANE_STEP, head=pdutils.coalesce(gtype.HEAD, 0))

        gtypes = set()
        steps = set()
        heads = set()
        for layout in layouts:
            # Bind how to layout if possible; ignore layouts it can't be bound to (DROP).
            # This will legitimately happen with the 2 layouts: ('lat_lng', 'year_month').
            bound = how.bind_layout(layout=layout, errors_missing=lib_errors.DROP)
            if bound is None:
                continue
            gtypes.add(bound.target.gtype)
            steps.add(bound.target.labeler.step)
            heads.add(bound.target.labeler.head)

        if len(gtypes) > 1:
            # Since a GTypeLabeler can label exactly one gtype, crash if we found multiple.
            raise lib_errors.MultipleLabelColumnsError(
                'found %d gtype matches in layouts: %r' % (len(gtypes), how))
        if not gtypes:
            # A spurious how clause (no matches with any layouts) also cannot create a labeler.
            raise lib_errors.NoLabelColumnsError('found 0 gtype matches in layouts: %r' % (how,))

        # At least one layout matched; use min(step/head) across them for the labeler.
        gtype = more_itertools.one(gtypes)
        return gtype.labeler(step=min(steps), head=min(heads))

    def fit_how(how):
        """Returns `how` fitted with a snug labeler, or None when it is dropped."""
        if how.target.labeler is not None:
            # How is already bound to a labeler (GTypeLabelerTarget or LabelColumnTarget); done.
            return how
        # else: How is bound to a gtype/label_column_name; give it the most snug labeler.
        try:
            return how.clone(target=snug_labeler(how))
        except lib_errors.NoLabelColumnsError:
            if errors_missing == lib_errors.DROP:
                return None
            raise

    fitted = (fit_how(h) for h in hows)
    return tuple(h for h in fitted if h is not None)