def qplot(x=None, y=None, data=None, facets=None, margins=False,
          geom='auto', xlim=None, ylim=None, log='', main=None,
          xlab=None, ylab=None, asp=None, **kwargs):
    """
    Quick plot

    Parameters
    ----------
    x : str | array_like
        x aesthetic
    y : str | array_like
        y aesthetic
    data : dataframe
        Data frame to use (optional). If not specified, an empty
        dataframe is used and string aesthetics are evaluated in the
        captured calling environment.
    facets : object
        Faceting specification. It is first tried as a grid
        specification (``parse_grid_facets``) and then as a wrap
        specification (``parse_wrap_facets``). If neither parser
        accepts it, a warning is issued and no faceting is done.
    margins : bool
        Passed on to ``facet_grid`` when grid faceting is used.
    geom : str | list | tuple
        *geom(s)* to do the drawing. Every ``'auto'`` entry is
        replaced by 'qq' if a ``sample`` aesthetic is mapped, by
        'bar' (discrete ``x``) or 'histogram' (otherwise) if only
        ``x`` is specified, and by 'point' if ``y`` is specified.
        The object passed in is never modified.
    xlim : tuple
        x-axis limits
    ylim : tuple
        y-axis limits
    log : str in ``{'x', 'y', 'xy'}``
        Which variables to log transform.
    main : str
        Plot title
    xlab : str
        x-axis label
    ylab : str
        y-axis label
    asp : str | float
        The y/x aspect ratio.
    **kwargs : dict
        Arguments passed on to the geom.

    Returns
    -------
    p: ggplot
        ggplot object

    Raises
    ------
    PlotnineError
        If ``geom`` contains ``'auto'`` and none of ``x``, ``y`` or
        ``sample`` is given, if a string ``x`` cannot be evaluated
        while choosing the geom, or if only a non list-like ``y``
        is given (the length of ``x`` cannot be inferred).
    """
    # NOTE(review): nothing in this function body reads ``xlim`` or
    # ``ylim``; confirm whether axis limits are meant to be applied.

    # Extract all recognizable aesthetic mappings from the parameters
    # String values e.g  "I('red')", "I(4)" are not treated as mappings
    #
    # The capture depth is relative to this frame, so the call has to
    # stay directly in the body of qplot (presumably 1 == the caller).
    environment = EvalEnvironment.capture(1)
    aesthetics = {} if x is None else {'x': x}
    if y is not None:
        aesthetics['y'] = y

    def is_mapping(value):
        """
        Return True if value is not enclosed in I() function
        """
        # Non-string values have no startswith/endswith and are
        # always treated as mappings
        with suppress(AttributeError):
            return not (value.startswith('I(') and value.endswith(')'))
        return True

    def I(value):
        return value

    # Environment in which "I(...)" strings evaluate to their argument
    I_env = EvalEnvironment([{'I': I}])

    for ae in kwargs.keys() & all_aesthetics:
        value = kwargs[ae]
        if is_mapping(value):
            aesthetics[ae] = value
        else:
            kwargs[ae] = I_env.eval(value)

    # List of geoms. Always work on a private list so that replacing
    # 'auto' below never modifies a list owned by the caller.
    if is_string(geom):
        geom = [geom]
    else:
        geom = list(geom)

    if data is None:
        data = pd.DataFrame()

    # Work out plot data, and modify aesthetics, if necessary
    def replace_auto(lst, str2):
        """
        Replace all occurrences of 'auto' in lst with str2
        """
        for i, value in enumerate(lst):
            if value == 'auto':
                lst[i] = str2
        return lst

    if 'auto' in geom:
        if 'sample' in aesthetics:
            replace_auto(geom, 'qq')
        elif y is None:
            if 'x' not in aesthetics:
                # Without this guard the lookups below fail with an
                # uninformative KeyError: 'x'
                raise PlotnineError(
                    "Cannot choose a geom automatically; "
                    "specify at least one of 'x', 'y' or 'sample'.")

            # If x is discrete we choose geom_bar &
            # geom_histogram otherwise. But we need to
            # evaluate the mapping to find out the dtype
            env = environment.with_outer_namespace(
                {'factor': pd.Categorical})

            if isinstance(aesthetics['x'], str):
                try:
                    x = env.eval(aesthetics['x'], inner_namespace=data)
                except Exception as err:
                    msg = "Could not evaluate aesthetic 'x={}'"
                    raise PlotnineError(
                        msg.format(aesthetics['x'])) from err
            elif not hasattr(aesthetics['x'], 'dtype'):
                x = np.asarray(aesthetics['x'])

            if array_kind.discrete(x):
                replace_auto(geom, 'bar')
            else:
                replace_auto(geom, 'histogram')
        else:
            if x is None:
                if pdtypes.is_list_like(aesthetics['y']):
                    # Plot y against its positional index
                    aesthetics['x'] = range(len(aesthetics['y']))
                    xlab = 'range(len(y))'
                    ylab = 'y'
                else:
                    # We could solve the issue in layer.compute_asthetics
                    # but it is not worth the extra complexity
                    raise PlotnineError(
                        "Cannot infer how long x should be.")
            replace_auto(geom, 'point')

    p = ggplot(aes(**aesthetics), data=data, environment=environment)

    def get_facet_type(facets):
        """
        Return 'grid', 'wrap' or 'null' for the facets specification
        """
        with suppress(PlotnineError):
            parse_grid_facets(facets)
            return 'grid'

        with suppress(PlotnineError):
            parse_wrap_facets(facets)
            return 'wrap'

        warn(
            "Could not determine the type of faceting, "
            "therefore no faceting.",
            PlotnineWarning)
        return 'null'

    if facets:
        facet_type = get_facet_type(facets)
        if facet_type == 'grid':
            p += facet_grid(facets, margins=margins)
        elif facet_type == 'wrap':
            p += facet_wrap(facets)
        else:
            p += facet_null()

    # Add geoms
    for g in geom:
        geom_name = 'geom_{}'.format(g)
        geom_klass = Registry[geom_name]
        stat_name = 'stat_{}'.format(geom_klass.DEFAULT_PARAMS['stat'])
        stat_klass = Registry[stat_name]
        # find params: only the keyword arguments that the geom or its
        # default stat know about, minus those already used as mappings
        recognized = (
            kwargs.keys() &
            (geom_klass.DEFAULT_PARAMS.keys() |
             geom_klass.aesthetics() |
             stat_klass.DEFAULT_PARAMS.keys() |
             stat_klass.aesthetics()))
        recognized = recognized - aesthetics.keys()
        params = {ae: kwargs[ae] for ae in recognized}
        p += geom_klass(**params)

    # pd.Series objects have name attributes. In a dataframe, the
    # series have the name of the column.
    labels = {}
    for ae in scaled_aesthetics & kwargs.keys():
        with suppress(AttributeError):
            labels[ae] = kwargs[ae].name

    # An explicit label wins; otherwise fall back to the .name of the
    # value (if it has one)
    with suppress(AttributeError):
        labels['x'] = xlab if xlab is not None else x.name

    with suppress(AttributeError):
        labels['y'] = ylab if ylab is not None else y.name

    if main is not None:
        labels['title'] = main

    if 'x' in log:
        p += scale_x_log10()

    if 'y' in log:
        p += scale_y_log10()

    if labels:
        p += labs(**labels)

    if asp:
        p += theme(aspect_ratio=asp)

    return p
def subset(self, which_terms):
    """Build a new :class:`DesignMatrixBuilder` restricted to some of
    this builder's terms.

    For example, if `builder` has terms `x`, `y`, and `z`, then::

      builder2 = builder.subset(["x", "z"])

    gives a builder whose design matrices contain only the columns
    belonging to the terms `x` and `z`. Assuming `x`, `y`, and `z` each
    produce a single output column, these two expressions will in
    general return the same thing::

      build_design_matrix([builder], data)[0][:, [0, 2]]
      build_design_matrix([builder2], data)[0]

    The important difference is that in the second case `data` does not
    need to contain any values for `y`. This is very useful when
    predicting from a subset of a model, a situation in which R usually
    forces you to supply dummy values for `y`.

    When the terms are given as a formula, remember that, like any
    formula, it includes the intercept term by default; use `0` or `-1`
    in the formula to leave it out.

    :arg which_terms: The terms to keep in the new
      :class:`DesignMatrixBuilder`. A string is parsed as a formula and
      the names of the resulting terms are the terms to keep. A list
      may mix term names (as strings) and :class:`Term` objects.
    :returns: A new :class:`DesignMatrixBuilder` holding only the
      requested terms, together with the evaluators and column builders
      those terms need.
    :raises PatsyError: If the formula has a left-hand side, or if a
      requested term is not part of this builder.

    .. versionadded:: 0.2.0
    """
    evaluator_by_factor = {ev.factor: ev for ev in self._evaluators}
    info = self.design_info
    term_by_name = dict(zip(info.term_names, info.terms))

    if isinstance(which_terms, basestring):
        # This EvalEnvironment is never really used. We only want to
        # find matching terms, and that cannot be done with == on Term
        # objects: it calls == on the factor objects, which in turn
        # compares EvalEnvironments. So the parsed formula is used only
        # for its term *names*, which the EvalEnvironment does not
        # affect. The empty environment is just a placeholder that lets
        # the ModelDesc be created.
        placeholder_env = EvalEnvironment({})
        parsed = ModelDesc.from_formula(which_terms, placeholder_env)
        if parsed.lhs_termlist:
            raise PatsyError("right-hand-side-only formula required")
        which_terms = [rhs_term.name() for rhs_term in parsed.rhs_termlist]

    kept_terms = []
    needed_evaluators = set()
    kept_column_builders = {}
    for requested in which_terms:
        term = requested
        if isinstance(requested, basestring):
            # Names are resolved through the design info's name table
            if requested in term_by_name:
                term = term_by_name[requested]
            else:
                raise PatsyError(
                    "requested term %r not found in "
                    "this DesignMatrixBuilder" % (requested, ))
        if term not in self._termlist:
            raise PatsyError(
                "requested term '%s' not found in this "
                "DesignMatrixBuilder" % (term, ))
        # Every factor of a kept term needs its evaluator in the new
        # builder
        needed_evaluators.update(
            evaluator_by_factor[factor] for factor in term.factors)
        kept_terms.append(term)
        kept_column_builders[term] = self._term_to_column_builders[term]

    return DesignMatrixBuilder(
        kept_terms, needed_evaluators, kept_column_builders)