def predict_autosequence(config, context, predict_index, fit_model=True,
                         update_column=None):
    """Predict the rows of ``predict_index`` one at a time, in order.

    The features are rebuilt with ``get_x`` before every single-row
    prediction.  When ``update_column`` is given, each prediction is written
    into that column of the data before the next row is processed, so later
    rows can be built from earlier predictions.

    Args:
        config: object providing ``model`` (with a ``predict`` method),
            ``prediction`` and ``predictions_name``.
        context: object providing ``train_index``, ``data`` and ``copy()``.
        predict_index: iterable of row labels to predict, in order.
        fit_model: when true, ``fit(config, context)`` is called first.
        update_column: optional column name that receives each prediction.

    Returns:
        A ``(preds, x, y)`` tuple.  ``preds`` holds the predictions indexed
        by ``predict_index``; ``x`` is the feature frame built for the last
        predicted row (or the one returned by ``fit`` when ``predict_index``
        is empty, or ``None`` when nothing was built); ``y`` is whatever
        ``fit`` returned, or ``None`` when ``fit_model`` is false.
    """
    # ``.intersection`` rather than ``&``: on recent pandas ``Index & Index``
    # is an element-wise logical AND, not a set intersection.
    if len(context.train_index.intersection(predict_index)):
        logging.warning("Train and predict indices overlap...")

    x, y = None, None
    if fit_model:
        x, y = fit(config, context)

    # Debug output must not crash the run: ``x`` is None when the model was
    # not fitted here, and not every model exposes ``coef_``.
    if x is not None:
        logging.debug(x.columns)
    logging.debug(getattr(config.model, 'coef_', None))

    ctx = context.copy()
    ps = []
    for i in predict_index:
        # NOTE(review): ``ctx.data`` is the very same object as
        # ``context.data``, so the update below is visible through both.
        ctx.data = context.data
        x = get_x(config, ctx)
        predict_x = x.reindex([i])

        # make actual predictions
        p = config.model.predict(predict_x.values)
        if update_column is not None:
            # Single ``.loc`` write instead of chained ``data[col][i] = ...``,
            # which is not guaranteed to write through to the frame.
            ctx.data.loc[i, update_column] = p[0]
        ps.append(p[0])

    try:
        preds = Series(ps, index=predict_index)
    except Exception:
        # Presumably multi-output predictions that do not fit a Series.
        preds = DataFrame(ps, index=predict_index)

    # prediction post-processing
    if config.prediction is not None:
        context.data[config.predictions_name] = preds
        preds = build_target_safe(config.prediction, context)
        preds = preds.reindex(predict_index)
    preds.name = ''
    return preds, x, y
def evaluate(config, ctx, predict_index,
             predict_method=None, predict_update_column=None):
    """Make predictions for ``predict_index`` and score them.

    Args:
        config: object providing ``metrics`` and, optionally, ``actual``.
        ctx: context handed through to the prediction function and to
            ``build_target_safe``.
        predict_index: row labels to predict and score.
        predict_method: optional replacement for ``predict``.  It is called
            as ``predict_method(config, ctx, predict_index,
            update_column=predict_update_column)``.
        predict_update_column: forwarded to ``predict_method`` only.

    Returns:
        ``(scores, result)``: ``scores`` maps each metric's name to its
        value, ``result`` is the unmodified return value of the prediction
        function.
    """
    if predict_method is None:
        result = predict(config, ctx, predict_index)
    else:
        # TODO: hacky!
        result = predict_method(config, ctx, predict_index,
                                update_column=predict_update_column)

    if isinstance(result, tuple):
        # ``predict_autosequence`` returns ``(predictions, x, y)`` rather
        # than a frame with 'predictions'/'actuals' columns.
        preds, _, y = result
    else:
        preds = result['predictions']
        y = result['actuals']

    # TODO: HACK -- there may not be an actual attribute on the config.
    # ``getattr`` covers exactly that case without also swallowing
    # AttributeErrors raised while the target itself is being built.
    actual = getattr(config, 'actual', None)
    if actual is not None:
        actuals = build_target_safe(actual, ctx).reindex(predict_index)
    else:
        actuals = y.reindex(predict_index)

    scores = {}
    for metric in config.metrics:
        name = get_metric_name(metric)
        # Metrics are either objects with a ``score`` method or callables.
        if hasattr(metric, 'score'):
            scores[name] = metric.score(actuals, preds)
        else:
            scores[name] = metric(actuals, preds)
    return scores, result
def predict(config, context, predict_index, fit_model=True, model_name=None):
    """Predict every row of ``predict_index`` in a single model call.

    Args:
        config: object providing ``model``, ``prediction``,
            ``predictions_name`` and ``update_reporters_with_predictions``.
        context: object providing ``train_index``, ``data``, ``store`` and
            ``copy()``.
        predict_index: row labels to predict.
        fit_model: when true and no ``model_name`` is given, the model is
            fitted with ``fit(config, context)`` first.
        model_name: when given, ``config.model`` is replaced by
            ``context.store.load(model_name)`` and no fitting takes place.

    Returns:
        The feature rows for ``predict_index`` with two extra columns,
        ``'predictions'`` and ``'actuals'``.  ``'actuals'`` is filled with
        ``None`` when no target values could be built.
    """
    # ``.intersection`` rather than ``&``: on recent pandas ``Index & Index``
    # is an element-wise logical AND, not a set intersection.
    if len(context.train_index.intersection(predict_index)):
        logging.warning("Train and predict indices overlap...")

    x, y = None, None
    if model_name:
        config.model = context.store.load(model_name)
    if not model_name and fit_model:
        x, y = fit(config, context)

    # TODO: possible to have x loaded without new prediction rows
    if x is None:
        # rebuild just the necessary x:
        ctx = context.copy()
        # ``.loc`` replaces ``.ix``, which was removed in pandas 1.0.
        ctx.data = context.data.loc[predict_index]
        x = get_x(config, ctx)
        try:
            # we may or may not have y's in predict context
            # we get them if we can for metrics and reporting
            y = get_y(config, ctx)
        except KeyError:
            pass

    logging.debug(x.columns)
    predict_x = x.reindex(predict_index)

    logging.info("Making predictions... ")
    # make actual predictions
    ps = config.model.predict(predict_x.values)
    try:
        preds = Series(ps, index=predict_x.index)
    except Exception:
        # Presumably multi-output predictions that do not fit a Series.
        preds = DataFrame(ps, index=predict_x.index)
    logging.info("...done.")

    # prediction post-processing
    if config.prediction is not None:
        old = context.data
        try:
            context.data = context.data.reindex(predict_x.index)
            context.data[config.predictions_name] = preds
            preds = build_target_safe(config.prediction, context)
            preds = preds.reindex(predict_x.index)
        finally:
            # Put the caller's data back even when post-processing fails.
            context.data = old
    preds.name = ''

    # ``y`` is still None when the target could not be built above.
    actuals = y.reindex(predict_index) if y is not None else None

    # TODO: handle multi-variate predictions
    predict_x['predictions'] = preds
    predict_x['actuals'] = actuals
    config.update_reporters_with_predictions(context, predict_x, actuals,
                                             preds)
    return predict_x
def build_target(self):
    """Build the target from ``self.target`` and split its index by value.

    Stores the built target on ``self.y``, the index labels whose value is
    truthy on ``self.positives`` and the remaining labels on
    ``self.negatives``.
    """
    # Only the first element of the returned pair is used here.
    target, _ = build_target_safe(self.target, self.data)
    is_positive = target.astype('bool')

    self.y = target
    self.positives = target[is_positive].index
    self.negatives = target[~is_positive].index
def get_y(config, context):
    """Return the result of building ``config.target`` within ``context``."""
    target = config.target
    return build_target_safe(target, context)