コード例 #1
0
ファイル: folds.py プロジェクト: mengfan/ramp
 def __iter__(self):
     """Yield one ``(train, test)`` pair of indexes per fold.

     Each fold rebuilds the target with ``build_target`` and splits its
     index into positives (target != 0) and negatives (target == 0).
     Test rows are drawn first with ``random.sample``; training rows are
     then drawn from the class index with the test rows subtracted. The
     sample sizes come from ``self.pos_test``, ``self.neg_test``,
     ``self.pos_train`` and ``self.neg_train``.

     Yields:
         A ``(pd.Index(train), pd.Index(test))`` tuple.
     """
     for _ in range(self.folds):
         y = build_target(self.config.target, self.context)
         positives = y[y != 0].index
         negatives = y[y == 0].index

         test_positives = random.sample(positives, self.pos_test)
         test_negatives = random.sample(negatives, self.neg_test)
         # NOTE(review): ``index - list`` is used as a set difference so
         # that test rows are excluded from the training pool -- confirm
         # the installed pandas still gives ``-`` that meaning.
         train_positives = random.sample(positives - test_positives,
                                         self.pos_train)
         train_negatives = random.sample(negatives - test_negatives,
                                         self.neg_train)
         test = test_positives + test_negatives
         train = train_positives + train_negatives

         if self.verbose:
             # Single-argument, parenthesised prints produce the same
             # output whether ``print`` is a statement or a function.
             print("Sampled Folds:")
             print("\tPos\tNeg\tPos pct")
             print("Train:\t%d\t%d\t%0.3f" % (
                 self.pos_train, self.neg_train,
                 self.pos_train / float(self.pos_train + self.neg_train)))
             print("Test:\t%d\t%d\t%0.3f" % (
                 self.pos_test, self.neg_test,
                 self.pos_test / float(self.neg_test + self.pos_test)))
         yield pd.Index(train), pd.Index(test)
コード例 #2
0
ファイル: models.py プロジェクト: robotsamurai/ramp
def get_xy(config, context):
    """Build the feature matrix and the target for ``config``.

    The features come from ``build_featureset(config.features, context)``
    and the target from ``build_target(config.target, context)``. When
    ``config.column_subset`` is truthy the features are restricted to
    those columns.

    Returns:
        An ``(x, y)`` tuple.
    """
    features = build_featureset(config.features, context)
    target = build_target(config.target, context)

    if not config.column_subset:
        return features, target
    return features[config.column_subset], target
コード例 #3
0
ファイル: folds.py プロジェクト: mengfan/ramp
 def __iter__(self):
     """Yield one ``(train, test)`` pair of indexes per fold.

     Each fold rebuilds the target with ``build_target`` and splits its
     index into positives (target != 0) and negatives (target == 0).

     * Test set: ``positive_proportion_test`` of the positives, plus as
       many negatives as needed for positives to make up
       ``positive_ratio_test`` of the test rows.
     * Training set: the positives left over after the test draw; when
       ``positive_ratio_train`` is truthy the negatives are sampled down
       to that ratio, otherwise the training index is the full index
       with the test rows subtracted.

     Yields:
         A ``(pd.Index(train), pd.Index(test))`` tuple.
     """
     for _ in range(self.folds):
         y = build_target(self.config.target, self.context)
         positives = y[y != 0].index
         negatives = y[y == 0].index
         n_pos = len(positives)
         n_neg = len(negatives)

         test_positives = random.sample(
             positives, int(n_pos * self.positive_proportion_test))
         n_pos_test = len(test_positives)
         # For a positive share r, negatives = positives * (1 / r - 1).
         test_negatives = random.sample(
             negatives,
             int(n_pos_test * (1 / self.positive_ratio_test - 1)))
         n_neg_test = len(test_negatives)
         test = test_positives + test_negatives

         n_pos_train = n_pos - n_pos_test
         # NOTE(review): ``index - list`` is used as a set difference
         # below -- confirm the installed pandas still gives ``-`` that
         # meaning.
         if self.positive_ratio_train:
             train_negs = random.sample(
                 negatives - test_negatives,
                 int(n_pos_train * (1 / self.positive_ratio_train - 1)))
             train = train_negs + list(positives - test_positives)
             n_neg_train = len(train_negs)
         else:
             train = y.index - test
             n_neg_train = n_neg - n_neg_test

         if self.verbose:
             # Single-argument, parenthesised prints produce the same
             # output whether ``print`` is a statement or a function.
             print("Weighted Sample Folds:")
             print("\tPos\tNeg\tPos pct")
             print("Train:\t%d\t%d\t%0.3f" % (
                 n_pos_train, n_neg_train,
                 n_pos_train / float(n_pos_train + n_neg_train)))
             print("Test:\t%d\t%d\t%0.3f" % (
                 n_pos_test, n_neg_test,
                 n_pos_test / float(n_neg_test + n_pos_test)))
         yield pd.Index(train), pd.Index(test)
コード例 #4
0
ファイル: models.py プロジェクト: mengfan/ramp
def evaluate(config,
             ctx,
             predict_index,
             predict_method=None,
             predict_update_column=None):
    """Predict for ``predict_index`` and score the result.

    Args:
        config: object providing ``metrics`` and, optionally, ``actual``.
        ctx: context handed through to the prediction function and to
            ``build_target``.
        predict_index: index of the rows to predict and score.
        predict_method: optional replacement for ``predict``; it is
            called as ``predict_method(config, ctx, predict_index,
            update_column=predict_update_column)``.
        predict_update_column: forwarded to ``predict_method`` only.

    Returns:
        ``(scores, result)`` where ``scores`` maps each metric name to
        its value and ``result`` is whatever the prediction function
        returned (it must support ``result['predictions']`` and
        ``result['actuals']``).
    """
    if predict_method is None:
        result = predict(config, ctx, predict_index)
    else:
        # TODO: hacky!
        result = predict_method(config,
                                ctx,
                                predict_index,
                                update_column=predict_update_column)
    preds = result['predictions']
    y = result['actuals']

    # TODO: HACK -- there may not be an actual attribute on the config.
    # Only the attribute lookup is tolerant; an AttributeError raised
    # while building the target is a real error and must propagate.
    actual_definition = getattr(config, 'actual', None)
    if actual_definition is not None:
        actuals = build_target(actual_definition, ctx).reindex(predict_index)
    else:
        actuals = y.reindex(predict_index)

    scores = {}
    for metric in config.metrics:
        name = get_metric_name(metric)
        # Metrics are either objects exposing ``score`` or plain callables.
        if hasattr(metric, 'score'):
            scores[name] = metric.score(actuals, preds)
        else:
            scores[name] = metric(actuals, preds)
    return scores, result
コード例 #5
0
ファイル: models.py プロジェクト: mengfan/ramp
def evaluate(config, ctx, predict_index,
             predict_method=None, predict_update_column=None):
    """Predict for ``predict_index`` and score the result.

    Args:
        config: object providing ``metrics`` and, optionally, ``actual``.
        ctx: context handed through to the prediction function and to
            ``build_target``.
        predict_index: index of the rows to predict and score.
        predict_method: optional replacement for ``predict``; it is
            called as ``predict_method(config, ctx, predict_index,
            update_column=predict_update_column)``.
        predict_update_column: forwarded to ``predict_method`` only.

    Returns:
        ``(scores, result)`` where ``scores`` maps each metric name to
        its value and ``result`` is whatever the prediction function
        returned (it must support ``result['predictions']`` and
        ``result['actuals']``).
    """
    if predict_method is None:
        result = predict(config, ctx, predict_index)
    else:
        # TODO: hacky!
        result = predict_method(config, ctx, predict_index,
                                update_column=predict_update_column)
    preds = result['predictions']
    y = result['actuals']

    # TODO: HACK -- there may not be an actual attribute on the config.
    # Only the attribute lookup is tolerant; an AttributeError raised
    # while building the target is a real error and must propagate.
    actual_definition = getattr(config, 'actual', None)
    if actual_definition is not None:
        actuals = build_target(actual_definition, ctx).reindex(predict_index)
    else:
        actuals = y.reindex(predict_index)

    scores = {}
    for metric in config.metrics:
        name = get_metric_name(metric)
        # Metrics are either objects exposing ``score`` or plain callables.
        if hasattr(metric, 'score'):
            scores[name] = metric.score(actuals, preds)
        else:
            scores[name] = metric(actuals, preds)
    return scores, result
コード例 #6
0
ファイル: folds.py プロジェクト: mengfan/ramp
 def __iter__(self):
     """Yield one ``(train, test)`` pair of indexes per fold.

     Each fold rebuilds the target with ``build_target`` and splits its
     index into positives (target != 0) and negatives (target == 0).

     * Test set: ``positive_proportion_test`` of the positives, plus as
       many negatives as needed for positives to make up
       ``positive_ratio_test`` of the test rows.
     * Training set: the positives left over after the test draw; when
       ``positive_ratio_train`` is truthy the negatives are sampled down
       to that ratio, otherwise the training index is the full index
       with the test rows subtracted.

     Yields:
         A ``(pd.Index(train), pd.Index(test))`` tuple.
     """
     for _ in range(self.folds):
         y = build_target(self.config.target, self.context)
         positives = y[y != 0].index
         negatives = y[y == 0].index
         n_pos = len(positives)
         n_neg = len(negatives)

         test_positives = random.sample(
             positives, int(n_pos * self.positive_proportion_test))
         n_pos_test = len(test_positives)
         # For a positive share r, negatives = positives * (1 / r - 1).
         test_negatives = random.sample(
             negatives,
             int(n_pos_test * (1 / self.positive_ratio_test - 1)))
         n_neg_test = len(test_negatives)
         test = test_positives + test_negatives

         n_pos_train = n_pos - n_pos_test
         # NOTE(review): ``index - list`` is used as a set difference
         # below -- confirm the installed pandas still gives ``-`` that
         # meaning.
         if self.positive_ratio_train:
             train_negs = random.sample(
                 negatives - test_negatives,
                 int(n_pos_train * (1 / self.positive_ratio_train - 1)))
             train = train_negs + list(positives - test_positives)
             n_neg_train = len(train_negs)
         else:
             train = y.index - test
             n_neg_train = n_neg - n_neg_test

         if self.verbose:
             # Single-argument, parenthesised prints produce the same
             # output whether ``print`` is a statement or a function.
             print("Weighted Sample Folds:")
             print("\tPos\tNeg\tPos pct")
             print("Train:\t%d\t%d\t%0.3f" % (
                 n_pos_train, n_neg_train,
                 n_pos_train / float(n_pos_train + n_neg_train)))
             print("Test:\t%d\t%d\t%0.3f" % (
                 n_pos_test, n_neg_test,
                 n_pos_test / float(n_neg_test + n_pos_test)))
         yield pd.Index(train), pd.Index(test)
コード例 #7
0
ファイル: models.py プロジェクト: Afey/ramp
def predict_autosequence(config, context, predict_index, fit_model=True, update_column=None):
    """Predict the rows of ``predict_index`` one at a time, in order.

    For every label the features are rebuilt with ``get_x`` and a single
    row is predicted. When ``update_column`` is given, the prediction is
    written into that column of the context data before the next label
    is processed.

    Args:
        config: object providing ``model``, ``prediction`` and
            ``predictions_name``.
        context: object providing ``train_index``, ``data`` and ``copy()``.
        predict_index: labels to predict.
        fit_model: when true, ``fit(config, context)`` is called first.
        update_column: optional column of the data to overwrite with
            each prediction.

    Returns:
        ``(preds, x, y)``: the predictions, the feature frame from the
        last iteration (or from ``fit`` / ``None`` when ``predict_index``
        is empty), and the ``y`` returned by ``fit`` (``None`` when
        ``fit_model`` is false).
    """
    if len(context.train_index & predict_index):
        logging.warning("Train and predict indices overlap...")

    x, y = None, None

    if fit_model:
        x, y = fit(config, context)

    # x is still None when fit_model is False, and not every model
    # exposes ``coef_``; diagnostics must never abort the prediction.
    if x is not None:
        logging.debug(x.columns)
    if hasattr(config.model, 'coef_'):
        logging.debug(config.model.coef_)

    ctx = context.copy()
    ps = []
    for i in predict_index:
        # ctx shares the data object of ``context``, so the write-back
        # below is visible to the next ``get_x`` call.
        ctx.data = context.data
        x = get_x(config, ctx)
        predict_x = x.reindex([i])

        # make actual predictions
        p = config.model.predict(predict_x.values)
        if update_column is not None:
            ctx.data[update_column][i] = p[0]
        ps.append(p[0])
    try:
        preds = Series(ps, index=predict_index)
    except Exception:
        # presumably multi-column predictions that do not fit a Series
        preds = DataFrame(ps, index=predict_index)
    # prediction post-processing
    if config.prediction is not None:
        context.data[config.predictions_name] = preds
        preds = build_target(config.prediction, context)
        preds = preds.reindex(predict_index)
    preds.name = ''
    return preds, x, y
コード例 #8
0
ファイル: models.py プロジェクト: mengfan/ramp
def predict(config, context, predict_index, fit_model=True, model_name=None):
    if len(context.train_index & predict_index):
        print "WARNING: train and predict indices overlap..."

    x, y = None, None

    if model_name:
        config.model = context.store.load(model_name)

    if not model_name and fit_model:
        x, y = fit(config, context)

    # TODO: possible to have x loaded without new prediction rows
    if x is None:
        # rebuild just the necessary x:
        ctx = context.copy()
        ctx.data = context.data.ix[predict_index]
        x = get_x(config, ctx)
        try:
            # we may or may not have y's in predict context
            # we get them if we can for metrics and reporting
            y = get_y(config, ctx)
        except KeyError:
            pass

    if debug:
        print x.columns

    predict_x = x.reindex(predict_index)

    print "Making predictions... ",
    # make actual predictions
    ps = config.model.predict(predict_x.values)
    try:
        preds = Series(ps, index=predict_x.index)
    except:
        preds = DataFrame(ps, index=predict_x.index)
    print "[OK]"
    # prediction post-processing
    if config.prediction is not None:
        old = context.data
        context.data = context.data.reindex(predict_x.index)
        context.data[config.predictions_name] = preds
        preds = build_target(config.prediction, context)
        preds = preds.reindex(predict_x.index)
        context.data = old
    preds.name = ''
    actuals = y.reindex(predict_index)
    # TODO: handle multi-variate predictions
    predict_x['predictions'] = preds
    predict_x['actuals'] = actuals
    config.update_reporters_with_predictions(context, predict_x, actuals,
                                             preds)
    return predict_x
コード例 #9
0
ファイル: models.py プロジェクト: mengfan/ramp
def predict(config, context, predict_index, fit_model=True, model_name=None):
    if len(context.train_index & predict_index):
        print "WARNING: train and predict indices overlap..."

    x, y = None, None

    if model_name:
        config.model = context.store.load(model_name)

    if not model_name and fit_model:
        x, y = fit(config, context)

    # TODO: possible to have x loaded without new prediction rows
    if x is None:
        # rebuild just the necessary x:
        ctx = context.copy()
        ctx.data = context.data.ix[predict_index]
        x = get_x(config, ctx)
        try:
            # we may or may not have y's in predict context
            # we get them if we can for metrics and reporting
            y = get_y(config, ctx)
        except KeyError:
            pass

    if debug:
        print x.columns

    predict_x = x.reindex(predict_index)

    print "Making predictions... ",
    # make actual predictions
    ps = config.model.predict(predict_x.values)
    try:
        preds = Series(ps, index=predict_x.index)
    except:
        preds = DataFrame(ps, index=predict_x.index)
    print "[OK]"
    # prediction post-processing
    if config.prediction is not None:
        old = context.data
        context.data = context.data.reindex(predict_x.index)
        context.data[config.predictions_name] = preds
        preds = build_target(config.prediction, context)
        preds = preds.reindex(predict_x.index)
        context.data = old
    preds.name = ''
    actuals = y.reindex(predict_index)
    # TODO: handle multi-variate predictions
    predict_x['predictions'] = preds
    predict_x['actuals'] = actuals
    config.update_reporters_with_predictions(context, predict_x, actuals, preds)
    return predict_x
コード例 #10
0
ファイル: models.py プロジェクト: ahmed26/ramp
def predict(config, context, predict_index, fit_model=True):
    if len(context.train_index & predict_index):
        print "WARNING: train and predict indices overlap..."

    x, y = None, None

    if fit_model:
        x, y = fit(config, context)

    # TODO: possible to have x loaded without new prediction rows
    if x is None:
        # rebuild just the necessary x:
        ctx = context.copy()
        ctx.data = context.data.ix[predict_index]
        x = get_x(config, ctx)
        try:
            # we may or may not have y's in predict context
            # we get them if we can for metrics and reporting
            y = get_y(config, ctx)
        except KeyError:
            pass

    if debug:
        print x.columns
        print config.model.coef_

    predict_x = x.reindex(predict_index)

    print "Making predictions... ",
    # make actual predictions
    ps = config.model.predict(predict_x.values)
    try:
        preds = Series(ps, index=predict_x.index)
    except:
        preds = DataFrame(ps, index=predict_x.index)
    print "[OK]"
    # prediction post-processing
    if config.prediction is not None:
        context.data[config.predictions_name] = preds
        preds = build_target(config.prediction, context)
        preds = get_single_column(preds).reindex(predict_x.index)
    preds.name = ''
    return preds, x, y
コード例 #11
0
ファイル: models.py プロジェクト: vibster/ramp
def predict(config, context, predict_index, fit_model=True):
    """Predict the rows of ``predict_index``.

    Args:
        config: object providing ``model``, ``prediction`` and
            ``predictions_name``.
        context: object providing ``train_index``, ``data`` and ``copy()``.
        predict_index: index of the rows to predict.
        fit_model: when true the model is fitted with
            ``fit(config, context)`` first; otherwise the features (and
            the target, if available) are rebuilt for the prediction
            rows only.

    Returns:
        ``(preds, x, y)``: the predictions indexed like the reindexed
        features, the feature frame, and the target (``None`` when it
        could not be built).
    """
    if len(context.train_index & predict_index):
        print("WARNING: train and predict indices overlap...")

    x, y = None, None

    if fit_model:
        x, y = fit(config, context)

    # TODO: possible to have x loaded without new prediction rows
    if x is None:
        # rebuild just the necessary x:
        ctx = context.copy()
        ctx.data = context.data.ix[predict_index]
        x = get_x(config, ctx)
        try:
            # we may or may not have y's in predict context
            # we get them if we can for metrics and reporting
            y = get_y(config, ctx)
        except KeyError:
            pass

    # ``debug`` is a module-level flag defined outside this function.
    if debug:
        print(x.columns)
        # not every model exposes ``coef_``; debug output must not crash
        if hasattr(config.model, 'coef_'):
            print(config.model.coef_)

    predict_x = x.reindex(predict_index)

    # make actual predictions
    ps = config.model.predict(predict_x.values)
    try:
        preds = Series(ps, index=predict_x.index)
    except Exception:
        # presumably multi-column predictions that do not fit a Series
        preds = DataFrame(ps, index=predict_x.index)

    # prediction post-processing
    if config.prediction is not None:
        # NOTE: this writes the raw predictions into the caller's data.
        context.data[config.predictions_name] = preds
        preds = build_target(config.prediction, context)
        preds = get_single_column(preds).reindex(predict_x.index)
    preds.name = ''
    return preds, x, y
コード例 #12
0
ファイル: folds.py プロジェクト: mengfan/ramp
 def __iter__(self):
     """Yield one ``(train, test)`` pair of indexes per fold.

     Each fold rebuilds the target with ``build_target`` and splits its
     index into positives (target != 0) and negatives (target == 0).
     Test rows are drawn first with ``random.sample``; training rows are
     then drawn from the class index with the test rows subtracted. The
     sample sizes come from ``self.pos_test``, ``self.neg_test``,
     ``self.pos_train`` and ``self.neg_train``.

     Yields:
         A ``(pd.Index(train), pd.Index(test))`` tuple.
     """
     for _ in range(self.folds):
         y = build_target(self.config.target, self.context)
         positives = y[y != 0].index
         negatives = y[y == 0].index

         test_positives = random.sample(positives, self.pos_test)
         test_negatives = random.sample(negatives, self.neg_test)
         # NOTE(review): ``index - list`` is used as a set difference so
         # that test rows are excluded from the training pool -- confirm
         # the installed pandas still gives ``-`` that meaning.
         train_positives = random.sample(positives - test_positives,
                                         self.pos_train)
         train_negatives = random.sample(negatives - test_negatives,
                                         self.neg_train)
         test = test_positives + test_negatives
         train = train_positives + train_negatives

         if self.verbose:
             # Single-argument, parenthesised prints produce the same
             # output whether ``print`` is a statement or a function.
             print("Sampled Folds:")
             print("\tPos\tNeg\tPos pct")
             print("Train:\t%d\t%d\t%0.3f" % (
                 self.pos_train, self.neg_train,
                 self.pos_train / float(self.pos_train + self.neg_train)))
             print("Test:\t%d\t%d\t%0.3f" % (
                 self.pos_test, self.neg_test,
                 self.pos_test / float(self.neg_test + self.pos_test)))
         yield pd.Index(train), pd.Index(test)
コード例 #13
0
ファイル: models.py プロジェクト: mengfan/ramp
def predict_autosequence(config,
                         context,
                         predict_index,
                         fit_model=True,
                         update_column=None):
    """Predict the rows of ``predict_index`` one at a time, in order.

    For every label the features are rebuilt with ``get_x`` and a single
    row is predicted. When ``update_column`` is given, the prediction is
    written into that column of the context data before the next label
    is processed.

    Args:
        config: object providing ``model``, ``prediction`` and
            ``predictions_name``.
        context: object providing ``train_index``, ``data`` and ``copy()``.
        predict_index: labels to predict.
        fit_model: when true, ``fit(config, context)`` is called first.
        update_column: optional column of the data to overwrite with
            each prediction.

    Returns:
        ``(preds, x, y)``: the predictions, the feature frame from the
        last iteration (or from ``fit`` / ``None`` when ``predict_index``
        is empty), and the ``y`` returned by ``fit`` (``None`` when
        ``fit_model`` is false).
    """
    if len(context.train_index & predict_index):
        print("WARNING: train and predict indices overlap...")

    x, y = None, None

    if fit_model:
        x, y = fit(config, context)

    # ``debug`` is a module-level flag defined outside this function.
    if debug:
        # x is still None when fit_model is False, and not every model
        # exposes ``coef_``; debug output must never abort the prediction.
        if x is not None:
            print(x.columns)
        if hasattr(config.model, 'coef_'):
            print(config.model.coef_)

    ctx = context.copy()
    ps = []
    for i in predict_index:
        # ctx shares the data object of ``context``, so the write-back
        # below is visible to the next ``get_x`` call.
        ctx.data = context.data
        x = get_x(config, ctx)
        predict_x = x.reindex([i])

        # make actual predictions
        p = config.model.predict(predict_x.values)
        if update_column is not None:
            ctx.data[update_column][i] = p[0]
        ps.append(p[0])
    try:
        preds = Series(ps, index=predict_index)
    except Exception:
        # presumably multi-column predictions that do not fit a Series
        preds = DataFrame(ps, index=predict_index)
    # prediction post-processing
    if config.prediction is not None:
        context.data[config.predictions_name] = preds
        preds = build_target(config.prediction, context)
        preds = preds.reindex(predict_index)
    preds.name = ''
    return preds, x, y
コード例 #14
0
ファイル: models.py プロジェクト: robotsamurai/ramp
def predict(config, context, predict_index, force_prediction=False):
    """Fit the model and predict the rows of ``predict_index``.

    Args:
        config: object providing ``model``, ``prediction`` and
            ``predictions_name``.
        context: object providing ``train_index``, ``data`` and ``copy()``.
        predict_index: index of the rows to predict.
        force_prediction: accepted for interface compatibility; it is
            not read anywhere in this function.

    Returns:
        ``(preds, x, y)``: the predictions indexed like the reindexed
        features, the feature frame and the target.
    """
    # Test the size of the overlap: the truth value of an index holding
    # more than one label is ambiguous and raises instead of warning.
    if len(context.train_index & predict_index):
        print("WARNING: train and predict indices overlap...")

    x, y = fit(config, context)
    # TODO: possible to have x loaded without new prediction rows
    if x is None:
        # rebuild just the necessary x:
        ctx = context.copy()
        ctx.data = context.data.ix[predict_index]
        x, y = get_xy(config, ctx)

    # TODO: verify that x carries the same columns, in the same order,
    # as the frame the model was fitted on.

    predict_x = x.reindex(predict_index)

    # make actual predictions
    ps = config.model.predict(predict_x.values)
    preds = Series(ps, index=predict_x.index)

    # prediction post-processing
    if config.prediction is not None:
        # NOTE: this writes the raw predictions into the caller's data.
        context.data[config.predictions_name] = preds
        preds = build_target(config.prediction, context)
        preds = get_single_column(preds).reindex(predict_x.index)
    preds.name = ''
    return preds, x, y
コード例 #15
0
ファイル: models.py プロジェクト: Marigold/ramp
def get_y(config, context):
    """Return the target built from ``config.target`` within ``context``."""
    target = build_target(config.target, context)
    return target
コード例 #16
0
ファイル: models.py プロジェクト: mengfan/ramp
def get_y(config, context):
    """Return the target built from ``config.target`` within ``context``."""
    target = build_target(config.target, context)
    return target