Ejemplo n.º 1
0
def train(config, model_data, data, record):
    """Cross-validate one model and record its predictions on the test set.

    The estimator is wrapped in a pipeline (univariate feature selection,
    then standardisation, then the estimator itself).  That pipeline is
    scored with 5-fold cross-validation on the training data, refit on the
    whole training set and finally used to predict the test set.

    Args:
        config: Mapping read for the ``'n_jobs'`` entry.
        model_data: Pair ``(model_class_name, percentile)``; the first item
            is passed to ``instantiate_from_class_string`` and the second
            is the percentile given to ``SelectPercentile``.
        data: Mapping providing ``'train_x'``, ``'train_y'``, ``'test_x'``
            and ``'test_ids'``.
        record: Mutable mapping that receives the results under the keys
            ``'model'``, ``'parameters'``, ``'feats_percentile'``,
            ``'mean_acc'`` and ``'test_preds'``.

    Returns:
        None. All results are written into ``record``.
    """
    model_class_name, percentile = model_data
    model = instantiate_from_class_string(model_class_name)

    try:
        model.n_jobs = config['n_jobs']
    except Exception:
        # Best effort only: a failure here must not abort training, but
        # KeyboardInterrupt/SystemExit are no longer swallowed.
        log.info('Cannot set n_jobs for this model...')

    # Describe the bare estimator (before it is wrapped in the pipeline).
    record['model'] = model_name(model)
    record['parameters'] = model.get_params()
    record['feats_percentile'] = percentile

    train_x = data['train_x']
    train_y = data['train_y']
    test_x = data['test_x']

    # Selection and scaling live inside the pipeline so they are re-fitted
    # on every cross-validation fold and applied exactly once at predict
    # time.
    pipeline = make_pipeline(
        SelectPercentile(f_classif, percentile=percentile),
        StandardScaler(),
        model,
    )

    # estimate accuracy using cross-validation
    scores = cross_validation.cross_val_score(pipeline, train_x,
                                              train_y, cv=5,
                                              scoring='accuracy')
    record['mean_acc'] = scores.mean()

    # predict on the test set
    # The pipeline is fed the *raw* matrices: transforming them by hand
    # first would run feature selection and scaling twice, so the final
    # estimator would differ from the one that was cross-validated.
    pipeline.fit(train_x, train_y)
    ids = data['test_ids']
    preds = pipeline.predict(test_x)
    record['test_preds'] = list(zip(ids, preds))
Ejemplo n.º 2
0
from sklearn.datasets import load_iris
from sklearn.metrics import precision_score
from sklearn.cross_validation import train_test_split

# Build the candidate models from their fully-qualified class names.
classes = ["sklearn.ensemble.RandomForestClassifier"]
models = grid_generator.grid_from_classes(classes)

# Hold out 30% of the iris samples for evaluation.
iris = load_iris()
X_train, X_test, y_train, y_test = train_test_split(
    iris.data, iris.target, test_size=0.30)

# create a new experiment
# NOTE(review): `main` is not defined in this snippet; presumably it is the
# loaded project configuration -- confirm against the surrounding module.
ex = Experiment(main["logger"])

for m in models:
    # create a new record
    rec = ex.record()

    m.fit(X_train, y_train)
    preds = m.predict(X_test)
    # The iris target is multiclass, so an averaging mode must be given
    # explicitly (the default 'binary' is only valid for two-class targets).
    # 'weighted' averages the per-class precision weighted by class support.
    rec["precision"] = precision_score(y_test, preds, average="weighted")
    rec["parameters"] = m.get_params()
    rec["model"] = model_name(m)


# select top_k
# Keep only the 2 records selected by `top_k` on the "precision" key.
ex.records = top_k(ex.records, "precision", 2)

# store records in the database
ex.save()