Example #1
import logging

import pandas as pd

from dstools.util import load_yaml
from pipeline import Pipeline  # import path assumed from example #3's SKPipeline import

# logger configuration
log = logging.getLogger()
handler = logging.StreamHandler()
formatter = logging.Formatter(
        '%(asctime)s %(name)-12s %(levelname)-8s %(message)s')
handler.setFormatter(formatter)
log.addHandler(handler)
log.setLevel(logging.DEBUG)

'''
    Pipeline example using scikit-learn
    - Scaling features
    - Training different models and hyperparameter sets
    - Feature selection using SelectPercentile
'''

# config is assumed to be defined earlier in the original module
pip = Pipeline(config, load_yaml('exp.yaml'), workers=10, save=True)


@pip.load
def load(config):
    data = {}
    # data loading
    train = pd.read_csv('../data/train.csv', index_col='id')
    data['train_x'] = train.drop('survived', axis=1).values
    data['train_y'] = train.survived.values
    test = pd.read_csv('../data/test.csv', index_col='id')
    data['test_x'] = test.values
    data['test_ids'] = test.index
    return data
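
Note that @pip.load and the plain attribute assignment used in examples #4 and #5 (pip.load = load) register the same function; the pipeline simply stores it and calls it later to build the data dictionary. A generic sketch of this registration pattern, not dstools' actual implementation:

class MiniPipeline(object):
    def load(self, fn):
        # store the function for later use; returning it unchanged makes
        # this usable as a decorator
        self._load = fn
        return fn

pip = MiniPipeline()

@pip.load
def load(config):
    return {}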

Example #2
from dstools.util import load_yaml
from dstools.lab import Experiment
import pandas as pd

# open the experiment store and fetch one record by its stored _id
ex = Experiment(load_yaml('exp.yaml')['conf'])
ex.get(_id=['57435574e0f48c88752991ba'])
best = ex.records[0]

# test_preds is a list of (id, prediction) tuples (see example #4)
df = pd.DataFrame(best.test_preds, columns=['PassengerId', 'Survived'])
df.set_index('PassengerId', inplace=True)
df.Survived = df.Survived.astype(int)
df.to_csv('res.csv')
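
Since record.test_preds is stored as a list of (id, prediction) tuples (see example #4), the DataFrame above gets one row per test id. A quick self-contained check with made-up ids:

import pandas as pd

preds = [(892, 0.0), (893, 1.0), (894, 0.0)]  # hypothetical (id, prediction) pairs
df = pd.DataFrame(preds, columns=['PassengerId', 'Survived'])
df.set_index('PassengerId', inplace=True)
df.Survived = df.Survived.astype(int)
print(df.to_csv())  # PassengerId,Survived / 892,0 / 893,1 / 894,0
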
Example #3
import logging

from dstools.util import load_yaml
from dstools.sklearn import grid_generator
from pipeline.lab.util import top_k
from pipeline import SKPipeline
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

# logger configuration
log = logging.getLogger()
handler = logging.StreamHandler()
formatter = logging.Formatter(
    '%(asctime)s %(name)-12s %(levelname)-8s %(message)s')
handler.setFormatter(formatter)
log.addHandler(handler)
log.setLevel(logging.INFO)

# create pipeline object; config is assumed to be defined earlier in the module
pip = SKPipeline(config, load_yaml('exp.yaml'))


# this function should return all the data used to train models
# as a dictionary; in subsequent functions the data will be available
# through the 'data' parameter
@pip.load
def load(config):
    iris = load_iris()
    X_train, X_test, y_train, y_test = train_test_split(iris.data,
                                                        iris.target,
                                                        test_size=0.30,
                                                        random_state=0)
    # remaining keys follow from train_test_split's outputs (the original
    # listing is cut off here)
    data = {
        'X_train': X_train,
        'X_test': X_test,
        'y_train': y_train,
        'y_test': y_test,
    }
    return data
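
Example #4 below resumes inside the train function, after `scores` has already been computed; a plausible source is scikit-learn cross-validation on the training set. A minimal sketch under that assumption (the estimator is illustrative):

from sklearn.datasets import load_iris
from sklearn.model_selection import cross_val_score
from sklearn.svm import SVC

iris = load_iris()
model = SVC()
scores = cross_val_score(model, iris.data, iris.target, cv=5)  # 5-fold CV accuracy
print(scores.mean())
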
Example #4
from dstools.util import load_yaml
from pipeline import Pipeline  # import path assumed, as in the other examples
from pipeline.lab.util import top_k
from sklearn.feature_selection import SelectPercentile, f_classif
from sklearn.preprocessing import StandardScaler


# the top of this snippet is cut off in the original listing; the signature
# follows example #5, and `scores` and `percentile` are computed in the
# missing part (scores presumably via cross-validation on the training set)
def train(config, model, data, record):
    train_x, train_y = data['train_x'], data['train_y']
    test_x = data['test_x']

    record['mean_acc'] = scores.mean()

    # select the top percentile of features on the training set,
    # then apply the same selection to the test set
    fn = SelectPercentile(f_classif, percentile=percentile).fit(train_x, train_y)
    train_x = fn.transform(train_x)
    test_x = fn.transform(test_x)

    # standardize features using training-set statistics
    scaler = StandardScaler().fit(train_x)
    train_x = scaler.transform(train_x)
    test_x = scaler.transform(test_x)

    # fit on the full training set and predict on the test set
    model.fit(train_x, train_y)
    ids = data['test_ids']
    preds = model.predict(test_x)
    record['test_preds'] = [(id_, pred) for id_, pred in zip(ids, preds)]


# keep only the 30 best records ranked by mean accuracy
def finalize(config, experiment):
    experiment.records = top_k(experiment.records, 'mean_acc', 30)
    experiment['exp_name'] = config['exp_name']

# config is assumed to be defined earlier in the module
pip = Pipeline(config, load_yaml('exp.yaml'), workers=4, save=True)

pip.load = load
# pip.model_iterator = model_iterator
pip.model_iterator = model_iterator_autosklearn
pip.train = train
pip.finalize = finalize

pip()
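
The SelectPercentile, StandardScaler, model chain in train can also be written with scikit-learn's own Pipeline (a different class from the experiment Pipeline above), which fits every step on the training data and replays the transforms at predict time. A minimal sketch; the estimator and percentile are illustrative:

from sklearn.feature_selection import SelectPercentile, f_classif
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

clf = Pipeline([
    ('select', SelectPercentile(f_classif, percentile=50)),
    ('scale', StandardScaler()),
    ('model', SVC()),
])
# clf.fit(train_x, train_y) then clf.predict(test_x) reproduce the steps above
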
Example #5
File: basic.py  Project: edublancas/dstools

from dstools.util import load_yaml
from sklearn.metrics import precision_score
from pipeline import SKPipeline  # import path as in example #3


# function used to train models; should return a fitted model
def train(config, model, data, record):
    print(record)

    # keys match the dictionary returned by load (see example #3)
    model.fit(data['X_train'], data['y_train'])
    preds = model.predict(data['X_test'])

    record['precision'] = precision_score(data['y_test'], preds)
    return model


# optional function used when every model has been trained
def finalize(config, experiment):
    pass
    # experiment.records = top_k(experiment.records, 'precision', 4)

# create pipeline object; config, load and model_iterator are defined
# elsewhere in basic.py
pip = SKPipeline(config, load_yaml('exp.yaml'), workers=1)

# assign your functions
pip.load = load
pip.model_iterator = model_iterator
pip.train = train
pip.finalize = finalize

# run pipeline
pip()
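
For reference, precision_score used in train is scikit-learn's standard precision metric, TP / (TP + FP). A quick check with made-up labels:

from sklearn.metrics import precision_score

y_true = [0, 1, 1, 0, 1]
y_pred = [0, 1, 0, 0, 1]
print(precision_score(y_true, y_pred))  # two TPs, no FPs -> 1.0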