Example #1
def train_model(experiment_name, groups=1, time_window=pio.TimeWindow(-90, -30, 1, 15)):
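    # load the time series data for the requested number of groups and keep a local CSV copy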
    path = os.path.join(DATA_PATH, 'ts.csv')
    data, group_list = get_data(path, groups)
    fname = '{}_{}.csv'.format(experiment_name, '-'.join(group_list))
    data.to_csv(fname, index=False)
    project = pio.Project.from_id(PROJECT_ID)
    dataset = project.create_dataset(name=experiment_name,
                                     dataframe=data)

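    # lightweight training configuration: linear models only, quick profile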
    experiment_version_config = pio.TrainingConfig(
        advanced_models=[pio.AdvancedModel.LinReg],
        normal_models=[pio.NormalModel.LinReg],
        features=[pio.Feature.Counts],
        profile=pio.Profile.Quick,
    )

    col_config = pio.ColumnConfig(target_column='target',
                                  time_column='time',
                                  # group_columns=group_list
                                  )

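    # launch the time series regression experiment on the platform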
    experiment_version = project.fit_timeseries_regression(
        experiment_name,
        dataset,
        time_window=time_window,
        training_config=experiment_version_config,
        column_config=col_config,
    )
    return experiment_version
Example #2
def setup_module(module):
    # Create project
    global project
    project = pio.Project.new(name=PROJECT_NAME,
                              description="description test sdk")

    # Create dataset
    global dataset
    dataset = project.create_dataset('test_exporter',
                                     file_name='data_exporter/titanic.csv')

    # Train one model
    training_config = pio.TrainingConfig(
        advanced_models=[],
        normal_models=[],
        simple_models=[pio.SimpleModel.DecisionTree],
        features=[],
        profile=pio.Profile.Quick)
    column_config = pio.ColumnConfig(target_column='Survived',
                                     id_column='PassengerId')

    experiment_version = project.fit_classification(
        'test_exporter_classif',
        dataset,
        column_config,
        metric=pio.metrics.Classification.AUC,
        training_config=training_config,
    )

    # Create validation_prediction
    experiment_version.wait_until(
        lambda experimentv: len(experimentv.models) > 0
        or experimentv._status['state'] == 'failed')
    if experiment_version._status['state'] == 'failed':
        raise RuntimeError('Could not train experiment')
    global validation_prediction
    validation_prediction = experiment_version.predict_from_dataset(dataset)

    # Create experiment deployment
    experiment_version_best_model = experiment_version.best_model
    experiment_deployment = project.create_experiment_deployment(
        'test_sdk_' + TESTING_ID, experiment_version_best_model)

    # Create deployment_prediction
    experiment_deployment.wait_until(
        lambda experimentd: experimentd.run_state == 'done')
    global deployment_prediction
    deployment_prediction = experiment_deployment.predict_from_dataset(dataset)
Example #3
def train_model(uc_name, groups=1, time_window=pio.TimeWindow(-90, -30, 1, 15)):
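    # load the time series data, write a local copy, and upload it as a dataset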
    path = os.path.join(DATA_PATH, 'ts.csv')
    data, group_list = get_data(path, groups)
    fname = '{}_{}.csv'.format(uc_name, '-'.join(group_list))
    data.to_csv(fname, index=False)
    dataset = pio.Dataset.new(name=uc_name,
                              dataframe=data)

    uc_config = pio.TrainingConfig(normal_models=[pio.Model.LinReg],
                                   lite_models=[pio.Model.LinReg],
                                   features=[pio.Feature.Counts],
                                   profile=pio.Profile.Quick)

    col_config = pio.ColumnConfig(target_column='target',
                                  time_column='time',
                                  # group_columns=group_list
                                  )

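    # start the time series use case with the quick profile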
    uc = pio.TimeSeries.fit(uc_name,
                            dataset,
                            time_window=time_window,
                            training_config=uc_config,
                            column_config=col_config)
    return uc
Example #4
import os
import pandas as pd
import previsionio as pio
from .utils import get_testing_id

TESTING_ID = get_testing_id()

pio.config.default_timeout = 120

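# column mapping and training configuration for the image use case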
col_config = pio.ColumnConfig(target_column='class', filename_column='filename')
uc_config = pio.TrainingConfig(normal_models=[pio.Model.LinReg],
                               lite_models=[],
                               simple_models=[],
                               features=[pio.Feature.Counts],
                               profile=pio.Profile.Quick)

test_datasets = {}
dataset_name = 'cats_and_dogs_train'
dataset_test_name = TESTING_ID + '-' + dataset_name


def upload_datasets():
    datapath = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'data_img/{}'.format(dataset_name))
    # upload CSV reference file
    dataset_csv = pio.Dataset.new(
        name=dataset_test_name,
        dataframe=pd.read_csv(os.path.join(datapath, '{}.csv'.format(dataset_name)))
    )
    # upload ZIP images folder
    dataset_zip = pio.DatasetImages.new(
        name=dataset_test_name,
Example #5
import os
import pandas as pd
import pytest
import previsionio as pio
from .datasets import make_supervised_datasets, remove_datasets
from . import DATA_PATH
from .utils import train_model, get_testing_id, DROP_COLS

TESTING_ID = get_testing_id()

pio.config.zip_files = False
pio.config.default_timeout = 80

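# training configuration shared by the supervised test use cases in this module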
uc_config = pio.TrainingConfig(models=[pio.Model.LinReg],
                               simple_models=[pio.SimpleModel.DecisionTree],
                               features=[pio.Feature.Counts],
                               profile=pio.Profile.Quick)
test_datasets = {}

type_problem_2_pio_class = {
    'regression': pio.Regression,
    'classification': pio.Classification,
    'multiclassification': pio.MultiClassification,
}
type_problems = type_problem_2_pio_class.keys()


def make_pio_datasets(paths):
    for problem_type, p in paths.items():
        dataset = pio.Dataset.new(p.split('/')[-1].replace(
            '.csv',
Example #6
import pandas as pd
import previsionio as pio

# CLIENT INITIALIZATION -----------------------------------------
url = """https://<your instance>.prevision.io"""
token = """<your token>"""
pio.client.init_client(url, token)

# DATA LOADING --------------------------------------------------
# load data from a CSV
dataframe = pd.read_csv('helloworld_train.csv')
# upload it to the platform
dataset = pio.Dataset.new(name='helloworld_train', dataframe=dataframe)

# USECASE TRAINING ----------------------------------------------
# setup usecase
uc_config = pio.TrainingConfig(models=[pio.Model.XGBoost],
                               features=pio.Feature.Full,
                               profile=pio.Profile.Quick)

# run training
uc = pio.Classification.fit('helloworld_classif',
                            dataset,
                            metric=pio.metrics.Classification.AUC,
                            training_config=uc_config)

# (block until there is at least 1 model trained)
uc.wait_until(lambda usecase: len(usecase) > 0)

# check out the usecase status and other info
uc.print_info()
print('Current number of models:', len(uc))
print('Current (best model) score:', uc.score)
Example #7
import pandas as pd
import previsionio as pio

from . import DATA_PATH
from .datasets import make_supervised_datasets, remove_datasets
from .utils import train_model, get_testing_id

TESTING_ID = get_testing_id()

PROJECT_NAME = "sdk_test_experiment_deployment" + str(TESTING_ID)
PROJECT_ID = ""

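# training configuration reused for every experiment in the deployment tests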
experiment_version_config = pio.TrainingConfig(
    advanced_models=[pio.AdvancedModel.LinReg],
    normal_models=[pio.NormalModel.LinReg],
    simple_models=[pio.SimpleModel.DecisionTree],
    features=[pio.Feature.Counts],
    profile=pio.Profile.Quick,
)

training_type_2_pio_class = {
    'regression': "fit_regression",
    'classification': "fit_classification",
    'multiclassification': "fit_multiclassification",
}
training_types = training_type_2_pio_class.keys()

test_datasets = {}


def make_pio_datasets(paths):
Example #8
import previsionio as pio

# get the train & test datasets stored on the datastore
train = pio.dataset.Dataset.get_by_name(dataset_name='regression_house_80')
test = pio.dataset.Dataset.get_by_name(dataset_name='regression_house_20')
# transform a variable: round the bathroom count before casting it to int
train._data['bathrooms'] = train._data['bathrooms'].round().astype('int')
test._data['bathrooms'] = test._data['bathrooms'].round().astype('int')
# register the new datasets
train_fe = pio.Dataset.new('regression_house_80_fe', dataframe=train._data)
test_fe = pio.Dataset.new('regression_house_20_fe', dataframe=test._data)
# start the AutoML use case
uc_config = pio.TrainingConfig(
    models=[pio.Model.XGBoost, pio.Model.RandomForest],
    features=pio.Feature.Full,
    profile=pio.Profile.Quick,
    with_blend=False)

col_config = pio.ColumnConfig(target_column='TARGET', id_column='ID')

uc = pio.Regression.fit('housing_from_sdk',
                        dataset=train_fe,
                        holdout_dataset=test_fe,
                        column_config=col_config,
                        training_config=uc_config)

uc.wait_until(lambda u: len(u) > 1)

# get some information derived from the use case:
# correlation matrix
Example #9
import os
import pandas as pd
import previsionio as pio
from .utils import get_testing_id

TESTING_ID = get_testing_id()

pio.config.default_timeout = 120

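# column mapping and quick training configuration for the image dataset tests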
col_config = pio.ColumnConfig(target_column='class',
                              filename_column='filename')
uc_config = pio.TrainingConfig(models=[pio.Model.LinReg],
                               features=[pio.Feature.Counts],
                               profile=pio.Profile.Quick)

test_datasets = {}
dataset_name = 'cats_and_dogs_train'
dataset_test_name = TESTING_ID + '-' + dataset_name


def upload_datasets():
    datapath = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                            'data_img/{}'.format(dataset_name))
    # upload CSV reference file
    dataset_csv = pio.Dataset.new(name=dataset_test_name,
                                  dataframe=pd.read_csv(
                                      os.path.join(
                                          datapath,
                                          '{}.csv'.format(dataset_name))))
    # upload ZIP images folder
    dataset_zip = pio.DatasetImages.new(name=dataset_test_name,