def train_model(uc_name, dataset, type_problem, type_problem_class, training_config):
    # Generic trainer: delegates to the class-level `fit` of the given problem type;
    # `type_problem` is kept for signature symmetry but not used here.
    return type_problem_class.fit(
        uc_name,
        dataset,
        pio.ColumnConfig(target_column='target', drop_list=DROP_COLS),
        training_config=training_config,
    )
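# Minimal usage sketch for the helper above, assuming pio.Regression exposes the
# same class-level `fit` interface used in the other snippets here; DROP_COLS,
# the toy DataFrame and the experiment name are hypothetical placeholders.
import pandas as pd

DROP_COLS = ['id']  # hypothetical: columns to exclude from training
df = pd.DataFrame({'id': [1, 2], 'feature': [0.1, 0.2], 'target': [0, 1]})  # toy data
dataset = pio.Dataset.new(name='my_experiment_data', dataframe=df)
training_config = pio.TrainingConfig(models=[pio.Model.LinReg],
                                     features=[pio.Feature.Counts],
                                     profile=pio.Profile.Quick,
                                     with_blend=False)
uc = train_model('my_experiment', dataset, 'regression', pio.Regression, training_config)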
def train_model(experiment_name, groups=1, time_window=pio.TimeWindow(-90, -30, 1, 15)):
    path = os.path.join(DATA_PATH, 'ts.csv')
    data, group_list = get_data(path, groups)
    fname = '{}_{}.csv'.format(experiment_name, '-'.join(group_list))
    data.to_csv(fname, index=False)

    project = pio.Project.from_id(PROJECT_ID)
    dataset = project.create_dataset(name=experiment_name, dataframe=data)

    experiment_version_config = pio.TrainingConfig(
        advanced_models=[pio.AdvancedModel.LinReg],
        normal_models=[pio.NormalModel.LinReg],
        features=[pio.Feature.Counts],
        profile=pio.Profile.Quick,
    )
    col_config = pio.ColumnConfig(
        target_column='target',
        time_column='time',
        # group_columns=group_list
    )

    experiment_version = project.fit_timeseries_regression(
        experiment_name,
        dataset,
        time_window=time_window,
        training_config=experiment_version_config,
        column_config=col_config,
    )
    return experiment_version
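# Hedged usage sketch for the time series helper above: PROJECT_ID, DATA_PATH and
# get_data are assumed to be defined in the surrounding module, and the call
# relies on the default pio.TimeWindow(-90, -30, 1, 15).
experiment_version = train_model('ts_experiment', groups=2)
experiment_version.wait_until(lambda ev: len(ev.models) > 0)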
def train_model(project_id, experiment_name, dataset, training_type, training_type_func, training_config):
    # Resolve the training entry point (e.g. 'fit_classification') by name on the
    # project; `training_type` is kept for signature symmetry but not used here.
    project = pio.Project.from_id(project_id)
    training_type_func = getattr(project, training_type_func)
    return training_type_func(
        experiment_name,
        dataset,
        pio.ColumnConfig(target_column='target', drop_list=DROP_COLS),
        training_config=training_config,
    )
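# Hedged usage sketch for the dispatcher above, reusing the fit_classification
# entry point seen in the test setup below; PROJECT_ID, the experiment name and
# the dataset are placeholders, and DROP_COLS must exist in the calling module.
training_config = pio.TrainingConfig(
    advanced_models=[],
    normal_models=[],
    simple_models=[pio.SimpleModel.DecisionTree],
    features=[],
    profile=pio.Profile.Quick)
experiment_version = train_model(
    PROJECT_ID,                  # project id constant, as in the time series helper above
    'classif_from_dispatcher',   # hypothetical experiment name
    dataset,                     # a previously created pio dataset
    'classification',
    'fit_classification',
    training_config,
)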
def setup_module(module):
    # Create project
    global project
    project = pio.Project.new(name=PROJECT_NAME, description="description test sdk")

    # Create dataset
    global dataset
    dataset = project.create_dataset('test_exporter', file_name='data_exporter/titanic.csv')

    # Train one model
    training_config = pio.TrainingConfig(
        advanced_models=[],
        normal_models=[],
        simple_models=[pio.SimpleModel.DecisionTree],
        features=[],
        profile=pio.Profile.Quick)
    column_config = pio.ColumnConfig(target_column='Survived', id_column='PassengerId')
    experiment_version = project.fit_classification(
        'test_exporter_classif',
        dataset,
        column_config,
        metric=pio.metrics.Classification.AUC,
        training_config=training_config,
    )

    # Create validation_prediction
    experiment_version.wait_until(
        lambda experimentv: (len(experimentv.models) > 0) or (experimentv._status['state'] == 'failed'))
    if experiment_version._status['state'] == 'failed':
        raise RuntimeError('Could not train experiment')
    global validation_prediction
    validation_prediction = experiment_version.predict_from_dataset(dataset)

    # Create experiment deployment
    experiment_version_best_model = experiment_version.best_model
    experiment_deployment = project.create_experiment_deployment(
        'test_sdk_' + TESTING_ID, experiment_version_best_model)

    # Create deployment_prediction
    experiment_deployment.wait_until(
        lambda experimentd: experimentd.run_state == 'done')
    global deployment_prediction
    deployment_prediction = experiment_deployment.predict_from_dataset(dataset)
def train_model(uc_name, groups=1, time_window=pio.TimeWindow(-90, -30, 1, 15)):
    path = os.path.join(DATA_PATH, 'ts.csv')
    data, group_list = get_data(path, groups)
    fname = '{}_{}.csv'.format(uc_name, '-'.join(group_list))
    data.to_csv(fname, index=False)

    dataset = pio.Dataset.new(name=uc_name, dataframe=data)

    uc_config = pio.TrainingConfig(normal_models=[pio.Model.LinReg],
                                   lite_models=[pio.Model.LinReg],
                                   features=[pio.Feature.Counts],
                                   profile=pio.Profile.Quick)
    col_config = pio.ColumnConfig(
        target_column='target',
        time_column='time',
        # group_columns=group_list
    )

    uc = pio.TimeSeries.fit(uc_name,
                            dataset,
                            time_window=time_window,
                            training_config=uc_config,
                            column_config=col_config)
    return uc
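# Hedged usage sketch for the legacy TimeSeries helper above: DATA_PATH and
# get_data are assumed to be provided by the surrounding module.
uc = train_model('ts_legacy_experiment', groups=3)
uc.wait_until(lambda u: len(u) > 0)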
import os

import pandas as pd

import previsionio as pio

from .utils import get_testing_id

TESTING_ID = get_testing_id()

pio.config.default_timeout = 120

col_config = pio.ColumnConfig(target_column='class', filename_column='filename')
uc_config = pio.TrainingConfig(normal_models=[pio.Model.LinReg],
                               lite_models=[],
                               simple_models=[],
                               features=[pio.Feature.Counts],
                               profile=pio.Profile.Quick)

test_datasets = {}
dataset_name = 'cats_and_dogs_train'
dataset_test_name = TESTING_ID + '-' + dataset_name


def upload_datasets():
    datapath = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                            'data_img/{}'.format(dataset_name))
    # upload CSV reference file
    dataset_csv = pio.Dataset.new(
        name=dataset_test_name,
        dataframe=pd.read_csv(os.path.join(datapath, '{}.csv'.format(dataset_name)))
    )
    # upload ZIP images folder
    dataset_zip = pio.DatasetImages.new(
        name=dataset_test_name,
# transform a variable
train._data['bathrooms'] = train._data['bathrooms'].astype('int').apply(lambda x: round(x))
test._data['bathrooms'] = test._data['bathrooms'].astype('int').apply(lambda x: round(x))

# register new datasets
train_fe = pio.Dataset.new('regression_house_80_fe', dataframe=train._data)
test_fe = pio.Dataset.new('regression_house_20_fe', dataframe=test._data)

## start the auto ml use case
uc_config = pio.TrainingConfig(models=[pio.Model.XGBoost, pio.Model.RandomForest],
                               features=pio.Feature.Full,
                               profile=pio.Profile.Quick,
                               with_blend=False)
col_config = pio.ColumnConfig(target_column='TARGET', id_column='ID')

uc = pio.Regression.fit('housing_from_sdk',
                        dataset=train_fe,
                        holdout_dataset=test_fe,
                        column_config=col_config,
                        training_config=uc_config)
uc.wait_until(lambda u: len(u) > 1)

## Get some use case derived information:
# correlation matrix
print('*************************************')
print('*** GET CORR MATRIX ***')
CM = uc.get_correlation_matrix()
print(CM)
      'smart_242_normalized', 'smart_242_raw', 'smart_250_normalized',
      'smart_250_raw', 'smart_251_normalized', 'smart_251_raw',
      'smart_252_normalized', 'smart_252_raw', 'smart_254_normalized',
      'smart_254_raw', 'smart_255_normalized', 'smart_255_raw']

dset = pd.read_csv(
    '/Users/gpistre/Prevision/prevision-python/examples/data/mclass.csv'
).sample(n=101).rename(columns={'failure': 'target'}).drop(cs, axis=1)

uc_config = pio.TrainingConfig(models=[pio.Model.LinReg],
                               features=[pio.Feature.Counts],
                               profile=pio.Profile.Quick,
                               with_blend=False)
col_config = pio.ColumnConfig(target_column='target')

train_dset = pio.Dataset.new(name='events_test' + '_train', dataframe=dset)

uc = pio.MultiClassification.fit('events_test',
                                 dataset=train_dset,
                                 column_config=col_config,
                                 training_config=uc_config)

uc.save()
uc = pio.Supervised.load('events_test.pio')

uc.wait_until(lambda u: len(u) > 0, timeout=None)

preds = uc.predict(dset.drop('target', axis=1))