def main(input_filepath, output_filepath, config_file):
    """Runs data loading and cleaning and pre-processing scripts and saves data in ../processed.

    Args:
        input_filepath: Directory containing X_train.csv and y_train.csv.
        output_filepath: Directory the trained model pickle is written to.
        config_file: Path to a config file (parsing currently commented out).
    """
    # Local import: the file-level import block is outside this chunk, so we
    # bring os into scope here rather than touching unseen imports.
    import os

    logger = logging.getLogger(__name__)
    logger.info('Loading training data set, setting up pipeline, tuning, '
                'training and evaluating final model.')

    # Parse config file
    # config = parse_config(config_file)

    # Load training data. os.path.join is robust to input_filepath with or
    # without a trailing separator (the old '+' concatenation was not).
    X_train = pd.read_csv(os.path.join(input_filepath, 'X_train.csv'))
    y_train = pd.read_csv(os.path.join(input_filepath, 'y_train.csv')).values.ravel()

    # Pre-processing and modeling pipeline.
    # Non-float columns are treated as categorical; float64 columns as numeric.
    cat_features = X_train.select_dtypes(exclude='float64').columns
    num_features = X_train.select_dtypes(include='float64').columns
    # Target is log-transformed (log1p) before fitting the SVR and mapped back
    # with expm1 on prediction.
    pipe = Pipeline([
        ('preprocessing', preprocessing_pipeline(cat_features, num_features)),
        ('model', TransformedTargetRegressor(regressor=SVR(),
                                             func=np.log1p,
                                             inverse_func=np.expm1)),
    ])

    # Tune or select model
    # kf = KFold(config['modeling']['num_folds'], shuffle=True,
    #            random_state=rng).get_n_splits(X_train.values)
    model = Model(model=pipe)

    # Train model
    model.train(X_train, y_train)

    # Save model. Previously 'output_filepath + model.name' required the caller
    # to pass a trailing slash or the filename was silently mangled.
    model.save(os.path.join(output_filepath, model.name + '.pkl'))
training=False) val_loader = DataLoader(dataset=val_dataset, batch_size=config.batch_size, num_workers=2, drop_last=False, shuffle=False) sample_iterator = val_dataset.create_iterator(config.sample_size) model = Model(config, logger=logger) model.load(is_test=False) steps_per_epoch = len(train_dataset) // config.batch_size iteration = model.iteration epoch = model.iteration // steps_per_epoch logger.info('Start from epoch:{}, iteration:{}'.format(epoch, iteration)) model.train() keep_training = True best_score = {} while (keep_training): epoch += 1 stateful_metrics = ['epoch', 'iter', 'g_lr'] progbar = Progbar(len(train_dataset), max_iters=steps_per_epoch, width=20, stateful_metrics=stateful_metrics) for items in train_loader: model.train() items = to_cuda(items, config.device) _, g_loss, d_loss, logs = model.get_losses(items) model.backward(g_loss=g_loss, d_loss=d_loss)