import numpy as np

import feature_extractor
import regressor


def train_model(X_df, y_array, skf_is):
    # Fit the feature extractor on the full data set, then transform it.
    fe = feature_extractor.FeatureExtractor()
    fe.fit(X_df, y_array)
    X_array = fe.transform(X_df)
    # Regression: train only on the fold's training indices.
    train_is, _ = skf_is
    X_train_array = np.array([X_array[i] for i in train_is])
    y_train_array = np.array([y_array[i] for i in train_is])
    reg = regressor.Regressor()
    reg.fit(X_train_array, y_train_array)
    return fe, reg
def train_submission(module_path, X_df, y_array, train_is):
    # Preparing the training set
    X_train_df = X_df.iloc[train_is]
    y_train_array = y_array[train_is]
    # Feature extraction
    import feature_extractor
    fe = feature_extractor.FeatureExtractor()
    fe.fit(X_train_df, y_train_array)
    X_train_array = fe.transform(X_train_df)
    # Regression
    import regressor
    reg = regressor.Regressor()
    reg.fit(X_train_array, y_train_array)
    return fe, reg
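# Both training helpers above assume local `feature_extractor` and `regressor`
# modules exposing a scikit-learn-style fit/transform and fit/predict
# interface. A minimal compatible sketch follows; the concrete models are
# placeholders, not the originals:
import numpy as np
from sklearn.ensemble import RandomForestRegressor


class FeatureExtractor(object):
    def fit(self, X_df, y_array):
        # Remember the numeric columns seen at fit time.
        self.columns = X_df.select_dtypes(include=[np.number]).columns
        return self

    def transform(self, X_df):
        return X_df[self.columns].values


class Regressor(object):
    def __init__(self):
        self.reg = RandomForestRegressor(n_estimators=100)

    def fit(self, X_array, y_array):
        self.reg.fit(X_array, y_array)
        return self

    def predict(self, X_array):
        return self.reg.predict(X_array)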
import pandas as pd


def main(argv):
    # `process_data`, `convert_to_zero` and the `rg` regressor module are
    # defined elsewhere in this project.
    df_train = pd.read_csv('data/train.csv')
    df_test = pd.read_csv('data/test.csv')
    df_answer = pd.DataFrame()
    df_train, df_test, df_answer = process_data(df_train, df_test, df_answer)
    label = df_train['NU_NOTA_MT']
    df_train.drop(['NU_NOTA_MT'], axis=1, inplace=True)
    regression_model = rg.Regressor(df_train, df_test, df_answer, label)
    regression_model.auto_sklearn(time=int(argv[1]))
    regression_model.prediction()
    regression_model.save_model('my_model')
    regression_model.save_answer('automl_answer')
    convert_to_zero('automl_answer')
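# `convert_to_zero` is not shown in this snippet. A plausible sketch, assuming
# it clamps missing or negative predicted grades in the saved answer file to
# zero; the file naming and exact behaviour in the original project may differ:
import pandas as pd


def convert_to_zero(answer_name):
    answer = pd.read_csv(answer_name + '.csv')
    answer['NU_NOTA_MT'] = answer['NU_NOTA_MT'].fillna(0).clip(lower=0)
    answer.to_csv(answer_name + '.csv', index=False)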
# X_train_df, X_test_df, y_train_df, y_test_df, `labels` (the molecule
# classes) and the feature extractor / model modules are defined earlier.
y_train_clf = y_train_df['molecule'].values
y_train_reg = y_train_df['concentration'].values
y_test_clf = y_test_df['molecule'].values
y_test_reg = y_test_df['concentration'].values

# Classification step: predict the molecule.
fe_clf = feature_extractor_clf.FeatureExtractorClf()
fe_clf.fit(X_train_df, y_train_clf)
X_train_array_clf = fe_clf.transform(X_train_df)
X_test_array_clf = fe_clf.transform(X_test_df)
clf = classifier.Classifier()
clf.fit(X_train_array_clf, y_train_clf)
y_proba_clf = clf.predict_proba(X_test_array_clf)
y_pred_clf = labels[np.argmax(y_proba_clf, axis=1)]
error = 1 - accuracy_score(y_test_clf, y_pred_clf)
print('error = %s' % error)

# Regression step: predict the concentration, feeding the predicted class
# probabilities to the regressor as extra features.
fe_reg = feature_extractor_reg.FeatureExtractorReg()
for i, label in enumerate(labels):
    X_train_df.loc[:, label] = (y_train_df['molecule'] == label)
    X_test_df.loc[:, label] = y_proba_clf[:, i]
fe_reg.fit(X_train_df, y_train_reg)
X_train_array_reg = fe_reg.transform(X_train_df)
X_test_array_reg = fe_reg.transform(X_test_df)
reg = regressor.Regressor()
reg.fit(X_train_array_reg, y_train_reg)
y_pred_reg = reg.predict(X_test_array_reg)

# Combined score: 2/3 classification error + 1/3 MARE.
mare = mare_score(y_test_reg, y_pred_reg)
print('mare = ', mare)
print('combined error = ', 2. / 3 * error + 1. / 3 * mare)
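# `mare_score` is used above but not defined in this snippet. A minimal
# sketch of the mean absolute relative error it presumably computes:
import numpy as np


def mare_score(y_true, y_pred):
    # Mean of |y_true - y_pred| / |y_true|.
    return np.mean(np.abs((y_true - y_pred) / y_true))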
def process(self, campaign_configuration, regression_inputs, processes_number):
    """
    Perform the actual regression

    Parameters
    ----------
    campaign_configuration: dictionary
        The set of options specified by the user through command line and campaign configuration files

    regression_inputs: RegressionInputs
        The input of the regression problem

    processes_number: integer
        The number of processes used to run the experiments
    """
    self._logger.info("-->Generate generators")
    factory = gf.GeneratorsFactory(campaign_configuration, self._random_generator.random())
    top_generator = factory.build()
    self._logger.info("<--")

    self._logger.info("-->Generate experiments")
    expconfs = top_generator.generate_experiment_configurations([], regression_inputs)
    self._logger.info("<--")
    assert expconfs

    if processes_number == 1:
        self._logger.info("-->Run experiments (sequentially)")
        for exp in tqdm.tqdm(expconfs, dynamic_ncols=True):
            exp.train()
        self._logger.info("<--")
    else:
        self._logger.info("-->Run experiments (in parallel)")
        pool = multiprocessing.Pool(processes_number)
        expconfs = list(tqdm.tqdm(pool.imap(process_wrapper, expconfs), total=len(expconfs)))
        self._logger.info("<--")

    self._logger.info("-->Collecting results")
    results = re.Results(campaign_configuration, expconfs)
    results.collect_data()
    self._logger.info("<--Collected")

    for metric, mapes in results.raw_results.items():
        for experiment_configuration, mape in mapes.items():
            self._logger.debug("%s of %s is %f", metric, experiment_configuration, mape)

    best_confs, best_technique = results.get_bests()
    best_regressors = {}

    self._logger.info("-->Building the final regressors")
    # Create a shallow copy
    all_data = regression_inputs.copy()
    # Set all sets equal to the whole input set
    all_data.inputs_split["training"] = all_data.inputs_split["all"]
    all_data.inputs_split["validation"] = all_data.inputs_split["all"]
    all_data.inputs_split["hp_selection"] = all_data.inputs_split["all"]

    for technique in best_confs:
        best_conf = best_confs[technique]
        # Get information about the used x_columns
        all_data.x_columns = best_conf.get_x_columns()
        if 'normalization' in campaign_configuration['DataPreparation'] and campaign_configuration['DataPreparation']['normalization']:
            # Restore non-normalized columns
            for column in all_data.scaled_columns:
                all_data.data[column] = all_data.data["original_" + column]
                all_data.data = all_data.data.drop(columns=["original_" + column])
            all_data.scaled_columns = []
            self._logger.debug("Denormalized inputs are:%s\n", str(all_data))
            # Normalize
            normalizer = data_preparation.normalization.Normalization(campaign_configuration)
            all_data = normalizer.process(all_data)
        # Set training set
        best_conf.set_training_data(all_data)
        # Train
        best_conf.train()
        best_conf.evaluate()
        self._logger.info("Validation MAPE on full dataset for %s: %s", technique, str(best_conf.mapes["validation"]))
        # Build the regressor
        best_regressors[technique] = regressor.Regressor(campaign_configuration, best_conf.get_regressor(), best_conf.get_x_columns(), all_data.scalers)
        pickle_file_name = os.path.join(campaign_configuration['General']['output'], ec.enum_to_configuration_label[technique] + ".pickle")
        with open(pickle_file_name, "wb") as pickle_file:
            pickle.dump(best_regressors[technique], pickle_file)
    self._logger.info("<--Built the final regressors")

    # Return the regressor of the best technique
    return best_regressors[best_technique]
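# `process_wrapper` is the top-level helper handed to `Pool.imap` above; it
# must be a module-level function so it can be pickled for the worker
# processes. A minimal sketch of what it presumably does:
def process_wrapper(experiment_configuration):
    # Train the configuration in the worker process and send it back.
    experiment_configuration.train()
    return experiment_configuration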
# Module to run model-agnostic meta-learning supervised regression experiments.
import torch

import experiments
import regressor
import utility

if __name__ == "__main__":
    input_arguments = utility.parse_input_arguments()
    utility.control_randomness(input_arguments.seed)
    torch_device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Create and train the baseline regressor.
    baseline_regressor = regressor.Regressor(0.01, torch_device)
    experiments.train_baseline_regressor(baseline_regressor,
                                         input_arguments.baseline_training_iterations,
                                         input_arguments.batch_size,
                                         torch_device)

    # Create and train the model-agnostic meta-learning regressor.
    maml_regressor = regressor.Regressor(0.01, torch_device)
    experiments.train_maml_regressor(maml_regressor,
                                     input_arguments.meta_training_iterations,
                                     input_arguments.meta_batch_size,
                                     input_arguments.batch_size,
                                     torch_device)

    # Evaluate the trained regressors, and create and save the test result plots.
    test_results = experiments.test_regressors(baseline_regressor, maml_regressor, 100,
                                               input_arguments.batch_size, torch_device)
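# `utility.control_randomness` is assumed to seed every RNG in play so runs
# are reproducible. A common sketch of such a helper, under that assumption:
import random

import numpy as np
import torch


def control_randomness(seed):
    # Seed Python, NumPy and PyTorch (CPU and all CUDA devices).
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)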
# `dataset` (the diamonds data), the `prepro` preprocessing helpers, the
# `vis` visualizer and the `reg` regressor module are defined elsewhere.
x = dataset.drop(['price', 'cut', 'color', 'clarity'], axis=1)
y = dataset['price']
x = prepro.scale(x)

# Encode the categorical columns and append them to the scaled features.
encode_col = dataset[['cut', 'color', 'clarity']]
encode_col = prepro.encode(encode_col)
x = np.concatenate((x, encode_col), axis=1)

X_train, X_test, y_train, y_test = train_test_split(x, y, random_state=0, test_size=0.33)
vis.Visualizer().scatterplot(X_test[:, 0], y_test.iloc[:])

# Linear regression
regressor = reg.Regressor(type=reg.LINEAR_REGRESSION)
regressor.fit(X_train, y_train)
print("******************Linear Regression******************")
print(regressor.score(X_test, y_test))
# vis.Visualizer().scatterplot(X_test[:, 0], y_test.iloc[:], regressor)
print("*************************************************")

# Polynomial regression
params = dict(degree=5)
regressor = reg.Regressor(type=reg.POLY_REGRESSION, **params)
regressor.fit(X_train, y_train)
print("**************Polynomial Regression***************")
# print(regressor.score(X_test, y_test))
print("*************************************************")
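# The `reg` module above is not shown. A minimal sketch of a Regressor that
# dispatches on a `type` flag the way the script uses it; the constants and
# the scikit-learn backends are assumptions, not the original implementation:
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import PolynomialFeatures

LINEAR_REGRESSION = 'linear'
POLY_REGRESSION = 'poly'


class Regressor(object):
    def __init__(self, type=LINEAR_REGRESSION, degree=2):
        if type == POLY_REGRESSION:
            # Expand the features polynomially before the linear fit.
            self.model = make_pipeline(PolynomialFeatures(degree=degree), LinearRegression())
        else:
            self.model = LinearRegression()

    def fit(self, X, y):
        self.model.fit(X, y)
        return self

    def predict(self, X):
        return self.model.predict(X)

    def score(self, X, y):
        # R^2 on held-out data.
        return self.model.score(X, y)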