def read_data(self):
    """Load the train/test files through ``Dataset`` and cache the results.

    Builds a ``Dataset`` from ``self.train_file_name``,
    ``self.test_file_name`` and ``self.data_dir``, calls its
    ``preprocess(reload=True)`` and stores the five returned values on the
    instance as ``train_X``, ``train_Y``, ``test``, ``train_features`` and
    ``cate_features``.
    """
    loader = Dataset(
        self.train_file_name,
        self.test_file_name,
        base_dir=self.data_dir,
    )
    # NOTE(review): reload=True presumably forces a fresh preprocessing
    # pass instead of using a cached one -- confirm against Dataset.
    prepared = loader.preprocess(reload=True)
    (
        self.train_X,
        self.train_Y,
        self.test,
        self.train_features,
        self.cate_features,
    ) = prepared
def read_data(self):
    """Load the train/test files and initialise the stratification values.

    Marks the instance as having its train/test data set
    (``set_train_test_bool = True``), then builds a ``Dataset`` from
    ``self.train_file_name``, ``self.test_file_name`` and ``self.data_dir``
    and stores the five values returned by ``preprocess(reload=True)`` as
    ``train_X``, ``train_Y``, ``test``, ``train_features`` and
    ``cate_features``. Finally ``stratified_values`` is initialised from
    ``train_Y.values``.
    """
    # The flag is raised before any loading happens, exactly as before.
    self.set_train_test_bool = True

    loader = Dataset(
        self.train_file_name,
        self.test_file_name,
        base_dir=self.data_dir,
    )
    prepared = loader.preprocess(reload=True)
    (
        self.train_X,
        self.train_Y,
        self.test,
        self.train_features,
        self.cate_features,
    ) = prepared

    # default stratified values
    self.stratified_values = self.train_Y.values
def set_train_outlier(self):
    """Reload the data and return the training rows not flagged as outliers.

    Side effects: ``train_X``, ``train_Y``, ``test``, ``features`` and
    ``cate_features`` are refreshed from ``Dataset.preprocess(reload=True)``,
    and ``self.train_X`` gains a ``'target'`` column holding ``train_Y``.
    If ``train_X`` has no ``'outliers'`` column,
    ``Dataset.set_outlier_col`` is called on it first.

    Returns:
        A ``(train_df, target)`` pair: the rows of ``train_X`` whose
        ``'outliers'`` value equals 0 with the ``'target'`` column removed,
        and that removed ``'target'`` column.
    """
    loader = Dataset(train_path=self.train_file, test_path=self.test_file)
    (
        self.train_X,
        self.train_Y,
        self.test,
        self.features,
        self.cate_features,
    ) = loader.preprocess(reload=True)

    frame = self.train_X
    frame['target'] = self.train_Y

    # NOTE(review): set_outlier_col is presumably expected to add the
    # 'outliers' column in place -- the filter below relies on it.
    if 'outliers' not in frame.columns:
        loader.set_outlier_col(frame)

    keep_mask = frame['outliers'] == 0
    train_df = frame[keep_mask]
    # pop() fetches the column and deletes it from train_df in one step.
    target = train_df.pop('target')
    return train_df, target
def load_data(self):
    """Load the aggregated train/test CSVs and cache the preprocessed parts.

    Builds a ``Dataset`` for ``'train_agg_id1.csv'`` and
    ``'test_agg_id1.csv'``, runs ``preprocess(reload=True)`` and stores the
    five returned values on the instance as ``train_X``, ``target``,
    ``test``, ``features`` and ``cate_features``.
    """
    loader = Dataset('train_agg_id1.csv', 'test_agg_id1.csv')
    prepared = loader.preprocess(reload=True)
    (
        self.train_X,
        self.target,
        self.test,
        self.features,
        self.cate_features,
    ) = prepared
param_dict[key] = params[index] # set some non-numeric parameters too for key in self.non_numeric_param: param_dict[key] = self.non_numeric_param[key] print(param_dict) cv_error, _ = self._train(param_dict) return cv_error, param_dict def predict(self): pass if __name__ == "__main__": dataset = Dataset(train_path='df_train_agg1.csv', test_path='df_test_agg1.csv') dataset.format_transformer( train_file_name='alltrainffm_agg1.txt', test_file_name='alltestffm_agg1.txt', fields=[ 'feature_1', 'feature_2', 'feature_3', 'elapsed_time', 'hist_month_lag_max', 'hist_category_1_sum', 'hist_weekend_sum', 'hist_category_3_mean_mean', 'hist_category_1_sum', 'hist_category_1_mean', 'hist_authorized_flag_sum', 'hist_authorized_flag_mean', 'hist_purchase_date_max' ]) model = FFMModel(debug=True, train_name='alltrainffm_agg1.txt', test_name='alltestffm_agg1.txt') model.train()