def train(self, parameters):
    tsne = TSNE(**parameters)
    tsne_outputs = tsne.fit_transform(self.x_train)
    utils.save_data_to_pkl(tsne_outputs,
                           tsne_outputs_path + 'tsne_outputs.p')
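# ----------------------------------------------------------------------
# Hedged usage sketch (not part of the original source). `reducer` stands in
# for an already-constructed instance of the class that owns x_train and the
# train() method above; the dict is simply unpacked into sklearn's TSNE.
#
#   reducer = ...  # hypothetical instance exposing x_train
#   reducer.train({'n_components': 2, 'perplexity': 30.0, 'random_state': 0})
# ----------------------------------------------------------------------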
def train(self, load_pickle=False, load_pickle_path=None):
    """ Training the model """
    start_time = time.time()

    path_list = [self.pred_path + 'multiclass/',
                 self.pred_path + 'pred_era/',
                 self.pred_path + 'pred_era/final_results/',
                 self.pred_path + 'final_results/']
    utils.check_dir(path_list)

    print('======================================================')
    print('Start Training PrejudgeMultiClass...')

    if load_pickle:
        # Load era_sign_test
        if load_pickle_path is None:
            era_sign_test = utils.load_pkl_to_data(
                self.prejudged_data_path + 'multiclass_era_sign_test.p')
        else:
            era_sign_test = utils.load_pkl_to_data(load_pickle_path)
    else:
        # Training Era Sign
        era_sign_test = self.predict_era_sign()
        # era_sign_test = self.load_era_sign_csv(self.pred_path + 'pred_era/final_results/lgb_result.csv')

        # Save era_sign_test to Pickle File
        utils.save_data_to_pkl(
            era_sign_test,
            self.prejudged_data_path + 'multiclass_era_sign_test.p')

    # Get Split Data
    x_test, x_g_test, id_test, x_test_idx = \
        self.split_test_set_by_era_sign(era_sign_test)
    x_train, x_g_train, y_train, w_train, e_train = \
        self.split_train_set_by_era()

    # Training Models by Era Sign
    prob_test = \
        self.train_models_by_era_sign(x_train, x_g_train, y_train, w_train,
                                      e_train, x_test, x_g_test, id_test,
                                      x_test_idx)

    # Save Predictions
    utils.save_pred_to_csv(self.pred_path + 'final_results/prejudge_',
                           self.id_test, prob_test)

    total_time = time.time() - start_time
    print('======================================================')
    print('Training Done!')
    print('Total Time: {}s'.format(total_time))
    print('======================================================')
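# ----------------------------------------------------------------------
# Hedged usage sketch (not from the original source). `prejudge` stands in
# for an already-constructed PrejudgeMultiClass instance; the first call
# trains the era-sign model from scratch and caches era_sign_test, the second
# reuses the pickle written under prejudged_data_path.
#
#   prejudge = PrejudgeMultiClass(...)   # constructor args omitted
#   prejudge.train()                     # full run, saves era_sign_test
#   prejudge.train(load_pickle=True)     # reuse cached era_sign_test
# ----------------------------------------------------------------------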
def generate_tampered_results_by_weight(self, n_pairs):
    w_train = utils.load_pkl_to_data(preprocessed_data_path + 'w_train.p')

    print('------------------------------------------------------')
    print('Calculating Big Weight Same Pairs...')
    print('------------------------------------------------------')

    print('Sorting...')
    sorted_by_weight_idx = np.argsort(w_train)[:-n_pairs*3:-1]
    sorted_w_train = w_train[sorted_by_weight_idx]
    sorted_code_id_train = self.code_id_train[sorted_by_weight_idx]

    print('Deduplicating...')
    big_weight_code_id = []
    big_weight_w_train = []
    for idx, code_id in enumerate(sorted_code_id_train):
        if code_id not in big_weight_code_id:
            if code_id in set(self.same_test_code_id):
                big_weight_code_id.append(code_id)
                big_weight_w_train.append(sorted_w_train[idx])

    print('Generating Pairs...')
    idx_pair_list, w_train_col, code_id_col, id_col = \
        self.get_pair_list(n_pairs, big_weight_code_id,
                           use_weight=True, w_train_list=big_weight_w_train)

    print('------------------------------------------------------')
    print('Number of Big Weight Same Pairs: {}'.format(len(idx_pair_list)))
    utils.save_data_to_pkl(idx_pair_list,
                           preprocessed_data_path + 'big_weight_idx_pairs.p')

    index = []
    for i in range(1, len(idx_pair_list)+1):
        index.extend([i, i])
    df_log = pd.DataFrame({'index': np.array(index, dtype=int),
                           'weight': np.array(w_train_col),
                           'code_id': np.array(code_id_col, dtype=int),
                           'id': np.array(id_col, dtype=int)})
    cols = ['index', 'weight', 'code_id', 'id']
    df_log = df_log.loc[:, cols]

    tampered_pred_path_ = tampered_pred_path + 'big_weight_tampered_log.csv'
    print('------------------------------------------------------')
    print('Saving {} ...'.format(tampered_pred_path_))
    df_log.to_csv(tampered_pred_path_, sep=',', index=False)

    # Save Same Pairs csv file
    self.save_same_pairs_test_csv(
        preprocessed_data_path + 'big_weight_same_pairs.csv', idx_pair_list)

    # Generate Tampered Results
    self.tamper_result(idx_pair_list, 'big_weight')
def main(self):
    print('Split Data Set by Code ID...')
    print('------------------------------------------------------')
    x_test_list, id_test_list, code_id_list, test_idx_list = \
        self.split_data_by_code_id()

    same_id_list = []
    same_idx_list = []

    print('Searching Same ID Pairs...')
    print('------------------------------------------------------')
    for i in tqdm.trange(len(id_test_list)):
        x_test_i, id_test_i, code_id_i, test_idx_i = \
            x_test_list[i], id_test_list[i], code_id_list[i], test_idx_list[i]
        same_id_list_c, same_idx_list_c = \
            self.get_same_id_list(x_test_i, id_test_i, test_idx_i)
        same_id_list.extend(same_id_list_c)
        same_idx_list.extend(same_idx_list_c)

    print('------------------------------------------------------')
    print('Same Code Pairs: {}'.format(len(same_idx_list)))

    print('------------------------------------------------------')
    print('Saving same_test_pairs.csv...')
    same_idx = np.concatenate(np.array(same_idx_list)).tolist()
    test_f = pd.read_csv(test_path, header=0, dtype=np.float64)
    df = test_f.iloc[same_idx]
    cols = ['code_id',
            *['feature{}'.format(i) for i in range(97)],
            'group1', 'group2', 'id']
    df = df.loc[:, cols]
    df.to_csv(preprocess_path + 'same_test_pairs.csv', sep=',', index=False)

    print('------------------------------------------------------')
    print('Saving same_test_idx_pairs.p...')
    utils.save_data_to_pkl(same_idx_list,
                           preprocess_path + 'same_test_idx_pairs.p')
def search_diff_code_id(self):
    print('Searching Different Code ID of Test Set...')
    print('------------------------------------------------------')
    # Code IDs that appear in the test set but not in the training set
    diff_code_id_test = np.array(
        list(set([i for i in self.code_id_test
                  if i not in self.code_id_train])),
        dtype=int)

    print('Number of diff_code_id_test: ', diff_code_id_test.shape[0])
    utils.save_data_to_pkl(diff_code_id_test,
                           preprocess_path + 'diff_code_id_test.p')

    print('Saving {} ...'.format(preprocess_path + 'diff_code_id_test.csv'))
    np.savetxt(preprocess_path + 'diff_code_id_test.csv',
               diff_code_id_test, delimiter=',', fmt='%d')
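# ----------------------------------------------------------------------
# Hedged note (not from the original source): the set comprehension above is
# equivalent to NumPy's built-in set difference, which also returns sorted
# unique values (the pure-Python set gives no particular order):
#
#   diff_code_id_test = np.setdiff1d(self.code_id_test, self.code_id_train)
# ----------------------------------------------------------------------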
def generate_tampered_results_by_absence(self, n_pairs):
    diff_code_id_test = utils.load_pkl_to_data(
        preprocessed_data_path + 'diff_code_id_test.p')

    print('------------------------------------------------------')
    print('Calculating Absent Same Pairs...')
    print('------------------------------------------------------')

    print('Sorting...')
    diff_code_id_test = np.sort(diff_code_id_test)
    absent_code_id = diff_code_id_test[:-n_pairs*3:-1]

    print('Generating Pairs...')
    idx_pair_list, code_id_col, id_col = \
        self.get_pair_list(n_pairs, absent_code_id)

    print('------------------------------------------------------')
    print('Number of Absent Same Pairs: {}'.format(len(idx_pair_list)))
    utils.save_data_to_pkl(idx_pair_list,
                           preprocessed_data_path + 'absent_idx_pairs.p')

    index = []
    for i in range(1, len(idx_pair_list)+1):
        index.extend([i, i])
    df_log = pd.DataFrame({'index': np.array(index, dtype=int),
                           'code_id': np.array(code_id_col, dtype=int),
                           'id': np.array(id_col, dtype=int)})
    cols = ['index', 'code_id', 'id']
    df_log = df_log.loc[:, cols]

    tampered_pred_path_ = tampered_pred_path + 'absent_tampered_log.csv'
    print('------------------------------------------------------')
    print('Saving {} ...'.format(tampered_pred_path_))
    df_log.to_csv(tampered_pred_path_, sep=',', index=False)

    # Save Same Pairs csv file
    self.save_same_pairs_test_csv(
        preprocessed_data_path + 'absent_same_pairs.csv', idx_pair_list)

    # Generate Tampered Results
    self.tamper_result(idx_pair_list, 'absent')
def generate_tampered_results_by_range(self, start_code_id, n_pairs,
                                       reverse=False):
    print('------------------------------------------------------')
    print('Generating All Tampered Results...')

    stop_code_id = start_code_id + (n_pairs * 3)
    range_code_id = range(start_code_id, stop_code_id)
    if reverse:
        range_code_id = range_code_id[::-1]

    print('Generating Pairs...')
    idx_pair_list, code_id_col, id_col = \
        self.get_pair_list(n_pairs, range_code_id)

    print('------------------------------------------------------')
    print('Number of Range Same Pairs: {}'.format(len(idx_pair_list)))
    pickle_path = preprocessed_data_path + \
        'range-{}-{}_idx_pairs.p'.format(start_code_id, n_pairs)
    utils.save_data_to_pkl(idx_pair_list, pickle_path)

    index = []
    for i in range(1, len(idx_pair_list) + 1):
        index.extend([i, i])
    df_log = pd.DataFrame({'index': np.array(index, dtype=int),
                           'code_id': np.array(code_id_col, dtype=int),
                           'id': np.array(id_col, dtype=int)})
    cols = ['index', 'code_id', 'id']
    df_log = df_log.loc[:, cols]

    tampered_pred_path_ = tampered_pred_path + \
        'range-{}-{}_tampered_log.csv'.format(start_code_id, n_pairs)
    print('------------------------------------------------------')
    print('Saving {} ...'.format(tampered_pred_path_))
    df_log.to_csv(tampered_pred_path_, sep=',', index=False)

    # Save Same Pairs csv file
    test_csv_path = preprocessed_data_path + \
        'range-{}-{}_same_pairs.csv'.format(start_code_id, n_pairs)
    self.save_same_pairs_test_csv(test_csv_path, idx_pair_list)

    # Generate Tampered Results
    self.tamper_result(idx_pair_list,
                       'range-{}-{}'.format(start_code_id, n_pairs))
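# ----------------------------------------------------------------------
# Hedged usage sketch (not from the original source). `generator` stands in
# for an instance of the class these methods belong to; the three calls show
# the weight-, absence- and range-based ways of producing tampered results,
# using the signatures defined above.
#
#   generator.generate_tampered_results_by_weight(n_pairs=10)
#   generator.generate_tampered_results_by_absence(n_pairs=10)
#   generator.generate_tampered_results_by_range(start_code_id=100,
#                                                n_pairs=10, reverse=True)
# ----------------------------------------------------------------------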
def save_global_valid_set(self):
    print('======================================================')
    print('Saving Validation Set...')
    utils.save_data_to_pkl(self.x_valid,
                           self.preprocess_path + 'x_global_valid.p')
    utils.save_data_to_pkl(self.x_g_valid,
                           self.preprocess_path + 'x_g_global_valid.p')
    utils.save_data_to_pkl(self.y_valid,
                           self.preprocess_path + 'y_global_valid.p')
def train_models_by_era_sign(self, x_train, x_g_train, y_train, w_train,
                             e_train, x_test, x_g_test, id_test, x_test_idx):
    """ Training Models for Different Eras """
    print('======================================================')
    print('Training Models by Era Sign...')

    prob_test = np.zeros_like(self.id_test,
                              dtype=np.float64).reshape(-1, 1).tolist()

    for model_iter in range(self.n_era):

        print('======================================================')
        print('Training Models of Era: {}/{}'.format(model_iter + 1,
                                                     self.n_era))

        x_train_era = np.array(x_train[model_iter])
        x_g_train_era = np.array(x_g_train[model_iter])
        y_train_era = np.array(y_train[model_iter])
        w_train_era = np.array(w_train[model_iter])
        e_train_era = np.array(e_train[model_iter])
        x_test_era = np.array(x_test[model_iter])
        x_g_test_era = np.array(x_g_test[model_iter])
        id_test_era = np.array(id_test[model_iter])
        x_test_idx_era = np.array(x_test_idx[model_iter])

        print('------------------------------------------------------')
        print('Initializing Model...')
        model = self.multiclass_model_initializer(
            x_train_era, x_g_train_era, y_train_era, w_train_era,
            e_train_era, x_test_era, x_g_test_era, id_test_era)

        cv_generator = CrossValidation.random_split
        prob_test_era = model.train(self.pred_path + 'multiclass/',
                                    self.loss_log_path + 'multiclass/',
                                    csv_log_path=self.csv_log_path,
                                    n_valid=self.n_valid_m,
                                    n_cv=self.n_cv_m,
                                    train_seed=self.train_seed,
                                    cv_seed=self.cv_seed,
                                    parameters=self.parameters_m,
                                    return_prob_test=True,
                                    show_importance=self.show_importance,
                                    show_accuracy=self.show_accuracy,
                                    save_csv_log=True,
                                    csv_idx='era_{}'.format(model_iter + 1),
                                    cv_generator=cv_generator)

        utils.save_data_to_pkl(
            prob_test,
            self.prejudged_data_path +
            'multi_prob_test_era_{}.p'.format(model_iter + 1))

        for idx_era, prob_era in zip(x_test_idx_era, prob_test_era):
            if prob_test[idx_era][0] == 0.:
                prob_test[idx_era][0] = prob_era
            else:
                prob_test[idx_era].append(prob_era)

    utils.save_data_to_pkl(prob_test,
                           self.prejudged_data_path + 'multi_prob_test.p')

    # Calculate Mean of prob_test
    prob_test = np.mean(prob_test, axis=1, dtype=np.float64)

    return prob_test
def train(self, load_pickle=False, load_pickle_path=None):
    """ Training the model """
    start_time = time.time()

    path_list = [self.pred_path + 'positive/',
                 self.pred_path + 'negative/',
                 self.pred_path + 'pred_era/',
                 self.pred_path + 'pred_era/final_results/',
                 self.pred_path + 'final_results/',
                 self.loss_log_path + 'positive/',
                 self.loss_log_path + 'negative/']
    utils.check_dir(path_list)

    print('======================================================')
    print('Start Training PrejudgeBinary...')

    if load_pickle:
        # Load era_sign_test
        if load_pickle_path is None:
            era_sign_test = utils.load_pkl_to_data(
                self.prejudged_data_path + 'binary_era_sign_test.p')
        else:
            era_sign_test = utils.load_pkl_to_data(load_pickle_path)
    else:
        # Training Era Sign
        era_sign_test = self.predict_era_sign()
        # era_sign_test = self.load_era_sign_csv(self.pred_path + 'pred_era/final_results/lgb_result.csv')

        # Save era_sign_test to Pickle File
        utils.save_data_to_pkl(
            era_sign_test,
            self.prejudged_data_path + 'binary_era_sign_test.p')

    # Print Prediction of Positive Era Rate
    utils.print_positive_rate_test(era_sign_test)

    # Get Split Data
    x_test_p, x_g_test_p, id_test_p, era_idx_test_p, x_test_n, \
        x_g_test_n, id_test_n, era_idx_test_n = \
        self.split_test_set_by_era_sign(era_sign_test)

    # Training Models by Era Sign
    prob_test = \
        self.train_models_by_era_sign(x_test_p, x_g_test_p, id_test_p,
                                      era_idx_test_p, x_test_n, x_g_test_n,
                                      id_test_n, era_idx_test_n)

    # Save Predictions
    utils.save_pred_to_csv(self.pred_path + 'final_results/prejudge_',
                           self.id_test, prob_test)

    # Print Prediction of Positive Era Rate
    utils.print_positive_rate_test(era_sign_test)

    total_time = time.time() - start_time
    print('======================================================')
    print('Training Done!')
    print('Total Time: {}s'.format(total_time))
    print('======================================================')
def save_data(self):
    print('======================================================')
    print('Saving Preprocessed Data...')
    utils.save_data_to_pkl(self.x_train, self.preprocess_path + 'x_train.p')
    utils.save_data_to_pkl(self.y_train, self.preprocess_path + 'y_train.p')
    utils.save_data_to_pkl(self.w_train, self.preprocess_path + 'w_train.p')
    utils.save_data_to_pkl(self.e_train, self.preprocess_path + 'e_train.p')
    utils.save_data_to_pkl(self.y_test, self.preprocess_path + 'y_test.p')
    utils.save_data_to_pkl(self.x_test, self.preprocess_path + 'x_test.p')
    utils.save_data_to_pkl(self.w_test, self.preprocess_path + 'w_test.p')
    utils.save_data_to_pkl(self.e_test, self.preprocess_path + 'e_test.p')
    utils.save_data_to_pkl(self.pct_test, self.preprocess_path + 'pct_test.p')
    utils.save_data_to_pkl(self.id_test, self.preprocess_path + 'id_test.p')
    if group_list is not None:
        utils.save_data_to_pkl(self.x_g_train,
                               self.preprocess_path + 'x_g_train.p')
        utils.save_data_to_pkl(self.x_g_test,
                               self.preprocess_path + 'x_g_test.p')
    if self.use_code_id:
        utils.save_data_to_pkl(self.code_id_train,
                               self.preprocess_path + 'code_id_train.p')
        utils.save_data_to_pkl(self.code_id_test,
                               self.preprocess_path + 'code_id_test.p')
def _save_data(self):
    """ Save data set to pickle files. """
    utils.thin_line()
    print('Saving pickle files...')
    utils.check_dir([self.preprocessed_path])
    utils.save_data_to_pkl(
        self.x_train, join(self.preprocessed_path, 'x_train.p'))
    utils.save_data_to_pkl(
        self.y_train, join(self.preprocessed_path, 'y_train.p'))
    utils.save_data_to_pkl(
        self.x_valid, join(self.preprocessed_path, 'x_valid.p'))
    utils.save_data_to_pkl(
        self.y_valid, join(self.preprocessed_path, 'y_valid.p'))
    utils.save_data_to_pkl(
        self.x_test, join(self.preprocessed_path, 'x_test.p'))
    utils.save_data_to_pkl(
        self.y_test, join(self.preprocessed_path, 'y_test.p'))
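# ----------------------------------------------------------------------
# Hedged sketch (an assumption, not the project's actual utils module): the
# save_data_to_pkl / load_pkl_to_data helpers used throughout this file are
# presumably thin wrappers around the standard pickle module, roughly:
#
#   import pickle
#
#   def save_data_to_pkl(data, data_path):
#       # Serialize `data` to `data_path` with the highest pickle protocol.
#       with open(data_path, 'wb') as f:
#           pickle.dump(data, f, protocol=pickle.HIGHEST_PROTOCOL)
#
#   def load_pkl_to_data(data_path):
#       # Load and return the object stored at `data_path`.
#       with open(data_path, 'rb') as f:
#           return pickle.load(f)
# ----------------------------------------------------------------------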
def save_data(self):
    print('======================================================')
    print('Saving Preprocessed Data...')
    utils.save_data_to_pkl(self.x_train, self.preprocess_path + 'x_train.p')
    utils.save_data_to_pkl(self.x_g_train,
                           self.preprocess_path + 'x_g_train.p')
    utils.save_data_to_pkl(self.y_train, self.preprocess_path + 'y_train.p')
    utils.save_data_to_pkl(self.x_test, self.preprocess_path + 'x_test.p')
    utils.save_data_to_pkl(self.x_g_test, self.preprocess_path + 'x_g_test.p')
    utils.save_data_to_pkl(self.id_test, self.preprocess_path + 'id_test.p')
def save_data_by_era_distribution_pd(self):
    print('======================================================')
    print('Saving Preprocessed Data Split by Era Distribution...')

    # Positive Data
    print('Saving Positive Data...')
    utils.save_data_to_pkl(self.x_train_p,
                           self.preprocess_path + 'x_train_p.p')
    utils.save_data_to_pkl(self.y_train_p,
                           self.preprocess_path + 'y_train_p.p')
    utils.save_data_to_pkl(self.w_train_p,
                           self.preprocess_path + 'w_train_p.p')
    utils.save_data_to_pkl(self.e_train_p,
                           self.preprocess_path + 'e_train_p.p')

    # Negative Data
    print('Saving Negative Data...')
    utils.save_data_to_pkl(self.x_train_n,
                           self.preprocess_path + 'x_train_n.p')
    utils.save_data_to_pkl(self.y_train_n,
                           self.preprocess_path + 'y_train_n.p')
    utils.save_data_to_pkl(self.w_train_n,
                           self.preprocess_path + 'w_train_n.p')
    utils.save_data_to_pkl(self.e_train_n,
                           self.preprocess_path + 'e_train_n.p')

    if group_list is not None:
        utils.save_data_to_pkl(self.x_g_train_p,
                               self.preprocess_path + 'x_g_train_p.p')
        utils.save_data_to_pkl(self.x_g_train_n,
                               self.preprocess_path + 'x_g_train_n.p')
def save_global_valid_set(self):
    print('======================================================')
    print('Saving Validation Set...')
    utils.save_data_to_pkl(self.x_valid,
                           self.preprocess_path + 'x_global_valid.p')
    utils.save_data_to_pkl(self.y_valid,
                           self.preprocess_path + 'y_global_valid.p')
    utils.save_data_to_pkl(self.w_valid,
                           self.preprocess_path + 'w_global_valid.p')
    utils.save_data_to_pkl(self.e_valid,
                           self.preprocess_path + 'e_global_valid.p')
    if group_list is not None:
        utils.save_data_to_pkl(self.x_g_valid,
                               self.preprocess_path + 'x_g_global_valid.p')
    if self.use_code_id:
        utils.save_data_to_pkl(
            self.code_id_valid,
            self.preprocess_path + 'code_id_global_valid.p')
def save_data(self):
    print('Saving Preprocessed Data...')
    utils.save_data_to_pkl(self.x_train,
                           self.preprocessed_path + 'x_train_gan.p')
    utils.save_data_to_pkl(self.x_test,
                           self.preprocessed_path + 'x_test_gan.p')
def train(self, similarity_prob_path=None, global_epochs=1,
          return_similarity_prob=False):
    """ Train the GAN """
    print('======================================================')
    print('Training GAN for Adversarial Validation Set...')
    print('------------------------------------------------------')

    # Build Network
    tf.reset_default_graph()
    train_graph = tf.Graph()

    with train_graph.as_default():

        # Get inputs
        inputs_real, inputs_z, keep_prob = self.model_inputs()

        # Get losses
        d_loss, g_loss = self.model_loss(inputs_real, inputs_z, keep_prob)

        # Get optimizers
        d_train_opt, g_train_opt = self.model_opt(d_loss, g_loss)

        # Get similarities
        similarities = self.get_similarity(inputs_real, keep_prob)

        # Get generator
        g_outputs = self.get_generator(inputs_z, keep_prob)

    batch_counter = 0
    similarity_prob_total = []

    with tf.Session(graph=train_graph) as sess:

        local_start_time = time.time()

        for global_epoch_i in range(global_epochs):

            print('======================================================')
            print('Training on Global Epoch: {}/{}'.format(
                global_epoch_i + 1, global_epochs))
            print('------------------------------------------------------')

            x_test = self.x_test
            np.random.shuffle(x_test)
            sess.run(tf.global_variables_initializer())

            for epoch_i in range(self.epochs):

                for batch_i, x_batch in enumerate(
                        self.get_batches(x_test, self.batch_size)):

                    batch_counter += 1

                    # Sample random noise
                    batch_z = np.random.uniform(
                        0, 1, size=(self.batch_size, self.z_dim))

                    # Run optimizers
                    for _ in range(self.d_epochs):
                        sess.run(d_train_opt,
                                 feed_dict={inputs_real: x_batch,
                                            inputs_z: batch_z,
                                            keep_prob: self.keep_prob})
                    for _ in range(self.g_epochs):
                        sess.run(g_train_opt,
                                 feed_dict={inputs_real: x_batch,
                                            inputs_z: batch_z,
                                            keep_prob: self.keep_prob})

                    if batch_counter % self.display_step == 0 and batch_i > 0:

                        # Get losses
                        d_cost = d_loss.eval({inputs_real: x_batch,
                                              inputs_z: batch_z,
                                              keep_prob: 1.0})
                        g_cost = g_loss.eval({inputs_z: batch_z,
                                              keep_prob: 1.0})

                        total_time = time.time() - local_start_time
                        print('Global_Epoch: {}/{} |'.format(
                                  global_epoch_i + 1, global_epochs),
                              'Epoch: {}/{} |'.format(epoch_i + 1,
                                                      self.epochs),
                              'Batch: {:>5} |'.format(batch_counter),
                              'Time: {:>3.2f}s |'.format(total_time),
                              'd_Loss: {:.8f} |'.format(d_cost),
                              'g_Loss: {:.8f}'.format(g_cost))

                    if batch_counter % self.show_step == 0 and batch_i > 0:

                        example_z = np.random.uniform(
                            0, 1, size=(self.batch_size, self.z_dim))

                        # Get generator outputs and similarity probabilities
                        generator_outputs = sess.run(
                            g_outputs, feed_dict={inputs_z: example_z,
                                                  keep_prob: 1.0})
                        g_similarity_prob = sess.run(
                            similarities,
                            feed_dict={inputs_real: generator_outputs,
                                       keep_prob: 1.0})
                        t_similarity_prob = sess.run(
                            similarities,
                            feed_dict={inputs_real: self.x_train,
                                       keep_prob: 1.0})

                        print('------------------------------------------------------')
                        print('Generator Outputs:\n', generator_outputs[0])
                        print('------------------------------------------------------')
                        print('Similarity Prob of Generator Outputs:\n',
                              g_similarity_prob[:50].reshape(1, -1))
                        print('------------------------------------------------------')
                        print('Similarity Prob of Train Set:\n',
                              t_similarity_prob[:50].reshape(1, -1))
                        print('------------------------------------------------------')

            print('------------------------------------------------------')
            print('Calculating Similarities of Train Set...')
            similarity_prob = sess.run(similarities,
                                       feed_dict={inputs_real: self.x_train,
                                                  keep_prob: 1.0})
            similarity_prob_total.append(similarity_prob)

        print('======================================================')
        print('Calculating Final Similarities of Train Set...')
        similarity_prob_mean = np.mean(np.array(similarity_prob_total),
                                       axis=0)
        utils.save_data_to_pkl(similarity_prob_mean,
                               similarity_prob_path + 'similarity_prob.p')

        if return_similarity_prob:
            return similarity_prob_mean
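# ----------------------------------------------------------------------
# Hedged usage sketch (not from the original source). `gan` stands in for an
# already-constructed instance of the class that owns this train() method;
# similarity probabilities are averaged over global epochs and can be
# returned for reuse by split_data_by_gan() below.
#
#   gan = ...  # hypothetical instance exposing x_train / x_test
#   sim_prob = gan.train(similarity_prob_path=cfg.gan_prob_path,
#                        global_epochs=2,
#                        return_similarity_prob=True)
# ----------------------------------------------------------------------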
def split_data_by_gan(self, load_pickle=True, sample_ratio=None,
                      sample_by_era=True, generate_mode='valid'):
    print('======================================================')
    print('Splitting Adversarial Validation Set by GAN...')

    if load_pickle:
        similarity_prob = utils.load_pkl_to_data(
            cfg.gan_prob_path + 'similarity_prob.p')
    else:
        similarity_prob = \
            GenerateValidation.train(train_path=cfg.train_csv_path,
                                     test_path=cfg.test_csv_path,
                                     global_epochs=1,
                                     similarity_prob_path=cfg.gan_prob_path,
                                     return_similarity_prob=True,
                                     load_preprocessed_data=True)

    valid_idx = []
    train_idx = []

    if sample_by_era:

        similarity_prob_e = []
        index_e = []
        similarity_prob_all = []
        index_all = []
        era_tag = 0
        era_all = [era_tag]

        # Group similarity probabilities and row indexes by era
        for idx, era in enumerate(self.e_train):
            if idx == len(self.e_train) - 1:
                similarity_prob_e.append(similarity_prob[idx])
                index_e.append(idx)
                similarity_prob_all.append(similarity_prob_e)
                index_all.append(index_e)
            elif era_tag == era:
                similarity_prob_e.append(similarity_prob[idx])
                index_e.append(idx)
            else:
                era_tag = era
                era_all.append(era)
                similarity_prob_all.append(similarity_prob_e)
                index_all.append(index_e)
                similarity_prob_e = [similarity_prob[idx]]
                index_e = [idx]

        # Sample the most / least similar rows within every era
        for e, similarity_prob_e in enumerate(similarity_prob_all):
            n_sample_e = int(len(similarity_prob_e) * sample_ratio)
            most_similar_idx_e = np.argsort(
                similarity_prob_e)[:, :-(n_sample_e + 1):-1]
            least_similar_idx_e = np.argsort(
                similarity_prob_e)[:, :len(similarity_prob_e) - n_sample_e]
            if generate_mode == 'valid':
                valid_idx += list(index_all[e][most_similar_idx_e])
                train_idx += list(index_all[e][least_similar_idx_e])
            elif generate_mode == 'train':
                train_idx += list(index_all[e][most_similar_idx_e])
                valid_idx += list(index_all[e][least_similar_idx_e])
            else:
                raise ValueError("Wrong 'generate_mode'!")

    else:

        n_sample = int(len(similarity_prob) * sample_ratio)
        most_similar_idx = np.argsort(similarity_prob)[:, :-(n_sample + 1):-1]
        least_similar_idx = np.argsort(
            similarity_prob)[:, :len(similarity_prob) - n_sample]
        if generate_mode == 'valid':
            valid_idx = most_similar_idx
            train_idx = least_similar_idx
        elif generate_mode == 'train':
            train_idx = least_similar_idx
            valid_idx = most_similar_idx
        else:
            raise ValueError("Wrong 'generate_mode'!")

    # Generate Validation Set
    self.x_valid = self.x_train[valid_idx]
    self.y_valid = self.y_train[valid_idx]

    # Generate Training Set
    self.x_train = self.x_train[train_idx]
    self.y_train = self.y_train[train_idx]
    self.w_train = self.w_train[train_idx]
    self.e_train = self.e_train[train_idx]

    if group_list is not None:
        self.x_g_valid = self.x_g_train[valid_idx]
        self.x_g_train = self.x_g_train[train_idx]

    # Save Adversarial Validation Set
    print('Saving Adversarial Validation Set...')
    utils.save_data_to_pkl(self.x_valid, self.preprocess_path + 'x_valid.p')
    utils.save_data_to_pkl(self.x_g_valid,
                           self.preprocess_path + 'x_g_valid.p')
    utils.save_data_to_pkl(self.y_valid, self.preprocess_path + 'y_valid.p')
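# ----------------------------------------------------------------------
# Hedged usage sketch (not from the original source). `preprocessor` stands
# in for the preprocessing object that owns split_data_by_gan; sample_ratio
# must be supplied (it defaults to None above), and generate_mode chooses
# whether the most GAN-similar rows become the validation or training split.
#
#   preprocessor.split_data_by_gan(load_pickle=True,
#                                  sample_ratio=0.1,
#                                  generate_mode='valid')
# ----------------------------------------------------------------------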