def human_test(filename, sample_size, random_seed=None, filter_fn=None):
    """
    Presents a textual interface that allows human accuracy to be evaluated on
    the given data. A human will be asked to classify a given comment into a
    specific subreddit.

    :param filename: Str, a filename as a path
    :param sample_size: Number of Comment instances to use for each cross
        validation set.
    :param random_seed: An integer value that can be used to ensure stable
        random generation of the data set.
    :param filter_fn: A function that can be used to filter which Comments are
        included in the training data on a per comment basis.
    :return: Accuracy of correctly predicted comment => subreddit labels.
    """
    data = DataSet(
        [comment for comment in load_comments(filename, sample_size)],
        random_seed, filter_fn).generate_human(filter_fn)
    correct = 0
    potential_labels = list(set(data[1]))
    for i in range(len(data[0])):
        print(data[0][i])
        for j in range(len(potential_labels)):
            print("[{}]: {}".format(j, potential_labels[j]))
        # input() returns a string, so convert it before indexing.
        choice = int(input("Enter the # corresponding to the correct subreddit: "))
        if potential_labels[choice] == data[1][i]:
            correct += 1
    return {"human_average": correct / float(len(data[0]))}
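# A minimal usage sketch for human_test(); the file path, sample size, and seed
# below are illustrative assumptions, not values from the original project.
if __name__ == "__main__":
    scores = human_test("comments.json", sample_size=50, random_seed=42)
    print("Human accuracy: {:.2%}".format(scores["human_average"]))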
def evaluate(filename, sample_size, classifiers, n_cross_validation=5,
             random_seed=None, filter_fn=None, verbose=False):
    """
    Evaluates the given list of Classifier instances using n cross validation.

    :param filename: Str, a filename as a path
    :param sample_size: Number of Comment instances to use for each cross
        validation set.
    :param classifiers: List of Classifier instances which will each be fitted
        on the training data and scored on the testing data.
    :param n_cross_validation: Number of cross validation sets to generate
        during evaluation of each Classifier.
    :param random_seed: An integer value that can be used to ensure stable
        random generation of the data set.
    :param filter_fn: A function that can be used to filter which Comments are
        included in the training data on a per comment basis.
    :param verbose: If enabled, output will be given detailing the progress of
        the evaluation.
    :return: Tuple (data set size, Dict{classifier name: Dict{n cross result
        accuracies, average accuracy}})
    """
    sets = DataSet(
        [comment for comment in load_comments(filename, sample_size)],
        random_seed, filter_fn).generate_n_cross_validation_sets(n_cross_validation)
    if verbose:
        print("Finished generating cross validation sets")

    results = {}
    import numpy as np
    for classifier in classifiers:
        result = [
            classifier.fit(cv_set['training_sparse_matrix'],
                           cv_set['training_labels']).score(
                cv_set['validation_sparse_matrix'],
                cv_set['validation_labels'])
            for cv_set in sets
        ]
        results[type(classifier).__name__] = {
            "result": result,
            "average": np.mean(result)
        }
        if verbose:
            print("Finished testing {}".format(type(classifier).__name__))
    return (sets[0]['size'], results)
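# A minimal sketch of driving evaluate() with scikit-learn estimators; any
# object exposing fit()/score() on sparse matrices should work here, and the
# estimators, path, and sizes below are illustrative assumptions.
from sklearn.naive_bayes import MultinomialNB
from sklearn.linear_model import LogisticRegression

size, results = evaluate("comments.json", sample_size=500,
                         classifiers=[MultinomialNB(), LogisticRegression()],
                         n_cross_validation=5, random_seed=42, verbose=True)
for name, stats in results.items():
    print("{}: average accuracy {:.3f}".format(name, stats["average"]))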
def load_data(self, filepath):
    self.DataSet = DataSet.fromTrainTest(filepath)
    cfg = self.DataSet.trackword_config
    mult = cfg["TargetPrecision"]["full"] - cfg["TargetPrecision"]["int"]

    # Rescale the bit-level track-word and stub columns to the target
    # precision for both the training and test frames.
    track_fields = {"b_trk_inv2R": "InvR",
                    "b_trk_cot": "Cot",
                    "b_trk_zT": "ZT",
                    "b_trk_phiT": "PhiT"}
    stub_fields = {"b_stub_r_": "r",
                   "b_stub_phi_": "phi",
                   "b_stub_z_": "z",
                   "b_stub_dPhi_": "dPhi",
                   "b_stub_dZ_": "dZ"}

    for frame in (self.DataSet.X_train, self.DataSet.X_test):
        for column, key in track_fields.items():
            frame[column] = frame[column] * (2 ** (mult - cfg[key]["nbits"]))
        for k in range(4):
            for prefix, key in stub_fields.items():
                column = prefix + str(k)
                frame[column] = frame[column] * (2 ** (mult - cfg[key]["nbits"]))
from Disperse_WHEEL import Disperse_Wheel
from OLH import OLH

# Wrapped helper functions for obtaining epsilon.
EPS = 1
M = 1  # amount of data per user
getm_usernum = 30000  # number of users used to estimate m at the 80% level
D = 1  # domain size

# Obtain the record length that covers 80% of users.
get_m = Get_M(EPS, getm_usernum)
get_m.load_data_random('kosarak.dat')
M = get_m.get_m()

#####################
# Load the full data set.
Data1 = DataSet()
Data1.create_data_random('kosarak.dat', 0)
max_num = Data1.max_valued()
# D = max_num + 1

# Number the items in the data set and store the mapping in a dict.
map_data = {}
Data1.transform_to_map(map_data)
map_length = len(map_data)
D = map_length + 1

#####################
# Pad records to equal length and update the map_data table.
Data2 = DataSet()
Data2.create_data_certain('kosarak.dat', M, getm_usernum, D, map_data)
numbered_data = []  # the data converted to the numbered table; can be used directly
def train():
    LEARNING_RATE = 1e-3
    BATCH_SIZE = 64
    PRETRAIN_EPOCH = 1000
    PRETRAIN_EPOCH_d = 1100
    feature_nums = 15549
    dropout_value = 0.9
    dropout_sign = 1.0
    train_datapath = r"F:/project/simulation_data/drop60_p.train"
    EPOCH = 2500
    # outDir = r"F:/project/simulation_data/drop60/bn_"
    model_name = "AE-GAN_bn_dp_0.9_0_separate"
    load_checkpoint = False
    outDir = os.path.join("F:/project/simulation_data/drop60", model_name)
    model = "separate"

    x = tf.placeholder(tf.float32, [None, feature_nums], name="input_data")
    # completion = tf.placeholder(tf.float32, [BATCH_SIZE, feature_nums])
    is_training = tf.placeholder(tf.bool, [], name="is_training")
    # completed = tf.placeholder(tf.float32, [None, feature_nums], name="generator_out")
    mask = tf.placeholder(tf.float32, [None, feature_nums], name="mask")

    model = Simple_separate(x, mask, is_training, batch_size=BATCH_SIZE,
                            feature_num=feature_nums,
                            dropout_value=dropout_value,
                            dropout_sign=dropout_sign, is_bn=True)
    sess = tf.Session()
    global_step = tf.Variable(0, name='global_step', trainable=False)
    epoch = tf.Variable(0, name='epoch', trainable=False)

    with tf.name_scope("adam_optimizer"):
        opt = tf.train.AdamOptimizer(learning_rate=LEARNING_RATE)
        gv_train_op = opt.minimize(model.gv_loss, global_step=global_step,
                                   var_list=model.g_variables)
        g_train_op = opt.minimize(model.g_loss, global_step=global_step,
                                  var_list=model.g_variables)
        d_train_op = opt.minimize(model.d_loss, global_step=global_step,
                                  var_list=model.d_variables)

    init_op = tf.global_variables_initializer()
    sess.run(init_op)

    saver = tf.train.Saver()
    load_model_dir = os.path.join('./backup', model_name)
    if load_checkpoint and os.path.exists('./backup/' + model_name):
        ckpt = tf.train.get_checkpoint_state(load_model_dir)
        if ckpt and ckpt.model_checkpoint_path:
            saver.restore(sess, ckpt.model_checkpoint_path)
    elif os.path.exists(load_model_dir):
        shutil.rmtree(load_model_dir)
    else:
        os.makedirs(load_model_dir)

    logs_dir = os.path.join("./logs", model_name)
    if load_checkpoint is False and os.path.exists(logs_dir):
        shutil.rmtree(logs_dir)
        os.makedirs(logs_dir)
    writer = tf.summary.FileWriter(logs_dir, sess.graph)

    # if tf.train.get_checkpoint_state('./backup'):
    #     saver = tf.train.Saver()
    #     saver.restore(sess, './backup/latest')
    #
    # logs_dir = os.path.join("./logs", model_name)
    # if os.path.exists(logs_dir) == False:
    #     os.makedirs(logs_dir)
    # writer = tf.summary.FileWriter(logs_dir, sess.graph)

    dataset = DataSet(train_datapath, BATCH_SIZE)
    # each epoch has step_num steps
    step_num = dataset.steps

    while sess.run(epoch) < EPOCH:
        sess.run(tf.assign(epoch, tf.add(epoch, 1)))
        print('epoch: {}'.format(sess.run(epoch)))

        # Completion (generator) pretraining
        if sess.run(epoch) <= PRETRAIN_EPOCH:
            for i in tqdm.tqdm(range(step_num)):
                x_batch = dataset.next()
                _, gv_loss, gv_summary_str = sess.run(
                    [gv_train_op, model.gv_loss, model.gv_sum],
                    feed_dict={
                        x: x_batch,
                        is_training: True,
                        K.learning_phase(): 1
                    })
                if i % 10 == 0:
                    writer.add_summary(gv_summary_str)
            print('Completion loss: {}'.format(gv_loss))
            if sess.run(epoch) % 100 == 0:
                saver.save(sess, load_model_dir + '/pretrained_g',
                           write_meta_graph=False)

            # At the end of generator pretraining, export the intermediate
            # completions once over the full data set.
            if sess.run(epoch) == PRETRAIN_EPOCH:
                dataset = DataSet(train_datapath, BATCH_SIZE,
                                  onepass=True, shuffle=False)
                imitate_datas = []
                complete_datas = []
                embed_datas = []
                for i in tqdm.tqdm(range(step_num + 1)):
                    x_batch = dataset.next()
                    batch_mask = x_batch == 0
                    embed, imitation, completion = sess.run(
                        [model.encoderv_out, model.imitation, model.completion],
                        feed_dict={
                            x: x_batch,
                            is_training: False,
                            K.learning_phase(): 0
                        })
                    completion = np.array(completion, dtype=np.float64)
                    imitation = np.array(imitation, dtype=np.float64)
                    embed = np.array(embed, dtype=np.float64)
                    batch_mask = batch_mask.astype(float)
                    completion = x_batch * (1 - batch_mask) + completion * batch_mask
                    imitation = x_batch * (1 - batch_mask) + imitation * batch_mask
                    complete_datas.append(completion)
                    imitate_datas.append(imitation)
                    embed_datas.append(embed)
                dataset = DataSet(train_datapath, BATCH_SIZE)

                complete_datas = np.reshape(np.concatenate(complete_datas, axis=0),
                                            (-1, feature_nums))
                imitate_datas = np.reshape(np.concatenate(imitate_datas, axis=0),
                                           (-1, feature_nums))
                embed_datas = np.reshape(np.concatenate(embed_datas, axis=0),
                                         (-1, feature_nums // 32))
                df_c = pd.DataFrame(complete_datas)
                df_i = pd.DataFrame(imitate_datas)
                df_e = pd.DataFrame(embed_datas)
                if not os.path.exists(outDir):
                    os.makedirs(outDir)
                # outPath = os.path.join(outDir, "infer.complete")
                df_c.to_csv(os.path.join(outDir, "generator.imitate"), index=None)
                df_i.to_csv(os.path.join(outDir, "generator.complete"), index=None)
                df_e.to_csv(os.path.join(outDir, "generator.embed"), index=None)
                print("save complete data to {}".format(outDir))
                saver.save(sess, load_model_dir + '/pretrained_g',
                           write_meta_graph=False)

        # Discriminator pretraining
        elif sess.run(epoch) <= PRETRAIN_EPOCH_d:
            for i in tqdm.tqdm(range(step_num)):
                x_batch = dataset.next()
                _, d_loss, d_summary_str = sess.run(
                    [d_train_op, model.d_loss, model.d_sum],
                    feed_dict={
                        x: x_batch,
                        is_training: True,
                        K.learning_phase(): 1
                    })
                if i % 10 == 0:
                    writer.add_summary(d_summary_str)
            print('Discriminator loss: {}'.format(d_loss))
            if sess.run(epoch) % 100 == 0:
                saver = tf.train.Saver()
                saver.save(sess, load_model_dir + '/pretrained_d',
                           write_meta_graph=False)

        # Train generator and discriminator together
        elif sess.run(epoch) < EPOCH:
            for i in tqdm.tqdm(range(step_num)):
                x_batch = dataset.next()
                _, d_loss, d_summary_str = sess.run(
                    [d_train_op, model.d_loss, model.d_sum],
                    feed_dict={
                        x: x_batch,
                        is_training: True,
                        K.learning_phase(): 1
                    })
                if i % 10 == 0:
                    writer.add_summary(d_summary_str)
                _, g_loss, g_summary_str = sess.run(
                    [g_train_op, model.g_loss, model.g_sum],
                    feed_dict={
                        x: x_batch,
                        is_training: True,
                        K.learning_phase(): 1
                    })
                if i % 10 == 0:
                    writer.add_summary(g_summary_str)
            print('Completion loss: {}'.format(g_loss))
            print('Discriminator loss: {}'.format(d_loss))
            if sess.run(epoch) % 100 == 0:
                saver = tf.train.Saver()
                saver.save(sess, load_model_dir + '/latest',
                           write_meta_graph=False)

        # Final epoch: export the completed, imitated, and embedded data.
        elif sess.run(epoch) == EPOCH:
            dataset = DataSet(train_datapath, BATCH_SIZE,
                              onepass=True, shuffle=False)
            imitate_datas = []
            complete_datas = []
            embed_datas = []
            for i in tqdm.tqdm(range(step_num + 1)):
                x_batch = dataset.next()
                batch_mask = x_batch == 0
                embed, imitation, completion = sess.run(
                    [model.encoderv_out, model.imitation, model.completion],
                    feed_dict={
                        x: x_batch,
                        is_training: False,
                        K.learning_phase(): 0
                    })
                completion = np.array(completion, dtype=np.float64)
                imitation = np.array(imitation, dtype=np.float64)
                embed = np.array(embed, dtype=np.float64)
                batch_mask = batch_mask.astype(float)
                completion = x_batch * (1 - batch_mask) + completion * batch_mask
                imitation = x_batch * (1 - batch_mask) + imitation * batch_mask
                complete_datas.append(completion)
                imitate_datas.append(imitation)
                embed_datas.append(embed)
            # saver = tf.train.Saver()
            # saver.save(sess, load_model_dir + '/complete', write_meta_graph=False)

            complete_datas = np.reshape(np.concatenate(complete_datas, axis=0),
                                        (-1, feature_nums))
            imitate_datas = np.reshape(np.concatenate(imitate_datas, axis=0),
                                       (-1, feature_nums))
            embed_datas = np.reshape(np.concatenate(embed_datas, axis=0),
                                     (-1, feature_nums // 32))
            df_c = pd.DataFrame(complete_datas)
            df_i = pd.DataFrame(imitate_datas)
            df_e = pd.DataFrame(embed_datas)
            if not os.path.exists(outDir):
                os.makedirs(outDir)
            # outPath = os.path.join(outDir, "infer.complete")
            df_c.to_csv(os.path.join(outDir, "infer.imitate"), index=None)
            df_i.to_csv(os.path.join(outDir, "infer.complete"), index=None)
            df_e.to_csv(os.path.join(outDir, "infer.embed"), index=None)
            print("save complete data to {}".format(outDir))
"features/overlap." + name + ".npy") X_refuting = gen_or_load_feats(refuting_features, h, b, "features/refuting." + name + ".npy") X_polarity = gen_or_load_feats(polarity_features, h, b, "features/polarity." + name + ".npy") X_hand = gen_or_load_feats(hand_features, h, b, "features/hand." + name + ".npy") X = np.c_[X_hand, X_polarity, X_refuting, X_overlap] return X, y if __name__ == "__main__": #Load the training dataset and generate folds d = DataSet() folds, hold_out = kfold_split(d, n_folds=10) fold_stances, hold_out_stances = get_stances_for_folds(d, folds, hold_out) print("length of fold stances :--------> ", len(fold_stances)) # Load the competition dataset competition_dataset = DataSet("competition_test") X_competition, y_competition = generate_features( competition_dataset.stances, competition_dataset, "competition") Xs = dict() ys = dict() # Load/Precompute all features now X_holdout, y_holdout = generate_features(hold_out_stances, d, "holdout") for fold in fold_stances:
def train(self, config):
    begin = time.perf_counter()
    dataset = DataSet(config.train_datapath, config.batch_size)
    test_dataset = DataSet(config.train_datapath, config.batch_size,
                           shuffle=False, onepass=True)
    # dataset = dataset
    # test_dataset = dataset
    create_conf = self.create_conf
    steps = dataset.steps * config.epoch
    print("total {} steps...".format(steps))

    # sample_dirs = os.path.join("samples", self.model_name)
    # for dir in [sample_dirs, log_dirs]:
    #     if os.path.exists(dir) == False:
    #         os.makedirs(dir)
    # gpu_conf = tf.ConfigProto()
    # gpu_conf.gpu_options.per_process_gpu_memory_fraction = config.gpu_ratio

    with tf.Session() as session:
        log_dirs = os.path.join("./logs", self.model_name)
        if not os.path.exists(log_dirs):
            os.makedirs(log_dirs)
        load_model_dir = os.path.join('./backup', self.model_name)
        if config.load_checkpoint and os.path.exists(load_model_dir):
            self.load(session, load_model_dir)
        elif os.path.exists(load_model_dir):
            shutil.rmtree(load_model_dir)
        if config.load_checkpoint is False and os.path.exists(log_dirs):
            shutil.rmtree(log_dirs)
            os.makedirs(log_dirs)
        self.writer = tf.summary.FileWriter(log_dirs, session.graph)
        tf.global_variables_initializer().run()

        # sample_batch = dataset.sample_batch()
        # sample_mask = np.float32(sample_batch > 0.0)
        # sample_path = os.path.join(sample_dirs, "{}.sample".format(self.model_name))
        # pd.DataFrame(sample_batch).to_csv(sample_path, index=False)

        for step in range(steps + 1):
            batch_data = dataset.next()
            mask = (batch_data == 0.0)
            # mask = np.float32(mask)
            if step % config.save_freq_steps != 0:
                _, loss, mse_loss, sparse_loss, rank_loss = session.run(
                    [self.apply_grads, self.loss, self.mse_loss,
                     self.sparse_loss, self.rank_loss],
                    feed_dict={
                        self.X: batch_data,
                        self.mask: mask,
                        self.is_training: True,
                        K.learning_phase(): 1
                    })
            else:
                _, summary_str, loss, mse_loss, sparse_loss, rank_loss = session.run(
                    [self.apply_grads, self.merged_summary_op, self.loss,
                     self.mse_loss, self.sparse_loss, self.rank_loss],
                    feed_dict={
                        self.X: batch_data,
                        self.mask: mask,
                        self.is_training: True,
                        K.learning_phase(): 1
                    })
            if step % config.log_freq_steps == 0:
                print("step {}th, loss: {}, mse_loss: {}, sparse_loss: {}, rank_loss: {}"
                      .format(step, loss, mse_loss, sparse_loss, rank_loss))
            if step % config.save_freq_steps == 0:
                self.writer.add_summary(summary_str, step)
                # save_dir = os.path.join(config.checkpoint_dir, self.model_name)
                self.save(session, load_model_dir, step)

        self.predict_tmp(session, steps, test_dataset, config)

        end = time.perf_counter()
        print("training {} cost time: {} mins".format(self.model_name,
                                                      (end - begin) / 60.0))
def load_data(self, filepath):
    self.DataSet = DataSet.fromTrainTest(filepath)
    self.train_ds = tfdf.keras.pd_dataframe_to_tf_dataset(
        self.DataSet.X_train, label="trk_fake")
    self.test_ds = tfdf.keras.pd_dataframe_to_tf_dataset(
        self.DataSet.X_test, label="trk_fake")
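# A hypothetical companion method (not in the original class) sketching how the
# datasets built in load_data can be consumed with TensorFlow Decision Forests;
# the choice of GradientBoostedTreesModel and num_trees are assumptions.
def train_and_evaluate(self):
    model = tfdf.keras.GradientBoostedTreesModel(num_trees=100)
    model.fit(self.train_ds)              # fit on the dataset built from X_train
    model.compile(metrics=["accuracy"])   # attach metrics so evaluate() reports them
    print(model.evaluate(self.test_ds, return_dict=True))
    return model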
def load_data(self, filepath):
    self.DataSet = DataSet.fromTrainTest(filepath)
    self.dtrain = xgb.DMatrix(self.DataSet.X_train.to_numpy(),
                              label=np.ravel(self.DataSet.y_train))
    self.dtest = xgb.DMatrix(self.DataSet.X_test.to_numpy(),
                             label=np.ravel(self.DataSet.y_test))
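# A hypothetical companion method (not in the original class) sketching how the
# DMatrix objects built in load_data can be trained on; the booster parameters,
# round count, and early-stopping setting are illustrative assumptions.
def train_booster(self):
    params = {"objective": "binary:logistic", "eta": 0.1, "max_depth": 6,
              "eval_metric": "auc"}
    return xgb.train(params, self.dtrain, num_boost_round=200,
                     evals=[(self.dtest, "test")],
                     early_stopping_rounds=20)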
p = Parameters(NN='CNN', period=24, n_filters=64, n_dense=256, n_cl=6,
               batch_size=256, n_epochs=50, learning_rate=0.0005,
               loss_type='mean_squared_error')
print(datestr)
titolo = str(p.str) + '_date-' + datestr
print(titolo)

train = DataSet('Dataset/ARS_DLR_DataSet_V2.mat', seed=1, parameters=p,
                reduced_dataset=True)
test = DataSet('Dataset/ARS_DLR_Benchmark_Data_Set.mat', seed=1, parameters=p,
               reduced_dataset=True, labtab=train.lab_tab)
assert str(train.lab_tab) == str(test.lab_tab)

model = MLModel(train, test, titolo)
model.init(n_filters=p.n_filters, n_dense=p.n_dense,
           learning_rate=p.learning_rate, loss_type=p.loss_type)
def create_dataset(file, attrnames, target, values):
    examples = formatfile(file)
    attrs = [k for k in range(len(examples[0]))]  # attribute indices
    inputs = create_input(attrs, target)
    return DataSet(file, examples, inputs, attrs, target, attrnames, values)
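# A minimal usage sketch for create_dataset(); the file name, attribute names,
# target index, and values argument below are illustrative assumptions.
ds = create_dataset("weather.csv",
                    attrnames=["Outlook", "Temp", "Humidity", "Wind", "Play"],
                    target=4,
                    values=None)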
import os
import datetime
import pickle

import numpy as np
import tensorflow as tf
from sklearn.metrics import f1_score
from sklearn.metrics import accuracy_score

from Settings import Config
from Dataset import DataSet
from network import GATQA

os.environ['CUDA_VISIBLE_DEVICES'] = '6,7'

FLAGS = tf.app.flags.FLAGS
tf.app.flags.DEFINE_boolean('train', True, 'set True to train')

conf_ = Config()
domain = conf_.domain
dataset = DataSet()
traindata = pickle.load(
    open('./data/' + domain + '/' + domain + '-train.pkl', 'rb'))
testdata = pickle.load(
    open('./data/' + domain + '/' + domain + '-test.pkl', 'rb'))


def evaluation(y_pred, y_true):
    f1_s = f1_score(y_true, y_pred, average='macro')
    accuracy_s = accuracy_score(y_true, y_pred)
    return f1_s, accuracy_s


def train(sess, setting):
    with sess.as_default():
        initializer = tf.contrib.layers.xavier_initializer()
if __name__ == '__main__':
    args = parse_args()
    num_epochs = args.epochs
    batch_size = args.batch_size
    mf_dim = args.num_factors
    layers = eval(args.layers)
    num_negatives = args.num_neg
    atten_prob = args.atten_prob
    topK = 10
    evaluation_threads = 1  # mp.cpu_count()
    print("NeuMF arguments: %s " % args)

    # Loading data
    t1 = time()
    dataset = DataSet(args.path + args.dataset)
    train, testRatings, testNegatives = (dataset.trainMatrix,
                                         dataset.testRatings,
                                         dataset.testNegatives)
    num_users, num_items = train.shape
    print("Load data done [%.1f s]. #user=%d, #item=%d, #train=%d, #test=%d"
          % (time() - t1, num_users, num_items, train.nnz, len(testRatings)))

    # Build model
    model = get_model(num_users, num_items, mf_dim, layers, atten_prob)
    model.compile(optimizer=Adam(), loss='binary_crossentropy')

    # Init performance
    (hits, ndcgs) = evaluate_model(model, testRatings, testNegatives, topK,
                                   evaluation_threads)
    hr, ndcg = np.array(hits).mean(), np.array(ndcgs).mean()
data = scaler.fit_transform(df)
df = pd.DataFrame(data=data, columns=df.columns)
columns = list(df.columns)
if FLAGS.method is not None:
    columns = select_gene(columns, df, method=FLAGS.method,
                          feature_sel=FLAGS.feature_sel, save=False)
    df = df[columns]
data = df.values
samples, feature_nums = data.shape
train_size = int(np.round(samples * 0.8))
val_size = samples - train_size

df_shuffle = df.sample(frac=1).reset_index(drop=True)
df_train = df_shuffle.iloc[0:train_size]
df_val = df_shuffle.iloc[train_size:]

train_dataset = DataSet(df_train, FLAGS.batch_size)
val_dataset = DataSet(df_val, FLAGS.batch_size, shuffle=False)
test_dataset = DataSet(df, FLAGS.batch_size, shuffle=False)

model = AutoEncoder(feature_nums, num_clusters=FLAGS.num_clusters,
                    beta=FLAGS.beta, dropout=FLAGS.dropout,
                    learning_rate=FLAGS.learning_rate,
                    activation=FLAGS.activation,
                    model_name=FLAGS.model_name)
model.train(train_dataset, val_dataset, test_dataset, columns, FLAGS)
def predict(LEARNING_RATE1, LEARNING_RATE2, PRETRAIN_EPOCH, PRETRAIN_EPOCH_d,
            dropout_encoder, dropout_decoder, EPOCH, model_name, is_bn, is_log,
            is_mask, is_binary, is_dependent, is_after, is_before, is_bn_d,
            is_approximate, model="separate", load_checkpoint=False,
            train_datapath=r"F:/project/simulation_data/drop60_p.train",
            feature_nums=15549, dropout_sign=1.0):
    BATCH_SIZE = 64
    outDir = os.path.join("F:/project/simulation_data/drop60", model_name)
    model = "separate"

    x = tf.placeholder(tf.float32, [None, feature_nums], name="input_data")
    # completion = tf.placeholder(tf.float32, [BATCH_SIZE, feature_nums])
    is_training = tf.placeholder(tf.bool, [], name="is_training")
    # completed = tf.placeholder(tf.float32, [None, feature_nums], name="generator_out")
    learning_rate = tf.placeholder(tf.float32, shape=[])

    model_dict = {
        "simple": Simple_model,
        "approximate": Softgum_app,
        "separate": Simple_separate
    }
    Network = model_dict[model]
    model = Network(x, is_training, batch_size=BATCH_SIZE,
                    feature_num=feature_nums,
                    dropout_value=dropout_value,
                    dropout_sign=dropout_sign,
                    is_bn=is_bn, is_log=is_log, is_mask=is_mask,
                    is_binary=is_binary, is_dependent=is_dependent,
                    is_after=is_after, is_before=is_before,
                    is_approximate=is_approximate)
    sess = tf.Session()
    global_step = tf.Variable(0, name='global_step', trainable=False)
    epoch = tf.Variable(0, name='epoch', trainable=False)

    with tf.name_scope("adam_optimizer"):
        opt = tf.train.AdamOptimizer(learning_rate=learning_rate)
        gv_train_op = opt.minimize(model.gv_loss, global_step=global_step,
                                   var_list=model.g_variables)
        g_train_op = opt.minimize(model.g_loss, global_step=global_step,
                                  var_list=model.g_variables)
        d_train_op = opt.minimize(model.d_loss, global_step=global_step,
                                  var_list=model.d_variables)

    init_op = tf.global_variables_initializer()
    sess.run(init_op)

    saver = tf.train.Saver()
    load_model_dir = os.path.join('./backup', model_name)
    ckpt = tf.train.get_checkpoint_state(load_model_dir)
    if ckpt and ckpt.model_checkpoint_path:
        saver.restore(sess, ckpt.model_checkpoint_path)

    # each epoch has step_num steps
    dataset = DataSet(train_datapath, BATCH_SIZE, onepass=True, shuffle=False)
    step_num = dataset.steps
    imitate_datas = []
    complete_datas = []
    embed_datas = []
    for i in tqdm.tqdm(range(step_num + 1)):
        x_batch = dataset.next()
        mask = x_batch == 0
        embed, imitation, completion = sess.run(
            [model.encoderv_out, model.imitation, model.completion],
            feed_dict={
                x: x_batch,
                is_training: False,
                learning_rate: LEARNING_RATE2,
                K.learning_phase(): 0
            })
        completion = np.array(completion, dtype=np.float64)
        imitation = np.array(imitation, dtype=np.float64)
        embed = np.array(embed, dtype=np.float64)
        mask = mask.astype(float)
        completion = x_batch * (1 - mask) + completion * mask
        imitation = x_batch * (1 - mask) + imitation * mask
        complete_datas.append(completion)
        imitate_datas.append(imitation)
        embed_datas.append(embed)

    complete_datas = np.reshape(np.concatenate(complete_datas, axis=0),
                                (-1, feature_nums))
    imitate_datas = np.reshape(np.concatenate(imitate_datas, axis=0),
                               (-1, feature_nums))
    embed_datas = np.reshape(np.concatenate(embed_datas, axis=0),
                             (-1, feature_nums // 32))
    df_c = pd.DataFrame(complete_datas)
    df_i = pd.DataFrame(imitate_datas)
    df_e = pd.DataFrame(embed_datas)
    if not os.path.exists(outDir):
        os.makedirs(outDir)
    # outPath = os.path.join(outDir, "infer.complete")
    df_c.to_csv(os.path.join(outDir, "infer.imitate"), index=None)
    df_i.to_csv(os.path.join(outDir, "infer.complete"), index=None)
    df_e.to_csv(os.path.join(outDir, "infer.embed"), index=None)
    print("save complete data to {}".format(outDir))
def load_data(self, filepath):
    self.DataSet = DataSet.fromTrainTest(filepath)
    print(self.DataSet)