def train(opt):
    model = DPCNN(opt["hidden_size"], opt["feature_map"], opt["seq_len"],
                  opt["num_class"], opt["vocab_size"], opt["drop_rate"]).to(device)
    with open("/home/FuDawei/NLP/Text_Classification/dataset/train_text.json", "r") as f:
        train_text = json.load(f)
    with open("/home/FuDawei/NLP/Text_Classification/dataset/train_star.json", "r") as f:
        train_star = json.load(f)
    with open("/home/FuDawei/NLP/Text_Classification/dataset/dev_text.json", "r") as f:
        dev_text = json.load(f)
    with open("/home/FuDawei/NLP/Text_Classification/dataset/dev_star.json", "r") as f:
        dev_star = json.load(f)
    optimizer = optim.Adam(model.parameters(), opt["lr"])
    cnt = 0
    total_loss = 0
    for ep in range(opt["epoch"]):
        for text, star in batch_generator(train_text, train_star, opt["batch_size"]):
            text, star = torch.tensor(text).to(device), torch.tensor(star).to(device)
            logits = model(text)
            loss = model.compute_loss(logits, star)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            cnt += 1
            total_loss += loss.item()
            if cnt % 100 == 0:
                print(total_loss / 100)
                total_loss = 0
            if cnt % 1000 == 0:
                model.eval()
                preds = []
                with torch.no_grad():  # no gradients needed during evaluation
                    for text, _ in batch_generator(dev_text, dev_star, opt["batch_size"]):
                        text = torch.tensor(text).to(device)
                        logits = model(text)
                        pred = model.compute_res(logits).tolist()
                        preds.extend(pred)
                a = confusion_matrix(dev_star, preds)
                print(a)
                # accuracy = trace of the confusion matrix over its total
                right, total = 0, 0  # renamed from `all`, which shadows the built-in
                for idx, item in enumerate(a):
                    right += item[idx]
                    total += sum(item)
                final_rate = right / total
                print(final_rate)
                model.train()
def train(opt):
    base_dir = "/home/FuDawei/NLP/Pretrained_model/dataset/"
    model = Elmo(opt["char_size"], opt["char_emb_size"], opt["embedding_size"],
                 opt["hidden_size"], opt["vocab_size"], opt["drop_rate"]).to(device)
    with open(base_dir + "word2id.json", "r") as f:
        word2id = json.load(f)
    with open(base_dir + "elmo_lower_data.json", "r") as f:
        data = json.load(f)
    optimizer = optim.Adam(model.parameters(), lr=opt["lr"])
    cnt = 0
    total_loss = 0  # running loss, printed every 100 steps
    for ep in range(opt["epoch"]):
        for batch_data in batch_generator(data, opt["batch_size"]):
            (forward_res, forward_mask, forward_ground,
             backward_res, backward_mask, backward_ground) = token_elmo(batch_data, word2id)
            forward_input = torch.tensor(forward_res).long().to(device)
            forward_mask = torch.tensor(forward_mask).to(device)
            forward_ground = torch.tensor(forward_ground).long().to(device)
            backward_input = torch.tensor(backward_res).long().to(device)
            backward_mask = torch.tensor(backward_mask).to(device)
            backward_ground = torch.tensor(backward_ground).long().to(device)
            forward_output, backward_output = model(forward_input, forward_mask,
                                                    backward_input, backward_mask)
            loss = model.compute_loss(forward_output, backward_output,
                                      forward_ground, backward_ground)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            cnt += 1
            total_loss += loss.item()
            if cnt % 100 == 0:
                print(total_loss / 100)
                total_loss = 0
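# Both trainers above rely on an external `batch_generator`: the DPCNN one calls it
# with two parallel lists plus a batch size, the Elmo one with a single list. A
# minimal sketch consistent with both call shapes (inferred from the call sites,
# not taken from the original codebase):
def batch_generator(*args):
    """batch_generator(seq1, ..., seqK, batch_size) -> aligned minibatch slices.

    Yields the slice itself for a single sequence, or a tuple of slices for
    several parallel sequences.
    """
    *seqs, batch_size = args
    for start in range(0, len(seqs[0]), batch_size):
        chunk = tuple(s[start:start + batch_size] for s in seqs)
        yield chunk[0] if len(chunk) == 1 else chunk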
def infer(sess, model, X):
    # TODO: assert that current graph represents the model?
    # NOTE: `X` is iterated as data below but also used as the feed_dict key,
    # which only works if it is the input placeholder; the call sites pass raw
    # arrays, so the feed key likely needs to be the model's input placeholder.
    batches = batch_generator([X], batch_size=128, forever=False, do_shuffle=False)
    probs = []
    for batch, in batches:
        probs.append(sess.run(model.softmax,
                              feed_dict={X: batch, model.keep_prob: 1.0}))
    return probs
def predict(self, features):
    """The universal interface for testing a quantity of data.

    :param features: the features fed into the network to get predictions.
    """
    data = self._data_decorator(features)
    batches = batch_generator(data, self.batch_size)
    preds = self._run_net_with_batches(batches, mode="predict")
    return preds
def new_run(X_train, y_train, X_val, y_val, model_savename):
    """Trains and saves a model with given training data."""
    tf.reset_default_graph()
    batches = batch_generator((X_train, y_train), batch_size=128)
    with tf.Session() as sess:
        # Create the model
        X = tf.placeholder(tf.float32, (None, IMAGE_SHAPE[0], IMAGE_SHAPE[1], 3))
        target = tf.placeholder(tf.float32, (None, NUM_CLASSES))
        model = Fishmodel(X, num_classes=NUM_CLASSES)
        saver = tf.train.Saver(tf.global_variables())

        # Cross-entropy loss (keyword arguments required: positional
        # logits/labels are ambiguous and rejected by newer TF releases)
        cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
            logits=model.logits, labels=target, name="cross_entropy")
        loss = tf.reduce_mean(cross_entropy, name="cross_entropy_mean")

        # Accuracy (cast to float32; averaging a uint8 tensor truncates to 0)
        corrects = tf.equal(tf.argmax(model.softmax, 1), tf.argmax(target, 1))
        accuracy = tf.reduce_mean(tf.cast(corrects, tf.float32))

        # Summary reports for TensorBoard
        tf.scalar_summary("Mean Cross Entropy Loss", loss)
        tf.scalar_summary("Accuracy", accuracy)
        merged_summary = tf.merge_all_summaries()
        summary_writer = tf.train.SummaryWriter(SUMMARY_DIR, sess.graph)

        global_step = tf.Variable(0, name="global_step", trainable=False)
        train_step = tf.train.AdamOptimizer(1e-3).minimize(loss, global_step=global_step)

        sess.run(tf.global_variables_initializer())
        print("Starting training...")
        for _ in range(int(1e7)):
            X_batch, y_batch = next(batches)
            _, summary, i = sess.run(
                [train_step, merged_summary, global_step],
                feed_dict={X: X_batch, target: y_batch, model.keep_prob: 0.5})
            summary_writer.add_summary(summary, i)
            if i > 100000 and i % 1000 == 0:
                probs_val = infer(sess, model, X_val)
                # TODO: compare with y_val to see if we should stop early
        # TODO: run accuracy on the whole validation set
        probs_val = infer(sess, model, X_val)
        # TODO: define tf ops for this total accuracy
        saver.save(sess, model_savename)
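# `infer` and `new_run` above share a `batch_generator(arrays, batch_size, forever,
# do_shuffle)` that yields tuples of aligned slices and, for training, cycles
# forever. A hedged sketch matching those call sites (the real helper may differ):
import numpy as np

def batch_generator(arrays, batch_size=128, forever=True, do_shuffle=True):
    """Yield tuples of aligned minibatch slices, optionally shuffled and endless."""
    n = len(arrays[0])
    while True:
        order = np.random.permutation(n) if do_shuffle else np.arange(n)
        for start in range(0, n, batch_size):
            sel = order[start:start + batch_size]
            yield tuple(np.asarray(a)[sel] for a in arrays)
        if not forever:
            break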
def transform(self, features):
    """The universal interface for extracting high-level features with the network.

    :param features: the features fed into the network to extract high-level features.
    """
    data = self._data_decorator(features)
    batches = batch_generator(data, self.batch_size)
    new_features = self._run_net_with_batches(batches, mode="transform")
    return new_features
def batch_test():
    with open(TEST_DATA_PATH, encoding='utf-8') as f:
        text = f.read()
    tc = util.TextConverter(text, -1)
    g = util.batch_generator(tc.text_to_arr(text), TEST_BATCH_SIZE, TEST_SEQ_SIZE)
    x_batch, y_batch = next(g)
    print(x_batch.shape, x_batch)
    for arr in x_batch:
        print(tc.arr_to_text(arr))
    print(y_batch.shape, y_batch)
    for arr in y_batch:
        print(tc.arr_to_text(arr))
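# `util.batch_generator(arr, batch_size, seq_size)` here (and in the char-RNN
# snippets below) behaves like the classic char-RNN generator: reshape the encoded
# text into `batch_size` rows and yield (x, y) windows where y is x shifted left
# by one character. A sketch assuming that common convention (the project's util
# may differ in details):
import numpy as np

def batch_generator(arr, batch_size, seq_size):
    """Yield (x, y) char-RNN batches; y is x rolled left by one time step."""
    arr = np.asarray(arr)
    n_per_row = (len(arr) // (batch_size * seq_size)) * seq_size
    arr = arr[:batch_size * n_per_row].reshape((batch_size, n_per_row))
    while True:  # endless stream, as the training loops expect
        for start in range(0, n_per_row, seq_size):
            x = arr[:, start:start + seq_size]
            y = np.roll(x, -1, axis=1)  # next-character targets
            yield x, y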
def fit(self, X, X_vali=None):
    self.n_visible = X.shape[1]
    self._build_model()
    init = tf.global_variables_initializer()
    self.sess = tf.Session()
    self.sess.run(init)
    for e in range(self.n_epoches):
        if e > 5:
            self.momentum = 0.9  # raise momentum once training has stabilized
        data = np.array(X)
        for batch in batch_generator(self.batch_size, data):
            self.partial_fit(batch)
        if e % 5 == 0 and X_vali is not None:
            print('gap of epoch', e, 'is:',
                  self.free_energy_gap(X[:500, :], X_vali[:500, :]))
    return self
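# The RBM above calls `batch_generator(self.batch_size, data)` with the batch size
# first. A minimal sketch under that argument order (an assumption inferred from
# this call site only):
import numpy as np

def batch_generator(batch_size, data, shuffle=True):
    """Yield row minibatches of `data`; note that batch_size comes first."""
    idx = np.random.permutation(len(data)) if shuffle else np.arange(len(data))
    for start in range(0, len(data), batch_size):
        yield data[idx[start:start + batch_size]]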
def model_test():
    with open(TEST_DATA_PATH, encoding='utf-8') as f:
        text = f.read()
    tc = util.TextConverter(text, -1)
    g = util.batch_generator(tc.text_to_arr(text), TEST_BATCH_SIZE, TEST_SEQ_SIZE)
    # Model loading test
    rnn_model = model.CharRNN(output_size=tc.vocab_size, batch_size=TEST_BATCH_SIZE,
                              seq_size=TEST_SEQ_SIZE, lstm_size=TEST_LSTM_SIZE,
                              num_layers=TEST_NUM_LAYERS, learning_rate=TEST_RATE,
                              train_keep_prob=TEST_KEEP_PROB)
    x_batch, y_batch = next(g)
    sess = rnn_model.session
    state = sess.run(rnn_model.initial_state)
    feed = {rnn_model.input: x_batch,
            rnn_model.target: y_batch,
            rnn_model.initial_state: state,
            rnn_model.keep_prob: TEST_KEEP_PROB}
    # Model input pipeline test
    one_hot_input = sess.run(rnn_model.one_hot_input, feed_dict=feed)
    print(one_hot_input.shape, one_hot_input)
def main(_):
    model_path = os.path.join('model', FLAGS.name)
    if not os.path.exists(model_path):
        os.makedirs(model_path)
    with open(FLAGS.input_file_path, 'r', encoding='utf-8') as f:
        text = f.read()
    tc = util.TextConverter(text, FLAGS.max_vocab)
    tc.save_vocab(os.path.join('vocab', FLAGS.name))
    output_size = tc.vocab_size
    # distinct name so we don't shadow util.batch_generator
    batches = util.batch_generator(tc.text_to_arr(text), FLAGS.batch_size, FLAGS.seq_size)
    model = CharRNN(output_size=output_size, batch_size=FLAGS.batch_size,
                    seq_size=FLAGS.seq_size, lstm_size=FLAGS.lstm_size,
                    num_layers=FLAGS.num_layers, learning_rate=FLAGS.learning_rate,
                    train_keep_prob=FLAGS.train_keep_prob)
    model.train(batches, max_steps=FLAGS.max_steps, model_save_path=model_path,
                save_with_steps=FLAGS.save_every_n_steps,
                log_with_steps=FLAGS.log_every_n_steps)
def fit(self, features, labels, verbose=False):
    """The universal interface for fitting a quantity of data.

    :param features: the features used to fit the model.
    :param labels: the labels used to fit the model.
    """
    data = self._data_decorator(features, labels)
    self._global_variables_initialize(data)
    acc_global_best = -1.1  # below any reachable accuracy, so the first epoch always improves
    window_step = 0
    global_step = 0
    while window_step < self.tol_window_size:
        batches = batch_generator(data, self.batch_size)
        acc_local = self._run_net_with_batches(batches, mode="train")
        global_step += 1
        if verbose:
            print(self.name + "@%d obtain accuracy: %.2f%%" % (global_step, 100 * acc_local))
        if acc_local - acc_global_best >= self.tol:
            # improvement of at least `tol`: reset the patience window
            acc_global_best = acc_local
            window_step = 0
            self._save_model()
        else:
            window_step += 1
        if global_step >= self.max_iter:
            warning_msg = " ".join(("The", self.name, "parameters did not",
                                    "converge after %d iterations!"))
            print(warning_msg % self.max_iter, file=sys.stderr)
            break
    self._load_model()  # restore the best checkpoint seen so far
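# The while-loop in `fit` above implements tolerance-window early stopping: keep
# the best accuracy, reset the window on any improvement of at least `tol`, and
# stop after `tol_window_size` epochs without one. A self-contained sketch of just
# that pattern (toy accuracy sequence; names are illustrative):
def early_stop_steps(accuracies, tol=1e-4, tol_window_size=5, max_iter=200):
    """Return the number of epochs run under the tolerance-window rule."""
    best, window, step = -1.1, 0, 0
    for acc in accuracies:
        step += 1
        if acc - best >= tol:
            best, window = acc, 0   # improvement: reset the patience window
        else:
            window += 1
        if window >= tol_window_size or step >= max_iter:
            break
    return step

print(early_stop_steps([0.6, 0.7, 0.71, 0.71, 0.71, 0.71, 0.71, 0.71]))  # -> 8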
def evaluate(data, X, Y, model, evaluateL2, evaluateL1, batch_size, args, yscaler):
    model.eval()
    total_loss = 0
    total_loss_l1 = 0
    n_samples = 0
    predict = None
    test = None
    seq_len = args.seq_len
    obs_len = args.num_obs_to_train
    for step in range(args.step_per_epoch):
        Xeva, yeva, Xf, yf, batch = util.batch_generator(
            X, Y, obs_len, seq_len, args.batch_size)
        Xeva = torch.from_numpy(Xeva).float()
        yeva = torch.from_numpy(yeva).float()
        Xf = torch.from_numpy(Xf).float()
        yf = torch.from_numpy(yf).float()
        # The model is univariate: evaluate one feature channel at a time.
        for i in range(Xeva.shape[2]):
            yeva = Xeva[:, :, i]
            yf = Xf[:, :, i]
            ypred = model(yeva)
            # undo per-series normalization before scoring
            scale = data.scale[batch]
            scale = scale.view([scale.size(0), 1])
            ypred = ypred * scale
            yf = yf * scale
            ypred = ypred.data.numpy()
            if yscaler is not None:
                ypred = yscaler.inverse_transform(ypred)
            yfs = yf.shape
            ypred = ypred.ravel().reshape(yfs[0], yfs[1])
            ypred = torch.Tensor(ypred)
            yf = torch.Tensor(yf)
            if torch.isnan(yf).any():
                continue
            if predict is None:
                predict = ypred
                test = yf
            else:
                predict = torch.cat((predict, ypred))
                test = torch.cat((test, yf))
            total_loss += evaluateL2(ypred, yf).item()
            total_loss_l1 += evaluateL1(ypred, yf).item()
            n_samples += yf.size(0)
            # n_samples += (yf.size(0) * data.m)
    rse = math.sqrt(total_loss / n_samples) / data.rse
    rae = (total_loss_l1 / n_samples) / data.rae
    predict = predict.data.cpu().numpy()
    Ytest = test.data.cpu().numpy()
    sigma_p = predict.std(axis=0)
    sigma_g = Ytest.std(axis=0)
    mean_p = predict.mean(axis=0)
    mean_g = Ytest.mean(axis=0)
    index = (sigma_g != 0)  # skip constant series when averaging correlations
    correlation = ((predict - mean_p) * (Ytest - mean_g)).mean(axis=0) / (sigma_p * sigma_g)
    correlation = correlation[index].mean()
    return rse, rae, correlation
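# The TPA-LSTM evaluate/train loops call `util.batch_generator(X, Y, obs_len,
# seq_len, batch_size)` and expect five values back: an observed window, its
# targets, the following forecast-horizon window, the horizon targets, and the
# sampled series indices (used to look up `data.scale[batch]`). A sketch of a
# random-window sampler consistent with that contract (an assumption, not
# necessarily the repo's exact code):
import numpy as np

def batch_generator(X, y, num_obs_to_train, seq_len, batch_size):
    """Sample one random minibatch of training/evaluation windows.

    X: (num_series, num_periods, num_features); y: (num_series, num_periods).
    """
    num_ts, num_periods, _ = X.shape
    batch = np.random.choice(np.arange(num_ts), batch_size)
    t = np.random.randint(num_obs_to_train, num_periods - seq_len)
    X_obs = X[batch, t - num_obs_to_train:t, :]
    y_obs = y[batch, t - num_obs_to_train:t]
    X_fut = X[batch, t:t + seq_len, :]
    y_fut = y[batch, t:t + seq_len]
    return X_obs, y_obs, X_fut, y_fut, batch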
def train(Data, args):
    '''
    Args:
    - X (array like): shape (num_samples, num_features, num_periods)
    - y (array like): shape (num_samples, num_periods)
    - epoches (int): number of epochs to run
    - step_per_epoch (int): steps per epoch to run
    - seq_len (int): output horizon
    - likelihood (str): what type of likelihood to use, default is gaussian
    - num_skus_to_show (int): how many skus to show in test phase
    - num_results_to_sample (int): how many samples in test phase as prediction
    '''
    # `size_average=False` is deprecated; reduction='sum' is the equivalent
    evaluateL2 = nn.MSELoss(reduction='sum')
    evaluateL1 = nn.L1Loss(reduction='sum')
    if args.L1Loss:
        criterion = nn.L1Loss(reduction='sum')
    else:
        criterion = nn.MSELoss(reduction='sum')
    yscaler = None
    if args.standard_scaler:
        yscaler = util.StandardScaler()
    elif args.log_scaler:
        yscaler = util.LogScaler()
    elif args.mean_scaler:
        yscaler = util.MeanScaler()
    elif args.max_scaler:
        yscaler = util.MaxScaler()
    model = TPALSTM(1, args.seq_len, args.hidden_size,
                    args.num_obs_to_train, args.n_layers)
    optimizer = Adam(model.parameters(), lr=args.lr)
    random.seed(2)
    # rearrange data to (num_samples, num_periods, num_features)
    Xtr = np.asarray(Data.train[0].permute(2, 0, 1))
    ytr = np.asarray(Data.train[1].permute(1, 0))
    Xte = np.asarray(Data.test[0].permute(2, 0, 1))
    yte = np.asarray(Data.test[1].permute(1, 0))
    Xeva = np.asarray(Data.valid[0].permute(2, 0, 1))
    yeva = np.asarray(Data.valid[1].permute(1, 0))
    num_ts, num_periods, num_features = Xtr.shape
    if yscaler is not None:
        ytr = yscaler.fit_transform(ytr)
    # training
    seq_len = args.seq_len
    obs_len = args.num_obs_to_train
    progress = ProgressBar()
    best_val = np.inf
    losses = []
    for epoch in progress(range(args.num_epoches)):
        epoch_start_time = time.time()
        model.train()
        total_loss = 0
        n_samples = 0
        for step in range(args.step_per_epoch):
            Xtrain, ytrain, Xf, yf, batch = util.batch_generator(
                Xtr, ytr, obs_len, seq_len, args.batch_size)
            Xtrain = torch.from_numpy(Xtrain).float()
            ytrain = torch.from_numpy(ytrain).float()
            Xf = torch.from_numpy(Xf).float()
            yf = torch.from_numpy(yf).float()
            # the model is univariate: one optimization step per feature channel
            for i in range(num_features):
                ytrain = Xtrain[:, :, i]
                yf = Xf[:, :, i]
                ypred = model(ytrain)
                scale = Data.scale[batch]
                scale = scale.view([scale.size(0), 1])
                loss = criterion(ypred * scale, yf * scale)
                losses.append(loss.item())
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                total_loss += loss.item()
                n_samples += ypred.size(0)
        train_loss = total_loss / n_samples
        val_loss, val_rae, val_corr = evaluate(Data, Xeva, yeva, model, evaluateL2,
                                               evaluateL1, args.batch_size, args, yscaler)
        print('| end of epoch {:3d} | time: {:5.2f}s | train_loss {:5.4f} | '
              'valid rse {:5.4f} | valid rae {:5.4f} | valid corr {:5.4f}'.format(
                  epoch, (time.time() - epoch_start_time), train_loss,
                  val_loss, val_rae, val_corr))
        # Save the model if the validation loss is the best we've seen so far.
        if val_loss < best_val:
            with open(args.save, 'wb') as f:
                torch.save(model, f)
            best_val = val_loss
        if epoch % 5 == 0:
            test_acc, test_rae, test_corr = evaluate(Data, Xte, yte, model, evaluateL2,
                                                     evaluateL1, args.batch_size,
                                                     args, yscaler)
            print("test rse {:5.4f} | test rae {:5.4f} | test corr {:5.4f}".format(
                test_acc, test_rae, test_corr))
# feature length
feature_length = df_train.shape[1]
hp.feature_length = feature_length
# feature name list
feature_cols = df_train.columns.tolist()
# build the feature2field dict
feature2field_dict, field_list = get_feature2field_dict(feature_cols, hp.prefix_sep)
hp.field_num = len(field_list)
# number of training samples
train_num = df_train.shape[0]
# data generator
batch_gen = batch_generator([df_train.values, train_labels], hp.batch_size)
# initialize FFM model
logging.info('initialize FFM model')
fm_model = FFM(hp, feature2field_dict)
fm_model.build_graph()
# begin session
logging.info('# Session')
saver = tf.train.Saver(max_to_keep=hp.max_to_keep)
with tf.Session() as sess:
    # restore from the latest checkpoint, if any
    ckpt = tf.train.latest_checkpoint(hp.logdir)
    if ckpt is None:
def main():
    N = 10000
    d = 250
    alpha = np.ones((d,))
    alpha[d // 2:] = 10.0  # integer division; `d/2` was Python 2 syntax
    sigma2 = 1.0
    X = np.random.rand(N, d)
    w, y = simulate(X, alpha, sigma2)

    batch_size = 64
    batch_X = tf.placeholder(tf.float32, (batch_size, d), name="X")
    batch_y = tf.placeholder(tf.float32, (batch_size,), name="y")
    mf = bf.mean_field.MeanFieldInference(linear_ard_joint_density,
                                          batch_X=batch_X, batch_y=batch_y, N=N)
    a0 = 1.0
    b0 = 1.0
    c0 = 1.0
    d0 = 1.0
    alpha_default = np.ones((d,), dtype=np.float32) * a0 / b0
    mf.add_latent("alpha",
                  1 / np.sqrt(alpha_default),
                  1e-6 * np.ones((d,), dtype=np.float32),
                  bf.transforms.exp_reciprocal,
                  shape=(d,))
    sigma2_default = np.array(d0 / (c0 + 1)).astype(np.float32)
    mf.add_latent("sigma2",
                  np.sqrt(sigma2_default),
                  1e-6,
                  bf.transforms.square,
                  shape=())
    mf.add_latent("w",
                  tf.random_normal([d,], stddev=1.0, dtype=tf.float32),
                  1e-6 * np.ones((d,), dtype=np.float32),
                  shape=(d,))

    elbo = mf.build_stochastic_elbo(n_eps=5)
    sigma2s = mf.get_posterior_samples("sigma2")
    #alphas = mf.get_posterior_samples("alpha")
    alpha_mean_var = mf.latents["alpha"]["q_mean"]
    alpha_stddev_var = mf.latents["alpha"]["q_stddev"]
    alpha_var = mf.latents["alpha"]["samples"][0]
    train_step = tf.train.AdamOptimizer(0.01).minimize(-elbo)
    debug = tf.add_check_numerics_ops()
    init = tf.initialize_all_variables()

    merged = tf.merge_all_summaries()
    sess = tf.Session()
    writer = tf.train.SummaryWriter("/tmp/ard_logs", sess.graph_def)
    sess.run(init)

    for i, batch_xs, batch_ys in batch_generator(X, y, 64, max_steps=20000):
        fd = mf.sample_stochastic_inputs()
        fd[batch_X] = batch_xs
        fd[batch_y] = batch_ys
        (elbo_val, sigma2s_val, alpha_mean,
         alpha_stddev, alpha_val) = sess.run([elbo, sigma2s, alpha_mean_var,
                                              alpha_stddev_var, alpha_var],
                                             feed_dict=fd)
        print("step %d elbo %.2f sigma2 %.2f" % (i, elbo_val, np.mean(sigma2s_val)))
        summary_str = sess.run(merged, feed_dict=fd)
        writer.add_summary(summary_str, i)
        try:
            sess.run(debug, feed_dict=fd)
        except Exception:  # a numerics check failed: inspect the offending entries
            bad = ~np.isfinite(alpha_val)
            print(alpha_mean[bad])
            print(alpha_stddev[bad])
            print(alpha_val[bad])
        sess.run(train_step, feed_dict=fd)
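# The loop above consumes `batch_generator(X, y, 64, max_steps=20000)` as a stream
# of (step_index, X_batch, y_batch) triples. A minimal sketch of such a generator
# (signature inferred from this call site alone):
import numpy as np

def batch_generator(X, y, batch_size, max_steps):
    """Yield (i, X_batch, y_batch) for max_steps steps, cycling over the data."""
    n = X.shape[0]
    for i in range(max_steps):
        sel = np.arange(i * batch_size, (i + 1) * batch_size) % n  # wrap around
        yield i, X[sel], y[sel]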
def train(X, y, args):
    '''
    Args:
    - X (array like): shape (num_samples, num_features, num_periods)
    - y (array like): shape (num_samples, num_periods)
    - epoches (int): number of epochs to run
    - step_per_epoch (int): steps per epoch to run
    - seq_len (int): output horizon
    - likelihood (str): what type of likelihood to use, default is gaussian
    - num_skus_to_show (int): how many skus to show in test phase
    - num_results_to_sample (int): how many samples in test phase as prediction
    '''
    num_ts, num_periods, num_features = X.shape
    model = TPALSTM(1, args.seq_len, args.hidden_size,
                    args.num_obs_to_train, args.n_layers)
    optimizer = Adam(model.parameters(), lr=args.lr)
    random.seed(2)
    # select sku with most top n quantities
    Xtr, ytr, Xte, yte = util.train_test_split(X, y)
    losses = []
    cnt = 0

    yscaler = None
    if args.standard_scaler:
        yscaler = util.StandardScaler()
    elif args.log_scaler:
        yscaler = util.LogScaler()
    elif args.mean_scaler:
        yscaler = util.MeanScaler()
    elif args.max_scaler:
        yscaler = util.MaxScaler()
    if yscaler is not None:
        ytr = yscaler.fit_transform(ytr)

    # training
    seq_len = args.seq_len
    obs_len = args.num_obs_to_train
    progress = ProgressBar()
    for epoch in progress(range(args.num_epoches)):
        for step in range(args.step_per_epoch):
            Xtrain, ytrain, Xf, yf = util.batch_generator(Xtr, ytr, obs_len, seq_len,
                                                          args.batch_size)
            Xtrain = torch.from_numpy(Xtrain).float()
            ytrain = torch.from_numpy(ytrain).float()
            Xf = torch.from_numpy(Xf).float()
            yf = torch.from_numpy(yf).float()
            ypred = model(ytrain)
            loss = F.mse_loss(ypred, yf)
            losses.append(loss.item())
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

    # test
    mape_list = []
    # select skus with most top K
    X_test = Xte[:, -seq_len - obs_len:-seq_len, :].reshape((num_ts, -1, num_features))
    Xf_test = Xte[:, -seq_len:, :].reshape((num_ts, -1, num_features))
    y_test = yte[:, -seq_len - obs_len:-seq_len].reshape((num_ts, -1))
    yf_test = yte[:, -seq_len:].reshape((num_ts, -1))
    if yscaler is not None:
        # NOTE: kept from the original, but refitting the scaler on test data
        # leaks test statistics; transform() with the training fit is safer.
        y_test = yscaler.fit_transform(y_test)
    X_test = torch.from_numpy(X_test).float()
    y_test = torch.from_numpy(y_test).float()
    Xf_test = torch.from_numpy(Xf_test).float()
    ypred = model(y_test)
    ypred = ypred.data.numpy()
    if yscaler is not None:
        ypred = yscaler.inverse_transform(ypred)
    ypred = ypred.ravel()

    loss = np.sqrt(np.sum(np.square(yf_test - ypred)))
    print("losses: ", loss)

    if args.show_plot:
        plt.figure(1, figsize=(20, 5))
        # forecast horizon starts right after the observed window
        plt.plot([k + obs_len for k in range(seq_len)], ypred, "r-")
        plt.title('Prediction uncertainty')
        yplot = yte[-1, -seq_len - obs_len:]
        plt.plot(range(len(yplot)), yplot, "k-")
        plt.legend(["prediction", "true", "P10-P90 quantile"], loc="upper left")
        ymin, ymax = plt.ylim()
        plt.vlines(obs_len, ymin, ymax, color="blue",
                   linestyles="dashed", linewidth=2)
        plt.ylim(ymin, ymax)
        plt.xlabel("Periods")
        plt.ylabel("Y")
        plt.show()
    return losses, mape_list
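# This trainer uses a variant of `util.batch_generator` that returns only four
# values (no series-index array), since no per-series rescaling is done here. A
# sketch mirroring the five-value version above minus the indices (again an
# assumption about the helper, not the repo's exact code):
import numpy as np

def batch_generator(X, y, num_obs_to_train, seq_len, batch_size):
    """Sample one random minibatch of (observed, targets, future, future targets)."""
    num_ts, num_periods, _ = X.shape
    batch = np.random.choice(np.arange(num_ts), batch_size)
    t = np.random.randint(num_obs_to_train, num_periods - seq_len)
    return (X[batch, t - num_obs_to_train:t, :],
            y[batch, t - num_obs_to_train:t],
            X[batch, t:t + seq_len, :],
            y[batch, t:t + seq_len])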