def main():
    # pretrained_embedding = utils.load_word_embedding()
    num_gpu = torch.cuda.device_count()
    model = Classifier(embedding, args.lstm_hidden_dim, args.num_class,
                       args.n_layers, args.bidirectional, args.dropout_keep_prob)
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")  # for GPU usage
    if num_gpu > 1:
        model = nn.DataParallel(model)
    model.to(device)
    loss_ft.to(device)  # keep the loss on the same device as the model

    val_iter = utils.data_iter(args.seq_len, args.batch_size, args.vocab_size,
                               False, 'valid')
    for epoch in range(args.num_epochs):
        # pass vocab_size here as well, matching the valid/test iterator calls
        train_iter = utils.data_iter(args.seq_len, args.batch_size, args.vocab_size,
                                     True, 'train')
        start = time.time()
        train_loss, train_acc = train_model(model, train_iter, epoch)
        logger.info("Training elapsed time: %.4fs per epoch\n" % (time.time() - start))
        val_loss, val_acc = eval_model(model, val_iter, epoch, True)
        logger.info('--- Epoch: %d ---' % (epoch + 1))
        logger.info('Train Acc: %.2f, Train Loss: %.4f' % (100 * train_acc, train_loss))
        logger.info('Val Acc: %.2f, Val Loss: %.4f' % (100 * val_acc, val_loss))

def evaluate_loss(model, data, criterion, pad_idx=constants.PAD, cuda=True):
    model.eval()
    total_loss = 0.
    total_tgt_words = 0
    total_correct_tokens = 0
    for src_sents, tgt_sents in data_iter(data,
                                          batch_size=model.args.valid_batch_size,
                                          batch_type=model.args.batch_type,
                                          shuffle=False):
        pred_tgt_word_num = sum(len(s[1:]) for s in tgt_sents)
        src = to_input_variable(src_sents, model.vocab.src, cuda=cuda)
        tgt = to_input_variable([item[:-1] for item in tgt_sents], model.vocab.tgt, cuda=cuda)
        gold_tgt_sents, _ = to_input_variable([item[1:] for item in tgt_sents], model.vocab.tgt, cuda=cuda)
        gold_tgt_tokens_flatten = gold_tgt_sents.view(-1)

        scores = model(src, tgt)
        loss = criterion(scores, gold_tgt_tokens_flatten)

        # token accuracy, ignoring padding positions
        _, argmax_idxs = torch.max(scores, dim=-1)
        equals_batch_tokens = argmax_idxs.eq(gold_tgt_tokens_flatten)
        padding_mask = gold_tgt_tokens_flatten.ne(pad_idx)
        equals_batch_tokens_padding = equals_batch_tokens.long() * padding_mask.long()
        correct_tokens = torch.sum(equals_batch_tokens_padding)

        total_loss += loss.item()
        total_tgt_words += pred_tgt_word_num
        total_correct_tokens += correct_tokens.item()

    loss = total_loss / total_tgt_words
    ppl = math.exp(loss)
    acc = 1.0 * total_correct_tokens / total_tgt_words * 100
    return loss, ppl, acc

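# The evaluate_loss() above (and the Transformer training loop further below) consumes a
# data_iter(data, batch_size=..., batch_type=..., shuffle=...) helper that batches
# (src_sents, tgt_sents) pairs. That helper is not shown in these snippets; the following
# is only a hedged sketch of the assumed behaviour, batching either by sentence count or
# by target-token count.
import random

def data_iter(data, batch_size, batch_type="sents", shuffle=True):
    """Yield (src_batch, tgt_batch) lists of sentence pairs (hypothetical sketch)."""
    pairs = list(data)
    if shuffle:
        random.shuffle(pairs)
    batch, tokens = [], 0
    for src, tgt in pairs:
        batch.append((src, tgt))
        tokens += len(tgt)
        full = len(batch) == batch_size if batch_type == "sents" else tokens >= batch_size
        if full:
            srcs, tgts = zip(*batch)
            yield list(srcs), list(tgts)
            batch, tokens = [], 0
    if batch:
        srcs, tgts = zip(*batch)
        yield list(srcs), list(tgts)
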
def fit(self, X, Y, batch_size, coef_init=None):
    # coef_init validation
    if coef_init is not None:
        coef_init = np.asarray(coef_init, dtype=np.float64, order="C")
        coef_init = coef_init.ravel()
        if coef_init.shape != (X.shape[1], ):
            raise ValueError("Provided coef_init does not match dataset.")
        coef_init = np.r_[np.zeros(1), coef_init]
    else:
        coef_init = np.zeros((X.shape[1] + 1, ))

    # label validation
    Y = check_array(Y, ensure_2d=False)
    # check that X and Y have correct shape
    X, Y = check_X_y(X, Y, y_numeric=True)
    # add bias column to X
    X = np.c_[np.ones((X.shape[0])), X]

    self.t_ = 0
    self.n_iter_ = 0
    self.loss_hist_ = []
    self.loss_epoch_ = []
    loss_count = 0
    loss = float("inf")
    theta = coef_init

    for i in range(int(self.max_iter)):
        self.n_iter_ += 1
        loss_epoch = []
        for x, y in data_iter(X, Y, batch_size):
            self.t_ += 1
            error = x.dot(theta) - y
            loss = error.dot(error) / x.shape[0]
            loss_epoch.append(loss)
            gradient = (x.T.dot(error)) / x.shape[0]
            # inverse-scaling learning rate schedule
            theta = theta - (self.eta0 / (self.t_ ** self.power_t)) * gradient

        # best loss over the last n_iter_no_change epochs (for early stopping)
        if self.loss_hist_:
            loss_prev = min(self.loss_hist_[-self.n_iter_no_change + 1:])
        else:
            loss_prev = float("inf")

        loss = np.average(loss_epoch)
        self.loss_epoch_.append(loss_epoch)
        self.loss_hist_.append(loss)

        if loss + self.tol > loss_prev:
            if loss_count == self.n_iter_no_change - 1:
                break
            else:
                loss_count += 1
        else:
            loss_count = 0

    self.coef_ = theta
    return self

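# fit() above relies on a data_iter(X, Y, batch_size) generator that is not shown in this
# snippet. A minimal NumPy sketch of the assumed behaviour (shuffled minibatches of rows
# of X with the matching targets from Y):
import numpy as np

def data_iter(X, Y, batch_size):
    """Yield (x, y) minibatches over the rows of X/Y in random order (assumed helper)."""
    indices = np.random.permutation(X.shape[0])
    for start in range(0, X.shape[0], batch_size):
        batch = indices[start:start + batch_size]
        yield X[batch], Y[batch]
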
def inference(print_datetime):
    best_model = Classifier(embedding, args.lstm_hidden_dim, args.num_class,
                            args.n_layers, args.bidirectional, args.dropout_keep_prob)
    # pick the checkpoint with the highest score encoded in its filename
    ckpts = glob.glob('./ckpt/%s/*' % print_datetime)
    path = ckpts[np.argsort(
        [float(ckpt.replace('.pt', '').split('_')[-1]) for ckpt in ckpts])[-1]]
    print('Best Model: %s' % path)
    best_model.load_state_dict(torch.load(path))

    test_iter = utils.data_iter(args.seq_len, args.batch_size, args.vocab_size,
                                False, 'test')
    test_loss, test_acc = eval_model(best_model, test_iter)
    print('Test Acc: %.2f, Test Loss: %.4f' % (100 * test_acc, test_loss))

def _train(self, epoch):
    self.optimizer.zero_grad()
    self.model.train()

    start_time = time.time()
    epoch_start_time = time.time()
    overall_losses = 0
    losses = 0
    batch_idx = 1
    y_pred = []
    y_true = []
    for batch_data in data_iter(self.train_data, train_batch_size, shuffle=True):
        torch.cuda.empty_cache()
        batch_inputs, batch_labels = self.batch2tensor(batch_data)
        batch_outputs = self.model(batch_inputs)

def optimize(batch_size, lr, mom, num_epochs, log_interval):
    [w, b], vs = init_params()
    y_vals = [squared_loss(net(X, w, b), y).mean().asnumpy()]
    print('batch_size', batch_size)
    for epoch in range(1, num_epochs + 1):
        # decay the learning rate after the first two epochs
        if epoch > 2:
            lr *= 0.1
        for batch_i, (features, label) in enumerate(
                utils.data_iter(batch_size, num_examples, X, y)):
            with autograd.record():
                output = net(features, w, b)
                loss = squared_loss(output, label)
            loss.backward()
            sgd_momentum([w, b], vs, lr, mom, batch_size)
            if batch_i * batch_size % log_interval == 0:
                y_vals.append(squared_loss(net(X, w, b), y).mean().asnumpy())
        print('epoch %d, learning_rate %f, loss %.4e' % (epoch, lr, y_vals[-1]))
    # reshape and convert to NumPy for easier printing
    print('w:', w.reshape((1, -1)).asnumpy(), 'b:', b.asscalar(), '\n')
    x_vals = np.linspace(0, num_epochs, len(y_vals), endpoint=True)
    utils.semilogy(x_vals, y_vals, 'epoch', 'loss')

def optimize(batch_size, lr, mom, num_epochs, log_interval):
    num_examples = 1000
    X, y = genData(num_examples)
    [w, b], vs = init_params()
    y_vals = [utils.squared_loss(utils.linreg(X, w, b), y).mean().asnumpy()]
    for epoch in range(1, num_epochs + 1):
        # decay the learning rate after the first two epochs
        if epoch > 2:
            lr *= 0.1
        for batch_i, (features, label) in enumerate(
                utils.data_iter(batch_size, num_examples, X, y)):
            with autograd.record():
                output = utils.linreg(features, w, b)
                loss = utils.squared_loss(output, label)
            loss.backward()
            sgd_momentum([w, b], lr, batch_size, vs, mom)
            if batch_i * batch_size % log_interval == 0:
                y_vals.append(
                    utils.squared_loss(utils.linreg(X, w, b), y).mean().asnumpy())
    print('w:', w, '\nb:', b, '\n')
    x_vals = np.linspace(0, num_epochs, len(y_vals), endpoint=True)
    utils.semilogy(x_vals, y_vals, 'epoch', 'loss')

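# Both optimize() variants above call utils.data_iter(batch_size, num_examples, X, y),
# which is not defined in these snippets. A minimal sketch in the style of the d2l/gluon
# tutorials, assuming X and y are MXNet NDArrays:
import random
from mxnet import nd

def data_iter(batch_size, num_examples, X, y):
    """Yield random (features, label) minibatches of size batch_size (assumed helper)."""
    idx = list(range(num_examples))
    random.shuffle(idx)
    for i in range(0, num_examples, batch_size):
        j = nd.array(idx[i:min(i + batch_size, num_examples)])
        yield X.take(j), y.take(j)  # take() selects rows by index along axis 0
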
import paddle
import numpy as np
from paddle.fluid.dygraph.base import to_variable
from paddle.static import InputSpec
import grpc
import yaml

import utils

if __name__ == "__main__":
    paddle.enable_static()
    exe = paddle.static.Executor(paddle.CPUPlace())
    inference_program, feed_target_names, fetch_targets = \
        paddle.static.load_inference_model(
            path_prefix="whole/static", executor=exe)
    for i, item in enumerate(utils.data_iter("data/input.json")):
        uid, x1, x2, label = item
        feed = {"x1": x1, "x2": x2}
        fetch_vars = exe.run(program=inference_program,
                             feed=feed,
                             fetch_list=fetch_targets,
                             return_numpy=False)
        print(np.array(fetch_vars))

def main(args):
    """Main function"""
    # training data
    print("****** Train Set *****")
    train_data_src = read_corpus(args.train_src, source='src')
    train_data_tgt = read_corpus(args.train_tgt, source='tgt')
    print()

    # valid data
    print("****** Valid Set *****")
    valid_data_src = read_corpus(args.valid_src, source='src')
    valid_data_tgt = read_corpus(args.valid_tgt, source='tgt')
    print()

    # merge data for source and target
    train_data = zipped(train_data_src, train_data_tgt)
    valid_data = zipped(valid_data_src, valid_data_tgt)

    vocab, transformer, optimizer, cross_entropy_loss = init_training(args)
    print("[Transformer Config] ")
    print(transformer)

    epoch = 0
    checkpoint = codecs.open(args.checkpoint, "w", encoding="utf-8")
    transformer.train()
    while epoch < args.epochs:
        total_loss = 0.
        total_tgt_words = 0
        total_correct_tokens = 0
        freq = 0
        start_epoch = start_batch = time.time()
        for src_sents, tgt_sents in data_iter(train_data,
                                              batch_size=args.batch_size,
                                              batch_type=args.batch_type):
            # number of target words to predict in this batch (no padding)
            pred_tgt_word_num = sum(len(s[1:]) for s in tgt_sents)
            optimizer.zero_grad()

            # format data for source and target (add padding)
            src = to_input_variable(src_sents, vocab.src, cuda=args.cuda)
            tgt = to_input_variable([item[:-1] for item in tgt_sents],
                                    vocab.tgt, cuda=args.cuda)

            # scores for prediction (before softmax)
            scores = transformer(src, tgt)

            gold_tgt_sents, _ = to_input_variable(
                [item[1:] for item in tgt_sents], vocab.tgt, cuda=args.cuda)
            gold_tgt_tokens_flatten = gold_tgt_sents.view(-1)

            # cross-entropy loss against the one-hot target distribution
            weight_loss = cross_entropy_loss(scores, gold_tgt_tokens_flatten)
            mean_loss = weight_loss / pred_tgt_word_num

            # cross-entropy loss against the label-smoothed distribution
            if args.label_smoothing:
                smoothing_loss = label_smoothing_loss(
                    scores, gold_tgt_tokens_flatten,
                    epsilon=args.label_smoothing_rate)
                smoothing_mean_loss = smoothing_loss / pred_tgt_word_num

            _, pred_idxs = torch.max(scores, dim=-1)
            is_target = gold_tgt_tokens_flatten.ne(constants.PAD)
            correct_tokens = torch.sum(
                gold_tgt_tokens_flatten.eq(pred_idxs).float() * is_target.float())

            if args.label_smoothing:
                smoothing_mean_loss.backward()
            else:
                mean_loss.backward()
            optimizer.step()
            if args.optimizer == "Warmup_Adam":
                optimizer.update_learning_rate()

            total_loss += mean_loss.item()
            total_correct_tokens += correct_tokens.item()
            total_tgt_words += pred_tgt_word_num
            freq += 1

            if freq % args.displayFreq == 0:
                end_batch = time.time()
                total_time = end_batch - start_batch
                aver_per_word_loss = total_loss / args.displayFreq
                acc = 1.0 * total_correct_tokens / total_tgt_words * 100
                print("[%d] [loss:%5.2f] [acc:%5.2f%%] [ppl:%5.2f] [speed:%5.2f words/s] [time:%5.2fs]"
                      % (freq, aver_per_word_loss, acc,
                         math.exp(aver_per_word_loss),
                         total_tgt_words / total_time, total_time))
                total_loss = 0.
                total_tgt_words = 0
                total_correct_tokens = 0
                start_batch = end_batch

            if freq % args.validFreq == 0:
                t0 = time.time()
                if torch.cuda.device_count() > 1:
                    valid_loss, ppl, acc = evaluate_loss(
                        transformer.module, valid_data, cross_entropy_loss)
                else:
                    valid_loss, ppl, acc = evaluate_loss(
                        transformer, valid_data, cross_entropy_loss)
                t1 = time.time()
                print("[Valid] [loss:%5.2f] [acc:%5.2f%%] [ppl:%5.2f] [time:%5.2fs]"
                      % (valid_loss, acc, ppl, t1 - t0))

        epoch += 1
        end_epoch = time.time()
        print("[Epoch %d] is ending...\n[total_time:%.2f min]"
              % (epoch, (end_epoch - start_epoch) / 60))

        print("Saving model...")
        if not os.path.isdir(args.save_to):
            os.makedirs(args.save_to)
        if args.finetune:
            torch.save(transformer.state_dict(),
                       args.finetune_model_path + "_finetune_epoch%d" % (epoch))
            checkpoint.write(args.finetune_model_path + "_finetune_epoch%d\n" % (epoch))
        else:
            torch.save(transformer.state_dict(),
                       args.save_to + "transformer_epoch%d" % (epoch))
            checkpoint.write(args.save_to + "transformer_epoch%d\n" % (epoch))
        checkpoint.flush()
        print("Saving finish...\n")

    checkpoint.close()

def train(self, sess, vocab_size, epoch=25,
          data_dir="data", dataset_name="cnn", log_dir='log/tmp/',
          load_path=None, data_size=3000, eval_every=1500,
          val_rate=0.1, dropout_rate=0.9):
    print(" [*] Building Network...")
    start = time.time()
    self.prepare_model()
    print(" [*] Preparing model finished. Use %4.4f" % (time.time() - start))

    # Summary
    writer = tf.train.SummaryWriter(log_dir, sess.graph)
    print(" [*] Writing log to %s" % log_dir)

    # Saver and Load
    self.saver = tf.train.Saver(max_to_keep=15)
    if load_path is not None:
        if os.path.isdir(load_path):
            fname = tf.train.latest_checkpoint(os.path.join(load_path, 'ckpts'))
            assert fname is not None
        else:
            fname = load_path
        print(" [*] Loading %s" % fname)
        self.saver.restore(sess, fname)
        print(" [*] Checkpoint is loaded.")
    else:
        sess.run(tf.initialize_all_variables())
        print(" [*] No checkpoint to load, all variable inited")

    counter = 0
    vcounter = 0
    start_time = time.time()
    ACC = []
    LOSS = []
    train_files, validate_files = fetch_files(data_dir, dataset_name, vocab_size)
    if data_size:
        train_files = train_files[:data_size]
    validate_size = int(
        min(max(20.0, float(len(train_files)) * val_rate), len(validate_files)))
    print(" [*] Validate_size %d" % validate_size)

    for epoch_idx in xrange(epoch):
        # load data
        train_iter = data_iter(train_files,
                               self.max_nsteps,
                               self.max_query_length,
                               batch_size=self.batch_size,
                               vocab_size=self.vocab_size,
                               shuffle_data=True)
        tsteps = train_iter.next()

        # train
        running_acc = 0
        running_loss = 0
        for batch_idx, docs, d_end, queries, q_end, y in train_iter:
            _, summary_str, cost, accuracy = sess.run(
                [self.train_op, self.train_sum, self.loss, self.accuracy],
                feed_dict={
                    self.document: docs,
                    self.query: queries,
                    self.d_end: d_end,
                    self.q_end: q_end,
                    self.y: y,
                    self.dropout: dropout_rate,
                })
            writer.add_summary(summary_str, counter)
            running_acc += accuracy
            running_loss += np.mean(cost)
            if counter % 10 == 0:
                print("Epoch: [%2d] [%4d/%4d] time: %4.4f, loss: %.8f, accuracy: %.8f"
                      % (epoch_idx, batch_idx, tsteps, time.time() - start_time,
                         running_loss / 10.0, running_acc / 10.0))
                running_loss = 0
                running_acc = 0
            counter += 1

            if (counter + 1) % eval_every == 0:
                # validate
                running_acc = 0
                running_loss = 0
                idxs = np.random.choice(len(validate_files), size=validate_size)
                files = [validate_files[idx] for idx in idxs]
                validate_iter = data_iter(files,
                                          self.max_nsteps,
                                          self.max_query_length,
                                          batch_size=self.batch_size,
                                          vocab_size=self.vocab_size,
                                          shuffle_data=True)
                vsteps = validate_iter.next()

                for batch_idx, docs, d_end, queries, q_end, y in validate_iter:
                    validate_sum_str, cost, accuracy = sess.run(
                        [self.validate_sum, self.loss, self.accuracy],
                        feed_dict={
                            self.document: docs,
                            self.query: queries,
                            self.d_end: d_end,
                            self.q_end: q_end,
                            self.y: y,
                            self.dropout: 1.0,
                        })
                    writer.add_summary(validate_sum_str, vcounter)
                    running_acc += accuracy
                    running_loss += np.mean(cost)
                    vcounter += 1

                ACC.append(running_acc / vsteps)
                LOSS.append(running_loss / vsteps)
                vcounter += vsteps
                print("Epoch: [%2d] Validation time: %4.4f, loss: %.8f, accuracy: %.8f"
                      % (epoch_idx, time.time() - start_time,
                         running_loss / vsteps, running_acc / vsteps))

                # save
                self.save(sess, log_dir, global_step=counter)
                print('\n\n')

import paddle.fluid as fluid
import numpy as np
import grpc
import yaml
import paddle

import network
import utils

if __name__ == "__main__":
    paddle.enable_static()
    place = fluid.CPUPlace()
    exe = fluid.Executor(place)
    inference_program, feed_target_names, fetch_targets = \
        fluid.io.load_inference_model(
            dirname="whole_program/inference", executor=exe)
    for batch_id, data in enumerate(utils.data_iter("../data/input.json")):
        _, x1, x2, _ = data
        feed = {
            "Host|x1": x1,
            "Customer|x2": x2,
        }
        results = exe.run(program=inference_program,
                          feed=feed,
                          fetch_list=fetch_targets)
        print("result: {}".format(np.array(results)))

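# Both Paddle inference scripts above iterate over utils.data_iter(".../input.json"),
# which is not included here. A hedged sketch of such a reader, assuming a JSON-lines
# file whose records carry "uid", "x1", "x2" and "label" fields (the field names and
# array dtypes/shapes are assumptions, not taken from the original utils module):
import json
import numpy as np

def data_iter(path):
    """Yield (uid, x1, x2, label) tuples from a JSON-lines file (hypothetical format)."""
    with open(path, "r") as f:
        for line in f:
            record = json.loads(line)
            x1 = np.array(record["x1"], dtype=np.float32)
            x2 = np.array(record["x2"], dtype=np.float32)
            yield record["uid"], x1, x2, record["label"]
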
def getSQLColumnString(columns):
    select = columns[0]
    for name in columns[1:]:
        select += "," + name
    return select


dfMean = pd.read_csv("data\MEAN.csv")
dfStd = pd.read_csv("data\STD.csv")
df = pd.read_sql("SELECT {} from Bank1PercentPredict".format(
    getSQLColumnString(featureNames + predictNames)),
    conn, columns=featureNames + predictNames)
cursor.close()
conn.close()

# standardize the input features with the saved training mean/std
for columnName in featureNames:
    if not columnName.startswith('Next'):
        df[columnName] = (df[columnName] - dfMean[columnName][0]) / dfStd[columnName][0]

y_Predict = nd.array(df[predictCategory][:].as_matrix())
X_Predict = nd.array(df.loc[:, 'Day1OpenPrice':'Week4Exchange'][:].as_matrix())

batch_size = 16
predict_acc, predict_1acc = utils.evaluate_accuracy(
    utils.data_iter(X_Predict, y_Predict, batch_size), net, ctx)
print("Predict acc: %f, Predict True Value acc: %f" % (predict_acc, predict_1acc))

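# The snippet above passes utils.data_iter(X_Predict, y_Predict, batch_size) into
# utils.evaluate_accuracy(..., net, ctx) and unpacks two numbers ("Predict acc" and
# "Predict True Value acc"). Neither helper is shown; the sketch below is only an assumed
# MXNet implementation in which the second value is accuracy restricted to examples whose
# true label equals 1:
from mxnet import nd

def evaluate_accuracy(data_iterator, net, ctx):
    """Return (overall accuracy, accuracy on label==1 examples) -- assumed semantics."""
    correct = total = pos_correct = pos_total = 0.0
    for X, y in data_iterator:
        X, y = X.as_in_context(ctx), y.as_in_context(ctx)
        pred = net(X).argmax(axis=1)
        match = (pred == y.astype('float32')).astype('float32')
        correct += match.sum().asscalar()
        total += y.size
        pos_mask = (y == 1).astype('float32')
        pos_correct += (match * pos_mask).sum().asscalar()
        pos_total += pos_mask.sum().asscalar()
    return correct / total, pos_correct / max(pos_total, 1.0)
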