def evaluate_train():
    # Load model
    weight_path = 'model/09031925_epoch_0_train_loss_5.9855.h5'

    # Load data
    Sources, Targets = load_train_data()
    de2idx, idx2de = load_de_vocab()
    en2idx, idx2en = load_en_vocab()

    batch_size = 5
    model = TransformerModel(in_vocab_len=len(idx2de),
                             out_vocab_len=len(idx2en),
                             max_len=hp.maxlen)
    model.load_model(weight_path)

    for i in range(5 // batch_size):
        x = Sources[i * batch_size:(i + 1) * batch_size]
        sources = Sources[i * batch_size:(i + 1) * batch_size]
        targets = Targets[i * batch_size:(i + 1) * batch_size]
        preds = model.translate_with_ans(sources, targets, idx2en)
        # preds = model.translate(x, idx2en)
        for source, target, pred in zip(sources, targets, preds):
            print('source:', ' '.join(idx2de[idx] for idx in source))
            print('expected:', ' '.join(idx2en[idx] for idx in target))
            print('pred:', pred)
            print()

def train():
    current_batches = 0
    de2idx, idx2de = load_de_vocab()
    en2idx, idx2en = load_en_vocab()
    enc_voc = len(de2idx)
    dec_voc = len(en2idx)
    writer = SummaryWriter()

    # Load data
    X, Y = load_train_data()
    # Calculate total batch count
    num_batch = len(X) // hp.batch_size

    model = AttModel(hp, enc_voc, dec_voc)
    model.train()
    model.cuda()
    torch.backends.cudnn.benchmark = True

    if not os.path.exists(hp.model_dir):
        os.makedirs(hp.model_dir)
    if hp.preload is not None and os.path.exists(hp.model_dir + '/history.pkl'):
        with open(hp.model_dir + '/history.pkl', 'rb') as in_file:
            history = pickle.load(in_file)
    else:
        history = {'current_batches': 0}
    current_batches = history['current_batches']

    optimizer = optim.Adam(model.parameters(), lr=hp.lr, betas=(0.9, 0.98), eps=1e-8)
    if hp.preload is not None and os.path.exists(hp.model_dir + '/optimizer.pth'):
        optimizer.load_state_dict(torch.load(hp.model_dir + '/optimizer.pth'))
    if hp.preload is not None and os.path.exists(hp.model_dir + '/model_epoch_%02d.pth' % hp.preload):
        model.load_state_dict(torch.load(hp.model_dir + '/model_epoch_%02d.pth' % hp.preload))

    startepoch = int(hp.preload) if hp.preload is not None else 1
    for epoch in range(startepoch, hp.num_epochs + 1):
        current_batch = 0
        for index, current_index in get_batch_indices(len(X), hp.batch_size):
            tic = time.time()
            # Variable() is no longer needed; plain tensors carry gradients
            x_batch = torch.LongTensor(X[index]).cuda()
            y_batch = torch.LongTensor(Y[index]).cuda()
            toc = time.time()

            tic_r = time.time()
            torch.cuda.synchronize()
            optimizer.zero_grad()
            loss, _, acc = model(x_batch, y_batch)
            loss.backward()
            optimizer.step()
            torch.cuda.synchronize()
            toc_r = time.time()

            current_batches += 1
            current_batch += 1
            if current_batches % 10 == 0:
                writer.add_scalar('./loss', loss.data.cpu().numpy(), current_batches)
                writer.add_scalar('./acc', acc.data.cpu().numpy(), current_batches)
            if current_batches % 5 == 0:
                # loss.data[0] raises an error on recent PyTorch; use .item()
                print('epoch %d, batch %d/%d, loss %f, acc %f' %
                      (epoch, current_batch, num_batch, loss.item(), acc.item()))
                print('batch loading used time %f, model forward used time %f' %
                      (toc - tic, toc_r - tic_r))
            if current_batches % 100 == 0:
                writer.export_scalars_to_json(hp.model_dir + '/all_scalars.json')
                # Keep the resume counter in sync before persisting it
                history['current_batches'] = current_batches
                with open(hp.model_dir + '/history.pkl', 'wb') as out_file:
                    pickle.dump(history, out_file)

        checkpoint_path = hp.model_dir + '/model_epoch_%02d' % epoch + '.pth'
        torch.save(model.state_dict(), checkpoint_path)
        torch.save(optimizer.state_dict(), hp.model_dir + '/optimizer.pth')

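
# The loop above iterates over get_batch_indices(len(X), hp.batch_size), which is
# not defined in this snippet. A minimal sketch of what such a helper might look
# like (an assumption, not the project's actual code): yield (index_array,
# batch_number) pairs over one shuffled pass through the data.
import numpy as np

def get_batch_indices(total_length, batch_size):
    """Yield (indices, batch_index) pairs covering one shuffled epoch."""
    indexes = np.random.permutation(total_length)
    for batch_index, start in enumerate(range(0, total_length - batch_size + 1, batch_size)):
        yield indexes[start:start + batch_size], batch_index
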
def __init__(self, transformerModel, output_dir):
    self.transformerModel = transformerModel
    self.output_dir = output_dir
    self.Sources, self.Targets = load_train_data()
    _, self.idx2de = load_de_vocab()
    _, self.idx2en = load_en_vocab()
    os.makedirs(self.output_dir, exist_ok=True)

def train():
    print('-' * 30)
    print('Loading and preprocessing train data...')
    print('-' * 30)

    # Prepare train and validation data
    imgs_scans_train, imgs_mask_train = load_train_data()
    imgs_scans_valid, imgs_mask_valid = load_validation_data()

    imgs_mask_train = imgs_mask_train.astype('float32')
    imgs_scans_train = imgs_scans_train.astype('float32')
    imgs_mask_train /= 255.                   # scale masks to [0, 1]
    imgs_scans_train /= 128.                  # scale input images to [0, 2]
    imgs_scans_train = imgs_scans_train - 1.  # scale input images to [-1, 1]

    imgs_scans_valid = imgs_scans_valid.astype('float32')
    imgs_mask_valid = imgs_mask_valid.astype('float32')
    imgs_mask_valid /= 255.                   # scale masks to [0, 1]
    imgs_scans_valid /= 128.                  # scale input images to [0, 2]
    imgs_scans_valid = imgs_scans_valid - 1.  # scale input images to [-1, 1]

    # Repeat the single channel three times before multi-modality loading
    imgs_scans_train = np.repeat(imgs_scans_train, 3, axis=4)
    imgs_scans_valid = np.repeat(imgs_scans_valid, 3, axis=4)

    print('-' * 30)
    print('Creating and compiling model...')
    print('-' * 30)
    model = Unet_vgg.res_unet_vgg(image_depth=img_depth,
                                  image_rows=img_rows,
                                  image_cols=img_cols,
                                  train_encoder=False)
    model.compile(optimizer=Adam(lr=5e-5, beta_1=0.9, beta_2=0.999,
                                 epsilon=1e-08, decay=0.000000199),
                  loss=segmentation_loss,
                  metrics=['accuracy', dice_coef])
    model.summary()

    weight_dir = 'weights'
    if not os.path.exists(weight_dir):
        os.mkdir(weight_dir)
    model_checkpoint = ModelCheckpoint(os.path.join(weight_dir, project_name + '.h5'),
                                       monitor='val_dice_coef',
                                       save_best_only=True,
                                       mode='max')

    # Load planar 3D encoder weights
    model.load_weights(os.path.join(weight_dir, 'planar_3d_vgg.h5'), by_name=True)

    log_dir = 'logs'
    if not os.path.exists(log_dir):
        os.mkdir(log_dir)
    csv_logger = CSVLogger(os.path.join(log_dir, project_name + '.txt'),
                           separator=',', append=False)

    print('-' * 30)
    print('Fitting model...')
    print('-' * 30)
    model.fit(x=imgs_scans_train,
              y=imgs_mask_train,
              batch_size=3,
              epochs=100,
              callbacks=[model_checkpoint, csv_logger],
              shuffle=True,
              validation_data=(imgs_scans_valid, imgs_mask_valid))

    print('-' * 30)
    print('Training finished')
    print('-' * 30)

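
# dice_coef and segmentation_loss are referenced in model.compile() above but are
# not defined in this snippet. A minimal sketch of a common choice (assumption:
# soft Dice coefficient and a Dice-based loss), written against the Keras backend:
from keras import backend as K

def dice_coef(y_true, y_pred, smooth=1.0):
    """Soft Dice coefficient over the flattened masks."""
    y_true_f = K.flatten(y_true)
    y_pred_f = K.flatten(y_pred)
    intersection = K.sum(y_true_f * y_pred_f)
    return (2. * intersection + smooth) / (K.sum(y_true_f) + K.sum(y_pred_f) + smooth)

def segmentation_loss(y_true, y_pred):
    """One common definition: 1 - Dice; the original project may combine it with cross-entropy."""
    return 1.0 - dice_coef(y_true, y_pred)
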
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument("--batch_size", type=int, default=256)
    parser.add_argument("--dim_model", type=int, default=128)
    parser.add_argument("--num_iterations", type=int, default=100000)
    parser.add_argument("--vocab_path", type=str, default="./corpora/vocab.txt")
    args = parser.parse_args()

    # Fall back to CPU when CUDA is unavailable ('gpu' is not a valid device string)
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    with open(args.vocab_path, "rt") as f:
        n_vocab = len(f.readlines())

    X, class_weights = load_train_data("./data")
    n_class = len(class_weights)
    model = make_model(n_vocab,
                       d_model=args.dim_model,
                       batch_size=args.batch_size,
                       n_class=n_class)
    model = model.to(device)
    print(model)

    # Build the criterion and optimizer once, outside the loop,
    # so the optimizer state is not reset on every iteration
    criterion, optimizer = get_criterion(model)

    # epochs = 10
    for iter in range(args.num_iterations):
        x, y = next_batch(X, args.batch_size, args.dim_model)
        # batches = get_batches(in_text, out_text, 10, 200)
        # for x, y in batches:
        x = torch.tensor(x, dtype=torch.float, device=device)
        y = torch.tensor(y, dtype=torch.long, device=device)
        y = torch.squeeze(y)

        optimizer.zero_grad()
        output = model(x, None)

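
# get_criterion is called above but not defined in this snippet. A minimal sketch
# of one plausible implementation (an assumption, not the project's code):
# cross-entropy loss plus an Adam optimizer; the loaded class_weights could be
# passed in through the optional argument.
import torch
import torch.nn as nn
import torch.optim as optim

def get_criterion(model, class_weights=None, lr=1e-3):
    """Return a (criterion, optimizer) pair for the classification model."""
    weight = None if class_weights is None else torch.tensor(class_weights, dtype=torch.float)
    criterion = nn.CrossEntropyLoss(weight=weight)
    optimizer = optim.Adam(model.parameters(), lr=lr)
    return criterion, optimizer
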
parser.add_argument('--num_batches', type=int, default=30000)
parser.add_argument('--save_every', type=int, default=3000)
parser.add_argument('--maxlen', type=int, default=128)
parser.add_argument('--weight_tying', type=int, default=0)
parser.add_argument('--hidden_units', type=int, default=256)
parser.add_argument('--weighted_loss', type=int, default=1)
parser.add_argument('--dropout_rate', type=float, default=0.4)
parser.add_argument('--lr', type=float, default=0.001)
parser.add_argument('--num_blocks', type=int, default=1)
parser.add_argument('--num_heads', type=int, default=8)
parser.add_argument('--num_epochs', type=int, default=9)
parser.add_argument('--sinusoid', type=int, default=0)
args = parser.parse_args()

word2idx, idx2word = load_vocab(args.vocab_path)
X, class_weights = load_train_data(args.train_path)
args.vocab_size = len(word2idx)
if args.weighted_loss:
    args.class_weights = class_weights

with open(os.path.join(args.logdir, "args.pkl"), 'wb') as f:
    pickle.dump(args, f)

# Construct graph
model = TransformerDecoder(is_training=True, args=args)
print("Graph loaded")

# Start session
with tf.Session(graph=model.graph) as sess:
    sess.run(tf.global_variables_initializer())
    saver = tf.train.Saver(tf.global_variables())
    ckpt = tf.train.get_checkpoint_state(args.ckpt_path)

def train():
    print("Graph loading......Model name:{}".format(hp.modelname))
    g = Graph()

    print("Data loading...")
    _, eng_names, _, kor_names = load_train_data()
    _, val_eng_names, _, val_kor_names = load_evaluate_data(eval_mode="validate")

    early_stopping_count = 0
    data_list = list(range(len(eng_names)))

    with g.graph.as_default():
        sv = tf.train.Saver()
        with tf.Session() as sess:
            # Initialize
            sess.run(tf.global_variables_initializer())
            best_valid_loss = 100000.
            for epoch in range(1, hp.num_epochs + 1):
                np.random.shuffle(data_list)
                # # Attention plot per epoch
                # al = sess.run(g.alignments, {g.x: eng_names[data_list][:1],
                #                              g.y: kor_names[data_list][:1]})
                # plot_alignment(al[0], epoch - 1, eng_names[data_list][:1], kor_names[data_list][:1])

                # Train
                train_loss = 0
                num_batch = len(eng_names) // hp.batch_size  # integer division so range() accepts it
                for step in tqdm(range(num_batch), total=num_batch, ncols=70, leave=False, unit='b'):
                    name_ids = data_list[step * hp.batch_size:step * hp.batch_size + hp.batch_size]
                    loss, gs = sess.run([g.train_op, g.global_step],
                                        {g.x: eng_names[name_ids],
                                         g.y: kor_names[name_ids]})
                    train_loss += loss
                    if step % 20 == 0:
                        print('\t step:{} train_loss:{:.3f}'.format(gs, loss))
                train_loss /= num_batch

                # Validation
                valid_loss = 0.
                for idx in range(0, len(val_eng_names), hp.batch_size):
                    v_loss = sess.run(g.mean_loss,
                                      {g.x: val_eng_names[idx:idx + hp.batch_size],
                                       g.y: val_kor_names[idx:idx + hp.batch_size]})
                    valid_loss += v_loss
                valid_loss /= len(val_eng_names) / hp.batch_size

                print("[epoch{}] train_loss={:.3f} validate_loss={:.3f} ".format(
                    epoch, train_loss, valid_loss))

                # Save only when validation loss improves by at least 0.1%; otherwise
                # count the epoch toward early stopping
                if valid_loss <= best_valid_loss * 0.999:
                    best_valid_loss = valid_loss
                    sv.save(sess, "logdir/" + hp.modelname + '/model.ckpt')
                else:
                    if hp.is_earlystopping:
                        early_stopping_count += 1
                        if early_stopping_count == 3:
                            print("Early Stopping...")
                            break

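
# The checkpoint/early-stopping rule above (save on a >=0.1% relative improvement,
# stop after 3 non-improving epochs) can be pulled out into a small tracker. This
# is an illustrative refactor, not the project's code; note that the original
# never resets its counter on improvement, whereas this sketch does, which is the
# more common convention.
class EarlyStopper:
    def __init__(self, patience=3, min_rel_improvement=0.001):
        self.best = float('inf')
        self.patience = patience
        self.min_rel = min_rel_improvement
        self.bad_epochs = 0

    def step(self, valid_loss):
        """Return (improved, should_stop) for the latest validation loss."""
        if valid_loss <= self.best * (1.0 - self.min_rel):
            self.best = valid_loss
            self.bad_epochs = 0
            return True, False
        self.bad_epochs += 1
        return False, self.bad_epochs >= self.patience
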
        if cleared not in ref_:
            ref_.append(cleared)
        ref.append(ref_.index(cleared))
    ref = np.asarray(ref).astype(float)  # np.float is removed in recent NumPy; use the builtin
    args.target_vocab_size = len(word2idx)
    args.vocab_size = ref.shape[0]
else:
    args.vocab_size = len(word2idx)

with open(os.path.join(args.logdir, "args.pkl"), 'wb') as f:
    pickle.dump(args, f)

# Construct graph
model = TransformerDecoder(is_training=True, args=args)
print("Graph loaded")

X = load_train_data(args.train_path, args.vocab_path, args.maxlen)
pad_idx = word2idx["<pad>"]
num_batch = len(X) // args.batch_size

# Start session
with tf.Session(graph=model.graph) as sess:
    sess.run(tf.global_variables_initializer())
    saver = tf.train.Saver(tf.global_variables())
    ckpt = tf.train.get_checkpoint_state(args.ckpt_path)
    if ckpt:
        print("restoring from {}".format(ckpt.model_checkpoint_path))
        saver.restore(sess, ckpt.model_checkpoint_path)

    for epoch in range(1, args.num_epochs + 1):
        gs = sess.run(model.global_step)
        for step in range(num_batch - 1):

    fig.colorbar(im, ax=ax)
    plt.xlabel('Decoder timestep')
    plt.ylabel('Encoder timestep')
    plt.savefig(hp.logdir + '/alignment_%d' % gs, format='png')


if __name__ == '__main__':
    # Load vocabulary
    de2idx, idx2de = load_de_vocab()
    en2idx, idx2en = load_en_vocab()

    # Construct graph
    g = Graph("train")
    print("Graph loaded")

    X, Y = load_train_data()
    # Calculate total batch count
    num_batch = len(X) // hp.batch_size
    print(X.shape)
    g.num_batch = num_batch

    # Start session
    sv = tf.train.Supervisor(graph=g.graph,
                             logdir=hp.logdir,
                             summary_op=None,
                             save_model_secs=0)
    with sv.managed_session() as sess:
        i = 0
        for epoch in range(1, hp.num_epochs + 1):
            if sv.should_stop():
                break
            for step in tqdm(range(g.num_batch),

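
# The colorbar/xlabel/savefig lines at the top of the previous snippet look like
# the tail of an attention-alignment plotting helper whose beginning was cut off.
# A minimal sketch of what the full helper might look like; the name and
# signature here are assumptions, not the original code:
import matplotlib.pyplot as plt

def plot_alignment(alignment, gs, logdir):
    """Save an encoder/decoder attention matrix as a PNG heatmap."""
    fig, ax = plt.subplots()
    im = ax.imshow(alignment, aspect='auto', origin='lower')
    fig.colorbar(im, ax=ax)
    plt.xlabel('Decoder timestep')
    plt.ylabel('Encoder timestep')
    plt.savefig(logdir + '/alignment_%d' % gs, format='png')
    plt.close(fig)
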
parser.add_argument("mode", help="train or eval")
args = parser.parse_args()

en2idx, idx2en = load_de_en_vocab('processed-data/en.vocab.tsv')
de2idx, idx2de = load_de_en_vocab('processed-data/zh.vocab.tsv')
print("Loaded the en/zh dictionaries")

# Load train data (cache the converted arrays as .npy on first run)
en_npy_path = "./processed-data/train_en.npy"
zh_npy_path = "./processed-data/train_zh.npy"
if os.path.exists(en_npy_path) and os.path.exists(zh_npy_path):
    print("load training data")
    X = np.load(en_npy_path)
    Y = np.load(zh_npy_path)
else:
    X, Y = load_train_data(de2idx, en2idx)
    np.save(en_npy_path, X)
    np.save(zh_npy_path, Y)

# Load test data
test_en_path = "./processed-data/test_en.npy"
test_s_path = "./processed-data/t_source.npy"
test_t_path = "./processed-data/t_target.npy"
if os.path.exists(test_en_path) and os.path.exists(test_s_path) and os.path.exists(test_t_path):
    print("load testing data")
    X_test = np.load(test_en_path)
    Source_test = np.load(test_s_path)
    Target_test = np.load(test_t_path)
else:
    X_test, Source_test, Target_test = load_test_data(de2idx, en2idx)

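
# The two load-or-build-and-cache blocks above follow the same pattern, so they
# could be factored into a small helper. This is an illustrative sketch only;
# load_or_build is not part of the original code.
import os
import numpy as np

def load_or_build(paths, builder):
    """Return cached .npy arrays if they all exist; otherwise build, cache, and return them."""
    if all(os.path.exists(p) for p in paths):
        return tuple(np.load(p) for p in paths)
    arrays = builder()
    for path, arr in zip(paths, arrays):
        np.save(path, arr)
    return arrays

# Example usage with the paths defined above:
# X, Y = load_or_build([en_npy_path, zh_npy_path], lambda: load_train_data(de2idx, en2idx))
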