def main(preprocessed_dir_path, trained_dir_path, prefix, embedding_dim,
         hidden_dim, batch_size):
    print(20 * "=", "Preparing for test", 20 * "=")
    if prefix != "":
        test_file_name = f"{prefix}_test"
        vocab_file_name = f"{prefix}_vocab"
        revocab_file_name = f"{prefix}_revocab"
        weight_file_name = f"{prefix}_weight"
    else:
        test_file_name = "test"
        vocab_file_name = "vocab"
        revocab_file_name = "revocab"
        weight_file_name = "weight"

    with open(os.path.join(preprocessed_dir_path, vocab_file_name + ".pkl"), "rb") as fp:
        vocab = pickle.load(fp)
    with open(os.path.join(preprocessed_dir_path, revocab_file_name + ".pkl"), "rb") as fp:
        revocab = pickle.load(fp)
    vocab_size = len(vocab)
    padding_idx = vocab[" "]

    encoder = Encoder(vocab_size, embedding_dim, hidden_dim, padding_idx).to(device)
    decoder = Decoder(vocab_size, embedding_dim, hidden_dim, padding_idx).to(device)

    # load the checkpoint once and restore both sub-networks from it
    checkpoint = torch.load(os.path.join(trained_dir_path, f"{weight_file_name}.pth"))
    encoder.load_state_dict(checkpoint["encoder"])
    decoder.load_state_dict(checkpoint["decoder"])

    expression_ids, answer_ids = data_loader(
        os.path.join(preprocessed_dir_path, test_file_name + ".pkl"), padding_idx)

    print(20 * "=", "Testing", 20 * "=")
    predicted_answers = predict(encoder, decoder, vocab, revocab, expression_ids,
                                batch_size, padding_idx)
    answers = []
    for ids in answer_ids.detach().numpy().tolist():
        # map token ids back to digit characters and parse the full number
        answer = int("".join([revocab[str(id)] for id in ids]))
        answers.append(answer)

    print(20 * "=", "Calculate Test Result", 20 * "=")
    score = 0
    missed = []
    for i, answer in enumerate(answers):
        if predicted_answers[i] == answer:
            score += 1
        else:
            missed.append((answer, predicted_answers[i]))
    print(f"Accuracy: {score / len(answers) * 100} ({score}/{len(answers)})")
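# The test entry point above is presumably invoked from a CLI. A minimal
# argparse wrapper might look like the following; the flag names and defaults
# are illustrative assumptions, not taken from the original repository.
import argparse

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Evaluate a trained seq2seq model.")
    parser.add_argument("--preprocessed-dir", default="preprocessed")
    parser.add_argument("--trained-dir", default="trained")
    parser.add_argument("--prefix", default="")
    parser.add_argument("--embedding-dim", type=int, default=128)
    parser.add_argument("--hidden-dim", type=int, default=256)
    parser.add_argument("--batch-size", type=int, default=64)
    args = parser.parse_args()
    main(args.preprocessed_dir, args.trained_dir, args.prefix,
         args.embedding_dim, args.hidden_dim, args.batch_size)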
def main():
    vocab, train_iter, val_iter, test_iter = dataset_factory('twitter-customer-support')

    epochs = 100
    embedding_size = 20
    hidden_size = 100
    vocab_size = len(vocab)
    padding_idx = vocab.stoi['<pad>']

    encoder = Encoder(vocab_size, embedding_size, hidden_size)
    decoder = Decoder(vocab_size, embedding_size, hidden_size)
    seq2seq = cuda(Seq2Seq(encoder, decoder, vocab_size))
    optimizer = optim.Adam(seq2seq.parameters())

    best_val_loss = None
    for epoch in range(epochs):
        # calculate train and val loss
        train_loss = train(seq2seq, optimizer, train_iter, vocab_size, 5, padding_idx)
        val_loss = evaluate(seq2seq, val_iter, vocab_size, padding_idx)
        print("[Epoch=%d] train_loss %f - val_loss %f" % (epoch, train_loss, val_loss))

        # save model if model achieved best val loss
        if not best_val_loss or val_loss < best_val_loss:
            print('Saving model...')
            save_model(seq2seq, epoch, val_loss)
            best_val_loss = val_loss
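# save_model is not defined in this snippet. A plausible minimal version, under
# the assumption that a checkpoint should carry the epoch and validation loss
# alongside the weights (the real helper may differ):
import torch

def save_model(model, epoch, val_loss, path="checkpoints/seq2seq.pt"):
    # bundle weights with minimal metadata so the best run can be identified later
    torch.save({
        "epoch": epoch,
        "val_loss": val_loss,
        "state_dict": model.state_dict(),
    }, path)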
def __init__(self, weights_file: str, device: str = "cpu") -> None:
    self.weights_file = weights_file
    self.device = device
    self.encoder = Encoder(
        MODEL_CONF["encoder"]["input_dim"],
        MODEL_CONF["encoder"]["hidden_dim"],
        MODEL_CONF["encoder"]["n_layer"],
        MODEL_CONF["encoder"]["dropout"],
    )
    self.decoder = Decoder(
        MODEL_CONF["decoder"]["input_dim"],
        MODEL_CONF["decoder"]["output_dim"],
        MODEL_CONF["decoder"]["hidden_dim"],
        MODEL_CONF["decoder"]["n_layer"],
        MODEL_CONF["decoder"]["dropout"],
    )
    self.net = Seq2Seq(self.encoder, self.decoder, self.device)
    # load weights
    self.net.load_state_dict(torch.load(self.weights_file, map_location=self.device))
    print("weights loaded.")
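# Assuming the constructor above belongs to a class named, say, Predictor (the
# class name and weights path here are hypothetical), typical usage would be:
predictor = Predictor(weights_file="weights/seq2seq.pth", device="cpu")
predictor.net.eval()  # switch to eval mode before running inference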
max_len = 200
h = 8          # number of attention heads
Num = 6        # number of encoder/decoder layers
max_token = 4096

# dataset
dataset = Basedataset(data_path, en_word2idx, de_word2idx, bpe)
dataloader = Batch_loader(dataset, device, max_len, max_token)
total = len(dataset)

# model
encoder = Encoder(en_vocab_size, emb_size, d_ff, dropout, max_len, h, Num, device)
decoder = Decoder(de_vocab_size, emb_size, d_ff, dropout, max_len, h, Num, device)
model = Transformer(encoder, decoder, PAD, device, max_len).to(device)
'''
model = Transformer_fr(en_vocab_size, de_vocab_size, PAD, max_len, emb_size, device).to(device)
'''
# model.load_state_dict(torch.load("./model.pt", map_location=device))

criterion = nn.CrossEntropyLoss()  # (ignore_index=0)
optimizer = torch.optim.Adam(model.parameters(), lr=lr, betas=(0.9, 0.98), eps=1e-9)

test_dataset = Basedataset(test_path, en_word2idx, de_word2idx, bpe)
test_loader = Batch_loader(test_dataset, device, max_len, 1500)
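# A single teacher-forced training step under the setup above. The forward
# signature of Transformer and the (src, tgt) batch format yielded by
# Batch_loader are assumptions made for illustration.
model.train()
for src, tgt in dataloader:
    optimizer.zero_grad()
    # feed the target shifted right, predict it shifted left
    logits = model(src, tgt[:, :-1])  # (batch, tgt_len - 1, de_vocab_size)
    loss = criterion(logits.reshape(-1, logits.size(-1)), tgt[:, 1:].reshape(-1))
    loss.backward()
    optimizer.step()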
def main():
    # data
    data = Data(IMAGE_DIR, PICKLES_DIR, params['keep_words'], params=params,
                img_embed=params["img_embed"])
    data_dict = data.dictionary

    # define placeholders
    capt_inputs = tf.placeholder(tf.int32, [None, None])
    capt_labels = tf.placeholder(tf.int32, [None, None])
    seq_length = tf.placeholder(tf.int32, [None])

    # the CNN forward pass is expensive, so feed precomputed image embeddings
    # instead of raw images to reduce computation
    if params["img_embed"] == "vgg":
        n_features = 4096
    elif params["img_embed"] == "resnet":
        n_features = 2048
    image_embs = tf.placeholder(tf.float32, [None, n_features])

    if params['num_captions'] > 1 and params['mode'] == 'training':
        features_tiled = tf.tile(tf.expand_dims(image_embs, 1),
                                 [1, params['num_captions'], 1])
        features_tiled = tf.reshape(
            features_tiled,
            [tf.shape(image_embs)[0] * params['num_captions'], n_features])  # [5 * b_s, 4096]
    else:
        features_tiled = image_embs

    model = Decoder(capt_inputs, params['lstm_hidden'], params['embed_dim'],
                    seq_length, data_dict, params['lstm_hidden'], image_embs,
                    params=params, reuse_text_emb=True)
    with tf.variable_scope("rnn", reuse=tf.AUTO_REUSE):
        x_cmlogits, _ = model.forward(mode='train_capt', image_embs=features_tiled)
        x_lmrlogits, _ = model.forward(mode='train_lmr', lm_label='romantic')
        x_lmhlogits, _ = model.forward(mode='train_lmh', lm_label='humorous')

    # losses
    labels_flat = tf.reshape(capt_labels, [-1])
    cm_loss = masked_loss(labels_flat, x_cmlogits, mode='train_capt')
    lmh_loss = masked_loss(labels_flat, x_lmhlogits, mode='train_lmh')
    lmr_loss = masked_loss(labels_flat, x_lmrlogits, mode='train_lmr')

    # optimizers
    cm_opt = lstm_optimizer(cm_loss, params, params['learning_rate'], mode='train_capt')
    lmh_opt = lstm_optimizer(lmh_loss, params, 0.0005, mode='train_lmh')
    lmr_opt = lstm_optimizer(lmr_loss, params, 0.0005, mode='train_lmr')

    # train
    saver = tf.train.Saver(tf.trainable_variables(), max_to_keep=params['keep_cp'])
    gpu_options = tf.GPUOptions(visible_device_list=params["gpu"], allow_growth=True)
    config = tf.ConfigProto(gpu_options=gpu_options)
    with tf.Session(config=config) as sess:
        sess.run(tf.global_variables_initializer())
        if params['write_summary']:
            summary_writer = tf.summary.FileWriter('./logs', sess.graph)
            summary_writer.add_graph(sess.graph)

        # train 3 networks, save S_act, S_hum, S_rom
        if params['restore']:
            print("Restoring from checkpoint")
            saver.restore(sess, "./checkpoints/{}.ckpt".format(params['checkpoint']))

        # choose labels for the training
        tr_labels = ['actual']
        tr_style = params['tr_style']
        if tr_style == 'both':
            tr_labels.extend(['humorous', 'romantic'])
        else:
            tr_labels.append(tr_style)

        if params['mode'] == 'training':
            for e in range(params['epochs']):
                for label in tr_labels:
                    if label == 'actual':
                        # following the paper
                        batch_size = params['batch_size']
                    else:
                        batch_size = params['batch_size_lm']
                    for captions, lengths, image_f in data.get_batch(
                            batch_size, label=label, set='train'):
                        feed = {capt_inputs: captions[0],
                                capt_labels: captions[1],
                                image_embs: image_f,
                                seq_length: lengths}
                        if label == 'actual':
                            opt_loss, optim = cm_loss, cm_opt
                        elif label == 'humorous':
                            opt_loss, optim = lmh_loss, lmh_opt
                        elif label == 'romantic':
                            opt_loss, optim = lmr_loss, lmr_opt
                        loss_, _ = sess.run([opt_loss, optim], feed)
                    if e % 4 == 0:
                        losses = []
                        for captions, lengths, image_f in data.get_batch(
                                params['batch_size'], label=label, set='val'):
                            feed = {capt_inputs: captions[0],
                                    capt_labels: captions[1],
                                    image_embs: image_f,
                                    seq_length: lengths}
                            if label == 'actual':
                                opt_loss = cm_loss
                            elif label == 'humorous':
                                opt_loss = lmh_loss
                            elif label == 'romantic':
                                opt_loss = lmr_loss
                            # evaluate the loss only; do not run the optimizer
                            # on validation data
                            vloss_ = sess.run(opt_loss, feed)
                            losses.append(vloss_)
                        print("Validation Model: {} Epoch: {} Loss: {}".format(
                            label, e, np.mean(losses)))
                    # save model
                    if not os.path.exists("./checkpoints"):
                        os.makedirs("./checkpoints")
                    if e % 10 == 0 and e != 0:
                        # save every 10 epochs
                        save_path = saver.save(
                            sess, "./checkpoints/{}.ckpt".format(params['checkpoint']))
                        print("Model saved in file: %s" % save_path)
                    print("{} Model: Epoch: {} Loss: {}".format(label, e, loss_))
            # save model
            if not os.path.exists("./checkpoints"):
                os.makedirs("./checkpoints")
            save_path = saver.save(
                sess, "./checkpoints/{}.ckpt".format(params['checkpoint']))
            print("Model saved in file: %s" % save_path)
        elif params['mode'] == 'inference':
            inference(params, model, data, saver, sess)
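# masked_loss is not shown above. A common TF1-style implementation averages
# sparse cross-entropy over non-padding positions only; this sketch assumes
# padding label 0 and ignores the mode argument (both assumptions):
def masked_loss_sketch(labels_flat, logits, mode=None, pad_id=0):
    # flatten logits to (batch*time, vocab) and mask out padding positions
    logits_flat = tf.reshape(logits, [-1, tf.shape(logits)[-1]])
    ce = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels_flat,
                                                        logits=logits_flat)
    mask = tf.cast(tf.not_equal(labels_flat, pad_id), tf.float32)
    return tf.reduce_sum(ce * mask) / tf.maximum(tf.reduce_sum(mask), 1.0)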
def build_model(self):
    hps = self.hps
    ns = self.hps.ns
    enc_mode = self.enc_mode
    seg_len = self.hps.seg_len
    enc_size = self.hps.enc_size
    emb_size = self.hps.emb_size
    betas = (0.5, 0.9)

    # ---stage one---
    self.Encoder = cc(Encoder(ns=ns, dp=hps.enc_dp, enc_size=enc_size,
                              seg_len=seg_len, enc_mode=enc_mode))
    self.Decoder = cc(Decoder(ns=ns, c_in=enc_size, c_h=emb_size,
                              c_a=hps.n_speakers, seg_len=seg_len))
    # the classifier input width depends on how the encoder output is encoded
    if enc_mode == 'binary':
        clf_c_in = enc_size * enc_size
    elif enc_mode == 'multilabel_binary':
        clf_c_in = 2 * enc_size
    else:
        clf_c_in = enc_size
    self.SpeakerClassifier = cc(SpeakerClassifier(ns=ns, c_in=clf_c_in,
                                                  c_h=emb_size,
                                                  n_class=hps.n_speakers,
                                                  dp=hps.dis_dp, seg_len=seg_len))

    # ---stage one opts---
    params = list(self.Encoder.parameters()) + list(self.Decoder.parameters())
    self.ae_opt = optim.Adam(params, lr=self.hps.lr, betas=betas)
    self.clf_opt = optim.Adam(self.SpeakerClassifier.parameters(),
                              lr=self.hps.lr, betas=betas)

    # ---stage two---
    if self.g_mode == 'naive':
        self.Generator = cc(Decoder(ns=ns, c_in=enc_size, c_h=emb_size,
                                    c_a=hps.n_speakers, seg_len=seg_len))
    elif self.g_mode in ('targeted', 'targeted_residual'):
        self.Generator = cc(Decoder(ns=ns, c_in=enc_size, c_h=emb_size,
                                    c_a=hps.n_target_speakers, seg_len=seg_len,
                                    output_mask=(self.g_mode == 'targeted_residual')))
    elif self.g_mode == 'enhanced':
        self.Generator = cc(Enhanced_Generator(ns=ns, dp=hps.enc_dp, enc_size=1024,
                                               emb_size=1024, seg_len=seg_len,
                                               n_speakers=hps.n_speakers))
    elif self.g_mode == 'spectrogram':
        self.Generator = cc(Spectrogram_Patcher(ns=ns, c_in=513, c_h=emb_size,
                                                c_a=hps.n_target_speakers,
                                                seg_len=seg_len))
    elif self.g_mode == 'tacotron':
        self.Generator = cc(Tacotron(enc_size, hps.n_target_speakers,
                                     mel_dim=hp.n_mels,
                                     linear_dim=int(hp.n_fft / 2) + 1))
        self.tacotron_input_lengths = torch.tensor(
            [self.hps.seg_len // 8 for _ in range(hps.batch_size)])
    else:
        raise NotImplementedError('Invalid Generator mode!')
    self.PatchDiscriminator = cc(nn.DataParallel(PatchDiscriminator(
        ns=ns,
        n_class=hps.n_speakers if self.g_mode == 'naive' else hps.n_target_speakers,
        seg_len=seg_len)))

    # ---stage two opts---
    self.gen_opt = optim.Adam(self.Generator.parameters(), lr=self.hps.lr, betas=betas)
    self.patch_opt = optim.Adam(self.PatchDiscriminator.parameters(),
                                lr=self.hps.lr, betas=betas)

    # ---target classifier---
    self.TargetClassifier = cc(nn.DataParallel(
        TargetClassifier(ns=ns, n_class=3, seg_len=seg_len)))
    # ---target classifier opts---
    self.tclf_opt = optim.Adam(self.TargetClassifier.parameters(),
                               lr=self.hps.lr, betas=betas)
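# One stage-one autoencoder update with the modules and optimizer built above.
# The forward signatures of Encoder/Decoder, the L1 reconstruction loss, and
# the source of the batch x and speaker labels c are all assumptions; the real
# solver may differ.
def ae_step(self, x, c):
    self.ae_opt.zero_grad()
    enc = self.Encoder(x)
    x_rec = self.Decoder(enc, c)
    loss_rec = torch.mean(torch.abs(x_rec - x))  # L1 reconstruction (assumed)
    loss_rec.backward()
    self.ae_opt.step()
    return loss_rec.item()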
def weights_init(m):
    # DCGAN-style initialization: N(0, 0.02) for conv layers,
    # N(1, 0.02) with zero bias for batch-norm layers
    classname = m.__class__.__name__
    if classname.find('Conv') != -1:
        nn.init.normal_(m.weight.data, 0.0, 0.02)
    elif classname.find('BatchNorm') != -1:
        nn.init.normal_(m.weight.data, 1.0, 0.02)
        nn.init.constant_(m.bias.data, 0)

netE = Encoder(ngpu, ndf, nc).cuda()
if (device.type == 'cuda') and (ngpu > 1):
    netE = nn.DataParallel(netE, list(range(ngpu)))
netE.apply(weights_init)

netG = Decoder(ngpu, num, ngf).cuda()
if (device.type == 'cuda') and (ngpu > 1):
    netG = nn.DataParallel(netG, list(range(ngpu)))
netG.apply(weights_init)

netD = Discriminator(ngpu, ndf, nc).cuda()
if (device.type == 'cuda') and (ngpu > 1):
    netD = nn.DataParallel(netD, list(range(ngpu)))
netD.apply(weights_init)

# loss functions
criterionE = Encoder_loss
criterionG = Decoder_loss
criterionD = nn.BCELoss()

# optimizers
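# The trailing comment suggests optimizer definitions follow. A typical
# DCGAN-style choice would be Adam with lr=0.0002 and beta1=0.5; these values
# are assumed, not taken from the original snippet.
optimizerE = optim.Adam(netE.parameters(), lr=0.0002, betas=(0.5, 0.999))
optimizerG = optim.Adam(netG.parameters(), lr=0.0002, betas=(0.5, 0.999))
optimizerD = optim.Adam(netD.parameters(), lr=0.0002, betas=(0.5, 0.999))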
def main(preprocessed_dir_path, dest_dir_path, prefix, embedding_dim, hidden_dim,
         epoch, learning_rate, batch_size):
    print(20 * "=", "Preparing for train", 20 * "=")
    if prefix != "":
        train_file_name = f"{prefix}_train"
        vocab_file_name = f"{prefix}_vocab"
        weight_file_name = f"{prefix}_weight"
    else:
        train_file_name = "train"
        vocab_file_name = "vocab"
        weight_file_name = "weight"

    with open(os.path.join(preprocessed_dir_path, vocab_file_name + ".pkl"), "rb") as fp:
        vocab = pickle.load(fp)
    vocab_size = len(vocab)
    padding_idx = vocab[" "]

    encoder = Encoder(vocab_size, embedding_dim, hidden_dim, padding_idx).to(device)
    decoder = Decoder(vocab_size, embedding_dim, hidden_dim, padding_idx).to(device)
    criterion = nn.CrossEntropyLoss()
    encoder_optimizer = optim.Adam(encoder.parameters(), lr=learning_rate)
    decoder_optimizer = optim.Adam(decoder.parameters(), lr=learning_rate)

    expressions, answers = data_loader(
        os.path.join(preprocessed_dir_path, train_file_name + ".pkl"), padding_idx)
    expression_batches = batching(expressions, batch_size)
    answer_batches = batching(answers, batch_size)

    print(20 * "=", "Training", 20 * "=")
    all_losses = []
    all_times = []
    for i in range(1, epoch + 1):
        print(f"Epoch {i}")
        epoch_loss = 0
        start_time = time.time()
        for j in range(len(expression_batches)):
            encoder_optimizer.zero_grad()
            decoder_optimizer.zero_grad()
            expression_batch = expression_batches[j]
            answer_batch = answer_batches[j]
            encoder_state = encoder(expression_batch)
            # teacher forcing: feed the answer shifted right, predict it shifted left
            source = answer_batch[:, :-1]
            target = answer_batch[:, 1:]
            loss = 0
            decoder_output, _ = decoder(source, encoder_state)
            for k in range(decoder_output.size()[1]):
                loss += criterion(decoder_output[:, k, :], target[:, k])
            epoch_loss += loss.item()
            loss.backward()
            encoder_optimizer.step()
            decoder_optimizer.step()
        end_time = time.time()
        t = end_time - start_time
        print(f"  Loss:{epoch_loss} Time:{datetime.timedelta(seconds=int(t))}s")
        all_losses.append(epoch_loss)
        all_times.append(t)
        # checkpoint whenever this epoch achieved the lowest loss so far
        if min(all_losses) == epoch_loss:
            torch.save(
                {"encoder": encoder.state_dict(),
                 "decoder": decoder.state_dict()},
                os.path.join(dest_dir_path, f"{weight_file_name}.pth"))
        if epoch_loss < 1:
            print("  Early Stop")
            break

    print(20 * "=", "Done training", 20 * "=")
    print(f"Minimum Loss: {min(all_losses)}")
    print(f"Training time: {datetime.timedelta(seconds=int(sum(all_times)))}s")
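# batching is not defined in this snippet. A minimal sketch, assuming the
# inputs are padded 2-D tensors of shape (num_samples, seq_len); the real
# helper may also shuffle or drop the last partial batch:
def batching(tensor, batch_size):
    # torch.split returns a tuple of views; materialize it as a list of batches
    return list(torch.split(tensor, batch_size))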