def __init__(self):
    print(" # Welcome to HRED Chatbot.")
    print(" # Tensorflow detected: v{}".format(tf.__version__))
    print()

    self.config = HConfig()
    self.dataloader = DataLoader(self.config.num_utterance,
                                 self.config.max_length,
                                 self.config.split_ratio,
                                 self.config.batch_size)
    self.word_to_id = self.dataloader.word_to_id
    self.id_to_word = self.dataloader.id_to_word
    self.config.vocab_size = self.dataloader.vocab_size
    self.config.SOS_ID = self.dataloader.SOS_ID
    self.config.EOS_ID = self.dataloader.EOS_ID

    self.model = Model(self.config)
    print()
    print(" # Parameter Size: {}".format(self.model.get_parameter_size()))
    print()

    ### session
    self.sess = tf.Session()
    self.config.checkpoint_dir = os.path.join("save", self.model.__class__.__name__)
    print(" # Save directory: {}".format(self.config.checkpoint_dir))
def loadTrainDataAndValidateDate(args):
    # Load the training set
    trainsrc = os.path.join(args.data, "train.src")
    traintrg = os.path.join(args.data, "train.trg")
    trainmta = os.path.join(args.data, "train.mta")
    trainData = DataLoader(trainsrc, traintrg, trainmta, args.batch, args.bucketsize)
    print("Reading training data...")
    trainData.load(args.max_num_line)
    print("Allocation: {}".format(trainData.allocation))
    print("Percent: {}".format(trainData.p))

    # If a validation set exists, load it as well
    valsrc = os.path.join(args.data, "val.src")
    valtrg = os.path.join(args.data, "val.trg")
    valmta = os.path.join(args.data, "val.mta")
    valData = 0
    if os.path.isfile(valsrc) and os.path.isfile(valtrg):
        valData = DataLoader(valsrc, valtrg, valmta, args.batch, args.bucketsize, True)
        print("Reading validation data...")
        valData.load()
        assert valData.size > 0, "Validation data size must be greater than 0"
        print("Loaded validation data size {}".format(valData.size))
    else:
        print("No validation data found, training without validating...")
    return trainData, valData
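# Hedged sketch: the bucketsize argument above suggests the loader groups sequences
# of similar length so padded batches waste little space. This is a minimal,
# self-contained illustration of that idea, not the project's DataLoader; the
# bucket boundaries, PAD id, and shuffling policy are assumptions, and sequences
# longer than the largest bucket are simply skipped here.
import random

def bucket_batches(sequences, bucket_sizes=(20, 40, 60, 100), batch_size=32, pad_id=0):
    buckets = {b: [] for b in bucket_sizes}
    for seq in sequences:
        for b in bucket_sizes:
            if len(seq) <= b:
                buckets[b].append(seq)
                break
    for b, bucket in buckets.items():
        random.shuffle(bucket)
        for i in range(0, len(bucket), batch_size):
            chunk = bucket[i:i + batch_size]
            # pad every sequence in the chunk to its bucket length
            yield [s + [pad_id] * (b - len(s)) for s in chunk]

# Example: two small batches, each padded to its bucket length.
for batch in bucket_batches([[1, 2], [3, 4, 5], [6] * 30], batch_size=2):
    print(len(batch), len(batch[0]))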
def main(): """ config some args """ print('start training ...') config = Config() config.save_path = config.save_path + str(config.task_id) + '/' config.data_dir = 'data/dialog-bAbI-tasks/' config.target_file = 'trn' loader = DataLoader(config) print('\nconfig vocab_size: {}, batch_num: {}, seq_length: {}'.format( config.vocab_size, config.batch_num, config.seq_length)) config.create_properties() train(config, loader)
import wandb

from data_utils import DataLoader, Tokenizer
from utils import fetch_translations_and_bleu, get_args
from modeling import TransformerMaskPredict

PROJECT_NAME = "parallel-decoder-paper"

if __name__ == "__main__":
    args = get_args()
    wandb.init(project=PROJECT_NAME, config=args)

    model = TransformerMaskPredict.from_pretrained(args.model_id)
    tokenizer = Tokenizer(model.config.encoder_id, model.config.decoder_id, model.config.length_token)
    dl = DataLoader(args, tokenizer)
    tr_dataset, val_dataset, tst_dataset = dl()

    data, columns = dl.build_seqlen_table()
    wandb.log({'Sequence-Lengths': wandb.Table(data=data, columns=columns)})

    for mode, dataset in zip(["tr", "val", "tst"], [tr_dataset, val_dataset, tst_dataset]):
        out = fetch_translations_and_bleu(model, dataset, tokenizer,
                                          args.iterations, args.B,
                                          num_samples=args.bleu_num_samples)
        data = list(zip(out["src"], out["tgt"], out["pred"]))
        wandb.log({
logger = logging.getLogger('Sentiment Analysis')
logging_level = logging.DEBUG if args.verbose else logging.INFO
logging.basicConfig(level=logging_level)

if args.file is None:
    print('Need to pass the directory of reviews. Exiting...')
    parser.print_help()
    exit(-1)

###
# Load the data
###
dataLoader = DataLoader(logging)
training_data = dataLoader.load_data(args.file, 'train',
                                     limit=int(args.num_records),
                                     randomize=True)

logging.info('*************** describe data ***************')
logging.info(training_data.describe())
logging.info('*************** describe types *************** \n {0}'.format(
    training_data.dtypes))
logging.debug('head data set')
logging.debug(training_data.head(2))
logging.debug('tail data set')
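# Hedged sketch of the argparse wiring the snippet above assumes; the flag
# names are inferred from args.verbose / args.file / args.num_records and are
# not necessarily the project's actual CLI.
import argparse

parser = argparse.ArgumentParser(description='Sentiment Analysis trainer')
parser.add_argument('--file', help='directory containing the review data')
parser.add_argument('--num-records', dest='num_records', default=1000,
                    help='maximum number of reviews to load')
parser.add_argument('--verbose', action='store_true',
                    help='enable DEBUG-level logging')
args = parser.parse_args()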
def train(args):
    logging.basicConfig(filename="training.log", level=logging.INFO)

    trainsrc = os.path.join(args.data, "train.src")
    traintrg = os.path.join(args.data, "train.trg")
    trainData = DataLoader(trainsrc, traintrg, args.batch, args.bucketsize)
    print("Reading training data...")
    trainData.load(args.max_num_line)
    print("Allocation: {}".format(trainData.allocation))
    print("Percent: {}".format(trainData.p))

    valsrc = os.path.join(args.data, "val.src")
    valtrg = os.path.join(args.data, "val.trg")
    if os.path.isfile(valsrc) and os.path.isfile(valtrg):
        valData = DataLoader(valsrc, valtrg, args.batch, args.bucketsize, True)
        print("Reading validation data...")
        valData.load()
        assert valData.size > 0, "Validation data size must be greater than 0"
        print("Loaded validation data size {}".format(valData.size))
    else:
        print("No validation data found, training without validating...")

    ## create criterion, model, optimizer
    if args.criterion_name == "NLL":
        criterion = NLLcriterion(args.vocab_size)
        lossF = lambda o, t: criterion(o, t)
    else:
        assert os.path.isfile(args.knearestvocabs),\
            "{} does not exist".format(args.knearestvocabs)
        print("Loading vocab distance file {}...".format(args.knearestvocabs))
        with h5py.File(args.knearestvocabs, "r") as f:
            V, D = f["V"][...], f["D"][...]
        V, D = torch.LongTensor(V), torch.FloatTensor(D)
        D = dist2weight(D, args.dist_decay_speed)
        if args.cuda and torch.cuda.is_available():
            V, D = V.cuda(), D.cuda()
        criterion = KLDIVcriterion(args.vocab_size)
        lossF = lambda o, t: KLDIVloss(o, t, criterion, V, D)

    m0 = EncoderDecoder(args.vocab_size, args.embedding_size, args.hidden_size,
                        args.num_layers, args.dropout, args.bidirectional)
    m1 = nn.Sequential(nn.Linear(args.hidden_size, args.vocab_size),
                       nn.LogSoftmax(dim=1))
    if args.cuda and torch.cuda.is_available():
        print("=> training with GPU")
        m0.cuda()
        m1.cuda()
        criterion.cuda()
        #m0 = nn.DataParallel(m0, dim=1)
    else:
        print("=> training with CPU")

    m0_optimizer = torch.optim.Adam(m0.parameters(), lr=args.learning_rate)
    m1_optimizer = torch.optim.Adam(m1.parameters(), lr=args.learning_rate)

    ## load model state and optimizer state
    if os.path.isfile(args.checkpoint):
        print("=> loading checkpoint '{}'".format(args.checkpoint))
        logging.info("Restore training @ {}".format(time.ctime()))
        checkpoint = torch.load(args.checkpoint)
        args.start_iteration = checkpoint["iteration"]
        best_prec_loss = checkpoint["best_prec_loss"]
        m0.load_state_dict(checkpoint["m0"])
        m1.load_state_dict(checkpoint["m1"])
        m0_optimizer.load_state_dict(checkpoint["m0_optimizer"])
        m1_optimizer.load_state_dict(checkpoint["m1_optimizer"])
    else:
        print("=> no checkpoint found at '{}'".format(args.checkpoint))
        logging.info("Start training @ {}".format(time.ctime()))
        best_prec_loss = float('inf')
        #print("=> initializing the parameters...")
        #init_parameters(m0)
        #init_parameters(m1)
        ## here: load pretrained word (cell) embedding

    num_iteration = args.epochs * sum(trainData.allocation) // args.batch
    print("Iteration starts at {} "
          "and will end at {}".format(args.start_iteration, num_iteration - 1))

    ## training
    for iteration in range(args.start_iteration, num_iteration):
        try:
            input, lengths, target = trainData.getbatch()
            if args.cuda and torch.cuda.is_available():
                input, lengths, target = input.cuda(), lengths.cuda(), target.cuda()
            m0_optimizer.zero_grad()
            m1_optimizer.zero_grad()
            ## forward computation
            output = m0(input, lengths, target)
            loss = batchloss(output, target, m1, lossF, args.generator_batch)
            ## compute the gradients
            loss.backward()
            ## clip the gradients
            clip_grad_norm_(m0.parameters(), args.max_grad_norm)
            clip_grad_norm_(m1.parameters(), args.max_grad_norm)
            ## one step optimization
            m0_optimizer.step()
            m1_optimizer.step()
            ## average loss for one word
            avg_loss = loss.item() / target.size(0)
            if iteration % args.print_freq == 0:
                print("Iteration: {}\tLoss: {}".format(iteration, avg_loss))
            if iteration % args.save_freq == 0 and iteration > 0:
                prec_loss = validate(valData, (m0, m1), lossF, args)
                if prec_loss < best_prec_loss:
                    best_prec_loss = prec_loss
                    logging.info("Best model with loss {} at iteration {} @ {}"\
                        .format(best_prec_loss, iteration, time.ctime()))
                    is_best = True
                else:
                    is_best = False
                print("Saving the model at iteration {} validation loss {}"\
                    .format(iteration, prec_loss))
                savecheckpoint({
                    "iteration": iteration,
                    "best_prec_loss": best_prec_loss,
                    "m0": m0.state_dict(),
                    "m1": m1.state_dict(),
                    "m0_optimizer": m0_optimizer.state_dict(),
                    "m1_optimizer": m1_optimizer.state_dict()
                }, is_best)
        except KeyboardInterrupt:
            break
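# Hedged sketch of the savecheckpoint(state, is_best) helper the loop above
# relies on: the usual torch.save plus copy-on-best pattern. The file names
# here are assumptions, not the project's actual ones.
import shutil
import torch

def savecheckpoint(state, is_best, filename="checkpoint.pt"):
    torch.save(state, filename)               # always keep the latest state
    if is_best:                               # additionally keep a best-so-far copy
        shutil.copyfile(filename, "best_model.pt")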
"""
This is the main dashboard program. It uses 'data_utils' for fetching the
data and 'fig_utils' for building the Plotly Express figures.
"""
import os
import time

import dash
import dash_core_components as dcc
import dash_html_components as html
import numpy
from dash.dependencies import Input, Output
from datetime import date, datetime

from data_utils import DataLoader
from fig_utils import get_ts_plot, get_bar_plot, get_pie_plot

numpy.seterr(divide='ignore')
os.environ["TZ"] = "Asia/Kolkata"
time.tzset()

DL = DataLoader()
start_date = datetime.strptime('2020-01-01', '%Y-%m-%d')
end_date = date.today()

#external_stylesheets = ['bWLwgP.css']
#external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']
external_stylesheets = ['mystyle.css']
colors = {'background': '#111111', 'text': '#7FDBFF'}

app = dash.Dash(__name__, external_stylesheets=external_stylesheets)
app.title = 'Jayanti Prasad\'s Covid-19 Dashboard'
my_js_url = ['display.js']
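# Hedged sketch continuing the fragment above: one way a layout and callback
# could be wired to the imported figure helpers. The component ids and the
# get_ts_plot(country, start, end) signature are assumptions.
app.layout = html.Div(style={'backgroundColor': colors['background']}, children=[
    html.H1('Covid-19 Dashboard', style={'color': colors['text']}),
    dcc.DatePickerRange(id='date-range',
                        start_date=start_date,
                        end_date=end_date),
    dcc.Graph(id='ts-graph'),
])

@app.callback(Output('ts-graph', 'figure'),
              [Input('date-range', 'start_date'),
               Input('date-range', 'end_date')])
def update_timeseries(start, end):
    # Delegate figure construction to fig_utils (assumed signature).
    return get_ts_plot('India', start, end)

if __name__ == '__main__':
    app.run_server(debug=True)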
def main(args): # Get device device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') # device = torch.device("cpu") # Define model model = nn.DataParallel(WESS()).to(device) tokenizer = BertTokenizer.from_pretrained('bert-base-uncased') model_bert = BertModel.from_pretrained('bert-base-uncased') print("Models Have Been Defined") # Get dataset dataset = WESSDataLoader(tokenizer, model_bert) # Optimizer and loss optimizer = torch.optim.Adam(model.parameters(), lr=hp.learning_rate) wess_loss = WESSLoss().to(device) # loss_list = list() # Get training loader print("Get Training Loader") training_loader = DataLoader(dataset, batch_size=hp.batch_size, shuffle=True, collate_fn=collate_fn, drop_last=True, num_workers=cpu_count()) # Load checkpoint if exists try: checkpoint = torch.load( os.path.join(hp.checkpoint_path, 'checkpoint_%d.pth.tar' % args.restore_step)) model.load_state_dict(checkpoint['model']) # optimizer.load_state_dict(checkpoint['optimizer']) print("---Model Restored at Step %d---\n" % args.restore_step) except: print("---Start New Training---\n") if not os.path.exists(hp.checkpoint_path): os.mkdir(hp.checkpoint_path) # Init logger if not os.path.exists("logger"): os.mkdir("logger") # Training model = model.train() total_step = hp.epochs * len(training_loader) Time = np.array(list()) Start = time.clock() for epoch in range(hp.epochs): for i, data_of_batch in enumerate(training_loader): start_time = time.clock() current_step = i + args.restore_step + \ epoch * len(training_loader) + 1 # Init optimizer.zero_grad() # Prepare Data indexs_list = torch.Tensor([i for i in range(hp.batch_size) ]).int().to(device) # print(indexs_list) texts = data_of_batch["text"] mels = data_of_batch["mel"] embeddings = data_of_batch["embeddings"] sep_lists = data_of_batch["sep"] gates = data_of_batch["gate"] if torch.cuda.is_available(): texts = torch.from_numpy(texts).long().to(device) else: texts = torch.from_numpy(texts).long().to(device) mels = torch.from_numpy(mels).to(device) gates = torch.from_numpy(gates).float().to(device) # print("mels:", mels.size()) # print("gates:", gates.size()) # print(gates) # Forward output, mel_target, gate_target = model(texts, embeddings, sep_lists, indexs_list, mels, gates) mel_output, mel_out_postnet, gate_predicted = output # # Test # print(mel_out_postnet.size()) # print(mel_out_postnet) # test_mel = mel_out_postnet[0].cpu().detach().numpy() # np.save("test_mel.npy", test_mel) # print(gate_predicted) # print() # print("mel target size:", mels.size()) # print("mel output size:", mel_output.size()) # print("gate predict:", gate_predicted.size()) # Calculate loss if if_parallel: total_loss, mel_loss, gate_loss = wess_loss( mel_output, mel_out_postnet, gate_predicted, mel_target, gate_target) # print(gate_loss) # loss_list.append(total_loss.item()) # print(total_loss.item()) else: # print("there") total_loss, mel_loss, gate_loss = wess_loss( mel_output, mel_out_postnet, gate_predicted, mels, gates) t_l = total_loss.item() m_l = mel_loss.item() g_l = gate_loss.item() with open(os.path.join("logger", "total_loss.txt"), "a") as f_total_loss: f_total_loss.write(str(t_l) + "\n") with open(os.path.join("logger", "mel_loss.txt"), "a") as f_mel_loss: f_mel_loss.write(str(m_l) + "\n") with open(os.path.join("logger", "gate_loss.txt"), "a") as f_gate_loss: f_gate_loss.write(str(g_l) + "\n") # Backward total_loss.backward() # Clipping gradients to avoid gradient explosion nn.utils.clip_grad_norm_(model.parameters(), hp.grad_clip_thresh) # Update weights 
optimizer.step() if current_step % hp.log_step == 0: Now = time.clock() str1 = "Epoch [{}/{}], Step [{}/{}], Gate Loss: {:.4f}, Mel Loss: {:.4f}, Total Loss: {:.4f}.".format( epoch + 1, hp.epochs, current_step, total_step, gate_loss.item(), mel_loss.item(), total_loss.item()) str2 = "Time Used: {:.3f}s, Estimated Time Remaining: {:.3f}s.".format( (Now - Start), (total_step - current_step) * np.mean(Time)) print(str1) print(str2) with open(os.path.join("logger", "logger.txt"), "a") as f_logger: f_logger.write(str1 + "\n") f_logger.write(str2 + "\n") f_logger.write("\n") if current_step % hp.save_step == 0: torch.save( { 'model': model.state_dict(), 'optimizer': optimizer.state_dict() }, os.path.join(hp.checkpoint_path, 'checkpoint_%d.pth.tar' % current_step)) print("save model at step %d ..." % current_step) if current_step in hp.decay_step: optimizer = adjust_learning_rate(optimizer, current_step) end_time = time.clock() Time = np.append(Time, end_time - start_time) if len(Time) == hp.clear_Time: temp_value = np.mean(Time) Time = np.delete(Time, [i for i in range(len(Time))], axis=None) Time = np.append(Time, temp_value)
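# Hedged sketch of the adjust_learning_rate(optimizer, step) helper invoked at
# hp.decay_step in the loop above; the 0.5 decay factor is an assumption.
def adjust_learning_rate(optimizer, step, factor=0.5):
    for param_group in optimizer.param_groups:
        param_group['lr'] = param_group['lr'] * factor
    print("Learning rate decayed to {} at step {}".format(
        optimizer.param_groups[0]['lr'], step))
    return optimizer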
def main(args): # Get device device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') # Define model model = Tacotron2(hp).to(device) model_SpeakerEncoder = SpeakerEncoder.get_model().to(device) # model = Tacotron2(hp).to(device) print("All Models Have Been Defined") # Get dataset dataset = Tacotron2DataLoader() # Optimizer optimizer = torch.optim.Adam( model.parameters(), lr=hp.learning_rate, weight_decay=hp.weight_decay) # Criterion criterion = Tacotron2Loss() # Get training loader print("Get Training Loader") training_loader = DataLoader(dataset, batch_size=hp.batch_size, shuffle=True, collate_fn=collate_fn, drop_last=True, num_workers=cpu_count()) # Load checkpoint if exists try: checkpoint = torch.load(os.path.join( hp.checkpoint_path, 'checkpoint_%d.pth.tar' % args.restore_step)) model.load_state_dict(checkpoint['model']) optimizer.load_state_dict(checkpoint['optimizer']) print("\n---Model Restored at Step %d---\n" % args.restore_step) except: print("\n---Start New Training---\n") if not os.path.exists(hp.checkpoint_path): os.mkdir(hp.checkpoint_path) # Define Some Information total_step = hp.epochs * len(training_loader) Time = np.array([]) Start = time.perf_counter() # Training model = model.train() for epoch in range(hp.epochs): for i, batch in enumerate(training_loader): start_time = time.perf_counter() # Count step current_step = i + args.restore_step + \ epoch * len(training_loader) + 1 # Init optimizer.zero_grad() # Load Data text_padded, input_lengths, mel_padded, gate_padded, output_lengths, mel_for_SE = batch # Get Speaker Embedding # print(np.shape(mel_for_SE)) mel_for_SE = torch.from_numpy(mel_for_SE).float().to(device) # print(mel_for_SE.size()) with torch.no_grad(): SpeakerEmbedding = model_SpeakerEncoder(mel_for_SE) # print(SpeakerEmbedding.size()) # print(SpeakerEmbedding) # print(SpeakerEmbedding.grad) if cuda_available: text_padded = torch.from_numpy(text_padded).type( torch.cuda.LongTensor).to(device) else: text_padded = torch.from_numpy(text_padded).type( torch.LongTensor).to(device) mel_padded = torch.from_numpy(mel_padded).to(device) gate_padded = torch.from_numpy(gate_padded).to(device) input_lengths = torch.from_numpy(input_lengths).to(device) output_lengths = torch.from_numpy(output_lengths).to(device) # print("mel", mel_padded.size()) # print("text", text_padded.size()) # print("gate", gate_padded.size()) batch = text_padded, input_lengths, mel_padded, gate_padded, output_lengths x, y = model.parse_batch(batch) y_pred = model(x, SpeakerEmbedding) # Loss loss, mel_loss, gate_loss = criterion(y_pred, y) # Backward loss.backward() # Clipping gradients to avoid gradient explosion nn.utils.clip_grad_norm_(model.parameters(), hp.grad_clip_thresh) # Update weights optimizer.step() if current_step % hp.log_step == 0: Now = time.perf_counter() str_loss = "Epoch [{}/{}], Step [{}/{}], Mel Loss: {:.4f}, Gate Loss: {:.4f}, Total Loss: {:.4f}.".format( epoch + 1, hp.epochs, current_step, total_step, mel_loss.item(), gate_loss.item(), loss.item()) str_time = "Time Used: {:.3f}s, Estimated Time Remaining: {:.3f}s.".format( (Now - Start), (total_step - current_step) * np.mean(Time)) print(str_loss) print(str_time) with open("logger.txt", "a")as f_logger: f_logger.write(str_loss + "\n") f_logger.write(str_time + "\n") f_logger.write("\n") if current_step % hp.save_step == 0: torch.save({'model': model.state_dict(), 'optimizer': optimizer.state_dict( )}, os.path.join(hp.checkpoint_path, 'checkpoint_%d.pth.tar' % current_step)) print("\nsave model at step %d 
...\n" % current_step) end_time = time.perf_counter() Time = np.append(Time, end_time - start_time) if len(Time) == hp.clear_Time: temp_value = np.mean(Time) Time = np.delete( Time, [i for i in range(len(Time))], axis=None) Time = np.append(Time, temp_value)
def train(): if args.load_model and (not args.reset_hparams): print("load hparams..") hparams_file_name = os.path.join(args.output_dir, "hparams.pt") hparams = torch.load(hparams_file_name) else: hparams = HParams( data_path=args.data_path, source_train=args.source_train, target_train=args.target_train, source_valid=args.source_valid, target_valid=args.target_valid, source_vocab=args.source_vocab, target_vocab=args.target_vocab, source_test=args.source_test, target_test=args.target_test, max_len=args.max_len, max_tree_len=args.max_tree_len, n_train_sents=args.n_train_sents, cuda=args.cuda, d_word_vec=args.d_word_vec, d_model=args.d_model, batch_size=args.batch_size, batcher=args.batcher, n_train_steps=args.n_train_steps, dropout=args.dropout, lr=args.lr, lr_dec=args.lr_dec, l2_reg=args.l2_reg, init_type=args.init_type, init_range=args.init_range, trdec=args.trdec, target_tree_vocab=args.target_tree_vocab, target_word_vocab=args.target_word_vocab, target_tree_train=args.target_tree_train, target_tree_valid=args.target_tree_valid, target_tree_test=args.target_tree_test, max_tree_depth=args.max_tree_depth, parent_feed=args.parent_feed, rule_parent_feed=args.rule_parent_feed, label_smooth=args.label_smooth, raml_rule=args.raml_rule, raml_tau=args.raml_tau, no_lhs=args.no_lhs, root_label=args.root_label, single_readout=args.single_readout, single_attn=args.single_attn, pos=args.pos, share_emb_softmax=args.share_emb_softmax, attn=args.attn, self_attn=args.self_attn, no_word_to_rule=args.no_word_to_rule, single_inp_readout=args.single_inp_readout, rule_tanh=args.rule_tanh, n_heads=args.n_heads, d_k=args.d_k, d_v=args.d_v, residue=args.residue, layer_norm=args.layer_norm, no_piece_tree=args.no_piece_tree, self_attn_input_feed=args.self_attn_input_feed, trdec_attn_v1=args.trdec_attn_v1, merge_bpe=args.merge_bpe, ignore_rule_len=False, nbest=False, force_rule=True, force_rule_step=1, ) data = DataLoader(hparams=hparams) hparams.add_param("source_vocab_size", data.source_vocab_size) if args.trdec: hparams.add_param("target_rule_vocab_size", data.target_rule_vocab_size) hparams.add_param("target_word_vocab_size", data.target_word_vocab_size) else: hparams.add_param("target_vocab_size", data.target_vocab_size) hparams.add_param("pad_id", data.pad_id) hparams.add_param("unk_id", data.unk_id) hparams.add_param("bos_id", data.bos_id) hparams.add_param("eos_id", data.eos_id) hparams.add_param("n_train_steps", args.n_train_steps) # build or load model model print("-" * 80) print("Creating model") if args.load_model: model_file_name = os.path.join(args.output_dir, "model.pt") print("Loading model from '{0}'".format(model_file_name)) model = torch.load(model_file_name) optim_file_name = os.path.join(args.output_dir, "optimizer.pt") print("Loading optimizer from {}".format(optim_file_name)) trainable_params = [p for p in model.parameters() if p.requires_grad] optim = torch.optim.Adam(trainable_params, lr=hparams.lr, weight_decay=hparams.l2_reg) optimizer_state = torch.load(optim_file_name) optim.load_state_dict(optimizer_state) extra_file_name = os.path.join(args.output_dir, "extra.pt") step, best_val_ppl, best_val_bleu, cur_attempt, lr = torch.load( extra_file_name) else: if args.trdec: if args.trdec_attn: model = TrDecAttn(hparams=hparams) elif args.trdec_attn_v1: model = TrDecAttn_v1(hparams=hparams) elif args.trdec_single: model = TrDecSingle(hparams=hparams) else: model = TrDec(hparams=hparams) else: model = Seq2Seq(hparams=hparams) if args.init_type == "uniform": print("initialize uniform with range 
{}".format(args.init_range)) for p in model.parameters(): p.data.uniform_(-args.init_range, args.init_range) trainable_params = [p for p in model.parameters() if p.requires_grad] optim = torch.optim.Adam(trainable_params, lr=hparams.lr, weight_decay=hparams.l2_reg) #optim = torch.optim.Adam(trainable_params) step = 0 best_val_ppl = 1e10 best_val_bleu = 0 cur_attempt = 0 lr = hparams.lr if args.reset_hparams: lr = args.lr crit = get_criterion(hparams) trainable_params = [p for p in model.parameters() if p.requires_grad] num_params = count_params(trainable_params) print("Model has {0} params".format(num_params)) print("-" * 80) print("start training...") start_time = log_start_time = time.time() target_words, total_loss, total_corrects = 0, 0, 0 target_rules, target_total, target_eos = 0, 0, 0 total_word_loss, total_rule_loss, total_eos_loss = 0, 0, 0 model.train() #i = 0 while True: ((x_train, x_mask, x_len, x_count), (y_train, y_mask, y_len, y_count), batch_size) = data.next_train() #print("x_train", x_train.size()) #print("y_train", y_train.size()) #print(i) #i += 1 #print("x_train", x_train) #print("x_mask", x_mask) #print("x_len", x_len) #print("y_train", y_train) #print("y_mask", y_mask) #exit(0) optim.zero_grad() if args.trdec: y_total_count, y_rule_count, y_word_count, y_eos_count = y_count target_total += (y_total_count - batch_size) target_rules += y_rule_count target_eos += y_eos_count target_words += (y_word_count - batch_size) logits = model.forward(x_train, x_mask, x_len, y_train[:, :-1, :], y_mask[:, :-1], y_len, y_train[:, 1:, 2], y_label=y_train[:, 1:, 0]) logits = logits.view( -1, hparams.target_word_vocab_size + hparams.target_rule_vocab_size) labels = y_train[:, 1:, 0].contiguous().view(-1) #print("x_train_logits", logits) #print("total:", y_total_count, "rule_count:", y_rule_count, "word_count:", y_word_count, "eos_count:", y_eos_count) tr_loss, tr_acc, rule_loss, word_loss, eos_loss, rule_count, word_count, eos_count = \ get_performance(crit, logits, labels, hparams) #print("perform rule_count:", rule_count, "word_count:", word_count, "eos_count", eos_count) #print((y_train[:,:,0] >= hparams.target_word_vocab_size).long().sum().data[0]) #print(y_rule_count) #print(rule_count.data[0]) assert y_rule_count == rule_count.item( ), "data rule count {}, performance rule count {}".format( y_rule_count, rule_count.item()) assert y_eos_count == eos_count.item( ), "data eos count {}, performance eos count {}".format( y_eos_count, eos_count.item()) assert y_word_count - batch_size == word_count.item( ), "data word count {}, performance word count {}".format( y_word_count - batch_size, word_count.item()) total_word_loss += word_loss.item() total_rule_loss += rule_loss.item() total_eos_loss += eos_loss.item() else: target_words += (y_count - batch_size) logits = model.forward(x_train, x_mask, x_len, y_train[:, :-1], y_mask[:, :-1], y_len) logits = logits.view(-1, hparams.target_vocab_size) labels = y_train[:, 1:].contiguous().view(-1) tr_loss, tr_acc = get_performance(crit, logits, labels, hparams) total_loss += tr_loss.item() total_corrects += tr_acc.item() step += 1 if args.trdec and args.loss_type == "rule": rule_loss.div_(batch_size) rule_loss.backward() elif args.trdec and args.loss_type == "word": word_loss.div_(batch_size) word_loss.backward() else: tr_loss.div_(batch_size) tr_loss.backward() grad_norm = torch.nn.utils.clip_grad_norm(model.parameters(), args.clip_grad) optim.step() # clean up GPU memory if step % args.clean_mem_every == 0: gc.collect() if step % args.log_every 
== 0: epoch = step // data.n_train_batches curr_time = time.time() since_start = (curr_time - start_time) / 60.0 elapsed = (curr_time - log_start_time) / 60.0 log_string = "ep={0:<3d}".format(epoch) log_string += " steps={0:<6.2f}".format(step / 1000) log_string += " lr={0:<9.7f}".format(lr) log_string += " loss={0:<7.2f}".format(tr_loss.item()) log_string += " |g|={0:<5.2f}".format(grad_norm) if args.trdec: log_string += " num_word={} num_rule={} num_eos={}".format( target_words, target_rules, target_eos) log_string += " ppl={0:<8.2f}".format( np.exp(total_loss / target_words)) log_string += " ppl_word={0:<8.2f}".format( np.exp(total_word_loss / target_words)) log_string += " ppl_rule={0:<8.2f}".format( np.exp(total_rule_loss / target_rules)) if not args.no_piece_tree: log_string += " ppl_eos={0:<8.2f}".format( np.exp(total_eos_loss / target_eos)) log_string += " acc={0:<5.4f}".format(total_corrects / target_total) else: log_string += " ppl={0:<8.2f}".format( np.exp(total_loss / target_words)) log_string += " acc={0:<5.4f}".format(total_corrects / target_words) log_string += " wpm(k)={0:<5.2f}".format(target_words / (1000 * elapsed)) log_string += " time(min)={0:<5.2f}".format(since_start) print(log_string) if step % args.eval_every == 0: based_on_bleu = args.eval_bleu and best_val_ppl <= args.ppl_thresh val_ppl, val_bleu = eval(model, data, crit, step, hparams, eval_bleu=based_on_bleu, valid_batch_size=args.valid_batch_size, tr_logits=logits) if based_on_bleu: if best_val_bleu <= val_bleu: save = True best_val_bleu = val_bleu cur_attempt = 0 else: save = False cur_attempt += 1 else: if best_val_ppl >= val_ppl: save = True best_val_ppl = val_ppl cur_attempt = 0 else: save = False cur_attempt += 1 if save: save_checkpoint( [step, best_val_ppl, best_val_bleu, cur_attempt, lr], model, optim, hparams, args.output_dir) else: lr = lr * args.lr_dec set_lr(optim, lr) # reset counter after eval log_start_time = time.time() target_words = total_corrects = total_loss = 0 target_rules = target_total = target_eos = 0 total_word_loss = total_rule_loss = total_eos_loss = 0 if args.patience >= 0: if cur_attempt > args.patience: break else: if step > args.n_train_steps: break
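# Hedged sketch: the validation bookkeeping in the eval branch above (best
# metric so far, attempt counter, multiplicative LR decay on a miss, stop once
# patience is exhausted) extracted into a small helper for a lower-is-better
# metric such as perplexity. A refactoring sketch, not code from the project.
class PlateauMonitor:
    def __init__(self, lr, lr_dec, patience):
        self.best = float("inf")
        self.lr = lr
        self.lr_dec = lr_dec
        self.patience = patience
        self.attempts = 0

    def update(self, val_metric):
        """Return (improved, should_stop); decay the LR when the metric misses."""
        if val_metric <= self.best:
            self.best = val_metric
            self.attempts = 0
            return True, False
        self.attempts += 1
        self.lr *= self.lr_dec
        return False, self.patience >= 0 and self.attempts > self.patience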
class Seq2SeqChatbot(object):
    def __init__(self):
        print(" # Welcome to Seq2Seq Chatbot.")
        print(" # Tensorflow detected: v{}".format(tf.__version__))
        print()

        self.config = Config
        self.dataloader = DataLoader(
            self.config.num_utterance,
            self.config.max_length,
            self.config.split_ratio,
            self.config.batch_size)
        self.word_to_id = self.dataloader.word_to_id
        self.id_to_word = self.dataloader.id_to_word
        self.config.vocab_size = self.dataloader.vocab_size
        self.config.SOS_ID = self.dataloader.SOS_ID
        self.config.EOS_ID = self.dataloader.EOS_ID

        self.model = Model(self.config)
        print()
        print(" # Parameter Size: {}".format(self.model.get_parameter_size()))
        print()

        self.sess = tf.Session()
        self.config.checkpoint_dir = os.path.join("save", self.model.__class__.__name__)
        print(" # Save directory: {}".format(self.config.checkpoint_dir))

    def main(self):
        # self.encoder_states(self.sess)
        # self.train_model(self.sess)
        if FLAGS.mode == 'train':
            ckpt = tf.train.get_checkpoint_state(self.config.checkpoint_dir)
            if ckpt and tf.train.checkpoint_exists(ckpt.model_checkpoint_path) and (not FLAGS.retrain):
                print(" # Restoring model parameters from %s." % ckpt.model_checkpoint_path)
                self.model.saver.restore(self.sess, ckpt.model_checkpoint_path)
            else:
                print(" # Creating model with fresh parameters.")
                self.sess.run(self.model.init_op)
            self.train_model(self.sess)

    def encoder_states(self, sess):
        f = 0
        count = 0
        for (enc_inp, dec_inp, dec_tar) in tqdm(self.dataloader.data_generator(flag='test')):
            outputs = self.model.encoder_states_session(sess, enc_inp)
            encoder_states = outputs['encoder_states']
            encoder_outputs = outputs['encoder_outputs']
            if f <= 2:
                print('number of layer: {}'.format(len(encoder_states)))
                for state in encoder_states:
                    print('shape of encoder_states: {}'.format(state.shape))
                print('shape of encoder_outputs: {}'.format(encoder_outputs.shape))
                f += 1
            print(count)
            count += 1

    def save_session(self, sess):
        print(" # Saving checkpoints.")
        save_dir = os.path.join(self.config.checkpoint_dir)
        model_name = self.model.__class__.__name__ + '.ckpt'
        checkpoint_path = os.path.join(save_dir, model_name)
        self.model.saver.save(sess, checkpoint_path)
        print(' # Model saved.')

    def train_model(self, sess):
        best_result_loss = 1000.0
        for epoch in range(self.config.num_epoch):
            print()
            print('----epoch: {}/{} | lr: {}'.format(epoch, self.config.num_epoch, sess.run(self.model.lr)))
            tic = datetime.datetime.now()

            train_iterator = self.dataloader.data_generator(flag='train')
            test_iterator = self.dataloader.data_generator(flag='test')
            train_batch_num = self.dataloader.train_batch_num
            # test_batch_num = self.dataloader.test_batch_num

            total_loss = 0.0
            nll_loss = 0.0
            word_error_rate = 0.0
            count = 0
            for (enc_inp, dec_inp, dec_tar) in tqdm(train_iterator, desc='training'):
                train_out = self.model.train_session(sess, enc_inp, dec_inp, dec_tar)
                count += 1
                step = train_out["step"]  # step: number of batches trained so far
                total_loss += train_out["total_loss"]
                nll_loss += train_out["nll_loss"]
                word_error_rate += train_out["word_error_rate"]
                if step % 50 == 0:
                    cur_loss = total_loss / count
                    cur_nll_loss = nll_loss / count
                    cur_word_error_rate = word_error_rate / count
                    cur_perplexity = math.exp(float(cur_nll_loss)) if cur_nll_loss < 300 else float("inf")
                    print(" Step %4d | Batch [%3d/%3d] | Loss %.6f | PPL %.6f | WER %.6f" %
                          (step, count, train_batch_num, cur_loss, cur_perplexity, cur_word_error_rate))

            print()
            total_loss /= count
            nll_loss /= count
            word_error_rate /= count
            perplexity = math.exp(float(nll_loss)) if nll_loss < 300 else float("inf")
            print(" Train Epoch %4d | Loss %.6f | PPL %.6f | WER %.6f" %
                  (epoch, total_loss, perplexity, word_error_rate))

            # testing after every epoch
            test_loss = 0.0
            test_nll_loss = 0.0
            test_count = 0
            test_rate = 0.0
            for (enc_inp, dec_inp, dec_tar) in tqdm(test_iterator, desc="testing"):
                test_outputs = self.model.eval_session(sess, enc_inp, dec_inp, dec_tar)
                test_loss += test_outputs["total_loss"]
                test_nll_loss += test_outputs["nll_loss"]
                test_rate += test_outputs["word_error_rate"]
                test_count += 1
            test_loss /= test_count
            test_rate /= test_count
            test_nll_loss /= test_count
            test_perp = math.exp(float(test_nll_loss)) if test_nll_loss < 300 else float("inf")
            print(" Test Epoch %d | Loss %.6f | PPL %.6f | WER %.6f" %
                  (epoch, test_loss, test_perp, test_rate))
            print()

            if test_loss < best_result_loss:
                self.save_session(sess)
                if np.abs(best_result_loss - test_loss) < 0.03:
                    cur_lr = sess.run(self.model.lr)
                    sess.run(self.model.update_lr_op,
                             feed_dict={self.model.new_lr: cur_lr * 0.5})
                best_result_loss = test_loss
            toc = datetime.datetime.now()
            print(" # Epoch finished in {}".format(toc - tic))
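# Hedged sketch: the overflow-guarded perplexity computation repeated above,
# factored into a tiny helper. Purely a refactoring suggestion.
import math

def perplexity(nll, max_nll=300.0):
    """exp(nll), returning inf instead of raising OverflowError for huge losses."""
    return math.exp(float(nll)) if nll < max_nll else float("inf")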
    use_selu=args.use_selu, model_path=args.model_path, use_gnn=args.use_gnn)

## Optimizer
optimizer_ttime = torch.optim.Adam(TTime_gnn.parameters(), lr=args.lr, amsgrad=True)
# optimizer_ttime = ScheduledOptim(torch.optim.Adam(TTime_gnn.parameters(), betas=(0.9, 0.98), eps=1e-9, amsgrad=False), 1, 128, 8000)

## Preparing the data
trainfiles = list(
    filter(lambda x: x.endswith(".h5"), sorted(os.listdir(args.trainpath))))
validfiles = list(
    filter(lambda x: x.endswith(".h5"), sorted(os.listdir(args.validpath))))

train_dataloader = DataLoader(args.trainpath)
print("Loading the training data...")
train_dataloader.read_files(trainfiles, args.use_gnn)

valid_dataloader = DataLoader(args.validpath)
print("Loading the validation data...")
valid_dataloader.read_files(validfiles, args.use_gnn)

train_slot_size = np.array(
    list(map(lambda s: s.ntrips, train_dataloader.slotdata_pool)))
train_num_iterations = int(np.ceil(train_slot_size / args.batch_size).sum())
print("There are {} trips in the training dataset".format(train_slot_size.sum()))
print("Number of iterations for an epoch: {}".format(train_num_iterations))

valid_slot_size = np.array(
    list(map(lambda s: s.ntrips, valid_dataloader.slotdata_pool)))
valid_num_iterations = int(np.ceil(valid_slot_size / args.batch_size).sum())
config = yaml.safe_load(open(TRANSFORMER_CONFIG_FILE, "r"))
config = Dict.from_nested_dict(config)

args = TrainerConfig.from_default()
args.update(TRAINER_CONFIG.__dict__)

if not args.load_pretrained_path:
    model = TransformerMaskPredict(config)
else:
    model = TransformerMaskPredict.from_pretrained(args.load_pretrained_path)

tokenizer = Tokenizer(model.config.encoder_id, model.config.decoder_id, model.config.length_token)
dl = DataLoader(args, tokenizer)
tr_dataset, val_dataset, test_dataset = dl()

trainer = Trainer(model, tokenizer, args)
if args.load_training_state:
    trainer.load_training_state_dict(args.base_dir)
trainer.fit(tr_dataset, val_dataset)

if args.save_pretrained_path:
    trainer.model.save_pretrained(
        os.path.join(args.base_dir, args.save_pretrained_path))

tst_loss = trainer.evaluate(test_dataset)
wandb.log({"tst_loss": tst_loss})
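# Hedged sketch of what a Dict.from_nested_dict wrapper like the one used above
# typically does: recursively expose nested dict keys as attributes. This is an
# assumption about its behaviour, not the project's actual class.
class Dict(dict):
    __getattr__ = dict.__getitem__

    @classmethod
    def from_nested_dict(cls, data):
        if not isinstance(data, dict):
            return data
        return cls({k: cls.from_nested_dict(v) for k, v in data.items()})

# Usage: cfg = Dict.from_nested_dict({"encoder": {"layers": 6}}); cfg.encoder.layers -> 6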
def sample(config): """ :return: """ # config.batch_size = 1 loader = DataLoader(config) print('\nconfig vocab_size: {}, batch_num: {}, seq_length: {}'.format( config.vocab_size, config.batch_num, config.seq_length)) config.create_properties() print('\nstart building models ...') start = time.time() model = Model(config) print('... finished!') print('building model time: {}'.format(time.time() - start)) print('\ncreating batches ...') loader.create_batches() print('... finished!') # [0] is right number, [1] is total number # for da dt_rate = [0.0, 0.0] dt_rate_session = [0.0, 0.0] # for slot_value sv_rate = [0.0, 0.0] sv_rate_session = [0.0, 0.0] # for mask mask_rate = [0.0, 0.0] mask_rate_session = [0.0, 0.0] # for total rate = [0.0, 0.0] rate_session = [0.0, 0.0] test_dir = config.test_path + str(config.task_id) + '/' _time = str(datetime.datetime.now()) _time = _time.replace(':', '_') _time = _time.replace(' ', '_') _time = _time.replace('.', '_') test_file = test_dir + _time + '.csv' test_file = open(test_file, 'w+') _config = tf.ConfigProto() _config.gpu_options.allow_growth = True if not config.device == '/cpu:0': _config.allow_soft_placement = True with tf.Session(config=_config) as sess: loss_mask = [1.0, 0.0, 0.0, 0.0, 0.0] saver = tf.train.Saver() sess.run(tf.assign(model.embedding, config.embeddings)) sess.run(tf.assign(model.init_k_memory, config.slot_embedding)) sess.run(tf.assign(model.value_feature0, loader.value_feature)) for batch_index in range(config.batch_num): ckpt = tf.train.get_checkpoint_state(config.save_path) if ckpt and ckpt.model_checkpoint_path and batch_index == 0: saver.restore(sess, ckpt.model_checkpoint_path) print('\nmodel restored from {}'.format( ckpt.model_checkpoint_path)) index, x, y, t, slot_mask, seq_lengths_mask, x_sen_len, y_sen_len, attn, attn_mask, \ raw_x, raw_y, _, _ = loader.next_batch() print('\nbatch {} / {}'.format(batch_index, config.batch_num)) feed = { model.input_x: x, model.input_y: y, model.targets: t, model.masks: slot_mask, model.seq_masks: seq_lengths_mask, model.x_sentence_length: x_sen_len, model.y_sentence_length: y_sen_len, model.attention: attn, model.attention_mask: attn_mask, model.loss_mask: loss_mask } [prob, slot_prob, mask_result, attention_result] = sess.run([ model.prob, model.slot_prob, model.mask_result, model.attention_result ], feed) # calculate result and print result for i in range(config.batch_size): dt_session_right = True sv_session_right = True mask_session_right = True test_file.write('session {}/{}\n'.format( batch_index * config.batch_size + i, config.batch_num * config.batch_size)) for j in range(config.seq_length): sentence_right = True if raw_y[i][j] == 0.0 or raw_x[i][j] == 0.0: assert raw_y[i][j] == 0.0 or raw_x[i][j] == 0.0 break # write x sentence test_file.write('post: {},'.format(j)) test_file.write( str(raw_x[i][j]).strip().replace(' ', ',') + '\n') # write slot attention result for m in range(config.slot_size): slot = config.slots[m] test_file.write('[{}],'.format(slot)) att_ = attention_result[i][j][m] for n in range(config.sentence_length): test_file.write('{:.3f},'.format(att_[n][0])) test_file.write('\n') # write y sentence test_file.write('response: {},'.format(j)) test_file.write( str(raw_y[i][j]).strip().replace(' ', ',') + '\n') # split target tars = np.split(t[i][j], config.numpy_split_sizes) da_tar, sv_tar = tars[0], tars[1:config.slot_size + 1] # write da_type information da_index = np.argmax(da_tar) da_index_prob = np.argmax(prob[i][j][0]) test_file.write(config.da_types[da_index] + 
',' + config.da_types[da_index_prob] + '\n') # calculate datype acc if da_index_prob != da_index: dt_session_right = False sentence_right = False else: dt_rate[0] += 1.0 dt_rate[1] += 1.0 # mask and slot_value for m in range(config.slot_size): slot = config.slots[m] test_file.write('[{}],'.format(slot)) # write mask information test_file.write('{:.3f},'.format( slot_mask[i][j][m][0])) test_file.write('{:.3f},'.format( mask_result[i][j][m][0])) # calculate mask acc smm = np.argmax(slot_mask[i][j][m]) mrm = np.argmax(mask_result[i][j][m]) if mrm != smm: mask_session_right = False sentence_right = False else: mask_rate[0] += 1.0 mask_rate[1] += 1.0 # write slot value value_index = np.argmax(sv_tar[m]) value_index_prob = np.argmax(slot_prob[i][j][m][0]) test_file.write(config.slot_value[slot][value_index] + ',') test_file.write( config.slot_value[slot][value_index_prob] + '\n') # calculate slotvalue acc if slot_mask[i][j][m][0] > 0.9: if value_index != value_index_prob: sv_session_right = False sentence_right = False else: sv_rate[0] += 1.0 sv_rate[1] += 1.0 # calculate sentence acc if sentence_right: rate[0] += 1.0 rate[1] += 1.0 if dt_session_right: dt_rate_session[0] += 1.0 dt_rate_session[1] += 1.0 if sv_session_right: sv_rate_session[0] += 1.0 sv_rate_session[1] += 1.0 if mask_session_right: mask_rate_session[0] += 1.0 mask_rate_session[1] += 1.0 if dt_session_right and sv_session_right and mask_session_right: rate_session[0] += 1.0 rate_session[1] += 1.0 test_file.close() test_file = test_dir + _time + '_final.csv' with open(test_file, 'w+') as fout: fout.write(', sentence, session\n') fout.write('da type, {}, {}\n'.format( dt_rate[0] / dt_rate[1], dt_rate_session[0] / dt_rate_session[1])) fout.write('slot value, {}, {}\n'.format( sv_rate[0] / sv_rate[1], sv_rate_session[0] / sv_rate_session[1])) fout.write('mask, {}, {}\n'.format( mask_rate[0] / mask_rate[1], mask_rate_session[0] / mask_rate_session[1])) fout.write('full, {}, {}\n'.format(rate[0] / rate[1], rate_session[0] / rate_session[1])) fout.close() print('final result writt to {}'.format(test_file)) return
optimizer_rho = torch.optim.Adam(probrho.parameters(), lr=args.lr, amsgrad=True)
optimizer_traffic = torch.optim.Adam(probtraffic.parameters(), lr=args.lr, amsgrad=True)
optimizer_ttime = torch.optim.Adam(probttime.parameters(), lr=args.lr, amsgrad=True)

## Preparing the data
trainfiles = list(
    filter(lambda x: x.endswith(".h5"), sorted(os.listdir(args.trainpath))))
validfiles = list(
    filter(lambda x: x.endswith(".h5"), sorted(os.listdir(args.validpath))))

train_dataloader = DataLoader(args.trainpath)
print("Loading the training data...")
train_dataloader.read_files(trainfiles)

valid_dataloader = DataLoader(args.validpath)
print("Loading the validation data...")
valid_dataloader.read_files(validfiles)

train_slot_size = np.array(
    list(map(lambda s: s.ntrips, train_dataloader.slotdata_pool)))
train_num_iterations = int(np.ceil(train_slot_size / args.batch_size).sum())
print("There are {} trips in the training dataset".format(train_slot_size.sum()))
print("Number of iterations for an epoch: {}".format(train_num_iterations))

valid_slot_size = np.array(
    list(map(lambda s: s.ntrips, valid_dataloader.slotdata_pool)))
valid_num_iterations = int(np.ceil(valid_slot_size / args.batch_size).sum())
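# Hedged sketch of reading the per-slot .h5 files that read_files() consumes
# above; the dataset keys ("trips", "times") inside each file are assumptions.
import os
import h5py
import numpy as np

def load_slot_files(dirpath, filenames):
    slots = []
    for fname in filenames:
        with h5py.File(os.path.join(dirpath, fname), "r") as f:
            trips = np.asarray(f["trips"][...])
            times = np.asarray(f["times"][...])
        slots.append((trips, times))
    return slots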
import yaml

from data_utils import DataLoader
from training_utils import Trainer, TrainerConfig
import training_utils
from modeling import Model

if __name__ == '__main__':
    with open("config/config.yaml", "r") as f:
        config = yaml.safe_load(f)

    model = Model(config)

    args = getattr(training_utils, "baseline")
    dl = DataLoader(args)
    tr_dataset, val_dataset = dl.setup()
    tr_dataset = dl.train_dataloader(tr_dataset)
    val_dataset = dl.val_dataloader(val_dataset)

    trainer = Trainer(model, args)
    trainer.fit(tr_dataset, val_dataset)
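# Hedged sketch of the kind of loop a Trainer.fit(tr_dataset, val_dataset) like
# the one above wraps. The epoch count, optimizer, and the assumption that the
# model returns a scalar loss from keyword batches are illustrative only, not
# the project's Trainer API.
import torch

class MinimalTrainer:
    def __init__(self, model, lr=1e-3, epochs=3):
        self.model = model
        self.epochs = epochs
        self.optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    def fit(self, tr_dataset, val_dataset):
        for epoch in range(self.epochs):
            self.model.train()
            for batch in tr_dataset:
                self.optimizer.zero_grad()
                loss = self.model(**batch)      # model assumed to return a scalar loss
                loss.backward()
                self.optimizer.step()

            self.model.eval()
            with torch.no_grad():
                val_loss = sum(self.model(**batch).item() for batch in val_dataset)
            print("epoch {}: val_loss={:.4f}".format(epoch, val_loss))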
def main(args): # Get device device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') # Define model model = nn.DataParallel(TransformerTTS()).to(device) print("Model Has Been Defined") # Get dataset dataset = TransformerTTSDataLoader() # Optimizer and loss optimizer = torch.optim.Adam(model.parameters(), lr=hp.learning_rate) transformer_loss = TransformerTTSLoss().to(device) # Get training loader print("Get Training Loader") training_loader = DataLoader(dataset, batch_size=hp.batch_size, shuffle=True, collate_fn=collate_fn, drop_last=True, num_workers=cpu_count()) try: checkpoint = torch.load( os.path.join(hp.checkpoint_path, 'checkpoint_%d.pth.tar' % args.restore_step)) model.load_state_dict(checkpoint['model']) # optimizer.load_state_dict(checkpoint['optimizer']) print("---Model Restored at Step %d---\n" % args.restore_step) except: print("---Start New Training---\n") if not os.path.exists(hp.checkpoint_path): os.mkdir(hp.checkpoint_path) # Init logger if not os.path.exists("logger"): os.mkdir("logger") # Training model = model.train() total_step = hp.epochs * len(training_loader) Time = np.array(list()) Start = time.clock() for epoch in range(hp.epochs): for i, data_of_batch in enumerate(training_loader): start_time = time.clock() current_step = i + args.restore_step + \ epoch * len(training_loader) + 1 # Init optimizer.zero_grad() # Prepare Data src_seq = data_of_batch["texts"] src_pos = data_of_batch["pos_padded"] tgt_seq = data_of_batch["tgt_sep"] tgt_pos = data_of_batch["tgt_pos"] mel_tgt = data_of_batch["mels"] gate_target = data_of_batch["gate_target"] src_seq = torch.from_numpy(src_seq).long().to(device) src_pos = torch.from_numpy(src_pos).long().to(device) tgt_seq = torch.from_numpy(tgt_seq).long().to(device) tgt_pos = torch.from_numpy(tgt_pos).long().to(device) mel_tgt = torch.from_numpy(mel_tgt).float().to(device) gate_target = torch.from_numpy(gate_target).float().to(device) # Forward mel_output, mel_output_postnet, stop_token = model( src_seq, src_pos, tgt_seq, tgt_pos, mel_tgt) # Cal Loss mel_loss, mel_postnet_loss, gate_loss = transformer_loss( mel_output, mel_output_postnet, stop_token, mel_tgt, gate_target) total_mel_loss = mel_loss + mel_postnet_loss total_loss = total_mel_loss + gate_loss # Logger t_m_l = total_mel_loss.item() m_l = mel_loss.item() m_p_l = mel_postnet_loss.item() g_l = gate_loss.item() with open(os.path.join("logger", "total_mel_loss.txt"), "a") as f_total_loss: f_total_loss.write(str(t_m_l) + "\n") with open(os.path.join("logger", "mel_loss.txt"), "a") as f_mel_loss: f_mel_loss.write(str(m_l) + "\n") with open(os.path.join("logger", "mel_postnet_loss.txt"), "a") as f_mel_postnet_loss: f_mel_postnet_loss.write(str(m_p_l) + "\n") with open(os.path.join("logger", "gate_loss.txt"), "a") as f_gate_loss: f_gate_loss.write(str(g_l) + "\n") # Backward total_loss.backward() # Clipping gradients to avoid gradient explosion nn.utils.clip_grad_norm_(model.parameters(), hp.grad_clip_thresh) # Update weights optimizer.step() # Print if current_step % hp.log_step == 0: Now = time.clock() str1 = "Epoch [{}/{}], Step [{}/{}], Mel Loss: {:.4f}, Mel PostNet Loss: {:.4f}, Gate Loss: {:.4f}, Total Loss: {:.4f}.".format( epoch + 1, hp.epochs, current_step, total_step, mel_loss.item(), mel_postnet_loss.item(), gate_loss.item(), total_loss.item()) str2 = "Time Used: {:.3f}s, Estimated Time Remaining: {:.3f}s.".format( (Now - Start), (total_step - current_step) * np.mean(Time)) print(str1) print(str2) with open(os.path.join("logger", "logger.txt"), "a") as 
f_logger: f_logger.write(str1 + "\n") f_logger.write(str2 + "\n") f_logger.write("\n") if current_step % hp.save_step == 0: torch.save( { 'model': model.state_dict(), 'optimizer': optimizer.state_dict() }, os.path.join(hp.checkpoint_path, 'checkpoint_%d.pth.tar' % current_step)) print("save model at step %d ..." % current_step) if current_step in hp.decay_step: optimizer = adjust_learning_rate(optimizer, current_step) end_time = time.clock() Time = np.append(Time, end_time - start_time) if len(Time) == hp.clear_Time: temp_value = np.mean(Time) Time = np.delete(Time, [i for i in range(len(Time))], axis=None) Time = np.append(Time, temp_value)
def main(args): # Get device device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') # Define model model = nn.DataParallel(BERT_Tacotron2(hp)).to(device) # model = Tacotron2(hp).to(device) print("Model Have Been Defined") num_param = sum(param.numel() for param in model.parameters()) print('Number of Tacotron Parameters:', num_param) # Get dataset dataset = BERTTacotron2Dataset() # Optimizer optimizer = torch.optim.Adam(model.parameters(), lr=hp.learning_rate, weight_decay=hp.weight_decay) # Criterion criterion = Tacotron2Loss() # Load checkpoint if exists try: checkpoint = torch.load( os.path.join(hp.checkpoint_path, 'checkpoint_%d.pth.tar' % args.restore_step)) model.load_state_dict(checkpoint['model']) optimizer.load_state_dict(checkpoint['optimizer']) print("\n---Model Restored at Step %d---\n" % args.restore_step) except: print("\n---Start New Training---\n") if not os.path.exists(hp.checkpoint_path): os.mkdir(hp.checkpoint_path) # Init logger if not os.path.exists(hp.logger_path): os.mkdir(hp.logger_path) # Define Some Information Time = np.array([]) Start = time.clock() # Training model = model.train() for epoch in range(hp.epochs): # Get training loader training_loader = DataLoader(dataset, batch_size=hp.batch_size**2, shuffle=True, collate_fn=collate_fn, drop_last=True, num_workers=0) total_step = hp.epochs * len(training_loader) * hp.batch_size for i, batchs in enumerate(training_loader): for j, data_of_batch in enumerate(batchs): start_time = time.clock() current_step = i * hp.batch_size + j + args.restore_step + \ epoch * len(training_loader)*hp.batch_size + 1 # Init optimizer.zero_grad() # Get Data character = torch.from_numpy( data_of_batch["text"]).long().to(device) mel_target = torch.from_numpy( data_of_batch["mel_target"]).float().to( device).contiguous().transpose(1, 2) stop_target = torch.from_numpy( data_of_batch["stop_token"]).float().to(device) embeddings = data_of_batch["bert_embeddings"].float().to( device) input_lengths = torch.from_numpy( data_of_batch["length_text"]).long().to(device) output_lengths = torch.from_numpy( data_of_batch["length_mel"]).long().to(device) # Forward batch = character, input_lengths, mel_target, stop_target, output_lengths, embeddings x, y = model.module.parse_batch(batch) y_pred = model(x) # Cal Loss mel_loss, mel_postnet_loss, stop_pred_loss = criterion( y_pred, y) total_loss = mel_loss + mel_postnet_loss + stop_pred_loss # Logger t_l = total_loss.item() m_l = mel_loss.item() m_p_l = mel_postnet_loss.item() s_l = stop_pred_loss.item() with open(os.path.join("logger", "total_loss.txt"), "a") as f_total_loss: f_total_loss.write(str(t_l) + "\n") with open(os.path.join("logger", "mel_loss.txt"), "a") as f_mel_loss: f_mel_loss.write(str(m_l) + "\n") with open(os.path.join("logger", "mel_postnet_loss.txt"), "a") as f_mel_postnet_loss: f_mel_postnet_loss.write(str(m_p_l) + "\n") with open(os.path.join("logger", "stop_pred_loss.txt"), "a") as f_s_loss: f_s_loss.write(str(s_l) + "\n") # Backward total_loss.backward() # Clipping gradients to avoid gradient explosion nn.utils.clip_grad_norm_(model.parameters(), 1.) 
# Update weights optimizer.step() adjust_learning_rate(optimizer, current_step) # Print if current_step % hp.log_step == 0: Now = time.clock() str1 = "Epoch [{}/{}], Step [{}/{}], Mel Loss: {:.4f}, Mel PostNet Loss: {:.4f};".format( epoch + 1, hp.epochs, current_step, total_step, mel_loss.item(), mel_postnet_loss.item()) str2 = "Stop Predicted Loss: {:.4f}, Total Loss: {:.4f}.".format( stop_pred_loss.item(), total_loss.item()) str3 = "Time Used: {:.3f}s, Estimated Time Remaining: {:.3f}s.".format( (Now - Start), (total_step - current_step) * np.mean(Time)) print("\n" + str1) print(str2) print(str3) with open(os.path.join("logger", "logger.txt"), "a") as f_logger: f_logger.write(str1 + "\n") f_logger.write(str2 + "\n") f_logger.write(str3 + "\n") f_logger.write("\n") if current_step % hp.save_step == 0: torch.save( { 'model': model.state_dict(), 'optimizer': optimizer.state_dict() }, os.path.join(hp.checkpoint_path, 'checkpoint_%d.pth.tar' % current_step)) print("save model at step %d ..." % current_step) end_time = time.clock() Time = np.append(Time, end_time - start_time) if len(Time) == hp.clear_Time: temp_value = np.mean(Time) Time = np.delete(Time, [i for i in range(len(Time))], axis=None) Time = np.append(Time, temp_value)
        times = data.times.to(device)
        loss = log_prob_loss(logμ, logλ, times)
        total_loss += loss.item() * data.trips.shape[0]
        total_mse += F.mse_loss(torch.exp(logμ), times).item() * data.trips.shape[0]
        total_l1 += F.l1_loss(torch.exp(logμ), times).item() * data.trips.shape[0]
    mean_loss = total_loss / np.sum(test_slot_size)
    mean_mse = total_mse / np.sum(test_slot_size)
    mean_l1 = total_l1 / np.sum(test_slot_size)
    print("Testing Loss {0:.4f} MSE {1:.4f} L1 {2:.4f}".format(mean_loss, mean_mse, mean_l1))
    return mean_loss, mean_mse

probrho, probtraffic, probttime = load_model(args.model)
probrho.eval()
probtraffic.eval()
probttime.eval()

testfiles = list(filter(lambda x: x.endswith(".h5"), sorted(os.listdir(args.testpath))))
test_dataloader = DataLoader(args.testpath)
print("Loading the testing data...")
test_dataloader.read_files(testfiles)
test_slot_size = np.array(list(map(lambda s: s.ntrips, test_dataloader.slotdata_pool)))
print("There are {} trips in total".format(test_slot_size.sum()))
test_num_iterations = int(np.ceil(test_slot_size / args.batch_size).sum())

tic = time.time()
with torch.no_grad():
    validate(test_num_iterations, probrho, probtraffic, probttime)
print("Time passed: {} seconds".format(time.time() - tic))
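# Hedged sketch of a log_prob_loss(logμ, logλ, times) consistent with the usage
# above, assuming λ is the precision of a Gaussian over travel times. This is
# one plausible parameterization, not necessarily the project's definition.
import math
import torch

def log_prob_loss(logmu, loglambda, times):
    mu = torch.exp(logmu)
    lam = torch.exp(loglambda)
    # Negative Gaussian log-likelihood: 0.5 * (λ(t-μ)^2 - logλ + log 2π), averaged.
    nll = 0.5 * (lam * (times - mu) ** 2 - loglambda + math.log(2 * math.pi))
    return nll.mean()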
def main(args): # Get device device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') # Define model model = SpeakerEncoder().to(device) GE2E_loss = GE2ELoss() print("Model Have Been Defined") # Get dataset dataset = SVData() # Optimizer optimizer = torch.optim.SGD([{ 'params': model.parameters() }, { 'params': GE2E_loss.parameters() }], lr=hp.learning_rate) # Get training loader training_loader = DataLoader(dataset, batch_size=hp.N, shuffle=True, drop_last=True, collate_fn=collate_fn, num_workers=cpu_count()) print("Get Training Loader") # Load checkpoint if exists try: checkpoint = torch.load( os.path.join(hp.checkpoint_path, 'checkpoint_%d.pth.tar' % args.restore_step)) model.load_state_dict(checkpoint['model']) optimizer.load_state_dict(checkpoint['optimizer']) print("\n---Model Restored at Step %d---\n" % args.restore_step) except: print("\n---Start New Training---\n") if not os.path.exists(hp.checkpoint_path): os.mkdir(hp.checkpoint_path) # Define Some Information total_step = hp.epochs * len(training_loader) Time = np.array([]) Start = time.perf_counter() # Training model = model.train() for epoch in range(hp.epochs): for i, batch in enumerate(training_loader): start_time = time.perf_counter() # Count step current_step = i + args.restore_step + \ epoch * len(training_loader) + 1 # Init optimizer.zero_grad() # Load Data # ====================================================== # # batch: (hp.N * hp.M, hp.tisv_frame, hp.n_mels_channel) # # ====================================================== # batch = torch.from_numpy(batch).float().to(device) embeddings = model(batch) # Loss embeddings = embeddings.contiguous().view(hp.N, hp.M, -1) loss = GE2E_loss(embeddings) # Backward loss.backward() # Clipping gradients to avoid gradient explosion nn.utils.clip_grad_norm_(model.parameters(), 3.0) nn.utils.clip_grad_norm_(GE2E_loss.parameters(), 1.0) # Update weights optimizer.step() if current_step % hp.log_step == 0: Now = time.perf_counter() str_loss = "Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}.".format( epoch + 1, hp.epochs, current_step, total_step, loss.item()) str_time = "Time Used: {:.3f}s, Estimated Time Remaining: {:.3f}s.".format( (Now - Start), (total_step - current_step) * np.mean(Time)) print(str_loss) print(str_time) with open("logger.txt", "a") as f_logger: f_logger.write(str_loss + "\n") f_logger.write(str_time + "\n") f_logger.write("\n") if current_step % hp.save_step == 0: torch.save( { 'model': model.state_dict(), 'optimizer': optimizer.state_dict() }, os.path.join(hp.checkpoint_path, 'checkpoint_%d.pth.tar' % current_step)) print("\nsave model at step %d ...\n" % current_step) end_time = time.perf_counter() Time = np.append(Time, end_time - start_time) if len(Time) == hp.clear_Time: temp_value = np.mean(Time) Time = np.delete(Time, [i for i in range(len(Time))], axis=None) Time = np.append(Time, temp_value)
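# Hedged, simplified illustration of the (N, M, D) bookkeeping behind a GE2E-style
# loss as used above: per-speaker centroids and each utterance's cosine similarity
# to every centroid. This is not the GE2ELoss module itself.
import torch
import torch.nn.functional as F

def centroid_similarity(embeddings):
    # embeddings: (N speakers, M utterances per speaker, D)
    n_spk, m_utt, d = embeddings.shape
    centroids = embeddings.mean(dim=1)                    # (N, D)
    utterances = embeddings.reshape(-1, d)                # (N*M, D)
    sim = F.cosine_similarity(
        utterances.unsqueeze(1).expand(-1, n_spk, -1),    # (N*M, N, D)
        centroids.unsqueeze(0).expand(n_spk * m_utt, -1, -1),
        dim=-1)                                           # (N*M, N)
    return sim

sim = centroid_similarity(torch.randn(4, 5, 256))   # 4 speakers, 5 utterances each
print(sim.shape)                                    # torch.Size([20, 4])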
def train(): if args.load_model: hparams_file_name = os.path.join(args.output_dir, "hparams.pt") hparams = torch.load(hparams_file_name) else: hparams = Iwslt16EnDeBpe32SharedParams( data_path=args.data_path, source_train=args.source_train, target_train=args.target_train, source_valid=args.source_valid, target_valid=args.target_valid, source_vocab=args.source_vocab, target_vocab=args.target_vocab, source_test=args.source_test, target_test=args.target_test, max_len=args.max_len, n_train_sents=args.n_train_sents, cuda=args.cuda, d_word_vec=args.d_word_vec, d_model=args.d_model, d_inner=args.d_inner, d_k=args.d_k, d_v=args.d_v, n_layers=args.n_layers, n_heads=args.n_heads, batch_size=args.batch_size, batcher=args.batcher, n_train_steps=args.n_train_steps, n_warm_ups=args.n_warm_ups, share_emb_and_softmax=args.share_emb_and_softmax, dropout=args.dropout, label_smoothing=args.label_smoothing, grad_bound=args.grad_bound, init_range=args.init_range, optim_switch=args.optim_switch, lr_adam=args.lr_adam, lr_sgd=args.lr_sgd, lr_dec=args.lr_dec, l2_reg=args.l2_reg, loss_norm=args.loss_norm, init_type=args.init_type, pos_emb_size=args.pos_emb_size, raml_source=args.raml_source, raml_target=args.raml_target, raml_tau=args.raml_tau, raml_src_tau=args.raml_src_tau, src_pad_corrupt=args.src_pad_corrupt, trg_pad_corrupt=args.trg_pad_corrupt, dist_corrupt=args.dist_corrupt, dist_corrupt_tau=args.dist_corrupt_tau, glove_emb_file=args.glove_emb_file, glove_emb_dim=args.glove_emb_dim, max_glove_vocab_size=args.max_glove_vocab_size, ) data = DataLoader(hparams=hparams) hparams.add_param("source_vocab_size", data.source_vocab_size) hparams.add_param("target_vocab_size", data.target_vocab_size) hparams.add_param("pad_id", data.pad_id) hparams.add_param("unk_id", data.unk_id) hparams.add_param("bos_id", data.bos_id) hparams.add_param("eos_id", data.eos_id) hparams.add_param("l2_reg", args.l2_reg) hparams.add_param("n_train_steps", args.n_train_steps) # build or load model model print("-" * 80) print("Creating model") if args.load_model: model_file_name = os.path.join(args.output_dir, "model.pt") print("Loading model from '{0}'".format(model_file_name)) model = torch.load(model_file_name) else: print("Initialize with {}".format(hparams.init_type)) model = Transformer(hparams=hparams, init_type=hparams.init_type) crit = get_criterion(hparams) trainable_params = [ p for p in model.trainable_parameters() if p.requires_grad ] num_params = count_params(trainable_params) print("Model has {0} params".format(num_params)) # build or load optimizer if args.optim == 'adam': print("Using adam optimizer...") optim = torch.optim.Adam(trainable_params, lr=hparams.lr_adam, weight_decay=hparams.l2_reg) else: print("Using sgd optimizer...") optim = torch.optim.SGD(trainable_params, lr=hparams.lr_sgd, weight_decay=hparams.l2_reg) print("Using transformer lr schedule: {}".format(args.lr_schedule)) if args.load_model: optim_file_name = os.path.join(args.output_dir, "optimizer.pt") print("Loading optim from '{0}'".format(optim_file_name)) optimizer_state = torch.load(optim_file_name) optim.load_state_dict(optimizer_state) try: extra_file_name = os.path.join(args.output_dir, "extra.pt") if args.checkpoint > 0: (step, best_val_ppl, best_val_bleu, cur_attempt, lr, checkpoint_queue) = torch.load(extra_file_name) else: (step, best_val_ppl, best_val_bleu, cur_attempt, lr) = torch.load(extra_file_name) except: raise RuntimeError("Cannot load checkpoint!") else: optim = torch.optim.Adam(trainable_params, lr=hparams.lr_adam, 
weight_decay=hparams.l2_reg) step = 0 best_val_ppl = 1e10 best_val_bleu = 0 cur_attempt = 0 lr = hparams.lr_adam if args.checkpoint > 0: checkpoint_queue = deque( ["checkpoint_" + str(i) for i in range(args.checkpoint)]) if not type(best_val_ppl) == dict: best_val_ppl = {} best_val_bleu = {} if args.save_nbest > 1: for i in range(args.save_nbest): best_val_ppl['model' + str(i)] = 1e10 best_val_bleu['model' + str(i)] = 0 else: best_val_ppl['model'] = 1e10 best_val_bleu['model'] = 0 set_patience = args.patience >= 0 # train loop print("-" * 80) print("Start training") ppl_thresh = args.ppl_thresh start_time = time.time() actual_start_time = time.time() target_words, total_loss, total_corrects = 0, 0, 0 n_train_batches = data.n_train_batches while True: # training activities model.train() while True: # next batch if hparams.raml_source and hparams.raml_target: ((x_train_raml, x_train, x_mask, x_pos_emb_indices, x_count), (y_train_raml, y_train, y_mask, y_pos_emb_indices, y_count), batch_size) = data.next_train() elif hparams.raml_source: ((x_train_raml, x_train, x_mask, x_pos_emb_indices, x_count), (y_train, y_mask, y_pos_emb_indices, y_count), batch_size) = data.next_train() elif hparams.raml_target: ((x_train, x_mask, x_pos_emb_indices, x_count), (y_train_raml, y_train, y_mask, y_pos_emb_indices, y_count), batch_size) = data.next_train() else: ((x_train, x_mask, x_pos_emb_indices, x_count), (y_train, y_mask, y_pos_emb_indices, y_count), batch_size) = data.next_train() # book keeping count # Since you are shifting y_train, i.e. y_train[:, :-1] and y_train[:, 1:] y_count -= batch_size target_words += y_count # forward pass optim.zero_grad() if hparams.raml_source and hparams.raml_target: logits = model.forward(x_train_raml, x_mask, x_pos_emb_indices, y_train_raml[:, :-1], y_mask[:, :-1], y_pos_emb_indices[:, :-1].contiguous()) elif hparams.raml_source: logits = model.forward(x_train_raml, x_mask, x_pos_emb_indices, y_train[:, :-1], y_mask[:, :-1], y_pos_emb_indices[:, :-1].contiguous()) elif hparams.raml_target: logits = model.forward(x_train, x_mask, x_pos_emb_indices, y_train_raml[:, :-1], y_mask[:, :-1], y_pos_emb_indices[:, :-1].contiguous()) else: logits = model.forward(x_train, x_mask, x_pos_emb_indices, y_train[:, :-1], y_mask[:, :-1], y_pos_emb_indices[:, :-1].contiguous()) logits = logits.view(-1, hparams.target_vocab_size) if hparams.raml_target: labels = y_train_raml[:, 1:].contiguous().view(-1) else: labels = y_train[:, 1:].contiguous().view(-1) tr_loss, tr_acc = get_performance(crit, logits, labels, hparams) total_loss += tr_loss.data[0] total_corrects += tr_acc.data[0] # normalizing tr_loss if hparams.loss_norm == "sent": loss_div = batch_size elif hparams.loss_norm == "word": assert y_count == (1 - y_mask[:, 1:].int()).sum() loss_div = y_count else: raise ValueError("Unknown batcher '{0}'".format( hparams.batcher)) # set learning rate if args.lr_schedule: s = step + 1 lr = pow(hparams.d_model, -0.5) * min( pow(s, -0.5), s * pow(hparams.n_warm_ups, -1.5)) else: if step < hparams.n_warm_ups: if hparams.optim_switch is not None and step < hparams.optim_switch: base_lr = hparams.lr_adam else: base_lr = hparams.lr_sgd lr = base_lr * (step + 1) / hparams.n_warm_ups tr_loss.div_(loss_div).backward() set_lr(optim, lr) grad_norm = grad_clip(trainable_params, grad_bound=hparams.grad_bound) optim.step() step += 1 if step % args.log_every == 0: epoch = step // data.n_train_batches curr_time = time.time() since_start = (curr_time - start_time) / 60.0 elapsed = (curr_time - 
actual_start_time) / 60.0 log_string = "ep={0:<3d}".format(epoch) log_string += " steps={0:<6.2f}".format(step / 1000) log_string += " lr={0:<9.7f}".format(lr) log_string += " loss={0:<7.2f}".format(tr_loss.data[0]) log_string += " |g|={0:<6.2f}".format(grad_norm) log_string += " ppl={0:<8.2f}".format( np.exp(total_loss / target_words)) log_string += " acc={0:<5.4f}".format(total_corrects / target_words) log_string += " wpm(K)={0:<5.2f}".format(target_words / (1000 * elapsed)) log_string += " mins={0:<5.2f}".format(since_start) print(log_string) if step == hparams.optim_switch: lr = hparams.lr_sgd print(("Reached {0} steps. Switching from Adam to SGD " "with learning_rate {1:<9.7f}").format(step, lr)) optim = torch.optim.SGD(trainable_params, lr=hparams.lr_sgd, weight_decay=hparams.l2_reg) # clean up GPU memory if step % args.clean_mem_every == 0: gc.collect() # eval if step % args.eval_every == 0: val_ppl, val_bleu = eval( model, data, crit, step, hparams, min(best_val_ppl.values()) < ppl_thresh, valid_batch_size=args.valid_batch_size) # determine whether to update best_val_ppl or best_val_bleu based_on_bleu = args.eval_bleu and (min(best_val_ppl.values()) < ppl_thresh) if based_on_bleu: if min(best_val_bleu.values()) <= val_bleu: save_model_name = min(best_val_bleu, key=best_val_bleu.get) best_val_bleu[save_model_name] = val_bleu save = True else: save = False else: if max(best_val_ppl.values()) >= val_ppl: save_model_name = max(best_val_ppl, key=best_val_ppl.get) best_val_ppl[save_model_name] = val_ppl save = True else: save = False if args.checkpoint > 0: cur_name = checkpoint_queue.popleft() checkpoint_queue.append(cur_name) print("Saving checkpoint to {}".format(cur_name)) torch.save(model, os.path.join(args.output_dir, cur_name)) if save: save_extra = [ step, best_val_ppl, best_val_bleu, cur_attempt, lr ] if args.checkpoint > 0: save_extra.append(cur_name) save_checkpoint(save_extra, model, optim, hparams, args.output_dir, save_model_name) cur_attempt = 0 else: lr /= hparams.lr_dec cur_attempt += 1 actual_start_time = time.time() target_words = 0 total_loss = 0 total_corrects = 0 if set_patience: if cur_attempt >= args.patience: break else: if step >= hparams.n_train_steps: break # stop if trained for more than n_train_steps stop = False if set_patience and cur_attempt >= args.patience: stop = True elif not set_patience and step > hparams.n_train_steps: stop = True if stop: print("Reach {0} steps. Stop training".format(step)) based_on_bleu = args.eval_bleu and (min(best_val_ppl.values()) < ppl_thresh) if based_on_bleu: if min(best_val_bleu.values()) <= val_bleu: save_model_name = min(best_val_bleu, key=best_val_bleu.get) best_val_bleu[save_model_name] = val_bleu save = True else: save = False else: if max(best_val_ppl) >= val_ppl: save_model_name = max(best_val_ppl, key=best_val_ppl.get) best_val_ppl[save_model_name] = val_ppl save = True else: save = False if save: save_checkpoint( [step, best_val_ppl, best_val_bleu, cur_attempt, lr], model, optim, hparams, args.output_dir, save_model_name) break
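When args.lr_schedule is set, the loop above computes the inverse-square-root warm-up schedule from the Transformer paper inline. The same formula as a standalone function (the d_model and n_warm_ups defaults here are only illustrative):

def transformer_lr(step, d_model=512, n_warm_ups=4000):
    s = step + 1
    return d_model ** -0.5 * min(s ** -0.5, s * n_warm_ups ** -1.5)

for step in (0, 1000, 4000, 40000):
    print(step, round(transformer_lr(step), 6))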
if __name__ == "__main__": # Test device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') torch.manual_seed(hp.seed) torch.cuda.manual_seed(hp.seed) model = WaveGlow().cuda() checkpoint = torch.load('test/TTSglow_130000') model.load_state_dict(checkpoint['model'].state_dict()) dataset = FastSpeechDataset() testing_loader = DataLoader(dataset, batch_size=1, shuffle=False, collate_fn=collate_fn, drop_last=True, num_workers=4) model = model.eval() for i, data_of_batch in enumerate(testing_loader): src_seq = data_of_batch["texts"] src_pos = data_of_batch["pos"] src_seq = torch.from_numpy(src_seq).long().to(device) src_pos = torch.from_numpy(src_pos).long().to(device) mel = model.inference(src_seq, src_pos, sigma=1.0, alpha=1.0) mel = mel.squeeze() print(mel.size()) mel_path = os.path.join("results", "{}_synthesis.pt".format(i))
def main(args): # Get device device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') # Define model model_SE = SpeakerEncoder().to(device) model_SV = SpeakerVerification().to(device) GE2E_loss = GE2ELoss() SV_loss = nn.CrossEntropyLoss() print("Models and Loss Have Been Defined") # Optimizer optimizer_SE = torch.optim.SGD([{ 'params': model_SE.parameters() }, { 'params': GE2E_loss.parameters() }], lr=hp.learning_rate) optimizer_SV = torch.optim.Adam(model_SV.parameters(), lr=1e-3) # Load checkpoint if exists try: checkpoint_SE = torch.load( os.path.join(hp.checkpoint_path, 'checkpoint_SE_%d.pth.tar' % args.restore_step)) model_SE.load_state_dict(checkpoint_SE['model']) optimizer_SE.load_state_dict(checkpoint_SE['optimizer']) checkpoint_SV = torch.load( os.path.join(hp.checkpoint_path, 'checkpoint_SV_%d.pth.tar' % args.restore_step)) model_SV.load_state_dict(checkpoint_SV['model']) optimizer_SV.load_state_dict(checkpoint_SV['optimizer']) print("\n---Model Restored at Step %d---\n" % args.restore_step) except: print("\n---Start New Training---\n") if not os.path.exists(hp.checkpoint_path): os.mkdir(hp.checkpoint_path) # # Change Learning Rate # learning_rate = 0.005 # for param_group in optimizer_SE.param_groups: # param_group['lr'] = learning_rate # Get dataset dataset = SVData() # Get training loader training_loader = DataLoader(dataset, batch_size=hp.N, shuffle=True, drop_last=True, collate_fn=collate_fn, num_workers=cpu_count()) # Define Some Information total_step = hp.epochs * len(training_loader) Time = np.array([]) Start = time.perf_counter() # Training model_SE = model_SE.train() model_SV = model_SV.train() for epoch in range(hp.epochs): dataset = SVData() training_loader = DataLoader(dataset, batch_size=hp.N, shuffle=True, drop_last=True, collate_fn=collate_fn, num_workers=cpu_count()) for i, batch in enumerate(training_loader): start_time = time.perf_counter() # Count step current_step = i + args.restore_step + \ epoch * len(training_loader) + 1 # Init optimizer_SE.zero_grad() optimizer_SV.zero_grad() # Load Data mels, lengths, target = batch mels = torch.from_numpy(mels).float().to(device) target = torch.Tensor(target).long().to(device) # Forward speaker_embeddings = model_SE(mels, lengths) speaker_embeddings_ = torch.Tensor( speaker_embeddings.cpu().data).to(device) out = model_SV(speaker_embeddings_) # Loss speaker_embeddings = speaker_embeddings.contiguous().view( hp.N, hp.M, -1) ge2e_loss = GE2E_loss(speaker_embeddings) classify_loss = SV_loss(out, target) # Backward ge2e_loss.backward() classify_loss.backward() # Clipping gradients to avoid gradient explosion nn.utils.clip_grad_norm_(model_SE.parameters(), 3.0) nn.utils.clip_grad_norm_(GE2E_loss.parameters(), 1.0) # Update weights optimizer_SE.step() optimizer_SV.step() if current_step % hp.log_step == 0: Now = time.perf_counter() str_loss = "Epoch [{}/{}], Step [{}/{}], GE2E Loss: {:.4f}, Classify Loss: {:.4f}.".format( epoch + 1, hp.epochs, current_step, total_step, ge2e_loss.item(), classify_loss.item()) str_time = "Time Used: {:.3f}s, Estimated Time Remaining: {:.3f}s.".format( (Now - Start), (total_step - current_step) * np.mean(Time)) print(str_loss) print(str_time) with open("logger.txt", "a") as f_logger: f_logger.write(str_loss + "\n") f_logger.write(str_time + "\n") f_logger.write("\n") if current_step % hp.save_step == 0: torch.save( { 'model': model_SE.state_dict(), 'optimizer': optimizer_SE.state_dict() }, os.path.join(hp.checkpoint_path, 'checkpoint_SE_%d.pth.tar' % current_step)) torch.save( { 
'model': model_SV.state_dict(), 'optimizer': optimizer_SV.state_dict() }, os.path.join(hp.checkpoint_path, 'checkpoint_SV_%d.pth.tar' % current_step)) print("\nsave model at step %d ...\n" % current_step) end_time = time.perf_counter() Time = np.append(Time, end_time - start_time) if len(Time) == hp.clear_Time: temp_value = np.mean(Time) Time = np.delete(Time, [i for i in range(len(Time))], axis=None) Time = np.append(Time, temp_value)
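The copy through torch.Tensor(speaker_embeddings.cpu().data) above appears to be there to keep the classification loss from back-propagating into the speaker encoder; .detach() achieves the same cut without leaving the GPU. A minimal sketch of the stop-gradient pattern with stand-in modules:

import torch
import torch.nn as nn

encoder = nn.Linear(40, 256)        # stand-in for SpeakerEncoder
classifier = nn.Linear(256, 10)     # stand-in for SpeakerVerification

x = torch.randn(8, 40)
emb = encoder(x)
logits = classifier(emb.detach())   # gradient flow stops here
loss = nn.CrossEntropyLoss()(logits, torch.randint(0, 10, (8,)))
loss.backward()
print(encoder.weight.grad is None)  # True: the classifier loss never reaches the encoder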
def train(num_gpus, rank, group_name, output_directory, log_directory, checkpoint_path): # Get device device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') torch.manual_seed(hp.seed) torch.cuda.manual_seed(hp.seed) #=====START: ADDED FOR DISTRIBUTED====== if num_gpus > 1: init_distributed(rank, num_gpus, group_name, **dist_config) #=====END: ADDED FOR DISTRIBUTED====== criterion = WaveGlowLoss(hp.sigma) model = WaveGlow().cuda() #=====START: ADDED FOR DISTRIBUTED====== if num_gpus > 1: model = apply_gradient_allreduce(model) #=====END: ADDED FOR DISTRIBUTED====== learning_rate = hp.learning_rate optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate) if hp.fp16_run: from apex import amp model, optimizer = amp.initialize(model, optimizer, opt_level='O1') # Load checkpoint if one exists iteration = 0 if checkpoint_path: model, optimizer, iteration = load_checkpoint(checkpoint_path, model, optimizer) iteration += 1 # next iteration is iteration + 1 # Get dataset dataset = FastSpeechDataset() # Get training loader print("Get Training Loader") training_loader = DataLoader(dataset, batch_size=hp.batch_size, shuffle=True, collate_fn=collate_fn, drop_last=True, num_workers=cpu_count()) if rank == 0: if not os.path.isdir(output_directory): os.makedirs(output_directory) os.chmod(output_directory, 0o775) print("output directory", output_directory) if hp.with_tensorboard and rank == 0: logger = prepare_directories_and_logger(output_directory, log_directory) model = model.train() epoch_offset = max(0, int(iteration / len(training_loader))) beta = hp.batch_size print("Total Epochs: {}".format(hp.epochs)) print("Batch Size: {}".format(hp.batch_size)) # ================ MAIN TRAINNIG LOOP! =================== for epoch in range(epoch_offset, hp.epochs): print("Epoch: {}".format(epoch)) for i, data_of_batch in enumerate(training_loader): model.zero_grad() if not hp.pre_target: # Prepare Data src_seq = data_of_batch["texts"] src_pos = data_of_batch["pos"] mel_tgt = data_of_batch["mels"] src_seq = torch.from_numpy(src_seq).long().to(device) src_pos = torch.from_numpy(src_pos).long().to(device) mel_tgt = torch.from_numpy(mel_tgt).float().to(device) alignment_target = get_alignment(src_seq, tacotron2).float().to(device) # For Data Parallel mel_max_len = mel_tgt.size(1) else: # Prepare Data src_seq = data_of_batch["texts"] src_pos = data_of_batch["pos"] mel_tgt = data_of_batch["mels"] alignment_target = data_of_batch["alignment"] src_seq = torch.from_numpy(src_seq).long().to(device) src_pos = torch.from_numpy(src_pos).long().to(device) mel_tgt = torch.from_numpy(mel_tgt).float().to(device) alignment_target = torch.from_numpy( alignment_target).float().to(device) # For Data Parallel mel_max_len = mel_tgt.size(1) outputs = model(src_seq, src_pos, mel_tgt, mel_max_len, alignment_target) _, _, _, duration_predictor = outputs mel_tgt = mel_tgt.transpose(1, 2) max_like, dur_loss = criterion(outputs, alignment_target, mel_tgt) if beta > 1 and iteration % 10000 == 0: beta = beta // 2 loss = max_like + dur_loss if num_gpus > 1: reduced_loss = reduce_tensor(loss.data, num_gpus).item() else: reduced_loss = loss.item() if hp.fp16_run: with amp.scale_loss(loss, optimizer) as scaled_loss: scaled_loss.backward() else: loss.backward() #grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), hp.grad_clip_thresh) optimizer.step() print("{}:\t{:.9f}".format(iteration, reduced_loss)) if hp.with_tensorboard and rank == 0: logger.log_training(reduced_loss, dur_loss, learning_rate, iteration) if 
(iteration % hp.save_step == 0): if rank == 0: # logger.log_alignment(model, mel_predict, mel_tgt, iteration) checkpoint_path = "{}/TTSglow_{}".format( output_directory, iteration) save_checkpoint(model, optimizer, learning_rate, iteration, checkpoint_path) iteration += 1
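save_checkpoint and load_checkpoint are imported from elsewhere in the repo and not shown here; a rough sketch of what such a pair usually looks like for this loop (the stored field names are assumptions, not the repo's checkpoint format):

import torch

def save_checkpoint(model, optimizer, learning_rate, iteration, filepath):
    torch.save({'model': model.state_dict(),
                'optimizer': optimizer.state_dict(),
                'learning_rate': learning_rate,
                'iteration': iteration}, filepath)

def load_checkpoint(filepath, model, optimizer):
    checkpoint = torch.load(filepath, map_location='cpu')
    model.load_state_dict(checkpoint['model'])
    optimizer.load_state_dict(checkpoint['optimizer'])
    return model, optimizer, checkpoint['iteration']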
self.targets: y })) print("targets", self.targets.eval({ self.inputs: x, self.targets: y })) print("logits", self.logits.eval({ self.inputs: x, self.targets: y })) print("prob", self.prob.eval({ self.inputs: x, self.targets: y })) print("prob1", self.prob1.eval({ self.inputs: x, self.targets: y })) if step == 7810: break if __name__ == '__main__': dt = DataLoader(r"D:\python project\text_classification\aclImdb", 'glove_data', 50, 'trimmed.npz') m = Model(dt, 10, 100, 200) m.train()
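Each .eval call above launches a separate graph execution with the same feed dict; a single sess.run with a dict of fetches returns all of them in one pass. A TF1-style sketch of the pattern (the toy graph here is not the model above):

import tensorflow as tf  # TF1-style graph/session API, as in the snippet above

inputs = tf.placeholder(tf.float32, [None, 3], name="inputs")
w = tf.Variable(tf.zeros([3, 1]))
logits = tf.matmul(inputs, w)
prob = tf.nn.sigmoid(logits)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # one graph execution instead of one .eval() per tensor
    fetched = sess.run({"logits": logits, "prob": prob},
                       feed_dict={inputs: [[1.0, 2.0, 3.0]]})
    print(fetched["logits"], fetched["prob"])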
hparams.add_param("pad_id", model.hparams.pad_id) hparams.add_param("bos_id", model.hparams.bos_id) hparams.add_param("eos_id", model.hparams.eos_id) hparams.add_param("unk_id", model.hparams.unk_id) model.hparams.cuda = hparams.cuda if not hasattr(model.hparams, "parent_feed"): model.hparams.parent_feed = 1 if not hasattr(model.hparams, "rule_parent_feed"): model.hparams.rule_parent_feed = 1 model.hparams.root_label = args.root_label model.hparams.ignore_rule_len = args.ignore_rule_len model.hparams.nbest = args.nbest model.hparams.force_rule = args.force_rule model.hparams.force_rule_step = args.force_rule_step data = DataLoader(hparams=hparams, decode=True) filts = [model.hparams.pad_id, model.hparams.eos_id, model.hparams.bos_id] if args.ccg_tag_file: ccg_tag_file = os.path.join(args.data_path, args.ccg_tag_file) with open(ccg_tag_file, 'r') as tag_file: for line in tag_file: line = line.strip() if line in data.target_word_to_index: f_id = data.target_word_to_index[line] filts.append(f_id) hparams.add_param("filtered_tokens", set(filts)) if args.debug: hparams.add_param("target_word_vocab_size", data.target_word_vocab_size) hparams.add_param("target_rule_vocab_size", data.target_rule_vocab_size) crit = get_criterion(hparams)
class HREDChatbot(object): def __init__(self): print(" # Welcome to HRED Chatbot.") print(" # Tensorflow detected: v{}".format(tf.__version__)) print() self.config = HConfig() self.dataloader = DataLoader(self.config.num_utterance, self.config.max_length, self.config.split_ratio, self.config.batch_size) self.word_to_id = self.dataloader.word_to_id self.id_to_word = self.dataloader.id_to_word self.config.vocab_size = self.dataloader.vocab_size self.config.SOS_ID = self.dataloader.SOS_ID self.config.EOS_ID = self.dataloader.EOS_ID self.model = Model(self.config) print() print(" # Parameter Size: {}".format(self.model.get_parameter_size())) print() ### session self.sess = tf.Session() self.config.checkpoint_dir = os.path.join( "save", self.model.__class__.__name__) print(" # Save directory: {}".format(self.config.checkpoint_dir)) def main(self): if FLAGS.mode == "train": ckpt = tf.train.get_checkpoint_state(self.config.checkpoint_dir) if ckpt and tf.train.checkpoint_exists( ckpt.model_checkpoint_path) and (not FLAGS.retrain): print(" # Restoring model parameters from %s." % ckpt.model_checkpoint_path) self.model.saver.restore(self.sess, ckpt.model_checkpoint_path) else: print(" # Creating model with fresh parameters.") self.sess.run(self.model.init_op) self.train_model(self.sess) elif FLAGS.mode == "evaluate": ckpt = tf.train.get_checkpoint_state(self.config.checkpoint_dir) if ckpt and tf.train.checkpoint_exists(ckpt.model_checkpoint_path): print(" # Restoring model parameters from %s." % ckpt.model_checkpoint_path) self.model.saver.restore(self.sess, ckpt.model_checkpoint_path) self.evaluate_model(self.sess) self.evaluate_embedding(self.sess) elif FLAGS.mode == "embedding": ckpt = tf.train.get_checkpoint_state(self.config.checkpoint_dir) if ckpt and tf.train.checkpoint_exists(ckpt.model_checkpoint_path): print(" # Restoring model parameters from %s." % ckpt.model_checkpoint_path) self.model.saver.restore(self.sess, ckpt.model_checkpoint_path) self.evaluate_embedding(self.sess) elif FLAGS.mode == "generate": ckpt = tf.train.get_checkpoint_state(self.config.checkpoint_dir) if ckpt and tf.train.checkpoint_exists(ckpt.model_checkpoint_path): print(" # Restoring model parameters from %s." 
% ckpt.model_checkpoint_path) self.model.saver.restore(self.sess, ckpt.model_checkpoint_path) self.generate(self.sess) def train_model(self, sess): best_result_loss = 1000.0 for epoch in range(self.config.num_epoch): print() print("---- epoch: {}/{} | lr: {} ----".format( epoch, self.config.num_epoch, sess.run(self.model.lr))) tic = datetime.datetime.now() train_iterator = self.dataloader.train_generator() test_iterator = self.dataloader.test_generator() train_batch_num = self.dataloader.train_batch_num test_batch_num = self.dataloader.test_batch_num total_loss = 0.0 nll_loss = 0.0 word_error_rate = 0.0 count = 0 last_nll_loss = 0.0 last_word_error_rate = 0.0 for (enc_inp, dec_inp, dec_tar) in tqdm(train_iterator, desc="training"): train_out = self.model.train_session(sess, enc_inp, dec_inp, dec_tar) count += 1 step = train_out["step"] total_loss += train_out["total_loss"] nll_loss += train_out["nll_loss"] word_error_rate += train_out["word_error_rate"] last_nll_loss += train_out["last_nll_loss"] last_word_error_rate += train_out["last_word_error_rate"] if step % 50 == 0: cur_loss = total_loss / count cur_word_error_rate = word_error_rate / count cur_last_word_error_rate = last_word_error_rate / count cur_nll_loss = nll_loss / count cur_last_nll_loss = last_nll_loss / count cur_perplexity = math.exp(float( cur_nll_loss)) if cur_nll_loss < 300 else float("inf") cur_last_perplexity = math.exp( float(cur_last_nll_loss )) if cur_last_nll_loss < 300 else float("inf") print( " Step %4d | Batch [%3d/%3d] | Loss %.6f | PPL %.6f | PPL@L %.6f | WER %.6f | WER@L %.6f" % (step, count, train_batch_num, cur_loss, cur_perplexity, cur_last_perplexity, cur_word_error_rate, cur_last_word_error_rate)) print("\n") total_loss /= count nll_loss /= count word_error_rate /= count last_nll_loss /= count last_word_error_rate /= count last_perplexity = math.exp( float(last_nll_loss)) if last_nll_loss < 300 else float("inf") perplexity = math.exp( float(nll_loss)) if nll_loss < 300 else float("inf") print( " Train Epoch %4d | Loss %.6f | PPL %.6f | PPL@L %.6f | WER %.6f | WER@L %.6f" % (epoch, total_loss, perplexity, last_perplexity, word_error_rate, last_word_error_rate)) test_loss = 0.0 test_nll_loss = 0.0 test_count = 0 test_rate = 0.0 test_last_nll_loss = 0.0 test_last_word_error_rate = 0.0 for (enc_inp, dec_inp, dec_tar) in tqdm(test_iterator, desc="testing"): # print(np.array(enc_inp).shape) test_outputs = self.model.eval_session(sess, enc_inp, dec_inp, dec_tar) test_loss += test_outputs["total_loss"] test_nll_loss += test_outputs["nll_loss"] test_rate += test_outputs["word_error_rate"] test_last_nll_loss += test_outputs["last_nll_loss"] test_last_word_error_rate += test_outputs[ "last_word_error_rate"] test_count += 1 test_loss /= test_count test_rate /= test_count test_nll_loss /= test_count test_last_word_error_rate /= test_count test_last_nll_loss /= test_count test_last_perp = math.exp( float(test_last_nll_loss )) if test_last_nll_loss < 300 else float("inf") test_perp = math.exp( float(test_nll_loss)) if test_nll_loss < 300 else float("inf") print( " Test Epoch %d | Loss %.6f | PPL %.6f | PPL@L %.6f | WER %.6f | WER@L %.6f" % (epoch, test_loss, test_perp, test_last_perp, test_rate, test_last_word_error_rate)) print() if test_loss < best_result_loss: self.save_session(sess) if np.abs(best_result_loss - test_loss) < 0.1: cur_lr = sess.run(self.model.lr) sess.run(self.model.update_lr_op, feed_dict={self.model.new_lr: cur_lr * 0.5}) best_result_loss = test_loss toc = datetime.datetime.now() print(" # Epoch 
finished in {}".format(toc - tic)) def save_session(self, sess): print(" # Saving checkpoints.") save_dir = os.path.join(self.config.checkpoint_dir) model_name = self.model.__class__.__name__ + ".ckpt" checkpoint_path = os.path.join(save_dir, model_name) self.model.saver.save(sess, checkpoint_path) print(' # Model saved.') def evaluate_model(self, sess): print() print(" # Start Evaluating Metrics on Test Dataset.") print( " # Evaluating Per-word Perplexity and Word Error Rate on Test Dataset..." ) test_iterator = self.dataloader.test_generator() test_loss = 0.0 test_nll_loss = 0.0 test_count = 0 test_rate = 0.0 test_last_nll_loss = 0.0 test_last_word_error_rate = 0.0 for (enc_inp, dec_inp, dec_tar) in tqdm(test_iterator, desc="testing"): test_outputs = self.model.eval_session(sess, enc_inp, dec_inp, dec_tar) test_loss += test_outputs["total_loss"] test_nll_loss += test_outputs["nll_loss"] test_rate += test_outputs["word_error_rate"] test_last_nll_loss += test_outputs["last_nll_loss"] test_last_word_error_rate += test_outputs["last_word_error_rate"] test_count += 1 test_loss /= test_count test_rate /= test_count test_nll_loss /= test_count test_last_word_error_rate /= test_count test_last_nll_loss /= test_count test_last_perp = math.exp(float( test_last_nll_loss)) if test_last_nll_loss < 300 else float("inf") test_perp = math.exp( float(test_nll_loss)) if test_nll_loss < 300 else float("inf") print( " Test Epoch | Loss %.6f | PPL %.6f | PPL@L %.6f | WER %.6f | WER@L %.6f" % (test_loss, test_perp, test_last_perp, test_rate, test_last_word_error_rate)) print() def evaluate_embedding(self, sess): print() print(" # Evaluating Embedding-based Metrics on Test Dataset.") print(" # Loading word2vec embedding...") word2vec_path = "data/GoogleNews-vectors-negative300.bin" word2vec = gensim.models.KeyedVectors.load_word2vec_format( word2vec_path, binary=True) keys = word2vec.vocab test_iterator = self.dataloader.test_generator() metric_average_history = [] metric_extrema_history = [] metric_greedy_history = [] metric_average_history_1 = [] metric_extrema_history_1 = [] metric_greedy_history_1 = [] for (enc_inp, dec_inp, dec_tar) in tqdm(test_iterator, desc="testing"): test_outputs = self.model.infer_session(sess, enc_inp, dec_inp, dec_tar) infer_sample_id = test_outputs["infer_sample_id"][1] train_sample_id = test_outputs["train_sample_id"][1] ground_truth = dec_tar[1] ground_truth = [[ self.id_to_word.get(idx, "<unk>") for idx in sent ] for sent in ground_truth] infer_sample_id = [[ self.id_to_word.get(idx, "<unk>") for idx in sent ] for sent in infer_sample_id] train_sample_id = [[ self.id_to_word.get(idx, "<unk>") for idx in sent ] for sent in train_sample_id] ground_truth = [[word2vec[w] for w in sent if w in keys] for sent in ground_truth] infer_sample_id = [[word2vec[w] for w in sent if w in keys] for sent in infer_sample_id] train_sample_id = [[word2vec[w] for w in sent if w in keys] for sent in train_sample_id] infer_indices = [ i for i, s, g in zip(range(len(infer_sample_id)), infer_sample_id, ground_truth) if s != [] and g != [] ] train_indices = [ i for i, s, g in zip(range(len(train_sample_id)), train_sample_id, ground_truth) if s != [] and g != [] ] infer_samples = [infer_sample_id[i] for i in infer_indices] train_samples = [train_sample_id[i] for i in train_indices] infer_ground_truth = [ground_truth[i] for i in infer_indices] train_ground_truth = [ground_truth[i] for i in train_indices] metric_average = embedding_metric(infer_samples, infer_ground_truth, word2vec, 'average') 
metric_extrema = embedding_metric(infer_samples, infer_ground_truth, word2vec, 'extrema') metric_greedy = embedding_metric(infer_samples, infer_ground_truth, word2vec, 'greedy') metric_average_history.append(metric_average) metric_extrema_history.append(metric_extrema) metric_greedy_history.append(metric_greedy) avg = embedding_metric(train_samples, train_ground_truth, word2vec, "average") ext = embedding_metric(train_samples, train_ground_truth, word2vec, "extrema") gre = embedding_metric(train_samples, train_ground_truth, word2vec, "greedy") metric_average_history_1.append(avg) metric_extrema_history_1.append(ext) metric_greedy_history_1.append(gre) epoch_average = np.mean(np.concatenate(metric_average_history), axis=0) epoch_extrema = np.mean(np.concatenate(metric_extrema_history), axis=0) epoch_greedy = np.mean(np.concatenate(metric_greedy_history), axis=0) print() print( ' # Embedding Metrics | Average: %.6f | Extrema: %.6f | Greedy: %.6f' % (epoch_average, epoch_extrema, epoch_greedy)) print() epoch_average = np.mean(np.concatenate(metric_average_history_1), axis=0) epoch_extrema = np.mean(np.concatenate(metric_extrema_history_1), axis=0) epoch_greedy = np.mean(np.concatenate(metric_greedy_history_1), axis=0) print() print( ' # Embedding Metrics | Average: %.6f | Extrema: %.6f | Greedy: %.6f' % (epoch_average, epoch_extrema, epoch_greedy)) print() def generate(self, sess): test_iterator = self.dataloader.test_generator() for (enc_inp, dec_inp, dec_tar) in tqdm(test_iterator, desc="testing"): test_outputs = self.model.infer_session(sess, enc_inp, dec_inp, dec_tar) infer_sample_id = test_outputs["infer_sample_id"][1] ground_truth = dec_tar[0] for i in range(self.config.batch_size): ground = ground_truth[i] gener = infer_sample_id[i] ground_list = [ self.id_to_word.get(idx, "<unk>") for idx in ground ] gener_list = [ self.id_to_word.get(idx, "<unk>") for idx in gener ] print(" ".join(ground_list)) print(" ".join(gener_list)) print()
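embedding_metric is called above in 'average', 'extrema' and 'greedy' modes; the 'average' variant compares the mean word vector of a generated reply with that of the ground truth via cosine similarity. A rough standalone sketch of just that mode (not the repo's implementation; the random vectors stand in for word2vec lookups):

import numpy as np

def embedding_average(candidate_vecs, reference_vecs):
    """Each argument is a list of word vectors for one sentence."""
    c = np.mean(candidate_vecs, axis=0)
    r = np.mean(reference_vecs, axis=0)
    return float(np.dot(c, r) / (np.linalg.norm(c) * np.linalg.norm(r) + 1e-12))

candidate = [np.random.rand(300) for _ in range(5)]
reference = [np.random.rand(300) for _ in range(7)]
print(embedding_average(candidate, reference))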
def main(args): # Get device device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') # Define model model = nn.DataParallel(FastSpeech()).to(device) #tacotron2 = get_tacotron2() print("FastSpeech and Tacotron2 Have Been Defined") num_param = sum(param.numel() for param in model.parameters()) print('Number of FastSpeech Parameters:', num_param) # Get dataset dataset = FastSpeechDataset() # Optimizer and loss optimizer = torch.optim.Adam( model.parameters(), betas=(0.9, 0.98), eps=1e-9) scheduled_optim = ScheduledOptim(optimizer, hp.word_vec_dim, hp.n_warm_up_step, args.restore_step) fastspeech_loss = FastSpeechLoss().to(device) print("Defined Optimizer and Loss Function.") # Get training loader print("Get Training Loader") training_loader = DataLoader(dataset, batch_size=hp.batch_size, shuffle=True, collate_fn=collate_fn, drop_last=True, num_workers=cpu_count()) try: checkpoint = torch.load(os.path.join( hp.checkpoint_path, 'checkpoint_%d.pth.tar' % args.restore_step)) model.load_state_dict(checkpoint['model']) optimizer.load_state_dict(checkpoint['optimizer']) print("\n------Model Restored at Step %d------\n" % args.restore_step) except: print("\n------Start New Training------\n") if not os.path.exists(hp.checkpoint_path): os.mkdir(hp.checkpoint_path) # Init logger if not os.path.exists(hp.logger_path): os.mkdir(hp.logger_path) # Training model = model.train() total_step = hp.epochs * len(training_loader) Time = np.array(list()) Start = time.clock() summary = SummaryWriter() for epoch in range(hp.epochs): for i, data_of_batch in enumerate(training_loader): start_time = time.clock() current_step = i + args.restore_step + \ epoch * len(training_loader) + 1 # Init scheduled_optim.zero_grad() if not hp.pre_target: # Prepare Data src_seq = data_of_batch["texts"] src_pos = data_of_batch["pos"] mel_tgt = data_of_batch["mels"] src_seq = torch.from_numpy(src_seq).long().to(device) src_pos = torch.from_numpy(src_pos).long().to(device) mel_tgt = torch.from_numpy(mel_tgt).float().to(device) alignment_target = get_alignment( src_seq, tacotron2).float().to(device) # For Data Parallel mel_max_len = mel_tgt.size(1) else: # Prepare Data src_seq = data_of_batch["texts"] src_pos = data_of_batch["pos"] mel_tgt = data_of_batch["mels"] alignment_target = data_of_batch["alignment"] # print(alignment_target) # print(alignment_target.shape) # print(mel_tgt.shape) # print(src_seq.shape) # print(src_seq) src_seq = torch.from_numpy(src_seq).long().to(device) src_pos = torch.from_numpy(src_pos).long().to(device) mel_tgt = torch.from_numpy(mel_tgt).float().to(device) alignment_target = torch.from_numpy( alignment_target).float().to(device) # For Data Parallel mel_max_len = mel_tgt.size(1) # print(alignment_target.shape) # Forward mel_output, mel_output_postnet, duration_predictor_output = model( src_seq, src_pos, mel_max_length=mel_max_len, length_target=alignment_target) # Cal Loss mel_loss, mel_postnet_loss, duration_predictor_loss = fastspeech_loss( mel_output, mel_output_postnet, duration_predictor_output, mel_tgt, alignment_target) total_loss = mel_loss + mel_postnet_loss + duration_predictor_loss # Logger t_l = total_loss.item() m_l = mel_loss.item() m_p_l = mel_postnet_loss.item() d_p_l = duration_predictor_loss.item() with open(os.path.join("logger", "total_loss.txt"), "a") as f_total_loss: f_total_loss.write(str(t_l)+"\n") with open(os.path.join("logger", "mel_loss.txt"), "a") as f_mel_loss: f_mel_loss.write(str(m_l)+"\n") with open(os.path.join("logger", "mel_postnet_loss.txt"), "a") as 
f_mel_postnet_loss: f_mel_postnet_loss.write(str(m_p_l)+"\n") with open(os.path.join("logger", "duration_predictor_loss.txt"), "a") as f_d_p_loss: f_d_p_loss.write(str(d_p_l)+"\n") # Backward total_loss.backward() # Clipping gradients to avoid gradient explosion nn.utils.clip_grad_norm_(model.parameters(), hp.grad_clip_thresh) # Update weights if args.frozen_learning_rate: scheduled_optim.step_and_update_lr_frozen( args.learning_rate_frozen) else: scheduled_optim.step_and_update_lr() # Print if current_step % hp.log_step == 0: Now = time.clock() str1 = "Epoch [{}/{}], Step [{}/{}], Mel Loss: {:.4f}, Mel PostNet Loss: {:.4f};".format( epoch+1, hp.epochs, current_step, total_step, mel_loss.item(), mel_postnet_loss.item()) str2 = "Duration Predictor Loss: {:.4f}, Total Loss: {:.4f}.".format( duration_predictor_loss.item(), total_loss.item()) str3 = "Current Learning Rate is {:.6f}.".format( scheduled_optim.get_learning_rate()) str4 = "Time Used: {:.3f}s, Estimated Time Remaining: {:.3f}s.".format( (Now-Start), (total_step-current_step)*np.mean(Time)) print("\n" + str1) print(str2) print(str3) print(str4) with open(os.path.join("logger", "logger.txt"), "a") as f_logger: f_logger.write(str1 + "\n") f_logger.write(str2 + "\n") f_logger.write(str3 + "\n") f_logger.write(str4 + "\n") f_logger.write("\n") summary.add_scalar('loss', total_loss.item(), current_step) if current_step % hp.save_step == 0: torch.save({'model': model.state_dict(), 'optimizer': optimizer.state_dict( )}, os.path.join(hp.checkpoint_path, 'checkpoint_%d.pth.tar' % current_step)) print("save model at step %d ..." % current_step) end_time = time.clock() Time = np.append(Time, end_time - start_time) if len(Time) == hp.clear_Time: temp_value = np.mean(Time) Time = np.delete( Time, [i for i in range(len(Time))], axis=None) Time = np.append(Time, temp_value)
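The Time / hp.clear_Time bookkeeping that closes this loop (and the other training loops in this collection) keeps a buffer of step durations and collapses it to its mean once it reaches clear_Time entries, so np.mean(Time) stays cheap. The same idea as a small helper (the class and its names are mine, not the repo's):

import numpy as np

class StepTimer:
    def __init__(self, max_samples=50):
        self.max_samples = max_samples
        self.durations = np.array([])

    def add(self, seconds):
        self.durations = np.append(self.durations, seconds)
        if len(self.durations) == self.max_samples:
            # collapse to the running mean so the buffer never grows unbounded
            self.durations = np.array([self.durations.mean()])

    def eta(self, steps_left):
        return steps_left * self.durations.mean() if len(self.durations) else float('nan')

timer = StepTimer(max_samples=3)
for duration in (1.0, 1.2, 0.8, 1.1):
    timer.add(duration)
print(timer.eta(100))   # roughly 105 seconds left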
def main(args): # Get device device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') # Define model model = nn.DataParallel(network.Model()).to(device) print("Model Ha s Been Defined") num_param = sum(param.numel() for param in model.parameters()) print('Number of Transformer-TTS Parameters:', num_param) # Get dataset dataset = TransformerTTSDataset() # Optimizer and loss optimizer = torch.optim.Adam(model.parameters(), lr=hp.lr) print("Defined Optimizer") # Get training loader training_loader = DataLoader(dataset, batch_size=hp.batch_size, shuffle=True, collate_fn=collate_fn, drop_last=True, num_workers=cpu_count()) print("Got Training Loader") try: checkpoint = torch.load( os.path.join(hp.checkpoint_path, 'checkpoint_%d.pth.tar' % args.restore_step)) model.load_state_dict(checkpoint['model']) optimizer.load_state_dict(checkpoint['optimizer']) print("\n------Model Restored at Step %d------\n" % args.restore_step) except: print("\n------Start New Training------\n") if not os.path.exists(hp.checkpoint_path): os.mkdir(hp.checkpoint_path) # Init logger if not os.path.exists(hp.logger_path): os.mkdir(hp.logger_path) # Training model = model.train() total_step = hp.epochs * len(training_loader) Time = np.array(list()) Start = time.clock() for epoch in range(hp.epochs): for i, data_of_batch in enumerate(training_loader): start_time = time.clock() current_step = i + args.restore_step + \ epoch * len(training_loader) + 1 # Init optimizer.zero_grad() # Get Data character = torch.from_numpy( data_of_batch["texts"]).long().to(device) mel_input = torch.from_numpy( data_of_batch["mel_input"]).float().to(device) mel_target = torch.from_numpy( data_of_batch["mel_target"]).float().to(device) pos_text = torch.from_numpy( data_of_batch["pos_text"]).long().to(device) pos_mel = torch.from_numpy( data_of_batch["pos_mel"]).long().to(device) stop_target = pos_mel.eq(0).float().to(device) # Forward mel_pred, postnet_pred, _, stop_preds, _, _ = model.forward( character, mel_input, pos_text, pos_mel) # Cal Loss mel_loss = nn.L1Loss()(mel_pred, mel_target) mel_postnet_loss = nn.L1Loss()(postnet_pred, mel_target) stop_pred_loss = nn.MSELoss()(stop_preds, stop_target) total_loss = mel_loss + mel_postnet_loss + stop_pred_loss # Logger t_l = total_loss.item() m_l = mel_loss.item() m_p_l = mel_postnet_loss.item() s_l = stop_pred_loss.item() with open(os.path.join("logger", "total_loss.txt"), "a") as f_total_loss: f_total_loss.write(str(t_l) + "\n") with open(os.path.join("logger", "mel_loss.txt"), "a") as f_mel_loss: f_mel_loss.write(str(m_l) + "\n") with open(os.path.join("logger", "mel_postnet_loss.txt"), "a") as f_mel_postnet_loss: f_mel_postnet_loss.write(str(m_p_l) + "\n") with open(os.path.join("logger", "stop_pred_loss.txt"), "a") as f_s_loss: f_s_loss.write(str(s_l) + "\n") # Backward total_loss.backward() # Clipping gradients to avoid gradient explosion nn.utils.clip_grad_norm_(model.parameters(), 1.) 
# Update weights optimizer.step() current_learning_rate = adjust_learning_rate( optimizer, current_step) # Print if current_step % hp.log_step == 0: Now = time.clock() str1 = "Epoch [{}/{}], Step [{}/{}], Mel Loss: {:.4f}, Mel PostNet Loss: {:.4f};".format( epoch + 1, hp.epochs, current_step, total_step, mel_loss.item(), mel_postnet_loss.item()) str2 = "Stop Predicted Loss: {:.4f}, Total Loss: {:.4f}.".format( stop_pred_loss.item(), total_loss.item()) str3 = "Current Learning Rate is {:.6f}.".format( current_learning_rate) str4 = "Time Used: {:.3f}s, Estimated Time Remaining: {:.3f}s.".format( (Now - Start), (total_step - current_step) * np.mean(Time)) print("\n" + str1) print(str2) print(str3) print(str4) with open(os.path.join("logger", "logger.txt"), "a") as f_logger: f_logger.write(str1 + "\n") f_logger.write(str2 + "\n") f_logger.write(str3 + "\n") f_logger.write(str4 + "\n") f_logger.write("\n") if current_step % hp.save_step == 0: torch.save( { 'model': model.state_dict(), 'optimizer': optimizer.state_dict() }, os.path.join(hp.checkpoint_path, 'checkpoint_%d.pth.tar' % current_step)) print("save model at step %d ..." % current_step) end_time = time.clock() Time = np.append(Time, end_time - start_time) if len(Time) == hp.clear_Time: temp_value = np.mean(Time) Time = np.delete(Time, [i for i in range(len(Time))], axis=None) Time = np.append(Time, temp_value)
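Neither adjust_learning_rate here nor set_lr in the NMT script earlier is shown; the usual way such helpers work is by rewriting the optimizer's param_groups in place. A minimal sketch of that pattern with a made-up warm-up schedule:

import torch

def set_learning_rate(optimizer, lr):
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr
    return lr

params = [torch.nn.Parameter(torch.randn(2, 2))]
optimizer = torch.optim.Adam(params, lr=1e-3)

for step in range(1, 5):
    lr = 1e-3 * min(1.0, step / 4)   # illustrative warm-up, not the repo's schedule
    current_learning_rate = set_learning_rate(optimizer, lr)
    print(step, current_learning_rate)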