def init(): """ Sample script """ import preprocess if args.use_stopwords == 1: stopwords = set(line.strip() for line in open("stopwords_en.txt", encoding='utf-8')) else: stopwords = set() word_to_file = {} word_to_file, _, files = preprocess.get_dataset(dataset=args.data, type="train") if args.use_full_vocab == 1: valid_vocab = -1 else: valid_vocab = word_to_file.keys() ### this is what you care about encoder = BertWordFromTextEncoder(valid_vocab=valid_vocab) encoder.test_encoder() encoder.encode_docs(docs=files, save_fn=args.save_fn, agg_by=args.agg_by, layer=args.nlayer)
def experiment_fn(run_config, params):
    run_config = run_config.replace(
        save_checkpoints_steps=params.min_eval_frequency)
    estimator = get_estimator(run_config, params)

    # Setup data loaders
    if params.run_preprocess:
        print('Running preprocess')
    datasets = preprocess.get_dataset(params.data_path) if params.run_preprocess \
        else preprocess.preprocess_ego(params.data_path)
    train_input_fn, train_input_hook = get_train_inputs(
        batch_size=data.BATCH_SIZE, datasets=datasets)
    eval_input_fn, eval_input_hook = get_test_inputs(
        batch_size=data.BATCH_SIZE, datasets=datasets)

    # Define the experiment
    experiment = tf.contrib.learn.Experiment(
        estimator=estimator,  # Estimator
        train_input_fn=train_input_fn,  # First-class function
        eval_input_fn=eval_input_fn,  # First-class function
        train_steps=params.train_steps,  # Mini-batch steps
        min_eval_frequency=params.min_eval_frequency,  # Eval frequency
        train_monitors=[train_input_hook],  # Hooks for training
        eval_hooks=[eval_input_hook],  # Hooks for evaluation
        eval_steps=None  # Use evaluation feeder until it is empty
    )
    return experiment
def main():
    dataset = get_dataset()
    modelo.train()
    writer = SummaryWriter("runs/tranformer")
    estep = 0
    for epoch in range(NUM_EPOCHS):
        dataloader = DataLoader(dataset, batch_size=hp.batch_size,
                                collate_fn=collate_fn_transformer,
                                drop_last=True, shuffle=True)
        pbar = tqdm(dataloader)
        losses = 0
        for i, data in enumerate(pbar):
            estep = estep + 1
            pbar.set_description("Processing at epoch %d" % epoch)
            character, mel_input, pos_text, pos_mel, _ = data
            character = character.to(DEVICE)
            mel_input = mel_input.to(DEVICE)
            pos_text = pos_text.to(DEVICE)
            pos_mel = pos_mel.to(DEVICE)
            output = modelo(character, mel_input, pos_text, pos_mel)
            # print(output)
            if estep == 1:
                writer.add_graph(
                    modelo, input_to_model=[character, mel_input, pos_text, pos_mel])
            # print("output modelo...." + str(output.shape))
            # print("output trasformado..." + str(output.reshape(-1, output.shape[-1]).shape))
            # print("caracter ......" + str(character.reshape(-1).shape))
            optimizer.zero_grad()
            loss = loss_fn(output.reshape(-1, output.shape[-1]), character.reshape(-1))
            output = output.transpose(0, 1)
            loss2 = loss.item()
            writer.add_scalar("loss :", loss2, estep)
            # print("/////////////////")
            # print(np.argmax(output[0].detach().numpy(), axis=1))
            print("loss..........." + str(loss2))
            # print("Epoch.........." + str(epoch))
            loss.backward()
            optimizer.step()
            losses += loss.item()
        writer.add_scalar("loss2 :", losses, epoch)
        if (epoch + 1) % hp.save_step == 0:
            t.save(
                {
                    'model': modelo.state_dict(),
                    'optimizer': optimizer.state_dict()
                },
                os.path.join(hp.checkpoint_path,
                             'checkpoint_transformer_%d.pth.tar' % epoch))
    writer.close()
def main(argv):
    if len(argv) > 1:
        raise app.UsageError('Too many command-line arguments.')
    dataset = preprocessor.get_dataset(
        preprocessor.load_dataset(FLAGS.dataset_path),
        preprocessor.load_json(FLAGS.split_path))
    preprocessor.write_dataset(dataset, FLAGS.save_path)
    token_vocab = preprocessor.get_token_vocab(FLAGS.save_path)
    preprocessor.write_token_vocab(token_vocab, FLAGS.save_path)
def main():
    # hyperparameters
    num_layers = 4
    d_model = 128
    dff = 512
    num_heads = 8
    dropout_rate = 0.1
    epochs = 20
    pe_input, pe_target = 500, 500

    # prepare dataset
    train_dataset, val_dataset, enc_vocab_size, dec_vocab_size = get_dataset(
        trainfile='data/retrosynthesis-train.smi',
        validfile='data/retrosynthesis-valid.smi',
        n_read_threads=5, BUFFER_SIZE=20000, BATCH_SIZE=64)
    input_vocab_size = enc_vocab_size + 2
    target_vocab_size = dec_vocab_size + 2

    # build transformer model
    transformer = Transformer(num_layers, d_model, num_heads, dff,
                              input_vocab_size, target_vocab_size,
                              pe_input=pe_input, pe_target=pe_target,
                              rate=dropout_rate)

    # Create optimizer
    learning_rate = CustomSchedule(d_model)
    optimizer = tf.keras.optimizers.Adam(learning_rate, beta_1=0.9, beta_2=0.98, epsilon=1e-9)

    # create model checkpoint
    ckpt_manager = get_ckpt_manager(transformer, optimizer)

    # training
    # train(train_dataset, transformer, epochs, ckpt_manager, optimizer)

    # evaluating
    # predicting
    inp_sequence = "Ic1ccc2n(CC(=O)N3CCCCC3)c3CCN(C)Cc3c2c1"
    reactant = predict(transformer, inp_sequence, max_length=160)
    print('Input Product: {}'.format(inp_sequence))
    print('Predicted Reactants: {}'.format(reactant))
def run_test(model, fit_kwargs=None, predict_kwargs=None, seed=None,
             save_json=False, save_tex=False, df=False, verbose=False, position=0):
    from preprocess import get_dataset, preprocess_all, dataset_to_X_y, RUN_FEATURES
    from metrics import score_regression

    if not callable(model):
        modelfn = lambda: model
    else:
        modelfn = model
    _model = modelfn()
    assert hasattr(_model, "fit") and hasattr(_model, "predict")

    fit_kwargs = fit_kwargs or dict()
    predict_kwargs = predict_kwargs or dict()

    dataset = get_dataset(seed=seed)
    d = dict()
    dic_feat = RUN_FEATURES.items()
    if verbose:
        dic_feat = tqdm(dic_feat, desc="run", position=position)
    for k, v in dic_feat:
        _model = modelfn()
        run_data = preprocess_all(dataset, subset=v)
        X_train, y_train, X_val, y_val = dataset_to_X_y(
            run_data, keys=["train", "validation"], datatype="numpy")
        _model.fit(X_train, y_train, **fit_kwargs)
        y_train_hat = _model.predict(X_train, **predict_kwargs)
        train_loss = score_regression(y_train, y_train_hat)
        y_val_hat = _model.predict(X_val, **predict_kwargs)
        val_loss = score_regression(y_val, y_val_hat)
        d[k] = dict(train_loss=train_loss, val_loss=val_loss)

    if save_json:
        jsonsave(d, _model.__class__.__name__ + ".json")
    if save_tex:
        dumptex(d, _model.__class__.__name__ + ".tex")
    if not df:
        return d
    return pd.DataFrame(d)
def make_history_month_features_all():
    pw_df_list = []
    dataset = get_dataset()
    dataset.power_consumption = dataset.power_consumption
    for user_id in get_user_id_list():
        print user_id
        if not check_empty(user_id):
            user_df = filter_user_id(dataset, user_id).resample('1D').mean().fillna(1)
            # add to list
            pw_df_list.append((user_id, user_df))
            # make_features(user_id, user_df)
    p = m_Pool(64)
    for arg in pw_df_list:
        p.apply_async(make_history_month_features, args=(arg))
    print 'Waiting for all subprocesses done...'
    p.close()
    p.join()
def main():
    print('starting here...')
    dataset = get_dataset()
    global_step = 0

    m = nn.DataParallel(Model().cuda())
    # if LOADCHECKPOINT:
    #     m.load_state_dict(t.load(hp.checkpoint_file_transformer))
    #     print('loaded checkpoint...')
    #     m.eval()
    m.train()
    optimizer = t.optim.Adam(m.parameters(), lr=hp.lr)
    pos_weight = t.FloatTensor([5.]).cuda()
    writer = SummaryWriter()

    for epoch in range(hp.epochs):
        print('at epoch', epoch)
        dataloader = DataLoader(dataset, batch_size=hp.batch_size, shuffle=True,
                                collate_fn=collate_fn_transformer, drop_last=True, num_workers=1)
        pbar = tqdm(dataloader)
        for i, data in enumerate(pbar):
            pbar.set_description("Processing at epoch %d" % epoch)
            global_step += 1
            if global_step < 400000:
                adjust_learning_rate(optimizer, global_step)

            eeg_array, mel, mel_input, pos_eeg_signal, pos_mel, _ = data
            stop_tokens = t.abs(pos_mel.ne(0).type(t.float) - 1)
            eeg_array = eeg_array.cuda()
            mel = mel.cuda()
            mel_input = mel_input.cuda()
            pos_eeg_signal = pos_eeg_signal.cuda()
            pos_mel = pos_mel.cuda()

            print('before m.forward()...')
            mel_pred, postnet_pred, attn_probs, stop_preds, attns_enc, attns_dec = m.forward(
                eeg_array, mel_input, pos_eeg_signal, pos_mel)

            mel_loss = nn.L1Loss()(mel_pred, mel)
            post_mel_loss = nn.L1Loss()(postnet_pred, mel)
            loss = mel_loss + post_mel_loss

            writer.add_scalars('training_loss', {
                'mel_loss': mel_loss,
                'post_mel_loss': post_mel_loss,
            }, global_step)
            writer.add_scalars('alphas', {
                'encoder_alpha': m.module.encoder.alpha.data,
                'decoder_alpha': m.module.decoder.alpha.data,
            }, global_step)

            if global_step % hp.image_step == 1:
                # summarywriter add_image params
                num_images_per_loop = 4
                writer_start_val = int(hp.batch_size / 2)
                writer_end_val = int(hp.batch_size * num_images_per_loop)
                writer_step_val = int(hp.batch_size)
                for i, prob in enumerate(attn_probs):
                    num_h = prob.size(0)
                    for j in range(writer_start_val, writer_end_val, writer_step_val):
                        x = vutils.make_grid([prob[j] * 255])
                        # x = prob[j] * 255
                        writer.add_image('Attention_%d_0' % global_step, x,
                                         i * num_images_per_loop + j)
                for i, prob in enumerate(attns_enc):
                    num_h = prob.size(0)
                    for j in range(writer_start_val, writer_end_val, writer_step_val):
                        x = vutils.make_grid([prob[j] * 255])
                        # x = prob[j] * 255
                        writer.add_image('Attention_enc_%d_0' % global_step, x,
                                         i * num_images_per_loop + j)
                for i, prob in enumerate(attns_dec):
                    num_h = prob.size(0)
                    for j in range(writer_start_val, writer_end_val, writer_step_val):
                        x = vutils.make_grid([prob[j] * 255])
                        # x = prob[j] * 255
                        writer.add_image('Attention_dec_%d_0' % global_step, x,
                                         i * num_images_per_loop + j)

            optimizer.zero_grad()
            # Calculate gradients
            loss.backward()
            nn.utils.clip_grad_norm_(m.parameters(), 1.)
            # Update weights
            optimizer.step()

            if global_step % hp.save_step == 0:
                t.save(
                    {
                        'model': m.state_dict(),
                        'optimizer': optimizer.state_dict()
                    },
                    os.path.join(hp.checkpoint_path,
                                 'checkpoint_transformer_%d.pth.tar' % global_step))
def synthesis(args):
    m = Model()
    m_post = ModelPostNet()
    m_stop = ModelStopToken()
    m.load_state_dict(load_checkpoint(args.restore_step1, "transformer"))
    m_stop.load_state_dict(load_checkpoint(args.restore_step3, "stop_token"))
    m_post.load_state_dict(load_checkpoint(args.restore_step2, "postnet"))

    m = m.cuda()
    m_post = m_post.cuda()
    m_stop = m_stop.cuda()
    m.train(False)
    m_post.train(False)
    m_stop.train(False)

    test_dataset = get_dataset(hp.test_data_csv)
    test_dataloader = DataLoader(test_dataset, batch_size=1, shuffle=False,
                                 collate_fn=collate_fn_transformer, drop_last=True, num_workers=1)
    ref_dataset = get_dataset(hp.test_data_csv)
    ref_dataloader = DataLoader(ref_dataset, batch_size=1, shuffle=True,
                                collate_fn=collate_fn_transformer, drop_last=True, num_workers=1)
    writer = get_writer(hp.checkpoint_path, hp.log_directory)

    ref_dataloader_iter = iter(ref_dataloader)
    for i, data in enumerate(test_dataloader):
        character, mel, mel_input, pos_text, pos_mel, text_length, mel_length, fname = data
        ref_character, ref_mel, ref_mel_input, ref_pos_text, ref_pos_mel, \
            ref_text_length, ref_mel_length, ref_fname = next(ref_dataloader_iter)
        stop_tokens = t.abs(pos_mel.ne(0).type(t.float) - 1)
        mel_input = t.zeros([1, 1, 80]).cuda()
        stop = []
        character = character.cuda()
        mel = mel.cuda()
        mel_input = mel_input.cuda()
        pos_text = pos_text.cuda()
        pos_mel = pos_mel.cuda()
        ref_character = ref_character.cuda()
        ref_mel = ref_mel.cuda()
        ref_mel_input = ref_mel_input.cuda()
        ref_pos_text = ref_pos_text.cuda()
        ref_pos_mel = ref_pos_mel.cuda()

        with t.no_grad():
            start = time.time()
            for i in range(args.max_len):
                pos_mel = t.arange(1, mel_input.size(1) + 1).unsqueeze(0).cuda()
                mel_pred, postnet_pred, attn_probs, decoder_output, attns_enc, attns_dec, attns_style = m.forward(
                    character, mel_input, pos_text, pos_mel, ref_mel, ref_pos_mel)
                stop_token = m_stop.forward(decoder_output)
                mel_input = t.cat([mel_input, postnet_pred[:, -1:, :]], dim=1)
                stop.append(t.sigmoid(stop_token).squeeze(-1)[0, -1])
                if stop[-1] > 0.5:
                    print("stop token at " + str(i) + " is :", stop[-1])
                    print("model inference time: ", time.time() - start)
                    break
            if stop[-1] == 0:
                continue
            mag_pred = m_post.forward(postnet_pred)
            inf_time = time.time() - start
            print("inference time: ", inf_time)

        wav = spectrogram2wav(mag_pred.squeeze(0).cpu().numpy())
        print("rtx : ", (len(wav) / hp.sr) / inf_time)
        wav_path = os.path.join(hp.sample_path, 'wav')
        if not os.path.exists(wav_path):
            os.makedirs(wav_path)
        write(os.path.join(wav_path, "text_{}_ref_{}_synth.wav".format(fname, ref_fname)), hp.sr, wav)
        print("written as text{}_ref_{}_synth.wav".format(fname, ref_fname))

        attns_enc_new = []
        attns_dec_new = []
        attn_probs_new = []
        attns_style_new = []
        for i in range(len(attns_enc)):
            attns_enc_new.append(attns_enc[i].unsqueeze(0))
            attns_dec_new.append(attns_dec[i].unsqueeze(0))
            attn_probs_new.append(attn_probs[i].unsqueeze(0))
            attns_style_new.append(attns_style[i].unsqueeze(0))
        attns_enc = t.cat(attns_enc_new, 0)
        attns_dec = t.cat(attns_dec_new, 0)
        attn_probs = t.cat(attn_probs_new, 0)
        attns_style = t.cat(attns_style_new, 0)

        attns_enc = attns_enc.contiguous().view(attns_enc.size(0), 1, hp.n_heads,
                                                attns_enc.size(2), attns_enc.size(3))
        attns_enc = attns_enc.permute(1, 0, 2, 3, 4)
        attns_dec = attns_dec.contiguous().view(attns_dec.size(0), 1, hp.n_heads,
                                                attns_dec.size(2), attns_dec.size(3))
        attns_dec = attns_dec.permute(1, 0, 2, 3, 4)
        attn_probs = attn_probs.contiguous().view(attn_probs.size(0), 1, hp.n_heads,
                                                  attn_probs.size(2), attn_probs.size(3))
        attn_probs = attn_probs.permute(1, 0, 2, 3, 4)
        attns_style = attns_style.contiguous().view(attns_style.size(0), 1, hp.n_heads,
                                                    attns_style.size(2), attns_style.size(3))
        attns_style = attns_style.permute(1, 0, 2, 3, 4)

        save_dir = os.path.join(hp.sample_path, 'figure',
                                "text_{}_ref_{}_synth.wav".format(fname, ref_fname))
        if not os.path.exists(save_dir):
            os.makedirs(save_dir)
        writer.add_alignments(attns_enc.detach().cpu(), attns_dec.detach().cpu(),
                              attn_probs.detach().cpu(), attns_style.detach().cpu(),
                              mel_length, text_length, args.restore_step1, 'Validation', save_dir)
def main():
    train_dataset = get_dataset(hp.train_data_csv)
    val_dataset = get_dataset(hp.val_data_csv)
    restore_step = hp.restore_step
    global_step = restore_step
    if restore_step != 0:
        restore_flag = True
    else:
        restore_flag = False

    m = Model()
    if os.path.exists('./checkpoints/checkpoint_%s_%d.pth.tar' % ('transformer', global_step)):
        state_dict = t.load('./checkpoints/checkpoint_%s_%d.pth.tar' % ('transformer', global_step))
        new_state_dict = OrderedDict()
        for k, value in state_dict['model'].items():
            key = k[7:]
            new_state_dict[key] = value
        m.load_state_dict(new_state_dict)

    m = nn.DataParallel(m.cuda())
    m.train()

    vocoder = SmartVocoder(Hyperparameters(parse_args()))
    vocoder.load_state_dict(t.load('./mel2audio/checkpoint_step000588458.pth')["state_dict"])
    vocoder = vocoder.cuda()
    vocoder.eval()

    optimizer = t.optim.Adam(m.parameters(), lr=hp.lr)
    writer = get_writer(hp.checkpoint_path, hp.log_directory)

    cur_epoch = 0
    for epochs in range(hp.epochs):
        train_dataloader = DataLoader(train_dataset, batch_size=hp.batch_size, shuffle=True,
                                      collate_fn=collate_fn_transformer, drop_last=True, num_workers=1)
        val_dataloader = DataLoader(val_dataset, batch_size=hp.batch_size, shuffle=True,
                                    collate_fn=collate_fn_transformer, drop_last=True)
        if restore_flag:
            cur_epoch = int(restore_step / len(train_dataloader))
            restore_flag = not restore_flag
        for i, data in enumerate(train_dataloader):
            global_step += 1
            if global_step < 400000:
                adjust_learning_rate(optimizer, global_step)

            character, mel, mag, mel_input, pos_text, pos_mel, text_length, mel_length, fname = data
            mel_max_length_array = t.zeros(mel_length.size(0)).long()
            mel_max_length_array = t.LongTensor(mel_max_length_array)
            mel_max_length_array[:] = t.max(mel_length)
            mel_max_length_array = mel_max_length_array.cuda()
            character = character.cuda()
            mel = mel.cuda()
            mag = mag.cuda()
            mel_input = mel_input.cuda()
            pos_text = pos_text.cuda()
            pos_mel = pos_mel.cuda()
            text_length = text_length.cuda()
            mel_length = mel_length.cuda()
            loading_time = time.time()

            mask = get_mask_from_lengths(mel_length).cuda()
            mel_pred, postnet_pred, attn_probs, decoder_outputs, attns_enc, attns_dec, attns_style, \
                post_linear, duration_predictor_output, duration, weights = m.forward(
                    character, mel_input, pos_text, pos_mel, mel, pos_mel,
                    mel_max_length_array=mel_max_length_array)

            mel_loss = t.mean(t.abs(mel_pred - mel).masked_select(mask.unsqueeze(-1)))
            post_mel_loss = t.mean(t.abs(postnet_pred - mel).masked_select(mask.unsqueeze(-1)))
            n_priority_freq = int(2000 / (hp.sr * 0.5) * (hp.n_fft / 2 + 1))
            post_linear_loss = 0.5 * t.mean(t.abs(post_linear - mag).masked_select(mask.unsqueeze(-1))) \
                + 0.5 * t.mean(t.abs(post_linear - mag)[:, :, :n_priority_freq].masked_select(mask.unsqueeze(-1)))
            duration_loss = nn.L1Loss()(t.sum(duration_predictor_output, -1, keepdim=True),
                                        mel_length) / t.sum(text_length)
            loss = (mel_loss + post_mel_loss + 0.3 * post_linear_loss + duration_loss) / hp.accum

            writer.add_losses(mel_loss.item(), post_mel_loss.item(), 0.3 * post_linear_loss,
                              duration_loss, global_step, 'Train')

            # Calculate gradients
            loss.backward()
            msg = "| Epoch: {}, {}/{}th loss : {:.4f} + {:.4f} + {:.4f} + {:.4f} = {:.4f}".format(
                cur_epoch, i, len(train_dataloader), mel_loss, post_mel_loss,
                0.3 * post_linear_loss, duration_loss, loss)
            stream(msg)

            if global_step % hp.accum == 0:
                nn.utils.clip_grad_norm_(m.parameters(), 1.)
                # Update weights
                optimizer.step()
                optimizer.zero_grad()

            if global_step % hp.val_step == 0 or global_step == 1:
                validate(m, vocoder, val_dataloader, global_step, writer)

            if global_step % hp.save_step == 0:
                t.save({'model': m.state_dict(), 'optimizer': optimizer.state_dict()},
                       os.path.join(hp.checkpoint_path,
                                    'checkpoint_transformer_%d.pth.tar' % global_step))
        if cur_epoch == hp.stop_epoch:
            break
        cur_epoch += 1
    print(' ')
# Cuda Flags #
##############
if config["cuda"]:
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
else:
    device = torch.device("cpu")

###############################
# Creating the dataset object #
###############################
# Create training data object
bidirectional = config.getboolean("bidirectional")
trainset, source_vocab, target_vocab = get_dataset(types="train",
                                                   batch_size=int(config["batch_size"]),
                                                   shuffle=True,
                                                   num_workers=int(config["num_workers"]),
                                                   pin_memory=False,
                                                   drop_last=True)

encoder1 = EncoderRNN(int(config["hidden_size_encoder"]),
                      len(source_vocab) + 2,
                      int(config["batch_size"]),
                      num_layers=int(config["num_layer_encoder"]),
                      bidirectional=bidirectional).to(device)
bridge = Linear(bidirectional, int(config["hidden_size_encoder"]),
                int(config["hidden_size_decoder"])).to(device)
decoder1 = DecoderRNN(int(config["hidden_size_decoder"]),
                      len(target_vocab) + 2,
                      int(config["batch_size"]),
                      num_layers=int(config["num_layer_decoder"])).to(device)

trainIters(trainset,
def synthesis(args):
    m = Model()
    m.load_state_dict(load_checkpoint(args.restore_step1, "transformer"))
    m = m.cuda()
    m.train(False)

    vocoder = SmartVocoder(Hyperparameters(parse_args()))
    vocoder.load_state_dict(t.load('./mel2audio/merged_STFT_checkpoint.pth')["state_dict"])
    vocoder = vocoder.cuda()
    vocoder.eval()

    with open('./hifi_gan/config.json') as f:
        data = f.read()
    json_config = json.loads(data)
    h = AttrDict(json_config)
    hifi_gan = Generator(h).cuda()
    state_dict_g = t.load('./hifi_gan/g_00334000', map_location='cuda')
    hifi_gan.load_state_dict(state_dict_g['generator'])
    hifi_gan.eval()
    hifi_gan.remove_weight_norm()

    test_dataset = get_dataset(hp.test_data_csv)
    test_dataloader = DataLoader(test_dataset, batch_size=1, shuffle=False,
                                 collate_fn=collate_fn_transformer, drop_last=True, num_workers=1)
    ref_dataset = get_dataset(hp.test_data_csv_shuf)
    ref_dataloader = DataLoader(ref_dataset, batch_size=1, shuffle=False,
                                collate_fn=collate_fn_transformer, drop_last=True, num_workers=1)
    writer = get_writer(hp.checkpoint_path, hp.log_directory)

    mel_basis = t.from_numpy(
        librosa.filters.mel(hp.sr, hp.n_fft, hp.n_mels, 50, 11000)).unsqueeze(0)  # (n_mels, 1+n_fft//2)

    ref_dataloader_iter = iter(ref_dataloader)
    _, ref_mel, _, _, _, ref_pos_mel, _, _, ref_fname = next(ref_dataloader_iter)

    for i, data in enumerate(test_dataloader):
        character, _, _, _, pos_text, _, text_length, _, fname = data
        mel_input = t.zeros([1, 1, 80]).cuda()
        character = character.cuda()
        ref_mel = ref_mel.cuda()
        mel_input = mel_input.cuda()
        pos_text = pos_text.cuda()

        with t.no_grad():
            start = time.time()
            memory, c_mask, attns_enc, duration_mask = m.encoder(character, pos=pos_text)
            style, coarse_emb = m.ref_encoder(ref_mel)
            memory = t.cat((memory, coarse_emb.expand(-1, memory.size(1), -1)), -1)
            memory = m.memory_coarse_layer(memory)
            duration_predictor_output = m.duration_predictor(memory, duration_mask)
            duration = t.ceil(duration_predictor_output)
            duration = duration * duration_mask
            # max_length = t.sum(duration).type(t.LongTensor)
            # print("length : ", max_length)
            monotonic_interpolation, pos_mel_, weights = m.length_regulator(memory, duration, duration_mask)
            kv_mask = t.zeros([1, mel_input.size(1), character.size(1)]).cuda()  # B, t', N
            kv_mask[:, :, :3] = 1
            kv_mask = kv_mask.eq(0)
            stop_flag = False
            ctr = 0
            for j in range(1200):
                pos_mel = t.arange(1, mel_input.size(1) + 1).unsqueeze(0).cuda()
                mel_pred, postnet_pred, attn_probs, decoder_output, attns_dec, attns_style = m.decoder(
                    memory, style, mel_input, c_mask, pos=pos_mel, ref_pos=ref_pos_mel,
                    mono_inter=monotonic_interpolation[:, :mel_input.shape[1]], kv_mask=kv_mask)
                mel_input = t.cat([mel_input, postnet_pred[:, -1:, :]], dim=1)
                # print("j", j, "mel_input", mel_input.shape)
                if stop_flag and ctr == 10:
                    break
                elif stop_flag:
                    ctr += 1
                kv_mask, stop_flag = update_kv_mask(kv_mask, attn_probs)  # B, t', N --> B, t'+1, N
            postnet_pred = t.cat((postnet_pred,
                                  t.zeros(postnet_pred.size(0), 5, postnet_pred.size(-1)).cuda()), 1)
            gen_length = mel_input.size(1)
            # print("gen_length", gen_length)
            post_linear = m.postnet(postnet_pred)
            post_linear = resample(post_linear, seq_len=mel_input.size(1), scale=args.rhythm_scale)
            postnet_pred = resample(mel_input, seq_len=mel_input.size(1), scale=args.rhythm_scale)
            inf_time = time.time() - start
            print("inference time: ", inf_time)
            # print("speech_rate: ", len(postnet_pred[0]) / len(character[0]))

            postnet_pred_v = postnet_pred.transpose(2, 1)
            postnet_pred_v = (postnet_pred_v * 100 + 20 - 100) / 20
            B, C, T = postnet_pred_v.shape
            z = t.randn(1, 1, T * hp.hop_length).cuda()
            z = z * 0.6  # Temp
            t.cuda.synchronize()
            timestemp = time.time()
            with t.no_grad():
                y_gen = vocoder.reverse(z, postnet_pred_v).squeeze()
            t.cuda.synchronize()
            print('{} seconds'.format(time.time() - timestemp))
            wav = y_gen.to(t.device("cpu")).data.numpy()
            wav = np.pad(wav, [0, 4800], mode='constant',
                         constant_values=0)  # pad 0 for 0.21 sec silence at the end

            post_linear_v = post_linear.transpose(1, 2)
            post_linear_v = 10**((post_linear_v * 100 + 20 - 100) / 20)
            mel_basis = mel_basis.repeat(post_linear_v.shape[0], 1, 1)
            post_linear_mel_v = t.log10(t.bmm(mel_basis.cuda(), post_linear_v))
            B, C, T = post_linear_mel_v.shape
            z = t.randn(1, 1, T * hp.hop_length).cuda()
            z = z * 0.6  # Temp
            t.cuda.synchronize()
            timestemp = time.time()
            with t.no_grad():
                y_gen_linear = vocoder.reverse(z, post_linear_mel_v).squeeze()
            t.cuda.synchronize()
            wav_linear = y_gen_linear.to(t.device("cpu")).data.numpy()
            wav_linear = np.pad(wav_linear, [0, 4800], mode='constant',
                                constant_values=0)  # pad 0 for 0.21 sec silence at the end

            wav_hifi = hifi_gan(post_linear_mel_v).squeeze().clamp(-1, 1).detach().cpu().numpy()
            wav_hifi = np.pad(wav_hifi, [0, 4800], mode='constant',
                              constant_values=0)  # pad 0 for 0.21 sec silence at the end

        mel_path = os.path.join(hp.sample_path + '_' + str(args.rhythm_scale), 'mel')
        if not os.path.exists(mel_path):
            os.makedirs(mel_path)
        np.save(os.path.join(mel_path,
                             'text_{}_ref_{}_synth_{}.mel'.format(i, ref_fname, str(args.rhythm_scale))),
                postnet_pred.cpu())

        linear_path = os.path.join(hp.sample_path + '_' + str(args.rhythm_scale), 'linear')
        if not os.path.exists(linear_path):
            os.makedirs(linear_path)
        np.save(os.path.join(linear_path,
                             'text_{}_ref_{}_synth_{}.linear'.format(i, ref_fname, str(args.rhythm_scale))),
                post_linear.cpu())

        wav_path = os.path.join(hp.sample_path + '_' + str(args.rhythm_scale), 'wav')
        if not os.path.exists(wav_path):
            os.makedirs(wav_path)
        write(os.path.join(wav_path,
                           "text_{}_ref_{}_synth_{}.wav".format(i, ref_fname, str(args.rhythm_scale))),
              hp.sr, wav)
        print("rtx : ", (len(wav) / hp.sr) / inf_time)

        wav_linear_path = os.path.join(hp.sample_path + '_' + str(args.rhythm_scale), 'wav_linear')
        if not os.path.exists(wav_linear_path):
            os.makedirs(wav_linear_path)
        write(os.path.join(wav_linear_path,
                           "text_{}_ref_{}_synth_{}.wav".format(i, ref_fname, str(args.rhythm_scale))),
              hp.sr, wav_linear)

        wav_hifi_path = os.path.join(hp.sample_path + '_' + str(args.rhythm_scale), 'wav_hifi')
        if not os.path.exists(wav_hifi_path):
            os.makedirs(wav_hifi_path)
        write(os.path.join(wav_hifi_path,
                           "text_{}_ref_{}_synth_{}.wav".format(i, ref_fname, str(args.rhythm_scale))),
              hp.sr, wav_hifi)

        show_weights = weights.contiguous().view(weights.size(0), 1, 1, weights.size(1), weights.size(2))
        attns_enc_new = []
        attns_dec_new = []
        attn_probs_new = []
        attns_style_new = []
        for i in range(len(attns_enc)):
            attns_enc_new.append(attns_enc[i].unsqueeze(0))
            attns_dec_new.append(attns_dec[i].unsqueeze(0))
            attn_probs_new.append(attn_probs[i].unsqueeze(0))
            attns_style_new.append(attns_style[i].unsqueeze(0))
        attns_enc = t.cat(attns_enc_new, 0)
        attns_dec = t.cat(attns_dec_new, 0)
        attn_probs = t.cat(attn_probs_new, 0)
        attns_style = t.cat(attns_style_new, 0)

        attns_enc = attns_enc.contiguous().view(attns_enc.size(0), 1, hp.n_heads,
                                                attns_enc.size(2), attns_enc.size(3))
        attns_enc = attns_enc.permute(1, 0, 2, 3, 4)
        attns_dec = attns_dec.contiguous().view(attns_dec.size(0), 1, hp.n_heads,
                                                attns_dec.size(2), attns_dec.size(3))
        attns_dec = attns_dec.permute(1, 0, 2, 3, 4)
        attn_probs = attn_probs.contiguous().view(attn_probs.size(0), 1, hp.n_heads,
                                                  attn_probs.size(2), attn_probs.size(3))
        attn_probs = attn_probs.permute(1, 0, 2, 3, 4)
        attns_style = attns_style.contiguous().view(attns_style.size(0), 1, hp.n_heads,
                                                    attns_style.size(2), attns_style.size(3))
        attns_style = attns_style.permute(1, 0, 2, 3, 4)

        save_dir = os.path.join(hp.sample_path + '_' + str(args.rhythm_scale), 'figure',
                                "text_{}_ref_{}_synth_{}.wav".format(fname, ref_fname, str(args.rhythm_scale)))
        if not os.path.exists(save_dir):
            os.makedirs(save_dir)
        writer.add_alignments(attns_enc.detach().cpu(), attns_dec.detach().cpu(),
                              attn_probs.detach().cpu(), attns_style.detach().cpu(),
                              show_weights.detach().cpu(), [t.tensor(gen_length).type(t.LongTensor)],
                              text_length, args.restore_step1, 'Inference', save_dir)
decay_rate = 0.99  # learning-rate decay rate
moving_average_decay_rate = 0.99  # moving-average decay rate
bottleneck_layer_size = 512  # output dimension of the final layer
keep_probability = 0.8  # dropout parameter (keep probability)
weight_decay = 5e-5  # L2 weight-regularization parameter
center_loss_alfa = 0.95  # update rate of the centers for the center loss
center_loss_factor = 0.5  # center-loss weight
train_step = tf.Variable(0, trainable=False)  # current training step
pretrained_model_path = "/home/dc2-user/biyesheji/models_lfw/"  # path to the previously trained model
pretrained_model = False  # whether a previously trained model exists

if len(os.listdir(pretrained_model_path)) > 0:
    pretrained_model = True
    print("Using pretrained model")

dataset = preprocess.get_dataset(image_path, dataset_type)
image_path_list, label_list = preprocess.create_image_path_list_and_label_list(dataset=dataset)

labels = ops.convert_to_tensor(label_list, dtype=tf.int32)
size = array_ops.shape(labels)[0]
index_queue = tf.train.range_input_producer(limit=size, num_epochs=None, shuffle=True, capacity=32)
index_dequeue_op = index_queue.dequeue_many(batch_size * epoch_size)

image_paths_placeholder = tf.placeholder(shape=(None, 1), dtype=tf.string, name="image_paths")
labels_placeholder = tf.placeholder(dtype=tf.int32, name="labels")
def main():
    train_dataset = get_dataset(hp.train_data_csv)
    val_dataset = get_dataset(hp.val_data_csv)
    restore_step = hp.restore_step
    global_step = restore_step
    if restore_step != 0:
        restore_flag = True
    else:
        restore_flag = False

    m = Model()
    if os.path.exists('./checkpoints/checkpoint_%s_%d.pth.tar' % ('transformer', global_step)):
        state_dict = t.load('./checkpoints/checkpoint_%s_%d.pth.tar' % ('transformer', global_step))
        new_state_dict = OrderedDict()
        for k, value in state_dict['model'].items():
            key = k[7:]
            new_state_dict[key] = value
        m.load_state_dict(new_state_dict)

    m = nn.DataParallel(m.cuda())
    m.train()
    optimizer = t.optim.Adam(m.parameters(), lr=hp.lr)
    writer = get_writer(hp.checkpoint_path, hp.log_directory)

    cur_epoch = 0
    for epochs in range(hp.epochs):
        train_dataloader = DataLoader(train_dataset, batch_size=hp.batch_size, shuffle=True,
                                      collate_fn=collate_fn_transformer, drop_last=True, num_workers=1)
        val_dataloader = DataLoader(val_dataset, batch_size=hp.batch_size, shuffle=True,
                                    collate_fn=collate_fn_transformer, drop_last=True)
        if restore_flag:
            cur_epoch = int(restore_step / len(train_dataloader))
            restore_flag = not restore_flag
        for i, data in enumerate(train_dataloader):
            global_step += 1
            if global_step < 400000:
                adjust_learning_rate(optimizer, global_step)

            character, mel, mel_input, pos_text, pos_mel, text_length, mel_length, fname = data
            character = character.cuda()
            mel = mel.cuda()
            mel_input = mel_input.cuda()
            pos_text = pos_text.cuda()
            pos_mel = pos_mel.cuda()
            text_length = text_length.cuda()
            mel_length = mel_length.cuda()
            loading_time = time.time()

            mel_pred, postnet_pred, attn_probs, decoder_output, attns_enc, attns_dec, attns_style = m.forward(
                character, mel_input, pos_text, pos_mel, mel, pos_mel)
            mel_loss = nn.L1Loss()(mel_pred, mel)
            post_mel_loss = nn.L1Loss()(postnet_pred, mel)
            loss = (mel_loss + post_mel_loss) / hp.accum

            writer.add_losses(mel_loss.item(), post_mel_loss.item(), global_step, 'Train')

            # Calculate gradients
            loss.backward()
            msg = "| Epoch: {}, {}/{}th loss : {:.4f} + {:.4f} = {:.4f}".format(
                cur_epoch, i, len(train_dataloader), mel_loss, post_mel_loss, loss)
            stream(msg)

            if global_step % hp.accum == 0:
                nn.utils.clip_grad_norm_(m.parameters(), 1.)
                # Update weights
                optimizer.step()
                optimizer.zero_grad()

            if global_step % hp.val_step == 0 or global_step == 1:
                validate(m, val_dataloader, global_step, writer)

            if global_step % hp.save_step == 0:
                t.save({'model': m.state_dict(), 'optimizer': optimizer.state_dict()},
                       os.path.join(hp.checkpoint_path,
                                    'checkpoint_transformer_%d.pth.tar' % global_step))
        if cur_epoch == hp.stop_epoch:
            break
        cur_epoch += 1
    print(' ')
def main(): if not os.path.exists("logger"): os.mkdir("logger") dataset = get_dataset() global_step = 0 m = nn.DataParallel(Model().cuda()) num_param = sum(param.numel() for param in m.parameters()) print('Number of Transformer-TTS Parameters:', num_param) m.train() optimizer = t.optim.Adam(m.parameters(), lr=hp.lr) pos_weight = t.FloatTensor([5.]).cuda() # writer = SummaryWriter() for epoch in range(hp.epochs): dataloader = DataLoader(dataset, batch_size=hp.batch_size, shuffle=True, collate_fn=collate_fn_transformer, drop_last=True, num_workers=16) # pbar = tqdm(dataloader) for i, data in enumerate(dataloader): # pbar.set_description("Processing at epoch %d"%epoch) global_step += 1 if global_step < 400000: adjust_learning_rate(optimizer, global_step) character, mel, mel_input, pos_text, pos_mel, _ = data stop_tokens = t.abs(pos_mel.ne(0).type(t.float) - 1) character = character.cuda() mel = mel.cuda() mel_input = mel_input.cuda() pos_text = pos_text.cuda() pos_mel = pos_mel.cuda() # print(mel) mel_pred, postnet_pred, attn_probs, stop_preds, attns_enc, attns_dec = m.forward( character, mel_input, pos_text, pos_mel) mel_loss = nn.L1Loss()(mel_pred, mel) post_mel_loss = nn.L1Loss()(postnet_pred, mel) loss = mel_loss + post_mel_loss t_l = loss.item() m_l = mel_loss.item() m_p_l = post_mel_loss.item() # s_l = stop_pred_loss.item() with open(os.path.join("logger", "total_loss.txt"), "a") as f_total_loss: f_total_loss.write(str(t_l) + "\n") with open(os.path.join("logger", "mel_loss.txt"), "a") as f_mel_loss: f_mel_loss.write(str(m_l) + "\n") with open(os.path.join("logger", "mel_postnet_loss.txt"), "a") as f_mel_postnet_loss: f_mel_postnet_loss.write(str(m_p_l) + "\n") # with open(os.path.join("logger", "stop_pred_loss.txt"), "a") as f_s_loss: # f_s_loss.write(str(s_l)+"\n") # Print if global_step % hp.log_step == 0: # Now = time.clock() str1 = "Epoch [{}/{}], Step [{}], Mel Loss: {:.4f}, Mel PostNet Loss: {:.4f};".format( epoch + 1, hp.epochs, global_step, mel_loss.item(), post_mel_loss.item()) str2 = "Total Loss: {:.4f}.".format(loss.item()) current_learning_rate = 0 for param_group in optimizer.param_groups: current_learning_rate = param_group['lr'] str3 = "Current Learning Rate is {:.6f}.".format( current_learning_rate) # str4 = "Time Used: {:.3f}s, Estimated Time Remaining: {:.3f}s.".format( # (Now-Start), (total_step-current_step)*np.mean(Time)) print("\n" + str1) print(str2) print(str3) # print(str4) with open(os.path.join("logger", "logger.txt"), "a") as f_logger: f_logger.write(str1 + "\n") f_logger.write(str2 + "\n") f_logger.write(str3 + "\n") # f_logger.write(str4 + "\n") f_logger.write("\n") # writer.add_scalars('training_loss',{ # 'mel_loss':mel_loss, # 'post_mel_loss':post_mel_loss, # }, global_step) # writer.add_scalars('alphas',{ # 'encoder_alpha':m.module.encoder.alpha.data, # 'decoder_alpha':m.module.decoder.alpha.data, # }, global_step) # if global_step % hp.image_step == 1: # for i, prob in enumerate(attn_probs): # num_h = prob.size(0) # for j in range(4): # x = vutils.make_grid(prob[j*16] * 255) # writer.add_image('Attention_%d_0'%global_step, x, i*4+j) # for i, prob in enumerate(attns_enc): # num_h = prob.size(0) # for j in range(4): # x = vutils.make_grid(prob[j*16] * 255) # writer.add_image('Attention_enc_%d_0'%global_step, x, i*4+j) # for i, prob in enumerate(attns_dec): # num_h = prob.size(0) # for j in range(4): # x = vutils.make_grid(prob[j*16] * 255) # writer.add_image('Attention_dec_%d_0'%global_step, x, i*4+j) optimizer.zero_grad() # Calculate gradients 
loss.backward() nn.utils.clip_grad_norm_(m.parameters(), 1.) # Update weights optimizer.step() if global_step % hp.save_step == 0: t.save( { 'model': m.state_dict(), 'optimizer': optimizer.state_dict() }, os.path.join( hp.checkpoint_path, 'checkpoint_transformer_%d.pth.tar' % global_step))
print(G)
D = Discriminator_noSigmoid(in_dim=3).cuda()  # channel=3
# D.load_state_dict(torch.load('model/wgangp_d.pth30'))
print(D)

G.train()
D.train()

# loss criterion
criterion = nn.BCELoss()

# optimizer
opt_D = torch.optim.Adam(D.parameters(), lr=lr, betas=(0.5, 0.999))
opt_G = torch.optim.Adam(G.parameters(), lr=lr, betas=(0.5, 0.999))

# dataloader (You might need to edit the dataset path if you use extra dataset.)
dataset = get_dataset(os.path.join(workspace_dir))
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True, num_workers=4)

# show one image
# plt.imshow(dataset[10].numpy().transpose(1, 2, 0))
# plt.show()

# for logging
z_sample = Variable(torch.randn(100, z_dim)).cuda()

# main training loop
for e, epoch in enumerate(range(n_epoch)):
    for i, data in enumerate(dataloader):
        imgs = data
        imgs = imgs.cuda()
        bs = imgs.size(0)
                                           input_tensor, device, idx2word_hin,
                                           max_length=20, bidirectional=False)  # CODE_BlANK_6
        # Joining the predicted output to form the predicted sentence
        output_sentence = ' '.join(output_words)
        print('Predicted Output: ', output_sentence)
        print('')
        if (j == n):
            break


from preprocess import get_dataset

device = torch.device("cpu")
testset, idx2word_en, idx2word_hin = get_dataset(batch_size=1, types="val", shuffle=False,
                                                 num_workers=1, pin_memory=False, drop_last=False)

encoder = torch.load("encoder.pt")
encoder = encoder.to(device)
decoder = torch.load("decoder.pt")
decoder = decoder.to(device)
bridge = torch.load("bridge.pt")
bridge = bridge.to(device)

evaluateRandomly(encoder, decoder, bridge, device, testset, idx2word_en, idx2word_hin)
def main():
    dataset = get_dataset()
    global_step = 0
    sum_loss = 0

    m = nn.DataParallel(Model().cuda())  # TODO: dataparallel
    # m = Model().cuda()
    m.train()
    optimizer = t.optim.Adam(m.parameters(), lr=hp.lr)
    pos_weight = t.FloatTensor([5.]).cuda()
    writer = SummaryWriter()

    for epoch in range(hp.epochs):
        dataloader = DataLoader(dataset, batch_size=hp.batch_size, shuffle=True,
                                collate_fn=collate_fn_transformer, drop_last=True, num_workers=16)
        pbar = tqdm(dataloader)
        sum_loss = 0
        for i, data in enumerate(pbar):
            pbar.set_description("Processing at epoch %d" % epoch)
            global_step += 1
            if global_step < 400000:
                adjust_learning_rate(optimizer, global_step)

            character, mel, mel_input, pos_text, pos_mel, _ = data
            stop_tokens = t.abs(pos_mel.ne(0).type(t.float) - 1)
            character = character.cuda()
            mel = mel.cuda()
            mel_input = mel_input.cuda()
            pos_text = pos_text.cuda()
            pos_mel = pos_mel.cuda()

            mel_pred, postnet_pred, attn_probs, stop_preds, attns_enc, attns_dec = m.forward(
                character, mel_input, pos_text, pos_mel)

            mel_loss = nn.L1Loss()(mel_pred, mel)
            post_mel_loss = nn.L1Loss()(postnet_pred, mel)
            loss = mel_loss + post_mel_loss

            writer.add_scalars('training_loss', {
                'mel_loss': mel_loss,
                'post_mel_loss': post_mel_loss,
            }, global_step)
            writer.add_scalars('alphas', {
                'encoder_alpha': m.module.encoder.alpha.data,
                'decoder_alpha': m.module.decoder.alpha.data,
            }, global_step)

            if global_step % hp.image_step == 1:
                for i, prob in enumerate(attn_probs):
                    num_h = prob.size(0)
                    for j in range(4):
                        x = vutils.make_grid(prob[j * 16] * 255)
                        writer.add_image('Attention_%d_0' % global_step, x, i * 4 + j)
                for i, prob in enumerate(attns_enc):
                    num_h = prob.size(0)
                    for j in range(4):
                        x = vutils.make_grid(prob[j * 16] * 255)
                        writer.add_image('Attention_enc_%d_0' % global_step, x, i * 4 + j)
                for i, prob in enumerate(attns_dec):
                    num_h = prob.size(0)
                    for j in range(4):
                        x = vutils.make_grid(prob[j * 16] * 255)
                        writer.add_image('Attention_dec_%d_0' % global_step, x, i * 4 + j)

            optimizer.zero_grad()
            # Calculate gradients
            loss.backward()
            nn.utils.clip_grad_norm_(m.parameters(), 1.)
            # Update weights
            optimizer.step()

            if global_step % hp.save_step == 0:
                t.save({'model': m.state_dict(), 'optimizer': optimizer.state_dict()},
                       os.path.join(hp.checkpoint_path,
                                    'checkpoint_transformer_%d.pth.tar' % global_step))
            sum_loss += loss.item()
        print(f'epoch:{epoch}, sum_loss: {sum_loss / (i + 1)}')
def main():
    dataset = get_dataset(hp.train_data_csv)
    global_step = 0

    m = nn.DataParallel(ModelStopToken().cuda())
    trans_model = Model()
    trans_model.load_state_dict(load_checkpoint(100000, "transformer"))
    for name, param in trans_model.named_parameters():
        param.requires_grad = False
        print(name, " : weight frozen")
    trans_model = nn.DataParallel(trans_model.cuda())

    m.train()
    trans_model.train(False)
    optimizer = t.optim.Adam(m.parameters(), lr=hp.lr)
    writer = SummaryWriter()

    for epoch in range(hp.epochs):
        dataloader = DataLoader(dataset, batch_size=hp.batch_size, shuffle=True,
                                collate_fn=collate_fn_transformer, drop_last=True, num_workers=8)
        for i, data in enumerate(dataloader):
            global_step += 1
            if global_step < 400000:
                adjust_learning_rate(optimizer, global_step)

            character, mel, mel_input, pos_text, pos_mel, text_length, mel_length, fname = data
            character = character.cuda()
            mel = mel.cuda()
            mel_input = mel_input.cuda()
            pos_text = pos_text.cuda()
            pos_mel = pos_mel.cuda()
            mel_length = mel_length.cuda()
            stop_tokens = t.abs(pos_mel.ne(0).type(t.float) - 1).cuda()
            for j, length in enumerate(mel_length):
                stop_tokens[j, length - 1] += 1

            mel_pred, postnet_pred, attn, decoder_output, _, attn_dec, attn_style = trans_model.forward(
                character, mel_input, pos_text, pos_mel, mel, pos_mel)
            stop_preds = m.forward(decoder_output)

            if global_step % 100 == 0:
                print("pos_mel", pos_mel[0])
                print("stop_pred", t.sigmoid(stop_preds.squeeze()[0]))
                print("stop_tokens", stop_tokens[0])

            mask = get_mask_from_lengths(mel_length)
            stop_preds = stop_preds.squeeze().masked_select(mask)
            stop_tokens = stop_tokens.masked_select(mask)
            loss = nn.BCEWithLogitsLoss(pos_weight=t.tensor(hp.bce_pos_weight))(stop_preds, stop_tokens)

            print("| Epoch: {}, {}/{}th loss : {:.4f}".format(epoch, i, len(dataloader), loss))
            writer.add_scalars('training_loss', {
                'loss': loss,
            }, global_step)

            optimizer.zero_grad()
            # Calculate gradients
            loss.backward()
            nn.utils.clip_grad_norm_(m.parameters(), 1.)
            # Update weights
            optimizer.step()

            if global_step % hp.save_step == 0:
                t.save({'model': m.state_dict(), 'optimizer': optimizer.state_dict()},
                       os.path.join(hp.checkpoint_path,
                                    'checkpoint_stop_token_%d.pth.tar' % global_step))
        if epoch == hp.stop_epoch:
            break
    D[max_index] = D[max_index] + 1
    return D


if not os.path.exists('BN_alignments'):
    os.mkdir('BN_alignments')

check_point = './BZ_checkpoint/checkpoint_transformer_820000.pth.tar'
# remap tensors saved on cuda:5 to cuda:0 while loading the checkpoint
para_file = t.load(check_point, map_location={'cuda:5': 'cuda:0'})
model = nn.DataParallel(Model().cuda())
model.load_state_dict(para_file['model'])
model.eval()

for epoch in range(1):
    dataset = get_dataset()
    dataloader = DataLoader(dataset, batch_size=1, shuffle=False,
                            collate_fn=collate_fn_transformer, drop_last=False, num_workers=1)
    k = 0
    # pbar = tqdm(dataloader)
    # for i, data in enumerate(pbar):
    for character, mel, mel_input, pos_text, pos_mel, _ in dataloader:
        # pbar.set_description("Processing at epoch %d" % epoch)
        # character, mel, mel_input, pos_text, pos_mel, _ = data
        stop_tokens = t.abs(pos_mel.ne(0).type(t.float) - 1)
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--step', type=int, help='Global step to restore checkpoint', default=0)
    args = parser.parse_args()

    dataset = get_dataset()
    global_step = args.step

    m = Model().cuda()
    m = nn.DataParallel(m, device_ids=[i for i in range(8)])

    if not os.path.exists(hp.checkpoint_path):
        os.makedirs(hp.checkpoint_path)
    if args.step > 0:
        ckpt_path = os.path.join(hp.checkpoint_path,
                                 'checkpoint_transformer_%d.pth.tar' % global_step)
        ckpt = torch.load(ckpt_path)
        m.load_state_dict(ckpt['model'])

    m.train()
    optimizer = torch.optim.Adam(m.parameters(), lr=hp.lr)
    if args.step > 0:
        optimizer.load_state_dict(ckpt['optimizer'])
    pos_weight = torch.FloatTensor([5.]).cuda()
    writer = SummaryWriter()

    for epoch in range(hp.epochs):
        dataloader = DataLoader(dataset, batch_size=hp.batch_size, shuffle=True,
                                collate_fn=collate_fn_transformer, drop_last=True, num_workers=16)
        pbar = tqdm(dataloader)
        for i, data in enumerate(pbar):
            pbar.set_description("Processing at epoch %d" % epoch)
            global_step += 1
            if global_step < 400000:
                adjust_learning_rate(optimizer, global_step)

            character, mel, mel_input, pos_text, pos_mel, _ = data
            stop_tokens = torch.abs(pos_mel.ne(0).type(torch.float) - 1)
            character = character.cuda()
            mel = mel.cuda()
            mel_input = mel_input.cuda()
            pos_text = pos_text.cuda()
            pos_mel = pos_mel.cuda()

            mel_pred, postnet_pred, attn_probs, stop_preds, attns_enc, attns_dec = m.forward(
                character, mel_input, pos_text, pos_mel)

            mel_loss = nn.L1Loss()(mel_pred, mel)
            post_mel_loss = nn.L1Loss()(postnet_pred, mel)
            loss = mel_loss + post_mel_loss

            writer.add_scalars('training_loss', {
                'mel_loss': mel_loss,
                'post_mel_loss': post_mel_loss,
            }, global_step)
            writer.add_scalars('alphas', {
                'encoder_alpha': m.module.encoder.alpha.data,
                'decoder_alpha': m.module.decoder.alpha.data,
            }, global_step)

            if global_step % hp.image_step == 1:
                for i, prob in enumerate(attn_probs):
                    num_h = prob.size(0)
                    for j in range(4):
                        x = vutils.make_grid(prob[j * 16] * 255)
                        writer.add_image('Attention_%d_0' % global_step, x, i * 4 + j)
                for i, prob in enumerate(attns_enc):
                    num_h = prob.size(0)
                    for j in range(4):
                        x = vutils.make_grid(prob[j * 16] * 255)
                        writer.add_image('Attention_enc_%d_0' % global_step, x, i * 4 + j)
                for i, prob in enumerate(attns_dec):
                    num_h = prob.size(0)
                    for j in range(4):
                        x = vutils.make_grid(prob[j * 16] * 255)
                        writer.add_image('Attention_dec_%d_0' % global_step, x, i * 4 + j)

            optimizer.zero_grad()
            loss.backward()
            nn.utils.clip_grad_norm_(m.parameters(), 1.)
            optimizer.step()

            if global_step % hp.save_step == 0:
                torch.save({'model': m.state_dict(), 'optimizer': optimizer.state_dict()},
                           os.path.join(hp.checkpoint_path,
                                        'checkpoint_transformer_%d.pth.tar' % global_step))
import numpy as np
import torch
from tqdm import tqdm
import json
from model.Bilstm import BiLSTM
from torch import nn
from sklearn.metrics import f1_score
from sklearn.metrics import accuracy_score
from preprocess import get_iterator, get_dataset
from configs import *

train, val = get_dataset()

num_epochs = 50
batch_size = 32
vocab_size = len(train.fields['src'].vocab.stoi)
pos_vocab_size = len(train.fields['pos'].vocab.stoi)
output_dim = len(train.fields['tgt'].vocab.stoi)

'''
vocab_set = {
    'word2idx': {},
    'pos2idx': {},
    'tag2idx': {},
    'idx2tag': {}
}
vocab_set['word2idx'] = dict(train.fields['src'].vocab.stoi)
vocab_set['pos2idx'] = dict(train.fields['pos'].vocab.stoi)
vocab_set['tag2idx'] = dict(train.fields['tgt'].vocab.stoi)
def main():
    dataset = get_dataset()
    global_step = 0

    # inference: https://blog.csdn.net/weixin_40087578/article/details/87186613
    # Spread the data across multiple GPUs; GPU 0 is used by default. For multi-GPU training,
    # set the device ids and the environment variables beforehand.
    m = nn.DataParallel(Model().cuda())
    m.train()
    optimizer = t.optim.Adam(m.parameters(), lr=hp.lr)  # Adam
    pos_weight = t.FloatTensor([5.]).cuda()
    writer = SummaryWriter()

    for epoch in range(hp.epochs):
        dataloader = DataLoader(dataset, batch_size=hp.batch_size, shuffle=True,
                                collate_fn=collate_fn_transformer, drop_last=True, num_workers=16)
        pbar = tqdm(dataloader)
        for i, data in enumerate(pbar):
            pbar.set_description("Processing at epoch %d" % epoch)
            global_step += 1
            if global_step < 400000:
                # adjust the learning rate; for Adam this is arguably unnecessary
                adjust_learning_rate(optimizer, global_step)

            # pos_text and pos_mel are global position indices.
            character, mel, mel_input, pos_text, pos_mel, _ = data  # fetch the batch
            stop_tokens = t.abs(pos_mel.ne(0).type(t.float) - 1)
            character = character.cuda()  # copy the data to the GPU
            mel = mel.cuda()
            mel_input = mel_input.cuda()
            pos_text = pos_text.cuda()
            pos_mel = pos_mel.cuda()

            mel_pred, postnet_pred, attn_probs, stop_preds, attns_enc, attns_dec = m.forward(
                character, mel_input, pos_text, pos_mel)

            # stop_token originally marked the end of the audio, but the code author reports that
            # adding its loss as in the paper keeps the model from converging, so the generation
            # length is later chosen empirically.
            mel_loss = nn.L1Loss()(mel_pred, mel)  # L1 loss
            post_mel_loss = nn.L1Loss()(postnet_pred, mel)
            loss = mel_loss + post_mel_loss

            writer.add_scalars('training_loss', {
                'mel_loss': mel_loss,
                'post_mel_loss': post_mel_loss,
            }, global_step)
            writer.add_scalars('alphas', {
                'encoder_alpha': m.module.encoder.alpha.data,
                'decoder_alpha': m.module.decoder.alpha.data,
            }, global_step)

            if global_step % hp.image_step == 1:
                for i, prob in enumerate(attn_probs):
                    num_h = prob.size(0)
                    for j in range(4):
                        x = vutils.make_grid(prob[j * 16] * 255)
                        writer.add_image('Attention_%d_0' % global_step, x, i * 4 + j)
                for i, prob in enumerate(attns_enc):
                    num_h = prob.size(0)
                    for j in range(4):
                        x = vutils.make_grid(prob[j * 16] * 255)
                        writer.add_image('Attention_enc_%d_0' % global_step, x, i * 4 + j)
                for i, prob in enumerate(attns_dec):
                    num_h = prob.size(0)
                    for j in range(4):
                        x = vutils.make_grid(prob[j * 16] * 255)
                        writer.add_image('Attention_dec_%d_0' % global_step, x, i * 4 + j)

            optimizer.zero_grad()  # clear the gradient buffers before the next backward pass
            # Calculate gradients
            loss.backward()  # backpropagation
            nn.utils.clip_grad_norm_(m.parameters(), 1.)  # gradient clipping
            # Update weights
            optimizer.step()

            if global_step % hp.save_step == 0:
                t.save({'model': m.state_dict(), 'optimizer': optimizer.state_dict()},
                       os.path.join(hp.checkpoint_path,
                                    'checkpoint_transformer_%d.pth.tar' % global_step))
def main(args):
    dataset = get_dataset()
    global_step = args.restore_step

    m = nn.DataParallel(Model().cuda())
    # # print(type(m.module))
    # for block in m.module:
    #     for each in block.parameters():
    #         print(each.reqiures_grad)
    # for paras in m.parameters():
    #     print(paras.size(), paras.requires_grad)
    m.train()
    optimizer = t.optim.Adam(m.parameters(), lr=hp.lr)

    # print(os.path.join(
    #     hp.checkpoint_path, 'checkpoint_transformer_%d.pth.tar' % args.restore_step))
    try:
        print(os.path.join(hp.checkpoint_path,
                           'checkpoint_transformer_%d.pth.tar' % args.restore_step))
        checkpoint = torch.load(
            os.path.join(hp.checkpoint_path,
                         'checkpoint_transformer_%d.pth.tar' % args.restore_step))
        m.load_state_dict(checkpoint['model'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        print("\n---Model Restored at Step %d---\n" % args.restore_step)
    except:
        print("\n---Start New Training---\n")
        if not os.path.exists(hp.checkpoint_path):
            os.mkdir(hp.checkpoint_path)

    pos_weight = t.FloatTensor([5.]).cuda()
    writer = SummaryWriter()

    for epoch in range(args.start_epoch, hp.epochs):
        dataloader = DataLoader(dataset, batch_size=hp.batch_size, shuffle=True,
                                collate_fn=collate_fn_transformer, drop_last=True, num_workers=0)
        pbar = tqdm(dataloader)
        for i, data in enumerate(pbar):
            pbar.set_description("Processing at epoch %d" % epoch)
            global_step += 1
            if global_step < 400000:
                adjust_learning_rate(optimizer, global_step)

            character, mel, mel_input, pos_text, pos_mel, _ = data
            stop_tokens = t.abs(pos_mel.ne(0).type(t.float) - 1)
            character = character.cuda()
            mel = mel.cuda()
            mel_input = mel_input.cuda()
            pos_text = pos_text.cuda()
            pos_mel = pos_mel.cuda()

            mel_pred, postnet_pred, attn_probs, stop_preds, attns_enc, attns_dec = m.forward(
                character, mel_input, pos_text, pos_mel)

            mel_loss = nn.L1Loss()(mel_pred, mel)
            post_mel_loss = nn.L1Loss()(postnet_pred, mel)
            loss = mel_loss + post_mel_loss

            writer.add_scalars('training_loss', {
                'mel_loss': mel_loss,
                'post_mel_loss': post_mel_loss,
            }, global_step)
            writer.add_scalars('alphas', {
                'encoder_alpha': m.module.encoder.alpha.data,
                'decoder_alpha': m.module.decoder.alpha.data,
            }, global_step)

            if global_step % hp.image_step == 1:
                for i, prob in enumerate(attn_probs):
                    num_h = prob.size(0)
                    for j in range(4):
                        x = vutils.make_grid(prob[j * 16] * 255)
                        writer.add_image('Attention_%d_0' % global_step, x, i * 4 + j)
                for i, prob in enumerate(attns_enc):
                    num_h = prob.size(0)
                    for j in range(4):
                        x = vutils.make_grid(prob[j * 16] * 255)
                        writer.add_image('Attention_enc_%d_0' % global_step, x, i * 4 + j)
                for i, prob in enumerate(attns_dec):
                    num_h = prob.size(0)
                    for j in range(4):
                        x = vutils.make_grid(prob[j * 16] * 255)
                        writer.add_image('Attention_dec_%d_0' % global_step, x, i * 4 + j)

            optimizer.zero_grad()
            # Calculate gradients
            loss.backward()
            nn.utils.clip_grad_norm_(m.parameters(), 1.)
            # Update weights
            optimizer.step()

            if global_step % hp.save_step == 0:
                t.save({'model': m.state_dict(), 'optimizer': optimizer.state_dict()},
                       os.path.join(hp.checkpoint_path,
                                    'checkpoint_transformer_%d.pth.tar' % global_step))