def evaluator(args):
    """Interactive REPL: read whitespace-separated token ids from stdin,
    decode them with the trained (m0, m1) pair, and print the result.

    Exits cleanly on Ctrl-C or EOF. Requires ``args.checkpoint`` to hold
    state dicts under keys "m0" and "m1"; prints an error otherwise.
    """
    m0 = EncoderDecoder(args.vocab_size, args.embedding_size,
                        args.hidden_size, args.num_layers,
                        args.dropout, args.bidirectional)
    # dim=1 matches the (batch, vocab) output layout and the training-time
    # definition of m1; the implicit-dim form is deprecated in PyTorch.
    m1 = nn.Sequential(nn.Linear(args.hidden_size, args.vocab_size),
                       nn.LogSoftmax(dim=1))
    if os.path.isfile(args.checkpoint):
        print("=> loading checkpoint '{}'".format(args.checkpoint))
        checkpoint = torch.load(args.checkpoint)
        m0.load_state_dict(checkpoint["m0"])
        m1.load_state_dict(checkpoint["m1"])
        while True:
            try:
                print("> ", end="")
                src = input()
                src = [int(x) for x in src.split()]
                trg = evaluate(src, (m0, m1), args.max_length)
                print(" ".join(map(str, trg)))
            # EOFError: input() raises it when stdin is closed (e.g. piped
            # input exhausted); treat it like Ctrl-C instead of crashing.
            except (KeyboardInterrupt, EOFError):
                break
    else:
        print("=> no checkpoint found at '{}'".format(args.checkpoint))
def make_model(src_vocab, tgt_vocab, N=6, d_model=512, d_ff=2048, h=8,
               dropout=0.1):
    """Assemble a full encoder-decoder Transformer from hyperparameters.

    Args:
        src_vocab: source vocabulary size.
        tgt_vocab: target vocabulary size.
        N: number of encoder and decoder layers.
        d_model: model (embedding) dimension.
        d_ff: inner dimension of the position-wise feed-forward sublayer.
        h: number of attention heads.
        dropout: dropout probability used throughout.

    Returns:
        The initialized EncoderDecoder model.
    """
    clone = copy.deepcopy
    attention = MultiHeadAttention(h, d_model)
    feed_forward = PositionWiseFeedForward(d_model, d_ff, dropout)
    pos_encoding = PositionalEncoding(d_model, dropout)

    encoder = Encoder(
        EncoderLayer(d_model, clone(attention), clone(feed_forward), dropout), N)
    decoder = Decoder(
        DecoderLayer(d_model, clone(attention), clone(attention),
                     clone(feed_forward), dropout), N)
    src_embed = nn.Sequential(Embeddings(d_model, src_vocab), clone(pos_encoding))
    tgt_embed = nn.Sequential(Embeddings(d_model, tgt_vocab), clone(pos_encoding))
    model = EncoderDecoder(encoder, decoder, src_embed, tgt_embed,
                           Generator(d_model, tgt_vocab))

    # Glorot / fan_avg initialization for all weight matrices, as in the
    # reference ("The Annotated Transformer") implementation.
    for param in model.parameters():
        if param.dim() > 1:
            nn.init.xavier_uniform_(param)
    return model
def t2vec(args):
    """Encode trajectories from "<prefix>-trj.t" and write per-layer hidden
    states into "<prefix>-trj.h5" (datasets "layer1".."layerN").

    Returns the top-layer vectors, shape (num_seqs, hidden_size *
    num_directions), or None when no checkpoint is found.
    """
    m0 = EncoderDecoder(args.vocab_size, args.embedding_size,
                        args.hidden_size, args.num_layers,
                        args.dropout, args.bidirectional)
    if not os.path.isfile(args.checkpoint):
        # Bug fix: previously control fell through to `return vecs[...]`
        # with `vecs` unbound, raising UnboundLocalError.
        print("=> no checkpoint found at '{}'".format(args.checkpoint))
        return None

    print("=> loading checkpoint '{}'".format(args.checkpoint))
    checkpoint = torch.load(args.checkpoint)
    m0.load_state_dict(checkpoint["m0"])
    if torch.cuda.is_available():
        m0.cuda()
    m0.eval()

    vecs = []
    scaner = DataOrderScaner(
        os.path.join(args.data, "{}-trj.t".format(args.prefix)),
        args.t2vec_batch)
    scaner.load()
    i = 0
    while True:
        if i % 100 == 0:
            print("{}: Encoding {} trjs...".format(i, args.t2vec_batch))
        i = i + 1
        # src: (max_seq_len_in_group, num_seqs_in_group)
        src, lengths, invp = scaner.getbatch()
        if src is None:
            break
        if torch.cuda.is_available():
            src, lengths, invp = src.cuda(), lengths.cuda(), invp.cuda()
        # h: (num_layers * num_directions, batch, hidden_size)
        h, _ = m0.encoder(src, lengths)
        # -> (num_layers, batch, hidden_size * num_directions)
        h = m0.encoder_hn2decoder_h0(h)
        # -> (batch, num_layers, hidden_size * num_directions)
        h = h.transpose(0, 1).contiguous()
        # invp restores the original (pre-sorting) order within the batch
        vecs.append(h[invp].cpu().data)

    # (num_seqs, num_layers, hidden_size * num_directions)
    vecs = torch.cat(vecs)
    # (num_layers, num_seqs, hidden_size * num_directions)
    vecs = vecs.transpose(0, 1).contiguous()
    path = os.path.join(args.data, "{}-trj.h5".format(args.prefix))
    print("=> saving vectors into {}".format(path))
    # One dataset per layer: each holds num_seqs vectors of the layer's width.
    with h5py.File(path, "w") as f:
        for i in range(m0.num_layers):
            f["layer" + str(i + 1)] = vecs[i].squeeze(0).numpy()
    return vecs[m0.num_layers - 1]
def experiments():
    """Flip one binary attribute on every test batch and save the model's
    reconstructions for visual inspection."""
    use_cuda = True
    num_attr = 39
    to_swap = 'Blond_Hair'
    #to_swap = '5_o_Clock_Shadow'
    encoder_decoder_fpath = join('data', 'weights', 'adver.params')

    encoder_decoder = EncoderDecoder(num_attr)
    encoder_decoder.load_state_dict(torch.load(encoder_decoder_fpath))
    if use_cuda:
        encoder_decoder.cuda()

    _, _, test = split_train_val_test('data')
    test_iter = DataLoader(test, batch_size=32, shuffle=False)
    swap_idx, = np.where(test.attribute_names == to_swap)[0]

    encoder_decoder.eval()
    for batch_no, (x, yb, yt) in enumerate(test_iter, start=1):
        # Flip the attribute's one-hot pair: (0, 1) <-> (1, 0).
        yb[:, 2 * swap_idx] = 1 - yb[:, 2 * swap_idx]
        yb[:, 2 * swap_idx + 1] = 1 - yb[:, 2 * swap_idx + 1]
        if use_cuda:
            x, yb, yt = x.cuda(), yb.cuda(), yt.cuda()
        x, yb, yt = Variable(x), Variable(yb), Variable(yt)
        _, x_hat = encoder_decoder(x, yb)
        plot_samples(x, x_hat, prefix='test_%d' % (batch_no))
def create_model(self) -> torch.nn.Module:
    """Build the seq2seq EncoderDecoder from the training set's vocabulary
    sizes and the options stored on the helper."""
    opt = self.helper.opt
    in_vocab_size = len(self.train_set.in_vocabulary)
    out_vocab_size = len(self.train_set.out_vocabulary)
    return EncoderDecoder(in_vocab_size,
                          out_vocab_size,
                          opt.state_size,
                          opt.n_layers,
                          opt.encoder_decoder.embedding_size,
                          opt.dropout,
                          self.train_set.max_out_len)
def t2vec(args):
    """Encode all trajectories in "trj.t" with the trained encoder and write
    every layer's hidden states into "trj.h5" (datasets "layer1".."layerN").

    Loads model weights from args.checkpoint; prints a message and does
    nothing when the checkpoint file does not exist.
    """
    m0 = EncoderDecoder(args.vocab_size, args.embedding_size,
                        args.hidden_size, args.num_layers,
                        args.dropout, args.bidirectional)
    if os.path.isfile(args.checkpoint):
        print("=> loading checkpoint '{}'".format(args.checkpoint))
        checkpoint = torch.load(args.checkpoint)
        m0.load_state_dict(checkpoint["m0"])
        if torch.cuda.is_available():
            m0.cuda()
        m0.eval()

        vecs = []
        scaner = DataOrderScaner(os.path.join(args.data, "trj.t"),
                                 args.t2vec_batch)
        scaner.load()
        i = 0
        while True:
            if i % 10 == 0:
                print("{}: Encoding {} trjs...".format(i, args.t2vec_batch))
            i = i + 1
            src, lengths, invp = scaner.getbatch()
            if src is None:
                # scanner exhausted -- all trajectories encoded
                break
            src, lengths = Variable(src), Variable(lengths)
            if torch.cuda.is_available():
                src, lengths, invp = src.cuda(), lengths.cuda(), invp.cuda()
            # h: encoder final hidden states for the batch
            h, _ = m0.encoder(src, lengths)
            ## (num_layers, batch, hidden_size * num_directions)
            h = m0.encoder_hn2decoder_h0(h)
            ## (batch, num_layers, hidden_size * num_directions)
            h = h.transpose(0, 1).contiguous()
            ## (batch, *)
            #h = h.view(h.size(0), -1)
            # invp restores the original (pre-sorting) order of the batch
            vecs.append(h[invp].cpu().data)
        ## (num_seqs, num_layers, hidden_size * num_directions)
        vecs = torch.cat(vecs)
        ## (num_layers, num_seqs, hidden_size * num_directions)
        vecs = vecs.transpose(0, 1).contiguous()
        path = os.path.join(args.data, "trj.h5")
        print("=> saving vectors into {}".format(path))
        # One HDF5 dataset per layer of the (restructured) hidden state.
        with h5py.File(path, "w") as f:
            for i in range(m0.num_layers):
                f["layer" + str(i + 1)] = vecs[i].squeeze(0).numpy()
        #torch.save(vecs.data, path)
        #return vecs.data
    else:
        print("=> no checkpoint found at '{}'".format(args.checkpoint))
def getPredict(src, args):
    """Decode a single source token sequence with the trained (m0, m1) pair.

    Args:
        src: source sequence accepted by ``evaluate``.
        args: namespace with model hyperparameters and ``checkpoint`` path.

    Returns:
        The prediction as a list of plain Python lists (one per decoded
        element), or an empty list when no checkpoint is available.
    """
    m0 = EncoderDecoder(args.vocab_size, args.embedding_size,
                        args.hidden_size, args.num_layers,
                        args.dropout, args.bidirectional)
    # dim=1 matches the (batch, vocab) output layout; the implicit-dim form
    # is deprecated in PyTorch and resolves to the same dimension here.
    m1 = nn.Sequential(nn.Linear(args.hidden_size, args.vocab_size),
                       nn.LogSoftmax(dim=1))
    trg = []
    if os.path.isfile(args.checkpoint):
        print("=> loading checkpoint '{}'".format(args.checkpoint))
        checkpoint = torch.load(args.checkpoint)
        m0.load_state_dict(checkpoint["m0"])
        m1.load_state_dict(checkpoint["m1"])
        print("> ", end="")
        trg = evaluate(src, (m0, m1), args.max_length)
        # Convert each tensor element to a plain list for serialization.
        trg = [t.tolist() for t in trg]
        print(trg)
    else:
        print("=> no checkpoint found at '{}'".format(args.checkpoint))
    return trg
def model_init(args):
    """Construct the EncoderDecoder and, when ``args.checkpoint`` exists,
    restore its weights (CPU-mapped) and put it in eval mode.

    The model is returned in both cases; without a checkpoint it keeps its
    freshly-initialized weights.
    """
    model = EncoderDecoder(args.vocab_size, args.embedding_size,
                           args.hidden_size, args.num_layers,
                           args.dropout, args.bidirectional)
    if not os.path.isfile(args.checkpoint):
        print("=> no checkpoint found at '{}'".format(args.checkpoint))
        return model

    state = torch.load(args.checkpoint, map_location='cpu')
    model.load_state_dict(state["m0"])
    if torch.cuda.is_available():
        print('mo by cuda')
        model.cuda()
    model.eval()
    return model
def test():
    """Evaluate a saved summarization checkpoint on the test split.

    Rebuilds the vocabulary from all three splits (so indices match
    training), loads the model named by ``args.load_model``, and prints
    loss plus ROUGE-1/2/L from ``evaluate``.
    """

    def _load_split(data_dir, vocab, out):
        # One numbered JSON file per record; sort numerically so record
        # order is stable, then grow the vocab from each record's text.
        fns = os.listdir(data_dir)
        fns.sort(key=lambda p: int(p.split('.')[0]))
        for fn in tqdm(fns):
            with open(data_dir + fn, 'r') as f:
                out.append(json.load(f))
            vocab.add_sentence(out[-1]['reviewText'].split())
            vocab.add_sentence(out[-1]['summary'].split())

    print('Loading vocab and test dataset...')
    embed = {}
    with open(args.embed_path, 'r') as f:
        f.readline()  # skip the header line of the embedding file
        for line in f.readlines():
            line = line.strip().split()
            embed[line[0]] = [float(v) for v in line[1:]]
    vocab = Vocab(args, embed)

    train_data, val_data, test_data = [], [], []
    # All three splits must feed the vocab so token ids match training.
    _load_split(args.train_dir, vocab, train_data)
    _load_split(args.valid_dir, vocab, val_data)
    _load_split(args.test_dir, vocab, test_data)

    embed = vocab.trim()
    args.embed_num = len(embed)
    args.embed_dim = len(embed[0])

    test_dataset = Dataset(test_data)
    test_iter = DataLoader(dataset=test_dataset,
                           batch_size=args.batch_size,
                           shuffle=False)

    print('Loading model...')
    checkpoint = torch.load(args.save_path + args.load_model)
    # Rebuild the model from the hyperparameters saved in the checkpoint.
    net = EncoderDecoder(checkpoint['args'], embed)
    net.load_state_dict(checkpoint['model'])
    if args.use_cuda:
        net.cuda()
    criterion = nn.NLLLoss(ignore_index=vocab.PAD_IDX, reduction='sum')

    print('Begin testing...')
    loss, r1, r2, rl = evaluate(net, criterion, vocab, test_iter, False)
    print('Loss: %f Rouge-1: %f Rouge-2: %f Rouge-l: %f' % (loss, r1, r2, rl))
def train(args):
    """Train the t2vec seq2seq model: m0 (EncoderDecoder) plus m1 (the
    hidden-to-vocab projection with log-softmax).

    Resumes from ``args.checkpoint`` when it exists, validates every
    ``args.save_freq`` iterations, and saves the best checkpoint by
    validation loss. Ctrl-C exits the training loop cleanly.
    """
    logging.basicConfig(filename="training.log", level=logging.INFO)

    trainsrc = os.path.join(args.data, "train.src")
    traintrg = os.path.join(args.data, "train.trg")
    trainData = DataLoader(trainsrc, traintrg, args.batch, args.bucketsize)
    print("Reading training data...")
    trainData.load(args.max_num_line)
    print("Allocation: {}".format(trainData.allocation))
    print("Percent: {}".format(trainData.p))

    valsrc = os.path.join(args.data, "val.src")
    valtrg = os.path.join(args.data, "val.trg")
    if os.path.isfile(valsrc) and os.path.isfile(valtrg):
        valData = DataLoader(valsrc, valtrg, args.batch, args.bucketsize, True)
        print("Reading validation data...")
        valData.load()
        assert valData.size > 0, "Validation data size must be greater than 0"
        print("Loaded validation data size {}".format(valData.size))
    else:
        # NOTE(review): valData stays undefined on this path; validate()
        # below would then raise NameError at the first save step -- confirm
        # validation data is expected to always exist when save_freq hits.
        print("No validation data found, training without validating...")

    ## create criterion, model, optimizer
    if args.criterion_name == "NLL":
        criterion = NLLcriterion(args.vocab_size)
        lossF = lambda o, t: criterion(o, t)
    else:
        # KL-divergence loss weighted by spatial distance between cells.
        assert os.path.isfile(args.knearestvocabs),\
            "{} does not exist".format(args.knearestvocabs)
        print("Loading vocab distance file {}...".format(args.knearestvocabs))
        # NOTE(review): no mode argument -- relies on h5py's legacy default
        # mode; 'r' is presumably intended.
        with h5py.File(args.knearestvocabs) as f:
            # V: k-nearest vocab ids, D: corresponding distances
            V, D = f["V"][...], f["D"][...]
        V, D = torch.LongTensor(V), torch.FloatTensor(D)
        D = dist2weight(D, args.dist_decay_speed)
        if args.cuda and torch.cuda.is_available():
            V, D = V.cuda(), D.cuda()
        criterion = KLDIVcriterion(args.vocab_size)
        lossF = lambda o, t: KLDIVloss(o, t, criterion, V, D)

    m0 = EncoderDecoder(args.vocab_size, args.embedding_size,
                        args.hidden_size, args.num_layers,
                        args.dropout, args.bidirectional)
    # Projects decoder hidden states to log-probabilities over the vocab.
    m1 = nn.Sequential(nn.Linear(args.hidden_size, args.vocab_size),
                       nn.LogSoftmax(dim=1))
    if args.cuda and torch.cuda.is_available():
        print("=> training with GPU")
        m0.cuda()
        m1.cuda()
        criterion.cuda()
        #m0 = nn.DataParallel(m0, dim=1)
    else:
        print("=> training with CPU")

    m0_optimizer = torch.optim.Adam(m0.parameters(), lr=args.learning_rate)
    m1_optimizer = torch.optim.Adam(m1.parameters(), lr=args.learning_rate)

    ## load model state and optmizer state
    if os.path.isfile(args.checkpoint):
        print("=> loading checkpoint '{}'".format(args.checkpoint))
        logging.info("Restore training @ {}".format(time.ctime()))
        checkpoint = torch.load(args.checkpoint)
        args.start_iteration = checkpoint["iteration"]
        best_prec_loss = checkpoint["best_prec_loss"]
        m0.load_state_dict(checkpoint["m0"])
        m1.load_state_dict(checkpoint["m1"])
        m0_optimizer.load_state_dict(checkpoint["m0_optimizer"])
        m1_optimizer.load_state_dict(checkpoint["m1_optimizer"])
    else:
        print("=> no checkpoint found at '{}'".format(args.checkpoint))
        logging.info("Start training @ {}".format(time.ctime()))
        best_prec_loss = float('inf')
        #print("=> initializing the parameters...")
        #init_parameters(m0)
        #init_parameters(m1)
        ## here: load pretrained wrod (cell) embedding

    num_iteration = args.epochs * sum(trainData.allocation) // args.batch
    print("Iteration starts at {} "
          "and will end at {}".format(args.start_iteration, num_iteration-1))

    ## training
    for iteration in range(args.start_iteration, num_iteration):
        try:
            input, lengths, target = trainData.getbatch()
            if args.cuda and torch.cuda.is_available():
                input, lengths, target = input.cuda(), lengths.cuda(), target.cuda()
            m0_optimizer.zero_grad()
            m1_optimizer.zero_grad()
            ## forward computation
            output = m0(input, lengths, target)
            loss = batchloss(output, target, m1, lossF, args.generator_batch)
            ## compute the gradients
            loss.backward()
            ## clip the gradients
            clip_grad_norm_(m0.parameters(), args.max_grad_norm)
            clip_grad_norm_(m1.parameters(), args.max_grad_norm)
            ## one step optimization
            m0_optimizer.step()
            m1_optimizer.step()
            ## average loss for one word
            avg_loss = loss.item() / target.size(0)
            if iteration % args.print_freq == 0:
                print("Iteration: {}\tLoss: {}".format(iteration, avg_loss))
            if iteration % args.save_freq == 0 and iteration > 0:
                prec_loss = validate(valData, (m0, m1), lossF, args)
                if prec_loss < best_prec_loss:
                    best_prec_loss = prec_loss
                    logging.info("Best model with loss {} at iteration {} @ {}"\
                        .format(best_prec_loss, iteration, time.ctime()))
                    is_best = True
                else:
                    is_best = False
                print("Saving the model at iteration {} validation loss {}"\
                    .format(iteration, prec_loss))
                savecheckpoint({
                    "iteration": iteration,
                    "best_prec_loss": best_prec_loss,
                    "m0": m0.state_dict(),
                    "m1": m1.state_dict(),
                    "m0_optimizer": m0_optimizer.state_dict(),
                    "m1_optimizer": m1_optimizer.state_dict()
                }, is_best)
        except KeyboardInterrupt:
            break
def set_model(self):
    """Build both cross-modal EncoderDecoder models and their shared
    optimizer (apex FusedAdam, BertAdam, or Lamb/adamW per config),
    then optionally resume from ``self.args.resume``.
    """
    print('[Runner] - Initializing Transformer model...')
    # text_model performs speech-to-text; speech_model performs
    # text-to-speech (translated from the original Chinese comment).
    self.text_model = EncoderDecoder(
        encoder_config=self.speech_encoder_config,
        decoder_config=self.text_decoder_config,
        modality="text")
    self.text_model.to(self.device)
    self.text_model.train()
    self.speech_model = EncoderDecoder(
        encoder_config=self.text_encoder_config,
        decoder_config=self.speech_decoder_config,
        modality="speech")
    self.speech_model.to(self.device)
    self.speech_model.train()

    if self.args.multi_gpu:
        self.text_model = torch.nn.DataParallel(self.text_model)
        self.speech_model = torch.nn.DataParallel(self.speech_model)
        print('[Runner] - Multi-GPU training Enabled: ' +
              str(torch.cuda.device_count()))
    print('[Runner] - Number of parameters: ' + str(
        sum(p.numel() for p in self.text_model.parameters()
            if p.requires_grad) + \
        sum(p.numel() for p in self.speech_model.parameters()
            if p.requires_grad)))

    # Both models share one optimizer; weight decay is disabled for biases
    # and LayerNorm parameters, following the BERT recipe.
    param_optimizer = list(self.text_model.named_parameters()) + list(
        self.speech_model.named_parameters())
    no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
    optimizer_grouped_parameters = [{
        'params': [
            p for n, p in param_optimizer
            if not any(nd in n for nd in no_decay)
        ],
        'weight_decay': 0.01
    }, {
        'params':
        [p for n, p in param_optimizer if any(nd in n for nd in no_decay)],
        'weight_decay': 0.0
    }]

    if 'type' not in self.config['optimizer']:
        self.config['optimizer']['type'] = 'adam'  # default optimizer
    print('[Runner] - Optimizer: ' +
          ('apex Fused Adam' if self.apex else
           str(self.config['optimizer']['type'])))
    if self.apex:
        # apex path: FusedAdam wrapped by FP16_Optimizer (static or dynamic
        # loss scaling depending on config).
        try:
            from apex.optimizers import FP16_Optimizer
            from apex.optimizers import FusedAdam
        except ImportError:
            raise ImportError(
                "Please install apex from https://www.github.com/nvidia/apex to use distributed and fp16 training."
            )
        optimizer = FusedAdam(optimizer_grouped_parameters,
                              lr=self.learning_rate,
                              bias_correction=False,
                              max_grad_norm=1.0)
        if self.config['optimizer']['loss_scale'] == 0:
            self.optimizer = FP16_Optimizer(optimizer,
                                            dynamic_loss_scale=True)
        else:
            self.optimizer = FP16_Optimizer(
                optimizer,
                static_loss_scale=self.config['optimizer']['loss_scale'])
        self.warmup_linear = WarmupLinearSchedule(
            warmup=self.warmup_proportion, t_total=self.total_steps)
    elif self.config['optimizer']['type'] == 'adam':
        self.optimizer = BertAdam(optimizer_grouped_parameters,
                                  lr=self.learning_rate,
                                  warmup=self.warmup_proportion,
                                  t_total=self.total_steps,
                                  schedule='warmup_linear')
    elif self.config['optimizer']['type'] == 'lamb' or self.config[
            'optimizer']['type'] == 'adamW':
        # Lamb doubles as AdamW when adam=True/correct_bias=True.
        self.optimizer = Lamb(
            optimizer_grouped_parameters,
            lr=self.learning_rate,
            warmup=self.warmup_proportion,
            t_total=self.total_steps,
            schedule='warmup_linear',
            adam=True
            if self.config['optimizer']['type'] == 'adamW' else False,
            correct_bias=True
            if self.config['optimizer']['type'] == 'adamW' else False)
    else:
        raise NotImplementedError()

    if self.args.resume is not None:
        self.load_model(self.args.resume)
def train_fader_network():
    """Adversarial (fader-network-style) training loop.

    Alternates an encoder/decoder reconstruction+adversarial step with a
    discriminator step each batch; runs a validation pass per epoch and,
    every ``sample_every`` epochs, writes attribute-swapped test samples.
    Weights are saved on exit (including Ctrl-C / RuntimeError).
    """
    gpu_id = 1
    use_cuda = True
    data_dir = 'data'
    sample_every = 10
    test_dir = join(data_dir, 'test-samples')
    encoder_decoder_fpath = join(data_dir, 'weights', 'adver.params')
    discriminator_fpath = join(data_dir, 'weights', 'discr.params')

    train, valid, test = split_train_val_test(data_dir)
    num_attr = train.attribute_names.shape[0]

    encoder_decoder = EncoderDecoder(num_attr, gpu_id=gpu_id)
    discriminator = Discriminator(num_attr)
    if use_cuda:
        encoder_decoder.cuda(gpu_id)
        discriminator.cuda(gpu_id)

    train_iter = DataLoader(train, batch_size=32, shuffle=True,
                            num_workers=8)
    valid_iter = DataLoader(valid, batch_size=32, shuffle=False,
                            num_workers=8)
    test_iter = DataLoader(test, batch_size=32, shuffle=False,
                           num_workers=8)

    max_epochs = 1000
    lr, beta1 = 2e-3, 0.5
    adversarial_optimizer = optim.Adam(encoder_decoder.parameters(),
                                       lr=lr, betas=(beta1, 0.999))
    discriminator_optimizer = optim.Adam(discriminator.parameters(),
                                         lr=lr, betas=(beta1, 0.999))
    mse_loss = nn.MSELoss(size_average=True)
    bce_loss = nn.BCELoss(size_average=True)

    num_iters = 0
    # Adversarial weight ramps linearly from 0 to 1e-4 over 500k iters.
    lambda_e = np.linspace(0, 1e-4, 500000)
    try:
        for epoch in range(1, max_epochs):
            encoder_decoder.train()
            discriminator.train()
            for iteration, (x, yb, yt, _) in enumerate(train_iter, start=1):
                if use_cuda:
                    x = x.cuda(gpu_id)
                    yb, yt = yb.cuda(gpu_id), yt.cuda(gpu_id)
                x, yb, yt = Variable(x), Variable(yb), Variable(yt)
                #print yb.data.cpu().numpy().shape
                #print yt.data.cpu().numpy().shape
                adversarial_optimizer.zero_grad()
                z, x_hat = encoder_decoder(x, yb)
                #if (epoch == 1) or (epoch % sample_every == 0):
                #if (epoch % sample_every == 0):
                #    plot_samples(x, x_hat, prefix='train_%d_%d' % (
                #        epoch, iteration))
                # send the output of the encoder as a new Variable that is not
                # part of the backward pass
                # not sure if this is the correct way to do so
                # https://discuss.pytorch.org/t/how-to-copy-a-variable-in-a-network-graph/1603/9
                z_in = Variable(z.data, requires_grad=False)
                discriminator_optimizer.zero_grad()
                y_hat = discriminator(z_in)
                # adversarial loss
                # NOTE(review): y_in is a detached copy of y_hat, so the BCE
                # term below is constant w.r.t. the encoder's parameters --
                # only the MSE term produces encoder gradients. Confirm this
                # matches the intended fader-network objective.
                y_in = Variable(y_hat.data, requires_grad=False)
                le_idx = min(500000 - 1, num_iters)
                le_val = Variable(
                    torch.FloatTensor([lambda_e[le_idx]]).float(),
                    requires_grad=False)
                if use_cuda:
                    le_val = le_val.cuda(gpu_id)
                advers_loss = mse_loss(x_hat, x) +\
                    le_val * bce_loss(y_in, 1 - yt)
                advers_loss.backward()
                adversarial_optimizer.step()
                # discriminative loss
                discrim_loss = bce_loss(y_hat, yt)
                discrim_loss.backward()
                discriminator_optimizer.step()
                print(' Train epoch %d, iter %d (lambda_e = %.2e)' % (
                    epoch, iteration, le_val.data[0]))
                print(' adv. loss = %.6f' % (advers_loss.data[0]))
                print(' dsc. loss = %.6f' % (discrim_loss.data[0]))
                num_iters += 1

            # Per-epoch validation pass (no optimizer steps).
            encoder_decoder.eval()
            discriminator.eval()
            for iteration, (x, yb, yt, _) in enumerate(valid_iter, start=1):
                if use_cuda:
                    x = x.cuda(gpu_id)
                    yb, yt = yb.cuda(gpu_id), yt.cuda(gpu_id)
                x, yb, yt = Variable(x), Variable(yb), Variable(yt)
                z, x_hat = encoder_decoder(x, yb)
                #plot_samples(x, x_hat, prefix='valid_%d_%d' % (
                #    epoch, iteration))
                z_in = Variable(z.data, requires_grad=False)
                y_hat = discriminator(z_in)
                y_in = Variable(y_hat.data, requires_grad=False)
                # le_val carries over from the last training batch.
                valid_advers_loss = mse_loss(x_hat, x) +\
                    le_val * bce_loss(y_in, 1 - yt)
                valid_discrim_loss = bce_loss(y_hat, yt)
                print(' Valid epoch %d, iter %d (lambda_e = %.2e)' % (
                    epoch, iteration, le_val.data[0]))
                print(' adv. loss = %.6f' % (valid_advers_loss.data[0]))
                print(' dsc. loss = %.6f' % (valid_discrim_loss.data[0]))

            if (epoch % sample_every == 0):
                # Periodically dump attribute-swapped reconstructions of the
                # test set for qualitative inspection.
                encoder_decoder.eval()
                for iteration, (x, yb, ys, fp) in enumerate(test_iter, 1):
                    # randomly choose an attribute and swap the targets
                    to_swap = np.random.choice(test.attribute_names)
                    swap_idx, = np.where(test.attribute_names == to_swap)[0]
                    # map (0, 1) --> (1, 0), and (1, 0) --> (0, 1)
                    yb[:, 2 * swap_idx] = 1 - yb[:, 2 * swap_idx]
                    yb[:, 2 * swap_idx + 1] = 1 - yb[:, 2 * swap_idx + 1]
                    if use_cuda:
                        x, yb = x.cuda(gpu_id), yb.cuda(gpu_id)
                    x, yb = Variable(x), Variable(yb)
                    _, x_hat = encoder_decoder(x, yb)
                    sample_dir = join(test_dir, '%s' % epoch,
                                      '%s' % to_swap)
                    if not exists(sample_dir):
                        makedirs(sample_dir)
                    fnames = ['%s.png' % splitext(basename(f))[0]
                              for f in fp]
                    fpaths = [join(sample_dir, f) for f in fnames]
                    plot_samples(x, x_hat, fpaths)
    except KeyboardInterrupt:
        print('Caught Ctrl-C, interrupting training.')
    except RuntimeError:
        print('RuntimeError')
    # Always persist weights, even after an interrupt.
    print('Saving encoder/decoder parameters to %s' %
          (encoder_decoder_fpath))
    torch.save(encoder_decoder.state_dict(), encoder_decoder_fpath)
    print('Saving discriminator parameters to %s' % (discriminator_fpath))
    torch.save(discriminator.state_dict(), discriminator_fpath)
def train():
    """Train the personalized review-summarization model.

    Builds the vocabulary from all three splits (plus optional pretrained
    embeddings), optionally resumes from ``args.load_model``, and runs the
    NLL training loop with periodic validation/checkpointing.
    """
    embed = None
    if args.embed_path is not None and os.path.exists(args.embed_path):
        print('Loading pretrained word embedding...')
        embed = {}
        with open(args.embed_path, 'r') as f:
            f.readline()  # skip the embedding-file header line
            for line in f.readlines():
                line = line.strip().split()
                vec = [float(_) for _ in line[1:]]
                embed[line[0]] = vec
    vocab = Vocab(args, embed)

    print('Loading datasets...')
    train_data, val_data, test_data = [], [], []
    # One numbered JSON file per record; sort numerically for stable order.
    fns = os.listdir(args.train_dir)
    fns.sort(key=lambda p: int(p.split('.')[0]))
    for fn in tqdm(fns):
        f = open(args.train_dir + fn, 'r')
        train_data.append(json.load(f))
        f.close()
        vocab.add_sentence(train_data[-1]['reviewText'].split())
        vocab.add_sentence(train_data[-1]['summary'].split())
        vocab.add_user(train_data[-1]['userID'])
        vocab.add_product(train_data[-1]['productID'])
    fns = os.listdir(args.valid_dir)
    fns.sort(key=lambda p: int(p.split('.')[0]))
    for fn in tqdm(fns):
        f = open(args.valid_dir + fn, 'r')
        val_data.append(json.load(f))
        f.close()
        vocab.add_sentence(val_data[-1]['reviewText'].split())
        vocab.add_sentence(val_data[-1]['summary'].split())
        vocab.add_user(val_data[-1]['userID'])
        vocab.add_product(val_data[-1]['productID'])
    fns = os.listdir(args.test_dir)
    fns.sort(key=lambda p: int(p.split('.')[0]))
    for fn in tqdm(fns):
        f = open(args.test_dir + fn, 'r')
        test_data.append(json.load(f))
        f.close()
        vocab.add_sentence(test_data[-1]['reviewText'].split())
        vocab.add_sentence(test_data[-1]['summary'].split())
        vocab.add_user(test_data[-1]['userID'])
        vocab.add_product(test_data[-1]['productID'])

    print('Deleting rare words...')
    embed = vocab.trim()
    args.embed_num = len(embed)
    args.embed_dim = len(embed[0])
    args.user_num = vocab.user_num
    args.product_num = vocab.product_num

    train_dataset = Dataset(train_data)
    val_dataset = Dataset(val_data)
    train_iter = DataLoader(dataset=train_dataset,
                            batch_size=args.batch_size,
                            shuffle=True)
    val_iter = DataLoader(dataset=val_dataset,
                          batch_size=args.batch_size,
                          shuffle=False)

    net = EncoderDecoder(args, embed)
    if args.load_model is not None:
        # Rebuild from checkpoint hyperparameters, replacing the fresh net.
        print('Loading model...')
        checkpoint = torch.load(args.save_path + args.load_model)
        net = EncoderDecoder(checkpoint['args'], embed)
        net.load_state_dict(checkpoint['model'])
    if args.use_cuda:
        net.cuda()
    criterion = nn.NLLLoss(ignore_index=vocab.PAD_IDX, reduction='sum')
    # NOTE: shadows the torch.optim module name within this function.
    optim = torch.optim.Adam(net.parameters(), lr=args.lr)

    print('Begin training...')
    for epoch in range(args.begin_epoch, args.epochs + 1):
        if epoch >= args.lr_decay_start:
            adjust_learning_rate(optim, epoch - args.lr_decay_start + 1)
        for i, batch in enumerate(train_iter):
            src, trg, src_embed, trg_embed, src_user, src_product, src_mask, src_lens, trg_lens, _1, _2 = vocab.read_batch(
                batch)
            pre_output = net(src, trg, src_embed, trg_embed, src_user,
                             src_product, vocab.word_num, src_mask,
                             src_lens, trg_lens)
            # epsilon avoids log(0) on zero probabilities
            pre_output = torch.log(
                pre_output.view(-1, pre_output.size(-1)) + 1e-20)
            trg_output = trg.view(-1)
            # normalize summed NLL by the number of sequences in the batch
            loss = criterion(pre_output, trg_output) / len(src_lens)
            loss.backward()
            clip_grad_norm_(net.parameters(), args.max_norm)
            optim.step()
            optim.zero_grad()
            cnt = (epoch - 1) * len(train_iter) + i
            if cnt % args.print_every == 0:
                print('EPOCH [%d/%d]: BATCH_ID=[%d/%d] loss=%f' %
                      (epoch, args.epochs, i, len(train_iter), loss.data))
            if cnt % args.valid_every == 0:
                print('Begin valid... Epoch %d, Batch %d' % (epoch, i))
                cur_loss, r1, r2, rl = evaluate(net, criterion, vocab,
                                                val_iter, True)
                save_path = args.save_path + 'valid_%d_%.4f_%.4f_%.4f_%.4f' % (
                    cnt / args.valid_every, cur_loss, r1, r2, rl)
                net.save(save_path)
                print(
                    'Epoch: %2d Cur_Val_Loss: %f Rouge-1: %f Rouge-2: %f Rouge-l: %f'
                    % (epoch, cur_loss, r1, r2, rl))
    return
def t2vec(args):
    """Encode trajectories from "trj-<city>-<data><grid>.t" and save each
    layer's hidden states into "traj_emb/trj_<city>_<data><grid>.h5".

    Also reports wall-clock time spent inside the encoder. Prints a message
    and does nothing when args.checkpoint does not exist.
    """
    m0 = EncoderDecoder(args.vocab_size, args.embedding_size,
                        args.hidden_size, args.num_layers,
                        args.dropout, args.bidirectional)
    if os.path.isfile(args.checkpoint):
        print("=> loading checkpoint '{}'".format(args.checkpoint))
        checkpoint = torch.load(args.checkpoint)
        m0.load_state_dict(checkpoint["m0"])
        if torch.cuda.is_available():
            m0.cuda()
        m0.eval()

        vecs = []
        scaner = DataOrderScaner(
            os.path.join(
                args.data,
                "trj-{}-{}{:.1f}.t".format(args.cityname, args.encode_data,
                                           args.grid_size)),
            args.t2vec_batch)
        scaner.load()
        i = 0
        encode_time = 0.0  # cumulative encoder wall-clock time
        while True:
            if i % 10 == 0:
                print("{}: Encoding {} trjs...".format(i, args.t2vec_batch))
            i = i + 1
            src, lengths, invp = scaner.getbatch()
            if src is None:
                # scanner exhausted -- all trajectories encoded
                break
            if torch.cuda.is_available():
                src, lengths, invp = src.cuda(), lengths.cuda(), invp.cuda()
            start_time = time.time()
            h, _ = m0.encoder(src, lengths)
            ## (num_layers, batch, hidden_size * num_directions)
            h = m0.encoder_hn2decoder_h0(h)
            ## (batch, num_layers, hidden_size * num_directions)
            h = h.transpose(0, 1).contiguous()
            end_time = time.time()
            encode_time += end_time - start_time
            ## (batch, *)
            #h = h.view(h.size(0), -1)
            # invp restores the original (pre-sorting) order of the batch
            vecs.append(h[invp].cpu().data)
        print("Encode data time is : ", encode_time)
        ## (num_seqs, num_layers, hidden_size * num_directions)
        vecs = torch.cat(vecs)
        ## (num_layers, num_seqs, hidden_size * num_directions)
        vecs = vecs.transpose(0, 1).contiguous()
        if not os.path.exists(os.path.join(args.data, 'traj_emb')):
            os.mkdir(os.path.join(args.data, 'traj_emb'))
        # NOTE(review): the input filename formats grid_size with {:.1f}
        # but the output uses {:.0f} -- confirm this asymmetry is intended.
        path = os.path.join(
            args.data,
            "traj_emb/trj_{}_{}{:.0f}.h5".format(args.cityname,
                                                 args.encode_data,
                                                 args.grid_size))
        print("=> saving vectors into {}".format(path))
        # One HDF5 dataset per layer of the hidden state.
        with h5py.File(path, "w") as f:
            for i in range(m0.num_layers):
                f["layer" + str(i + 1)] = vecs[i].squeeze(0).numpy()
        #torch.save(vecs.data, path)
        #return vecs.data
    else:
        print("=> no checkpoint found at '{}'".format(args.checkpoint))

#args = FakeArgs()
#args.t2vec_batch = 128
#args.num_layers = 2
#args.hidden_size = 64
#vecs = t2vec(args)
#vecs
# NOTE(review): fragment of a larger (training) loop body -- the `break`
# below implies an enclosing `for`/`while` outside this view. Mixes
# TensorFlow tensors with a PyTorch-style NCHW layout; confirm intent.
fake_label = tf.ones([1, 1, 30, 30], tf.int32)
# Scale the image pair from uint8 [0, 255] to float32 [0, 1].
real_A, real_B = np.asarray(train[0], dtype=np.float32) / 255.0, np.asarray(
    train[1], dtype=np.float32) / 255.0
# real_data = tf.placeholder(tf.float32, shape=[1, 256, 256, 3], name='')
# t = tf.placeholder(tf.float32, shape=[None, 1])
# HWC -> CHW, then add a leading batch dimension of 1.
real_A = real_A.transpose(2, 0, 1)
real_B = real_B.transpose(2, 0, 1)
real_A = real_A.reshape(1, 3, 256, 256)
real_B = real_B.reshape(1, 3, 256, 256)
real_A = tf.Variable(real_A)
real_B = tf.Variable(real_B)
# Concatenate the pair along the channel axis and run it through the model.
output = EncoderDecoder(tf.concat((real_A, real_B), 1))
print("output ", output)
break
# label = (real_label)
# err_d_real = loss_dis(output, label)
# err_d_real.backward()
# fake_b = encoderdecoder_model(real_A)
# output = discriminator_model(F.concat((real_A, fake_b), axis=1))
# label = (fake_label)
# err_d_fake = loss_dis(output, label)
# err_d_fake.backward()
# err_d = (err_d_real + err_d_fake) / 2.0
# optimizer_discriminator.update()
def train(args):
    """Train the trajectory model with a combined generative (seq2seq) and
    discriminative (triplet) objective.

    Resumes from ``args.checkpoint`` when present; validates and saves the
    best checkpoint every ``args.save_freq`` iterations (after iteration
    1000). Ctrl-C exits the loop cleanly.
    """
    logging.basicConfig(filename=os.path.join(args.data, "training.log"),
                        level=logging.INFO)
    trainData, valData = loadTrainDataAndValidateDate(args)
    # Create the loss function, models, and optimizers.
    lossF = setLossF(args)
    triplet_loss = nn.TripletMarginLoss(margin=1.0, p=2)
    # m0: the full input-to-output encoder-decoder mapping.
    m0 = EncoderDecoder(args.vocab_size, args.embedding_size,
                        args.hidden_size, args.num_layers,
                        args.dropout, args.bidirectional)
    # m1: projection of decoder output onto the vocabulary, log-softmaxed.
    m1 = nn.Sequential(nn.Linear(args.hidden_size, args.vocab_size),
                       nn.LogSoftmax(dim=1))
    if args.cuda and torch.cuda.is_available():
        print("=> training with GPU")
        m0.cuda()
        m1.cuda()
        # criterion.cuda()  (disabled by the author)
        #m0 = nn.DataParallel(m0, dim=1)
    else:
        print("=> training with CPU")
    m0_optimizer = torch.optim.Adam(m0.parameters(), lr=args.learning_rate)
    m1_optimizer = torch.optim.Adam(m1.parameters(), lr=args.learning_rate)

    ## Load model and optimizer state from a saved checkpoint if one
    ## exists; otherwise start training from scratch.
    if os.path.isfile(args.checkpoint):
        print("=> loading checkpoint '{}'".format(args.checkpoint))
        logging.info("Restore training @ {}".format(time.ctime()))
        checkpoint = torch.load(args.checkpoint)
        args.start_iteration = checkpoint["iteration"]
        best_prec_loss = checkpoint["best_prec_loss"]
        m0.load_state_dict(checkpoint["m0"])
        m1.load_state_dict(checkpoint["m1"])
        m0_optimizer.load_state_dict(checkpoint["m0_optimizer"])
        m1_optimizer.load_state_dict(checkpoint["m1_optimizer"])
    else:
        print("=> no checkpoint found at '{}'".format(args.checkpoint))
        logging.info("Start training @ {}".format(time.ctime()))
        best_prec_loss = float('inf')
        #print("=> initializing the parameters...")
        #init_parameters(m0)
        #init_parameters(m1)
        ## here: load pretrained wrod (cell) embedding

    # num_iteration = 67000*128 // args.batch
    num_iteration = args.iter_num
    print("开始训练:" + str(time.ctime()))
    print("Iteration starts at {} and will end at {} \n".format(
        args.start_iteration, num_iteration - 1))

    ## training
    for iteration in range(args.start_iteration + 1, num_iteration):
        try:
            # Zero the gradients of both optimizers.
            m0_optimizer.zero_grad()
            m1_optimizer.zero_grad()
            ## Forward pass: fetch a padded + transposed batch with fields
            ## ['src', 'lengths', 'trg', 'invp'];
            # src (seq_len1, batch), lengths (1, batch), trg (seq_len2, batch)
            gendata = trainData.getbatch_generative()
            # Generative (reconstruction) loss.
            genloss = genLoss(gendata, m0, m1, lossF, args)
            ## discriminative loss
            disloss_cross, disloss_inner = 0, 0
            # Only compute the discriminative loss every 10th iteration.
            if args.use_discriminative and iteration % 10 == 0:
                # Cross sampling: anchor a and positive p are closer
                # trajectories; a.src.size = [max_length, 128].
                a, p, n = trainData.getbatch_discriminative_cross()
                disloss_cross = disLoss(a, p, n, m0, triplet_loss, args)
                # Inner sampling: a, p, n are 128 down-sampled trajectories
                # drawn from the same group of 128 originals.
                a, p, n = trainData.getbatch_discriminative_inner()
                disloss_inner = disLoss(a, p, n, m0, triplet_loss, args)
            # Weighted sum: genloss drives reconstruction, the
            # discriminative terms pull similar sequences together.
            loss = genloss + args.discriminative_w * (disloss_cross +
                                                      disloss_inner)
            ## Backpropagate.
            loss.backward()
            ## Clip gradients to args.max_grad_norm to avoid blow-ups.
            clip_grad_norm_(m0.parameters(), args.max_grad_norm)
            clip_grad_norm_(m1.parameters(), args.max_grad_norm)
            ## One optimizer step for each module.
            m0_optimizer.step()
            m1_optimizer.step()
            ## Average generative loss per target token.
            avg_genloss = genloss.item() / gendata.trg.size(0)
            ## Periodically report training progress.
            if iteration % args.print_freq == 0:
                print("\n当前时间:" + str(time.ctime()))
                print("Iteration: {0:}\nGenerative Loss: {1:.3f}"\
                    "\nDiscriminative Cross Loss: {2:.3f}\nDiscriminative Inner Loss: {3:.3f}"\
                    .format(iteration, avg_genloss, disloss_cross,
                            disloss_inner))
            ## Periodically validate and checkpoint; keep the best model
            ## by validation loss.
            if iteration % args.save_freq == 0 and iteration >= 1000:
                print("验证并存储训练状态:" + str(time.ctime()))
                prec_loss = validate(valData, (m0, m1), lossF, args)
                if prec_loss < best_prec_loss:
                    best_prec_loss = prec_loss
                    logging.info("Best model with loss {} at iteration {} @ {}"\
                        .format(best_prec_loss, iteration, time.ctime()))
                    is_best = True
                else:
                    is_best = False
                print("Saving the model at iteration {} validation loss {}".
                      format(iteration, prec_loss) + str(time.ctime()))
                savecheckpoint(
                    {
                        "iteration": iteration,
                        "best_prec_loss": best_prec_loss,
                        "m0": m0.state_dict(),
                        "m1": m1.state_dict(),
                        "m0_optimizer": m0_optimizer.state_dict(),
                        "m1_optimizer": m1_optimizer.state_dict()
                    }, is_best, args)
        except KeyboardInterrupt:
            break
from models import EncoderDecoder
from data import get_datasets, PadAndOneHot
from training import Trainer
from helper_functions import one_hot_to_string

# Load the train / validation / test splits from the text files in `data_path`.
data_path = "data"
train_dataset, valid_dataset, test_dataset = get_datasets(data_path)
checkpoint_path = "."

# Alphabet sizes and the output EOS index are dictated by the training split.
sx_size = len(train_dataset.Sx)  # input alphabet
sy_size = len(train_dataset.Sy)  # output alphabet

# Build the attention-based seq2seq model.
model = EncoderDecoder(num_encoder_layers=2,
                       num_encoder_hidden=512,
                       num_decoder_layers=2,
                       num_decoder_hidden=512,
                       Sx_size=sx_size,
                       Sy_size=sy_size,
                       y_eos=train_dataset.y_eos,  # index of end-of-sequence symbol for output
                       dropout=0.1,
                       use_attention=True)

# Restore the model from disk; with num_epochs == 0 the loop below is skipped
# entirely, so this script only loads the checkpoint.
num_epochs = 0
trainer = Trainer(model, lr=0.0001)
trainer.load_checkpoint(checkpoint_path)

for epoch in range(num_epochs):
    print("========= Epoch %d of %d =========" % (epoch + 1, num_epochs))
    train_acc, train_loss = trainer.train(train_dataset)
    valid_acc, valid_loss = trainer.test(valid_dataset)
    trainer.save_checkpoint(epoch, checkpoint_path)
class Runner():
    ''' Handler for complete pre-training progress of upstream models.

    Jointly pre-trains two EncoderDecoder models — a speech-to-text model
    (`text_model`) and a text-to-speech model (`speech_model`) — using a mix
    of supervised, dual-transformation (back-translation style) and denoising
    auto-encoding objectives.  See `train()` for the overall schedule.
    '''

    def __init__(self, args, config, dae_dataloader, tokenizer, ckpdir):
        # Use CUDA only when both requested (args.gpu) and actually available.
        self.device = torch.device('cuda') if (
            args.gpu and torch.cuda.is_available()) else torch.device('cpu')
        if torch.cuda.is_available():
            print('[Runner] - CUDA is available!')
        self.model_kept = []  # paths of saved checkpoints, oldest first
        self.global_step = 1
        self.log = SummaryWriter(ckpdir)  # TensorBoard event writer

        self.args = args
        self.config = config
        self.dae_dataloader = dae_dataloader
        self.tokenizer = tokenizer
        self.ckpdir = ckpdir

        # optimizer
        self.learning_rate = float(config['optimizer']['learning_rate'])
        self.warmup_proportion = config['optimizer']['warmup_proportion']
        self.gradient_accumulation_steps = config['optimizer'][
            'gradient_accumulation_steps']
        self.gradient_clipping = config['optimizer']['gradient_clipping']

        # Training details
        self.apex = config['runner']['apex']
        self.total_steps = config['runner']['total_steps']
        self.warm_up_epochs = config['runner']['warm_up_epochs']
        self.log_step = config['runner']['log_step']
        self.save_step = config['runner']['save_step']
        self.duo_feature = config['runner']['duo_feature']
        self.max_keep = config['runner']['max_keep']

        # Model configs: encoders are plain (no cross-attention); decoders
        # attend to their encoder via cross-attention.
        self.text_encoder_config = RobertaConfig(**config['semantic'])
        self.text_encoder_config.is_decoder = False
        self.text_encoder_config.add_cross_attention = False
        self.text_decoder_config = RobertaConfig(**config['semantic'])
        self.text_decoder_config.is_decoder = True
        self.text_decoder_config.add_cross_attention = True
        self.speech_encoder_config = RobertaConfig(**config['acoustic'])
        self.speech_encoder_config.is_decoder = False
        self.speech_encoder_config.add_cross_attention = False
        self.speech_decoder_config = RobertaConfig(**config['acoustic'])
        self.speech_decoder_config.is_decoder = True
        self.speech_decoder_config.add_cross_attention = True

    def set_model(self):
        """Instantiate both EncoderDecoder models and the shared optimizer."""
        print('[Runner] - Initializing Transformer model...')
        # text_model maps speech -> text; speech_model maps text -> speech.
        self.text_model = EncoderDecoder(
            encoder_config=self.speech_encoder_config,
            decoder_config=self.text_decoder_config,
            modality="text")
        self.text_model.to(self.device)
        self.text_model.train()
        self.speech_model = EncoderDecoder(
            encoder_config=self.text_encoder_config,
            decoder_config=self.speech_decoder_config,
            modality="speech")
        self.speech_model.to(self.device)
        self.speech_model.train()

        if self.args.multi_gpu:
            self.text_model = torch.nn.DataParallel(self.text_model)
            self.speech_model = torch.nn.DataParallel(self.speech_model)
            print('[Runner] - Multi-GPU training Enabled: ' +
                  str(torch.cuda.device_count()))
        print('[Runner] - Number of parameters: ' +
              str(sum(p.numel() for p in self.text_model.parameters()
                      if p.requires_grad) +
                  sum(p.numel() for p in self.speech_model.parameters()
                      if p.requires_grad)))

        # One optimizer over the parameters of both models; bias and LayerNorm
        # parameters are excluded from weight decay (standard BERT recipe).
        param_optimizer = list(self.text_model.named_parameters()) + list(
            self.speech_model.named_parameters())
        no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
        optimizer_grouped_parameters = [{
            'params': [
                p for n, p in param_optimizer
                if not any(nd in n for nd in no_decay)
            ],
            'weight_decay': 0.01
        }, {
            'params':
            [p for n, p in param_optimizer if any(nd in n for nd in no_decay)],
            'weight_decay': 0.0
        }]
        if 'type' not in self.config['optimizer']:
            self.config['optimizer']['type'] = 'adam'
        print('[Runner] - Optimizer: ' +
              ('apex Fused Adam'
               if self.apex else str(self.config['optimizer']['type'])))
        if self.apex:
            try:
                from apex.optimizers import FP16_Optimizer
                from apex.optimizers import FusedAdam
            except ImportError:
                raise ImportError(
                    "Please install apex from https://www.github.com/nvidia/apex to use distributed and fp16 training."
                )
            optimizer = FusedAdam(optimizer_grouped_parameters,
                                  lr=self.learning_rate,
                                  bias_correction=False,
                                  max_grad_norm=1.0)
            # loss_scale == 0 selects dynamic loss scaling; any other value is
            # used as a fixed (static) scale.
            if self.config['optimizer']['loss_scale'] == 0:
                self.optimizer = FP16_Optimizer(optimizer,
                                                dynamic_loss_scale=True)
            else:
                self.optimizer = FP16_Optimizer(
                    optimizer,
                    static_loss_scale=self.config['optimizer']['loss_scale'])
            # With apex the warmup schedule is applied manually in train().
            self.warmup_linear = WarmupLinearSchedule(
                warmup=self.warmup_proportion, t_total=self.total_steps)
        elif self.config['optimizer']['type'] == 'adam':
            self.optimizer = BertAdam(optimizer_grouped_parameters,
                                      lr=self.learning_rate,
                                      warmup=self.warmup_proportion,
                                      t_total=self.total_steps,
                                      schedule='warmup_linear')
        elif self.config['optimizer']['type'] == 'lamb' or self.config[
                'optimizer']['type'] == 'adamW':
            # The Lamb class doubles as AdamW when adam=True.
            self.optimizer = Lamb(
                optimizer_grouped_parameters,
                lr=self.learning_rate,
                warmup=self.warmup_proportion,
                t_total=self.total_steps,
                schedule='warmup_linear',
                adam=True
                if self.config['optimizer']['type'] == 'adamW' else False,
                correct_bias=True
                if self.config['optimizer']['type'] == 'adamW' else False)
        else:
            raise NotImplementedError()
        if self.args.resume is not None:
            self.load_model(self.args.resume)

    def process_acoustic_data(self, acoustic_inputs):
        """Process training data for the masked acoustic model.

        Returns (inputs, mask_labels, attention_mask, labels) moved to
        `self.device` as float / bool / float / float tensors.
        """
        with torch.no_grad():
            assert (
                len(acoustic_inputs) == 4
            ), 'dataloader should return (a_inputs, a_mask_labels, a_attn_mask, a_labels)'
            # Unpack and Hack bucket: Bucketing should cause acoustic feature to have shape 1xBxTxD'
            a_inputs = acoustic_inputs[0].squeeze(0)
            a_mask_labels = acoustic_inputs[1].squeeze(0)
            a_attention_mask = acoustic_inputs[2].squeeze(0)
            a_labels = acoustic_inputs[3].squeeze(0)
            a_inputs = a_inputs.float().to(device=self.device)
            a_mask_labels = a_mask_labels.bool().to(device=self.device)
            a_attention_mask = a_attention_mask.float().to(device=self.device)
            a_labels = a_labels.float().to(device=self.device)
        return a_inputs, a_mask_labels, a_attention_mask, a_labels

    def process_semantic_data(self, semantic_inputs):
        """Process training data for the text (semantic) model.

        Returns (inputs, attention_mask, labels, raw) moved to `self.device`
        as long / float / long / long tensors.  Like the acoustic path, the
        leading bucket dimension of size 1 is squeezed away.
        """
        with torch.no_grad():
            assert (
                len(semantic_inputs) == 4
            ), 'dataloader should return (s_inputs, s_attention_mask, s_labels, s_raw)'
            s_inputs = semantic_inputs[0].squeeze(0)
            s_attention_mask = semantic_inputs[1].squeeze(0)
            s_labels = semantic_inputs[2].squeeze(0)
            s_raw = semantic_inputs[3].squeeze(0)
            s_inputs = s_inputs.long().to(device=self.device)
            s_attention_mask = s_attention_mask.float().to(device=self.device)
            s_labels = s_labels.long().to(device=self.device)
            s_raw = s_raw.long().to(device=self.device)
        return s_inputs, s_attention_mask, s_labels, s_raw

    def load_model(self, ckptpth):
        """Restore both models, the optimizer and the step counter from a checkpoint."""
        ckpt = torch.load(ckptpth)
        self.text_model.load_state_dict(ckpt['semantic_model'])
        self.speech_model.load_state_dict(ckpt['acoustic_model'])
        self.optimizer.load_state_dict(ckpt['Optimizer'])
        self.global_step = ckpt['Global_step']

    def save_model(self, name='states', to_path=None):
        """Save both models + optimizer state, pruning the oldest checkpoint.

        When `to_path` is None the file is written to
        `<ckpdir>/<name>-<global_step>.ckpt`.
        """
        all_states = {
            'semantic_model':
            self.text_model.state_dict() if not self.args.multi_gpu else
            self.text_model.module.state_dict(),
            'acoustic_model':
            self.speech_model.state_dict() if not self.args.multi_gpu else
            self.speech_model.module.state_dict(),
        }
        all_states['Optimizer'] = self.optimizer.state_dict()
        all_states['Global_step'] = self.global_step
        all_states['Settings'] = {'Config': self.config, 'Paras': self.args}
        if to_path is None:
            new_model_path = '{}/{}-{}.ckpt'.format(self.ckpdir, name,
                                                    self.global_step)
        else:
            new_model_path = to_path
        torch.save(all_states, new_model_path)
        self.model_kept.append(new_model_path)
        # NOTE(review): with `>=` this retains at most max_keep - 1 checkpoints
        # after the first eviction — confirm that is intended.
        if len(self.model_kept) >= self.max_keep:
            os.remove(self.model_kept[0])
            self.model_kept.pop(0)

    def train(self, ):
        """Run the full pre-training schedule.

        Phase 1 warms both models up on parallel (paired) data for
        `warm_up_epochs` epochs.  Phase 2 repeats until `total_steps`:
        generate pseudo-parallel data with the current models, rebuild the
        dual-transformation (DT) datasets from it, and train on a weighted
        mix of DT, supervised and denoising auto-encoding (DAE) losses.
        """
        print("Start warm up with parallel data.")
        warmup_dataset = SupervisedDataset(
            file_path=self.config['dataloader']['data_path'],
            sets=self.config['dataloader']['sup_train_set'],
            bucket_size=self.config['dataloader']['batch_size'],
            max_timestep=self.config['dataloader']['max_timestep'],
            drop=True,
            acoustic_config=self.config['acoustic'],
            semantic_config=self.config['semantic'],
            tokenizer=self.tokenizer,
            main_random_noise=False,
            mask_proportion=1.0)  # mask everything to [MASK]
        # Datasets bucket internally, hence DataLoader batch_size=1.
        warmup_dataloader = DataLoader(
            dataset=warmup_dataset,
            batch_size=1,
            shuffle=True,
            drop_last=False,
            num_workers=self.config['dataloader']['n_jobs'],
            pin_memory=True)
        tk0 = tqdm(range(self.warm_up_epochs),
                   total=self.warm_up_epochs,
                   desc="warm up training with parallel data.")
        for _ in tk0:
            accum_step = 0
            accum_text_sup_loss = 0
            accum_speech_sup_loss = 0
            for warmup_batch in warmup_dataloader:
                warmup_batch_is_valid, warmup_speech_batch, warmup_text_batch = warmup_batch
                if not warmup_batch_is_valid:
                    continue
                speech_inputs, speech_mask_labels, speech_attention_mask, speech_labels = self.process_acoustic_data(
                    warmup_speech_batch)
                text_inputs, text_attention_mask, text_labels, text_raw = self.process_semantic_data(
                    warmup_text_batch)
                # Supervised speech -> text loss (clean speech into the encoder).
                text_sup_loss = self.text_model(
                    encoder_inputs=speech_labels,
                    encoder_attention_mask=speech_attention_mask,
                    decoder_inputs=text_inputs,
                    decoder_attention_mask=text_attention_mask,
                    decoder_labels=text_labels)
                # Supervised text -> speech loss (raw text into the encoder).
                speech_sup_loss = self.speech_model(
                    encoder_inputs=text_raw,
                    encoder_attention_mask=text_attention_mask,
                    decoder_inputs=speech_inputs,
                    decoder_attention_mask=speech_attention_mask,
                    decoder_labels=(speech_labels, speech_mask_labels))
                loss = text_sup_loss + speech_sup_loss
                if self.args.multi_gpu:
                    loss = loss.mean()
                    text_sup_loss = text_sup_loss.mean()
                    speech_sup_loss = speech_sup_loss.mean()
                loss.backward()
                grad_norm = torch.nn.utils.clip_grad_norm_(
                    list(self.text_model.parameters()) +
                    list(self.speech_model.parameters()),
                    self.gradient_clipping)
                self.optimizer.step()
                self.optimizer.zero_grad()
                # Running batch-size-weighted averages for the progress bar.
                batch_size = text_inputs.size(0)
                accum_step += batch_size
                accum_text_sup_loss += text_sup_loss.item() * batch_size
                accum_speech_sup_loss += speech_sup_loss.item() * batch_size
                tk0.set_postfix(text_loss=accum_text_sup_loss / accum_step,
                                speech_loss=accum_speech_sup_loss / accum_step)
        del warmup_dataset, warmup_dataloader
        gc.collect()

        previous_speech2text_dataloader = None
        previous_text2speech_dataloader = None
        epoch = 0
        pbar = tqdm(total=self.total_steps)
        pbar.n = self.global_step - 1
        while self.global_step <= self.total_steps:
            print("\nStart Generation. Epoch: {}.\n".format(epoch))
            self.text_model.eval()
            self.speech_model.eval()
            # First epoch only: build a fresh (unpaired) generation dataset.
            if previous_speech2text_dataloader is None or previous_text2speech_dataloader is None:
                gen_dataset = DAEDataset(
                    file_path=self.config['dataloader']['data_path'],
                    sets=self.config['dataloader']['sup_train_set'] +
                    self.config['dataloader']['dt_train_set'],
                    bucket_size=self.config['dataloader']['batch_size'],
                    max_timestep=self.config['dataloader']['max_timestep'],
                    drop=True,
                    acoustic_config=self.config['acoustic'],
                    semantic_config=self.config['semantic'],
                    tokenizer=self.tokenizer,
                    main_random_noise=False)
                gen_dataloader = DataLoader(
                    dataset=gen_dataset,
                    batch_size=1,
                    shuffle=True,
                    drop_last=False,
                    num_workers=self.config['dataloader']['n_jobs'],
                    pin_memory=True)
            all_speech = []
            all_speech_mask = []
            all_gen_text = []
            all_text = []
            all_text_mask = []
            all_gen_speech = []
            with torch.no_grad():
                if previous_speech2text_dataloader is None or previous_text2speech_dataloader is None:
                    for gen_batch in tqdm(gen_dataloader,
                                          desc="Generating First Time."):
                        gen_batch_is_valid, gen_speech_batch, gen_text_batch = gen_batch
                        if not gen_batch_is_valid:
                            continue
                        # speech and text here are NOT paired.
                        speech_inputs, speech_mask_labels, speech_attention_mask, speech_labels = self.process_acoustic_data(
                            gen_speech_batch)
                        text_inputs, text_attention_mask, text_labels, text_raw = self.process_semantic_data(
                            gen_text_batch)
                        batch_size = speech_labels.size(0)
                        # All-[MASK] decoder input: generate text from speech alone.
                        text_mask_inputs = torch.ones((batch_size, self.text_decoder_config.max_output_length), dtype=torch.long).to(self.device) * \
                            self.tokenizer.mask_token_id
                        output_text = self.text_model(
                            encoder_inputs=speech_labels,
                            encoder_attention_mask=speech_attention_mask,
                            decoder_inputs=text_mask_inputs,
                        )
                        all_speech.append(speech_labels.detach().cpu().numpy())
                        all_speech_mask.append(
                            speech_attention_mask.detach().cpu().numpy())
                        all_gen_text.append(output_text.detach().cpu().numpy())
                        batch_size = text_raw.size(0)
                        # All-zero decoder input: generate speech from text alone.
                        speech_mask_inputs = torch.zeros(
                            (batch_size,
                             self.speech_decoder_config.max_output_length,
                             self.speech_decoder_config.audio_size *
                             self.speech_decoder_config.downsample_rate),
                            dtype=torch.float).to(self.device)
                        output_speech = self.speech_model(
                            encoder_inputs=text_raw,
                            encoder_attention_mask=text_attention_mask,
                            decoder_inputs=speech_mask_inputs,
                        )
                        all_text.append(text_raw.detach().cpu().numpy())
                        all_text_mask.append(
                            text_attention_mask.detach().cpu().numpy())
                        all_gen_speech.append(
                            output_speech.detach().cpu().numpy())
                    del gen_dataset, gen_dataloader
                    gc.collect()
                else:
                    for gen_batch in tqdm(previous_speech2text_dataloader,
                                          desc="Generating Text."):
                        gen_batch_is_valid, gen_speech_batch, gen_text_batch = gen_batch
                        if not gen_batch_is_valid:
                            continue
                        # speech and text here ARE paired.
                        speech_inputs, speech_mask_labels, speech_attention_mask, speech_labels = self.process_acoustic_data(
                            gen_speech_batch)
                        text_inputs, text_attention_mask, text_labels, text_raw = self.process_semantic_data(
                            gen_text_batch)
                        output_text = self.text_model(
                            encoder_inputs=speech_labels,
                            encoder_attention_mask=speech_attention_mask,
                            decoder_inputs_embeds=
                            text_raw,  # result generated last round, in token-embedding form.
                        )
                        all_speech.append(speech_labels.detach().cpu().numpy())
                        all_speech_mask.append(
                            speech_attention_mask.detach().cpu().numpy())
                        all_gen_text.append(output_text.detach().cpu().numpy())
                    # NOTE(review): these dataset/dataloader names were bound in
                    # the previous epoch iteration further below in this loop.
                    del speech2text_dt_dataset, speech2text_dt_dataloader, previous_speech2text_dataloader
                    gc.collect()
                    for gen_batch in tqdm(previous_text2speech_dataloader,
                                          desc="Generating Speech."):
                        gen_batch_is_valid, gen_speech_batch, gen_text_batch = gen_batch
                        if not gen_batch_is_valid:
                            continue
                        # speech and text here ARE paired.
                        speech_inputs, speech_mask_labels, speech_attention_mask, speech_labels = self.process_acoustic_data(
                            gen_speech_batch)
                        text_inputs, text_attention_mask, text_labels, text_raw = self.process_semantic_data(
                            gen_text_batch)
                        output_speech = self.speech_model(
                            encoder_inputs=text_raw,
                            encoder_attention_mask=text_attention_mask,
                            decoder_inputs=
                            speech_labels,  # result generated last round, in mel-spectrogram form.
                        )
                        all_text.append(text_raw.detach().cpu().numpy())
                        all_text_mask.append(
                            text_attention_mask.detach().cpu().numpy())
                        all_gen_speech.append(
                            output_speech.detach().cpu().numpy())
                    del text2speech_dt_dataset, text2speech_dt_dataloader, previous_text2speech_dataloader
                    gc.collect()
            # Pseudo-parallel pairs: ((real source, source mask), generated target).
            speech2text = ((all_speech, all_speech_mask), all_gen_text)
            text2speech = ((all_text, all_text_mask), all_gen_speech)
            # Mask proportion ramps 0.01 per epoch from 0.3, capped by config.
            current_epoch_dt_mask_prop = min(
                max(self.config['semantic']['dt_mask_proportion'],
                    self.config['acoustic']['dt_mask_proportion']),
                0.3 + 0.01 * epoch)
            speech2text_dt_dataset = Speech2TextDTDataset(
                speech2text,
                bucket_size=self.config['dataloader']['batch_size'],
                acoustic_config=self.config['acoustic'],
                semantic_config=self.config['semantic'],
                tokenizer=self.tokenizer,
                main_random_noise=False,
                mask_proportion=current_epoch_dt_mask_prop)
            speech2text_dt_dataloader = DataLoader(
                dataset=speech2text_dt_dataset,
                batch_size=1,
                shuffle=True,
                drop_last=False,
                num_workers=self.config['dataloader']['n_jobs'],
                pin_memory=True)
            text2speech_dt_dataset = Text2SpeechDTDataset(
                text2speech,
                bucket_size=self.config['dataloader']['batch_size'],
                acoustic_config=self.config['acoustic'],
                semantic_config=self.config['semantic'],
                tokenizer=self.tokenizer,
                main_random_noise=False,
                mask_proportion=current_epoch_dt_mask_prop)
            text2speech_dt_dataloader = DataLoader(
                dataset=text2speech_dt_dataset,
                batch_size=1,
                shuffle=True,
                drop_last=False,
                num_workers=self.config['dataloader']['n_jobs'],
                pin_memory=True)
            # Keep the DT loaders for next epoch's generation pass.
            previous_speech2text_dataloader = speech2text_dt_dataloader
            previous_text2speech_dataloader = text2speech_dt_dataloader
            del speech2text, text2speech
            gc.collect()
            current_epoch_sup_mask_prop = min(
                max(self.config['semantic']['sup_mask_proportion'],
                    self.config['acoustic']['sup_mask_proportion']),
                0.3 + 0.01 * epoch)
            sup_dataset = SupervisedDataset(
                file_path=self.config['dataloader']['data_path'],
                sets=self.config['dataloader']['sup_train_set'],
                bucket_size=self.config['dataloader']['batch_size'],
                max_timestep=self.config['dataloader']['max_timestep'],
                drop=True,
                acoustic_config=self.config['acoustic'],
                semantic_config=self.config['semantic'],
                tokenizer=self.tokenizer,
                main_random_noise=False,
                mask_proportion=current_epoch_sup_mask_prop)
            sup_dataloader = DataLoader(
                dataset=sup_dataset,
                batch_size=1,
                shuffle=True,
                drop_last=False,
                num_workers=self.config['dataloader']['n_jobs'],
                pin_memory=True)
            ##################################################
            # Main training: iterate the DAE loader, cycling the DT and
            # supervised loaders so each step sees one batch of each kind.
            progress = tqdm(self.dae_dataloader,
                            desc="Main Training Iteration.")
            s2t_dt_iter = speech2text_dt_dataloader.__iter__()
            t2s_dt_iter = text2speech_dt_dataloader.__iter__()
            sup_iter = sup_dataloader.__iter__()
            loss_val = 0
            speech_dt_loss_val, text_dt_loss_val, speech_dt_stop_loss_val = 0, 0, 0
            speech_sup_loss_val, text_sup_loss_val, speech_sup_stop_loss_val = 0, 0, 0
            speech_dae_loss_val, text_dae_loss_val, speech_dae_stop_loss_val = 0, 0, 0
            self.text_model.train()
            self.speech_model.train()
            for dae_batch in progress:
                # Restart any auxiliary loader that has been exhausted.
                try:
                    s2t_dt_batch = next(s2t_dt_iter)
                except StopIteration:
                    del s2t_dt_iter
                    gc.collect()
                    s2t_dt_iter = speech2text_dt_dataloader.__iter__()
                    s2t_dt_batch = next(s2t_dt_iter)
                try:
                    t2s_dt_batch = next(t2s_dt_iter)
                except StopIteration:
                    del t2s_dt_iter
                    gc.collect()
                    t2s_dt_iter = text2speech_dt_dataloader.__iter__()
                    t2s_dt_batch = next(t2s_dt_iter)
                try:
                    sup_batch = next(sup_iter)
                except StopIteration:
                    del sup_iter
                    gc.collect()
                    sup_iter = sup_dataloader.__iter__()
                    sup_batch = next(sup_iter)
                dae_batch_is_valid, dae_speech_batch, dae_text_batch = dae_batch
                s2t_dt_batch_is_valid, s2t_dt_speech_batch, s2t_dt_text_batch = s2t_dt_batch
                t2s_dt_batch_is_valid, t2s_dt_speech_batch, t2s_dt_text_batch = t2s_dt_batch
                sup_batch_is_valid, sup_speech_batch, sup_text_batch = sup_batch
                try:
                    if self.global_step > self.total_steps:
                        break
                    if not s2t_dt_batch_is_valid or not t2s_dt_batch_is_valid or not dae_batch_is_valid or not sup_batch_is_valid:
                        continue
                    ######## Dual Transformation ######
                    # The datasets must stay separate: generated text together
                    # with the original audio reconstructs the audio, and vice
                    # versa.  Generated data feeds the encoder, real data
                    # feeds the decoder.
                    speech_inputs, speech_mask_labels, speech_attention_mask, speech_labels = self.process_acoustic_data(
                        t2s_dt_speech_batch)
                    text_inputs, text_attention_mask, text_labels, text_raw = self.process_semantic_data(
                        t2s_dt_text_batch)
                    text_dt_loss = self.text_model(
                        encoder_inputs=speech_labels,  # generated
                        encoder_attention_mask=speech_attention_mask,
                        decoder_inputs=text_inputs,
                        decoder_attention_mask=text_attention_mask,
                        decoder_labels=text_labels)
                    speech_inputs, speech_mask_labels, speech_attention_mask, speech_labels = self.process_acoustic_data(
                        s2t_dt_speech_batch)
                    text_inputs, text_attention_mask, text_labels, text_raw = self.process_semantic_data(
                        s2t_dt_text_batch)
                    speech_dt_loss = self.speech_model(
                        encoder_inputs_embeds=text_raw,  # generated
                        encoder_attention_mask=text_attention_mask,
                        decoder_inputs=speech_inputs,
                        decoder_attention_mask=speech_attention_mask,
                        decoder_labels=(speech_labels, speech_mask_labels))
                    ######## Supervised #######
                    speech_inputs, speech_mask_labels, speech_attention_mask, speech_labels = self.process_acoustic_data(
                        sup_speech_batch)
                    text_inputs, text_attention_mask, text_labels, text_raw = self.process_semantic_data(
                        sup_text_batch)
                    text_sup_loss = self.text_model(
                        encoder_inputs=speech_inputs,
                        encoder_attention_mask=speech_attention_mask,
                        decoder_inputs=text_inputs,
                        decoder_attention_mask=text_attention_mask,
                        decoder_labels=text_labels)
                    speech_sup_loss = self.speech_model(
                        encoder_inputs=text_inputs,
                        encoder_attention_mask=text_attention_mask,
                        decoder_inputs=speech_inputs,
                        decoder_attention_mask=speech_attention_mask,
                        decoder_labels=(speech_labels, speech_mask_labels))
                    ######## Denoise AutoEncoding ########
                    speech_inputs, speech_mask_labels, speech_attention_mask, speech_labels = self.process_acoustic_data(
                        dae_speech_batch)
                    text_inputs, text_attention_mask, text_labels, text_raw = self.process_semantic_data(
                        dae_text_batch)
                    # Encoder-only reconstruction losses (no decoder pass).
                    text_dae_loss = self.text_model(
                        encoder_inputs=speech_inputs,
                        encoder_attention_mask=speech_attention_mask,
                        encoder_labels=(speech_labels, speech_mask_labels))
                    speech_dae_loss = self.speech_model(
                        encoder_inputs=text_inputs,
                        encoder_attention_mask=text_attention_mask,
                        encoder_labels=text_labels)
                    #######################################
                    if self.args.multi_gpu:
                        text_dt_loss = text_dt_loss.mean()
                        speech_dt_loss = speech_dt_loss.mean()
                        text_sup_loss = text_sup_loss.mean()
                        speech_sup_loss = speech_sup_loss.mean()
                        text_dae_loss = text_dae_loss.mean()
                        speech_dae_loss = speech_dae_loss.mean()
                    # The supervised term is down-weighted by 0.1.
                    loss = (text_dt_loss + speech_dt_loss) + \
                        0.1 * (text_sup_loss + speech_sup_loss) + \
                        (text_dae_loss + speech_dae_loss)
                    # Accumulate Loss
                    if self.gradient_accumulation_steps > 1:
                        loss = loss / self.gradient_accumulation_steps
                    if self.apex and self.args.multi_gpu:
                        raise NotImplementedError
                    elif self.apex:
                        self.optimizer.backward(loss)
                    else:
                        loss.backward()
                    loss_val += loss.item()
                    speech_dt_loss_val += speech_dt_loss.item()
                    text_dt_loss_val += text_dt_loss.item()
                    speech_sup_loss_val += speech_sup_loss.item()
                    text_sup_loss_val += text_sup_loss.item()
                    speech_dae_loss_val += speech_dae_loss.item()
                    text_dae_loss_val += text_dae_loss.item()
                    # NOTE(review): this gate tests the *constant* total_steps,
                    # so for a given config it is either always true or always
                    # false; for gradient accumulation it presumably should
                    # test the running batch/step count — confirm against the
                    # original implementation.
                    if (self.total_steps +
                            1) % self.gradient_accumulation_steps == 0:
                        if self.apex:
                            # modify learning rate with special warm up BERT uses
                            # if config.apex is False, BertAdam is used and handles this automatically
                            lr_this_step = self.learning_rate * self.warmup_linear.get_lr(
                                self.global_step, self.warmup_proportion)
                            for param_group in self.optimizer.param_groups:
                                param_group['lr'] = lr_this_step
                        # Step
                        grad_norm = torch.nn.utils.clip_grad_norm_(
                            list(self.text_model.parameters()) +
                            list(self.speech_model.parameters()),
                            self.gradient_clipping)
                        # Skip the update entirely when gradients blew up.
                        if math.isnan(grad_norm):
                            print(
                                '[Runner] - Error : grad norm is NaN @ step '
                                + str(self.global_step))
                        else:
                            self.optimizer.step()
                        self.optimizer.zero_grad()
                        if self.global_step % self.log_step == 0:
                            # Log
                            self.log.add_scalar('lr',
                                                self.optimizer.get_lr()[0],
                                                self.global_step)
                            self.log.add_scalar('loss', (loss_val),
                                                self.global_step)
                            self.log.add_scalar('speech_dt_loss',
                                                (speech_dt_loss_val),
                                                self.global_step)
                            self.log.add_scalar('text_dt_loss',
                                                (text_dt_loss_val),
                                                self.global_step)
                            self.log.add_scalar('speech_dt_stop_loss',
                                                (speech_dt_stop_loss_val),
                                                self.global_step)
                            self.log.add_scalar('speech_sup_loss',
                                                (speech_sup_loss_val),
                                                self.global_step)
                            self.log.add_scalar('text_sup_loss',
                                                (text_sup_loss_val),
                                                self.global_step)
                            self.log.add_scalar('speech_sup_stop_loss',
                                                (speech_sup_stop_loss_val),
                                                self.global_step)
                            self.log.add_scalar('speech_dae_loss',
                                                (speech_dae_loss_val),
                                                self.global_step)
                            self.log.add_scalar('text_dae_loss',
                                                (text_dae_loss_val),
                                                self.global_step)
                            self.log.add_scalar('speech_dae_stop_loss',
                                                (speech_dae_stop_loss_val),
                                                self.global_step)
                            self.log.add_scalar('gradient norm', grad_norm,
                                                self.global_step)
                            progress.set_description(
                                "Loss {:.4f} - DT Loss {:.4f} - SUP Loss {:.4f} - DAE Loss {:.4f}"
                                .format(loss_val,
                                        (speech_dt_loss_val +
                                         text_dt_loss_val +
                                         speech_dt_stop_loss_val),
                                        (speech_sup_loss_val +
                                         text_sup_loss_val +
                                         speech_sup_stop_loss_val),
                                        (speech_dae_loss_val +
                                         text_dae_loss_val +
                                         speech_dae_stop_loss_val)))
                        if self.global_step % self.save_step == 0:
                            self.save_model('states')
                        loss_val = 0
                        speech_dt_loss_val, text_dt_loss_val, speech_dt_stop_loss_val = 0, 0, 0
                        speech_sup_loss_val, text_sup_loss_val, speech_sup_stop_loss_val = 0, 0, 0
                        speech_dae_loss_val, text_dae_loss_val, speech_dae_stop_loss_val = 0, 0, 0
                        pbar.update(1)
                        self.global_step += 1
                except RuntimeError as e:
                    # Recover from OOM by clearing the CUDA cache and dropping
                    # this batch; re-raise anything else.
                    if 'CUDA out of memory' in str(e):
                        print('CUDA out of memory at step: ',
                              self.global_step)
                        torch.cuda.empty_cache()
                        self.optimizer.zero_grad()
                    else:
                        raise
            epoch += 1
            del sup_dataset, sup_dataloader, sup_iter, s2t_dt_iter, t2s_dt_iter
            gc.collect()
        pbar.close()
        self.log.close()
# ---- training-script setup (Chainer encoder-decoder GAN) ----
# NOTE(review): this fragment is cut off at both ends; `args` comes from an
# argument parser defined above this view.
print('# Minibatch-size: {}'.format(args.batchsize))
print('# epoch: {}'.format(args.epoch))

print('===> Loading datasets')
root_path = "dataset/"
train_set = get_training_set(root_path + args.dataset)
test_set = get_test_set(root_path + args.dataset)
# for iteration, batch in enumerate(train_set, 1):
#     print("iteration", iteration)
#     print(batch[0].shape)
#     print(batch[1].shape)
#     break

print('===> Building model')
encoderdecoder_model = EncoderDecoder(args.input_nc, args.output_nc, args.ngf)
discriminator_model = Discriminator(args.input_nc, args.output_nc, args.ngf)
if args.gpu >= 0:
    print("use gpu")
    # Make a specified GPU current
    chainer.cuda.get_device(args.gpu).use()
    encoderdecoder_model.to_gpu()
    discriminator_model.to_gpu()

# Adam with the DCGAN-style hyperparameters (alpha=2e-4, beta1=0.5).
optimizer_encoderdecoder = chainer.optimizers.Adam(alpha=0.0002, beta1=0.5)
optimizer_encoderdecoder.setup(encoderdecoder_model)
# Snapshot of the untrained generator.
serializers.save_npz("encoderdecoder_model_" + str(1), encoderdecoder_model)
if args.gpu >= 0:
    xp = cuda.cupy
# NOTE(review): `xp` is only bound on the GPU path here; presumably a CPU
# fallback (xp = numpy) exists outside this view — confirm.
label = xp.random.randn(args.batchsize)