def train(train_loader, val_loader, epochnum, save_path='.', save_freq=None):
    iter_size = len(train_loader)
    net = Encoder()
    net.cuda()
    criterion = nn.CrossEntropyLoss().cuda()
    optimizer = optim.SGD(net.parameters(), lr=0.01, momentum=0.9, weight_decay=2e-4)
    for epoch in range(epochnum):
        print('epoch : {}'.format(epoch))
        net.train()
        train_loss = 0
        train_correct = 0
        total = 0
        for i, data in enumerate(train_loader):
            sys.stdout.write('iter : {} / {}\r'.format(i, iter_size))
            sys.stdout.flush()
            inputs, labels = data
            inputs, labels = inputs.cuda(), labels.cuda()
            optimizer.zero_grad()
            outputs = net(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()
            pred = torch.max(outputs.data, 1)[1]
            train_correct += (pred == labels).sum().item()
            total += labels.size(0)
        sys.stdout.write(' ' * 20 + '\r')
        sys.stdout.flush()
        print('train_loss:{}, train_acc:{:.2%}'.format(train_loss / total, train_correct / total))

        val_loss = 0
        val_correct = 0
        total = 0
        net.eval()
        with torch.no_grad():
            for data in val_loader:
                inputs, labels = data
                inputs, labels = inputs.cuda(), labels.cuda()
                outputs = net(inputs)
                pred = torch.max(outputs.data, 1)[1]
                total += labels.size(0)
                loss = criterion(outputs, labels)
                val_loss += loss.item()
                val_correct += (pred == labels).sum().item()
        print('val_loss:{}, val_acc:{:.2%}'.format(val_loss / total, val_correct / total))

        # exponential learning-rate decay
        optimizer.param_groups[0]['lr'] *= np.exp(-0.4)

        if save_freq and epoch % save_freq == save_freq - 1:
            net_name = os.path.join(save_path, 'epoch_{}'.format(epoch))
            torch.save(net, net_name)
    torch.save(net, os.path.join(save_path, 'trained_net'))
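# Usage sketch for train() above -- a minimal, hypothetical driver assuming a
# CIFAR-10-style classification setup; the dataset, paths, and batch size are
# illustrative and not from the original script.
import torch
import torchvision
import torchvision.transforms as transforms

if __name__ == '__main__':
    transform = transforms.ToTensor()
    train_set = torchvision.datasets.CIFAR10('./data', train=True, download=True, transform=transform)
    val_set = torchvision.datasets.CIFAR10('./data', train=False, download=True, transform=transform)
    train_loader = torch.utils.data.DataLoader(train_set, batch_size=128, shuffle=True)
    val_loader = torch.utils.data.DataLoader(val_set, batch_size=128, shuffle=False)
    train(train_loader, val_loader, epochnum=30, save_path='./checkpoints', save_freq=10)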
def train(model_path=None):
    dataloader = DataLoader(Augmentation())
    encoder = Encoder()
    dict_len = len(dataloader.data.dictionary)
    decoder = DecoderWithAttention(dict_len)
    if cuda:
        encoder = encoder.cuda()
        decoder = decoder.cuda()
    # if model_path:
    #     text_generator.load_state_dict(torch.load(model_path))

    train_iter = 1
    encoder_optimizer = torch.optim.Adam(encoder.parameters(), lr=cfg.encoder_learning_rate)
    decoder_optimizer = torch.optim.Adam(decoder.parameters(), lr=cfg.decoder_learning_rate)
    val_bleu = list()
    losses = list()

    while True:
        batch_image, batch_label = dataloader.get_next_batch()
        batch_image = torch.from_numpy(batch_image).type(torch.FloatTensor)
        batch_label = torch.from_numpy(batch_label).type(torch.LongTensor)
        if cuda:
            batch_image = batch_image.cuda()
            batch_label = batch_label.cuda()

        print('Training')
        output = encoder(batch_image)
        predictions, alphas = decoder(output, batch_label)
        loss = cal_loss(predictions, batch_label, alphas, 1)

        decoder_optimizer.zero_grad()
        encoder_optimizer.zero_grad()
        loss.backward()
        decoder_optimizer.step()
        encoder_optimizer.step()

        print('Iter', train_iter,
              '| loss:', loss.cpu().data.numpy(),
              '| batch size:', cfg.batch_size,
              '| encoder learning rate:', cfg.encoder_learning_rate,
              '| decoder learning rate:', cfg.decoder_learning_rate)
        losses.append(loss.cpu().data.numpy())

        if train_iter % cfg.save_model_iter == 0:
            val_bleu.append(val_eval(encoder, decoder, dataloader))
            torch.save(encoder.state_dict(),
                       './models/train/encoder_' + cfg.pre_train_model + '_' + str(train_iter) + '.pkl')
            torch.save(decoder.state_dict(),
                       './models/train/decoder_' + str(train_iter) + '.pkl')
            np.save('./result/train_bleu4.npy', val_bleu)
            np.save('./result/losses.npy', losses)

        if train_iter == cfg.train_iter:
            break
        train_iter += 1
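# `cal_loss` is used above but not defined in this file. A minimal sketch under
# the assumption that it follows the usual Show-Attend-and-Tell objective:
# cross-entropy on the word predictions plus a doubly-stochastic attention
# penalty weighted by alpha_c. Shapes are assumptions, not from the original.
import torch.nn as nn

def cal_loss(predictions, targets, alphas, alpha_c):
    criterion = nn.CrossEntropyLoss()
    # predictions: (batch, seq_len, vocab); targets: (batch, seq_len)
    loss = criterion(predictions.reshape(-1, predictions.size(-1)), targets.reshape(-1))
    # encourage the attention weights over each location to sum to ~1 across time
    loss += alpha_c * ((1. - alphas.sum(dim=1)) ** 2).mean()
    return loss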
def instantiate_model(config, tokenizer):
    configure_devices(config)
    model = Model(config)
    optimizer = transformers.AdamW(model.parameters(), lr=config.learning_rate, weight_decay=0)
    metrics = None

    if config.continue_training:
        state_dict = torch.load(config.continue_training, map_location='cpu')
        model.load_state_dict(state_dict['model'])
        if 'optimizer_state_dict' in state_dict:
            optimizer.load_state_dict(state_dict['optimizer_state_dict'])
            for g in optimizer.param_groups:
                g['lr'] = config.learning_rate
        try:
            print(f"Loaded model:\nEpochs: {state_dict['epoch']}\nLoss: {state_dict['loss']}\n",
                  f"Recall: {state_dict['rec']}\nMRR: {state_dict['mrr']}")
        except KeyError:
            # the checkpoint may not carry these metrics
            pass

    if config.use_cuda:
        model = model.cuda()
        optimizer_to(optimizer, config.device)
        model = torch.nn.DataParallel(model, device_ids=config.devices)

    return model, optimizer, metrics
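# `optimizer_to` is referenced above (and in the second instantiate_model
# variant below) but not defined here. A minimal sketch of the usual helper --
# an assumption, not the original implementation: it walks the optimizer state
# and moves every tensor onto the target device.
import torch

def optimizer_to(optimizer, device):
    for state in optimizer.state.values():
        for k, v in state.items():
            if torch.is_tensor(v):
                state[k] = v.to(device)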
def display_network(opt):
    cuda = True if torch.cuda.is_available() else False

    # Dimensionality
    input_shape = (opt.channels, opt.img_height, opt.img_width)
    shared_dim = opt.dim * (2 ** opt.n_downsample)

    # Initialize generator and discriminator
    shared_E = ResidualBlock(in_channels=shared_dim)
    E1 = Encoder(dim=opt.dim, n_downsample=opt.n_downsample, shared_block=shared_E)
    E2 = Encoder(dim=opt.dim, n_downsample=opt.n_downsample, shared_block=shared_E)
    shared_G = ResidualBlock(in_channels=shared_dim)
    G1 = Generator(dim=opt.dim, n_upsample=opt.n_upsample, shared_block=shared_G)
    G2 = Generator(dim=opt.dim, n_upsample=opt.n_upsample, shared_block=shared_G)
    D1 = Discriminator(input_shape)
    D2 = Discriminator(input_shape)

    if cuda:
        E1 = E1.cuda()
        E2 = E2.cuda()
        G1 = G1.cuda()
        G2 = G2.cuda()
        D1 = D1.cuda()
        D2 = D2.cuda()

    summary(E1, (opt.channels, opt.img_height, opt.img_width))
    summary(E2, (opt.channels, opt.img_height, opt.img_width))
    summary(G1, (opt.img_height, opt.dim, opt.dim))
    summary(G2, (opt.img_height, opt.dim, opt.dim))
    summary(D1, (opt.channels, opt.img_height, opt.img_width))
    summary(D2, (opt.channels, opt.img_height, opt.img_width))
class PretrainingTrainer:
    def __init__(self):
        self.preprocessor = None
        self.model = None
        self.optimizer = None

    def setup_preprocessed_data(self):
        self.preprocessor = Preprocess()
        self.preprocessor.setup()

    def setup_model(self):
        # Create multilingual vocabulary
        self.model = Encoder()
        if con.CUDA:
            self.model = self.model.cuda()

    def setup_scheduler_optimizer(self):
        lr_rate = 0.001
        self.optimizer = optim.Adam(self.model.parameters(), lr=lr_rate, weight_decay=0)

    def train_model(self):
        train_loader = self.preprocessor.train_loaders
        batch_size = 8
        self.model.train()
        train_loss = 0
        batch_correct = 0
        total_correct = 0
        index = 0
        for hrl_src, lrl_src, hrl_att, lrl_att in train_loader:
            logits = self.model(hrl_src)
            print(logits.shape)
            break
            # self.optimizer.zero_grad()
            # batch_loss.backward()
            # torch.nn.utils.clip_grad_norm_(self.model.parameters(), 0.5)
            # self.optimizer.step()
            # batch_correct += self.evaluate(masked_outputs=masked_outputs, masked_lm_ids=masked_lm_ids)
            # total_correct += (8 * 20)

    def run_pretraining(self):
        self.setup_preprocessed_data()
        self.setup_model()
        self.setup_scheduler_optimizer()
        self.train_model()
def predict(image_name, model_path=None):
    print(len(data.dictionary))
    encoder = Encoder()
    decoder = DecoderWithAttention(len(data.dictionary))
    if cuda:
        encoder = encoder.cuda()
        decoder = decoder.cuda()
    if model_path:
        print('Loading the parameters of model.')
        if cuda:
            encoder.load_state_dict(torch.load(model_path[0]))
            decoder.load_state_dict(torch.load(model_path[1]))
        else:
            encoder.load_state_dict(torch.load(model_path[0], map_location='cpu'))
            decoder.load_state_dict(torch.load(model_path[1], map_location='cpu'))
    encoder.eval()
    decoder.eval()

    image = cv2.imread(image_name)
    image = cv2.resize(image, (224, 224))
    image = image.astype(np.float32) / 255.0
    image = image.transpose([2, 0, 1])
    image = np.expand_dims(image, axis=0)
    image = torch.from_numpy(image).type(torch.FloatTensor)
    if cuda:
        image = image.cuda()

    output = encoder(image)
    sentences, alphas = beam_search(data, decoder, output)
    show(image_name, sentences[0], alphas[0])

    for sentence in sentences:
        prediction = []
        for word in sentence:
            prediction.append(data.dictionary[word])
            if word == 2:  # stop at token id 2 (end of sentence)
                break
        prediction = ' '.join([word for word in prediction])
        print('The prediction sentence:', prediction)
def instantiate_model(config, tokenizer):
    configure_devices(config)
    model = Model(config)
    optimizer = transformers.AdamW(model.parameters(), lr=config.learning_rate, weight_decay=0)
    last_epoch = 0
    epoch_avg_loss = 0

    if config.continue_training:
        state_dict = torch.load(config.continue_training, map_location='cpu')
        model.load_state_dict(state_dict['model'])
        if 'optimizer_state_dict' in state_dict:
            optimizer.load_state_dict(state_dict['optimizer_state_dict'])
        last_epoch = state_dict['epoch']
        # epoch_avg_loss = state_dict['loss']
        # del state_dict  # TODO TEST

    if config.use_cuda:
        model = model.cuda()
        optimizer_to(optimizer, config.device)
        model = torch.nn.DataParallel(model, device_ids=config.devices)

    return model, optimizer, last_epoch, epoch_avg_loss
def infer(opt):
    cuda = True if torch.cuda.is_available() else False
    FloatTensor = torch.cuda.FloatTensor if cuda else torch.FloatTensor

    # Dimensionality
    shared_dim = opt.dim * (2 ** opt.n_downsample)

    # Initialize generator and discriminator
    shared_E = ResidualBlock(in_channels=shared_dim)
    shared_G = ResidualBlock(in_channels=shared_dim)
    E1 = Encoder(dim=opt.dim, n_downsample=opt.n_downsample, shared_block=shared_E)
    G2 = Generator(dim=opt.dim, n_upsample=opt.n_upsample, shared_block=shared_G)

    # '*' in the load pattern is replaced with each sub-network's name
    shared_E.load_state_dict(torch.load(opt.load_model.replace('*', 'shared_E')))
    shared_G.load_state_dict(torch.load(opt.load_model.replace('*', 'shared_G')))
    E1.load_state_dict(torch.load(opt.load_model.replace('*', 'E1')))
    G2.load_state_dict(torch.load(opt.load_model.replace('*', 'G2')))

    if cuda:
        shared_E.cuda()
        shared_G.cuda()
        E1 = E1.cuda()
        G2 = G2.cuda()

    sample = load_img(opt)
    sample = Variable(sample.unsqueeze(0).type(FloatTensor))
    _, Z1 = E1(sample)
    fake_X2 = G2(Z1)
    sample = torch.cat((sample.data, fake_X2.data), -1)
    save_image(sample, "images/infer.png", nrow=1, normalize=True)
def main(_):
    # Load the configuration file.
    with open(FLAGS.config, 'r') as f:
        config = yaml.safe_load(f)

    # Create the checkpoint directory if it does not already exist.
    ckpt_dir = os.path.join(config['data']['ckpt'], config['experiment_name'])
    if not os.path.exists(ckpt_dir):
        os.mkdir(ckpt_dir)

    # Check if a pre-existing configuration file exists and matches the current
    # configuration. Otherwise save a copy of the configuration to the
    # checkpoint directory.
    prev_config_path = os.path.join(ckpt_dir, 'config.yaml')
    if os.path.exists(prev_config_path):
        with open(prev_config_path, 'r') as f:
            prev_config = yaml.safe_load(f)
        assert config == prev_config
    else:
        shutil.copyfile(FLAGS.config, prev_config_path)

    # Load the vocabularies.
    src_vocab = Vocab.load(config['data']['src']['vocab'])
    tgt_vocab = Vocab.load(config['data']['tgt']['vocab'])

    # Load the training and dev datasets.
    train_data = ShakespeareDataset('train', config, src_vocab, tgt_vocab)
    dev_data = ShakespeareDataset('dev', config, src_vocab, tgt_vocab)

    # Build the model.
    src_vocab_size = len(src_vocab)
    tgt_vocab_size = len(tgt_vocab)
    encoder = Encoder(src_vocab_size, config['model']['embedding_dim'])
    decoder = Decoder(tgt_vocab_size, config['model']['embedding_dim'])
    if torch.cuda.is_available():
        encoder = encoder.cuda()
        decoder = decoder.cuda()

    # Define the loss function + optimizer. Weight 0 masks out the pad index.
    loss_weights = torch.ones(decoder.tgt_vocab_size)
    loss_weights[0] = 0
    if torch.cuda.is_available():
        loss_weights = loss_weights.cuda()
    criterion = torch.nn.NLLLoss(loss_weights)

    learning_rate = config['training']['learning_rate']
    encoder_optimizer = torch.optim.Adam(encoder.parameters(), lr=learning_rate)
    decoder_optimizer = torch.optim.Adam(decoder.parameters(), lr=learning_rate)

    # Restore saved model (if one exists).
    ckpt_path = os.path.join(ckpt_dir, 'model.pt')
    if os.path.exists(ckpt_path):
        print('Loading checkpoint: %s' % ckpt_path)
        ckpt = torch.load(ckpt_path)
        epoch = ckpt['epoch']
        encoder.load_state_dict(ckpt['encoder'])
        decoder.load_state_dict(ckpt['decoder'])
        encoder_optimizer.load_state_dict(ckpt['encoder_optimizer'])
        decoder_optimizer.load_state_dict(ckpt['decoder_optimizer'])
    else:
        epoch = 0

    train_log_string = '%s :: Epoch %i :: Iter %i / %i :: train loss: %0.4f'
    dev_log_string = '\n%s :: Epoch %i :: dev loss: %0.4f'

    while epoch < config['training']['num_epochs']:

        # Main training loop.
        train_loss = []
        sampler = RandomSampler(train_data)
        for i, train_idx in enumerate(sampler):
            src, tgt = train_data[train_idx]

            # Clear gradients.
            encoder_optimizer.zero_grad()
            decoder_optimizer.zero_grad()

            # Feed inputs one by one from src into encoder (in reverse).
            src_length = src.size()[0]
            hidden = None
            for j in reversed(range(src_length)):
                encoder_output, hidden = encoder(src[j], hidden)

            # Feed desired outputs one by one from tgt into decoder
            # and measure loss.
            tgt_length = tgt.size()[0]
            loss = 0
            for j in range(tgt_length - 1):
                decoder_output, hidden = decoder(tgt[j], hidden)
                loss += criterion(decoder_output, tgt[j + 1])

            # Backpropagate the loss and update the model parameters.
            loss.backward()
            encoder_optimizer.step()
            decoder_optimizer.step()

            train_loss.append(loss.data.cpu())

            # Every once in a while, check on the loss.
            if ((i + 1) % 100) == 0:
                print(train_log_string % (datetime.now(), epoch, i + 1,
                                          len(train_data), np.mean(train_loss)), end='\r')
                train_loss = []

        # Evaluation loop.
        dev_loss = []
        for src, tgt in dev_data:

            # Feed inputs one by one from src into encoder (in reverse).
            src_length = src.size()[0]
            hidden = None
            for j in reversed(range(src_length)):
                encoder_output, hidden = encoder(src[j], hidden)

            # Feed desired outputs one by one from tgt into decoder
            # and measure loss.
            tgt_length = tgt.size()[0]
            loss = 0
            for j in range(tgt_length - 1):
                decoder_output, hidden = decoder(tgt[j], hidden)
                loss += criterion(decoder_output, tgt[j + 1])

            dev_loss.append(loss.data.cpu())

        print(dev_log_string % (datetime.now(), epoch, np.mean(dev_loss)))

        state_dict = {
            'epoch': epoch,
            'encoder': encoder.state_dict(),
            'decoder': decoder.state_dict(),
            'encoder_optimizer': encoder_optimizer.state_dict(),
            'decoder_optimizer': decoder_optimizer.state_dict()
        }
        torch.save(state_dict, ckpt_path)

        epoch += 1
def train(config, encoder_in=None, decoder_in=None):
    train_data, word2index, tag2index, intent2index = preprocessing(config.file_path, config.max_length)
    if train_data is None:
        print("Please check your data or its path")
        return

    if encoder_in is not None:
        encoder = encoder_in
        decoder = decoder_in
    else:
        encoder = Encoder(len(word2index), config.embedding_size, config.hidden_size)
        decoder = Decoder(len(tag2index), len(intent2index), len(tag2index) // 3, config.hidden_size * 2)
        if USE_CUDA:
            encoder = encoder.cuda()
            decoder = decoder.cuda()
        encoder.init_weights()
        decoder.init_weights()

    loss_function_1 = nn.CrossEntropyLoss(ignore_index=0)
    loss_function_2 = nn.CrossEntropyLoss()
    enc_optim = optim.Adam(encoder.parameters(), lr=config.learning_rate)
    dec_optim = optim.Adam(decoder.parameters(), lr=config.learning_rate)

    for step in range(config.step_size):
        losses = []
        for i, batch in enumerate(getBatch(config.batch_size, train_data)):
            x, y_1, y_2 = zip(*batch)  # sin, sout, intent
            x = torch.cat(x)
            tag_target = torch.cat(y_1)
            intent_target = torch.cat(y_2)
            # Masks marking the padding (index 0) positions.
            x_mask = torch.cat([
                Variable(torch.ByteTensor(tuple(map(lambda s: s == 0, t.data)))).cuda()
                if USE_CUDA else
                Variable(torch.ByteTensor(tuple(map(lambda s: s == 0, t.data))))
                for t in x
            ]).view(config.batch_size, -1)
            y_1_mask = torch.cat([
                Variable(torch.ByteTensor(tuple(map(lambda s: s == 0, t.data)))).cuda()
                if USE_CUDA else
                Variable(torch.ByteTensor(tuple(map(lambda s: s == 0, t.data))))
                for t in tag_target
            ]).view(config.batch_size, -1)

            encoder.zero_grad()
            decoder.zero_grad()
            output, hidden_c = encoder(x, x_mask)
            start_decode = Variable(torch.LongTensor(
                [[word2index['<SOS>']] * config.batch_size])).cuda().transpose(1, 0) \
                if USE_CUDA else Variable(torch.LongTensor(
                    [[word2index['<SOS>']] * config.batch_size])).transpose(1, 0)
            tag_score, intent_score = decoder(start_decode, hidden_c, output, x_mask)

            loss_1 = loss_function_1(tag_score, tag_target.view(-1))
            loss_2 = loss_function_2(intent_score, intent_target)
            loss = loss_1 + loss_2
            losses.append(loss.data.cpu().numpy() if USE_CUDA else loss.data.numpy())
            loss.backward()

            torch.nn.utils.clip_grad_norm_(encoder.parameters(), 5.0)
            torch.nn.utils.clip_grad_norm_(decoder.parameters(), 5.0)
            enc_optim.step()
            dec_optim.step()

            if i % 100 == 0:
                print("Step", step, " epoch", i, " : ", np.mean(losses))
                losses = []

    t = Check()
    t.test(encoder, decoder)
    count = t.test_error_count
    rate = t.test_error_rate
    if not os.path.exists(config.model_dir):
        os.makedirs(config.model_dir)
    torch.save(decoder, os.path.join(config.model_dir, str(count) + '_' + str(rate) + '_' + 'decoder.pkl'))
    torch.save(encoder, os.path.join(config.model_dir, str(count) + '_' + str(rate) + '_' + 'encoder.pkl'))
    # torch.save(decoder.state_dict(), os.path.join(config.model_dir, 'jointnlu-decoder.pkl'))
    # torch.save(encoder.state_dict(), os.path.join(config.model_dir, 'jointnlu-encoder.pkl'))
    print("Train Complete!")
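# `getBatch` is used above but not defined here. A minimal sketch under the
# assumption that train_data is a list of (sin, sout, intent) tensor triples;
# it shuffles the data and yields fixed-size batches, dropping the remainder.
import random

def getBatch(batch_size, train_data):
    random.shuffle(train_data)
    for i in range(0, len(train_data) - batch_size + 1, batch_size):
        yield train_data[i:i + batch_size]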
    plt.yticks(())
    plt.show()


if __name__ == '__main__':
    # predict('./data/RSICD/RSICD_images/00110.jpg',
    #         ['./models/train/encoder_mobilenet_60000.pkl', './models/train/decoder_60000.pkl'])
    # predict('./data/RSICD/test/00029.jpg',
    #         ['./models/train/encoder_resnet_50000.pkl', './models/train/decoder_50000.pkl'])
    model_path = [
        './models/train/encoder_mobilenet_60000.pkl',
        './models/train/decoder_60000.pkl'
    ]
    encoder = Encoder()
    decoder = DecoderWithAttention(len(data.dictionary))
    if cuda:
        encoder = encoder.cuda()
        decoder = decoder.cuda()
    if model_path:
        print('Loading the parameters of model.')
        if cuda:
            encoder.load_state_dict(torch.load(model_path[0]))
            decoder.load_state_dict(torch.load(model_path[1]))
        else:
            encoder.load_state_dict(torch.load(model_path[0], map_location='cpu'))
            decoder.load_state_dict(torch.load(model_path[1], map_location='cpu'))
    encoder.eval()
    decoder.eval()
    test_eval(encoder, decoder, data)
def train(description_db, entity_db, word_vocab, entity_vocab, target_entity_vocab, out_file,
          embeddings, dim_size, batch_size, negative, epoch, optimizer, max_text_len,
          max_entity_len, pool_size, seed, save, **model_params):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)

    # Randomly initialized embedding matrices; row 0 is reserved for padding.
    word_matrix = np.random.uniform(low=-0.05, high=0.05, size=(word_vocab.size, dim_size))
    word_matrix = np.vstack([np.zeros(dim_size), word_matrix]).astype('float32')

    entity_matrix = np.random.uniform(low=-0.05, high=0.05, size=(entity_vocab.size, dim_size))
    entity_matrix = np.vstack([np.zeros(dim_size), entity_matrix]).astype('float32')

    target_entity_matrix = np.random.uniform(low=-0.05, high=0.05,
                                             size=(target_entity_vocab.size, dim_size))
    target_entity_matrix = np.vstack([np.zeros(dim_size), target_entity_matrix]).astype('float32')

    # Overwrite the random rows with pretrained vectors where available.
    for embedding in embeddings:
        for word in word_vocab:
            vec = embedding.get_word_vector(word)
            if vec is not None:
                word_matrix[word_vocab.get_index(word)] = vec
        for title in entity_vocab:
            vec = embedding.get_entity_vector(title)
            if vec is not None:
                entity_matrix[entity_vocab.get_index(title)] = vec
        for title in target_entity_vocab:
            vec = embedding.get_entity_vector(title)
            if vec is not None:
                target_entity_matrix[target_entity_vocab.get_index(title)] = vec

    entity_negatives = np.arange(1, target_entity_matrix.shape[0])

    model_params.update(dict(dim_size=dim_size))
    model = Encoder(word_embedding=word_matrix,
                    entity_embedding=entity_matrix,
                    target_entity_embedding=target_entity_matrix,
                    word_vocab=word_vocab,
                    entity_vocab=entity_vocab,
                    target_entity_vocab=target_entity_vocab,
                    **model_params)
    del word_matrix
    del entity_matrix
    del target_entity_matrix

    model = model.cuda()
    model.train()
    parameters = [p for p in model.parameters() if p.requires_grad]
    optimizer_ins = getattr(optim, optimizer)(parameters)

    n_correct = 0
    n_total = 0
    cur_correct = 0
    cur_total = 0
    cur_loss = 0.0
    batch_idx = 0

    joblib.dump(dict(model_params=model_params,
                     word_vocab=word_vocab.serialize(),
                     entity_vocab=entity_vocab.serialize(),
                     target_entity_vocab=target_entity_vocab.serialize()),
                out_file + '.pkl')

    if not save or 0 in save:
        state_dict = model.state_dict()
        torch.save(state_dict, out_file + '_epoch0.bin')

    for n_epoch in range(1, epoch + 1):
        logger.info('Epoch: %d', n_epoch)

        for (batch_idx, (args, target)) in enumerate(
                generate_data(description_db, word_vocab, entity_vocab, target_entity_vocab,
                              entity_negatives, batch_size, negative, max_text_len,
                              max_entity_len, pool_size), batch_idx):
            # `async` became a reserved word in Python 3.7; use non_blocking.
            args = tuple([o.cuda(non_blocking=True) for o in args])
            target = target.cuda()

            optimizer_ins.zero_grad()
            output = model(args)
            loss = F.cross_entropy(output, target)
            loss.backward()
            optimizer_ins.step()

            cur_correct += (torch.max(output, 1)[1].view(target.size()).data == target.data).sum().item()
            cur_total += len(target)
            cur_loss += loss.item()

            if batch_idx != 0 and batch_idx % 1000 == 0:
                n_correct += cur_correct
                n_total += cur_total
                logger.info('Processed %d batches (epoch: %d, loss: %.4f acc: %.4f total acc: %.4f)' % (
                    batch_idx, n_epoch, cur_loss / cur_total,
                    100. * cur_correct / cur_total, 100. * n_correct / n_total))
                cur_correct = 0
                cur_total = 0
                cur_loss = 0.0
        DATA_PATH, train=False, download=True,
        transform=transforms.ToTensor()),
    batch_size=NUM_BATCH, shuffle=True)


def cuda_tensors(obj):
    # Move every tensor attribute of `obj` onto the GPU.
    for attr in dir(obj):
        value = getattr(obj, attr)
        if isinstance(value, torch.Tensor):
            setattr(obj, attr, value.cuda())


enc = Encoder()
dec = Decoder()
if CUDA:
    enc.cuda()
    dec.cuda()
    cuda_tensors(enc)
    cuda_tensors(dec)

optimizer = torch.optim.Adam(list(enc.parameters()) + list(dec.parameters()),
                             lr=LEARNING_RATE, betas=(BETA1, 0.999))


def elbo(q, p, alpha=0.1):
    if NUM_SAMPLES is None:
        return probtorch.objectives.montecarlo.elbo(q, p, sample_dim=None, batch_dim=0,
class Image_Captioning:
    def __init__(self):
        parser = argparse.ArgumentParser(description='Image Captioning')
        parser.add_argument('--root', default='../../../cocodataset/', type=str)
        parser.add_argument('--crop_size', default=224, type=int)
        parser.add_argument('--epochs', default=100, type=int)
        parser.add_argument('--lr', default=1e-4, type=float)
        parser.add_argument('--batch_size', default=128, type=int)
        parser.add_argument('--num_workers', default=4, type=int)
        parser.add_argument('--embed_dim', default=256, type=int)
        parser.add_argument('--hidden_size', default=512, type=int)
        parser.add_argument('--num_layers', default=1, type=int)
        parser.add_argument('--model_path', default='./model/', type=str)
        parser.add_argument('--vocab_path', default='./vocab/', type=str)
        parser.add_argument('--save_step', default=1000, type=int)
        self.args = parser.parse_args()

        self.Multi_GPU = False
        # if torch.cuda.device_count() > 1:
        #     print('Multi GPU Activate!')
        #     print('Using GPU :', int(torch.cuda.device_count()))
        #     self.Multi_GPU = True

        os.makedirs(self.args.model_path, exist_ok=True)

        transform = transforms.Compose([
            transforms.RandomCrop(self.args.crop_size),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
        ])

        with open(self.args.vocab_path + 'vocab.pickle', 'rb') as f:
            data = pickle.load(f)
        self.vocab = data

        self.DataLoader = get_dataloader(root=self.args.root,
                                         transform=transform,
                                         shuffle=True,
                                         batch_size=self.args.batch_size,
                                         num_workers=self.args.num_workers,
                                         vocab=self.vocab)

        self.Encoder = Encoder(embed_dim=self.args.embed_dim)
        self.Decoder = Decoder(embed_dim=self.args.embed_dim,
                               hidden_size=self.args.hidden_size,
                               vocab_size=len(self.vocab),
                               num_layers=self.args.num_layers)

    def train(self):
        if self.Multi_GPU:
            self.Encoder = torch.nn.DataParallel(self.Encoder)
            self.Decoder = torch.nn.DataParallel(self.Decoder)
            parameters = list(self.Encoder.module.fc.parameters()) + \
                list(self.Encoder.module.BN.parameters()) + \
                list(self.Decoder.parameters())
        else:
            parameters = list(self.Encoder.fc.parameters()) + \
                list(self.Encoder.BN.parameters()) + \
                list(self.Decoder.parameters())

        criterion = nn.CrossEntropyLoss()
        optimizer = optim.Adam(parameters, lr=self.args.lr)

        self.Encoder.cuda()
        self.Decoder.cuda()
        self.Encoder.train()
        self.Decoder.train()

        print('-' * 100)
        print('Now Training')
        print('-' * 100)
        for epoch in range(self.args.epochs):
            total_loss = 0
            for batch_idx, (image, captions, lengths) in enumerate(self.DataLoader):
                optimizer.zero_grad()
                image, captions = image.cuda(), captions.cuda()
                targets = pack_padded_sequence(captions, lengths, batch_first=True)[0]

                if self.Multi_GPU:
                    # use the functional form to run a module across GPUs
                    # (nn.parallel.DataParallel is a wrapper class, not a call)
                    img_features = nn.parallel.data_parallel(self.Encoder, image)
                    outputs = nn.parallel.data_parallel(self.Decoder, (img_features, captions, lengths))
                else:
                    img_features = self.Encoder(image)
                    outputs = self.Decoder(img_features, captions, lengths)

                loss = criterion(outputs, targets)
                total_loss += loss.item()
                loss.backward()
                optimizer.step()

                if batch_idx % 30 == 0:
                    print('Epoch : {}, Step : [{}/{}], Step Loss : {:.4f}'.format(
                        epoch, batch_idx, len(self.DataLoader), loss.item()))

            print('Epoch : [{}/{}], Total loss : {:.4f}'.format(
                epoch, self.args.epochs, total_loss / len(self.DataLoader)))

        print('Now saving the models')
        torch.save(self.Encoder.state_dict(),
                   self.args.model_path + 'Encoder-{}.ckpt'.format(self.args.epochs))
        torch.save(self.Decoder.state_dict(),
                   self.args.model_path + 'Decoder-{}.ckpt'.format(self.args.epochs))
class Classifier(object):
    def __init__(self, hps, data_loader, valid_data_loader, log_dir='./log/'):
        self.hps = hps
        self.data_loader = data_loader
        self.valid_data_loader = valid_data_loader
        self.model_kept = []
        self.max_keep = 10
        self.build_model()
        self.logger = Logger(log_dir)

    def build_model(self):
        hps = self.hps
        self.SpeakerClassifier = SpeakerClassifier(ns=hps.ns, dp=hps.dp, n_class=hps.n_speakers)
        self.Encoder = Encoder(ns=hps.ns)
        if torch.cuda.is_available():
            self.SpeakerClassifier.cuda()
            self.Encoder.cuda()
        betas = (0.5, 0.9)
        self.opt = optim.Adam(self.SpeakerClassifier.parameters(), lr=self.hps.lr, betas=betas)

    def load_encoder(self, model_path):
        print('load model from {}'.format(model_path))
        with open(model_path, 'rb') as f_in:
            all_model = torch.load(f_in)
            self.Encoder.load_state_dict(all_model['encoder'])

    def save_model(self, model_path, iteration):
        new_model_path = '{}-{}'.format(model_path, iteration)
        torch.save(self.SpeakerClassifier.state_dict(), new_model_path)
        self.model_kept.append(new_model_path)
        if len(self.model_kept) >= self.max_keep:
            os.remove(self.model_kept[0])
            self.model_kept.pop(0)

    def load_model(self, model_path):
        print('load model from {}'.format(model_path))
        self.SpeakerClassifier.load_state_dict(torch.load(model_path))

    def set_eval(self):
        self.SpeakerClassifier.eval()

    def set_train(self):
        self.SpeakerClassifier.train()

    def permute_data(self, data):
        C = to_var(data[0], requires_grad=False)
        X = to_var(data[2]).permute(0, 2, 1)
        return C, X

    def encode_step(self, x):
        enc = self.Encoder(x)
        return enc

    def forward_step(self, enc):
        logits = self.SpeakerClassifier(enc)
        return logits

    def cal_loss(self, logits, y_true):
        # calculate loss
        criterion = nn.CrossEntropyLoss()
        loss = criterion(logits, y_true)
        return loss

    def valid(self, n_batches=10):
        # input: valid data, output: (loss, acc)
        total_loss, total_acc = 0., 0.
        self.set_eval()
        for i in range(n_batches):
            data = next(self.valid_data_loader)
            y, x = self.permute_data(data)
            enc = self.Encoder(x)
            logits = self.SpeakerClassifier(enc)
            loss = self.cal_loss(logits, y)
            acc = cal_acc(logits, y)
            total_loss += loss.item()
            total_acc += acc
        self.set_train()
        return total_loss / n_batches, total_acc / n_batches

    def train(self, model_path, flag='train'):
        # load hyperparams
        hps = self.hps
        for iteration in range(hps.iters):
            data = next(self.data_loader)
            y, x = self.permute_data(data)
            # encode
            enc = self.encode_step(x)
            # forward to classifier
            logits = self.forward_step(enc)
            # calculate loss
            loss = self.cal_loss(logits, y)
            # optimize
            reset_grad([self.SpeakerClassifier])
            loss.backward()
            grad_clip([self.SpeakerClassifier], self.hps.max_grad_norm)
            self.opt.step()
            # calculate acc
            acc = cal_acc(logits, y)
            # print info
            info = {
                f'{flag}/loss': loss.item(),
                f'{flag}/acc': acc,
            }
            slot_value = (iteration + 1, hps.iters) + tuple([value for value in info.values()])
            log = 'iter:[%06d/%06d], loss=%.3f, acc=%.3f'
            print(log % slot_value, end='\r')
            for tag, value in info.items():
                self.logger.scalar_summary(tag, value, iteration)
            if iteration % 1000 == 0 or iteration + 1 == hps.iters:
                valid_loss, valid_acc = self.valid(n_batches=10)
                # print info
                info = {
                    f'{flag}/valid_loss': valid_loss,
                    f'{flag}/valid_acc': valid_acc,
                }
                slot_value = (iteration + 1, hps.iters) + \
                    tuple([value for value in info.values()])
                log = 'iter:[%06d/%06d], valid_loss=%.3f, valid_acc=%.3f'
                print(log % slot_value)
                for tag, value in info.items():
                    self.logger.scalar_summary(tag, value, iteration)
                self.save_model(model_path, iteration)
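# `cal_acc` is called above but defined elsewhere. A minimal sketch of the
# usual helper -- an assumption about its behaviour, mirroring Solver.cal_acc
# below: the fraction of argmax predictions that match the labels.
import torch

def cal_acc(logits, y_true):
    _, ind = torch.max(logits, dim=1)
    return torch.sum((ind == y_true).float()).item() / y_true.size(0)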
class Solver(object):
    def __init__(self, hps, data_loader, log_dir='./log/'):
        self.hps = hps
        self.data_loader = data_loader
        self.model_kept = []
        self.max_keep = 20
        self.build_model()
        self.logger = Logger(log_dir)

    def build_model(self):
        hps = self.hps
        ns = self.hps.ns
        emb_size = self.hps.emb_size
        self.Encoder = Encoder(ns=ns, dp=hps.enc_dp)
        self.Decoder = Decoder(ns=ns, c_a=hps.n_speakers, emb_size=emb_size)
        self.Generator = Decoder(ns=ns, c_a=hps.n_speakers, emb_size=emb_size)
        self.LatentDiscriminator = LatentDiscriminator(ns=ns, dp=hps.dis_dp)
        self.PatchDiscriminator = PatchDiscriminator(ns=ns, n_class=hps.n_speakers)
        if torch.cuda.is_available():
            self.Encoder.cuda()
            self.Decoder.cuda()
            self.Generator.cuda()
            self.LatentDiscriminator.cuda()
            self.PatchDiscriminator.cuda()
        betas = (0.5, 0.9)
        params = list(self.Encoder.parameters()) + list(self.Decoder.parameters())
        self.ae_opt = optim.Adam(params, lr=self.hps.lr, betas=betas)
        self.gen_opt = optim.Adam(self.Generator.parameters(), lr=self.hps.lr, betas=betas)
        self.lat_opt = optim.Adam(self.LatentDiscriminator.parameters(), lr=self.hps.lr, betas=betas)
        self.patch_opt = optim.Adam(self.PatchDiscriminator.parameters(), lr=self.hps.lr, betas=betas)
        # Decoder-only optimizer for the patch step in train(); the original
        # called self.decoder_opt.step() without ever defining it.
        self.decoder_opt = optim.Adam(self.Decoder.parameters(), lr=self.hps.lr, betas=betas)

    def save_model(self, model_path, iteration, enc_only=True):
        if not enc_only:
            all_model = {
                'encoder': self.Encoder.state_dict(),
                'decoder': self.Decoder.state_dict(),
                'generator': self.Generator.state_dict(),
                'latent_discriminator': self.LatentDiscriminator.state_dict(),
                'patch_discriminator': self.PatchDiscriminator.state_dict(),
            }
        else:
            all_model = {
                'encoder': self.Encoder.state_dict(),
                'decoder': self.Decoder.state_dict(),
                'generator': self.Generator.state_dict(),
            }
        new_model_path = '{}-{}'.format(model_path, iteration)
        with open(new_model_path, 'wb') as f_out:
            torch.save(all_model, f_out)
        self.model_kept.append(new_model_path)
        if len(self.model_kept) >= self.max_keep:
            os.remove(self.model_kept[0])
            self.model_kept.pop(0)

    def load_model(self, model_path, enc_only=True):
        print('load model from {}'.format(model_path))
        with open(model_path, 'rb') as f_in:
            all_model = torch.load(f_in)
        self.Encoder.load_state_dict(all_model['encoder'])
        self.Decoder.load_state_dict(all_model['decoder'])
        # self.Generator.load_state_dict(all_model['generator'])
        if not enc_only:
            self.LatentDiscriminator.load_state_dict(all_model['latent_discriminator'])
            self.PatchDiscriminator.load_state_dict(all_model['patch_discriminator'])

    def set_eval(self):
        self.Encoder.eval()
        self.Decoder.eval()
        self.Generator.eval()
        # self.LatentDiscriminator.eval()

    def test_step(self, x, c):
        self.set_eval()
        x = to_var(x).permute(0, 2, 1)
        enc = self.Encoder(x)
        x_tilde = self.Decoder(enc, c)
        return x_tilde.data.cpu().numpy()

    def permute_data(self, data):
        C = [to_var(c, requires_grad=False) for c in data[:2]]
        X = [to_var(x).permute(0, 2, 1) for x in data[2:]]
        return C, X

    def sample_c(self, size):
        c_sample = Variable(torch.multinomial(torch.ones(8), num_samples=size, replacement=True),
                            requires_grad=False)
        c_sample = c_sample.cuda() if torch.cuda.is_available() else c_sample
        return c_sample

    def cal_acc(self, logits, y_true):
        _, ind = torch.max(logits, dim=1)
        acc = torch.sum((ind == y_true).type(torch.FloatTensor)) / y_true.size(0)
        return acc

    def encode_step(self, *args):
        enc_list = []
        for x in args:
            enc = self.Encoder(x)
            enc_list.append(enc)
        return tuple(enc_list)

    def decode_step(self, enc, c):
        x_tilde = self.Decoder(enc, c)
        return x_tilde

    def latent_discriminate_step(self, enc_i_t, enc_i_tk, enc_i_prime, enc_j, is_dis=True):
        same_pair = torch.cat([enc_i_t, enc_i_tk], dim=1)
        diff_pair = torch.cat([enc_i_prime, enc_j], dim=1)
        if is_dis:
            same_val = self.LatentDiscriminator(same_pair)
            diff_val = self.LatentDiscriminator(diff_pair)
            w_dis = torch.mean(same_val - diff_val)
            gp = calculate_gradients_penalty(self.LatentDiscriminator, same_pair, diff_pair)
            return w_dis, gp
        else:
            diff_val = self.LatentDiscriminator(diff_pair)
            loss_adv = -torch.mean(diff_val)
            return loss_adv

    def patch_discriminate_step(self, x, x_tilde, cal_gp=True):
        # w-distance
        D_real, real_logits = self.PatchDiscriminator(x, classify=True)
        D_fake, fake_logits = self.PatchDiscriminator(x_tilde, classify=True)
        w_dis = torch.mean(D_real - D_fake)
        if cal_gp:
            gp = calculate_gradients_penalty(self.PatchDiscriminator, x, x_tilde)
            return w_dis, real_logits, fake_logits, gp
        else:
            return w_dis, real_logits, fake_logits

    def classify_step(self, real_logits, fake_logits, c, c_sample):
        # aux classify loss (restored from the commented-out backup so that
        # c_loss/real_acc/fake_acc used in train() are actually defined)
        criterion = nn.NLLLoss()
        c_loss = criterion(real_logits, c) + criterion(fake_logits, c_sample)
        real_acc = self.cal_acc(real_logits, c)
        fake_acc = self.cal_acc(fake_logits, c_sample)
        return c_loss, real_acc, fake_acc

    def train(self, model_path, flag='train'):
        # load hyperparams
        hps = self.hps
        for iteration in range(hps.iters):
            # calculate current alpha
            if iteration + 1 < hps.lat_sched_iters and iteration >= hps.enc_pretrain_iters:
                current_alpha = hps.alpha_enc * (iteration + 1 - hps.enc_pretrain_iters) / \
                    (hps.lat_sched_iters - hps.enc_pretrain_iters)
            else:
                current_alpha = 0
            if iteration >= hps.enc_pretrain_iters:
                n_latent_steps = hps.n_latent_steps \
                    if iteration > hps.enc_pretrain_iters else hps.dis_pretrain_iters
                for step in range(n_latent_steps):
                    #===================== Train latent discriminator =====================#
                    data = next(self.data_loader)
                    (c_i, c_j), (x_i_t, x_i_tk, x_i_prime, x_j) = self.permute_data(data)
                    # encode
                    enc_i_t, enc_i_tk, enc_i_prime, enc_j = self.encode_step(
                        x_i_t, x_i_tk, x_i_prime, x_j)
                    # latent discriminate
                    latent_w_dis, latent_gp = self.latent_discriminate_step(
                        enc_i_t, enc_i_tk, enc_i_prime, enc_j)
                    lat_loss = -hps.alpha_dis * latent_w_dis + hps.lambda_ * latent_gp
                    reset_grad([self.LatentDiscriminator])
                    lat_loss.backward()
                    grad_clip([self.LatentDiscriminator], self.hps.max_grad_norm)
                    self.lat_opt.step()
                    # print info
                    info = {
                        f'{flag}/D_latent_w_dis': latent_w_dis.item(),
                        f'{flag}/latent_gp': latent_gp.item(),
                    }
                    slot_value = (step, iteration + 1, hps.iters) + \
                        tuple([value for value in info.values()])
                    log = 'lat_D-%d:[%06d/%06d], w_dis=%.3f, gp=%.2f'
                    print(log % slot_value)
                    for tag, value in info.items():
                        self.logger.scalar_summary(tag, value, iteration)
            # two stage training
            if iteration >= hps.patch_start_iter:
                for step in range(hps.n_patch_steps):
                    #===================== Train patch discriminator =====================#
                    data = next(self.data_loader)
                    (c_i, _), (x_i_t, _, _, _) = self.permute_data(data)
                    # encode
                    enc_i_t, = self.encode_step(x_i_t)
                    c_sample = self.sample_c(x_i_t.size(0))
                    x_tilde = self.decode_step(enc_i_t, c_i)
                    # aux classify loss
                    patch_w_dis, real_logits, fake_logits, patch_gp = \
                        self.patch_discriminate_step(x_i_t, x_tilde, cal_gp=True)
                    c_loss, real_acc, fake_acc = self.classify_step(
                        real_logits, fake_logits, c_i, c_sample)
                    patch_loss = -hps.beta_dis * patch_w_dis + hps.lambda_ * patch_gp + hps.beta_clf * c_loss
                    reset_grad([self.PatchDiscriminator])
                    patch_loss.backward()
                    grad_clip([self.PatchDiscriminator], self.hps.max_grad_norm)
                    self.patch_opt.step()
                    # print info
                    info = {
                        f'{flag}/D_patch_w_dis': patch_w_dis.item(),
                        f'{flag}/patch_gp': patch_gp.item(),
                        f'{flag}/c_loss': c_loss.item(),
                        f'{flag}/real_acc': real_acc,
                        f'{flag}/fake_acc': fake_acc,
                    }
                    slot_value = (step, iteration + 1, hps.iters) + \
                        tuple([value for value in info.values()])
                    log = 'patch_D-%d:[%06d/%06d], w_dis=%.3f, gp=%.2f, c_loss=%.3f, real_acc=%.2f, fake_acc=%.2f'
                    print(log % slot_value)
                    for tag, value in info.items():
                        self.logger.scalar_summary(tag, value, iteration)
            #===================== Train G =====================#
            data = next(self.data_loader)
            (c_i, c_j), (x_i_t, x_i_tk, x_i_prime, x_j) = self.permute_data(data)
            # encode
            enc_i_t, enc_i_tk, enc_i_prime, enc_j = self.encode_step(
                x_i_t, x_i_tk, x_i_prime, x_j)
            # decode
            x_tilde = self.decode_step(enc_i_t, c_i)
            loss_rec = torch.mean(torch.abs(x_tilde - x_i_t))
            # latent discriminate
            loss_adv = self.latent_discriminate_step(
                enc_i_t, enc_i_tk, enc_i_prime, enc_j, is_dis=False)
            ae_loss = loss_rec + current_alpha * loss_adv
            reset_grad([self.Encoder, self.Decoder])
            retain_graph = True if hps.n_patch_steps > 0 else False
            ae_loss.backward(retain_graph=retain_graph)
            grad_clip([self.Encoder, self.Decoder], self.hps.max_grad_norm)
            self.ae_opt.step()
            info = {
                f'{flag}/loss_rec': loss_rec.item(),
                f'{flag}/loss_adv': loss_adv.item(),
                f'{flag}/alpha': current_alpha,
            }
            slot_value = (iteration + 1, hps.iters) + tuple([value for value in info.values()])
            log = 'G:[%06d/%06d], loss_rec=%.2f, loss_adv=%.2f, alpha=%.2e'
            print(log % slot_value)
            for tag, value in info.items():
                self.logger.scalar_summary(tag, value, iteration + 1)
            # patch discriminate
            if hps.n_patch_steps > 0 and iteration >= hps.patch_start_iter:
                c_sample = self.sample_c(x_i_t.size(0))
                x_tilde = self.decode_step(enc_i_t, c_sample)
                patch_w_dis, real_logits, fake_logits = \
                    self.patch_discriminate_step(x_i_t, x_tilde, cal_gp=False)
                c_loss, real_acc, fake_acc = self.classify_step(
                    real_logits, fake_logits, c_i, c_sample)
                patch_loss = hps.beta_dec * patch_w_dis + hps.beta_clf * c_loss
                reset_grad([self.Decoder])
                patch_loss.backward()
                grad_clip([self.Decoder], self.hps.max_grad_norm)
                self.decoder_opt.step()
                info = {
                    f'{flag}/G_patch_w_dis': patch_w_dis.item(),
                    f'{flag}/c_loss': c_loss.item(),
                    f'{flag}/real_acc': real_acc,
                    f'{flag}/fake_acc': fake_acc,
                }
                slot_value = (iteration + 1, hps.iters) + tuple([value for value in info.values()])
                log = 'G:[%06d/%06d]: patch_w_dis=%.2f, c_loss=%.2f, real_acc=%.2f, fake_acc=%.2f'
                print(log % slot_value)
                for tag, value in info.items():
                    self.logger.scalar_summary(tag, value, iteration + 1)
            if iteration % 1000 == 0 or iteration + 1 == hps.iters:
                self.save_model(model_path, iteration)
def train(config):
    train_data, word2index, tag2index, intent2index = preprocessing(
        config.file_path, config.max_length)
    if train_data is None:
        print("Please check your data or its path")
        return

    encoder = Encoder(len(word2index), config.embedding_size, config.hidden_size)
    decoder = Decoder(len(tag2index), len(intent2index),
                      len(tag2index) // 3, config.hidden_size * 2)
    if USE_CUDA:
        encoder = encoder.cuda()
        decoder = decoder.cuda()

    encoder.init_weights()
    decoder.init_weights()

    loss_function_1 = nn.CrossEntropyLoss(ignore_index=0)
    loss_function_2 = nn.CrossEntropyLoss()
    enc_optim = optim.Adam(encoder.parameters(), lr=config.learning_rate)
    dec_optim = optim.Adam(decoder.parameters(), lr=config.learning_rate)

    for step in range(config.step_size):
        losses = []
        for i, batch in enumerate(getBatch(config.batch_size, train_data)):
            x, y_1, y_2 = zip(*batch)
            x = torch.cat(x)
            tag_target = torch.cat(y_1)
            intent_target = torch.cat(y_2)
            # Masks marking the padding (index 0) positions.
            x_mask = torch.cat([
                Variable(torch.ByteTensor(tuple(map(lambda s: s == 0, t.data)))).cuda()
                if USE_CUDA else
                Variable(torch.ByteTensor(tuple(map(lambda s: s == 0, t.data))))
                for t in x
            ]).view(config.batch_size, -1)
            y_1_mask = torch.cat([
                Variable(torch.ByteTensor(tuple(map(lambda s: s == 0, t.data)))).cuda()
                if USE_CUDA else
                Variable(torch.ByteTensor(tuple(map(lambda s: s == 0, t.data))))
                for t in tag_target
            ]).view(config.batch_size, -1)

            encoder.zero_grad()
            decoder.zero_grad()
            output, hidden_c = encoder(x, x_mask)
            start_decode = Variable(torch.LongTensor(
                [[word2index['<SOS>']] * config.batch_size])).cuda().transpose(1, 0) \
                if USE_CUDA else Variable(torch.LongTensor(
                    [[word2index['<SOS>']] * config.batch_size])).transpose(1, 0)
            tag_score, intent_score = decoder(start_decode, hidden_c, output, x_mask)

            loss_1 = loss_function_1(tag_score, tag_target.view(-1))
            loss_2 = loss_function_2(intent_score, intent_target)
            loss = loss_1 + loss_2
            losses.append(loss.data.cpu().numpy() if USE_CUDA else loss.data.numpy())
            loss.backward()

            torch.nn.utils.clip_grad_norm_(encoder.parameters(), 5.0)
            torch.nn.utils.clip_grad_norm_(decoder.parameters(), 5.0)
            enc_optim.step()
            dec_optim.step()

            if i % 100 == 0:
                with open("result.txt", "a+") as f:
                    print(f"Step {step}, epoch {i}: mean loss {np.mean(losses)}")
                    f.write(f"Step {step}, epoch {i}: mean loss {np.mean(losses)}")
                    f.write("\n")
                losses = []

    if not os.path.exists(config.model_dir):
        os.makedirs(config.model_dir)
    torch.save(decoder.state_dict(), os.path.join(config.model_dir, 'jointnlu-decoder.pkl'))
    torch.save(encoder.state_dict(), os.path.join(config.model_dir, 'jointnlu-encoder.pkl'))
    print("Train Complete!")
def main(args):
    """
    Training and validation.
    """
    global best_bleu4, epochs_since_improvement, checkpoint, start_epoch, fine_tune_encoder, data_name, word_map

    with open(args.vocab_path, 'rb') as f:
        word_map = pickle.load(f)

    # Initialize / load checkpoint
    if checkpoint is None:
        decoder = DecoderWithAttention(attention_dim=attention_dim,
                                       embed_dim=emb_dim,
                                       decoder_dim=decoder_dim,
                                       vocab_size=len(word_map),
                                       dropout=dropout)
        decoder_optimizer = torch.optim.Adam(
            params=filter(lambda p: p.requires_grad, decoder.parameters()),
            lr=decoder_lr)
        encoder = Encoder()
        encoder.fine_tune(fine_tune_encoder)
        encoder_optimizer = torch.optim.Adam(
            params=filter(lambda p: p.requires_grad, encoder.parameters()),
            lr=encoder_lr) if fine_tune_encoder else None
    else:
        checkpoint = torch.load(checkpoint)
        start_epoch = checkpoint['epoch'] + 1
        epochs_since_improvement = checkpoint['epochs_since_improvement']
        best_bleu4 = checkpoint['bleu-4']
        decoder = checkpoint['decoder']
        decoder_optimizer = checkpoint['decoder_optimizer']
        encoder = checkpoint['encoder']
        encoder_optimizer = checkpoint['encoder_optimizer']
        if fine_tune_encoder is True and encoder_optimizer is None:
            encoder.fine_tune(fine_tune_encoder)
            encoder_optimizer = torch.optim.Adam(
                params=filter(lambda p: p.requires_grad, encoder.parameters()),
                lr=encoder_lr)

    if torch.cuda.is_available():
        encoder.cuda()
        decoder.cuda()

    criterion = nn.CrossEntropyLoss()
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    transform = transforms.Compose([
        transforms.RandomCrop(args.crop_size),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    ])
    train_loader = get_loader(args.train_image_dir, args.caption_path, word_map,
                              transform, args.batch_size,
                              shuffle=True, num_workers=args.num_workers)
    val_loader = get_loader(args.val_image_dir, args.caption_path, word_map,
                            transform, args.batch_size,
                            shuffle=True, num_workers=args.num_workers)

    for epoch in range(start_epoch, epochs):
        # Stop after 20 epochs without improvement; decay the learning rate
        # every 8 such epochs.
        if epochs_since_improvement == 20:
            break
        if epochs_since_improvement > 0 and epochs_since_improvement % 8 == 0:
            adjust_learning_rate(decoder_optimizer, 0.8)
            if fine_tune_encoder:
                adjust_learning_rate(encoder_optimizer, 0.8)

        train(train_loader=train_loader,
              encoder=encoder,
              decoder=decoder,
              criterion=criterion,
              encoder_optimizer=encoder_optimizer,
              decoder_optimizer=decoder_optimizer,
              epoch=epoch)

        recent_bleu4 = validate(val_loader=val_loader,
                                encoder=encoder,
                                decoder=decoder,
                                criterion=criterion)

        is_best = recent_bleu4 > best_bleu4
        best_bleu4 = max(recent_bleu4, best_bleu4)
        if not is_best:
            epochs_since_improvement += 1
            print("\nEpochs since last improvement: %d\n" % (epochs_since_improvement,))
        else:
            epochs_since_improvement = 0

        save_checkpoint(data_name, epoch, epochs_since_improvement, encoder, decoder,
                        encoder_optimizer, decoder_optimizer, recent_bleu4, is_best)
def train_dynamics(env, args, writer=None):
    """
    Trains the Dynamics module. Supervised.

    Arguments:
        env: the initialized environment (rllab/gym)
        args: input arguments
        writer: initialized summary writer for tensorboard
    """
    args.action_space = env.action_space

    # Initialize models
    enc = Encoder(env.observation_space.shape[0], args.dim, use_conv=args.use_conv)
    dec = Decoder(env.observation_space.shape[0], args.dim, use_conv=args.use_conv)
    d_module = D_Module(env.action_space.shape[0], args.dim, args.discrete)

    if args.from_checkpoint is not None:
        results_dict = torch.load(args.from_checkpoint)
        enc.load_state_dict(results_dict['enc'])
        dec.load_state_dict(results_dict['dec'])
        d_module.load_state_dict(results_dict['d_module'])

    all_params = chain(enc.parameters(), dec.parameters(), d_module.parameters())

    if args.transfer:
        for p in enc.parameters():
            p.requires_grad = False
        for p in dec.parameters():
            p.requires_grad = False
        all_params = d_module.parameters()

    optimizer = torch.optim.Adam(all_params, lr=args.lr, weight_decay=args.weight_decay)

    if args.gpu:
        enc = enc.cuda()
        dec = dec.cuda()
        d_module = d_module.cuda()

    # Initialize datasets
    val_loader = None
    train_dataset = DynamicsDataset(args.train_set, args.train_size,
                                    batch=args.train_batch, rollout=args.rollout)
    val_dataset = DynamicsDataset(args.test_set, 5000,
                                  batch=args.test_batch, rollout=args.rollout)
    val_loader = torch.utils.data.DataLoader(dataset=val_dataset, batch_size=args.batch_size,
                                             shuffle=False, num_workers=args.num_workers)
    train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=args.batch_size,
                                               shuffle=True, num_workers=args.num_workers)

    results_dict = {
        'dec_losses': [],
        'forward_losses': [],
        'inverse_losses': [],
        'total_losses': [],
        'enc': None,
        'dec': None,
        'd_module': None,
        'd_init': None,
        'args': args
    }

    total_action_taken = 0
    correct_predicted_a_hat = 0

    # create the mask here for re-weighting
    dec_mask = None
    if args.dec_mask is not None:
        dec_mask = torch.ones(9)
        game_vocab = dict([(b, a) for a, b in enumerate(sorted(env.game.all_possible_features()))])
        dec_mask[game_vocab['Agent']] = args.dec_mask
        dec_mask[game_vocab['Goal']] = args.dec_mask
        dec_mask = dec_mask.expand(args.batch_size, args.maze_length,
                                   args.maze_length, 9).contiguous().view(-1)
        dec_mask = Variable(dec_mask, requires_grad=False)
        if args.gpu:
            dec_mask = dec_mask.cuda()

    for epoch in range(1, args.num_epochs + 1):
        enc.train()
        dec.train()
        d_module.train()

        if args.framework == "mazebase":
            # NOTE: d_init is referenced here and below but is never defined in
            # this snippet; it is assumed to come from the surrounding module.
            d_init.train()

        # for measuring the accuracy
        train_acc = 0
        current_epoch_actions = 0
        current_epoch_predicted_a_hat = 0

        start = time.time()
        for i, (states, target_actions) in enumerate(train_loader):
            optimizer.zero_grad()

            if args.framework != "mazebase":
                forward_loss, inv_loss, dec_loss, recon_loss, model_loss, _, _ = forward_planning(
                    i, states, target_actions, enc, dec, d_module, args)
            else:
                forward_loss, inv_loss, dec_loss, recon_loss, model_loss, \
                    current_epoch_predicted_a_hat, current_epoch_actions = multiple_forward(
                        i, states, target_actions, enc, dec, d_module, args, d_init, dec_mask)

            loss = forward_loss + args.inv_loss_coef * inv_loss + \
                args.dec_loss_coef * dec_loss

            if i % args.log_interval == 0:
                log(
                    'Epoch [{}/{}]\tIter [{}/{}]\t'.format(
                        epoch, args.num_epochs, i + 1, len(train_dataset) // args.batch_size) +
                    'Time: {:.2f}\t'.format(time.time() - start) +
                    'Decoder Loss: {:.2f}\t'.format(dec_loss.item()) +
                    'Forward Loss: {:.2f}\t'.format(forward_loss.item()) +
                    'Inverse Loss: {:.2f}\t'.format(inv_loss.item()) +
                    'Loss: {:.2f}\t'.format(loss.item()))

                results_dict['dec_losses'].append(dec_loss.item())
                results_dict['forward_losses'].append(forward_loss.item())
                results_dict['inverse_losses'].append(inv_loss.item())
                results_dict['total_losses'].append(loss.item())

                # write the summaries here
                if writer:
                    writer.add_scalar('dynamics/total_loss', loss.item(), epoch)
                    writer.add_scalar('dynamics/decoder', dec_loss.item(), epoch)
                    writer.add_scalar('dynamics/reconstruction_loss', recon_loss.item(), epoch)
                    writer.add_scalar('dynamics/next_state_prediction_loss', model_loss.item(), epoch)
                    writer.add_scalar('dynamics/inv_loss', inv_loss.item(), epoch)
                    writer.add_scalar('dynamics/forward_loss', forward_loss.item(), epoch)
                    writer.add_scalars('dynamics/all_losses', {
                        "total_loss": loss.item(),
                        "reconstruction_loss": recon_loss.item(),
                        "next_state_prediction_loss": model_loss.item(),
                        "decoder_loss": dec_loss.item(),
                        "inv_loss": inv_loss.item(),
                        "forward_loss": forward_loss.item(),
                    }, epoch)

            loss.backward()

            correct_predicted_a_hat += current_epoch_predicted_a_hat
            total_action_taken += current_epoch_actions

            # does it not work at all without grad clipping ?
            torch.nn.utils.clip_grad_norm_(all_params, args.max_grad_norm)
            optimizer.step()

            # maybe add the generated image to the logs
            # writer.add_image()

        # Run validation
        if val_loader is not None:
            enc.eval()
            dec.eval()
            d_module.eval()
            forward_loss, inv_loss, dec_loss = 0, 0, 0
            for i, (states, target_actions) in enumerate(val_loader):
                f_loss, i_loss, d_loss, _, _, _, _ = forward_planning(
                    i, states, target_actions, enc, dec, d_module, args)
                forward_loss += f_loss
                inv_loss += i_loss
                dec_loss += d_loss

            loss = forward_loss + args.inv_loss_coef * inv_loss + \
                args.dec_loss_coef * dec_loss
            if writer:
                writer.add_scalar('val/forward_loss', forward_loss.item() / i, epoch)
                writer.add_scalar('val/inverse_loss', inv_loss.item() / i, epoch)
                writer.add_scalar('val/decoder_loss', dec_loss.item() / i, epoch)
            log(
                '[Validation]\t' +
                'Decoder Loss: {:.2f}\t'.format(dec_loss.item() / i) +
                'Forward Loss: {:.2f}\t'.format(forward_loss.item() / i) +
                'Inverse Loss: {:.2f}\t'.format(inv_loss.item() / i) +
                'Loss: {:.2f}\t'.format(loss.item() / i))

        if epoch % args.checkpoint == 0:
            results_dict['enc'] = enc.state_dict()
            results_dict['dec'] = dec.state_dict()
            results_dict['d_module'] = d_module.state_dict()
            if args.framework == "mazebase":
                results_dict['d_init'] = d_init.state_dict()
            torch.save(results_dict,
                       os.path.join(args.out, 'dynamics_module_epoch%s.pt' % epoch))
            log('Saved model %s' % epoch)

    results_dict['enc'] = enc.state_dict()
    results_dict['dec'] = dec.state_dict()
    results_dict['d_module'] = d_module.state_dict()
    torch.save(results_dict,
               os.path.join(args.out, 'dynamics_module_epoch%s.pt' % epoch))
    print(os.path.join(args.out, 'dynamics_module_epoch%s.pt' % epoch))
def train():
    opt = parse_args()

    os.makedirs("images/%s" % (opt.dataset), exist_ok=True)
    os.makedirs("checkpoints/%s" % (opt.dataset), exist_ok=True)

    cuda = True if torch.cuda.is_available() else False
    FloatTensor = torch.cuda.FloatTensor if cuda else torch.FloatTensor

    # get dataloader
    train_loader = commic2human_loader(opt, mode='train')
    test_loader = commic2human_loader(opt, mode='test')

    # Dimensionality
    input_shape = (opt.channels, opt.img_height, opt.img_width)
    shared_dim = opt.dim * (2 ** opt.n_downsample)

    # Initialize generator and discriminator
    shared_E = ResidualBlock(in_channels=shared_dim)
    E1 = Encoder(dim=opt.dim, n_downsample=opt.n_downsample, shared_block=shared_E)
    E2 = Encoder(dim=opt.dim, n_downsample=opt.n_downsample, shared_block=shared_E)
    shared_G = ResidualBlock(in_channels=shared_dim)
    G1 = Generator(dim=opt.dim, n_upsample=opt.n_upsample, shared_block=shared_G)
    G2 = Generator(dim=opt.dim, n_upsample=opt.n_upsample, shared_block=shared_G)
    D1 = Discriminator(input_shape)
    D2 = Discriminator(input_shape)

    # Initialize weights
    E1.apply(weights_init_normal)
    E2.apply(weights_init_normal)
    G1.apply(weights_init_normal)
    G2.apply(weights_init_normal)
    D1.apply(weights_init_normal)
    D2.apply(weights_init_normal)

    # Loss functions
    adversarial_loss = torch.nn.MSELoss()
    pixel_loss = torch.nn.L1Loss()

    if cuda:
        E1 = E1.cuda()
        E2 = E2.cuda()
        G1 = G1.cuda()
        G2 = G2.cuda()
        D1 = D1.cuda()
        D2 = D2.cuda()
        adversarial_loss = adversarial_loss.cuda()
        pixel_loss = pixel_loss.cuda()

    # Optimizers
    optimizer_G = torch.optim.Adam(
        itertools.chain(E1.parameters(), E2.parameters(), G1.parameters(), G2.parameters()),
        lr=opt.lr, betas=(opt.b1, opt.b2))
    optimizer_D1 = torch.optim.Adam(D1.parameters(), lr=opt.lr, betas=(opt.b1, opt.b2))
    optimizer_D2 = torch.optim.Adam(D2.parameters(), lr=opt.lr, betas=(opt.b1, opt.b2))

    # Learning rate update schedulers
    lr_scheduler_G = torch.optim.lr_scheduler.LambdaLR(
        optimizer_G, lr_lambda=LambdaLR(opt.epochs, 0, opt.decay_epoch).step)
    lr_scheduler_D1 = torch.optim.lr_scheduler.LambdaLR(
        optimizer_D1, lr_lambda=LambdaLR(opt.epochs, 0, opt.decay_epoch).step)
    lr_scheduler_D2 = torch.optim.lr_scheduler.LambdaLR(
        optimizer_D2, lr_lambda=LambdaLR(opt.epochs, 0, opt.decay_epoch).step)

    prev_time = time.time()
    for epoch in range(opt.epochs):
        for i, (img_A, img_B) in enumerate(train_loader):

            # Model inputs
            X1 = Variable(img_A.type(FloatTensor))
            X2 = Variable(img_B.type(FloatTensor))

            # Adversarial ground truths
            valid = Variable(FloatTensor(img_A.shape[0], *D1.output_shape).fill_(1.0),
                             requires_grad=False)
            fake = Variable(FloatTensor(img_A.shape[0], *D1.output_shape).fill_(0.0),
                            requires_grad=False)

            # -----------------------------
            # Train Encoders and Generators
            # -----------------------------
            optimizer_G.zero_grad()  # was missing in the original

            # Get shared latent representation
            mu1, Z1 = E1(X1)
            mu2, Z2 = E2(X2)

            # Reconstruct images
            recon_X1 = G1(Z1)
            recon_X2 = G2(Z2)

            # Translate images
            fake_X1 = G1(Z2)
            fake_X2 = G2(Z1)

            # Cycle translation
            mu1_, Z1_ = E1(fake_X1)
            mu2_, Z2_ = E2(fake_X2)
            cycle_X1 = G1(Z2_)
            cycle_X2 = G2(Z1_)

            # Losses for encoder and generator
            id_loss_1 = opt.lambda_id * pixel_loss(recon_X1, X1)
            id_loss_2 = opt.lambda_id * pixel_loss(recon_X2, X2)
            adv_loss_1 = opt.lambda_adv * adversarial_loss(D1(fake_X1), valid)
            adv_loss_2 = opt.lambda_adv * adversarial_loss(D2(fake_X2), valid)
            cyc_loss_1 = opt.lambda_cyc * pixel_loss(cycle_X1, X1)
            cyc_loss_2 = opt.lambda_cyc * pixel_loss(cycle_X2, X2)
            KL_loss_1 = opt.lambda_KL1 * compute_KL(mu1)
            KL_loss_2 = opt.lambda_KL1 * compute_KL(mu2)
            KL_loss_1_ = opt.lambda_KL2 * compute_KL(mu1_)
            KL_loss_2_ = opt.lambda_KL2 * compute_KL(mu2_)

            # total loss for encoder and generator
            G_loss = id_loss_1 + id_loss_2 \
                + adv_loss_1 + adv_loss_2 \
                + cyc_loss_1 + cyc_loss_2 \
                + KL_loss_1 + KL_loss_2 + KL_loss_1_ + KL_loss_2_

            G_loss.backward()
            optimizer_G.step()

            # ----------------------
            # Train Discriminator 1
            # ----------------------
            optimizer_D1.zero_grad()

            D1_loss = adversarial_loss(D1(X1), valid) + \
                adversarial_loss(D1(fake_X1.detach()), fake)
            D1_loss.backward()
            optimizer_D1.step()

            # ----------------------
            # Train Discriminator 2
            # ----------------------
            optimizer_D2.zero_grad()

            D2_loss = adversarial_loss(D2(X2), valid) + \
                adversarial_loss(D2(fake_X2.detach()), fake)
            D2_loss.backward()
            optimizer_D2.step()

            # ------------------
            # Log Information
            # ------------------
            batches_done = epoch * len(train_loader) + i
            batches_left = opt.epochs * len(train_loader) - batches_done
            time_left = datetime.timedelta(seconds=batches_left * (time.time() - prev_time))
            prev_time = time.time()

            print("[Epoch %d/%d] [Batch %d/%d] [D loss: %f] [G loss: %f] ETA: %s"
                  % (epoch, opt.epochs, i, len(train_loader),
                     (D1_loss + D2_loss).item(), G_loss.item(), time_left))

            if batches_done % opt.sample_interval == 0:
                save_sample(opt.dataset, test_loader, batches_done, E1, E2, G1, G2, FloatTensor)

            if batches_done % opt.checkpoint_interval == 0:
                torch.save(E1.state_dict(), "checkpoints/%s/E1_%d.pth" % (opt.dataset, epoch))
                torch.save(E2.state_dict(), "checkpoints/%s/E2_%d.pth" % (opt.dataset, epoch))
                torch.save(G1.state_dict(), "checkpoints/%s/G1_%d.pth" % (opt.dataset, epoch))
                torch.save(G2.state_dict(), "checkpoints/%s/G2_%d.pth" % (opt.dataset, epoch))

        # Update learning rates
        lr_scheduler_G.step()
        lr_scheduler_D1.step()
        lr_scheduler_D2.step()

    torch.save(shared_E.state_dict(), "checkpoints/%s/shared_E_done.pth" % opt.dataset)
    torch.save(shared_G.state_dict(), "checkpoints/%s/shared_G_done.pth" % opt.dataset)
    torch.save(E1.state_dict(), "checkpoints/%s/E1_done.pth" % opt.dataset)
    torch.save(E2.state_dict(), "checkpoints/%s/E2_done.pth" % opt.dataset)
    torch.save(G1.state_dict(), "checkpoints/%s/G1_done.pth" % opt.dataset)
    torch.save(G2.state_dict(), "checkpoints/%s/G2_done.pth" % opt.dataset)
    print("Training Process has been Done!")
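# `compute_KL` is used in train() above but not defined in this file. In
# UNIT-style implementations the KL term for an encoding mu (unit variance
# assumed) reduces to the mean of mu^2; a minimal sketch under that assumption:
import torch

def compute_KL(mu):
    return torch.mean(torch.pow(mu, 2))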
def main():
    epoch = 1000
    batch_size = 64
    hidden_dim = 300
    use_cuda = True

    encoder = Encoder(num_words, hidden_dim)
    if args.attn:
        attn_model = 'dot'
        decoder = LuongAttnDecoderRNN(attn_model, hidden_dim, num_words)
    else:
        decoder = DecoderRhyme(hidden_dim, num_words, num_target_lengths, num_rhymes)

    if args.train:
        weight = torch.ones(num_words)
        weight[word2idx_mapping[PAD_TOKEN]] = 0
        if use_cuda:
            encoder = encoder.cuda()
            decoder = decoder.cuda()
            weight = weight.cuda()
        encoder_optimizer = Adam(encoder.parameters(), lr=0.001)
        decoder_optimizer = Adam(decoder.parameters(), lr=0.001)
        criterion = nn.CrossEntropyLoss(weight=weight)

        np.random.seed(1124)
        order = np.arange(len(train_data))

        best_loss = 1e10
        best_epoch = 0
        for e in range(epoch):
            # if e - best_epoch > 20: break
            np.random.shuffle(order)
            shuffled_train_data = train_data[order]
            shuffled_x_lengths = input_lengths[order]
            shuffled_y_lengths = target_lengths[order]
            shuffled_y_rhyme = target_rhymes[order]

            train_loss = 0
            valid_loss = 0
            n_batches = int(len(order) // batch_size)
            for b in tqdm(range(n_batches)):
                batch_x = torch.LongTensor(
                    shuffled_train_data[b * batch_size:(b + 1) * batch_size][:, 0].tolist()).t()
                batch_y = torch.LongTensor(
                    shuffled_train_data[b * batch_size:(b + 1) * batch_size][:, 1].tolist()).t()
                batch_x_lengths = shuffled_x_lengths[b * batch_size:(b + 1) * batch_size]
                batch_y_lengths = shuffled_y_lengths[b * batch_size:(b + 1) * batch_size]
                batch_y_rhyme = shuffled_y_rhyme[b * batch_size:(b + 1) * batch_size]
                if use_cuda:
                    batch_x, batch_y = batch_x.cuda(), batch_y.cuda()

                train_loss += train(batch_x, batch_y, batch_y_lengths, max(batch_y_lengths),
                                    batch_y_rhyme, encoder, decoder, encoder_optimizer,
                                    decoder_optimizer, criterion, use_cuda, False)
            # average over the number of batches (the original divided by the
            # last batch index, which is off by one)
            train_loss /= n_batches
            '''
            for b in range(len(valid_data) // batch_size):
                batch_x = torch.LongTensor(valid_data[b*batch_size: (b+1)*batch_size][:, 0].tolist()).t()
                batch_y = torch.LongTensor(valid_data[b*batch_size: (b+1)*batch_size][:, 1].tolist()).t()
                if use_cuda:
                    batch_x, batch_y = batch_x.cuda(), batch_y.cuda()
                valid_loss += train(batch_x, batch_y, max_seqlen, encoder, decoder,
                                    encoder_optimizer, decoder_optimizer, criterion, use_cuda, True)
            valid_loss /= b
            '''
            print("epoch {}, train_loss {:.4f}, valid_loss {:.4f}, best_epoch {}, best_loss {:.4f}"
                  .format(e, train_loss, valid_loss, best_epoch, best_loss))
            '''
            if valid_loss < best_loss:
                best_loss = valid_loss
                best_epoch = e
                torch.save(encoder.state_dict(), args.encoder_path + '.best')
                torch.save(decoder.state_dict(), args.decoder_path + '.best')
            '''
        torch.save(encoder.state_dict(), args.encoder_path)
        torch.save(decoder.state_dict(), args.decoder_path)
        print(encoder)
        print(decoder)
        print("==============")
    else:
        encoder.load_state_dict(torch.load(args.encoder_path))  # map_location=torch.device('cpu')
        decoder.load_state_dict(torch.load(args.decoder_path))  # map_location=torch.device('cpu')
        print(encoder)
        print(decoder)
        predict(encoder, decoder)
def main():
    checkpoint = torch.load(args.model_path)
    encoder = Encoder()
    generator = G()
    encoder.load_state_dict(checkpoint['encoder_state_dict'])
    generator.load_state_dict(checkpoint['generator_state_dict'])
    encoder.cuda()
    generator.cuda()

    FS = 16000
    with open(args.speaker_list) as fp:
        SPEAKERS = [l.strip() for l in fp.readlines()]

    normalizer = Tanhize(
        xmax=np.fromfile('./etc/{}_xmax.npf'.format(args.corpus_name)),
        xmin=np.fromfile('./etc/{}_xmin.npf'.format(args.corpus_name)),
    )

    total_features = read_whole_features(args.file_pattern.format(args.src))
    for features in total_features:
        x = normalizer.forward_process(features['sp'])
        x = nh_to_nchw(x)
        y_s = features['speaker']

        x = Variable(torch.FloatTensor(x).cuda(), requires_grad=False)
        # Target speaker id, broadcast over the batch dimension.
        y_t = torch.ones((x.shape[0])).view(-1, 1) * SPEAKERS.index(args.trg)

        z, _ = encoder(x)
        x_t, _ = generator(z, y_t)  # NOTE: the API yields NHWC format
        x_t = torch.squeeze(x_t)
        x_t = normalizer.backward_process(x_t)

        x_s, _ = generator(z, y_s)
        x_s = torch.squeeze(x_s)
        x_s = normalizer.backward_process(x_s)

        f0_s = features['f0']
        f0_t = convert_f0(f0_s, args.src, args.trg)

        output_dir = args.output_dir
        features['sp'] = x_t.cpu().data.numpy()
        features['f0'] = f0_t

        y = pw2wav(features)
        oFilename = make_output_wav_name(output_dir, features['filename'])
        print(f'\rProcessing {oFilename}', end=' ')
        if not os.path.exists(os.path.dirname(oFilename)):
            try:
                os.makedirs(os.path.dirname(oFilename))
            except OSError:
                # Guard against a race condition when several processes
                # create the same directory.
                pass
        sf.write(oFilename, y, FS)

    print('\n==finish==')
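
# convert_f0 is used above but not defined in this excerpt. A common recipe in
# voice conversion is a log-Gaussian transform of F0 between speaker statistics.
# A minimal sketch, assuming per-speaker (mean, std) of log-F0 are available in
# a lookup table F0_STATS (the table and how it is loaded are assumptions):
import numpy as np

def convert_f0(f0, src, trg):
    mu_s, std_s = F0_STATS[src]
    mu_t, std_t = F0_STATS[trg]
    lf0 = np.zeros_like(f0)
    voiced = f0 > 0                       # leave unvoiced frames (f0 == 0) alone
    lf0[voiced] = np.log(f0[voiced])
    lf0[voiced] = (lf0[voiced] - mu_s) / std_s * std_t + mu_t
    f0_converted = np.zeros_like(f0)
    f0_converted[voiced] = np.exp(lf0[voiced])
    return f0_converted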
class Trainer:
    def __init__(self, driving, target, time_step, split, lr):
        self.dataset = DataSet(driving, target, time_step, split)
        # Cache the dataset object (pickle) so it can be reloaded later.
        with open('dataset_obj.txt', 'wb') as f:
            pickle.dump(self.dataset, f)
        print('dataset object saved')
        # To reload instead of rebuilding:
        # with open('dataset_obj.txt', 'rb') as f:
        #     self.dataset = pickle.load(f)

        self.encoder = Encoder(input_size=self.dataset.get_num_features(),
                               hidden_size=ENCODER_HIDDEN_SIZE,
                               T=time_step)
        self.decoder = Decoder(encoder_hidden_size=ENCODER_HIDDEN_SIZE,
                               decoder_hidden_size=DECODER_HIDDEN_SIZE,
                               T=time_step)
        if torch.cuda.is_available():
            self.encoder = self.encoder.cuda()
            self.decoder = self.decoder.cuda()
        self.encoder_optim = optim.Adam(self.encoder.parameters(), lr)
        self.decoder_optim = optim.Adam(self.decoder.parameters(), lr)
        self.loss_func = nn.CrossEntropyLoss()
        self.train_size, self.validation_size, self.test_size = self.dataset.get_size()
        self.best_dev_acc = 0.0

    def get_accuracy(self, truth, pred):
        assert len(truth) == len(pred)
        right = (truth == pred).sum()
        return right / len(truth)

    def train_minibatch(self, num_epochs, batch_size, interval):
        train_acc_list = []
        dev_acc_list = []
        train_loss_list = []
        dev_loss_list = []
        x_train, y_train, y_seq_train = self.dataset.get_train_set()
        for epoch in range(num_epochs):
            print('Start epoch {}'.format(epoch))
            i = 0
            loss_sum = 0
            pred_res_total = []
            while i < self.train_size:
                self.encoder_optim.zero_grad()
                self.decoder_optim.zero_grad()
                batch_end = min(i + batch_size, self.train_size)

                var_x = self.to_variable(x_train[i:batch_end])
                var_y = Variable(torch.from_numpy(y_train[i:batch_end]).long()).cuda()
                var_y_seq = self.to_variable(y_seq_train[i:batch_end])
                if var_x.dim() == 2:
                    var_x = var_x.unsqueeze(2)

                code = self.encoder(var_x)
                y_res = self.decoder(code, var_y_seq)
                loss = self.loss_func(y_res, var_y)
                if i == 0:
                    print("y_res:", y_res)
                    print("var_y:", var_y)
                loss.backward()
                self.encoder_optim.step()
                self.decoder_optim.step()
                loss_sum += loss.item()
                i = batch_end

                pred_y = y_res.data.cpu()
                pred_y = torch.max(F.softmax(pred_y, dim=1), 1)[1]
                pred_res_total.extend(pred_y)

            acc = self.get_accuracy(y_train, np.array(pred_res_total))
            print('epoch [%d] finished, total loss %.2f, accuracy %.1f'
                  % (epoch, loss_sum, acc * 100))

            dev_acc, dev_loss = self.test(batch_size)
            print('dev_acc is %.2f' % (dev_acc * 100))
            train_acc_list.append(acc)
            dev_acc_list.append(dev_acc)
            train_loss_list.append(loss_sum)
            dev_loss_list.append(dev_loss)
            if dev_acc > self.best_dev_acc:
                torch.save(self.encoder.state_dict(),
                           r'D:\Projects\stock_predict\models\encoder_best.model')
                torch.save(self.decoder.state_dict(),
                           r'D:\Projects\stock_predict\models\decoder_best.model')
                self.best_dev_acc = dev_acc
                test_acc, test_loss = self.test(batch_size, True)
                print('test_accuracy: %.1f' % (test_acc * 100))
        return train_acc_list, dev_acc_list, train_loss_list, dev_loss_list

    def test(self, batch_size, is_test=False):
        if not is_test:
            x, y, y_seq = self.dataset.get_validation_set()
        else:
            x, y, y_seq = self.dataset.get_test_set()
        i = 0
        res = []
        length = len(y)
        loss_sum = 0
        while i < length:
            batch_end = min(i + batch_size, length)
            var_x = self.to_variable(x[i:batch_end])
            var_y = Variable(torch.from_numpy(y[i:batch_end]).long()).cuda()
            var_y_seq = self.to_variable(y_seq[i:batch_end])
            if var_x.dim() == 2:
                var_x = var_x.unsqueeze(2)
            # Encoder produces the context code; decoder classifies from it.
            code = self.encoder(var_x)
            y_res = self.decoder(code, var_y_seq)
            loss = self.loss_func(y_res, var_y)
            loss_sum += loss.item()
            pred_y = y_res.data.cpu()
            pred_y = torch.max(pred_y, 1)[1]
            res.extend(pred_y)
            i = batch_end
        res = np.array(res)
        return self.get_accuracy(y, res), loss_sum

    def load_model(self, encoder_path, decoder_path):
        self.encoder.load_state_dict(
            torch.load(encoder_path, map_location=lambda storage, loc: storage))
        self.decoder.load_state_dict(
            torch.load(decoder_path, map_location=lambda storage, loc: storage))

    def to_variable(self, x):
        if torch.cuda.is_available():
            return Variable(torch.from_numpy(x).float()).cuda()
        return Variable(torch.from_numpy(x).float())

    def draw_plot(self, train_list, dev_list, acc=True):
        plt.plot(np.array(train_list))
        plt.plot(np.array(dev_list))
        if acc:
            plt.title('model acc')
            plt.ylabel('accuracy')
        else:
            plt.title('model loss')
            plt.ylabel('loss')
        plt.xlabel('epoch')
        plt.legend(['train', 'validation'], loc='upper left')
        plt.show()
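
# Hypothetical usage of the Trainer above; the file names and hyper-parameters
# are illustrative only, not from the original project:
trainer = Trainer('driving.csv', 'target.csv', time_step=10, split=0.8, lr=1e-3)
train_acc, dev_acc, train_loss, dev_loss = trainer.train_minibatch(
    num_epochs=50, batch_size=64, interval=10)
trainer.draw_plot(train_acc, dev_acc, acc=True)
trainer.draw_plot(train_loss, dev_loss, acc=False)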
])  # end of a transforms.Compose([...]) assigned to `trans`; its beginning is truncated in this excerpt

with open("../data/vocab.pkl", 'rb') as f:
    vocab = pickle.load(f)

dataloader = get_loader("../data/resized/",
                        "../data/annotations/captions_train2014.json",
                        vocab, trans, 128, shuffle=True)
encoder = Encoder(256)
decoder = Decoder(256, 512, len(vocab), 1)
if torch.cuda.is_available():
    encoder.cuda()
    decoder.cuda()

criterion = nn.CrossEntropyLoss()
# NOTE: encoder.bn's parameters are already included in encoder.parameters(),
# so listing them again is redundant here.
params = list(decoder.parameters()) + list(encoder.parameters()) + list(encoder.bn.parameters())
optimizer = torch.optim.Adam(params, lr=0.001)

total_step = len(dataloader)
for epoch in range(5):
    for i, (images, captions, lengths) in enumerate(dataloader):
        images = to_var(images, volatile=True)
        captions = to_var(captions)
        targets = pack_padded_sequence(captions, lengths, batch_first=True)[0]
        decoder.zero_grad()
        # (the training step continues beyond this excerpt)
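
# to_var is used above but not defined in this excerpt. A common minimal helper
# from Variable-era PyTorch tutorials looks like this (an assumption):
import torch
from torch.autograd import Variable

def to_var(x, volatile=False):
    if torch.cuda.is_available():
        x = x.cuda()
    return Variable(x, volatile=volatile)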
    exit(0)  # the `if` branch opening this block is truncated in the excerpt
else:
    fp_data = sys.argv[1]
    fp_ind = sys.argv[2]
    fp_ans = sys.argv[3]

# Load only the encoder weights out of the full checkpoint.
fp_model_fe = 'model6.fe.pt'
state_dict = torch.load(fp_model_fe)
model_enc = Encoder()
model_enc_dict = model_enc.state_dict()
model_enc_dict.update({k: v for k, v in state_dict.items() if k in model_enc_dict})
model_enc.load_state_dict(model_enc_dict)
model_enc.cuda()

test_loader = load_data(fp_data)
features = predict(model_enc, test_loader)

# Pairs whose feature distance exceeds the threshold are predicted as different (0).
ind = pd.read_csv(fp_ind, delimiter=',').values[:, 1:]
pred = []
for i in range(ind.shape[0]):
    if np.linalg.norm(features[ind[i][0]] - features[ind[i][1]]) > 10:
        pred.append(0)
    else:
        pred.append(1)

df_pred = pd.DataFrame()
df_pred['ID'] = np.arange(len(pred))
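
# The excerpt stops after filling the ID column; presumably the predictions are
# then written to fp_ans. A hedged completion (the 'Ans' column name is an
# assumption, not from the original):
df_pred['Ans'] = pred
df_pred.to_csv(fp_ans, index=False)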
class sample:
    def __init__(self):
        parser = argparse.ArgumentParser(description='Image Captioning')
        parser.add_argument('--root', default='../../../cocodataset/', type=str)
        parser.add_argument('--sample_image',
                            default='../../../cocodataset/val2017/000000435205.jpg',
                            type=str)
        parser.add_argument('--epochs', default=100, type=int)
        parser.add_argument('--lr', default=1e-4, type=float)
        parser.add_argument('--batch_size', default=128, type=int)
        parser.add_argument('--num_workers', default=4, type=int)
        parser.add_argument('--embed_dim', default=256, type=int)
        parser.add_argument('--hidden_size', default=512, type=int)
        parser.add_argument('--num_layers', default=1, type=int)
        parser.add_argument('--encoder_path', default='./model/Encoder-100.ckpt', type=str)
        parser.add_argument('--decoder_path', default='./model/Decoder-100.ckpt', type=str)
        parser.add_argument('--vocab_path', default='./vocab/', type=str)
        self.args = parser.parse_args()

        self.transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.5, ), (0.5, )),  # single value broadcast across channels
            transforms.Resize((224, 224))
        ])

        with open(self.args.vocab_path + 'vocab.pickle', 'rb') as f:
            self.vocab = pickle.load(f)

        self.DataLoader = get_dataloader(root=self.args.root,
                                         transform=self.transform,
                                         shuffle=True,
                                         batch_size=self.args.batch_size,
                                         num_workers=self.args.num_workers,
                                         vocab=self.vocab)
        self.Encoder = Encoder(embed_dim=self.args.embed_dim)
        self.Decoder = Decoder(embed_dim=self.args.embed_dim,
                               hidden_size=self.args.hidden_size,
                               vocab_size=len(self.vocab),
                               num_layers=self.args.num_layers)

    def load_image(self, image_path):
        image = Image.open(image_path).convert('RGB')
        image = self.transform(image).unsqueeze(0)
        return image

    def main(self):
        self.Encoder.load_state_dict(torch.load(self.args.encoder_path))
        self.Decoder.load_state_dict(torch.load(self.args.decoder_path))
        self.Encoder = self.Encoder.cuda().eval()
        self.Decoder = self.Decoder.cuda().eval()

        sample_image = self.load_image(self.args.sample_image).cuda()
        output = self.Encoder(sample_image)
        output = self.Decoder.sample(output)[0].cpu().numpy()

        # Map predicted ids back to words, stopping at the end token.
        sample_caption = []
        for idx in output:
            word = self.vocab.idx2word[idx]
            sample_caption.append(word)
            if word == '<end>':
                break
        sentence = ' '.join(sample_caption)
        print(sentence)
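
# Hypothetical entry point for the sample class above (the original excerpt
# does not show how it is invoked):
if __name__ == '__main__':
    sample().main()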
# network
ImplicitFun = ImplicitFun()            # NOTE: these instances shadow their class names
Encoder = Encoder()
InverseImplicitFun = InverseImplicitFun()

if cate_name == 'helicopter':
    all_model = torch.load('../models/plane.pth')
else:
    all_model = torch.load('../models/' + cate_name + '.pth')

ImplicitFun.load_state_dict(all_model['ImplicitFun_state_dict'])
Encoder.load_state_dict(all_model['Encoder_state_dict'])
InverseImplicitFun.load_state_dict(all_model['InverseImplicitFun_state_dict'])
print(InverseImplicitFun)

# gpu or cpu
ImplicitFun = ImplicitFun.cuda()
Encoder = Encoder.cuda()
InverseImplicitFun = InverseImplicitFun.cuda()

# --------------------------------------------------------------------------- #
if __name__ == '__main__':
    thres = np.arange(0, 0.26, 0.01)
    dis_list = np.array([])
    for it, data in enumerate(train_loader):
        print("Paired sample: [%d/%d]" % (it, len(train_loader.dataset)))
        shape, land_a, land_b, name_a, name_b = data
        shape = Variable(shape.squeeze(0).cuda())
def train(config):
    data_loader = DataLoader(config.file_path, config.max_length, config.batch_size)
    train_data, word2index, tag2index, intent2index = data_loader.load_train()

    if train_data is None:
        print("Please check your data or its path")
        return

    encoder = Encoder(len(word2index), config.embedding_size, config.hidden_size)
    decoder = Decoder(len(tag2index), len(intent2index), config.hidden_size * 2)
    if USE_CUDA:
        encoder = encoder.cuda()
        decoder = decoder.cuda()
    encoder.init_weights()
    decoder.init_weights()

    loss_function_1 = nn.CrossEntropyLoss(ignore_index=0)
    loss_function_2 = nn.CrossEntropyLoss()
    enc_optim = optim.Adam(encoder.parameters(), lr=config.learning_rate)
    dec_optim = optim.Adam(decoder.parameters(), lr=config.learning_rate)

    for step in range(config.step_size):
        losses = []
        for i, batch in enumerate(data_loader.get_batch(train_data)):
            x, embedding_x, y_1, y_2 = zip(*batch)
            x = torch.cat(x)
            embedding_x = torch.cat(embedding_x)
            tag_target = torch.cat(y_1)
            intent_target = torch.cat(y_2)

            # The mask marks the padding positions (token id 0) of each sequence.
            x_mask = torch.cat([
                Variable(torch.ByteTensor(tuple(map(lambda s: s == 0, t.data)))).cuda()
                if USE_CUDA else
                Variable(torch.ByteTensor(tuple(map(lambda s: s == 0, t.data))))
                for t in x
            ]).view(len(batch), -1)

            encoder.zero_grad()
            decoder.zero_grad()

            output, hidden_c = encoder(x, embedding_x, x_mask)

            # Every sequence in the batch starts decoding from <SOS>.
            start_decode = Variable(
                torch.LongTensor([[word2index['<SOS>']] * len(batch)])).transpose(1, 0)
            if USE_CUDA:
                start_decode = start_decode.cuda()

            tag_score, intent_score = decoder(start_decode, hidden_c, output, x_mask)

            loss_1 = loss_function_1(tag_score, tag_target.view(-1))
            loss_2 = loss_function_2(intent_score, intent_target)
            loss = loss_1 + loss_2
            losses.append(loss.data.cpu().numpy() if USE_CUDA else loss.data.numpy())

            loss.backward()
            torch.nn.utils.clip_grad_norm(encoder.parameters(), 5.0)
            torch.nn.utils.clip_grad_norm(decoder.parameters(), 5.0)
            enc_optim.step()
            dec_optim.step()

            if i % 100 == 0:
                print("Step", step, " : ", np.mean(losses))
                losses = []

    if not os.path.exists(config.model_dir):
        os.makedirs(config.model_dir)
    torch.save(encoder, os.path.join(config.model_dir, 'jointnlu-encoder.pt'))
    torch.save(decoder, os.path.join(config.model_dir, 'jointnlu-decoder.pt'))
    print("Training complete!")
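
# Aside: the per-token padding mask built above can be expressed in one
# vectorized line, noted here separately so the original behavior stays intact:
#     x_mask = (x == 0).view(len(batch), -1)
# (on modern PyTorch this yields a BoolTensor rather than a ByteTensor).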
def main():
    parser = argparse.ArgumentParser(
        description='Estimate average error and std for each MNIST dataset')
    parser.add_argument('--model-name', type=str, required=True,
                        help='filepath of model to use')
    parser.add_argument('--output-name', type=str, required=True,
                        help='name of output files')
    parser.add_argument('--batch-size', type=int, default=200, metavar='N',
                        help='batch size for evaluation')
    args = parser.parse_args()

    # Load model
    path = '/home/ubuntu/Saved_Models/'
    filename = os.path.join(path, args.model_name, 'checkpoint.pt')
    use_cuda = torch.cuda.is_available()
    kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}
    device = torch.device("cuda" if use_cuda else "cpu")

    model = Encoder(device)
    model.load_state_dict(torch.load(filename))
    model = model.cuda()

    data_root_file = '/home/ubuntu/mnist-interpretable-tranformations/data'
    data_loaders = {
        digit: DataLoader(MNISTDadataset(data_root_file, digit),
                          batch_size=args.batch_size,
                          shuffle=False, **kwargs)
        for digit in range(0, 10)
    }

    step = 5  # degrees per step
    mean_error = pd.DataFrame()
    mean_abs_error = pd.DataFrame()
    error_std = pd.DataFrame()
    for digit, data_loader in data_loaders.items():
        sys.stdout.write('Processing digit {} \n'.format(digit))
        sys.stdout.flush()
        results = get_metrics(model, data_loader, device, step)
        mean_error[digit] = pd.Series(results[0])
        mean_abs_error[digit] = pd.Series(results[1])
        error_std[digit] = pd.Series(results[2])

    # Re-index rows from step counts to degrees.
    mean_error.index = mean_error.index * step
    mean_abs_error.index = mean_abs_error.index * step
    error_std.index = error_std.index * step

    mean_error.to_csv(args.output_name + '_mean_error.csv')
    mean_abs_error.to_csv(args.output_name + '_mean_abs_error.csv')
    error_std.to_csv(args.output_name + '_error_std.csv')

    ## Plotting just the absolute error
    with plt.style.context('ggplot'):
        mean_abs_error.plot(figsize=(9, 8))
        plt.xlabel('Degrees')
        plt.ylabel('Average error in degrees')
        plt.legend(loc="upper left", bbox_to_anchor=[0, 1],
                   ncol=2, shadow=True, title="Digits", fancybox=True)
        plt.tick_params(colors='gray', direction='out')
        plt.savefig(args.output_name + '_abs_mean_curves.png')
        plt.close()

    ## Plotting absolute error and std
    with plt.style.context('ggplot'):
        fig = plt.figure(figsize=(9, 8))
        ax = fig.add_subplot(111)
        x = mean_abs_error.index
        for digit in mean_abs_error.columns:
            mean = mean_abs_error[digit]
            std = error_std[digit]
            line, = ax.plot(x, mean)
            ax.fill_between(x, mean - std, mean + std,
                            alpha=0.2,
                            facecolor=line.get_color(),
                            edgecolor=line.get_color())
        ax.set_xlabel('Degrees')
        ax.set_ylabel('Average error in degrees')
        ax.legend(loc="upper left", bbox_to_anchor=[0, 1],
                  ncol=2, shadow=True, title="Digits", fancybox=True)
        ax.tick_params(colors='gray', direction='out')
        fig.savefig(args.output_name + '_mean_&_std_curves.png')
        fig.clf()
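
# The index rescaling above turns DataFrame row numbers into degrees. A tiny
# stand-alone illustration:
import pandas as pd

df = pd.DataFrame({'abs_error': [1.0, 2.0, 3.0]})
df.index = df.index * 5   # rows 0, 1, 2 become 0, 5, 10 degrees
print(df)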
def main(_):
    # Load the configuration file.
    with open(FLAGS.config, 'r') as f:
        config = yaml.safe_load(f)

    # Load the vocabularies.
    src_vocab = Vocab.load(config['data']['src']['vocab'])
    tgt_vocab = Vocab.load(config['data']['tgt']['vocab'])

    # Load the test dataset.
    test_data = ShakespeareDataset('test', config, src_vocab, tgt_vocab)

    # Restore the model.
    src_vocab_size = len(src_vocab)
    tgt_vocab_size = len(tgt_vocab)
    encoder = Encoder(src_vocab_size, config['model']['embedding_dim'],
                      config['model']['bidirection'], config['model']['dropout'],
                      config['model']['layer'], config['model']['mode'])
    decoder = Decoder(tgt_vocab_size, config['model']['embedding_dim'],
                      config['model']['bidirection'], config['model']['dropout'],
                      config['model']['layer'], config['model']['mode'])
    if torch.cuda.is_available():
        encoder = encoder.cuda()
        decoder = decoder.cuda()
    ckpt_path = os.path.join(config['data']['ckpt'], config['experiment_name'], 'model.pt')
    if os.path.exists(ckpt_path):
        print('Loading checkpoint: %s' % ckpt_path)
        ckpt = torch.load(ckpt_path)
        encoder.load_state_dict(ckpt['encoder'])
        decoder.load_state_dict(ckpt['decoder'])
    else:
        print('Unable to find checkpoint. Terminating.')
        sys.exit(1)
    encoder.eval()
    decoder.eval()

    # Initialize the translator.
    greedy_translator = GreedyTranslator(encoder, decoder, tgt_vocab)

    # Qualitative evaluation - print translations for the first few sentences
    # in the test corpus.
    for i in range(10):
        src, tgt = test_data[i]
        translation = greedy_translator(src)
        src_sentence = [src_vocab.id2word(id) for id in src.data.cpu().numpy()]
        tgt_sentence = [tgt_vocab.id2word(id) for id in tgt.data.cpu().numpy()]
        translated_sentence = [tgt_vocab.id2word(id) for id in translation]
        print('---')
        print('Source: %s' % ' '.join(src_sentence))
        print('Ground truth: %s' % ' '.join(tgt_sentence))
        print('Model output: %s' % ' '.join(translated_sentence))
        print('---')

    # Quantitative evaluation - compute the corpus-level BLEU score.
    hypotheses = []
    references = []
    for src, tgt in test_data:
        translation = greedy_translator(src)
        tgt_sentence = [tgt_vocab.id2word(id) for id in tgt.data.cpu().numpy()]
        translated_sentence = [tgt_vocab.id2word(id) for id in translation]
        # Remove start- and end-of-sentence tokens.
        tgt_sentence = tgt_sentence[1:-1]
        translated_sentence = translated_sentence[1:-1]
        # The model output is the hypothesis and the ground truth is the
        # reference (the original had these two swapped).
        hypotheses.append(translated_sentence)
        references.append([tgt_sentence])
    print("Corpus BLEU score: %0.4f" % corpus_bleu(references, hypotheses))
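
# nltk's corpus_bleu expects (references, hypotheses): one list of reference
# token lists per sentence, and one hypothesis token list per sentence. A tiny
# self-contained check (the sentences are illustrative):
from nltk.translate.bleu_score import corpus_bleu

refs = [[['the', 'cat', 'sat', 'on', 'the', 'mat']]]
hyps = [['the', 'cat', 'sat', 'on', 'the', 'mat']]
print(corpus_bleu(refs, hyps))  # 1.0 for a perfect match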