def __init__(self, config, args):
    self.config = config
    for k, v in list(args.__dict__.items()):
        setattr(self.config, k, v)
    setattr(self.config, 'save_dir', '{}_log'.format(self.config.dataset))

    disp_str = ''
    for attr in sorted(dir(self.config), key=lambda x: len(x)):
        if not attr.startswith('__'):
            disp_str += '{} : {}\n'.format(attr, getattr(self.config, attr))
    sys.stdout.write(disp_str)
    sys.stdout.flush()

    self.labeled_loader, self.unlabeled_loader, self.unlabeled_loader2, \
        self.dev_loader, self.special_set = data.get_cifar_loaders(config)

    self.dis = model.Discriminative(config).cuda()
    self.gen = model.Generator(image_size=config.image_size,
                               noise_size=config.noise_size).cuda()
    self.enc = model.Encoder(config.image_size,
                             noise_size=config.noise_size,
                             output_params=True).cuda()

    self.dis_optimizer = optim.Adam(self.dis.parameters(), lr=config.dis_lr,
                                    betas=(0.5, 0.999))
    self.gen_optimizer = optim.Adam(self.gen.parameters(), lr=config.gen_lr,
                                    betas=(0.0, 0.999))
    self.enc_optimizer = optim.Adam(self.enc.parameters(), lr=config.enc_lr,
                                    betas=(0.0, 0.999))

    self.d_criterion = nn.CrossEntropyLoss()

    if not os.path.exists(self.config.save_dir):
        os.makedirs(self.config.save_dir)
    log_path = os.path.join(
        self.config.save_dir,
        '{}.FM+VI.{}.txt'.format(self.config.dataset, self.config.suffix))
    self.logger = open(log_path, 'w')  # text mode, since plain strings are written
    self.logger.write(disp_str)

    print(self.dis)
def get_model(train_method, config):
    logger.info('Building model --------------------------------------')
    if is_train and is_restore:
        if config.train_method == TrainMethod.RL_METHOD:
            model_filename = build_rl_init_model_filename()
        else:
            model_name = 'model-{}'.format(config.restore_epoch)
            model_filename = os.path.join(config.runner_path, model_name)
        logger.info('Parameter init from: %s' % model_filename)
    else:
        logger.info('Parameter init Randomly')

    embedding_table = model.get_embedding_table(config)
    encoder = model.Encoder(config=config,
                            max_sentence_length=config.max_sentence_length,
                            embedding_table=embedding_table)
    encoder.set_cell(name=config.cell_name, num_units=config.encoder_num_units)
    encoder.build()

    relation_decoder = model.RelationDecoder(encoder=encoder, config=config,
                                             is_train=is_train)
    position_decoder = model.PositionDecoder(encoder=encoder, config=config,
                                             is_train=is_train)
    decode_cell = []
    for t in range(config.triple_number
                   if config.decoder_method == const.DecoderMethod.MULTI_DECODER
                   else 1):
        cell = model.set_rnn_cell(config.cell_name, config.decoder_num_units)
        decode_cell.append(cell)
    triple_decoder = model.TripleDecoder(
        decoder_output_max_length=config.decoder_output_max_length,
        encoder=encoder,
        relation_decoder=relation_decoder,
        position_decoder=position_decoder,
        decode_cell=decode_cell,
        config=config)
    triple_decoder.build(train_method=train_method,
                         decoder_method=config.decoder_method,
                         is_train=is_train)

    sess = tf.Session(config=tfconfig)
    # sess = tf_debug.LocalCLIDebugWrapperSession(sess)
    # sess.add_tensor_filter('has_inf_or_nan', tf_debug.has_inf_or_nan)
    saver = tf.train.Saver()
    if is_train and is_restore:
        saver.restore(sess, model_filename)
    else:
        sess.run(tf.global_variables_initializer())

    logger.debug('print trainable variables')
    for v in tf.trainable_variables():
        value = sess.run(v)
        logger.info(v.name)
        logger.debug('mean %.4f, max %.3f, min %.3f' %
                     (np.mean(value), np.max(value), np.min(value)))
    return triple_decoder, sess
def encoder_test():
    embedded_sentence = pre_model_test.embedder_test()
    embedded_sentence = Variable(torch.FloatTensor(embedded_sentence))
    encoder = model.Encoder(100, 50)
    _, h = encoder(embedded_sentence)
    print(h.size())
    return h
def step(self, samples):
    # reverser
    self.reverser = model.Encoder(FLAGS.sample_num, FLAGS.dc_dim, FLAGS.z_dim)
    self.R1, R1_logits, R1_inter = self.reverser.inference(samples)
    return R1_logits
def __init__(self, config, args):
    self.config = config
    for k, v in args.__dict__.items():
        setattr(self.config, k, v)
    setattr(self.config, 'save_dir', '{}_log'.format(self.config.dataset))

    disp_str = ''
    for attr in sorted(dir(self.config), key=lambda x: len(x)):
        if not attr.startswith('__'):
            disp_str += '{} : {}\n'.format(attr, getattr(self.config, attr))
    sys.stdout.write(disp_str)
    sys.stdout.flush()

    self.labeled_loader, self.unlabeled_loader, self.dev_loader, \
        self.special_set = data.get_cifar_loaders(config)

    self.dis = model.Discriminative(config).cuda()
    self.ema_dis = model.Discriminative(config, ema=True).cuda()
    self.gen = model.Generator(image_size=config.image_size,
                               noise_size=config.noise_size).cuda()
    self.enc = model.Encoder(config.image_size,
                             noise_size=config.noise_size,
                             output_params=True).cuda()

    # self.dis_optimizer = optim.Adam(self.dis.parameters(), lr=config.dis_lr, betas=(0.5, 0.999))
    self.dis_optimizer = optim.SGD(self.dis.parameters(), lr=config.dis_lr,
                                   momentum=config.momentum,
                                   weight_decay=config.weight_decay,
                                   nesterov=config.nesterov)
    self.gen_optimizer = optim.Adam(self.gen.parameters(), lr=config.gen_lr,
                                    betas=(0.0, 0.999))
    self.enc_optimizer = optim.Adam(self.enc.parameters(), lr=config.enc_lr,
                                    betas=(0.0, 0.999))

    self.d_criterion = nn.CrossEntropyLoss()
    if config.consistency_type == 'mse':
        self.consistency_criterion = losses.softmax_mse_loss  # nn.MSELoss()  # (size_average=False)
    elif config.consistency_type == 'kl':
        self.consistency_criterion = losses.softmax_kl_loss  # nn.KLDivLoss()  # (size_average=False)
    else:
        # fail fast instead of leaving consistency_criterion undefined
        raise ValueError('unknown consistency_type: {}'.format(config.consistency_type))
    self.consistency_weight = 0

    if not os.path.exists(self.config.save_dir):
        os.makedirs(self.config.save_dir)
    if self.config.resume:
        pass

    log_path = os.path.join(
        self.config.save_dir,
        '{}.FM+VI.{}.txt'.format(self.config.dataset, self.config.suffix))
    self.logger = open(log_path, 'w')  # text mode, since plain strings are written
    self.logger.write(disp_str)

    print(self.dis)
def main():
    ### Create the torch datasets and get the size of the 'on-the-fly' created
    ### vocabulary and the length of the longest caption
    trainDataset = loadData.FlickrTrainDataset(images_folder, captions_folder,
                                               trans, 'TRAIN')
    valDataset = loadData.FlickrValDataset(images_folder, captions_folder,
                                           trans, 'VAL')
    voc_size = trainDataset.getVocabSize()
    max_capt = trainDataset.getMaxCaptionsLength()

    ### Create the models
    Encoder = model.Encoder()
    Decoder = model.Decoder(encoder_dim=2048, decoder_dim=512,
                            attention_dim=256, vocab_size=voc_size)
    Embedding = model.Embedding(vocab_size=voc_size, embedding_dim=128)

    ### Set the optimizer for the decoder (the only component that is actually
    ### trained) and the device for the model tensors
    decoder_optimizer = torch.optim.Adam(
        params=filter(lambda p: p.requires_grad, Decoder.parameters()),
        lr=1e-3)
    Encoder.to(device)
    Decoder.to(device)
    Embedding.to(device)

    ### Create the data loaders for training and evaluation
    loader_train = DataLoader(trainDataset, 32,
                              sampler=sampler.SubsetRandomSampler(range(30000)))
    val_loader = DataLoader(valDataset, 32,
                            sampler=sampler.SubsetRandomSampler(range(30000)))

    best_bleu = 0  # The best BLEU score so far
    for i in range(epochs):
        ## One epoch's training
        train.train(data_loader=loader_train, encoder=Encoder, decoder=Decoder,
                    embedding=Embedding, max_caption_length=max_capt,
                    optim=decoder_optimizer)
        ## One epoch's validation
        new_bleu = train.validate(data_loader=val_loader, encoder=Encoder,
                                  decoder=Decoder, embedding=Embedding,
                                  max_caption_length=max_capt)
        if new_bleu > best_bleu:
            best_bleu = new_bleu
        else:
            ## No improvement since the last epoch, so we stop training
            break

    ## Save the model for deployment
    torch.save(Encoder, 'Encoder')
    torch.save(Decoder, 'Decoder')
    torch.save(Embedding, 'Embedding')
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--dataset', type=str,
                        choices=['ffhq', 'cifar', 'mnist', 'mnist_fashion', 'emnist'])
    parser.add_argument('--image_size', type=int, choices=[32, 64, 128])
    args = parser.parse_args()

    latent_dim = 32
    image_size = args.image_size
    image_shape = [3, image_size, image_size]
    batch_size = 512

    train_loader, _ = data_helper.get_data(args.dataset, batch_size, image_size)

    for each_distribution in ['standard_normal', 'uniform', 'gamma', 'beta',
                              'chi', 'laplace']:
        encoder = model.Encoder(latent_dim, image_shape).cuda()
        encoder.load_state_dict(torch.load(
            f'model/image_size_128_epoch_500_test_1/encoder_{args.dataset}_{args.image_size}_{each_distribution}'))

        z_array = None
        for each_batch in tqdm.tqdm(train_loader):
            each_batch = Variable(each_batch[0]).cuda()
            each_z_batch = encoder(each_batch)
            if z_array is None:
                z_array = each_z_batch.cpu().detach().numpy()
            else:
                z_array = np.concatenate((z_array,
                                          each_z_batch.cpu().detach().numpy()))

        print_percentage_of_data_that_is_difficult_to_generate(z_array,
                                                               each_distribution)
def main():
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    data_device = torch.device("cpu")
    print("Device type: %s" % device.type)

    encoder = model.Encoder(depth)
    decoder = model.Decoder(depth)
    net = torch.nn.Sequential(encoder, decoder).to(device)
    optimizer = optim.Adadelta(net.parameters(), lr=0.01)

    dataset = load.WavDataSet("data/wav/", model.downsample_factor**depth,
                              data_device)
    dataloader = torch.utils.data.dataloader.DataLoader(dataset,
                                                        batch_size=batch_size,
                                                        shuffle=True)

    print("Starting training")
    for e in range(epochs):
        net.train()
        loss_sum = 0
        for batch_idx, (data_noise, data) in enumerate(dataloader):
            data = data.to(device)
            data_noise = data_noise.to(device)
            optimizer.zero_grad()
            output = net(data_noise)
            loss = F.mse_loss(output, data)
            loss.backward()
            optimizer.step()
            # .item() extracts the scalar so the computation graph is not kept alive
            loss_sum += loss.item()
        print("Epoch: %d\tLoss: %f" % (e, loss_sum))
        if e % 50 == 0:
            torch.save(net.state_dict(), "checkpoint/model_%d.pth" % e)
def __init__(self, model_file1=model_path):
    # You should
    # 1. create the model object
    # 2. load your state_dict
    # 3. call cuda()
    # self.model = ...
    self.models_detector = []
    for i in range(6):
        models = {}
        encoder_path = os.path.join(model_file1[i], "encoder.pth")
        encoder_dict = torch.load(encoder_path, map_location=device)
        feed_height = encoder_dict["height"]
        feed_width = encoder_dict["width"]
        models["encoder"] = model.Encoder(feed_width, feed_height, False)
        filtered_dict_enc = {
            k: v
            for k, v in encoder_dict.items()
            if k in models["encoder"].state_dict()
        }
        models["encoder"].load_state_dict(filtered_dict_enc)

        decoder_path = os.path.join(model_file1[i], "decoder.pth")
        models["decoder"] = model.Decoder(models["encoder"].num_ch_enc)
        models["decoder"].load_state_dict(
            torch.load(decoder_path, map_location=device))

        for key in models.keys():
            models[key].to(device)
            models[key].eval()
        self.models_detector.append(models)
def load(name, i2w, w2i):
    ctx_enc = model.Encoder(vocab_size=len(i2w), emb_size=args.emb_size,
                            hid_size=args.hid_size, embedding_weights=None)
    r_enc = model.Encoder(vocab_size=len(i2w), emb_size=args.emb_size,
                          hid_size=args.hid_size, embedding_weights=None)
    d_enc = model.DualEncoder(context_encoder=ctx_enc, response_encoder=r_enc,
                              w2i=w2i, i2w=i2w, args=args)
    d_enc = d_enc.to(device)
    d_enc.load(name)
    return d_enc
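# Usage sketch (not from the original source): restoring a trained dual
# encoder for evaluation. The checkpoint name "dual_encoder.pt" is an
# illustrative assumption; build_vocab is the helper used elsewhere in
# this codebase.
i2w, w2i = build_vocab(train, path=args.data_path)
dual_encoder = load("dual_encoder.pt", i2w, w2i)
dual_encoder.eval()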
def get_model(embeddings, word2id, paths):
    encoder = model.Encoder(args, embeddings, config=config)
    encoder.build()
    if args.decode_method == 0 or args.decode_method == 1:
        decoder = model.softmaxOrCRFDecoder(encoder, args, tag2label, word2id,
                                            paths, config)
    else:
        print("Invalid argument! Please use valid arguments!")
        exit()  # without this, decoder.build() below would hit an undefined name
    decoder.build()
    return decoder
def __init__(self, pidx: int, gidx: int, n_features: int, n_hiddens: int):
    self.n_features = n_features
    self.pidx = pidx
    self.gidx = gidx
    self.gpu = torch.device('cuda:{}'.format(gidx - 1))
    self.encoder = model.Encoder(n_inputs=n_features,
                                 n_hiddens=n_hiddens).to(self.gpu)
    self.decoder = model.AttentionDecoder(n_hiddens=n_hiddens,
                                          n_features=n_features).to(self.gpu)
    self.model_fn = 'checkpoints/SWaT-P{}'.format(pidx)
    self.encoder_optimizer = optim.Adam(self.encoder.parameters(), amsgrad=True)
    self.decoder_optimizer = optim.Adam(self.decoder.parameters(), amsgrad=True)
    self.mse_loss = nn.MSELoss()
def main():
    print('Generating emb...')
    checkpoint = torch.load(
        hp.eval_model,
        map_location=lambda storage, loc: storage.cuda(hp.gpu))
    encoder = model.Encoder(hp.emb_size, hp.hidden_size, hp.dropout_rate)
    if hp.gpu >= 0:
        encoder.cuda()
    encoder.load_state_dict(checkpoint['encoder'])

    w2v = de_biassing_emb(encoder)
    eval_bias_analogy(w2v)

    print('Saving emb...')
    debias_emb_txt = 'src/debiased_{}/gender_debiased.txt'.format(sys.argv[1])
    debias_emb_bin = 'src/debiased_{}/gender_debiased.bin'.format(sys.argv[1])
    w2v.save_word2vec_format(debias_emb_bin, binary=True)
    w2v.save_word2vec_format(debias_emb_txt, binary=False)
def step(self, z):
    z_sum = tf.summary.histogram("z", z)
    # generator
    self.generator = model.Generator(FLAGS.batch_size, FLAGS.gc_dim)
    # self.G = self.generator.inference(z)
    # sampler using generator
    self.samples = self.generator.sampler(z, reuse=False, trainable=False)
    # reverser
    self.reverser = model.Encoder(FLAGS.batch_size, FLAGS.dc_dim, FLAGS.z_dim)
    self.R1, R1_logits, R1_inter = self.reverser.inference(self.samples)
    R_sum = tf.summary.histogram("R", self.R1)
    # return images, D1_logits, D2_logits, G_sum, z_sum, d1_sum, d2_sum
    # return D2_logits, G_sum, z_sum, d1_sum, d2_sum
    return R1_logits, R1_inter, R_sum, z_sum
def main():
    # -------- hyperparameters --------
    file_path = "nlp_sample.txt"
    embedding_dim = 200
    hidden_dim = 128
    BATCH_NUM = 100
    epoch = 10
    # loss function
    criterion = gluon.loss.SoftmaxCrossEntropyLoss()
    # optimizer
    opt = "adam"
    save = True

    # -------- data preparation --------
    input_date, output_date = utils.date_load(file_path)
    # Get the sequence lengths of the input and output.
    # All sequences have the same length, so len() of element 0 suffices;
    # no padding is needed for this data.
    # input_len = len(input_date[0])   # 29
    # output_len = len(output_date[0]) # 10

    input_data, output_data, char2id, id2char = utils.create_corpus(
        input_date, output_date)
    vocab_size = len(char2id)

    # split into train and test sets 7:3
    train_x, test_x, train_y, test_y = train_test_split(
        input_data, output_data, train_size=0.7)
    train_x = np.array(train_x)
    train_y = np.array(train_y)
    train_data = mx.io.NDArrayIter(train_x, train_y, BATCH_NUM, shuffle=False)

    # -------- training --------
    encoder = model.Encoder(vocab_size, embedding_dim, hidden_dim)
    attn_decoder = model.AttentionDecoder(vocab_size, embedding_dim,
                                          hidden_dim, BATCH_NUM)
    encoder, attn_decoder = train(encoder, attn_decoder, train_data, epoch,
                                  criterion, opt=opt, save=save)
def train(max_epoch, batch_size=64):
    loss = [1]
    data = Data.Data()
    voc_size = data.get_voc_size()
    model = Model.Encoder(batch_size=batch_size, voc_size=voc_size,
                          hidden_size=100, device=device, n_layers=1,
                          dropout=0).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=0.03)

    epoch_count = 0
    increase_count = 0
    while True:
        train_loss = train_iter(data, model, criterion, optimizer, batch_size,
                                voc_size)
        # sample
        loss.append(sample(dataset=data, model=model, batch_size=batch_size,
                           criterion=criterion))
        print('epoch :', epoch_count, "\t", loss[-1], "\t", train_loss)
        if epoch_count > 500:
            torch.save(model.state_dict(),
                       "../ckpt/model" + str(epoch_count % 100))
        # decide whether to stop
        if loss[-2] < loss[-1]:
            increase_count += 1
            if increase_count > 10 and loss[-1] < 0.2:
                break
        else:
            increase_count = 0
        # increment the epoch count
        epoch_count += 1

    plt.plot(loss)
    plt.show()
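# Usage sketch (not from the original source): kicks off training with the
# defaults; device, Data, Model, train_iter and sample are assumed to be
# defined at module level as in the snippet above.
train(max_epoch=1000, batch_size=64)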
def get_model(train_method, config):
    logger.info('Building model --------------------------------------')
    logger.info('Parameter init Randomly')

    embedding_table = model.get_embedding_table(config)
    encoder = model.Encoder(config=config,
                            max_sentence_length=config.max_sentence_length,
                            embedding_table=embedding_table)
    encoder.set_cell(name=config.cell_name, num_units=config.encoder_num_units)
    encoder.build()

    if config.decoder_method == DecoderMethod.ONE_DECODER:
        decoder = model.OneDecoder(
            decoder_output_max_length=config.decoder_output_max_length,
            embedding_table=embedding_table,
            encoder=encoder,
            config=config)
    elif config.decoder_method == DecoderMethod.MULTI_DECODER:
        decoder = model.MultiDecoder(
            decoder_output_max_length=config.decoder_output_max_length,
            embedding_table=embedding_table,
            encoder=encoder,
            config=config)
    else:
        logger.error('decoder_method is %s, which is illegal.' %
                     config.decoder_method)
        exit()
    decoder.set_cell(name=config.cell_name, num_units=config.decoder_num_units)
    decoder.build(is_train=is_train)

    sess = tf.Session(config=tfconfig)
    sess.run(tf.global_variables_initializer())

    logger.debug('print trainable variables')
    for v in tf.trainable_variables():
        value = sess.run(v)
        logger.debug('Name %s:\tmean %s, max %s, min %s' %
                     (v.name, np.mean(value), np.max(value), np.min(value)))
    return decoder, sess
def __init__(self, config, args):
    self.config = config
    for k, v in args.__dict__.items():
        setattr(self.config, k, v)
    setattr(self.config, 'save_dir', '{}_log'.format(self.config.dataset))

    disp_str = ''
    for attr in sorted(dir(self.config), key=lambda x: len(x)):
        if not attr.startswith('__'):
            disp_str += '{} : {}\n'.format(attr, getattr(self.config, attr))
    sys.stdout.write(disp_str)
    sys.stdout.flush()

    self.labeled_loader, self.unlabeled_loader, self.unlabeled_loader2, \
        self.dev_loader, self.special_set = data.get_cifar_loaders(config)

    self.dis = model.Discriminative(config).cuda()
    self.gen = model.Generator(image_size=config.image_size,
                               noise_size=config.noise_size).cuda()
    self.enc = model.Encoder(config.image_size,
                             noise_size=config.noise_size,
                             output_params=True).cuda()

    # load pretrained networks
    self.load_network(self.dis, 'D', strict=False)
    self.load_network(self.gen, 'G', strict=False)
    self.load_network(self.enc, 'E', strict=False)

    if not os.path.exists(self.config.save_dir):
        os.makedirs(self.config.save_dir)
    log_path = os.path.join(
        self.config.save_dir,
        '{}.FM+VI.{}.txt'.format(self.config.dataset, self.config.suffix))
    self.logger = open(log_path, 'w')  # text mode, since plain strings are written
    self.logger.write(disp_str)

    print(self.dis)
def load_models(checkpoint_name=None, encoded_image_size=None,
                word_embeddings_dim=None, attention_dim=None,
                decoder_hidden_size=None, vocab_size=None, device=None):
    '''
    :param checkpoint_name: name of checkpoint file
    :param encoded_image_size: param to initialize model if there is no checkpoint name
    :param word_embeddings_dim: param to initialize model if there is no checkpoint name
    :param attention_dim: param to initialize model if there is no checkpoint name
    :param decoder_hidden_size: param to initialize model if there is no checkpoint name
    :param vocab_size: param to initialize model if there is no checkpoint name
    :param device: device on which to store the model
    :return: start_epoch, end_epoch, loss_fn, enc, dec, optimizer_encoder, optimizer_decoder
    '''
    loss_fn = nn.CrossEntropyLoss().to(device)
    end_epoch = 10_000
    if checkpoint_name is None:
        start_epoch = 0
        enc = model.Encoder(encoded_image_size=encoded_image_size).to(device)
        dec = model.Decoder(vocab_size=vocab_size,
                            word_embeddings_dim=word_embeddings_dim,
                            attention_dim=attention_dim,
                            decoder_hidden_size=decoder_hidden_size,
                            encoded_image_size=encoded_image_size).to(device)
        # each optimizer updates the parameters of the network it is named after
        optimizer_decoder = torch.optim.Adam(dec.parameters(), lr=4e-4)
        optimizer_encoder = torch.optim.Adam(enc.parameters(), lr=1e-4)
    else:
        checkpoint = torch.load(checkpoint_name)
        start_epoch = checkpoint['epoch']
        dec = checkpoint['decoder'].to(device)
        optimizer_decoder = checkpoint['decoder_optimizer']
        enc = checkpoint['encoder'].to(device)
        optimizer_encoder = checkpoint['encoder_optimizer']
    return start_epoch, end_epoch, loss_fn, enc, dec, optimizer_encoder, optimizer_decoder
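# Usage sketch (not from the original source): starting a fresh run with
# load_models; every dimension value below is an illustrative assumption.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
start_epoch, end_epoch, loss_fn, enc, dec, opt_enc, opt_dec = load_models(
    checkpoint_name=None, encoded_image_size=14, word_embeddings_dim=512,
    attention_dim=512, decoder_hidden_size=512, vocab_size=10000,
    device=device)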
def main(): device = torch.device("cpu") print("Device type: %s" % device.type) encoder = model.Encoder(depth) decoder = model.Decoder(depth) net = torch.nn.Sequential(encoder, decoder).to(device) net.load_state_dict(torch.load("checkpoint/model_3150.pth")) net.eval() dataset = load.WavDataSet("data/wav/", model.downsample_factor**depth, device) dataloader = torch.utils.data.dataloader.DataLoader(dataset, batch_size=batch_size, shuffle=True) for batch_idx, data in enumerate(dataloader): data = data.to(device) output = net(data) scipy.io.wavfile.write("out/%d.wav" % batch_idx, load.sample_rate, output.data.numpy()) print("Finished %d" % batch_idx)
def build_model(hp):
    encoder = model.Encoder(num_layers=hp["encoder_num_layers"],
                            num_units=hp["encoder_num_units"],
                            dropout=hp["encoder_dropout"],
                            dropout_prob=hp["encoder_dropout_prob"],
                            layer_norm=hp["encoder_layer_norm"],
                            dtype=tf.float32)
    decoder = model.Decoder(
        attention_unit_num=hp["decoder_attention_unit_num"],
        vocab_size=hp["decoder_vocab_size"],
        gru_unit_num=hp["decoder_gru_unit_num"],
        fc_layer_num=hp["decoder_fc_layer_num"],
        fc_unit_num=hp["decoder_fc_unit_num"],
        attention_type=hp["decoder_attention_type"],
        gru_layer_norm=hp["decoder_gru_layer_norm"],
        gru_dropout=hp["decoder_gru_dropout"],
        gru_dropout_prob=hp["decoder_gru_dropout_prob"],
        fc_activation=hp["decoder_fc_activation"],
        dtype=tf.float32)
    return encoder, decoder
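# Usage sketch (not from the original source): build_model expects a flat
# hyperparameter dict keyed as above; all values below are illustrative
# assumptions.
hp = {
    "encoder_num_layers": 2, "encoder_num_units": 256,
    "encoder_dropout": True, "encoder_dropout_prob": 0.1,
    "encoder_layer_norm": True,
    "decoder_attention_unit_num": 128, "decoder_vocab_size": 8000,
    "decoder_gru_unit_num": 256, "decoder_fc_layer_num": 2,
    "decoder_fc_unit_num": 256, "decoder_attention_type": "bahdanau",
    "decoder_gru_layer_norm": True, "decoder_gru_dropout": True,
    "decoder_gru_dropout_prob": 0.1, "decoder_fc_activation": "relu",
}
encoder, decoder = build_model(hp)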
def load_model(model_name, sess, seq_length=None):
    model_dir = os.path.dirname(model_name + '.meta')
    model_config_file = './output/' + model_dir + "/params.yaml"
    import yaml
    with open(model_config_file, 'r') as stream:
        model_params = yaml.safe_load(stream)
    model = model_params['model']
    hidden_units = model_params['hidden_units']
    num_layers = model_params['num_layers']

    # TODO: Get this information from a separate config
    prob_config = config.get_problem_config(rnn_model.PROBLEM_NAME)
    if seq_length is None:
        seq_length = prob_config['max_sequence_length']
    observation_length = prob_config['input_length']
    action_length = prob_config['output_length']

    encoder = rnn_model.Encoder(action_length, observation_length)
    input_length = encoder.size_x()
    output_length = encoder.size_y()

    start = time.time()
    model = Seq2SeqModelExt(session=sess,
                            hidden_units=hidden_units,
                            model=model,
                            num_layers=num_layers,
                            seq_length=seq_length,
                            input_length=input_length,
                            output_length=output_length,
                            batch_size=1,
                            scope="model")
    end = time.time()
    model_create_time = end - start
    # model.load('vrep/version1/model.ckpt-967')
    model.load(model_name)
    start = time.time()
    model_load_time = start - end
    return model
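# Usage sketch (not from the original source): the checkpoint prefix is an
# illustrative assumption; load_model derives the run directory from it and
# reads ./output/<run_dir>/params.yaml.
with tf.Session() as sess:
    seq2seq = load_model('run1/model.ckpt-100', sess)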
def make_model(src_vocab, tgt_vocab, emb_size=256, hidden_size=512,
               num_layers=1, dropout=0.1):
    "Helper: Construct a model from hyperparameters."
    attention = model.BahdanauAttention(hidden_size)
    mdl = model.EncoderDecoder(
        model.Encoder(emb_size, hidden_size, num_layers=num_layers,
                      dropout=dropout),
        model.Decoder(emb_size, hidden_size, attention,
                      num_layers=num_layers, dropout=dropout),
        nn.Embedding(src_vocab, emb_size),
        nn.Embedding(tgt_vocab, emb_size),
        model.Generator(hidden_size, tgt_vocab))
    return mdl.cuda() if USE_CUDA else mdl
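# Usage sketch (not from the original source): the vocabulary sizes are
# illustrative assumptions.
seq2seq = make_model(src_vocab=32000, tgt_vocab=32000, emb_size=256,
                     hidden_size=512, num_layers=2, dropout=0.2)
print(seq2seq)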
    dual_encoder = load(args.load_model, i2w, w2i)
    evaluate_specific(valid, dual_encoder)
    evaluate_specific(test, dual_encoder, suffix="test")
else:
    # Load all the data
    train, valid, test = load_data(args.data_path)
    if args.data_size >= 0:
        train = train[:int(len(train) * args.data_size)]
    print("Number of training instances:", len(train))
    print("Number of validation instances:", len(valid))
    print("Number of test instances:", len(test))
    i2w, w2i = build_vocab(train, path=args.data_path)

    context_encoder = model.Encoder(vocab_size=len(i2w),
                                    emb_size=args.emb_size,
                                    hid_size=args.hid_size,
                                    embedding_weights=emb_w)
    response_encoder = model.Encoder(vocab_size=len(i2w),
                                     emb_size=args.emb_size,
                                     hid_size=args.hid_size,
                                     embedding_weights=emb_w)
    dual_encoder = model.DualEncoder(context_encoder=context_encoder,
                                     response_encoder=response_encoder,
                                     w2i=w2i, i2w=i2w, args=args)
    dual_encoder = dual_encoder.to(device)

    best_valid = 0.0
    best_epoch = 0
    for epoch in range(start_epoch, args.num_epochs):
        # Train
from datetime import datetime

import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim

import preprocessing
import model

# Initialize the encoder and decoder
encoder = model.Encoder(vocab_size, embedding_dim, hidden_dim,
                        batch_size).to(device)
decoder = model.Decoder(vocab_size, embedding_dim, hidden_dim,
                        batch_size).to(device)
criterion = nn.CrossEntropyLoss(ignore_index=word2id["<pad>"],
                                size_average=False)

# Define the optimizers
encoder_optimizer = optim.Adam(encoder.parameters(), lr=0.001)
decoder_optimizer = optim.Adam(decoder.parameters(), lr=0.001)


def get_current_time():
    return datetime.now().strftime("%Y-%m-%d %H:%M:%S")


print("Starting training")
n_epoch = 60
sig_cnt = 0
all_losses = []
for epoch in range(1, n_epoch + 1):
    title_batch = preprocessing.train2batch(title_train, batch_size)
    if epoch > 25:
        sig_cnt += 1
envs = MultiEnvironment(args.env, args.batch_size, args.fskip)
action_size = envs.get_action_size()

print('Building models...')
torch.cuda.set_device(args.gpu)

if not os.path.isfile(args.agent_file):
    print("need an agent file")
    exit()
args.agent_file = args.env + ".model.80.tar"

agent = model.Agent(action_size).cuda()
agent.load_state_dict(torch.load(args.agent_file, map_location=map_loc))

Z_dim = args.latent
encoder = model.Encoder(Z_dim).cuda()
generator = model.Generator(Z_dim).cuda()
encoder.train()
generator.train()

optim_gen = optim.Adam(generator.parameters(), lr=args.lr, betas=(0.0, 0.9))
optim_enc = optim.Adam(filter(lambda p: p.requires_grad,
                              encoder.parameters()),
                       lr=args.lr, betas=(0.0, 0.9))
print('finished building model')

bs = args.batch_size
TINY = 1e-9


def immsave(file, pixels, size=200):
    np_img = imresize(pixels, size, interp='nearest')
def main():
    print('Loading word embedding')
    emb = KeyedVectors.load_word2vec_format(hp.word_embedding,
                                            binary=hp.emb_binary)

    print("Loading data")
    stereotype_words = {}
    gender_words = {}
    no_gender_words = make_no_gender_words(open(hp.no_gender_words), emb)
    stereotype_words['female'], stereotype_words['male'] = \
        make_pair_words(hp.stereotype_words, emb)
    gender_words['female'], gender_words['male'] = \
        make_pair_words(hp.gender_words, emb)
    all_words = no_gender_words \
        + stereotype_words['female'] \
        + stereotype_words['male'] \
        + gender_words['female'] \
        + gender_words['male']
    train_words, dev_words = create_train_dev(gender_words, no_gender_words,
                                              stereotype_words)
    word2emb = {}
    for word in all_words:
        word2emb[word] = emb[word]

    if hp.pre_train_autoencoder:
        print('Pre-training autoencoder')
        encoder = model.Encoder(hp.emb_size, hp.hidden_size, hp.pta_dropout_rate)
        decoder = model.Decoder(hp.hidden_size, hp.emb_size, hp.pta_dropout_rate)
        if hp.gpu >= 0:
            encoder.cuda()
            decoder.cuda()
        encoder_optim = make_optim(encoder, hp.pta_optimizer,
                                   hp.pta_learning_rate, hp.pta_lr_decay,
                                   hp.pta_max_grad_norm)
        decoder_optim = make_optim(decoder, hp.pta_optimizer,
                                   hp.pta_learning_rate, hp.pta_lr_decay,
                                   hp.pta_max_grad_norm)
        if hp.pre_data == 'random':
            checkpoint = pre_train_autoencoder(hp, encoder, encoder_optim,
                                               decoder, decoder_optim, emb)
        elif hp.pre_data == 'common':
            checkpoint = pre_train_autoencoder(hp, encoder, encoder_optim,
                                               decoder, decoder_optim, emb,
                                               dev_words=dev_words)

    encoder = model.Encoder(hp.emb_size, hp.hidden_size, hp.dropout_rate)
    decoder = model.Decoder(hp.hidden_size, hp.emb_size, hp.dropout_rate)
    if hp.gpu >= 0:
        encoder.cuda()
        decoder.cuda()
    if hp.pre_train_autoencoder:
        encoder.load_state_dict(checkpoint['encoder'])
        decoder.load_state_dict(checkpoint['decoder'])

    if hp.pre_train_classifier:
        female_classifier = model.Classifier(hp.hidden_size)
        male_classifier = model.Classifier(hp.hidden_size)
        if hp.gpu >= 0:
            female_classifier.cuda()
            male_classifier.cuda()
        female_classifier_optim = make_optim(female_classifier,
                                             hp.cls_optimizer,
                                             hp.cls_learning_rate,
                                             hp.cls_lr_decay,
                                             hp.cls_max_grad_norm)
        male_classifier_optim = make_optim(male_classifier,
                                           hp.cls_optimizer,
                                           hp.cls_learning_rate,
                                           hp.cls_lr_decay,
                                           hp.cls_max_grad_norm)

        encoder.eval()
        encoder.zero_grad()
        train_females = []
        train_males = []
        dev_females = []
        dev_males = []
        train_female_embs = [
            encoder(torch.FloatTensor(emb[word[0]]).cuda()).data
            if hp.gpu >= 0
            else encoder(torch.FloatTensor(emb[word[0]])).data
            for word in train_words['female & male']
        ]
        encoder.zero_grad()
        train_male_embs = [
            encoder(torch.FloatTensor(emb[word[1]]).cuda()).data
            if hp.gpu >= 0
            else encoder(torch.FloatTensor(emb[word[1]])).data
            for word in train_words['female & male']
        ]
        encoder.zero_grad()
        train_stereotype_embs = [
            encoder(torch.FloatTensor(emb[word]).cuda()).data
            if hp.gpu >= 0
            else encoder(torch.FloatTensor(emb[word])).data
            for word in train_words['no gender']
        ]
        encoder.zero_grad()
        dev_female_embs = [
            encoder(torch.FloatTensor(emb[word[0]]).cuda()).data
            if hp.gpu >= 0
            else encoder(torch.FloatTensor(emb[word[0]])).data
            for word in dev_words['female & male']
        ]
        encoder.zero_grad()
        dev_male_embs = [
            encoder(torch.FloatTensor(emb[word[1]]).cuda()).data
            if hp.gpu >= 0
            else encoder(torch.FloatTensor(emb[word[1]])).data
            for word in dev_words['female & male']
        ]
        encoder.zero_grad()
        dev_stereotype_embs = [
            encoder(torch.FloatTensor(emb[word]).cuda()).data
            if hp.gpu >= 0
            else encoder(torch.FloatTensor(emb[word])).data
            for word in dev_words['no gender']
        ]
        encoder.zero_grad()

        print('Pre-training classifier')
        female_checkpoint, male_checkpoint = pre_train_classifier(
            hp, female_classifier, male_classifier, female_classifier_optim,
            male_classifier_optim, train_female_embs, train_male_embs,
            train_stereotype_embs, dev_female_embs, dev_male_embs,
            dev_stereotype_embs)

    print('Building female & male classifiers')
    female_classifier = model.Classifier(hp.hidden_size)
    male_classifier = model.Classifier(hp.hidden_size)
    if hp.gpu >= 0:
        female_classifier.cuda()
        male_classifier.cuda()
    if hp.pre_train_classifier:
        female_classifier.load_state_dict(female_checkpoint['female'])
        male_classifier.load_state_dict(male_checkpoint['male'])

    print('Setting optimizer')
    encoder_optim = make_optim(encoder, hp.optimizer, hp.learning_rate,
                               hp.lr_decay, hp.max_grad_norm)
    female_classifier_optim = make_optim(female_classifier, hp.optimizer,
                                         hp.learning_rate, hp.lr_decay,
                                         hp.max_grad_norm)
    male_classifier_optim = make_optim(male_classifier, hp.optimizer,
                                       hp.learning_rate, hp.lr_decay,
                                       hp.max_grad_norm)
    decoder_optim = make_optim(decoder, hp.optimizer, hp.learning_rate,
                               hp.lr_decay, hp.max_grad_norm)

    trainModel(encoder, encoder_optim, female_classifier,
               female_classifier_optim, male_classifier,
               male_classifier_optim, decoder, decoder_optim,
               train_words, dev_words, word2emb)
torch.cuda.manual_seed_all(opt.manualSeed)
torch.set_default_tensor_type('torch.FloatTensor')
cudnn.benchmark = True  # for speed, i.e., the cuDNN autotuner

########################################################
if torch.cuda.is_available() and not opt.cuda:
    print("WARNING: You have a CUDA device, so you should probably run with --cuda")

# calling the dataloader
data = util.DATA_LOADER(opt)
print("training samples: ", data.ntrain)

############## MODEL INITIALIZATION #############
netE = model.Encoder(opt)
netG = model.HYBRID_FUSION_ATTENTION(opt)
netD = model.Discriminator(opt)
print(netE)
print(netG)
print(netD)
################################################

# init tensors
input_res = torch.FloatTensor(opt.batch_size, opt.resSize)
input_test_labels = torch.LongTensor(opt.fake_batch_size, opt.nclass_all)
input_labels = torch.LongTensor(opt.batch_size, opt.nseen_class)
input_train_early_fusion_att = torch.FloatTensor(opt.batch_size, opt.attSize)
input_test_early_fusion_att = torch.FloatTensor(opt.fake_batch_size, opt.attSize)
def main(config):
    # prepare data
    tokenizer = transformers.tokenization_bert.BertTokenizer.from_pretrained(
        config.bert_model)
    contexts, _, valid_qa = util.load_data(config, tokenizer)
    context_text = [context["clean_context"] for context in contexts]
    q_tokenized = [' '.join(qa["tokenized"]) for qa in valid_qa]
    q_wordpiece = [qa["wordpiece"] for qa in valid_qa]
    q_answer = [qa["answer"] for qa in valid_qa]

    # define TF-IDF
    tfidf = TfidfVectorizer(analyzer=str.split,
                            encoding="utf-8",
                            stop_words="english",
                            ngram_range=(1, config.ngram))

    print("TF-IDF Retrieval")
    tfidf_context = tfidf.fit_transform(
        [' '.join(context["tokenized"]) for context in contexts])
    tfidf_question = tfidf.transform(q_tokenized)
    tfidf_sim = util.get_sim(tfidf_question, tfidf_context)
    check_answer(tfidf_sim, context_text, q_answer)
    del tfidf_context
    del tfidf_question
    gc.collect()

    # define ICT model
    config.devices = [int(device) for device in config.devices.split('_')]
    if config.use_cuda:
        config.device = config.devices[0]
    else:
        config.device = "cpu"
    vocab = dict()
    for k, v in tokenizer.vocab.items():
        vocab[k] = v
    start_token = vocab["[CLS]"]

    model = Model.Encoder(config)
    if config.use_cuda:
        model.cuda()
        model = nn.DataParallel(model, device_ids=config.devices)
    optimizer = torch.optim.Adam(model.parameters(), lr=config.learning_rate)
    loss = nn.CrossEntropyLoss()

    # make data loader
    def get_loader(data, batch_size):
        data = TensorDataset(torch.from_numpy(data))
        return DataLoader(data, batch_size=batch_size, shuffle=True,
                          sampler=None, drop_last=True)

    loader = get_loader(np.array([i for i in range(len(contexts))]),
                        config.batch_size)

    def get_batch(index, contexts, start_token):
        "Make an ICT batch."
        # get the sentences of each paragraph
        sentence = [contexts[i]["sentence"] for i in index]
        # pick a target sentence for ICT training
        target_sentence = [random.randint(0, len(sen) - 1) for sen in sentence]
        # decide whether to remove the original sentence, as mentioned in the paper
        remove_target = [random.random() < (1 - config.remove_percent)
                         for _ in range(len(target_sentence))]
        # build the sentences of each target context
        target_context = [
            sen[:i] + sen[i + remove:]
            for i, sen, remove in zip(target_sentence, sentence, remove_target)
        ]
        # concat the sentences of each context
        target_context = [[y for x in context for y in x]
                          for context in target_context]
        target_context = [[start_token] + context
                          for context in target_context]
        target_sentence = [sen[i] for i, sen in zip(target_sentence, sentence)]
        target_sentence = [[start_token] + sen for sen in target_sentence]
        # pad sequences
        s, s_mask = util.pad_sequence(target_sentence, max_seq=config.max_seq,
                                      device=config.device)
        c, c_mask = util.pad_sequence(target_context, max_seq=config.max_seq,
                                      device=config.device)
        return s, s_mask, c, c_mask

    def save(model, epoch, accuracy):
        "Save model weights."
        model_to_save = model.module if hasattr(model, 'module') else model
        save_dict = {
            'epoch': epoch,
            'accuracy': accuracy,
            'model': model_to_save.state_dict()
        }
        torch.save(save_dict, config.model_weight)

    def load(model, device):
        "Load model weights."
        model_to_load = model.module if hasattr(model, 'module') else model
        load_dict = torch.load(
            config.model_weight,
            map_location=lambda storage, loc: storage.cuda(device))
        model_to_load.load_state_dict(load_dict['model'])
        return model_to_load

    def get_semantic_sim(model):
        "Make semantic embeddings of contexts and questions, and get their similarity."
        context_embedding = []
        question_embedding = []
        model.eval()
        with torch.no_grad():
            for i in tqdm(range(0, len(contexts), config.test_batch_size)):
                c = [[y for x in context["sentence"] for y in x]
                     for context in contexts[i:i + config.test_batch_size]]
                c, c_mask = util.pad_sequence(c, max_seq=config.max_seq,
                                              device=config.device)
                c_encode = model(x=c, x_mask=c_mask)
                context_embedding.append(c_encode.detach().cpu().numpy())
            for i in tqdm(range(0, len(q_wordpiece), config.test_batch_size)):
                q = [tokens
                     for tokens in q_wordpiece[i:i + config.test_batch_size]]
                q, q_mask = util.pad_sequence(q, max_seq=config.max_seq,
                                              device=config.device)
                q_encode = model(x=q, x_mask=q_mask)
                question_embedding.append(q_encode.detach().cpu().numpy())
        context_embedding = np.concatenate(context_embedding, axis=0)
        question_embedding = np.concatenate(question_embedding, axis=0)
        return util.get_sim(question_embedding, context_embedding)

    # train ICT model
    max_accuracy = -math.inf
    print("ICT model Retrieval.")
    for e in range(config.epoch):
        model.train()
        avg_loss = .0
        batch_num = len(loader)
        for batch in tqdm(loader, total=batch_num):
            batch = batch[0]
            s, s_mask, c, c_mask = get_batch(batch, contexts, start_token)
            s_encode = model(x=s, x_mask=s_mask)
            c_encode = model(x=c, x_mask=c_mask)
            logit = torch.matmul(s_encode, c_encode.transpose(-2, -1))
            target = torch.from_numpy(
                np.array([i for i in range(batch.size(0))])
            ).long().to(config.device)
            loss_val = loss(logit, target).mean()
            avg_loss += loss_val.item()
            loss_val.backward()
            optimizer.step()
            optimizer.zero_grad()
        print("{} epoch, train loss : {}".format(
            e + 1, round(avg_loss / batch_num, 2)))
        semantic_sim = get_semantic_sim(model)
        accuracy = check_answer(semantic_sim, context_text, q_answer)
        if accuracy > max_accuracy:
            max_accuracy = accuracy
            save(model, e + 1, accuracy)

    # evaluate the model with the best-performing weights
    model = load(model, config.device)
    semantic_sim = get_semantic_sim(model)
    check_answer(semantic_sim, context_text, q_answer)

    # evaluate the ensemble
    check_answer(
        semantic_sim * (1 - config.sim_ratio) + tfidf_sim * config.sim_ratio,
        context_text, q_answer)
import numpy as np

INPUT_DIM = len(data.SRC.vocab)
OUTPUT_DIM = len(data.TRG.vocab)
TRG_PAD_IDX = data.TRG.vocab.stoi[data.TRG.pad_token]
SRC_PAD_IDX = data.SRC.vocab.stoi[data.SRC.pad_token]

train_iterator = data.get_train_iterator()
test_iterator = data.get_test_iterator()
val_iterator = data.get_valid_iterator()

encoder = model.Encoder(vocab_size=INPUT_DIM,
                        d_model=config.HID_DIM,
                        ff_dim=config.PF_DIM,
                        n_heads=config.N_HEADS,
                        max_len=config.MAX_LEN,
                        dropout=config.DROPOUT,
                        n_layers=config.N_LAYERS,
                        n_experts=config.N_EXP,
                        capacity_factor=config.CAPACITY_FACTOR,
                        device=config.DEVICE).to(config.DEVICE)
decoder = model.Decoder(output_dim=OUTPUT_DIM,
                        d_model=config.HID_DIM,
                        ff_dim=config.PF_DIM,
                        n_heads=config.N_HEADS,
                        max_len=config.MAX_LEN,
                        dropout=config.DROPOUT,
                        n_layers=config.N_LAYERS,
                        n_experts=config.N_EXP,
                        capacity_factor=config.CAPACITY_FACTOR,
                        device=config.DEVICE).to(config.DEVICE)