def load_model(file, cuda):
    """Build the model described by a JSON configuration file.

    The file is parsed with ``json.load`` and its ``'model_type'`` entry
    selects which builder function or model class constructs the model.

    Args:
        file: Path of the JSON configuration file.
        cuda: Forwarded to the ``build_*`` helpers. When truthy, the
            finished model is additionally moved with ``model.cuda()``.

    Returns:
        The constructed model.

    Raises:
        KeyError: If the configuration has no ``'model_type'`` entry.
        AssertionError: If ``'model_type'`` is not a known model type.
    """
    with open(file) as f:
        config = json.load(f)

    model_type = config['model_type']

    # Lazy factories: a builder name is only resolved when its entry is
    # actually selected, exactly as in an if/elif chain.
    factories = {
        'wgan-gp': lambda: build_standard_wgan_gp(config, cuda),
        'ae': lambda: build_standard_ae(config, cuda),
        'vae': lambda: build_standard_vae(config, cuda),
        'labeled_gan': lambda: build_labeled_gan(config, cuda),
        'encoder': lambda: Encoder(config),
        'kl-encoder': lambda: KLEncoder(config),
        'decoder': lambda: Decoder(config),
        'dual-encoder': lambda: DualEncoder(config),
        'img2img-decoder': lambda: ImgToImgDecoder(config),
        'img2img-gan': lambda: build_img2img_gan(config, cuda),
    }

    try:
        factory = factories[model_type]
    except (KeyError, TypeError):
        # Raised explicitly instead of `assert False`: assert statements are
        # stripped under `python -O`, which would let execution fall through
        # to an unbound `model`. The exception type is kept for callers.
        raise AssertionError(f"Unknown model type '{model_type}'!") from None

    model = factory()
    if cuda:
        model.cuda()
    return model
numeric_attr.max() - numeric_attr.min()) ## merge cat and num atts # merge categorical and numeric subsets ori_subset_transformed = pd.concat( [ori_dataset_categ_transformed, ori_dataset_numeric_attr], axis=1) ## ADVERSARIAL NEURAL NETWORK IMPLEMENTATION # Encoder/Generator network instantiation # init training network classes / architectures encoder_train = Encoder(input_size=ori_subset_transformed.shape[1], hidden_size=[256, 64, 16, 4, 2]) # push to cuda if cudnn is available if (torch.backends.cudnn.version() != None and USE_CUDA == True): encoder_train = encoder_train.cuda() # print the initialized architectures now = datetime.utcnow().strftime("%Y%m%d-%H:%M:%S") print('[LOG {}] encoder-generator architecture:\n\n{}\n'.format( now, encoder_train)) # Decoder network instantiation # init training network classes / architectures decoder_train = Decoder(output_size=ori_subset_transformed.shape[1], hidden_size=[2, 4, 16, 64, 256]) # push to cuda if cudnn is available if (torch.backends.cudnn.version() != None) and (USE_CUDA == True): decoder_train = decoder_train.cuda() # print the initialized architectures now = datetime.utcnow().strftime("%Y%m%d-%H:%M:%S") print('[LOG {}] decoder architecture:\n\n{}\n'.format(now, decoder_train))
# Generate the data and read the vocabulary size.
data = dataloader.gen_data()
vocab_size = vocab.index

# Hyper-parameters.
embedding_dim = 512
hidden_dim = 512
learning_rate = 1e-3
num_epoch = 1000
save_every = 10

# Instantiate the encoder and decoder networks.
encoder = Encoder()
decoder = DecoderRNN(vocab_size=vocab_size,
                     hidden_dim=hidden_dim,
                     embedding_dim=embedding_dim)

# Move both networks to the GPU when one is available.
if torch.cuda.is_available():
    encoder.cuda()
    decoder.cuda()

# Only the encoder's `linear` sub-module and the whole decoder are optimized.
params = [*encoder.linear.parameters(), *decoder.parameters()]
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=params, lr=learning_rate)

print('-' * 100, 'Starting training network', '-' * 100, sep='\n')
encoder_bytes = urllib.request.urlopen(encoder_model_name) decoder_bytes = urllib.request.urlopen(decoder_model_name) # Load tensor from io.BytesIO object encoder_buffer = io.BytesIO(encoder_bytes.read()) decoder_buffer = io.BytesIO(decoder_bytes.read()) # init training network classes / architectures encoder_eval = Encoder(input_size=ori_subset_transformed.shape[1], hidden_size=[256, 64, 16, 4, 2]) decoder_eval = Decoder(output_size=ori_subset_transformed.shape[1], hidden_size=[2, 4, 16, 64, 256]) # push to cuda if cudnn is available if (torch.backends.cudnn.version() != None) and (USE_CUDA == True): encoder_eval = encoder_eval.cuda() decoder_eval = decoder_eval.cuda() # load trained models # since the model was trained on a gpu and will be restored in a cpu we need to provide: map_location = 'cpu' encoder_eval.load_state_dict(torch.load(encoder_buffer, map_location='cpu')) decoder_eval.load_state_dict(torch.load(decoder_buffer, map_location='cpu')) ## specify a dataloader that provides the ability to evaluate the journal entrie in an "unshuffled" batch-wise manner: # convert pre-processed data to pytorch tensor torch_dataset = torch.from_numpy(ori_subset_transformed.values).float() # convert to pytorch tensor - none cuda enabled dataloader_eval = DataLoader(torch_dataset, batch_size=mini_batch_size, shuffle=False,
class Mem2SeqRunner(ExperimentRunnerBase):
    """Runner that trains and evaluates an ``Encoder`` / ``Decoder`` pair.

    At every step the decoder returns a pointer distribution over the input
    positions and a distribution over the vocabulary; both are scored with
    ``self.cross_entropy`` and summed, optionally weighted by
    ``self.loss_weights``.

    Attributes such as ``nwords``, ``use_cuda``, ``TYPE``, ``cross_entropy``,
    ``loss_weighting``, ``loss_weights``, ``i2w`` and ``incorrect_sentinel``
    are read here but never assigned in this class; they are presumably
    provided by ``ExperimentRunnerBase`` (not visible here -- confirm).
    """

    def __init__(self, args):
        """Create the networks, their optimizers and the LR scheduler.

        Args:
            args: Forwarded unchanged to ``ExperimentRunnerBase.__init__``.
        """
        super(Mem2SeqRunner, self).__init__(args)

        # Model parameters
        self.gru_size = 128
        self.emb_size = 128
        # TODO: Try hops 4 with task 3
        self.hops = 3
        self.dropout = 0.2

        self.encoder = Encoder(self.hops, self.nwords, self.gru_size)
        self.decoder = Decoder(self.emb_size, self.hops, self.gru_size,
                               self.nwords)

        self.optim_enc = torch.optim.Adam(self.encoder.parameters(), lr=0.001)
        self.optim_dec = torch.optim.Adam(self.decoder.parameters(), lr=0.001)
        if self.loss_weighting:
            self.optim_loss_weights = torch.optim.Adam([self.loss_weights],
                                                       lr=0.0001)
        # Only the decoder optimizer is attached to the scheduler.
        self.scheduler = lr_scheduler.ReduceLROnPlateau(self.optim_dec,
                                                        mode='max',
                                                        factor=0.5,
                                                        patience=1,
                                                        min_lr=0.0001,
                                                        verbose=True)

        if self.use_cuda:
            self.cross_entropy = self.cross_entropy.cuda()
            self.encoder = self.encoder.cuda()
            self.decoder = self.decoder.cuda()
            if self.loss_weighting:
                # NOTE(review): if loss_weights is a CPU leaf tensor, .cuda()
                # returns a new tensor while optim_loss_weights keeps stepping
                # the original one -- confirm where the base class creates it.
                self.loss_weights = self.loss_weights.cuda()

    def train_batch_wrapper(self, batch, new_epoch, clip_grads):
        """Unpack a batch and delegate to :meth:`train_batch`.

        Args:
            batch: Indexable whose items 0-3 (context, responses, index,
                sentinel) are transposed on their first two dimensions and
                whose items 4-5 are the context and target lengths.
            new_epoch: Forwarded to :meth:`train_batch`.
            clip_grads: Forwarded to :meth:`train_batch`.

        Returns:
            Whatever :meth:`train_batch` returns.
        """
        context = batch[0].transpose(0, 1)
        responses = batch[1].transpose(0, 1)
        index = batch[2].transpose(0, 1)
        sentinel = batch[3].transpose(0, 1)
        context_lengths = batch[4]
        target_lengths = batch[5]
        return self.train_batch(context, responses, index, sentinel,
                                new_epoch, context_lengths, target_lengths,
                                clip_grads)

    def train_batch(self, context, responses, index, sentinel, new_epoch,
                    context_lengths, target_lengths, clip_grads):
        """Run one optimization step on a single batch.

        Args:
            context: Input tensor; dimension 1 is used as the batch size and
                dimension 0 as the number of pointer targets.
            responses: Target token tensor; its rows are fed back as the next
                decoder input (the ground truth is always used).
            index: Target pointer positions.
            sentinel: Accepted for interface compatibility; not used.
            new_epoch: When truthy, the running loss accumulators and
                ``self.n`` are reset first.
            context_lengths: Accepted for interface compatibility; not used.
            target_lengths: Per-example target lengths; only their maximum is
                used, to size the output buffers.
            clip_grads: Accepted for interface compatibility; gradients are
                always clipped to a norm of 10.0 regardless of this value.

        Returns:
            Tuple ``(loss, loss_v, loss_ptr)`` of Python floats.
        """
        # (TODO): remove transpose
        if new_epoch:  # (TODO): Change this part
            self.loss = 0
            self.ploss = 0
            self.vloss = 0
            self.n = 1

        context = context.type(self.TYPE)
        responses = responses.type(self.TYPE)
        index = index.type(self.TYPE)

        self.optim_enc.zero_grad()
        self.optim_dec.zero_grad()
        if self.loss_weighting:
            self.optim_loss_weights.zero_grad()

        h = self.encoder(context.transpose(0, 1)).unsqueeze(0)
        self.decoder.load_memory(context.transpose(0, 1))

        batch_size = context.size(1)
        max_target_len = max(target_lengths)

        # Every sequence starts from token id 2 (presumably the
        # start-of-sequence id -- confirm against the vocabulary).
        y = torch.from_numpy(np.array([2] * batch_size,
                                      dtype=int)).type(self.TYPE)

        output_vocab = torch.zeros(max_target_len, batch_size, self.nwords)
        output_ptr = torch.zeros(max_target_len, batch_size, context.size(0))
        if self.use_cuda:
            output_vocab = output_vocab.cuda()
            output_ptr = output_ptr.cuda()

        # TODO: Add EOS condition
        for step in range(responses.size(0)):
            p_ptr, p_vocab, h = self.decoder(context, y, h)
            output_vocab[step] = p_vocab
            output_ptr[step] = p_ptr
            # TODO: Add teacher forcing ratio
            y = responses[step].type(self.TYPE)

        # NOTE: the losses cover every time step of every example; no
        # length-based mask is applied to the decoder outputs.
        loss_v = self.cross_entropy(
            output_vocab.contiguous().view(-1, self.nwords),
            responses.contiguous().view(-1))
        loss_ptr = self.cross_entropy(
            output_ptr.contiguous().view(-1, context.size(0)),
            index.contiguous().view(-1))

        if self.loss_weighting:
            # Each loss is scaled by 1 / (2 * w^2) and log(w0 * w1) is added;
            # the scaled values are also the ones reported and accumulated.
            loss_ptr = loss_ptr / (2 * self.loss_weights[0] * self.loss_weights[0])
            loss_v = loss_v / (2 * self.loss_weights[1] * self.loss_weights[1])
            loss = loss_ptr + loss_v + \
                torch.log(self.loss_weights[0] * self.loss_weights[1])
        else:
            loss = loss_ptr + loss_v

        loss.backward()

        torch.nn.utils.clip_grad_norm_(self.encoder.parameters(), 10.0)
        torch.nn.utils.clip_grad_norm_(self.decoder.parameters(), 10.0)

        self.optim_enc.step()
        self.optim_dec.step()
        if self.loss_weighting:
            self.optim_loss_weights.step()

        self.loss += loss.item()
        self.vloss += loss_v.item()
        self.ploss += loss_ptr.item()

        return loss.item(), loss_v.item(), loss_ptr.item()

    def evaluate_batch(self,
                       batch_size,
                       input_batches,
                       input_lengths,
                       target_batches,
                       target_lengths,
                       target_index,
                       target_gate,
                       src_plain,
                       profile_memory=None):
        """Greedily decode one batch and accumulate its losses.

        Both networks are put in eval mode for the duration of the call and
        always switched back to training mode afterwards, even if decoding
        raises. Decoding runs under ``torch.no_grad()`` because nothing is
        back-propagated here.

        Args:
            batch_size: Number of examples in the batch.
            input_batches: Input tensor, indexed as ``input_batches[:, :, 0]``
                to look up the token under each pointer.
            input_lengths: Per-example input lengths.
            target_batches: Target token tensor for the vocabulary loss.
            target_lengths: Per-example target lengths; the maximum sets the
                number of decoding steps.
            target_index: Target pointer positions, indexed as
                ``target_index[t][i]``.
            target_gate: Accepted for interface compatibility; not used.
            src_plain: Per-example sequences whose items' first element is
                the word copied when the pointer is chosen.
            profile_memory: Accepted for interface compatibility; not used.

        Returns:
            Tuple ``(decoded_words, from_whichs)``: the decoded words per
            time step, and a numpy array of ``'p'`` / ``'v'`` flags saying
            whether each word came from the pointer or the vocabulary.
        """
        # Set to not-training mode to disable dropout
        self.encoder.train(False)
        self.decoder.train(False)
        try:
            with torch.no_grad():
                return self._decode_and_score(batch_size, input_batches,
                                              input_lengths, target_batches,
                                              target_lengths, target_index,
                                              src_plain)
        finally:
            # Set back to training mode
            self.encoder.train(True)
            self.decoder.train(True)

    def _decode_and_score(self, batch_size, input_batches, input_lengths,
                          target_batches, target_lengths, target_index,
                          src_plain):
        """Decode a batch step by step and update the loss accumulators."""
        max_target_len = max(target_lengths)

        # Run words through encoder
        decoder_hidden = self.encoder(input_batches.transpose(0,
                                                              1)).unsqueeze(0)
        self.decoder.load_memory(input_batches.transpose(0, 1))

        # Prepare input and output variables
        decoder_input = torch.LongTensor([2] * batch_size)
        all_decoder_outputs_vocab = torch.zeros(max_target_len, batch_size,
                                                self.nwords)
        all_decoder_outputs_ptr = torch.zeros(max_target_len, batch_size,
                                              input_batches.size(0))
        if self.use_cuda:
            all_decoder_outputs_vocab = all_decoder_outputs_vocab.cuda()
            all_decoder_outputs_ptr = all_decoder_outputs_ptr.cuda()
            decoder_input = decoder_input.cuda()

        # First element of every source item, per example.
        p = [[word_triple[0] for word_triple in elm] for elm in src_plain]

        decoded_words = []
        self.from_whichs = []

        # Run through decoder one time step at a time
        for t in range(max_target_len):
            decoder_ptr, decoder_vacab, decoder_hidden = self.decoder(
                input_batches, decoder_input, decoder_hidden)
            all_decoder_outputs_vocab[t] = decoder_vacab
            all_decoder_outputs_ptr[t] = decoder_ptr

            _, topvi = decoder_vacab.data.topk(1)
            _, toppi = decoder_ptr.data.topk(1)
            top_ptr_i = torch.gather(input_batches[:, :, 0], 0,
                                     toppi.view(1, -1)).transpose(0, 1)

            # Feed back the pointed-at input token unless the pointer landed
            # on the last input position (or beyond); then use the
            # vocabulary prediction.
            next_in = [
                top_ptr_i[i].item() if
                (toppi[i].item() < input_lengths[i] - 1) else topvi[i].item()
                for i in range(batch_size)
            ]
            decoder_input = torch.LongTensor(next_in)  # Chosen word is next input
            if self.use_cuda:
                decoder_input = decoder_input.cuda()

            temp = []
            from_which = []
            for i in range(batch_size):
                pointer = toppi[i].item()
                if pointer < len(p[i]) - 1:
                    temp.append(p[i][pointer])
                    from_which.append('p')
                else:
                    if target_index[t][i] != pointer:
                        self.incorrect_sentinel += 1
                    ind = topvi[i].item()
                    if ind == 3:
                        temp.append('<eos>')
                    else:
                        temp.append(self.i2w[ind])
                    from_which.append('v')
            decoded_words.append(temp)
            self.from_whichs.append(from_which)
        self.from_whichs = np.array(self.from_whichs)

        loss_v = self.cross_entropy(
            all_decoder_outputs_vocab.contiguous().view(-1, self.nwords),
            target_batches.contiguous().view(-1))
        loss_ptr = self.cross_entropy(
            all_decoder_outputs_ptr.contiguous().view(-1,
                                                      input_batches.size(0)),
            target_index.contiguous().view(-1))

        if self.loss_weighting:
            loss = loss_ptr / (2 * self.loss_weights[0] * self.loss_weights[0]) + \
                loss_v / (2 * self.loss_weights[1] * self.loss_weights[1]) + \
                torch.log(self.loss_weights[0] * self.loss_weights[1])
        else:
            loss = loss_ptr + loss_v

        # Unlike train_batch, the per-component losses accumulated here are
        # the unweighted ones.
        self.loss += loss.item()
        self.vloss += loss_v.item()
        self.ploss += loss_ptr.item()
        self.n += 1

        return decoded_words, self.from_whichs

    def save_models(self, path):
        """Write both state dicts to ``encoder.pth`` / ``decoder.pth``.

        Args:
            path: Existing directory that receives the two files.
        """
        torch.save(self.encoder.state_dict(),
                   os.path.join(path, 'encoder.pth'))
        torch.save(self.decoder.state_dict(),
                   os.path.join(path, 'decoder.pth'))

    def load_models(self, path: str = '.'):
        """Restore both networks from ``encoder.pth`` / ``decoder.pth``.

        The checkpoints are deserialized onto the CPU so that files saved
        from GPU tensors also load on hosts without CUDA;
        ``load_state_dict`` then copies the values into the networks'
        existing parameters on whatever device they live on.

        Args:
            path: Directory containing the two checkpoint files.
        """
        self.encoder.load_state_dict(
            torch.load(os.path.join(path, 'encoder.pth'),
                       map_location='cpu'))
        self.decoder.load_state_dict(
            torch.load(os.path.join(path, 'decoder.pth'),
                       map_location='cpu'))
''' seqs to id '''
# train: map the text and label sequences to id lists
text_id_list = seq2id(text_alpha, text_sent_list)
label_id_list = seq2id(label_alpha, label_sent_list)

# test
# text_test_id_list = seq2id(text_alpha, text_sent_list)
# label_test_id_list = seq2id(label_alpha, label_sent_list)

# Build both networks, moving them to the GPU when the config asks for it.
encoder = Encoder(text_alpha.m_size, config)
decoder = AttnDecoderRNN(label_alpha.m_size, config)
if config.use_cuda:
    encoder, decoder = encoder.cuda(), decoder.cuda()

# print(encoder)
# print(decoder)

# One Adam optimizer per network, sharing the configured learning rate.
lr = config.lr
encoder_optimizer, decoder_optimizer = (
    optim.Adam(net.parameters(), lr=lr) for net in (encoder, decoder))
criterion = nn.NLLLoss()

# Training-loop bookkeeping.
n_epochs = config.Steps
plot_every = 200
print_every = 1
start = time.time()
plot_losses = []