def make_images(
    dev_dl: tf.data.Dataset,
    model: nn.Module,
    args: argparse.Namespace,
) -> nn.Module:
    """Run inference over ``dev_dl`` and save one 3-column diagram per batch.

    Each batch is preprocessed with ``model_utils.preprocess_test_example``,
    moved to the device returned by ``model_utils.get_device()``, passed
    through ``model``, and handed to ``model_utils.make_3_col_diagram``
    together with the input and target. Output files are written to
    ``{args.save_dir}/{args.name}/{args.name}_{i}.png`` where ``i`` is the
    batch index.

    Args:
        dev_dl: Dataset iterated through ``as_numpy_iterator()``.
        model: Network used for the forward pass.
        args: Namespace providing ``batch_size``, ``save_dir`` and ``name``.

    Returns:
        The same ``model`` object that was passed in.
    """
    device = model_utils.get_device()
    print(' Running forward inference...')

    # Disable autograd only for the duration of inference. The previous
    # global ``torch.set_grad_enabled(False)`` stayed in effect after this
    # function returned and silently broke any later training in-process.
    with torch.no_grad(), \
            tqdm(total=args.batch_size * len(dev_dl)) as progress_bar:
        for i, (x_batch_orig, y_batch) in enumerate(
                dev_dl.as_numpy_iterator()):
            x_batch, y_batch = model_utils.preprocess_test_example(
                x_batch_orig, y_batch)
            y_batch = y_batch.to(device)
            x_batch = x_batch.to(device)

            # Forward pass on model
            y_pred = model(x_batch).detach()

            model_utils.make_3_col_diagram(
                x_batch.cpu().numpy(),
                y_batch.cpu().numpy(),
                y_pred.cpu().numpy(),
                f'{args.save_dir}/{args.name}/{args.name}_{i}.png')

            progress_bar.update(len(x_batch))

            # Drop references early so device memory can be reclaimed
            # before the next batch is loaded.
            del x_batch
            del y_pred

    return model
def __init__(self, config, model_dir, device=None):
    """Build the SLU model named in ``config`` and, optionally, an ELMo encoder.

    Args:
        config: Mapping read through ``config['model']``,
            ``config.get("use_elmo", False)`` and, when ELMo is enabled,
            ``config["elmo"]`` (``option_file``, ``weight_file`` and an
            optional ``checkpoint`` entry).
        model_dir: Directory used to derive the ``log.csv`` path.
        device: Forwarded to ``get_device``; the result is stored as
            ``self.device``.
    """
    self.config = config
    self.model_dir = model_dir
    self.log_file = os.path.join(model_dir, 'log.csv')

    self.device = get_device(device)

    # The model class is looked up by name on ``modules`` and instantiated
    # with the ``model`` sub-config.
    self.slu_cls = getattr(modules, config['model']['name'])
    self.slu = self.slu_cls(config['model'])

    self.use_elmo = config.get("use_elmo", False)
    if self.use_elmo:
        option_file = config["elmo"]["option_file"]
        weight_file = config["elmo"]["weight_file"]
        self.elmo = Elmo(option_file, weight_file, 1, dropout=0)
        # Register ELMo's scalar mixes on the SLU module.
        # presumably so they are saved/optimized with the SLU model —
        # TODO confirm
        self.slu.elmo_scalar_mixes = nn.ModuleList(self.elmo._scalar_mixes)
        # An empty or missing "checkpoint" entry keeps the LSTM that
        # ``Elmo`` built from ``weight_file``.
        if len(config["elmo"].get("checkpoint", "")) > 0:
            # NOTE(review): torch.load unpickles the file; only point this
            # at trusted checkpoints. No map_location is given, so the
            # checkpoint's saved device must be available — confirm.
            self.elmo._elmo_lstm = torch.load(
                config["elmo"]["checkpoint"]).elmo
            # Freeze the replaced LSTM's parameters.
            for param in self.elmo._elmo_lstm.parameters():
                param.requires_grad_(False)
        self.elmo.to(self.device)

    self.slu.to(self.device)
def main():
    """Evaluate a checkpointed model on the held-out split.

    Parses the test and common command-line arguments, loads the test
    tensors with ``model_utils.load_test_data``, restores the model from
    ``args.load_path`` and hands everything to ``test_model``.

    Exits through ``parser.error`` (status 2) when no checkpoint path is
    given.
    """
    parser = argparse.ArgumentParser()
    add_test_args(parser)
    add_common_args(parser)
    args = parser.parse_args()

    # Validate before doing any expensive work. ``assert`` is stripped
    # under ``python -O`` and previously ran only after the dataset had
    # been loaded and the model constructed.
    if args.load_path is None:
        parser.error('load_path must be specified to test a model')

    device = model_utils.get_device()

    # Load dataset from disk
    x_dev, y_dev, ground_truths, container = model_utils.load_test_data(
        args.dataset_dir,
        dev_frac=args.dev_frac,
        max_entries=args.dataset_cap)
    dev_dl = data.DataLoader(
        data.TensorDataset(x_dev, y_dev, ground_truths),
        batch_size=args.batch_size,
        shuffle=False,
    )

    # Initialize a model and restore its weights from the checkpoint
    model = models.get_model(args.model)()
    model = model_utils.load_model(model, args.load_path)
    model.eval()

    # Move model to GPU if necessary
    model.to(device)

    # test!
    test_model(
        dev_dl,
        model,
        args,
        container,
    )
def __init__(self, config, model_dir, device=None):
    """Configure the ELMo language model, in its plain or lattice variant.

    Args:
        config: Mapping providing ``vocab_size`` and an ``elmo`` section
            (``option_file``, ``weight_file``, optional ``random_init``,
            ``lattice`` and ``combine_method``), plus the optional
            ``lm_scale``, ``ca_scale`` and ``n_negative_sample`` entries.
        model_dir: Directory used to derive the ``log.csv`` path.
        device: Forwarded to ``get_device``; the result is stored as
            ``self.device``.
    """
    # Bookkeeping.
    self.config = config
    self.model_dir = model_dir
    self.log_file = os.path.join(model_dir, 'log.csv')

    # Optional hyper-parameters with their defaults.
    self.lm_scale = config.get("lm_scale", 1.0)
    self.ca_scale = config.get("ca_scale", 0.0)
    self.n_negative_sample = config.get("n_negative_sample", 0)

    self.device = get_device(device)
    self.vocab_size = config["vocab_size"]

    elmo_cfg = config["elmo"]
    option_file = elmo_cfg["option_file"]
    weight_file = elmo_cfg["weight_file"]
    lm_kwargs = {"random_init": elmo_cfg.get("random_init", False)}

    # The lattice variant takes one extra option: how to combine inputs.
    if elmo_cfg.get("lattice", False):
        lm_kwargs["combine_method"] = elmo_cfg.get(
            "combine_method", "weighted-sum")
        lm_cls = LatticeELMoLM
    else:
        lm_cls = ELMoLM

    self.lm = lm_cls(option_file, weight_file, self.vocab_size, **lm_kwargs)
    self.lm.to(self.device)
def main():
    """Render prediction diagrams for the first ``args.num_images`` batches.

    Parses the test and common command-line arguments, creates the output
    directory ``{args.save_dir}/{args.name}``, restores the model from
    ``args.load_path`` and calls ``make_images``.

    Exits through ``parser.error`` (status 2) when ``name`` or
    ``load_path`` is missing.
    """
    parser = argparse.ArgumentParser()
    add_test_args(parser)
    add_common_args(parser)
    args = parser.parse_args()

    # Validate up front: ``assert`` disappears under ``python -O`` and the
    # load_path check used to run only after the data and model were built.
    if args.name is None:
        parser.error('name must be specified')
    if args.load_path is None:
        parser.error('load_path must be specified')

    device = model_utils.get_device()

    # Re-running with the same name must not crash on an existing folder.
    os.makedirs(f'{args.save_dir}/{args.name}', exist_ok=True)

    # Load dataset from disk and keep only the requested number of batches
    dev_dl = model_utils.load_test_data(args)
    dev_dl = dev_dl.take(args.num_images)

    # Initialize a model and restore its weights from the checkpoint
    model = models.get_model(args.model)(args.size)
    model = model_utils.load_model(model, args.load_path)
    model.eval()

    # Move model to GPU if necessary
    model.to(device)

    # test!
    make_images(
        dev_dl,
        model,
        args,
    )
def batch_to_torch(batch):
    """Convert every element of ``batch`` to ``torch.LongTensor`` on the device.

    Elements are handled by type:

    * ``dict``  -> plain ``dict`` with each non-``None`` value converted
      (``None`` values are kept as ``None``);
    * ``list``  -> ``list`` of converted items;
    * ``tuple`` -> ``tuple`` of converted items;
    * ``None``  -> ``None``;
    * anything else is converted directly.

    Returns:
        A tuple with one converted entry per element of ``batch``.
    """

    def _as_long(values):
        # The device is queried per tensor via ``get_device()``.
        return torch.LongTensor(values).to(get_device())

    converted = []
    for item in batch:
        if isinstance(item, dict):
            result = {
                key: (_as_long(item[key]) if item[key] is not None else None)
                for key in item.keys()
            }
        elif isinstance(item, list):
            result = [_as_long(entry) for entry in item]
        elif isinstance(item, tuple):
            result = tuple(_as_long(entry) for entry in item)
        elif item is None:
            result = None
        else:
            result = _as_long(item)
        converted.append(result)
    return tuple(converted)
def __init__(self, config, model_dir, device=None):
    """Configure the ELMo language model from ``config``.

    Args:
        config: Mapping providing ``vocab_size`` and an ``elmo`` section
            (``option_file``, ``weight_file``), plus the optional
            ``lm_scale``, ``ca_scale`` and ``n_negative_sample`` entries.
        model_dir: Directory used to derive the ``log.csv`` path.
        device: Forwarded to ``get_device``; the result is stored as
            ``self.device``.
    """
    # Bookkeeping.
    self.config = config
    self.model_dir = model_dir
    self.log_file = os.path.join(model_dir, 'log.csv')

    # Optional hyper-parameters with their defaults.
    self.lm_scale = config.get("lm_scale", 1.0)
    self.ca_scale = config.get("ca_scale", 0.0)
    self.n_negative_sample = config.get("n_negative_sample", 0)

    self.device = get_device(device)
    self.vocab_size = config["vocab_size"]

    # Build the language model from the ELMo option/weight files and move
    # it to the selected device.
    elmo_cfg = config["elmo"]
    option_file = elmo_cfg["option_file"]
    weight_file = elmo_cfg["weight_file"]
    language_model = ELMoLM(option_file, weight_file, self.vocab_size)
    self.lm = language_model
    self.lm.to(self.device)
def main():
    """Train a model from the command line and save the final checkpoint.

    Steps: parse arguments, register the experiment, load the train/dev
    datasets, build the model (optionally restoring ``args.load_path``),
    create an Adam optimizer and a StepLR scheduler, create the experiment
    directory, dump the arguments, run ``train_model`` and save the result.
    """
    arg_parser = argparse.ArgumentParser()
    add_train_args(arg_parser)
    add_common_args(arg_parser)
    args = arg_parser.parse_args()
    add_experiment(args)
    device = model_utils.get_device()

    # Datasets
    train_ds, dev_ds = model_utils.load_training_data(args)

    # Model, optionally warm-started from a checkpoint
    model_cls = models.get_model(args.model)
    model = model_cls(size=args.size)
    if args.load_path is not None:
        model = model_utils.load_model(model, args.load_path)
    model.to(device)

    # Optimizer and learning-rate schedule (decay by 0.1 every 5 steps)
    adam_options = {
        'lr': args.learning_rate,
        'weight_decay': args.weight_decay,
    }
    optimizer = optim.Adam(model.parameters(), **adam_options)
    scheduler = optim.lr_scheduler.StepLR(
        optimizer, step_size=5, gamma=0.1, verbose=True)

    # Experiment folder; raises if it already exists
    os.makedirs(f'{args.save_path}/{args.experiment}')
    print(f'Created new experiment: {args.experiment}')
    save_arguments(args, f'{args.save_path}/{args.experiment}/args.txt')

    # Train, then persist the final weights
    trained_model = train_model(
        train_ds, dev_ds, model, optimizer, scheduler, args)
    filename = f'{args.save_path}/{args.experiment}/{model.__class__.__name__}_trained.checkpoint'
    model_utils.save_model(trained_model, filename)
def __init__(self, model_params, optimizer_params, batch_size, checkpoint_path, debug=False):
    """Assemble vocabulary, model, task, optimizer and checkpoint state.

    Args:
        model_params: Passed to ``_create_model`` and ``_create_task``.
        optimizer_params: Passed to ``_create_optimizer``.
        batch_size: Stored as ``self.batch_size``.
        checkpoint_path: Passed to ``_prepare_checkpoint``.
        debug: Forwarded to ``_create_task``.
    """
    # Vocabulary: the first returned item is ignored.
    _, word_to_id, embedding_tensor = load_word2vec_from_file()
    self.word2id = word_to_id
    self.batch_size = batch_size

    # Model, placed on the active device.
    model = self._create_model(model_params, embedding_tensor)
    self.model = model.to(get_device())

    # Task.
    self.task = self._create_task(model_params, debug=debug)

    # Optimizer first, then checkpoint handling.
    self._create_optimizer(optimizer_params)
    self._prepare_checkpoint(checkpoint_path)
def __init__(self, dim_input, hidden_list, resample_every=20, num_masks=1,
             model_dir="./model", is_load=True, device=None, dir_name='test'):
    """Create the MADE network, its optimizer and its LR schedule.

    Args:
        dim_input: Input dimensionality; also used as the output size of
            ``MADENet``.
        hidden_list: Hidden layer sizes passed to ``MADENet``.
        resample_every: Stored as ``self.resample_every``.
        num_masks: Number of masks passed to ``MADENet``.
        model_dir: Parent directory; the model lives in
            ``model_dir/dir_name``, which is created if missing.
        is_load: When true, ``self.load_model`` is called on that directory.
        device: Forwarded to ``get_device``.
        dir_name: Sub-directory name under ``model_dir``.
    """
    # Initialize attributes
    model_dir = os.path.join(model_dir, dir_name)
    # exist_ok avoids the check-then-create race of isdir() + makedirs().
    os.makedirs(model_dir, exist_ok=True)

    self.model_dir = model_dir
    self.dim_input = dim_input
    self.hidden_list = hidden_list
    self.resample_every = resample_every
    self.num_masks = num_masks
    self.dir_name = dir_name

    self.device = get_device(device)

    self.made = MADENet(dim_input, hidden_list, dim_input, num_masks,
                        natural_ordering=False)
    self.made.to(self.device)

    self.optimizer = torch.optim.Adam(
        self.made.parameters(), 3e-4, weight_decay=1e-4)
    # Learning rate is multiplied by 0.1 every 5 scheduler steps.
    self.scheduler = torch.optim.lr_scheduler.StepLR(
        self.optimizer, step_size=5, gamma=0.1)

    if is_load:
        self.load_model(self.model_dir)
def load_our_model(checkpoint_path):
    """Return the paraphrasing model for ``checkpoint_path``, loading it once.

    The model is cached in the module-level ``OUR_MODEL``. The cache is
    reused while the requested checkpoint matches the one that was loaded;
    a request for a different checkpoint reloads the model instead of
    silently returning the stale one.

    Args:
        checkpoint_path: Checkpoint location passed to ``load_args`` and
            ``load_model``.

    Returns:
        The model in eval mode, moved to ``get_device()``.
    """
    global OUR_MODEL

    # A model placed in OUR_MODEL by other code has no recorded path; it is
    # trusted as-is so existing callers keep their behavior.
    cached_path = getattr(load_our_model, "_cached_checkpoint_path", None)
    is_stale = cached_path is not None and cached_path != checkpoint_path
    if OUR_MODEL is not None and not is_stale:
        return OUR_MODEL

    args = load_args(checkpoint_path)

    print("-> Loading model...")
    model_params, _ = unsupervised_args_to_params(args)
    _, _, wordvec_tensor = load_word2vec_from_file()
    model = ModelUnsupervisedContextParaphrasingTemplate(
        model_params, wordvec_tensor)

    print(checkpoint_path)
    _ = load_model(checkpoint_path, model=model, load_best_model=True)
    model = model.to(get_device())
    model.eval()

    OUR_MODEL = model
    load_our_model._cached_checkpoint_path = checkpoint_path
    return OUR_MODEL
def main():
    """Evaluate a model restored from a checkpoint path or experiment folder.

    When ``args.load_dir`` is given, ``model``, ``size`` and ``name`` are
    taken from that experiment's ``args.txt``. The model is restored from
    ``args.load_path`` if set, otherwise from
    ``{args.load_dir}/{args.model}_best_val.checkpoint``, and passed to
    ``test_model``.

    Exits through ``parser.error`` (status 2) when neither ``load_path``
    nor ``load_dir`` is given, or when no run name can be determined.
    """
    parser = argparse.ArgumentParser()
    add_test_args(parser)
    add_common_args(parser)
    args = parser.parse_args()

    # ``assert`` is stripped under ``python -O``; report bad input through
    # the parser instead.
    if args.load_path is None and args.load_dir is None:
        parser.error('either load_path or load_dir must be specified')

    device = model_utils.get_device()

    if args.load_dir is not None:
        # Reuse the settings the experiment was trained with.
        prev_args: Dict = load_arguments(f'{args.load_dir}/args.txt')
        args.model = prev_args['model']
        args.size = prev_args['size']
        args.name = prev_args['experiment']

    if args.name is None:
        parser.error('name must be specified when load_dir is not given')

    # Re-testing the same experiment must not crash on an existing folder.
    os.makedirs(f'{args.save_dir}/{args.name}', exist_ok=True)

    # Load dataset from disk
    dev_dl = model_utils.load_test_data(args)

    # Initialize a model
    model = models.get_model(args.model)(size=args.size)

    # An explicit checkpoint wins over the experiment's best-val checkpoint
    if args.load_path is not None:
        model = model_utils.load_model(model, args.load_path)
    else:
        model = model_utils.load_model(
            model, f'{args.load_dir}/{args.model}_best_val.checkpoint')
    model.eval()

    # Move model to GPU if necessary
    model.to(device)

    # test!
    test_model(
        dev_dl,
        model,
        args,
    )
def train_model(
    train_ds: tf.data.Dataset,
    dev_ds: tf.data.Dataset,
    model: nn.Module,
    optimizer: optim.Optimizer,
    lr_scheduler: optim.lr_scheduler._LRScheduler,
    args: argparse.Namespace,
) -> nn.Module:
    """Train ``model`` for ``args.train_epochs`` epochs with per-epoch validation.

    The training loss is the sum of four terms computed below: a log-depth
    term, two log terms on the Sobel gradients of prediction vs. blurred
    target, and a cosine term between normals built from those gradients.
    Validation uses ``model_utils.l1_norm_loss`` and logs RMS / REL / LOG10
    / threshold metrics to TensorBoard.

    Args:
        train_ds: Training dataset, iterated via ``as_numpy_iterator()``.
        dev_ds: Validation dataset, iterated the same way.
        model: Network to train.
        optimizer: Optimizer stepped once per training batch.
        lr_scheduler: Stepped once per epoch when ``args.use_scheduler``.
        args: Namespace providing ``train_epochs``, ``use_scheduler``,
            ``train_batch_size``, ``dev_batch_size``, ``picture_frequency``,
            ``checkpoint_freq``, ``num_checkpoints``, ``save_path``,
            ``log_dir`` and ``experiment``.

    Returns:
        The same ``model`` object after training.
    """
    device = model_utils.get_device()
    # NOTE(review): loss_fn is assigned but never used below — the loss is
    # built inline in the training loop.
    loss_fn = model_utils.depth_proportional_loss
    val_loss_fn = model_utils.l1_norm_loss
    best_val_loss = torch.tensor(float('inf'))
    saved_checkpoints = []
    writer = SummaryWriter(log_dir=f'{args.log_dir}/{args.experiment}')
    cos = nn.CosineSimilarity(dim=1, eps=0)
    get_gradient: nn.Module = sobel.Sobel().to(device)

    for e in range(1, args.train_epochs + 1):
        print(f'Training epoch {e}...')

        # NOTE(review): the scheduler is stepped at the start of the epoch,
        # before any optimizer.step(); PyTorch >= 1.1 expects the opposite
        # order — confirm this is intended.
        if args.use_scheduler:
            lr_scheduler.step()

        # Training portion
        torch.cuda.empty_cache()
        torch.set_grad_enabled(True)
        with tqdm(total=args.train_batch_size * len(train_ds)) as progress_bar:
            model.train()
            for i, (x_batch_orig, y_batch) in enumerate(train_ds.as_numpy_iterator()):
                x_batch, y_batch = model_utils.preprocess_training_example(
                    x_batch_orig, y_batch)
                y_blurred = model_utils.blur_depth_map(y_batch)

                # Constant third component for the normal vectors below.
                ones = torch.ones(y_batch.shape, dtype=torch.float32, device=device)

                # Forward pass on model
                optimizer.zero_grad()
                y_pred = model(x_batch)

                # Channel 0 / 1 of the Sobel output are used as d/dx, d/dy.
                depth_grad = get_gradient(y_blurred)
                output_grad = get_gradient(y_pred)
                depth_grad_dx = depth_grad[:, 0, :, :].contiguous().view_as(
                    y_blurred)
                depth_grad_dy = depth_grad[:, 1, :, :].contiguous().view_as(
                    y_batch)
                output_grad_dx = output_grad[:, 0, :, :].contiguous().view_as(
                    y_blurred)
                output_grad_dy = output_grad[:, 1, :, :].contiguous().view_as(
                    y_batch)

                # Normals are (-dx, -dy, 1) concatenated along dim 1.
                depth_normal = torch.cat(
                    (-depth_grad_dx, -depth_grad_dy, ones), 1)
                output_normal = torch.cat(
                    (-output_grad_dx, -output_grad_dy, ones), 1)

                loss_depth = torch.log(torch.abs(y_pred - y_batch) + 0.5).mean()
                loss_dx = torch.log(
                    torch.abs(output_grad_dx - depth_grad_dx) + 0.5).mean()
                loss_dy = torch.log(
                    torch.abs(output_grad_dy - depth_grad_dy) + 0.5).mean()
                loss_normal = torch.abs(
                    1 - cos(output_normal, depth_normal)).mean()

                loss = loss_depth + loss_normal + (loss_dx + loss_dy)

                # Backward pass and optimization
                loss.backward()
                optimizer.step()

                progress_bar.update(len(x_batch))
                progress_bar.set_postfix(loss=loss.item())
                # Global step = number of batches seen so far * batch size.
                writer.add_scalar("train/Loss", loss, ((e - 1) * len(train_ds) + i) * args.train_batch_size)

                # Periodically save a diagram
                if (i + 1) % args.picture_frequency == 0:
                    model_utils.make_diagram(
                        np.transpose(x_batch_orig, (0, 3, 1, 2)),
                        x_batch.cpu().numpy(),
                        y_batch.cpu().numpy(),
                        y_pred.cpu().detach().numpy(),
                        f'{args.save_path}/{args.experiment}/diagram_{e}_{i+1}.png',
                    )

                # Drop per-batch references before the next iteration.
                del x_batch
                del y_batch
                del y_blurred
                del y_pred
                del loss

        # Validation portion
        torch.cuda.empty_cache()
        torch.set_grad_enabled(False)
        with tqdm(total=args.dev_batch_size * len(dev_ds)) as progress_bar:
            model.eval()
            val_loss = 0.0
            num_batches_processed = 0
            total_pixels = 0
            total_examples = 0
            squared_error = 0
            rel_error = 0
            log_error = 0
            threshold1 = 0  # 1.25
            threshold2 = 0  # 1.25^2
            threshold3 = 0  # corresponds to 1.25^3
            for i, (x_batch, y_batch) in enumerate(dev_ds.as_numpy_iterator()):
                x_batch, y_batch = model_utils.preprocess_test_example(
                    x_batch, y_batch)

                # Forward pass on model in validation environment
                y_pred = model(x_batch)

                # TODO: Process y_pred in whatever way inference requires.
                loss = val_loss_fn(y_pred, y_batch)
                val_loss += loss.item()
                num_batches_processed += 1

                # total_pixels is overwritten (not accumulated) each batch:
                # it is the non-NaN count of the current batch only.
                nanmask = getNanMask(y_batch)
                total_pixels = torch.sum(~nanmask)
                total_examples += x_batch.shape[0]

                # RMS, REL, LOG10, threshold calculation
                # Each addend is normalized by the current batch's pixels.
                squared_error += (
                    torch.sum(torch.pow(y_pred - y_batch, 2)).item() /
                    total_pixels)**0.5
                rel_error += torch.sum(
                    removeNans(torch.abs(y_pred - y_batch) /
                               y_batch)).item() / total_pixels
                log_error += torch.sum(
                    torch.abs(
                        removeNans(torch.log10(y_pred)) - removeNans(
                            torch.log10(y_batch)))).item() / total_pixels
                threshold1 += torch.sum(
                    torch.max(y_pred / y_batch, y_batch /
                              y_pred) < 1.25).item() / total_pixels
                threshold2 += torch.sum(
                    torch.max(y_pred / y_batch, y_batch /
                              y_pred) < 1.25**2).item() / total_pixels
                threshold3 += torch.sum(
                    torch.max(y_pred / y_batch, y_batch /
                              y_pred) < 1.25**3).item() / total_pixels

                progress_bar.update(len(x_batch))
                progress_bar.set_postfix(val_loss=val_loss /
                                         num_batches_processed)
                writer.add_scalar("Val/Loss", loss, ((e - 1) * len(dev_ds) + i) * args.dev_batch_size)

                del x_batch
                del y_batch
                del y_pred
                del loss

            # NOTE(review): the accumulators hold one per-pixel average per
            # batch, yet they are divided by total_examples rather than
            # num_batches_processed — looks off unless the batch size is 1;
            # confirm.
            writer.add_scalar("Val/RMS", squared_error / total_examples, e)
            writer.add_scalar("Val/REL", rel_error / total_examples, e)
            writer.add_scalar("Val/LOG10", log_error / total_examples, e)
            writer.add_scalar("Val/delta1", threshold1 / total_examples, e)
            writer.add_scalar("Val/delta2", threshold2 / total_examples, e)
            writer.add_scalar("Val/delta3", threshold3 / total_examples, e)

            # Save model if it's the best one yet.
            if val_loss / num_batches_processed < best_val_loss:
                best_val_loss = val_loss / num_batches_processed
                filename = f'{args.save_path}/{args.experiment}/{model.__class__.__name__}_best_val.checkpoint'
                model_utils.save_model(model, filename)
                print(f'Model saved!')
                print(f'Best validation loss yet: {best_val_loss}')

            # Save model on checkpoints.
            if e % args.checkpoint_freq == 0:
                filename = f'{args.save_path}/{args.experiment}/{model.__class__.__name__}_epoch_{e}.checkpoint'
                model_utils.save_model(model, filename)
                print(f'Model checkpoint reached!')
                saved_checkpoints.append(filename)

                # Delete checkpoints if there are too many
                # (oldest first, until at most num_checkpoints remain).
                while len(saved_checkpoints) > args.num_checkpoints:
                    os.remove(saved_checkpoints.pop(0))

    return model
def __init__(
        self,
        batch_size,
        optimizer,
        learning_rate,
        train_data_engine,
        test_data_engine,
        dim_hidden,
        dim_embedding,
        vocab_size=None,
        n_layers=1,
        model_dir="./model",
        log_dir="./log",
        is_load=True,
        replace_model=True,
        device=None,
        dir_name='test'
):
    """Create the RNN language model, its optimizer and its data loaders.

    Args:
        batch_size: Batch size for both data loaders.
        optimizer: Optimizer spec passed to ``build_optimizer``.
        learning_rate: Learning rate passed to ``build_optimizer``.
        train_data_engine: Training dataset (also kept as
            ``self.data_engine``).
        test_data_engine: Test dataset.
        dim_hidden: Hidden size of ``LMRNN``.
        dim_embedding: Embedding size of ``LMRNN``.
        vocab_size: Vocabulary size of ``LMRNN``.
        n_layers: Number of RNN layers.
        model_dir: Base model directory, resolved by ``handle_model_dirs``.
        log_dir: Base log directory, resolved by ``handle_model_dirs``.
        is_load: When true, ``self.load_model`` is called after setup.
        replace_model: Forwarded to ``handle_model_dirs``.
        device: Forwarded to ``get_device``.
        dir_name: Forwarded to ``handle_model_dirs``.
    """
    # Initialize attributes
    self.data_engine = train_data_engine
    self.n_layers = n_layers
    self.log_dir = log_dir
    self.model_dir = model_dir
    self.dim_hidden = dim_hidden
    self.dim_embedding = dim_embedding
    self.vocab_size = vocab_size
    self.dir_name = dir_name

    self.device = get_device(device)

    self.lm = LMRNN(
        dim_embedding=dim_embedding,
        dim_hidden=dim_hidden,
        attr_vocab_size=None,
        vocab_size=vocab_size,
        n_layers=n_layers,
        bidirectional=False
    )
    self.lm.to(self.device)

    # Materialize the trainable parameters as a list. A ``filter`` object
    # is a one-shot iterator: once the optimizer has consumed it,
    # ``self.parameters`` would be empty for any later use (for example
    # gradient clipping).
    self.parameters = [
        p for p in self.lm.parameters() if p.requires_grad
    ]
    self.optimizer = build_optimizer(
        optimizer, self.parameters, learning_rate)

    # Replaces the raw directories stored above with the resolved ones.
    self.model_dir, self.log_dir = handle_model_dirs(
        model_dir, log_dir, dir_name, replace_model, is_load
    )

    if is_load:
        self.load_model(self.model_dir)

    self.train_data_engine = train_data_engine
    self.test_data_engine = test_data_engine
    self.train_data_loader = DataLoader(
        train_data_engine,
        batch_size=batch_size,
        shuffle=True,
        num_workers=1,
        drop_last=True,
        collate_fn=collate_fn_nl,
        pin_memory=True)

    self.test_data_loader = DataLoader(
        test_data_engine,
        batch_size=batch_size,
        shuffle=False,
        num_workers=1,
        drop_last=True,
        collate_fn=collate_fn_nl,
        pin_memory=True)
# Pad every split to a common sentence length.
seq_len = MAX_NUM_OF_WORDS
train_sentences = pad_input(train_sentences, seq_len)
test_sentences = pad_input(test_sentences, seq_len)
val_sentences = pad_input(val_sentences, seq_len)

# Sanity check: each padded split has exactly MAX_NUM_OF_WORDS columns.
assert train_sentences.shape[1] == MAX_NUM_OF_WORDS
assert test_sentences.shape[1] == MAX_NUM_OF_WORDS
assert val_sentences.shape[1] == MAX_NUM_OF_WORDS

# Pair each padded split with its ``intervened`` labels.
train_loader = to_data_loader(
    train_sentences, list(train_data.intervened), BATCH_SIZE)
test_loader = to_data_loader(
    test_sentences, list(test_data.intervened), BATCH_SIZE)
val_loader = to_data_loader(
    val_sentences, list(val_data.intervened), BATCH_SIZE)

device = get_device()

# Persist the weights matrix once so it can be reloaded for testing.
if not os.path.exists('baseline_weights_matrix.txt'):
    np.savetxt('baseline_weights_matrix.txt', weights_matrix, fmt='%d')

embedding_weights = torch.from_numpy(weights_matrix).type('torch.FloatTensor')
model = BaselineModel(embedding_weights)
model.to(device)

# Training hyper-parameters.
learning_rate = 0.005
criterion = WeightedBCELoss(
    zero_weight=intervened_ratio,
    one_weight=1 - intervened_ratio,
)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
clip = 5
def generate_responses(style_vecs, input_templates, checkpoint_path):
    """Generate styled responses for ``input_templates`` and score them with BLEU.

    Args:
        style_vecs: Array of style vectors; a 3-D input is reduced to 2-D
            by taking index 0 along the second axis.
        input_templates: Passed to ``create_dataset``.
        checkpoint_path: Passed to ``load_our_model``.

    Returns:
        ``(hypotheses, references, BLEU_score)``.
    """
    if style_vecs.ndim == 3:
        style_vecs = style_vecs[:, 0, :]

    model = load_our_model(checkpoint_path)

    print("-> Loading dataset...")
    dataset = create_dataset(input_templates)

    # Prepare metrics
    batch_size = 64
    number_batches = int(
        math.ceil(len(dataset.data_list) * 1.0 / batch_size))
    hypotheses, references = None, None

    # Evaluation loop
    for batch_ind in range(number_batches):
        first = batch_ind * batch_size
        last = min(len(dataset.data_list), (batch_ind + 1) * batch_size)

        views = [d.get_view(0) for d in dataset.data_list[first:last]]
        batch = dataset._data_to_batch(views, toTorch=True)
        (src_words, src_lengths, tgt_words, _,
         src_slots, src_slot_lengths, _, _, _, _, _, _) = batch

        style_slice = style_vecs[first:last, :]
        batch_style_vecs = torch.from_numpy(style_slice).to(get_device())

        # Evaluate single batch
        with torch.no_grad():
            resp_results = model.generate_new_style(
                (src_words, src_lengths, src_slots, src_slot_lengths),
                style_vecs=batch_style_vecs)
        _, _, generated_words, generated_lengths = resp_results

        # Strip a leading start-of-sentence column when every row has one.
        batch_labels = tgt_words
        starts_with_sos = (batch_labels[:, 0] == get_SOS_index()).byte().all()
        if starts_with_sos:
            batch_labels = batch_labels[:, 1:]

        # Replace unknown-token labels with -1.
        unknown_label = (batch_labels == get_UNK_index()).long()
        batch_labels = (batch_labels * (1 - unknown_label) +
                        (-1) * unknown_label)

        batch_hyp, batch_ref = TaskTemplate._preds_to_sents(
            batch_labels, generated_words, generated_lengths)
        hypotheses, references = add_if_not_none(
            (batch_hyp, batch_ref), (hypotheses, references))

    BLEU_score, prec_per_ngram = get_BLEU_score(hypotheses, references)
    separator = "=" * 50
    print(separator)
    print("Achieved BLEU score of: %4.2f%%" % (BLEU_score * 100.0))
    print(prec_per_ngram)
    print(separator)

    return hypotheses, references, BLEU_score
def __init__(self, batch_size, optimizer, learning_rate, train_data_engine,
             test_data_engine, dim_hidden, dim_embedding, vocab_size=None,
             attr_vocab_size=None, n_layers=1, bidirectional=False,
             model_dir="./model", log_dir="./log", is_load=True,
             replace_model=True, device=None, dir_name='test',
             with_intent=True):
    """Create the NLU and NLG networks, their loaders, optimizers and logs.

    Args:
        batch_size: Batch size for all data loaders and for ``NLGRNN``.
        optimizer: Optimizer spec passed to ``build_optimizer``.
        learning_rate: Learning rate passed to ``build_optimizer``.
        train_data_engine: Training dataset; also supplies the tokenizer,
            the slot/intent vocabularies and the collate functions.
        test_data_engine: Test dataset with its own collate functions.
        dim_hidden: Hidden size of both networks.
        dim_embedding: Embedding size of both networks.
        vocab_size: Stored as ``self.vocab_size``; the networks take their
            vocabulary size from the training tokenizer.
        attr_vocab_size: Stored as ``self.attr_vocab_size``.
        n_layers: Number of RNN layers in both networks.
        bidirectional: Used for the NLU network only.
        model_dir: Base model directory, resolved by ``handle_model_dirs``.
        log_dir: Base log directory, resolved by ``handle_model_dirs``.
        is_load: When true, ``self.load_model`` is called after setup.
        replace_model: Forwarded to ``handle_model_dirs``.
        device: Forwarded to ``get_device``.
        dir_name: Forwarded to ``handle_model_dirs``.
        with_intent: Stored as ``self.with_intent``.
    """
    # Initialize attributes
    self.data_engine = train_data_engine
    self.n_layers = n_layers
    self.log_dir = log_dir
    self.model_dir = model_dir
    self.dim_hidden = dim_hidden
    self.dim_embedding = dim_embedding
    self.vocab_size = vocab_size
    self.attr_vocab_size = attr_vocab_size
    self.dir_name = dir_name
    self.with_intent = with_intent

    self.device = get_device(device)

    self.nlu = NLURNN(
        dim_embedding=dim_embedding,
        dim_hidden=dim_hidden,
        vocab_size=train_data_engine.tokenizer.get_vocab_size(),
        slot_vocab_size=len(train_data_engine.nlu_slot_vocab),
        intent_vocab_size=len(train_data_engine.intent_vocab),
        n_layers=n_layers,
        bidirectional=bidirectional)

    self.nlg = NLGRNN(
        dim_embedding=dim_embedding,
        dim_hidden=dim_hidden,
        vocab_size=train_data_engine.tokenizer.get_vocab_size(),
        n_slot_key=len(train_data_engine.nlg_slot_vocab),
        n_intent=len(train_data_engine.intent_vocab),
        n_layers=n_layers,
        bidirectional=False,
        batch_size=batch_size)

    self.nlu.to(self.device)
    self.nlg.to(self.device)

    # Initialize data loaders and optimizers
    self.train_data_engine = train_data_engine
    self.test_data_engine = test_data_engine
    # NOTE(review): this path is built from the raw ``log_dir`` argument,
    # before handle_model_dirs() resolves self.log_dir below — confirm
    # that is intended.
    self.test_result_path = os.path.join(self.log_dir, "test_result.txt")
    self.nlu_output_file = None

    # Training loaders shuffle and drop the last partial batch.
    self.train_nlu_data_loader = DataLoader(
        train_data_engine,
        batch_size=batch_size,
        shuffle=True,
        num_workers=1,
        drop_last=True,
        collate_fn=train_data_engine.collate_fn_nlu,
        pin_memory=True)

    self.train_nlg_data_loader = DataLoader(
        train_data_engine,
        batch_size=batch_size,
        shuffle=True,
        num_workers=1,
        drop_last=True,
        collate_fn=train_data_engine.collate_fn_nlg,
        pin_memory=True)

    # Test loaders keep order and keep every example.
    self.test_nlu_data_loader = DataLoader(
        test_data_engine,
        batch_size=batch_size,
        shuffle=False,
        num_workers=1,
        drop_last=False,
        collate_fn=test_data_engine.collate_fn_nlu,
        pin_memory=True)

    self.test_nlg_data_loader = DataLoader(
        test_data_engine,
        batch_size=batch_size,
        shuffle=False,
        num_workers=1,
        drop_last=False,
        collate_fn=test_data_engine.collate_fn_nlg,
        pin_memory=True)

    # Trainable parameters are materialized as lists. A ``filter`` object
    # is a one-shot iterator that the optimizer exhausts, which would
    # leave these attributes empty for any later use.

    # nlu parameters optimization
    self.nlu_parameters = [
        p for p in self.nlu.parameters() if p.requires_grad
    ]
    self.nlu_optimizer = build_optimizer(
        optimizer, self.nlu_parameters, learning_rate)

    # nlg parameters optimization
    self.nlg_parameters = [
        p for p in self.nlg.parameters() if p.requires_grad
    ]
    self.nlg_optimizer = build_optimizer(
        optimizer, self.nlg_parameters, learning_rate)

    print_time_info("Model create complete")

    # Replaces the raw directories stored above with the resolved ones.
    self.model_dir, self.log_dir = handle_model_dirs(
        model_dir, log_dir, dir_name, replace_model, is_load)

    if is_load:
        print_time_info("Loading model from directory %s" % self.model_dir)
        self.load_model(self.model_dir)

    print_time_info("Model create completed.")

    self.train_log_path = os.path.join(self.log_dir, "train_log.csv")
    self.valid_log_path = os.path.join(self.log_dir, "valid_log.csv")

    # Both logs are (re)created with the same CSV header.
    for log_path in (self.train_log_path, self.valid_log_path):
        with open(log_path, 'w') as file:
            file.write(
                "epoch,nlu_loss,nlg_loss,intent_acc,slot_f1,bleu,rouge(1,2,L)\n"
            )

    # Initialize batch count
    self.batches = 0
def main():
    """Train a link-prediction model from the command line and save it.

    Steps: parse arguments, register the experiment, load graphs and edge
    splits, build the train/validation loaders, initialize node embeddings,
    build the model (optionally restoring ``args.load_path``), create an
    Adam optimizer and a ReduceLROnPlateau scheduler, create the experiment
    directory, dump the arguments, run ``train_model`` and save the result.
    """
    arg_parser = argparse.ArgumentParser()
    add_train_args(arg_parser)
    add_common_args(arg_parser)
    args = arg_parser.parse_args()
    add_experiment(args)
    device = model_utils.get_device()

    # Graphs and edge splits
    print('Loading train data...')
    (train_graph, valid_graph,
     train_edges, eval_edges, valid_edges) = model_utils.load_training_data()
    if args.train_partial_graph:
        train_edges['edge'] = eval_edges['edge']

    # Training loader: positive edges only
    train_set = data.TensorDataset(train_edges['edge'])
    train_dl = data.DataLoader(
        train_set,
        batch_size=args.train_batch_size,
        shuffle=True,
    )

    # Validation loader: positives labelled 1 followed by negatives
    # labelled 0
    val_pos = valid_edges['edge']
    val_neg = valid_edges['edge_neg']
    val_pairs = torch.cat([val_pos, val_neg], dim=0)
    val_labels = torch.cat(
        [torch.ones(val_pos.shape[0]), torch.zeros(val_neg.shape[0])],
        dim=0)
    dev_dl = data.DataLoader(
        data.TensorDataset(val_pairs, val_labels),
        batch_size=args.val_batch_size,
        shuffle=True,
    )

    # Node embeddings
    print('Computing initial embeddings')
    train_graph = model_utils.initialize_embeddings(
        train_graph, 'train_embeddings.pt', args.refresh_embeddings)
    valid_graph = model_utils.initialize_embeddings(
        valid_graph, 'valid_embeddings.pt', args.refresh_embeddings)
    if not args.train_partial_graph:
        train_graph = valid_graph

    # Stats evaluator
    evaluator = Evaluator(name='ogbl-ddi')

    # Model, optionally warm-started from a checkpoint
    model_cls = models.get_model(args.model)
    model = model_cls(
        num_nodes=train_graph.num_nodes,
        adj_t=train_graph.adj_t.to(device),
    )
    if args.load_path is not None:
        model = model_utils.load_model(model, args.load_path)
    print(f"Parameters: {model_utils.count_parameters(model)}")
    model.to(device)

    # Optimizer and learning-rate schedule
    adam_options = {
        'lr': args.learning_rate,
        'weight_decay': args.weight_decay,
    }
    optimizer = optim.Adam(model.parameters(), **adam_options)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(
        optimizer,
        mode='min',
        factor=0.5,
        patience=30,
        verbose=True,
    )

    # Experiment folder; raises if it already exists
    os.makedirs(f'{args.save_path}/{args.experiment}')
    print(f'Created new experiment: {args.experiment}')
    save_arguments(args, f'{args.save_path}/{args.experiment}/args.txt')

    # Train, then persist the final weights
    trained_model = train_model(
        train_graph, valid_graph, train_dl, dev_dl, evaluator,
        model, optimizer, scheduler, args)
    filename = f'{args.save_path}/{args.experiment}/{model.__class__.__name__}_trained.checkpoint'
    model_utils.save_model(trained_model, filename)
def train_model(
    train_graph: pyg.torch_geometric.data.Data,
    valid_graph: pyg.torch_geometric.data.Data,
    train_dl: data.DataLoader,
    dev_dl: data.DataLoader,
    evaluator: Evaluator,
    model: nn.Module,
    optimizer: optim.Optimizer,
    lr_scheduler: optim.lr_scheduler._LRScheduler,
    args: argparse.Namespace,
) -> nn.Module:
    """Train a link predictor with sampled negatives and per-epoch validation.

    Each training batch holds positive edges from ``train_dl``; an equal
    number of negative edges is drawn with ``negative_sampling`` and the
    model is optimized with binary cross-entropy. After every epoch the
    model is evaluated on ``dev_dl`` and Hits@{10,20,30} are computed with
    ``evaluator`` for both splits. The best model by validation Hits@20 and
    periodic checkpoints are saved under
    ``{args.save_path}/{args.experiment}``.

    Args:
        train_graph: Graph providing ``adj_t``, ``edge_index``, ``x`` and
            ``num_nodes`` for training.
        valid_graph: Graph providing the same fields for validation.
        train_dl: Loader yielding 1-tuples of positive edges.
        dev_dl: Loader yielding ``(edges, labels)`` pairs.
        evaluator: Object whose ``K`` is set and ``eval`` is called for
            Hits@K.
        model: Called as ``model(adj_t, edges)``.
        optimizer: Optimizer stepped once per training batch.
        lr_scheduler: Stepped with the batch loss when
            ``args.use_scheduler``.
        args: Namespace providing ``train_epochs``, ``train_batch_size``,
            ``val_batch_size``, ``use_scheduler``, ``checkpoint_freq``,
            ``num_checkpoints``, ``save_path``, ``log_dir`` and
            ``experiment``.

    Returns:
        The same ``model`` object after training.
    """
    device = model_utils.get_device()
    loss_fn = nn.functional.binary_cross_entropy
    val_loss_fn = nn.functional.binary_cross_entropy
    # NOTE(review): best_val_loss is never read below; model selection uses
    # best_val_hits only.
    best_val_loss = torch.tensor(float('inf'))
    best_val_hits = torch.tensor(0.0)
    saved_checkpoints = []
    writer = SummaryWriter(log_dir=f'{args.log_dir}/{args.experiment}')

    for e in range(1, args.train_epochs + 1):
        print(f'Training epoch {e}...')

        # Training portion
        torch.cuda.empty_cache()
        torch.set_grad_enabled(True)
        with tqdm(total=args.train_batch_size * len(train_dl)) as progress_bar:
            model.train()

            # Load graph into GPU
            adj_t = train_graph.adj_t.to(device)
            edge_index = train_graph.edge_index.to(device)
            # NOTE(review): x is moved to the device but not passed to the
            # model in this function.
            x = train_graph.x.to(device)

            # Per-batch predictions, collected for the epoch-level Hits@K.
            pos_pred = []
            neg_pred = []

            for i, (y_pos_edges,) in enumerate(train_dl):
                # Transposed so edges run along dim 1 (shape[1] is the
                # number of edges used below).
                y_pos_edges = y_pos_edges.to(device).T
                # Draw as many negative edges as there are positives.
                y_neg_edges = negative_sampling(
                    edge_index,
                    num_nodes=train_graph.num_nodes,
                    num_neg_samples=y_pos_edges.shape[1]
                ).to(device)
                y_batch = torch.cat([torch.ones(y_pos_edges.shape[1]), torch.zeros(
                    y_neg_edges.shape[1])], dim=0).to(device)  # Ground truth edge labels (1 or 0)

                # Forward pass on model
                optimizer.zero_grad()
                y_pred = model(adj_t, torch.cat(
                    [y_pos_edges, y_neg_edges], dim=1))
                loss = loss_fn(y_pred, y_batch)

                # Backward pass and optimization
                loss.backward()
                optimizer.step()
                # The scheduler receives the current batch loss as its
                # metric, once per batch.
                if args.use_scheduler:
                    lr_scheduler.step(loss)

                # Accuracy after rounding predictions to 0/1.
                batch_acc = torch.mean(
                    1 - torch.abs(y_batch.detach() - torch.round(y_pred.detach()))).item()

                pos_pred += [y_pred[y_batch == 1].detach()]
                neg_pred += [y_pred[y_batch == 0].detach()]

                progress_bar.update(y_pos_edges.shape[1])
                progress_bar.set_postfix(loss=loss.item(), acc=batch_acc)
                # Global step = number of batches seen so far * batch size.
                writer.add_scalar(
                    "train/Loss", loss, ((e - 1) * len(train_dl) + i) * args.train_batch_size)
                writer.add_scalar("train/Accuracy", batch_acc,
                                  ((e - 1) * len(train_dl) + i) * args.train_batch_size)

                # Drop per-batch references before the next iteration.
                del y_pos_edges
                del y_neg_edges
                del y_pred
                del loss

            del adj_t
            del edge_index
            del x

            # Training set evaluation Hits@K Metrics
            pos_pred = torch.cat(pos_pred, dim=0)
            neg_pred = torch.cat(neg_pred, dim=0)
            results = {}
            for K in [10, 20, 30]:
                evaluator.K = K
                hits = evaluator.eval({
                    'y_pred_pos': pos_pred,
                    'y_pred_neg': neg_pred,
                })[f'hits@{K}']
                results[f'Hits@{K}'] = hits

            print()
            print(f'Train Statistics')
            print('*' * 30)
            for k, v in results.items():
                print(f'{k}: {v}')
                writer.add_scalar(
                    f"train/{k}", v, (pos_pred.shape[0] + neg_pred.shape[0]) * e)
            print('*' * 30)

            del pos_pred
            del neg_pred

        # Validation portion
        torch.cuda.empty_cache()
        torch.set_grad_enabled(False)
        with tqdm(total=args.val_batch_size * len(dev_dl)) as progress_bar:
            model.eval()

            adj_t = valid_graph.adj_t.to(device)
            edge_index = valid_graph.edge_index.to(device)
            x = valid_graph.x.to(device)

            # Running sums, weighted by the number of edges per batch.
            val_loss = 0.0
            accuracy = 0
            num_samples_processed = 0
            pos_pred = []
            neg_pred = []
            for i, (edges_batch, y_batch) in enumerate(dev_dl):
                edges_batch = edges_batch.T.to(device)
                y_batch = y_batch.to(device)

                # Forward pass on model in validation environment
                y_pred = model(adj_t, edges_batch)
                loss = val_loss_fn(y_pred, y_batch)

                num_samples_processed += edges_batch.shape[1]
                batch_acc = torch.mean(
                    1 - torch.abs(y_batch - torch.round(y_pred))).item()
                accuracy += batch_acc * edges_batch.shape[1]
                val_loss += loss.item() * edges_batch.shape[1]

                pos_pred += [y_pred[y_batch == 1].detach()]
                neg_pred += [y_pred[y_batch == 0].detach()]

                progress_bar.update(edges_batch.shape[1])
                progress_bar.set_postfix(
                    val_loss=val_loss / num_samples_processed,
                    acc=accuracy/num_samples_processed)
                writer.add_scalar(
                    "Val/Loss", loss, ((e - 1) * len(dev_dl) + i) * args.val_batch_size)
                writer.add_scalar(
                    "Val/Accuracy", batch_acc, ((e - 1) * len(dev_dl) + i) * args.val_batch_size)

                del edges_batch
                del y_batch
                del y_pred
                del loss

            del adj_t
            del edge_index
            del x

            # Validation evaluation Hits@K Metrics
            pos_pred = torch.cat(pos_pred, dim=0)
            neg_pred = torch.cat(neg_pred, dim=0)
            results = {}
            for K in [10, 20, 30]:
                evaluator.K = K
                hits = evaluator.eval({
                    'y_pred_pos': pos_pred,
                    'y_pred_neg': neg_pred,
                })[f'hits@{K}']
                results[f'Hits@{K}'] = hits

            print()
            print(f'Validation Statistics')
            print('*' * 30)
            for k, v in results.items():
                print(f'{k}: {v}')
                writer.add_scalar(
                    f"Val/{k}", v, (pos_pred.shape[0] + neg_pred.shape[0]) * e)
            print('*' * 30)

            del pos_pred
            del neg_pred

            # Save model if it's the best one yet.
            if results['Hits@20'] > best_val_hits:
                best_val_hits = results['Hits@20']
                filename = f'{args.save_path}/{args.experiment}/{model.__class__.__name__}_best_val.checkpoint'
                model_utils.save_model(model, filename)
                print(f'Model saved!')
            print(f'Best validation Hits@20 yet: {best_val_hits}')

            # Save model on checkpoints.
            if e % args.checkpoint_freq == 0:
                filename = f'{args.save_path}/{args.experiment}/{model.__class__.__name__}_epoch_{e}.checkpoint'
                model_utils.save_model(model, filename)
                print(f'Model checkpoint reached!')
                saved_checkpoints.append(filename)

                # Delete checkpoints if there are too many
                # (oldest first, until at most num_checkpoints remain).
                while len(saved_checkpoints) > args.num_checkpoints:
                    os.remove(saved_checkpoints.pop(0))

    return model
def encode_by_transformer(input_sentences,
                          transformer_model=TRANSFORMER_MODELS[0],
                          export_checkpoint=None,
                          postfix="",
                          overwrite=False):
    """Encode sentences with a pretrained transformer and pool the outputs.

    Args:
        input_sentences: Sliceable sequence of strings. Each sentence is
            wrapped in "[CLS] ... [SEP]" and " unk " is mapped to " [UNK] "
            before tokenization.
        transformer_model: Triple ``(model_class, tokenizer_class,
            pretrained_weights)``; both classes must offer
            ``from_pretrained``.
        export_checkpoint: Optional directory used as a cache. Results are
            stored in ``transformer_vecs_<pretrained_weights><postfix>.npz``.
        postfix: Suffix appended to the cache file name.
        overwrite: If True, ignore an existing cache file and recompute.

    Returns:
        A dict with keys "max", "min", "avg" and "orig" mapping to numpy
        arrays concatenated over all batches along axis 0. "orig" is the
        second model output (pooler output); the others are the first model
        output (last hidden states) reduced over dim 1. The same dict type is
        returned whether the vectors were computed or read from the cache.

    Raises:
        ValueError: If ``input_sentences`` is empty (``np.concatenate`` has
            nothing to concatenate).
    """
    model_class, tokenizer_class, pretrained_weights = transformer_model

    export_file = None
    if export_checkpoint is not None:
        export_file = os.path.join(
            export_checkpoint,
            "transformer_vecs_" + pretrained_weights + postfix + ".npz")
        if os.path.isfile(export_file) and not overwrite:
            print("-> Found stored exports at %s, trying to load them..." %
                  export_file)
            # Materialize the arrays and close the archive so the cached path
            # returns a plain dict (like the computed path) and does not leak
            # the open file handle held by the lazy NpzFile.
            with np.load(export_file) as stored:
                return {key: stored[key] for key in stored.files}

    # Load pretrained model/tokenizer
    device = get_device()
    tokenizer = tokenizer_class.from_pretrained(pretrained_weights)
    model = model_class.from_pretrained(pretrained_weights).to(device)

    batch_size = 64
    output_vecs = {"max": [], "min": [], "avg": [], "orig": []}
    for start in range(0, len(input_sentences), batch_size):
        batch = [
            "[CLS] " + s.replace(" unk ", " [UNK] ") + " [SEP]"
            for s in input_sentences[start:start + batch_size]
        ]
        input_ids = [tokenizer.encode(s) for s in batch]
        max_input_len = max(len(ids) for ids in input_ids)
        # Right-pad with id 0; the attention mask treats every id > 0 as a
        # real token.
        # NOTE(review): assumes the tokenizer's padding id is 0 - confirm for
        # non-BERT vocabularies.
        input_ids = [
            ids + [0] * (max_input_len - len(ids)) for ids in input_ids
        ]
        attention_mask = [[float(x > 0) for x in ids] for ids in input_ids]
        input_ids = torch.tensor(input_ids).to(device)
        attention_mask = torch.tensor(attention_mask).to(device)

        # Encode text
        with torch.no_grad():
            outputs = model(input_ids, attention_mask=attention_mask)
        # Index by position instead of tuple-unpacking: this works for plain
        # tuples (also with more than two entries) and for dict-like model
        # outputs, whose iteration would yield keys rather than tensors.
        last_hidden_states, pooler_output = outputs[0], outputs[1]

        # NOTE(review): the reductions below run over every position along
        # dim 1, including padded ones (no masking is applied).
        output_vecs["orig"].append(pooler_output.cpu().numpy())
        output_vecs["max"].append(
            last_hidden_states.max(dim=1)[0].cpu().numpy())
        output_vecs["min"].append(
            last_hidden_states.min(dim=1)[0].cpu().numpy())
        output_vecs["avg"].append(
            last_hidden_states.mean(dim=1).cpu().numpy())

    output_vecs = {
        key: np.concatenate(val, axis=0)
        for key, val in output_vecs.items()
    }
    if export_file is not None:
        print("-> Exporting results to %s..." % export_file)
        np.savez(export_file, **output_vecs)
    return output_vecs
def main():
    """Command-line entry point: build loaders, model and optimizer, then train.

    Parses the train/common arguments, registers the experiment, loads the
    dataset, optionally restores a checkpoint, creates the experiment
    directory, runs ``train_model`` and saves the trained model.
    """
    arg_parser = argparse.ArgumentParser()
    add_train_args(arg_parser)
    add_common_args(arg_parser)
    args = arg_parser.parse_args()
    add_experiment(args)

    device = model_utils.get_device()

    # Load dataset from disk
    (x_train, y_train_biden, y_train_trump, mask_train,
     x_dev, y_dev_biden, y_dev_trump, mask_dev,
     _container) = model_utils.load_data(
         args.dataset_dir,
         dev_frac=args.dev_frac,
         max_entries=args.dataset_cap)

    train_set = data.TensorDataset(
        x_train, y_train_biden, y_train_trump, mask_train)
    train_dl = data.DataLoader(
        train_set, batch_size=args.train_batch_size, shuffle=True)

    dev_set = data.TensorDataset(x_dev, y_dev_biden, y_dev_trump, mask_dev)
    dev_dl = data.DataLoader(
        dev_set, batch_size=args.val_batch_size, shuffle=False)

    # Initialize a model, restoring it from a checkpoint when a path is given
    model_factory = models.get_model(args.model)
    model = model_factory()
    if args.load_path is not None:
        model = model_utils.load_model(model, args.load_path)

    # Move model to GPU if necessary
    model.to(device)

    # Optimizer and learning-rate scheduler
    optimizer = optim.Adam(
        model.parameters(),
        lr=args.learning_rate,
        weight_decay=args.weight_decay)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, mode='min', factor=0.5, patience=30, verbose=True)

    # Raises if the experiment directory already exists.
    os.makedirs(f'{args.save_path}/{args.experiment}')
    print(f'Created new experiment: {args.experiment}')
    save_arguments(args, f'{args.save_path}/{args.experiment}/args.txt')

    # Train!
    trained_model = train_model(
        train_dl, dev_dl, model, optimizer, scheduler, args)

    # Save trained model
    filename = f'{args.save_path}/{args.experiment}/{model.__class__.__name__}_trained.checkpoint'
    model_utils.save_model(trained_model, filename)
def test_model(
    dev_dl: data.DataLoader,
    model: nn.Module,
    args: argparse.Namespace,
    stft_container: StftData,
) -> nn.Module:
    """Run the model over ``dev_dl`` and print mean SDR/ISR/SIR/SAR.

    The first pass predicts one mask per output of the model for every batch;
    the second pass applies the masks to the inputs, inverts the result with
    ``model_utils.invert_batch_like`` and scores it with ``bsseval.evaluate``.

    Args:
        dev_dl: Loader yielding ``(x_batch, _, ground_truth)`` triples. It is
            iterated twice and the two passes are zipped together, so it must
            yield batches in the same order both times.
        model: Module returning a pair ``(y_pred_b, y_pred_t)`` for a batch.
        args: Namespace providing ``batch_size``, ``nonboolean_mask``,
            ``alpha`` and ``biden_only_sdr``.
        stft_container: Container whose ``data`` attribute is overwritten for
            every batch and whose ``fs`` is used as evaluation window and hop.

    Returns:
        The ``model`` that was passed in.
    """
    device = model_utils.get_device()

    print('\nRunning test metrics...')

    # Forward inference on model. Autograd is switched off for this pass:
    # nothing is back-propagated here, so no graph needs to be kept.
    predicted_masks = []
    print(' Running forward inference...')
    with tqdm(total=args.batch_size * len(dev_dl)) as progress_bar, \
            torch.no_grad():
        for x_batch, _, _ in dev_dl:
            x_batch = x_batch.abs().to(device)

            # Forward pass on model
            y_pred_b, y_pred_t = model(x_batch)
            if args.nonboolean_mask:
                # Soft masks: prediction/input ratio limited to [0, 1].
                y_biden_mask = torch.clamp(y_pred_b / x_batch, 0, 1)
                y_trump_mask = torch.clamp(y_pred_t / x_batch, 0, 1)
            else:
                # Hard masks: threshold the ratio at args.alpha.
                y_biden_mask = torch.ones_like(y_pred_b) * (
                    torch.clamp(y_pred_b / x_batch, 0, 1) > args.alpha)
                # NOTE(review): unlike the first mask, this one thresholds
                # (1 - ratio), not the ratio itself. Parenthesised here to
                # make the existing precedence explicit; confirm it is
                # intended.
                y_trump_mask = torch.ones_like(y_pred_t) * (
                    (1 - torch.clamp(y_pred_t / x_batch, 0, 1)) > args.alpha)

            predicted_masks.append((y_biden_mask.cpu(), y_trump_mask.cpu()))
            progress_bar.update(len(x_batch))

            del x_batch
            del y_biden_mask
            del y_trump_mask
            del y_pred_b
            del y_pred_t

    print('\n Processing results...')
    SDR, ISR, SIR, SAR = [], [], [], []
    with tqdm(total=args.batch_size * len(dev_dl)) as progress_bar:
        batches = zip(dev_dl, predicted_masks)
        for (x_batch, _, ground_truth), (y_biden_mask, y_trump_mask) in batches:
            # Apply both masks to the input and stack the results along a new
            # dim 1.
            stft_biden_audio = y_biden_mask * x_batch
            stft_trump_audio = y_trump_mask * x_batch
            stft_audio = torch.stack([stft_biden_audio, stft_trump_audio],
                                     dim=1)

            # Calculate other stats
            model_stft = stft_audio.cpu().numpy()
            stft_container.data = stft_audio.numpy()
            model_audio = model_utils.invert_batch_like(
                model_stft, stft_container)

            # Merge the first two ground-truth dims; the reshape to a last
            # dim of 1 only succeeds when the fourth dim has size 1.
            m, nsrc, timesamples, _ = ground_truth.shape
            gt = torch.reshape(ground_truth, (m * nsrc, timesamples, 1))

            if args.biden_only_sdr:
                # Score only the first row of the flattened batch.
                references, estimates = gt[:1, :, :], model_audio[:1, :, :]
            else:
                references, estimates = gt, model_audio
            batch_sdr, batch_isr, batch_sir, batch_sar = bsseval.evaluate(
                references,
                estimates,
                win=stft_container.fs,
                hop=stft_container.fs,
            )

            SDR = np.concatenate([SDR, np.mean(batch_sdr, axis=1)], axis=0)
            ISR = np.concatenate([ISR, np.mean(batch_isr, axis=1)], axis=0)
            SIR = np.concatenate([SIR, np.mean(batch_sir, axis=1)], axis=0)
            SAR = np.concatenate([SAR, np.mean(batch_sar, axis=1)], axis=0)

            progress_bar.update(len(x_batch))

    print(f'\n Calculating overall metrics...')
    print()
    print('*' * 30)
    print(f'SDR: {np.mean(SDR)}')
    print(f'ISR: {np.mean(ISR)}')
    print(f'SIR: {np.mean(SIR)}')
    print(f'SAR: {np.mean(SAR)}')
    print('*' * 30)

    return model
def train_model(
    train_dl: data.DataLoader,
    dev_dl: data.DataLoader,
    model: nn.Module,
    optimizer: optim.Optimizer,
    lr_scheduler: optim.lr_scheduler._LRScheduler,
    args: argparse.Namespace,
) -> nn.Module:
    """Train ``model`` with an L1-norm loss, validating after every epoch.

    Args:
        train_dl: Loader yielding ``(x, y_biden, y_trump, _)`` batches.
        dev_dl: Loader with the same batch layout, used for validation.
        model: Module returning a pair ``(y_pred_b, y_pred_t)`` for a batch.
        optimizer: Optimizer stepped once per training batch.
        lr_scheduler: Scheduler stepped with the batch loss after every
            training batch when ``args.use_scheduler`` is set.
        args: Namespace providing ``log_dir``, ``experiment``, ``save_path``,
            ``train_epochs``, ``train_batch_size``, ``val_batch_size``,
            ``train_trump``, ``use_scheduler``, ``checkpoint_freq`` and
            ``num_checkpoints``.

    Returns:
        The ``model`` that was passed in, after training.

    Side effects:
        Writes scalars through a ``SummaryWriter``, saves a
        ``*_best_val.checkpoint`` whenever the mean validation loss improves,
        saves a periodic checkpoint every ``args.checkpoint_freq`` epochs and
        deletes the oldest periodic checkpoints beyond
        ``args.num_checkpoints``.
    """
    device = model_utils.get_device()

    loss_fn = model_utils.l1_norm_loss
    val_loss_fn = model_utils.l1_norm_loss

    best_val_loss = float('inf')
    saved_checkpoints = []
    writer = SummaryWriter(log_dir=f'{args.log_dir}/{args.experiment}')

    try:
        for e in range(1, args.train_epochs + 1):
            print(f'Training epoch {e}...')

            # Training portion
            torch.cuda.empty_cache()
            with tqdm(total=args.train_batch_size *
                      len(train_dl)) as progress_bar:
                model.train()
                for i, (x_batch, y_batch_biden, y_batch_trump,
                        _) in enumerate(train_dl):
                    x_batch = x_batch.abs().to(device)
                    y_batch_biden = y_batch_biden.abs().to(device)
                    y_batch_trump = y_batch_trump.abs().to(device)

                    # Forward pass on model
                    optimizer.zero_grad()
                    y_pred_b, y_pred_t = model(x_batch)
                    # Only one of the two outputs is trained per run.
                    if args.train_trump:
                        loss = loss_fn(y_pred_t, y_batch_trump)
                    else:
                        loss = loss_fn(y_pred_b, y_batch_biden)

                    # Backward pass and optimization
                    loss.backward()
                    optimizer.step()
                    if args.use_scheduler:
                        lr_scheduler.step(loss)

                    progress_bar.update(len(x_batch))
                    progress_bar.set_postfix(loss=loss.item())
                    writer.add_scalar(
                        "train/Loss", loss,
                        ((e - 1) * len(train_dl) + i) * args.train_batch_size)

                    del x_batch
                    del y_batch_biden
                    del y_batch_trump
                    del y_pred_b
                    del y_pred_t
                    del loss

            # Validation portion. Autograd is disabled: nothing is
            # back-propagated here, so no graph needs to be built.
            torch.cuda.empty_cache()
            with tqdm(total=args.val_batch_size *
                      len(dev_dl)) as progress_bar, torch.no_grad():
                model.eval()
                val_loss = 0.0
                num_batches_processed = 0
                for i, (x_batch, y_batch_biden, y_batch_trump,
                        _) in enumerate(dev_dl):
                    x_batch = x_batch.abs().to(device)
                    y_batch_biden = y_batch_biden.abs().to(device)
                    y_batch_trump = y_batch_trump.abs().to(device)

                    # Forward pass on model
                    y_pred_b, y_pred_t = model(x_batch)
                    # Turn predictions into [0, 1] ratio masks and re-apply
                    # them to the input before scoring.
                    y_pred_b_mask = torch.clamp(y_pred_b / x_batch, 0, 1)
                    y_pred_t_mask = torch.clamp(y_pred_t / x_batch, 0, 1)
                    loss_trump = val_loss_fn(y_pred_t_mask * x_batch,
                                             y_batch_trump)
                    loss_biden = val_loss_fn(y_pred_b_mask * x_batch,
                                             y_batch_biden)

                    if args.train_trump:
                        val_loss += loss_trump.item()
                    else:
                        val_loss += loss_biden.item()
                    num_batches_processed += 1

                    progress_bar.update(len(x_batch))
                    progress_bar.set_postfix(
                        val_loss=val_loss / num_batches_processed)
                    writer.add_scalar(
                        "Val/Biden Loss", loss_biden,
                        ((e - 1) * len(dev_dl) + i) * args.val_batch_size)
                    writer.add_scalar(
                        "Val/Trump Loss", loss_trump,
                        ((e - 1) * len(dev_dl) + i) * args.val_batch_size)

                    del x_batch
                    del y_batch_biden
                    del y_batch_trump
                    del y_pred_b
                    del y_pred_t
                    del loss_trump
                    del loss_biden

            # An empty validation loader yields no average; treat it as "no
            # improvement" instead of dividing by zero.
            if num_batches_processed:
                mean_val_loss = val_loss / num_batches_processed
            else:
                mean_val_loss = float('inf')

            # Save model if it's the best one yet.
            if mean_val_loss < best_val_loss:
                best_val_loss = mean_val_loss
                filename = f'{args.save_path}/{args.experiment}/{model.__class__.__name__}_best_val.checkpoint'
                model_utils.save_model(model, filename)
                print(f'Model saved!')
                print(f'Best validation loss yet: {best_val_loss}')

            # Save model on checkpoints.
            if e % args.checkpoint_freq == 0:
                filename = f'{args.save_path}/{args.experiment}/{model.__class__.__name__}_epoch_{e}.checkpoint'
                model_utils.save_model(model, filename)
                print(f'Model checkpoint reached!')
                saved_checkpoints.append(filename)
                # Delete checkpoints if there are too many
                while len(saved_checkpoints) > args.num_checkpoints:
                    os.remove(saved_checkpoints.pop(0))
    finally:
        # Flush and release the event file even if training is interrupted.
        writer.close()

    return model
def test_model(
    dev_dl: tf.data.Dataset,
    model: nn.Module,
    args: argparse.Namespace,
) -> nn.Module:
    """Evaluate ``model`` on ``dev_dl`` and report RMS/REL/LOG10/delta metrics.

    Args:
        dev_dl: Dataset iterated through ``as_numpy_iterator()``; every item
            is an ``(x, y)`` pair that is passed to
            ``model_utils.preprocess_test_example``.
        model: Module called on the preprocessed input batch.
        args: Namespace providing ``batch_size``, ``num_images``,
            ``save_dir``, ``name`` and ``load_dir``.

    Returns:
        The ``model`` that was passed in.

    Side effects:
        Disables autograd globally via ``torch.set_grad_enabled(False)`` and
        leaves it disabled, writes a diagram for each of the first
        ``args.num_images`` batches, prints the metrics and, when
        ``args.load_dir`` is not None, writes them to
        ``<load_dir>/test_results.txt``.
    """
    device = model_utils.get_device()

    print('\nComputing evaluation metrics...')
    total_examples = 0
    squared_error = 0
    rel_error = 0
    log_error = 0
    threshold1 = 0  # 1.25
    threshold2 = 0  # 1.25^2
    threshold3 = 0  # corresponds to 1.25^3

    print(' Running forward inference...')
    torch.set_grad_enabled(False)
    with tqdm(total=args.batch_size * len(dev_dl)) as progress_bar:
        for i, (x_batch_orig, y_batch) in enumerate(dev_dl.as_numpy_iterator()):
            x_batch, y_batch = model_utils.preprocess_test_example(
                x_batch_orig, y_batch)
            # Put inputs and targets on the same device as the evaluation is
            # run on; this is a no-op when they are already there.
            x_batch = x_batch.to(device)
            y_batch = y_batch.to(device)

            # Forward pass on model
            y_pred = model(x_batch)

            # TODO: Process y_pred in the optimal way (round it off, etc)
            # Maybe clamp from 0 to infty or something
            nanmask = getNanMask(y_batch)
            # Per-batch count of positions not flagged by the mask.
            total_pixels = torch.sum(~nanmask)
            total_examples += x_batch.shape[0]

            # RMS, REL, LOG10, threshold calculation
            # NOTE(review): every term below is already normalised by this
            # batch's total_pixels, and the sums are later divided by
            # total_examples - confirm that this is the intended averaging.
            squared_error += (
                torch.sum(torch.pow(y_pred - y_batch, 2)).item() /
                total_pixels)**0.5
            rel_error += torch.sum(
                torch.abs(y_pred - y_batch) / y_batch).item() / total_pixels
            log_error += torch.sum(
                torch.abs(
                    removeNans(torch.log10(y_pred)) -
                    removeNans(torch.log10(y_batch)))).item() / total_pixels

            # max(pred/target, target/pred) is shared by all three thresholds.
            ratio = torch.max(y_pred / y_batch, y_batch / y_pred)
            threshold1 += torch.sum(ratio < 1.25).item() / total_pixels
            threshold2 += torch.sum(ratio < 1.25**2).item() / total_pixels
            threshold3 += torch.sum(ratio < 1.25**3).item() / total_pixels

            if i < args.num_images:
                model_utils.make_3_col_diagram(
                    x_batch.cpu().numpy(),
                    y_batch.cpu().numpy(),
                    y_pred.cpu().numpy(),
                    f'{args.save_dir}/{args.name}/{args.name}_{i}.png',
                )

            progress_bar.update(len(x_batch))

            del x_batch
            del y_pred
            del y_batch

    print('\n Calculating overall metrics...')
    print()
    output_str = ''
    output_str += '*' * 30 + '\n'
    output_str += f'RMS: {squared_error / total_examples}\n'
    output_str += f'REL: {rel_error / total_examples}\n'
    output_str += f'LOG10: {log_error / total_examples}\n'
    output_str += f'delta1:{threshold1 / total_examples}\n'
    output_str += f'delta2:{threshold2 / total_examples}\n'
    output_str += f'delta3:{threshold3 / total_examples}\n'
    output_str += '*' * 30
    print(output_str)

    if args.load_dir is not None:
        with open(f'{args.load_dir}/test_results.txt', 'w') as f:
            f.write(output_str)

    return model
def __init__(self,
             batch_size,
             optimizer,
             learning_rate,
             train_data_engine,
             test_data_engine,
             dim_hidden,
             dim_embedding,
             vocab_size=None,
             attr_vocab_size=None,
             n_layers=1,
             bidirectional=False,
             model_dir="./model",
             log_dir="./log",
             is_load=True,
             replace_model=True,
             model='nlu-nlg',
             schedule='iterative',
             device=None,
             dir_name='test',
             f1_per_sample=False,
             dim_loss=False,
             with_intent=True,
             nlg_path=None):
    """Build the MaskPredict model, its optimizers, data loaders and logs.

    Args:
        batch_size: Batch size for the model and both data loaders.
        optimizer: Optimizer specification forwarded to ``build_optimizer``.
        learning_rate: Learning rate forwarded to ``build_optimizer``.
        train_data_engine: Training dataset; also supplies the tokenizer,
            ``nlg_slot_vocab``, ``intent_vocab`` and ``collate_fn_nlg``.
        test_data_engine: Test dataset; supplies its own ``collate_fn_nlg``.
        dim_hidden: Hidden size passed to ``MaskPredict``.
        dim_embedding: Embedding size passed to ``MaskPredict``.
        n_layers: Number of layers passed to ``MaskPredict``.
        model_dir: Directory for model files; created if missing.
        log_dir: Directory for the log files; created if missing.
        is_load: If True, call ``self.load_model(self.model_dir)``.
        device: Device specification forwarded to ``get_device``.
        dir_name: Stored on the instance as ``self.dir_name``.

    The remaining parameters (``vocab_size``, ``attr_vocab_size``,
    ``bidirectional``, ``replace_model``, ``model``, ``schedule``,
    ``f1_per_sample``, ``dim_loss``, ``with_intent``, ``nlg_path``) are
    accepted but not read here; ``MaskPredict`` is always constructed with
    ``bidirectional=False``.

    Side effects:
        Creates ``model_dir`` and ``log_dir`` if needed and truncates
        ``train_log.csv`` and ``valid_log.csv`` in ``log_dir`` to a header
        line, also when ``is_load`` is True.
    """
    # Initialize attributes
    os.makedirs(model_dir, exist_ok=True)
    # The log files below are opened for writing, so their directory has to
    # exist as well.
    os.makedirs(log_dir, exist_ok=True)

    self.model_dir = model_dir
    self.log_dir = log_dir
    self.dir_name = dir_name
    self.device = get_device(device)

    self.maskpredict = MaskPredict(
        dim_embedding=dim_embedding,
        dim_hidden=dim_hidden,
        vocab_size=train_data_engine.tokenizer.get_vocab_size(),
        n_slot_key=len(train_data_engine.nlg_slot_vocab),
        n_intent=len(train_data_engine.intent_vocab),
        n_layers=n_layers,
        bidirectional=False,
        batch_size=batch_size)

    self.optimizer = torch.optim.Adam(self.maskpredict.parameters(),
                                      3e-4,
                                      weight_decay=1e-4)
    self.scheduler = torch.optim.lr_scheduler.StepLR(self.optimizer,
                                                     step_size=5,
                                                     gamma=0.1)

    if is_load:
        print_time_info("Loading marginal model from %s" % self.model_dir)
        self.load_model(self.model_dir)

    self.train_data_engine = train_data_engine
    self.test_data_engine = test_data_engine
    self.train_nlg_data_loader = DataLoader(
        train_data_engine,
        batch_size=batch_size,
        shuffle=True,
        num_workers=1,
        drop_last=True,
        collate_fn=train_data_engine.collate_fn_nlg,
        pin_memory=True)

    self.test_nlg_data_loader = DataLoader(
        test_data_engine,
        batch_size=batch_size,
        shuffle=False,
        num_workers=1,
        drop_last=True,
        collate_fn=test_data_engine.collate_fn_nlg,
        pin_memory=True)

    # Keep the trainable parameters in a list: a bare filter() iterator is
    # exhausted once it has been consumed, which would leave the attribute
    # empty for any later use.
    self.maskpredict_parameters = [
        p for p in self.maskpredict.parameters() if p.requires_grad
    ]
    self.maskpredict_optimizer = build_optimizer(
        optimizer, self.maskpredict_parameters, learning_rate)

    self.train_log_path = os.path.join(self.log_dir, "train_log.csv")
    self.valid_log_path = os.path.join(self.log_dir, "valid_log.csv")
    self.test_result_path = os.path.join(self.log_dir, "test_result.txt")

    # Start both CSV logs from scratch with only the header row.
    with open(self.train_log_path, 'w') as file:
        file.write("epoch,loss\n")
    with open(self.valid_log_path, 'w') as file:
        file.write("epoch,loss\n")
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import math, copy, time
from torch.autograd import Variable
import os, re, sys
from model_utils import get_device

global device
device = get_device(device=None)


def clones(module, N):
    """Return an ``nn.ModuleList`` holding N deep copies of ``module``."""
    replicas = []
    for _ in range(N):
        replicas.append(copy.deepcopy(module))
    return nn.ModuleList(replicas)


class Encoder(nn.Module):
    """Stack of N copies of one layer, followed by a ``LayerNorm``."""

    def __init__(self, layer, N):
        super().__init__()
        # `layer` is the prototype that gets deep-copied N times.
        self.layers = clones(layer, N)
        self.norm = LayerNorm(layer.size)

    def forward(self, x, mask):
        """Feed ``x`` (together with ``mask``) through every layer, then normalise."""
        hidden = x
        for sublayer in self.layers:
            hidden = sublayer(hidden, mask)
        return self.norm(hidden)