def get_data_loaders(self):
    data_dir = self.args.data_dir
    self.train_dataset = PickleDataset(
        os.path.join(data_dir, f'{self.args.train_set}.pkl'),
        os.path.join(data_dir, self.args.train_index_file),
        segment_size=self.config.segment_size)
    self.val_dataset = PickleDataset(
        os.path.join(data_dir, f'{self.args.val_set}.pkl'),
        os.path.join(data_dir, self.args.val_index_file),
        segment_size=self.config.segment_size)
    self.train_loader = get_data_loader(self.train_dataset,
                                        batch_size=self.config.batch_size,
                                        shuffle=self.config.shuffle,
                                        num_workers=4,
                                        drop_last=False)
    self.val_loader = get_data_loader(self.val_dataset,
                                      batch_size=self.config.batch_size,
                                      shuffle=self.config.shuffle,
                                      num_workers=4,
                                      drop_last=False)
    self.train_iter = infinite_iter(self.train_loader)
    return
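# The get_data_loaders variants in this section hand their training loader to
# an infinite_iter helper whose definition is not shown here. A minimal sketch,
# assuming it simply re-iterates a finite DataLoader forever so that the
# training loop can call next() without handling StopIteration:
def infinite_iter(loader):
    """Yield batches from `loader` indefinitely by restarting it when exhausted."""
    while True:
        for batch in loader:
            yield batch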
def get_data_loaders(self):
    data_dir = self.args.data_dir
    self.gpu_num = torch.cuda.device_count() if torch.cuda.is_available() else 1
    self.train_dataset = PickleDataset(
        os.path.join(data_dir, f'{self.args.train_set}.pkl'),
        os.path.join(data_dir, self.args.train_index_file),
        segment_size=self.config['data_loader']['segment_size'])
    self.train_loader = get_data_loader(
        self.train_dataset,
        frame_size=self.config['data_loader']['frame_size'],
        batch_size=self.config['data_loader']['batch_size'] * self.gpu_num,
        num_workers=0,
        shuffle=self.config['data_loader']['shuffle'],
        drop_last=False)
    self.train_iter = infinite_iter(self.train_loader)
    if self.args.use_eval_set:
        self.eval_dataset = PickleDataset(
            os.path.join(data_dir, f'{self.args.eval_set}.pkl'),
            os.path.join(data_dir, self.args.eval_index_file),
            segment_size=self.config['data_loader']['segment_size'])
        self.eval_loader = get_data_loader(
            self.eval_dataset,
            frame_size=self.config['data_loader']['frame_size'],
            batch_size=self.config['data_loader']['batch_size'] * self.gpu_num,
            shuffle=self.config['data_loader']['shuffle'],
            num_workers=0,
            drop_last=False)
        self.eval_iter = infinite_iter(self.eval_loader)
    if self.args.use_test_set:
        self.test_dataset = PickleDataset(
            os.path.join(data_dir, f'{self.args.test_set}.pkl'),
            os.path.join(data_dir, self.args.test_index_file),
            segment_size=self.config['data_loader']['segment_size'])
        self.test_loader = get_data_loader(
            self.test_dataset,
            frame_size=self.config['data_loader']['frame_size'],
            batch_size=self.config['data_loader']['batch_size'],
            shuffle=False,
            num_workers=0,
            drop_last=False)
        self.test_iter = infinite_iter(self.test_loader)
    return
def _get_loader(network_loc, molecule_loc, exclude_ids_loc, split_by,
                batch_size, batch_size_test, num_iterations, num_workers,
                full, training_only, k, p, ms: MoleculeSpec):
    """Helper function for getting data loaders

    Args:
        network_loc (str): Location of the bipartite network
        molecule_loc (str): Location of molecule SMILES strings
        exclude_ids_loc (str): The location storing the ids to be excluded
            from the training set
        split_by (str): Whether to split by scaffold or molecule
        batch_size (int): The batch size for training
        batch_size_test (int): The batch size for testing
        num_iterations (int): The number of total iterations for model training
        num_workers (int): The number of workers for loading the dataset
        full (bool): Whether to use the full dataset for training
        training_only (bool): Only record training loss
        k (int): The number of importance samples
        p (float): The degree of stochasticity of importance sampling,
            0.0 for fully stochastic decoding, 1.0 for fully deterministic decoding
        ms (MoleculeSpec)

    Returns:
        t.Tuple[t.Iterable, t.Iterable]: Data loaders for training and test data
    """
    if full:
        loader_train = get_data_loader_full(scaffold_network_loc=network_loc,
                                            molecule_smiles_loc=molecule_loc,
                                            batch_size=batch_size,
                                            num_iterations=num_iterations,
                                            num_workers=num_workers,
                                            k=k, p=p, ms=ms)
        loader_test = None
    else:
        loader_train, loader_test = get_data_loader(
            scaffold_network_loc=network_loc,
            molecule_smiles_loc=molecule_loc,
            exclude_ids_loc=exclude_ids_loc,
            split_type=split_by,
            batch_size=batch_size,
            batch_size_test=batch_size_test,
            num_iterations=num_iterations,
            num_workers=num_workers,
            k=k, p=p, ms=ms)
    if training_only:
        loader_test = None
    return loader_train, loader_test
def get_data_loaders(self):
    data_dir = self.args.data_dir
    self.test_dataset = PickleDataset(
        os.path.join(data_dir, f'{self.args.test_set}.pkl'),
        os.path.join(data_dir, self.args.test_index_file),
        segment_size=self.config['data_loader']['segment_size'])
    self.test_loader = get_data_loader(
        self.test_dataset,
        frame_size=self.config['data_loader']['frame_size'],
        batch_size=self.config['data_loader']['batch_size'],
        shuffle=False,
        drop_last=False)
def get_data_loaders(self):
    data_dir = self.args.data_dir
    self.train_dataset = PickleDataset(
        os.path.join(data_dir, f'{self.args.train_set}.pkl'),
        os.path.join(data_dir, self.args.train_index_file),
        segment_size=self.config['data_loader']['segment_size'])
    self.train_loader = get_data_loader(
        self.train_dataset,
        frame_size=self.config['data_loader']['frame_size'],
        batch_size=self.config['data_loader']['batch_size'],
        shuffle=self.config['data_loader']['shuffle'],
        num_workers=4,
        drop_last=False)
    self.train_iter = infinite_iter(self.train_loader)
    return
def get_data_loaders(self):
    data_dir = self.args.data_dir
    self.train_dataset = PickleDataset(
        os.path.join(data_dir, f"{self.args.train_set}.pkl"),
        os.path.join(data_dir, self.args.train_index_file),
        segment_size=self.config["data_loader"]["segment_size"],
    )
    self.train_loader = get_data_loader(
        self.train_dataset,
        frame_size=self.config["data_loader"]["frame_size"],
        batch_size=self.config["data_loader"]["batch_size"],
        shuffle=self.config["data_loader"]["shuffle"],
        num_workers=0,
        drop_last=False,
    )
    self.train_iter = infinite_iter(self.train_loader)
    return
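# Hypothetical usage sketch (not part of the original code): once
# get_data_loaders has built self.train_iter, a training step can pull one
# batch at a time from the infinite iterator. The self.model, self.opt, and
# self.device attributes below are assumptions used only for illustration.
def train_one_step(self):
    data = next(self.train_iter)   # never raises StopIteration
    data = data.to(self.device)    # assumes each batch is a single tensor
    loss = self.model(data)        # assumes the model returns a scalar loss
    self.opt.zero_grad()
    loss.backward()
    self.opt.step()
    return loss.item()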
def train(args):
    # Define tokenizer
    tokenizer_module = getattr(import_module("transformers"),
                               f"{args.model_name}Tokenizer")
    tokenizer = tokenizer_module.from_pretrained(args.pretrained_name_or_path)
    slot_meta, train_examples, dev_examples, dev_labels = train_data_loading(
        args, isUserFirst=False, isDialogueLevel=False)

    # Define preprocessor
    processor = TRADEPreprocessor(slot_meta,
                                  tokenizer,
                                  max_seq_length=args.max_seq_length,
                                  use_n_gate=args.use_n_gate)
    train_features = processor.convert_examples_to_features(train_examples)
    dev_features = processor.convert_examples_to_features(dev_examples)
    train_loader = get_data_loader(processor, train_features, args.train_batch_size)
    dev_loader = get_data_loader(processor, dev_features, args.eval_batch_size)

    args.vocab_size = len(tokenizer)
    # number of gates: (none, dontcare, ptr) or (none, yes, no, dontcare, ptr)
    args.n_gate = len(processor.gating2id)

    # Tokenize slot meta for the decoder's initial inputs
    tokenized_slot_meta = []
    for slot in slot_meta:
        tokenized_slot_meta.append(
            tokenizer.encode(slot.replace("-", " "), add_special_tokens=False))

    # Define model
    model = TRADE(args, tokenized_slot_meta)
    # model.set_subword_embedding(args)  # initialize subword embeddings
    print(f"Subword embeddings are loaded from {args.pretrained_name_or_path}")
    model.to(device)
    print("Model is initialized")

    # Define optimizer and scheduler
    n_epochs = args.epochs
    t_total = len(train_loader) * n_epochs
    # warmup_steps is now computed automatically inside get_optimizer
    # (the original code is below)
    # warmup_steps = int(t_total * args.warmup_ratio)
    optimizer = get_optimizer(model, args)  # get optimizer (Adam, sgd, AdamP, ..)
    scheduler = get_scheduler(optimizer, t_total, args)  # get scheduler (custom, linear, cosine, ..)
    loss_fnc_1 = masked_cross_entropy_for_value  # generation loss (# classes: vocab_size)
    loss_fnc_2 = nn.CrossEntropyLoss()  # gating loss
    # loss_fnc_2 = LabelSmoothingLoss(classes=model.decoder.n_gate, smoothing=args.smoothing_factor)

    json.dump(
        vars(args),
        open(f"{args.model_dir}/{args.model_fold}/exp_config.json", "w"),
        indent=2,
        ensure_ascii=False,
    )
    json.dump(
        slot_meta,
        open(f"{args.model_dir}/{args.model_fold}/slot_meta.json", "w"),
        indent=2,
        ensure_ascii=False,
    )

    best_score, best_checkpoint = 0, 0
    for epoch in range(n_epochs):
        model.train()
        for step, batch in enumerate(train_loader):
            input_ids, segment_ids, input_masks, gating_ids, target_ids, guids = [
                b.to(device) if not isinstance(b, list) else b for b in batch
            ]

            # teacher forcing
            if (args.teacher_forcing_ratio > 0.0
                    and random.random() < args.teacher_forcing_ratio):
                tf = target_ids
            else:
                tf = None

            all_point_outputs, all_gate_outputs = model(
                input_ids, segment_ids, input_masks, target_ids.size(-1), tf)

            # generation loss
            loss_1 = loss_fnc_1(
                all_point_outputs.contiguous(),
                target_ids.contiguous().view(-1),
                tokenizer.pad_token_id,
            )
            # gating loss
            loss_2 = loss_fnc_2(
                all_gate_outputs.contiguous().view(-1, args.n_gate),
                gating_ids.contiguous().view(-1),
            )
            loss = loss_1 + loss_2

            loss.backward()
            nn.utils.clip_grad_norm_(model.parameters(), args.max_grad_norm)
            optimizer.step()
            scheduler.step()
            for learning_rate in scheduler.get_lr():
                wandb.log({"learning_rate": learning_rate})
            optimizer.zero_grad()

            if step % 100 == 0:
                print(
                    f"[{epoch}/{n_epochs}] [{step}/{len(train_loader)}] "
                    f"loss: {loss.item()} gen: {loss_1.item()} gate: {loss_2.item()}"
                )
                wandb.log({
                    "epoch": epoch,
                    "Train epoch loss": loss.item(),
                    "Train epoch gen loss": loss_1.item(),
                    "Train epoch gate loss": loss_2.item(),
                })

        predictions = inference_TRADE(model, dev_loader, processor, device)
        eval_result = _evaluation(predictions, dev_labels, slot_meta)
        for k, v in eval_result.items():
            if k in ("joint_goal_accuracy", "turn_slot_accuracy", "turn_slot_f1"):
                print(f"{k}: {v}")

        if best_score < eval_result["joint_goal_accuracy"]:
            print("Update Best checkpoint!")
            best_score = eval_result["joint_goal_accuracy"]
            best_checkpoint = epoch
            wandb.log({
                "epoch": epoch,
                "Best joint goal accuracy": best_score,
                "Best turn slot accuracy": eval_result["turn_slot_accuracy"],
                "Best turn slot f1": eval_result["turn_slot_f1"],
            })
            if args.logging_accuracy_per_domain_slot:
                wandb.log({
                    k: v
                    for k, v in eval_result.items()
                    if k not in ("joint_goal_accuracy", "turn_slot_accuracy", "turn_slot_f1")
                })

        torch.save(model.state_dict(),
                   f"{args.model_dir}/{args.model_fold}/model-{epoch}.bin")

    print(f"Best checkpoint: {args.model_dir}/model-{best_checkpoint}.bin")
    wandb.log(
        {"Best checkpoint": f"{args.model_dir}/model-{best_checkpoint}.bin"})
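# The TRADE/SUMBT trainers above call get_data_loader(processor, features,
# batch_size), whose body is not included in this section. A minimal sketch,
# assuming it wraps the preprocessed features in a torch DataLoader and batches
# them with a collate_fn exposed by the processor; the FeatureDataset class
# here is a hypothetical stand-in for the project's own Dataset wrapper.
from torch.utils.data import DataLoader, Dataset, RandomSampler

class FeatureDataset(Dataset):
    """Thin Dataset over a list of preprocessed feature objects."""
    def __init__(self, features):
        self.features = features

    def __len__(self):
        return len(self.features)

    def __getitem__(self, idx):
        return self.features[idx]

def get_data_loader(processor, features, batch_size):
    dataset = FeatureDataset(features)
    sampler = RandomSampler(dataset)
    return DataLoader(dataset,
                      batch_size=batch_size,
                      sampler=sampler,
                      collate_fn=processor.collate_fn)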
def main_inference(args, config):
    slot_meta = json.load(
        open(f"{args.model_dir}/{args.model_fold}/slot_meta.json", "r"))
    ontology = json.load(open(f"{CFG.TrainOntology}", "r"))

    if config.replace_word_data:
        # replace the '택시' (taxi) domain token with '버스' (bus)
        slot_meta = [meta.replace('택시', '버스') for meta in slot_meta]
        ontology = {
            domain_slot_key.replace('택시', '버스'): domain_slot_value
            for domain_slot_key, domain_slot_value in ontology.items()
        }

    # Define tokenizer
    tokenizer_module = getattr(import_module("transformers"),
                               f"{config.model_name}Tokenizer")
    tokenizer = tokenizer_module.from_pretrained(config.pretrained_name_or_path)

    # Extract features
    if config.dst == 'TRADE':
        eval_examples = test_data_loading(args,
                                          isUserFirst=False,
                                          isDialogueLevel=False)
        processor = TRADEPreprocessor(slot_meta, tokenizer)
        tokenized_slot_meta = []
        for slot in slot_meta:
            tokenized_slot_meta.append(
                tokenizer.encode(slot.replace("-", " "), add_special_tokens=False))
        # Define model
        model = TRADE(config, tokenized_slot_meta)
        model.set_subword_embedding(config)  # initialize subword embeddings
    elif config.dst == 'SUMBT':
        eval_examples = test_data_loading(args,
                                          isUserFirst=True,
                                          isDialogueLevel=True)
        max_turn = max([len(e) * 2 for e in eval_examples])
        processor = SUMBTPreprocessor(
            slot_meta,
            tokenizer,
            ontology=ontology,  # predefined ontology
            max_seq_length=config.max_seq_length,  # max sequence length per turn
            max_turn_length=max_turn)  # max number of turns per dialogue
        slot_type_ids, slot_values_ids = tokenize_ontology(
            ontology, tokenizer, config.max_label_length)
        # Define model
        num_labels = [len(s) for s in slot_values_ids]  # number of candidate values per slot
        model = SUMBT(config, num_labels, device)
        # pre-encode the tokenized ontology using BERT_SV
        model.initialize_slot_value_lookup(slot_values_ids, slot_type_ids)

    eval_features = processor.convert_examples_to_features(eval_examples)
    eval_loader = get_data_loader(processor, eval_features, config.eval_batch_size)
    print("# eval:", len(eval_loader))

    ckpt = torch.load(
        f'{args.model_dir}/{args.model_fold}/model-{args.chkpt_idx}.bin',
        map_location="cpu")
    model.load_state_dict(ckpt)
    model.to(device)
    print("Model is loaded")

    inference_module = getattr(import_module("inference"),
                               f"inference_{config.dst}")
    predictions = inference_module(model, eval_loader, processor, device)

    os.makedirs(args.output_dir, exist_ok=True)
    json.dump(
        predictions,
        open(f"{args.output_dir}/{args.model_fold}-predictions.csv", "w"),
        indent=2,
        ensure_ascii=False,
    )
def train(args):
    # Define tokenizer
    tokenizer_module = getattr(import_module("transformers"),
                               f"{args.model_name}Tokenizer")
    tokenizer = tokenizer_module.from_pretrained(args.pretrained_name_or_path)
    slot_meta, train_examples, dev_examples, dev_labels = train_data_loading(
        args, isUserFirst=True, isDialogueLevel=True)
    ontology = json.load(open("../input/data/train_dataset/ontology.json"))

    # Define preprocessor
    max_turn = max([len(e) * 2 for e in train_examples])
    processor = SUMBTPreprocessor(
        slot_meta,
        tokenizer,
        ontology=ontology,  # predefined ontology
        max_seq_length=args.max_seq_length,  # max sequence length per turn
        max_turn_length=max_turn)  # max number of turns per dialogue
    train_features = processor.convert_examples_to_features(train_examples)
    dev_features = processor.convert_examples_to_features(dev_examples)
    train_loader = get_data_loader(processor, train_features, args.train_batch_size)
    dev_loader = get_data_loader(processor, dev_features, args.eval_batch_size)

    if args.replace_word_data:
        # replace the '택시' (taxi) domain token with '버스' (bus)
        ontology = {
            domain_slot_key.replace('택시', '버스'): domain_slot_value
            for domain_slot_key, domain_slot_value in ontology.items()
        }
    slot_type_ids, slot_values_ids = tokenize_ontology(ontology, tokenizer,
                                                       args.max_label_length)

    # Define model
    num_labels = [len(s) for s in slot_values_ids]  # number of candidate values per slot
    n_gpu = 1 if torch.cuda.device_count() < 2 else torch.cuda.device_count()
    model = SUMBT(args, num_labels, device)
    # pre-encode the tokenized ontology using BERT_SV
    model.initialize_slot_value_lookup(slot_values_ids, slot_type_ids)
    model.to(device)
    print("Model is initialized")

    # Define optimizer and scheduler
    n_epochs = args.epochs
    t_total = len(train_loader) * n_epochs
    no_decay = ["bias", "LayerNorm.weight"]
    optimizer_grouped_parameters = [
        {
            "params": [
                p for n, p in model.named_parameters()
                if not any(nd in n for nd in no_decay)
            ],
            "weight_decay": args.weight_decay,
        },
        {
            "params": [
                p for n, p in model.named_parameters()
                if any(nd in n for nd in no_decay)
            ],
            "weight_decay": 0.0,
        },
    ]
    optimizer = get_optimizer(optimizer_grouped_parameters, args)  # get optimizer (Adam, sgd, AdamP, ..)
    scheduler = get_scheduler(optimizer, t_total, args)  # get scheduler (custom, linear, cosine, ..)
    json.dump(
        vars(args),
        open(f"{args.model_dir}/{args.model_fold}/exp_config.json", "w"),
        indent=2,
        ensure_ascii=False,
    )
    json.dump(
        slot_meta,
        open(f"{args.model_dir}/{args.model_fold}/slot_meta.json", "w"),
        indent=2,
        ensure_ascii=False,
    )

    best_score, best_checkpoint = 0, 0
    for epoch in range(n_epochs):
        batch_loss = []
        model.train()
        for step, batch in enumerate(train_loader):
            input_ids, segment_ids, input_masks, target_ids, num_turns, guids = \
                [b.to(device) if not isinstance(b, list) else b for b in batch]

            # Forward
            if n_gpu == 1:
                loss, loss_slot, acc, acc_slot, _ = model(
                    input_ids, segment_ids, input_masks, target_ids, n_gpu)
            else:
                loss, _, acc, acc_slot, _ = model(input_ids, segment_ids,
                                                  input_masks, target_ids, n_gpu)
            batch_loss.append(loss.item())

            loss.backward()
            nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            optimizer.step()
            scheduler.step()
            for learning_rate in scheduler.get_lr():
                wandb.log({"learning_rate": learning_rate})
            optimizer.zero_grad()

            if step % 100 == 0:
                print('[%d/%d] [%d/%d] %f' %
                      (epoch, n_epochs, step, len(train_loader), loss.item()))
                wandb.log({"epoch": epoch, "Train epoch loss": loss.item()})

        predictions = inference_SUMBT(model, dev_loader, processor, device)
        eval_result = _evaluation(predictions, dev_labels, slot_meta)
        for k, v in eval_result.items():
            if k in ("joint_goal_accuracy", "turn_slot_accuracy", "turn_slot_f1"):
                print(f"{k}: {v}")

        if best_score < eval_result["joint_goal_accuracy"]:
            print("Update Best checkpoint!")
            best_score = eval_result["joint_goal_accuracy"]
            best_checkpoint = epoch
            wandb.log({
                "epoch": epoch,
                "Best joint goal accuracy": best_score,
                "Best turn slot accuracy": eval_result["turn_slot_accuracy"],
                "Best turn slot f1": eval_result["turn_slot_f1"],
            })
            if args.logging_accuracy_per_domain_slot:
                wandb.log({
                    k: v
                    for k, v in eval_result.items()
                    if k not in ("joint_goal_accuracy", "turn_slot_accuracy", "turn_slot_f1")
                })

        torch.save(model.state_dict(),
                   f"{args.model_dir}/{args.model_fold}/model-{epoch}.bin")

    print(f"Best checkpoint: {args.model_dir}/model-{best_checkpoint}.bin")
    wandb.log(
        {"Best checkpoint": f"{args.model_dir}/model-{best_checkpoint}.bin"})
def _get_loader(
        network_loc: str,
        molecule_loc: str,
        exclude_ids_loc: str,
        split_by: str,
        batch_size: int,
        batch_size_test: int,
        num_iterations: int,
        num_workers: int,
        full: bool,
        training_only: bool,
        k: int,
        p: float,
        ms: MoleculeSpec) -> t.Tuple[t.Iterable, t.Optional[t.Iterable]]:
    """Helper function for getting data loaders

    Args:
        network_loc (str): Location of the bipartite network
        molecule_loc (str): Location of molecule SMILES strings
        exclude_ids_loc (str): The location storing the ids to be excluded
            from the training set
        split_by (str): Whether to split by scaffold or molecule
        batch_size (int): The batch size for training
        batch_size_test (int): The batch size for testing
        num_iterations (int): The number of total iterations for model training
        num_workers (int): The number of workers for loading the dataset
        full (bool): Whether to use the full dataset for training
        training_only (bool): Only record training loss
        k (int): The number of importance samples
        p (float): The degree of stochasticity of importance sampling,
            0.0 for fully stochastic decoding, 1.0 for fully deterministic decoding
        ms (MoleculeSpec)

    Returns:
        t.Tuple[t.Iterable, t.Iterable]: DataLoaders for training and test data
    """
    if full:
        training_only = True
        loader_train = \
            get_data_loader_full(network_loc, molecule_loc,
                                 batch_size, num_iterations,
                                 num_workers, k, p, ms)
        loader_test = None
    else:
        loader_train, loader_test = \
            get_data_loader(network_loc, molecule_loc, exclude_ids_loc,
                            split_by, batch_size, batch_size_test,
                            num_iterations, num_workers, k, p, ms)
    if training_only:
        loader_test = None
    return loader_train, loader_test
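# Hypothetical call site for _get_loader; every argument value below is a
# placeholder chosen for illustration, and MoleculeSpec is assumed to be
# importable and default-constructible in this project.
ms = MoleculeSpec()
loader_train, loader_test = _get_loader(
    network_loc='data/scaffold_network.txt',
    molecule_loc='data/molecules.smi',
    exclude_ids_loc='data/exclude_ids.txt',
    split_by='scaffold',
    batch_size=128,
    batch_size_test=256,
    num_iterations=50000,
    num_workers=2,
    full=False,
    training_only=False,
    k=5,
    p=0.5,
    ms=ms)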