def train(self, train_data, valid_data, test_data, configs,
          save_model=True, path='saved_models/default', verbose=False):
    self.controller.train()
    optimizer = optim.Adam(self.controller.parameters(),
                           lr=configs.agent.learning_rate)

    for epoch in range(configs.agent.num_epochs):
        # The controller emits an interleaved action sequence: even
        # positions pick a layer, odd positions pick the sharing branch.
        actions, log_probs = self.controller.sample()
        layer_IDs = actions[0::2]
        share = actions[1::2]
        layers = [self.search_space[s][l] for l, s in zip(layer_IDs, share)]

        if layers in self.sampled_architecture:
            # Architecture was already evaluated: reuse cached accuracies.
            idx = self.sampled_architecture.index(layers)
            accuracy = self.architecture_acc_val[idx]
            test_acc = self.architecture_acc_test[idx]
        else:
            model = MultiTaskModel(layers, self.architecture, self.task_info)
            accuracy = model.train(train_data=train_data,
                                   valid_data=valid_data,
                                   num_epochs=configs.model.num_epochs,
                                   learning_rate=configs.model.learning_rate,
                                   save_history=False,
                                   verbose=False)
            test_acc = model.eval(test_data)
            self.sampled_architecture.append(layers)
            self.architecture_acc_val.append(accuracy)
            self.architecture_acc_test.append(test_acc)

        self.history.append(test_acc)

        # Exponential moving-average baseline for variance reduction.
        if self.baseline is None:
            self.baseline = accuracy
        else:
            self.baseline = (configs.agent.baseline_decay * self.baseline
                             + (1 - configs.agent.baseline_decay) * accuracy)
        advantage = accuracy - self.baseline

        # REINFORCE update: reward is the validation accuracy.
        loss = (-log_probs * advantage).sum()
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if verbose:
            print('[Epoch {}] Accuracy: {}'.format(epoch + 1,
                                                   self.history[-1]))

    if save_model:
        self.save(path)
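# Usage sketch (assumption-heavy): `agent` stands for whatever object owns
# the train() method above, and the SimpleNamespace layout below is
# hypothetical; only the attribute paths (configs.agent.*, configs.model.*)
# are taken from the code itself.
from types import SimpleNamespace

configs = SimpleNamespace(
    agent=SimpleNamespace(learning_rate=3e-4, num_epochs=100,
                          baseline_decay=0.95),
    model=SimpleNamespace(num_epochs=5, learning_rate=1e-3),
)
# train_data, valid_data and test_data are assumed to be built elsewhere.
agent.train(train_data, valid_data, test_data, configs,
            save_model=True, path='saved_models/nas_run', verbose=True)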
def eval(self, train_data, test_data, configs):
    # Only the controller's sampling should run without gradients;
    # training the sampled child model must stay outside no_grad.
    with torch.no_grad():
        self.controller.eval()
        actions, _ = self.controller.sample(sample_best=True)

    layer_IDs = actions[0::2]
    share = actions[1::2]
    layers = [self.search_space[s][l] for l, s in zip(layer_IDs, share)]

    model = MultiTaskModel(layers, self.architecture, self.task_info)
    accuracy = model.train(train_data=train_data,
                           valid_data=test_data,
                           num_epochs=configs.model.num_epochs,
                           learning_rate=configs.model.learning_rate,
                           save_history=False,
                           verbose=False)
    return accuracy, layers
def eval(self, train_data, test_data, configs):
    # As above, restrict no_grad to the controller's sampling.
    with torch.no_grad():
        self.controller.eval()
        layer_IDs, shares, _ = self.controller.sample(sample_best=True)

    base_layers = [self.search_space[i] for i in layer_IDs]
    shares = [[s.item() for s in share] for share in shares]
    layers = [ShareLayer(layer=layer, share=share)
              for layer, share in zip(base_layers, shares)]

    model = MultiTaskModel(layers, self.architecture, self.task_info)
    accuracy = model.train(train_data=train_data,
                           valid_data=test_data,
                           num_epochs=configs.model.num_epochs,
                           learning_rate=configs.model.learning_rate,
                           save_history=False,
                           verbose=False)
    return accuracy, layers
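# Worked toy example of the interleaved action decoding used by train()
# and the first eval() variant (the action values here are made up):
actions = [2, 0, 1, 1, 3, 0]
layer_IDs = actions[0::2]   # [2, 1, 3]: layer index within a branch
share = actions[1::2]       # [0, 1, 0]: which branch of the search space
# self.search_space[s][l] then resolves each (l, s) pair to a concrete
# layer: search_space[0][2], search_space[1][1], search_space[0][3].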
def main(args):
    config = None
    model = None
    sentence_length = None
    num_examples = None
    sentences = None
    inputs = None
    targets = None
    input_names = None
    target_names = None
    name_to_name_to_indices = None

    # Load a saved model.
    if args.load_model:
        load_path = Path(args.config_or_model)
        if (not load_path.exists()) or (not load_path.is_dir()):
            print("Error: directory doesn't exist")
            return

        config_filename = load_path.joinpath("model_config.json")
        with config_filename.open('r', encoding='utf8') as fp:
            config = json.load(fp)
        index_filename = load_path.joinpath("name_to_index.json")
        with index_filename.open('r', encoding='utf8') as fp:
            name_to_name_to_indices = json.load(fp)

        sentence_length = config['sentence_length']
        model = MultiTaskModel(config, sentence_length, {}, {})
        model.load_model(load_path.joinpath("nn"))

        input_names = []
        target_names = []
        input_name_to_def = {}
        for i in config['inputs']:
            input_names.append(i['name'])
            input_name_to_def[i['name']] = i
        for t in config['tasks']:
            target_names.append(t['target'])

        num_examples, sentences, inputs, targets = \
            parse.parse_json_file_with_index(
                args.data_file, name_to_name_to_indices, input_names,
                target_names, sentence_length)

        # Missing inputs can be replaced by zero arrays of the right shape.
        for input_name in input_names:
            if input_name not in inputs:
                print("problem: model input \"{}\" not found in dataset "
                      "file, feeding zero values".format(input_name))
                input_def = input_name_to_def[input_name]
                input_type = input_def['type']
                array_shape = []
                if input_type == "vector_sequence":
                    array_shape = [num_examples, sentence_length,
                                   input_def['vector_length']]
                elif input_type == "class_sequence":
                    array_shape = [num_examples, sentence_length]
                elif input_type == "graph_structure":
                    array_shape = [num_examples, sentence_length,
                                   sentence_length]
                inputs[input_name] = (input_type, np.zeros(array_shape))

        for target_name in target_names:
            if target_name not in targets:
                print("problem: model target \"{}\" not found in dataset "
                      "file".format(target_name))
                # A saved model must not be retrained without the real
                # target data: feeding zero values instead would push the
                # weights towards predicting zeros and destroy the previous
                # training progress for this target.
                print("Shutting down")
                return

        print("Model loaded from: {}".format(args.config_or_model))

    # Create a new model.
    else:
        with open(args.config_or_model, 'r', encoding='utf8') as fp:
            config = json.load(fp)
        input_names = []
        target_names = []
        for i in config['inputs']:
            input_names.append(i['name'])
        for t in config['tasks']:
            target_names.append(t['target'])
        sentence_length, num_examples, sentences, inputs, targets, \
            name_to_name_to_indices = parse.parse_json_file(
                args.data_file, input_names, target_names)
        model = MultiTaskModel(config, sentence_length, inputs, targets)

    tmp_bs = args.batch_size
    if args.batch_size <= 0 or args.batch_size > num_examples:
        print("Error: batch size must be positive and no larger than the "
              "number of examples in the dataset; falling back to 32")
        tmp_bs = 32

    # Split the dataset into training and testing parts.
    if not args.cross_validate and (args.test_data_file is None):
        permutation = np.random.permutation(num_examples)
        split_index = int(num_examples * (1.0 - args.test_data_fraction))
        train_data = {}
        test_data = {}
        for x, y in inputs.items():
            if args.train_all_examples:
                train_data[x] = y[1]
            else:
                train_data[x] = y[1][permutation[0:split_index]]
            test_data[x] = y[1][permutation[split_index:]]
        for x, y in targets.items():
            if args.train_all_examples:
                train_data[x] = y[1]
            else:
                train_data[x] = y[1][permutation[0:split_index]]
            test_data[x] = y[1][permutation[split_index:]]
        test_sentences = []
        for i in range(split_index, num_examples):
            test_sentences.append(sentences[permutation[i]])

        model.train(train_data, args.steps, tmp_bs, quiet=args.quiet,
                    print_frequency=args.print_frequency)
        test_data_length = test_data[list(test_data.keys())[0]].shape[0]
        results = model.test_in_batches(test_data, tmp_bs, quiet=args.quiet)
        results = combine_accuracy_results(results, target_names, tmp_bs,
                                           test_data_length)
        model.print_test_status(results)

    # Use a separate dataset file for testing.
    elif args.test_data_file is not None:
        test_num_examples, test_sentences, test_inputs, test_targets = \
            parse.parse_json_file_with_index(
                args.test_data_file, name_to_name_to_indices, input_names,
                target_names, sentence_length)
        train_data = {}
        test_data = {}
        for x, y in inputs.items():
            train_data[x] = y[1]
        for x, y in targets.items():
            train_data[x] = y[1]
        for x, y in test_inputs.items():
            test_data[x] = y[1]
        for x, y in test_targets.items():
            test_data[x] = y[1]

        model.train(train_data, args.steps, tmp_bs, quiet=args.quiet,
                    print_frequency=args.print_frequency)
        test_data_length = test_data[list(test_data.keys())[0]].shape[0]
        results = model.test_in_batches(test_data, tmp_bs, quiet=args.quiet)
        results = combine_accuracy_results(results, target_names, tmp_bs,
                                           test_data_length)
        model.print_test_status(results)

    # Run cross-validation.
    else:
        data = {}
        for x, y in inputs.items():
            data[x] = y[1]
        for x, y in targets.items():
            data[x] = y[1]
        accuracies = cross_validate(model, args.num_repetitions,
                                    args.num_parts, num_examples, data,
                                    args.steps, tmp_bs, target_names,
                                    quiet=args.quiet)
        for target_name in target_names:
            print_cv_summary(target_name, accuracies[target_name])

    # Save the model.
    if args.save_model is not None:
        path = save_model(args.save_model, model, name_to_name_to_indices)
        if path is None:
            print("Error: model could not be saved")
        else:
            print("Model saved in directory {}".format(path))
def train(args):
    output_dir = os.path.join(args.output_dir, args.save_model_name)
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    logfilename = (time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
                   + " " + args.save_model_name + ".log.txt")
    fh = logging.FileHandler(os.path.join(output_dir, logfilename),
                             mode='a', encoding='utf-8')
    fh.setLevel(logging.INFO)
    ch = logging.StreamHandler(sys.stdout)
    ch.setLevel(logging.INFO)
    logger.addHandler(fh)
    logger.addHandler(ch)

    # Load data.
    logger.info("****** Loading Datasets ******")
    tokenizer = select_tokenizer(args.model)
    dataset_names = args.datasets.split(" ")
    with open(args.datasets_config) as f:
        datasets_config = json.load(f)

    train_datasets = []
    dev_datasets = []
    task_list = []
    for task_id, dataset in enumerate(dataset_names):
        # Task IDs are assigned in the order the datasets are listed.
        config = datasets_config[dataset]
        if args.do_preprocess:
            single_train_dataset = SingleTaskDataset(
                tokenizer=tokenizer,
                path=config["train_dir"],
                is_training=True,
                task_id=config['task_id'],
                is_pair=config["is_pair"],
                task_type=TaskType[config["task_type"]],
                batch_size=config["train_batch_size"],
                max_seq_length=config["max_seq_length"])
            single_dev_dataset = SingleTaskDataset(
                tokenizer=tokenizer,
                path=config["dev_dir"],
                is_training=True,
                task_id=config['task_id'],
                is_pair=config["is_pair"],
                task_type=TaskType[config["task_type"]],
                batch_size=config["dev_batch_size"],
                max_seq_length=config["max_seq_length"])
            torch.save(single_train_dataset,
                       os.path.join(args.dataset_features_dir,
                                    dataset + ".train"))
            torch.save(single_dev_dataset,
                       os.path.join(args.dataset_features_dir,
                                    dataset + ".dev"))
        else:
            single_train_dataset = torch.load(
                os.path.join(args.dataset_features_dir, dataset + ".train"))
            single_dev_dataset = torch.load(
                os.path.join(args.dataset_features_dir, dataset + ".dev"))
        task_list.append((config["task_id"], TaskType[config["task_type"]]))
        train_datasets.append(single_train_dataset)
        dev_datasets.append(single_dev_dataset)

    train_collater = Collater(dropout_w=args.collater_dropout)
    multi_task_datasets = MultiTaskDataset(train_datasets)
    multi_task_batch_sampler = MultiTaskBatchSampler(train_datasets,
                                                     mix_opt=0,
                                                     extra_task_ratio=0)
    train_dataloader = DataLoader(multi_task_datasets,
                                  batch_sampler=multi_task_batch_sampler,
                                  collate_fn=train_collater.collate_fn,
                                  pin_memory=torch.cuda.is_available())

    dev_dataloaders = []
    for dataset in dev_datasets:
        all_input_ids = torch.tensor(
            [item['sample'].select_field("input_ids") for item in dataset],
            dtype=torch.long)
        all_token_type_ids = torch.ByteTensor(
            [item['sample'].select_field("segment_ids") for item in dataset])
        all_attention_mask = torch.ByteTensor(
            [item['sample'].select_field("input_mask") for item in dataset])
        all_labels = torch.tensor([item['sample'].label for item in dataset],
                                  dtype=torch.long)
        dev_dataset = TensorDataset(all_input_ids, all_token_type_ids,
                                    all_attention_mask, all_labels)
        sampler = SequentialSampler(dev_dataset)
        dev_dataloader = DataLoader(dev_dataset, sampler=sampler,
                                    batch_size=dataset.get_batch_size())
        dev_dataloaders.append((dataset.get_task_id(),
                                dataset.get_task_type(), dev_dataloader))

    # Prepare the model.
    status = {}
    if args.do_finetune:
        # Resume from the last checkpoint recorded in status.json.
        model_dir = os.path.join(args.output_dir, args.save_model_name)
        status = json.load(open(os.path.join(model_dir, 'status.json')))
        epoch = status["current_epoch"]
        current_model = os.path.join(model_dir,
                                     'checkpoint-{}.model'.format(epoch))
        model = MultiTaskModel(args.model, cache_dir=args.cache_dir,
                               task_list=task_list)
        model.load_state_dict(torch.load(current_model))
    else:
        # First run: start from the pretrained backbone.
        model = MultiTaskModel(args.model, cache_dir=args.cache_dir,
                               task_list=task_list)
        status["best_epoch"] = 0
        status["current_epoch"] = 0
        status['best_dev_accuracy'] = 0
    model.cuda()

    num_train_optimization_steps = (len(train_dataloader)
                                    * args.num_train_epochs)
    optimizer = AdamW(model.parameters(), eps=args.eps,
                      lr=args.learning_rate, correct_bias=False)
    if args.do_finetune:
        scheduler = get_linear_schedule_with_warmup(
            optimizer, num_warmup_steps=0,
            num_training_steps=num_train_optimization_steps)
    else:
        scheduler = get_linear_schedule_with_warmup(
            optimizer, num_warmup_steps=args.num_warmup_steps,
            num_training_steps=num_train_optimization_steps)

    # Train.
    logger.info("***** Running training *****")
    best_dev_accuracy = status.get('best_dev_accuracy', 0)
    best_dev_epoch = status.get('best_epoch', 0)
    no_up = 0
    global_step = 0
    epoch_tqdm = trange(status["current_epoch"], int(args.num_train_epochs),
                        desc="Epoch")
    for epoch in epoch_tqdm:
        model.train()
        train_loss = 0.0
        avg_loss = 0.0
        for step, (batch_meta, batch_data) in enumerate(
                tqdm(train_dataloader, desc="Iteration")):
            # batch_meta, batch_data = Collater.patch_data(
            #     args.cuda, batch_meta, batch_data)
            batch_data[1] = batch_data[1].byte()
            batch_data[2] = batch_data[2].byte()
            batch_data = [item.cuda() for item in batch_data]
            loss, logits = model(batch_meta, batch_data)
            loss.backward()
            train_loss += loss.item()
            global_step += 1
            # Print the running average loss every 500 steps.
            if step % 500 == 0 or step == len(train_dataloader) - 1:
                avg_loss = train_loss / (step + 1)
                logger.info(
                    "\t average_step_loss=%s @ step = %s on epoch = %s",
                    str(avg_loss), str(global_step), str(epoch + 1))
            optimizer.step()
            scheduler.step()
            model.zero_grad()

        # Evaluate.
        dev_result, dev_accuracy = evaluate(model, dev_dataloaders)
        for res in dev_result:
            logger.info("%s dev loss: %s, dev acc: %s",
                        TaskName[res[0]], str(res[1]), str(res[2]))
        logger.info("total dev acc: %s", str(dev_accuracy))

        if dev_accuracy > best_dev_accuracy:
            # New best model.
            status['best_dev_accuracy'] = dev_accuracy
            best_dev_accuracy = dev_accuracy
            best_dev_epoch = epoch + 1
            status["best_epoch"] = epoch + 1
            no_up = 0
        else:
            no_up += 1

        torch.save(model.state_dict(),
                   os.path.join(output_dir,
                                'checkpoint-{}.model'.format(epoch + 1)))
        logger.info("\t epoch %s saved to %s", str(epoch + 1), output_dir)
        status["current_epoch"] = epoch + 1
        with open(os.path.join(output_dir, 'status.json'), "w") as fs:
            json.dump(status, fs)

        # Early stopping after `patience` epochs without improvement.
        if no_up >= args.patience:
            epoch_tqdm.close()
            break
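# train() relies on an evaluate() helper that is not shown here. Below is a
# minimal sketch inferred from the call sites: it must return a pair of
# (per-task results, overall accuracy), where each result is a
# (task_id, loss, accuracy) tuple. The (batch_meta, batch_data) interface
# and the batch_meta dict layout are assumptions, not the project's API.
def evaluate(model, dev_dataloaders):
    model.eval()
    results = []
    total_correct = 0
    total_examples = 0
    with torch.no_grad():
        for task_id, task_type, dataloader in dev_dataloaders:
            task_loss = 0.0
            task_correct = 0
            task_examples = 0
            for input_ids, token_type_ids, attention_mask, labels in dataloader:
                batch_data = [input_ids.cuda(), token_type_ids.cuda(),
                              attention_mask.cuda(), labels.cuda()]
                # Assumed meta layout; the real Collater may differ.
                batch_meta = {'task_id': task_id, 'task_type': task_type}
                loss, logits = model(batch_meta, batch_data)
                task_loss += loss.item()
                preds = logits.argmax(dim=-1)
                task_correct += (preds == batch_data[3]).sum().item()
                task_examples += labels.size(0)
            task_acc = task_correct / max(task_examples, 1)
            results.append((task_id, task_loss / max(len(dataloader), 1),
                            task_acc))
            total_correct += task_correct
            total_examples += task_examples
    return results, total_correct / max(total_examples, 1)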
class QueryModel:
    def __init__(self, model_file, identifier):
        self.config = None
        self.model = None
        self.sentence_length = None
        self.name_to_name_to_indices = None
        self.logger = logging.getLogger('root.Model-{}'.format(identifier))

        load_path = Path(model_file)
        if (not load_path.exists()) or (not load_path.is_dir()):
            self.logger.error(
                "model directory {} doesn't exist".format(model_file))

        config_filename = load_path.joinpath("model_config.json")
        with config_filename.open('r', encoding='utf8') as fp:
            self.config = json.load(fp)
        index_filename = load_path.joinpath("name_to_index.json")
        with index_filename.open('r', encoding='utf8') as fp:
            self.name_to_name_to_indices = json.load(fp)

        self.sentence_length = self.config['sentence_length']
        self.model = MultiTaskModel(self.config, self.sentence_length, {}, {})
        self.model.load_model(load_path.joinpath("nn"))

        self.input_names = []
        self.target_name_to_def = {}
        self.input_name_to_def = {}
        self.name_to_index_to_name = {}
        for i in self.config['inputs']:
            input_name = i['name']
            self.input_names.append(input_name)
            self.input_name_to_def[input_name] = i
        for t in self.config['tasks']:
            target_name = t['target']
            self.target_name_to_def[target_name] = t
            # Build the inverse mapping from class index back to class name.
            index_to_name = {}
            for x, y in self.name_to_name_to_indices[target_name].items():
                index_to_name[y] = x
            self.name_to_index_to_name[target_name] = index_to_name

    def query(self, query_input):
        num_examples, sentences, inputs, targets = \
            parse.parse_json_file_with_index(
                query_input, self.name_to_name_to_indices,
                self.input_names, [], self.sentence_length)

        # Missing inputs are replaced by zero arrays of the right shape.
        for input_name in self.input_names:
            if input_name not in inputs:
                self.logger.warning(
                    "problem: model input \"{}\" not found in dataset "
                    "file, feeding zero values".format(input_name))
                input_def = self.input_name_to_def[input_name]
                input_type = input_def['type']
                array_shape = []
                if input_type == "vector_sequence":
                    array_shape = [num_examples, self.sentence_length,
                                   input_def['vector_length']]
                elif input_type == "class_sequence":
                    array_shape = [num_examples, self.sentence_length]
                elif input_type == "graph_structure":
                    array_shape = [num_examples, self.sentence_length,
                                   self.sentence_length]
                inputs[input_name] = (input_type, np.zeros(array_shape))

        data = {}
        for x, y in inputs.items():
            data[x] = y[1]
        results = self.model.query(data)
        return results
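# Usage sketch; the paths below are placeholders. QueryModel expects the
# directory layout written by save_model(): model_config.json,
# name_to_index.json and the nn/ weights directory.
qm = QueryModel('saved_models/run1', identifier='worker-0')
results = qm.query('queries/batch.json')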
args = parser.parse_args()

if args.small:
    train_dir = 'data_small/'
    label_im_dir = 'label_small/images'
    label_dir = 'label_small/labels'

model = MultiTaskModel(num_inputs=num_inputs,
                       image_shape=image_shape,
                       num_labels=num_labels,
                       attention=args.attention,
                       pix2pix=args.pix2pix,
                       two_stage=args.twostage)

# Load one batch to infer the input shape and build the model's weights.
d1 = np.load("{}/0v.npy".format(train_dir), allow_pickle=True)
print(d1[:, :1].shape)
print("Building Model...")
model.build(d1[:, :1])