Esempio n. 1
0
    def train(self,
              train_data,
              valid_data,
              test_data,
              configs,
              save_model=True,
              path='saved_models/default',
              verbose=False
              ):
        """REINFORCE-style neural architecture search loop.

        Each agent epoch: sample an architecture from the controller,
        train/evaluate a child model for it (or reuse cached results for
        architectures seen before), then update the controller with a
        policy-gradient step against an exponential-moving-average baseline.

        Args:
            train_data: training set handed to each sampled child model.
            valid_data: validation set; its accuracy is the RL reward.
            test_data: test set, evaluated once per new architecture and
                recorded in ``self.history``.
            configs: configuration object; reads ``configs.agent.learning_rate``,
                ``configs.agent.num_epochs``, ``configs.agent.baseline_decay``
                and ``configs.model.*`` (child-model training settings).
            save_model: if True, save the agent to ``path`` after every epoch.
            path: directory passed to ``self.save``.
            verbose: if True, print the latest test accuracy each epoch.
        """

        self.controller.train()

        optimizer = optim.Adam(self.controller.parameters(), lr=configs.agent.learning_rate)

        for epoch in range(configs.agent.num_epochs):
            # The sampled action sequence interleaves (layer id, sharing
            # choice) pairs, hence the even/odd slicing below.
            actions, log_probs = self.controller.sample()
            layer_IDs = actions[0::2]
            share = actions[1::2]
            layers = [self.search_space[s][l] for l, s in zip(layer_IDs, share)]

            # Reuse cached accuracies if this exact architecture was
            # already trained, avoiding redundant child-model training.
            if layers in self.sampled_architecture:
                idx = self.sampled_architecture.index(layers)
                accuracy = self.architecture_acc_val[idx]
                test_acc = self.architecture_acc_test[idx]

            else:
                # Train a fresh child model for the sampled layers and
                # memoize its validation/test accuracy.
                model = MultiTaskModel(layers, self.architecture, self.task_info)
                accuracy = model.train(train_data=train_data,
                                       valid_data=valid_data,
                                       num_epochs=configs.model.num_epochs,
                                       learning_rate=configs.model.learning_rate,
                                       save_history=False,
                                       verbose=False
                                       )
                test_acc = model.eval(test_data)

                self.sampled_architecture.append(layers)
                self.architecture_acc_val.append(accuracy)
                self.architecture_acc_test.append(test_acc)

            self.history.append(test_acc)

            # Moving-average baseline for variance reduction.  Note the
            # baseline is updated *before* the advantage is computed, so the
            # advantage is taken against the already-decayed value.
            if self.baseline is None:
                self.baseline = accuracy
            else:
                self.baseline = configs.agent.baseline_decay * self.baseline + (1 - configs.agent.baseline_decay) * accuracy

            # REINFORCE: minimizing -log_prob * advantage maximizes the
            # expected (baseline-adjusted) reward.
            advantage = accuracy - self.baseline
            loss = -log_probs * advantage
            loss = loss.sum()

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if verbose:
                print('[Epoch {}] Accuracy: {}'.format(epoch + 1, self.history[-1]))

            if save_model:
                self.save(path)
Esempio n. 2
0
    def eval(self, train_data, test_data, configs):
        """Decode the controller's best architecture, train it, and score it.

        The controller is put into eval mode and its highest-probability
        action sequence is decoded without tracking gradients.  A fresh
        MultiTaskModel built from the decoded layers is then trained on
        *train_data* (validating against *test_data*).

        Returns:
            (accuracy, layers): the child model's accuracy and the list of
            layers that define the chosen architecture.
        """
        with torch.no_grad():
            self.controller.eval()

            actions, _ = self.controller.sample(sample_best=True)
            # Actions interleave (layer id, sharing choice) pairs.
            layers = []
            for layer_id, share_id in zip(actions[0::2], actions[1::2]):
                layers.append(self.search_space[share_id][layer_id])

        child = MultiTaskModel(layers, self.architecture, self.task_info)
        accuracy = child.train(
            train_data=train_data,
            valid_data=test_data,
            num_epochs=configs.model.num_epochs,
            learning_rate=configs.model.learning_rate,
            save_history=False,
            verbose=False,
        )

        return accuracy, layers
Esempio n. 3
0
    def eval(self, train_data, test_data, configs):
        """Decode the controller's best architecture, train it, and score it.

        Returns:
            (accuracy, layers): the trained child model's accuracy (validated
            on *test_data*) and the list of ``ShareLayer`` objects that
            define the architecture.
        """
        with torch.no_grad():
            self.controller.eval()
            layer_IDs, shares, _ = self.controller.sample(sample_best=True)
            # NOTE(review): `search_space` is not `self.`-qualified here,
            # unlike sibling implementations -- presumably a module-level
            # global in this file; confirm it is actually in scope.
            layers = [search_space[i] for i in layer_IDs]
            # Convert per-layer share tensors to plain Python scalars.
            shares = [[s.item() for s in share] for share in shares]

            layers = [ShareLayer(layer=layer, share=share) for layer, share in zip(layers, shares)]

        model = MultiTaskModel(layers, self.architecture, self.task_info)
        accuracy = model.train(train_data=train_data,
                               valid_data=test_data,
                               num_epochs=configs.model.num_epochs,
                               learning_rate=configs.model.learning_rate,
                               save_history=False,
                               verbose=False
                               )

        return accuracy, layers
Esempio n. 4
0
    def __init__(self, model_file, identifier):
        """Restore a saved multi-task model from a directory.

        Loads ``model_config.json``, ``name_to_index.json`` and the ``nn``
        weights from *model_file*, then builds the lookup tables used to
        translate between names and indices for inputs and targets.

        Parameters:
            model_file: path to the directory produced by a previous save.
            identifier: suffix used to name this instance's logger.

        Raises:
            FileNotFoundError: if *model_file* does not exist or is not a
                directory.
        """
        self.config = None
        self.model = None
        self.sentence_length = None
        self.name_to_name_to_indices = None
        self.logger = logging.getLogger('root.Model-{}'.format(identifier))

        load_path = Path(model_file)
        if (not load_path.exists()) or (not load_path.is_dir()):
            # Previously this only logged and fell through, producing a
            # confusing secondary crash when the config file was opened.
            # Fail fast instead; open() would raise FileNotFoundError anyway,
            # so callers see the same exception type, just earlier/clearer.
            self.logger.error("model directory {} doesn't exist".format(model_file))
            raise FileNotFoundError(
                "model directory {} doesn't exist".format(model_file))

        config_filename = load_path.joinpath("model_config.json")
        with config_filename.open('r', encoding='utf8') as fp:
            self.config = json.load(fp)

        index_filename = load_path.joinpath("name_to_index.json")
        with index_filename.open('r', encoding='utf8') as fp:
            self.name_to_name_to_indices = json.load(fp)

        self.sentence_length = self.config['sentence_length']

        # Rebuild the network shell (no data yet) and load saved weights.
        self.model = MultiTaskModel(self.config, self.sentence_length, {}, {})
        self.model.load_model(load_path.joinpath("nn"))

        self.input_names = []
        self.target_name_to_def = {}
        self.input_name_to_def = {}
        self.name_to_index_to_name = {}
        for i in self.config['inputs']:
            input_name = i['name']
            self.input_names.append(input_name)
            self.input_name_to_def[input_name] = i
        for t in self.config['tasks']:
            target_name = t['target']
            self.target_name_to_def[target_name] = t
            # Invert name->index into index->name for decoding predictions.
            index_to_name = {}
            for x, y in self.name_to_name_to_indices[target_name].items():
                index_to_name[y] = x
            self.name_to_index_to_name[target_name] = index_to_name
Esempio n. 5
0
def main(args):
    """Entry point: load or create a MultiTaskModel, train it, evaluate it.

    Behavior is selected by flags on *args*:
      * ``args.load_model``      -- restore a saved model directory instead
        of building a new model from a config file.
      * ``args.test_data_file``  -- evaluate on a separate dataset file.
      * ``args.cross_validate``  -- run cross-validation instead of a split.
      * ``args.save_model``      -- persist the model directory afterwards.
    """
    config = None
    model = None
    sentence_length = None
    num_examples = None
    sentences = None
    inputs = None
    targets = None
    input_names = None
    target_names = None
    name_to_name_to_indices = None

    #load a saved model
    if args.load_model:
        load_path = Path(args.config_or_model)
        if (not load_path.exists()) or (not load_path.is_dir()):
            # NOTE(review): only prints; execution continues and will crash
            # later when the config file is opened -- consider failing fast.
            print("Error: directory doesn't exist")

        config_filename = load_path.joinpath("model_config.json")
        with config_filename.open('r', encoding='utf8') as fp:
            config = json.load(fp)

        index_filename = load_path.joinpath("name_to_index.json")
        with index_filename.open('r', encoding='utf8') as fp:
            name_to_name_to_indices = json.load(fp)

        sentence_length = config['sentence_length']
        # Rebuild the network shell (empty inputs/targets) and load weights.
        model = MultiTaskModel(config, config['sentence_length'], {}, {})
        model.load_model(load_path.joinpath("nn"))
        input_names = []
        target_names = []
        input_name_to_def = {}
        for i in config['inputs']:
            input_names.append(i['name'])
            input_name_to_def[i['name']] = i
        for t in config['tasks']:
            target_names.append(t['target'])

        num_examples, sentences, inputs, targets = parse.parse_json_file_with_index(
            args.data_file, name_to_name_to_indices, input_names, target_names,
            sentence_length)

        # Inputs the model expects but the dataset lacks are zero-filled so
        # the graph can still be fed; shape depends on the input type.
        for input_name in input_names:
            if not input_name in inputs:
                print(
                    "problem: model input \"{}\" not found in dataset file, feeding zero values"
                    .format(input_name))
                input_def = input_name_to_def[input_name]
                input_type = input_def['type']
                array_shape = []
                if input_type == "vector_sequence":
                    array_shape = [
                        num_examples, sentence_length,
                        input_def['vector_length']
                    ]
                elif input_type == "class_sequence":
                    array_shape = [num_examples, sentence_length]
                elif input_type == "graph_structure":
                    array_shape = [
                        num_examples, sentence_length, sentence_length
                    ]

                inputs[input_name] = (input_type, np.zeros(array_shape))

        # Missing targets, unlike missing inputs, must abort the run.
        for target_name in target_names:
            if not target_name in targets:
                print("problem: model target \"{}\" not found in dataset file".
                      format(target_name))
                #a saved model should not be retrained without necessary target data
                #because if zero values are fed to the model instead of the real target values
                #the weights of the model will change to produce zero target value
                #this will destroy the previous training progress for this target
                print("Shutting down")
                return

        print("Model loaded from: {}".format(args.config_or_model))
    #create a new model
    else:
        with open(args.config_or_model, 'r', encoding='utf8') as fp:
            config = json.load(fp)

        input_names = []
        target_names = []
        for i in config['inputs']:
            input_names.append(i['name'])
        for t in config['tasks']:
            target_names.append(t['target'])

        sentence_length, num_examples, sentences, inputs, targets, name_to_name_to_indices = parse.parse_json_file(
            args.data_file, input_names, target_names)
        model = MultiTaskModel(config, sentence_length, inputs, targets)

    # Fall back to batch size 32 when the requested size is unusable.
    tmp_bs = args.batch_size
    if args.batch_size <= 0 or args.batch_size > num_examples:
        print(
            "Error: batch size negative or greater than number of examples in dataset"
        )
        tmp_bs = 32

    #split dataset into training and testing part
    if not args.cross_validate and (args.test_data_file is None):
        #split dataset into train and test parts
        permutation = np.random.permutation(num_examples)
        split_index = int(num_examples * (1.0 - args.test_data_fraction))

        train_data = {}
        test_data = {}
        for x, y in inputs.items():
            if args.train_all_examples:
                train_data[x] = y[1]
            else:
                train_data[x] = y[1][permutation[0:split_index]]

            test_data[x] = y[1][permutation[split_index:]]

        for x, y in targets.items():
            if args.train_all_examples:
                train_data[x] = y[1]
            else:
                train_data[x] = y[1][permutation[0:split_index]]

            test_data[x] = y[1][permutation[split_index:]]

        # NOTE(review): test_sentences is built here but never used in this
        # branch -- possibly left over from a reporting feature.
        test_sentences = []
        for i in range(split_index, num_examples):
            test_sentences.append(sentences[permutation[i]])

        model.train(train_data,
                    args.steps,
                    tmp_bs,
                    quiet=args.quiet,
                    print_frequency=args.print_frequency)
        test_data_length = test_data[list(test_data.keys())[0]].shape[0]
        results = model.test_in_batches(test_data, tmp_bs, quiet=args.quiet)
        results = combine_accuracy_results(results, target_names, tmp_bs,
                                           test_data_length)
        model.print_test_status(results)
    #use separate dataset file for testing
    elif not args.test_data_file is None:
        test_num_examples, test_sentences, test_inputs, test_targets = parse.parse_json_file_with_index(
            args.test_data_file, name_to_name_to_indices, input_names,
            target_names, sentence_length)

        train_data = {}
        test_data = {}
        for x, y in inputs.items():
            train_data[x] = y[1]

        for x, y in targets.items():
            train_data[x] = y[1]

        for x, y in test_inputs.items():
            test_data[x] = y[1]

        for x, y in test_targets.items():
            test_data[x] = y[1]

        model.train(train_data,
                    args.steps,
                    tmp_bs,
                    quiet=args.quiet,
                    print_frequency=args.print_frequency)
        test_data_length = test_data[list(test_data.keys())[0]].shape[0]
        results = model.test_in_batches(test_data, tmp_bs, quiet=args.quiet)
        results = combine_accuracy_results(results, target_names, tmp_bs,
                                           test_data_length)
        model.print_test_status(results)
    #run cross validation
    else:
        data = {}
        for x, y in inputs.items():
            data[x] = y[1]

        for x, y in targets.items():
            data[x] = y[1]

        accuracies = cross_validate(model,
                                    args.num_repetitions,
                                    args.num_parts,
                                    num_examples,
                                    data,
                                    args.steps,
                                    tmp_bs,
                                    target_names,
                                    quiet=args.quiet)

        for target_name in target_names:
            print_cv_summary(target_name, accuracies[target_name])

    #save model
    if not args.save_model is None:
        path = save_model(args.save_model, model, name_to_name_to_indices)
        if path is None:
            print("Error: model could not be saved")
        else:
            print("Model saved in directory {}".format(path))
Esempio n. 6
0
def train(args):
    """Train a MultiTaskModel across several datasets with early stopping.

    Loads (or preprocesses) one SingleTaskDataset per named dataset, builds
    a mixed-batch train loader and per-task dev loaders, then optionally
    resumes from a saved checkpoint (``args.do_finetune``) before running
    the epoch loop.  Checkpoints and a ``status.json`` progress file are
    written to ``args.output_dir/args.save_model_name`` after every epoch;
    training stops early after ``args.patience`` epochs without dev-accuracy
    improvement.
    """
    output_dir = os.path.join(args.output_dir, args.save_model_name)
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    # NOTE(review): the timestamp contains ':' characters, which are not
    # valid in Windows filenames -- confirm this only runs on POSIX systems.
    logfilename = time.strftime(
        "%Y-%m-%d %H:%M:%S",
        time.localtime()) + " " + args.save_model_name + ".log.txt"
    fh = logging.FileHandler(os.path.join(output_dir, logfilename),
                             mode='a',
                             encoding='utf-8')
    fh.setLevel(logging.INFO)
    ch = logging.StreamHandler(sys.stdout)
    ch.setLevel(logging.INFO)
    logger.addHandler(fh)
    logger.addHandler(ch)

    # load data
    logger.info("****** Loading Datasets ******")
    tokenizer = select_tokenizer(args.model)
    dataset_names = args.datasets.split(" ")
    with open(args.datasets_config) as f:
        datasets_config = json.load(f)
    train_datasets = []
    dev_datasets = []
    task_list = []
    for task_id, dataset in enumerate(dataset_names):  # assign task ids in dataset order
        # NOTE(review): the enumerate() task_id is never used; the id comes
        # from config['task_id'] instead -- confirm this is intentional.
        config = datasets_config[dataset]
        if args.do_preprocess:
            # Tokenize from raw files and cache the feature tensors to disk.
            single_train_dataset = SingleTaskDataset(
                tokenizer=tokenizer,
                path=config["train_dir"],
                is_training=True,
                task_id=config['task_id'],
                is_pair=config["is_pair"],
                task_type=TaskType[config["task_type"]],
                batch_size=config["train_batch_size"],
                max_seq_length=config["max_seq_length"])

            single_dev_dataset = SingleTaskDataset(
                tokenizer=tokenizer,
                path=config["dev_dir"],
                is_training=True,
                task_id=config['task_id'],
                is_pair=config["is_pair"],
                task_type=TaskType[config["task_type"]],
                batch_size=config["dev_batch_size"],
                max_seq_length=config["max_seq_length"])
            # single_train_dataset.load_data()
            # single_dev_dataset.load_data
            torch.save(
                single_train_dataset,
                os.path.join(args.dataset_features_dir, dataset + ".train"))
            torch.save(
                single_dev_dataset,
                os.path.join(args.dataset_features_dir, dataset + ".dev"))
        else:
            # Reuse the cached feature tensors from a previous preprocess run.
            single_train_dataset = torch.load(
                os.path.join(args.dataset_features_dir, dataset + ".train"))
            single_dev_dataset = torch.load(
                os.path.join(args.dataset_features_dir, dataset + ".dev"))
    # task_list.append(TaskType[config["task_type"]])
        task_list.append((config["task_id"], TaskType[config["task_type"]]))
        train_datasets.append(single_train_dataset)
        dev_datasets.append(single_dev_dataset)
    train_collater = Collater(dropout_w=args.collater_dropout)
    multi_task_datasets = MultiTaskDataset(train_datasets)
    multi_task_batch_sampler = MultiTaskBatchSampler(train_datasets,
                                                     mix_opt=0,
                                                     extra_task_ratio=0)
    train_dataloader = DataLoader(multi_task_datasets,
                                  batch_sampler=multi_task_batch_sampler,
                                  collate_fn=train_collater.collate_fn,
                                  pin_memory=torch.cuda.is_available())

    # Build one sequential TensorDataset loader per dev set for evaluation.
    dev_dataloaders = []
    for dataset in dev_datasets:
        all_input_ids = torch.tensor(
            [item['sample'].select_field("input_ids") for item in dataset],
            dtype=torch.long)
        all_token_type_ids = torch.ByteTensor(
            [item['sample'].select_field("segment_ids") for item in dataset])
        all_attention_mask = torch.ByteTensor(
            [item['sample'].select_field("input_mask") for item in dataset])
        all_labels = torch.tensor([item['sample'].label for item in dataset],
                                  dtype=torch.long)
        dev_dataset = TensorDataset(all_input_ids, all_token_type_ids,
                                    all_attention_mask, all_labels)
        sampler = SequentialSampler(dev_dataset)
        dev_dataloader = DataLoader(dev_dataset,
                                    sampler=sampler,
                                    batch_size=dataset.get_batch_size())
        dev_dataloaders.append(
            (dataset.get_task_id(), dataset.get_task_type(), dev_dataloader))

    # prepare model
    status = {}
    # NOTE(review): tasklist (deduplicated) appears unused below; task_list
    # is what gets passed to the model -- confirm before removing.
    tasklist = list(set(task_list))
    if args.do_finetune:
        # Resume: restore model weights and epoch counter from status.json.
        model_dir = os.path.join(args.output_dir, args.save_model_name)
        status = json.load(open(os.path.join(model_dir, 'status.json')))
        epoch = status["current_epoch"]
        current_model = os.path.join(model_dir,
                                     'checkpoint-{}.model'.format(epoch))
        # model = XLNetForMultipleChoice.from_pretrained(current_model)
        model = MultiTaskModel(args.model,
                               cache_dir=args.cache_dir,
                               task_list=task_list)
        model.load_state_dict(torch.load(current_model))
        model.cuda()
        # dev_result,dev_accuracy= evaluate(model,dev_dataloaders)
        # for res in dev_result:
        #     logger.info("%s dev loss: %s, dev acc: %s",TaskName[res[0]],str(res[1]),str(res[2]))
        # logger.info("total dev acc: %s",str(dev_accuracy))
    # elif args.do_finetune_best:
    #     return
    else:
        # first time
        model = MultiTaskModel(args.model,
                               cache_dir=args.cache_dir,
                               task_list=task_list)
        status["best_epoch"] = 0
        status["current_epoch"] = 0
        status['best_dev_accuracy'] = 0
        model.cuda()

    num_train_optimization_steps = len(
        train_dataloader) * args.num_train_epochs
    optimizer = AdamW(model.parameters(),
                      eps=args.eps,
                      lr=args.learning_rate,
                      correct_bias=False)
    # When resuming, skip warmup; otherwise warm up per args.num_warmup_steps.
    if args.do_finetune:
        scheduler = get_linear_schedule_with_warmup(
            optimizer,
            num_warmup_steps=0,
            num_training_steps=num_train_optimization_steps)
    else:
        scheduler = get_linear_schedule_with_warmup(
            optimizer,
            num_warmup_steps=args.num_warmup_steps,
            num_training_steps=num_train_optimization_steps)
    # scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=2, num_training_steps=num_train_optimization_steps)

    # train
    logger.info("***** Running training *****")

    # NOTE(review): when resuming, best_dev_accuracy restarts at 0 rather
    # than status['best_dev_accuracy'], so the first post-resume epoch is
    # always treated as a new best -- confirm this is intended.
    best_dev_accuracy = 0
    best_dev_epoch = 0
    no_up = 0
    global_step = 0

    model.cuda()
    epoch_tqdm = trange(status["current_epoch"],
                        int(args.num_train_epochs),
                        desc="Epoch")
    for epoch in epoch_tqdm:
        model.train()
        train_loss = 0.0
        avg_loss = 0.0

        for step, (batch_meta, batch_data) in enumerate(
                tqdm(train_dataloader, desc="Iteration")):

            #
            batch_meta, batch_data = Collater.patch_data(
                args.cuda, batch_meta, batch_data)
            # pdb.set_trace()
            # Cast token-type ids and attention mask to uint8 tensors.
            batch_data[1] = batch_data[1].byte()
            batch_data[2] = batch_data[2].byte()

            batch_data = [item.cuda() for item in batch_data]
            # print([item.shape for item in batch_data])
            loss, logits = model(batch_meta, batch_data)
            # pdb.set_trace()
            loss.backward()

            train_loss += loss.item()

            global_step += 1
            if step % 500 == 0 or step == len(
                    train_dataloader
            ) - 1:  # print step average loss every 500 step
                avg_loss = train_loss / (step + 1)
                logger.info(
                    "\t average_step_loss=%s @ step = %s on epoch = %s",
                    str(avg_loss), str(global_step), str(epoch + 1))

            optimizer.step()
            scheduler.step()  # Update scheduler
            model.zero_grad()

        # evalute
        dev_result, dev_accuracy = evaluate(model, dev_dataloaders)
        for res in dev_result:
            logger.info("%s dev loss: %s, dev acc: %s", TaskName[res[0]],
                        str(res[1]), str(res[2]))
        logger.info("total dev acc: %s", str(dev_accuracy))

        if dev_accuracy > best_dev_accuracy:
            # New best model.
            status['best_dev_accuracy'] = dev_accuracy
            best_dev_accuracy = dev_accuracy
            best_dev_epoch = epoch + 1
            status["best_epoch"] = epoch + 1
            no_up = 0
        else:
            no_up += 1

        # Checkpoint every epoch regardless of dev performance.
        torch.save(
            model.state_dict(),
            os.path.join(output_dir, 'checkpoint-{}.model'.format(epoch + 1)))
        logger.info("\t epoch %s saved to %s", str(epoch + 1), output_dir)

        status["current_epoch"] = epoch + 1
        with open(os.path.join(output_dir, 'status.json'), "w") as fs:
            json.dump(status, fs)
        # Early stopping after `patience` epochs without improvement.
        if no_up >= args.patience:
            epoch_tqdm.close()
            break
Esempio n. 7
0
class QueryModel:
    """Wraps a saved MultiTaskModel for inference on JSON query inputs."""

    def __init__(self, model_file, identifier):
        """Restore a saved model directory and build name/index lookups.

        Parameters:
            model_file: directory containing ``model_config.json``,
                ``name_to_index.json`` and the ``nn`` weights.
            identifier: suffix used to name this instance's logger.
        """
        self.config = None
        self.model = None
        self.sentence_length = None
        self.name_to_name_to_indices = None
        self.logger = logging.getLogger('root.Model-{}'.format(identifier))
    
        load_path = Path(model_file)
        if (not load_path.exists()) or (not load_path.is_dir()):
            # NOTE(review): only logs; execution continues and will crash
            # when opening the config file -- consider failing fast.
            self.logger.error("model directory {} doesn't exist".format(model_file))

        config_filename = load_path.joinpath("model_config.json")
        with config_filename.open('r', encoding='utf8') as fp:
            self.config = json.load(fp)

        index_filename = load_path.joinpath("name_to_index.json")
        with index_filename.open('r', encoding='utf8') as fp:
            self.name_to_name_to_indices = json.load(fp)

        self.sentence_length = self.config['sentence_length']

        # Rebuild the network shell (no data yet) and load saved weights.
        self.model = MultiTaskModel(self.config, self.sentence_length, {}, {})
        self.model.load_model(load_path.joinpath("nn"))

        self.input_names = []
        self.target_name_to_def = {}
        self.input_name_to_def = {}
        self.name_to_index_to_name = {}
        for i in self.config['inputs']:
            input_name = i['name']
            self.input_names.append(input_name)
            self.input_name_to_def[input_name] = i
        for t in self.config['tasks']:
            target_name = t['target']
            self.target_name_to_def[target_name] = t
            # Invert name->index into index->name for decoding predictions.
            index_to_name = {}
            for x, y in self.name_to_name_to_indices[target_name].items():
                index_to_name[y] = x
            self.name_to_index_to_name[target_name] = index_to_name

    def query(self, query_input):
        """Parse *query_input* and run the model on it, returning raw results.

        Inputs the model expects but the query lacks are zero-filled (with a
        warning) so the graph can still be fed; shape depends on input type.
        """
        num_examples, sentences, inputs, targets = parse.parse_json_file_with_index(query_input, self.name_to_name_to_indices, self.input_names, [], self.sentence_length)

        for input_name in self.input_names:
            if not input_name in inputs:
                self.logger.warning("problem: model input \"{}\" not found in dataset file, feeding zero values".format(input_name))
                input_def = self.input_name_to_def[input_name]
                input_type = input_def['type']
                array_shape = [] 
                if input_type == "vector_sequence":
                    array_shape = [num_examples, self.sentence_length, input_def['vector_length']]
                elif input_type == "class_sequence":
                    array_shape = [num_examples, self.sentence_length]
                elif input_type == "graph_structure":
                    array_shape = [num_examples, self.sentence_length, self.sentence_length]
                
                inputs[input_name] = (input_type, np.zeros(array_shape))                 

        # The model consumes plain arrays, not the (type, array) tuples.
        data = {}
        for x, y in inputs.items():
            data[x] = y[1]

        results = self.model.query(data)
        return results
Esempio n. 8
0
    args = parser.parse_args()

    # if(len(sys.argv)==2):
    #   trainableVariablesFile = sys.argv[1]
    #   trainableVariables = pickle.load(open(trainableVariablesFile,'rb'))
    #   model = MultiTaskModel(num_inputs=num_inputs,image_shape=image_shape,num_labels=num_labels,trainableVariables=trainableVariables)
    # else:
    #   model = MultiTaskModel(num_inputs=num_inputs,image_shape=image_shape,num_labels=num_labels)

    # Use the small debugging dataset when --small is passed.
    # NOTE(review): when args.small is False, train_dir / label_im_dir /
    # label_dir are presumably defined earlier outside this excerpt --
    # confirm, otherwise the np.load below raises NameError.
    if args.small:
        train_dir = 'data_small/'
        label_im_dir = 'label_small/images'
        label_dir = 'label_small/labels'
    model = MultiTaskModel(num_inputs=num_inputs,
                           image_shape=image_shape,
                           num_labels=num_labels,
                           attention=args.attention,
                           pix2pix=args.pix2pix,
                           two_stage=args.twostage)

    # Build the model graph from a single sample to fix input shapes.
    d1 = np.load("{}/0v.npy".format(train_dir), allow_pickle=True)
    print(d1[:, :1].shape)
    print("Building Model...")
    model.build(d1[:, :1])

    # fig, ax = plt.subplots(3, 3)
    # # print(res[0][0])
    # # try:
    # ax[0,0].imshow(res[0][0])
    # ax[1,0].imshow(res[1][0])
    # ax[2,0].imshow(res[2][0])
    # ax[0,1].imshow(res[3][0])