Example #1
0
def train(parameters):
    """Build the train/test datasets and run the trainer.

    Args:
        parameters: configuration mapping. This function reads the keys
            ``save_pred``, ``seed``, ``train_data``, ``embeds`` and
            ``test_data``; the mapping itself is also forwarded to the
            loaders, the dataset builders and the ``Trainer``.
    """
    # Directory where the model is saved,
    # e.g. './results/docred-dev/docred_basebert_full/'
    log_tag = parameters['save_pred'] + '_train'
    model_folder = setup_log(parameters, log_tag)
    set_seed(parameters['seed'])

    # ---------------------------- data loading ----------------------------
    print('加载训练数据 ...')
    train_loader = DataLoader(parameters['train_data'], parameters)
    train_loader(embeds=parameters['embeds'], parameters=parameters)
    train_dataset = DocRelationDataset(train_loader, 'train', parameters,
                                       train_loader)
    # The second element of the train result is not used.
    train_data, _ = train_dataset()

    print('\nLoading testing data ...')
    # The test loader is constructed with the train loader as a third argument.
    test_loader = DataLoader(parameters['test_data'], parameters, train_loader)
    test_loader(parameters=parameters)
    test_dataset = DocRelationDataset(test_loader, 'test', parameters,
                                      train_loader)
    test_data, prune_recall = test_dataset()

    # ------------------------------ training ------------------------------
    splits = {'train': train_data, 'test': test_data}
    trainer = Trainer(train_loader, parameters, splits, model_folder,
                      prune_recall)
    trainer.run()
Example #2
0
def train(parameters):
    """Load both data splits, run training, then optionally plot and save.

    Args:
        parameters: configuration mapping. Keys read here: ``seed``,
            ``train_data``, ``embeds``, ``test_data``, ``plot`` and
            ``save_model``. The mapping is also passed to the loaders,
            dataset builders and the ``Trainer``.
    """
    model_folder = setup_log(parameters, 'train')
    set_seed(parameters['seed'])

    # ---------------------------- data loading ----------------------------
    print('Loading training data ...')
    train_loader = DataLoader(parameters['train_data'], parameters)
    train_loader(embeds=parameters['embeds'])
    train_dataset = DocRelationDataset(train_loader, 'train', parameters,
                                       train_loader)
    train_data = train_dataset()

    print('\nLoading testing data ...')
    test_loader = DataLoader(parameters['test_data'], parameters)
    test_loader()
    # The test dataset is built with the *train* loader as its last argument.
    test_dataset = DocRelationDataset(test_loader, 'test', parameters,
                                      train_loader)
    test_data = test_dataset()

    # ------------------------------ training ------------------------------
    splits = {'train': train_data, 'test': test_data}
    trainer = Trainer(train_loader, parameters, splits, model_folder)
    trainer.run()

    # Optional post-training steps, each gated by its own config flag.
    if parameters['plot']:
        plot_learning_curve(trainer, model_folder)
    if parameters['save_model']:
        save_model(model_folder, trainer, train_loader)
def train(parameters):
    """Build (or reload) the datasets, train, and write metrics/plots.

    The processed loader and datasets are pickled into the directory given
    by ``parameters['remodelfile']`` under the names ``train_loader.pkl``,
    ``train_data.pkl``, ``test_data.pkl`` and ``prune_recall.pkl``.

    Args:
        parameters: configuration mapping. Keys read here: ``save_pred``,
            ``seed``, ``remodelfile``, ``train_data``, ``embeds``,
            ``test_data`` and ``plot``. The mapping is also forwarded to
            the loaders, dataset builders and the ``Trainer``.
    """
    model_folder = setup_log(parameters, parameters['save_pred'] + '_train')
    set_seed(parameters['seed'])

    ###################################
    # Data Loading
    ###################################
    # Cache reuse is switched off: data is always rebuilt and the cache files
    # are overwritten. Flip this to True to reload a previously written cache.
    use_cache = False
    processed_dataset = parameters['remodelfile']
    cache_names = ('train_loader', 'train_data', 'test_data', 'prune_recall')
    cache_paths = {
        name: os.path.join(processed_dataset, name + '.pkl')
        for name in cache_names
    }

    # Require *every* cache file, not just the first one: a partially written
    # cache would otherwise fail halfway through loading.
    if use_cache and all(os.path.exists(p) for p in cache_paths.values()):
        cached = {}
        for name, path in cache_paths.items():
            # NOTE: unpickling can execute arbitrary code; only point
            # 'remodelfile' at a trusted directory.
            with open(path, 'rb') as f:
                cached[name] = pkl.load(f)
        train_loader = cached['train_loader']
        train_data = cached['train_data']
        test_data = cached['test_data']
        prune_recall = cached['prune_recall']
    else:
        train_loader = DataLoader(parameters['train_data'], parameters)
        train_loader(embeds=parameters['embeds'], parameters=parameters)
        train_data, _ = DocRelationDataset(train_loader, 'train', parameters, train_loader).__call__()

        print('\nLoading testing data ...')
        test_loader = DataLoader(parameters['test_data'], parameters, train_loader)
        test_loader(parameters=parameters)
        test_data, prune_recall = DocRelationDataset(test_loader, 'test', parameters, train_loader).__call__()

        # Make sure the cache directory exists before writing into it.
        # An empty path means "current directory", which needs no creation.
        if processed_dataset:
            os.makedirs(processed_dataset, exist_ok=True)
        to_cache = {
            'train_loader': train_loader,
            'train_data': train_data,
            'test_data': test_data,
            'prune_recall': prune_recall,
        }
        for name, obj in to_cache.items():
            with open(cache_paths[name], 'wb') as f:
                pkl.dump(obj, f, pkl.HIGHEST_PROTOCOL)

    ###################################
    # Training
    ###################################
    trainer = Trainer(train_loader, parameters, {'train': train_data, 'test': test_data}, model_folder, prune_recall)

    trainer.run()
    write_metrics(trainer, model_folder)

    if parameters['plot']:
        plot_learning_curve(trainer, model_folder)
        plot_P_R(trainer, model_folder)
def _test(parameters):
    """Evaluate a saved model on the test split and dump two metric arrays.

    The processed test data is cached as ``test_test_data.pkl`` and
    ``test_prune_recall.pkl`` inside ``parameters['remodelfile']`` and is
    reused on later runs when both files are present. The last two values
    returned by ``eval_epoch`` are written to ``p.txt`` and ``r.txt`` in
    that same directory.

    Args:
        parameters: configuration mapping. Keys read here: ``save_pred``,
            ``remodelfile`` and ``test_data``. The mapping is also
            forwarded to the loader, dataset builder and ``Trainer``.
    """
    model_folder = setup_log(parameters, parameters['save_pred'] + '_test')

    print('\nLoading mappings ...')
    train_loader = load_mappings(parameters['remodelfile'])
    use_cache = True
    print('\nLoading testing data ...')
    processed_dataset = parameters['remodelfile']
    data_path = os.path.join(processed_dataset, 'test_test_data.pkl')
    recall_path = os.path.join(processed_dataset, 'test_prune_recall.pkl')

    # Both files must exist; checking only one would crash on a partial cache.
    # NOTE(review): the cache is keyed by directory only, so it is not
    # invalidated when parameters['test_data'] changes -- delete the .pkl
    # files manually in that case.
    if use_cache and os.path.exists(data_path) and os.path.exists(recall_path):
        # NOTE: unpickling can execute arbitrary code; only point
        # 'remodelfile' at a trusted directory.
        with open(data_path, 'rb') as f:
            test_data = pkl.load(f)
        with open(recall_path, 'rb') as f:
            prune_recall = pkl.load(f)
    else:
        test_loader = DataLoader(parameters['test_data'], parameters, train_loader)
        test_loader(parameters=parameters)
        test_data, prune_recall = DocRelationDataset(test_loader, 'test', parameters, train_loader).__call__()
        with open(data_path, 'wb') as f:
            pkl.dump(test_data, f, pkl.HIGHEST_PROTOCOL)
        with open(recall_path, 'wb') as f:
            pkl.dump(prune_recall, f, pkl.HIGHEST_PROTOCOL)

    # No training split is needed for evaluation, hence the empty list.
    m = Trainer(train_loader, parameters, {'train': [], 'test': test_data}, model_folder, prune_recall)
    trainer = load_model(parameters['remodelfile'], m)
    _, _, _, p, r = trainer.eval_epoch(final=True, save_predictions=True)

    print('Saving test metrics ... ', end="")
    # Build the output paths the same way as the cache paths above.
    np.savetxt(os.path.join(processed_dataset, 'p.txt'), p)
    np.savetxt(os.path.join(processed_dataset, 'r.txt'), r)
    print('DONE')
Example #5
0
def train(parameters):
    """Train on the training split, run a final evaluation, optionally plot.

    Args:
        parameters: configuration mapping. Keys read here: ``train_data``,
            ``embeds``, ``unk_w_prob``, ``test_data`` and ``plot``. The
            mapping is also passed to the loaders and the ``Trainer``.
    """
    model_folder = setup_log(parameters, 'train')

    # The seed is fixed to 0 here rather than read from the configuration.
    set_seed(0)

    # ---------------------------- data loading ----------------------------
    print('\nLoading training data ...')
    train_loader = DataLoader(parameters['train_data'], parameters)
    train_loader(embeds=parameters['embeds'])
    train_dataset = RelationDataset(train_loader, 'train',
                                    parameters['unk_w_prob'], train_loader)
    train_data = train_dataset()

    print('\nLoading testing data ...')
    test_loader = DataLoader(parameters['test_data'], parameters)
    test_loader()
    # The test dataset is built with the *train* loader as its last argument.
    test_dataset = RelationDataset(test_loader, 'test',
                                   parameters['unk_w_prob'], train_loader)
    test_data = test_dataset()

    # ------------------------------ training ------------------------------
    splits = {'train': train_data, 'test': test_data}
    trainer = Trainer(splits, parameters, train_loader, model_folder)
    trainer.run()

    # Final evaluation pass, asking the trainer to save its predictions.
    trainer.eval_epoch(final=True, save_predictions=True)
    if parameters['plot']:
        plot_learning_curve(trainer, model_folder)
Example #6
0
def test(parameters):
    """Reload a saved model from the log folder and evaluate it.

    Args:
        parameters: configuration mapping. The key ``test_data`` is read
            here; the mapping is also passed to ``setup_log``, the loader,
            the dataset builder and the ``Trainer``.
    """
    model_folder = setup_log(parameters, 'test')

    print('\nLoading mappings ...')
    # Mappings are loaded from the same folder that setup_log returned.
    train_loader = load_mappings(model_folder)

    print('\nLoading testing data ...')
    test_loader = DataLoader(parameters['test_data'], parameters)
    test_loader()
    dataset = DocRelationDataset(test_loader, 'test', parameters,
                                 train_loader)
    test_data = dataset()

    # The train split is left empty for evaluation.
    splits = {'train': [], 'test': test_data}
    skeleton = Trainer(train_loader, parameters, splits, model_folder)
    trainer = load_model(model_folder, skeleton)
    trainer.eval_epoch(final=True, save_predictions=True)
Example #7
0
def _test(parameters):
    """Evaluate the model stored under ``parameters['remodelfile']``.

    Args:
        parameters: configuration mapping. Keys read here: ``save_pred``,
            ``remodelfile`` and ``test_data``. The mapping is also passed
            to the loader, the dataset builder and the ``Trainer``.
    """
    log_tag = parameters['save_pred'] + '_test'
    model_folder = setup_log(parameters, log_tag)

    print('\nLoading mappings ...')
    train_loader = load_mappings(parameters['remodelfile'])

    print('\nLoading testing data ...')
    # The test loader is constructed with the loaded mappings as third argument.
    test_loader = DataLoader(parameters['test_data'], parameters, train_loader)
    test_loader(parameters=parameters)
    dataset = DocRelationDataset(test_loader, 'test', parameters,
                                 train_loader)
    test_data, prune_recall = dataset()

    # The train split is left empty for evaluation.
    splits = {'train': [], 'test': test_data}
    m = Trainer(train_loader, parameters, splits, model_folder, prune_recall)
    trainer = load_model(parameters['remodelfile'], m)
    trainer.eval_epoch(final=True, save_predictions=True)
Example #8
0
    def train(self):
        """Train, run a final evaluation, save the model and return the score.

        Reads ``self.train_data``, ``self.test_data``, ``self.parameters``,
        ``self.train_loader`` and ``self.model_folder``.

        Returns:
            float: ``trainer.best_score`` converted to ``float``.
        """
        ###################################
        # TRAINING
        ###################################
        trainer = Trainer({'train': self.train_data, 'test': self.test_data},
                          self.parameters, self.train_loader, self.model_folder)
        trainer.run()

        trainer.eval_epoch(final=True, save_predictions=True)
        # Use the instance attributes here: the bare names ``model_folder``,
        # ``train_loader`` and ``parameters`` are not locals of this method.
        save_model(self.model_folder, trainer.model, self.train_loader)
        if self.parameters['plot']:
            plot_learning_curve(trainer, self.model_folder)

        return float(trainer.best_score)
Example #9
0
def test(parameters):
    """Reload pickled mappings and a saved model, then evaluate on test data.

    Args:
        parameters: configuration mapping. Keys read here: ``test_data``
            and ``unk_w_prob``. The mapping is also passed to
            ``setup_log``, the loader and the ``Trainer``.
    """
    print('*** Testing Model ***')
    model_folder = setup_log(parameters, 'test')

    print('Loading mappings ...')
    mappings_path = os.path.join(model_folder, 'mappings.pkl')
    with open(mappings_path, 'rb') as f:
        loader = pkl.load(f)

    print('Loading testing data ...')
    test_loader = DataLoader(parameters['test_data'], parameters)
    test_loader()
    dataset = RelationDataset(test_loader, 'test', parameters['unk_w_prob'],
                              loader)
    test_data = dataset()

    # The train split is left empty for evaluation.
    splits = {'train': [], 'test': test_data}
    m = Trainer(splits, parameters, loader, model_folder)
    trainer = load_model(model_folder, m)
    trainer.eval_epoch(final=True, save_predictions=True)