Example 1
# Use brown clusters
with gzip.open("features/bc3200.pickle.gz") as fin:
    print('Load Brown clusters for creating features ...')
    brown_clusters = pickle.load(fin)
# print(brown_clusters)
data_helper = DataHelper(max_action_feat_num=330000,
                         max_relation_feat_num=300000,
                         min_action_feat_occur=1,
                         min_relation_feat_occur=1,
                         brown_clusters=brown_clusters)
if args.preprocess:
    preprocess.preprocess_data(args)
if args.prepare:
    # Create training data
    data_helper.create_data_helper(data_dir=args.data_dir,
                                   output_dir=args.output_dir,
                                   parse_type=args.parse_type,
                                   isFlat=args.isFlat)
    data_helper.save_data_helper(
        os.path.join(args.output_dir, args.parse_type, "TRAINING",
                     "data_helper.bin"))
if args.train:
    data_helper.load_data_helper(
        os.path.join(args.output_dir, args.parse_type, "TRAINING",
                     "data_helper.bin"))
    data_helper.load_train_data(data_dir=args.data_dir,
                                output_dir=args.output_dir,
                                parse_type=args.parse_type,
                                isFlat=args.isFlat)
    train_model(data_helper)
if args.eval:
    # Evaluate models on the RST-DT test set (the snippet is cut off here
    # in the source; the completion below mirrors Example 2 and is an
    # assumption, including the model directory and the evaluation path)
    evaluator = Evaluator(model_dir=os.path.join(args.output_dir, 'model'))
    evaluator.eval_parser(path=args.data_dir, report=True,
                          bcvocab=brown_clusters, draw=False)
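The snippet above references command-line flags without showing where they are defined. Below is a minimal sketch of the argparse setup it implies; every option name is inferred from the attribute accesses (args.preprocess, args.data_dir, and so on) rather than taken from the source.

import argparse

def parse_args():
    # Hypothetical reconstruction of the CLI implied by Example 1.
    parser = argparse.ArgumentParser()
    parser.add_argument('--preprocess', action='store_true',
                        help='preprocess the raw data')
    parser.add_argument('--prepare', action='store_true',
                        help='create and save the DataHelper')
    parser.add_argument('--train', action='store_true',
                        help='train the parsing model')
    parser.add_argument('--eval', action='store_true',
                        help='evaluate on the RST-DT test set')
    parser.add_argument('--data_dir', help='input data directory')
    parser.add_argument('--output_dir', help='output directory')
    parser.add_argument('--parse_type', help='parse type subdirectory')
    parser.add_argument('--isFlat', action='store_true',
                        help='build flat (non-hierarchical) structures')
    return parser.parse_args()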
Example 2
    parser.add_argument('--eval_dir', help='eval data directory')
    return parser.parse_args()


if __name__ == '__main__':
    args = parse_args()
    # Use brown clusters
    with gzip.open("../data/resources/bc3200.pickle.gz") as fin:
        print('Load Brown clusters for creating features ...')
        brown_clusters = pickle.load(fin)
    data_helper = DataHelper(max_action_feat_num=330000,
                             max_relation_feat_num=300000,
                             min_action_feat_occur=1,
                             min_relation_feat_occur=1,
                             brown_clusters=brown_clusters)
    if args.prepare:
        # Create training data
        data_helper.create_data_helper(data_dir=args.train_dir)
        data_helper.save_data_helper('../data/data_helper.bin')
    if args.train:
        data_helper.load_data_helper('../data/data_helper.bin')
        data_helper.load_train_data(data_dir=args.train_dir)
        train_model(data_helper)
    if args.eval:
        # Evaluate models on the RST-DT test set
        evaluator = Evaluator(model_dir='../data/model')
        evaluator.eval_parser(path=args.eval_dir,
                              report=True,
                              bcvocab=brown_clusters,
                              draw=False)
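Example 2 fixes the stage ordering implicitly: --prepare writes ../data/data_helper.bin and --train reads it back, so prepare must run at least once before train. A stripped-down programmatic version of the same chain follows; DataHelper and train_model come from the example's own module, and the training directory is a placeholder, not a path from the source.

import gzip
import pickle

# Load the Brown clusters once; feature creation and evaluation both use them.
with gzip.open("../data/resources/bc3200.pickle.gz") as fin:
    brown_clusters = pickle.load(fin)

helper = DataHelper(max_action_feat_num=330000,
                    max_relation_feat_num=300000,
                    min_action_feat_occur=1,
                    min_relation_feat_occur=1,
                    brown_clusters=brown_clusters)

train_dir = "../data/TRAINING"  # placeholder path, not from the source

# prepare, then train: load_data_helper reads the file save_data_helper wrote.
helper.create_data_helper(data_dir=train_dir)
helper.save_data_helper('../data/data_helper.bin')
helper.load_data_helper('../data/data_helper.bin')
helper.load_train_data(data_dir=train_dir)
train_model(helper)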
Example 3
    print("Config:", config)

    data_helper = DataHelper()
    train_dirname = (args.train_dir[:-1] if args.train_dir[-1] == os.sep else
                     args.train_dir).split(os.sep)[-1]
    HELPER_PATH = f"..{os.sep}data{os.sep}{train_dirname}_data_helper_rst.bin"
    print("Helper path:", HELPER_PATH)

    if args.prepare:
        # Create training data
        #coref_model = CorefScore(higher_order=True).to(config[DEVICE])
        coref_model = CorefScore().to(config[DEVICE])

        coref_trainer = Trainer(coref_model, [], [], [], debug=False)

        data_helper.create_data_helper(args.train_dir, config, coref_trainer)
        data_helper.save_data_helper(HELPER_PATH)

    if args.train:
        train_model_coref(data_helper, config)

    if args.eval:
        # Evaluate models on the RST-DT test set
        data_helper.load_data_helper(HELPER_PATH)

        parser = get_discourse_parser(data_helper, config)
        parser.load('../data/model/' + config[MODEL_NAME])
        print("Evaluating")
        with torch.no_grad():
            evaluator = Evaluator(parser, data_helper, config)
            # The original call's remaining arguments are cut off in the
            # source snippet; it is closed here to stay syntactically valid.
            evaluator.eval_parser(None)
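Example 3 pulls its settings out of a config dictionary indexed by module-level constants such as DEVICE and MODEL_NAME, and wraps evaluation in torch.no_grad() so no gradient state is tracked at inference time. A minimal sketch of such a config, assuming plain string constants (the repository's real definitions live elsewhere):

import torch

# Hypothetical key constants; the source defines its own elsewhere.
DEVICE = "device"
MODEL_NAME = "model_name"

config = {
    DEVICE: torch.device("cuda" if torch.cuda.is_available() else "cpu"),
    MODEL_NAME: "rst_coref_parser.pt",  # placeholder file name
}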
Example 4
        FINETUNE_MEGADT: args.finetune_megadt,
        UNTRAINED_ROBERTA: args.untrained_roberta,
        NUM_EPOCHS: 20
    }

    data_helper = DataHelper()

    config[DATASET_TYPE] = args.dataset_type
    assert config[DATASET_TYPE] in [RST, INSTR, MEGA_DT]
    assert not (config[ROBERTA_ONLY] and config[ORG_FEATS_ONLY])
    helper_path = os.path.join('../data/',
                               "data_helper_" + config[DATASET_TYPE] + ".bin")
    model_name = args.model_name

    if args.prepare:
        data_helper.create_data_helper(args.train_dir, config)
        data_helper.save_data_helper(helper_path)
    if args.train:
        if not os.path.isdir('../data/model'):
            os.mkdir('../data/model')
        if args.model_name is None:
            raise Exception("Please provide a model name")
        train_model(data_helper, helper_path, config)
    if args.eval:
        data_helper.load_data_helper(helper_path)
        clf = NeuralClassifier(data_helper, config)
        parser = NeuralRstParser(clf, config, save_dir=None)
        parser.load(
            os.path.join('../data/model/',
                         args.model_name + "_" + str(config[EPOCH_START])))
        clf.eval()
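As in Example 3, the configuration keys (FINETUNE_MEGADT, DATASET_TYPE, and so on) are module-level constants defined elsewhere in the repository. The first assert limits runs to the three supported corpora, and the second forbids enabling the RoBERTa-only and organizational-features-only ablations at the same time. The sketch below shows plausible definitions so those asserts read concretely; all values here are assumptions.

# Hypothetical constant definitions matching the keys used in Example 4;
# the repository's real values may differ.
FINETUNE_MEGADT = "finetune_megadt"
UNTRAINED_ROBERTA = "untrained_roberta"
NUM_EPOCHS = "num_epochs"
DATASET_TYPE = "dataset_type"
ROBERTA_ONLY = "roberta_only"
ORG_FEATS_ONLY = "org_feats_only"
EPOCH_START = "epoch_start"
MODEL_NAME = "model_name"

# Identifiers for the three supported datasets.
RST, INSTR, MEGA_DT = "rst", "instr", "mega_dt"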