# Use brown clusters with gzip.open("features/bc3200.pickle.gz") as fin: print('Load Brown clusters for creating features ...') brown_clusters = pickle.load(fin) # print(brown_clusters) data_helper = DataHelper(max_action_feat_num=330000, max_relation_feat_num=300000, min_action_feat_occur=1, min_relation_feat_occur=1, brown_clusters=brown_clusters) if args.preprocess: preprocess.preprocess_data(args) if args.prepare: # Create training data data_helper.create_data_helper(data_dir=args.data_dir, output_dir=args.output_dir, parse_type=args.parse_type, isFlat=args.isFlat) data_helper.save_data_helper( os.path.join(args.output_dir, args.parse_type, "TRAINING", "data_helper.bin")) if args.train: data_helper.load_data_helper( os.path.join(args.output_dir, args.parse_type, "TRAINING", "data_helper.bin")) data_helper.load_train_data(data_dir=args.data_dir, output_dir=args.output_dir, parse_type=args.parse_type, isFlat=args.isFlat) train_model(data_helper) if args.eval: # Evaluate models on the RST-DT test set
    # (tail of an argument-parsing function whose `def` line is above this chunk)
    parser.add_argument('--eval_dir', help='eval data directory')
    return parser.parse_args()


if __name__ == '__main__':
    args = parse_args()
    # Load pre-trained Brown word clusters used as lexical features.
    # NOTE(review): pickle.load — trusted only because the file ships with the repo.
    # Use brown clusters
    with gzip.open("../data/resources/bc3200.pickle.gz") as fin:
        print('Load Brown clusters for creating features ...')
        brown_clusters = pickle.load(fin)
    # Feature caps/thresholds for the action and relation classifiers.
    data_helper = DataHelper(max_action_feat_num=330000,
                             max_relation_feat_num=300000,
                             min_action_feat_occur=1,
                             min_relation_feat_occur=1,
                             brown_clusters=brown_clusters)
    if args.prepare:
        # Create training data
        # NOTE(review): helper path is hard-coded relative to the CWD —
        # running from another directory will misplace the file.
        data_helper.create_data_helper(data_dir=args.train_dir)
        data_helper.save_data_helper('../data/data_helper.bin')
    if args.train:
        # Reload the helper produced by --prepare, then train.
        data_helper.load_data_helper('../data/data_helper.bin')
        data_helper.load_train_data(data_dir=args.train_dir)
        train_model(data_helper)
    if args.eval:
        # Evaluate models on the RST-DT test set
        evaluator = Evaluator(model_dir='../data/model')
        evaluator.eval_parser(path=args.eval_dir, report=True,
                              bcvocab=brown_clusters, draw=False)
# --- Driver tail: coreference-augmented RST parser variant (truncated chunk) ---
print("Config:", config)
data_helper = DataHelper()
# Derive the training-dir basename (strip a trailing separator first) so the
# cached helper file is keyed by dataset directory name.
train_dirname = (args.train_dir[:-1]
                 if args.train_dir[-1] == os.sep
                 else args.train_dir).split(os.sep)[-1]
HELPER_PATH = f"..{os.sep}data{os.sep}{train_dirname}_data_helper_rst.bin"
print("Helper path:", HELPER_PATH)

if args.prepare:
    # Create training data
    # Build a coreference scorer whose trainer is used while extracting features.
    #coref_model = CorefScore(higher_order=True).to(config[DEVICE])
    coref_model = CorefScore().to(config[DEVICE])
    # Empty lists: the trainer is used only for feature extraction here,
    # not for actual coref training — presumably train/dev/test splits; verify.
    coref_trainer = Trainer(coref_model, [], [], [], debug=False)
    data_helper.create_data_helper(args.train_dir, config, coref_trainer)
    data_helper.save_data_helper(HELPER_PATH)

if args.train:
    # NOTE(review): --train does NOT load HELPER_PATH here (unlike --eval) —
    # confirm train_model_coref loads it internally.
    train_model_coref(data_helper, config)

if args.eval:
    # Evaluate models on the RST-DT test set
    data_helper.load_data_helper(HELPER_PATH)
    parser = get_discourse_parser(data_helper, config)
    parser.load('../data/model/' + config[MODEL_NAME])
    print("Evaluating")
    # Inference only: disable autograd to save memory/time.
    with torch.no_grad():
        evaluator = Evaluator(parser, data_helper, config)
        evaluator.eval_parser(None,
        # (chunk truncated here — remaining eval_parser arguments outside this view)
    # (tail of a config-dict literal whose opening brace is above this chunk)
    FINETUNE_MEGADT: args.finetune_megadt,
    UNTRAINED_ROBERTA: args.untrained_roberta,
    NUM_EPOCHS: 20
}

data_helper = DataHelper()
config[DATASET_TYPE] = args.dataset_type
# NOTE(review): asserts are stripped under `python -O` — these validations
# would be safer as explicit raises.
assert config[DATASET_TYPE] in [RST, INSTR, MEGA_DT]
# RoBERTa-only and organizational-features-only modes are mutually exclusive.
assert not (config[ROBERTA_ONLY] and config[ORG_FEATS_ONLY])
# Cached helper file is keyed by dataset type (rst / instr / mega_dt).
helper_path = os.path.join('../data/', "data_helper_" + config[DATASET_TYPE] + ".bin")
model_name = args.model_name

if args.prepare:
    # Build and persist the training-data helper for later --train / --eval runs.
    data_helper.create_data_helper(args.train_dir, config)
    data_helper.save_data_helper(helper_path)

if args.train:
    # Ensure the model output directory exists before training writes to it.
    if not os.path.isdir('../data/model'):
        os.mkdir('../data/model')
    # NOTE(review): prefer `args.model_name is None` over `== None`.
    if args.model_name == None:
        raise Exception("Please provide model name")
    train_model(data_helper, helper_path, config)

if args.eval:
    # Rebuild classifier + parser and load the checkpoint saved at EPOCH_START.
    data_helper.load_data_helper(helper_path)
    clf = NeuralClassifier(data_helper, config)
    parser = NeuralRstParser(clf, config, save_dir=None)
    parser.load(
        os.path.join('../data/model/',
                     args.model_name + "_" + str(config[EPOCH_START])))
    # Switch the classifier to inference mode (disables dropout etc.).
    clf.eval()