Example #1
def train_and_eval_model(working_path, model_path, eval_data, C):
    parameter_settings = {'C': C}
    logging.info('Training model with C = {}'.format(C))
    model_path = '{}.C{}'.format(model_path, C)

    train_rst_parsing_model(working_path, model_path, parameter_settings)
    logging.info('Evaluating model with C = {}'.format(C))
    rst_parser = Parser(max_acts=1, max_states=1, n_best=1)
    rst_parser.load_model(model_path)
    results = predict_and_evaluate_rst_trees(None, None,
                                             rst_parser, eval_data,
                                             use_gold_syntax=True)
    return results
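A minimal usage sketch: the tuning scripts further down this page drive this
function through functools.partial over a grid of C values, so the same
pattern works standalone (the dev-file path here is hypothetical):

from functools import partial

with open('rst_discourse_tb_edus_DEVELOPMENT.json') as f:  # hypothetical path
    eval_data = json.load(f)

tune = partial(train_and_eval_model, 'working', 'rst_parsing_model', eval_data)
for C in (0.25, 1.0, 4.0):
    results = tune(C)
    print(C, results['labeled_f1'])  # metric key used by the tuning scripts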
Example #2
def train_and_eval_model(working_path, model_path, eval_data, C):
    parameter_settings = {'C': C}
    logging.info('Training model with C = {}'.format(C))
    model_path = '{}.C{}'.format(model_path, C)

    train_rst_parsing_model(working_path, model_path, parameter_settings)
    logging.info('Evaluating model with C = {}'.format(C))
    rst_parser = Parser(max_acts=1, max_states=1, n_best=1)
    rst_parser.load_model(model_path)
    results = predict_and_evaluate_rst_trees(None,
                                             None,
                                             rst_parser,
                                             eval_data,
                                             use_gold_syntax=True)
    return results
Example #3
def batch_process(docs, output_path, zpar_model_directory,
                  segmentation_model, parsing_model):
    '''
    docs is a list or tuple of (doc_id, text) tuples.
    '''
    syntax_parser = SyntaxParserWrapper(zpar_model_directory)
    segmenter = Segmenter(segmentation_model)

    parser = Parser(max_acts=1, max_states=1, n_best=1)
    parser.load_model(parsing_model)

    with open(output_path, 'w') as outfile:
        for doc_id, text in docs:
            logging.info('doc_id: {}'.format(doc_id))
            doc_dict = {"doc_id": doc_id, "raw_text": text}
            edu_tokens, complete_trees = \
                segment_and_parse(doc_dict, syntax_parser, segmenter, parser)
            print(json.dumps({"doc_id": doc_id, "edu_tokens": edu_tokens, \
                "scored_rst_trees": \
                [{"score": tree["score"],
                  "tree": tree["tree"].pprint(margin=TREE_PRINT_MARGIN)}
                 for tree in complete_trees]}), file=outfile)
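A short sketch of the expected input, per the docstring; the document IDs,
texts, and model paths below are all hypothetical:

docs = [('doc001', 'First paragraph of text.\n\nSecond paragraph.'),
        ('doc002', 'Another brief document.')]
batch_process(docs, 'parses.jsonl', 'zpar/english',
              'segmentation.model', 'rst_parsing.model')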
Example #4
def batch_process(docs, output_path, zpar_model_directory,
                  segmentation_model, parsing_model):
    '''
    docs is a list or tuple of (doc_id, text) tuples.
    '''
    syntax_parser = SyntaxParserWrapper(zpar_model_directory)
    segmenter = Segmenter(segmentation_model)

    parser = Parser(max_acts=1, max_states=1, n_best=1)
    parser.load_model(parsing_model)

    with open(output_path, 'w') as outfile:
        for doc_id, text in docs:
            logging.info('doc_id: {}'.format(doc_id))
            doc_dict = {"doc_id": doc_id, "raw_text": text}
            edu_tokens, complete_trees = \
                segment_and_parse(doc_dict, syntax_parser, segmenter, parser)
            print(json.dumps({"doc_id": doc_id, "edu_tokens": edu_tokens, \
                "scored_rst_trees": \
                [{"score": tree["score"],
                  "tree": tree["tree"].pformat(margin=TREE_PRINT_MARGIN)}
                 for tree in complete_trees]}), file=outfile)
Example #5
def test_reconstruct_training_examples():
    '''
    This code goes through the training data and makes sure
    that the actions extracted from the trees can be used to
    reconstruct those trees from a list of EDUs.
    '''

    train_path = 'rst_discourse_tb_edus_TRAINING_TRAIN.json'
    with open(train_path) as f:
        data = json.load(f)

    rst_parser = Parser(max_acts=1, max_states=1, n_best=1)
    for doc_dict in data:
        tree_orig = ParentedTree.fromstring(doc_dict['rst_tree'])
        actions = extract_parse_actions(tree_orig)

        tree2 = next(rst_parser.parse(doc_dict,
                                      gold_actions=actions,
                                      make_features=False))['tree']

        assert tree2 == tree_orig
        logging.info('test_reconstruct_training_examples verified tree '
                     'for {}'.format(doc_dict['path_basename']))
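The same round-trip can be run on a single document when debugging one
failing tree; a sketch, reusing the training file above:

with open('rst_discourse_tb_edus_TRAINING_TRAIN.json') as f:
    doc_dict = json.load(f)[0]  # just the first document

tree_orig = ParentedTree.fromstring(doc_dict['rst_tree'])
actions = extract_parse_actions(tree_orig)
rst_parser = Parser(max_acts=1, max_states=1, n_best=1)
tree_new = next(rst_parser.parse(doc_dict,
                                 gold_actions=actions,
                                 make_features=False))['tree']
assert tree_new == tree_orig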
Example #6
def main():
    import argparse
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('input_paths',
                        nargs='+',
                        help='A document to segment and parse.' +
                        ' Paragraphs should be separated by two or more' +
                        ' newline characters.')
    parser.add_argument('-g', '--segmentation_model',
                        help='Path to segmentation model.',
                        required=True)
    parser.add_argument('-p', '--parsing_model',
                        help='Path to RST parsing model.',
                        required=True)
    parser.add_argument('-a', '--max_acts',
                        help='Maximum number of actions to perform on each ' +
                        'state',
                        type=int, default=1)
    parser.add_argument('-n', '--n_best',
                        help='Number of parses to return', type=int, default=1)
    parser.add_argument('-s', '--max_states',
                        help='Maximum number of states to retain for \
                              best-first search',
                        type=int, default=1)
    parser.add_argument('-zp', '--zpar_port', type=int)
    parser.add_argument('-zh', '--zpar_hostname', default=None)
    parser.add_argument('-zm', '--zpar_model_directory', default=None)
    parser.add_argument('-v', '--verbose',
                        help='Print more status information. For every ' +
                        'additional time this flag is specified, ' +
                        'output gets more verbose.',
                        default=0, action='count')
    args = parser.parse_args()

    # Convert verbose flag to actual logging level.
    log_levels = [logging.WARNING, logging.INFO, logging.DEBUG]
    log_level = log_levels[min(args.verbose, 2)]
    # Make warnings from built-in warnings module get formatted more nicely.
    logging.captureWarnings(True)
    logging.basicConfig(format=('%(asctime)s - %(name)s - %(levelname)s - ' +
                                '%(message)s'), level=log_level)

    # Read the models.
    logging.info('Loading models')
    syntax_parser = \
        SyntaxParserWrapper(port=args.zpar_port, hostname=args.zpar_hostname,
                            zpar_model_directory=args.zpar_model_directory)
    segmenter = Segmenter(args.segmentation_model)

    parser = Parser(max_acts=args.max_acts,
                    max_states=args.max_states,
                    n_best=args.n_best)
    parser.load_model(args.parsing_model)

    for input_path in args.input_paths:
        logging.info('rst_parse input file: {}'.format(input_path))
        doc = read_text_file(input_path)

        logging.debug('rst_parse input. doc_id = {}, text = {}'
                      .format(input_path, doc))
        doc_dict = {"raw_text": doc, "doc_id": input_path}

        edu_tokens, complete_trees = segment_and_parse(doc_dict, syntax_parser,
                                                       segmenter, parser)

        print(json.dumps({"edu_tokens": edu_tokens, \
            "scored_rst_trees": [{"score": tree["score"],
                                  "tree": tree["tree"]
                                          .pprint(margin=TREE_PRINT_MARGIN)}
                                 for tree in complete_trees]}))
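Each input document yields one JSON record with the keys shown above; a
sketch of consuming such a record after redirecting stdout to a file (the
file name is hypothetical):

with open('parse_output.json') as f:
    record = json.loads(f.readline())
print(record['edu_tokens'][0])  # first EDU (assuming a list of token lists)
for scored in record['scored_rst_trees']:
    print(scored['score'])      # model score for this parse
    print(scored['tree'])       # bracketed RST tree as a string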
Example #7
def main():
    import argparse
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('input_paths',
                        nargs='+',
                        help='A document to segment and parse.' +
                        ' Paragraphs should be separated by two or more' +
                        ' newline characters.')
    parser.add_argument('-g',
                        '--segmentation_model',
                        help='Path to segmentation model.',
                        required=True)
    parser.add_argument('-p',
                        '--parsing_model',
                        help='Path to RST parsing model.',
                        required=True)
    parser.add_argument('-a',
                        '--max_acts',
                        help='Maximum number of actions to perform on each ' +
                        'state',
                        type=int,
                        default=1)
    parser.add_argument('-n',
                        '--n_best',
                        help='Number of parses to return',
                        type=int,
                        default=1)
    parser.add_argument('-s',
                        '--max_states',
                        help='Maximum number of states to retain for \
                              best-first search',
                        type=int,
                        default=1)
    parser.add_argument('-zp', '--zpar_port', type=int)
    parser.add_argument('-zh', '--zpar_hostname', default=None)
    parser.add_argument('-zm', '--zpar_model_directory', default=None)
    parser.add_argument('-v',
                        '--verbose',
                        help='Print more status information. For every ' +
                        'additional time this flag is specified, ' +
                        'output gets more verbose.',
                        default=0,
                        action='count')
    args = parser.parse_args()

    # Convert verbose flag to actual logging level.
    log_levels = [logging.WARNING, logging.INFO, logging.DEBUG]
    log_level = log_levels[min(args.verbose, 2)]
    # Make warnings from built-in warnings module get formatted more nicely.
    logging.captureWarnings(True)
    logging.basicConfig(format=('%(asctime)s - %(name)s - %(levelname)s - ' +
                                '%(message)s'),
                        level=log_level)

    # Read the models.
    logging.info('Loading models')
    syntax_parser = \
        SyntaxParserWrapper(port=args.zpar_port, hostname=args.zpar_hostname,
                            zpar_model_directory=args.zpar_model_directory)
    segmenter = Segmenter(args.segmentation_model)

    parser = Parser(max_acts=args.max_acts,
                    max_states=args.max_states,
                    n_best=args.n_best)
    parser.load_model(args.parsing_model)

    for input_path in args.input_paths:
        logging.info('rst_parse input file: {}'.format(input_path))
        doc = read_text_file(input_path)

        logging.debug('rst_parse input. doc_id = {}, text = {}'.format(
            input_path, doc))
        doc_dict = {"raw_text": doc, "doc_id": input_path}

        edu_tokens, complete_trees = segment_and_parse(doc_dict, syntax_parser,
                                                       segmenter, parser)

        # Materialize the results: a generator can't be iterated twice.
        complete_trees = list(complete_trees)

        print(json.dumps({"edu_tokens": edu_tokens, \
            "scored_rst_trees": [{"score": tree["score"],
                                  "tree": tree["tree"]
                                          .pformat(margin=TREE_PRINT_MARGIN)}
                                 for tree in complete_trees]}))

        for i, tree in enumerate(complete_trees, 1):
            ptree_str = repr(tree['tree']) + '\n'
            with codecs.open('{}_{}.parentedtree'.format(input_path, i),
                             'w', 'utf-8') as ptree_file:
                ptree_file.write(ptree_str)
Example #8
def main():
    import argparse
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('train_file',
                        help='Path to JSON training file.',
                        type=argparse.FileType('r'))
    parser.add_argument('eval_file',
                        help='Path to JSON dev or test file for ' +
                        'tuning/evaluation.',
                        type=argparse.FileType('r'))
    parser.add_argument('model_path',
                        help='Prefix for the path to where the model should be'
                        ' stored.  A suffix with the C value will be added.')
    parser.add_argument('-w',
                        '--working_path',
                        help='Path to where intermediate files should be ' +
                        'stored',
                        default='working')
    parser.add_argument('-C',
                        '--C_values',
                        help='Comma-separated list of model complexity ' +
                        'parameter settings to evaluate.',
                        default=','.join([str(2.0**x) for x in range(-4, 5)]))
    parser.add_argument('-v',
                        '--verbose',
                        help='Print more status information. For every ' +
                        'additional time this flag is specified, ' +
                        'output gets more verbose.',
                        default=0,
                        action='count')
    parser.add_argument('-s',
                        '--single_process',
                        action='store_true',
                        help='Run in a single process for all hyperparameter' +
                        ' grid points, to simplify debugging.')
    args = parser.parse_args()

    if os.path.exists(args.working_path):
        raise IOError("{} already exists.  Stopping here to avoid the "
                      "possibility of overwriting files that are currently "
                      "being used.".format(args.working_path))
    os.makedirs(args.working_path)

    parser = Parser(max_acts=1, max_states=1, n_best=1)

    # Convert verbose flag to actual logging level
    log_levels = [logging.WARNING, logging.INFO, logging.DEBUG]
    log_level = log_levels[min(args.verbose, 2)]
    # Make warnings from built-in warnings module get formatted more nicely
    logging.captureWarnings(True)
    logging.basicConfig(format=('%(asctime)s - %(name)s - %(levelname)s - ' +
                                '%(message)s'),
                        level=log_level)
    logger = logging.getLogger(__name__)

    logger.info('Extracting examples')
    train_data = json.load(args.train_file)
    eval_data = json.load(args.eval_file)

    train_examples = []

    for doc_dict in train_data:
        path_basename = doc_dict['path_basename']
        logging.info('Extracting examples for {}'.format(path_basename))
        tree = ParentedTree.fromstring(doc_dict['rst_tree'])
        collapse_rst_labels(tree)
        actions = extract_parse_actions(tree)

        for i, (action_str, feats) in \
                enumerate(parser.parse(doc_dict, gold_actions=actions)):
            example_id = "{}_{}".format(path_basename, i)
            example = {"x": Counter(feats), "y": action_str, "id": example_id}
            train_examples.append(example)
            # print("{} {}".format(action_str, " ".join(feats)))

    # train and evaluate a model for each value of C
    best_labeled_f1 = -1.0
    best_C = None

    # train and evaluate models with different C values in parallel
    C_values = [float(x) for x in args.C_values.split(',')]
    partial_train_and_eval_model = partial(train_and_eval_model,
                                           args.working_path, args.model_path,
                                           eval_data)

    # Make the SKLL jsonlines feature file
    train_path = os.path.join(args.working_path, 'rst_parsing.jsonlines')
    with open(train_path, 'w') as train_file:
        for example in train_examples:
            train_file.write('{}\n'.format(json.dumps(example)))

    if args.single_process:
        all_results = [
            partial_train_and_eval_model(C_value) for C_value in C_values
        ]
    else:
        n_workers = len(C_values)
        with ProcessPoolExecutor(max_workers=n_workers) as executor:
            all_results = executor.map(partial_train_and_eval_model, C_values)

    for C_value, results in zip(C_values, all_results):
        results["C"] = C_value
        print(json.dumps(sorted(results.items())))
        if results["labeled_f1"] > best_labeled_f1:
            best_labeled_f1 = results["labeled_f1"]
            best_C = C_value

    print("best labeled F1 = {}, with C = {}".format(best_labeled_f1, best_C))
Example #9
def main():
    import argparse
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('evaluation_set',
                        help='The dev or test set JSON file',
                        type=argparse.FileType('r'))
    parser.add_argument('-g', '--segmentation_model',
                        help='Path to segmentation model.  If not specified, ' +
                        'then gold EDUs will be used.',
                        default=None)
    parser.add_argument('-p', '--parsing_model',
                        help='Path to RST parsing model.',
                        required=True)
    parser.add_argument('-z', '--zpar_directory', default='zpar')
    parser.add_argument('-t', '--use_gold_syntax',
                        help='If specified, then gold PTB syntax trees will ' +
                        'be used.', action='store_true')
    parser.add_argument('-a', '--max_acts',
                        help='Maximum number of actions to perform on each ' +
                        'state', type=int, default=1)
    parser.add_argument('-s', '--max_states',
                        help='Maximum number of states to retain for ' +
                        'best-first search', type=int, default=1)
    parser.add_argument('-v', '--verbose',
                        help='Print more status information. For every ' +
                        'additional time this flag is specified, ' +
                        'output gets more verbose.',
                        default=0, action='count')
    args = parser.parse_args()
    assert args.use_gold_syntax or args.segmentation_model

    # Convert verbose flag to actual logging level
    log_levels = [logging.WARNING, logging.INFO, logging.DEBUG]
    log_level = log_levels[min(args.verbose, 2)]
    # Make warnings from built-in warnings module get formatted more nicely
    logging.captureWarnings(True)
    logging.basicConfig(format=('%(asctime)s - %(name)s - %(levelname)s - ' +
                                '%(message)s'), level=log_level)
    logger = logging.getLogger(__name__)

    # read the models
    logger.info('Loading models')

    # TODO add port, host, model args
    syntax_parser = SyntaxParserWrapper() if not args.use_gold_syntax else None
    segmenter = Segmenter(args.segmentation_model) \
        if args.segmentation_model else None

    rst_parser = Parser(max_acts=args.max_acts,
                        max_states=args.max_states,
                        n_best=1)
    rst_parser.load_model(args.parsing_model)

    eval_data = json.load(args.evaluation_set)

    results = \
        predict_and_evaluate_rst_trees(syntax_parser, segmenter, rst_parser,
                                       eval_data,
                                       use_gold_syntax=args.use_gold_syntax)
    print(json.dumps(sorted(results.items())))
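The results dict can also be inspected directly; a sketch that pulls out a
few headline metrics (key names taken from the metric choices in the
bootstrap script below):

for key in ('span_f1', 'nuc_f1', 'labeled_f1'):
    print('{}: {}'.format(key, results[key]))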
Example #10
def main():
    import argparse
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('evaluation_set',
                        help='The dev or test set JSON file',
                        type=argparse.FileType('r'))
    parser.add_argument('-g',
                        '--segmentation_model',
                        help='Path to segmentation model.  If not specified, ' +
                        'then gold EDUs will be used.',
                        default=None)
    parser.add_argument('-p',
                        '--parsing_model',
                        help='Path to RST parsing model.',
                        required=True)
    parser.add_argument('-z', '--zpar_directory', default='zpar')
    parser.add_argument('-t',
                        '--use_gold_syntax',
                        help='If specified, then gold PTB syntax trees will ' +
                        'be used.',
                        action='store_true')
    parser.add_argument('-a',
                        '--max_acts',
                        help='Maximum number of actions to perform on each ' +
                        'state',
                        type=int,
                        default=1)
    parser.add_argument('-s',
                        '--max_states',
                        help='Maximum number of states to retain for ' +
                        'best-first search',
                        type=int,
                        default=1)
    parser.add_argument('-v',
                        '--verbose',
                        help='Print more status information. For every ' +
                        'additional time this flag is specified, ' +
                        'output gets more verbose.',
                        default=0,
                        action='count')
    args = parser.parse_args()
    assert args.use_gold_syntax or args.segmentation_model

    # Convert verbose flag to actual logging level
    log_levels = [logging.WARNING, logging.INFO, logging.DEBUG]
    log_level = log_levels[min(args.verbose, 2)]
    # Make warnings from built-in warnings module get formatted more nicely
    logging.captureWarnings(True)
    logging.basicConfig(format=('%(asctime)s - %(name)s - %(levelname)s - ' +
                                '%(message)s'),
                        level=log_level)
    logger = logging.getLogger(__name__)

    # read the models
    logger.info('Loading models')

    # TODO add port, host, model args
    syntax_parser = SyntaxParserWrapper() if not args.use_gold_syntax else None
    segmenter = Segmenter(args.segmentation_model) \
        if args.segmentation_model else None

    rst_parser = Parser(max_acts=args.max_acts,
                        max_states=args.max_states,
                        n_best=1)
    rst_parser.load_model(args.parsing_model)

    eval_data = json.load(args.evaluation_set)

    results = \
        predict_and_evaluate_rst_trees(syntax_parser, segmenter, rst_parser,
                                       eval_data,
                                       use_gold_syntax=args.use_gold_syntax)
    print(json.dumps(sorted(results.items())))
Example #11
def main():
    import argparse
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('train_file',
                        help='Path to JSON training file.',
                        type=argparse.FileType('r'))
    parser.add_argument('eval_file',
                        help='Path to JSON dev or test file for ' +
                        'tuning/evaluation.',
                        type=argparse.FileType('r'))
    parser.add_argument('model_path',
                        help='Prefix for the path to where the model should be'
                        ' stored.  A suffix with the C value will be added.')
    parser.add_argument('-w', '--working_path',
                        help='Path to where intermediate files should be ' +
                        'stored', default='working')
    parser.add_argument('-C', '--C_values',
                        help='Comma-separated list of model complexity ' +
                        'parameter settings to evaluate.',
                        default=','.join([str(2.0 ** x)
                                          for x in range(-4, 5)]))
    parser.add_argument('-v', '--verbose',
                        help='Print more status information. For every ' +
                        'additional time this flag is specified, ' +
                        'output gets more verbose.',
                        default=0, action='count')
    parser.add_argument('-s', '--single_process', action='store_true',
                        help='Run in a single process for all hyperparameter' +
                        ' grid points, to simplify debugging.')
    args = parser.parse_args()

    if os.path.exists(args.working_path):
        raise IOError("{} already exists.  Stopping here to avoid the "
                      "possibility of overwriting files that are currently "
                      "being used.".format(args.working_path))
    os.makedirs(args.working_path)

    parser = Parser(max_acts=1, max_states=1, n_best=1)

    # Convert verbose flag to actual logging level
    log_levels = [logging.WARNING, logging.INFO, logging.DEBUG]
    log_level = log_levels[min(args.verbose, 2)]
    # Make warnings from built-in warnings module get formatted more nicely
    logging.captureWarnings(True)
    logging.basicConfig(format=('%(asctime)s - %(name)s - %(levelname)s - ' +
                                '%(message)s'), level=log_level)
    logger = logging.getLogger(__name__)

    logger.info('Extracting examples')
    train_data = json.load(args.train_file)
    eval_data = json.load(args.eval_file)

    train_examples = []

    for doc_dict in train_data:
        path_basename = doc_dict['path_basename']
        logging.info('Extracting examples for {}'.format(path_basename))
        tree = ParentedTree.fromstring(doc_dict['rst_tree'])
        collapse_rst_labels(tree)
        actions = extract_parse_actions(tree)

        for i, (action_str, feats) in \
                enumerate(parser.parse(doc_dict, gold_actions=actions)):
            example_id = "{}_{}".format(path_basename, i)
            example = {"x": Counter(feats), "y": action_str, "id": example_id}
            train_examples.append(example)
            # print("{} {}".format(action_str, " ".join(feats)))

    # train and evaluate a model for each value of C
    best_labeled_f1 = -1.0
    best_C = None

    # train and evaluate models with different C values in parallel
    C_values = [float(x) for x in args.C_values.split(',')]
    partial_train_and_eval_model = partial(train_and_eval_model,
                                           args.working_path, args.model_path,
                                           eval_data)

    # Make the SKLL jsonlines feature file
    train_path = os.path.join(args.working_path, 'rst_parsing.jsonlines')
    with open(train_path, 'w') as train_file:
        for example in train_examples:
            train_file.write('{}\n'.format(json.dumps(example)))

    if args.single_process:
        all_results = [partial_train_and_eval_model(C_value)
                       for C_value in C_values]
    else:
        n_workers = len(C_values)
        with ProcessPoolExecutor(max_workers=n_workers) as executor:
            all_results = executor.map(partial_train_and_eval_model, C_values)

    for C_value, results in zip(C_values, all_results):
        results["C"] = C_value
        print(json.dumps(sorted(results.items())))
        if results["labeled_f1"] > best_labeled_f1:
            best_labeled_f1 = results["labeled_f1"]
            best_C = C_value

    print("best labeled F1 = {}, with C = {}".format(best_labeled_f1, best_C))
Example #12
def main():
    import argparse
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('evaluation_set',
                        help='The dev or test set JSON file',
                        type=argparse.FileType('r'))
    parser.add_argument('-p', '--parsing_model',
                        help='Path to RST parsing model.',
                        required=True)
    parser.add_argument('-v', '--verbose',
                        help='Print more status information. For every ' +
                        'additional time this flag is specified, ' +
                        'output gets more verbose.',
                        default=0, action='count')
    parser.add_argument('--metric_name', help='Name of the metric to use.',
                        choices=["labeled_precision",
                                 "labeled_recall",
                                 "labeled_f1",
                                 "nuc_precision",
                                 "nuc_recall",
                                 "nuc_f1",
                                 "span_precision",
                                 "span_recall",
                                 "span_f1"],
                        required=True)
    parser.add_argument('--n_samples', type=int, default=10000)
    parser.add_argument('--alpha', type=float, default=0.05)
    args = parser.parse_args()

    # Convert verbose flag to actual logging level
    log_levels = [logging.WARNING, logging.INFO, logging.DEBUG]
    log_level = log_levels[min(args.verbose, 2)]
    # Make warnings from built-in warnings module get formatted more nicely
    logging.captureWarnings(True)
    logging.basicConfig(format=('%(asctime)s - %(name)s - %(levelname)s - ' +
                                '%(message)s'), level=log_level)
    logger = logging.getLogger(__name__)

    # read the models
    logger.info('Loading models')

    rst_parser = Parser(max_acts=1, max_states=1, n_best=1)
    rst_parser.load_model(args.parsing_model)

    eval_data = json.load(args.evaluation_set)

    pred_edu_tokens_lists, pred_trees, gold_edu_tokens_lists, gold_trees = \
        predict_rst_trees_for_eval(None, None, rst_parser, eval_data)

    data = np.array(list(zip(pred_edu_tokens_lists, pred_trees,
                             gold_edu_tokens_lists, gold_trees)))

    # score without bootstrapping
    orig_score = compute_rst_eval_results(pred_edu_tokens_lists,
                                          pred_trees,
                                          gold_edu_tokens_lists,
                                          gold_trees)[args.metric_name]
    tmp_score = make_score_func(args.metric_name)(data)
    assert tmp_score == orig_score

    boot_ci_lower, boot_ci_upper = \
        boot.ci(data, make_score_func(args.metric_name),
                n_samples=args.n_samples, method='bca', alpha=args.alpha)

    print("evaluation_set: {}".format(args.evaluation_set))
    print("alpha: {}".format(args.alpha))
    print("n_samples: {}".format(args.n_samples))
    print("metric: {}".format(args.metric_name))
    print("original score: {}".format(orig_score))
    print("CI: ({}, {})".format(boot_ci_lower, boot_ci_upper))