Code Example #1
def objective_function_parsing(feature_weights):
    """
    The objective function to optimize for dependency parsing.
    :param feature_weights: a numpy array; these are the weights of the features
                            that we want to learn
    :return: the error that should be minimized
    """
    train_subset, train_labels_subset = task_utils.get_data_subsets(
        feature_values, feature_weights, X_train, y_train, PARSING,
        TASK2TRAIN_EXAMPLES[PARSING])
    val_accuracy, _ = task_utils.train_and_evaluate_parsing(
        train_subset, train_labels_subset, X_val, y_val,
        parser_output_path=parser_output_path,
        perl_script_path=perl_script_path)
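    # the parser reports accuracy on a 0-100 scale, so we minimize 100 - accuracy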
    error = 100 - float(val_accuracy)
    return error
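These objective functions all follow the same pattern: map a feature-weight vector to a validation error so that a black-box optimizer can minimize it. As a minimal, hypothetical sketch of how such an objective might be consumed, the random-search driver below stands in for the optimizer used in the original experiments; toy_objective is a placeholder, not part of the original code.

import numpy as np

def random_search(objective, num_features, num_iterations=50, seed=0):
    """Minimize a black-box objective by uniform random search
    (an illustrative sketch, not the original optimizer)."""
    rng = np.random.RandomState(seed)
    best_weights, best_error = None, float('inf')
    for _ in range(num_iterations):
        weights = rng.uniform(-1.0, 1.0, size=num_features)
        error = objective(weights)
        if error < best_error:
            best_weights, best_error = weights, error
    return best_weights, best_error

# toy stand-in for one of the objective functions above (hypothetical)
toy_objective = lambda w: float(np.sum((w - 0.5) ** 2))
best_weights, best_error = random_search(toy_objective, num_features=6)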
Code Example #2
def objective_function_pos(feature_weights):
    """
    The objective function to optimize for POS tagging.
    :param feature_weights: a numpy array; these are the weights of the features
                            that we want to learn
    :return: the error that should be minimized
    """
    train_subset, train_labels_subset = task_utils.get_data_subsets(
        feature_values, feature_weights, X_train, y_train, POS,
        TASK2TRAIN_EXAMPLES[POS])

    # train and evaluate the tagger; only the validation error is minimized
    val_accuracy, _ = task_utils.train_and_evaluate_pos(
        train_subset, train_labels_subset, X_val, y_val)

    # we minimize the error; the lower the better
    error = 1 - float(val_accuracy)
    return error
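Both objectives above (and the slot-filling one below) delegate example selection to task_utils.get_data_subsets, whose implementation is not shown here. The sketch below illustrates one plausible reading, assuming each training example is scored by the dot product of its data-selection feature values and the learned weights, with the top-scoring examples kept; all names are illustrative, not the original task_utils code.

import numpy as np

def get_data_subsets_sketch(feature_values, feature_weights,
                            X_train, y_train, num_train_examples):
    """Score every candidate training example with a linear combination
    of its features and keep the highest-scoring ones
    (an illustrative sketch, not the original task_utils code)."""
    scores = np.dot(feature_values, feature_weights)  # one score per example
    top_indices = np.argsort(scores)[::-1][:num_train_examples]
    train_subset = [X_train[i] for i in top_indices]
    labels_subset = [y_train[i] for i in top_indices]
    return train_subset, labels_subset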
Code Example #3
def objective_function_slot_filling(feature_weights):
    """
    The objective function to optimize for slot filling.
    :param feature_weights: a numpy array; these are the weights of the features
                            that we want to learn
    :return: the error that should be minimized
    """
    train_subset, train_labels_subset = task_utils.get_data_subsets(
        feature_values, feature_weights, X_train, y_train, SLOT_FILLING,
        TASK2TRAIN_EXAMPLES[SLOT_FILLING])

    print("Running evaluation for slot filling")
    # train and evaluate the tagger; we input the test documents here but only
    # minimize the validation error
    #print("Training samples {} {}".format(X_train[:10], y_train[:10]))

    #data_utils.dump_to_conll(train_subset, train_labels_subset, "test_dump.txt")

    dev_f1_score, test_f1_score = task_utils.train_and_evaluate_slot_filling_MTL(
        train_subset, train_labels_subset, X_val, y_val, args=args)

    # we minimize the error; the lower the better
    error = 1 - dev_f1_score

    return error
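The three objective functions above differ only in the task constant, the train-and-evaluate routine, and the error scale (parsing accuracy is reported out of 100, the others as fractions of 1). If one were refactoring them, a small factory could remove the duplication; the sketch below is a hypothetical rewrite that assumes the task_utils interfaces and module-level variables shown above.

def make_objective(task, train_and_evaluate, error_scale=1.0, **eval_kwargs):
    """Build an objective function for a given task (hypothetical
    refactoring of the three near-identical functions above)."""
    def objective(feature_weights):
        train_subset, train_labels_subset = task_utils.get_data_subsets(
            feature_values, feature_weights, X_train, y_train, task,
            TASK2TRAIN_EXAMPLES[task])
        val_score, _ = train_and_evaluate(
            train_subset, train_labels_subset, X_val, y_val, **eval_kwargs)
        # minimize the error; the lower the better
        return error_scale - float(val_score)
    return objective

# hypothetical usage, mirroring the definitions above:
# objective_function_pos = make_objective(POS, task_utils.train_and_evaluate_pos)
# objective_function_parsing = make_objective(
#     PARSING, task_utils.train_and_evaluate_parsing, error_scale=100.0,
#     parser_output_path=parser_output_path, perl_script_path=perl_script_path)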
Code Example #4
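This last example is a fragment from a baseline-comparison loop: the branch shown first draws its training subset from the most similar source domain, while the MOST_SIMILAR_EXAMPLES branch ranks individual examples instead.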
                    most_similar_domain = similarity.get_most_similar_domain(
                        trg_domain, domain2term_dist)
                    train_subset, labels_subset, _ = domain2train_data[
                        most_similar_domain]
                    train_subset, _, labels_subset, _ = train_test_split(
                        train_subset,
                        labels_subset,
                        train_size=num_train_examples,
                        stratify=labels_subset
                        if args.task == SENTIMENT else None)
                elif baseline == MOST_SIMILAR_EXAMPLES:
                    print('Selecting the most similar examples...')
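                    # keep only the first feature's weight active (presumably a
                    # similarity feature) and zero out the rest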
                    one_all_weights = np.ones(len(feature_names))
                    one_all_weights[1:] = 0
                    train_subset, labels_subset = task_utils.get_data_subsets(
                        feature_values, one_all_weights, X_train, y_train,
                        args.task, num_train_examples)
                else:
                    raise ValueError('%s is not a baseline.' % baseline)

                # train the baseline
                val_accuracy, test_accuracy = train_and_evaluate(
                    train_subset,
                    labels_subset,
                    X_val,
                    y_val,
                    X_test,
                    y_test,
                    parser_output_path=parser_output_path,
                    perl_script_path=perl_script_path)
                run_dict[baseline].append((val_accuracy, test_accuracy))
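run_dict collects one (validation accuracy, test accuracy) pair per baseline and run. A small hypothetical helper for summarizing those pairs across runs might look like this; the name and output format are illustrative.

import numpy as np

def summarize_runs(run_dict):
    """Average the (val, test) scores collected per baseline
    (hypothetical helper; run_dict maps baseline -> list of (val, test))."""
    for baseline, scores in run_dict.items():
        val_scores, test_scores = zip(*scores)
        print('%s: val %.2f +/- %.2f, test %.2f +/- %.2f' % (
            baseline, np.mean(val_scores), np.std(val_scores),
            np.mean(test_scores), np.std(test_scores)))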