Example #1
0
        FN += 1
    elif (pl - {'O'}) == (gl - {'O'}) and not has_label_errors(pt, gt):
        cls = 'BOUNDARY_ERROR'
        FP += num_pred_ents
        FN += num_gold_ents
    else:
        # This can be broken down into multiple FP and FN
        cls = 'LABEL+BOUNDARY_ERROR'
        FP += num_pred_ents
        FN += num_gold_ents
    return cls, TP, TN, FP, FN


if __name__ == "__main__":
    # Command-line interface: neighbour count, FAISS index location,
    # the conll file to inspect and the YAML blueprint to load.
    parser = ArgumentParser(
        description='Visualise label entropy and print out '
        'K most similar sentences to those in the conll '
        'file using KNN with faiss index.')
    # K is a neighbour count. Parse it as an int so that a value given on
    # the command line has the same type as the integer default (with
    # type=str, "--K 5" would yield the string '5' while the default is 10).
    parser.add_argument('--K',
                        default=10,
                        type=int,
                        help='The number of nearest neighbours to use')
    parser.add_argument('--load_index',
                        required=True,
                        type=str,
                        help='The folder to load the FAISS index from')
    parser.add_argument('--conll_file',
                        type=str,
                        required=True,
                        help='Path to conll file')
    parser.add_argument('--load_blueprint', action=YAMLLoaderAction)
Example #2
0
        if checkpoint_callback is not None:
            checkpoint_callback(
                e,
                checkpoint_stats,
                improved=(patience == conf.checkpoint.patience))

        if patience == 0:
            break
    pbar.close()
    return model


if __name__ == "__main__":

    parser = ArgumentParser(description='Dependency parser trainer')

    # The output folder option is only registered (and then mandatory) when
    # the environment variable named by EXP_ENV_VAR is not set.
    if EXP_ENV_VAR not in os.environ:
        parser.add_argument('-o',
                            '--outfolder',
                            required=True,
                            type=str,
                            help='path to where to save the models.')
    # The data folder is optional on the command line; per the help text the
    # script falls back to environment variables when it is omitted.
    parser.add_argument('-i',
                        '--datafolder',
                        required=False,
                        type=str,
                        help='path to CONLL folder containing languages. '
                        'If not set script will check env variables.')
    parser.add_argument('--name',
                        type=str,
Example #3
0
import numpy as np
import torch
import tqdm

from collections import namedtuple

from mlconf import YAMLLoaderAction, ArgumentParser
from edien.components import BertSentenceEncoder
from edien.preprocess import PaddedVariabliser
from edien.train_utils import only_pseudorandomness_please
from edien.vocab import BertCoder
from utils import FaissIndex, k_nearest_interpolation, entropy, argmax

if __name__ == "__main__":

    parser = ArgumentParser(description='Use KNN with faiss index to obtain '
                            'interpolated probabilities for labels.')
    # K is a neighbour count. Parse it as an int so that a value given on
    # the command line has the same type as the integer default (with
    # type=str, "--K 5" would yield the string '5' while the default is 10).
    parser.add_argument('--K',
                        default=10,
                        type=int,
                        help='The number of nearest neighbours to use')
    parser.add_argument('--load_index',
                        required=True,
                        type=str,
                        help='The folder to load the FAISS index from')
    parser.add_argument('--load_blueprint', action=YAMLLoaderAction)

    conf = parser.parse_args()
    # NOTE(review): `seed` is not declared as an argument above; presumably
    # it is supplied by the blueprint loaded via --load_blueprint - confirm.
    only_pseudorandomness_please(conf.seed)

    # Make sure we aren't trying to create vocab on test
    conf.data.vocab_encoder.load_if_exists = True
Example #4
0
    stats = {
        'test_mean_loss': mean_loss.score,
        'test_uas': u_scorer.score,
        'test_las': l_scorer.score
    }

    # TODO: save these
    bp.test_results = stats
    for key, val in stats.items():
        print('%s: %s' % (key, val))


if __name__ == "__main__":
    # needed to import train to visualise_train
    parser = ArgumentParser(description='Dependency parser evaluator')
    # Both the trained blueprint and the conll test file are mandatory.
    parser.add_argument(
        '--blueprint',
        required=True,
        type=str,
        help='Path to .bp blueprint file produced by training.')
    parser.add_argument('--test_file',
                        required=True,
                        type=str,
                        help='Conll file to use for testing')
    # Boolean switch: conll output is only written when the flag is given.
    parser.add_argument('--conll_out',
                        action='store_true',
                        help='If specified writes conll output')
    parser.add_argument(
        '--treeify',
        type=str,
Example #5
0
    stats = {
        'test_mean_loss': mean_loss.score,
        'test_uas': u_scorer.score,
        'test_las': l_scorer.score,
        'aux_acc': tag_acc
    }

    # TODO: save these
    bp.test_results = stats
    for key, val in sorted(stats.items()):
        print('%s: %s' % (key, val))


if __name__ == "__main__":
    # needed to import train to visualise_train
    # This entry point takes a trained blueprint and a test file, so the
    # --help description names it as the evaluator rather than the trainer.
    parser = ArgumentParser(description='Dependency parser evaluator')
    parser.add_argument(
        '--blueprint',
        required=True,
        type=str,
        help='Path to .bp blueprint file produced by training.')
    parser.add_argument('--test_file',
                        required=True,
                        type=str,
                        help='Conll file to use for testing')
    # NOTE(review): because of the default, conll_out is always set to a
    # path even when the option is not given - confirm that is intended.
    parser.add_argument('--conll_out',
                        type=str,
                        default='out.conllu',
                        help='If specified writes conll output to this file')
    parser.add_argument('--unlabelled',
                        action='store_true',
Example #6
0
import torch
import tqdm

from collections import namedtuple

from mlconf import YAMLLoaderAction, ArgumentParser
from edien import EdIENPath
from edien.components import BertSentenceEncoder
from edien.preprocess import PaddedVariabliser
from edien.vocab import BertCoder
from edien.train_utils import only_pseudorandomness_please
from utils import FaissIndex

if __name__ == "__main__":

    # Command line: the folder name for the index plus the YAML blueprint.
    parser = ArgumentParser(description='Fit faiss index on training set')
    parser.add_argument(
        '--save_index',
        type=str,
        required=True,
        help='The name to give to the FAISS index folder',
    )
    parser.add_argument('--load_blueprint', action=YAMLLoaderAction)
    conf = parser.parse_args()

    # NOTE(review): `seed` is not declared as an argument above; presumably
    # it is supplied by the blueprint loaded via --load_blueprint - confirm.
    only_pseudorandomness_please(conf.seed)

    # Build the blueprint from the parsed configuration.
    bp = conf.build()

    # Keep this order: train_vars() is called before train_sents is read.
    train = bp.data.train_vars(bp.paths)
    train_sents = bp.data.train_sents

    # Get BERT embedding dimension
    dim = bp.model.model.tasks['ner_tags'].in_dim