Example #1
0
from nerds.models import NERModel
from nerds.utils import get_logger, write_param_file

import os
import joblib
import sklearn_crfsuite
import spacy

# Module-level logger, obtained from the get_logger() helper imported from
# nerds.utils above.
log = get_logger()


class CrfNER(NERModel):
    def __init__(self, max_iter=100, c1=0.1, c2=0.1, featurizer=None):
        """ Construct a Conditional Random Fields (CRF) based NER. Implementation
            of CRF NER is provided by sklearn_crfsuite.CRF.

            Parameters
            ----------
            max_iter : int, optional, default 100
                maximum number of iterations to run CRF training
            c1 : float, optional, default 0.1
                L1 regularization coefficient.
            c2 : float, optional, default 0.1
                L2 regularization coefficient.
            featurizer : function, default None
                if None, the default featurizer _sent2features() is used to convert 
                list of tokens for each sentence to a list of features, where each 
                feature is a dictionary of name-value pairs. For custom features, a 
                featurizer function must be provided that takes in a list of tokens 
                (sentence) and returns a list of features.
Example #2
0
# Command-line driver: either run the self test (--test) or convert the BRAT
# annotations found in --input_dir into a single IOB (NERDS) file written to
# --output_file.
parser = argparse.ArgumentParser(
    description="Script to convert BRAT annotations to IOB (NERDS) format.")
parser.add_argument("-i",
                    "--input_dir",
                    help="Directory to store BRAT .txt and .ann files.")
parser.add_argument("-o",
                    "--output_file",
                    help="Output file to write IOB output to.")
parser.add_argument("-t",
                    "--test",
                    help="Runs self test.",
                    action="store_true")
args = parser.parse_args()

logger = get_logger()

input_dir = args.input_dir
output_file = args.output_file
self_test = args.test

# -i/-o stay optional so that --test can be used on its own (the self test
# only receives the nlp object), but a real conversion needs both. Without
# this check a missing value is None and "{:s}".format(None) below fails with
# an opaque TypeError. Checking before spacy.load() also avoids paying the
# model-loading cost for an invocation that cannot succeed.
if not self_test and (input_dir is None or output_file is None):
    parser.error(
        "--input_dir and --output_file are required unless --test is given.")

# NOTE(review): the "en" shortcut name is only resolvable on spaCy 2.x;
# spaCy 3 removed shortcut links -- confirm the pinned spaCy version.
nlp = spacy.load("en")

if self_test:
    logger.info("Executing self test...")
    do_self_test(nlp)
else:
    logger.info(
        "Reading BRAT .txt and .ann files from: {:s}".format(input_dir))
    logger.info("Writing IOB tokens/tags to file: {:s}".format(output_file))
    convert_brat_to_iob(input_dir, output_file, nlp)