from nerds.models import NERModel from nerds.utils import get_logger, write_param_file import os import joblib import sklearn_crfsuite import spacy log = get_logger() class CrfNER(NERModel): def __init__(self, max_iter=100, c1=0.1, c2=0.1, featurizer=None): """ Construct a Conditional Random Fields (CRF) based NER. Implementation of CRF NER is provided by sklearn.crfsuite.CRF. Parameters ---------- max_iter : int, optional, default 100 maximum number of iterations to run CRF training c1 : float, optional, default 0.1 L1 regularization coefficient. c2 : float, optional, default 0.1 L2 regularization coefficient. featurizer : function, default None if None, the default featurizer _sent2features() is used to convert list of tokens for each sentence to a list of features, where each feature is a dictionary of name-value pairs. For custom features, a featurizer function must be provided that takes in a list of tokens (sentence) and returns a list of features.
# Command-line driver: parse the options, then either run the self test or
# convert a directory of BRAT annotations into a single IOB file.
parser = argparse.ArgumentParser(
    description="Script to convert BRAT annotations to IOB (NERDS) format.")
parser.add_argument("-i", "--input_dir",
                    help="Directory to store BRAT .txt and .ann files.")
parser.add_argument("-o", "--output_file",
                    help="Output file to write IOB output to.")
parser.add_argument("-t", "--test",
                    help="Runs self test.",
                    action="store_true")
args = parser.parse_args()

logger = get_logger()

input_dir = args.input_dir
output_file = args.output_file
self_test = args.test

# -i and -o default to None when omitted. Outside of self-test mode both are
# needed; without this check the "{:s}" log formatting below raises an opaque
# TypeError on None. parser.error() prints the usage text and exits with
# status 2 instead.
if not self_test and (input_dir is None or output_file is None):
    parser.error(
        "--input_dir and --output_file are required unless --test is given")

# Loaded only after the arguments are validated, so a usage error is reported
# without first paying for the model load.
nlp = spacy.load("en")

if self_test:
    logger.info("Executing self test...")
    do_self_test(nlp)
else:
    logger.info(
        "Reading BRAT .txt and .ann files from: {:s}".format(input_dir))
    logger.info("Writing IOB tokens/tags to file: {:s}".format(output_file))
    convert_brat_to_iob(input_dir, output_file, nlp)