# Run AllenNLP NER over the SQuAD train and dev articles and save the results
# under the 'sq_pp_ner' output folder via run_predictor.

foldername = get_foldername('sq_pp_ner')

# Flags
verbose_on = True       # Verbose comments
verbose2_on = False     # Detailed verbose comments - show results of NLP
testing_mode = False    # When True, skip loading the (slow) AllenNLP model
skip_save = False       # When True, run_predictor does not write results to disk

# Set up AllenNLP: pretrained NER model archive (2018-12-18 release),
# expected under $HOME/src/allennlp/.
allenNERmodel = os.path.join(os.getenv("HOME"), 'src', 'allennlp',
                             'ner-model-2018.12.18.tar.gz')
if not testing_mode:
    predictor = Predictor.from_path(allenNERmodel)

# # # # # # # # # # # # # # # # # # # #
# Process training data
# # # # # # # # # # # # # # # # # # # #

# Load the training data
arts = load_SQuAD_train()
art = arts
# art = arts[105:107]  # A few short articles (debugging subset)
# Pass the flags defined above instead of hard-coded False/False
# (same values, so behavior is unchanged, but the flags now take effect).
run_predictor(art, predictor, foldername, 'train',
              testing_mode=testing_mode, skip_save=skip_save)

# # # # # # # # # # # # # # # # # # # #
# Process DEV data
# # # # # # # # # # # # # # # # # # # #

# Load the dev data
arts = load_SQuAD_dev()
art = arts
# NOTE(review): the original text was truncated after
# `run_predictor(art, predictor,` — completed by analogy with the
# training call above; confirm against the original source.
run_predictor(art, predictor, foldername, 'dev',
              testing_mode=testing_mode, skip_save=skip_save)
# Set up and load data: make the project's ../app utilities importable,
# load the SQuAD train/dev article lists, report their sizes, and pick
# the working subset of training articles.

# Includes
import sys
import os

# Setup paths containing utility modules (../app relative to the CWD)
curr_folder = os.getcwd()
sys.path.insert(0, os.path.join(curr_folder, '../app'))

# Import utils
from utils_EDA import p_list_qas
from utils import load_SQuAD_train
from utils import load_SQuAD_dev

# Load the training data
arts_train = load_SQuAD_train()
# Load the testing data
arts_dev = load_SQuAD_dev()

# All articles
Ntrain = len(arts_train)
Ndev = len(arts_dev)
# Use the counts computed above rather than recomputing len(...)
print("Narticles in train = " + str(Ntrain))
print("Narticles in dev = " + str(Ndev))

#
# TRAINING DATASET
#

# Pick out a subset of articles ([:] copies the full list)
art = arts_train[:]
# art = arts_train[14:15]  # single-article debugging subset