in the meantime, we do preprocess like capitalize the first character of a sentence or normalize digits '''
# NOTE(review): the line above is the tail of a module docstring whose opening
# quotes lie before this chunk — it is documentation, not executable code.

# Stdlib / third-party / project imports for the preprocessing script.
import os
from collections import Counter
from nltk.parse import CoreNLPDependencyParser
import numpy as np
import argparse
from tqdm import tqdm
from io_utils import read_yaml, save_pickle, read_pickle
from str_utils import normalize_tok
from vocab import Vocab
from sklearn.model_selection import train_test_split

# Load project configuration; a CLI flag may override the stored random seed.
config = read_yaml('config.yaml')
parser = argparse.ArgumentParser(description='this is a description')
parser.add_argument('--seed', '-s', required=False, type=int, default=config['random_seed'])
args = parser.parse_args()
# The command-line seed (defaulting to the config value) takes precedence.
config['random_seed'] = args.seed
print('seed:', config['random_seed'])
print('the current file path:', os.getcwd())
# Seed NumPy so any downstream shuffling/splitting is reproducible.
np.random.seed(config['random_seed'])
# Root directory holding the raw/processed data, as declared in config.yaml.
data_dir = config['data_dir']
from io_utils import read_yaml

# Two separate YAML configs: model/training options vs. dataset paths.
joint_config = read_yaml('joint_config.yaml')
data_config = read_yaml('data_config.yaml')

import numpy as np
import random
import dynet_config

# Seed Python's and NumPy's RNGs before any model code runs.
print('seed:', joint_config['random_seed'])
random.seed(joint_config['random_seed'])
np.random.seed(joint_config['random_seed'])
# dynet_config.set(...) must be called BEFORE `import dynet` — DyNet reads
# these settings (autobatching on, 4096 MB memory pool, fixed seed) once at
# import time, which is why this import sequence cannot be reordered.
dynet_config.set(autobatch=1, mem='4096', random_seed=joint_config['random_seed'])
import dynet as dy

# Project-local model components (imported after DyNet is configured).
import nn
import ops
from dy_utils import ParamManager as pm
from dy_utils import AdamTrainer
from event_eval import EventEval
from io_utils import to_set, get_logger
from shift_reduce import ShiftReduce

logger = get_logger('transition', log_dir='log', log_name='trains.log')

# Dimension of optional precomputed sentence vectors; 0 means "not used".
sent_vec_dim = 0
if joint_config['use_sentence_vec']:
    train_sent_file = data_config['train_sent_file']
    test_sent_file = data_config['test_sent_file']
    # NOTE(review): this branch appears truncated here — the original file
    # presumably continues by loading the sentence-vector files; confirm
    # against the full source.
from flair.data import Sentence
from flair.models import SequenceTagger
from flair.embeddings import CharLMEmbeddings, StackedEmbeddings, BertEmbeddings
import os
import pickle
import numpy as np
from io_utils import read_yaml, read_lines, read_json_lines

# Resolve the ACE05 event-dataset split files from data_config.yaml.
data_config = read_yaml('data_config.yaml')
data_dir = data_config['data_dir']
ace05_event_dir = data_config['ace05_event_dir']
train_list = read_json_lines(os.path.join(ace05_event_dir, 'train_nlp_ner.json'))
dev_list = read_json_lines(os.path.join(ace05_event_dir, 'dev_nlp_ner.json'))
test_list = read_json_lines(os.path.join(ace05_event_dir, 'test_nlp_ner.json'))
train_sent_file = data_config['train_sent_file']

# Uncased BERT embedder using only the last layer, placed on the first GPU.
bert = BertEmbeddings(layers='-1', bert_model_or_path='bert-base-uncased').to('cuda:0')


def save_bert(inst_list, filter_tri=True, name='train'):
    # NOTE(review): this definition is truncated in the visible chunk — the
    # final statement below is cut mid-expression and the rest of the body
    # lies outside this view; do not modify without the full source.
    sents = []
    sent_lens = []
    for inst in inst_list:
        words, trigger_list, ent_list, arg_list = inst['nlp_words'], inst[