def __init__(self, trn=False, tst=False, input_data=False, def_data=False, def_y=False, classes_per_it=60, num_support=5, iterations=100, target_frames=False):
    """Set up frame-classification resources for episodic training.

    Loads the frame->index and frame->definition JSON resources, resolves
    the set of target frames (caller-supplied or the packaged default),
    and prepares BERT-encoded inputs for the frame definitions.

    classes_per_it / num_support / iterations configure episodic sampling.
    # NOTE(review): trn / tst / input_data are currently unused here —
    # kept for interface compatibility with callers.
    """
    def _read_json(path):
        # Local helper: load one JSON resource file.
        with open(path, 'r') as fp:
            return json.load(fp)

    self.frame2idx = _read_json(dir_path + '/../koreanframenet/resource/info/fn1.7_frame2idx.json')
    self.frame2definition = _read_json(dir_path + '/../koreanframenet/resource/info/fn1.7_frame_definitions.json')

    # Caller-supplied target frames take precedence over the packaged list.
    if target_frames:
        self.target_frames = target_frames
    else:
        self.target_frames = _read_json(dir_path + '/../data/target_frames.json')

    # Reverse mapping: index -> frame name.
    self.idx2frame = {idx: frame for frame, idx in self.frame2idx.items()}

    self.bert_io = utils.for_BERT(mode='train', language='multi')

    # Reuse precomputed definition encodings when provided; otherwise encode now.
    if def_data:
        self.def_data = def_data
        self.def_y = def_y
    else:
        self.def_data, self.def_y = self.bert_io.convert_to_bert_input_label_definition(self.frame2definition, self.frame2idx)

    self.classes_per_it = classes_per_it
    self.num_support = num_support
    self.iterations = iterations
(t_hour, t_min) = divmod(t_min, 60) result = '{}hour:{}min:{}sec'.format(t_hour, t_min, t_sec) return result # In[3]: try: dir_path = os.path.dirname(os.path.abspath(__file__)) except: dir_path = '.' # In[4]: bert_io = utils.for_BERT(mode='train', language='multi') # In[5]: frameBERT_dir = '/disk/data/models/frameBERT/frameBERT_en' frameBERT = FrameBERT.from_pretrained(frameBERT_dir, num_senses=len(bert_io.sense2idx), num_args=len(bert_io.bio_arg2idx), lufrmap=bert_io.lufrmap, frargmap=bert_io.bio_frargmap) frameBERT.to(device) frameBERT.eval() # In[6]:
# Fine-tuning driver: print the run configuration, build the BERT IO helper,
# then resume training from a pretrained English model.
# NOTE(review): srl, language, PRETRAINED_MODEL, trn, batch_size, MAX_LEN,
# masking, fnversion and train() are defined elsewhere in this file.
epochs = 50
print('\nFineTuning Multilingual')
print('### TRAINING')
print('MODEL:', srl)
print('LANGUAGE:', language)
print('PRETRAINED BERT:', PRETRAINED_MODEL)
print('training data:')
print('\t(ko):', len(trn))
print('BATCH_SIZE:', batch_size)
print('MAX_LEN:', MAX_LEN)
print('')
bert_io = utils.for_BERT(mode='train', srl=srl, language=language, masking=masking, fnversion=fnversion, pretrained=PRETRAINED_MODEL)
# retrain=True: warm-start from the exemplar-trained English model below.
train(
    retrain=True,
    pretrained_dir='/disk/data/models/dict_framenet/enModel-with-exemplar/9/')


# In[ ]:

# Alternative configuration kept for reference: train on 25% of the data (4460).
# model_dir = '/disk/data/models/framenet/mulModel-25/'
# epochs = 50
# trn, dev, tst = dataio.load_data(srl=srl, language='ko')
def __init__(self, fnversion=1.1, language='ko', masking=True, srl='framenet', model_path=False, gold_pred=False, viterbi=False, tgt=True, pretrained='bert-base-multilingual-cased'):
    """Configure and load a frame-semantic parser.

    Args:
        fnversion: FrameNet label-inventory version.
        language: language code passed through to utils.for_BERT.
        masking: restrict frame candidates per lexical unit
            (forced off for srl='propbank-dp').
        srl: task name; 'propbank-dp' disables viterbi and masking.
        model_path: directory of the fine-tuned model (required).
        gold_pred: use gold predicates instead of identified ones.
        viterbi: enable viterbi decoding over BIO argument labels.
        tgt: wrap the target word in <tgt> ... </tgt> special tokens.
        pretrained: HuggingFace name of the underlying BERT model.

    Raises:
        ValueError: if model_path is not provided.
    """
    self.fnversion = fnversion
    self.language = language
    self.masking = masking
    self.srl = srl
    self.gold_pred = gold_pred
    self.viterbi = viterbi
    self.pretrained = pretrained
    self.tgt = tgt  # using <tgt> and </tgt> as a special token

    # Target identifier is built from the *caller's* masking flag, before
    # the propbank-dp override below (preserves original behavior).
    if self.masking:
        self.targetid = target_identifier.targetIdentifier()
    else:
        self.targetid = target_identifier.targetIdentifier(only_lu=False)

    # PropBank dependency-parsing mode uses neither viterbi nor LU masking.
    if self.srl == 'propbank-dp':
        self.viterbi = False
        self.masking = False

    print('srl model:', self.srl)
    print('language:', self.language)
    print('version:', self.fnversion)
    print('using viterbi:', self.viterbi)
    print('using masking:', self.masking)
    print('pretrained BERT:', self.pretrained)
    print('using TGT special token:', self.tgt)

    self.bert_io = utils.for_BERT(mode='predict', srl=self.srl, language=self.language, masking=self.masking, fnversion=self.fnversion, pretrained=self.pretrained)

    # Load the fine-tuned model.
    # FIX: a missing model_path previously only printed a hint and then
    # crashed later with AttributeError on self.model_path; fail fast instead.
    if model_path:
        self.model_path = model_path
    else:
        raise ValueError('model_path is required, e.g. model_path={your_model_dir}')
    self.model = BertForJointShallowSemanticParsing.from_pretrained(
        self.model_path,
        num_senses=len(self.bert_io.sense2idx),
        num_args=len(self.bert_io.bio_arg2idx),
        lufrmap=self.bert_io.lufrmap,
        masking=self.masking,
        frargmap=self.bert_io.bio_frargmap)
    self.model.to(device)
    print('...loaded model path:', self.model_path)
    self.model.eval()  # inference only
    print(self.model_path)
    print('...model is loaded')

    # Transition parameters for viterbi decoding over BIO argument labels.
    if self.srl != 'propbank-dp':
        self.transition_param = inference.get_transition_params(
            self.bert_io.idx2bio_arg.values())