Esempio n. 1
0
    def __init__(self,
                 trn=False,
                 tst=False,
                 input_data=False,
                 def_data=False,
                 def_y=False,
                 classes_per_it=60,
                 num_support=5,
                 iterations=100,
                 target_frames=False):
        """Set up frame vocabularies, definitions and BERT-encoded inputs.

        Loads the FrameNet 1.7 frame->index map and frame definitions from
        the koreanframenet resources, then either accepts pre-computed
        definition encodings (``def_data``/``def_y``) or builds them with
        the BERT converter.

        # NOTE(review): trn/tst/input_data are accepted but unused here —
        # presumably kept for interface compatibility; confirm with callers.
        """
        info_dir = dir_path + '/../koreanframenet/resource/info'
        with open(info_dir + '/fn1.7_frame2idx.json', 'r') as f:
            self.frame2idx = json.load(f)
        with open(info_dir + '/fn1.7_frame_definitions.json', 'r') as f:
            self.frame2definition = json.load(f)

        # Fall back to the bundled target-frame list when none is supplied.
        if target_frames:
            self.target_frames = target_frames
        else:
            with open(dir_path + '/../data/target_frames.json', 'r') as f:
                self.target_frames = json.load(f)

        # Inverse mapping: index -> frame name.
        self.idx2frame = {idx: frame for frame, idx in self.frame2idx.items()}

        self.bert_io = utils.for_BERT(mode='train', language='multi')

        # Reuse pre-encoded definition data when provided; otherwise encode
        # the frame definitions into BERT inputs and labels now.
        if def_data:
            self.def_data = def_data
            self.def_y = def_y
        else:
            (self.def_data,
             self.def_y) = self.bert_io.convert_to_bert_input_label_definition(
                 self.frame2definition, self.frame2idx)

        # Episodic-sampling hyperparameters.
        self.classes_per_it = classes_per_it
        self.num_support = num_support
        self.iterations = iterations
Esempio n. 2
0
    (t_hour, t_min) = divmod(t_min, 60)

    result = '{}hour:{}min:{}sec'.format(t_hour, t_min, t_sec)
    return result


# In[3]:

# Resolve the directory containing this file. `__file__` is not defined when
# the code runs interactively (e.g. in a notebook), which raises NameError —
# fall back to the current working directory in that case. Catching only
# NameError (instead of a bare `except:`) avoids masking unrelated errors.
try:
    dir_path = os.path.dirname(os.path.abspath(__file__))
except NameError:
    dir_path = '.'

# In[4]:

# BERT input/label converter configured for multilingual training vocabularies.
bert_io = utils.for_BERT(mode='train', language='multi')

# In[5]:

# Directory of the pretrained English frameBERT checkpoint on local disk.
frameBERT_dir = '/disk/data/models/frameBERT/frameBERT_en'

# Load the frame-semantic parsing model; the label-space sizes and the
# LU->frame / frame->argument masking maps are taken from bert_io.
frameBERT = FrameBERT.from_pretrained(frameBERT_dir,
                                      num_senses=len(bert_io.sense2idx),
                                      num_args=len(bert_io.bio_arg2idx),
                                      lufrmap=bert_io.lufrmap,
                                      frargmap=bert_io.bio_frargmap)
frameBERT.to(device)  # `device` is configured elsewhere in this file
frameBERT.eval()  # inference mode (disables dropout etc.)

# In[6]:
Esempio n. 3
0
epochs = 50  # number of fine-tuning epochs

# Log the training configuration before starting.
# NOTE(review): srl, language, PRETRAINED_MODEL, trn, batch_size, MAX_LEN,
# masking, fnversion and train() are defined elsewhere in this file.
print('\nFineTuning Multilingual')
print('### TRAINING')
print('MODEL:', srl)
print('LANGUAGE:', language)
print('PRETRAINED BERT:', PRETRAINED_MODEL)
print('training data:')
print('\t(ko):', len(trn))
print('BATCH_SIZE:', batch_size)
print('MAX_LEN:', MAX_LEN)
print('')

# Build the BERT input/label converter for the chosen SRL task and language.
bert_io = utils.for_BERT(mode='train',
                         srl=srl,
                         language=language,
                         masking=masking,
                         fnversion=fnversion,
                         pretrained=PRETRAINED_MODEL)
# Continue training from the English exemplar-pretrained checkpoint.
train(
    retrain=True,
    pretrained_dir='/disk/data/models/dict_framenet/enModel-with-exemplar/9/')

# In[ ]:

# by 25% (4460)

# model_dir = '/disk/data/models/framenet/mulModel-25/'
# epochs = 50

# trn, dev, tst = dataio.load_data(srl=srl, language='ko')
Esempio n. 4
0
    def __init__(self,
                 fnversion=1.1,
                 language='ko',
                 masking=True,
                 srl='framenet',
                 model_path=False,
                 gold_pred=False,
                 viterbi=False,
                 tgt=True,
                 pretrained='bert-base-multilingual-cased'):
        """Load a trained joint shallow-semantic-parsing model for inference.

        Args:
            fnversion: FrameNet version of the label inventory.
            language: target language code (e.g. 'ko').
            masking: restrict target identification to known LUs and apply
                LU->frame / frame->argument masks in the model.
            srl: task name; 'propbank-dp' disables masking and viterbi.
            model_path: directory of the trained model (required).
            gold_pred: use gold predicates instead of predicted targets.
            viterbi: apply viterbi decoding over BIO argument labels.
            tgt: wrap the target word with <tgt> / </tgt> special tokens.
            pretrained: name of the pretrained BERT backbone.

        Raises:
            ValueError: if ``model_path`` is not provided.
        """
        self.fnversion = fnversion
        self.language = language
        self.masking = masking
        self.srl = srl
        self.gold_pred = gold_pred
        self.viterbi = viterbi
        self.pretrained = pretrained
        self.tgt = tgt  # using <tgt> and </tgt> as a special token

        # With masking enabled, target identification is limited to known LUs.
        if self.masking == True:
            self.targetid = target_identifier.targetIdentifier()
        else:
            self.targetid = target_identifier.targetIdentifier(only_lu=False)

        # propbank-dp uses neither viterbi decoding nor label masking.
        if self.srl == 'propbank-dp':
            self.viterbi = False
            self.masking = False

        print('srl model:', self.srl)
        print('language:', self.language)
        print('version:', self.fnversion)
        print('using viterbi:', self.viterbi)
        print('using masking:', self.masking)
        print('pretrained BERT:', self.pretrained)
        print('using TGT special token:', self.tgt)

        self.bert_io = utils.for_BERT(mode='predict',
                                      srl=self.srl,
                                      language=self.language,
                                      masking=self.masking,
                                      fnversion=self.fnversion,
                                      pretrained=self.pretrained)

        # Load model. Fail fast with a clear error when no path is given:
        # the original code only printed a hint and then crashed with an
        # opaque AttributeError on self.model_path below.
        if model_path:
            self.model_path = model_path
        else:
            raise ValueError('model_path={your_model_dir} is required')

        self.model = BertForJointShallowSemanticParsing.from_pretrained(
            self.model_path,
            num_senses=len(self.bert_io.sense2idx),
            num_args=len(self.bert_io.bio_arg2idx),
            lufrmap=self.bert_io.lufrmap,
            masking=self.masking,
            frargmap=self.bert_io.bio_frargmap)
        self.model.to(device)
        print('...loaded model path:', self.model_path)
        self.model.eval()  # inference mode
        print(self.model_path)
        print('...model is loaded')

        # Transition parameters for viterbi decoding over BIO labels.
        if self.srl != 'propbank-dp':
            self.transition_param = inference.get_transition_params(
                self.bert_io.idx2bio_arg.values())