def __init__(self, info_dict, trained_model_path=None):
    # Dataset metadata (tag lists, vocabulary sizes, ...) produced at dataset-construction time.
    self.info_dict = info_dict
    # BERT-based sentence tokenizer/embedder; its embedding size parametrizes the model below.
    self.bertinizer = SentenceBERTinizer()
    # Joint named-entity / relation extraction model.
    self.model = BERTGraphRel(
        num_ne=info_dict["entity_vsize"],
        num_rel=info_dict["rel_vsize"],
        embedding_size=self.bertinizer.embedding_size)
    # Optionally restore pretrained weights from disk.
    if trained_model_path is not None:
        self.load_trained_model(trained_model_path)
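The constructor calls load_trained_model() but its body is not shown here. Below is a minimal sketch of what such a loader typically looks like in PyTorch; the checkpoint format (a plain state_dict saved with torch.save) and the map_location choice are assumptions, not taken from the original code.

import torch

def load_trained_model(self, trained_model_path):
    # Sketch only (assumed implementation): expects a state_dict checkpoint.
    state_dict = torch.load(trained_model_path, map_location="cpu")
    self.model.load_state_dict(state_dict)
    self.model.eval()  # switch to inference mode after restoring weights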
Example 2
os.makedirs(dumb_dataset_dir, exist_ok=True)  # make sure the output directory exists

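# Output paths for the generated toy train/test splits.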
train_dumb_dataset_path = os.path.join(dumb_dataset_dir, "dumb_train.json")
test_dumb_dataset_path = os.path.join(dumb_dataset_dir, "dumb_test.json")

if __name__ == "__main__":

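    # Build the synthetic ("dumb") dataset generator from its configuration file.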
    ddsc = DumbDataSetConstructor(config_path=dumb_dataset_config_path)

    print("+ Preparing and saving train dataset.")
    ddsc.generate_dataset(n=6000)
    data, ne_list, rel_list = ddsc.get_dataset()
    ddsc.write_json_dataset(train_dumb_dataset_path)

    print("+ Preparing and saving descriptive json.")
    sentbertnizer = SentenceBERTinizer()
    er_aligner = tgtEntRelConstructor(tokenizer=sentbertnizer,
                                      ne_tags=ne_list,
                                      rel_tags=rel_list)

    num_ne = er_aligner.NE_vsize
    num_rel = er_aligner.REL_vsize

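    # Record dataset metadata (tag lists, vocabulary sizes, modified tag sets) for reuse at model-construction time (see Example 1).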
    info_collector = InfoCollector()
    info_collector.remember_info(
        entities=ne_list,
        relations=rel_list,
        entity_vsize=num_ne,
        rel_vsize=num_rel,
        mod_entities=er_aligner.NE_biotags,
        mod_relations=er_aligner.REL_mod_tags,