Example #1
import numpy as np
import torch
import torch.nn as nn

# imports below are assumed from the mulrel-nel package layout
import nel.utils as utils
from nel.ntee import NTEE


def create_ntee_from_components(dir_path):
    word_dict_path = dir_path + '/dict.word'
    word_embs_path = dir_path + '/word_embeddings.npy'
    entity_dict_path = dir_path + '/dict.entity'
    entity_embs_path = dir_path + '/entity_embeddings.npy'
    W_path = dir_path + '/W.npy'
    b_path = dir_path + '/b.npy'

    print('load voca and embeddings')
    word_voca, word_embs = utils.load_voca_embs(word_dict_path, word_embs_path)
    entity_voca, entity_embs = utils.load_voca_embs(entity_dict_path,
                                                    entity_embs_path)
    config = {
        'word_embeddings': word_embs,
        'entity_embeddings': entity_embs,
        'word_voca': word_voca,
        'entity_voca': entity_voca,
        'emb_dims': word_embs.shape[1]
    }
    print("word_embs.shape:", word_embs.shape, "entity_embs.shape:",
          entity_embs.shape)

    # create model
    print('create model')
    model = NTEE(config)

    # load the pre-trained linear-layer parameters; .t() transposes W into
    # nn.Linear's (out_features, in_features) weight layout
    W = np.load(W_path)
    b = np.load(b_path)
    model.linear.weight = nn.Parameter(torch.FloatTensor(W).t())
    model.linear.bias = nn.Parameter(torch.FloatTensor(b))

    return model
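A minimal usage sketch for the function above: the directory is expected to contain dict.word, word_embeddings.npy, dict.entity, entity_embeddings.npy, W.npy, and b.npy (the paths hard-coded in the function body); the directory path itself is a hypothetical placeholder.

# hypothetical component directory containing the six files listed above
ntee_model = create_ntee_from_components('/path/to/ntee_components')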
Example #2
                    type=int,
                    help="number of LBP loops",
                    default=10)

# args for debugging
parser.add_argument("--print_rel", action='store_true')
parser.add_argument("--print_incorrect", action='store_true')

args = parser.parse_args()

if __name__ == "__main__":
    print('load conll at', datadir)
    conll = D.CoNLLDataset(datadir, person_path, conll_path)

    print('create model')
    word_voca, word_embeddings = utils.load_voca_embs(
        voca_emb_dir + 'dict.word', voca_emb_dir + 'word_embeddings.npy')
    print('word voca size', word_voca.size())
    snd_word_voca, snd_word_embeddings = utils.load_voca_embs(
        voca_emb_dir + '/glove/dict.word',
        voca_emb_dir + '/glove/word_embeddings.npy')
    print('snd word voca size', snd_word_voca.size())

    entity_voca, entity_embeddings = utils.load_voca_embs(
        voca_emb_dir + 'dict.entity',
        voca_emb_dir + 'entity_embeddings_0.2_11max.npy')
    _, aet_entity_embeddings = utils.load_voca_embs(
        voca_emb_dir + 'dict.entity',
        voca_emb_dir + 'aligned_atee_entity_vec_100d.npy')
    aet_word_voca, aet_word_embeddings = utils.load_voca_embs(
        voca_emb_dir + 'atee_type_dict_100d.txt',
        voca_emb_dir + 'atee_type_vec_100d.npy')
Example #3
                    type=int,
                    help="number of LBP loops",
                    default=10)

# args for debugging
parser.add_argument("--print_rel", action='store_true')
parser.add_argument("--print_incorrect", action='store_true')

args = parser.parse_args()

if __name__ == "__main__":
    print('load conll at', datadir)
    conll = D.CoNLLDataset(datadir, person_path, conll_path)

    print('create model')
    word_voca, word_embeddings = utils.load_voca_embs(
        voca_emb_dir + 'dict.word', voca_emb_dir + 'word_embeddings.npy')
    print('word voca size', word_voca.size())
    snd_word_voca, snd_word_embeddings = utils.load_voca_embs(
        voca_emb_dir + '/glove/dict.word',
        voca_emb_dir + '/glove/word_embeddings.npy')
    print('snd word voca size', snd_word_voca.size())
    dhl_voca_emb_dir = '/home/hldai/data/el/AIDA/deeped/'

    # entity_voca, entity_embeddings = utils.load_voca_embs(voca_emb_dir + 'dict.entity',
    #                                                       voca_emb_dir + 'entity_embeddings.npy')
    # entity_voca, entity_embeddings = utils.load_voca_embs(voca_emb_dir + 'entity-vocab-aida.txt',
    #                                                       voca_emb_dir + 'entity-vecs-aida.npy')
    # entity_voca, entity_embeddings = utils.load_voca_embs(dhl_voca_emb_dir + 'mrel-dhl-entity-vocab.txt',
    #                                                       dhl_voca_emb_dir + 'entity-vecs-dhl.npy')
    entity_voca, entity_embeddings = utils.load_voca_embs(
        dhl_voca_emb_dir + 'mrel-dhl-entity-vocab.txt',
        dhl_voca_emb_dir + 'entity-vecs-dhl.npy')
Example #4
                    default=1000000)
parser.add_argument("--dev_enr", type=str,
                    help="dev net path",
                    default=None)

# args for debugging
parser.add_argument("--print_rel", action='store_true')
parser.add_argument("--print_incorrect", action='store_true')

args = parser.parse_args()
if (args.semisup or args.multi_instance) and args.n_negs < 1:
    raise Exception("semi-supervised or multi-instance training requires at least 1 negative sample")

if __name__ == "__main__":
    print('create model')
    word_voca, word_embeddings = utils.load_voca_embs(voca_emb_dir + 'dict.word',
                                                      voca_emb_dir + 'word_embeddings.npy')
    print('word voca size', word_voca.size())
    snd_word_voca, snd_word_embeddings = utils.load_voca_embs(voca_emb_dir + '/glove/dict.word',
                                                              voca_emb_dir + '/glove/word_embeddings.npy')
    print('snd word voca size', snd_word_voca.size())

    entity_voca, entity_embeddings = utils.load_voca_embs(voca_emb_dir + 'dict.entity',
                                                          voca_emb_dir + 'entity_embeddings.npy')
    print('entity voca size', entity_voca.size())

    config = {'hid_dims': args.hid_dims,
              'emb_dims': entity_embeddings.shape[1],
              'freeze_embs': True,
              'tok_top_n': args.tok_top_n,
              'margin': args.margin,
              'word_voca': word_voca,
Example #5
import sys
from nel.vocabulary import Vocabulary
import nel.utils as utils
import numpy as np

if __name__ == "__main__":
    core_voca_path = sys.argv[1]
    word_embs_dir = sys.argv[2]

    print('load core voca from', core_voca_path)
    core_voca = Vocabulary.load(core_voca_path)

    print('load full voca and embs')
    full_voca, full_embs = utils.load_voca_embs(
        word_embs_dir + '/all_dict.word',
        word_embs_dir + '/all_word_embeddings.npy')

    print('select word ids')
    # keep the full-vocabulary row index of every core word that has an
    # embedding; words missing from the full vocabulary are skipped
    selected = []
    for word in core_voca.id2word:
        word_id = full_voca.word2id.get(word, -1)
        if word_id >= 0:
            selected.append(word_id)

    print('save...')
    selected_embs = full_embs[selected, :]
    np.save(word_embs_dir + '/word_embeddings', selected_embs)

    with open(word_embs_dir + '/dict.word', 'w', encoding='utf8') as f:
        for i in selected:
            # '1000' is a placeholder count so each line matches the
            # apparent word<TAB>count format of the dict files
            f.write(full_voca.id2word[i] + '\t1000\n')
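Every example on this page pairs a plain-text dict file with a .npy embedding matrix through utils.load_voca_embs. The real helper lives in the project's nel package; the sketch below only illustrates the assumed contract (row i of the matrix is the vector for token i of the dict file) and is not the project's code.

import numpy as np
from nel.vocabulary import Vocabulary

def load_voca_embs_sketch(voca_path, embs_path):
    # assumed contract: one token per line in the dict file, with row i of
    # the .npy matrix holding the embedding of token i
    voca = Vocabulary.load(voca_path)
    embs = np.load(embs_path)
    assert embs.shape[0] == voca.size(), 'vocab/embedding row mismatch'
    return voca, embs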
Example #6
File: main.py  Project: hldai/mulrel-nel
                    type=int,
                    help="number of LBP loops",
                    default=10)

# args for debugging
parser.add_argument("--print_rel", action='store_true')
parser.add_argument("--print_incorrect", action='store_true')

args = parser.parse_args()

if __name__ == "__main__":
    print('load conll at', datadir)
    conll = D.CoNLLDataset(datadir, person_path, conll_path)

    print('create model')
    word_voca, word_embeddings = utils.load_voca_embs(
        voca_emb_dir + 'dict.word', voca_emb_dir + 'word_embeddings.npy')
    print('word voca size', word_voca.size())
    snd_word_voca, snd_word_embeddings = utils.load_voca_embs(
        voca_emb_dir + '/glove/dict.word',
        voca_emb_dir + '/glove/word_embeddings.npy')
    print('snd word voca size', snd_word_voca.size())
    dhl_voca_emb_dir = '/home/data/hldai/el/AIDA/deeped/'
    deeped_emb_dir = '/home/data/hldai/el/deepedemb/'

    # entity_voca, entity_embeddings = utils.load_voca_embs(voca_emb_dir + 'dict.entity',
    #                                                       voca_emb_dir + 'entity_embeddings.npy')
    # entity_voca, entity_embeddings = utils.load_voca_embs(voca_emb_dir + 'entity-vocab-aida.txt',
    #                                                       voca_emb_dir + 'entity-vecs-aida.npy')
    entity_voca, entity_embeddings = utils.load_voca_embs(
        deeped_emb_dir + 'mrel-aidatac-entity-vocab.txt',
        deeped_emb_dir + 'entity-vecs-aidatac.npy')
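Because each dict file and its .npy matrix are loaded as separate files, a recurring invariant across all six examples is that the two stay row-aligned (Example #5 preserves it by writing dict.word in the same order as the embedding rows it selects). A quick sanity check after the loads above could be:

# optional sanity check: one embedding row per vocabulary entry
assert word_embeddings.shape[0] == word_voca.size()
assert entity_embeddings.shape[0] == entity_voca.size()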