Ejemplo n.º 1
0
def main():
    """Train the entity-linking CNN for one epoch on hard-coded data files.

    Loads the word vectors and the entity representations through
    ``data_load`` and forwards everything to ``train_cnn_for_el``.

    ``sentence_len`` and ``sentence_pad_len`` are not defined in this function;
    they must already exist at module level when it runs.
    """
    word_vec_path = '/media/dhl/Data/el/word2vec/wiki_vectors.jbin'
    # 'wid_entity_rep_wiki50.bin' (loaded with True as the second argument) is
    # the other entity file this script has been run with.
    entity_rep_path = '/media/dhl/Data/el/vec_rep/wid_entity_rep_wiki50_cat.bin'
    train_path = '/media/dhl/Data/el/vec_rep/wiki_train_word_vec_indices_wiki50.td'
    # Validation runs on the TAC 2014 training file; the alternative is
    # 'wiki_val_word_vec_indices_wiki50.td' in the same directory.
    val_path = '/media/dhl/Data/el/vec_rep/tac_2014_training.bin'
    test_path = '/media/dhl/Data/el/vec_rep/wiki_test_word_vec_indices_wiki50.td'

    _unused, word_vecs = data_load.load_word_vectors(word_vec_path)
    word_vec_len = len(word_vecs[0])

    wid_idx_dict, entity_vecs = data_load.load_entities(entity_rep_path, False)

    num_val_candidates = num_test_candidates = 30

    # Input height handed to train_cnn_for_el: the sentence length plus
    # sentence_pad_len counted twice.
    img_h = sentence_len + 2 * sentence_pad_len

    train_cnn_for_el(
        train_path,
        val_path,
        num_val_candidates,
        test_path,
        num_test_candidates,
        img_h,
        word_vec_len,
        word_vecs,
        wid_idx_dict,
        entity_vecs,
        skip_width_loading=0,
        n_epochs=1)
def main():
    """Train the entity-linking CNN for one epoch, configured by a params file.

    The params file path is taken from ``sys.argv[1]`` and read with
    ``load_params``. The following keys are looked up in the result:
    'entity_side_cnn', 'word_vec_file', 'entity_rep_indices_file' (when
    'entity_side_cnn' is '1') or 'entity_rep_vec_file' (otherwise),
    'train_data_file', 'val_data_file', 'test_data_file',
    'training_part_size' and 'context_sentence_len'.

    If no params file is given on the command line, a message is printed and
    the function returns without doing anything else.

    Side effect: when the entity side uses a CNN, the module-level
    ``entity_rep_len`` is overwritten with the value returned by the loader.
    """
    global entity_rep_len

    if len(sys.argv) < 2:
        print('need params file')
        # Stop here: continuing would raise IndexError on sys.argv[1] below.
        return

    params = load_params(sys.argv[1])

    # The flag is stored as text; only the exact string '1' enables it.
    entity_side_cnn = params['entity_side_cnn'] == '1'
    word_vec_file_name = params['word_vec_file']

    # The entity representation file depends on whether the entity side is
    # modelled with a CNN or not.
    if entity_side_cnn:
        entity_rep_file_name = params['entity_rep_indices_file']
    else:
        entity_rep_file_name = params['entity_rep_vec_file']

    train_data_file_name = params['train_data_file']
    val_data_file_name = params['val_data_file']
    test_data_file_name = params['test_data_file']

    training_part_size = int(params['training_part_size'])
    sentence_len = int(params['context_sentence_len'])

    _, word_vecs = data_load.load_word_vectors(word_vec_file_name)
    word_vec_len = len(word_vecs[0])

    if entity_side_cnn:
        print('entity use cnn')
        wid_idx_dict, entity_vecs, entity_rep_len = \
            data_load.load_index_vec_of_entities_fixed_len(entity_rep_file_name)
    else:
        wid_idx_dict, entity_vecs = data_load.load_entities(
            entity_rep_file_name,
            False)

    num_val_candidates = 30
    num_test_candidates = 30
    skipwidth_loading = 0
    train_cnn_for_el(train_data_file_name,
                     val_data_file_name,
                     num_val_candidates,
                     test_data_file_name,
                     num_test_candidates,
                     sentence_len, word_vec_len,
                     word_vecs,
                     wid_idx_dict,
                     entity_vecs,
                     entity_side_cnn=entity_side_cnn,
                     gold_as_first_candidate=False,
                     skip_width_loading=skipwidth_loading,
                     n_epochs=1,
                     training_part_size=training_part_size)
Ejemplo n.º 3
0
def main():
    """Run one epoch of CNN entity-linking training driven by a params file.

    ``sys.argv[1]`` names the params file, which is read with ``load_params``.
    Keys read from the result: 'entity_side_cnn', 'word_vec_file',
    'entity_rep_indices_file' or 'entity_rep_vec_file' (chosen by the
    'entity_side_cnn' flag), 'train_data_file', 'val_data_file',
    'test_data_file', 'training_part_size' and 'context_sentence_len'.

    Without a params file argument the function prints 'need params file' and
    returns immediately.

    Side effect: in the entity-side-CNN configuration the module-level
    ``entity_rep_len`` is assigned from the loader's return value.
    """
    global entity_rep_len

    if len(sys.argv) < 2:
        print('need params file')
        # Bail out; falling through would fail with IndexError on sys.argv[1].
        return

    params = load_params(sys.argv[1])

    # Enabled only when the stored value is exactly the string '1'.
    entity_side_cnn = params['entity_side_cnn'] == '1'
    word_vec_file_name = params['word_vec_file']

    if entity_side_cnn:
        entity_rep_file_name = params['entity_rep_indices_file']
    else:
        entity_rep_file_name = params['entity_rep_vec_file']

    train_data_file_name = params['train_data_file']
    val_data_file_name = params['val_data_file']
    test_data_file_name = params['test_data_file']

    training_part_size = int(params['training_part_size'])
    sentence_len = int(params['context_sentence_len'])

    _, word_vecs = data_load.load_word_vectors(word_vec_file_name)
    word_vec_len = len(word_vecs[0])

    # Each configuration uses its own loader; only the CNN one also yields
    # the entity representation length.
    if entity_side_cnn:
        print('entity use cnn')
        wid_idx_dict, entity_vecs, entity_rep_len = \
            data_load.load_index_vec_of_entities_fixed_len(entity_rep_file_name)
    else:
        wid_idx_dict, entity_vecs = data_load.load_entities(
            entity_rep_file_name, False)

    num_val_candidates = 30
    num_test_candidates = 30
    skipwidth_loading = 0
    train_cnn_for_el(train_data_file_name,
                     val_data_file_name,
                     num_val_candidates,
                     test_data_file_name,
                     num_test_candidates,
                     sentence_len,
                     word_vec_len,
                     word_vecs,
                     wid_idx_dict,
                     entity_vecs,
                     entity_side_cnn=entity_side_cnn,
                     gold_as_first_candidate=False,
                     skip_width_loading=skipwidth_loading,
                     n_epochs=1,
                     training_part_size=training_part_size)
def main():
    """Train the entity-linking CNN for one epoch using hard-coded file sets.

    Passing '0' as the first command-line argument selects the files under
    /home/dhl/data; any other invocation uses the files under
    /media/dhl/Data/el.

    Entity representations are read with
    ``data_load.load_index_vec_of_entities_fixed_len``; the third value it
    returns is stored in the module-level ``entity_rep_len``.

    ``sentence_len`` and ``sentence_pad_len`` are not defined in this function
    and must already exist at module level.
    """
    global entity_rep_len

    # Only an explicit '0' switches away from the default (local) file set.
    use_home_files = len(sys.argv) > 1 and sys.argv[1] == '0'

    if use_home_files:
        word_vec_file_name = '/home/dhl/data/word_vec/wiki_vectors.jbin'
        entity_rep_file_name = '/home/dhl/data/vec_rep/wid_entity_rep_wiki50_indices_with_keywords_fixed_len_0kw.bin'
        train_data_file_name = '/home/dhl/data/vec_rep/wiki_train_word_vec_indices_wiki50.td'
        val_data_file_name = '/home/dhl/data/vec_rep/tac_2014_training.bin'
        test_data_file_name = '/home/dhl/data/vec_rep/tac_2014_eval.bin'
    else:
        word_vec_file_name = '/media/dhl/Data/el/word2vec/wiki_vectors.jbin'
        # 'wid_entity_rep_wiki50_indices.bin' in the same directory is the
        # other entity file this script has been pointed at.
        entity_rep_file_name = ('/media/dhl/Data/el/vec_rep/' +
                                'wid_entity_rep_wiki50_indices_with_keywords_fixed_len.bin')
        train_data_file_name = '/media/dhl/Data/el/vec_rep/wiki_train_word_vec_indices_wiki50.td'
        val_data_file_name = '/media/dhl/Data/el/vec_rep/tac_2014_training.bin'
        test_data_file_name = '/media/dhl/Data/el/vec_rep/tac_2014_eval.bin'

    _unused, word_vecs = data_load.load_word_vectors(word_vec_file_name)
    word_vec_len = len(word_vecs[0])

    loaded = data_load.load_index_vec_of_entities_fixed_len(entity_rep_file_name)
    wid_idx_dict, entity_vecs, entity_rep_len = loaded

    num_val_candidates = num_test_candidates = 30

    # Input height handed to train_cnn_for_el: the sentence length plus
    # sentence_pad_len counted twice.
    img_h = sentence_len + 2 * sentence_pad_len

    train_cnn_for_el(
        train_data_file_name,
        val_data_file_name,
        num_val_candidates,
        test_data_file_name,
        num_test_candidates,
        img_h,
        word_vec_len,
        word_vecs,
        wid_idx_dict,
        entity_vecs,
        gold_as_first_candidate=False,
        skip_width_loading=0,
        n_epochs=1)
Ejemplo n.º 5
0
def main():
    """Run one epoch of CNN entity-linking training on a hard-coded file set.

    The first command-line argument picks the file set: '0' selects the paths
    under /home/dhl/data, anything else (or no argument) selects the paths
    under /media/dhl/Data/el.

    The module-level ``entity_rep_len`` is assigned from the third value
    returned by ``data_load.load_index_vec_of_entities_fixed_len``.
    ``sentence_len`` and ``sentence_pad_len`` are read from module level; they
    are not set anywhere in this function.
    """
    global entity_rep_len

    # Order within each tuple: word vectors, entity representations,
    # training data, validation data, test data.
    if len(sys.argv) > 1 and sys.argv[1] == '0':
        file_set = (
            '/home/dhl/data/word_vec/wiki_vectors.jbin',
            '/home/dhl/data/vec_rep/wid_entity_rep_wiki50_indices_with_keywords_fixed_len_0kw.bin',
            '/home/dhl/data/vec_rep/wiki_train_word_vec_indices_wiki50.td',
            '/home/dhl/data/vec_rep/tac_2014_training.bin',
            '/home/dhl/data/vec_rep/tac_2014_eval.bin',
        )
    else:
        file_set = (
            '/media/dhl/Data/el/word2vec/wiki_vectors.jbin',
            # 'wid_entity_rep_wiki50_indices.bin' is the other entity file
            # this script has been pointed at.
            '/media/dhl/Data/el/vec_rep/' +
            'wid_entity_rep_wiki50_indices_with_keywords_fixed_len.bin',
            '/media/dhl/Data/el/vec_rep/wiki_train_word_vec_indices_wiki50.td',
            '/media/dhl/Data/el/vec_rep/tac_2014_training.bin',
            '/media/dhl/Data/el/vec_rep/tac_2014_eval.bin',
        )
    (word_vec_path, entity_rep_path,
     train_path, val_path, test_path) = file_set

    _unused, word_vecs = data_load.load_word_vectors(word_vec_path)
    word_vec_len = len(word_vecs[0])

    wid_idx_dict, entity_vecs, entity_rep_len = \
        data_load.load_index_vec_of_entities_fixed_len(entity_rep_path)

    candidates_per_mention = 30

    # Sentence length plus sentence_pad_len counted twice.
    img_h = sentence_len + 2 * sentence_pad_len

    train_cnn_for_el(
        train_path,
        val_path,
        candidates_per_mention,   # validation candidates
        test_path,
        candidates_per_mention,   # test candidates
        img_h,
        word_vec_len,
        word_vecs,
        wid_idx_dict,
        entity_vecs,
        gold_as_first_candidate=False,
        skip_width_loading=0,
        n_epochs=1)