예제 #1
0
def load_data(args):
    """Build the three corpora and the initial embedding tensors.

    Reads the dataset under ``args.data`` through ``build_data`` and wraps
    the entity-to-type, entity-relation-entity and type-relation-type splits
    in ``Corpus_e2t`` / ``Corpus`` objects.

    Entity and relation embeddings are loaded from ``entity2vec.txt`` and
    ``relation2vec.txt`` (via ``init_embeddings``) when
    ``args.pretrained_emb`` is truthy, otherwise they are drawn from a
    standard normal distribution.  The rdf-relation and type embeddings are
    always drawn randomly, because no pretrained source is read for them.

    Args:
        args: Run configuration.  The attributes read here are ``data``,
            ``pretrained_emb``, ``entity_embedding_size``,
            ``type_embedding_size``, ``batch_size_gat``,
            ``valid_invalid_ratio_gat`` and ``get_2hop``.

    Returns:
        A 7-tuple ``(corpus_e2t, corpus_ere, corpus_trt, entity_embeddings,
        relation_embeddings, rdf_relation_embeddings, type_embeddings)``
        where the four embeddings are ``torch.FloatTensor`` instances.
    """
    (train_data_e2t, validation_data_e2t, test_data_e2t,
     train_data_ere, validation_data_ere, test_data_ere,
     train_data_trt, validation_data_trt, test_data_trt,
     entity2id, relation2id, type2id, headTailSelector,
     unique_entities_train_e2t, unique_types_train_e2t,
     unique_entities_train_ere, unique_types_train_trt) = build_data(
         args.data, is_unweigted=False, directed=True)

    if args.pretrained_emb:
        entity_embeddings, relation_embeddings = init_embeddings(
            os.path.join(args.data, 'entity2vec.txt'),
            os.path.join(args.data, 'relation2vec.txt'))
        print("Initialised relations and entities from TransE")
    else:
        entity_embeddings = np.random.randn(len(entity2id),
                                            args.entity_embedding_size)
        # Relation embeddings use the entity embedding width.
        relation_embeddings = np.random.randn(len(relation2id),
                                              args.entity_embedding_size)
        print("Initialised relations and entities randomly")

    # These two were previously created only in the random branch, which left
    # them undefined (UnboundLocalError at the return) whenever pretrained
    # embeddings were requested.  Creating them here covers both branches and
    # keeps the order of random draws unchanged for the random branch.
    rdf_relation_embeddings = np.random.randn(1, args.type_embedding_size)
    type_embeddings = np.random.randn(len(type2id),
                                      args.type_embedding_size)

    corpus_e2t = Corpus_e2t(args, train_data_e2t, validation_data_e2t,
                            test_data_e2t, entity2id, type2id,
                            args.batch_size_gat, args.valid_invalid_ratio_gat,
                            unique_entities_train_e2t, unique_types_train_e2t,
                            args.get_2hop)

    corpus_ere = Corpus(args, train_data_ere, validation_data_ere,
                        test_data_ere, entity2id, relation2id,
                        headTailSelector, args.batch_size_gat,
                        args.valid_invalid_ratio_gat,
                        unique_entities_train_ere, args.get_2hop)

    corpus_trt = Corpus(args, train_data_trt, validation_data_trt,
                        test_data_trt, type2id, relation2id, headTailSelector,
                        args.batch_size_gat, args.valid_invalid_ratio_gat,
                        unique_types_train_trt, args.get_2hop)

    return (corpus_e2t, corpus_ere, corpus_trt,
            torch.FloatTensor(entity_embeddings),
            torch.FloatTensor(relation_embeddings),
            torch.FloatTensor(rdf_relation_embeddings),
            torch.FloatTensor(type_embeddings))
예제 #2
0
def load_data(args):
    """Create the corpus and the starting entity/relation embeddings.

    The dataset under ``args.data`` is read with ``build_data``.  Embeddings
    come from ``entity2vec.txt`` / ``relation2vec.txt`` when
    ``args.pretrained_emb`` is truthy; otherwise both matrices are sampled
    from a standard normal with ``args.embedding_size`` columns.

    Returns:
        ``(corpus, entity_embeddings, relation_embeddings)`` with both
        embeddings converted to ``torch.FloatTensor``.
    """
    built = build_data(args.data, is_unweigted=False, directed=True)
    (train_split, valid_split, test_split, entity2id, relation2id,
     head_tail_selector, train_entities) = built

    if not args.pretrained_emb:
        ent_matrix = np.random.randn(len(entity2id), args.embedding_size)
        rel_matrix = np.random.randn(len(relation2id), args.embedding_size)
        print("Initialised relations and entities randomly")
    else:
        entity_file = os.path.join(args.data, 'entity2vec.txt')
        relation_file = os.path.join(args.data, 'relation2vec.txt')
        ent_matrix, rel_matrix = init_embeddings(entity_file, relation_file)
        print("Initialised relations and entities from TransE")

    corpus = Corpus(
        args,
        train_split,
        valid_split,
        test_split,
        entity2id,
        relation2id,
        head_tail_selector,
        args.batch_size_gat,
        args.valid_invalid_ratio_gat,
        train_entities,
        args.get_2hop,
    )

    entity_tensor = torch.FloatTensor(ent_matrix)
    relation_tensor = torch.FloatTensor(rel_matrix)
    return corpus, entity_tensor, relation_tensor
예제 #3
0
    def __init__(self, args):
        """Keep the run arguments and build the corpus from ``args.data``.

        Sets ``self.args``, ``self.directory`` (the data directory taken from
        ``args.data``) and ``self.corpus`` (a ``Corpus`` built from the
        values returned by ``build_data``).
        """
        self.args = args
        self.directory = args.data

        # build_data hands back seven values; they are unpacked by name so
        # the Corpus call below stays readable.
        built = build_data(args.data, is_unweigted=False, directed=True)
        (train_split, valid_split, test_split, entity2id, relation2id,
         head_tail_selector, train_entities) = built

        self.corpus = Corpus(
            args,
            train_split,
            valid_split,
            test_split,
            entity2id,
            relation2id,
            head_tail_selector,
            args.batch_size,
            args.valid_invalid_ratio,
            train_entities,
            args.get_2hop,
        )
예제 #4
0
파일: main.py 프로젝트: gecongcong/TransGAT
def load_data(args):
    """Build the corpus, initial embeddings and optional 2-hop neighbours.

    The dataset under ``args.data`` is read with ``build_data``.  Embeddings
    are loaded from ``entity2vec.txt`` / ``relation2vec.txt`` when
    ``args.pretrained_emb`` is truthy, otherwise sampled from a standard
    normal with ``args.embedding_size`` columns.

    When ``args.get_2hop`` is truthy, ``corpus.node_neighbors_2hop`` is
    pickled to ``2hop.pickle`` inside ``args.data``.  When ``args.use_2hop``
    is truthy, that file is read back.

    Args:
        args: Run configuration.  The attributes read here are ``data``,
            ``pretrained_emb``, ``embedding_size``, ``batch_size_gat``,
            ``valid_invalid_ratio_gat``, ``get_2hop`` and ``use_2hop``.

    Returns:
        ``(corpus, entity_embeddings, relation_embeddings,
        node_neighbors_2hop)``.  The embeddings are built with
        ``torch.cuda.FloatTensor``.  ``node_neighbors_2hop`` is the unpickled
        object when ``args.use_2hop`` is truthy and ``None`` otherwise.
    """
    (train_data, validation_data, test_data, entity2id, relation2id,
     headTailSelector, unique_entities_train) = build_data(
         args.data, is_unweigted=False, directed=True)

    if args.pretrained_emb:
        entity_embeddings, relation_embeddings = init_embeddings(
            os.path.join(args.data, 'entity2vec.txt'),
            os.path.join(args.data, 'relation2vec.txt'))
        print("Initialised relations and entities from TransE")
    else:
        entity_embeddings = np.random.randn(
            len(entity2id), args.embedding_size)
        relation_embeddings = np.random.randn(
            len(relation2id), args.embedding_size)
        print("Initialised relations and entities randomly")

    corpus = Corpus(args, train_data, validation_data, test_data, entity2id,
                    relation2id, headTailSelector, args.batch_size_gat,
                    args.valid_invalid_ratio_gat, unique_entities_train,
                    args.get_2hop)

    # Built the same way as the embedding paths above.
    two_hop_path = os.path.join(args.data, "2hop.pickle")

    if args.get_2hop:
        with open(two_hop_path, 'wb') as handle:
            pickle.dump(corpus.node_neighbors_2hop, handle,
                        protocol=pickle.HIGHEST_PROTOCOL)

    # Default to None so the return below does not raise UnboundLocalError
    # when 2-hop neighbours were not requested.
    node_neighbors_2hop = None
    if args.use_2hop:
        print("Opening node_neighbors pickle object")
        # NOTE(security): pickle.load can execute arbitrary code; only point
        # args.data at directories whose 2hop.pickle you created yourself.
        with open(two_hop_path, 'rb') as handle:
            node_neighbors_2hop = pickle.load(handle)

    return (corpus,
            torch.cuda.FloatTensor(entity_embeddings),
            torch.cuda.FloatTensor(relation_embeddings),
            node_neighbors_2hop)
예제 #5
0
def load_data(args):
    """Create the corpus and the starting entity/relation embeddings.

    The dataset under ``args.data`` is read with ``build_data`` (undirected).
    As a side effect the id-to-entity and id-to-relation mappings returned by
    ``build_data`` are stored on ``args.id2entity`` and ``args.id2relation``.

    With ``args.pretrained_emb`` truthy, embeddings are requested from
    ``init_embeddings`` with only the entity file (the relation file argument
    is ``None``).  Any matrix that comes back with zero rows is replaced by a
    random one.  Otherwise both matrices are sampled randomly.

    Returns:
        ``(corpus, entity_embeddings, relation_embeddings)`` with both
        embeddings converted to ``torch.FloatTensor``.
    """
    built = build_data(args.data, is_unweigted=False, directed=False)
    (train_data, validation_data, test_data, entity2id, relation2id,
     args.id2entity, args.id2relation, headTailSelector,
     unique_entities_train, unique_relations_train) = built

    print('Training size', len(train_data),
          'Val size', len(validation_data),
          'Test size', len(test_data))

    if not args.pretrained_emb:
        ent_matrix = np.random.randn(len(entity2id), args.embedding_size)
        rel_matrix = np.random.randn(len(relation2id), args.embedding_size)
        print("Initialised relations and entities randomly")
    else:
        # Only the entity file is passed; no relation embedding file is used.
        entity_file = os.path.join(args.data, 'entity2vec.txt')
        ent_matrix, rel_matrix = init_embeddings(entity_file, None)
        # Fall back to random values for whichever matrix came back empty.
        if ent_matrix.shape[0] == 0:
            ent_matrix = np.random.randn(len(entity2id), args.embedding_size)
        if rel_matrix.shape[0] == 0:
            rel_matrix = np.random.randn(len(relation2id),
                                         args.embedding_size)
        print("Initialised relations and entities from SSP")

    corpus = Corpus(
        args,
        train_data,
        validation_data,
        test_data,
        entity2id,
        relation2id,
        headTailSelector,
        args.batch_size_gat,
        args.valid_invalid_ratio_gat,
        unique_entities_train,
        unique_relations_train,
        args.get_2hop,
    )

    entity_tensor = torch.FloatTensor(ent_matrix)
    relation_tensor = torch.FloatTensor(rel_matrix)
    return corpus, entity_tensor, relation_tensor