Beispiel #1
0
    def load_edge_data(self,
                       load_geo_query=False,
                       test_query_keep_graph=False):
        '''
        Load the 1-d (single edge) queries for the train/val/test splits.

        The three pickles are read from self.args.data_dir; when
        load_geo_query is true the "-geo" variants of the files are read and
        the results are stored on the *_geo attributes instead of the plain
        ones.

        Layout of the loaded objects (as noted by the original author):
            train_queries:     train_queries[query_type][formula] = list of query
            val_queries:       val_queries[one_neg/full_neg][query_type][formula] = list of query
            test_queries:      test_queries[one_neg/full_neg][query_type][formula] = list of query

        Args:
            load_geo_query: select the "-geo" files and the *_geo attributes
            test_query_keep_graph: forwarded as keep_graph to
                load_test_queries_by_formula for the val and test files
        '''
        suffix = "-geo" if load_geo_query else ""
        data_dir = self.args.data_dir

        print("Loading edge data..")

        print("Loading training edge data..")
        train_path = data_dir + "/train_edges{:s}.pkl".format(suffix)
        loaded_train = load_queries_by_formula(train_path)

        print("Loading validation edge  data..")
        val_path = data_dir + "/val_edges{:s}.pkl".format(suffix)
        loaded_val = load_test_queries_by_formula(
            val_path, keep_graph=test_query_keep_graph)

        print("Loading testing edge data..")
        test_path = data_dir + "/test_edges{:s}.pkl".format(suffix)
        loaded_test = load_test_queries_by_formula(
            test_path, keep_graph=test_query_keep_graph)

        # Plain queries go on the default attributes; geo queries are kept
        # separately so both sets can be held at the same time.
        if not load_geo_query:
            self.train_queries = loaded_train
            self.val_queries = loaded_val
            self.test_queries = loaded_test
            return

        self.train_queries_geo = loaded_train
        self.val_queries_geo = loaded_val
        self.test_queries_geo = loaded_test
Beispiel #2
0
def _str2bool(value):
    """Convert a command-line string into a bool.

    ``type=bool`` makes argparse call ``bool()`` on the raw string, so every
    non-empty value -- including "False" and "0" -- parses as True. This
    converter interprets the usual spellings instead.
    """
    if isinstance(value, bool):
        return value
    text = value.strip().lower()
    if text in ("true", "t", "yes", "y", "1"):
        return True
    # The empty string stays False, as it was under ``bool("")``.
    if text in ("false", "f", "no", "n", "0", ""):
        return False
    # argparse reports a ValueError raised by a ``type`` callable as a
    # regular usage error for the offending argument.
    raise ValueError("expected a boolean value, got {!r}".format(value))


parser.add_argument("--log_dir", type=str, default="./log")
parser.add_argument("--model_dir", type=str, default="./model")
parser.add_argument("--decoder", type=str, default="bilinear")
parser.add_argument("--inter_decoder", type=str, default="mean")
parser.add_argument("--opt", type=str, default="adam")
# Parsed with _str2bool so that "--pretrain False" really yields False.
parser.add_argument("--pretrain", type=_str2bool, default=False)
args = parser.parse_args()

# --- Graph -----------------------------------------------------------------
print("Loading graph data..")
graph, feature_modules, node_maps = load_graph(args.data_dir, args.embed_dim)
if args.cuda:
    graph.features = cudify(feature_modules, node_maps)
# Every key yielded by graph.relations maps to the same embedding size.
out_dims = dict.fromkeys(graph.relations, args.embed_dim)

# --- Single-edge queries -----------------------------------------------------
print("Loading edge data..")
train_queries = load_queries_by_formula(args.data_dir + "/train_edges.pkl")
val_queries = load_test_queries_by_formula(args.data_dir + "/val_edges.pkl")
test_queries = load_test_queries_by_formula(args.data_dir + "/test_edges.pkl")

# --- Query files numbered 2 and 3 ------------------------------------------
# Each numbered file is merged into the dicts loaded above; the val/test
# objects are merged separately under their "one_neg" and "full_neg" keys.
print("Loading query data..")
for file_idx in (2, 3):
    train_part = load_queries_by_formula(
        args.data_dir + "/train_queries_{:d}.pkl".format(file_idx))
    train_queries.update(train_part)

    val_part = load_test_queries_by_formula(
        args.data_dir + "/val_queries_{:d}.pkl".format(file_idx))
    for neg_kind in ("one_neg", "full_neg"):
        val_queries[neg_kind].update(val_part[neg_kind])

    test_part = load_test_queries_by_formula(
        args.data_dir + "/test_queries_{:d}.pkl".format(file_idx))
    for neg_kind in ("one_neg", "full_neg"):
        test_queries[neg_kind].update(test_part[neg_kind])


enc = get_encoder(args.depth, graph, out_dims, feature_modules, args.cuda, args.beta)
Beispiel #3
0
    def load_multi_edge_query_data(self,
                                   load_geo_query=False,
                                   test_query_keep_graph=False):
        '''
        Load multi edge query for train/val/test and merge them into the
        query dicts already stored on this object.

        For i in 2..3 the files train_queries_{i}, val_queries_{i} and
        test_queries_{i} (with the "-geo" postfix when load_geo_query is
        true) are looked up in self.args.data_dir. A file that does not
        exist is reported with a message and skipped. Train queries are
        merged with dict.update(); val/test queries are merged separately
        under their "one_neg" and "full_neg" keys.

        When self.args.kg_train is set, the train_inter_queries_{arity} files
        for arity 4..self.args.max_arity are merged into the train queries
        as well (these files are loaded without an existence check).

        Args:
            load_geo_query: use the "-geo" files and merge into the
                train/val/test *_geo attributes instead of the plain ones
            test_query_keep_graph: forwarded as keep_graph to
                load_test_queries_by_formula for the val and test files

        Raises:
            Exception: if self.args.kg_train is set and
                self.args.max_arity < 4 (raised after the 2..3 files
                have been processed)
        '''
        if load_geo_query:
            file_postfix = "-geo"
            # Geo queries are merged into the *_geo dicts for all three
            # splits, so they never leak into the non-geo training set.
            train_queries = self.train_queries_geo
            val_queries = self.val_queries_geo
            test_queries = self.test_queries_geo
        else:
            file_postfix = ""
            train_queries = self.train_queries
            val_queries = self.val_queries
            test_queries = self.test_queries

        print("Loading {:s} query data..".format(file_postfix))
        for i in range(2, 4):
            print("Loading training {:s} {:d} triple data..".format(
                file_postfix, i))
            train_queries_file = self.args.data_dir + "/train_queries_{:d}{:s}.pkl".format(
                i, file_postfix)
            if path.exists(train_queries_file):
                train_queries.update(
                    load_queries_by_formula(train_queries_file))
            else:
                print("{} no exist!".format(train_queries_file))

            print("Loading validate {:s} {:d} triple data..".format(
                file_postfix, i))
            val_queries_file = self.args.data_dir + "/val_queries_{:d}{:s}.pkl".format(
                i, file_postfix)
            if path.exists(val_queries_file):
                i_val_queries = load_test_queries_by_formula(
                    val_queries_file, keep_graph=test_query_keep_graph)
                val_queries["one_neg"].update(i_val_queries["one_neg"])
                val_queries["full_neg"].update(i_val_queries["full_neg"])
            else:
                print("{} no exist!".format(val_queries_file))

            print("Loading testing {:s} {:d} triple data..".format(
                file_postfix, i))
            test_queries_file = self.args.data_dir + "/test_queries_{:d}{:s}.pkl".format(
                i, file_postfix)
            if path.exists(test_queries_file):
                i_test_queries = load_test_queries_by_formula(
                    test_queries_file, keep_graph=test_query_keep_graph)
                test_queries["one_neg"].update(i_test_queries["one_neg"])
                test_queries["full_neg"].update(i_test_queries["full_neg"])
            else:
                print("{} no exist!".format(test_queries_file))

        if self.args.kg_train:
            print(
                "Loading x-inter train {:s} query data..".format(file_postfix))
            if self.args.max_arity < 4:
                raise Exception("for full KG train, arity should be >= 4")
            for arity in range(4, self.args.max_arity + 1):
                print("Loading training {:s} {:d}-inter query data..".format(
                    file_postfix, arity))
                train_queries.update(
                    load_queries_by_formula(self.args.data_dir +
                                            "/train_inter_queries_{:d}{:s}.pkl"
                                            .format(arity, file_postfix)))