Exemple #1
0
def make_train_test_edge_data(data_dir):
    '''
    Split the graph's edges into train/validation/test 1-chain queries and
    pickle them under data_dir.

    The first 10% of graph.get_all_edges() is held out and each held-out edge
    gets negative samples (requested with the argument 100). The first 10% of
    the held-out queries go to val_edges.pkl, the remainder to test_edges.pkl.
    The held-out edges are then removed from the graph and every remaining
    edge is written to train_edges.pkl as a query without negative samples.

    data_dir: directory handed to load_graph; the three .pkl files are
        written directly inside it.
    '''
    def dump_queries(queries, file_name):
        # pickle.HIGHEST_PROTOCOL is a binary format, so the file has to be
        # opened in binary mode ("wb"); the with-block also guarantees the
        # handle is flushed and closed before the function moves on.
        with open(data_dir + "/" + file_name, "wb") as out_file:
            pickle.dump([q.serialize() for q in queries], out_file,
                        protocol=pickle.HIGHEST_PROTOCOL)

    print("Loading graph...")
    graph, _, _ = load_graph(data_dir, 10)
    print("Getting all edges...")
    edges = graph.get_all_edges()
    split_point = int(0.1 * len(edges))
    val_test_edges = edges[:split_point]
    print("Getting negative samples...")
    val_test_edge_negsamples = [
        graph.get_negative_edge_samples(e, 100) for e in val_test_edges
    ]
    print("Making and storing test queries.")
    val_test_edge_queries = [
        Query(("1-chain", edge), neg_samples, None, 100)
        for edge, neg_samples in zip(val_test_edges, val_test_edge_negsamples)
    ]
    val_split_point = int(0.1 * len(val_test_edge_queries))
    dump_queries(val_test_edge_queries[:val_split_point], "val_edges.pkl")
    dump_queries(val_test_edge_queries[val_split_point:], "test_edges.pkl")

    # Held-out edges are removed only after their queries are stored, so the
    # training edges read back from the graph exclude them.
    print("Removing test edges...")
    graph.remove_edges(val_test_edges)
    print("Making and storing train queries.")
    train_queries = [
        Query(("1-chain", e), None, None) for e in graph.get_all_edges()
    ]
    dump_queries(train_queries, "train_edges.pkl")
Exemple #2
0
def load_queries_by_type(data_file, keep_graph=True):
    '''
    Read a pickled query file and group the queries by query type.

    data_file: path of a pickle file holding an iterable of serialized queries.
    keep_graph: forwarded unchanged to Query.deserialize.

    Returns a defaultdict(list) mapping query.formula.query_type to the list
    of deserialized Query objects of that type, in file order.

    NOTE: pickle.load can execute arbitrary code; only load trusted files.
    '''
    # The with-block closes the file as soon as the pickle has been read
    # instead of leaving the handle to the garbage collector.
    with open(data_file, "rb") as query_file:
        raw_info = pickle.load(query_file)
    queries = defaultdict(list)
    for raw_query in raw_info:
        query = Query.deserialize(raw_query, keep_graph=keep_graph)
        queries[query.formula.query_type].append(query)
    return queries
Exemple #3
0
def load_queries_by_formula(data_file):
    '''
    Read a pickled query file and group the queries by type and formula.

    data_file: path of a pickle file holding an iterable of serialized queries.

    Returns a two-level defaultdict:
        key: query.formula.query_type
        value: a defaultdict
            key: query.formula
            value: list of deserialized Query objects, in file order

    NOTE: pickle.load can execute arbitrary code; only load trusted files.
    '''
    # Close the file deterministically once the pickle has been read.
    with open(data_file, "rb") as query_file:
        raw_info = pickle.load(query_file)
    queries = defaultdict(lambda: defaultdict(list))
    for raw_query in raw_info:
        query = Query.deserialize(raw_query)
        queries[query.formula.query_type][query.formula].append(query)
    return queries
Exemple #4
0
def load_test_queries_by_formula(data_file, keep_graph=False):
    '''
    4. read query method
    Read query file as a dict; returns None when data_file does not exist.
    key: "full_neg" (full negative sample) or "one_neg" (only one negative sample)
    value: a dict()
        key: query type
        value: a dict()
            key: formula template
            value: a list of Query objects

    A raw query is filed under "full_neg" when len(raw_query[1]) > 1 and under
    "one_neg" otherwise (raw_query[1] is presumably the serialized negative
    samples -- confirm against Query.serialize).

    keep_graph: forwarded unchanged to Query.deserialize.

    NOTE: pickle.load can execute arbitrary code; only load trusted files.
    '''
    if not path.exists(data_file):
        return None

    # Close the file deterministically once the pickle has been read.
    with open(data_file, "rb") as query_file:
        raw_info = pickle.load(query_file)

    queries = {
        "full_neg": defaultdict(lambda: defaultdict(list)),
        "one_neg": defaultdict(lambda: defaultdict(list))
    }
    for raw_query in raw_info:
        neg_type = "full_neg" if len(raw_query[1]) > 1 else "one_neg"
        query = Query.deserialize(raw_query, keep_graph=keep_graph)
        queries[neg_type][query.formula.query_type][query.formula].append(
            query)
    return queries
Exemple #5
0
def make_train_test_edge_data(data_dir, neg_sample_size):
    '''
    Build train/valid/test 1-chain queries from pre-split triples and pickle
    them under data_dir.

    1. Load graph-data.pkl for the same format
    2. Load training/valid/testing triples, a list of edge (head id, (domain type, predicate, range type), tail id)
    3. For every valid/test triple make one query with neg_sample_size negative
       samples and one query with a single negative sample; both kinds are
       stored in the same file (val_edges.pkl / test_edges.pkl).
    4. Store every training triple as a query without negative samples in
       train_edges.pkl.

    data_dir: directory holding train_triples.pkl, valid_triples.pkl and
        test_triples.pkl; the output files are written directly inside it.
    neg_sample_size: number of negative samples requested for the "full"
        valid/test queries.
    '''
    def dump_queries(queries, file_name):
        # pickle.HIGHEST_PROTOCOL is a binary format, so the file has to be
        # opened in binary mode ("wb"); the with-block also guarantees the
        # handle is flushed and closed.
        with open(data_dir + "/" + file_name, "wb") as out_file:
            pickle.dump([q.serialize() for q in queries], out_file,
                        protocol=pickle.HIGHEST_PROTOCOL)

    print("Loading graph...")
    graph, _, _ = load_graph(data_dir, 10)

    print("Load training/valid/testing triples...")
    train_triples = pickle_load(data_dir + "/train_triples.pkl")
    valid_triples = pickle_load(data_dir + "/valid_triples.pkl")
    test_triples = pickle_load(data_dir + "/test_triples.pkl")

    print("Getting full negative samples (for APR evaluation) and make queries...")
    valid_queries = [make_valid_test_edge_from_triple(graph, edge, neg_sample_size) for edge in valid_triples]
    test_queries = [make_valid_test_edge_from_triple(graph, edge, neg_sample_size) for edge in test_triples]

    print("Getting one negative samples (for AUC evaluation) and make queries...")
    valid_queries += [make_valid_test_edge_from_triple(graph, edge, 1) for edge in valid_triples]
    test_queries += [make_valid_test_edge_from_triple(graph, edge, 1) for edge in test_triples]

    print("Dumping valid/test 1-chain queries")
    dump_queries(valid_queries, "val_edges.pkl")
    dump_queries(test_queries, "test_edges.pkl")

    print("Dumping train 1-chain queries")
    train_queries = [Query(("1-chain", e), None, None, keep_graph=True) for e in train_triples]
    dump_queries(train_queries, "train_edges.pkl")

    print("Finish making training/valid/testing 1-chain queries")
Exemple #6
0
def load_queries(data_file, keep_graph=False):
    '''
    1. read query method
    Read query file as a list of Query object

    data_file: path of a pickle file holding an iterable of serialized queries.
    keep_graph: forwarded unchanged to Query.deserialize.

    NOTE: pickle.load can execute arbitrary code; only load trusted files.
    '''
    # Close the file deterministically once the pickle has been read.
    with open(data_file, "rb") as query_file:
        raw_info = pickle.load(query_file)
    return [
        Query.deserialize(info, keep_graph=keep_graph) for info in raw_info
    ]
Exemple #7
0
def load_test_queries_by_formula(data_file):
    '''
    Read a pickled test-query file and group the queries by negative-sample
    kind, query type and formula.

    Returns a dict:
        key: "full_neg" when len(raw_query[1]) > 1, otherwise "one_neg"
        value: a defaultdict
            key: query.formula.query_type
            value: a defaultdict
                key: query.formula
                value: list of deserialized Query objects, in file order

    NOTE: pickle.load can execute arbitrary code; only load trusted files.
    '''
    # Close the file deterministically once the pickle has been read.
    with open(data_file, "rb") as query_file:
        raw_info = pickle.load(query_file)
    queries = {"full_neg" : defaultdict(lambda : defaultdict(list)),
            "one_neg" : defaultdict(lambda : defaultdict(list))}
    for raw_query in raw_info:
        neg_type = "full_neg" if len(raw_query[1]) > 1 else "one_neg"
        query = Query.deserialize(raw_query)
        queries[neg_type][query.formula.query_type][query.formula].append(query)
    return queries
Exemple #8
0
def load_queries_by_type(data_file, keep_graph=True):
    '''
    3. read query method
    Read query file as a dict
    key: query type
    value: a list of Query object

    data_file: path of a pickle file holding an iterable of serialized queries.
    keep_graph: forwarded unchanged to Query.deserialize.

    NOTE: pickle.load can execute arbitrary code; only load trusted files.
    '''
    # Close the file deterministically once the pickle has been read.
    with open(data_file, "rb") as query_file:
        raw_info = pickle.load(query_file)
    queries = defaultdict(list)
    for raw_query in raw_info:
        query = Query.deserialize(raw_query, keep_graph=keep_graph)
        queries[query.formula.query_type].append(query)
    return queries
Exemple #9
0
def load_test_queries_by_formula(data_file):
    '''
    Read a pickled test-query file and group the queries by negative-sample
    kind, query type and formula. Every raw query is echoed to stdout before
    it is deserialized.

    Returns a dict:
        key: "full_neg" when len(raw_query[1]) > 1, otherwise "one_neg"
        value: a defaultdict
            key: query.formula.query_type
            value: a defaultdict
                key: query.formula
                value: list of deserialized Query objects, in file order

    NOTE: pickle.load can execute arbitrary code; only load trusted files.
    '''
    # Close the file deterministically once the pickle has been read.
    with open(data_file, "rb") as query_file:
        raw_info = pickle.load(query_file)
    queries = {
        "full_neg": defaultdict(lambda: defaultdict(list)),
        "one_neg": defaultdict(lambda: defaultdict(list))
    }
    for raw_query in raw_info:
        neg_type = "full_neg" if len(raw_query[1]) > 1 else "one_neg"
        # TODO Gad: I have changed keep_graph to True to keep the nodes
        # Debug echo; written as a call so it parses on Python 2 and 3 alike.
        print(raw_query)
        query = Query.deserialize(raw_query, keep_graph=True)
        queries[neg_type][query.formula.query_type][query.formula].append(
            query)
    return queries
Exemple #10
0
def load_queries_by_formula(data_file, keep_graph=True):
    '''
    2. read query method
    Read query file as a dict; returns None when data_file does not exist.
    key: query type
    value: a dict()
        key: formula template
        value: a list of Query objects

    keep_graph: forwarded unchanged to Query.deserialize.

    NOTE: pickle.load can execute arbitrary code; only load trusted files.
    '''
    if not path.exists(data_file):
        return None

    # Close the file deterministically once the pickle has been read.
    with open(data_file, "rb") as query_file:
        raw_info = pickle.load(query_file)

    queries = defaultdict(lambda: defaultdict(list))
    for raw_query in raw_info:
        query = Query.deserialize(raw_query, keep_graph=keep_graph)
        queries[query.formula.query_type][query.formula].append(query)
    return queries
Exemple #11
0
def load_queries(data_file, keep_graph=False):
    '''
    Read a pickled query file as a list of Query objects, in file order.

    data_file: path of a pickle file holding an iterable of serialized queries.
    keep_graph: forwarded unchanged to Query.deserialize.

    NOTE: pickle.load can execute arbitrary code; only load trusted files.
    '''
    # Close the file deterministically once the pickle has been read.
    with open(data_file, "rb") as query_file:
        raw_info = pickle.load(query_file)
    return [
        Query.deserialize(info, keep_graph=keep_graph) for info in raw_info
    ]
Exemple #12
0
def make_valid_test_edge_from_triple(graph, edge, neg_sample_size):
    '''
    Build a "1-chain" Query for one edge together with its negative samples.

    graph: object providing get_negative_edge_samples(edge, n).
    edge: the edge (triple) the query is built around.
    neg_sample_size: passed both to the negative sampler and to Query as its
        fourth positional argument.

    The third Query argument is always None (presumably the hard negative
    samples -- confirm against Query), and keep_graph is always True.
    '''
    negatives = graph.get_negative_edge_samples(edge, neg_sample_size)
    chain_spec = ("1-chain", edge)
    return Query(
        chain_spec,
        negatives,
        None,
        neg_sample_size,
        keep_graph=True,
    )