def extract_nodes(dataset_id):
    """Read a dataset's RDF graph and pass it to the per-type subject extraction.

    The dataset description is looked up in ``helpers.datasets`` under
    ``dataset_id``; its ``"files"`` entry is handed to ``rdf.read_graph``.
    The time spent reading the graph is printed to stdout, then
    ``rdf.extract_subjects_by_types`` is called with the graph and the
    directory ``../output/stats/<dataset_id>/``.

    Args:
        dataset_id: Key into ``helpers.datasets``; also used as the name of
            the output sub-directory.

    Returns:
        None.
    """
    # NOTE(review): presumably helpers.datasets is a dict, so an unknown
    # dataset_id would raise KeyError here -- confirm against helpers.
    dataset = helpers.datasets[dataset_id]

    start = time.time()
    rdf_graph = rdf.read_graph(dataset["files"])
    end = time.time()
    # Format first, then print. The previous ``print (...).format(...)`` only
    # worked as a Python 2 print statement; on Python 3 it called ``.format``
    # on print()'s None return value. This form prints the same text on both.
    print("Reading graph took: {0} s.".format(end - start))

    output_dir = "../output/stats/{0}/".format(dataset_id)
    rdf.extract_subjects_by_types(rdf_graph, output_dir)
Example #2
0
def extract_nodes(dataset_id):
    """Read a dataset's RDF graph and pass it to the per-type subject extraction.

    The dataset description is looked up in ``helpers.datasets`` under
    ``dataset_id``; its ``"files"`` entry is handed to ``rdf.read_graph``.
    The time spent reading the graph is printed to stdout, then
    ``rdf.extract_subjects_by_types`` is called with the graph and the
    directory ``../output/stats/<dataset_id>/``.

    Args:
        dataset_id: Key into ``helpers.datasets``; also used as the name of
            the output sub-directory.

    Returns:
        None.
    """
    # NOTE(review): presumably helpers.datasets is a dict, so an unknown
    # dataset_id would raise KeyError here -- confirm against helpers.
    dataset = helpers.datasets[dataset_id]

    start = time.time()
    rdf_graph = rdf.read_graph(dataset["files"])
    end = time.time()
    # Format first, then print. The previous ``print(...).format(...)`` only
    # worked as a Python 2 print statement; on Python 3 it called ``.format``
    # on print()'s None return value. This form prints the same text on both.
    print("Reading graph took: {0} s.".format(end - start))

    output_dir = "../output/stats/{0}/".format(dataset_id)
    rdf.extract_subjects_by_types(rdf_graph, output_dir)
Example #3
0
    in_files_dir = "../output/stats/{0}/".format(dataset_id)
    stats = {}

    for i in files_range:
        i_str = str(i)
        stats[i_str] = {}
        stats[i_str]["class"], nodes = read_class_and_nodes_from_file(
            in_files_dir + str(i_str))
        stats[i_str]["len"] = len(nodes)

    total = sum(map(lambda x: stats[x]["len"], stats))

    stats["all"] = {}
    stats["all"]["len"] = total

    return stats


if __name__ == '__main__':
    # Script entry point: load the "opencyc" dataset graph, report how long
    # the read took, then report the number of distinct blank nodes.
    #
    # Alternative manual check kept from the original:
    #     print compute_stats("jamendo", range(10))

    dataset_id = "opencyc"

    dataset = helpers.datasets[dataset_id]

    start = time.time()
    rdf_graph = rdf.read_graph(dataset["files"])
    end = time.time()
    # Format inside the call: ``print(...).format(...)`` only worked as a
    # Python 2 print statement and fails on Python 3 (``None.format``).
    print("Reading graph took: {0} s.".format(end - start))

    # set() de-duplicates whatever rdf.get_blank_nodes yields before counting.
    # A single pre-formatted argument prints "Blank nodes: <n>" identically
    # under the Python 2 print statement and the Python 3 print function.
    blank_node_count = len(set(rdf.get_blank_nodes(rdf_graph)))
    print("Blank nodes: {0}".format(blank_node_count))
    
    in_files_dir = "../output/stats/{0}/".format(dataset_id)
    stats = {}
    
    for i in files_range:
        i_str = str(i)
        stats[i_str] = {}
        stats[i_str]["class"], nodes = read_class_and_nodes_from_file(in_files_dir + str(i_str))
        stats[i_str]["len"] = len(nodes)
    
    total = sum(map(lambda x: stats[x]["len"], stats))
    
    stats["all"] = {}
    stats["all"]["len"] = total
    
    return stats
        

if __name__ == '__main__':
    # Script entry point: load the "opencyc" dataset graph, report how long
    # the read took, then report the number of distinct blank nodes.
    #
    # Alternative manual check kept from the original:
    #     print compute_stats("jamendo", range(10))

    dataset_id = "opencyc"

    dataset = helpers.datasets[dataset_id]

    start = time.time()
    rdf_graph = rdf.read_graph(dataset["files"])
    end = time.time()
    # Format inside the call: ``print (...).format(...)`` only worked as a
    # Python 2 print statement and fails on Python 3 (``None.format``).
    print("Reading graph took: {0} s.".format(end - start))

    # set() de-duplicates whatever rdf.get_blank_nodes yields before counting.
    # A single pre-formatted argument prints "Blank nodes: <n>" identically
    # under the Python 2 print statement and the Python 3 print function.
    blank_node_count = len(set(rdf.get_blank_nodes(rdf_graph)))
    print("Blank nodes: {0}".format(blank_node_count))