Example #1
0
def gen_cycle_graph(destination_folder: Path, vertices_number: int) -> Path:
    """
    Builds a single directed cycle over the requested number of vertices
    and stores it as an RDF file

    Vertices are numbered from 0 to ``vertices_number - 1``; each vertex is
    connected to its successor with an 'A' edge, and the last vertex is
    connected back to vertex 0. The result is written to
    ``fullgraph_<vertices_number>.xml`` inside the destination folder.

    :param destination_folder: directory to save the graph
    :type destination_folder: Path
    :param vertices_number: number of vertices in the graph
    :type vertices_number: int
    :return: path to generated graph
    :rtype: Path
    """

    last_vertex = vertices_number - 1

    # Chain 0 -> 1 -> ... -> last_vertex, followed by the closing edge.
    cycle_edges = [(vertex, 'A', vertex + 1) for vertex in range(last_vertex)]
    cycle_edges.append((last_vertex, 'A', 0))

    rdf_graph = rdflib.Graph()

    progress = tqdm(cycle_edges, desc=f'fullgraph_{vertices_number} generation')
    for source, label, destination in progress:
        add_rdf_edge(source, label, destination, rdf_graph)

    output_path = destination_folder / f'fullgraph_{vertices_number}.xml'
    write_to_rdf(output_path, rdf_graph)

    return output_path
Example #2
0
    def save_to_rdf(self, destination: Path) -> Path:
        """
        Writes the graph held in ``self.store`` to the given RDF file

        :param destination: path of the file the graph is written to
        :type destination: Path
        :return: the same path that was passed in, for convenient chaining
        :rtype: Path
        """

        rdf_store = self.store
        write_to_rdf(destination, rdf_store)

        return destination
Example #3
0
    def load_from_txt(cls,
                      source: Path = None,
                      config: Optional[Dict[str, str]] = None) -> RDF:
        """
        Loads RDF graph from specified source with txt format

        Every non-empty line of the source must consist of exactly three
        whitespace-separated tokens: subject, predicate and object. Blank
        lines are skipped.

        When no config is given, each predicate is mapped to itself if it
        already starts with ``http``, and to
        ``http://yacc/rdf-schema#<predicate>`` otherwise. When a config is
        given, it is used as is and must contain every predicate that
        occurs in the source.

        :param source: graph source
        :type source: Path
        :param config: edge configuration (predicate -> predicate URI)
        :type config: Optional[Dict[str, str]]
        :return: loaded graph
        :rtype: RDF
        :raises ValueError: if a non-empty line does not hold three tokens
        :raises KeyError: if a given config lacks a predicate of the source
        """

        tmp_graph = rdflib.Graph()

        # Only derive predicate URIs when the caller supplied no mapping;
        # a caller-provided config is never modified.
        derive_config = config is None
        if derive_config:
            config = dict()

        # Single pass over the file: the mapping is filled lazily while the
        # edges are added, so the source is read only once.
        with open(source, 'r') as input_file:
            for line_number, line in enumerate(input_file, start=1):
                tokens = line.split()
                if not tokens:
                    continue
                if len(tokens) != 3:
                    raise ValueError(
                        f'{source}:{line_number}: expected '
                        f'"<subject> <predicate> <object>", got {line!r}')

                s, p, o = tokens
                if derive_config and p not in config:
                    if p.startswith('http'):
                        config[p] = p
                    else:
                        config[p] = f'http://yacc/rdf-schema#{p}'

                add_rdf_edge(s, config[p], o, tmp_graph)

        # The graph is round-tripped through an RDF/XML file so that it can
        # be loaded with the regular RDF loader.
        # NOTE(review): 'tmp.xml' is created in the current working directory
        # and is left behind afterwards (the clean-up was disabled in the
        # original code) - confirm whether load_from_rdf still needs the
        # file after returning before removing it here.
        tmp_path = Path('tmp.xml')
        write_to_rdf(tmp_path, tmp_graph)

        return cls.load_from_rdf(tmp_path)
Example #4
0
def gen_worst_case_graph(destination_folder: Path,
                         vertices_number: int) -> Path:
    """
    Builds a graph made of two cycles that share one vertex and stores it
    as an RDF file

    The first cycle uses 'A' edges over vertices ``0 .. first_cycle - 1``,
    where ``first_cycle = int(vertices_number / 2) + 1``. The second cycle
    uses 'B' edges and runs from the shared vertex ``first_cycle - 1``
    through the remaining vertices and back to the shared vertex. The
    result is written to ``worstcase_<vertices_number>.xml`` inside the
    destination folder.

    :param destination_folder: directory to save the graph
    :type destination_folder: Path
    :param vertices_number: number of vertices in the graph
    :type vertices_number: int
    :return: path to generated graph
    :rtype: Path
    """

    first_cycle = int(vertices_number / 2) + 1
    shared_vertex = first_cycle - 1
    last_vertex = vertices_number - 1

    # 'A' cycle: 0 -> 1 -> ... -> shared_vertex -> 0
    two_cycle_edges = [(v, 'A', v + 1) for v in range(shared_vertex)]
    two_cycle_edges += [
        (shared_vertex, 'A', 0),
        # entry into the 'B' cycle from the shared vertex
        (shared_vertex, 'B', first_cycle),
    ]
    # 'B' cycle: first_cycle -> ... -> last_vertex -> shared_vertex
    two_cycle_edges += [(v, 'B', v + 1) for v in range(first_cycle, last_vertex)]
    two_cycle_edges.append((last_vertex, 'B', shared_vertex))

    rdf_graph = rdflib.Graph()

    progress = tqdm(two_cycle_edges,
                    desc=f'worstcase_{vertices_number} generation')
    for source, label, destination in progress:
        add_rdf_edge(source, label, destination, rdf_graph)

    output_path = destination_folder / f'worstcase_{vertices_number}.xml'
    write_to_rdf(output_path, rdf_graph)

    return output_path
Example #5
0
def gen_sparse_graph(destination_folder: Path,
                     vertices_number: int,
                     edge_probability: float) -> Path:
    """
    Builds a random sparse graph and stores it as an RDF file

    The topology comes from ``networkx``'s ``fast_gnp_random_graph``; every
    edge it produces is emitted twice, once labelled 'A' and once labelled
    'AR'. The result is written to
    ``G<vertices_number>-<edge_probability>.xml`` inside the destination
    folder.

    :param destination_folder: directory to save the graph
    :type destination_folder: Path
    :param vertices_number: number of vertices in the graph
    :type vertices_number: int
    :param edge_probability: probability of edge existence in the graph
    :type edge_probability: float
    :return: path to generated graph
    :rtype: Path
    """

    random_graph = nx.generators.fast_gnp_random_graph(
        vertices_number, edge_probability)

    # NOTE(review): the 'AR' edge is emitted in the same direction as the
    # 'A' edge - confirm that this is intended.
    labelled_edges = [
        (source, label, destination)
        for source, destination in random_graph.edges()
        for label in ('A', 'AR')
    ]

    rdf_graph = rdflib.Graph()

    progress = tqdm(labelled_edges,
                    desc=f'G{vertices_number}-{edge_probability} generation')
    for source, label, destination in progress:
        add_rdf_edge(source, label, destination, rdf_graph)

    output_path = destination_folder / f'G{vertices_number}-{edge_probability}.xml'
    write_to_rdf(output_path, rdf_graph)

    return output_path
Example #6
0
def gen_lubm_graph(destination_folder: Path, count: int) -> Path:
    """
    Generates LUBM graph by specified number of generated graphs to create one LUBM graph

    The LUBM generator archive is downloaded and unpacked on first use,
    then the Java generator is run for ``count`` universities. Every
    generated file whose name contains ``University`` is parsed, its
    subjects and objects are renumbered with consecutive integer ids, and
    the file is removed afterwards. The merged triples are written to
    ``lubm_<count>.xml`` inside the destination folder.

    :param destination_folder: directory to save the graph
    :type destination_folder: Path
    :param count: number of generated graphs
    :type count: int
    :return: path to generated graph
    :rtype: Path
    :raises subprocess.CalledProcessError: if the Java generator fails
    """

    lubm_dir = MAIN_FOLDER / 'data' / 'LUBM'
    arch = lubm_dir / 'uba.zip'
    univ_dir = lubm_dir / 'univ'
    if not os.path.exists(univ_dir):
        wget.download(url=LUBM_URL, out=str(arch))
        shutil.unpack_archive(filename=str(arch), extract_dir=str(univ_dir))
        os.remove(arch)

    # An argument list (no shell) avoids any quoting/interpolation issues.
    subprocess.run(
        [
            'java',
            '-cp', 'classes',
            'edu.lehigh.swat.bench.uba.Generator',
            '-univ', str(count),
            '-onto', 'http://www.lehigh.edu/~zhp2/2004/0401/univ-bench.owl',
        ],
        cwd=str(univ_dir),
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        check=True,
    )

    output_graph = rdflib.Graph()

    triples = list()

    # Maps every RDF node seen so far to its consecutive integer id.
    vertices = dict()

    # Generated files are looked up both in the LUBM folder and in the
    # generator's working directory (the latter is needed on Windows).
    # Each file keeps the directory it was found in, so that it is parsed
    # and removed at its real location.
    generated_graphs = [
        (directory, file_name)
        for directory in (lubm_dir, univ_dir)
        for file_name in os.listdir(directory)
        if 'University' in file_name
    ]

    for directory, file_name in generated_graphs:
        tmp_graph_path = directory / file_name
        tmp_graph = rdflib.Graph()
        tmp_graph.parse(str(tmp_graph_path))

        for subj, pred, obj in tmp_graph:
            for node in (subj, obj):
                if node not in vertices:
                    vertices[node] = len(vertices)
            triples.append((vertices[subj], pred, vertices[obj]))

        os.remove(tmp_graph_path)

    for subj, pred, obj in tqdm(triples, desc=f'lubm_{count} generation'):
        add_rdf_edge(subj, pred, obj, output_graph, reverse=True)

    target = destination_folder / f'lubm_{count}.xml'

    write_to_rdf(target, output_graph)

    return target
Example #7
0
def gen_scale_free_graph(
    destination_folder: Path,
    vertices_number: int,
    vertices_degree: int,
    labels: Tuple[str, ...] = ('A', 'B', 'C', 'D')) -> Path:
    """
    Builds a randomly labelled graph by degree-weighted attachment and
    stores it as an RDF file

    The first ``vertices_degree`` vertices are connected to each other
    (including themselves) with randomly chosen labels. Every further
    vertex is attached to ``vertices_degree`` distinct earlier vertices,
    picked with probability proportional to their current degree counter,
    again with random labels. The result is written to
    ``scale_free_graph_<vertices_number>_<vertices_degree>.xml`` inside the
    destination folder.

    :param destination_folder: directory to save the graph
    :type destination_folder: Path
    :param vertices_number: number of vertices in the graph
    :type vertices_number: int
    :param vertices_degree: degree of a vertex in the graph
    :type vertices_degree: int
    :param labels: edge labels in the graph
    :type labels: Tuple[str, ...]
    :return: path to generated graph
    :rtype: Path
    """

    # Seed part: adjacency lists of (target, label) pairs.
    adjacency = {}
    for seed_vertex in range(vertices_degree):
        adjacency[seed_vertex] = [
            (seed_target, np.random.choice(labels))
            for seed_target in range(vertices_degree)
        ]

    # Degree counters used as attachment weights; seed vertices start at 3.
    degree = [3] * vertices_degree

    for new_vertex in range(vertices_degree, vertices_number):
        attachment_weights = np.array(degree) / sum(degree)
        chosen_targets = np.random.choice(range(new_vertex),
                                          size=vertices_degree,
                                          replace=False,
                                          p=attachment_weights)

        degree.append(0)
        outgoing = []
        adjacency[new_vertex] = outgoing
        for target in chosen_targets:
            outgoing.append((target, np.random.choice(labels)))
            # Both endpoints of the new edge gain one degree.
            degree[target] += 1
            degree[new_vertex] += 1

    labelled_edges = [
        (source, label, target)
        for source, neighbours in adjacency.items()
        for target, label in neighbours
    ]

    rdf_graph = rdflib.Graph()

    progress = tqdm(
        labelled_edges,
        desc=f'scale_free_graph_{vertices_number}_{vertices_degree} generation')
    for source, label, target in progress:
        add_rdf_edge(source, label, target, rdf_graph)

    output_path = destination_folder / f'scale_free_graph_{vertices_number}_{vertices_degree}.xml'
    write_to_rdf(output_path, rdf_graph)

    return output_path