def gen_cycle_graph(destination_folder: Path, vertices_number: int) -> Path:
    """
    Builds one directed cycle over the given number of vertices and saves it

    Vertex ``i`` is connected to vertex ``i + 1`` with an ``'A'`` edge and
    the last vertex is connected back to vertex ``0``. The result is stored
    as ``fullgraph_<vertices_number>.xml`` inside ``destination_folder``.

    :param destination_folder: directory to save the graph
    :type destination_folder: Path
    :param vertices_number: number of vertices in the graph
    :type vertices_number: int
    :return: path to generated graph
    :rtype: Path
    """
    last_vertex = vertices_number - 1

    # Chain 0 -> 1 -> ... -> last_vertex, then close the loop back to 0.
    cycle_edges = [(vertex, 'A', vertex + 1) for vertex in range(last_vertex)]
    cycle_edges.append((last_vertex, 'A', 0))

    rdf_graph = rdflib.Graph()
    progress = tqdm(cycle_edges,
                    desc=f'fullgraph_{vertices_number} generation')
    for source_vertex, label, target_vertex in progress:
        add_rdf_edge(source_vertex, label, target_vertex, rdf_graph)

    target = destination_folder / f'fullgraph_{vertices_number}.xml'
    write_to_rdf(target, rdf_graph)

    return target
def load_from_txt(cls, source: Path = None, config: Optional[Dict[str, str]] = None) -> RDF:
    """
    Loads RDF graph from specified source with txt format

    Every non-blank line of the source must hold exactly one edge written as
    ``<subject> <predicate> <object>``, separated by single spaces. Blank
    lines are skipped. The file is read once and the parsed edges are reused
    both for building the default configuration and for filling the graph.

    When ``config`` is not given, each predicate found in the file is mapped
    to itself if it starts with ``http`` and to
    ``http://yacc/rdf-schema#<predicate>`` otherwise. When ``config`` is
    given, it is used as is and must contain every predicate of the file.

    The edges are written to ``tmp.xml`` in the current working directory
    and the resulting graph is obtained through ``cls.load_from_rdf``.

    :param source: graph source
    :type source: Path
    :param config: edge configuration
    :type config: Optional[Dict[str, str]]
    :return: loaded graph
    :rtype: RDF
    :raises ValueError: if a non-blank line does not have exactly three
        space-separated fields
    :raises KeyError: if ``config`` is given but lacks a predicate that
        occurs in the source
    """
    # Parse the file a single time; both steps below work on this list.
    edges = []
    with open(source, 'r') as input_file:
        for line in input_file:
            if not line.strip():
                # Tolerate empty / whitespace-only lines instead of failing
                # on tuple unpacking.
                continue
            s, p, o = line.strip('\n').split(' ')
            edges.append((s, p, o))

    if config is None:
        # Default configuration: keep full IRIs, prefix plain labels.
        config = {
            p: p if p.startswith('http') else f'http://yacc/rdf-schema#{p}'
            for _, p, _ in edges
        }

    tmp_graph = rdflib.Graph()
    for s, p, o in edges:
        add_rdf_edge(s, config[p], o, tmp_graph)

    tmp_path = Path('tmp.xml')
    write_to_rdf(tmp_path, tmp_graph)

    # NOTE(review): tmp.xml is deliberately left on disk, as in the original
    # (its `os.remove('tmp.xml')` was commented out) - presumably the loaded
    # graph still refers to this file; confirm before adding cleanup.
    return cls.load_from_rdf(tmp_path)
def gen_worst_case_graph(destination_folder: Path, vertices_number: int) -> Path:
    """
    Builds a graph made of two cycles sharing one vertex and saves it

    The first cycle uses ``'A'`` edges over vertices
    ``0 .. first_cycle - 1`` where ``first_cycle`` is
    ``int(vertices_number / 2) + 1``. The second cycle uses ``'B'`` edges,
    starts at the last vertex of the first cycle, runs through the remaining
    vertices and returns to that same vertex. The result is stored as
    ``worstcase_<vertices_number>.xml`` inside ``destination_folder``.

    :param destination_folder: directory to save the graph
    :type destination_folder: Path
    :param vertices_number: number of vertices in the graph
    :type vertices_number: int
    :return: path to generated graph
    :rtype: Path
    """
    first_cycle = int(vertices_number / 2) + 1
    shared_vertex = first_cycle - 1  # belongs to both cycles
    last_vertex = vertices_number - 1

    # 'A' cycle: 0 -> 1 -> ... -> shared_vertex -> 0
    a_cycle = [(vertex, 'A', vertex + 1) for vertex in range(shared_vertex)]
    a_cycle.append((shared_vertex, 'A', 0))

    # 'B' cycle: shared_vertex -> first_cycle -> ... -> last_vertex -> shared_vertex
    b_cycle = [(shared_vertex, 'B', first_cycle)]
    b_cycle.extend(
        (vertex, 'B', vertex + 1)
        for vertex in range(first_cycle, last_vertex)
    )
    b_cycle.append((last_vertex, 'B', shared_vertex))

    rdf_graph = rdflib.Graph()
    progress = tqdm(a_cycle + b_cycle,
                    desc=f'worstcase_{vertices_number} generation')
    for source_vertex, label, target_vertex in progress:
        add_rdf_edge(source_vertex, label, target_vertex, rdf_graph)

    target = destination_folder / f'worstcase_{vertices_number}.xml'
    write_to_rdf(target, rdf_graph)

    return target
def gen_sparse_graph(destination_folder: Path,
                     vertices_number: int,
                     edge_probability: float) -> Path:
    """
    Builds a random sparse graph and saves it

    The structure comes from ``nx.generators.fast_gnp_random_graph``. Every
    edge ``(v, to)`` it yields is emitted twice, once labelled ``'A'`` and
    once labelled ``'AR'``, both times from ``v`` to ``to``. The result is
    stored as ``G<vertices_number>-<edge_probability>.xml`` inside
    ``destination_folder``.

    :param destination_folder: directory to save the graph
    :type destination_folder: Path
    :param vertices_number: number of vertices in the graph
    :type vertices_number: int
    :param edge_probability: probability of edge existence in the graph
    :type edge_probability: float
    :return: path to generated graph
    :rtype: Path
    """
    random_graph = nx.generators.fast_gnp_random_graph(vertices_number,
                                                       edge_probability)

    # Each random edge contributes an 'A' triple followed by an 'AR' triple.
    labelled_edges = [
        (v, label, to)
        for v, to in random_graph.edges()
        for label in ('A', 'AR')
    ]

    rdf_graph = rdflib.Graph()
    progress = tqdm(labelled_edges,
                    desc=f'G{vertices_number}-{edge_probability} generation')
    for source_vertex, label, target_vertex in progress:
        add_rdf_edge(source_vertex, label, target_vertex, rdf_graph)

    target = destination_folder / f'G{vertices_number}-{edge_probability}.xml'
    write_to_rdf(target, rdf_graph)

    return target
def gen_lubm_graph(destination_folder: Path, count: int) -> Path:
    """
    Generates LUBM graph by specified number of generated graphs
    to create one LUBM graph

    Steps:

    1. If ``MAIN_FOLDER/data/LUBM/univ`` does not exist, the generator
       archive is downloaded from ``LUBM_URL``, unpacked there and the
       archive is removed.
    2. The Java generator ``edu.lehigh.swat.bench.uba.Generator`` is run
       with ``-univ <count>`` from inside the ``univ`` directory.
    3. Every produced file whose name contains ``University`` is parsed,
       its subjects and objects are renumbered with consecutive integer
       ids, and the file is deleted.
    4. All collected triples are added to one graph (with ``reverse=True``)
       which is stored as ``lubm_<count>.xml`` in ``destination_folder``.

    Generated files are looked up both in ``data/LUBM`` and in
    ``data/LUBM/univ``; each file is opened in the directory where it was
    actually found.

    :param destination_folder: directory to save the graph
    :type destination_folder: Path
    :param count: number of generated graphs
    :type count: int
    :return: path to generated graph
    :rtype: Path
    :raises subprocess.CalledProcessError: if the Java generator exits
        with a non-zero status
    """
    lubm_dir = MAIN_FOLDER / 'data' / 'LUBM'
    arch = lubm_dir / 'uba.zip'
    univ_dir = lubm_dir / 'univ'

    if not os.path.exists(univ_dir):
        wget.download(url=LUBM_URL, out=str(arch))
        shutil.unpack_archive(filename=str(arch), extract_dir=str(univ_dir))
        os.remove(arch)

    subprocess.run(
        'java '
        '-cp classes '
        'edu.lehigh.swat.bench.uba.Generator '
        f'-univ {count} '
        '-onto http://www.lehigh.edu/~zhp2/2004/0401/univ-bench.owl',
        cwd=str(univ_dir),
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        check=True,
        shell=True,
    )

    triples = list()
    vertices = dict()
    next_id = 0

    # The generator output may land in either directory (the original code
    # lists `univ_dir` "for Windows"). Remember which directory each name
    # came from so that the file is parsed and removed where it really is,
    # instead of always being resolved against `lubm_dir`.
    for folder in (lubm_dir, univ_dir):
        for file_name in os.listdir(folder):
            if 'University' not in file_name:
                continue

            tmp_graph_path = str(folder / file_name)

            tmp_graph = rdflib.Graph()
            tmp_graph.parse(tmp_graph_path)

            for subj, pred, obj in tmp_graph:
                # Assign ids in first-seen order, subject before object.
                for tmp in [subj, obj]:
                    if tmp not in vertices:
                        vertices[tmp] = next_id
                        next_id += 1
                triples.append((vertices[subj], pred, vertices[obj]))

            os.remove(tmp_graph_path)

    output_graph = rdflib.Graph()
    for subj, pred, obj in tqdm(triples, desc=f'lubm_{count} generation'):
        add_rdf_edge(subj, pred, obj, output_graph, reverse=True)

    target = destination_folder / f'lubm_{count}.xml'
    write_to_rdf(target, output_graph)

    return target
def gen_scale_free_graph(
        destination_folder: Path,
        vertices_number: int,
        vertices_degree: int,
        labels: Tuple[str, ...] = ('A', 'B', 'C', 'D')) -> Path:
    """
    Builds a scale free graph with randomly labelled edges and saves it

    The first ``vertices_degree`` vertices are each connected to every one
    of those vertices (themselves included). Each following vertex gets
    ``vertices_degree`` outgoing edges to distinct earlier vertices, drawn
    with probability proportional to the current per-vertex weight. Every
    edge label is drawn from ``labels`` with ``np.random.choice``. The
    result is stored as
    ``scale_free_graph_<vertices_number>_<vertices_degree>.xml`` inside
    ``destination_folder``.

    :param destination_folder: directory to save the graph
    :type destination_folder: Path
    :param vertices_number: number of vertices in the graph
    :type vertices_number: int
    :param vertices_degree: degree of a vertex in the graph
    :type vertices_degree: int
    :param labels: edge labels in the graph
    :type labels: Tuple[str, ...]
    :return: path to generated graph
    :rtype: Path
    """
    # Seed part: vertex -> list of (target, label) pairs.
    adjacency = {}
    for seed_vertex in range(vertices_degree):
        adjacency[seed_vertex] = [
            (other, np.random.choice(labels))
            for other in range(vertices_degree)
        ]

    # Selection weight of every vertex created so far.
    # NOTE(review): seed vertices start with a fixed weight of 3 regardless
    # of `vertices_degree` - confirm this is intended.
    degree = [3] * vertices_degree

    for new_vertex in range(vertices_degree, vertices_number):
        weights = np.array(degree) / sum(degree)
        chosen_targets = np.random.choice(range(new_vertex),
                                          size=vertices_degree,
                                          replace=False,
                                          p=weights)

        degree.append(0)
        outgoing = []
        for to in chosen_targets:
            outgoing.append((to, np.random.choice(labels)))
            # Both endpoints of the new edge become more likely targets.
            degree[to] += 1
            degree[new_vertex] += 1
        adjacency[new_vertex] = outgoing

    labelled_edges = [
        (vertex, label, to)
        for vertex, targets in adjacency.items()
        for to, label in targets
    ]

    rdf_graph = rdflib.Graph()
    progress = tqdm(
        labelled_edges,
        desc=f'scale_free_graph_{vertices_number}_{vertices_degree} generation'
    )
    for source_vertex, label, target_vertex in progress:
        add_rdf_edge(source_vertex, label, target_vertex, rdf_graph)

    target = destination_folder / f'scale_free_graph_{vertices_number}_{vertices_degree}.xml'
    write_to_rdf(target, rdf_graph)

    return target