def graph_plan(plan, fountain):
    """Describe a search plan as an RDF ``ConjunctiveGraph``.

    NOTE(review): this file also contains a later
    ``graph_plan(plan, fountain, agp)`` definition; if both live in the same
    module the later one rebinds the name.

    Args:
        plan: mapping read through ``.get``. ``'prefixes'`` maps each prefix
            to a namespace and ``'plan'`` is an iterable of per-pattern
            entries with ``'paths'``, ``'pattern'``, ``'hints'`` and
            ``'context'``. Every path provides ``'steps'`` (items with
            ``'property'`` and ``'type'``) and ``'seeds'``.
        fountain: object whose ``get_type(qname)`` returns a mapping with a
            ``'super'`` entry.

    Returns:
        The populated ``ConjunctiveGraph``.
    """
    plan_graph = ConjunctiveGraph()
    plan_graph.bind('agora', AGORA)
    prefixes = plan.get('prefixes')
    ef_plan = plan.get('plan')
    tree_lengths = {}
    s_trees = set()
    patterns = {}

    for prefix, u in prefixes.items():
        plan_graph.bind(prefix, u)

    def get_pattern_node(p):
        # One blank node per distinct pattern, numbered in creation order.
        if p not in patterns:
            patterns[p] = BNode('tp_{}'.format(len(patterns)))
        return patterns[p]

    def inc_tree_length(tree, l):
        tree_lengths[tree] = tree_lengths.get(tree, 0) + l

    def add_variable(p_node, vid, subject=True):
        # Attach a variable node to the pattern as its subject or object.
        sub_node = BNode(str(vid).replace('?', 'var_'))
        role = AGORA.subject if subject else AGORA.object
        plan_graph.add((p_node, role, sub_node))
        plan_graph.set((sub_node, RDF.type, AGORA.Variable))
        plan_graph.set((sub_node, RDFS.label, Literal(str(vid), datatype=XSD.string)))

    def include_path(elm, p_seeds, p_steps, pattern):
        # Each search tree lives in the named graph of the type it starts from.
        elm_uri = __extend_uri(prefixes, elm)
        path_g = plan_graph.get_context(elm_uri)
        b_tree = BNode(elm_uri)
        s_trees.add(b_tree)
        path_g.set((b_tree, RDF.type, AGORA.SearchTree))
        path_g.set((b_tree, AGORA.fromType, elm_uri))

        for seed in p_seeds:
            path_g.add((b_tree, AGORA.hasSeed, URIRef(seed)))

        previous_node = b_tree
        inc_tree_length(b_tree, len(p_steps))
        last = len(p_steps) - 1
        for j, step in enumerate(p_steps):
            prop = step.get('property')
            b_node = BNode(previous_node.n3() + prop)
            # The final step only carries its property for rdf:type patterns.
            if j < last or pattern[1] == RDF.type:
                path_g.add((b_node, AGORA.onProperty, __extend_uri(prefixes, prop)))
            path_g.add((b_node, AGORA.expectedType, __extend_uri(prefixes, step.get('type'))))
            path_g.add((previous_node, AGORA.next, b_node))
            previous_node = b_node

        path_g.add((previous_node, AGORA.byPattern, get_pattern_node(pattern)))

    for tp_plan in ef_plan:
        paths = tp_plan.get('paths')
        pattern = tp_plan.get('pattern')
        hints = tp_plan.get('hints')
        context = BNode('space_{}'.format(tp_plan.get('context')))

        for path in paths:
            steps = path.get('steps')
            seeds = path.get('seeds')
            if steps:
                include_path(steps[0].get('type'), seeds, steps, pattern)
            elif seeds:
                # No steps: the tree starts directly at the pattern's object.
                include_path(pattern[2], seeds, steps, pattern)

        for t in s_trees:
            plan_graph.set((t, AGORA.length, Literal(tree_lengths.get(t, 0), datatype=XSD.integer)))

        pattern_node = get_pattern_node(pattern)
        plan_graph.add((context, AGORA.definedBy, pattern_node))
        plan_graph.set((context, RDF.type, AGORA.SearchSpace))
        plan_graph.add((pattern_node, RDF.type, AGORA.TriplePattern))
        sub, pred, obj = pattern

        if isinstance(sub, BNode):
            add_variable(pattern_node, str(sub))
        elif isinstance(sub, URIRef):
            plan_graph.add((pattern_node, AGORA.subject, sub))

        if isinstance(obj, BNode):
            add_variable(pattern_node, str(obj), subject=False)
        elif isinstance(obj, Literal):
            node = BNode(str(obj).replace(' ', ''))
            plan_graph.add((pattern_node, AGORA.object, node))
            plan_graph.set((node, RDF.type, AGORA.Literal))
            plan_graph.set((node, AGORA.value, Literal(str(obj), datatype=XSD.string)))
        else:
            plan_graph.add((pattern_node, AGORA.object, obj))

        plan_graph.add((pattern_node, AGORA.predicate, pred))
        if pred == RDF.type and 'check' in hints:
            plan_graph.add((pattern_node, AGORA.checkType, Literal(hints['check'], datatype=XSD.boolean)))

        # NOTE(review): the flattened SOURCE does not show whether this pruning
        # pass sits inside the per-pattern loop or after it; it is kept inside
        # here -- confirm against the original layout.
        #
        # Snapshot the subjects first: ``subjects()`` is a lazy generator that
        # yields a subject once per matching triple, and the loop body removes
        # and re-adds exactly those triples.  The set makes the iteration
        # independent of the mutations and avoids redundant passes.
        for s in set(plan_graph.subjects(predicate=AGORA.expectedType)):
            expected_types = list(plan_graph.objects(s, AGORA.expectedType))
            for et in expected_types:
                plan_graph.remove((s, AGORA.expectedType, et))
            q_expected_types = set(plan_graph.qname(et) for et in expected_types)
            # Keep only the types that have no super type among the candidates.
            for et in expected_types:
                supers = set(fountain.get_type(plan_graph.qname(et)).get('super'))
                if supers.isdisjoint(q_expected_types):
                    plan_graph.add((s, AGORA.expectedType, et))

    return plan_graph
def graph_plan(plan, fountain, agp):
    """Describe a search plan, including cycles, as an RDF ``ConjunctiveGraph``.

    Search trees are written to the ``'trees'`` context of the returned graph;
    every described cycle gets a context of its own, which is dropped again
    when no tree node can act as a start for that cycle.

    Args:
        plan: mapping read through ``.get``. ``'prefixes'`` maps each prefix
            to a namespace and ``'plan'`` is an iterable of per-pattern
            entries with ``'paths'``, ``'pattern'``, ``'hints'``, ``'cycles'``
            and ``'context'``. Paths provide ``'steps'``, ``'seeds'`` and an
            optional ``'check'``; steps provide ``'property'``, ``'type'`` and
            an optional ``'root'``; cycles provide ``'cycle'`` and ``'steps'``.
        fountain: object whose ``get_type(qname)`` returns a mapping with
            ``'super'`` and ``'sub'`` entries; it is also handed to
            ``_type_subtree`` and ``_inform_on_inverses``.
        agp: passed unchanged to ``_extract_roots``.

    Returns:
        The populated ``ConjunctiveGraph``.
    """
    # Function-scope import so the block does not rely on the file's imports.
    import base64

    plan_graph = ConjunctiveGraph()
    plan_graph.bind('agora', AGORA)
    prefixes = plan.get('prefixes')
    ef_plan = plan.get('plan')
    tree_lengths = {}
    s_trees = set()
    patterns = {}
    described_cycles = {}

    for prefix, u in prefixes.items():
        plan_graph.bind(prefix, u)

    tree_graph = plan_graph.get_context('trees')

    def extract_cycle_roots():
        # Map each described cycle id to the subtree of all its expected types.
        c_roots = {}
        for c_id, c_node in described_cycles.items():
            c_root_types = set()
            for crt in plan_graph.objects(c_node, AGORA.expectedType):
                c_root_types.update(_type_subtree(fountain, plan_graph.qname(crt)))
            c_roots[c_id] = c_root_types
        return c_roots

    def inc_tree_length(tree, l):
        tree_lengths[tree] = tree_lengths.get(tree, 0) + l

    def add_variable(p_node, vid, subject=True):
        # Attach a variable node to the pattern as its subject or object.
        sub_node = BNode(str(vid).replace('?', 'var_'))
        role = AGORA.subject if subject else AGORA.object
        plan_graph.add((p_node, role, sub_node))
        plan_graph.set((sub_node, RDF.type, AGORA.Variable))
        plan_graph.set((sub_node, RDFS.label, Literal(str(vid), datatype=XSD.string)))

    def describe_cycle(cycle_id, c_steps):
        # Write the cycle's chain of steps into a context named after its node.
        c_node = BNode('cycle{}'.format(cycle_id))
        cg = plan_graph.get_context(c_node)
        cg.add((c_node, RDF.type, AGORA.Cycle))
        previous_node = c_node
        cycle_type = c_steps[0].get('type')
        for et in _type_subtree(fountain, cycle_type):
            cg.add((c_node, AGORA.expectedType, __extend_uri(prefixes, et)))
        for step in c_steps:
            prop = step.get('property')
            b_node = BNode(previous_node.n3() + '/' + prop)
            cg.add((b_node, AGORA.onProperty, __extend_uri(prefixes, prop)))
            cg.add((b_node, AGORA.expectedType, __extend_uri(prefixes, step.get('type'))))
            cg.add((previous_node, AGORA.next, b_node))
            previous_node = b_node
        return c_node

    def is_extensible(node, node_patterns):
        # Collect the node's own patterns plus those of its sibling nodes.
        near_patterns = node_patterns.copy()
        for prev in tree_graph.subjects(AGORA.next, node):
            for sib_node in tree_graph.objects(prev, AGORA.next):
                if sib_node != node:
                    near_patterns.update(set(tree_graph.objects(sib_node, AGORA.byPattern)))

        subjects = set()
        for p_node in near_patterns:
            p_subject = list(plan_graph.objects(p_node, AGORA.subject)).pop()
            if isinstance(p_subject, URIRef):
                subjects.add(str(p_subject))
            else:
                # Variables are identified by their label.
                subjects.add(list(plan_graph.objects(p_subject, RDFS.label)).pop().toPython())

        # Not extensible as soon as one nearby subject is not a root.
        return not (subjects and set.difference(subjects, roots))

    def enrich_type_patterns(node, expected_types, node_patterns):
        def add_subtree(et):
            for et_q in _type_subtree(fountain, tree_graph.qname(et)):
                tree_graph.add((node, AGORA.expectedType, __extend_uri(prefixes, et_q)))

        for p_node in node_patterns:
            p_pred = list(plan_graph.objects(p_node, AGORA.predicate)).pop()
            # NOTE(review): SOURCE's layout was flattened; the ``else`` below is
            # read as pairing with the rdf:type test -- confirm.
            if p_pred == RDF.type:
                p_type = list(plan_graph.objects(p_node, AGORA.object)).pop()
                if isinstance(p_type, URIRef):
                    for et in expected_types:
                        if et == p_type:
                            add_subtree(et)
            else:
                for et in expected_types:
                    add_subtree(et)

    def apply_cycle_extensions(c_roots, node_types):
        for c_id, root_types in c_roots.items():
            found_extension = False
            for n, expected in node_types.items():
                if set.intersection(set(root_types), set(expected)):
                    tree_graph.add((n, AGORA.isCycleStartOf, described_cycles[c_id]))
                    found_extension = True

            if not found_extension:
                # No tree node can enter this cycle: drop its whole context.
                plan_graph.remove_context(plan_graph.get_context(described_cycles[c_id]))

    def include_path(elm, p_seeds, p_steps, cycles, check, pattern):
        # The tree id is the base64 MD5 digest of its seeds.  hashlib needs
        # bytes on Python 3, and ``bytes.encode('base64')`` only existed on
        # Python 2; ``b64encode`` yields the same text as the old
        # ``.encode('base64').strip()``.
        m = hashlib.md5()
        for s in p_seeds:
            m.update(s if isinstance(s, bytes) else s.encode('utf-8'))
        elm_uri = __extend_uri(prefixes, elm)
        b_tree = BNode(base64.b64encode(m.digest()).decode('ascii'))
        s_trees.add(b_tree)
        tree_graph.set((b_tree, RDF.type, AGORA.SearchTree))
        tree_graph.add((b_tree, AGORA.fromType, elm_uri))

        for seed in p_seeds:
            tree_graph.add((b_tree, AGORA.hasSeed, URIRef(seed)))

        # Snapshot the pending ids: ``described_cycles`` grows inside the loop.
        for cycle_id in [c for c in cycles if c not in described_cycles]:
            c_node = describe_cycle(cycle_id, cycles[cycle_id])
            described_cycles[cycle_id] = c_node
            plan_graph.get_context(c_node).add((b_tree, AGORA.goesThroughCycle, c_node))

        previous_node = b_tree
        inc_tree_length(b_tree, len(p_steps))

        is_type_pattern = pattern[1] == RDF.type and isinstance(pattern[2], URIRef)
        last = len(p_steps) - 1
        pp = []
        for j, step in enumerate(p_steps):
            prop = step.get('property')
            pp.append(prop)
            base_id = (step.get('root', None) or b_tree) + '/'
            b_node = BNode(base_id + '/'.join(pp))
            # The final step only carries its property for concrete type patterns.
            if j < last or is_type_pattern:
                tree_graph.add((b_node, AGORA.onProperty, __extend_uri(prefixes, prop)))
            tree_graph.add((b_node, AGORA.expectedType, __extend_uri(prefixes, step.get('type'))))
            tree_graph.add((previous_node, AGORA.next, b_node))
            previous_node = b_node

        p_node = _get_pattern_node(pattern, patterns)
        if is_type_pattern:
            # Concrete rdf:type patterns get an extra terminal node of their own.
            b_id = '{}_{}_{}'.format(pattern[0].n3(plan_graph.namespace_manager),
                                     pattern[1].n3(plan_graph.namespace_manager),
                                     pattern[2].n3(plan_graph.namespace_manager))
            b_node = BNode(b_id)
            tree_graph.add((b_node, AGORA.expectedType, pattern[2]))
            tree_graph.add((previous_node, AGORA.next, b_node))
            tree_graph.add((b_node, AGORA.byPattern, p_node))
            if check:
                tree_graph.add((b_node, AGORA.checkType, Literal(check)))
        else:
            tree_graph.add((previous_node, AGORA.byPattern, p_node))

    for tp_plan in ef_plan:
        paths = tp_plan.get('paths')
        pattern = tp_plan.get('pattern')
        hints = tp_plan.get('hints')
        cycles = {}
        for c in tp_plan.get('cycles'):
            cid = str(c['cycle'])
            c_steps = c['steps']
            cycles[cid] = c_steps
            if len(c_steps) > 1:
                # Multi-step cycles are also registered in reverse order.
                cycles[cid + 'r'] = list(reversed(c_steps))
        context = BNode('space_{}'.format(tp_plan.get('context')))

        for path in paths:
            steps = path.get('steps')
            seeds = path.get('seeds')
            check = path.get('check', None)
            ty = None
            if steps:
                ty = steps[0].get('type')
            elif seeds:
                ty = pattern[2]
            if ty:
                include_path(ty, seeds, steps, cycles, check, pattern)

        for t in s_trees:
            tree_graph.set((t, AGORA.length, Literal(tree_lengths.get(t, 0), datatype=XSD.integer)))

        pattern_node = _get_pattern_node(pattern, patterns)
        plan_graph.add((context, AGORA.definedBy, pattern_node))
        plan_graph.set((context, RDF.type, AGORA.SearchSpace))
        plan_graph.add((pattern_node, RDF.type, AGORA.TriplePattern))
        plan_graph.add((pattern_node, RDFS.label, Literal(pattern_node.toPython())))
        sub, pred, obj = pattern

        if isinstance(sub, BNode):
            add_variable(pattern_node, str(sub))
        elif isinstance(sub, URIRef):
            plan_graph.add((pattern_node, AGORA.subject, sub))

        if isinstance(obj, BNode):
            add_variable(pattern_node, str(obj), subject=False)
        elif isinstance(obj, Literal):
            node = BNode(str(obj).replace(' ', '').replace(':', ''))
            plan_graph.add((pattern_node, AGORA.object, node))
            plan_graph.set((node, RDF.type, AGORA.Literal))
            plan_graph.set((node, AGORA.value, obj))
        else:
            plan_graph.add((pattern_node, AGORA.object, obj))

        plan_graph.add((pattern_node, AGORA.predicate, pred))
        if pred == RDF.type and 'check' in hints:
            plan_graph.add((pattern_node, AGORA.checkType, Literal(hints['check'], datatype=XSD.boolean)))

    # Materialise the result rows before the loop starts adding triples to the
    # graph that was queried.
    expected_res = list(tree_graph.query("""SELECT DISTINCT ?n WHERE { ?n agora:expectedType ?type }"""))
    node_types = {}
    roots = set(_extract_roots(agp))

    for res in expected_res:
        expected_types = list(tree_graph.objects(res.n, AGORA.expectedType))

        q_names = set(tree_graph.qname(x) for x in expected_types)
        # A real list is required here: ``len()`` is taken right below and the
        # value is read again for every cycle in ``apply_cycle_extensions``
        # (a Python 3 ``filter`` object supports neither).
        q_expected_types = [q for q in q_names
                            if set(fountain.get_type(q)['super']).isdisjoint(q_names)]
        type_hierarchy = len(q_expected_types) == 1
        tree_graph.add((res.n, AGORA.typeHierarchy, Literal(type_hierarchy)))

        direct_patterns = set(tree_graph.objects(res.n, AGORA.byPattern))
        enrich_type_patterns(res.n, expected_types, direct_patterns)
        if is_extensible(res.n, direct_patterns):
            node_types[res.n] = q_expected_types

    c_roots = extract_cycle_roots()
    apply_cycle_extensions(c_roots, node_types)

    for t in s_trees:
        tree_graph.set((t, AGORA.length, Literal(tree_lengths.get(t, 0), datatype=XSD.integer)))
        from_types = set(plan_graph.qname(x) for x in plan_graph.objects(t, AGORA.fromType))
        # Keep the from-types that have no sub type among the candidates.
        def_from_types = [q for q in from_types
                          if set(fountain.get_type(q)['sub']).isdisjoint(from_types)]
        for dft in def_from_types:
            tree_graph.set((t, AGORA.fromType, __extend_uri(prefixes, dft)))

    seeded = list(plan_graph.query("""SELECT ?tree ?sub ?nxt WHERE { ?tree a agora:SearchTree ; agora:next ?nxt . ?nxt agora:byPattern [ agora:subject ?sub ] }"""))
    for res in seeded:
        if isinstance(res.sub, URIRef):
            plan_graph.set((res.tree, AGORA.hasSeed, res.sub))
            # NOTE(review): flattened SOURCE leaves it open whether this removal
            # is conditional on the URIRef test; it is kept conditional here.
            plan_graph.remove((res.nxt, AGORA.isCycleStartOf, None))

    _inform_on_inverses(plan_graph, fountain, prefixes)

    return plan_graph
def get_value(node: URIRef, graph: ConjunctiveGraph) -> str:
    """Return a display string for *node*.

    The ``value`` of ``graph.label(node)`` is preferred; when the label is
    falsy, ``graph.qname(node)`` is used instead.  If either lookup raises
    ``ValueError`` the node's own text, title-cased and then lower-cased, is
    returned.

    Args:
        node: the node to render.
        graph: graph providing ``label`` and ``qname``.

    Returns:
        ``str()`` of whichever of the three candidates was selected.
    """
    try:
        # Look the label up once and keep the result in its own name instead
        # of querying twice and rebinding the ``node`` parameter.
        label = graph.label(node)
        value = label.value if label else graph.qname(node)
    except ValueError:
        value = node.title().lower()
    return str(value)