Beispiel #1
0
    def construct_query(self, description: Description2, min_coverage,
                        per_pattern_limit):
        """
        Construct a binding query for the variable predicates or arguments in the description rule.

        The query selects the variables to bind together with the number of
        distinct target-variable values per binding (``?c``), grouped by the
        bound variables and ordered by ``?c`` in descending order.

        :param description: description whose head and body atoms form the query conditions
        :param min_coverage: when > 0, a HAVING clause keeps only the bindings whose count exceeds it
        :param per_pattern_limit: when > 0, a LIMIT clause with this value is appended
        :return: the query string
        """
        logger.debug("get_pattern_bindings: %r", description)

        # can be called anchor var or counting var
        target_var = description.get_target_var()

        # Bind the variable predicates when there are any, otherwise the dangling argument.
        bind_vars = description.get_var_predicates()
        if not bind_vars:
            bind_vars = [description.get_dangling_arg()]

        # select part
        select_part = 'select distinct ' + ' '.join(
            bind_vars) + ' (count(distinct ' + target_var + ') as ?c)'

        from_part = self.construct_from_part()

        filter_part = self.create_filter_part(description)

        # where part: head + body
        query_conditions = [description.head] + description.body
        where_part = self._create_where_str(query_conditions, filter_part)

        # group by
        group_by = self._create_groupby_str(bind_vars)

        # The aggregate is spelled exactly as in the select part ("distinct <var>",
        # separated by a space) so both clauses read the same.
        having = '' if min_coverage <= 0 else (
            ' HAVING (count(distinct ' + target_var + ') >' + str(min_coverage) + ')')

        limit = '' if per_pattern_limit <= 0 else 'LIMIT ' + str(
            per_pattern_limit)

        suffix = ' ORDER BY desc(?c) ' + limit
        query = select_part + from_part + where_part + group_by + having + suffix
        logger.debug(query)
        return query
Beispiel #2
0
 def create_predicates_filter(self, description: Description2):
     """
     Build FILTER clauses keeping each variable predicate different from the head predicate.

     :param description: description providing the head atom and the variable predicates
     :return: list with one ``FILTER(<var>!=<head predicate>).`` string per variable predicate
     """
     head_atom = description.head
     clauses = []
     for var_predicate in description.get_var_predicates():
         clause = 'FILTER(' + var_predicate + '!=' + _sparql_repr(
             head_atom.predicate) + ').'
         clauses.append(clause)
     return clauses
Beispiel #3
0
 def get_predicates(self, description: Description2):
     """
     Query the values bound to the variable predicates of a description.

     The query selects the description's variable predicates from the first
     graph in ``self.identifiers``, using the body atoms as conditions.

     :param description: description whose body atoms are the query conditions
     :return: flat list with the values of all result rows
     """
     selected_vars = ' '.join(description.get_var_predicates())
     body_atoms = description.body
     graph = self.identifiers[0]
     conditions = ' '.join(atom.tuple_sparql_repr() for atom in body_atoms)
     query = 'SELECT DISTINCT %s FROM <%s> Where {%s}' % (selected_vars,
                                                         graph, conditions)
     logger.debug('Query: %s', query)

     # Flatten the result rows into a single list of values.
     predicates = [value for row in self.execute(query) for value in row]
     logger.debug('Predicates: %i', len(predicates))
     return predicates
Beispiel #4
0
 def get_entities(self, description: Description2, limit=100):
     """
     Query the values bound to the unique variable arguments of a description.

     The query selects the variable arguments from the first graph in
     ``self.identifiers``, using the body atoms as conditions.

     :param description: description whose body atoms are the query conditions
     :param limit: value of the LIMIT clause of the query
     :return: flat list with the values of all result rows
     """
     # TODO verify!
     selected_vars = ' '.join(description.get_uniq_var_args())
     body_atoms = description.body
     graph = self.identifiers[0]
     conditions = ' '.join(atom.tuple_sparql_repr() for atom in body_atoms)
     query = 'SELECT DISTINCT %s FROM <%s> Where {%s} limit %i' % (
         selected_vars, graph, conditions, limit)
     logger.debug('Query: %s', query)

     # Flatten the result rows into a single list of values.
     entities = [value for row in self.execute(query) for value in row]
     logger.debug('Entities: %i', len(entities))
     return entities
Beispiel #5
0
    def get_triples(self, entity, out_edge=True, limit_per_relation=100):
        """
        Collect triples that have ``entity`` on one side.

        The predicates linked to ``entity`` are fetched first; then, for each
        predicate, the entities on the other side are fetched with
        ``limit_per_relation`` as limit.

        :param entity: the entity the triples are collected for
        :param out_edge: if True ``entity`` is the first atom argument, otherwise the last one
        :param limit_per_relation: limit passed to ``get_entities`` for every predicate
        :return: set of 3-tuples ``(entity, predicate, other)`` or ``(other, predicate, entity)``
        """

        def link_atom(predicate):
            # Place the entity on the side selected by out_edge.
            if out_edge:
                return Atom(entity, predicate, '?x')
            return Atom('?x', predicate, entity)

        triples = set()
        predicates = self.get_predicates(Description2(body=[link_atom('?p')]))

        for pr in predicates:
            neighbours = self.get_entities(Description2(body=[link_atom(pr)]),
                                           limit=limit_per_relation)
            if out_edge:
                triples.update((entity, pr, ent) for ent in neighbours)
            else:
                triples.update((ent, pr, entity) for ent in neighbours)

        return triples
Beispiel #6
0
    def get_variable_permutations(self, pattern_to_expand: Description2, new_var_arg):
        """
        Enumerate the ordered pairs of variable arguments usable to extend a pattern.

        The candidate variables depend on ``self.pattern_structure``:

        * PATH: the open variable argument(s) of the pattern plus the new variable,
          e.g. x p x1 ^ x1 p2 x2 ^ x2 p3 x4
        * CATEGORICAL: the anchor variables of the pattern plus the new variable
        * SUBGRAPH: all unique variable arguments of the pattern plus the new variable
        * TREE: only pairs connecting an existing variable argument with the new
          variable, in both directions

        :param pattern_to_expand: the pattern that is going to be extended
        :param new_var_arg: the newly introduced variable argument
        :return: list of 2-tuples of variable arguments (always a list, so the
            result can be measured and iterated more than once)
        :raises ValueError: if ``self.pattern_structure`` is not one of the structures above
        """
        structure = self.pattern_structure

        if structure == ExplanationStructure.TREE:
            pairs = []
            for arg in pattern_to_expand.get_uniq_var_args():
                pairs.append((arg, new_var_arg))  # out edge
                pairs.append((new_var_arg, arg))  # in edge
            return pairs

        if structure == ExplanationStructure.PATH:
            var_args = pattern_to_expand.get_open_var_arg() + [new_var_arg]
        elif structure == ExplanationStructure.CATEGORICAL:
            var_args = list(pattern_to_expand.anchor_vars) + [new_var_arg]
        elif structure == ExplanationStructure.SUBGRAPH:
            var_args = list(pattern_to_expand.get_uniq_var_args()) + [new_var_arg]
        else:
            raise ValueError('%r is not a supported Explanation Language' % structure)

        # permutations() yields nothing for fewer than 2 candidates, so no
        # explicit length guard is needed; materialise the one-shot iterator so
        # every branch returns the same type.
        return list(permutations(var_args, 2))
Beispiel #7
0
    def construct_argument_bind_query(self,
                                      description: Description2,
                                      restriction_pattern=None):
        """
        Construct a query selecting the distinct bindings of the target variable of a description.

        The conditions are the body atoms of ``description`` followed by the body
        atoms of ``restriction_pattern``, plus the filters built from ``description``.

        :param description: description whose target variable is selected
        :param restriction_pattern: description whose body atoms are added as extra
            conditions; an empty ``Description2`` is used when omitted
        :return: the query string
        """
        # Build the default per call instead of in the signature, where a single
        # instance would be created at definition time and shared by all calls.
        if restriction_pattern is None:
            restriction_pattern = Description2()

        target_var = description.get_target_var()

        # select part
        select_part = 'select distinct ' + target_var

        from_part = self.construct_from_part()

        # where part
        query_conditions = description.body + restriction_pattern.body

        filter_part = self.create_filter_part(description)

        where_part = self._create_where_str(query_conditions, filter_part)

        query = select_part + from_part + where_part

        logger.debug(query)
        return query
    def _infer_single(self, description: Description2):
        """
        Infer new facts for the given description.

        Every binding returned by the query executer becomes one prediction
        built from the binding and the predicate and object of the head.

        :param description: the description used for inference
        :return: list of ``Prediction`` objects, one per binding
        """
        # The atom (?x, self.relation, ?z) is handed over as restriction pattern.
        restriction = Description2(body=[Atom('?x', self.relation, '?z')])
        bindings = self.query_executer.get_arguments_bindings(
            description, restriction_pattern=restriction)
        head = description.head

        # only supports p(?x,CONSTANT)
        predictions = []
        for binding in bindings:
            fact = (binding, head.predicate, head.object)
            predictions.append(Prediction(fact, [description]))

        return predictions
Beispiel #9
0
    def get_arguments_bindings(self,
                               description: Description2,
                               restriction_pattern: Description2 = None):
        """
        Do the inference and generate bindings for the head variable.

        :param description: description the argument-binding query is built from
        :param restriction_pattern: description restricting the variables; an empty
            ``Description2`` is used when it is missing or falsy
        :return: list with the first value of every result row
        """
        if not restriction_pattern:
            restriction_pattern = Description2()

        query = self.construct_argument_bind_query(description,
                                                   restriction_pattern)

        # Only the first column of each row is of interest.
        bindings = [row[0] for row in self.execute(query)]
        logger.debug("results size: %i", len(bindings))
        return bindings
Beispiel #10
0
    def construct_count_query(self,
                              description: Description2,
                              alternative_head=None,
                              not_head=False):
        """
        Construct a query counting the distinct bindings of the target variable of a description.

        Besides the body atoms of ``description``, the target variable is
        constrained through the head predicate:

        * ``not_head=False``: ``<target> <head predicate> <head object>.``
        * ``not_head=True``: ``<target> <head predicate> ?y.`` together with
          ``FILTER(?y!=<head object>).``

        :param description: description whose body atoms are the query conditions
        :param alternative_head: head atom used instead of ``description.head`` when given
        :param not_head: if True, constrain the head-predicate value to differ from the head object
        :return: the query string
        """
        head = alternative_head if alternative_head else description.head
        target_var = str(description.get_target_var())

        # select part; the aggregate is parenthesised as in construct_query and as
        # the SPARQL 1.1 grammar expects for "(expression AS ?var)" projections.
        select_part = 'select (count(distinct ' + target_var + ') as ?c) '
        from_part = self.construct_from_part()

        # where part
        query_conditions = description.body

        head_predicate = _sparql_repr(head.predicate)
        head_object = _sparql_repr(head.object)

        if not_head:
            # Keep the triple that binds ?y and ADD the inequality filter; a
            # FILTER on ?y alone would refer to a variable that is never bound.
            filter_part = [
                target_var + ' ' + head_predicate + ' ?y. ',
                'FILTER(?y!=' + head_object + ').'
            ]
        else:
            filter_part = [
                target_var + ' ' + head_predicate + ' ' + head_object + '. '
            ]

        filter_part += self.create_filter_part(description)

        where_part = self._create_where_str(query_conditions, filter_part)

        query = select_part + from_part + where_part

        logger.debug(query)
        return query
Beispiel #11
0
    def mine_with_constants(self, head, max_length=2, min_coverage=-1.0, negative_heads=None):
        """
        Mine descriptions for ``head`` level by level.

        At every level each pattern of the previous level is expanded, the
        expanded patterns are bound (predicates first, then arguments where
        required), pruned and scored. The de-duplicated patterns of a level are
        the input of the next one.

        :param head: target head; a tuple is converted to an ``Atom`` from its first three items
        :param max_length: number of levels to discover
        :param min_coverage: multiplied by the target head size to obtain the minimum support
        :param negative_heads: optional heads whose sizes are counted and passed to the quality scoring
        :return: list of the descriptions of all levels accepted by ``_filter_output_descriptions``
        """
        if isinstance(head, tuple):
            head = Atom(head[0], head[1], head[2])

        negative_heads = negative_heads or []
        logger.info('Learn descriptions for ' + str(head))
        descriptions = []

        # for evaluation
        target_head_size = self.query_interface.count(Description2(head=head))
        min_support = int(min_coverage * target_head_size)
        negative_heads_sizes = []
        for neg_head in negative_heads:
            negative_heads_sizes.append(
                self.query_interface.count(Description2(head=neg_head)))

        # Level 0 holds only the bare head pattern.
        previous_level_descriptions = [Description2(head=head)]

        # TODO  the last iteration will be only to bind constants in the last predicate (better way to be implemented)
        for level in range(1, max_length + 1):
            logger.info("Discovering Level: %i", level)
            level_descriptions = []

            for cur_pattern in previous_level_descriptions:
                logger.debug('Expand Description Pattern: %r', cur_pattern)
                expanded_patterns = self._expand_pattern(cur_pattern, level)
                logger.debug('Expanded patterns Size: %i', len(expanded_patterns))

                # bind predicates
                bound = self._bind_patterns(expanded_patterns, min_support)

                # bind args if required
                with_bindable_args = self._get_patterns_with_bindable_args(bound)
                bound += self._bind_patterns(with_bindable_args, min_support)

                # Prune bound descriptions
                bound = [d for d in bound if self._filter_level_descriptions(d)]

                # Add quality scores to bound descriptions
                self._add_quality_to_descriptions(bound, target_head_size,
                                                  negative_heads,
                                                  negative_heads_sizes)

                level_descriptions.extend(bound)

            # Remove identical but different order body atoms
            # WARN: may not work well because of the trivial implementation of __eq__ and __hash__ of Description2
            level_descriptions = set(level_descriptions)

            # TODO make the filter global
            accepted = [d for d in level_descriptions
                        if self._filter_output_descriptions(d)]
            descriptions.extend(accepted)
            previous_level_descriptions = level_descriptions

            summary = "Done level: " + str(level) + ' level descriptions: ' + str(
                len(level_descriptions)) + ' total descriptions: ' + str(len(descriptions))
            logger.info(summary)

        return descriptions