Ejemplo n.º 1
0
def get_all_propeties_tree(element, retval=None):
    '''
        Intended to return the properties of ``element`` as a tree, as
        opposed to the flat output of get_all_propeties. The planned output
        structure is
        {
            <prop_name>: {
                ontolabel: '',
                ontodesc: '',
                range: '',
                children: {
                    <child_prop_name>: {
                        ontolabel: '',
                        ontodesc: '',
                        range: '',
                        children: ...
                    }
                }
            },

        }

        NOTE: the implementation is unfinished. It looks up the
        'to:subClassOf' values of ``element`` but builds nothing yet and
        always returns None.

        element -- a single element or a list of elements; a single element
                   is wrapped into a one-item list.
        retval  -- optional dict meant to accumulate the tree. It defaults to
                   a fresh dict per call (a ``{}`` default would be shared
                   between calls). Currently unused.
    '''
    if retval is None:
        retval = {}
    if not isinstance(element, list):
        element = [element]

    element_subclass = get_predicate_value(element, 'to:subClassOf')
    if collection.intersection(element_subclass,
                               ['to:ComplexProperty', 'to:Entity']):
        # TODO: descend into complex properties / entities and fill retval.
        pass
Ejemplo n.º 2
0
 def is_templatized_object(self, elem, additional_check_args):
     """
     Tell whether the predicate in ``additional_check_args[1]`` is a
     relational one.

     Returns False straight away for 'to:templatizedId'. Otherwise returns
     True when the predicate's 'subclass' + 'type_subclass' information
     (from gather_info_from_toflerdb) intersects to:RelationalProperty /
     to:ComplexRelationalProperty, and False when it does not.
     ``elem`` is not used.
     """
     predicate = additional_check_args[1]
     if predicate == 'to:templatizedId':
         return False
     subclass, type_subclass = self.gather_info_from_toflerdb(
         predicate, ['subclass', 'type_subclass'])
     common = collection.intersection(
         ['to:RelationalProperty', 'to:ComplexRelationalProperty'],
         subclass + type_subclass)
     return bool(common)
Ejemplo n.º 3
0
    def _create_match_array(self, request, match_array, agg_query, base_string=""):
        nested_queries = {}
        # is_agg_query_scope = any([key.startswith(
        #     op) for op in self.agg_operators for key in request])
        for f in request:
            f_val = request[f]
            (f, op) = self._extract_operator(f)
            if f_val is None and op is None:
                continue

            if type(f_val) is dict:
                newrequest = f_val
                (f_subclass, ) = self._gather_info_from_toflerdb(
                    f, ['subclass'])
                if collection.intersection(f_subclass, [
                        'to:ComplexRelationalProperty',
                        'to:RelationalProperty']):
                    newrequest = self._prepare_direct_request_body(
                        f, f_val)

                new_agg_query = {}
                child_nested_queries = self._create_match_array(
                    newrequest, match_array, new_agg_query, base_string + f + ".")

                if new_agg_query:
                    agg_query.setdefault("aggs", {}).update(new_agg_query)

                if child_nested_queries:
                    grp_name = base_string.replace(".", "_") if base_string else "root"
                    agg_query.setdefault(grp_name, {
                        "terms": {"field": "id"},
                        "aggs": {}
                    })["aggs"].update(child_nested_queries)

            else:
                field_name = self._get_field_name(f, base_string)
                is_agg_query, is_nested_query, op_query = self._operator_query_builder(
                    op, field_name, f_val, f, base_string)
                if not is_agg_query:
                    match_array.append(op_query)
                else:
                    #for merging if agg belongs to same nested scope
                    if is_nested_query:
                        merge(nested_queries, op_query)
                    else:
                        agg_query.update(op_query)

        return nested_queries
Ejemplo n.º 4
0
 def typecast_input(self, subj, pred, objc):
     """
     Convert ``objc`` to the representation expected for predicate ``pred``.

     - 'to:type' objects are returned untouched.
     - For predicates whose 'subclass' + 'type_subclass' information
       intersects to:RelationalProperty / to:ComplexRelationalProperty,
       the object is returned as ``str(objc)``.
     - Anything else is passed to collection.typecast together with the
       predicate's 'range' + 'type_range' information.
     ``subj`` is not used.
     """
     if pred == 'to:type':
         return objc
     wanted = ['subclass', 'type_subclass', 'range', 'type_range']
     subclass, type_subclass, value_range, type_value_range = \
         self.gather_info_from_toflerdb(pred, wanted)
     is_relational = collection.intersection(
         ['to:RelationalProperty', 'to:ComplexRelationalProperty'],
         subclass + type_subclass)
     if not is_relational:
         return collection.typecast(objc, value_range + type_value_range)
     return str(objc)
Ejemplo n.º 5
0
def get_all_entities_of_namespace(nss):
    """
    List the ontology subjects of the given namespace(s) that are entities.

    ``nss`` is a namespace or a list of namespaces. It is first intersected
    with get_all_namespaces(); an empty result short-circuits to ``[]``.
    Subjects whose name matches one of the remaining namespaces as a
    "<namespace>:" prefix are then fetched from toflerdb_ontology, and those
    whose 'to:subClassOf' values contain 'to:Entity' are returned.
    """
    requested = nss if isinstance(nss, list) else [nss]
    namespaces = collection.intersection(requested, get_all_namespaces())
    if len(namespaces) == 0:
        # Nothing to look for: without this guard the pattern below would
        # be '^().*', which does not restrict the subject at all.
        return []

    prefixes = '|'.join('%s:' % namespace for namespace in namespaces)
    pattern = '^(%s).*' % prefixes
    query = """
        SELECT DISTINCT(subject) AS subject FROM toflerdb_ontology WHERE
        subject REGEXP %s
    """
    rows = Common.execute_query(query, pattern)

    entities = []
    for row in rows:
        candidate = row['subject']
        if 'to:Entity' in get_predicate_value(candidate, 'to:subClassOf'):
            entities.append(candidate)
    return entities
Ejemplo n.º 6
0
    def _create_output(self, data, request, output, aggdata, base_string=""):
        if self._to_delete:
            return

        for f in request:
            f_val = request[f]
            (f, op) = self._extract_operator(f)
            if type(f_val) is dict:
                if f not in data:
                    if self._has_graph_filter(f_val):
                        self._to_delete = True
                        return
                    continue
                op = None
                '''
                if f is a subclassof of either to:ComplexRelationalProperty
                or to:RelationalProperty,
                query all the ids in within itself and replace those inplace
                with associated
                '''
                (f_subclass, ) = self._gather_info_from_toflerdb(
                    f, ['subclass'])
                if collection.intersection(f_subclass, [
                        'to:ComplexRelationalProperty',
                        'to:RelationalProperty']):
                    if type(data[f]) is list:
                        #multiple object return case
                        newdata = []
                        hop_ids = []
                        hop_ids = [item['value'] for item in data[f]
                                   if item['value'] not in hop_ids]
                        filter_id = f_val.get('id', None)
                        newrequest = None
                        if filter_id and filter_id in hop_ids:
                            #fetch filtered id
                            newrequest = self._prepare_one_hop_request_body(
                                f, f_val)
                            newrequest.update({'id': filter_id})
                        elif not filter_id:
                            #fetch all ids
                            newrequest = self._prepare_one_hop_request_body(
                                f, f_val)
                            newrequest.update({'id|=': hop_ids})

                        if newrequest:
                            newresponse, newresponse_aggdata = self.query(
                                newrequest, raw_output=True)
                            if len(newresponse):
                                newresponse_normalized = {}
                                for nr in newresponse:
                                    newresponse_normalized[nr['id']] = nr
                                for item in data[f]:
                                    if item['value'] in newresponse_normalized:#checking id in newresponse_normalized
                                        item.update(newresponse_normalized[
                                            item['value']])
                                        newdata.append(item)

                        if len(newdata):
                            data[f] = newdata
                        else:
                            self._to_delete = True
                            return
                    else:
                        #single object return case
                        filter_id = f_val.get('id', None)
                        hop_id = data[f]['value']
                        #ignore unmatched ones
                        if filter_id and hop_id != filter_id:
                            continue
                        newrequest = self._prepare_one_hop_request_body(
                            f, f_val)
                        newrequest.update({'id': hop_id})
                        newresponse, newresponse_aggdata = self.query(
                            newrequest, raw_output=True)
                        if len(newresponse):
                            data[f].update(newresponse[0])
                        else:
                            self._to_delete = True
                            return

                    has_agg_req_keys, op = self._create_aggregation_output(f_val, newresponse_aggdata, "")
                else:
                    #aggdata so replacing all with aggdata
                    has_agg_req_keys, op = self._create_aggregation_output(f_val, aggdata, base_string+f+".")

                if not has_agg_req_keys:
                    if type(data[f]) is list:
                        # print 'list when val is dict', f
                        op = []
                        for item in data[f]:
                            if 'fact_id' in item:
                                item.pop('fact_id')
                            new_op = {}
                            self._create_output(item, f_val, new_op, aggdata, base_string + f + ".")
                            op.append(new_op)
                    else:
                        op = {}
                        data[f].pop('fact_id')
                        # print 'val when val is dict', f
                        self._create_output(data[f], f_val, op, base_string + f + ".")
                output[f] = op

            else:

                if f in data:
                    if type(data[f]) is list:
                        # print 'list when val is not dict', f
                        new_array = []
                        for item in data[f]:
                            if f == 'value':
                                new_array.append(item)
                            else:
                                new_array.append(item['value'])
                        output[f] = new_array
                    else:
                        # print 'val when val is not dict', f
                        if f == 'id' or f == 'value':
                            output[f] = data[f]
                        else:
                            output[f] = data[f]['value']
                else:
                    print f, 'not in data when val is not dict'
Ejemplo n.º 7
0
    def _operator_query_builder(self, op, fld, fld_val, org_fld, base_fld):
        """
        Build the query fragment for one ``field <op> value`` request entry.

        op       -- operator extracted from the request key, or None for a
                    plain match.
        fld      -- field name used inside the generated query.
        fld_val  -- requested value; a two-item sequence for the interval
                    operators, a value or list for '|=' / '~|='.
        org_fld  -- original property name, used to look up its
                    'domain_subclass' information.
        base_fld -- dotted prefix of the enclosing fields.

        Returns ``(is_agg_query, is_nested_query, op_query)``. ``op_query``
        stays None for an operator that is not recognised (including one
        listed in ``self.agg_operators`` but not handled here). When the
        property's domain is a complex (relational) property the fragment
        is wrapped in a "nested" clause.
        """
        is_agg_query = False
        is_nested_query = False
        op_query = None

        if op is None:
            op_query = self._create_leaf_match_query(fld, fld_val)
        elif op == '~=':
            op_query = {
                'bool': {
                    'must_not': [self._create_leaf_match_query(fld, fld_val)]
                }
            }
        elif op in ('~|=', '|='):
            # "none of" / "any of" a list of values
            candidates = fld_val if isinstance(fld_val, list) else [fld_val]
            if op == '~|=':
                op_query = {'bool': {'must_not': []}}
                clauses = op_query['bool']['must_not']
            else:
                op_query = {
                    'bool': {
                        'should': [],
                        'minimum_should_match': 1
                    }
                }
                clauses = op_query['bool']['should']
            for candidate in candidates:
                clauses.append(self._create_leaf_match_query(fld, candidate))
        elif op in ('>=', '>', '<=', '<'):
            bound = {'>=': 'gte', '>': 'gt', '<=': 'lte', '<': 'lt'}[op]
            op_query = {'range': {fld: {bound: fld_val}}}
        elif op in ('[]=', '[]'):
            # inside the interval, with or without its end points
            lower, upper = ('gte', 'lte') if op == '[]=' else ('gt', 'lt')
            op_query = {
                'range': {
                    fld: {
                        lower: fld_val[0],
                        upper: fld_val[1]
                    }
                }
            }
        elif op in ('~[]=', '~[]'):
            # outside the interval: below its start or above its end
            below, above = ('lt', 'gt') if op == '~[]=' else ('lte', 'gte')
            op_query = {
                'bool': {
                    'should': [
                        {'range': {fld: {below: fld_val[0]}}},
                        {'range': {fld: {above: fld_val[1]}}}
                    ],
                    'minimum_should_match': 1
                }
            }
        elif op in self.agg_operators:
            is_agg_query = True
            if op in ('groupBy', 'count', 'distinctCount', 'avg', 'sum'):
                aggregation = {
                    'groupBy': "terms",
                    'count': "value_count",
                    'distinctCount': "cardinality",
                    'avg': "avg",
                    'sum': "sum",
                }[op]
                op_query = {fld: {aggregation: {"field": fld}}}
            elif op in ('min', 'max'):
                # min/max are served as the single top hit of a sorted
                # listing, returning the owning id along with the value
                direction = "asc" if op == 'min' else "desc"
                op_query = {
                    fld: {
                        "top_hits": {
                            "size": 1,
                            "_source": {
                                "includes": [
                                    base_fld + "id",
                                    fld
                                ]
                            },
                            "sort": {
                                fld: direction
                            }
                        }
                    },
                }

        (fld_domain_subclass,) = self._gather_info_from_toflerdb(
            org_fld, ['domain_subclass'])
        belongs_to_complex = collection.intersection(fld_domain_subclass, [
            'to:ComplexRelationalProperty', 'to:ComplexProperty'])
        if not belongs_to_complex:
            return is_agg_query, is_nested_query, op_query

        is_nested_query = True
        nested_path = base_fld[:-1] if base_fld.endswith('.') else base_fld
        if is_agg_query:
            op_query = {
                nested_path + "-nested": {
                    "nested": {
                        "path": nested_path
                    },
                    "aggs": op_query
                }
            }
        else:
            op_query = {
                "nested": {
                    "path": nested_path.split(".")[-1],
                    "query": op_query,
                    "inner_hits": {}
                }
            }
        return is_agg_query, is_nested_query, op_query
Ejemplo n.º 8
0
    def create_snapshot_input(self, subj, pred, objc):
        '''
        Record the fact ``(subj, pred, objc)`` in ``self._stores``.

        Exactly one of three cases is handled, tested in this order:
          1. the type of ``pred`` is a complex (relational) property;
          2. the type of ``subj`` is a complex (relational) property;
          3. the type of ``subj`` is an entity.
        A fact that matches none of them is ignored.

        Returns None. Raises exceptions.InvalidInputValueError in case 1
        when ``pred`` is not present in ``self._stores['inmemory']``.
        '''
        (subj_type, subj_type_subclass) = self.gather_info_from_toflerdb(
            subj, ['type', 'type_subclass'])
        (pred_type, pred_type_subclass, pred_type_range) = \
            self.gather_info_from_toflerdb(
                pred, ['type', 'type_subclass', 'type_range'])

        # if pred_type_subclass isA to:ComplexProperty or
        # to:ComplexRelationalProperty, this means the complex property is
        # getting assigned to some other node or some other complex property
        # we will be assigning value key only if pred_type_range is not
        # 'to:Null'.
        # if the subj exists in _stores, this means subject is either entity in which case
        # the subj would be found in _stores['snapshot'] or
        # the subj is an instance of any ComplexRelationalProperty/ ComplexProperty,
        # in which case the subject is found in _stores['inmemory']
        # if the subj does not exist in _stores as key, this means the subject
        # is an instance of ComplexRelationalProperty/ ComplexProperty, in which case
        # the subject can be found under the subj_type key with subject as id
        # we need to assign the value {pred_type : objc} at the appropriate
        # place
        if collection.intersection(
            ['to:ComplexProperty', 'to:ComplexRelationalProperty'],
                pred_type_subclass):
            self.get_node_from_snapshot(subj, pred)
            if not pred in self._stores['inmemory']:
                raise exceptions.InvalidInputValueError(
                    'ComplexProperty property instance can be used only once :'
                    ' %s\nInput Tuple : %s' % (pred, self._fact_tuple))

            # the partially built complex property instance, keyed by pred
            value = self._stores['inmemory'][pred]
            is_unique = dbutils.is_unique_predicate_value(pred_type[0])
            value.update({'fact_id': self._fact_id})
            if 'to:Null' not in pred_type_range:
                # 'to:type' values get add_superclasses(), everything else
                # goes through typecast_input()
                value.update({
                    'value':
                    self.typecast_input(subj, pred, objc) if 'to:type'
                    not in pred_type else self.add_superclasses(objc)
                })
            # non-unique predicates are stored as a list of instances
            if not is_unique:
                value = [value]
            if collection.find_path(self._stores, subj):
                collection.assign_value(self._stores, subj,
                                        {pred_type[0]: value})
            else:
                collection.assign_value(self._stores,
                                        subj_type[0], {pred_type[0]: value},
                                        _id=subj)
            # the instance has been attached; a second use of the same pred
            # hits the InvalidInputValueError above
            del self._stores['inmemory'][pred]

        # if subj_type_subclass isA to:ComplexProperty or to:ComplexRelationalProperty
        # subject is either a key in _stores['inmemory'] or id value of key subj_type
        # find the subject as key or find subj_type with subject as fact_id
        # assign {pred : objc} under proper key
        elif collection.intersection(
            ['to:ComplexProperty', 'to:ComplexRelationalProperty'],
                subj_type_subclass):
            # this means we have got a complex relation.
            # get the object from elasticsearch if possible
            self.get_node_from_snapshot(subj, pred)
            is_unique = dbutils.is_unique_predicate_value(pred)
            value = {
                pred:
                self.add_fact_id(self.typecast_input(subj, pred, objc),
                                 is_unique, pred == 'to:type')
            }
            if collection.find_path(self._stores, subj):
                collection.assign_value(self._stores, subj, value)
            elif collection.find_path(self._stores, subj_type[0], _id=subj):
                collection.assign_value(self._stores,
                                        subj_type[0],
                                        value,
                                        _id=subj)
            else:
                # first fact seen for this instance: start a new in-memory
                # record for it
                self._stores['inmemory'][subj] = {'id': subj}
                collection.assign_value(self._stores, subj, value)

        # if subj_type_subclass isA to:Entity, subject can be found under _stores['snapshot']
        # assign the pred value under it
        # important thing here is the use of if, elif
        # the order ensures that we don't get any complex kind of assignment at this point
        # this is really simple property assertion, and that is why we don't get any
        # "if 'to:Property' in" kind of condition check anywhere
        elif 'to:Entity' in subj_type_subclass:
            # this means we have got a node.
            # get the subject from elasticsearch if possible
            self.get_node_from_snapshot(subj, pred)
            is_unique = dbutils.is_unique_predicate_value(pred)
            value = {
                pred:
                self.add_fact_id(self.typecast_input(subj, pred, objc),
                                 is_unique, pred == 'to:type')
            }
            if subj not in self._stores['snapshot']:
                self._stores['snapshot'][subj] = {'id': subj}
            collection.assign_value(self._stores, subj, value)
Ejemplo n.º 9
0
    def is_valid(self, subj, pred, objc):
        '''
        Check the fact ``(subj, pred, objc)`` against the ontology.

        Checks, in order:
          * the subject must have a type;
          * 'to:Property' must be among the predicate's 'type_subclass' +
            'subclass' information;
          * for pred == 'to:type', the object must exist in the ontology;
          * otherwise, depending on the kind of predicate:
              - to:RelationalProperty: the object must exist, the
                predicate's 'domain' must intersect the subject's types and
                its 'range' must intersect the object's types;
              - to:ComplexRelationalProperty: the same, using 'type_domain'
                and 'type_range';
              - to:ComplexProperty: 'type_domain' must intersect the
                subject's types;
              - anything else: 'domain' must intersect the subject's types.

        Returns True when every applicable check passes. A failed check
        never returns False: it raises exceptions.InvalidInputValueError
        with a message naming the offending value and the input tuple.
        '''
        def reject(template, culprit):
            # Single exit for every failed check; ``template`` takes the
            # offending value and the input tuple, in that order.
            raise exceptions.InvalidInputValueError(
                template % (culprit, self._fact_tuple))

        def require_domain(allowed_domain):
            if not collection.intersection(allowed_domain,
                                           subj_type + subj_type_subclass):
                reject('Predicate domain does not satisfy ontology : %s'
                       '\nInput Tuple : %s', pred)

        def require_relation(domain_key, range_key):
            # Shared by the relational and complex relational cases, which
            # differ only in the ontology keys they read.
            if not dbutils.exists_in_eternity(
                    objc, additional_lookup=self._normalized_input):
                reject('Object does not exists in toflerdb : %s'
                       '\nInput Tuple : %s', objc)
            (allowed_domain, allowed_range) = self.gather_info_from_toflerdb(
                pred, [domain_key, range_key])
            require_domain(allowed_domain)
            (objc_type, objc_type_subclass) = self.gather_info_from_toflerdb(
                objc, ['type', 'type_subclass'])
            if not collection.intersection(allowed_range,
                                           objc_type + objc_type_subclass):
                reject('Predicate range does not satisfy ontology : %s'
                       '\nInput Tuple : %s', pred)

        subj_type, subj_type_subclass = self.gather_info_from_toflerdb(
            subj, ['type', 'type_subclass'])
        pred_type, pred_subclass, pred_type_subclass = \
            self.gather_info_from_toflerdb(
                pred, ['type', 'subclass', 'type_subclass'])

        if not subj_type:
            reject('Subject type must be defined : %s\nInput Tuple : %s',
                   subj)

        if 'to:Property' not in pred_type_subclass + pred_subclass:
            reject('Predicate root type must be of to:Property : %s'
                   '\nInput Tuple : %s', pred)

        # 'to:type' is special-cased: the object must be a known ontology
        # type. When it is, the fact still goes through the kind-specific
        # checks below.
        if pred == 'to:type' and not dbutils.exists_in_eternity(
                objc, ontology_only=True):
            reject('The type does not exist in ontology: %s'
                   '\nInput Tuple : %s', objc)
        elif 'to:RelationalProperty' in pred_subclass:
            require_relation('domain', 'range')
        elif 'to:ComplexRelationalProperty' in pred_type_subclass:
            require_relation('type_domain', 'type_range')
        elif 'to:ComplexProperty' in pred_type_subclass:
            (pred_type_domain, ) = self.gather_info_from_toflerdb(
                pred, ['type_domain'])
            require_domain(pred_type_domain)
        else:
            (pred_domain, ) = self.gather_info_from_toflerdb(pred, ['domain'])
            require_domain(pred_domain)

        return True
Ejemplo n.º 10
0
    def validate(self, subj, pred, objc):
        '''
        if pred == to:subClassOf:
            (objc + objc_subclass) in [to:Entity, to:Property, to:Literal]
        elif pred == to:ontoLabel:
            subj must be define i.e. subj_subclass is non empty
        elif pred == to:description:
            subj must be define i.e. subj_subclass is non empty
        elif pred == to:domain:
            objc needs to exists in db as subject
        elif pred == to:range:
            objc needs to exists in db as subject
            if subj_subclass is Relational/Complex Relational Property:
                subj_range_subclass in [to:Entity, to:Property]
            elif subj_subclass is Property:
                subj_range_subclass is [to:Literal]
        elif pred == 'to:isUnique':
            pass
        else:
            not valid
        '''
        input_tuple = (subj, pred, objc)
        (subj_subclass, ) = self.gather_info_from_toflerdb(subj, ['subclass'])
        if not subj_subclass:
            error_txt = ("Subject is not defined : %s"
                         "\nInput tuple : %s") % (subj, input_tuple)
            Common.get_logger().error(error_txt)
            raise exceptions.InvalidInputValueError(error_txt)

        if pred == 'to:subClassOf':
            (objc_subclass, ) = self.gather_info_from_toflerdb(
                objc, ['subclass'])
            if not collection.intersection(
                    objc_subclass + [objc],
                ['to:Entity', 'to:Property', 'to:Literal']):
                error_txt = ("Superclass is not defined : %s"
                             "\nInput tuple : %s") % (objc, input_tuple)
                Common.get_logger().error(error_txt)
                raise exceptions.InvalidInputValueError(error_txt)

        elif pred == 'to:ontoLabel':
            pass
        elif pred == 'to:description':
            pass
        elif pred == 'to:domain':
            if not dbutils.exists_in_eternity(
                    objc,
                    additional_lookup=self._normalized_input,
                    ontology_only=True):
                error_txt = ("Domain is not defined : %s"
                             "\nInput tuple : %s") % (objc, input_tuple)
                Common.get_logger().error(error_txt)
                raise exceptions.InvalidInputValueError(error_txt)
        elif pred == 'to:range':
            if not dbutils.exists_in_eternity(
                    objc,
                    additional_lookup=self._normalized_input,
                    ontology_only=True):
                error_txt = ("Range is not defined : %s"
                             "Input tuple : %s") % (objc, input_tuple)
                Common.get_logger().error(error_txt)
                raise exceptions.InvalidInputValueError(error_txt)

            (objc_subclass, ) = self.gather_info_from_toflerdb(
                objc, ['subclass'])
            if collection.intersection(
                ['to:RelationalProperty', 'to:ComplexRelationalProperty'],
                    subj_subclass):
                if not collection.intersection([objc] + objc_subclass,
                                               ['to:Entity', 'to:Property']):
                    error_txt = (
                        "For relational property declaration, range must"
                        "be either of type to:Entity or to:Property."
                        "\nProperty : %s, range type: %s."
                        "\nInput tuple : %s") % (subj, objc_subclass,
                                                 input_tuple)
                    Common.get_logger().error(error_txt)
                    raise exceptions.InvalidInputValueError(error_txt)
            elif 'to:Property' in subj_subclass:
                if not 'to:Literal' in objc_subclass:
                    error_txt = ("For property declaration,"
                                 "range must be of type to:Literal."
                                 "\nProperty : %s, range type: %s."
                                 "\nInput tuple : %s") % (subj, objc_subclass,
                                                          input_tuple)
                    Common.get_logger().error(error_txt)
                    raise exceptions.InvalidInputValueError(error_txt)
        elif pred == 'to:isUnique':
            pass
        else:
            error_txt = (
                "Predicate must be one of to:subClassOf, to:ontoLabel,"
                " to:description, to:domain, to:domain, to:range."
                "\nInput Tuple : %s") % str(input_tuple)
            Common.get_logger().error(error_txt)
            raise exceptions.InvalidInputValueError(error_txt)
Ejemplo n.º 11
0
    def _property_mapping_entry(self, subj, subj_subclass, subj_range,
                                subj_range_subclass):
        '''
            Build the mapping entry for a single property.

            The returned dict has the form
            {
                <subj>: {
                    'type': 'nested',        # complex properties only
                    'properties': {
                        'fact_id': {...},
                        'value': {...}       # omitted for to:Null ranges
                    }
                }
            }

            A fresh dict is created on every call, so callers may store
            the result without aliasing entries of other properties.
        '''
        node = {
            'properties': {
                'fact_id': {
                    'type': 'string',
                    'index': 'not_analyzed'
                },
            }
        }
        if collection.intersection(
                ['to:ComplexProperty', 'to:ComplexRelationalProperty'],
                subj_subclass):
            node['type'] = 'nested'

        # a to:Null range means the property carries no value key at all
        if 'to:Null' not in subj_range_subclass:
            if collection.intersection(
                    ['to:RelationalProperty', 'to:ComplexRelationalProperty'],
                    subj_subclass):
                # the value would be a reference to another node
                node['properties']['value'] = {
                    'type': 'string',
                    'index': 'not_analyzed'
                }
            else:
                node['properties']['value'] = collection.get_datatype(
                    subj_range + subj_range_subclass)
        return {subj: node}

    def make_mapping(self):
        '''
            Add a mapping entry to self._complete_mapping and
            self._new_mapping for every distinct subject of
            self._input_list that is a to:Property.

            - If the domain of the property is itself a property, the
              entry is nested under the 'properties' key of each domain
              already present in self._complete_mapping; domains that
              cannot be located are skipped.
            - If the domain is (a subclass of) to:Entity, the entry is
              added at the top level of both mappings.
            - Finally the properties returned by the inverse to:domain
              lookup on the subject's superclasses are attached under
              the subject's own 'properties' key.

            NOTE(review): the 'type' / 'index' / 'properties' keys look
            like an Elasticsearch mapping - confirm against the consumer.

            The same entry object is stored in both mappings, exactly as
            before; nothing is returned.
        '''
        # Distinct subjects in first-seen order. Subjects are used as dict
        # keys in the mapping, so they are hashable and a set gives O(1)
        # membership tests instead of rescanning the list.
        all_subjects = []
        seen_subjects = set()
        for info in self._input_list:
            subj = info['subject']
            if subj not in seen_subjects:
                seen_subjects.add(subj)
                all_subjects.append(subj)

        for subj in all_subjects:
            (subj_subclass, subj_domain, subj_domain_subclass, subj_range,
             subj_range_subclass) = self.gather_info_from_toflerdb(
                 subj, [
                     'subclass', 'domain', 'domain_subclass', 'range',
                     'range_subclass'
                 ])
            if 'to:Property' not in subj_subclass:
                continue

            if 'to:Property' in subj_domain_subclass:
                for dmn in subj_domain:
                    path = collection.find_path(self._complete_mapping, dmn)
                    if path is None:
                        continue
                    path.append('properties')
                    # built per domain so each domain gets its own entry
                    value = self._property_mapping_entry(
                        subj, subj_subclass, subj_range,
                        subj_range_subclass)
                    collection.assign_value_to_path(self._complete_mapping,
                                                    value, path)
                    collection.assign_value_to_path(self._new_mapping, value,
                                                    path)
            elif 'to:Entity' in subj_domain + subj_domain_subclass:
                value = self._property_mapping_entry(
                    subj, subj_subclass, subj_range, subj_range_subclass)
                self._complete_mapping.update(value)
                self._new_mapping.update(value)

            # add all the superclass properties also
            # (subj_subclass contains 'to:Property' here, so it is never
            # empty and needs no extra emptiness guard)
            subj_subclass_domain_of = dbutils.get_inverse_predicate_value(
                subj_subclass,
                'to:domain',
                level=1,
                additional_lookup=self._inverse_normalized_input)
            for ssdo in subj_subclass_domain_of:
                # looked up again on every iteration: the path list is
                # extended in place below and the mapping itself changes
                path = collection.find_path(self._complete_mapping, subj)
                if not path:
                    continue
                path.append('properties')
                (ssdo_range, ) = self.gather_info_from_toflerdb(
                    ssdo, ['range'])
                value = {
                    ssdo: {
                        'properties': {
                            'fact_id': {
                                'type': 'string',
                                'index': 'not_analyzed'
                            },
                            'value': collection.get_datatype(ssdo_range)
                        }
                    }
                }
                collection.assign_value_to_path(self._new_mapping, value,
                                                path)
                collection.assign_value_to_path(self._complete_mapping,
                                                value, path)