예제 #1
0
def list_hierarchy(class_name, bases):
    """
    Builds the de-duplicated list of Uris for a class and its bases

    Args:
    -----
        class_name: name of the current class
        bases: list/tuple of bases for the current class; any base whose
               __name__ is in IGNORE_CLASSES is left out

    Returns:
    --------
        list of unique Uri values (ordering follows set iteration and is
        therefore not guaranteed)
    """
    uris = [Uri(class_name)]
    uris.extend(
        Uri(base.__name__)
        for base in bases
        if base.__name__ not in IGNORE_CLASSES
    )
    return list(set(uris))
예제 #2
0
class CSVProcessor(Processor):
    """CSV RDF Mapping Processor

    Wraps a ``csv.DictReader`` over the supplied CSV file and hands the RML
    rules to the base ``Processor``.
    """
    # NOTE(review): the prefix ("kds:") and the capitalisation ("PRocessor")
    # differ from the other processors' rdf_name values -- confirm before
    # changing, the string is used at runtime.
    rdf_name = Uri("kds:RmlCSVPRocessor")

    def __init__(self, **kwargs):
        """
        kwargs:
            rml_rules: (required) forwarded to Processor.__init__
            csv_file: (required) path of the CSV file to read
            fields: (optional) stored unchanged on self.fields

        Any remaining keyword arguments are forwarded to Processor.__init__

        Raises:
            TypeError: when 'rml_rules' is not supplied
            KeyError: when 'csv_file' is not supplied
        """
        if "fields" in kwargs:
            self.fields = kwargs.pop("fields")
        # Fail with an explicit message instead of an UnboundLocalError at
        # the super() call further down.
        if "rml_rules" not in kwargs:
            raise TypeError(
                "CSVProcessor requires the 'rml_rules' keyword argument")
        rml_rules = kwargs.pop("rml_rules")
        csv_file = kwargs.pop("csv_file")
        # The csv module needs a text-mode file opened with newline='';
        # a binary handle makes DictReader fail on the first row.
        # The handle is kept on the instance so it can be closed by the owner.
        self._csv_handle = open(csv_file, newline='')
        self.reader = csv.DictReader(self._csv_handle)
        super(CSVProcessor, self).__init__(rml_rules, **kwargs)

    def __generate_reference__(self, triple_map, **kwargs):
        """Extracts the value of either column by key or by position

        Not implemented yet; currently returns None.
        """
        pass

    def execute(self, triple_map, **kwargs):
        """Method executes mapping between CSV source and
        output RDF

        Not implemented yet; currently returns None.

        args:
            triple_map(SimpleNamespace): Triple Map
        """
        pass

    def run(self, **kwargs):
        """Method runs through CSV Reader and applies rules to each
        row.

        Not implemented yet; currently returns None.
        """
        pass
예제 #3
0
 def get_uri_list(self, **kwargs):
     """
     Builds and runs the SPARQL query that selects the subjects to index

     kwargs:
         no_status: when truthy the modified/indexed/error status filter
                    is left out of the query
         order_by: text placed after the closing brace of the query,
                   defaults to an empty string

     Returns:
         list of (Uri, es_id) tuples, one per row returned by
         self.tstore_conn.query
     """
     # The filter is rendered unconditionally (self.idx_start_time is read
     # even when "no_status" is set) and only dropped afterwards.
     rendered_filter = """
             optional {{ ?s dcterm:modified ?modTime }} .
             optional {{ ?s kds:esIndexTime ?time }} .
             optional {{ ?s kds:esIndexError ?error }}
             filter (
                 !(bound(?time)) ||
                 ?time<?modTime  ||
                 (bound(?error) && ?time < {idx_start_time}))
             """.format(idx_start_time=self.idx_start_time.sparql)
     query_template = """
         SELECT DISTINCT ?s ?es_id
         {{
             VALUES ?rdftypes {{\n\t\t{rdf_types} }} .
             ?s a ?rdftypes .
             BIND(SHA1(STR(?s)) as ?es_id) .
             {status_filter}
         }}
         {order_by}
         """
     if kwargs.get("no_status"):
         status_filter = ""
     else:
         status_filter = rendered_filter
     sparql = query_template.format(
         rdf_types="\n\t\t".join(self.rdf_types),
         status_filter=status_filter,
         order_by=kwargs.get("order_by", ""))
     uri_list = []
     for row in self.tstore_conn.query(sparql=sparql):
         uri_list.append((Uri(row['s']['value']), row['es_id']['value'],))
     return uri_list
예제 #4
0
def get_all_item_data(items, conn, graph=None, output='json', **kwargs):
    """ Renders a SPARQL query from a template for the supplied item(s) and
    runs it against the connection

    args:
        items: the starting uri or list of uris for the query; a list is
               passed to the template as 'uri_list', anything else is
               converted with Uri(...).sparql and passed as 'item_uri'
        conn: the rdfframework triplestore connection to query against
        graph: not used by this function
        output: 'json' or 'rdf'; handed to the template as 'output'

    kwargs:
        template: template to use in place of the generic template
                  ("sparqlAllItemDataTemplate.rq")
        rdfclass: rdfclass the items are based on; its query_kwargs are
                  merged into the template arguments
        filters: list of filters to apply (run through make_sparql_filter)
        special_union: passed through to the template when truthy

    returns:
        whatever conn.query returns; every keyword argument except a truthy
        'template' is also forwarded to conn.query
    """
    # Only a truthy 'template' is removed from kwargs; a falsy entry stays
    # in place and is forwarded to conn.query with the other kwargs.
    template = (kwargs.pop('template') if kwargs.get('template')
                else "sparqlAllItemDataTemplate.rq")

    render_args = {"prefix": NSM.prefix(), "output": output}
    if isinstance(items, list):
        render_args['uri_list'] = items
    else:
        render_args['item_uri'] = Uri(items).sparql

    special_union = kwargs.get("special_union")
    if special_union:
        render_args['special_union'] = special_union

    rdfclass = kwargs.get('rdfclass')
    if rdfclass:
        render_args.update(rdfclass.query_kwargs)

    filters = kwargs.get("filters")
    if filters:
        render_args['filters'] = make_sparql_filter(filters)

    sparql = render_without_request(template, **render_args)
    return conn.query(sparql, **kwargs)
class AddClassHierarchyProcessor(PropertyProcessor, metaclass=PropSingleton):
    """
    Adds the rdf:Class hierarchy URIs to the property's list of values.
    This is useful for indexing in elasticsearch when dealing with rdf:type.
    This way when doing a term search for a particular rdf:type all of the
    subclasses for that type will be included as well.

    Example:
    --------

        For a property with 'schema_Person' as the associated class,
        ['schema:Thing', 'schema:Person'] will be added to the property list
        of values since 'schema:Person' is a subclass of 'schema:Thing'
    """

    definition_uri = Uri('kdr:AddClassHierarchyProcessor')

    def __call__(self, prop):
        """
        Merges the bound class hierarchy into the property's data

        Args:
        -----
            prop: the property to process; its values are read through
                  __data_source__ and written back through __set_data__
        """
        values = list(self.__data_source__(prop))
        values.extend(prop.bound_class.hierarchy)
        # the round trip through a set drops duplicates (order not kept)
        self.__set_data__(prop, list(set(values)))
class ConvertObjectToStringProcessor(PropertyProcessor):
    """
    Converts the object values of the property to a string

    Args:
    -----
        params: {'kds_lookupProperty': the name of the rdf property in the
                                 object value to convert to a string}
        data_attr: forwarded unchanged to PropertyProcessor.__init__
        classnames: forwarded unchanged to PropertyProcessor.__init__

    Returns:
    --------
        strings for each object value
    """
    definition_uri = Uri('kdr:ConvertObjectToStringProcessor')

    def __init__(self, params=None, data_attr=None, classnames=None):
        # Fresh containers per instance: the previous literal defaults
        # ([{}] and []) were single objects shared by every call and were
        # handed on to the base class.
        if params is None:
            params = [{}]
        if classnames is None:
            classnames = []
        super().__init__(params, data_attr, classnames)
        str_prop = params[0].get('kds_lookupProperty')
        # only the first configured lookup property is used
        self.str_prop = str_prop[0] if str_prop else None

    def __call__(self, prop):
        """
        Replaces the property's data with its string representation

        Args:
        -----
            prop: the property to process
        """
        data = self.__data_source__(prop)
        rtn_list = []
        if self.str_prop:
            for val in data:
                if val.get(self.str_prop):
                    # NOTE(review): this overwrites rather than extends, so
                    # only the last value carrying the lookup property ends
                    # up in the result -- confirm whether that is intended.
                    rtn_list = [str(item) for item in val[self.str_prop]]
        else:
            rtn_list = [str(item) for item in data]

        self.__set_data__(prop, rtn_list)
class AddClassProcessor(PropertyProcessor, metaclass=PropSingleton):
    """
    Appends the class names of the bound rdf:Class to the property's
    list of values
    """
    definition_uri = Uri('kdr:AddClassProcessor')

    def __call__(self, prop):
        """
        Args:
        -----
            prop: the property to extend in place (via ``+=``) with
                  prop.bound_class.class_names
        """
        bound_names = prop.bound_class.class_names
        prop += bound_names
예제 #8
0
    def get_def(prop_defs, def_fields, default_val=None):
        """ Collects the values stored under several equivalent field names
            (one per vocabulary) into a single de-duplicated list

        args:
            prop_defs: the property definition object
            def_fields: list of the mapped field names
            default_val: value used when none of the fields hold anything

        returns:
            list of the unique values found; [default_val] when nothing was
            found and default_val is truthy; otherwise an empty list
        """
        merged = []
        for fld in def_fields:
            found = prop_defs.get(fld)
            if found:
                merged += found
        if not merged:
            if default_val:
                merged.append(default_val)
            return merged
        try:
            return list(set(merged))
        except TypeError:
            # Unhashable entries (rdf class instances or lists): join their
            # members with "|" so they can pass through a set, then split
            # them back into lists of Uris afterwards.
            flattened = []
            for item in merged:
                if isinstance(item, MODULE.rdfclass.RdfClassBase):
                    flattened.append(
                        "|".join(merge_rdf_list(item['owl_unionOf'])))
                elif isinstance(item, list):
                    flattened.append("|".join(item))
                else:
                    flattened.append(item)
            return [
                [Uri(domain) for domain in key.split("|")]
                if "|" in key else Uri(key)
                for key in set(flattened)
            ]
예제 #9
0
def unique_append(self, value):
    """ Appends value, converted to a Uri, only when it is not already in
    the list.

    When the Uri conversion/append raises AttributeError and value is an
    RdfClassBase instance, the value is appended unconverted; any other
    value re-raises the error.
    #! consider the possibility of item using this to a set
    """
    if value in self:
        return
    try:
        super(self.__class__, self).append(Uri(value))
    except AttributeError as err:
        if not isinstance(value, MODULE.rdfclass.RdfClassBase):
            raise err
        super(self.__class__, self).append(value)
예제 #10
0
 def set_list_predicates(self):
     """
     Queries the rml mappings for predicates typed as rdf:List and stores
     them, grouped by subject class, in self.array_fields as
     {subject class: [list field, ...]}
     """
     rows = self.rml.query("""
             SELECT DISTINCT ?subj_class ?list_field
             {
                 ?bn rr:datatype rdf:List .
                 ?bn rr:predicate ?list_field .
                 ?s ?p ?bn .
                 ?s rr:subjectMap ?sm_bn .
                 ?sm_bn rr:class ?subj_class .
             }""")
     grouped = {}
     for row in rows:
         subj_class = Uri(row[0]).sparql
         list_field = Uri(row[1]).sparql
         grouped.setdefault(subj_class, []).append(list_field)
     self.array_fields = grouped
예제 #11
0
def make_property(prop_defs, prop_name, cls_names=None, hierarchy=None):
    """ Generates a property class from the definition dictionary

    args:
        prop_defs: the dictionary defining the property; when class names
                   are supplied its 'kds_appliesToClass' entry is set to
                   them
        prop_name: the base name of the property
        cls_names: names of the rdf classes with which the property is
                   associated. NOTE: 'RdfClassBase' is removed from the
                   passed list in place. When empty/omitted the property
                   is treated as applying to all classes and is registered
                   in the module-level `properties` / `domain_props`.
        hierarchy: stored on the new class as its 'hierarchy' keyword

    returns:
        the newly created property class
    """
    # None sentinels replace the former mutable defaults; the default
    # `hierarchy` list used to be shared by every class created without one.
    if cls_names is None:
        cls_names = []
    if hierarchy is None:
        hierarchy = []

    try:
        cls_names.remove('RdfClassBase')
    except ValueError:
        pass

    if cls_names:
        register = False
        new_name = "%s_%s" % (prop_name.pyuri, "_".join(cls_names))
        prop_defs['kds_appliesToClass'] = cls_names
    else:
        # not tied to a specific class: applies to all and gets registered
        register = True
        cls_names = [Uri('kdr_AllClasses')]
        new_name = prop_name

    new_prop = types.new_class(
        new_name, (
            RdfPropertyBase,
            list,
        ), {
            'metaclass': RdfPropertyMeta,
            'prop_defs': prop_defs,
            'class_names': cls_names,
            'prop_name': prop_name,
            'hierarchy': hierarchy
        })

    if register:
        # Both registries are only mutated, never rebound, so no `global`
        # declaration is required.
        properties[new_name] = new_prop
        for domain in new_prop.rdfs_domain:
            try:
                domain_props.setdefault(domain, {})[prop_name] = prop_defs
            except TypeError:
                # unhashable domain (presumably a list describing a union
                # of classes -- TODO confirm); it cannot be a key, skip it
                pass
    return new_prop
예제 #12
0
        def test_uri(value):
            """ Normalises value to a Uri or BlankNode

            Values that already are a Uri or BlankNode are returned as is.
            Strings starting with "_:" become a BlankNode, everything else
            is passed to Uri. When the conversion fails a new, empty
            BlankNode is returned instead.

            Returns: Uri or Bnode """
            if isinstance(value, (Uri, BlankNode)):
                return value
            try:
                if value.startswith("_:"):
                    return BlankNode(value)
                return Uri(value)
            except Exception:
                # Deliberate best effort: any conversion problem falls back
                # to a fresh blank node. `Exception` (rather than a bare
                # except) lets KeyboardInterrupt/SystemExit propagate.
                return BlankNode()
예제 #13
0
 def set_context(self):
     """
     Queries the RML for every rr:class and rr:predicate object and builds
     self.context, a mapping used as the context for json+ld output

     Only rdflib.URIRef results are considered; for each one the first
     element of Uri(...).value supplies the key (index 0) and value
     (index 1). Entries with a falsy key are skipped.
     """
     rows = self.rml.query("""
             SELECT ?o {
                 {
                     ?s rr:class ?o
                 } UNION {
                     ?s rr:predicate ?o
                 }
             }""")
     context = {}
     for row in rows:
         if not isinstance(row[0], rdflib.URIRef):
             continue
         ns = Uri(row[0]).value[0]
         if ns[0]:
             context[ns[0]] = ns[1]
     self.context = context
예제 #14
0
 def __prepare__(mcs, name, bases, **kwargs):
     """
     Builds the initial namespace dictionary for a new rdf class

     Args:
         mcs: the metaclass
         name: name of the class being created
         bases: base classes of the class being created
         kwargs: must contain 'cls_defs', the definitions for the class

     Returns:
         dict pre-populated with the doc string, properties, hierarchy,
         elasticsearch definitions and uri of the class. An empty dict is
         returned when 'cls_defs' is missing -- and, because the handler
         wraps the whole body, when any other KeyError is raised while the
         namespace is being assembled.
     """
     # if name == 'bf_UsageAndAccessPolicy':
     #     pdb.set_trace()
     try:
         cls_defs = kwargs.pop('cls_defs')
         props = get_properties(name)  #cls_defs)
         doc_string = make_doc_string(name, cls_defs, bases, props)
         new_def = {}
         # if name == 'bf_Topic': pdb.set_trace()
         new_def['__doc__'] = doc_string
         new_def['doc'] = property(print_doc)
         new_def['properties'] = list_base_properties(
             bases)  #property(list_properties)
         # new_def['json_def'] = cls_defs
         new_def['hierarchy'] = list_hierarchy(name, bases)
         new_def['id'] = None
         new_def['class_names'] = [name]
         es_defs = es_get_class_defs(cls_defs, name)
         # entries from the first base are merged on top of this class's own
         if hasattr(bases[0], 'es_defs'):
             es_defs.update(bases[0].es_defs)
         new_def['es_defs'] = get_rml_processors(es_defs)
         new_def['query_kwargs'] = get_query_kwargs(es_defs)
         new_def['uri'] = Uri(name).sparql_uri
         # create a class specific property for every property definition
         # and expose it both as an attribute and in 'properties'
         for prop, value in props.items():
             new_def[prop] = MODULE.rdfclass.make_property(
                 value, prop, new_def['class_names'])
             new_def['properties'][prop] = new_def[prop]
         # make sure the rdf:type property (__a__) is always present
         if __a__ not in new_def.keys():
             new_def[__a__] = MODULE.rdfclass.properties.get(__a__)
             new_def['properties'][__a__] = new_def[__a__]
         new_def['cls_defs'] = cls_defs  #cls_defs.pop(name)
         # names of the properties whose range definition has 'kds_esLookup'
         new_def['es_props'] = []
         for prop_name, prop in new_def['properties'].items():
             rng_def = get_prop_range_def(\
                         get_prop_range_defs(new_def['class_names'],
                                             prop.kds_rangeDef))
             if rng_def.get('kds_esLookup'):
                 new_def['es_props'].append(prop_name)
         return new_def
     except KeyError:
         return {}
예제 #15
0
    def __init__(self, data=None, base_uri=None, **kwargs):
        """
        Sets up the subject/predicate/object key names and optionally loads
        the supplied data

        args:
            data: passed to self.load_data together with kwargs when truthy
            base_uri: converted to a Uri when truthy, stored as-is otherwise

        kwargs:
            debug: when truthy the module logger is switched to DEBUG
            bnode_only: when truthy only 'bnode' objects are related back
                        (self.relate_obj_types), otherwise 'bnode' and 'uri'
        """
        started = datetime.datetime.now()
        self.smap, self.pmap, self.omap = 's', 'p', 'o'
        self.rmap = {}
        self.base_uri = Uri(base_uri) if base_uri else base_uri
        if kwargs.get("debug"):
            log.setLevel(logging.DEBUG)
        # relate_obj_types sets whether to relate the object of a class
        # back to itself
        self.relate_obj_types = (['bnode'] if kwargs.get("bnode_only")
                                 else ['bnode', 'uri'])

        if not data:
            return
        self.load_data(data, **kwargs)
        log.debug("loaded %s triples in %s" %
                  (len(data), (datetime.datetime.now() - started)))
예제 #16
0
def filter_prop_defs(prop_defs, hierarchy, cls_names):
    """ Reads through the prop_defs and returns a dictionary filtered by the
        current class

    For every definition the entries whose 'kds_appliesToClass' contains
    one of the valid classes (kdr_AllClasses, cls_names, hierarchy) are
    kept. When no entry matches, the entries without a
    'kds_appliesToClass' are used instead. Values whose items have no
    ``get`` method are copied over unchanged.

    args:
        prop_defs: the definitions from the rdf vocabulary definition
        hierarchy: list of the classes in the class hierarchy
        cls_names: the name of the classes
    """
    valid_classes = [Uri('kdr_AllClasses')] + cls_names + hierarchy
    filtered = {}
    for def_name, value in prop_defs.items():
        matched = []
        unscoped = []
        try:
            for item in value:
                applies_to = item.get('kds_appliesToClass')
                if not applies_to:
                    unscoped.append(item)
                elif any(cls in valid_classes for cls in applies_to):
                    matched.append(item)
        except AttributeError:
            # items are not dict-like: keep the value untouched
            filtered[def_name] = value
        else:
            filtered[def_name] = matched or unscoped
    return filtered
예제 #17
0
    def run(self, tag=None, output=None, **kwargs):
        """
        Runs the extractor: streams through the xml source and counts the
        elements that have an rdf:type child whose rdf:resource equals
        self.filter_val. The count and elapsed time are printed.

        Args:
        -----
            tag: optional tag name; when given only elements with this tag
                 are requested from iterparse
            output: ['filepath', None] -- currently not used

        """
        start = datetime.datetime.now()
        if tag:
            tag = Uri(tag)
            xml_generator = etree.iterparse(self.source, tag=tag.etree)
        else:
            xml_generator = etree.iterparse(self.source)
        # loop invariant: the resource string being searched for
        filter_val = str(self.filter_val)
        count = 0
        for _event, element in xml_generator:
            resources = (el.get(_RES_TAG)
                         for el in element.findall(_RDF_TYPE_TAG))
            rdf_types = [res for res in resources if res]
            # (a leftover pdb.set_trace() used to halt execution here)
            if filter_val in rdf_types:
                count += 1
            # release the parsed element to keep memory flat while streaming
            element.clear()
        print("Found '{}' items in {}".format(
            count, (datetime.datetime.now() - start)))
예제 #18
0
    def add_property(self, pred, obj):
        """ adds a property and its value to the class instance

        When the instance has no entry for the predicate yet, a property
        class is looked up (instance properties first, then the module
        level properties, finally a newly made one), instantiated and
        stored before the value is appended.

        args:
            pred: the predicate/property to add; converted to a Uri
            obj: the value/object to add
            obj_method: *** No longer used.
        """
        pred = Uri(pred)
        try:
            # fast path: the predicate already exists on the instance
            self[pred].append(obj)
        # except AttributeError:
        #     new_list = [self[pred]]
        #     new_list.append(obj)
        #     self[pred] = new_list
        except KeyError:
            try:
                new_prop = self.properties[pred]
            except AttributeError:
                # the instance has no 'properties' attribute yet: create it
                # and start over
                self.properties = {}
                self.add_property(pred, obj)
                return
            except KeyError:
                # unknown on the instance: fall back to the module level
                # registry and finally to a freshly made property class
                try:
                    new_prop = MODULE.rdfclass.properties[pred]
                except KeyError:
                    new_prop = MODULE.rdfclass.make_property({}, pred,
                                                             self.class_names)
                try:
                    self.properties[pred] = new_prop
                except AttributeError:
                    self.properties = {pred: new_prop}
            # instantiate the property class and expose it both as an
            # attribute and as an item before appending the value
            init_prop = new_prop(self, get_attr(self, "dataset"))
            setattr(self, pred, init_prop)
            self[pred] = init_prop
            self[pred].append(obj)
        # keep the dataset's rmap in step when the instance has a dataset
        if self.dataset:
            self.dataset.add_rmap_item(self, pred, obj)
예제 #19
0
class SPARQLBatchProcessor(Processor):
    """Class batches all triple_maps queries into a single SPARQL query
    in an attempt to reduce the time spent in the triplestore/network
    bottleneck"""

    rdf_name = Uri("kdr:RmlSPARQLBatchProcessor")

    def __init__(self, rml_rules, triplestore_url=None, triplestore=None,
                 **kwargs):
        """
        Args:
            rml_rules: forwarded to Processor.__init__
            triplestore_url: url of a SPARQL endpoint queried with
                requests.post; takes precedence over `triplestore`
            triplestore: object with a ``query(sparql)`` method whose result
                has a ``bindings`` attribute
            kwargs: forwarded to Processor.__init__ (the signature did not
                accept them before although the body already passed them on,
                which raised a NameError on every instantiation)
        """
        super(SPARQLBatchProcessor, self).__init__(rml_rules, **kwargs)
        __set_prefix__()
        if triplestore_url is not None:
            self.triplestore_url = triplestore_url
        elif triplestore is not None:
            self.triplestore = triplestore

    def __get_bindings__(self, sparql):
        """Runs the query against the configured source

        Args:
            sparql: the SPARQL query string

        Returns:
            the result bindings; an empty list when neither a triplestore
            url nor a triplestore is configured
        """
        # __init__ sets at most one of the two attributes, so read both
        # defensively instead of raising AttributeError on the missing one.
        triplestore_url = getattr(self, "triplestore_url", None)
        triplestore = getattr(self, "triplestore", None)
        if triplestore_url is not None:
            result = requests.post(triplestore_url,
                                   data={
                                       "query": sparql,
                                       "format": "json"
                                   })
            return result.json().get("results").get("bindings")
        if triplestore is not None:
            return triplestore.query(sparql).bindings
        return []

    def __construct_compound_query__(self, triple_map):
        """Combines the queries of the predicate object maps into a single
        SELECT with one OPTIONAL block per query

        Maps with a constant or a reference are skipped. Braces are doubled
        because callers run ``str.format`` on the returned text.
        """
        select_clause = PREFIX + "\nSELECT"
        where_clause = "\nWHERE {{"

        for pred_map in triple_map.predicateObjectMap:
            if pred_map.constant is not None or\
               pred_map.reference is not None:
                continue
            # variables are taken from the first line of the map's query
            select_line = pred_map.query.splitlines()[0]
            for term in select_line.split():
                if term.startswith("?") and term not in select_clause:
                    select_clause += " {}".format(term)
            where_clause += "\nOPTIONAL{{\n\t" +\
                        pred_map.query +\
                        "\n}}\n"
        return select_clause + where_clause + "}}"

    def run(self, **kwargs):
        """Runs the processor with a new graph as the output

        Returns:
            the output graph (also stored on self.output)
        """
        kwargs['output'] = self.__graph__()
        super(SPARQLBatchProcessor, self).run(**kwargs)
        self.output = kwargs['output']
        return kwargs['output']

    def execute(self, triple_map, output, **kwargs):
        """Method iterates through triple map's predicate object maps
        and processes query.

        Args:
            triple_map(SimpleNamespace): Triple Map
            output: graph the generated triples are added to
            kwargs: values substituted into the queries with str.format
        """
        sparql = PREFIX + triple_map.logicalSource.query.format(**kwargs)
        bindings = self.__get_bindings__(sparql)
        iterator = str(triple_map.logicalSource.iterator)
        for binding in bindings:
            entity_dict = binding.get(iterator)
            if isinstance(entity_dict, rdflib.term.Node):
                entity = entity_dict
            elif isinstance(entity_dict, dict):
                raw_value = entity_dict.get('value')
                if entity_dict.get('type').startswith('bnode'):
                    entity = rdflib.BNode(raw_value)
                else:
                    entity = rdflib.URIRef(raw_value)
            else:
                # No usable subject in this binding. Previously this either
                # raised NameError or silently reused the subject of the
                # preceding binding.
                continue
            if triple_map.subjectMap.class_ is not None:
                output.add(
                    (entity, rdflib.RDF.type, triple_map.subjectMap.class_))

            # NOTE(review): the compound query does not depend on the
            # binding, yet it is rebuilt and executed once per binding --
            # confirm whether it can be hoisted out of the loop.
            sparql_query = self.__construct_compound_query__(
                triple_map).format(**kwargs)
            properties = self.__get_bindings__(sparql_query)
            for pred_obj_map in triple_map.predicateObjectMap:
                predicate = pred_obj_map.predicate
                if pred_obj_map.constant is not None:
                    output.add((entity, predicate, pred_obj_map.constant))
                    continue
                # the result variable is looked up by the local name of the
                # predicate (text after the last "#" or "/")
                if "#" in str(predicate):
                    key = str(predicate).split("#")[-1]
                else:
                    key = str(predicate).split("/")[-1]

                for property_ in properties:
                    if key in property_.keys():
                        info = {"about": property_.get(key)}
                        object_ = __get_object__(info)
                        output.add((entity, predicate, object_))
예제 #20
0
 def __init__(self, source, output=None, **kwargs):
     """
     Args:
         source: stored on self.source
         output: stored on self.output
         kwargs: accepted but not used

     The filter is fixed to rdf:type / bf:Topic.
     """
     self.source, self.output = source, output
     rdf_type = "rdf:type"
     self.filter_tag = Uri(rdf_type)
     self.filter_val = Uri("bf:Topic")
     self.rdf_type = Uri(rdf_type)
예제 #21
0
"""
import pdb
import datetime
import logging
import click

# Defined before the optional import below: the fallback branch logs a
# warning and previously referenced an undefined `log`, turning a missing
# lxml into a NameError instead of using the stdlib parser.
log = logging.getLogger(__name__)

try:
    from lxml import etree
except ImportError:
    log.warning("'lxml' package not available. Using ptyhon 'xml'")
    import xml.etree.ElementTree as etree

from rdfframework.datatypes import Uri, RdfNsManager

# register the bibframe namespace so the "bf:" prefix can be resolved
RdfNsManager({'bf': 'http://id.loc.gov/ontologies/bibframe/'})

# tag/attribute names in the form returned by Uri(...).etree
_RES_TAG = Uri("rdf:resource").etree
_RDF_TYPE_TAG = Uri("rdf:type").etree


class Extractor(object):
    """
    Holds the source and output locations for extracting nodes from an
    xml file

    Args:
    -----
        source: the filepath to the xml file
        output: the filepath to output the results
        kwargs: accepted but not used
    """
    def __init__(self, source, output=None, **kwargs):
        self.source, self.output = source, output
예제 #22
0
def prepare_prop_defs(prop_defs, prop_name, cls_names):
    """
    Examines and adds any missing defs to the prop_defs dictionary for
    use with the RdfPropertyMeta.__prepare__ method

    Args:
    -----
        prop_defs: the definitions from the rdf vocabulary definition;
                   updated in place
        prop_name: the property name
        cls_names: the name of the associated classes

    Returns:
    --------
        prop_defs
    """
    def get_def(prop_defs, def_fields, default_val=None):
        """ Collects the values stored under several equivalent field names
            (one per vocabulary) into a single de-duplicated list

        args:
            prop_defs: the property definition object
            def_fields: list of the mapped field names
            default_val: value used when none of the fields hold anything
        """
        merged = []
        for fld in def_fields:
            found = prop_defs.get(fld)
            if found:
                merged += found
        if not merged:
            if default_val:
                merged.append(default_val)
            return merged
        try:
            return list(set(merged))
        except TypeError:
            # Unhashable entries (rdf class instances or lists): join their
            # members with "|" so they can pass through a set, then split
            # them back into lists of Uris afterwards.
            flattened = []
            for item in merged:
                if isinstance(item, MODULE.rdfclass.RdfClassBase):
                    flattened.append(
                        "|".join(merge_rdf_list(item['owl_unionOf'])))
                elif isinstance(item, list):
                    flattened.append("|".join(item))
                else:
                    flattened.append(item)
            return [
                [Uri(domain) for domain in key.split("|")]
                if "|" in key else Uri(key)
                for key in set(flattened)
            ]

    # defaults for every definition a property is expected to carry
    required_def_defaults = {
        Uri('kds_rangeDef'): [{}],
        Uri('rdfs_range'): [Uri("xsd_string")],
        Uri('rdfs_domain'): cls_names,
        Uri('rdfs_label'): [NSM.nouri(prop_name)],
        Uri('kds_formDefault'): [{
            Uri('kds:appliesToClass'): Uri('kdr:AllClasses'),
            Uri('kds:formFieldName'): "emailaddr",
            Uri('kds:formLabelName'): [NSM.nouri(prop_name)],
            Uri('kds:formFieldHelp'): find_values(DESCRIPTION_FIELDS,
                                                  prop_defs,
                                                  None),
            Uri('kds:fieldType'): {
                Uri('rdf:type'): Uri('kdr:TextField')
            }
        }],
        Uri('kds_propertyValidation'): [],
        Uri('kds_propertySecurity'): [],
        Uri('kds_propertyProcessing'): []
    }
    for prop, default in required_def_defaults.items():
        if prop not in prop_defs.keys():
            prop_defs[prop] = default

    # domain and range are always recomputed from the cross-vocabulary
    # field lists
    prop_defs['rdfs_domain'] = get_def(prop_defs, DOMAIN_FIELDS, cls_names)
    prop_defs['rdfs_range'] = get_def(prop_defs, RANGE_FIELDS,
                                      Uri('xsd_string'))

    return prop_defs
예제 #23
0
import pprint, pdb
import logging
import multiprocessing as mp

# from rdfframework import rdfclass
from rdfframework.utilities import DictClass, make_list, SimpleMapReduce
from rdfframework.configuration import RdfConfigManager
from rdfframework.datatypes import pyrdf, BaseRdfDataType, Uri
from rdfframework.rdfclass import RdfClassBase, remove_parents, list_hierarchy
from .jsonquery import json_qry

# Module object obtained from this module's own name; note that __import__
# returns the top-level package when __name__ is a dotted name.
MODULE = __import__(__name__)
# import rdfframework.rdfclass as rdfclass
# shared configuration manager instance
CFG = RdfConfigManager()
# one less than the number of CPUs, but never less than 1
pool_size = mp.cpu_count() - 1 or 1
# shorthand for the rdf:type Uri
__a__ = Uri("rdf:type")


def convert_row_main(val, i, key, output):
    # rtn_obj = {}
    # rtn_tup = (pyrdf(row['s']), pyrdf(row['p']), pyrdf(row['o']))
    # return pyrdf(row['s']) #rtn_tup
    # for key, value in row.items():
    #     # try:
    #     # print("convert_row_main: ", value)
    #     # if value.get("datatype") == 'http://www.w3.org/2001/XMLSchema#dateTime':
    #     #     pdb.set_trace()
    #     rtn_obj[key] = pyrdf(value)
    #     # print(rtn_obj)
    #     # except:
    #     #     pdb.set_trace()
예제 #24
0
class SPARQLProcessor(Processor):
    """SPARQLProcessor provides a RML Processor for external SPARQL endpoints"""
    rdf_name = Uri("kdr:RmlSPARQLProcessor")

    def __init__(self, rml_rules, **kwargs):
        """Initializes the processor and sets the paging/query defaults.

        Args:
        -----
            rml_rules: RML rules handed to the base Processor
            kwargs: passed through unchanged to the base Processor
        """
        super(SPARQLProcessor, self).__init__(rml_rules, **kwargs)
        __set_prefix__()
        #! self.triplestore = kwargs.get("triplestore", self.__graph__())

        # Sets defaults
        self.limit, self.offset = 5000, 0
        self.data_query = self.rml.value(subject=NS_MGR.kds.DataQuery.rdflib,
                                         predicate=NS_MGR.rml.query.rdflib)

    def __get_bindings__(self, sparql, output_format):
        """Internal method queries the external connection and returns
        the bindings

        Args:
        -----
            sparql: String of SPARQL query
            output_format: String naming the result format to request
        """
        return self.ext_conn.query(sparql,
                                   rtn_format=output_format,
                                   debug=False)

    def run(self, **kwargs):
        """Runs the RML rules and returns the populated output graph.

        When json querying is enabled and no 'dataset' kwarg is supplied,
        the data is first loaded either with the configured data query or
        by fetching all data for the item(s) named by the 'iri_key' kwarg.

        Keyword Args:
        -------------
            limit: overrides self.limit
            offset: overrides self.offset
            no_json: truthy value disables json querying for this run
            dataset: pre-loaded dataset to query against
            iri_key: name of the kwarg that holds the subject iri(s)

        Raises:
        -------
            KeyError: 'iri_key', or the kwarg it names, is missing
        """
        kwargs['output'] = self.__graph__()
        if "limit" in kwargs:
            self.limit = kwargs.get('limit')
        if "offset" in kwargs:
            self.offset = kwargs.get('offset')
        if kwargs.get("no_json"):
            self.use_json_qry = False
        else:
            self.use_json_qry = self.default_use_json_qry
        if self.use_json_qry and not kwargs.get('dataset'):
            if self.data_query:
                sparql = PREFIX + self.data_query.format(**kwargs)
                data = self.ext_conn.query(sparql)
            else:
                # Only the kwarg lookup is guarded, so a KeyError raised
                # inside get_all_item_data is not misreported as a
                # missing 'iri_key'.
                try:
                    items = kwargs[kwargs['iri_key']]
                except KeyError:
                    raise KeyError(
                        "missing kwarg['iri_key'] defining which"
                        " kwarg to use that contians the subject"
                        " uri used to query for data. Example: "
                        "iri_key='instance_iri, instance_iri="
                        "<http://some.iri>")
                data = get_all_item_data(
                    items=items,
                    conn=self.ext_conn,
                    output='json',
                    debug=False)
                log.debug("data triple count: %s", len(data))
            kwargs['dataset'] = RdfDataset(data)
        super(SPARQLProcessor, self).run(**kwargs)
        self.output = kwargs['output']
        return kwargs['output']

    def execute(self, triple_map, output, **kwargs):
        """Applies one triple map and adds the resulting triples to output.

        Args:
        -----
            triple_map: triple map with logicalSource, subjectMap and
                predicateObjectMap attributes
            output: graph-like object; triples are added with output.add
            kwargs: values used to format the queries; 'limit' and
                'offset' default to the instance values

        Returns:
        --------
            list with one subject entity per binding processed
        """
        subjects = []
        if NS_MGR.ql.JSON.rdflib in \
                triple_map.logicalSource.reference_formulations:
            output_format = "json"
        else:
            output_format = "xml"
        if 'limit' not in kwargs:
            kwargs['limit'] = self.limit
        if 'offset' not in kwargs:
            kwargs['offset'] = self.offset
        # name of the binding variable / kwarg that carries the subject
        iterator = str(triple_map.logicalSource.iterator)
        key, json_query = None, None
        if hasattr(triple_map.logicalSource, 'json_query') \
                and self.use_json_qry:
            key = kwargs.get(str(triple_map.logicalSource.json_key))
            if not key:
                # fall back to the first kwarg value that is a URIRef;
                # raises IndexError when there is none
                key = [val for val in kwargs.values()
                       if isinstance(val, rdflib.URIRef)][0]
            json_query = triple_map.logicalSource.json_query
            bindings = kwargs['dataset'].json_qry(json_query, {'$': key})
        else:
            sparql = PREFIX + triple_map.logicalSource.query.format(**kwargs)
            bindings = self.__get_bindings__(sparql, output_format)
        for binding in bindings:
            if key:
                # json query results: use the binding's subject when it
                # has one, otherwise the binding itself is the entity
                try:
                    entity_raw = binding.subject.rdflib
                except AttributeError:
                    entity_raw = binding
            else:
                entity_raw = binding.get(iterator)
            if isinstance(entity_raw,
                          (rdflib.URIRef, rdflib.BNode, BaseRdfDataType)):
                entity = entity_raw
            else:
                # otherwise a mapping with 'value' and 'type' entries
                raw_value = entity_raw.get('value')
                if entity_raw.get('type').startswith('bnode'):
                    entity = BlankNode(raw_value)
                else:
                    entity = Uri(raw_value)
            if triple_map.subjectMap.class_ is not None:
                sub = entity
                if isinstance(entity, BaseRdfDataType):
                    sub = entity.rdflib
                output.add((sub, NS_MGR.rdf.type.rdflib,
                            triple_map.subjectMap.class_))
            for pred_obj_map in triple_map.predicateObjectMap:
                predicate = pred_obj_map.predicate
                # expose the current entity to the query templates
                kwargs[iterator] = entity

                if pred_obj_map.parentTriplesMap is not None:
                    self.__handle_parents__(
                        output=output,
                        parent_map=pred_obj_map.parentTriplesMap,
                        subject=entity,
                        predicate=predicate,
                        **kwargs)
                    continue
                if pred_obj_map.reference is not None:
                    ref_key = str(pred_obj_map.reference)
                    if pred_obj_map.json_query:
                        # a json reference that is absent from the
                        # binding falls through to the checks below
                        if ref_key in binding:
                            for item in binding[ref_key]:
                                output.add((entity, predicate, item.rdflib))
                            continue
                    else:
                        if ref_key in binding:
                            object_ = __get_object__(binding[ref_key])
                            output.add((entity, predicate, object_))
                        continue
                if pred_obj_map.constant is not None:
                    if isinstance(entity, BaseRdfDataType):
                        entity = entity.rdflib
                    output.add((entity, predicate, pred_obj_map.constant))
                    continue

                json_query = None
                if pred_obj_map.json_query and self.use_json_qry:
                    json_query = pred_obj_map.json_query
                    pre_obj_bindings = kwargs['dataset'].json_qry(
                        json_query, {'$': entity})
                else:
                    sparql_query = PREFIX + pred_obj_map.query.format(**kwargs)
                    pre_obj_bindings = self.__get_bindings__(
                        sparql_query, output_format)

                for row in pre_obj_bindings:
                    if json_query and self.use_json_qry:
                        if isinstance(entity, BaseRdfDataType):
                            entity = entity.rdflib
                        output.add((entity, predicate, row.rdflib))
                    else:
                        object_ = __get_object__(row)
                        if object_ is None:
                            continue
                        if isinstance(entity, BaseRdfDataType):
                            entity = entity.rdflib
                        output.add((entity, predicate, object_))
            subjects.append(entity)
        return subjects
예제 #25
0
    def execute(self, triple_map, output, **kwargs):
        """Applies one triple map and adds the resulting triples to output.

        Args:
        -----
            triple_map: triple map with logicalSource, subjectMap and
                predicateObjectMap attributes
            output: graph-like object; triples are added with output.add
            kwargs: values used to format the queries; 'limit' and
                'offset' default to the instance values

        Returns:
        --------
            list with one subject entity per binding processed
        """
        subjects = []
        # Result format requested from the endpoint: "json" when the
        # logical source lists the ql:JSON formulation, "xml" otherwise.
        if NS_MGR.ql.JSON.rdflib in \
                triple_map.logicalSource.reference_formulations:
            output_format = "json"
        else:
            output_format = "xml"
        if 'limit' not in kwargs:
            kwargs['limit'] = self.limit
        if 'offset' not in kwargs:
            kwargs['offset'] = self.offset
        # log.debug("triple_map.logicalSource: \n%s",
        # pprint.pformat(triple_map.logicalSource.__dict__))
        # Name of the binding variable / kwarg that carries the subject.
        iterator = str(triple_map.logicalSource.iterator)
        # NOTE(review): `start` is assigned here and again further down but
        # is never read in this method.
        start = datetime.datetime.now()
        key, json_query = None, None
        # pdb.set_trace()
        if hasattr(triple_map.logicalSource, 'json_query') \
                and self.use_json_qry:
            key = kwargs.get(str(triple_map.logicalSource.json_key))
            if not key:
                # Fall back to the first kwarg value that is a URIRef;
                # raises IndexError when there is none.
                key =[val for val in kwargs.values() \
                      if isinstance(val, rdflib.URIRef)][0]
            json_query = triple_map.logicalSource.json_query
            bindings = kwargs['dataset'].json_qry(json_query, {'$': key})
        else:
            sparql = PREFIX + triple_map.logicalSource.query.format(**kwargs)
            bindings = self.__get_bindings__(sparql, output_format)
        for binding in bindings:
            if key:
                # json query results: use the binding's subject when it has
                # one, otherwise the binding itself is the entity.
                try:
                    entity_raw = binding.subject.rdflib
                except AttributeError:
                    entity_raw = binding
            else:
                entity_raw = binding.get(iterator)
            if isinstance(entity_raw,
                          (rdflib.URIRef, rdflib.BNode, BaseRdfDataType)):
                entity = entity_raw
            else:
                # Otherwise a mapping with 'value' and 'type' entries.
                raw_value = entity_raw.get('value')
                if entity_raw.get('type').startswith('bnode'):
                    entity = BlankNode(raw_value)
                else:
                    entity = Uri(raw_value)
            if triple_map.subjectMap.class_ is not None:
                # The rdf:type triple uses the rdflib form of the entity
                # without rebinding `entity` itself.
                sub = entity
                if isinstance(entity, BaseRdfDataType):
                    sub = entity.rdflib
                output.add((sub, NS_MGR.rdf.type.rdflib,
                            triple_map.subjectMap.class_))
            # pdb.set_trace()
            for pred_obj_map in triple_map.predicateObjectMap:
                predicate = pred_obj_map.predicate
                # Expose the current entity to the query templates below.
                kwargs[iterator] = entity

                if pred_obj_map.parentTriplesMap is not None:
                    self.__handle_parents__(
                        output=output,
                        parent_map=pred_obj_map.parentTriplesMap,
                        subject=entity,
                        predicate=predicate,
                        **kwargs)
                    continue
                if pred_obj_map.reference is not None:
                    ref_key = str(pred_obj_map.reference)
                    if pred_obj_map.json_query:
                        # if pred_obj_map.json_query =="$.schema_logo":
                        # pdb.set_trace()
                        # A json reference that is absent from the binding
                        # falls through to the constant/query handling.
                        if ref_key in binding:
                            for item in binding[ref_key]:
                                output.add((entity, predicate, item.rdflib))
                            continue
                    else:
                        # A non-json reference always ends this map, whether
                        # or not the binding held a value for it.
                        if ref_key in binding:
                            object_ = __get_object__(binding[ref_key])
                            output.add((entity, predicate, object_))
                        continue
                if pred_obj_map.constant is not None:
                    # From here on `entity` is rebound to its rdflib form
                    # for the rest of this binding.
                    if isinstance(entity, BaseRdfDataType):
                        entity = entity.rdflib
                    output.add((entity, predicate, pred_obj_map.constant))
                    continue

                # No parent map, reference or constant: run the map's own
                # json query or SPARQL query to find the objects.
                json_query = None
                if pred_obj_map.json_query and self.use_json_qry:
                    json_query = pred_obj_map.json_query
                    start = datetime.datetime.now()
                    # pdb.set_trace()
                    # if str(pred_obj_map.predicate) == "http://purl.org/dc/terms/creator":
                    #     pdb.set_trace()
                    pre_obj_bindings = kwargs['dataset'].json_qry(
                        json_query, {'$': entity})
                else:
                    sparql_query = PREFIX + pred_obj_map.query.format(**kwargs)
                    pre_obj_bindings = self.__get_bindings__(
                        sparql_query, output_format)

                for row in pre_obj_bindings:
                    if json_query and self.use_json_qry:
                        if isinstance(entity, BaseRdfDataType):
                            entity = entity.rdflib
                        output.add((entity, predicate, row.rdflib))
                    else:
                        object_ = __get_object__(row)
                        # Rows that yield no object are skipped.
                        if object_ is None:
                            continue
                        if isinstance(entity, BaseRdfDataType):
                            entity = entity.rdflib
                        output.add((entity, predicate, object_))
            # Appended in whatever form `entity` has after the loop above.
            subjects.append(entity)
        return subjects
예제 #26
0
class XMLProcessor(Processor):
    """XML RDF Mapping Processor"""
    rdf_name = Uri("kdr:RmlXMLProcessor")

    def __init__(self, **kwargs):
        """Initializes the processor.

        Keyword Args:
        -------------
            rml_rules: RML rules for the base Processor, defaults to an
                empty list like the sibling processors
            namespaces: prefix -> uri mapping used for XPath evaluation

        Any keyword left over after 'namespaces' is removed is merged
        into self.constants.
        """
        # Default to no rules instead of failing with UnboundLocalError
        # when the caller omits rml_rules.
        rml_rules = kwargs.pop("rml_rules", [])
        super(XMLProcessor, self).__init__(rml_rules, **kwargs)
        if "namespaces" in kwargs:
            self.xml_ns = kwargs.pop("namespaces")
        else:
            self.xml_ns = dict()
        self.constants.update(kwargs)

    def __generate_reference__(self, triple_map, **kwargs):
        """Internal method takes a triple_map and returns the result of
        applying to XPath to the current DOM context

        Args:
        -----
            triple_map: SimpleNamespace
            element: etree.Element

        Returns:
        --------
            rdflib.URIRef for the first match whose text starts with
            "http", or None when no match qualifies
        """
        element = kwargs.get("element")
        found_elements = element.xpath(triple_map.reference,
                                       namespaces=self.xml_ns)
        for elem in found_elements:
            # XPath attribute/text() matches come back as strings, element
            # matches carry their content in .text (which may be None)
            text = elem if isinstance(elem, str) else elem.text
            if text is None:
                continue
            raw_text = text.strip()
            #! Quick and dirty test for valid URI
            if not raw_text.startswith("http"):
                continue
            return rdflib.URIRef(raw_text)
        return None

    def __reference_handler__(self, output, **kwargs):
        """Internal method for handling rr:reference in triples map

        Keyword Args:
        -------------
            predicate_obj_map: SimpleNamespace
            element: etree.Element
            subject: rdflib.URIRef

        Returns:
        --------
            list of subjects returned by the delimited-object handling
        """
        subjects = []
        pred_obj_map = kwargs.get("predicate_obj_map")
        element = kwargs.get("element")
        subject = kwargs.get("subject")
        if pred_obj_map.reference is None:
            return subjects
        predicate = pred_obj_map.predicate
        found_elements = element.xpath(str(pred_obj_map.reference),
                                       namespaces=self.xml_ns)
        # The datatype does not depend on the matched element, so it is
        # resolved once; a missing attribute is treated as None.
        datatype = getattr(pred_obj_map, "datatype", None)

        for found_elem in found_elements:
            if isinstance(found_elem, str):  # Handle xpath attributes
                object_ = self.__generate_object_term__(datatype, found_elem)
                output.add((subject, predicate, object_))
                continue
            if not found_elem.text:
                continue
            if pred_obj_map.constant is not None:
                output.add((subject, predicate, pred_obj_map.constant))
                continue
            if pred_obj_map.delimiters != []:
                subjects.extend(
                    self.__generate_delimited_objects__(
                        output,
                        triple_map=pred_obj_map,
                        subject=subject,
                        predicate=predicate,
                        element=found_elem,
                        delimiters=pred_obj_map.delimiters,
                        datatype=datatype))
            else:
                object_ = self.__generate_object_term__(
                    datatype, found_elem.text)
                output.add((subject, predicate, object_))
        return subjects

    def execute(self, triple_map, output, **kwargs):
        """Method executes mapping between source

        Args:
        -----
            triple_map: SimpleNamespace, Triple Map
            output: graph-like object; triples are added with output.add

        Returns:
        --------
            list of subjects collected while processing the matched
            elements
        """
        subjects = []
        found_elements = self.source.xpath(
            str(triple_map.logicalSource.iterator),
            namespaces=self.xml_ns)
        for element in found_elements:
            subject = self.generate_term(term_map=triple_map.subjectMap,
                                         element=element,
                                         **kwargs)
            # used to detect whether this element produced any triples
            start_size = len(output)
            for row in triple_map.predicateObjectMap:
                predicate = row.predicate
                if row.template is not None:
                    obj_ = self.generate_term(term_map=row, **kwargs)
                    output.add((subject, predicate, obj_))
                if row.parentTriplesMap is not None:
                    self.__handle_parents__(output,
                                            parent_map=row.parentTriplesMap,
                                            subject=subject,
                                            predicate=predicate,
                                            **kwargs)
                new_subjects = self.__reference_handler__(
                    output,
                    predicate_obj_map=row,
                    element=element,
                    subject=subject)
                subjects.extend(new_subjects)
                if row.constant is not None:
                    output.add((subject, predicate, row.constant))
            # the subject is only typed and reported when the output grew
            if start_size < len(output):
                if triple_map.subjectMap.class_ is not None:
                    output.add((subject, NS_MGR.rdf.type.rdflib,
                                triple_map.subjectMap.class_))
                subjects.append(subject)
        return subjects

    def run(self, xml, **kwargs):
        """Method takes either an etree.ElementTree or raw XML text
        as the first argument.

        Args:
        -----
            xml: etree.ElementTree or text

        Raises:
        -------
            ValueError: the text could not be parsed as a string or as
                encoded bytes
        """
        kwargs['output'] = self.__graph__()
        if isinstance(xml, str):
            try:
                self.source = etree.XML(xml)
            except ValueError:
                # retry with bytes when the str form is rejected
                try:
                    self.source = etree.XML(xml.encode())
                except Exception:
                    # Exception rather than a bare except, so that
                    # KeyboardInterrupt/SystemExit are not converted
                    raise ValueError("Cannot run error {}".format(
                        sys.exc_info()[0]))
        else:
            self.source = xml
        super(XMLProcessor, self).run(**kwargs)
        self.output = kwargs['output']
        return kwargs['output']
예제 #27
0
class JSONProcessor(Processor):
    """JSON RDF Mapping Processor"""
    rdf_name = Uri("kdr:RmlJSONProcessor")

    def __init__(self, **kwargs):
        """Initializes the processor.

        Keyword Args:
        -------------
            rml_rules: RML rules for the base Processor, defaults to an
                empty list
        """
        rml_rules = kwargs.pop("rml_rules", [])
        super(JSONProcessor, self).__init__(rml_rules, **kwargs)

    def __generate_reference__(self, triple_map, **kwargs):
        """Internal method evaluates the triple map's JSONPath reference
        against the 'obj' kwarg

        Returns:
        --------
            rdflib.URIRef for the first stripped match that passes
            rdflib's uri validity check, or None when none does
        """
        json_obj = kwargs.get("obj")
        path_expr = jsonpath_ng.parse(triple_map.reference)
        results = [r.value.strip() for r in path_expr.find(json_obj)]
        for row in results:
            if rdflib.term._is_valid_uri(row):
                return rdflib.URIRef(row)
        return None

    def __reference_handler__(self, output, **kwargs):
        """Internal method for handling rr:reference in triples map

        Keyword Args:
        -------------
            predicate_obj_map: SimpleNamespace
            obj: dict
            subject: rdflib.URIRef

        Returns:
        --------
            list of subjects; always empty here, returned so the result
            has the same shape as the XML processor's handler
        """
        subjects = []
        pred_obj_map = kwargs.get("predicate_obj_map")
        obj = kwargs.get("obj")
        subject = kwargs.get("subject")
        if pred_obj_map.reference is None:
            return subjects
        predicate = pred_obj_map.predicate
        # The attribute is `reference`, and a parsed JSONPath expression
        # is evaluated with .find(); it is not callable.
        ref_exp = jsonpath_ng.parse(str(pred_obj_map.reference))
        found_objects = [r.value for r in ref_exp.find(obj)]
        for row in found_objects:
            output.add((subject, predicate, rdflib.Literal(row)))
        return subjects

    def execute(self, triple_map, output, **kwargs):
        """Method executes mapping between JSON source and
        output RDF

        Args:
        -----
            triple_map: SimpleNamespace
            output: graph-like object; triples are added with output.add

        Returns:
        --------
            list with one subject per row processed
        """
        subjects = []
        logical_src_iterator = str(triple_map.logicalSource.iterator)
        json_object = kwargs.get('obj', self.source)
        # A bare '.' iterator is not parsed as JSONPath; a single
        # placeholder row is processed instead.
        if logical_src_iterator == ".":
            results = [
                None,
            ]
        else:
            json_path_exp = jsonpath_ng.parse(logical_src_iterator)
            # only the value of the first match is iterated
            results = [r.value for r in json_path_exp.find(json_object)][0]
        for row in results:
            subject = self.generate_term(term_map=triple_map.subjectMap,
                                         **kwargs)
            for pred_obj_map in triple_map.predicateObjectMap:
                predicate = pred_obj_map.predicate
                if pred_obj_map.template is not None:
                    output.add((subject, predicate,
                                self.generate_term(term_map=pred_obj_map,
                                                   **kwargs)))

                if pred_obj_map.parentTriplesMap is not None:
                    self.__handle_parents__(
                        output,
                        parent_map=pred_obj_map.parentTriplesMap,
                        subject=subject,
                        predicate=predicate,
                        obj=row,
                        **kwargs)
                if pred_obj_map.reference is not None:
                    ref_exp = jsonpath_ng.parse(str(pred_obj_map.reference))
                    found_objects = [r.value for r in ref_exp.find(row)]
                    for obj in found_objects:
                        # values that pass the uri check become URIRefs,
                        # everything else becomes a Literal
                        if rdflib.term._is_valid_uri(obj):
                            rdf_obj = rdflib.URIRef(str(obj))
                        else:
                            rdf_obj = rdflib.Literal(str(obj))
                        output.add((subject, predicate, rdf_obj))
                if pred_obj_map.constant is not None:
                    output.add((subject, predicate, pred_obj_map.constant))
            subjects.append(subject)
        return subjects

    def run(self, source, **kwargs):
        """Method takes a JSON source and any keywords, runs the RML
        rules and returns the output graph

        Args:
        -----
            source: str, dict; a str is parsed with json.loads
        """
        kwargs['output'] = self.__graph__()
        if isinstance(source, str):
            import json
            source = json.loads(source)
        self.source = source
        super(JSONProcessor, self).run(**kwargs)
        self.output = kwargs['output']
        # `output` is not a local name here; the graph lives in kwargs
        return kwargs['output']
예제 #28
0
class CSVRowProcessor(Processor):
    """RML Processor for CSV/TSV or other delimited file supported by the
    python standard library module csv"""
    rdf_name = Uri("kdr:RmlCSVRowProcessor")

    def __init__(self, **kwargs):
        """Initializes the processor.

        Keyword Args:
        -------------
            rml_rules: RML rules for the base Processor, defaults to an
                empty list
        """
        rml_rules = kwargs.pop("rml_rules", [])
        super(CSVRowProcessor, self).__init__(rml_rules, **kwargs)

    def __generate_reference__(self, triple_map, **kwargs):
        """Generates a RDF entity based on triple map

        Args:
        -----
            triple_map(SimpleNamespace): Triple Map

        Returns:
        --------
            None when the referenced column is missing or empty,
            a URIRef when the datatype is xsd:anyURI, otherwise a Literal
            (typed when the triple map has a datatype attribute)
        """
        raw_value = self.source.get(str(triple_map.reference))
        if raw_value is None or len(raw_value) < 1:
            return None
        if not hasattr(triple_map, "datatype"):
            return rdflib.Literal(raw_value)
        if triple_map.datatype == NS_MGR.xsd.anyURI.rdflib:
            return rdflib.URIRef(raw_value)
        return rdflib.Literal(raw_value, datatype=triple_map.datatype)

    def execute(self, triple_map, output, **kwargs):
        """Method executes mapping between CSV source and
        output RDF

        Args:
        -----
            triple_map(SimpleNamespace): Triple Map
            output: graph-like object; triples are added with output.add

        Returns:
        --------
            list holding the subject when the row produced any triples,
            otherwise an empty list
        """
        subject = self.generate_term(term_map=triple_map.subjectMap, **kwargs)
        start_size = len(output)
        all_subjects = []
        for pred_obj_map in triple_map.predicateObjectMap:
            predicate = pred_obj_map.predicate
            if pred_obj_map.template is not None:
                object_ = self.generate_term(term_map=pred_obj_map, **kwargs)
                # Test the generated term, not the builtin `object`, so
                # missing or empty terms are not added.
                if object_ is not None and len(str(object_)) > 0:
                    output.add((subject, predicate, object_))

            if pred_obj_map.parentTriplesMap is not None:
                # output is a named parameter and therefore not part of
                # kwargs; pass it on like the other processors do.
                self.__handle_parents__(
                    output=output,
                    parent_map=pred_obj_map.parentTriplesMap,
                    subject=subject,
                    predicate=predicate,
                    **kwargs)
            if pred_obj_map.reference is not None:
                object_ = self.generate_term(term_map=pred_obj_map, **kwargs)
                if object_ and len(str(object_)) > 0:
                    output.add((subject, predicate, object_))
            if pred_obj_map.constant is not None:
                output.add((subject, predicate, pred_obj_map.constant))
        # the subject is only typed and reported when the output grew
        if len(output) > start_size:
            if triple_map.subjectMap.class_ is not None:
                output.add((subject, NS_MGR.rdf.type.rdflib,
                            triple_map.subjectMap.class_))
            all_subjects.append(subject)
        return all_subjects

    def run(self, row, **kwargs):
        """Method takes a row, runs the RML rules against it and returns
        the output graph.

        Args:
        -----
            row: Row from CSV Reader
                NOTE(review): __generate_reference__ calls row.get(), so
                a dict-like row appears to be required -- confirm whether
                list rows are still supported.
        """
        self.source = row
        kwargs['output'] = self.__graph__()
        super(CSVRowProcessor, self).run(**kwargs)
        return kwargs['output']
예제 #29
0
class RdfClassFactory(RdfBaseFactory):
    """ Extends RdfBaseFactory with class creation specific querying """
    log_level = logging.INFO  #MLOG_LVL #
    cache_file = "classes.json"
    # rdf types that mark a definition as a class
    classes_key = set([Uri(item) for item in RDF_CLASSES])
    # properties whose presence marks a definition as a class
    inferred_key = set([Uri(item) for item in INFERRED_CLASS_PROPS])
    rdf_type = Uri('rdf_type')

    def __init__(self, conn, reset=False, nsm=NSM, cfg=CFG):
        """ Initializes the factory

        args:
            conn: connection passed to RdfBaseFactory
            reset: passed to RdfBaseFactory
            nsm: namespace manager passed to RdfBaseFactory
            cfg: configuration; cfg.props_initialized must be True

        raises:
            RuntimeError: the property factory has not been run yet
        """
        if cfg.props_initialized != True:
            err_msg = [
                "RdfPropertyFactory must be run prior to",
                "the intialization of RdfClassFactory!"
            ]
            raise RuntimeError(" ".join(err_msg))
        sparql_template = "sparqlDefinitionClassesAll.rq"
        super().__init__(conn, sparql_template, reset, nsm, cfg)

    def make(self):
        """ reads through the definitions and generates an python class for each
        definition

        Classes without 'rdfs_subClassOf' are created first. The remaining
        classes are created in passes, each one as soon as the parent it
        is built from exists. Every created class is set as an attribute
        on MODULE.rdfclass and removed from self.class_dict.
        """
        log.setLevel(self.log_level)
        created = []
        self.set_class_dict()
        start = datetime.datetime.now()
        log.info(" # of classes to create: %s", len(self.class_dict))
        log.debug(" creating classes that are not subclassed")

        for name, cls_defs in self.class_dict.items():
            if not cls_defs.get('rdfs_subClassOf'):
                created.append(name)
                setattr(
                    MODULE.rdfclass,
                    name,
                    types.new_class(
                        name,
                        (RdfClassBase, ),
                        {  #'metaclass': RdfClassMeta,
                            'cls_defs': cls_defs
                        }))
        log.debug(" created %s classes in: %s", len(created),
                  (datetime.datetime.now() - start))
        for name in created:
            del self.class_dict[name]
        left = len(self.class_dict)
        # parents that are referenced but have no definition of their own
        classes = []
        while left > 0:
            for name, cls_defs in self.class_dict.items():
                parents = cls_defs.get('rdfs_subClassOf')
                # Start every class with an empty tuple so the no-parent
                # branch neither fails on an unbound name nor inherits
                # the bases computed for the previous class.
                bases = tuple()
                if not parents:
                    bases += (RdfClassBase, )
                else:
                    for parent in make_list(parents):
                        # NOTE(review): bases is rebuilt for each parent,
                        # so only the last listed parent contributes --
                        # kept as in the original; confirm intent.
                        bases = tuple()
                        if parent in classes:
                            bases += (RdfClassBase, )
                        elif parent in created:
                            base = getattr(MODULE.rdfclass, parent)
                            bases += (base, ) + base.__bases__
                if len(bases) > 0:
                    created.append(name)
                    setattr(
                        MODULE.rdfclass,
                        name,
                        types.new_class(
                            name,
                            bases,
                            {  #'metaclass': RdfClassMeta,
                                'cls_defs': cls_defs
                            }))
            for name in created:
                try:
                    del self.class_dict[name]
                except KeyError:
                    pass
            if left == len(self.class_dict):
                # Nothing was created in this pass: the remaining classes
                # wait on parents that are not defined. Collect those
                # parents and strip them from the definitions.
                missing_parents = []
                for name in self.class_dict:
                    missing_parents += \
                            self.class_dict[name].get('rdfs_subClassOf', [])
                missing_parents = set(missing_parents)
                classes = list(missing_parents.difference(
                    set(self.class_dict.keys())))

                for name in self.class_dict:
                    if name in classes:
                        classes.remove(name)
                    # iterate a copy because the list is edited in place
                    for p_name in self.class_dict[name].get(
                            'rdfs_subClassOf', []).copy():
                        if p_name in classes:
                            self.class_dict[name]['rdfs_subClassOf'].remove(
                                p_name)
            left = len(self.class_dict)
        # self.tie_properties(created)
        log.info(" created all classes in %s",
                 (datetime.datetime.now() - start))

    def set_class_dict(self):
        """ Reads through the dataset and assigns self.class_dict the key value
            pairs for the classes in the dataset
        """

        self.class_dict = {}
        for name, cls_defs in self.defs.items():
            def_type = set(cls_defs.get(self.rdf_type, []))
            if name.type == 'bnode':
                continue
            # a class can be determined by checking to see if it is of an
            # rdf_type listed in the classes_key or has a property that is
            # listed in the inferred_key
            # any() is required here: a list of lookups is truthy even
            # when every lookup returned None.
            if def_type.intersection(self.classes_key) or \
                    any(cls_defs.get(item) for item in self.inferred_key):
                self.class_dict[name] = cls_defs

    def tie_properties(self, class_list):
        """ Runs through the classess and ties the properties to the class

        args:
            class_list: a list of class names to run
        """
        log.setLevel(self.log_level)
        start = datetime.datetime.now()
        log.info(" Tieing properties to the class")
        for cls_name in class_list:
            cls_obj = getattr(MODULE.rdfclass, cls_name)
            # iterate a snapshot so that setting attributes on the class
            # cannot disturb the iteration
            prop_dict = dict(cls_obj.properties)
            for prop_name, prop_obj in prop_dict.items():
                setattr(cls_obj, prop_name, link_property(prop_obj, cls_obj))
        log.info(" Finished tieing properties in: %s",
                 (datetime.datetime.now() - start))
예제 #30
0
def json_qry(dataset, qry_str, params=None):
    """ Takes a json query string and returns the results

    args:
        dataset: RdfDataset to query against
        qry_str: query string; "$." is prepended when it contains no '$'
        params: optional dictionary of params. Keys read here:
            "$": the value the query starts from (defaults to dataset)
            "dataset": when truthy, used in place of the dataset argument

    returns:
        a one item list holding the start value when the query is only '$';
        otherwise the accumulated results of every query part after each
        post action returned by parse_json_qry has been applied
    """
    # a None default avoids sharing one mutable dict between calls
    if params is None:
        params = {}
    if '$' not in qry_str:
        qry_str = ".".join(['$', qry_str.strip()])
    dallor_val = params.get("$", dataset)
    if isinstance(dallor_val, rdflib.URIRef):
        dallor_val = Uri(dallor_val)
    if qry_str.strip() == '$':
        return [dallor_val]
    parsed_qry = parse_json_qry(qry_str)
    qry_parts = parsed_qry['qry_parts']
    post_actions = parsed_qry['params']
    rtn_list = UniqueList()
    if params.get('dataset'):
        dataset = params['dataset']
    for or_part in qry_parts:
        # or_part[0] holds the path parts and or_part[1] selects the walk:
        # 0 walks the parts in order starting at the start value, anything
        # else walks them in reverse order starting at the whole dataset
        if or_part[1] == 0:
            if isinstance(dallor_val, dict):
                result = dallor_val
            else:
                # look the start value up as given, then as a Uri and
                # finally as a BlankNode
                for convert in (None, Uri, BlankNode):
                    try:
                        result = dataset[
                            convert(dallor_val) if convert else dallor_val]
                    except KeyError:
                        continue
                    break
                else:
                    # not in the dataset under any key form; skip this part
                    continue

            # a "*" part toggles between forward and reverse lookups
            forward = True
            for part in or_part[0][1:]:
                if part == "*":
                    forward = not forward
                elif forward:
                    result = get_json_qry_item(result, part)
                else:
                    result = get_reverse_json_qry_item(result, part, False)
        else:
            result = dataset
            forward = False
            # reversed parts, with the first reversed part dropped
            for part in or_part[0][::-1][1:]:
                if part == "*":
                    forward = not forward
                elif forward:
                    result = get_json_qry_item(result, part)
                else:
                    result = get_reverse_json_qry_item(
                        result, part, False, dallor_val)
        rtn_list += result
    for action in post_actions:
        rtn_list = action(rtn_list)
    return rtn_list