Example no. 1
    def __init__(self, **kwargs):

        log = logging.getLogger("%s.%s" % (self.log_name,
                                           inspect.stack()[0][3]))
        log.setLevel(self.log_level)

        config = kwargs.get('config')
        if config:
            print("Should find value")
            CFG = RdfConfigManager(config=config)
        else:
            raise EnvironmentError("kwarg 'config' is required")
        self.cfg = CFG
        NSM = RdfNsManager(config=CFG)
        self.root_file_path = CFG.RDF_DEFINITION_FILE_PATH
        self._set_datafiles()
        self.rml = DictClass()
        # if the definition files have been modified since the last json
        # files were saved, reload the definition files
        reset = kwargs.get('reset', False)
        # verify that the server core is up and running
        servers_up = True
        if kwargs.get('server_check', True):
            servers_up = verify_server_core(600, 0)
        else:
            log.info("server check skipped")
        if not servers_up:
            log.info("Sever core not initialized --- Framework Not loaded")
        if servers_up:
            log.info("*** Loading Framework ***")
            self._load_data(reset)
            RdfPropertyFactory(CFG.def_tstore, reset=reset)
            RdfClassFactory(CFG.def_tstore, reset=reset)
            log.info("*** Framework Loaded ***")
Example no. 2
Base ingester classes and functions for ingesting raw data into an
rdfframework based application
"""
import pdb
import logging
import datetime
import click

# module-level logger (needed by the lxml fallback warning below)
log = logging.getLogger(__name__)

try:
    from lxml import etree
except ImportError:
    log.warning("'lxml' package not available. Using ptyhon 'xml'")
    import xml.etree.ElementTree as etree

from rdfframework.datatypes import Uri, RdfNsManager

RdfNsManager({'bf': 'http://id.loc.gov/ontologies/bibframe/'})

_RES_TAG = Uri("rdf:resource").etree
_RDF_TYPE_TAG = Uri("rdf:type").etree


class Extractor(object):
    """
    Extracts all specified nodes from an XML file

    Args:
    -----
        source: the filepath to the xml file
        output: the filepath to output the results
    """
    def __init__(self, source, output=None, **kwargs):
    'o': {
        'type': 'uri',
        'value': 'http://id.loc.gov/ontologies/bibframe/Instance'
    },
    'p': {
        'type': 'uri',
        'value': 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type'
    },
    's': {
        'type': 'uri',
        'value':
        'https://plains2peaks.org/d573941e-82c6-11e7-b159-005056c00008'
    }
}]

if __name__ == '__main__':
    time_test(DATA)
    time_test(DATA, multiprocessing=True)
    time_test(DATA)
    time_test(DATA, multiprocessing=True)
    from rdfframework.sparql import get_all_item_data
    from rdfframework.connections import Blazegraph
    from rdfframework.datatypes import RdfNsManager
    RdfNsManager({"bf": "http://id.loc.gov/ontologies/bibframe/"})
    data_iri = "<https://plains2peaks.org/d573941e-82c6-11e7-b159-005056c00008>"
    conn = Blazegraph(namespace="plain2peak")
    data = get_all_item_data(data_iri, conn)
    print("data count: ", len(data))
    time_test(data)
    time_test(data, multiprocessing=True)
Example no. 4
import os
import requests
import copy
import json
import pdb


from rdfframework.utilities import render_without_request, UniqueList
from rdfframework.datatypes import RdfNsManager, Uri

NSM = RdfNsManager()
DEBUG = True

def run_query_series(queries, conn):
    """
    Iterates through a list of queries and runs them through the connection

    Args:
    -----
        queries: list of strings or tuples containing (query_string, kwargs)
        conn: the triplestore connection to use
    """
    results = []
    for item in queries:
        qry = item
        kwargs = {}
        if isinstance(item, tuple):
            qry = item[0]
            kwargs = item[1]
        result = conn.update_query(qry, **kwargs)
        # pdb.set_trace()
        results.append(result)
    return results
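A minimal usage sketch for run_query_series; the SPARQL update strings below are illustrative placeholders, and the Blazegraph connection mirrors the one used elsewhere in these examples.

# Illustrative sketch only: the update statements and namespace are placeholders.
from rdfframework.connections import Blazegraph

conn = Blazegraph(namespace="plain2peak")
queries = [
    # a bare string is run as-is
    "INSERT DATA { <http://example.org/item1> a <http://example.org/Thing> . }",
    # a (query_string, kwargs) tuple forwards its kwargs to conn.update_query
    ("DROP SILENT GRAPH <http://example.org/scratch>", {}),
]
results = run_query_series(queries, conn)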
Example no. 5
import os  # needed by BIBCAT_BASE below

import jsonpath_ng
# import bibcat
# from bibcat.maps import get_map

from rdfframework.datatypes import RdfNsManager, BaseRdfDataType, Uri
from rdfframework.connections import setup_conn, Blazegraph, RdflibConn, \
        make_tstore_conn
from rdfframework.sparql import get_all_item_data
from rdfframework.datasets import RdfDataset
from rdfframework.utilities import pick, KeyRegistryMeta
from rdfframework.datatypes import Uri
from .rmlmanager import RmlManager

BIBCAT_BASE = os.path.abspath(os.path.split(os.path.dirname(__file__))[0])
NS_MGR = RdfNsManager()
RML_MGR = RmlManager()
PREFIX = None
# __version__ = bibcat.__version__

try:
    from lxml import etree
except ImportError:
    import xml.etree.ElementTree as etree


class Processor(object, metaclass=KeyRegistryMeta):
    __required_idx_attrs__ = {'rdf_name', '__name__'}
    """
    Base class for RDF Mapping Language processors; child classes
    encapsulate different types of data sources
Example no. 6
def make_doc_string(name, cls_def, bases=[], props={}):
    from rdfframework.datatypes import RdfNsManager
    NSM = RdfNsManager()
    footer_text = """*** autogenerated from knowledgelinks.io rdfframework
                        rdf definitions"""
    doc_items = [name]
    label_fields = LABEL_FIELDS
    description_fields = DESCRIPTION_FIELDS
    note_fields = NOTE_FIELDS
    prop_fields = PROP_FIELDS
    label = format_doc_vals(data=find_values(label_fields, cls_def),
                            descriptor="Label",
                            divider=" | ",
                            subdivider=", ")
    if len(label) > 0:
        doc_items.append(label)
    description = format_doc_vals(data=find_values(description_fields,
                                                   cls_def),
                                  descriptor="Description",
                                  divider="",
                                  subdivider="\n")
    if len(description) > 0:
        doc_items.append(description)
    parents = [("", base.__name__.pyuri) for base in bases[:-1]
               if base.__name__ not in ['RdfPropertyBase', 'RdfClassBase']]
    if len(parents) > 0:
        # pdb.set_trace()
        cls_hierarchy = format_doc_vals(data=parents,
                                        descriptor="Class Hierarchy",
                                        divider=" -> ",
                                        subdivider=" -> ")
        doc_items.append(cls_hierarchy)

    for base in bases[:-1]:
        try:
            if "Properties:" in base.__doc__:
                inherited = base.__doc__[base.__doc__.find("Properties"):]
                inherited = inherited.replace(
                    "Properties:",
                    "Inherited from %s:" % base.__name__.pyuri)
                doc_items.append(inherited.replace(footer_text, ""))
        except TypeError:
            pass

    try:
        prop_notes = [(prop.pyuri, " ".join([item[1] for item in \
                      find_values(description_fields,
                                  prop_def,
                                  method='dict')])) \
                      for prop, prop_def in props.items() \
                      if len(find_values(description_fields,
                                         prop_def,
                                         method='dict')) > 0]

        prop_notes.sort()
        properties = format_doc_vals(data=prop_notes,
                                     descriptor="Properties",
                                     divider="",
                                     subdivider="\n",
                                     subindent=14,
                                     key_join=True)

        doc_items.append(properties)
    except AttributeError:
        pass

    footer = format_doc_vals(data=[("", footer_text)],
                             descriptor="",
                             seperator="\n",
                             divider="\n",
                             subdivider="")
    doc_items.append(footer)
    return "\n\n".join(doc_items)
Example no. 7
import requests
import urllib
import datetime
import pdb

from dateutil.parser import parse as date_parse

from rdfframework.connections import ConnManager
from rdfframework.datatypes import RdfNsManager
from rdfframework.configuration import RdfConfigManager
from rdfframework.utilities import make_list
from .datamanager import DataFileManager

__CONNS__ = ConnManager()
__CFG__ = RdfConfigManager()
__NSM__ = RdfNsManager()


class DefManagerMeta(type):
    """ Metaclass ensures that there is only one instance of the RdfConnManager
    """

    _instances = {}

    def __call__(cls, *args, **kwargs):
        if cls not in cls._instances:
            cls._instances[cls] = super(DefManagerMeta,
                                        cls).__call__(*args, **kwargs)
        else:
            values = None
            if kwargs.get("conn"):