def __init__(self, **kwargs):
    """ Initialize the rdfframework: load configuration, definition data
    files, and the RDF property/class factories.

    Kwargs:
    -------
        config: required. Configuration passed through to RdfConfigManager
        reset: bool (default False). Force a reload of the definition
            files even if the cached json files appear current
        server_check: bool (default True). Verify the triplestore server
            core is running before attempting to load the framework

    Raises:
    -------
        EnvironmentError: if the 'config' kwarg is not supplied
    """
    log = logging.getLogger("%s.%s" % (self.log_name, inspect.stack()[0][3]))
    log.setLevel(self.log_level)
    config = kwargs.get('config')
    if not config:
        # guard clause: nothing can be loaded without a configuration
        raise EnvironmentError("kwarg 'config' is required")
    cfg = RdfConfigManager(config=config)
    self.cfg = cfg
    # instantiated for its singleton registration side effect; the
    # instance itself is not needed here
    RdfNsManager(config=cfg)
    self.root_file_path = cfg.RDF_DEFINITION_FILE_PATH
    self._set_datafiles()
    self.rml = DictClass()
    # if the definition files have been modified since the last json
    # files were saved, reload the definition files
    reset = kwargs.get('reset', False)
    # verify that the server core is up and running
    servers_up = True
    if kwargs.get('server_check', True):
        servers_up = verify_server_core(600, 0)
    else:
        log.info("server check skipped")
    if not servers_up:
        log.info("Server core not initialized --- Framework Not loaded")
    else:
        log.info("*** Loading Framework ***")
        self._load_data(reset)
        RdfPropertyFactory(cfg.def_tstore, reset=reset)
        RdfClassFactory(cfg.def_tstore, reset=reset)
        log.info("*** Framework Loaded ***")
Base ingester classes and functions for ingesting raw data into an
rdfframework based application """
import pdb
import datetime
import click
try:
    from lxml import etree
except ImportError:
    # NOTE(review): 'log' is not defined at module scope in this excerpt --
    # if lxml is missing this line raises NameError before the fallback
    # import runs. Confirm a module logger exists before this point.
    log.warning("'lxml' package not available. Using ptyhon 'xml'")
    import xml.etree.ElementTree as etree
from rdfframework.datatypes import Uri, RdfNsManager

# register the bibframe prefix with the shared namespace manager singleton
RdfNsManager({'bf': 'http://id.loc.gov/ontologies/bibframe/'})
# precomputed etree-qualified tag names used for rdf:resource / rdf:type
# attribute lookups during extraction
_RES_TAG = Uri("rdf:resource").etree
_RDF_TYPE_TAG = Uri("rdf:type").etree

class Extractor(object):
    """ Extracts all specified nodes from an xml file

    Args:
    -----
        source: the filepath to the xml file
        output: the filepath to output the results
    """
    def __init__(self, source, output=None, **kwargs):
# tail of the DATA fixture: a SPARQL-JSON style binding asserting
# rdf:type bf:Instance for a plains2peaks item (list opening is above
# this excerpt)
'o': { 'type': 'uri',
       'value': 'http://id.loc.gov/ontologies/bibframe/Instance' },
 'p': { 'type': 'uri',
        'value': 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type' },
 's': { 'type': 'uri',
        'value': 'https://plains2peaks.org/d573941e-82c6-11e7-b159-005056c00008' } }]

if __name__ == '__main__':
    # time the static fixture twice in each mode -- presumably so the
    # second pass runs with warmed caches; confirm intent
    time_test(DATA)
    time_test(DATA, multiprocessing=True)
    time_test(DATA)
    time_test(DATA, multiprocessing=True)
    # pull a live dataset for the same item from the triplestore and
    # repeat the timing against real data
    from rdfframework.sparql import get_all_item_data
    from rdfframework.connections import Blazegraph
    from rdfframework.datatypes import RdfNsManager
    RdfNsManager({"bf": "http://id.loc.gov/ontologies/bibframe/"})
    data_iri = "<https://plains2peaks.org/d573941e-82c6-11e7-b159-005056c00008>"
    conn = Blazegraph(namespace="plain2peak")
    data = get_all_item_data(data_iri, conn)
    print("data count: ", len(data))
    time_test(data)
    time_test(data, multiprocessing=True)
import os
import requests
import copy
import json
import pdb
from rdfframework.utilities import render_without_request, UniqueList
from rdfframework.datatypes import RdfNsManager, Uri

# shared namespace-manager singleton and module debug flag
NSM = RdfNsManager()
DEBUG = True

def run_query_series(queries, conn):
    """ Iterates through a list of queries and runs them through the connection

    Args:
    -----
        queries: list of strings or tuples containing (query_string, kwargs)
        conn: the triplestore connection to use
    """
    results = []
    for item in queries:
        # a bare string is a query with no kwargs; a tuple carries both
        qry = item
        kwargs = {}
        if isinstance(item, tuple):
            qry = item[0]
            kwargs = item[1]
        result = conn.update_query(qry, **kwargs)
        # pdb.set_trace()
        # NOTE(review): the function body continues beyond this excerpt --
        # presumably 'result' is appended to 'results' and the list is
        # returned; confirm against the full source.
import jsonpath_ng # import bibcat # from bibcat.maps import get_map from rdfframework.datatypes import RdfNsManager, BaseRdfDataType, Uri from rdfframework.connections import setup_conn, Blazegraph, RdflibConn, \ make_tstore_conn from rdfframework.sparql import get_all_item_data from rdfframework.datasets import RdfDataset from rdfframework.utilities import pick, KeyRegistryMeta from rdfframework.datatypes import Uri from .rmlmanager import RmlManager BIBCAT_BASE = os.path.abspath(os.path.split(os.path.dirname(__file__))[0]) NS_MGR = RdfNsManager() RML_MGR = RmlManager() PREFIX = None # __version__ = bibcat.__version__ try: from lxml import etree except ImportError: import xml.etree.ElementTree as etree class Processor(object, metaclass=KeyRegistryMeta): __required_idx_attrs__ = {'rdf_name', '__name__'} """ Base class for RDF Mapping Language Processors, child classes encapsulate different types of Data sources
def make_doc_string(name, cls_def, bases=None, props=None):
    """ Assembles a docstring for an autogenerated rdf class or property.

    Args:
    -----
        name: the name of the class/property being documented
        cls_def: the rdf definition data for the class/property
        bases: list of base classes; the final entry is excluded from the
            hierarchy/inheritance sections (default: empty list)
        props: dict of {property: property_definition} used to build the
            Properties section (default: empty dict)

    Returns:
    --------
        str: the assembled docstring
    """
    from rdfframework.datatypes import RdfNsManager
    # None sentinels instead of mutable defaults (shared-default pitfall)
    bases = [] if bases is None else bases
    props = {} if props is None else props
    # instantiated for its singleton side effect; the .pyuri conversions
    # below presumably rely on an initialized namespace manager -- confirm
    RdfNsManager()
    footer_text = """*** autogenerated from knowledgelinks.io rdfframework rdf definitions"""
    doc_items = [name]
    label = format_doc_vals(data=find_values(LABEL_FIELDS, cls_def),
                            descriptor="Label",
                            divider=" | ",
                            subdivider=", ")
    if label:
        doc_items.append(label)
    description = format_doc_vals(data=find_values(DESCRIPTION_FIELDS,
                                                   cls_def),
                                  descriptor="Description",
                                  divider="",
                                  subdivider="\n")
    if description:
        doc_items.append(description)
    # class hierarchy: every base except the last, skipping the framework
    # root classes
    parents = [("", base.__name__.pyuri) for base in bases[:-1]
               if base.__name__ not in ['RdfPropertyBase', 'RdfClassBase']]
    if parents:
        cls_hierarchy = format_doc_vals(data=parents,
                                        descriptor="Class Hierarchy",
                                        divider=" -> ",
                                        subdivider=" -> ")
        doc_items.append(cls_hierarchy)
    # copy each base's "Properties:" section in as "Inherited from ...:",
    # stripping the autogeneration footer
    for base in bases[:-1]:
        try:
            if "Properties:" in base.__doc__:
                doc_items.append(
                        base.__doc__[base.__doc__.find("Properties"):].replace(
                                "Properties:",
                                "Inherited from %s:" % base.__name__.pyuri
                        ).replace(footer_text, ""))
        except TypeError:
            # base.__doc__ is None
            pass
    try:
        prop_notes = [(prop.pyuri,
                       " ".join(item[1] for item in
                                find_values(DESCRIPTION_FIELDS, prop_def,
                                            method='dict')))
                      for prop, prop_def in props.items()
                      if len(find_values(DESCRIPTION_FIELDS, prop_def,
                                         method='dict')) > 0]
        prop_notes.sort()
        properties = format_doc_vals(data=prop_notes,
                                     descriptor="Properties",
                                     divider="",
                                     subdivider="\n",
                                     subindent=14,
                                     key_join=True)
        doc_items.append(properties)
    except AttributeError:
        # a prop key without a .pyuri attribute
        pass
    # 'seperator' (sic) is the keyword format_doc_vals actually accepts
    footer = format_doc_vals(data=[("", footer_text)],
                             descriptor="",
                             seperator="\n",
                             divider="\n",
                             subdivider="")
    doc_items.append(footer)
    return "\n\n".join(doc_items)
import requests
import urllib
import datetime
import pdb
from dateutil.parser import parse as date_parse
from rdfframework.connections import ConnManager
from rdfframework.datatypes import RdfNsManager
from rdfframework.configuration import RdfConfigManager
from rdfframework.utilities import make_list
from .datamanager import DataFileManager

# module-level singletons shared by the definition manager
__CONNS__ = ConnManager()
__CFG__ = RdfConfigManager()
__NSM__ = RdfNsManager()

class DefManagerMeta(type):
    """ Metaclass ensures that there is only one instance of the
    RdfConnManager """
    # NOTE(review): the docstring mentions RdfConnManager -- likely
    # copied from the connections module; this metaclass appears to
    # govern the definition manager. Confirm and update.
    # cache of singleton instances, keyed by class
    _instances = {}
    def __call__(cls, *args, **kwargs):
        if cls not in cls._instances:
            # first instantiation: create and cache the instance
            cls._instances[cls] = super(DefManagerMeta,
                                        cls).__call__(*args, **kwargs)
        else:
            # subsequent calls reuse the cached instance; the handling of
            # a supplied 'conn' continues beyond this excerpt -- confirm
            # against the full source
            values = None
            if kwargs.get("conn"):