def _test_serializer(cat, num, inputpath, expectedpath, context, options):
    input_graph = _load_nquads(inputpath)
    expected_json = _load_json(expectedpath)
    result_json = from_rdf(input_graph, context, base=TC_BASE + inputpath,
                           use_native_types=options.get('useNativeTypes', False),
                           use_rdf_type=options.get('useRdfType', False))
    _compare_json(expected_json, result_json)
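For orientation, here is a minimal, self-contained sketch of the from_rdf call these test helpers exercise. The Turtle data and example.org IRI are illustrative; the keyword arguments mirror the ones used throughout this page.

import json
from rdflib import Graph
from rdflib_jsonld.serializer import from_rdf

g = Graph()
g.parse(data="""
    @prefix dc: <http://purl.org/dc/terms/> .
    <http://example.org/book> dc:title "JSON-LD and RDF" .
""", format='turtle')

# With context_data=None the result is expanded JSON-LD; passing a context
# dict (or a path/IRI, as the tests do) produces term-compacted output instead.
result = from_rdf(g, context_data=None, base=None,
                  use_native_types=True, use_rdf_type=False)
print(json.dumps(result, indent=2))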
def do_test_json(cat, num, inputpath, expectedpath, context, options):
    base = TC_BASE + inputpath
    input_obj = _load_json(inputpath)
    input_graph = ConjunctiveGraph()
    to_rdf(
        input_obj,
        input_graph,
        base=base,
        context_data=context,
        produce_generalized_rdf=True,
    )
    expected_json = _load_json(expectedpath)
    use_native_types = True  # CONTEXT in input_obj
    result_json = from_rdf(
        input_graph,
        context,
        base=TC_BASE + inputpath,
        use_native_types=options.get("useNativeTypes", use_native_types),
        use_rdf_type=options.get("useRdfType", False),
    )

    def _prune_json(data):
        if CONTEXT in data:
            data.pop(CONTEXT)
        if GRAPH in data:
            data = data[GRAPH]
        # def _remove_empty_sets(obj):
        return data

    expected_json = _prune_json(expected_json)
    result_json = _prune_json(result_json)
    _compare_json(expected_json, result_json)
def htm_modified(file_path):
    g = rdflib.Graph()
    try:
        g.load(file_path)
    except IOError:
        return None
    ld = serializer.from_rdf(g, context_data=context, base=None,
                             use_native_types=False, use_rdf_type=False,
                             auto_compact=False, startnode=None, index=False)
    graph = ld['@graph']
    # Merge the properties of each blank node ("@id" starting with "_")
    # into a single dict per node id.
    nodes = {}
    for obj in graph:
        if isinstance(obj, dict):
            obj = obj.copy()
            if "@id" in obj and obj["@id"].startswith("_"):
                nodeid = obj["@id"]
                node = nodes.get(nodeid, {})
                del obj["@id"]
                node.update(obj)
                nodes[nodeid] = node
    # now remove the blank nodes and the files
    for obj in unblank_node(graph, nodes):
        try:
            # if obj['@type'] == 'pgterms:file':
            if str(obj['@id']).endswith('.htm'):
                return obj['dcterms:modified']['@value']
        except (KeyError, TypeError):
            pass
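This snippet and pg_rdf_to_json below rely on an unblank_node helper that is not shown. The following is a plausible, purely hypothetical sketch of what it does, inferred from how it is called; it assumes blank-node references are acyclic.

def unblank_node(graph, nodes):
    """Hypothetical: inline merged blank nodes, yield non-blank top-level objects."""
    def _resolve(value):
        if isinstance(value, dict):
            ref = value.get('@id')
            if isinstance(ref, str) and ref in nodes:
                # Replace a {'@id': '_:bN'} reference with the merged node dict.
                return _resolve_obj(nodes[ref])
            return _resolve_obj(value)
        if isinstance(value, list):
            return [_resolve(v) for v in value]
        return value

    def _resolve_obj(obj):
        return {k: _resolve(v) for k, v in obj.items()}

    for obj in graph:
        # Skip the bare blank-node entries themselves; they were folded into `nodes`.
        if isinstance(obj, dict) and not str(obj.get('@id', '')).startswith('_'):
            yield _resolve_obj(obj)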
def to_jsonld(source, contextref, contextobj=None, index=None):
    contextpath = scriptpath("../sys/context/%s.jsonld" % contextref)
    contexturi = "/sys/context/%s.jsonld" % contextref
    context = [contextpath, contextobj] if contextobj else contextpath
    data = from_rdf(source, context_data=context)
    data['@context'] = [contexturi, contextobj] if contextobj else contexturi

    # customize to a convenient shape (within the bounds of JSON-LD)
    if index:
        graph_key, index_key = index
        nodes = data.pop(graph_key)
        graphmap = data[graph_key] = {}
    else:
        nodes = data['@graph']
        index_key = None

    base = contextobj.get('@base') if contextobj else None
    for node in nodes:
        nodeid = node['@id']
        if base and nodeid.startswith(base):
            node['@id'] = nodeid[len(base)-1:]
        elif nodeid.startswith('_:'):
            del node['@id']  # TODO: lossy if referenced, should be embedded..
        if index_key:
            key = None
            if index_key in ('#', '/'):
                leaf = node['@id'].rsplit(index_key, 1)[-1]
                key = leaf or node['@id']
            elif index_key in node:
                key = node[index_key]
            if key:
                graphmap[key] = node
    return data
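A hedged usage sketch for the index parameter above: it is a (graph_key, index_key) pair, where '#' or '/' keys the graph map by the trailing IRI segment and any other value indexes by that property. The context name, base, and graph variable are illustrative assumptions.

# graph: an rdflib Graph (or other source from_rdf accepts), prepared elsewhere
ctx = {'@context': {'@vocab': 'http://example.org/ns#'},
       '@base': 'http://example.org/data/'}
# Nodes end up keyed by the segment after the last '/', e.g.
# {'item1': {'@id': '/item1', ...}, 'item2': {'@id': '/item2', ...}}
data = to_jsonld(graph, 'mycontext', contextobj=ctx, index=('@graph', '/'))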
def pg_rdf_to_json(file_path):
    g = rdflib.Graph()
    g.load(file_path)
    #print(g.serialize(format='json-ld', indent=4, context=context))
    ld = serializer.from_rdf(g, context_data=context, base=None,
                             use_native_types=False, use_rdf_type=False,
                             auto_compact=False, startnode=None, index=False)
    graph = ld['@graph']
    #print(json.dumps(graph, indent=2, separators=(',', ': '), sort_keys=True))
    # Merge the properties of each blank node ("@id" starting with "_")
    # into a single dict per node id.
    nodes = {}
    for obj in graph:
        if isinstance(obj, dict):
            obj = obj.copy()
            if "@id" in obj and obj["@id"].startswith("_"):
                nodeid = obj["@id"]
                node = nodes.get(nodeid, {})
                del obj["@id"]
                node.update(obj)
                nodes[nodeid] = node
    # now remove the blank nodes and the files
    newnodes = []
    top = None
    for obj in unblank_node(graph, nodes):
        try:
            if obj['@type'] == 'pgterms:file':
                continue
            elif obj['@type'] == 'pgterms:ebook':
                top = obj
            elif '@id' in obj and str(obj['@id']) == 'http://www.gutenberg.org/':
                continue
            else:
                newnodes.append(obj)
        except KeyError:
            continue
    #print(json.dumps(top, indent=2, separators=(',', ': '), sort_keys=True))
    entities = {}
    for node in newnodes:
        node_id = node.get('@id', None)
        if node_id:
            entities[node_id] = mapdata(node, pandata_map, entities)
    for adder in pandata_adders:
        adder(top, entities)
    top2 = mapdata(top, pandata_map, entities)
    for postprocessor in postprocessors:
        postprocessor(top2)
    return top2
def serialize_as_jsonld(graph, stream, base=None, encoding=None, **kwargs):
    """Serialize RDF graph as JSON-LD.

    Code copied from:
    https://github.com/RDFLib/rdflib-jsonld/blob/master/rdflib_jsonld/serializer.py
    with addition of json_hook functionality.
    """
    # TODO: docstring w. args and return value
    encoding = encoding or 'utf-8'
    if encoding not in ('utf-8', 'utf-16'):
        warnings.warn("JSON should be encoded as unicode. "
                      "Given encoding was: %s" % encoding)

    context_data = kwargs.get('context')
    use_native_types = kwargs.get('use_native_types', False)
    use_rdf_type = kwargs.get('use_rdf_type', False)
    auto_compact = kwargs.get('auto_compact', False)

    indent = kwargs.get('indent', 2)
    separators = kwargs.get('separators', (',', ': '))
    sort_keys = kwargs.get('sort_keys', True)
    ensure_ascii = kwargs.get('ensure_ascii', False)

    obj = from_rdf(graph.store, context_data, base,
                   use_native_types, use_rdf_type,
                   auto_compact=auto_compact)

    # Check hook for JSON postprocessing
    json_hook = kwargs.get('json_hook', None)
    if json_hook is not None:
        obj = json_hook(obj)

    data = json.dumps(obj, indent=indent, separators=separators,
                      sort_keys=sort_keys, ensure_ascii=ensure_ascii)
    return data
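A brief sketch of the json_hook extension point added above. The hook name, the key it injects, and the context dict are illustrative assumptions; a context is passed so that from_rdf returns a single dict rather than an expanded list.

# graph: an rdflib Graph prepared elsewhere
def _add_note(obj):
    # Runs on the JSON-LD object after from_rdf, before json.dumps.
    obj['note'] = 'post-processed'  # illustrative key
    return obj

text = serialize_as_jsonld(graph, stream=None,
                           context={'@vocab': 'http://example.org/ns#'},
                           json_hook=_add_note)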
def _test_json(cat, num, inputpath, expectedpath, context, options):
    base = TC_BASE + inputpath
    input_obj = _load_json(inputpath)
    input_graph = ConjunctiveGraph()
    to_rdf(input_obj, input_graph, base=base, context_data=context)
    expected_json = _load_json(expectedpath)
    result_json = from_rdf(input_graph, context, base=TC_BASE + inputpath,
                           use_native_types=options.get('useNativeTypes', False),
                           use_rdf_type=options.get('useRdfType', False))

    def _prune_json(data):
        if CONTEXT in data:
            data.pop(CONTEXT)
        if GRAPH in data:
            data = data[GRAPH]
        return data

    expected_json = _prune_json(expected_json)
    result_json = _prune_json(result_json)
    _compare_json(expected_json, result_json)
def graphAsJson(g):
    # This is not the same as g.serialize(format='json-ld')! That
    # version omits literal datatypes.
    return json.dumps(from_rdf(g))
def _to_jsonld(source, context_uri, contextobj):
    data = from_rdf(source, context_data=contextobj)
    data['@context'] = context_uri
    _embed_singly_referenced_bnodes(data)
    _expand_ids(data['@graph'], contextobj['@context'])
    return data
def main():
    parser = argparse.ArgumentParser(
        description='Convert MARC21 Classification to SKOS/RDF')
    parser.add_argument('infile', nargs='?', help='Input XML file')
    parser.add_argument('outfile', nargs='?', help='Output RDF file')
    parser.add_argument('--version', action='version',
                        version='%(prog)s ' + __version__)
    parser.add_argument('-v', '--verbose', dest='verbose', action='store_true',
                        help='More verbose output')
    parser.add_argument('-o', '--outformat', dest='outformat', metavar='FORMAT',
                        nargs='?',
                        help='Output format: turtle (default), jskos, or ndjson')
    parser.add_argument('--include', action='append', dest='include', default=[],
                        help='RDF file(s) to include in the output (e.g. to define '
                             'a concept scheme). Must be the same format as {outformat}.')
    parser.add_argument('--uri', dest='base_uri',
                        help='Concept URI template. See vocabularies.yml for examples.')
    parser.add_argument('--scheme', dest='scheme',
                        help='Concept scheme, either a URI or a key from '
                             'vocabularies.yml (For a list: mc2skos --list).')
    parser.add_argument('--whitespace', dest='whitespace', metavar='STRING',
                        help='Replace whitespaces in URI templates with this.')
    parser.add_argument('--altlabels', '--indexterms', dest='altlabels',
                        action='store_true',
                        help='Include altlabels (from 7XX or 4XX).')
    parser.add_argument('--notes', dest='notes', action='store_true',
                        help='Include note fields (DEPRECATED as including notes '
                             'is now the default).')
    parser.add_argument('--exclude_notes', dest='exclude_notes',
                        action='store_true', help='Exclude note fields.')
    parser.add_argument('--components', dest='components', action='store_true',
                        help='Include component information from 765.')
    parser.add_argument('--webdewey', dest='webdewey', action='store_true',
                        help='Include non-standard WebDewey notes from 680.')
    parser.add_argument('--skip-classification', dest='skip_classification',
                        action='store_true', help='Skip classification records')
    parser.add_argument('--skip-authority', dest='skip_authority',
                        action='store_true', help='Skip authority records')
    parser.add_argument('--expand', dest='expand', action='store_true',
                        help='Use Skosify to infer skos:hasTopConcept, '
                             'skos:narrower and skos:related')
    parser.add_argument('--skosify', dest='skosify',
                        help='Run Skosify with given configuration file')
    parser.add_argument('-l', '--list-schemes', dest='list_schemes',
                        action='store_true', help='List default concept schemes.')
    parser.add_argument('--nll-lang', dest='nll_lang', action='store_true',
                        help='Set language tags specific to the NLL authority file.')

    args = parser.parse_args()

    if args.notes:
        warnings.warn('--notes is deprecated as including notes is now the default. '
                      'The inverse option --exclude_notes has been added to exclude notes.',
                      DeprecationWarning)

    with pkg_resources.resource_stream(__name__, 'vocabularies.yml') as fp:
        vocabularies = Vocabularies()
        vocabularies.load_yaml(fp)
    vocabularies.set_default_scheme(generic=args.base_uri,
                                    scheme=args.scheme,
                                    whitespace=args.whitespace)

    if args.list_schemes:
        print('Schemes:')
        for voc in vocabularies:
            print('- %s' % voc)
        return

    supported_formats = ['turtle', 'jskos', 'ndjson']
    if not args.outformat and args.outfile:
        ext = args.outfile.rpartition('.')[-1]
        if ext in supported_formats:
            args.outformat = ext
    if not args.outformat:
        args.outformat = 'turtle'
    elif args.outformat not in supported_formats:
        raise ValueError("Format not supported, must be one of '%s'."
                         % "', '".join(supported_formats))

    graph = Graph()
    for filename in args.include:
        if args.outformat == 'turtle':
            graph.load(filename, format='turtle')
        else:
            graph.load(filename, format='json-ld')

    nm = graph.namespace_manager
    nm.bind('dcterms', DCTERMS)
    nm.bind('skos', SKOS)
    nm.bind('wd', WD)
    nm.bind('mads', MADS)
    nm.bind('owl', OWL)

    if args.verbose:
        console_handler.setLevel(logging.DEBUG)
    else:
        console_handler.setLevel(logging.INFO)

    if args.infile is None:
        raise ValueError('Filename not specified')

    options = {
        'include_altlabels': args.altlabels,
        'exclude_notes': args.exclude_notes,
        'include_components': args.components,
        'include_webdewey': args.webdewey,
        'skip_classification': args.skip_classification,
        'skip_authority': args.skip_authority,
        'expand': args.expand,
        'skosify': args.skosify,
        'vocabularies': vocabularies,
        'nll_lang': args.nll_lang,
    }

    marc = MarcFileReader(args.infile)
    graph = process_records(marc.records(), graph, **options)

    if not graph:
        logger.warning('RDF result is empty!')
        return

    if args.outfile and args.outfile != '-':
        out_file = open(args.outfile, 'wb')
    else:
        if sys.version_info > (3, 0):
            out_file = sys.stdout.buffer
        else:
            out_file = sys.stdout

    if args.outformat == 'turtle':
        # @TODO: Perhaps use OrderedTurtleSerializer if available, but fall back
        # to the default Turtle serializer if not?
        serializer = OrderedTurtleSerializer(graph)
        serializer.class_order = [
            SKOS.ConceptScheme,
            SKOS.Concept,
        ]
        serializer.sorters = [
            (r'/([0-9A-Z\-]+)--([0-9.\-;:]+)/e',
             lambda x: 'C{}--{}'.format(x[0], x[1])),  # table numbers
            (r'/([0-9.\-;:]+)/e', lambda x: 'B' + x[0]),  # standard schedule numbers
            (r'^(.+)$', lambda x: 'A' + x[0]),  # fallback
        ]
        serializer.serialize(out_file)
    elif args.outformat in ['jskos', 'ndjson']:
        s = pkg_resources.resource_string(__name__, 'jskos-context.json').decode('utf-8')
        context = json.loads(s)
        jskos = json_ld.from_rdf(graph, context)
        if args.outformat == 'jskos':
            jskos['@context'] = 'https://gbv.github.io/jskos/context.json'
            out_file.write(json.dumps(jskos, sort_keys=True, indent=2).encode('utf-8'))
        else:
            for record in jskos['@graph'] if '@graph' in jskos else [jskos]:
                record['@context'] = 'https://gbv.github.io/jskos/context.json'
                out_file.write(json.dumps(record, sort_keys=True).encode('utf-8') + b'\n')

    if out_file != sys.stdout:
        logger.info('Wrote %s: %s' % (args.outformat, args.outfile))
def jsonFromPatch(p):
    return json.dumps({'patch': {
        'adds': from_rdf(_graphFromQuads2(p.addQuads)),
        'deletes': from_rdf(_graphFromQuads2(p.delQuads)),
    }})
def main():
    parser = argparse.ArgumentParser(description='Convert MARC21 Classification to SKOS/RDF')
    parser.add_argument('infile', nargs='?', help='Input XML file')
    parser.add_argument('outfile', nargs='?', help='Output RDF file')
    parser.add_argument('--version', action='version', version='%(prog)s ' + __version__)
    parser.add_argument('-v', '--verbose', dest='verbose', action='store_true',
                        help='More verbose output')
    parser.add_argument('-o', '--outformat', dest='outformat', metavar='FORMAT', nargs='?',
                        help='Output format: turtle (default), jskos, or ndjson')
    parser.add_argument('--include', dest='include',
                        help='RDF file to be loaded into the graph '
                             '(e.g. to define a concept scheme). Must be the same format as {outformat}.')
    parser.add_argument('--uri', dest='base_uri', help='URI template')
    parser.add_argument('--scheme', dest='scheme_uri',
                        help='SKOS scheme for all records, use {edition} to specify edition.')
    # parser.add_argument('--table_scheme', dest='table_scheme_uri',
    #                     help='SKOS scheme for table records, use {edition} to specify edition.')
    parser.add_argument('--whitespace', dest='whitespace', metavar='STRING',
                        help='Replace whitespaces in URI templates with this.')
    parser.add_argument('--altlabels', '--indexterms', dest='altlabels', action='store_true',
                        help='Include altlabels (from 7XX or 4XX).')
    parser.add_argument('--notes', dest='notes', action='store_true',
                        help='Include note fields (DEPRECATED as including notes is now the default).')
    parser.add_argument('--exclude_notes', dest='exclude_notes', action='store_true',
                        help='Exclude note fields.')
    parser.add_argument('--components', dest='components', action='store_true',
                        help='Include component information from 765.')
    parser.add_argument('--webdewey', dest='webdewey', action='store_true',
                        help='Include non-standard WebDewey notes from 680.')
    parser.add_argument('--skip-classification', dest='skip_classification', action='store_true',
                        help='Skip classification records')
    parser.add_argument('--skip-authority', dest='skip_authority', action='store_true',
                        help='Skip authority records')
    parser.add_argument('--expand', dest='expand', action='store_true',
                        help='Use Skosify to infer skos:hasTopConcept, skos:narrower and skos:related')
    parser.add_argument('--skosify', dest='skosify',
                        help='Run Skosify with given configuration file')
    parser.add_argument('-l', '--list-schemes', dest='list_schemes', action='store_true',
                        help='List default concept schemes.')

    args = parser.parse_args()

    if args.notes:
        warnings.warn('--notes is deprecated as including notes is now the default. '
                      'The inverse option --exclude_notes has been added to exclude notes.',
                      DeprecationWarning)

    if args.list_schemes:
        print('Classification schemes:')
        for k in CONFIG['classification_schemes'].keys():
            scheme = ConceptScheme(k, ClassificationRecord)
            print('- %s' % scheme)
        print('Authority vocabularies:')
        for k in CONFIG['subject_schemes'].keys():
            scheme = ConceptScheme(k, AuthorityRecord)
            print('- %s' % scheme)
        return

    supported_formats = ['turtle', 'jskos', 'ndjson']
    if not args.outformat and args.outfile:
        ext = args.outfile.rpartition('.')[-1]
        if ext in supported_formats:
            args.outformat = ext
    if not args.outformat:
        args.outformat = 'turtle'
    elif args.outformat not in supported_formats:
        raise ValueError("Format not supported, must be one of '%s'."
                         % "', '".join(supported_formats))

    graph = Graph()
    if args.include:
        if args.outformat == 'turtle':
            graph.load(args.include, format='turtle')
        else:
            graph.load(args.include, format='json-ld')

    nm = graph.namespace_manager
    nm.bind('dcterms', DCTERMS)
    nm.bind('skos', SKOS)
    nm.bind('wd', WD)
    nm.bind('mads', MADS)
    nm.bind('owl', OWL)

    if args.verbose:
        console_handler.setLevel(logging.DEBUG)
    else:
        console_handler.setLevel(logging.INFO)

    if args.infile is None:
        raise ValueError('Filename not specified')

    options = {
        'base_uri': args.base_uri,
        'scheme_uri': args.scheme_uri,
        'whitespace': args.whitespace,
        'include_altlabels': args.altlabels,
        'exclude_notes': args.exclude_notes,
        'include_components': args.components,
        'include_webdewey': args.webdewey,
        'skip_classification': args.skip_classification,
        'skip_authority': args.skip_authority,
        'expand': args.expand,
        'skosify': args.skosify,
    }

    marc = MarcFileReader(args.infile)
    graph = process_records(marc.records(), graph, **options)

    if not graph:
        logger.warning('RDF result is empty!')
        return

    if args.outfile and args.outfile != '-':
        out_file = open(args.outfile, 'wb')
    else:
        if sys.version_info > (3, 0):
            out_file = sys.stdout.buffer
        else:
            out_file = sys.stdout

    if args.outformat == 'turtle':
        # @TODO: Perhaps use OrderedTurtleSerializer if available, but fall back
        # to the default Turtle serializer if not?
        serializer = OrderedTurtleSerializer(graph)
        serializer.class_order = [
            SKOS.ConceptScheme,
            SKOS.Concept,
        ]
        serializer.sorters = [
            (r'/([0-9A-Z\-]+)--([0-9.\-;:]+)/e',
             lambda x: 'C{}--{}'.format(x[0], x[1])),  # table numbers
            (r'/([0-9.\-;:]+)/e', lambda x: 'B' + x[0]),  # standard schedule numbers
            (r'^(.+)$', lambda x: 'A' + x[0]),  # fallback
        ]
        serializer.serialize(out_file)
    elif args.outformat in ['jskos', 'ndjson']:
        s = pkg_resources.resource_string(__name__, 'jskos-context.json').decode('utf-8')
        context = json.loads(s)
        jskos = json_ld.from_rdf(graph, context)
        if args.outformat == 'jskos':
            jskos['@context'] = 'https://gbv.github.io/jskos/context.json'
            out_file.write(json.dumps(jskos, sort_keys=True, indent=2).encode('utf-8'))
        else:
            for record in jskos['@graph'] if '@graph' in jskos else [jskos]:
                record['@context'] = 'https://gbv.github.io/jskos/context.json'
                out_file.write(json.dumps(record, sort_keys=True).encode('utf-8') + b'\n')

    if out_file != sys.stdout:
        logger.info('Wrote %s: %s' % (args.outformat, args.outfile))
args = sys.argv[1:]

def json_dump(o):
    print(json.dumps(o, indent=2, separators=(',', ': '),
                     sort_keys=True, ensure_ascii=False))

if len(args) == 1:
    vocab_fpath = args[0]
    graph = Graph().parse(vocab_fpath, format='turtle')
    term_map = make_term_map(graph)
    json_dump(term_map)
else:
    map_fpath, fpath = args
    with open(map_fpath) as fp:
        mapping = json.load(fp)['mapping']
    if fpath.endswith('.ttl'):
        graph = Graph().parse(fpath, format='turtle')
        #remapped = remap(mapping, graph.serialize(format='json-ld-object'))
        from rdflib_jsonld.serializer import from_rdf
        data = from_rdf(graph, auto_compact=True)
    else:
        if fpath == '-':
            data = json.load(sys.stdin)
        else:
            with open(fpath) as fp:
                data = json.load(fp)
    remap(mapping, data)
    data.pop('@context', None)
    data = autoframe(data)
    json_dump(data)
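The branching above implies two invocation modes for this script; the script name and file names below are illustrative:

    python vocab_map.py vocab.ttl             # build and dump a term map from a Turtle vocabulary
    python vocab_map.py map.json data.jsonld  # remap a JSON-LD (or .ttl) document using a saved map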