def test_bnodes_sort(self):
    """Smoke test: serializing a graph loaded from bnodes.ttl must not raise.

    The serialized output is written to an in-memory buffer and is not
    inspected; the test passes as long as no exception is raised.
    """
    g = Graph()
    g.load("tests/data/bnodes.ttl", format="turtle")

    serializer = OrderedTurtleSerializer(g)
    sink = BytesIO()
    serializer.serialize(sink)
def run(self):
    """Serialize ``self.graph`` as ordered Turtle into ``self.output``.

    The third entry of the serializer's class order depends on the input
    file name: ``OWL.Class`` when ``self.input_csv`` contains "glosis_cl",
    otherwise the SOSA ``Procedure`` class.
    """
    primary_class = (
        OWL.Class
        if "glosis_cl" in self.input_csv
        else Namespace('http://www.w3.org/ns/sosa/').Procedure
    )

    ordered = OrderedTurtleSerializer(self.graph)
    ordered.class_order = [
        OWL.Ontology,
        SKOS.ConceptScheme,
        primary_class,
        SKOS.Concept,
    ]

    # The serializer is handed a binary file handle.
    with open(self.output, 'wb') as handle:
        ordered.serialize(handle)
def test_numeric(self):
    """Check that numeric_unsorted.ttl serializes in the expected order.

    Only lines starting with ``<`` are compared, both in the serializer
    output and in the reference file ``numeric_sorted.ttl``; every other
    line (prefixes, blank lines, continuation lines) is ignored.
    """
    graph = Graph()
    graph.load("tests/data/numeric_unsorted.ttl", format="turtle")

    ots = OrderedTurtleSerializer(graph)
    out = BytesIO()
    ots.serialize(out)
    out = "\n".join(
        [x for x in out.getvalue().decode("utf-8").split("\n") if x.startswith("<")]
    )

    # Read the reference under a context manager so the handle is closed
    # deterministically instead of being left to the garbage collector.
    with open("tests/data/numeric_sorted.ttl") as fp:
        ref = fp.read()
    ref = "\n".join([x for x in ref.split("\n") if x.startswith("<")])

    assert ref.strip() == out.strip()
def test_custom_sorter(self):
    """Check ordering of group_unsorted.ttl with a custom sorter installed.

    The sorter matches URIs ending in digits and builds the sort key from
    ``self.xhash`` of the leading part plus the integer value of the
    trailing digits. Only lines starting with ``<`` are compared against
    the reference file ``group_sorted.ttl``.
    """
    graph = Graph()
    graph.load("tests/data/group_unsorted.ttl", format="turtle")

    ots = OrderedTurtleSerializer(graph)
    ots.sorters = [
        ("(.*?)([0-9]+)$", lambda x: self.xhash(x[0]) + int(x[1])),
    ]

    out = BytesIO()
    ots.serialize(out)
    out = "\n".join(
        [x for x in out.getvalue().decode("utf-8").split("\n") if x.startswith("<")]
    )

    # Read the reference under a context manager so the handle is closed
    # deterministically instead of being left to the garbage collector.
    with open("tests/data/group_sorted.ttl") as fp:
        ref = fp.read()
    ref = "\n".join([x for x in ref.split("\n") if x.startswith("<")])

    assert ref.strip() == out.strip()
def test_dewey_sorter(self):
    """Check ordering of dewey_unsorted.ttl with Dewey-specific sorters.

    Two sorters are installed: one for table numbers (two captured parts
    separated by ``--``) and one for standard schedule numbers. Only lines
    starting with ``<`` are compared against ``dewey_sorted.ttl``.
    """
    graph = Graph()
    graph.load("tests/data/dewey_unsorted.ttl", format="turtle")

    ots = OrderedTurtleSerializer(graph)
    # Raw strings keep the regex backslashes without relying on Python
    # passing unknown escapes such as "\-" through (which emits a warning).
    ots.sorters = [
        # Table numbers: the key must use BOTH captured groups; the former
        # "{0}--{0}" template silently dropped the second one.
        (r"/([0-9A-Z\-]+)\-\-([0-9.\-;:]+)/e", lambda x: "T{0}--{1}".format(x[0], x[1])),
        # Standard schedule numbers
        (r"/([0-9.\-;:]+)/e", lambda x: "A" + x[0]),
    ]

    out = BytesIO()
    ots.serialize(out)
    out = "\n".join(
        [x for x in out.getvalue().decode("utf-8").split("\n") if x.startswith("<")]
    )

    # Read the reference under a context manager so the handle is closed
    # deterministically instead of being left to the garbage collector.
    with open("tests/data/dewey_sorted.ttl") as fp:
        ref = fp.read()
    ref = "\n".join([x for x in ref.split("\n") if x.startswith("<")])

    assert ref.strip() == out.strip()
def build(task):
    """Load the mappings for *task* and write them to the target file.

    ``target`` ending in ``.nt`` produces N-Triples with the lines sorted;
    ``.ttl`` produces ordered Turtle after binding the known prefixes.
    Any other extension raises ``Error``.

    NOTE(review): ``target``, ``uri_filter`` and ``prefixes`` are not
    defined in this function - presumably they come from an enclosing
    scope; confirm against the caller.
    """
    logger.info('Building mappings')
    graph = load_mappings_from_file(task.file_dep, uri_filter)

    as_ntriples = target.endswith('.nt')
    if not as_ntriples and not target.endswith('.ttl'):
        raise Error('Unknown file ext')

    if as_ntriples:
        # Serialize to memory first so the lines can be sorted before
        # they reach the file.
        buffer = BytesIO()
        graph.serialize(buffer, format='nt')
        with open(target, 'wb') as sink:
            buffer.seek(0)
            sink.writelines(sorted(buffer.readlines()))
    else:
        for entry in prefixes:
            graph.namespace_manager.bind(entry[0], URIRef(entry[1]))
        ordered = OrderedTurtleSerializer(graph)
        with open(task.targets[0], 'wb') as sink:
            ordered.serialize(sink)

    logger.info('Wrote %s' % task.targets[0])
def main():
    """Command-line entry point: convert MARC21 Classification to SKOS/RDF.

    Parses the command line, optionally loads an RDF file into the graph,
    converts the records of the input file with ``process_records`` and
    writes the result as Turtle, JSKOS or newline-delimited JSON to the
    output file, or to standard output when no output file (or ``-``) is
    given.

    Raises:
        ValueError: if the requested output format is not supported, or
            if no input file was specified.
    """
    parser = argparse.ArgumentParser(description='Convert MARC21 Classification to SKOS/RDF')

    parser.add_argument('infile', nargs='?', help='Input XML file')
    parser.add_argument('outfile', nargs='?', help='Output RDF file')
    parser.add_argument('--version', action='version', version='%(prog)s ' + __version__)

    parser.add_argument('-v', '--verbose', dest='verbose', action='store_true',
                        help='More verbose output')
    parser.add_argument('-o', '--outformat', dest='outformat', metavar='FORMAT', nargs='?',
                        help='Output format: turtle (default), jskos, or ndjson')

    parser.add_argument('--include', dest='include',
                        help='RDF file to be loaded into the graph '
                             '(e.g. to define a concept scheme). '
                             'Must be the same format as {outformat}.')

    parser.add_argument('--uri', dest='base_uri', help='URI template')
    parser.add_argument('--scheme', dest='scheme_uri',
                        help='SKOS scheme for all records, use {edition} to specify edition.')
    # parser.add_argument('--table_scheme', dest='table_scheme_uri', help='SKOS scheme for table records, use {edition} to specify edition.')
    parser.add_argument('--whitespace', dest='whitespace', metavar='STRING',
                        help='Replace whitespaces in URI templates with this.')

    parser.add_argument('--altlabels', '--indexterms', dest='altlabels', action='store_true',
                        help='Include altlabels (from 7XX or 4XX).')
    parser.add_argument('--notes', dest='notes', action='store_true',
                        help='Include note fields (DEPRECATED as including notes is now the default).')
    parser.add_argument('--exclude_notes', dest='exclude_notes', action='store_true',
                        help='Exclude note fields.')
    parser.add_argument('--components', dest='components', action='store_true',
                        help='Include component information from 765.')
    parser.add_argument('--webdewey', dest='webdewey', action='store_true',
                        help='Include non-standard WebDewey notes from 680.')
    parser.add_argument('--skip-classification', dest='skip_classification', action='store_true',
                        help='Skip classification records')
    parser.add_argument('--skip-authority', dest='skip_authority', action='store_true',
                        help='Skip authority records')
    parser.add_argument('--expand', dest='expand', action='store_true',
                        help='Use Skosify to infer skos:hasTopConcept, skos:narrower and skos:related')
    parser.add_argument('--skosify', dest='skosify',
                        help='Run Skosify with given configuration file')

    parser.add_argument('-l', '--list-schemes', dest='list_schemes', action='store_true',
                        help='List default concept schemes.')

    args = parser.parse_args()

    if args.notes:
        warnings.warn('--notes is deprecated as including notes is now the default. '
                      'The inverse option --exclude_notes has been added to exclude notes.',
                      DeprecationWarning)

    if args.list_schemes:
        print('Classification schemes:')
        for k in CONFIG['classification_schemes'].keys():
            scheme = ConceptScheme(k, ClassificationRecord)
            print('- %s' % scheme)
        print('Authority vocabularies:')
        for k in CONFIG['subject_schemes'].keys():
            scheme = ConceptScheme(k, AuthorityRecord)
            print('- %s' % scheme)
        return

    supported_formats = ['turtle', 'jskos', 'ndjson']
    # Without an explicit format, try the output file extension first ...
    if not args.outformat and args.outfile:
        ext = args.outfile.rpartition('.')[-1]
        if ext in supported_formats:
            args.outformat = ext
    # ... and fall back to Turtle.
    if not args.outformat:
        args.outformat = 'turtle'
    elif args.outformat not in supported_formats:
        raise ValueError("Format not supported, must be one of '%s'."
                         % "', '".join(supported_formats))

    graph = Graph()
    if args.include:
        # The included file is parsed in the same family as the output format.
        if args.outformat == 'turtle':
            graph.load(args.include, format='turtle')
        else:
            graph.load(args.include, format='json-ld')

    nm = graph.namespace_manager
    nm.bind('dcterms', DCTERMS)
    nm.bind('skos', SKOS)
    nm.bind('wd', WD)
    nm.bind('mads', MADS)
    nm.bind('owl', OWL)

    if args.verbose:
        console_handler.setLevel(logging.DEBUG)
    else:
        console_handler.setLevel(logging.INFO)

    if args.infile is None:
        raise ValueError('Filename not specified')

    options = {
        'base_uri': args.base_uri,
        'scheme_uri': args.scheme_uri,
        'whitespace': args.whitespace,
        'include_altlabels': args.altlabels,
        'exclude_notes': args.exclude_notes,
        'include_components': args.components,
        'include_webdewey': args.webdewey,
        'skip_classification': args.skip_classification,
        'skip_authority': args.skip_authority,
        'expand': args.expand,
        'skosify': args.skosify,
    }

    marc = MarcFileReader(args.infile)
    graph = process_records(marc.records(), graph, **options)

    if not graph:
        logger.warning('RDF result is empty!')
        return

    # Remember whether we own the output handle: comparing the handle with
    # sys.stdout is unreliable because on Python 3 we write to
    # sys.stdout.buffer, which never equals sys.stdout.
    writing_to_file = bool(args.outfile) and args.outfile != '-'
    if writing_to_file:
        out_file = open(args.outfile, 'wb')
    elif sys.version_info > (3, 0):
        # Serialized output is bytes, so use the binary layer of stdout.
        out_file = sys.stdout.buffer
    else:
        out_file = sys.stdout

    try:
        if args.outformat == 'turtle':
            # @TODO: Perhaps use OrderedTurtleSerializer if available, but fallback to default Turtle serializer if not?
            serializer = OrderedTurtleSerializer(graph)

            serializer.class_order = [
                SKOS.ConceptScheme,
                SKOS.Concept,
            ]
            serializer.sorters = [
                (r'/([0-9A-Z\-]+)--([0-9.\-;:]+)/e', lambda x: 'C{}--{}'.format(x[0], x[1])),  # table numbers
                (r'/([0-9.\-;:]+)/e', lambda x: 'B' + x[0]),  # standard schedule numbers
                (r'^(.+)$', lambda x: 'A' + x[0]),  # fallback
            ]

            serializer.serialize(out_file)

        elif args.outformat in ['jskos', 'ndjson']:
            s = pkg_resources.resource_string(__name__, 'jskos-context.json').decode('utf-8')
            context = json.loads(s)
            jskos = json_ld.from_rdf(graph, context)
            if args.outformat == 'jskos':
                # One pretty-printed JSON document.
                jskos['@context'] = u'https://gbv.github.io/jskos/context.json'
                out_file.write(json.dumps(jskos, sort_keys=True, indent=2).encode('utf-8'))
            else:
                # One compact JSON record per line.
                for record in jskos['@graph'] if '@graph' in jskos else [jskos]:
                    record['@context'] = u'https://gbv.github.io/jskos/context.json'
                    out_file.write(json.dumps(record, sort_keys=True).encode('utf-8') + b'\n')
    finally:
        # Close only what we opened; never close the process' stdout.
        if writing_to_file:
            out_file.close()

    if writing_to_file:
        logger.info('Wrote %s: %s' % (args.outformat, args.outfile))
def serialize(self):
    """Write ``self.g`` to ``self.filepath`` using OrderedTurtleSerializer."""
    ordered = OrderedTurtleSerializer(self.g)
    # The serializer is handed a binary file handle.
    with open(self.filepath, 'wb') as sink:
        ordered.serialize(sink)
def main():
    """Command-line entry point: convert MARC21 Classification to SKOS/RDF.

    Parses the command line, loads the bundled ``vocabularies.yml``,
    optionally loads RDF files into the graph, converts the records of the
    input file with ``process_records`` and writes the result as Turtle,
    JSKOS or newline-delimited JSON to the output file, or to standard
    output when no output file (or ``-``) is given.

    Raises:
        ValueError: if the requested output format is not supported, or
            if no input file was specified.
    """
    parser = argparse.ArgumentParser(
        description='Convert MARC21 Classification to SKOS/RDF')

    parser.add_argument('infile', nargs='?', help='Input XML file')
    parser.add_argument('outfile', nargs='?', help='Output RDF file')
    parser.add_argument('--version', action='version',
                        version='%(prog)s ' + __version__)

    parser.add_argument('-v', '--verbose', dest='verbose', action='store_true',
                        help='More verbose output')
    parser.add_argument('-o', '--outformat', dest='outformat', metavar='FORMAT', nargs='?',
                        help='Output format: turtle (default), jskos, or ndjson')

    parser.add_argument('--include', action='append', dest='include', default=[],
                        help='RDF file(s) to include in the output (e.g. to define a concept scheme). '
                             'Must be the same format as {outformat}.')

    parser.add_argument('--uri', dest='base_uri',
                        help='Concept URI template. See vocabularies.yml for examples.')
    parser.add_argument('--scheme', dest='scheme',
                        help='Concept scheme, either an URI or a key from vocabularies.yml (For a list: mc2skos --list).')
    parser.add_argument('--whitespace', dest='whitespace', metavar='STRING',
                        help='Replace whitespaces in URI templates with this.')

    parser.add_argument('--altlabels', '--indexterms', dest='altlabels', action='store_true',
                        help='Include altlabels (from 7XX or 4XX).')
    parser.add_argument('--notes', dest='notes', action='store_true',
                        help='Include note fields (DEPRECATED as including notes is now the default).')
    parser.add_argument('--exclude_notes', dest='exclude_notes', action='store_true',
                        help='Exclude note fields.')
    parser.add_argument('--components', dest='components', action='store_true',
                        help='Include component information from 765.')
    parser.add_argument('--webdewey', dest='webdewey', action='store_true',
                        help='Include non-standard WebDewey notes from 680.')
    parser.add_argument('--skip-classification', dest='skip_classification', action='store_true',
                        help='Skip classification records')
    parser.add_argument('--skip-authority', dest='skip_authority', action='store_true',
                        help='Skip authority records')
    parser.add_argument('--expand', dest='expand', action='store_true',
                        help='Use Skosify to infer skos:hasTopConcept, skos:narrower and skos:related')
    parser.add_argument('--skosify', dest='skosify',
                        help='Run Skosify with given configuration file')

    parser.add_argument('-l', '--list-schemes', dest='list_schemes', action='store_true',
                        help='List default concept schemes.')

    parser.add_argument('--nll-lang', dest='nll_lang', action='store_true',
                        help='Set language tags specific to the NLL authority file.')

    args = parser.parse_args()

    if args.notes:
        warnings.warn('--notes is deprecated as including notes is now the default. '
                      'The inverse option --exclude_notes has been added to exclude notes.',
                      DeprecationWarning)

    with pkg_resources.resource_stream(__name__, 'vocabularies.yml') as fp:
        vocabularies = Vocabularies()
        vocabularies.load_yaml(fp)

    vocabularies.set_default_scheme(
        generic=args.base_uri,
        scheme=args.scheme,
        whitespace=args.whitespace
    )

    if args.list_schemes:
        print('Schemes:')
        for voc in vocabularies:
            print('- %s' % voc)
        return

    supported_formats = ['turtle', 'jskos', 'ndjson']
    # Without an explicit format, try the output file extension first ...
    if not args.outformat and args.outfile:
        ext = args.outfile.rpartition('.')[-1]
        if ext in supported_formats:
            args.outformat = ext
    # ... and fall back to Turtle.
    if not args.outformat:
        args.outformat = 'turtle'
    elif args.outformat not in supported_formats:
        raise ValueError("Format not supported, must be one of '%s'."
                         % "', '".join(supported_formats))

    graph = Graph()
    for filename in args.include:
        # Included files are parsed in the same family as the output format.
        if args.outformat == 'turtle':
            graph.load(filename, format='turtle')
        else:
            graph.load(filename, format='json-ld')

    nm = graph.namespace_manager
    nm.bind('dcterms', DCTERMS)
    nm.bind('skos', SKOS)
    nm.bind('wd', WD)
    nm.bind('mads', MADS)
    nm.bind('owl', OWL)

    if args.verbose:
        console_handler.setLevel(logging.DEBUG)
    else:
        console_handler.setLevel(logging.INFO)

    if args.infile is None:
        raise ValueError('Filename not specified')

    options = {
        'include_altlabels': args.altlabels,
        'exclude_notes': args.exclude_notes,
        'include_components': args.components,
        'include_webdewey': args.webdewey,
        'skip_classification': args.skip_classification,
        'skip_authority': args.skip_authority,
        'expand': args.expand,
        'skosify': args.skosify,
        'vocabularies': vocabularies,
        'nll_lang': args.nll_lang
    }

    marc = MarcFileReader(args.infile)
    graph = process_records(marc.records(), graph, **options)

    if not graph:
        logger.warning('RDF result is empty!')
        return

    # Remember whether we own the output handle: comparing the handle with
    # sys.stdout is unreliable because on Python 3 we write to
    # sys.stdout.buffer, which never equals sys.stdout.
    writing_to_file = bool(args.outfile) and args.outfile != '-'
    if writing_to_file:
        out_file = open(args.outfile, 'wb')
    elif sys.version_info > (3, 0):
        # Serialized output is bytes, so use the binary layer of stdout.
        out_file = sys.stdout.buffer
    else:
        out_file = sys.stdout

    try:
        if args.outformat == 'turtle':
            # @TODO: Perhaps use OrderedTurtleSerializer if available, but fallback to default Turtle serializer if not?
            serializer = OrderedTurtleSerializer(graph)

            serializer.class_order = [
                SKOS.ConceptScheme,
                SKOS.Concept,
            ]
            serializer.sorters = [
                (r'/([0-9A-Z\-]+)--([0-9.\-;:]+)/e', lambda x: 'C{}--{}'.format(x[0], x[1])),  # table numbers
                (r'/([0-9.\-;:]+)/e', lambda x: 'B' + x[0]),  # standard schedule numbers
                (r'^(.+)$', lambda x: 'A' + x[0]),  # fallback
            ]

            serializer.serialize(out_file)

        elif args.outformat in ['jskos', 'ndjson']:
            s = pkg_resources.resource_string(__name__, 'jskos-context.json').decode('utf-8')
            context = json.loads(s)
            jskos = json_ld.from_rdf(graph, context)
            if args.outformat == 'jskos':
                # One pretty-printed JSON document.
                jskos['@context'] = u'https://gbv.github.io/jskos/context.json'
                out_file.write(json.dumps(jskos, sort_keys=True, indent=2).encode('utf-8'))
            else:
                # One compact JSON record per line.
                for record in jskos['@graph'] if '@graph' in jskos else [jskos]:
                    record['@context'] = u'https://gbv.github.io/jskos/context.json'
                    out_file.write(json.dumps(record, sort_keys=True).encode('utf-8') + b'\n')
    finally:
        # Close only what we opened; never close the process' stdout.
        if writing_to_file:
            out_file.close()

    if writing_to_file:
        logger.info('Wrote %s: %s' % (args.outformat, args.outfile))