def setUp(self):
    """Build the rule network from ``test_cmpuri.n3`` and feed it sample facts.

    The sample graph holds four subjects (URIs that differ in letter case,
    explicit port and host name), all typed with the same blank node.  The
    facts inferred by the network are logged at DEBUG level.
    """
    rule_files = [fixture("test_cmpuri.n3")]
    # Only the network is kept; the rule store and rule graph are not used.
    _store, _rules, self.network = makeRuleStore(rule_files)

    n3_source = (
        " <http://example.org/> a _:x ."
        " <http://EXAMPLE.ORG/> a _:x ."
        " <HTTP://example.org:80/> a _:x ."
        " <http://example.com/> a _:x . "
    )
    facts = Graph()
    facts.parse(StringIO(n3_source), format="n3")

    tokens = generateTokenSet(facts)
    self.network.feedFactsToAdd(tokens)

    inferred = self.network.inferredFacts.serialize(format="n3")
    logging.debug("Inferred Facts:\n%s" % inferred)
def setUp(self):
    """Build the rule network from ``test_regexp.n3`` and feed it sample facts.

    The sample graph gives ``<http://example.org/>`` a ``dc:title`` literal
    and an ``rdfs:seeAlso`` link.  The facts inferred by the network are
    logged at DEBUG level.
    """
    rule_files = [fixture("test_regexp.n3")]
    # Only the network is kept; the rule store and rule graph are not used.
    _store, _rules, self.network = makeRuleStore(rule_files)

    n3_source = (
        ' @prefix dc: <http://purl.org/dc/terms/>.'
        ' @prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .'
        ' <http://example.org/> dc:title "abc/123" .'
        ' <http://example.org/> rdfs:seeAlso <http://example.org/12345.txt>. '
    )
    facts = Graph()
    facts.parse(StringIO(n3_source), format="n3")

    tokens = generateTokenSet(facts)
    self.network.feedFactsToAdd(tokens)

    inferred = self.network.inferredFacts.serialize(format="n3")
    logging.debug("Inferred Facts:\n%s" % inferred)
def curate():
    """Command-line entry point: run N3 curation rules over CKAN datasets.

    Parses the command-line options, configures logging, builds the rule
    network from the ``-r`` rule files and then, for each dataset, loads its
    RDF description and feeds the resulting facts to the network.  When no
    dataset is named on the command line, the list is obtained from
    ``CkanClient(...).package_register_get()``.

    A dataset whose RDF description cannot be loaded is logged (with the URL
    that was actually requested and the underlying error) and skipped;
    processing continues with the next dataset.
    """
    parser = argparse.ArgumentParser(
        description=(
            " Tool for curation of CKAN datasets. It reads RDF descriptions"
            " of the dataset and applies the specified rules. Inferred"
            " statements are written to the standard output in Notation 3"
            " and built-in predicates may be used to check specific"
            " conditions or perform certain actions. "
        )
    )
    parser.add_argument("-b", dest="base",
                        default="http://semantic.ckan.net/package/",
                        help="RDF description base URI to look for packages to consider")
    parser.add_argument("-r", dest="rules", action="append", default=[],
                        help="N3 rules (can specify more than once)")
    parser.add_argument("-k", dest="api_key", help="CKAN API Key")
    parser.add_argument("-a", dest="api_base", help="CKAN API base")
    parser.add_argument("-d", dest="delta", action="store_true",
                        help="Accumulate closure delta")
    parser.add_argument("-l", dest="logfile", help="Log to file")
    parser.add_argument("-v", dest="debug", action="store_true",
                        help="Verbose output")
    parser.add_argument("-s", dest="save", action="store_true",
                        help="Save inferred metadata back to CKAN")
    parser.add_argument("datasets", nargs="*", help="Dataset(s) to check")
    args = parser.parse_args()

    logcfg = {
        "level": logging.DEBUG if args.debug else logging.INFO,
        "format": "%(asctime)s [%(levelname)s] %(name)s: %(message)s",
    }
    if args.logfile:
        logcfg["filename"] = args.logfile
    logging.basicConfig(**logcfg)
    log = logging.getLogger(__name__)

    # Only the network is used below; the rule store and graph are discarded.
    _rule_store, _rule_graph, network = makeRuleStore(args.rules)

    datasets = args.datasets
    if not datasets:
        # Nothing named explicitly: ask the CKAN API for the package list.
        client = CkanClient(base_location=args.api_base, api_key=args.api_key)
        datasets = client.package_register_get()

    closureDelta = Graph()
    for dataset in datasets:
        log.info("processing %s", dataset)
        network.reset(closureDelta)

        # Bare package names are resolved against the RDF description base.
        if not is_uri(dataset):
            dataset = args.base + dataset

        try:
            g = Graph()
            g.parse(dataset)
        except Exception as e:
            # `dataset` already holds the full URL at this point, so report
            # it as-is (prefixing args.base again would log a bogus address)
            # and include the cause so the failure can be diagnosed.
            log.error("exception loading graph at %s: %s", dataset, e)
            continue

        network.feedFactsToAdd(generateTokenSet(g))

        if args.save:
            # NOTE(review): `queue` is defined outside this function;
            # presumably it holds pending CKAN updates - confirm.
            queue.process(base_location=args.api_base, api_key=args.api_key)

        if not args.delta:
            # Without -d each dataset starts from a fresh, empty delta graph.
            closureDelta = Graph()
def setUp(self):
    """Build the rule network from the ``test_bad_url.n3`` fixture."""
    rule_files = [fixture("test_bad_url.n3")]
    # Only the network is kept; the rule store and rule graph are not used.
    _store, _rules, self.network = makeRuleStore(rule_files)