def read_schemas(loadExtensions=False):
    """Ingest the core schemas, the examples and the usage counts.

    Work is only done when the module-level ``schemasInitialized`` flag is
    falsy or ``DYNALOAD`` is truthy. In that case the function:

    * parses every ``data/*.rdfa`` file into the "core" layer,
    * hands the parsed items to ``setHomeValues`` for the "core" layer,
    * passes the ``data/*examples.txt`` file names to ``read_examples``,
    * feeds the content of ``data/2015-04-vocab_counts.txt`` (when present)
      to a fresh ``parsers.UsageFileParser``.

    ``schemasInitialized`` is set to ``True`` on every call.

    ``loadExtensions`` is accepted but not consulted by this implementation.
    """
    import glob
    import os.path
    import re

    global schemasInitialized

    reload_needed = (not schemasInitialized) or DYNALOAD
    if reload_needed:
        log.info("(re)loading core and annotations.")

        core_paths = [full_path(name) for name in glob.glob("data/*.rdfa")]
        rdfa_parser = parsers.MakeParserOfType('rdfa', None)
        core_items = rdfa_parser.parse(core_paths, "core")
        # Core items without a home of their own get the default one.
        setHomeValues(core_items, "core", True)

        read_examples(glob.glob("data/*examples.txt"))

        for counts_file in glob.glob("data/2015-04-vocab_counts.txt"):
            counts_text = read_file(counts_file)
            parsers.UsageFileParser(None).parse(counts_text)

    schemasInitialized = True
def read_schemas(loadExtensions=False):
    """Load the core schema graph and the vocabulary usage counts.

    Loading happens when the module-level ``schemasInitialized`` flag was
    falsy on entry, or whenever ``DYNALOAD`` is truthy. In that case the
    function:

    * collects ``data/*.rdfa`` plus every ``data/*.jsonld`` file that has no
      same-named ``.rdfa`` sibling, and passes their full paths to
      ``apirdflib.load_graph`` for the 'core' graph,
    * feeds the content of ``data/2015-04-vocab_counts.txt`` (when present)
      to a fresh ``parsers.UsageFileParser``,
    * logs the elapsed time of both phases.

    ``loadExtensions`` is accepted but not consulted by this implementation.
    """
    global schemasInitialized

    load_start = datetime.datetime.now()

    # The decision must be taken *before* the flag is raised. Previously the
    # flag was set to True first, which made ``not schemasInitialized``
    # always False and skipped the initial load whenever DYNALOAD was off.
    needs_load = not schemasInitialized or DYNALOAD
    # The flag is still raised before loading starts, as before, so anything
    # that consults it during the load sees the same value it used to.
    schemasInitialized = True

    if needs_load:
        log.debug("[%s] (re)loading core and annotations." % getInstanceId(short=True))

        rdfa_files = glob.glob("data/*.rdfa")
        known_rdfa = set(rdfa_files)
        suffix_len = len(".jsonld")
        # Only add .jsonld files if there is no equivalent .rdfa
        jsonld_only = [
            jf for jf in glob.glob("data/*.jsonld")
            if jf[:-suffix_len] + ".rdfa" not in known_rdfa
        ]
        file_paths = [full_path(f) for f in rdfa_files + jsonld_only]

        apirdflib.load_graph('core', file_paths)
        log.info("[%s] Loaded core graphs in %s" % (getInstanceId(short=True), (datetime.datetime.now() - load_start)))

        load_start = datetime.datetime.now()
        for usage_file in glob.glob("data/2015-04-vocab_counts.txt"):
            usage_data = read_file(usage_file)
            parser = parsers.UsageFileParser(None)
            parser.parse(usage_data)
        log.debug("[%s]Loaded usage data in %s" % (getInstanceId(short=True), (datetime.datetime.now() - load_start)))

    schemasInitialized = True
def read_schemas(loadExtensions=False):
    """Load the core schema graph, the examples and the usage counts.

    Loading happens when the module-level ``schemasInitialized`` flag was
    falsy on entry, or whenever ``DYNALOAD`` is truthy. In that case the
    function:

    * collects ``data/*.rdfa`` plus every ``data/*.jsonld`` file that has no
      same-named ``.rdfa`` sibling, and passes their full paths to
      ``apirdflib.load_graph`` for the 'core' graph,
    * passes the ``data/*examples.txt`` file names to ``read_examples`` for
      the 'core' layer,
    * feeds the content of ``data/2015-04-vocab_counts.txt`` (when present)
      to a fresh ``parsers.UsageFileParser``.

    ``loadExtensions`` is accepted but not consulted by this implementation.
    """
    import glob

    global schemasInitialized

    # The decision must be taken *before* the flag is raised. Previously the
    # flag was set to True first, which made ``not schemasInitialized``
    # always False and skipped the initial load whenever DYNALOAD was off.
    needs_load = not schemasInitialized or DYNALOAD
    # The flag is still raised before loading starts, as before, so anything
    # that consults it during the load sees the same value it used to.
    schemasInitialized = True

    if needs_load:
        log.info("(re)loading core and annotations.")

        rdfa_files = glob.glob("data/*.rdfa")
        known_rdfa = set(rdfa_files)
        suffix_len = len(".jsonld")
        # Only add .jsonld files if there is no equivalent .rdfa
        jsonld_only = [
            jf for jf in glob.glob("data/*.jsonld")
            if jf[:-suffix_len] + ".rdfa" not in known_rdfa
        ]
        file_paths = [full_path(f) for f in rdfa_files + jsonld_only]
        apirdflib.load_graph('core', file_paths)

        read_examples(glob.glob("data/*examples.txt"), 'core')

        for usage_file in glob.glob("data/2015-04-vocab_counts.txt"):
            usage_data = read_file(usage_file)
            parser = parsers.UsageFileParser(None)
            parser.parse(usage_data)

    schemasInitialized = True
def read_schemas(loadExtensions=False):
    """Ingest the core schemas, optional extensions, examples and usage counts.

    Work is only done when the module-level ``schemasInitialized`` flag is
    falsy or ``DYNALOAD`` is truthy. In that case the function:

    * parses every ``data/*.rdfa`` file into the "core" layer,
    * when ``loadExtensions`` is truthy, parses each ``data/ext/*/*.rdfa``
      file into a layer named after its extension directory and records that
      name in ``all_layers``,
    * reads every ``data/*examples.txt`` file and hands the contents to a
      ``parsers.ParseExampleFile`` parser,
    * feeds the content of ``data/2015-04-vocab_counts.txt`` (when present)
      to a fresh ``parsers.UsageFileParser``.

    ``schemasInitialized`` is set to ``True`` on every call.

    Args:
        loadExtensions: when truthy, extension schemas under ``data/ext/``
            are scanned and parsed in addition to the core files.
    """
    import glob
    import re

    global schemasInitialized

    if not schemasInitialized or DYNALOAD:
        log.info("(re)loading core and annotations.")
        core_paths = [full_path(f) for f in glob.glob("data/*.rdfa")]
        parser = parsers.MakeParserOfType('rdfa', None)
        parser.parse(core_paths, "core")

        if loadExtensions:
            log.info("(re)scanning for extensions.")
            extfiles = glob.glob("data/ext/*/*.rdfa")
            log.info("Extensions found: %s ." % " , ".join(extfiles))
            # Raw string: "\/" is not a valid escape in an ordinary literal
            # and triggers a warning on current Python versions.
            fnstrip_re = re.compile(r"/.*")
            for ext in extfiles:
                ext_file_path = full_path(ext)
                # Strip the prefix and everything from the next "/" on,
                # e.g. 'data/ext/bib/bibdemo.rdfa' -> 'bib'.
                extid = fnstrip_re.sub('', ext.replace('data/ext/', ''))
                log.info("Preparing to parse extension data: %s as '%s'" % (ext_file_path, extid))
                parser = parsers.MakeParserOfType('rdfa', None)
                all_layers[extid] = "1"
                # Put the extension's schema triples in their own layer.
                extitems = parser.parse([ext_file_path], layer=extid)
                for x in extitems:
                    if x is not None:
                        log.debug("%s:%s" % (extid, str(x.id)))

        example_contents = [read_file(f) for f in glob.glob("data/*examples.txt")]
        parser = parsers.ParseExampleFile(None)
        parser.parse(example_contents)

        for usage_file in glob.glob("data/2015-04-vocab_counts.txt"):
            usage_data = read_file(usage_file)
            parser = parsers.UsageFileParser(None)
            parser.parse(usage_data)

    schemasInitialized = True