예제 #1
0
def write_mappings():

    # Include all pairs:
    # ll = pair_association(unique_pairs, loglike)

    # Only pairs with frequency >= 5:
    unique_pars_5 = np.array([a.split('!') for a, b in pc.items() if b >= 5])

    ll = pair_association(unique_pars_5, loglike)

    print 'Write mappings'
    lls = sorted(ll.items(), key=lambda x: x[1])

    ww = np.array(ll.values())
    mn = ww.mean()

    print "- Mean LL is {:2f}".format(mn)
    print "- {:.2f} % is >= mean LL".format(
        float(ww[ww >= mn].shape[0]) / ww.shape[0])
    print "- {:.2f} % is < mean LL".format(
        float(ww[ww < mn].shape[0]) / ww.shape[0])

    # Whether to lookup DDC labels and add them to the mapping sheet
    addDdcLabels = False

    if addDdcLabels:
        # Load WebDewey data
        g = Graph()
        for x in glob('../../webdewey/DDK23/*.ttl'):
            print x
            g.load(x, format='turtle')

    fsj = re.compile('.*\(Form\)')
    with open('mappings.csv', 'w') as f:
        writer = csv.writer(f, delimiter='\t')
        for x in lls[::-1]:
            if x[1] < mn:
                break

            q = x[0].split('!', 1)

            if fsj.match(q[0]):  # Utelat form
                continue

            if addDdcLabels:
                lab = g.preferredLabel(
                    URIRef('http://dewey.info/class/' + q[1] + '/e23/'),
                    labelProperties=[SKOS.prefLabel, SKOS.altLabel])
                if len(lab) != 0:
                    lab = lab[0][1].value
                else:
                    lab = '(no label)'
                # Term, Dewey, Dewey Caption, Loglike
                writer.writerow([q[0], q[1], lab.encode('utf-8'), x[1]])
            else:
                # Term, Dewey, Loglike
                writer.writerow([q[0], q[1], x[1]])
예제 #2
0
def write_mappings():

    # Include all pairs:
    # ll = pair_association(unique_pairs, loglike)

    # Only pairs with frequency >= 5:
    unique_pars_5 = np.array([a.split('!') for a, b in pc.items() if b >= 5])

    ll = pair_association(unique_pars_5, loglike)

    print 'Write mappings'
    lls = sorted(ll.items(), key=lambda x: x[1])

    ww = np.array(ll.values())
    mn = ww.mean()

    print "- Mean LL is {:2f}".format(mn)
    print "- {:.2f} % is >= mean LL".format(float(ww[ww >= mn].shape[0]) / ww.shape[0])
    print "- {:.2f} % is < mean LL".format(float(ww[ww < mn].shape[0]) / ww.shape[0])

    # Whether to lookup DDC labels and add them to the mapping sheet
    addDdcLabels = False

    if addDdcLabels:
        # Load WebDewey data
        g = Graph()
        for x in glob('../../webdewey/DDK23/*.ttl'):
            print x
            g.load(x, format='turtle')

    fsj = re.compile('.*\(Form\)')
    with open('mappings.csv', 'w') as f:
        writer = csv.writer(f, delimiter='\t')
        for x in lls[::-1]:
            if x[1] < mn:
                break

            q = x[0].split('!', 1)

            if fsj.match(q[0]):  # Utelat form
                continue

            if addDdcLabels:
                lab = g.preferredLabel(URIRef('http://dewey.info/class/' + q[1] + '/e23/'), labelProperties=[SKOS.prefLabel, SKOS.altLabel])
                if len(lab) != 0:
                    lab = lab[0][1].value
                else:
                    lab = '(no label)'
                # Term, Dewey, Dewey Caption, Loglike
                writer.writerow([q[0], q[1], lab.encode('utf-8'), x[1]])
            else:
                # Term, Dewey, Loglike
                writer.writerow([q[0], q[1], x[1]])
예제 #3
0
    def load_mappings(self, filename, graph=None):
        """Load SKOS mapping triples from *filename* into *graph*.

        A fresh Graph is created when *graph* is None. After symmetric
        mapping inference, only the five SKOS mapping properties are
        copied over. Returns the target graph.
        """
        source = Graph()
        if graph is None:
            graph = Graph()
        source.load(filename, format=self.extFromFilename(filename))

        skosify.infer.skos_symmetric_mappings(source)

        mapping_props = [SKOS.exactMatch, SKOS.closeMatch, SKOS.broadMatch,
                         SKOS.narrowMatch, SKOS.relatedMatch]
        for triple in source.triples_choices((None, mapping_props, None)):
            graph.add(triple)

        return graph
예제 #4
0
File: model.py — Project: ShiZhan/seed
def init_model(root_directory, model_file):
    """
    initialize models:
    1. check if core model exists (in CWD), create it if necessary;
    2. check core model version compatibility;
    3. generate node model file based on the content of root_directory.
    """
    if os.path.exists(DEFAULT_CORE_MODEL):
        SEED_LOG.info('load core model')

        core_model = Graph()
        core_model.load(DEFAULT_CORE_MODEL)
        version = core_model.value(URIRef(SEED_BASE), OWL.versionInfo)
        SEED_LOG.info('core model version: [%s]' % version)

        # Regenerate the core model when its version does not match ours.
        if version == VERSION:
            SEED_LOG.info('version compatible')
        else:
            SEED_LOG.error(
                'incompatible to program version [%s], need to regenerate.' \
                % VERSION)
            gen_core()
    else:
        SEED_LOG.error('core model does not exist, need to generate.')
        gen_core()

    # generate node model by importing specified root directory
    root_directory = os.path.abspath(root_directory)
    if not os.path.exists(root_directory):
        SEED_LOG.error('directory not exist')
        return

    SEED_LOG.info('reading object list ...')
    object_list = read_tree(root_directory)

    SEED_LOG.info('creating node model ...')
    write_model(object_list, model_file)

    SEED_LOG.info('%d object individuals created in %s.' % \
        (len(object_list), model_file))
예제 #5
0
    def stats(task):
        """Regenerate vocabulary statistics and append them to the stats JSON files.

        Loads the complete Turtle distribution, computes current stats,
        appends them to the history file and rewrites both the current-stats
        and history JSON files.
        """
        t0 = time.time()

        g = Graph()
        g.load('dist/realfagstermer.complete.ttl', format='turtle')

        # Fix: file handles from bare open() were never closed; use context
        # managers. Also dropped the unused 'now' alias.
        with open('realfagstermer.github.io/_data/stats.json', 'r') as fp:
            s = json.load(fp)
        current = stats_from_graph(g)
        current['ts'] = int(time.time())
        s.append(current)

        with open('realfagstermer.github.io/_data/stats_current.json', 'w') as fp:
            json.dump(current, fp, indent=2, sort_keys=True)
        with open('realfagstermer.github.io/_data/stats.json', 'w') as fp:
            json.dump(s, fp, indent=2, sort_keys=True)

        dt = time.time() - t0
        logger.info('Generated stats in %.1f seconds', dt)
예제 #6
0
def enrich_and_concat(files, out_file):
    """Merge Turtle files into one graph, enrich relations, write atomically.

    Returns the number of triples in the merged graph.
    """
    graph = Graph()
    for path in files:
        graph.load(path, format="turtle")

    skosify = Skosify()

    # Enrichments: broader <-> narrower, related <-> related
    logger.debug("Skosify: Enriching relations")
    skosify.enrich_relations(graph, False, True, True)

    # Serialize to a temp file first, then rename for an atomic replace.
    tmp_name = out_file + ".tmp"
    with open(tmp_name, "w") as handle:
        graph.serialize(handle, format="turtle")
    os.rename(tmp_name, out_file)

    return len(graph)
예제 #7
0
    def stats(task):
        """Regenerate vocabulary statistics and append them to the stats JSON files.

        Loads the complete Turtle distribution, computes current stats,
        appends them to the history file and rewrites both the current-stats
        and history JSON files.
        """
        t0 = time.time()

        g = Graph()
        g.load('dist/realfagstermer.complete.ttl', format='turtle')

        # Fix: file handles from bare open() were never closed; use context
        # managers. Also dropped the unused 'now' alias.
        with open('realfagstermer.github.io/_data/stats.json', 'r') as fp:
            s = json.load(fp)
        current = stats_from_graph(g)
        current['ts'] = int(time.time())
        s.append(current)

        with open('realfagstermer.github.io/_data/stats_current.json', 'w') as fp:
            json.dump(current, fp, indent=2, sort_keys=True)
        with open('realfagstermer.github.io/_data/stats.json', 'w') as fp:
            json.dump(s, fp, indent=2, sort_keys=True)

        dt = time.time() - t0
        logger.info('Generated stats in %.1f seconds', dt)
예제 #8
0
def load_mappings_from_file(filenames, uri_filter='http'):
    """Collect SKOS mapping triples from Turtle files.

    Loads all *filenames* into a scratch graph, infers symmetric and
    hierarchical mappings, and returns a new graph holding only the
    mapping triples whose subject URI starts with *uri_filter*.
    """
    result = Graph()
    result.namespace_manager.bind('skos', SKOS)

    scratch = Graph()
    for fn in filenames:
        scratch.load(fn, format='turtle')
    skosify.infer.skos_symmetric_mappings(scratch, related=False)
    skosify.infer.skos_hierarchical_mappings(scratch, narrower=True)

    mapping_props = (SKOS.exactMatch, SKOS.closeMatch, SKOS.relatedMatch,
                     SKOS.broadMatch, SKOS.narrowMatch)
    for subj, pred, obj in scratch:
        if pred in mapping_props and subj.startswith(uri_filter):
            result.add((subj, pred, obj))
    return result
예제 #9
0
def eye(graphs, eye_path="eye", include_proof=False):
    """
    Process a set of graphs with EYE, and return the inferred triples.

    :param graphs: iterable of rdflib Graph objects fed to EYE via stdin
    :param eye_path: path to the EYE reasoner executable
    :param include_proof: when True, keep all triples (proof included)
    :return: tuple (inferred Graph, stderr log bytes)
    """
    # --pass-only-new emits only newly inferred triples; --pass-all keeps
    # everything, which is needed when the proof should be included.
    pass_opt = '--pass-only-new' if not include_proof else '--pass-all'
    # With a proof the output is full N3; otherwise plain Turtle.
    out_parser = 'turtle' if not include_proof else 'n3'

    # '-' tells EYE to read the input data from stdin.
    eyep = Popen([eye_path, '-', pass_opt],
                 stdin=PIPE,
                 stdout=PIPE,
                 stderr=PIPE)

    # Stream every input graph to EYE as N3, then close stdin to signal EOF.
    # NOTE(review): all of stdin is written before stdout is read; this can
    # deadlock if the OS pipe buffers fill on very large graphs — confirm.
    for graph in graphs:
        graph.serialize(eyep.stdin, format='n3')
    eyep.stdin.close()

    infered = Graph()
    infered.load(eyep.stdout, format=out_parser)
    log = eyep.stderr.read()  # EYE writes its run log to stderr
    eyep.wait()

    return infered, log
예제 #10
0
    def prepare(self):
        """Build the complete RDF graph for this vocabulary and return it.

        Steps: load include files, stamp the concept scheme with the current
        modification time, convert all vocabulary resources to triples,
        merge in external mappings for known concepts, then run Skosify
        post-processing. Returns {'graph': Graph}.
        """
        logger.info('Building RDF graph')

        graph = Graph()

        # Load every include file; RDF format is inferred from the extension.
        for inc in self.include:
            lg0 = len(graph)
            graph.load(inc, format=self.extFromFilename(inc))
            logger.info(' - Included {} triples from {}'.format(len(graph) - lg0, inc))

        # The included data must declare a skos:ConceptScheme; take the first
        # one found and fail loudly when there is none.
        try:
            scheme_uri = next(graph.triples((None, RDF.type, SKOS.ConceptScheme)))
        except StopIteration:
            raise Exception('Concept scheme URI could not be found in vocabulary scheme data')
        scheme_uri = scheme_uri[0]

        # Stamp the scheme with the current UTC time as dct:modified.
        now = datetime.utcnow().strftime('%Y-%m-%dT%H:%M:%SZ')
        graph.set((URIRef(scheme_uri), DCTERMS.modified, Literal(now, datatype=XSD.dateTime)))

        lg0 = len(graph)
        for resource in self.vocabulary.resources:
            self.convert_resource(graph, resource, self.vocabulary.resources, scheme_uri,
                                  self.vocabulary.default_language.alpha2)
        logger.info(' - Added {} triples'.format(len(graph) - lg0))

        # Only merge mapping triples whose subject is a concept we actually have.
        all_concepts = set([tr[0] for tr in graph.triples((None, RDF.type, SKOS.Concept))])
        for inc in self.mappings_from:
            lg0 = len(graph)
            mappings = self.load_mappings(inc)
            for tr in mappings.triples((None, None, None)):
                if tr[0] in all_concepts:
                    graph.add(tr)
            logger.info(' - Added {} mappings from {}'.format(len(graph) - lg0, inc))

        logger.info('Skosify...')
        self.skosify_process(graph)
        return {'graph': graph}
예제 #11
0
def enrich_and_concat(files, out_file):
    """Merge RDF files into one graph, infer relations, write atomically.

    The parse format is picked from the file extension (.nt / .ttl);
    anything else falls back to rdflib's own format guessing. Returns
    the number of triples in the merged graph.
    """
    graph = Graph()
    for src in files:
        if src.endswith('.nt'):
            graph.load(src, format='nt')
        elif src.endswith('.ttl'):
            graph.load(src, format='turtle')
        else:
            graph.load(src)

    logger.debug("Skosify: Enriching relations")
    skosify.infer.skos_hierarchical(graph, True)
    skosify.infer.skos_related(graph)

    # Serialize to a temp file first, then rename for an atomic replace.
    tmp_name = out_file + '.tmp'
    with open(tmp_name, 'wb+') as handle:
        graph.serialize(handle, format='turtle')
    os.rename(tmp_name, out_file)

    return len(graph)
예제 #12
0
    # NOTE(review): fragment of a Python 2 command-line entry point. The
    # names `args`, `cmd` and the alias `p` (presumably os.path — confirm)
    # are defined earlier in the file, outside this excerpt.
    if not args:
        print "USAGE: %s FILE [rdf...]" % p.basename(cmd)
        print "Where FILE is a local copy of <https://lagen.nu/1976:725>. Get it by doing e.g.:"
        print "  $ /usr/bin/curl -sk 'https://lagen.nu/1976:725' > /tmp/sfs-1976_725.xhtml"
        print
        print "If additional local rdf files are supplied, a diff of the " \
            "extracted data and the supplied data is output (instead of just the " \
            "extracted data)."
        exit()
    docpath = args[0]

    # Parse the downloaded document into an RDF graph.
    graph = fsdoc_to_graph(docpath)

    # Load any comparison RDF files given as extra arguments.
    from rdfextras.tools.pathutils import guess_format
    cmp_graph = Graph()
    for fpath in args[1:]:
        cmp_graph.load(fpath, format=guess_format(fpath))

    if cmp_graph:
        # Show only the statements that are new relative to the supplied RDF.
        from rdflib.compare import graph_diff
        in_both, in_first, in_second = graph_diff(graph, cmp_graph)
        print "# %s new statements:" % len(in_first)
        for pfx, uri in graph.namespaces():
            in_first.bind(pfx, uri)
        print in_first.serialize(format='n3')

    else:
        print "# Nothing to compare against. New RDF is:"
        print graph.serialize(format='n3')

예제 #13
0
def ttl2solr(infile, outfile, vocab_name=None):
    """Convert a SKOS Turtle file into a JSON list of Solr documents.

    :param infile: path to the Turtle file to load
    :param outfile: path of the JSON file to write
    :param vocab_name: optional vocabulary name stored on each document

    Uses module-level globals: schema (predicate -> JSON field), vocabs,
    get_breadcrumbs, text_type, SKOS, RDF, Graph, logger.
    """
    logger.info('ttl2solr: Loading %s', infile)
    g = Graph()
    g.load(infile, format='turtle')

    # Build parent lookup hash: concept URI -> set of broader-concept URIs
    parents = {}
    for c, p in g.subject_objects(SKOS.broader):
        c = text_type(c)  # to string
        p = text_type(p)  # to string
        if c not in parents:
            parents[c] = set()
        parents[c].add(p)

    # Build labels lookup hash using two fast passes
    labels = {}
    for c, p in g.subject_objects(SKOS.altLabel):
        labels[text_type(c)] = text_type(p)
    for c, p in g.subject_objects(SKOS.prefLabel):
        labels[text_type(c)] = text_type(p)  # overwrite altLabel with prefLabel if found

    docs = []
    unknown_preds = set()
    for uriref in g.subjects(RDF.type, SKOS.Concept):
        doc = {'id': text_type(uriref)}
        if vocab_name is not None:
            doc['vocabulary'] = vocab_name

        for pred, obj in g.predicate_objects(uriref):
            if pred not in schema:
                # Warn only once per unknown predicate.
                if pred not in unknown_preds:
                    logger.warning('Encountered unknown predicate with no mapping to JSON: %s', pred)
                    unknown_preds.add(pred)
                continue
            if pred == SKOS.inScheme and schema[pred] in vocabs:
                doc['vocab'] = vocabs[schema[pred]]
                continue
            if schema[pred] is None:
                continue
            if schema[pred] not in doc:
                doc[schema[pred]] = []

            doc[schema[pred]].append(text_type(obj))

        # Add breadcrumb paths built from broader-concept labels.
        bcs = []
        for bc in get_breadcrumbs([[text_type(uriref)]], parents):
            bc = [labels.get(x) for x in reversed(bc[1:])]
            bcs.append('/'.join([x for x in bc if x is not None]))
        doc['paths'] = bcs

        # Group ancestors by distance from the concept (level 0 = the concept).
        byLevel = [[text_type(uriref)]]  # Level 0
        level = 0
        while True:
            byLevel.append([])
            for x in byLevel[level]:
                byLevel[level + 1].extend(parents.get(x, set()))
            if len(byLevel[level + 1]) == 0:
                break
            level += 1

        for level, items in enumerate(byLevel[1:4]):
            # Some top-level concepts lack labels, e.g. 'http://data.ub.uio.no/ddc/19'
            doc['parentsLevel{}'.format(level + 1)] = [labels[x] for x in items if x in labels]

        docs.append(doc)
    logger.info('ttl2solr: Storing %d documents in %s', len(docs), outfile)
    # Bug fix: the output file handle was never closed; use a context manager.
    with open(outfile, 'w') as fp:
        json.dump(docs, fp, indent=2)
예제 #14
0
    def load(self, filename):
        """Load categories and mappings from an RDF file into the vocabulary.

        Note: This loader only loads categories and mappings — SKOS mapping
        triples, UOC category memberships, and ccmapper candidate counts
        and states. Concepts referenced but not present in the vocabulary
        are logged and skipped.
        """
        graph = Graph()
        graph.load(filename, format=self.extFromFilename(filename))

        logger.info('Read %d triples from %s', len(graph), filename)

        skosify.infer.skos_symmetric_mappings(graph, related=False)

        # Load mappings
        n_mappings = 0
        n_memberships = 0
        for tr in graph.triples_choices((None, [SKOS.exactMatch, SKOS.closeMatch, SKOS.broadMatch, SKOS.narrowMatch, SKOS.relatedMatch], None)):
            source_concept = tr[0]
            res_id = self.vocabulary.id_from_uri(source_concept)
            if res_id is not None:
                # e.g. 'exactMatch' from the predicate URI fragment
                shortName = str(tr[1]).split('#')[1]
                try:
                    self.vocabulary.resources[res_id].add('mappings.%s' % shortName, str(tr[2]))
                    n_mappings += 1
                except KeyError:
                    logger.warning('Concept not found: %s', res_id)

        # Load categories
        for tr in graph.triples((None, RDF.type, UOC.Category)):
            cat_lab = graph.preferredLabel(tr[0], lang='nb')[0][1].value
            cat_id = '' + tr[0]  # coerce the URIRef to a plain string

            cat = Concept().set_type('Category')
            cat.set('id', cat_id)
            cat.set('prefLabel.nb', Label(cat_lab))
            self.vocabulary.resources.load([cat])

            for tr2 in graph.triples((tr[0], SKOS.member, None)):
                uri = str(tr2[2])
                res_id = self.vocabulary.id_from_uri(uri)
                if res_id is not None:
                    try:
                        self.vocabulary.resources[res_id].add('memberOf', cat_id)
                        n_memberships += 1
                    except KeyError:
                        logger.warning('Concept not found: %s', res_id)

        # Load number of ccmapper mapping candidates
        # (dead 'shortName' assignments removed from the two loops below —
        # the value was computed but never used)
        for tr in graph.triples((None, LOCAL.ccmapperCandidates, None)):
            res_id = self.vocabulary.id_from_uri(tr[0])
            if res_id is not None:
                try:
                    self.vocabulary.resources[res_id].set('ccmapperCandidates', int(tr[2]))
                except KeyError:
                    logger.warning('Concept not found: %s', res_id)

        # Load ccmapper mapping state
        for tr in graph.triples((None, LOCAL.ccmapperState, None)):
            res_id = self.vocabulary.id_from_uri(tr[0])
            if res_id is not None:
                try:
                    self.vocabulary.resources[res_id].set('ccmapperState', tr[2])
                except KeyError:
                    logger.warning('Concept not found: %s', res_id)

        logger.info('Loaded %d mappings and %d category memberships from %s', n_mappings, n_memberships, filename)
예제 #15
0
def convert(infile, outfile):
    """Convert a SKOS Turtle file into a JSON list of documents.

    :param infile: path to the Turtle file to load
    :param outfile: path of the JSON file to write

    Uses module-level globals: schema (predicate -> JSON field), vocabs,
    text_type, SKOS, RDF, Graph, logger.
    """
    logger.debug('Loading %s', infile)
    g = Graph()
    g.load(infile, format='turtle')

    # Build parent lookup hash: concept URI -> set of broader-concept URIs
    logger.debug('Building parent lookup hash')
    parents = {}
    for c, p in g.subject_objects(SKOS.broader):
        c = text_type(c)  # to string
        p = text_type(p)  # to string
        if c not in parents:
            parents[c] = set()
        parents[c].add(p)

    # Build labels lookup hash using two fast passes
    logger.debug('Building labels lookup hash')
    labels = {}
    for c, p in g.subject_objects(SKOS.altLabel):
        labels[text_type(c)] = text_type(p)
    for c, p in g.subject_objects(SKOS.prefLabel):
        labels[text_type(c)] = text_type(p)  # overwrite altLabel with prefLabel if found

    logger.debug('Building documents')
    docs = []
    for uriref in g.subjects(RDF.type, SKOS.Concept):
        doc = {'id': text_type(uriref)}

        for pred, obj in g.predicate_objects(uriref):
            if pred not in schema:
                logger.error('Encountered unknown predicate with no mapping to JSON: %s', pred)
                continue
            if pred == SKOS.inScheme and schema[pred] in vocabs:
                doc['vocab'] = vocabs[schema[pred]]
                continue
            if schema[pred] is None:
                continue
            if schema[pred] not in doc:
                doc[schema[pred]] = []

            doc[schema[pred]].append(text_type(obj))

        # Add labels from broader concepts, grouped by distance from the
        # concept (level 0 = the concept itself).
        byLevel = [[text_type(uriref)]]  # Level 0
        level = 0
        while True:
            byLevel.append([])
            for x in byLevel[level]:
                byLevel[level + 1].extend(parents.get(x, set()))
            if len(byLevel[level + 1]) == 0:
                break
            level += 1

        for level, items in enumerate(byLevel[1:-1]):
            # Some top-level concepts lack labels, e.g. 'http://data.ub.uio.no/ddc/19'
            doc['parentsLevel{}'.format(level)] = [labels[x] for x in items if x in labels]

        docs.append(doc)
    logger.debug('Generated %d documents', len(docs))

    logger.debug('Saving %s', outfile)
    # Bug fix: the output file handle was never closed; use a context manager.
    with open(outfile, 'w') as fp:
        json.dump(docs, fp, indent=2)
예제 #16
0

# Namespaces for the W3C test-results ontology and FOAF.
RESULT = Namespace("http://www.w3.org/2002/03owlt/resultsOntology#")
FOAF = Namespace("http://xmlns.com/foaf/0.1/")

# Graph describing the system under test (RDFLib itself).
results = Graph()

system = BNode("system")
results.add((system, FOAF["homepage"], URIRef("http://rdflib.net/")))
results.add((system, RDFS.label, Literal("RDFLib")))
results.add((system, RDFS.comment, Literal("")))

if __name__ == "__main__":
    # Python 2 script (uses the old 'except ExcType, name' syntax below).
    manifest = Graph()
    manifest.load(
        cached_file(
            "http://www.w3.org/2000/10/rdf-tests/rdfcore/Manifest.rdf"))
    import sys, getopt
    try:
        optlist, args = getopt.getopt(sys.argv[1:], 'h:', ["help"])
    except getopt.GetoptError, msg:
        write(msg)
        #usage()

    try:
        argv = sys.argv
        for arg in sys.argv[1:]:
            verbose = 1
            case = URIRef(arg)
            write(u"Testing: %s" % case)
            # NOTE(review): the snippet is truncated here — the body of the
            # following 'if' is missing from the visible source.
            if (case, RDF.type, TEST["PositiveParserTest"]) in manifest:
0
from __future__ import print_function

# Third-party
from rdflib import Literal, Namespace
from rdflib.graph import Graph


NS_FOAF = Namespace("http://xmlns.com/foaf/0.1/")
NS_EXIF = Namespace("http://www.w3.org/2003/12/exif/ns#")

# Source index of images, with common namespace prefixes bound.
index = Graph()
index.bind("cc", "http://creativecommons.org/ns#")
index.bind("dc", "http://purl.org/dc/elements/1.1/")
index.bind("dcq", "http://purl.org/dc/terms/")
index.bind("rdf", "http://www.w3.org/1999/02/22-rdf-syntax-ns#")
index.bind("foaf", "http://xmlns.com/foaf/0.1/")
index.load('./rdf/index.rdf')

# Output graph holding exif:width / exif:height statements per logo image.
output = Graph()
output.bind("foaf", "http://xmlns.com/foaf/0.1/")
output.bind("exif", "http://www.w3.org/2003/12/exif/ns#")

for img in index.objects(None, NS_FOAF.logo):
    print(img)
    # Image URIs end in '<width>x<height>.png'; parse the dimensions out.
    width, height = img[:-len('.png')].split('/')[-1].split('x')
    output.add((img, NS_EXIF.width, Literal(width)))
    output.add((img, NS_EXIF.height, Literal(height)))

# Bug fix: the Python-2-only file() builtin (removed in Python 3, at odds
# with the print_function import above) left the handle unclosed; use a
# context-managed open() instead.
with open('./rdf/images.rdf', 'w') as handle:
    handle.write(output.serialize(format="pretty-xml", max_depth=2))
예제 #18
0
File: nt.py — Project: bcroq/rdfextras
 # NOTE(review): scrape artifact — this oddly one-space-indented test method
 # and the module-level diff script below were glued together from
 # different source files; the method cannot run as written here.
 def testModel(self):
     g = Graph()
     g.load("http://www.w3.org/2000/10/rdf-tests/rdfcore/rdfms-empty-property-elements/test002.nt", format="nt")
import codecs

MADS = Namespace('http://www.loc.gov/mads/rdf/v1#')

# Root logger at DEBUG; the console handler filters down to INFO.
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)
formatter = logging.Formatter('[%(asctime)s %(levelname)s] %(message)s')

console_handler = logging.StreamHandler()
console_handler.setLevel(logging.INFO)
console_handler.setFormatter(formatter)
logger.addHandler(console_handler)

# Load the removed/added triple sets and the new full vocabulary.
logging.info('Loading removed.nt')
removed = Graph()
removed.load('realfagstermer/removed.nt', format='nt')

logging.info('Loading added.nt')
added = Graph()
added.load('realfagstermer/added.nt', format='nt')

logging.info('Loading realfagstermer.new.nt as <current>')
current = Graph()
current.load('realfagstermer/realfagstermer.new.nt', format='nt')
current.namespace_manager.bind('skos', SKOS)
current.namespace_manager.bind('mads', MADS)
current.namespace_manager.bind('dct', DCTERMS)


# NOTE(review): the snippet is truncated here; the computation that
# follows this log line is outside the visible source.
logging.info('Computing')
예제 #20
0
def ttl2solr(infile, outfile, vocab_name=None):
    """Convert a SKOS Turtle file into a JSON list of Solr documents.

    :param infile: path to the Turtle file to load
    :param outfile: path of the JSON file to write
    :param vocab_name: optional vocabulary name stored on each document

    Uses module-level globals: schema (predicate -> JSON field), vocabs,
    get_breadcrumbs, text_type, SKOS, RDF, Graph, logger.
    """
    logger.info('ttl2solr: Loading %s', infile)
    g = Graph()
    g.load(infile, format='turtle')

    # Build parent lookup hash: concept URI -> set of broader-concept URIs
    parents = {}
    for c, p in g.subject_objects(SKOS.broader):
        c = text_type(c)  # to string
        p = text_type(p)  # to string
        if c not in parents:
            parents[c] = set()
        parents[c].add(p)

    # Build labels lookup hash using two fast passes
    labels = {}
    for c, p in g.subject_objects(SKOS.altLabel):
        labels[text_type(c)] = text_type(p)
    for c, p in g.subject_objects(SKOS.prefLabel):
        labels[text_type(c)] = text_type(p)  # overwrite altLabel with prefLabel if found

    docs = []
    unknown_preds = set()
    for uriref in g.subjects(RDF.type, SKOS.Concept):
        doc = {'id': text_type(uriref)}
        if vocab_name is not None:
            doc['vocabulary'] = vocab_name

        for pred, obj in g.predicate_objects(uriref):
            if pred not in schema:
                # Warn only once per unknown predicate.
                if pred not in unknown_preds:
                    logger.warning('Encountered unknown predicate with no mapping to JSON: %s', pred)
                    unknown_preds.add(pred)
                continue
            if pred == SKOS.inScheme and schema[pred] in vocabs:
                doc['vocab'] = vocabs[schema[pred]]
                continue
            if schema[pred] is None:
                continue
            if schema[pred] not in doc:
                doc[schema[pred]] = []

            doc[schema[pred]].append(text_type(obj))

        # Add breadcrumb paths built from broader-concept labels.
        bcs = []
        for bc in get_breadcrumbs([[text_type(uriref)]], parents):
            bc = [labels.get(x) for x in reversed(bc[1:])]
            bcs.append('/'.join([x for x in bc if x is not None]))
        doc['paths'] = bcs

        # Group ancestors by distance from the concept (level 0 = the concept).
        byLevel = [[text_type(uriref)]]  # Level 0
        level = 0
        while True:
            byLevel.append([])
            for x in byLevel[level]:
                byLevel[level + 1].extend(parents.get(x, set()))
            if len(byLevel[level + 1]) == 0:
                break
            level += 1

        for level, items in enumerate(byLevel[1:4]):
            # Some top-level concepts lack labels, e.g. 'http://data.ub.uio.no/ddc/19'
            doc['parentsLevel{}'.format(level + 1)] = [labels[x] for x in items if x in labels]

        docs.append(doc)
    logger.info('ttl2solr: Storing %d documents in %s', len(docs), outfile)
    # Bug fix: the output file handle was never closed; use a context manager.
    with open(outfile, 'w') as fp:
        json.dump(docs, fp, indent=2)
예제 #21
0
# Namespaces for the W3C test-results ontology and FOAF.
RESULT = Namespace("http://www.w3.org/2002/03owlt/resultsOntology#")
FOAF = Namespace("http://xmlns.com/foaf/0.1/")


# Graph describing the system under test (RDFLib itself).
results = Graph()

system = BNode("system")
results.add((system, FOAF["homepage"], URIRef("http://rdflib.net/")))
results.add((system, RDFS.label, Literal("RDFLib")))
results.add((system, RDFS.comment, Literal("")))


if __name__ == "__main__":
    # Python 2 script (uses the old 'except ExcType, name' syntax below).
    manifest = Graph()
    manifest.load(cached_file(
        "http://www.w3.org/2000/10/rdf-tests/rdfcore/Manifest.rdf"))
    import sys
    import getopt
    try:
        optlist, args = getopt.getopt(sys.argv[1:], 'h:', ["help"])
    except getopt.GetoptError, msg:
        write(msg)
        # usage()

    try:
        argv = sys.argv
        for arg in sys.argv[1:]:
            verbose = 1
            case = URIRef(arg)
            write(u"Testing: %s" % case)
            # NOTE(review): the snippet is truncated here — the body of the
            # following 'if' is missing from the visible source.
            if (case, RDF.type, TEST["PositiveParserTest"]) in manifest: