def profile_people():
    '''Generate a list of :class:`RdfPerson` with profiles on the site.

    Queries the RDF dataset for all ``schema:Person`` resources with a
    family name whose URI was minted on the current site domain, ordered
    by family name.
    '''
    g = rdf_data()
    start = time.time()
    # restrict results to profile URIs based on the current site's domain
    current_site = Site.objects.get(id=settings.SITE_ID)
    res = g.query('''
        PREFIX schema: <%(schema)s>
        PREFIX rdf: <%(rdf)s>
        SELECT DISTINCT ?person
        WHERE {
          ?person rdf:type schema:Person .
          ?person schema:familyName ?name .
          FILTER regex(str(?person), "^http://%(site)s")
        } ORDER BY ?name
        ''' % {'schema': rdfns.SCHEMA_ORG, 'rdf': rdflib.RDF,
               'site': current_site.domain}
        )
    # FIXME:  should be possible to filter at this level
    # on presence of a dbpedia description or a local schema description
    # but can't get the query to work...

    # pass lazy % args so the message is only formatted when debug logging
    # is actually enabled
    logger.debug('Found %d people in %.02f sec', len(res),
                 time.time() - start)
    return [RdfPerson(g, r['person']) for r in res]
    def handle(self, *args, **options):
        graph = rdf_data()
        verbosity = options.get('verbosity', self.v_normal)

        if verbosity >= self.v_normal:
            print >> sys.stderr, "Exporting %d triples" % len(graph)

        print graph.serialize(format=options['format'])
    def handle(self, filename, *args, **options):
        '''Load RDF triples from a file into the site RDF graph and report
        how many were added.'''
        graph = rdf_data()
        # capture size before parsing so we can report the number added
        initial_size = len(graph)
        graph.parse(filename, format=options['format'])
        if options.get('verbosity', self.v_normal) >= self.v_normal:
            print >> sys.stderr, "Loaded %d triples" % (len(graph) - initial_size)
def egograph_node_info(request, id):
    """HTML snippet to provide information about a node in the egograph.
    Intended to be loaded and displayed via AJAX.

    Some overlap with :meth:`belfast.network.views.node_info`.

    :param id: slug identifying the ego-center person
    :raises Http404: if the ``id`` url parameter is missing or does not
        identify a node in the network graph
    """

    # id is the person to whom this node is connected (the ego-center)
    uri = local_uri(reverse("people:profile", args=[id]), request)
    g = rdf_data()
    ego_person = RdfPerson(g, rdflib.URIRef(uri))

    # NOTE: some overlap here with networks node_info view

    # id query parameter is the node we want information about
    node_id = request.GET.get("id", None)
    if node_id is None:
        raise Http404

    node_uri = rdflib.URIRef(node_id)
    # TODO: better to get relations from gexf or rdf ?
    graph = gexf.read_gexf(settings.GEXF_DATA["full"])
    # 404 instead of an uncaught KeyError (HTTP 500) for an unknown node id
    node = graph.node.get(node_id)
    if node is None:
        raise Http404
    context = {"node": node}

    if node.get("type", None) == "Person":
        # init rdf person for display in the template
        person = RdfPerson(rdf_data(), rdflib.URIRef(node_id))
        context["person"] = person

    # determine relation between node and ego-center
    rels = set(g.predicates(ego_person.identifier, node_uri))
    # TODO: may want to display other relationships?

    # special case: if "mentions", should be a poem; find for display/link
    if rdfns.SCHEMA_ORG.mentions in rels:
        txts = set(g.subjects(rdfns.SCHEMA_ORG.mentions, node_uri)) - set([ego_person.identifier])
        if txts:
            poems = [RdfPoem(g, p) for p in txts]
            # explicitly skip any non-poems, just in case
            context["poems"] = [p for p in poems if rdfns.FREEBASE["book/poem"] in p.rdf_types]

    return render(request, "network/node_info.html", context)
    def connections(self, rdftype=None, resource=rdflib.resource.Resource):
        '''Generate a list of connected entities (direct neighbors in the
        network graph), each paired with the set of relationship terms
        (edge labels) and a connection weight, sorted so the strongest
        connections come first.

        Optionally, takes an RDF type to filter the entities (e.g., restrict
        only to People or Organizations), and a subclass of
        :class:`rdflib.resource.Resource` to initialize the entity as.

        :returns: list of ``(resource, (set of labels, weight))`` tuples;
            empty list if this node is not in the network
        '''
        network = network_data()
        graph = rdf_data()

        # not in the network at all: no connections
        # (previously returned {} here while returning a list below;
        # use a list for a consistent return type)
        if self.nx_node_id not in network.nodes():
            return []

        # this also works...
        # neighbors = network.neighbors(self.nx_node_id)
        ego_graph = self.ego_graph()
        neighbors = ego_graph.nodes()

        connections = {}
        for node in neighbors:
            weight = 0
            # don't include the current person in their own connections
            if node == self.nx_node_id:
                continue

            uriref = rdflib.URIRef(node)
            # if an rdf type was specified, filter out items that do not
            # match that type.
            if rdftype is not None and \
               (uriref, rdflib.RDF.type, rdftype) not in graph:
                continue

            res = resource(graph, uriref)
            rels = set()
            # find any edges between this node and me
            # include data to simplify accessing edge label
            # use edges & labels from original multidigraph
            all_edges = network.out_edges(node, data=True) + \
                network.in_edges(node, data=True)

            for edge in all_edges:
                src, target, data = edge
                # NOTE: the membership test matches self.nx_node_id against
                # either endpoint of the (src, target, data) tuple
                if self.nx_node_id in edge and 'label' in data:
                    weight += data.get('weight', 1)  # assume default of 1 if not set
                    rels.add(data['label'])

            # 'correspondedWith' implies 'knows'; drop the redundant label
            if 'knows' in rels and 'correspondedWith' in rels:
                rels.remove('knows')

            connections[res] = (rels, weight)

        # sort by weight so strongest connections will be listed first
        return sorted(connections.items(), key=lambda x: x[1][1], reverse=True)
def egograph_js(request, id):
    "Egograph information as JSON for a single person."
    profile_uri = local_uri(reverse("people:profile", args=[id]), request)
    rdf = rdf_data()
    person = RdfPerson(rdf, rdflib.URIRef(profile_uri))
    ego = person.ego_graph(radius=1, types=["Person", "Organization", "Place"])

    # annotate nodes in graph with degree
    #  NOTE: not a directional graph, so in/out degree not available
    metrics = ["degree", "in_degree", "out_degree",
               "betweenness_centrality", "eigenvector_centrality"]
    ego = annotate_graph(ego, fields=metrics)

    payload = json.dumps(json_graph.node_link_data(ego))
    return HttpResponse(payload, content_type="application/json")
def profile(request, id):
    "Display a profile page for a single person associated with the Belfast Group."
    profile_uri = local_uri(reverse("people:profile", args=[id]), request)
    graph = rdf_data()
    person_ref = rdflib.URIRef(profile_uri)
    # the generated URI must correspond to a person in our rdf dataset;
    # if not, 404
    if (person_ref, rdflib.RDF.type, rdfns.SCHEMA_ORG.Person) not in graph:
        raise Http404

    context = {
        "person": RdfPerson(graph, person_ref),
        # TODO: move groupsheet lookup to rdfperson class
        "groupsheets": get_rdf_groupsheets(author=profile_uri),
        "page_rdf_type": "schema:ProfilePage",
    }
    return render(request, "people/profile.html", context)
def _network_graph(min_degree=1, **kwargs):
    '''Build a filtered copy of the site network graph.

    Keeps only nodes of selected types, infers author-to-group edges from
    Belfast Group sheets (removing the sheet nodes themselves), and then
    drops nodes below the requested minimum degree.

    :param min_degree: minimum degree a node must have to stay in the graph
    :returns: the filtered :mod:`networkx` graph copy
    '''
    graph = network_data().copy()  # don't modify the original network

    rdfgraph = rdf_data()
    # filter graph by type of node
    types = ["Person", "Organization", "Place", "BelfastGroupSheet"]

    # NOTE(review): removing nodes while iterating graph.nodes() assumes
    # nodes() returns a list (networkx 1.x); a networkx 2.x view would
    # raise during mutation -- confirm the pinned networkx version
    for n in graph.nodes():
        if "type" not in graph.node[n] or graph.node[n]["type"] not in types:
            graph.remove_node(n)
            continue

        # use groupsheets to infer a connection between the author
        # of the groupsheet and the group itself
        # FIXME: this needs to be in data prep/clean, NOT here
        # TODO: should be handled in prep now; confirm and then remove this logic
        if graph.node[n]["type"] == "BelfastGroupSheet":

            sheet = RdfGroupSheet(rdfgraph, rdflib.URIRef(n))
            # FIXME: error handling when author is not in the graph?
            # should probably at least log this...
            if sheet.author and unicode(sheet.author.identifier) in graph:
                graph.add_edge(unicode(sheet.author.identifier), BELFAST_GROUP_URI, weight=4)

            # remove the groupsheet itself from the network, to avoid
            # cluttering up the graph with too much information
            # graph.add_edge(n, BELFAST_GROUP_URI, weight=5)
            graph.remove_node(n)

    # AFTER filtering by type, filter out by requested minimum degree

    # NOTE(review): this is a single pass, so removals may drop other
    # nodes' degrees below the threshold without re-checking them --
    # presumably intentional; confirm before changing
    removed = 0
    for n in graph.nodes():
        if graph.degree(n) < min_degree:
            removed += 1
            graph.remove_node(n)

    logger.info("removed %d nodes with degree less than %d" % (removed, min_degree))

    return graph
def node_info(request):
    """Return an HTML snippet with brief information about a node in the
    network (e.g., name, number of Group sheets, link to profile page
    if there is one).  Intended to be called via AJAX and displayed with
    the network graphs.

    Expects a url parameter ``id`` with the node identifier.

    :raises Http404: if ``id`` is missing or not a node in the graph
    """
    node_id = request.GET.get("id", None)
    # if no id is specified, 404
    if node_id is None:
        raise Http404
    # TODO: better to get from gexf or rdf ?
    graph = gexf.read_gexf(settings.GEXF_DATA["full"])
    # 404 instead of an uncaught KeyError (HTTP 500) for an unknown node id
    node = graph.node.get(node_id)
    if node is None:
        raise Http404
    context = {"node": node}
    if node.get("type", None) == "Person":
        # init rdf person for display in the template
        person = RdfPerson(rdf_data(), rdflib.URIRef(node_id))
        context["person"] = person
    # TODO: handle other types? location, organization
    return render(request, "network/node_info.html", context)
def find_places():
    'Generate a list of :class:`RdfLocation` associated with Belfast Group people.'
    graph = rdf_data()
    # every subject typed as a schema.org Place becomes an RdfLocation
    place_subjects = graph.subjects(predicate=rdflib.RDF.type,
                                    object=rdfns.SCHEMA_ORG.Place)
    return [RdfLocation(graph, subject) for subject in place_subjects]
def BelfastGroup():
    '''Convenience function to initialize and return an
    :class:`RdfOrganization` for the Belfast Group.'''
    graph = rdf_data()
    return RdfOrganization(graph, rdfns.BELFAST_GROUP_URIREF)
 def rdfcollection(self):
     ''':class:`belfast.groupsheets.rdfmodels.RdfArchivalCollection` this
     image came from, if :attr:`collection_uri` is set; otherwise None.'''
     if self.collection_uri is None:
         return None
     return RdfArchivalCollection(rdf_data(), rdflib.URIRef(self.collection_uri))
 def rdfperson(self):
     'Associated :class:`~belfast.people.rdfmodels.RdfPerson` for :attr:`person_uri`.'
     uri = rdflib.URIRef(self.person_uri)
     return RdfPerson(rdf_data(), uri)
    def handle(self, *args, **options):
        '''Run the RDF data preparation pipeline: harvest, QUB conversion,
        groupsheet identification, URI smushing, annotation, connection
        inference, and GEXF network graph generation.

        If one or more individual step options are passed, only those steps
        run; otherwise every step runs in order.  The ``clear`` option wipes
        the existing RDF database before running.
        '''
        self.verbosity = options['verbosity']

        # harvest from the current configured site
        current_site = Site.objects.get(id=settings.SITE_ID)
        self.harvest_urls.extend(['http://%s/groupsheets/%s/' % (current_site.domain.rstrip('/'), i)
                                  for i in self.tei_ids])

        # if specific steps are specified, run only those
        # otherwise, run all steps
        all_steps = not any([options['harvest'], options['queens'],
                             options['related'], options['smush'],
                             options['gexf'], options['identify'],
                             options['connect']])

        # initialize graph persistence
        # graph = rdflib.ConjunctiveGraph('Sleepycat')
        # graph.open(settings.RDF_DATABASE, create=True)

        graph = rdf_data()


        # if clear is specified, remove the entire db
        if options['clear']:
            if self.verbosity >= self.v_normal:
                print 'Removing %d contexts and %d triples from the current RDF graph' % \
                      (len(list(graph.contexts())), len(graph))
            # can't find a reliable way to remove all triples and contexts
            # so close the graph, remove everything, and start over
            graph.close()
            shutil.rmtree(settings.RDF_DATABASE)
            graph.open(settings.RDF_DATABASE, create=True)

        if all_steps or options['harvest']:
            self.stdout.write('-- Harvesting RDF from EmoryFindingAids related to the Belfast Group')
            # inaccurate; also harvesting tei from local site

            HarvestRdf(self.harvest_urls,
                       find_related=True, verbosity=self.verbosity,
                       graph=graph, no_cache=options['no_cache'])
            # local info from RDF data - additional bios, Group sheet in private collection
            self.stdout.write('-- Adding RDF data from local fixtures')
            LocalRDF(graph, self.local_rdf_fixtures)

        if all_steps or options['queens']:
            self.stdout.write('-- Converting Queens University Belfast Group collection description to RDF')
            QUB(self.QUB_input, verbosity=self.verbosity, graph=graph,
                url=QUB.QUB_BELFAST_COLLECTION)

        if all_steps or options['identify']:
            # identify groupsheets in the data and add local groupsheet type if not present
            self.stdout.write('-- Identifying groupsheets')
            IdentifyGroupSheets(graph)

        if all_steps or options['smush']:
            # smush any groupsheets in the data
            self.stdout.write('-- Smushing groupsheet URIs and generating local profile URIs')
            # NOTE: might be nice to smush *after* cleaning up author names, but for some reason
            # that results in a number of authors/groupsheets getting dropped
            SmushGroupSheets(graph)
            ProfileUris(graph)

        if all_steps or options['related']:
            self.stdout.write('-- Annotating graph with related information from VIAF, GeoNames, and DBpedia')
            Annotate(graph)

        if all_steps or options['connect']:
            # infer connections
            self.stdout.write('-- Inferring connections: groupsheet time period, owner, authors affiliated with group')
            InferConnections(graph)
            # TODO: groupsheet owner based on source collection

        if all_steps or options['gexf']:
            # generate gexf
            self.stdout.write('-- Generating network graphs and saving as GEXF')
            nx.Rdf2Gexf(graph, settings.GEXF_DATA['full'])
            nx.BelfastGroupGexf(graph, settings.GEXF_DATA['bg1'])

        # set last-modification time so the site can report data freshness
        set_site_lastmodified(graph)
        graph.close()