Example #1
0
    def testBinding(self) -> None:
        """Check that ``bind`` links a Python class and a datatype URI both ways.

        Two throw-away classes are registered: ``a`` with no explicit
        constructor/lexicalizer, and ``b`` with an explicit lexicalizer.
        """

        class a:
            def __init__(self, v: str) -> None:
                # Drop the three-character wrappers on either side.
                self.v = v[3:-3]

            def __str__(self) -> str:
                return "<<<%s>>>" % self.v

        datatype_a = rdflib.URIRef("urn:dt:a")
        bind(datatype_a, a)

        # Python value -> Literal: the bound datatype is picked up.
        value_a = a("<<<2>>>")
        literal_from_value = Literal(value_a, normalize=True)
        self.assertEqual(literal_from_value.value, value_a)
        self.assertEqual(literal_from_value.datatype, datatype_a)

        # Lexical form + datatype -> Literal: the value is an ``a`` instance.
        literal_from_lexical = Literal("<<<2>>>", datatype=datatype_a)
        self.assertIsInstance(literal_from_lexical.value, a)
        self.assertEqual(literal_from_lexical.value.v, value_a.v)

        class b:
            def __init__(self, v: str) -> None:
                self.v = v[3:-3]

            def __str__(self) -> str:
                return "B%s" % self.v

        def wrap(x):
            # Explicit lexicalizer handed to ``bind`` for class ``b``.
            return "<<<%s>>>" % x

        datatype_b = rdflib.URIRef("urn:dt:b")
        bind(datatype_b, b, None, wrap)

        value_b = b("<<<3>>>")
        literal_b = Literal(value_b, normalize=True)
        self.assertEqual(literal_b.value, value_b)
        self.assertEqual(literal_b.datatype, datatype_b)
Example #2
0
def rdf_bind_to_string(rdf_type):
    """Bind ``rdf_type`` to the running interpreter's text type.

    Python 2/3 compatibility shim over ``rdflib.term.bind``: the term is
    bound to ``unicode`` on Python 2 and to ``str`` on Python 3.

    """
    if sys.version_info < (3, ):
        text_type = unicode  # noqa
    else:
        text_type = str
    bind(rdf_type, text_type)
Example #3
0
    def testBinding(self):
        """Round-trip two custom classes through ``bind``-registered datatypes."""

        class a:
            def __init__(self, v):
                # Keep only the payload between the "<<<" and ">>>" markers.
                self.v = v[3:-3]

            def __str__(self):
                return '<<<%s>>>' % self.v

        uri_a = rdflib.URIRef('urn:dt:a')
        bind(uri_a, a)

        obj_a = a("<<<2>>>")
        lit_a = Literal(obj_a, normalize=True)
        self.assertEqual(lit_a.value, obj_a)
        self.assertEqual(lit_a.datatype, uri_a)

        parsed_a = Literal("<<<2>>>", datatype=uri_a)
        self.assertTrue(isinstance(parsed_a.value, a))
        self.assertEqual(parsed_a.value.v, obj_a.v)

        class b:
            def __init__(self, v):
                self.v = v[3:-3]

            def __str__(self):
                return 'B%s' % self.v

        def to_lexical(x):
            # Lexicalizer passed explicitly as the fourth ``bind`` argument.
            return '<<<%s>>>' % x

        uri_b = rdflib.URIRef('urn:dt:b')
        bind(uri_b, b, None, to_lexical)

        obj_b = b("<<<3>>>")
        lit_b = Literal(obj_b, normalize=True)
        self.assertEqual(lit_b.value, obj_b)
        self.assertEqual(lit_b.datatype, uri_b)
Example #4
0
    def testSpecificBinding(self) -> None:
        """Check a ``datatype_specific`` binding against plain and typed literals.

        ``str`` is bound to a custom datatype with wrapping/unwrapping helpers;
        an untyped literal must stay untouched while a literal carrying the
        datatype must be unwrapped by ``toPython()``.
        """

        def unlexify(s: str) -> str:
            return s[2:-2]

        def lexify(s: str) -> str:
            return "--%s--" % s

        datatype = rdflib.URIRef("urn:dt:mystring")

        # Register the rule for this datatype only.
        bind(datatype, str, unlexify, lexify, datatype_specific=True)

        text = "Hello"

        # Untyped literal: no datatype, value passes through unchanged.
        plain = Literal(text)
        self.assertEqual(str(plain), text)
        self.assertEqual(plain.toPython(), text)
        self.assertEqual(plain.datatype, None)

        # Typed literal: lexical form keeps the dashes, Python value drops them.
        wrapped = lexify(text)
        typed = Literal(wrapped, datatype=datatype)
        self.assertEqual(str(typed), wrapped)
        self.assertEqual(typed.toPython(), text)
        self.assertEqual(typed.datatype, datatype)
Example #5
0
you can also add your own.

This example shows how :meth:`rdflib.term.bind` lets you register new
mappings between literal datatypes and Python objects.

"""

from rdflib import Graph, Literal, Namespace, XSD
from rdflib.term import bind

if __name__ == '__main__':

    # Complex numbers are not registered by default.
    # No custom constructor/serializer is needed, since
    # complex('(2+3j)') works fine.
    bind(XSD.complexNumber, complex)

    ns = Namespace("urn:my:namespace:")

    c = complex(2, 3)

    # Wrap the Python complex value in an RDF literal.
    l = Literal(c)

    # Store the literal as the object of a single triple.
    g = Graph()
    g.add((ns.mysubject, ns.myprop, l))

    # Serialize the graph as N3 text.
    n3 = g.serialize(format='n3')

    # Round-trip through N3, starting from a second, empty graph.

    g2 = Graph()
Example #6
0
from six import with_metaclass
from rdflib.term import URIRef
from rdflib.term import bind

from gutenberg._domain_model.exceptions import UnsupportedFeatureException
from gutenberg._domain_model.types import validate_etextno
from gutenberg._util.abc import abstractclassmethod
from gutenberg._util.objects import all_subclasses
from gutenberg.acquire.metadata import load_metadata

import sys

# Register Project Gutenberg's Language datatype, mapped to the text type of
# the running interpreter (``unicode`` on Python 2, ``str`` on Python 3).
bind(URIRef('http://purl.org/dc/terms/RFC4646'),
     unicode if sys.version_info < (3, ) else str)


def get_metadata(feature_name, etextno):
    """Looks up the value of a meta-data feature for a given text.

    Arguments:
        feature_name (str): The name of the meta-data to look up.
        etextno (int): The identifier of the Gutenberg text for which to look
            up the meta-data.

    Returns:
        frozenset: The values of the meta-data for the text or an empty set if
            the text does not have meta-data associated with the feature.

# RDF vocabulary namespaces, one module-level constant per vocabulary.
DCT = Namespace("http://purl.org/dc/terms/")
DCAT = Namespace("http://www.w3.org/ns/dcat#")
ADMS = Namespace("http://www.w3.org/ns/adms#")
VCARD = Namespace("http://www.w3.org/2006/vcard/ns#")
FOAF = Namespace("http://xmlns.com/foaf/0.1/")
SCHEMA = Namespace('http://schema.org/')
# NOTE(review): unlike its neighbours this URI has no trailing '#' or '/', so
# terms built from it are appended directly to '.../2006/time' — confirm that
# this is intended before relying on TIME.<term>.
TIME = Namespace('http://www.w3.org/2006/time')
LOCN = Namespace('http://www.w3.org/ns/locn#')
GSP = Namespace('http://www.opengis.net/ont/geosparql#')
OWL = Namespace('http://www.w3.org/2002/07/owl#')
SPDX = Namespace('http://spdx.org/rdf/terms#')
HYDRA = Namespace('http://www.w3.org/ns/hydra/core#')

# Map literals whose datatype is the GeoSPARQL asWKT URI to plain ``str``.
# NOTE(review): the datatype is passed as a plain string rather than a URIRef;
# verify that rdflib's datatype lookup matches it as expected.
bind(datatype='http://www.opengis.net/ont/geosparql#asWKT', pythontype=str)


NAMESPACES = {
    'dct': DCT,
    'dcat': DCAT,
    'adms': ADMS,
    'vcard': VCARD,
    'foaf': FOAF,
    'schema': SCHEMA,
    'time': TIME,
    'skos': SKOS,
    'locn': LOCN,
    'gsp': GSP,
    'owl': OWL,
    'spdx': SPDX,
Example #8
0
you can also add your own.

This example shows how :meth:`rdflib.term.bind` lets you register new
mappings between literal datatypes and Python objects
"""


from rdflib import Graph, Literal, Namespace, XSD
from rdflib import term

if __name__ == "__main__":

    # Complex numbers are not registered by default
    # No custom constructor/serializer needed since
    # complex('(2+3j)') works fine
    term.bind(XSD.complexNumber, complex)

    # Create a complex number RDFlib Literal
    EG = Namespace("http://example.com/")
    c = complex(2, 3)
    l = Literal(c)

    # Add it to a graph
    g = Graph()
    g.add((EG.mysubject, EG.myprop, l))
    # Print the triple to see what it looks like
    print(list(g)[0])
    # prints: (
    #           rdflib.term.URIRef('http://example.com/mysubject'),
    #           rdflib.term.URIRef('http://example.com/myprop'),
    #           rdflib.term.Literal(
Example #9
0
from json import loads, dumps

from rdflib.term import Literal, bind, Identifier, URIRef
import six

from .quantity import Quantity
from .utils import FCN
from . import BASE_SCHEMA_URL
# Quantity literals: parsed with Quantity.parse, written with Quantity.__str__.
bind(URIRef(BASE_SCHEMA_URL + '/datatype/quantity'),
     Quantity,
     constructor=Quantity.parse,
     lexicalizer=Quantity.__str__)

# Python lists travel as JSON text (json.loads / json.dumps).
bind(URIRef(BASE_SCHEMA_URL + '/datatype/list'),
     list,
     constructor=loads,
     lexicalizer=dumps)
# Python dicts also travel as JSON text; keys are sorted on output
# (presumably so equal dicts yield the same lexical form — confirm).
bind(URIRef(BASE_SCHEMA_URL + '/datatype/object'),
     dict,
     constructor=loads,
     lexicalizer=lambda x: dumps(x, sort_keys=True))

# XXX: RDFLib should take the first match for a Python type, so list values
# are expected to be written with the BASE_SCHEMA_URL list datatype registered
# earlier; this registers the same JSON conversion under the markw.cc list URI.
bind(URIRef('http://markw.cc/yarom/schema/datatype/list'),
     list,
     constructor=loads,
     lexicalizer=dumps)
bind(URIRef('http://markw.cc/yarom/schema/datatype/object'),
     dict,
     constructor=loads,
Example #10
0
"""
from rdflib import Graph, Literal
from rdflib.namespace import Namespace, RDF, DCTERMS
from rdflib.term import bind

from pyspark.sql import SparkSession, SQLContext, Row
from pyspark import SparkContext

import sys, csv, io, json, boto3, re
""" Edit these variables! """
# RDF namespace for the Project Gutenberg terms vocabulary.
PGTERMS = Namespace(u'http://www.gutenberg.org/2009/pgterms/')
# Placeholder bucket name; every path below is derived from it.
BUCKET = 'YOUR_BUCKET_NAME'  # name of your bucket
PGPATH = 's3n://' + BUCKET + '/gutenberg_dataset/'  # path where Gutenberg data resides
WORKINGDIR = 's3n://' + BUCKET + '/results/'  # path where you are storing your results
PGCAT = WORKINGDIR + 'pgcat.json/part-00000'  # path to your catalogue index (expects json)
# Register ``str`` as the Python type for literals typed dcterms:RFC4646.
bind(DCTERMS.RFC4646, str)


def cnt_pronouns(id, text):
    """Count gendered pronouns in a single ebook.

    Matching is case-insensitive and restricted to whole words.

    Arguments:
        id: Identifier of the ebook; returned unchanged as the first element.
        text: The ebook contents; converted with ``str()`` before matching.

    Returns:
        list: ``[id, male_count, female_count]`` where the counts are the
        number of occurrences of he/him/himself/his and
        she/her/herself/hers respectively.
    """
    # Convert once rather than once per pattern.
    content = str(text)
    mcount = len(
        re.findall(r'\b(he|him|himself|his)\b', content, re.IGNORECASE))
    fcount = len(
        re.findall(r'\b(she|her|herself|hers)\b', content, re.IGNORECASE))
    return [id, mcount, fcount]


def get_keys(bucket_name):
Example #11
0
"""

rdflib.term.bind lets you register new mappings between literal
datatypes and Python objects.

"""


from rdflib import Graph, Literal, Namespace, XSD
from rdflib.term import bind

# Complex numbers are not registered by default, so add a mapping for them.
# The default constructor/serializer is enough because
# complex('(2+3j)') works fine.
bind(XSD.complexNumber, complex)

ns = Namespace("urn:my:namespace:")

c = complex(2, 3)
l = Literal(c)

# Put the literal into a graph as the object of one triple.
g = Graph()
g.add((ns.mysubject, ns.myprop, l))

# Round trip: serialize to N3, then parse that text into a fresh graph.
n3 = g.serialize(format='n3')

g2 = Graph()
g2.parse(data=n3, format='n3')
Example #12
0
an RDF graph.

"""
from rdflib.term import bind, URIRef
from rdflib.graph import Graph
from rdflib.namespace import Namespace

from sparql import a, SPARQLEndpoint, SelectQuery


# Dump lots of debug info when set to True.
DEBUG = False


# By default return rdflib.term.Literals as Unicode strings.
# NOTE(review): ``unicode`` is a Python 2 builtin; on Python 3 this line raises
# NameError unless the name is defined elsewhere — confirm the target version.
bind(None, unicode)


class RDFClassManager(object):
    """A container like object which represents a SPARQL query and returns
    RDFClass objects.
    
    """
    def __init__(self, cls, query=None):
        self.cls = cls
        if query:
            self.query = query
        else:
            self.query = SelectQuery().select("?resource").where("?resource", a,
                    cls.class_uri)
Example #13
0
from six import with_metaclass
from rdflib.term import URIRef
from rdflib.term import bind

from gutenberg._domain_model.exceptions import UnsupportedFeatureException
from gutenberg._domain_model.types import validate_etextno
from gutenberg._util.abc import abstractclassmethod
from gutenberg._util.objects import all_subclasses
from gutenberg.acquire.metadata import load_metadata

import sys


# Register Project Gutenberg's Language datatype, mapped to the text type of
# the running interpreter (``unicode`` on Python 2, ``str`` on Python 3).
bind(URIRef('http://purl.org/dc/terms/RFC4646'),
     unicode if sys.version_info < (3,) else str)


def get_metadata(feature_name, etextno):
    """Looks up the value of a meta-data feature for a given text.

    Arguments:
        feature_name (str): The name of the meta-data to look up.
        etextno (int): The identifier of the Gutenberg text for which to look
            up the meta-data.

    Returns:
        frozenset: The values of the meta-data for the text or an empty set if
            the text does not have meta-data associated with the feature.
Example #14
0
from urllib import urlopen, quote_plus

# XML namespace prefix map, passed to the element find/iterfind calls below
# so that paths can use the 'atom:', 'zapi:' and 'x:' prefixes.
NSS = { 'atom':'http://www.w3.org/2005/Atom',
        'zapi':'http://zotero.org/ns/api',
        'x':'http://www.w3.org/1999/xhtml' }
# RDF vocabulary namespaces.
DC = Namespace('http://purl.org/dc/terms/')
SCH = Namespace('http://schema.org/')

def yearmonth2date(ym):
    """Turn a ``'YYYY-MM'`` string into the date of that month's first day."""
    year, month = map(int, ym.split('-'))
    return datetime.date(year, month, 1)

def date2yearmonth(d):
    """Format a date as a year-month string (``YYYY-MM``).

    Arguments:
        d: Any object with integer ``year`` and ``month`` attributes,
            e.g. a ``datetime.date``.

    Returns:
        str: The zero-padded ``YYYY-MM`` form, which ``yearmonth2date``
        can parse back.
    """
    # Read from the parameter ``d`` and format the integers explicitly;
    # concatenating them to '-' with ``+`` would raise a TypeError.
    return '%04d-%02d' % (d.year, d.month)

# Register conversions between xsd:gYearMonth literals and datetime.date:
# yearmonth2date is passed as the constructor, date2yearmonth as the lexicalizer.
bind(XSD.gYearMonth, datetime.date, yearmonth2date, date2yearmonth)

def allbut(unwanted, d):
    """Return a new dict with the entries of ``d`` whose keys are not in ``unwanted``.

    Arguments:
        unwanted: Container of keys to leave out (anything supporting ``in``).
        d: The source mapping; it is not modified.

    Returns:
        dict: A copy of ``d`` without the unwanted keys.
    """
    # ``items()`` exists on both Python 2 and 3 dicts; ``iteritems()`` does not.
    return {k: v for k, v in d.items() if k not in unwanted}

def entry2dict(entry):
    """Group the table rows of ``entry`` by their ``class`` attribute.

    Every ``x:tr`` descendant contributes the text of its first ``x:td``
    child; the result maps each row class to the list of those texts.
    """
    def row_class(pair):
        return pair[0]

    pairs = [(tr.attrib['class'], tr.find('x:td', NSS).text)
             for tr in entry.iterfind('.//x:tr', NSS)]
    # groupby only merges adjacent items, so sort by the same key first.
    pairs = sorted(pairs, key=row_class)
    return {cls: [text for _, text in members]
            for cls, members in groupby(pairs, row_class)}

def load_graph(url):
    r = None
    while True: