def __init__(self, **kwargs):
    """Initializes an instance of GraphIngester

    Default collaborators are only constructed when the matching keyword
    is absent, so supplying an instance never triggers the creation of an
    unused default client.

    Keyword Args:
        elastic_search(elasticsearch.Elasticsearch): Instance of
            Elasticsearch, defaults to a new Elasticsearch()
        graph(rdflib.Graph): BIBFRAME RDF Graph, defaults to the result
            of default_graph()
        repository(flask_fedora_commons.Repository): Fedora Commons
            Repository, defaults to a new Repository()
        quiet(boolean): If False, prints status of ingestion
        debug(boolean): Adds additional information for debugging purposes
    """
    self.bf2uris = {}
    self.debug = kwargs.get('debug', False)
    self.uris2uuid = {}
    if 'elastic_search' in kwargs:
        self.elastic_search = kwargs['elastic_search']
    else:
        self.elastic_search = Elasticsearch()
    # Create the bibframe index from the bundled mapping on first use
    if not self.elastic_search.indices.exists('bibframe'):
        helper_directory = os.path.dirname(__file__)
        # Everything before "<sep>catalog<sep>helpers" is the project base
        base_directory = helper_directory.split(
            "{0}catalog{0}helpers".format(os.path.sep))[0]
        map_path = os.path.join(
            base_directory, "search", "config", "bibframe-map.json")
        with open(map_path) as raw_json:
            bf_map = json.load(raw_json)
        self.elastic_search.indices.create(index='bibframe', body=bf_map)
    if 'graph' in kwargs:
        self.graph = kwargs['graph']
    else:
        self.graph = default_graph()
    if 'repository' in kwargs:
        self.repository = kwargs['repository']
    else:
        self.repository = Repository()
    self.quiet = kwargs.get('quiet', False)
def setUp(self):
    """Sets up the repository and stores a test Work in Fedora, assumes
    Fedora 4 is localhost:8080"""
    self.app = Flask(__name__)
    self.app.testing = True
    self.repo = Repository()
    self.repo.setup()
    # Each test gets its own Work URI under /rest/test/work/
    self.work_uri = rdflib.URIRef(
        urllib.parse.urljoin(
            FEDORA_BASE_URL,
            "/rest/test/work/{}".format(uuid.uuid4())))
    self.work_rdf = rdflib.Graph()
    self.work_rdf.add((self.work_uri,
                       rdflib.RDFS.label,
                       rdflib.Literal("Work for Unit Test")))
    self.work_rdf.add((self.work_uri,
                       rdflib.RDF.type,
                       BIBFRAME.Monograph))
    self.work_rdf.add((self.work_uri,
                       rdflib.RDF.type,
                       SCHEMA_ORG.Book))
    self.work_rdf.add((self.work_uri,
                       BIBFRAME.workTitle,
                       rdflib.Literal("Original Work Title")))
    # Add RDF Graph to Fedora; an explicit encoding makes serialize()
    # return bytes, which is what urllib requires for a request body
    new_request = urllib.request.Request(
        str(self.work_uri),
        data=self.work_rdf.serialize(format='turtle', encoding='utf-8'),
        method='PUT',
        headers={"Content-Type": "text/turtle"})
    # Close the HTTP response instead of leaking the connection
    with urllib.request.urlopen(new_request):
        pass
class TestFlaskExtension(unittest.TestCase):
    "Unit tests exercising Repository when used as a Flask extension"

    def setUp(self):
        "Binds a Repository to a new Flask app, assumes Fedora 4 is localhost:8080"
        flask_app = Flask(__name__)
        self.repo = Repository(app=flask_app)
        self.repo.setup()
        # The test client comes from the app held by the repository
        self.client = self.repo.app.test_client()

    def test_app_exists(self):
        "Method tests if app exists by requesting the root URL"
        response = self.client.get('/')
        self.assertTrue(response)

    def tearDown(self):
        "Standard unit test overridden teardown method, nothing to clean up"
def __init__(self, record, elastic_search=None, repository=None):
    """Initializes RecordIngester class

    The default clients are created per call rather than in the
    signature, so nothing is instantiated when the module is imported and
    instances do not share one default client.

    Args:
        record: A MARC21 or MARC XML file
        elastic_search: Elasticsearch instance, a new Elasticsearch() is
            created when None
        repository: Flask Fedora Commons Repository instance, a new
            Repository() is created when None
    """
    if elastic_search is None:
        elastic_search = Elasticsearch()
    self.elastic_search = elastic_search
    # Make sure the marc index is present before any record is ingested
    if not self.elastic_search.indices.exists('marc'):
        self.elastic_search.indices.create('marc')
    self.record = record
    if repository is None:
        repository = Repository()
    self.repository = repository
class TestFedoraCommons(unittest.TestCase):
    "Unit tests for flask_fedora_commons.Repository class"

    def setUp(self):
        """Sets up the repository and stores a test Work in Fedora, assumes
        Fedora 4 is localhost:8080"""
        self.app = Flask(__name__)
        self.app.testing = True
        self.repo = Repository()
        self.repo.setup()
        # Each test gets its own Work URI under /rest/test/work/
        self.work_uri = rdflib.URIRef(
            urllib.parse.urljoin(
                FEDORA_BASE_URL,
                "/rest/test/work/{}".format(uuid.uuid4())))
        self.work_rdf = rdflib.Graph()
        self.work_rdf.add((self.work_uri,
                           rdflib.RDFS.label,
                           rdflib.Literal("Work for Unit Test")))
        self.work_rdf.add((self.work_uri,
                           rdflib.RDF.type,
                           BIBFRAME.Monograph))
        self.work_rdf.add((self.work_uri,
                           rdflib.RDF.type,
                           SCHEMA_ORG.Book))
        self.work_rdf.add((self.work_uri,
                           BIBFRAME.workTitle,
                           rdflib.Literal("Original Work Title")))
        # Add RDF Graph to Fedora; an explicit encoding makes serialize()
        # return bytes, which is what urllib requires for a request body
        new_request = urllib.request.Request(
            str(self.work_uri),
            data=self.work_rdf.serialize(format='turtle', encoding='utf-8'),
            method='PUT',
            headers={"Content-Type": "text/turtle"})
        # Close the HTTP response instead of leaking the connection
        with urllib.request.urlopen(new_request):
            pass

    def test_repo_exists(self):
        "Tests if repository exists"
        self.assertIsNotNone(self.repo)

    def test__value_format__(self):
        """Tests simple function that returns SPARQL format for object,
        either wraps URI with <{URI}> or literal value with quotes
        "{VALUE}"
        """
        self.assertEqual(
            "<http://bibframe.org/vocab/Work>",
            self.repo.__value_format__('http://bibframe.org/vocab/Work'))
        self.assertEqual(
            '"A most excellent work"',
            self.repo.__value_format__('A most excellent work'))

    def test_create(self):
        "Method tests default creation of an opaque URI"
        default_uri = self.repo.create()
        self.assertTrue(default_uri)
        # Remove default_uri because it won't be in test collection
        self.repo.delete(default_uri)

    def test_create_uri(self):
        "Method tests creation of a URI pattern in Fedora"
        bibframe_test_uri = self.repo.create(uri=self.work_uri)
        self.assertEqual(bibframe_test_uri, self.work_uri)

    def test_create_graph(self):
        "Method tests creation of a URI with an existing graph"
        new_graph = rdflib.Graph()
        test_uri = rdflib.URIRef('http://example.org/1234')
        new_graph.add(
            (test_uri, BIBFRAME.workTitle, rdflib.Literal("Example Title")))
        bibframe_uri = self.repo.create(uri=None, graph=new_graph)
        self.assertTrue(bibframe_uri)
        fedora_graph = rdflib.Graph().parse(bibframe_uri)
        self.assertEqual(
            "Example Title",
            str(fedora_graph.value(
                subject=rdflib.URIRef(bibframe_uri),
                predicate=BIBFRAME.workTitle)))
        # Created outside the test collection, so clean it up explicitly
        self.repo.delete(bibframe_uri)

    def test_create_uri_graph(self):
        "Method tests creation of Fedora object with existing URI and Graph"
        new_graph = rdflib.Graph()
        new_graph.add(
            (self.work_uri,
             BIBFRAME.title,
             rdflib.URIRef('http://example.org/4567')))
        result = self.repo.create(uri=self.work_uri, graph=new_graph)
        self.assertEqual(self.work_uri, result)
        fedora_graph = rdflib.Graph().parse(self.work_uri)
        self.assertEqual(
            'http://example.org/4567',
            str(fedora_graph.value(
                subject=self.work_uri,
                predicate=BIBFRAME.title)))

    def test_dedup(self):
        "Tests deduplication method for repository"
        # NOTE: placeholder, the call to self.repo.__dedup__() is disabled

    def test_delete(self):
        "Test delete a Fedora object based on an URI"
        self.repo.delete(self.work_uri)
        self.assertFalse(self.repo.exists(self.work_uri))

    def test_as_json(self):
        "Tests outputting JSON-LD without Context"
        work_json = json.loads(self.repo.as_json(str(self.work_uri)))
        self.assertEqual(work_json[0]['@id'], str(self.work_uri))

    def test_as_json_context(self):
        "Tests outputting Fedora 4 object as JSON-LD with Context"
        work_json = json.loads(
            self.repo.as_json(
                str(self.work_uri),
                context={
                    "@vocab": "http://bibframe.org/vocab/",
                    "fcrepo": "http://fedora.info/definitions/v4/repository#",
                    "fedora": "http://fedora.info/definitions/v4/rest-api#",
                    "@language": "en"}))
        self.assertEqual(work_json['@id'], str(self.work_uri))

    def test_exists(self):
        "Tests if a Fedora Object exists in the repository"
        self.assertTrue(self.repo.exists(self.work_uri))
        self.assertFalse(self.repo.exists('http://example.org/Work/1'))

    def test_read(self):
        "Tests if a Fedora Object can be read into a rdflib.Graph object"
        work_rdf = self.repo.read(str(self.work_uri))
        label = work_rdf.value(subject=self.work_uri,
                               predicate=rdflib.RDFS.label)
        self.assertEqual(label.value, "Work for Unit Test")

    def test_remove(self):
        "Tests a property can be removed from a Fedora Object"
        self.repo.remove(self.work_uri, 'rdf:type', BIBFRAME.Monograph)
        work_rdf = self.repo.read(str(self.work_uri))
        work_types = list(
            work_rdf.objects(subject=self.work_uri,
                             predicate=rdflib.RDF.type))
        self.assertNotIn(BIBFRAME.Monograph, work_types)

    def test_replace(self):
        """Tests if Fedora Object's property value (the object in a
        subject-predicate-object triple) can be replaced"""
        self.repo.replace(self.work_uri,
                          'bf:workTitle',
                          "Original Work Title",
                          "A new title")
        work_rdf = self.repo.read(str(self.work_uri))
        self.assertEqual(
            "A new title",
            str(work_rdf.value(
                subject=self.work_uri,
                predicate=BIBFRAME.workTitle)))

    def test_repo_setup(self):
        "Tests if the BIBFRAME namespace is registered in the repository"
        fedora_namespaces = rdflib.Graph().parse(
            "/".join([FEDORA_BASE_URL, "rest", "fcr:namespaces"]))
        pref_namespace_uri = rdflib.term.URIRef(
            'http://purl.org/vocab/vann/preferredNamespaceUri')
        self.assertEqual(
            str(fedora_namespaces.value(
                subject=rdflib.term.URIRef(str(BIBFRAME)),
                predicate=pref_namespace_uri)),
            str(BIBFRAME))

    def test_search(self):
        "Tests Repository.search reports the expected totalResults count"
        total_results = rdflib.URIRef(
            'http://sindice.com/vocab/search#totalResults')
        work_results = self.repo.search("Unit Test")
        self.assertEqual(
            1,
            int(work_results.value(
                subject=rdflib.URIRef(
                    'http://localhost:8080/rest/fcr:search?q=Unit+Test'),
                predicate=total_results)))
        no_results = self.repo.search("4546WW")
        self.assertEqual(
            0,
            int(no_results.value(
                subject=rdflib.URIRef(
                    'http://localhost:8080/rest/fcr:search?q=4546WW'),
                predicate=total_results)))

    def test_insert(self):
        "Tests inserting a new property to an existing Fedora Object"
        self.repo.insert(self.work_uri,
                         "schema:about",
                         "This is a test work")
        work_rdf = self.repo.read(str(self.work_uri))
        self.assertEqual(
            "This is a test work",
            str(work_rdf.value(
                subject=self.work_uri,
                predicate=SCHEMA_ORG.about)))
        self.repo.insert(self.work_uri,
                         "bf:sameAs",
                         "http://example.org/Work/12334")
        work_rdf = self.repo.read(str(self.work_uri))
        self.assertEqual(
            rdflib.URIRef("http://example.org/Work/12334"),
            work_rdf.value(subject=self.work_uri,
                           predicate=BIBFRAME.sameAs))

    def tearDown(self):
        """Standard unit test overridden teardown method, deletes the
        Fedora resource at /rest/test/"""
        self.repo.delete(urllib.parse.urljoin(FEDORA_BASE_URL, "/rest/test/"))
def setUp(self):
    "Binds a Repository to a new Flask app, assumes Fedora 4 is localhost:8080"
    flask_app = Flask(__name__)
    self.repo = Repository(app=flask_app)
    self.repo.setup()
    # The test client comes from the app held by the repository
    self.client = self.repo.app.test_client()
class TestFedoraCommons(unittest.TestCase):
    "Unit tests for flask_fedora_commons.Repository class"

    def setUp(self):
        """Prepares the repository and PUTs a test Work into Fedora,
        assumes Fedora 4 is localhost:8080"""
        self.app = Flask(__name__)
        self.app.testing = True
        self.repo = Repository()
        self.repo.setup()
        # A random UUID keeps the Work URI distinct for every test
        self.work_uri = rdflib.URIRef(
            urllib.parse.urljoin(
                FEDORA_BASE_URL,
                "/rest/test/work/{}".format(uuid.uuid4())))
        self.work_rdf = rdflib.Graph()
        self.work_rdf.add(
            (self.work_uri,
             rdflib.RDFS.label,
             rdflib.Literal("Work for Unit Test")))
        self.work_rdf.add(
            (self.work_uri, rdflib.RDF.type, BIBFRAME.Monograph))
        self.work_rdf.add(
            (self.work_uri, rdflib.RDF.type, SCHEMA_ORG.Book))
        self.work_rdf.add(
            (self.work_uri,
             BIBFRAME.workTitle,
             rdflib.Literal("Original Work Title")))
        # Add RDF Graph to Fedora. Passing an encoding makes serialize()
        # return bytes, the only body type urllib accepts for a request
        turtle_body = self.work_rdf.serialize(
            format='turtle', encoding='utf-8')
        new_request = urllib.request.Request(
            str(self.work_uri),
            data=turtle_body,
            method='PUT',
            headers={"Content-Type": "text/turtle"})
        # The context manager closes the HTTP response after the PUT
        with urllib.request.urlopen(new_request):
            pass

    def test_repo_exists(self):
        "Tests if repository exists"
        self.assertIsNotNone(self.repo)

    def test__value_format__(self):
        """Tests simple function that returns SPARQL format for object,
        either wraps URI with <{URI}> or literal value with quotes
        "{VALUE}"
        """
        self.assertEqual(
            "<http://bibframe.org/vocab/Work>",
            self.repo.__value_format__('http://bibframe.org/vocab/Work'))
        self.assertEqual(
            '"A most excellent work"',
            self.repo.__value_format__('A most excellent work'))

    def test_create(self):
        "Method tests default creation of an opaque URI"
        default_uri = self.repo.create()
        self.assertTrue(default_uri)
        # Remove default_uri because it won't be in test collection
        self.repo.delete(default_uri)

    def test_create_uri(self):
        "Method tests creation of a URI pattern in Fedora"
        bibframe_test_uri = self.repo.create(uri=self.work_uri)
        self.assertEqual(bibframe_test_uri, self.work_uri)

    def test_create_graph(self):
        "Method tests creation of a URI with an existing graph"
        new_graph = rdflib.Graph()
        test_uri = rdflib.URIRef('http://example.org/1234')
        new_graph.add(
            (test_uri, BIBFRAME.workTitle, rdflib.Literal("Example Title")))
        bibframe_uri = self.repo.create(uri=None, graph=new_graph)
        self.assertTrue(bibframe_uri)
        fedora_graph = rdflib.Graph().parse(bibframe_uri)
        stored_title = fedora_graph.value(
            subject=rdflib.URIRef(bibframe_uri),
            predicate=BIBFRAME.workTitle)
        self.assertEqual("Example Title", str(stored_title))
        # The new URI is not under the test collection, remove it here
        self.repo.delete(bibframe_uri)

    def test_create_uri_graph(self):
        "Method tests creation of Fedora object with existing URI and Graph"
        new_graph = rdflib.Graph()
        new_graph.add(
            (self.work_uri,
             BIBFRAME.title,
             rdflib.URIRef('http://example.org/4567')))
        result = self.repo.create(uri=self.work_uri, graph=new_graph)
        self.assertEqual(self.work_uri, result)
        fedora_graph = rdflib.Graph().parse(self.work_uri)
        stored_title = fedora_graph.value(
            subject=self.work_uri,
            predicate=BIBFRAME.title)
        self.assertEqual('http://example.org/4567', str(stored_title))

    def test_dedup(self):
        "Tests deduplication method for repository"
        # NOTE: placeholder, the call to self.repo.__dedup__() is disabled

    def test_delete(self):
        "Test delete a Fedora object based on an URI"
        self.repo.delete(self.work_uri)
        self.assertFalse(self.repo.exists(self.work_uri))

    def test_as_json(self):
        "Tests outputting JSON-LD without Context"
        work_json = json.loads(self.repo.as_json(str(self.work_uri)))
        self.assertEqual(work_json[0]['@id'], str(self.work_uri))

    def test_as_json_context(self):
        "Tests outputting Fedora 4 object as JSON-LD with Context"
        json_ld_context = {
            "@vocab": "http://bibframe.org/vocab/",
            "fcrepo": "http://fedora.info/definitions/v4/repository#",
            "fedora": "http://fedora.info/definitions/v4/rest-api#",
            "@language": "en"}
        work_json = json.loads(
            self.repo.as_json(str(self.work_uri), context=json_ld_context))
        self.assertEqual(work_json['@id'], str(self.work_uri))

    def test_exists(self):
        "Tests if a Fedora Object exists in the repository"
        self.assertTrue(self.repo.exists(self.work_uri))
        self.assertFalse(self.repo.exists('http://example.org/Work/1'))

    def test_read(self):
        "Tests if a Fedora Object can be read into a rdflib.Graph object"
        work_rdf = self.repo.read(str(self.work_uri))
        label = work_rdf.value(subject=self.work_uri,
                               predicate=rdflib.RDFS.label)
        self.assertEqual(label.value, "Work for Unit Test")

    def test_remove(self):
        "Tests a property can be removed from a Fedora Object"
        self.repo.remove(self.work_uri, 'rdf:type', BIBFRAME.Monograph)
        work_rdf = self.repo.read(str(self.work_uri))
        work_types = list(
            work_rdf.objects(subject=self.work_uri,
                             predicate=rdflib.RDF.type))
        self.assertNotIn(BIBFRAME.Monograph, work_types)

    def test_replace(self):
        """Tests if Fedora Object's property value (the object in a
        subject-predicate-object triple) can be replaced"""
        self.repo.replace(
            self.work_uri,
            'bf:workTitle',
            "Original Work Title",
            "A new title")
        work_rdf = self.repo.read(str(self.work_uri))
        current_title = work_rdf.value(
            subject=self.work_uri,
            predicate=BIBFRAME.workTitle)
        self.assertEqual("A new title", str(current_title))

    def test_repo_setup(self):
        "Tests if the BIBFRAME namespace is registered in the repository"
        fedora_namespaces = rdflib.Graph().parse(
            "/".join([FEDORA_BASE_URL, "rest", "fcr:namespaces"]))
        pref_namespace_uri = rdflib.term.URIRef(
            'http://purl.org/vocab/vann/preferredNamespaceUri')
        registered = fedora_namespaces.value(
            subject=rdflib.term.URIRef(str(BIBFRAME)),
            predicate=pref_namespace_uri)
        self.assertEqual(str(registered), str(BIBFRAME))

    def test_search(self):
        "Tests Repository.search reports the expected totalResults count"
        total_results = rdflib.URIRef(
            'http://sindice.com/vocab/search#totalResults')
        work_results = self.repo.search("Unit Test")
        self.assertEqual(
            1,
            int(work_results.value(
                subject=rdflib.URIRef(
                    'http://localhost:8080/rest/fcr:search?q=Unit+Test'),
                predicate=total_results)))
        no_results = self.repo.search("4546WW")
        self.assertEqual(
            0,
            int(no_results.value(
                subject=rdflib.URIRef(
                    'http://localhost:8080/rest/fcr:search?q=4546WW'),
                predicate=total_results)))

    def test_insert(self):
        "Tests inserting a new property to an existing Fedora Object"
        self.repo.insert(
            self.work_uri, "schema:about", "This is a test work")
        work_rdf = self.repo.read(str(self.work_uri))
        about = work_rdf.value(subject=self.work_uri,
                               predicate=SCHEMA_ORG.about)
        self.assertEqual("This is a test work", str(about))
        self.repo.insert(
            self.work_uri, "bf:sameAs", "http://example.org/Work/12334")
        work_rdf = self.repo.read(str(self.work_uri))
        self.assertEqual(
            rdflib.URIRef("http://example.org/Work/12334"),
            work_rdf.value(subject=self.work_uri,
                           predicate=BIBFRAME.sameAs))

    def tearDown(self):
        """Standard unit test overridden teardown method, deletes the
        Fedora resource at /rest/test/"""
        self.repo.delete(urllib.parse.urljoin(FEDORA_BASE_URL, "/rest/test/"))