def test_item_count(self):
    """Test that item_count is picked up.

    The item-count fixture is expected to map to a solr doc whose
    ``item_count`` is 2; the plain fixture must yield no ``item_count``
    key at all.
    """
    # Context managers close the fixture files instead of leaking handles.
    with open(DIR_FIXTURES + '/couchdb_item_count.json') as fixture:
        doc = json.load(fixture)
    sdoc = map_couch_to_solr_doc(doc)
    self.assertEqual(sdoc['id'], '0c0e6ee502a2afda21128841f0addf23')
    self.assertEqual(sdoc['item_count'], 2)

    with open(DIR_FIXTURES + '/couchdb_doc.json') as fixture:
        doc = json.load(fixture)
    sdoc = map_couch_to_solr_doc(doc)
    self.assertEqual(sdoc['id'], 'ark:/13030/ft009nb05r')
    self.assertNotIn('item_count', sdoc)
def test_item_count(self):
    """Test that item_count is picked up.

    Maps one fixture that should produce ``item_count == 2`` and one
    fixture that should produce no ``item_count`` key.
    """
    # Open fixtures via ``with`` so the file handles are always closed.
    with open(DIR_FIXTURES + "/couchdb_item_count.json") as fixture:
        doc = json.load(fixture)
    sdoc = map_couch_to_solr_doc(doc)
    self.assertEqual(sdoc["id"], "0c0e6ee502a2afda21128841f0addf23")
    self.assertEqual(sdoc["item_count"], 2)

    with open(DIR_FIXTURES + "/couchdb_doc.json") as fixture:
        doc = json.load(fixture)
    sdoc = map_couch_to_solr_doc(doc)
    self.assertEqual(sdoc["id"], "ark:/13030/ft009nb05r")
    self.assertNotIn("item_count", sdoc)
def test_decade_facet(self):
    """Test generation of decade facet.

    Currently generated from sourceResource.date.displayDate
    """
    # ``with`` closes the fixture file; the bare open() leaked the handle.
    with open(DIR_FIXTURES + '/couchdb_doc.json') as fixture:
        doc = json.load(fixture)
    sdoc = map_couch_to_solr_doc(doc)
    self.assertEqual(sdoc['facet_decade'], {'1880s', '1890s'})

    # no "date" in sourceResource
    with open(DIR_FIXTURES + '/couchdb_nocampus.json') as fixture:
        doc = json.load(fixture)
    sdoc = map_couch_to_solr_doc(doc)
    self.assertEqual(sdoc['facet_decade'], {'unknown'})
def test_decade_facet(self):
    """Test generation of decade facet.

    Currently generated from sourceResource.date.displayDate
    """
    # Fixture files are opened in a context manager so they get closed.
    with open(DIR_FIXTURES + "/couchdb_doc.json") as fixture:
        doc = json.load(fixture)
    sdoc = map_couch_to_solr_doc(doc)
    self.assertEqual(sdoc["facet_decade"], {"1880s", "1890s"})

    # no "date" in sourceResource
    with open(DIR_FIXTURES + "/couchdb_nocampus.json") as fixture:
        doc = json.load(fixture)
    sdoc = map_couch_to_solr_doc(doc)
    self.assertEqual(sdoc["facet_decade"], {"unknown"})
def test_type_mapping(self):
    """Test how sourceResource type values come out of the mapping.

    The fixture is loaded once; its ``sourceResource.type`` is then
    overwritten with several spellings and the doc is re-mapped each time.
    """
    # ``with`` closes the fixture file; the bare open() leaked the handle.
    with open(DIR_FIXTURES + "/couchdb_doc.json") as fixture:
        doc = json.load(fixture)
    ret = map_couch_to_solr_doc(doc)
    self.assertEqual(ret["type"], "image")

    doc["sourceResource"]["type"] = "moving image"
    ret = map_couch_to_solr_doc(doc)
    self.assertEqual(ret["type"], "moving image")

    doc["sourceResource"]["type"] = "movingimage"
    ret = map_couch_to_solr_doc(doc)
    self.assertEqual(ret["type"], "moving image")

    # Trailing junk after a known type is still expected to map cleanly.
    doc["sourceResource"]["type"] = "Physical ObjectXX"
    ret = map_couch_to_solr_doc(doc)
    self.assertEqual(ret["type"], "physical object")

    doc["sourceResource"]["type"] = "physicalobject"
    ret = map_couch_to_solr_doc(doc)
    self.assertEqual(ret["type"], "physical object")
def test_type_mapping(self):
    """Test how sourceResource type values come out of the mapping.

    One fixture is loaded, then its ``sourceResource.type`` is replaced
    with different spellings and the doc is mapped again after each change.
    """
    # Context manager guarantees the fixture file handle is released.
    with open(DIR_FIXTURES + '/couchdb_doc.json') as fixture:
        doc = json.load(fixture)
    ret = map_couch_to_solr_doc(doc)
    self.assertEqual(ret['type'], 'image')

    doc['sourceResource']['type'] = 'moving image'
    ret = map_couch_to_solr_doc(doc)
    self.assertEqual(ret['type'], 'moving image')

    doc['sourceResource']['type'] = 'movingimage'
    ret = map_couch_to_solr_doc(doc)
    self.assertEqual(ret['type'], 'moving image')

    # Trailing junk after a known type is still expected to map cleanly.
    doc['sourceResource']['type'] = 'Physical ObjectXX'
    ret = map_couch_to_solr_doc(doc)
    self.assertEqual(ret['type'], 'physical object')

    doc['sourceResource']['type'] = 'physicalobject'
    ret = map_couch_to_solr_doc(doc)
    self.assertEqual(ret['type'], 'physical object')
def test_sort_title_string_only(self):
    """Many of the sourceResource title fields are flat strings.

    Deal with this: the flat-string fixture must still yield the expected
    ``sort_title``.
    """
    # ``with`` closes the fixture file; the bare open() leaked the handle.
    with open(DIR_FIXTURES + '/couchdb_title_flat_string.json') as fixture:
        doc = json.load(fixture)
    sdoc = map_couch_to_solr_doc(doc)
    self.assertEqual(sdoc['sort_title'], 'atlas negative collection image')
def test_map_date_not_a_list(self):
    """Test how the mapping works when the sourceResource/date is a dict
    not a list.

    The mapped ``date`` is still expected to be a one-element list.
    """
    # Open the fixture in a context manager so the handle is closed.
    with open(DIR_FIXTURES + "/couchdb_solr_date_map.json") as fixture:
        doc = json.load(fixture)
    sdoc = map_couch_to_solr_doc(doc)
    self.assertEqual(sdoc["date"], ["between 1885-1890"])
def test_map_date_not_a_list(self):
    """Test how the mapping works when the sourceResource/date is a dict
    not a list.

    The solr doc's ``date`` must come back as a list with a single entry.
    """
    # ``with`` closes the fixture file; the bare open() leaked the handle.
    with open(DIR_FIXTURES + '/couchdb_solr_date_map.json') as fixture:
        doc = json.load(fixture)
    sdoc = map_couch_to_solr_doc(doc)
    self.assertEqual(sdoc['date'], ['between 1885-1890'])
def test_dejson_from_map(self):
    """Test that the dejson works from the mapping function.

    The mapped ``coverage`` is expected to be a flat list of place-name
    strings.
    """
    # ``with`` closes the fixture file; the bare open() leaked the handle.
    with open(DIR_FIXTURES + '/couchdb_ucla.json') as fixture:
        doc = json.load(fixture)
    sdoc = map_couch_to_solr_doc(doc)
    self.assertEqual(sdoc['coverage'], [
        "Topanga (Calif.)",
        "Pacific Palisades, Los Angeles (Calif.)",
        "Venice (Los Angeles, Calif.)",
        "Los Angeles (Calif.)",
    ])
def test_sort_dates(self):
    """Test the sort_date_start/end values.

    Covers four fixtures: three with expected start/end datetimes and one
    (couchdb_nocampus) for which neither sort date key may be present.
    """
    # Every fixture is read inside ``with`` so no file handle is leaked.
    with open(DIR_FIXTURES + "/couchdb_doc.json") as fixture:
        doc = json.load(fixture)
    sdoc = map_couch_to_solr_doc(doc)
    self.assertEqual(sdoc["sort_date_start"], DT(1885, 1, 1, tzinfo=UTC))
    self.assertEqual(sdoc["sort_date_end"], DT(1890, 1, 1, tzinfo=UTC))

    with open(DIR_FIXTURES + "/couchdb_no_pretty_id.json") as fixture:
        doc = json.load(fixture)
    sdoc = map_couch_to_solr_doc(doc)
    self.assertEqual(sdoc["sort_date_start"], DT(2013, 9, 30, tzinfo=UTC))
    self.assertEqual(sdoc["sort_date_end"], DT(2013, 9, 30, tzinfo=UTC))

    with open(DIR_FIXTURES + "/couchdb_nocampus.json") as fixture:
        doc = json.load(fixture)
    sdoc = map_couch_to_solr_doc(doc)
    self.assertNotIn("sort_date_start", sdoc)
    self.assertNotIn("sort_date_end", sdoc)

    with open(DIR_FIXTURES + "/couchdb_solr_date_map.json") as fixture:
        doc = json.load(fixture)
    sdoc = map_couch_to_solr_doc(doc)
    self.assertEqual(sdoc["sort_date_start"], DT(1885, 7, 4, tzinfo=UTC))
    self.assertEqual(sdoc["sort_date_end"], DT(1890, 8, 3, tzinfo=UTC))
def test_sort_dates(self):
    """Test the sort_date_start/end values.

    Three fixtures must map to the expected start/end datetimes; for the
    couchdb_nocampus fixture neither sort date key may appear.
    """
    # Fixtures are opened with ``with`` so each file is closed after load.
    with open(DIR_FIXTURES + '/couchdb_doc.json') as fixture:
        doc = json.load(fixture)
    sdoc = map_couch_to_solr_doc(doc)
    self.assertEqual(sdoc['sort_date_start'], DT(1885, 1, 1, tzinfo=UTC))
    self.assertEqual(sdoc['sort_date_end'], DT(1890, 1, 1, tzinfo=UTC))

    with open(DIR_FIXTURES + '/couchdb_no_pretty_id.json') as fixture:
        doc = json.load(fixture)
    sdoc = map_couch_to_solr_doc(doc)
    self.assertEqual(sdoc['sort_date_start'], DT(2013, 9, 30, tzinfo=UTC))
    self.assertEqual(sdoc['sort_date_end'], DT(2013, 9, 30, tzinfo=UTC))

    with open(DIR_FIXTURES + '/couchdb_nocampus.json') as fixture:
        doc = json.load(fixture)
    sdoc = map_couch_to_solr_doc(doc)
    self.assertNotIn('sort_date_start', sdoc)
    self.assertNotIn('sort_date_end', sdoc)

    with open(DIR_FIXTURES + '/couchdb_solr_date_map.json') as fixture:
        doc = json.load(fixture)
    sdoc = map_couch_to_solr_doc(doc)
    self.assertEqual(sdoc['sort_date_start'], DT(1885, 7, 4, tzinfo=UTC))
    self.assertEqual(sdoc['sort_date_end'], DT(1890, 8, 3, tzinfo=UTC))
def test_map_couch_to_solr_no_campus(self):
    """Test mapping of the couchdb_nocampus fixture.

    None of the campus keys may appear in the solr doc, while repository
    data, sort title and type are still expected to be mapped.
    """
    # ``with`` closes the fixture file; the bare open() leaked the handle.
    with open(DIR_FIXTURES + "/couchdb_nocampus.json") as fixture:
        doc = json.load(fixture)
    sdoc = map_couch_to_solr_doc(doc)
    for campus_key in ("campus", "campus_url", "campus_name", "campus_data"):
        self.assertNotIn(campus_key, sdoc)
    repo_data = ["https://registry.cdlib.org/api/v1/repository/4/::"
                 "Bancroft Library"]
    self.assertEqual(sdoc["repository_data"], repo_data)
    self.assertEqual(sdoc["sort_title"],
                     u"neighbor my neighbor what a happy boy")
    self.assertEqual(sdoc["type"], ["image", "physical object"])
def test_map_couch_to_solr_no_campus(self):
    """Test mapping of the couchdb_nocampus fixture.

    The solr doc must carry no campus keys; repository data, sort title
    and type are still expected.
    """
    # Context manager guarantees the fixture file handle is released.
    with open(DIR_FIXTURES + '/couchdb_nocampus.json') as fixture:
        doc = json.load(fixture)
    sdoc = map_couch_to_solr_doc(doc)
    self.assertNotIn('campus', sdoc)
    self.assertNotIn('campus_url', sdoc)
    self.assertNotIn('campus_name', sdoc)
    self.assertNotIn('campus_data', sdoc)
    repo_data = [
        'https://registry.cdlib.org/api/v1/repository/4/::'
        'Bancroft Library'
    ]
    self.assertEqual(sdoc['repository_data'], repo_data)
    self.assertEqual(sdoc['sort_title'],
                     u'neighbor my neighbor what a happy boy')
    self.assertEqual(sdoc['type'], ['image', 'physical object'])
def test_map_couch_to_solr_nuxeo_doc(self, mock_boto):
    """Test the mapping of a couch db source json doc from Nuxeo to a solr
    schema compatible doc.

    NOTE(review): ``mock_boto`` is not used in the body; presumably it is
    injected by a patch decorator outside this block -- confirm.
    """
    fixture_path = (DIR_FIXTURES +
                    '/26098--0025ad8f-a44e-4f58-8238-c7b60b2fb850.json')
    # ``with`` closes the fixture file; the bare open() leaked the handle.
    with open(fixture_path) as fixture:
        doc = json.load(fixture)
    sdoc = map_couch_to_solr_doc(doc)
    self.assertEqual(sdoc['id'], '0025ad8f-a44e-4f58-8238-c7b60b2fb850')
    self.assertEqual(sdoc['harvest_id_s'],
                     '26098--0025ad8f-a44e-4f58-8238-c7b60b2fb850')
    self.assertEqual(sdoc['title'], ['Brag'])
    self.assertEqual(sdoc['sort_title'], 'brag')
    self.assertEqual(sdoc['alternative_title'], ['test alt title'])
    self.assertEqual(sdoc['creator'], ['Dunya Ramicova'])
    self.assertEqual(sdoc['description'], [
        u'"Design #39; BRAG; Entourage; Principle Base A" written on '
        'drawing. No signature on drawing.',
        u'Director: David Pountney; Scene Designer: Robert Israel; '
        'Producer: English National Opera, London, UK',
        u'English National Opera',
    ])
    self.assertEqual(sdoc['reference_image_dimensions'], u'899:1199')
    self.assertEqual(sdoc['extent'], u'9" x 12" ')
    self.assertEqual(
        sdoc['format'],
        u'Graphite pencil, and Dr. Ph Martins Liquid Watercolor on '
        'watercolor paper')
    self.assertEqual(sdoc['genre'], ['Drawing'])
    self.assertNotIn('identifier', sdoc)
    self.assertEqual(sdoc['language'], ['English', 'eng'])
    self.assertEqual(sdoc['provenance'], u'Gift of the Naify Family')
    self.assertNotIn('publisher', sdoc)
    self.assertEqual(sdoc['relation'], [u'The Fairy Queen'])
    self.assertEqual(sdoc['rights'], [
        u'copyrighted',
        u'Creative Commons Attribution - NonCommercial-NoDerivatives '
        '(CC BY-NC-ND 4.0)',
    ])
    self.assertEqual(sdoc['structmap_text'], 'Brag')
    self.assertEqual(sdoc['structmap_url'],
                     u's3://static.ucldc.cdlib.org/media_json/'
                     '0025ad8f-a44e-4f58-8238-c7b60b2fb850-media.json')
    self.assertEqual(sdoc['subject'], [None])
    self.assertEqual(sdoc['type'], 'image')
    self.assertEqual(sdoc['location'], u'Box 13, Folder 25')
def main(collection_key):
    """Map each CouchDB doc of one collection to a solr doc and push it.

    Iterates the ``CouchDBCollectionFilter`` for ``collection_key``; docs
    for which ``fill_in_title`` / ``has_required_fields`` raise ``KeyError``
    are reported on stdout and skipped. Every other doc is mapped with
    ``map_couch_to_solr_doc`` and sent through ``push_doc_to_solr``.

    NOTE(review): ``results``, ``doc``, ``dt_start`` and ``dt_end`` are
    assigned but never read in the visible body; the function may continue
    beyond what was available for review, so they are kept as they were.
    """
    v = CouchDBCollectionFilter(
        couchdb_obj=get_couchdb(), collection_key=collection_key)
    solr_db = Solr(URL_SOLR)
    results = []
    for r in v:
        dt_start = dt_end = datetime.datetime.now()
        try:
            doc = fill_in_title(r.doc)
            has_required_fields(r.doc)
        except KeyError as e:
            # ``except KeyError, e`` is a SyntaxError on Python 3, and
            # ``e.message`` no longer exists there; args[0] carries the
            # same text for a single-argument exception.
            print(e.args[0] if e.args else e)
            continue
        solr_doc = map_couch_to_solr_doc(r.doc)
        results.append(solr_doc)
        solr_doc = push_doc_to_solr(solr_doc, solr_db=solr_db)
        dt_end = datetime.datetime.now()
def test_map_couch_to_solr_nuxeo_doc(self):
    """Test the mapping of a couch db source json doc from Nuxeo to a solr
    schema compatible doc.

    Checks the mapped values field by field, and that ``identifier`` and
    ``publisher`` are absent from the solr doc.
    """
    fixture_path = (DIR_FIXTURES +
                    "/26098--0025ad8f-a44e-4f58-8238-c7b60b2fb850.json")
    # Context manager guarantees the fixture file handle is released.
    with open(fixture_path) as fixture:
        doc = json.load(fixture)
    sdoc = map_couch_to_solr_doc(doc)
    self.assertEqual(sdoc["id"], "0025ad8f-a44e-4f58-8238-c7b60b2fb850")
    self.assertEqual(sdoc["harvest_id_s"],
                     "26098--0025ad8f-a44e-4f58-8238-c7b60b2fb850")
    self.assertEqual(sdoc["title"], ["Brag"])
    self.assertEqual(sdoc["sort_title"], "brag")
    self.assertEqual(sdoc["alternative_title"], ["test alt title"])
    self.assertEqual(sdoc["creator"], ["Dunya Ramicova"])
    self.assertEqual(
        sdoc["description"],
        [
            u'"Design #39; BRAG; Entourage; Principle Base A" written on '
            "drawing. No signature on drawing.",
            u"Director: David Pountney; Scene Designer: Robert Israel; "
            "Producer: English National Opera, London, UK",
            u"English National Opera",
        ],
    )
    self.assertEqual(sdoc["reference_image_dimensions"], u"899:1199")
    self.assertEqual(sdoc["extent"], u'9" x 12" ')
    self.assertEqual(
        sdoc["format"],
        u"Graphite pencil, and Dr. Ph Martins Liquid Watercolor on "
        "watercolor paper",
    )
    self.assertEqual(sdoc["genre"], ["Drawing"])
    self.assertNotIn("identifier", sdoc)
    self.assertEqual(sdoc["language"], ["English", "eng"])
    self.assertEqual(sdoc["provenance"], [u"Gift of Dunya Ramicova, 2014"])
    self.assertNotIn("publisher", sdoc)
    self.assertEqual(sdoc["relation"], [u"The Fairy Queen"])
    self.assertEqual(
        sdoc["rights"],
        [
            u"copyrighted",
            u"Creative Commons Attribution - NonCommercial-NoDerivatives "
            "(CC BY-NC-ND 4.0)",
        ],
    )
    self.assertEqual(sdoc["structmap_text"], "Brag")
    self.assertEqual(
        sdoc["structmap_url"],
        u"s3://static.ucldc.cdlib.org/media_json/"
        "0025ad8f-a44e-4f58-8238-c7b60b2fb850-media.json",
    )
    self.assertEqual(sdoc["subject"], [None])
    self.assertEqual(sdoc["type"], "image")
def test_sort_title_all_punctuation(self, mock_boto):
    """Test the sort_title produced for a title made only of punctuation.

    The fixture's title is overwritten with a punctuation-only string; the
    mapped ``sort_title`` is expected to be '~title unknown'.

    NOTE(review): ``mock_boto`` is not used in the body; presumably it is
    injected by a patch decorator outside this block -- confirm.
    """
    # ``with`` closes the fixture file; the bare open() leaked the handle.
    with open(DIR_FIXTURES + '/couchdb_title_all_punc.json') as fixture:
        doc = json.load(fixture)
    doc['sourceResource']['title'] = ['????$$%(@*#_!']
    sdoc = map_couch_to_solr_doc(doc)
    self.assertEqual(sdoc['id'], u'0025ad8f-a44e-4f58-8238-c7b60b2fb850')
    self.assertEqual(sdoc['sort_title'], '~title unknown')
def test_map_couch_to_solr_doc(self):
    """Test the mapping of a couch db source json doc to a solr schema
    compatible doc.

    Checks the mapped values field by field and that the raw ``campus``,
    ``repository``, ``collection``, ``item_count`` and ``extent`` keys are
    absent from the solr doc.

    NOTE(review): the e-mail addresses inside the expected publisher and
    rights strings look redacted; confirm them against the fixture.
    """
    # ``with`` closes the fixture file; the bare open() leaked the handle.
    with open(DIR_FIXTURES + '/couchdb_doc.json') as fixture:
        doc = json.load(fixture)
    sdoc = map_couch_to_solr_doc(doc)
    self.assertEqual(sdoc['id'], 'ark:/13030/ft009nb05r')
    self.assertEqual(sdoc['harvest_id_s'],
                     '23066--http://ark.cdlib.org/ark:/13030/ft009nb05r')
    self.assertEqual(sdoc['reference_image_md5'],
                     'f2610262f487f013fb96149f98990fb0')
    self.assertEqual(sdoc['reference_image_dimensions'], '1244:1500')
    self.assertEqual(sdoc['url_item'],
                     'http://ark.cdlib.org/ark:/13030/ft009nb05r')
    self.assertNotIn('item_count', sdoc)
    self.assertNotIn('campus', sdoc)
    self.assertEqual(sdoc['campus_url'],
                     [u'https://registry.cdlib.org/api/v1/campus/1/'])
    self.assertEqual(sdoc['campus_name'], [u'UC Berkeley'])
    self.assertEqual(
        sdoc['campus_data'],
        [u'https://registry.cdlib.org/api/v1/campus/1/::UC Berkeley'])
    self.assertNotIn('repository', sdoc)
    self.assertEqual(sdoc['repository_url'],
                     [u'https://registry.cdlib.org/api/v1/repository/4/'])
    self.assertEqual(sdoc['repository_name'], [u'Bancroft Library'])
    repo_data = [
        'https://registry.cdlib.org/api/v1/repository/4/::'
        'Bancroft Library::UC Berkeley'
    ]
    self.assertEqual(sdoc['repository_data'], repo_data)
    self.assertNotIn('collection', sdoc)
    self.assertEqual(
        sdoc['collection_url'],
        ['https://registry.cdlib.org/api/v1/collection/23066/'])
    self.assertEqual(sdoc['collection_name'],
                     ['Uchida (Yoshiko) photograph collection'])
    c_data = [
        'https://registry.cdlib.org/api/v1/collection/23066/::'
        'Uchida (Yoshiko) photograph collection'
    ]
    self.assertEqual(sdoc['collection_data'], c_data)
    self.assertEqual(sdoc['contributor'], ['contrib 1', 'contrib 2'])
    self.assertEqual(sdoc['spatial'], [
        'Palm Springs (Calif.)',
        'San Jacinto Mountains (Calif.)',
        'Tahquitz Stream',
        'Tahquitz Canyon',
    ])
    self.assertEqual(sdoc['coverage'], [
        'Palm Springs (Calif.)',
        'San Jacinto Mountains (Calif.)',
        'Tahquitz Stream',
        'Tahquitz Canyon',
    ])
    self.assertEqual(sdoc['creator'], [u'creator 1', u'creator 2'])
    self.assertEqual(
        sdoc['description'],
        [u'description 1', u'description 2', u'description 3'])
    self.assertEqual(sdoc['date'], ['between 1885-1890'])
    self.assertEqual(sdoc['language'], ['English'])
    # The original statement ended in a stray comma, wrapping the call's
    # result in a throwaway tuple.
    self.assertEqual(sdoc['publisher'], [
        u'The Bancroft Library, University of California, Berkeley, '
        'Berkeley, CA 94720-6000, Phone: (510) 642-6481, Fax: (510) '
        '642-7589, Email: [email protected], '
        'URL: http://bancroft.berkeley.edu/'
    ])
    self.assertEqual(sdoc['relation'], [
        u'http://www.oac.cdlib.org/findaid/ark:/13030/ft6k4007pc',
        u'http://bancroft.berkeley.edu/collections/jarda.html',
        u'hb158005k9',
        u'BANC PIC 1986.059--PIC',
        u'http://calisphere.universityofcalifornia.edu/',
        u'http://bancroft.berkeley.edu/',
    ])
    self.assertEqual(sdoc['rights'], [
        u'Transmission or reproduction of materials protected by '
        'copyright beyond that allowed by fair use requires the written '
        'permission of the copyright owners. Works not in the public '
        'domain cannot be commercially exploited without permission of '
        'the copyright owner. Responsibility for any use rests '
        'exclusively with the user.',
        u'The Bancroft Library--assigned',
        u'All requests to reproduce, publish, quote from, or otherwise '
        'use collection materials must be submitted in writing to the '
        'Head of Public Services, The Bancroft Library, University of '
        'California, Berkeley 94720-6000. See: '
        'http://bancroft.berkeley.edu/reference/permissions.html',
        u'University of California, Berkeley, Berkeley, CA 94720-6000, '
        'Phone: (510) 642-6481, Fax: (510) 642-7589, Email: '
        '*****@*****.**',
    ])
    self.assertEqual(sdoc['rights_uri'],
                     [u'http://rightsstatements.org/vocab/NoC-CR/1.0/'])
    self.assertEqual(sdoc['subject'], [
        u'Yoshiko Uchida photograph collection',
        u'Japanese American Relocation Digital Archive',
    ])
    self.assertEqual(sdoc['title'], [u'Neighbor'])
    self.assertEqual(sdoc['sort_title'], u'neighbor')
    self.assertEqual(sdoc['type'], u'image')
    self.assertEqual(sdoc['format'], 'mods')
    # assertNotIn reports the container on failure, unlike assertTrue.
    self.assertNotIn('extent', sdoc)
    self.assertEqual(sdoc['temporal'], [u'1964-1965'])
def test_map_couch_to_solr_doc(self):
    """Test the mapping of a couch db source json doc to a solr schema
    compatible doc.

    Verifies each mapped field and that the raw ``campus``, ``repository``,
    ``collection``, ``item_count`` and ``extent`` keys are not present in
    the solr doc.

    NOTE(review): the e-mail addresses inside the expected publisher and
    rights strings look redacted; confirm them against the fixture.
    """
    # Context manager guarantees the fixture file handle is released.
    with open(DIR_FIXTURES + '/couchdb_doc.json') as fixture:
        doc = json.load(fixture)
    sdoc = map_couch_to_solr_doc(doc)
    self.assertEqual(sdoc['id'], 'ark:/13030/ft009nb05r')
    self.assertEqual(sdoc['harvest_id_s'],
                     '23066--http://ark.cdlib.org/ark:/13030/ft009nb05r')
    self.assertEqual(sdoc['reference_image_md5'],
                     'f2610262f487f013fb96149f98990fb0')
    self.assertEqual(sdoc['reference_image_dimensions'], '1244:1500')
    self.assertEqual(sdoc['url_item'],
                     'http://ark.cdlib.org/ark:/13030/ft009nb05r')
    self.assertNotIn('item_count', sdoc)
    self.assertNotIn('campus', sdoc)
    self.assertEqual(sdoc['campus_url'],
                     [u'https://registry.cdlib.org/api/v1/campus/1/'])
    self.assertEqual(sdoc['campus_name'], [u'UC Berkeley'])
    self.assertEqual(
        sdoc['campus_data'],
        [u'https://registry.cdlib.org/api/v1/campus/1/::UC Berkeley'])
    self.assertNotIn('repository', sdoc)
    self.assertEqual(sdoc['repository_url'],
                     [u'https://registry.cdlib.org/api/v1/repository/4/'])
    self.assertEqual(sdoc['repository_name'], [u'Bancroft Library'])
    repo_data = [
        'https://registry.cdlib.org/api/v1/repository/4/::'
        'Bancroft Library::UC Berkeley'
    ]
    self.assertEqual(sdoc['repository_data'], repo_data)
    self.assertNotIn('collection', sdoc)
    self.assertEqual(
        sdoc['collection_url'],
        ['https://registry.cdlib.org/api/v1/collection/23066/'])
    self.assertEqual(sdoc['collection_name'],
                     ['Uchida (Yoshiko) photograph collection'])
    c_data = [
        'https://registry.cdlib.org/api/v1/collection/23066/::'
        'Uchida (Yoshiko) photograph collection'
    ]
    self.assertEqual(sdoc['collection_data'], c_data)
    self.assertEqual(sdoc['contributor'], ['contrib 1', 'contrib 2'])
    self.assertEqual(sdoc['spatial'], [
        'Palm Springs (Calif.)',
        'San Jacinto Mountains (Calif.)',
        'Tahquitz Stream',
        'Tahquitz Canyon',
    ])
    self.assertEqual(sdoc['coverage'], [
        'Palm Springs (Calif.)',
        'San Jacinto Mountains (Calif.)',
        'Tahquitz Stream',
        'Tahquitz Canyon',
    ])
    self.assertEqual(sdoc['creator'], [u'creator 1', u'creator 2'])
    self.assertEqual(sdoc['description'], [
        u'description 1', u'description 2', u'description 3'
    ])
    self.assertEqual(sdoc['date'], ['between 1885-1890'])
    self.assertEqual(sdoc['language'], ['English'])
    # The original statement ended in a stray comma, wrapping the call's
    # result in a throwaway tuple.
    self.assertEqual(sdoc['publisher'], [
        u'The Bancroft Library, University of California, Berkeley, '
        'Berkeley, CA 94720-6000, Phone: (510) 642-6481, Fax: (510) '
        '642-7589, Email: [email protected], '
        'URL: http://bancroft.berkeley.edu/'
    ])
    self.assertEqual(sdoc['relation'], [
        u'http://www.oac.cdlib.org/findaid/ark:/13030/ft6k4007pc',
        u'http://bancroft.berkeley.edu/collections/jarda.html',
        u'hb158005k9',
        u'BANC PIC 1986.059--PIC',
        u'http://calisphere.universityofcalifornia.edu/',
        u'http://bancroft.berkeley.edu/',
    ])
    self.assertEqual(sdoc['rights'], [
        u'Transmission or reproduction of materials protected by '
        'copyright beyond that allowed by fair use requires the written '
        'permission of the copyright owners. Works not in the public '
        'domain cannot be commercially exploited without permission of '
        'the copyright owner. Responsibility for any use rests '
        'exclusively with the user.',
        u'The Bancroft Library--assigned',
        u'All requests to reproduce, publish, quote from, or otherwise '
        'use collection materials must be submitted in writing to the '
        'Head of Public Services, The Bancroft Library, University of '
        'California, Berkeley 94720-6000. See: '
        'http://bancroft.berkeley.edu/reference/permissions.html',
        u'University of California, Berkeley, Berkeley, CA 94720-6000, '
        'Phone: (510) 642-6481, Fax: (510) 642-7589, Email: '
        '*****@*****.**',
    ])
    self.assertEqual(sdoc['subject'], [
        u'Yoshiko Uchida photograph collection',
        u'Japanese American Relocation Digital Archive',
    ])
    self.assertEqual(sdoc['title'], [u'Neighbor'])
    self.assertEqual(sdoc['sort_title'], u'neighbor')
    self.assertEqual(sdoc['type'], u'image')
    self.assertEqual(sdoc['format'], 'mods')
    # assertNotIn reports the container on failure, unlike assertTrue.
    self.assertNotIn('extent', sdoc)
    self.assertEqual(sdoc['temporal'], [u'1964-1965'])
def test_push_doc_to_solr(self, mock_solr):
    """Unit test calls to solr.

    Maps the couchdb_doc fixture and hands the result to
    ``push_doc_to_solr`` together with ``mock_solr``; the test passes as
    long as no exception is raised (nothing is asserted).
    """
    # ``with`` closes the fixture file; the bare open() leaked the handle.
    with open(DIR_FIXTURES + '/couchdb_doc.json') as fixture:
        doc = json.load(fixture)
    sdoc = map_couch_to_solr_doc(doc)
    push_doc_to_solr(sdoc, mock_solr)
def test_map_couch_to_solr_doc(self):
    """Test the mapping of a couch db source json doc to a solr schema
    compatible doc.

    Asserts every mapped field and that the raw ``campus``, ``repository``,
    ``collection``, ``item_count`` and ``extent`` keys do not appear in the
    solr doc.

    NOTE(review): the e-mail addresses inside the expected publisher and
    rights strings look redacted; confirm them against the fixture.
    """
    # Open the fixture in a context manager so the handle is closed.
    with open(DIR_FIXTURES + "/couchdb_doc.json") as fixture:
        doc = json.load(fixture)
    sdoc = map_couch_to_solr_doc(doc)
    self.assertEqual(sdoc["id"], "ark:/13030/ft009nb05r")
    self.assertEqual(sdoc["harvest_id_s"],
                     "23066--http://ark.cdlib.org/ark:/13030/ft009nb05r")
    self.assertEqual(sdoc["reference_image_md5"],
                     "f2610262f487f013fb96149f98990fb0")
    self.assertEqual(sdoc["reference_image_dimensions"], "1244:1500")
    self.assertEqual(sdoc["url_item"],
                     "http://ark.cdlib.org/ark:/13030/ft009nb05r")
    self.assertNotIn("item_count", sdoc)
    self.assertNotIn("campus", sdoc)
    self.assertEqual(sdoc["campus_url"],
                     [u"https://registry.cdlib.org/api/v1/campus/1/"])
    self.assertEqual(sdoc["campus_name"], [u"UC Berkeley"])
    self.assertEqual(
        sdoc["campus_data"],
        [u"https://registry.cdlib.org/api/v1/campus/1/::UC Berkeley"])
    self.assertNotIn("repository", sdoc)
    self.assertEqual(sdoc["repository_url"],
                     [u"https://registry.cdlib.org/api/v1/repository/4/"])
    self.assertEqual(sdoc["repository_name"], [u"Bancroft Library"])
    repo_data = ["https://registry.cdlib.org/api/v1/repository/4/::"
                 "Bancroft Library::UC Berkeley"]
    self.assertEqual(sdoc["repository_data"], repo_data)
    self.assertNotIn("collection", sdoc)
    self.assertEqual(
        sdoc["collection_url"],
        ["https://registry.cdlib.org/api/v1/collection/23066/"])
    self.assertEqual(sdoc["collection_name"],
                     ["Uchida (Yoshiko) photograph collection"])
    c_data = ["https://registry.cdlib.org/api/v1/collection/23066/::"
              "Uchida (Yoshiko) photograph collection"]
    self.assertEqual(sdoc["collection_data"], c_data)
    self.assertEqual(sdoc["contributor"], ["contrib 1", "contrib 2"])
    self.assertEqual(
        sdoc["spatial"],
        [
            "Palm Springs (Calif.)",
            "San Jacinto Mountains (Calif.)",
            "Tahquitz Stream",
            "Tahquitz Canyon",
        ],
    )
    self.assertEqual(
        sdoc["coverage"],
        [
            "Palm Springs (Calif.)",
            "San Jacinto Mountains (Calif.)",
            "Tahquitz Stream",
            "Tahquitz Canyon",
        ],
    )
    self.assertEqual(sdoc["creator"], [u"creator 1", u"creator 2"])
    self.assertEqual(
        sdoc["description"],
        [u"description 1", u"description 2", u"description 3"])
    self.assertEqual(sdoc["date"], ["between 1885-1890"])
    self.assertEqual(sdoc["language"], ["English"])
    # The original statement ended in a stray comma, wrapping the call's
    # result in a throwaway tuple.
    self.assertEqual(
        sdoc["publisher"],
        [
            u"The Bancroft Library, University of California, Berkeley, "
            "Berkeley, CA 94720-6000, Phone: (510) 642-6481, Fax: (510) "
            "642-7589, Email: [email protected], "
            "URL: http://bancroft.berkeley.edu/"
        ],
    )
    self.assertEqual(
        sdoc["relation"],
        [
            u"http://www.oac.cdlib.org/findaid/ark:/13030/ft6k4007pc",
            u"http://bancroft.berkeley.edu/collections/jarda.html",
            u"hb158005k9",
            u"BANC PIC 1986.059--PIC",
            u"http://calisphere.universityofcalifornia.edu/",
            u"http://bancroft.berkeley.edu/",
        ],
    )
    self.assertEqual(
        sdoc["rights"],
        [
            u"Transmission or reproduction of materials protected by "
            "copyright beyond that allowed by fair use requires the written "
            "permission of the copyright owners. Works not in the public "
            "domain cannot be commercially exploited without permission of "
            "the copyright owner. Responsibility for any use rests "
            "exclusively with the user.",
            u"The Bancroft Library--assigned",
            u"All requests to reproduce, publish, quote from, or otherwise "
            "use collection materials must be submitted in writing to the "
            "Head of Public Services, The Bancroft Library, University of "
            "California, Berkeley 94720-6000. See: "
            "http://bancroft.berkeley.edu/reference/permissions.html",
            u"University of California, Berkeley, Berkeley, CA 94720-6000, "
            "Phone: (510) 642-6481, Fax: (510) 642-7589, Email: "
            "*****@*****.**",
        ],
    )
    self.assertEqual(
        sdoc["subject"],
        [
            u"Yoshiko Uchida photograph collection",
            u"Japanese American Relocation Digital Archive",
        ],
    )
    self.assertEqual(sdoc["title"], [u"Neighbor"])
    self.assertEqual(sdoc["sort_title"], u"neighbor")
    self.assertEqual(sdoc["type"], u"image")
    self.assertEqual(sdoc["format"], "mods")
    # assertNotIn reports the container on failure, unlike assertTrue.
    self.assertNotIn("extent", sdoc)
    self.assertEqual(sdoc["temporal"], [u"1964-1965"])
def test_push_doc_to_solr(self, mock_solr):
    """Unit test calls to solr.

    The couchdb_doc fixture is mapped and passed to ``push_doc_to_solr``
    along with ``mock_solr``. Nothing is asserted; the test only fails if
    an exception is raised.
    """
    # Context manager guarantees the fixture file handle is released.
    with open(DIR_FIXTURES + "/couchdb_doc.json") as fixture:
        doc = json.load(fixture)
    sdoc = map_couch_to_solr_doc(doc)
    push_doc_to_solr(sdoc, mock_solr)
def test_sort_title_all_punctuation(self):
    """Test the sort_title produced for a title made only of punctuation.

    After the fixture's title is replaced by a punctuation-only string,
    the mapped ``sort_title`` is expected to be "~title unknown".
    """
    # Open the fixture in a context manager so the handle is closed.
    with open(DIR_FIXTURES + "/couchdb_title_all_punc.json") as fixture:
        doc = json.load(fixture)
    doc["sourceResource"]["title"] = ["????$$%(@*#_!"]
    sdoc = map_couch_to_solr_doc(doc)
    self.assertEqual(sdoc["id"], u"0025ad8f-a44e-4f58-8238-c7b60b2fb850")
    self.assertEqual(sdoc["sort_title"], "~title unknown")