def test_enrich_subject_one_char_string2(): """Should not include subject""" INPUT = { "id": "123", "spatial": [ {"name": "Asheville"}, {"name": "North Carolina"} ], "subject": [ "h", "hi" ] } EXPECTED = { "id": "123", "spatial": [ {"name": "Asheville"}, {"name": "North Carolina"} ], "subject": [] } resp, content = _get_server_response(json.dumps(INPUT)) assert resp.status == 200 print str(json.loads(content)) assert json.loads(content) == EXPECTED
def test_physical_format_from_format_and_type():
    """Format values should survive a round trip through enrich-type
    followed by enrich-format, picking up the type values."""
    formats = ["76.8 x 104 cm", "Oil on canvas",
               "7 1/4 x 6 inches (18.4 x 15.2 cm)",
               "Sheet: 9 1/2 x 12 1/8 inches (24.1 x 30.8 cm)"]
    types = ["Paintings", "Painting"]
    request_doc = {"format": list(formats), "type": list(types)}
    expected_doc = {"format": formats + types}

    resp, content = H.request(
        server() + "enrich-type?prop=type&format_field=format",
        "POST", body=json.dumps(request_doc))
    assert str(resp.status).startswith("2")
    fetched = json.loads(content)
    assert fetched == expected_doc, DictDiffer(expected_doc, fetched).diff()

    resp, content = H.request(
        server() + "enrich-format?prop=format&type_field=type",
        "POST", body=content)
    assert str(resp.status).startswith("2")
    fetched = json.loads(content)
    assert fetched == expected_doc, DictDiffer(expected_doc, fetched).diff()
def test_move_date_values_iterify_if_string():
    """A bare string value should be wrapped in a list before the date
    is moved to temporal."""
    prop = "sourceResource/spatial"
    source = {"sourceResource": {"spatial": "Asheville", "temporal": "1940"}}
    expected = {
        "sourceResource": {
            "spatial": ["Asheville"],
            "temporal": ["1940"]
        }
    }
    resp, content = _get_server_response(json.dumps(source), prop=prop)
    assert resp.status == 200
    print >> sys.stderr, json.loads(content)
    print >> sys.stderr, expected
    assert json.loads(content) == expected
def test_enrich_subject_one_char_string1(): """Should not add one or two char strings to DPLA schema""" INPUT = { "id": "123", "spatial": [ {"name": "Asheville"}, {"name": "North Carolina"} ], "subject": [ "subject", "a", "ab", "hello" ] } EXPECTED = { "id": "123", "spatial": [ {"name": "Asheville"}, {"name": "North Carolina"} ], "subject": [ {"name": "Subject"}, {"name": "Hello"} ] } resp, content = _get_server_response(json.dumps(INPUT)) assert resp.status == 200 print str(json.loads(content)) assert json.loads(content) == EXPECTED
def test_enrich_date_parse_century_date():
    """Century dates ('19th c.', '19th century') should produce only a
    displayDate, with begin/end left as None."""
    url = server() + "enrich_earliest_date?prop=date"
    # (sent value, expected displayDate) — the trailing period of "19th c."
    # is stripped by the service.
    cases = [("19th c.", "19th c"), ("19th century", "19th century")]
    for sent, display in cases:
        resp, content = H.request(url, "POST", body=json.dumps({"date": sent}))
        result = json.loads(content)
        expected = {"begin": None, "end": None, "displayDate": display}
        assert result["date"] == expected, \
            "%s != %s" % (result["date"], expected)
def dedup_value(body, ctype, action="dedup_value", prop=None):
    """Service that accepts a JSON document and enriches each of the
    comma-separated property paths in prop by removing duplicate values.

    Duplicates are detected case-insensitively after stripping spaces,
    periods, parens, brackets, and braces; the first occurrence's original
    (uncleaned) value is kept, in its original position.

    Returns the (possibly updated) document as JSON, or a plain-text error
    with a 500 status when the body is not valid JSON.
    """
    # Fix: the original parsed the body twice (copy-pasted try/except) and
    # only parsed/returned at all when prop was set; parse once up front and
    # always return the document.
    try:
        data = json.loads(body)
    except Exception:
        response.code = 500
        response.add_header('content-type', 'text/plain')
        return "Unable to parse body as JSON"

    if prop:
        for p in prop.split(","):
            if exists(data, p):
                v = getprop(data, p)
                if isinstance(v, list):
                    # Normalize for comparison only: remove whitespace,
                    # periods, parens, brackets, braces, and lowercase.
                    clone = [re.sub(r"[ \.\(\)\[\]\{\}]", "", s).lower()
                             for s in v]
                    # Index of the first occurrence of each unique value;
                    # sorted so output order is deterministic and matches
                    # the input order (a plain set iterates arbitrarily).
                    index = sorted(set(clone.index(s) for s in set(clone)))
                    setprop(data, p, [v[i] for i in index])

    return json.dumps(data)
def mix_freemix(body, ctype):
    # See: http://foundry.zepheira.com/issues/137#note-10
    """Mix exactly two Freemix/Recollection data sets into one item list.

    Request body shape:
    {
        "datasets": {"dataset1": <data.json URL>, "dataset2": <data.json URL>},
        "alignProperties": {"label": ..., "dataset1": ..., "dataset2": ...}
    }
    alignProperties is optional; when present, each data set's aligned
    property is copied onto its items under the shared label.
    """
    USER, PASSWD = "loc", "recollection"
    cache_dir = make_named_cache("mix.freemix.json")
    H = httplib2.Http(cache_dir)
    if USER:
        H.add_credentials(USER, PASSWD)

    request = json.loads(body)
    datasets = request["datasets"]
    alignments = request.get("alignProperties")
    if len(datasets) != 2:
        raise ValueError("You must provide Mixer exactly 2 data sets")

    def fetch_items(key):
        # Replace the data set URL with the content
        logger.debug("Processing dataset: %s" % (datasets[key]))
        resp, content = H.request(datasets[key])
        return json.loads(content)[u"items"]

    mixed = []
    if alignments:
        newprop_label = alignments["label"]
        # Potluck (the usual mixer client) seems to generate property names
        # such as "Activity / Activity" which Exhibit cannot handle. Work
        # around that. See: http://foundry.zepheira.com/issues/334
        newprop = UNSUPPORTED_IN_EXHIBITKEY.sub("_", newprop_label)
        for dataset in datasets:
            prop = alignments[dataset]
            logger.debug("Mapping: %s -> %s" % (prop, newprop))
            for item in fetch_items(dataset):
                if prop in item:
                    item[newprop] = item[prop]
                mixed.append(item)
    else:
        for dataset in datasets:
            mixed += fetch_items(dataset)

    # Re-number every item so ids are unique across the mix.
    for (counter, item) in enumerate(mixed):
        item[u"id"] = u"_%i" % counter
    return json.dumps({"items": mixed}, indent=4)
def enrich_storage(body, ctype):
    """Establishes a pipeline of services identified by an ordered list of
    URIs provided in request header "Pipeline-Item", runs every posted
    record through it, and returns the enriched records plus ingest
    statistics (counts of items, collections, missing ids, missing
    sourceResource, and any pipeline errors).
    """
    request_headers = copy_headers_to_dict(request.environ)
    rec_enrichments = request_headers.get(u"Pipeline-Item", "").split(",")

    records = json.loads(body)

    # Counts
    enriched_coll_count = 0
    enriched_item_count = 0
    missing_id_count = 0
    missing_source_resource_count = 0
    errors = []

    enriched_records = {}
    for record in records:
        error, enriched_record_text = pipe(record, ctype, rec_enrichments,
                                           "HTTP_PIPELINE_ITEM")
        if error:
            errors.append(error)
        enriched_record = json.loads(enriched_record_text)

        if enriched_record.get("_id", None):
            ingest_type = enriched_record.get("ingestType")
            # Item records should have sourceResource
            if (ingest_type == "item" and
                    not "sourceResource" in enriched_record):
                logger.error("Record %s does not have sourceResource: %s" %
                             (enriched_record["_id"], enriched_record))
                missing_source_resource_count += 1
            else:
                enriched_records[enriched_record["_id"]] = enriched_record
                if ingest_type == "item":
                    enriched_item_count += 1
                else:
                    enriched_coll_count += 1
        else:
            logger.error("Found a record without an _id %s" % enriched_record)
            missing_id_count += 1

    data = {
        "enriched_records": enriched_records,
        "enriched_coll_count": enriched_coll_count,
        "enriched_item_count": enriched_item_count,
        "missing_id_count": missing_id_count,
        "missing_source_resource_count": missing_source_resource_count,
        "errors": errors
    }

    # Fix: removed an unreachable trailing "return json.dumps(docs)" that
    # referenced an undefined name.
    return json.dumps(data)
def primotodpla(body, ctype, geoprop=None):
    """Convert output of JSON-ified PRIMO (MWDL) format into the DPLA
    JSON-LD format.

    Parameter "geoprop" specifies the property name containing lat/long
    coords.
    """
    try:
        data = json.loads(body)
    except:
        response.code = 500
        response.add_header("content-type", "text/plain")
        return "Unable to parse body as JSON"

    global GEOPROP
    GEOPROP = geoprop

    out = {"@context": CONTEXT, "sourceResource": {}}

    # Apply all single-field transformation rules from the original document.
    for path in CHO_TRANSFORMER:
        if exists(data, path):
            out["sourceResource"].update(CHO_TRANSFORMER[path](data, path))
    for path in AGGREGATION_TRANSFORMER:
        if exists(data, path):
            out.update(AGGREGATION_TRANSFORMER[path](data, path))

    # Apply transformations that depend on more than one original field.
    out["sourceResource"].update(
        multi_transform(data, "spatial", ["display/lds08"], "list"))
    out["sourceResource"].update(
        multi_transform(data, "isPartOf", ["display/lds04"]))
    out["sourceResource"].update(
        multi_transform(data, "title", ["display/title", "display/lds10"]))
    out.update(multi_transform(data, "dataProvider", ["display/lds03"]))

    # Additional content not from the original document: provider identity
    # arrives base64-encoded in the Contributor header.
    if "HTTP_CONTRIBUTOR" in request.environ:
        try:
            out["provider"] = json.loads(
                base64.b64decode(request.environ["HTTP_CONTRIBUTOR"]))
        except Exception as e:
            logger.debug("Unable to decode Contributor header value: "+request.environ["HTTP_CONTRIBUTOR"]+"---"+repr(e))

    # Strip out keys with None/null/empty values.
    out = dict((k, v) for (k, v) in out.items() if v)
    return json.dumps(out)
def set_prop(body, ctype, prop=None, value=None, condition_prop=None,
             condition_value=None, _dict=None):
    """Sets the value of prop.

    Keyword arguments:
    body -- the content to load
    ctype -- the type of content
    prop -- the prop to set
    value -- the value to set prop to
    condition_prop -- (optional) the field that must exist to set the prop
    condition_value -- (optional, if condition_prop set) the value that
                       condition_prop must have to set the prop
    _dict -- if truthy, value is parsed as JSON before being set
    """
    try:
        data = json.loads(body)
    except:
        response.code = 500
        response.add_header('content-type', 'text/plain')
        return "Unable to parse body as JSON"

    if not value:
        logger.error("No value was supplied to set_prop.")
    else:
        if _dict:
            try:
                value = json.loads(value)
            except Exception as e:
                logger.error("Unable to parse set_prop value: %s" % e)
                return body

        def _set_prop():
            """Returns true if

            1. The condition_prop is not set OR
            2. The condition_prop is set and exists and the condition_value
               is None OR
            3. The condition_prop is set and exists, the condition_value is
               set, and the value of condition_prop equals condition_value
            """
            return (not condition_prop or
                    (exists(data, condition_prop) and
                     (not condition_value or
                      getprop(data, condition_prop) == condition_value)))

        if _set_prop():
            try:
                setprop(data, prop, value)
            except Exception as e:
                logger.error("Error in set_prop: %s" % e)

    # Fix: the original fell off the end and implicitly returned None;
    # callers (see the test_set_prop* tests) expect the document back.
    return json.dumps(data)
def oaisetname(body, ctype, sets_service=None):
    """Service that accepts a JSON document and sets its "title" (and
    possibly "description") by looking up the set spec embedded in the
    document _id via the service named by the sets_service parameter.

    The sets service is expected to return JSON records carrying
    setSpec/setName/setDescription fields.
    """
    if not sets_service:
        response.code = 500
        response.add_header('content-type', 'text/plain')
        return "No set service has been selected"

    try:
        data = json.loads(body)
    except:
        response.code = 500
        response.add_header('content-type', 'text/plain')
        return "Unable to parse body as JSON"

    # Resolve a relative sets_service URL against this server.
    if not is_absolute(sets_service):
        prefix = request.environ['wsgi.url_scheme'] + '://'
        if request.environ.get('HTTP_HOST'):
            prefix += request.environ['HTTP_HOST']
        else:
            prefix += request.environ['SERVER_NAME']
        sets_service = prefix + sets_service

    H = httplib2.Http('/tmp/.cache')
    H.force_exception_as_status_code = True
    resp, content = H.request(sets_service)
    if not resp[u'status'].startswith('2'):
        # Best-effort: log and fall through; the JSON parse below will
        # produce the error response if the payload is unusable.
        print >> sys.stderr, ' HTTP error ('+resp[u'status']+') resolving URL: '+sets_service

    try:
        sets = json.loads(content)
    except:
        response.code = 500
        response.add_header('content-type', 'text/plain')
        return "Unable to parse sets service result as JSON: " + repr(content)

    # The set spec is the _id segment after the "--" separator.
    sep = data['_id'].find('--')
    match = data['_id'][sep + 2:] if sep > -1 else data['_id']
    for candidate in sets:
        if match == candidate['setSpec']:
            data[u'title'] = candidate['setName']
            if candidate['setDescription']:
                data[u'description'] = candidate['setDescription'].strip()
            break

    return json.dumps(data)
def oaisetname(body, ctype, sets_service=None):
    """Service that accepts a JSON document and sets its "title" (and
    possibly "description") by looking up the set named in the Collection
    request header via the service named by the sets_service parameter.

    NOTE(review): near-duplicate of the _id-based oaisetname in this file;
    this variant keys on the HTTP Collection header and does not strip the
    description.
    """
    if not sets_service:
        response.code = 500
        response.add_header('content-type', 'text/plain')
        return "No set service has been selected"

    try:
        data = json.loads(body)
    except:
        response.code = 500
        response.add_header('content-type', 'text/plain')
        return "Unable to parse body as JSON"

    try:
        collection = request.environ['HTTP_COLLECTION']
    except:
        response.code = 500
        response.add_header('content-type', 'text/plain')
        return "No Collection header found"

    H = httplib2.Http('/tmp/.cache')
    H.force_exception_as_status_code = True
    resp, content = H.request(sets_service)
    if not resp[u'status'].startswith('2'):
        # Best-effort: log and fall through to the JSON parse below.
        print >> sys.stderr, ' HTTP error ('+resp[u'status']+') resolving URL: '+sets_service

    try:
        sets = json.loads(content)
    except:
        response.code = 500
        response.add_header('content-type', 'text/plain')
        return "Unable to parse sets service result as JSON: " + repr(content)

    for candidate in sets:
        if candidate['setSpec'] == collection:
            data[u'title'] = candidate['setName']
            if candidate['setDescription']:
                data[u'description'] = candidate['setDescription']
            break

    return json.dumps(data)
def test_copy_prop_to_prop_create_dict_key1():
    """Copying two props in sequence should accumulate keys in the
    created target dict."""
    to_prop = "sourceResource/to_dict"
    create = True
    source = {
        "key1": "value1",
        "sourceResource": {"key2": "value2", "key3": "value3"},
        "key4": "value4"
    }
    after_first = {
        "key1": "value1",
        "sourceResource": {
            "key2": "value2",
            "key3": "value3",
            "to_dict": {"key1": "value1"}
        },
        "key4": "value4"
    }
    after_second = {
        "key1": "value1",
        "sourceResource": {
            "key2": "value2",
            "key3": "value3",
            "to_dict": {"key1": "value1", "key2": "value2"}
        },
        "key4": "value4"
    }

    resp, content = _get_server_response(json.dumps(source), prop="key1",
                                         to_prop=to_prop, key="key1",
                                         create=create)
    assert resp.status == 200
    assert json.loads(content) == after_first

    resp, content = _get_server_response(json.dumps(after_first),
                                         prop="sourceResource/key2",
                                         to_prop=to_prop, key="key2",
                                         create=create)
    assert resp.status == 200
    assert json.loads(content) == after_second
def test_description_transform2():
    """mods/note #text should map to sourceResource description"""
    source = {"metadata": {"mods": {"note": {"#text": "A description"}}}}
    resp, content = _get_server_response(json.dumps(source),
                                         provider="HARVARD")
    assert resp.status == 200
    assert_same_jsons({"description": "A description"},
                      json.loads(content)["sourceResource"])
def capitalize_value(body, ctype, prop=",".join(DEFAULT_PROP), exclude=None):
    """Service that accepts a JSON document and capitalizes each of the
    comma-separated prop fields of that document, optionally skipping the
    single field named by exclude.
    """
    if prop is None:
        response.code = 500
        response.add_header('content-type', 'text/plain')
        msg = "Prop param is None"
        logger.error(msg)
        return msg

    try:
        data = json.loads(body)
    except Exception as e:
        response.code = 500
        response.add_header('content-type', 'text/plain')
        return "Unable to parse body as JSON\n" + str(e)

    props = prop.split(",")
    if exclude in props:
        props.remove(exclude)
    for field in props:
        if field:
            capitalize(data, field)

    return json.dumps(data)
def test_convert_spatial_string_to_dictionary():
    """UIUC spatial enrichment should drop date-like spatial entries"""
    source = {
        "id": "12345",
        "sourceResource": {
            "spatial": [{"name": "Honolulu, HI"},
                        {"name": "1972 to Present"}]
        },
        "creator": "David"
    }
    expected = {
        "id": "12345",
        "sourceResource": {"spatial": [{"name": "Honolulu, HI"}]},
        "creator": "David"
    }
    resp, content = H.request(server() + "uiuc_enrich_location", "POST",
                              body=json.dumps(source))
    assert resp.status == 200
    assert json.loads(content) == expected
def test_enrich_location_after_provider_specific_enrich_location4():
    """Semicolon-delimited city/county pairs should split into separate
    spatial dicts when the provider enrichment did not set a state."""
    source = {
        "id": "12345",
        "sourceResource": {
            "spatial": [{"city": "Asheville; La Jolla",
                         "county": "Buncombe;San Diego",
                         "country": "United States"}]
        },
        "creator": "Miguel",
    }
    expected = {
        "id": "12345",
        "sourceResource": {
            "spatial": [
                {"city": "Asheville", "county": "Buncombe",
                 "country": "United States"},
                {"city": "La Jolla", "county": "San Diego"},
            ]
        },
        "creator": "Miguel",
    }
    resp, content = H.request(server() + "enrich_location", "POST",
                              body=json.dumps(source))
    assert resp.status == 200
    assert json.loads(content) == expected
def artstor_cleanup_creator(body, ctype, prop="sourceResource/creator"):
    """Service that accepts a JSON document and cleans its
    sourceResource/creator field by stripping any leading CLEANUP value
    (matched case-insensitively) from each creator entry.
    """
    try:
        data = json.loads(body)
    except:
        response.code = 500
        response.add_header('content-type', 'text/plain')
        return "Unable to parse body as JSON"

    if exists(data, prop):
        creators = getprop(data, prop)
        if not isinstance(creators, list):
            creators = [creators]
        cleaned = []
        for creator in creators:
            for junk in CLEANUP:
                # Only strip the prefix when the value begins with it.
                creator = re.sub(r"(?i)^{0}".format(junk), "",
                                 creator.strip()).lstrip()
            cleaned.append(creator)
        # Preserve the original scalar-vs-list shape.
        setprop(data, prop, cleaned[0] if len(cleaned) == 1 else cleaned)

    return json.dumps(data)
def test_set_prop5():
    """Should set prop to value, since condition_prop exists"""
    source = {
        "key1": "value1",
        "sourceResource": {"key1": "value1", "rights": "value2"},
        "key2": "value2"
    }
    expected = {
        "key1": "value1",
        "sourceResource": {"key1": "value1", "rights": "rights"},
        "key2": "value2"
    }
    resp, content = _get_server_response(json.dumps(source),
                                         prop="sourceResource/rights",
                                         value="rights",
                                         condition_prop="sourceResource")
    assert resp.status == 200
    assert json.loads(content) == expected
def test_unset_prop2():
    """Should unset prop since the is_digit condition is met"""
    source = {
        "_id": "12345",
        "key1": "value1",
        "sourceResource": {"key1": "value1", "rights": "20010983784"},
        "key2": "value2"
    }
    expected = {
        "_id": "12345",
        "key1": "value1",
        "sourceResource": {"key1": "value1"},
        "key2": "value2"
    }
    resp, content = _get_server_response(json.dumps(source), action="unset",
                                         prop="sourceResource/rights",
                                         condition="is_digit")
    assert resp.status == 200
    print_error_log()
    assert json.loads(content) == expected
def test_unset_prop1():
    """Should unset prop unconditionally"""
    source = {
        "_id": "12345",
        "key1": "value1",
        "sourceResource": {"key1": "value1", "rights": "value2"},
        "key2": "value2"
    }
    expected = {
        "_id": "12345",
        "key1": "value1",
        "sourceResource": {"key1": "value1"},
        "key2": "value2"
    }
    resp, content = _get_server_response(json.dumps(source), action="unset",
                                         prop="sourceResource/rights")
    assert resp.status == 200
    assert json.loads(content) == expected
def test_enrich_list_of_dictionaries_and_strings():
    """A spatial list mixing dicts and bare strings should come back as
    all dicts, the strings wrapped under "name"."""
    source = {
        "id": "12345",
        "sourceResource": {
            "spatial": [
                {"country": "United States", "county": "Buncombe",
                 "state": "North Carolina"},
                "Rushmore, Mount",
                "Mount Rushmore National Memorial",
            ]
        },
    }
    expected = {
        "id": "12345",
        "sourceResource": {
            "spatial": [
                {"country": "United States", "county": "Buncombe",
                 "state": "North Carolina"},
                {"name": "Rushmore, Mount"},
                {"name": "Mount Rushmore National Memorial"},
            ]
        },
    }
    resp, content = H.request(server() + "enrich_location", "POST",
                              body=json.dumps(source))
    assert resp.status == 200
    assert json.loads(content) == expected
def test_set_prop2():
    """Should create the prop and set its value"""
    source = {
        "key1": "value1",
        "sourceResource": {"key1": "value1"},
        "key2": "value2"
    }
    expected = {
        "key1": "value1",
        "sourceResource": {"key1": "value1", "rights": "rights"},
        "key2": "value2"
    }
    resp, content = _get_server_response(json.dumps(source),
                                         prop="sourceResource/rights",
                                         value="rights")
    assert resp.status == 200
    assert json.loads(content) == expected
def nypl_identify_object(body, ctype, download="True"):
    """Set data["object"] to the NYPL image-service preview URL built from
    originalRecord/tmp_image_id, and record the object download status
    (PENDING when download == "True", else IGNORE) under admin.
    """
    try:
        data = json.loads(body)
    except:
        response.code = 500
        response.add_header('content-type', 'text/plain')
        return "Unable to parse body as JSON"

    record_key = u"originalRecord"
    image_key = u"tmp_image_id"

    if record_key not in data:
        logger.error("There is no '%s' key in JSON for doc [%s].",
                     record_key, data[u'id'])
        return body
    if image_key not in data[record_key]:
        logger.error("There is no '%s/%s' key in JSON for doc [%s].",
                     record_key, image_key, data[u'id'])
        return body

    data["object"] = "http://images.nypl.org/index.php?id={0}&t=t".format(
        data[record_key][image_key])

    status = PENDING if download == "True" else IGNORE
    data.setdefault("admin", {})["object_status"] = status

    return json.dumps(data)
def scdl_enrich_location(body, ctype, action="scdl_enrich_location",
                         prop="sourceResource/spatial"):
    """Service that accepts a JSON document and enriches its "spatial"
    field for the SCDL profiles, pulling county or city names out of
    "(S.C.)"-style place strings.
    """
    try:
        data = json.loads(body)
    except:
        response.code = 500
        response.add_header('content-type', 'text/plain')
        return "Unable to parse body as JSON"

    if exists(data, prop):
        # Dicts are enriched in place; the document keeps the same objects.
        for place in iterify(getprop(data, prop)):
            name = replace_state_abbreviations(place["name"].rstrip())
            place["name"] = name
            lowered = name.lower()
            if " county " in lowered:
                # "XXX County (S.C.)" => county: XXX
                place["county"] = name[:lowered.index("county")].strip()
            elif "(S.C.)" in name:
                # "XXX (S.C)" => city: XXX
                place["city"] = name[:name.index("(S.C.)")].strip()

    return json.dumps(data)
def uscsetdataprovider(body, ctype, prop="dataProvider"):
    """Service that accepts a JSON document and sets its "dataProvider" to:

    1. The first value of the originalRecord/source field (placed in
       dataProvider by the oai-to-dpla module) for the chs set
       (setSpec p15799coll65)
    2. The string "University of Southern California. Libraries" for all
       other sets

    For primary use with USC documents.
    """
    try:
        data = json.loads(body)
    except Exception:
        response.code = 500
        response.add_header('content-type', 'text/plain')
        return "Unable to parse body as JSON"

    if getprop(data, "originalRecord/setSpec") == "p15799coll65":
        # NOTE(review): assumes dataProvider is already a non-empty
        # sequence for the chs set — confirm upstream guarantees this.
        data_provider = getprop(data, "dataProvider", True)
        setprop(data, "dataProvider", data_provider[0])
    else:
        setprop(data, "dataProvider",
                "University of Southern California. Libraries")

    return json.dumps(data)
def test_unset_prop6():
    """Should unset prop since conditions are met for multiple condition
    props (dataProvider and sourceResource/type)."""
    source = {
        "_id": "12345",
        "dataProvider": ["Hathitrust", "University of Minnesota"],
        "sourceResource": {"type": "image"}
    }
    expected = {
        "dataProvider": ["Hathitrust", "University of Minnesota"],
        "sourceResource": {"type": "image"}
    }
    resp, content = _get_server_response(
        json.dumps(source), action="unset", prop="_id",
        condition="hathi_exclude",
        condition_prop="dataProvider%2CsourceResource%2Ftype")
    print_error_log()
    assert resp.status == 200
    assert json.loads(content) == expected
def test_removing_bracket():
    """Should remove a leading bracket and split semicolon-delimited
    place names into separate spatial dicts."""
    source = {
        "id": "12345",
        "sourceResource": {"spatial": ["Charleston (S.C.); [Germany; Poland; Israel; New York (N.Y.); Georgia (U.S.)"]},
        "creator": "Miguel",
    }
    expected = {
        "id": "12345",
        "sourceResource": {
            "spatial": [
                {"name": "Charleston (S.C.)"},
                {"name": "Germany"},
                {"name": "Poland"},
                {"name": "Israel"},
                {"name": "New York (N.Y.)"},
                {"name": "Georgia (U.S.)"},
            ]
        },
        "creator": "Miguel",
    }
    resp, content = H.request(server() + "enrich_location", "POST",
                              body=json.dumps(source))
    assert resp.status == 200
    assert json.loads(content) == expected
def movedatevalues(body, ctype, action="move_date_values", prop=None,
                   to_prop="sourceResource/temporal"):
    """Service that accepts a JSON document and moves any dates found in
    the prop field to the to_prop (default sourceResource/temporal) field.

    A value counts as a date when exactly one REGSEARCH pattern matches it
    and nothing but that match remains after cleanup. Moved values are
    removed from prop; prop is deleted entirely when all values moved.
    """
    if not prop:
        logger.error("Prop param is None in %s" % __name__)
        return body

    # Ordered most-specific first so compound ranges match whole.
    REGSEARCH = [
        "\d{1,4}\s*[-/]\s*\d{1,4}\s*[-/]\s*\d{1,4}\s*[-/]\s*\d{1,4}\s*[-/]\s*\d{1,4}\s*[-/]\s*\d{1,4}",
        "\d{1,2}\s*[-/]\s*\d{4}\s*[-/]\s*\d{1,2}\s*[-/]\s*\d{4}",
        "\d{4}\s*[-/]\s*\d{1,2}\s*[-/]\s*\d{4}\s*[-/]\s*\d{1,2}",
        "\d{1,4}\s*[-/]\s*\d{1,4}\s*[-/]\s*\d{1,4}",
        "\d{4}\s*[-/]\s*\d{4}",
        "\d{1,2}\s*[-/]\s*\d{4}",
        "\d{4}\s*[-/]\s*\d{1,2}",
        "\d{4}s?",
        "\d{1,2}\s*(?:st|nd|rd|th)\s*century",
        ".*circa.*"
    ]
    # Compile once instead of per value per pattern.
    patterns = [re.compile(p, re.I) for p in REGSEARCH]

    def cleanup(s):
        # Strip parens, periods, and question marks before matching.
        return re.sub("[\(\)\.\?]", "", s).strip()

    try:
        data = json.loads(body)
    except:
        response.code = 500
        response.add_header('content-type', 'text/plain')
        return "Unable to parse body as JSON"

    if exists(data, prop):
        values = getprop(data, prop)
        # Fix: iterify a bare string ONCE, so the length comparison and the
        # filtering below operate on the list — the original compared the
        # string's character count and filtered its characters.
        values = values if isinstance(values, list) else [values]
        remove = []
        toprop = getprop(data, to_prop) if exists(data, to_prop) else []

        for v in values:
            c = cleanup(v)
            for pattern in patterns:
                m = pattern.findall(c)
                # Move only when the single match consumes the whole
                # cleaned value. re.escape guards against regex
                # metacharacters in the matched text (fix: the original
                # used the raw match as a pattern).
                if len(m) == 1 and not re.sub(re.escape(m[0]), "", c).strip():
                    if m[0] not in toprop:
                        toprop.append(m[0])
                    # Append the non-cleaned value to remove
                    remove.append(v)
                    break

        if toprop:
            setprop(data, to_prop, toprop)
            if len(values) == len(remove):
                delprop(data, prop)
            else:
                setprop(data, prop, [v for v in values if v not in remove])

    return json.dumps(data)
def test_default_type():
    """An unrecognized type should be replaced by the default value"""
    source = {"id": "123", "sourceResource": {"type": "bananas"}}
    expected = {"id": "123", "sourceResource": {"type": "image"}}
    resp, content = _get_server_response(json.dumps(source), default="image")
    assert resp.status == 200
    assert_same_jsons(expected, json.loads(content))
def enrichlocation(body, ctype, action="enrich_location",
                   prop="sourceResource/spatial"):
    """Service that accepts a JSON document and normalizes its "spatial"
    field: bare strings become {"name": ...} dicts, whitespace around
    semicolons is removed, and semicolon-delimited values are expanded
    into multiple dictionaries (via create_dictionaries).
    """
    try:
        data = json.loads(body)
    except:
        response.code = 500
        response.add_header('content-type', 'text/plain')
        return "Unable to parse body as JSON"

    if exists(data, prop):
        normalized = []
        for entry in iterify(getprop(data, prop)):
            if isinstance(entry, dict):
                normalized.append(dict(
                    (k, remove_space_around_semicolons(val))
                    for k, val in entry.items()))
            else:
                normalized.append(
                    {"name": remove_space_around_semicolons(entry)})

        # A semicolon anywhere means a value packs several places into one
        # string, so split the entries into multiple dictionaries.
        has_semicolon = any(val and ';' in val
                            for d in normalized for val in d.values())
        setprop(data, prop,
                create_dictionaries(normalized) if has_semicolon
                else normalized)

    return json.dumps(data)
def test_usc_enrich_location_find_coordinates():
    """Only the spatial value holding lat/long coordinates should remain"""
    source = {
        "sourceResource": {
            "spatial": [{"name": " 123 "},
                        {"name": "-130.4560,,32.9870"},
                        {"name": "1234"},
                        {"name": "Asheville"},
                        {"name": "82.5542, 35.6008"}]
        }
    }
    expected = {"sourceResource": {"spatial": [{"name": "82.5542, 35.6008"}]}}
    resp, content = H.request(url, "POST", body=json.dumps(source))
    assert resp["status"] == "200"
    assert_same_jsons(expected, json.loads(content))
def testMappings(self):
    """Flickr fixture fields should map onto the expected DPLA document"""
    fixture = path.join(DIR_FIXTURES, 'flickr-doc.json')
    with open(fixture) as f:
        INPUT = f.read()
    resp, content = self._get_server_response(INPUT)
    self.assertEqual(resp.status, 200)
    mapped = json.loads(content)
    self.assertEqual(
        mapped['isShownAt'],
        'https://www.flickr.com/photos/sdasmarchives/34394586825/')
    self.assertEqual(
        mapped['isShownBy'],
        'https://farm5.staticflickr.com/4169/34394586825_375e0b1706_z.jpg')
    source_resource = mapped['sourceResource']
    self.assertEqual(source_resource['title'], 'Ryan Aeronautical Image')
    self.assertEqual(
        source_resource['description'],
        "PictionID:42184448 - Title:Atlas 34, on Pad-------4-9-62; MT62-35498 ; UNCLASSIFIED , APR 9 1962 , ----NCS|ASTRONAUTICS/A DIVISION OF GENERAL DYNAMICS CORPORATION ; missile in image is numbered 34 - Catalog:14_001945 - Filename:14_001945.tif - - - - Image from the Convair/General Dynamics Astronautics Atlas Negative Collection---Please Tag these images so that the information can be permanently stored with the digital file.---Repository: San Diego Air and Space Museum"
    )
    self.assertNotIn('date', source_resource)
    self.assertEqual(source_resource['subject'], ['woo yay', 'Hoopla'])
    self.assertEqual(source_resource['format'], "photo")
def test_removing_bracket():
    """Leading brackets should be stripped when splitting semicolon-
    delimited spatial strings (duplicate of the other bracket test)."""
    source = {
        "id": "12345",
        "sourceResource": {
            "spatial": ["Charleston (S.C.); [Germany; Poland; Israel; New York (N.Y.); Georgia (U.S.)"]
        },
        "creator": "Miguel"
    }
    expected = {
        "id": "12345",
        "sourceResource": {
            "spatial": [
                {"name": "Charleston (S.C.)"},
                {"name": "Germany"},
                {"name": "Poland"},
                {"name": "Israel"},
                {"name": "New York (N.Y.)"},
                {"name": "Georgia (U.S.)"}
            ]
        },
        "creator": "Miguel"
    }
    resp, content = H.request(server() + "enrich_location", "POST",
                              body=json.dumps(source))
    assert resp.status == 200
    assert json.loads(content) == expected
def test_usc_enrich_location_clean():
    """Should remove all 1-3 digit numbers and values containing 's.d',
    then join the remaining values on whitespace."""
    source = {
        "sourceResource": {
            "spatial": [{"name": " 123 "},
                        {"name": "-130.4560,,32.9870"},
                        {"name": "s.d]"},
                        {"name": "s.d"},
                        {"name": "1234"},
                        {"name": "456"},
                        {"name": "s.d."},
                        {"name": "Asheville"},
                        {"name": "789"}]
        }
    }
    expected = {
        "sourceResource": {
            "spatial": [{"name": "-130.4560,,32.9870 1234 Asheville"}]
        }
    }
    resp, content = H.request(url, "POST", body=json.dumps(source))
    assert resp["status"] == "200"
    assert_same_jsons(expected, json.loads(content))
def test_basic_forward_lookup():
    """Simple geocode: a city/state name gains full geographic fields"""
    source = {
        "id": "12345",
        "_id": "12345",
        "sourceResource": {"spatial": [{"name": "Bakersfield, CA"}]},
        "creator": "David"
    }
    expected = {
        "id": "12345",
        "_id": "12345",
        "sourceResource": {
            "spatial": [{
                "name": "Bakersfield, CA",
                "city": "Bakersfield",
                "state": "California",
                "county": "Kern County",
                "country": "United States",
                "coordinates": "35.37329, -119.01871"
            }]
        },
        "creator": "David"
    }
    resp, content = H.request(server() + "geocode", "POST",
                              body=json.dumps(source))
    assert resp.status == 200
    assert_same_jsons(expected, json.loads(content))
def test_lapl_oai_mapping():
    """The LAPL OAI fixture should map onto the expected DPLA document"""
    fixture = path.join(DIR_FIXTURES, 'lapl-oai.json')
    with open(fixture) as f:
        INPUT = f.read()
    resp, content = _get_server_response(INPUT)
    assert str(resp.status).startswith("2"), str(resp) + "\n" + content
    mapped = json.loads(content)
    TC.assertIn(u'sourceResource', mapped)
    TC.assertIn(u'title', mapped[u'sourceResource'])
    TC.assertEqual(mapped['sourceResource']['title'][0],
                   u'Olvera Street shop')
    TC.assertIn(u'description', mapped[u'sourceResource'])
    TC.assertEqual(len(mapped['sourceResource']['description']), 2)
    TC.assertEqual(
        mapped['sourceResource']['description'][1],
        u'A man and two boys sit in front of an Olvera Street shop. A large sign on the right reads, "For Your Fortune Consult Princess Lorena - The Morning Star." Another sign posted above the doorway reads, "Chief Kut - Mescalero." It is not clear if the man sitting on the right is Chief Kut.'
    )
    TC.assertIn(u'format', mapped[u'sourceResource'])
    TC.assertEqual(mapped['sourceResource']['format'][0],
                   u'1 photographic print :b&w ;15 x 11 cm.')
    TC.assertIn(u'identifier', mapped[u'sourceResource'])
    TC.assertEqual(len(mapped['sourceResource']['identifier']), 5)
    TC.assertEqual(mapped['sourceResource']['identifier'][2],
                   u'N-011-201 8x10')
    TC.assertIn(u'isShownAt', mapped)
    TC.assertEqual(
        mapped['isShownAt'],
        u'https://tessa.lapl.org/cdm/ref/collection/photos/id/36479')
    TC.assertIn(u'isShownBy', mapped)
    TC.assertEqual(
        mapped['isShownBy'],
        u'http://173.196.26.125/utils/ajaxhelper?CISOROOT=photos&CISOPTR=36479&action=2&DMHEIGHT=2000&DMWIDTH=2000&DMSCALE=100'
    )
    TC.assertEqual(len(mapped['sourceResource']['subject']), 8)
    TC.assertEqual(mapped['sourceResource']['subject'][0],
                   {'name': 'Signs and signboards--California--Los Angeles.'})
    TC.assertEqual(mapped['sourceResource']['contributor'], [
        'Made accessible through a grant from the John Randolph Haynes and Dora Haynes Foundation.'
    ])
    TC.assertEqual(mapped['sourceResource']['creator'],
                   ['Schultheis, Herman.'])
def test_shred9():
    """Semicolons inside parentheses must not be treated as delimiters."""
    request_doc = {
        "p": "String one; (String two; two and a part of two); String three; String four; (abc dbf; sss;k)",
        "q": "d;e;f",
        "h": "String one; (String two; two and a part of two) String three; String four; (abc dbf; sss;k)",
        "m": "String one; Begin of two (String two; two and a part of two) String three; String four; (abc dbf; sss;k)",
        "g": "bananas",
        "a": "Sheet: 9 1/2 x 12 1/8 inches (24.1 x 30.8 cm)"
    }
    expected_doc = {
        "p": ["String one", "(String two; two and a part of two)",
              "String three", "String four", "(abc dbf; sss;k)"],
        "q": ["d", "e", "f"],
        "h": ['String one',
              '(String two; two and a part of two) String three',
              'String four', '(abc dbf; sss;k)'],
        "m": ['String one',
              'Begin of two (String two; two and a part of two) String three',
              'String four', '(abc dbf; sss;k)'],
        "a": "Sheet: 9 1/2 x 12 1/8 inches (24.1 x 30.8 cm)",
        "g": "bananas"
    }
    resp, content = H.request(server() + "shred?prop=p,q,h,m,g,a", "POST",
                              body=json.dumps(request_doc))
    assert str(resp.status).startswith("2")
    fetched = json.loads(content)
    assert fetched == expected_doc, DictDiffer(expected_doc, fetched).diff()
def test_map_oac_dc_meta():
    '''The DC meta values from OAC are pulled into sourceResource.'''
    with open(path.join(DIR_FIXTURES, 'oac-xml.json')) as f:
        INPUT = f.read()
    resp, content = _get_server_response(INPUT)
    TC.assertEqual(resp.status, 200)
    src = json.loads(content)['sourceResource']
    # q="x" qualified values are suppressed
    TC.assertEqual(len(src['format']), 1)
    TC.assertEqual(src['format'], ['painting: b&w ;'])
    TC.assertNotIn('relation', src)
    # q="series" qualified values are suppressed
    TC.assertEqual(len(src['subject']), 2)
    TC.assertEqual(src['subject'],
                   [{'name': u'Japanese Americans'}, {'name': u'Uchida'}])
    TC.assertEqual(src['date'], ["7/21/42", "7/21/72"])
    TC.assertEqual(src['copyrightDate'], ["2011"])
    TC.assertEqual(src['alternativeTitle'], [
        "[Chinese man sitting on top of dynamite and white labor, poised to explode brick wall of Public Opinion]",
        "Another alternate title"
    ])
    TC.assertEqual(src['genre'], ["Hashira-e"])
    TC.assertEqual(src['rights'], [
        "Transmission or reproduction of materials protected by copyright beyond that allowed by fair use requires the written permission of the copyright owners. Works not in the public domain cannot be commercially exploited without permission of the copyright owner. Responsibility for any use rests exclusively with the user.",
        "The Bancroft Library--assigned",
        "All requests to reproduce, publish, quote from, or otherwise use collection materials must be submitted in writing to the Head of Public Services, The Bancroft Library, University of California, Berkeley 94720-6000. See: http://bancroft.berkeley.edu/reference/permissions.html",
        "The Bancroft Library University of California Berkeley, CA 94720-6000"
    ])
    TC.assertEqual(src['spatial'],
                   ["San Francisco (Calif.)",
                    "Chinatown (San Francisco, Calif.)."])
    TC.assertEqual(src['temporal'], [
        "China -- History -- Warlord period, 1916-1928.",
        "China -- Politics and government -- 1912-1949."
    ])
def remove_list_values(body, ctype, prop=None, values=None):
    """Given a comma-separated string of values, removes any instance of
    each value from the prop. If nothing is left afterwards, the prop is
    deleted from the document.
    """
    try:
        data = json.loads(body)
    except:
        response.code = 500
        response.add_header('content-type', 'text/plain')
        return "Unable to parse body as JSON"

    current = getprop(data, prop, True)
    if values is not None and isinstance(current, list):
        blacklist = values.split(",")
        current = [item for item in current if item not in blacklist]
    if current:
        setprop(data, prop, current)
    else:
        delprop(data, prop)
    return json.dumps(data)
def test_copy_prop_no_overwrite1():
    """An existing to_prop is left alone when no_overwrite is set."""
    doc = {
        "key1": "value1",
        "sourceResource": {"key2": "value2", "key3": "value3"},
        "key4": "value4"
    }
    resp, content = _get_server_response(json.dumps(doc),
                                         prop="sourceResource/key2",
                                         to_prop="sourceResource/key3",
                                         create=True,
                                         no_overwrite=True)
    assert resp.status == 200
    assert json.loads(content) == doc
def drop_long_values(body, ctype, field=None, max_length=150):
    '''Look for long values in the sourceResource field specified.

    List fields keep only the items whose length is <= max_length; a
    scalar value longer than max_length causes the field to be deleted.
    A missing field (or missing sourceResource) is a no-op instead of a
    TypeError/KeyError as before.

    body       -- JSON document (string)
    ctype      -- content type (unused here; part of the service signature)
    field      -- name of the sourceResource field to inspect
    max_length -- maximum value length kept (may arrive as a string from
                  the query string, hence the int() coercion)
    '''
    try:
        data = json.loads(body)
    except:
        response.code = 500
        response.add_header('content-type', 'text/plain')
        return "Unable to parse body as JSON"

    limit = int(max_length)  # coerce once, not per item
    source_resource = data.get('sourceResource', {})
    fieldvalues = source_resource.get(field)
    if isinstance(fieldvalues, list):
        source_resource[field] = [v for v in fieldvalues if len(v) <= limit]
    elif fieldvalues is not None:
        # scalar value: drop the whole field when it is too long
        if len(fieldvalues) > limit:
            del source_resource[field]
    return json.dumps(data)
def test_drop_long_values():
    """Values longer than max_length are dropped from the list field."""
    # (Previous docstring wrongly described a date-parsing test.)
    INPUT = {
        "sourceResource": {
            "description": [
                "could be 1928ish?",
                "this is a long string will blow up flake 8, should drop this",
                "short"
            ]
        }
    }
    EXPECTED = {
        "sourceResource": {
            "description": ["could be 1928ish?", "short"]
        }
    }
    url = server() + "drop-long-values?field=description&max_length=20"
    resp, content = H.request(url, "POST", body=json.dumps(INPUT))
    TC.assertEqual(resp.status, 200)
    TC.assertEqual(json.loads(content), EXPECTED)
def test_move_date_values_spatial3():
    """A spatial field whose only element is a date is removed entirely,
    its value moving (trimmed) into temporal."""
    doc_in = {"sourceResource": {"spatial": [" 1901 - 1999 "]}}
    doc_out = {"sourceResource": {"temporal": ["1901 - 1999"]}}
    resp, content = _get_server_response(json.dumps(doc_in),
                                         "sourceResource/spatial")
    assert resp.status == 200
    assert json.loads(content) == doc_out
def test_unset_prop8():
    """_id stays put when the condition is not met with dataProvider."""
    doc = {
        "_id": "12345",
        "dataProvider": "Hathitrust",
        "sourceResource": {"type": "image"}
    }
    resp, content = _get_server_response(
        json.dumps(doc),
        action="unset",
        prop="_id",
        condition="hathi_exclude",
        condition_prop="dataProvider%2CsourceResource%2Ftype")
    print_error_log()
    assert resp.status == 200
    assert json.loads(content) == doc
def test_calpoly_oai_dc_mapping():
    """Caltech OAI DC record maps to isShownAt/isShownBy and
    sourceResource fields."""
    # NOTE(review): the name says "calpoly" but the fixture and mapped
    # URLs are Caltech -- confirm whether the test should be renamed.
    with open(path.join(DIR_FIXTURES, 'caltech.json')) as f:
        INPUT = f.read()
    TC.assertIn('id', INPUT)
    resp, content = _get_server_response(INPUT)
    TC.assertEqual(resp.status, 200)
    obj = json.loads(content)
    TC.assertIn('sourceResource', obj)
    TC.assertIn('originalRecord', obj)
    src = obj['sourceResource']
    TC.assertEqual(
        obj['isShownAt'],
        "http://maccready.library.caltech.edu/islandora/object/pbm%3A631")
    TC.assertEqual(
        obj['isShownBy'],
        "http://maccready.library.caltech.edu/islandora/object/pbm%3A631/datastream/TN/view/Model%20airplanes%20and%20gliders%3A%20includes%20photographs%20of%20MacCready%20with%20award%20winning%20planes.%202%20Sheets.%20%2834%20items%29.jpg"
    )
    TC.assertEqual(src['description'], [
        "ca.1937-1941,1945. Part of: Paul B. MacCready Papers ca. 1930-2002. Series 7: Audio-Visual material; Subseries 1: Photographic slides; Box 1, Folder 1"
    ])
    TC.assertEqual(src['format'],
                   ["projected graphic", "Black and White 35mm slides"])
def mdlstatelocatedin(body, ctype):
    """Service that accepts a JSON document and extracts the state from the
    address in the first dataProvider value.

    Sets sourceResource/stateLocatedIn to the full state name when either
    the abbreviation or the full name occurs, whitespace-delimited, in the
    address; stops at the first match.
    """
    try:
        data = json.loads(body)
    except:
        response.code = 500
        response.add_header('content-type', 'text/plain')
        return "Unable to parse body as JSON"

    prop = "dataProvider"
    if exists(data, prop):
        address = iterify(getprop(data, prop))[0]
        for st, state in states.items():
            # Raw strings so "\s" is a regex character class rather than an
            # (invalid) string escape; re.escape guards against any
            # metacharacters in the state table.
            # NOTE(review): both patterns require whitespace on BOTH sides,
            # so a state name at the very start or end of the address never
            # matches -- confirm whether that is intended.
            if (re.search(r"\s+%s\s+" % re.escape(st), address) or
                    re.search(r"\s+%s\s+" % re.escape(state), address)):
                setprop(data, "sourceResource/stateLocatedIn", state)
                break
    return json.dumps(data)
def testMappings(self):
    """Flickr doc maps to isShownAt/isShownBy and sourceResource fields."""
    with open(path.join(DIR_FIXTURES, 'flickr-doc.json')) as f:
        INPUT = f.read()
    resp, content = self._get_server_response(INPUT)
    self.assertEqual(resp.status, 200)
    obj = json.loads(content)
    self.assertEqual(
        obj['isShownAt'],
        'https://www.flickr.com/photos/sdasmarchives/34394586825/')
    self.assertEqual(
        obj['isShownBy'],
        'https://farm5.staticflickr.com/4169/34394586825_375e0b1706_z.jpg')
    src = obj['sourceResource']
    self.assertEqual(src['title'], 'Atlas 55D')
    self.assertEqual(
        src['description'],
        'Details: Prelaunch; Complex 12; AMR --Image from the Convair/General Dynamics Astronautics Atlas Negative Collection---Please Tag these images so that the information can be permanently stored with the digital file.---Repository: San Diego Air and Space Museum'
    )
    self.assertEqual(src['subject'], ['woo yay', 'Hoopla'])
    self.assertEqual(src['format'], "photo")
    self.assertEqual(src['identifier'],
                     ["14_008096", "43829091", "14_008096.TIF"])
def test_ia_identify_object():
    """The Internet Archive thumbnail URL is built from the gif file entry."""
    request_body = """
    {
        "originalRecord": {
            "_id": "test_id",
            "files": {"gif": "test_id.gif"}
        }
    }
    """
    expected_preview = "http://www.archive.org/download/test_id/test_id.gif"
    resp, content = H.request(server() + "ia_identify_object", "POST",
                              body=request_body)
    assert str(resp.status).startswith("2"), str(resp) + "\n" + content
    doc = json.loads(content)
    assert u"object" in doc, "object path not found in document"
    fetched_preview = doc[u"object"]
    assert fetched_preview == expected_preview, "%s != %s" % (fetched_preview,
                                                              expected_preview)
def test_copy_prop_str_to_str():
    """Copying a string prop onto a string to_prop yields a merged list."""
    doc_in = {
        "note": "This is a note",
        "sourceResource": {"description": "This is a description"}
    }
    doc_out = {
        "note": "This is a note",
        "sourceResource": {
            "description": ["This is a description", "This is a note"]
        }
    }
    resp, content = _get_server_response(
        json.dumps(doc_in),
        prop="note",
        to_prop="sourceResource/description")
    assert resp.status == 200
    assert json.loads(content) == doc_out
def nara_enrich_location(body, ctype, action="nara_enrich_location",
                         prop="sourceResource/spatial"):
    """Service that massages a NARA JSON document.

    Rewrites each spatial entry through format_spatial, iterifying a
    single value into a list along the way.
    """
    try:
        data = json.loads(body)
    except:
        response.code = 500
        response.add_header('content-type', 'text/plain')
        return "Unable to parse body as JSON"

    if exists(data, prop):
        # Check spatial dictionaries to see if they are valid
        setprop(data, prop,
                [format_spatial(s) for s in iterify(getprop(data, prop))])
    return json.dumps(data)
def test_texas_enrich_location4():
    """Spatial strings carrying limit clauses pass through unchanged."""
    doc_in = {
        "id": "12345",
        "sourceResource": {
            "spatial": [
                "Canada - British Columbia Province - Vancouver Island - Victoria",
                "north=34.19; east=-99.94;",
                "northlimit=34.25; eastlimit=-99.88; southlimit=34.13; westlimit=-100;"
            ]
        }
    }
    doc_out = {
        "id": "12345",
        "sourceResource": {
            "spatial": [
                {
                    "name": "Canada - British Columbia Province - Vancouver Island - Victoria",
                    "country": "Canada",
                    "state": "British Columbia Province",
                    "county": "Vancouver Island",
                    "city": "Victoria"
                },
                {"name": "34.19, -99.94"},
                {"name": "northlimit=34.25; eastlimit=-99.88; southlimit=34.13; westlimit=-100;"}
            ]
        }
    }
    resp, content = H.request(server() + "texas_enrich_location", "POST",
                              body=json.dumps(doc_in))
    assert resp.status == 200
    assert json.loads(content) == doc_out
def test_geocode_coordinate_provided2():
    """Coordinates already present in the coordinates property are kept
    and used for the reverse lookup."""
    spatial = {
        "name": "United States--Massachussetts",
        "coordinates": "42.358631134, -71.0567016602"
    }
    doc_in = {
        "id": "12345",
        "_id": "12345",
        "sourceResource": {"spatial": [dict(spatial)]},
        "creator": "David"
    }
    enriched = dict(spatial)
    enriched.update({
        "county": "Suffolk County",
        "state": "Massachusetts",
        "country": "United States"
    })
    doc_out = {
        "id": "12345",
        "_id": "12345",
        "sourceResource": {"spatial": [enriched]},
        "creator": "David"
    }
    resp, content = H.request(server() + "geocode", "POST",
                              body=json.dumps(doc_in))
    assert resp.status == 200
    assert_same_jsons(doc_out, json.loads(content))
def test_copy_prop_dict_to_list():
    """A dict-valued prop is appended onto a list-valued to_prop."""
    doc_in = {
        "key1": "value1",
        "sourceResource": {
            "key1": "value1",
            "from_dict": {"key1": "value1"},
            "to_list": ["a", "b", "c"],
            "key2": "value2"
        },
        "key2": "value2"
    }
    doc_out = {
        "key1": "value1",
        "sourceResource": {
            "key1": "value1",
            "from_dict": {"key1": "value1"},
            "to_list": ["a", "b", "c", {"key1": "value1"}],
            "key2": "value2"
        },
        "key2": "value2"
    }
    resp, content = _get_server_response(
        json.dumps(doc_in),
        prop="sourceResource/from_dict",
        to_prop="sourceResource/to_list")
    assert resp.status == 200
    assert json.loads(content) == doc_out
def scdl_enrich_location(body, ctype, action="scdl_enrich_location",
                         prop="sourceResource/spatial"):
    """Service that accepts a JSON document and enriches the "spatial"
    field of that document. For use with the scdl profiles.
    """
    try:
        data = json.loads(body)
    except:
        response.code = 500
        response.add_header('content-type', 'text/plain')
        return "Unable to parse body as JSON"

    if not exists(data, prop):
        return json.dumps(data)

    for entry in iterify(getprop(data, prop)):
        name = replace_state_abbreviations(entry["name"].rstrip())
        entry["name"] = name
        lowered = name.lower()
        if " county " in lowered:
            # "XXX County (S.C.)" => county: XXX
            entry["county"] = name[:lowered.index("county")].strip()
            if "(S.C.)" in name:
                entry["state"] = "South Carolina"
                entry["country"] = "United States"
        elif "(S.C.)" in name:
            # "XXX (S.C)" => city: XXX
            entry["city"] = name[:name.index("(S.C.)")].strip()
            entry["state"] = "South Carolina"
            entry["country"] = "United States"
    return json.dumps(data)
def test_unset_prop4():
    """An unknown condition leaves the document untouched (the server
    catches the KeyError for conditions missing from CONDITIONS)."""
    doc = {
        "_id": "12345",
        "key1": "value1",
        "sourceResource": {"key1": "value1", "rights": "value2"},
        "key2": "value2"
    }
    resp, content = _get_server_response(json.dumps(doc),
                                         action="unset",
                                         prop="sourceResource/rights",
                                         condition="is_digits")
    assert resp.status == 200
    assert json.loads(content) == doc
def augment_freemix(body, ctype):
    #See: http://foundry.zepheira.com/issues/133#note-4
    '''
    Render the contents of a file as best as possible in Exhibit JSON
    * Supports Excel, BibTex and JSON for now

    Sample queries:
    * curl "http://*****:*****@foo.xls" --header "Content-Type: application/vnd.ms-excel" "http://localhost:8880/freemix.json"
    '''
    obj = json.loads(body)
    dataprofile = obj['data_profile']
    source = obj[u'items']
    augmented_items = []
    failed_items = {}
    for prop in dataprofile["properties"]:
        if not prop["enabled"]:
            continue
        # Tags of the form PROP_TYPE_MARKER + <type> select augmentations.
        prop_types = [t[PROP_TYPE_MARKER_LEN:] for t in prop["tags"]
                      if t.startswith(PROP_TYPE_MARKER)]
        #logger.debug("PROPERTY TYPES: " + repr(prop_types))
        if not prop_types:
            continue
        for aug, sid in AUGMENTATIONS.items():
            if aug in prop_types and (u"composite" in prop
                                      or aug == u'shredded_list'):
                # Only build the service proxy when this augmentation
                # actually applies (previously one was created per entry).
                handler = service_proxy(sid)
                handler(source, prop, augmented_items, failed_items)
        #logger.debug('AUGMENTATION: ' + repr((prop['property'], augmented_items)))
    # Renamed from `response` to avoid shadowing the module-level response
    # object used by the other services in this file.
    result = {'items': augmented_items, 'failed': failed_items}
    return json.dumps(result, indent=4)
def testMappings(self):
    """eMuseum record maps ids, isShownAt/isShownBy and sourceResource."""
    with open(path.join(DIR_FIXTURES, 'eMuseum-xml.json')) as f:
        INPUT = f.read()
    resp, content = self._get_server_response(INPUT)
    self.assertEqual(resp.status, 200)
    obj = json.loads(content)
    self.assertEqual(obj['_id'], '26251--11529')
    self.assertEqual(obj['id'], '748a227d50f2f9ea132f5748b8e89323')
    self.assertEqual(
        obj['@id'],
        'http://ucldc.cdlib.org/api/items/748a227d50f2f9ea132f5748b8e89323'
    )
    self.assertEqual(obj['isShownAt'],
                     'http://digitalcollections.hoover.org/objects/11529')
    self.assertEqual(obj['isShownBy'],
                     'https://img.youtube.com/vi/qxVJVE9oKg4/default.jpg')
    src = obj['sourceResource']
    self.assertEqual(src['date'], '1914/1918?')
    self.assertEqual(
        src['title'],
        "Money is power. A war saving certificate in every Canadian home. Get yours now at Post Offices or banks."
    )
    self.assertEqual(src['type'], 'Image')
def test_capitalize_value():
    """The first letter of each listed property gets capitalized."""
    doc_in = {
        "id": "123",
        "spatial": {"key1": "asheville", "key2": "north Carolina"},
        "subject": ["subject", "hi there", "hello"]
    }
    doc_out = {
        "id": "123",
        "spatial": {"key1": "Asheville", "key2": "North Carolina"},
        "subject": ["Subject", "Hi there", "Hello"]
    }
    resp, content = _get_server_response(
        json.dumps(doc_in), prop="spatial/key1,spatial/key2,subject")
    assert resp.status == 200
    fetched = json.loads(content)
    assert fetched == doc_out, DictDiffer(doc_out, fetched).diff()
def scdl_geocode_regions(body, ctype, action="scdl_geocode_regions",
                         prop="sourceResource/spatial"):
    """Service that accepts a JSON document and forcibly sets the
    coordinates for South Carolina regions. For use with the scdl profiles.
    """
    try:
        data = json.loads(body)
    except:
        response.code = 500
        response.add_header('content-type', 'text/plain')
        return "Unable to parse body as JSON"

    if exists(data, prop):
        for entry in iterify(getprop(data, prop)):
            if is_region(entry):
                geocode_region(entry)
    return json.dumps(data)