def nypl_select_hasview(body, ctype):
    """Expose the NYPL high-res link as a top-level "hasView" structure.

    Returns the updated document as a JSON string, the unchanged body when
    the expected keys are missing, or an error message (with a 500 status)
    when the body is not valid JSON.
    """
    # Parse the payload; anything that is not JSON is reported back as text.
    try:
        assert ctype.lower() == HTTP_TYPE_JSON, "%s is not %s" % (HTTP_HEADER_TYPE, HTTP_TYPE_JSON)
        data = json.loads(body)
    except Exception as e:
        error_text = "Bad JSON: %s: %s" % (e.__class__.__name__, str(e))
        logger.exception(error_text)
        response.code = HTTP_INTERNAL_SERVER_ERROR
        response.add_header(HTTP_HEADER_TYPE, HTTP_TYPE_TEXT)
        return error_text

    doc_key = u"originalRecord"
    link_key = u"tmp_high_res_link"
    view_key = u"hasView"

    if doc_key not in data:
        logger.error("There is no '%s' key in JSON for doc [%s].", doc_key, data[u'id'])
        return body
    record = data[doc_key]
    if link_key not in record:
        logger.error("There is no '%s/%s' key in JSON for doc [%s].", doc_key, link_key, data[u'id'])
        return body

    data[view_key] = {"@id": record[link_key], "format": None}
    return json.dumps(data)
def nypl_select_hasview(body, ctype):
    """Copy originalRecord/tmp_high_res_link into data["hasView"].

    On success returns the serialized document; when either key is absent
    the original body is returned untouched; invalid JSON yields an error
    string and a 500 response code.
    """
    try:
        assert ctype.lower() == HTTP_TYPE_JSON, "%s is not %s" % (HTTP_HEADER_TYPE, HTTP_TYPE_JSON)
        data = json.loads(body)
    except Exception as e:
        error_text = "Bad JSON: %s: %s" % (e.__class__.__name__, str(e))
        logger.exception(error_text)
        response.code = HTTP_INTERNAL_SERVER_ERROR
        response.add_header(HTTP_HEADER_TYPE, HTTP_TYPE_TEXT)
        return error_text

    record_key, preview_key, target_key = u"originalRecord", u"tmp_high_res_link", u"hasView"

    if record_key in data:
        if preview_key in data[record_key]:
            # Wrap the raw link in the expected hasView shape.
            data[target_key] = {"@id": data[record_key][preview_key], "format": None}
            return json.dumps(data)
        logger.error("There is no '%s/%s' key in JSON for doc [%s].", record_key, preview_key, data[u'id'])
        return body
    logger.error("There is no '%s' key in JSON for doc [%s].", record_key, data[u'id'])
    return body
def artstor_cleanup_creator(body, ctype, prop="sourceResource/creator"):
    """Service that accepts a JSON document and cleans the
    sourceResource/creator field by removing the values in CLEANUP
    if the field value begins with them.

    Arguments:
        body  -- JSON document (string)
        ctype -- request content type; must be JSON
        prop  -- path of the property to clean

    Returns the cleaned document as a JSON string, or an error message
    (with a 500 status) when the body is not valid JSON.

    Fixes: docstring typos ("accepst", "removes cleans", stale REGEXES
    reference); replaced the range(len(...)) loop with enumerate.
    """
    try:
        assert ctype.lower() == HTTP_TYPE_JSON, "%s is not %s" % (
            HTTP_HEADER_TYPE, HTTP_TYPE_JSON)
        data = json.loads(body)
    except Exception as e:
        error_text = "Bad JSON: %s: %s" % (e.__class__.__name__, str(e))
        logger.exception(error_text)
        response.code = HTTP_INTERNAL_SERVER_ERROR
        response.add_header(HTTP_HEADER_TYPE, HTTP_TYPE_TEXT)
        return error_text

    if exists(data, prop):
        item = getprop(data, prop)
        # Normalize to a list so single values and lists share one code path.
        if not isinstance(item, list):
            item = [item]
        for i, value in enumerate(item):
            for s in CLEANUP:
                # Strip a leading unwanted prefix, case-insensitively.
                value = re.sub(r"(?i)^{0}".format(s), "", value.strip()).lstrip()
            item[i] = value
        # Collapse a single-element list back to a scalar.
        setprop(data, prop, item[0] if len(item) == 1 else item)

    return json.dumps(data)
def artstor_cleanup_creator(body, ctype, prop="sourceResource/creator"):
    """Service that accepts a JSON document and cleans the
    sourceResource/creator field by removing the values in CLEANUP
    if the field value begins with them.

    Arguments:
        body  -- JSON document (string)
        ctype -- request content type; must be JSON
        prop  -- path of the property to clean

    Returns the cleaned document as a JSON string, or an error message
    (with a 500 status) when the body is not valid JSON.

    Fixes: docstring typos ("accepst", "removes cleans", stale REGEXES
    reference); replaced the range(len(...)) loop with enumerate.
    """
    try:
        assert ctype.lower() == HTTP_TYPE_JSON, "%s is not %s" % (HTTP_HEADER_TYPE, HTTP_TYPE_JSON)
        data = json.loads(body)
    except Exception as e:
        error_text = "Bad JSON: %s: %s" % (e.__class__.__name__, str(e))
        logger.exception(error_text)
        response.code = HTTP_INTERNAL_SERVER_ERROR
        response.add_header(HTTP_HEADER_TYPE, HTTP_TYPE_TEXT)
        return error_text

    if exists(data, prop):
        item = getprop(data, prop)
        # Normalize to a list so single values and lists share one code path.
        if not isinstance(item, list):
            item = [item]
        for i, value in enumerate(item):
            for s in CLEANUP:
                # Strip a leading unwanted prefix, case-insensitively.
                value = re.sub(r"(?i)^{0}".format(s), "", value.strip()).lstrip()
            item[i] = value
        # Collapse a single-element list back to a scalar.
        setprop(data, prop, item[0] if len(item) == 1 else item)

    return json.dumps(data)
def ia_identify_object(body, ctype, download="True"):
    """Build the Internet Archive preview URL and store it in "object".

    Also records admin/object_status: PENDING when download is the string
    "True", IGNORE otherwise. Returns the serialized document, the
    unchanged body when the preview path is missing, or an error message
    (with a 500 status) on bad JSON.
    """
    try:
        assert ctype.lower() == HTTP_TYPE_JSON, "%s is not %s" % (HTTP_HEADER_TYPE, HTTP_TYPE_JSON)
        data = json.loads(body)
    except Exception as e:
        error_text = "Bad JSON: %s: %s" % (e.__class__.__name__, str(e))
        logger.exception(error_text)
        response.code = HTTP_INTERNAL_SERVER_ERROR
        response.add_header(HTTP_HEADER_TYPE, HTTP_TYPE_TEXT)
        return error_text

    gif_path = "originalRecord/files/gif"
    url_template = "http://www.archive.org/download/{0}/{1}"
    try:
        data["object"] = url_template.format(getprop(data, "originalRecord/_id"),
                                             getprop(data, gif_path))
    except KeyError:
        logger.error("Can not build preview url by path \"%s\" for doc [%s]", gif_path, data[u"id"])
        return body

    status = PENDING if download == "True" else IGNORE
    data.setdefault("admin", {})["object_status"] = status
    return json.dumps(data)
def nypl_identify_object(body, ctype, list_sets=None):
    """Replace a NYPL collection UUID stored in "title" with the collection's
    human-readable title, looked up from the list-sets API endpoint.

    Arguments:
        body      -- JSON document (string)
        ctype     -- request content type; must be JSON
        list_sets -- URL of the NYPL API endpoint listing collections

    Returns the updated document as a JSON string, the unchanged body on an
    HTTP failure, or an error message (with a 500 status) on bad JSON.
    """
    try:
        assert ctype.lower() == HTTP_TYPE_JSON, "%s is not %s" % (HTTP_HEADER_TYPE, HTTP_TYPE_JSON)
        data = json.loads(body)
    except Exception as e:
        error_text = "Bad JSON: %s: %s" % (e.__class__.__name__, str(e))
        logger.exception(error_text)
        response.code = HTTP_INTERNAL_SERVER_ERROR
        response.add_header(HTTP_HEADER_TYPE, HTTP_TYPE_TEXT)
        return error_text
    # Disk-cached HTTP client; failures surface as status codes, not raises.
    H = httplib2.Http('/tmp/.cache')
    H.force_exception_as_status_code = True
    resp, content = H.request(list_sets)
    if not resp[u'status'].startswith('2'):
        logger.error(' HTTP error (' + resp[u'status'] + ') resolving URL: ' + list_sets)
        return body
    content_dict = xmltodict.parse(content, xml_attribs=True, attr_prefix='', force_cdata=False, ignore_whitespace_cdata=True)
    sets = content_dict["nyplAPI"]["response"]
    for r in sets:
        if "collection" == r:
            for coll_dict in sets[r]:
                # Match when the record's title holds the collection uuid,
                # or the uuid appears inside the record's @id.
                if "uuid" in coll_dict and "title" in coll_dict and (coll_dict["uuid"] == data["title"] or coll_dict["uuid"] in data["@id"]):
                    data["title"] = coll_dict["title"]
    return json.dumps(data)
def artstor_identify_object(body, ctype, download="True"):
    """Find the Artstor "/size1/" preview handle and store its URL in "object".

    Also records admin/object_status: PENDING when download is the string
    "True", IGNORE otherwise. Missing keys or no matching handle return the
    unchanged body; invalid JSON yields an error string with a 500 status.
    """
    try:
        assert ctype.lower() == HTTP_TYPE_JSON, "%s is not %s" % (HTTP_HEADER_TYPE, HTTP_TYPE_JSON)
        data = json.loads(body)
    except Exception as e:
        error_text = "Bad JSON: %s: %s" % (e.__class__.__name__, str(e))
        logger.exception(error_text)
        response.code = HTTP_INTERNAL_SERVER_ERROR
        response.add_header(HTTP_HEADER_TYPE, HTTP_TYPE_TEXT)
        return error_text

    record_key = u"originalRecord"
    handle_key = u"handle"
    preview_marker = "/size1/"

    if record_key not in data:
        logger.error("There is no '%s' key in JSON for doc [%s].", record_key, data[u'id'])
        return body
    if handle_key not in data[record_key]:
        logger.error("There is no '%s/%s' key in JSON for doc [%s].", record_key, handle_key, data[u'id'])
        return body

    url_pattern = re.compile("https?://.*$", re.I)
    preview_url = None
    for handle in data[record_key][handle_key]:
        if preview_marker not in handle:
            continue
        found = url_pattern.search(handle)
        if found:
            preview_url = found.group(0)
            break

    if not preview_url:
        logger.error(
            "Can't find url with '%s' prefix in [%s] for fetching document preview url for Artstor.",
            preview_marker, data[record_key][handle_key])
        return body

    data["object"] = preview_url
    status = PENDING if download == "True" else IGNORE
    data.setdefault("admin", {})["object_status"] = status
    return json.dumps(data)
def georgia_identify_object(body, ctype, download="True"):
    """Build the Georgia (DLG) thumbnail reference from the record id.

    The tail of the record id (after the last ":") is expected to look like
    "repo_coll_item..."; it is split into three fields to fill the preview
    URL pattern. The result is stored in "object" together with the rights
    statement, and admin/object_status is set to PENDING when download is
    the string "True", IGNORE otherwise.

    Bug fixed: the id tail was split with maxsplit=3, which can produce
    FOUR fields and make the three-name unpacking raise ValueError for
    valid ids containing extra underscores; maxsplit=2 caps the result at
    three fields (matching the other revision of this handler).
    """
    try:
        assert ctype.lower() == HTTP_TYPE_JSON, "%s is not %s" % (HTTP_HEADER_TYPE, HTTP_TYPE_JSON)
        data = json.loads(body)
    except Exception as e:
        error_text = "Bad JSON: %s: %s" % (e.__class__.__name__, str(e))
        logger.exception(error_text)
        response.code = HTTP_INTERNAL_SERVER_ERROR
        response.add_header(HTTP_HEADER_TYPE, HTTP_TYPE_TEXT)
        return error_text

    original_document_key = u"originalRecord"
    original_sources_key = u"id"
    preview_url_pattern = "http://dlg.galileo.usg.edu/%(repo)s/%(coll)s/do-th:%(item)s"

    if original_document_key not in data:
        logger.error("There is no '%s' key in JSON for doc [%s].", original_document_key, data[u'id'])
        return body
    if original_sources_key not in data[original_document_key]:
        logger.error("There is no '%s/%s' key in JSON for doc [%s].", original_document_key, original_sources_key, data[u'id'])
        return body

    _id = data[original_document_key][original_sources_key]
    _id_head, sep, _item_id_tuple = _id.rpartition(":")
    if not _item_id_tuple:
        logger.error("Can not get item id tuple from the [%s] identifier.", _id)
        return body
    try:
        # maxsplit=2 yields at most 3 parts, matching the unpacking below.
        repo, coll, item = _item_id_tuple.split("_", 2)
    except ValueError:
        logger.error("Can not fetch \"repo, coll, item\" values from [%s], splitting by \"_\"", _item_id_tuple)
        return body

    preview_url = preview_url_pattern % {"repo": repo, "coll": coll, "item": item}
    data["object"] = {"@id": preview_url,
                      "format": None,
                      "rights": selector.getprop(data, "aggregatedCHO/rights", keyErrorAsNone=True)}

    status = PENDING if download == "True" else IGNORE
    if "admin" in data:
        data["admin"]["object_status"] = status
    else:
        data["admin"] = {"object_status": status}
    return json.dumps(data)
def artstor_identify_object(body, ctype, download="True"):
    """Find the Artstor "Thumbnail" handle and store its URL (plus rights)
    in the "object" structure.

    Also records admin/object_status: PENDING when download is the string
    "True", IGNORE otherwise. Missing keys or no matching handle return the
    unchanged body; invalid JSON yields an error string with a 500 status.
    """
    try:
        assert ctype.lower() == HTTP_TYPE_JSON, "%s is not %s" % (HTTP_HEADER_TYPE, HTTP_TYPE_JSON)
        data = json.loads(body)
    except Exception as e:
        error_text = "Bad JSON: %s: %s" % (e.__class__.__name__, str(e))
        logger.exception(error_text)
        response.code = HTTP_INTERNAL_SERVER_ERROR
        response.add_header(HTTP_HEADER_TYPE, HTTP_TYPE_TEXT)
        return error_text

    record_key = u"originalRecord"
    handle_key = u"handle"
    thumb_prefix = "Thumbnail"

    if record_key not in data:
        logger.error("There is no '%s' key in JSON for doc [%s].", record_key, data[u'id'])
        return body
    if handle_key not in data[record_key]:
        logger.error("There is no '%s/%s' key in JSON for doc [%s].", record_key, handle_key, data[u'id'])
        return body

    url_pattern = re.compile("https?://.*$", re.I)
    preview_url = None
    for handle in data[record_key][handle_key]:
        if not handle.startswith(thumb_prefix):
            continue
        found = url_pattern.search(handle)
        if found:
            preview_url = found.group(0)
            break

    if not preview_url:
        logger.error("Can't find url with '%s' prefix in [%s] for fetching document preview url for Artstor.",
                     thumb_prefix, data[record_key][handle_key])
        return body

    data["object"] = {"@id": preview_url,
                      "format": None,
                      "rights": selector.getprop(data, "aggregatedCHO/rights", keyErrorAsNone=True)}
    status = PENDING if download == "True" else IGNORE
    data.setdefault("admin", {})["object_status"] = status
    return json.dumps(data)
def artstor_select_source(body, ctype):
    """Select the Artstor "isShownAt" source URL from the record handles.

    Scans originalRecord/handle for an entry containing either "/object/"
    or "Image View:" and stores its http(s) URL under "isShownAt".

    Bug fixed: the inner ``break`` only exited the probe loop, so the outer
    handle loop kept running and a later matching handle silently
    overwrote the source already found; the search now stops at the first
    match, as the original break intended.
    """
    try:
        assert ctype.lower() == HTTP_TYPE_JSON, "%s is not %s" % (HTTP_HEADER_TYPE, HTTP_TYPE_JSON)
        data = json.loads(body)
    except Exception as e:
        error_text = "Bad JSON: %s: %s" % (e.__class__.__name__, str(e))
        logger.exception(error_text)
        response.code = HTTP_INTERNAL_SERVER_ERROR
        response.add_header(HTTP_HEADER_TYPE, HTTP_TYPE_TEXT)
        return error_text

    original_document_key = u"originalRecord"
    original_sources_key = u"handle"
    artstor_source_probe = ("/object/", "Image View:")
    source_key = u"isShownAt"

    if original_document_key not in data:
        logger.error("There is no '%s' key in JSON for doc [%s].", original_document_key, data[u'id'])
        return body
    if original_sources_key not in data[original_document_key]:
        logger.error("There is no '%s/%s' key in JSON for doc [%s].", original_document_key, original_sources_key, data[u'id'])
        return body

    source = None
    http_re = re.compile("https?://.*$", re.I)
    for s in data[original_document_key][original_sources_key]:
        for probe in artstor_source_probe:
            if probe in s:
                match = http_re.search(s)
                if match:
                    source = match.group(0)
                    break
        if source:
            # First matching handle wins; stop scanning.
            break

    if not source:
        logger.error("Can't find url with any of '%s' probe in [%s] for fetching document source for Artstor.",
                     artstor_source_probe, data[original_document_key][original_sources_key])
        return body

    try:
        selector.setprop(data, source_key, source)
    except KeyError:
        logger.error("Can't set value, \"%s\" path does not exist in doc [%s]", source_key, data[u'id'])
        return body
    else:
        return json.dumps(data)
def filter_empty_values_endpoint(body, ctype, ignore_key="dplaSourceRecord"):
    """Remove empty leaves from the given JSON tree.

    Argument:
        ignore_key - comma separated list of keys that should be ignored
                     while traversing the tree
    """
    try:
        assert ctype.lower() == HTTP_TYPE_JSON, "%s is not %s" % (HTTP_HEADER_TYPE, HTTP_TYPE_JSON)
        data = json.loads(body)
    except Exception as e:
        error_text = "Bad JSON: %s: %s" % (e.__class__.__name__, str(e))
        logger.exception(error_text)
        response.code = HTTP_INTERNAL_SERVER_ERROR
        response.add_header(HTTP_HEADER_TYPE, HTTP_TYPE_TEXT)
        return error_text

    # Split the comma-separated list, dropping empty fragments.
    skip_keys = [key.strip() for key in ignore_key.split(",") if key]
    return json.dumps(filter_dict(data, filter_empty_leaves, skip_keys))
def filter_fields_endpoint(body, ctype, keys):
    """Drop top-level elements of the JSON document whose values are empty.

    Argument:
        keys - comma separated list of top-level keys that should be
               checked for emptiness in the json tree
    """
    try:
        assert ctype.lower() == HTTP_TYPE_JSON, "%s is not %s" % (HTTP_HEADER_TYPE, HTTP_TYPE_JSON)
        data = json.loads(body)
    except Exception as e:
        error_text = "Bad JSON: %s: %s" % (e.__class__.__name__, str(e))
        logger.exception(error_text)
        response.code = HTTP_INTERNAL_SERVER_ERROR
        response.add_header(HTTP_HEADER_TYPE, HTTP_TYPE_TEXT)
        return error_text

    # Split the comma-separated list, dropping empty fragments.
    wanted = [key.strip() for key in keys.split(",") if key]
    return json.dumps(filter_dict(data, filter_fields, wanted))
def filter_empty_values_endpoint(body, ctype, ignore_key="originalRecord"):
    """Clean empty leaves out of the supplied JSON tree.

    Argument:
        ignore_key - comma separated list of keys that should be ignored
                     while traversing the tree
    """
    try:
        assert ctype.lower() == HTTP_TYPE_JSON, "%s is not %s" % (HTTP_HEADER_TYPE, HTTP_TYPE_JSON)
        data = json.loads(body)
    except Exception as e:
        error_text = "Bad JSON: %s: %s" % (e.__class__.__name__, str(e))
        logger.exception(error_text)
        response.code = HTTP_INTERNAL_SERVER_ERROR
        response.add_header(HTTP_HEADER_TYPE, HTTP_TYPE_TEXT)
        return error_text

    ignored = [part.strip() for part in ignore_key.split(",") if part]
    cleaned = filter_dict(data, filter_empty_leaves, ignored)
    return json.dumps(cleaned)
def filter_fields_endpoint(body, ctype, keys):
    """Remove top-level fields with empty values from the JSON document.

    Argument:
        keys - comma separated list of top-level keys that should be
               checked for emptiness in the json tree
    """
    try:
        assert ctype.lower() == HTTP_TYPE_JSON, "%s is not %s" % (HTTP_HEADER_TYPE, HTTP_TYPE_JSON)
        data = json.loads(body)
    except Exception as e:
        error_text = "Bad JSON: %s: %s" % (e.__class__.__name__, str(e))
        logger.exception(error_text)
        response.code = HTTP_INTERNAL_SERVER_ERROR
        response.add_header(HTTP_HEADER_TYPE, HTTP_TYPE_TEXT)
        return error_text

    fields = [part.strip() for part in keys.split(",") if part]
    cleaned = filter_dict(data, filter_fields, fields)
    return json.dumps(cleaned)
def nypl_identify_object(body, ctype, download="True"):
    """Build the NYPL thumbnail URL from tmp_image_id and store it in "object".

    Also records admin/object_status: PENDING when download is the string
    "True", IGNORE otherwise. Missing keys return the unchanged body;
    invalid JSON yields an error string with a 500 status.
    """
    try:
        assert ctype.lower() == HTTP_TYPE_JSON, "%s is not %s" % (HTTP_HEADER_TYPE, HTTP_TYPE_JSON)
        data = json.loads(body)
    except Exception as e:
        error_text = "Bad JSON: %s: %s" % (e.__class__.__name__, str(e))
        logger.exception(error_text)
        response.code = HTTP_INTERNAL_SERVER_ERROR
        response.add_header(HTTP_HEADER_TYPE, HTTP_TYPE_TEXT)
        return error_text

    record_key = u"originalRecord"
    image_key = u"tmp_image_id"
    thumb_template = "http://images.nypl.org/index.php?id={0}&t=t"

    if record_key not in data:
        logger.error("There is no '%s' key in JSON for doc [%s].", record_key, data[u'id'])
        return body
    if image_key not in data[record_key]:
        logger.error("There is no '%s/%s' key in JSON for doc [%s].", record_key, image_key, data[u'id'])
        return body

    data["object"] = thumb_template.format(data[record_key][image_key])
    status = PENDING if download == "True" else IGNORE
    data.setdefault("admin", {})["object_status"] = status
    return json.dumps(data)
def artstor_cleanup(body, ctype):
    """Strip a leading "Repository:" label from the dataProvider value."""
    try:
        assert ctype.lower() == HTTP_TYPE_JSON, "%s is not %s" % (
            HTTP_HEADER_TYPE, HTTP_TYPE_JSON)
        data = json.loads(body)
    except Exception as e:
        error_text = "Bad JSON: %s: %s" % (e.__class__.__name__, str(e))
        logger.exception(error_text)
        response.code = HTTP_INTERNAL_SERVER_ERROR
        response.add_header(HTTP_HEADER_TYPE, HTTP_TYPE_TEXT)
        return error_text

    provider_key = u"dataProvider"
    if exists(data, provider_key):
        provider = getprop(data, provider_key)
        # Only string values are cleaned; lists/dicts pass through untouched.
        if isinstance(provider, basestring):
            setprop(data, provider_key, provider.replace("Repository:", "").lstrip())
    return json.dumps(data)
def artstor_cleanup(body, ctype):
    """Remove the "Repository:" prefix from the record's dataProvider."""
    try:
        assert ctype.lower() == HTTP_TYPE_JSON, "%s is not %s" % (HTTP_HEADER_TYPE, HTTP_TYPE_JSON)
        data = json.loads(body)
    except Exception as e:
        error_text = "Bad JSON: %s: %s" % (e.__class__.__name__, str(e))
        logger.exception(error_text)
        response.code = HTTP_INTERNAL_SERVER_ERROR
        response.add_header(HTTP_HEADER_TYPE, HTTP_TYPE_TEXT)
        return error_text

    key = u"dataProvider"
    if exists(data, key):
        value = getprop(data, key)
        if isinstance(value, basestring):
            cleaned = value.replace("Repository:", "").lstrip()
            setprop(data, key, cleaned)
    return json.dumps(data)
def nypl_identify_object(body, ctype, download="True"):
    """Derive the NYPL preview URL from originalRecord/tmp_image_id.

    Stores the URL in "object" and sets admin/object_status to PENDING
    when download is the string "True", IGNORE otherwise. Missing keys
    return the unchanged body; bad JSON returns an error string with a
    500 status.
    """
    try:
        assert ctype.lower() == HTTP_TYPE_JSON, "%s is not %s" % (HTTP_HEADER_TYPE, HTTP_TYPE_JSON)
        data = json.loads(body)
    except Exception as e:
        error_text = "Bad JSON: %s: %s" % (e.__class__.__name__, str(e))
        logger.exception(error_text)
        response.code = HTTP_INTERNAL_SERVER_ERROR
        response.add_header(HTTP_HEADER_TYPE, HTTP_TYPE_TEXT)
        return error_text

    doc_key, img_key = u"originalRecord", u"tmp_image_id"
    url_template = "http://images.nypl.org/index.php?id={0}&t=t"

    if doc_key not in data:
        logger.error("There is no '%s' key in JSON for doc [%s].", doc_key, data[u'id'])
        return body
    record = data[doc_key]
    if img_key not in record:
        logger.error("There is no '%s/%s' key in JSON for doc [%s].", doc_key, img_key, data[u'id'])
        return body

    data["object"] = url_template.format(record[img_key])
    status = PENDING if download == "True" else IGNORE
    if "admin" in data:
        data["admin"]["object_status"] = status
    else:
        data["admin"] = {"object_status": status}
    return json.dumps(data)
def nypl_identify_object(body, ctype, list_sets=None):
    """Swap a NYPL collection UUID in "title" for the collection's real
    title, fetched from the list-sets API endpoint.

    Arguments:
        body      -- JSON document (string)
        ctype     -- request content type; must be JSON
        list_sets -- URL of the NYPL API endpoint listing collections

    Returns the updated document as a JSON string, the unchanged body on an
    HTTP failure, or an error message (with a 500 status) on bad JSON.
    """
    try:
        assert ctype.lower() == HTTP_TYPE_JSON, "%s is not %s" % (
            HTTP_HEADER_TYPE, HTTP_TYPE_JSON)
        data = json.loads(body)
    except Exception as e:
        error_text = "Bad JSON: %s: %s" % (e.__class__.__name__, str(e))
        logger.exception(error_text)
        response.code = HTTP_INTERNAL_SERVER_ERROR
        response.add_header(HTTP_HEADER_TYPE, HTTP_TYPE_TEXT)
        return error_text
    # Disk-cached HTTP client; failures surface as status codes, not raises.
    H = httplib2.Http('/tmp/.cache')
    H.force_exception_as_status_code = True
    resp, content = H.request(list_sets)
    if not resp[u'status'].startswith('2'):
        logger.error(' HTTP error (' + resp[u'status'] + ') resolving URL: ' + list_sets)
        return body
    content_dict = xmltodict.parse(content, xml_attribs=True, attr_prefix='', force_cdata=False, ignore_whitespace_cdata=True)
    sets = content_dict["nyplAPI"]["response"]
    for r in sets:
        if "collection" == r:
            for coll_dict in sets[r]:
                # Match when the record's title holds the collection uuid,
                # or the uuid appears inside the record's @id.
                if "uuid" in coll_dict and "title" in coll_dict and (
                        coll_dict["uuid"] == data["title"] or
                        coll_dict["uuid"] in data["@id"]):
                    data["title"] = coll_dict["title"]
    return json.dumps(data)
def georgia_identify_object(body, ctype, download="True"):
    """Compose the Georgia (DLG) thumbnail URL from the record identifier.

    The tail of the id (after the last ":") is split as repo_coll_item to
    fill the preview URL pattern, which is stored in "object". Also sets
    admin/object_status: PENDING when download is the string "True",
    IGNORE otherwise. Malformed ids or missing keys return the unchanged
    body; bad JSON returns an error string with a 500 status.
    """
    try:
        assert ctype.lower() == HTTP_TYPE_JSON, "%s is not %s" % (HTTP_HEADER_TYPE, HTTP_TYPE_JSON)
        data = json.loads(body)
    except Exception as e:
        error_text = "Bad JSON: %s: %s" % (e.__class__.__name__, str(e))
        logger.exception(error_text)
        response.code = HTTP_INTERNAL_SERVER_ERROR
        response.add_header(HTTP_HEADER_TYPE, HTTP_TYPE_TEXT)
        return error_text

    record_key = u"originalRecord"
    id_key = u"id"
    thumb_pattern = "http://dlg.galileo.usg.edu/%(repo)s/%(coll)s/do-th:%(item)s"

    if record_key not in data:
        logger.error("There is no '%s' key in JSON for doc [%s].", record_key, data[u'id'])
        return body
    if id_key not in data[record_key]:
        logger.error("There is no '%s/%s' key in JSON for doc [%s].", record_key, id_key, data[u'id'])
        return body

    raw_id = data[record_key][id_key]
    # The item tuple is everything after the last ":" in the identifier.
    _head, _sep, id_tail = raw_id.rpartition(":")
    if not id_tail:
        logger.error("Can not get item id tuple from the [%s] identifier.", raw_id)
        return body
    try:
        repo, coll, item = id_tail.split("_", 2)
    except ValueError:
        logger.error(
            "Can not fetch \"repo, coll, item\" values from [%s], splitting by \"_\"",
            id_tail)
        return body

    data["object"] = thumb_pattern % {"repo": repo, "coll": coll, "item": item}
    status = PENDING if download == "True" else IGNORE
    data.setdefault("admin", {})["object_status"] = status
    return json.dumps(data)