コード例 #1
0
def test_substitution_with_deleting_missing_values():
    """Entries whose value is "doesnt exist" are expected back as empty dicts.

    The other "eee" values are expected to be substituted ("aaa" becomes
    "AAA222", "bbb" becomes "BBB222") and the {"xxx": "eee"} entry is
    expected to come back unchanged.
    """

    def document(entries):
        # Request and expectation share one envelope; only aaa/xxx differs.
        return {"xxx": "yyy", "aaa": {"bbb": "ccc", "xxx": entries}}

    request_body = json.dumps(document([
        {"eee": "aaa"},
        {"xxx": "eee"},
        {"eee": "bbb"},
        {"eee": "doesnt exist"},
        {"eee": "doesnt exist"},
    ]))
    expected_body = json.dumps(document([
        {"eee": "AAA222"},
        {"xxx": "eee"},
        {"eee": "BBB222"},
        {},
        {},
    ]))

    # NOTE(review): the trailing None/True are positional options of
    # _get_server_response; presumably True turns on deletion of missing
    # values -- confirm against the helper's signature.
    response, body = _get_server_response(
        request_body, "aaa/xxx/eee", "aaa/xxx/eee", "test2", None, True)
    assert response.status == 200
    assert_same_jsons(body, expected_body)
コード例 #2
0
def test_enrich_date_parse_century_date():
    """Correctly transform a date of format '19th c.'"""
    endpoint = server() + "enrich_earliest_date?prop=date"

    # (submitted date, expected displayDate); begin and end are expected to
    # be null for both spellings.
    cases = (
        ("19th c.", "19th c"),  # period stripped assumed OK
        ("19th century", "19th century"),
    )

    for submitted, display in cases:
        expected = {"begin": None, "end": None, "displayDate": display}
        resp, content = H.request(endpoint, "POST",
                                  body=json.dumps({"date": submitted}))
        actual = json.loads(content)["date"]
        assert actual == expected, "%s != %s" % (actual, expected)
コード例 #3
0
def test_remove_spaces_around_dashes():
    """Should remove spaces around dashes."""
    places = [{"name": "Asheville"}, {"name": "North Carolina"}]

    raw_subjects = [
        "hello there",
        "aaa--bbb",
        "aaa --bbb",
        "aaa-- bbb",
        "aaa --  bbb",
        "aaa  --  bbb    -- ccc - - ddd -- ",
    ]
    # One expected name per raw subject, in the same order.
    expected_names = [
        "Hello there",
        "Aaa--bbb",
        "Aaa--bbb",
        "Aaa--bbb",
        "Aaa--bbb",
        "Aaa--bbb--ccc - - ddd--",
    ]

    payload = {"id": "123", "spatial": places, "subject": raw_subjects}
    expected = {
        "id": "123",
        "spatial": places,
        "subject": [{"name": name} for name in expected_names],
    }

    response, body = _get_server_response(json.dumps(payload))
    assert_same_jsons(json.dumps(expected), body)
    assert response.status == 200
コード例 #4
0
ファイル: test_lookup.py プロジェクト: eldios/ingestion
def test_substitute_with_list_of_dictionaries():
    """
    Should convert all dicts in a list.
    """

    def document(entries):
        # Request and expectation share one envelope; only aaa/xxx differs.
        return {"xxx": "yyy", "aaa": {"bbb": "ccc", "xxx": entries}}

    request_body = json.dumps(document([
        {"eee": "aaa"},
        {"xxx": "eee"},
        {"eee": "bbb"},
    ]))
    expected_body = json.dumps(document([
        {"eee": "AAA222"},
        {"xxx": "eee"},
        {"eee": "BBB222"},
    ]))

    response, body = _get_server_response(
        request_body, "aaa/xxx/eee", "aaa/xxx/eee", "test2")
    print_error_log()
    pinfo(response, body)

    assert response.status == 200
    assert_same_jsons(body, expected_body)
コード例 #5
0
ファイル: geocode.py プロジェクト: eldios/ingestion
def geocode(body,ctype,prop=None,newprop=None):
    '''
    Service that accepts a JSON document and passes the value of the field
    named by the "prop" parameter through lookup_place().

    body    -- request body, expected to be a JSON document
    ctype   -- content type of the request (not used here)
    prop    -- name of the top-level field to read
    newprop -- name of the field that receives the result; defaults to prop

    Returns the document serialized as JSON. If prop is not present the
    document is returned as-is. If the body cannot be parsed, the response
    code is set to 500 and a plain-text error message is returned.
    '''

    try:
        data = json.loads(body)
    except (TypeError, ValueError):
        # json.loads signals malformed input with ValueError (TypeError for
        # a non-string body); anything else is not a parse problem.
        response.code = 500
        response.add_header('content-type','text/plain')
        return "Unable to parse body as JSON"

    if prop not in data:
        return json.dumps(data) # graceful abort

    target = newprop or prop
    value = data[prop]

    # Among JSON-decoded values only lists and dicts are containers. Testing
    # for '__iter__' would also match strings on Python 3 and geocode them
    # one character at a time, so check the container types explicitly.
    if isinstance(value, (list, dict)):
        data[target] = [lookup_place(place) for place in value]
    else:
        data[target] = lookup_place(value)

    return json.dumps(data)
コード例 #6
0
def test_substitution_using_scdl_format_dict():
    """Each (submitted, expected) format pair should be normalised.

    For every pair the "aaa" field is sent with the first value and is
    expected back with the second value.
    """
    # NOTE(review): ("Photograph", "Photographs") appears twice; by analogy
    # with the other groups the last entry was presumably meant to be
    # ("photographs", "Photographs") -- confirm before changing test data.
    formats = (
        ("Pamphlets", "Pamphlets"),
        ("Pamphlet", "Pamphlets"),
        ("pamphlets", "Pamphlets"),
        ("Manuscripts", "Manuscripts"),
        ("Manuscript", "Manuscripts"),
        ("manuscripts", "Manuscripts"),
        ("Photograph", "Photographs"),
        ("Photographs", "Photographs"),
        ("Photograph", "Photographs"),
    )

    data = {
                "xxx": "yyy",
                "aaa": ""
    }

    for f in formats:
        data["aaa"] = f[0]
        INPUT = json.dumps(data)
        data["aaa"] = f[1]
        EXPECTED_OUTPUT = json.dumps(data)
        # Interpolate the pair into the message; the previous code
        # concatenated it after a literal "%s". The single-argument call
        # form also works as a print statement on Python 2.
        print("Checking: %r" % (f,))
        resp, content = _get_server_response(INPUT, "aaa", "aaa", "scdl_fix_format", None, False)
        print_error_log()
        assert resp.status == 200
        assert_same_jsons(EXPECTED_OUTPUT, content)
コード例 #7
0
ファイル: enrich.py プロジェクト: dpla/ingestion
def enrich_storage(body, ctype):
    """Establishes a pipeline of services identified by an ordered list of URIs
       provided in request header "Pipeline-Item"

    The decoded body is iterated record by record. Each record is sent
    through pipe() and the JSON text it returns is decoded again. Records
    are then sorted into:

    * enriched_records -- keyed by "_id"; counted as items when
      "ingestType" is "item", otherwise as collections
    * missing_source_resource_count -- items without "sourceResource"
    * missing_id_count -- records without a truthy "_id"

    Errors reported by pipe() are collected in "errors". Returns all of the
    above serialized as one JSON object.
    """

    request_headers = copy_headers_to_dict(request.environ)
    rec_enrichments = request_headers.get(u"Pipeline-Item","").split(",")

    records = json.loads(body)

    # Counts
    enriched_coll_count = 0
    enriched_item_count = 0
    missing_id_count = 0
    missing_source_resource_count = 0

    errors = []
    enriched_records = {}
    for record in records:
        error, enriched_record_text = pipe(record, ctype, rec_enrichments,
                                           "HTTP_PIPELINE_ITEM")
        if error:
            errors.append(error)

        enriched_record = json.loads(enriched_record_text)

        if enriched_record.get("_id"):
            ingest_type = enriched_record.get("ingestType")
            # Item records should have sourceResource
            if (ingest_type == "item" and
                    "sourceResource" not in enriched_record):
                logger.error("Record %s does not have sourceResource: %s" %
                             (enriched_record["_id"], enriched_record))
                missing_source_resource_count += 1
            else:
                enriched_records[enriched_record["_id"]] = enriched_record
                if ingest_type == "item":
                    enriched_item_count += 1
                else:
                    enriched_coll_count += 1
        else:
            logger.error("Found a record without an _id %s" % enriched_record)
            missing_id_count += 1

    data = {
        "enriched_records": enriched_records,
        "enriched_coll_count": enriched_coll_count,
        "enriched_item_count": enriched_item_count,
        "missing_id_count": missing_id_count,
        "missing_source_resource_count": missing_source_resource_count,
        "errors": errors
    }

    # A second, unreachable "return json.dumps(docs)" used to follow this
    # line; it referenced an undefined name and has been removed.
    return json.dumps(data)
コード例 #8
0
def download_preview(body, ctype):
    """
    Responsible for: downloading a preview for a document.
    Usage: as a module in a separate pipeline, to be run on existing
    documents in the repository to download the thumbnails.

    Reads "admin/object_status", "object/@id" and "id" from the document.
    A missing property is logged and the document is returned after being
    passed through set_error(). Documents whose status is "error" or
    "downloaded" are returned untouched.
    """

    try:
        data = json.loads(body)
    except:
        response.code = 500
        response.add_header('content-type', 'text/plain')
        return "Unable to parse body as JSON"

    def failed(exc):
        # Shared handling for a property that getprop() could not find.
        logger.error(exc.args[0])
        return json.dumps(set_error(data))

    # Check the "admin/object_status" field
    try:
        status = getprop(data, "admin/object_status")
        if status in ["error", "downloaded"]:
            logger.debug("Status is %s, doing nothing" % status)
            return body
    except KeyError as e:
        return failed(e)

    # Thumbnail URL first, then the document ID
    try:
        url = getprop(data, "object/@id")
        doc_id = getprop(data, "id")
    except KeyError as e:
        return failed(e)

    should_download = (status == "pending")
    relative_fname, mime, status = download_image(url, doc_id, should_download)

    if not relative_fname:
        logger.error("Cannot save thumbnail from: %s." % url)

    # The document is updated with whatever download_image() reported,
    # whether or not a file name came back.
    return json.dumps(update_document(data, relative_fname, mime, status))
コード例 #9
0
ファイル: test_lookup.py プロジェクト: marktriggs/ingestion
def test_substitution_with_missing_value_and_the_same_field():
    """Should remove the field if value is missing from dict."""
    request_body = json.dumps(
        {"xxx": "yyy", "aaa": {"bbb": "ccc", "xxx": "doesnt exist"}})
    # aaa/xxx is expected to be gone from the response.
    expected_body = json.dumps({"xxx": "yyy", "aaa": {"bbb": "ccc"}})

    response, body = _get_server_response(
        request_body, "aaa/xxx", "aaa/xxx", "test2", None, True)
    print_error_log()
    assert response.status == 200
    assert_same_jsons(body, expected_body)
コード例 #10
0
def test_enrichment_for_creator_field():
    """Should clean up the values of the "creator" field.

    The raw creators carry stray punctuation, quotes, semicolons and spaces
    around dashes; the expectation lists the cleaned names as dicts.
    """
    places = [{"name": "Asheville"}, {"name": "North Carolina"}]

    raw_creators = [
        "hello there",
        "123",
        ". hi ",
        ".  hi",
        "             . hi there    ",
        "a banana",
        "''.more complicated....",
        '""""....even more complicated....."\'""""',
        "hello there;;",
        ";;hello there;;",
        "aaa--bbb",
        "aaa --bbb",
        "aaa-- bbb",
        "aaa --  bbb",
        "aaa  --  bbb    -- ccc - - ddd -- ",
        "aaa  ---  bbb    --- ccc--- ddd---",
        "aaa  ----  bbb    ----ccc---- ddd----",
    ]
    expected_names = [
        "Hello there",
        "123",
        "Hi there",
        "A banana",
        "More complicated",
        "Even more complicated",
        "Hello there",
        "Hello there",
        "Aaa--bbb",
        "Aaa--bbb",
        "Aaa--bbb",
        "Aaa--bbb",
        "Aaa--bbb--ccc--ddd--",
        "Aaa--bbb--ccc--ddd--",
        "Aaa--bbb--ccc--ddd--",
    ]

    payload = {"id": "123", "spatial": places, "creator": raw_creators}
    expected = {
        "id": "123",
        "spatial": places,
        "creator": [{"name": name} for name in expected_names],
    }

    response, body = _get_server_response(json.dumps(payload), "creator")
    assert_same_jsons(json.dumps(expected), body)
    assert response.status == 200
コード例 #11
0
def mdlenrichlocation(body,ctype,action="mdl-enrich-location", prop="sourceResource/spatial"):
    """
    Service that accepts a JSON document and enriches the "spatial" field of that document by
    combining all spatial fields into one. Will also split out country and state on a
    best-efforts basis.

    For primary use with MDL documents.

    The value at prop is replaced by a one-element list holding a dict with:
      - "name": all values joined with ", "
      - "country": the only value when there is exactly one, otherwise
        "United States" when that value is present
      - "state": the value right before "United States" when is_state()
        accepts it

    An empty or missing prop leaves the document unchanged. If the body
    cannot be parsed, the response code is set to 500 and a plain-text
    error message is returned.

    Possible avenues of improvement:
      - For fields with semi-colons, permute and create multiple spatial elements
      - Create an ordered list of "names" for the geocoder to attempt to lookup
        as opposed to our single concatenated list:
          - Everything concatenated together
          - Everything concatenated together up to "United States"
          - Remove left-most elements one by one
    """
    try:
        data = json.loads(body)
    except (TypeError, ValueError):
        # json.loads signals malformed input with ValueError (TypeError for
        # a non-string body); anything else is not a parse problem.
        response.code = 500
        response.add_header('content-type', 'text/plain')
        return "Unable to parse body as JSON"

    if not exists(data, prop):
        return json.dumps(data)

    v = getprop(data, prop)
    fields = len(v)
    if not fields:
        logger.error("Spatial is empty.")
        return json.dumps(data)

    # Concatenate all values together to form the name field
    sp = {"name": ", ".join(v)}
    logger.info("mdl-enrich-location: %s => %s" % (fields, sp["name"],))

    if fields == 1:
        # If there is only one element present, it is a country
        sp["country"] = clean(v[0])
    elif "United States" in v:
        country_index = v.index("United States")
        sp["country"] = clean(v[country_index])

        # The prior item is almost always a state. Any index above zero has
        # a prior item; the former "> 1" guard skipped the candidate at
        # index 0 (e.g. ["Minnesota", "United States"]). is_state() still
        # decides whether the candidate is kept.
        if country_index > 0:
            state = clean(v[country_index - 1])
            if is_state(state):
                sp["state"] = state

    setprop(data, prop, [sp])
    return json.dumps(data)
コード例 #12
0
ファイル: test_lookup.py プロジェクト: eldios/ingestion
def test_substitution_for_different_fields_and_array():
    """
    Should write the substituted array into a different field.

    "aaa" is expected to stay as sent and "zzz" to hold the substituted
    values.
    """
    source_values = ["aa", "bbb", "ccc", "ddd"]
    request_body = json.dumps({"xxx": "yyy", "aaa": source_values})
    expected_body = json.dumps({
        "xxx": "yyy",
        "aaa": source_values,
        "zzz": ["aa", "BBB", "CCC", "DDD"],
    })

    response, body = _get_server_response(request_body, "aaa", "zzz", "test")
    print_error_log()
    assert response.status == 200
    assert_same_jsons(body, expected_body)
コード例 #13
0
ファイル: test_lookup.py プロジェクト: eldios/ingestion
def test_dictionary_subsitution():
    """
    Should substitute when there is dictionary field.
    """
    request_body = json.dumps({"xxx": "yyy", "aaa": {"bbb": "ccc"}})
    # Only the nested aaa/bbb value is expected to change.
    expected_body = json.dumps({"xxx": "yyy", "aaa": {"bbb": "CCC"}})

    response, body = _get_server_response(
        request_body, "aaa/bbb", "aaa/bbb", "test")
    print_error_log()
    assert response.status == 200
    assert_same_jsons(body, expected_body)
コード例 #14
0
ファイル: test_lookup.py プロジェクト: marktriggs/ingestion
def test_dict_substitution_in_different_field():
    """
    Should add another field when prop is dictionary field.
    """
    request_body = json.dumps(
        {"xxx": "yyy", "aaa": {"bbb": "ccc", "xxx": {"eee": "aaa"}}})
    # The source aaa/xxx/eee is expected to stay, with aaa/xxx/ccc added.
    expected_body = json.dumps({
        "xxx": "yyy",
        "aaa": {"bbb": "ccc", "xxx": {"eee": "aaa", "ccc": "AAA222"}},
    })

    response, body = _get_server_response(
        request_body, "aaa/xxx/eee", "aaa/xxx/ccc", "test2")
    print_error_log()
    assert response.status == 200
    assert_same_jsons(body, expected_body)
コード例 #15
0
def mdlenrichlocation(body,ctype,action="mdl-enrich-location", prop="aggregatedCHO/spatial"):
    """
    Service that accepts a JSON document and enriches the "spatial" field of
    that document by mapping the "name" of each entry, by position, to:

    a) country                       if there is 1 entry
    b) state, country                if there are 2 entries
    c) county, state, country        if there are 3 entries
    d) city, county, state, country  if there are 4 entries
    e) city, county, state, country taken from entries 0, 2, 3 and 4
       if there are more than 4 entries

    The value at prop is replaced by a one-element list holding that
    mapping. An empty or missing prop leaves the document unchanged.

    For primary use with MDL documents.
    """

    try:
        data = json.loads(body)
    except:
        response.code = 500
        response.add_header('content-type', 'text/plain')
        return "Unable to parse body as JSON"

    if not exists(data,prop):
        return json.dumps(data)

    v = getprop(data,prop)
    fields = len(v)
    if not fields:
        logger.error("Spatial is empty.")
        return json.dumps(data)

    # Target keys by number of entries, applied to positions 0..n-1.
    keys_by_count = {
        1: ("country",),
        2: ("state", "country"),
        3: ("county", "state", "country"),
        4: ("city", "county", "state", "country"),
    }
    if fields in keys_by_count:
        picks = list(enumerate(keys_by_count[fields]))
    else:
        # NOTE(review): with more than four entries, position 1 is skipped;
        # presumably intentional -- confirm against real MDL data.
        picks = [(0, "city"), (2, "county"), (3, "state"), (4, "country")]

    sp = {}
    for position, key in picks:
        sp[key] = v[position]["name"]

    setprop(data, prop, [sp])
    return json.dumps(data)
コード例 #16
0
def test_copy_prop_to_prop_create_dict_key1():
    """Should copy to_prop into new dict with key"""
    to_prop = "sourceResource/to_dict"

    def record(to_dict=None):
        # Base document; "to_dict" is only present once something was copied.
        source = {"key2": "value2", "key3": "value3"}
        if to_dict is not None:
            source["to_dict"] = to_dict
        return {"key1": "value1", "sourceResource": source, "key4": "value4"}

    after_first_copy = record({"key1": "value1"})
    after_second_copy = record({"key1": "value1", "key2": "value2"})

    # First copy creates the dict with a single key.
    resp, content = _get_server_response(
        json.dumps(record()), prop="key1", to_prop=to_prop, key="key1",
        create=True)
    assert resp.status == 200
    assert json.loads(content) == after_first_copy

    # Second copy starts from the first result and adds another key.
    resp, content = _get_server_response(
        json.dumps(after_first_copy), prop="sourceResource/key2",
        to_prop=to_prop, key="key2", create=True)
    assert resp.status == 200
    assert json.loads(content) == after_second_copy
コード例 #17
0
def test_convert_spatial_string_to_dictionary():
    """
    Format UIUC spatial dictionaries.

    Of the two spatial entries sent, only "Honolulu, HI" is expected back;
    "1972 to Present" is expected to be dropped.
    """

    def record(names):
        return {
            "id": "12345",
            "sourceResource": {
                "spatial": [{"name": name} for name in names]
            },
            "creator": "David"
        }

    payload = record(["Honolulu, HI", "1972 to Present"])
    expected = record(["Honolulu, HI"])

    endpoint = server() + "uiuc_enrich_location"
    resp, content = H.request(endpoint, "POST", body=json.dumps(payload))
    assert resp.status == 200
    assert json.loads(content) == expected
コード例 #18
0
def dedup_value(body, ctype, action="dedup_value", prop=None):
    '''
    Service that accepts a JSON document and enriches the prop field of that document by:

    a) Removing duplicates

    prop is a comma-separated list of property paths. For every path whose
    value is a list, values that are equal after lower-casing and removing
    spaces, periods, parentheses, brackets and braces count as duplicates;
    the first occurrence of each is kept, in its original position.

    Returns the document serialized as JSON. Without prop the document is
    returned unchanged. If the body cannot be parsed, the response code is
    set to 500 and a plain-text error message is returned.
    '''

    try:
        data = json.loads(body)
    except (TypeError, ValueError):
        # json.loads signals malformed input with ValueError (TypeError for
        # a non-string body); anything else is not a parse problem.
        response.code = 500
        response.add_header('content-type', 'text/plain')
        return "Unable to parse body as JSON"

    if not prop:
        # Nothing to deduplicate; previously this crashed on None.split().
        return json.dumps(data)

    # Whitespace, periods, parens, brackets and braces are ignored when
    # comparing values.
    noise = re.compile(r"[ .()\[\]{}]")

    for p in prop.split(","):
        if not exists(data, p):
            continue
        v = getprop(data, p)
        if not isinstance(v, list):
            continue

        # Keep the first occurrence of every normalised value. Walking the
        # list in order keeps the result independent of set iteration order.
        seen = set()
        unique = []
        for s in v:
            key = noise.sub("", s).lower()
            if key not in seen:
                seen.add(key)
                unique.append(s)

        setprop(data, p, unique)

    return json.dumps(data)
コード例 #19
0
def test_unset_prop6():
    """Should unset prop since conditions are met for multiple condition
       props"""
    # Everything except "_id" is expected to survive.
    remaining = {
        "dataProvider": ["Hathitrust", "University of Minnesota"],
        "sourceResource": {
            "type": "image"
        }
    }
    payload = {"_id": "12345"}
    payload.update(remaining)

    resp, content = _get_server_response(
        json.dumps(payload),
        action="unset",
        prop="_id",
        condition="hathi_exclude",
        condition_prop="dataProvider%2CsourceResource%2Ftype")
    print_error_log()
    assert resp.status == 200
    assert json.loads(content) == remaining
コード例 #20
0
def test_unset_prop2():
    """Should unset prop since condition is met"""

    def record(source_resource):
        return {
            "_id": "12345",
            "key1": "value1",
            "sourceResource": source_resource,
            "key2": "value2"
        }

    payload = record({"key1": "value1", "rights": "20010983784"})
    # The all-digit "rights" value is expected to be removed.
    expected = record({"key1": "value1"})

    resp, content = _get_server_response(
        json.dumps(payload),
        action="unset",
        prop="sourceResource/rights",
        condition="is_digit")
    assert resp.status == 200
    print_error_log()
    assert json.loads(content) == expected
コード例 #21
0
def scdl_enrich_location(body, ctype, action="scdl_enrich_location", prop="sourceResource/spatial"):
    """
    Service that accepts a JSON document and enriches the "spatial" field of that document.

    Every entry's "name" is right-stripped and passed through
    replace_state_abbreviations(). A name containing " county " (any case)
    gets a "county" key holding the text before "county"; otherwise a name
    containing "(S.C.)" gets a "city" key holding the text before it.

    For use with the scdl profiles
    """

    try:
        data = json.loads(body)
    except:
        response.code = 500
        response.add_header('content-type', 'text/plain')
        return "Unable to parse body as JSON"

    if not exists(data, prop):
        return json.dumps(data)

    for place in iterify(getprop(data, prop)):
        name = replace_state_abbreviations(place["name"].rstrip())
        place["name"] = name
        lowered = name.lower()

        if " county " in lowered:
            # "XXX County (S.C.)" => county: XXX
            place["county"] = name[:lowered.index("county")].strip()
        elif "(S.C.)" in name:
            # "XXX (S.C.)" => city: XXX
            place["city"] = name[:name.index("(S.C.)")].strip()

    return json.dumps(data)
コード例 #22
0
def test_set_prop2():
    """Should create the prop and set its value"""

    def record(source_resource):
        return {
            "key1": "value1",
            "sourceResource": source_resource,
            "key2": "value2"
        }

    payload = record({"key1": "value1"})
    # "rights" does not exist in the payload and is expected to be created.
    expected = record({"key1": "value1", "rights": "rights"})

    resp, content = _get_server_response(
        json.dumps(payload), prop="sourceResource/rights", value="rights")
    assert resp.status == 200
    assert json.loads(content) == expected
コード例 #23
0
def nypl_identify_object(body, ctype, download="True"):
    """
    Service that accepts a JSON document and sets its "object" field to the
    NYPL preview URL built from originalRecord/tmp_image_id.

    body     -- request body, expected to be a JSON document
    ctype    -- content type of the request (not used here)
    download -- the string "True" marks the object status as PENDING, any
                other value as IGNORE

    "admin/object_status" is set to that status. When "originalRecord" or
    its "tmp_image_id" is missing, the problem is logged and the original
    body is returned unchanged. If the body cannot be parsed, the response
    code is set to 500 and a plain-text error message is returned.
    """

    try:
        data = json.loads(body)
    except (TypeError, ValueError):
        # json.loads signals malformed input with ValueError (TypeError for
        # a non-string body); anything else is not a parse problem.
        response.code = 500
        response.add_header('content-type', 'text/plain')
        return "Unable to parse body as JSON"

    original_document_key = u"originalRecord"
    original_preview_key = u"tmp_image_id"
    preview_format = "http://images.nypl.org/index.php?id={0}&t=t"

    # Only used in log messages. Read with .get() so that a record without
    # an "id" is still reported instead of raising KeyError while logging.
    doc_id = data.get(u'id')

    if original_document_key not in data:
        logger.error("There is no '%s' key in JSON for doc [%s].", original_document_key, doc_id)
        return body

    if original_preview_key not in data[original_document_key]:
        logger.error("There is no '%s/%s' key in JSON for doc [%s].", original_document_key, original_preview_key, doc_id)
        return body

    preview_url = preview_format.format(data[original_document_key][original_preview_key])
    data["object"] = preview_url

    status = PENDING if download == "True" else IGNORE

    # Create the "admin" section on demand, otherwise extend it in place.
    data.setdefault("admin", {})["object_status"] = status

    return json.dumps(data)
コード例 #24
0
def test_enrich_location_after_provider_specific_enrich_location4():
    """
    Previous specific-provider location did not set state.

    The semicolon-separated city and county values are expected to be split
    into two spatial entries, with the country kept on the first one only.
    """

    def record(spatial):
        return {
            "id": "12345",
            "sourceResource": {"spatial": spatial},
            "creator": "Miguel",
        }

    payload = record([
        {"city": "Asheville; La Jolla", "county": "Buncombe;San Diego", "country": "United States"},
    ])
    expected = record([
        {"city": "Asheville", "county": "Buncombe", "country": "United States"},
        {"city": "La Jolla", "county": "San Diego"},
    ])

    endpoint = server() + "enrich_location"
    resp, content = H.request(endpoint, "POST", body=json.dumps(payload))
    assert resp.status == 200
    assert json.loads(content) == expected
コード例 #25
0
def test_set_prop5():
    """Should set prop to value, since condition_prop exists"""
    condition_prop = "sourceResource"

    def record(rights):
        return {
            "key1": "value1",
            "sourceResource": {
                "key1": "value1",
                "rights": rights
            },
            "key2": "value2"
        }

    payload = record("value2")
    # The existing "rights" value is expected to be overwritten.
    expected = record("rights")

    resp, content = _get_server_response(
        json.dumps(payload),
        prop="sourceResource/rights",
        value="rights",
        condition_prop=condition_prop)
    assert resp.status == 200
    assert json.loads(content) == expected
コード例 #26
0
def test_dedup_value1():
    """Should remove duplicate values"""

    def record(subjects):
        # Only "subject" carries duplicates; the other fields are expected
        # to come back unchanged.
        return {
            "subject": subjects,
            "spatial": ["North Carolina", "New York"],
            "description": "A description"
        }

    payload = record([
        "This is a subject",
        "This is a subject.",
        " this is a SuBject . ",
        "This is another subject (1780).",
        "This is another subject 1780",
        "   thiS IS anOther subject (1780)",
    ])
    expected = record([
        "This is a subject",
        "This is another subject (1780).",
    ])

    resp, content = _get_server_response(
        json.dumps(payload), "subject,spatial,description")
    assert resp.status == 200
    assert_same_jsons(expected, content)
コード例 #27
0
def test_removing_bracket():
    """Should remove bracket from the beginning of the name"""

    def record(spatial):
        return {
            "id": "12345",
            "sourceResource": {"spatial": spatial},
            "creator": "Miguel",
        }

    payload = record(
        ["Charleston (S.C.); [Germany; Poland; Israel; New York (N.Y.); Georgia (U.S.)"])

    # One entry per semicolon-separated part, without the stray "[".
    expected_names = [
        "Charleston (S.C.)",
        "Germany",
        "Poland",
        "Israel",
        "New York (N.Y.)",
        "Georgia (U.S.)",
    ]
    expected = record([{"name": name} for name in expected_names])

    endpoint = server() + "enrich_location"
    resp, content = H.request(endpoint, "POST", body=json.dumps(payload))
    assert resp.status == 200
    assert json.loads(content) == expected
コード例 #28
0
def test_enrich_list_of_dictionaries_and_strings():
    """Should handle list of dictionaries and strings"""
    structured = {"country": "United States", "county": "Buncombe", "state": "North Carolina"}
    plain_names = ["Rushmore, Mount", "Mount Rushmore National Memorial"]

    payload = {
        "id": "12345",
        "sourceResource": {"spatial": [structured] + plain_names},
    }
    # The dict entry is expected back unchanged; each string is expected to
    # be wrapped in a {"name": ...} dict.
    expected = {
        "id": "12345",
        "sourceResource": {
            "spatial": [structured] + [{"name": name} for name in plain_names]
        },
    }

    endpoint = server() + "enrich_location"
    resp, content = H.request(endpoint, "POST", body=json.dumps(payload))
    assert resp.status == 200
    assert json.loads(content) == expected
コード例 #29
0
def test_unset_prop1():
    """Should unset prop"""

    def record(source_resource):
        return {
            "_id": "12345",
            "key1": "value1",
            "sourceResource": source_resource,
            "key2": "value2"
        }

    payload = record({"key1": "value1", "rights": "value2"})
    # No condition is given, so "rights" is expected to be removed.
    expected = record({"key1": "value1"})

    resp, content = _get_server_response(
        json.dumps(payload), action="unset", prop="sourceResource/rights")
    assert resp.status == 200
    assert json.loads(content) == expected
コード例 #30
0
def artstor_cleanup_creator(body, ctype, prop="sourceResource/creator"):
    """
    Service that accepts a JSON document and cleans the
    sourceResource/creator field by removing each pattern in CLEANUP from
    the start of the value (case-insensitively).

    A single value is handled like a one-element list. A one-element result
    is stored as a bare value, anything else as a list.
    """

    try:
        data = json.loads(body)
    except:
        response.code = 500
        response.add_header('content-type', 'text/plain')
        return "Unable to parse body as JSON"

    def scrub(text):
        # Apply every pattern in turn, stripping before each match and
        # left-stripping what remains.
        for pattern in CLEANUP:
            text = re.sub(r"(?i)^{0}".format(pattern), "", text.strip()).lstrip()
        return text

    if exists(data, prop):
        values = getprop(data, prop)
        if not isinstance(values, list):
            values = [values]
        cleaned = [scrub(value) for value in values]

        if len(cleaned) == 1:
            setprop(data, prop, cleaned[0])
        else:
            setprop(data, prop, cleaned)

    return json.dumps(data)
コード例 #31
0
def test_drop_long_values():
    """Should drop description values longer than max_length.

    The request asks for max_length=20; only the long middle string is
    expected to be removed.
    """
    kept = ["could be 1928ish?", "short"]
    too_long = "this is a long string will blow up flake 8, should drop this"

    payload = {
        "sourceResource": {
            "description": [kept[0], too_long, kept[1]]
        }
    }
    expected = {
        "sourceResource": {
            "description": kept
        }
    }

    endpoint = server() + "drop-long-values?field=description&max_length=20"
    resp, content = H.request(endpoint, "POST", body=json.dumps(payload))

    TC.assertEqual(resp.status, 200)
    TC.assertEqual(json.loads(content), expected)
コード例 #32
0
def test_unset_prop8():
    """Should not unset prop since condition is not met with dataProvider"""
    payload = {
        "_id": "12345",
        "dataProvider": "Hathitrust",
        "sourceResource": {
            "type": "image"
        }
    }
    options = {
        "action": "unset",
        "prop": "_id",
        "condition": "hathi_exclude",
        "condition_prop": "dataProvider%2CsourceResource%2Ftype",
    }

    resp, content = _get_server_response(json.dumps(payload), **options)
    print_error_log()
    assert resp.status == 200
    # The document is expected to come back exactly as sent.
    assert json.loads(content) == payload
コード例 #33
0
def test_copy_prop_str_to_str():
    """Should extend to_prop"""
    note = "This is a note"
    description = "This is a description"

    payload = {
        "note": note,
        "sourceResource": {"description": description}
    }
    # Both strings are expected in a list, the existing value first.
    expected = {
        "note": note,
        "sourceResource": {"description": [description, note]}
    }

    resp, content = _get_server_response(
        json.dumps(payload), prop="note", to_prop="sourceResource/description")
    assert resp.status == 200
    assert json.loads(content) == expected
コード例 #34
0
def test_date_with_brackets():
    """Bracketed dates are parsed; displayDate keeps the original text."""

    for bracketed in ("[1960-05-01]", "[  1960-05-01  ]"):
        payload = json.dumps({"date": bracketed})
        wanted = {
            u'date': {
                u'begin': u'1960-05-01',
                u'end': u'1960-05-01',
                "displayDate": bracketed
            }
        }

        endpoint = server() + "enrich_earliest_date?prop=date"
        reply, raw = H.request(endpoint, "POST", body=payload)

        # Any 2xx status counts as success.
        assert str(reply.status).startswith("2")
        assert_same_jsons(wanted, raw)
コード例 #35
0
def test_dedup_value1():
    """Near-duplicate list values collapse to their first occurrence.

    The subject values differ only in case, surrounding whitespace and
    punctuation; "spatial" has no duplicates and "description" is a plain
    string, so both are expected back unchanged.
    """
    subjects = [
        "This is a subject", "This is a subject.", " this is a SuBject . ",
        "This is another subject (1780).", "This is another subject 1780",
        "   thiS IS anOther subject (1780)"
    ]
    payload = {
        "subject": subjects,
        "spatial": ["North Carolina", "New York"],
        "description": "A description"
    }
    wanted = {
        "subject": ["This is a subject", "This is another subject (1780)."],
        "spatial": ["North Carolina", "New York"],
        "description": "A description"
    }

    reply, raw = _get_server_response(json.dumps(payload),
                                      "subject,spatial,description")

    assert reply.status == 200
    assert_same_jsons(wanted, raw)
コード例 #36
0
def mdlstatelocatedin(body, ctype):
    """
    Service that accepts a JSON document and extracts the state from the
    address in the first dataProvider value.

    Each (key, value) pair of the module-level ``states`` mapping is tried
    against the address; the first pair whose key or value appears
    surrounded by whitespace wins, and its value is stored in
    "sourceResource/stateLocatedIn".

    body  -- request body, expected to be a JSON document
    ctype -- request content type (unused)

    Returns the (possibly updated) document serialized as JSON, or a plain
    text error message with a 500 response code when the body cannot be
    parsed.
    """

    try:
        data = json.loads(body)
    except Exception:
        # "except Exception" instead of a bare "except:" so that
        # SystemExit and KeyboardInterrupt are not reported as a bad body.
        response.code = 500
        response.add_header('content-type', 'text/plain')
        return "Unable to parse body as JSON"

    prop = "dataProvider"
    if exists(data, prop):
        # Only the first dataProvider value is inspected.
        address = iterify(getprop(data, prop))[0]
        for st, state in states.items():
            # Raw strings keep "\s" a regex escape rather than an (invalid)
            # string escape; the resulting patterns are unchanged.
            # NOTE(review): st/state are interpolated unescaped -- confirm
            # that ``states`` never contains regex metacharacters.
            if (re.search(r"\s+%s\s+" % st, address)
                    or re.search(r"\s+%s\s+" % state, address)):
                setprop(data, "sourceResource/stateLocatedIn", state)
                break

    return json.dumps(data)
コード例 #37
0
def nara_enrich_location(body,
                         ctype,
                         action="nara_enrich_location",
                         prop="sourceResource/spatial"):
    """
    Service that massages a NARA JSON document.

    Every value found at ``prop`` is passed through ``format_spatial`` and
    the property is replaced with the list of results.

    body   -- request body, expected to be a JSON document
    ctype  -- request content type (unused)
    action -- service action name (unused in the body)
    prop   -- path of the property holding the spatial values

    Returns the document serialized as JSON, or a plain text error message
    with a 500 response code when the body cannot be parsed.
    """
    try:
        data = json.loads(body)
    except Exception:
        # "except Exception" instead of a bare "except:" so that
        # SystemExit and KeyboardInterrupt are not reported as a bad body.
        response.code = 500
        response.add_header('content-type', 'text/plain')
        return "Unable to parse body as JSON"

    if exists(data, prop):
        # The property always ends up as a list, even for a single value.
        spatials = [format_spatial(spatial)
                    for spatial in iterify(getprop(data, prop))]
        setprop(data, prop, spatials)

    return json.dumps(data)
コード例 #38
0
def test_full_date_range():
    """Every spelling of the 1901-01-01..1902-01-01 range parses alike."""
    spellings = [
        "1901-01-01-1902-01-01", "1901-01-01/1902-01-01",
        "1901/01/01-1902/01/01", "1901/01/01/1902/01/01",
        "01/01/1901-01/01/1902", "1/1/1901/1/1/1902", "01-01-1901/01-01-1902",
        "1-1-1901-1-1-1902"
    ]

    endpoint = server() + "enrich_earliest_date?prop=date"
    for spelling in spellings:
        payload = {"date": spelling}
        # displayDate must echo the input exactly as it was sent.
        wanted = {
            "date": [{
                "begin": "1901-01-01",
                "end": "1902-01-01",
                "displayDate": spelling
            }]
        }

        reply, raw = H.request(endpoint, "POST", body=json.dumps(payload))
        assert str(reply.status).startswith("2")
        assert_same_jsons(wanted, raw)
コード例 #39
0
def test_copy_prop_dict_to_list():
    """Copying a dict onto a list property appends the dict to the list.

    "sourceResource/from_dict" is copied to "sourceResource/to_list"; all
    other keys, including the source dict, must come back unchanged.
    """
    payload = {
        "key1": "value1",
        "sourceResource": {
            "key1": "value1",
            "from_dict": {"key1": "value1"},
            "to_list": ["a", "b", "c"],
            "key2": "value2"
        },
        "key2": "value2"
    }
    wanted = {
        "key1": "value1",
        "sourceResource": {
            "key1": "value1",
            "from_dict": {"key1": "value1"},
            "to_list": ["a", "b", "c", {"key1": "value1"}],
            "key2": "value2"
        },
        "key2": "value2"
    }

    reply, raw = _get_server_response(json.dumps(payload),
                                      prop="sourceResource/from_dict",
                                      to_prop="sourceResource/to_list")

    assert reply.status == 200
    assert json.loads(raw) == wanted
コード例 #40
0
ファイル: test_geocode.py プロジェクト: mlhale7/ingestion
def test_geocode_coordinate_provided2():
    """Coordinates already on the spatial entry drive the geocoding.

    The expected entry keeps the original name and coordinates and gains
    county, state and country.
    """
    place_name = "United States--Massachussetts"
    lat_long = "42.358631134, -71.0567016602"

    payload = {
        "id": "12345",
        "_id": "12345",
        "sourceResource": {
            "spatial": [
                {"name": place_name, "coordinates": lat_long}
            ]
        },
        "creator": "David"
    }
    wanted = {
        "id": "12345",
        "_id": "12345",
        "sourceResource": {
            "spatial": [
                {
                    "county": "Suffolk County",
                    "state": "Massachusetts",
                    "country": "United States",
                    "name": place_name,
                    "coordinates": lat_long
                }
            ]
        },
        "creator": "David"
    }

    endpoint = server() + "geocode"
    reply, raw = H.request(endpoint, "POST", body=json.dumps(payload))

    assert reply.status == 200
    assert_same_jsons(wanted, json.loads(raw))
コード例 #41
0
def scdl_enrich_location(body,
                         ctype,
                         action="scdl_enrich_location",
                         prop="sourceResource/spatial"):
    """
    Service that accepts a JSON document and enriches the "spatial" field of that document.

    For use with the scdl profiles

    For every entry found at ``prop`` the "name" is right-stripped and
    passed through ``replace_state_abbreviations``; then:

    * a name containing " county " (case-insensitive) gets "county" set to
      the text preceding the first "county";
    * otherwise a name containing "(S.C.)" gets "city" set to the text
      preceding "(S.C.)";
    * in either case a name containing "(S.C.)" also gets "state" and
      "country" set to South Carolina / United States.

    body   -- request body, expected to be a JSON document
    ctype  -- request content type (unused)
    action -- service action name (unused in the body)
    prop   -- path of the property holding the spatial entries

    Returns the document serialized as JSON, or a plain text error message
    with a 500 response code when the body cannot be parsed.
    """

    try:
        data = json.loads(body)
    except Exception:
        # "except Exception" instead of a bare "except:" so that
        # SystemExit and KeyboardInterrupt are not reported as a bad body.
        response.code = 500
        response.add_header('content-type', 'text/plain')
        return "Unable to parse body as JSON"

    if exists(data, prop):
        # Entries are updated in place, so no setprop call is needed.
        # NOTE(review): each entry is assumed to be a dict carrying a string
        # "name"; anything else raises here -- confirm against the callers.
        for v in iterify(getprop(data, prop)):
            name = replace_state_abbreviations(v["name"].rstrip())
            v["name"] = name

            lowered = name.lower()
            in_south_carolina = "(S.C.)" in name

            if " county " in lowered:
                # "XXX County (S.C.)" => county: XXX
                v["county"] = name[0:lowered.index("county")].strip()
            elif in_south_carolina:
                # "XXX (S.C.)" => city: XXX
                v["city"] = name[0:name.index("(S.C.)")].strip()

            if in_south_carolina:
                v["state"] = "South Carolina"
                v["country"] = "United States"

    return json.dumps(data)
コード例 #42
0
def test_texas_enrich_location4():
    """Bounding-box "limit" strings are passed through as plain names.

    The hierarchical place string is split into country/state/county/city,
    the north/east pair becomes a "lat, long" name, and the
    northlimit/eastlimit/... string is kept verbatim.
    """
    hierarchy = "Canada - British Columbia Province - Vancouver Island - Victoria"
    limits = "northlimit=34.25; eastlimit=-99.88; southlimit=34.13; westlimit=-100;"

    payload = {
        "id": "12345",
        "sourceResource": {
            "spatial": [hierarchy, "north=34.19; east=-99.94;", limits]
        }
    }
    wanted = {
        "id": "12345",
        "sourceResource": {
            "spatial": [
                {
                    "name": hierarchy,
                    "country": "Canada",
                    "state": "British Columbia Province",
                    "county": "Vancouver Island",
                    "city": "Victoria"
                },
                {"name": "34.19, -99.94"},
                {"name": limits}
            ]
        }
    }

    endpoint = server() + "texas_enrich_location"
    reply, raw = H.request(endpoint, "POST", body=json.dumps(payload))

    assert reply.status == 200
    assert json.loads(raw) == wanted
コード例 #43
0
def jsonfy_prop(body, ctype, prop=None):
    """ Some data is packed as strings that contain json. (UCSD)
    Take the data in the given property and turn any sub-values that can be
    read by json.loads into json object.

    body  -- request body, expected to be a JSON document
    ctype -- request content type (unused)
    prop  -- path of the property to convert; when empty/None the whole
             document is converted instead

    Returns the converted document serialized as JSON, or a plain text
    error message with a 500 response code when the body cannot be parsed.
    """

    try:
        data = json.loads(body)
    except Exception:
        # "except Exception" instead of a bare "except:" so that
        # SystemExit and KeyboardInterrupt are not reported as a bad body.
        response.code = 500
        response.add_header('content-type', 'text/plain')
        return "Unable to parse body as JSON"

    if prop:
        # NOTE(review): the third getprop argument presumably makes a
        # missing property yield None instead of raising -- confirm.
        setprop(data, prop, jsonfy_obj(getprop(data, prop, True)))
    else:
        data = jsonfy_obj(data)

    return json.dumps(data)
コード例 #44
0
ファイル: test_geocode.py プロジェクト: chadfennell/ingestion
def test_geocode():
    """
    Simple geocode: a bare place name gains state, country and coordinates.
    """
    payload = {
        "id": "12345",
        "_id": "12345",
        "sourceResource": {
            "spatial": [
                {"name": "Boston, MA"}
            ]
        },
        "creator": "David"
    }
    wanted = {
        "id": "12345",
        "_id": "12345",
        "sourceResource": {
            "spatial": [
                {
                    "name": "Boston, MA",
                    "state": "Massachusetts",
                    "country": "United States",
                    "coordinates": "42.358631134, -71.0567016602"
                }
            ]
        },
        "creator": "David"
    }

    endpoint = server() + "geocode"
    reply, raw = H.request(endpoint, "POST", body=json.dumps(payload))

    assert reply.status == 200
    assert_same_jsons(json.loads(raw), wanted)
コード例 #45
0
def test_usc_enrich_location_find_coordinates_and_flip():
    """Only the coordinate pair survives, with its two numbers swapped.

    Of the five spatial names, "92.5542, 35.6008" is the one expected back,
    as "35.6008, 92.5542"; every other value is expected to be dropped.
    """
    names = [" 123 ", "-130.4560,,32.9870", "1234", "Asheville",
             "92.5542, 35.6008"]
    payload = {
        "sourceResource": {
            "spatial": [{"name": name} for name in names]
        }
    }
    wanted = {"sourceResource": {"spatial": [{"name": "35.6008, 92.5542"}]}}

    # ``url`` is not defined in this function; it is taken from the
    # enclosing scope.
    reply, raw = H.request(url, "POST", body=json.dumps(payload))

    assert reply["status"] == "200"
    assert_same_jsons(wanted, json.loads(raw))
コード例 #46
0
def test_suppress_creator_value_for_string():
    """Should remove the creator property when its value is 'creator'."""
    untouched = [
        "Fisker, Kay--Architect--Danish--Male",
        "Fisker, Kay--Architect--Danish--Male", "bbb", "ccc"
    ]
    payload = {
        "sourceResource": {
            "creator": "creator",
            "aaa": list(untouched)
        }
    }
    # "creator" disappears entirely; the sibling list is left as it was.
    wanted = {"sourceResource": {"aaa": list(untouched)}}

    reply, raw = _get_server_response(json.dumps(payload))
    print_error_log()

    assert reply["status"].startswith("2")
    assert_same_jsons(wanted, raw)
コード例 #47
0
def test_usc_enrich_location():
    """Spatial names are merged into one entry, joined by single spaces."""
    names = ["-130.4560,,32.9870", "1234", "Asheville"]
    payload = {
        "sourceResource": {
            "spatial": [{"name": name} for name in names]
        }
    }
    wanted = {
        "sourceResource": {
            "spatial": [{"name": "-130.4560,,32.9870 1234 Asheville"}]
        }
    }

    # ``url`` is not defined in this function; it is taken from the
    # enclosing scope.
    reply, raw = H.request(url, "POST", body=json.dumps(payload))

    assert reply["status"] == "200"
    assert_same_jsons(wanted, json.loads(raw))
コード例 #48
0
def augment_freemix(body, ctype):
    #See: http://foundry.zepheira.com/issues/133#note-4
    '''
    Run the augmentation services selected by a data profile over its items.

    ``body`` is a JSON object with a "data_profile" (whose "properties" list
    describes each property: "enabled", "tags", optionally "composite") and
    the "items" to augment.  For every enabled property, the tags starting
    with PROP_TYPE_MARKER give its types; each AUGMENTATIONS entry whose key
    is one of those types is invoked when the property has a "composite"
    key or the augmentation is 'shredded_list'.

    Handlers receive the source items, the property description and two
    containers -- a list and a dict -- which are returned afterwards under
    "items" and "failed" as an indented JSON string.
    '''
    request = json.loads(body)
    profile = request['data_profile']
    items = request[u'items']
    augmented = []
    failures = {}

    for prop in profile["properties"]:
        if not prop["enabled"]:
            continue

        # Strip the marker prefix to obtain the bare type names.
        type_names = [
            tag[PROP_TYPE_MARKER_LEN:] for tag in prop["tags"]
            if tag.startswith(PROP_TYPE_MARKER)
        ]
        if not type_names:
            continue

        for aug, sid in AUGMENTATIONS.items():
            # The proxy is resolved for every augmentation, applicable or
            # not, exactly as before.
            handler = service_proxy(sid)
            if aug not in type_names:
                continue
            if u"composite" in prop or aug == u'shredded_list':
                handler(items, prop, augmented, failures)

    result = {'items': augmented, 'failed': failures}
    return json.dumps(result, indent=4)
コード例 #49
0
def test_unset_prop4():
    """An unknown condition name must leave the document untouched.

    "is_digits" is, per the original author, not one of the service's
    CONDITIONS; the resulting KeyError is expected to be handled server
    side so the request still succeeds.
    """
    document = {
        "_id": "12345",
        "key1": "value1",
        "sourceResource": {"key1": "value1", "rights": "value2"},
        "key2": "value2"
    }

    reply, raw = _get_server_response(json.dumps(document),
                                      action="unset",
                                      prop="sourceResource/rights",
                                      condition="is_digits")

    assert reply.status == 200
    assert json.loads(raw) == document
コード例 #50
0
def scdl_geocode_regions(body,
                         ctype,
                         action="scdl_geocode_regions",
                         prop="sourceResource/spatial"):
    """
    Service that accepts a JSON document and forcibly sets the coordinates for South Carolina regions.

    For use with the scdl profiles

    Every value found at ``prop`` for which ``is_region`` is true is handed
    to ``geocode_region``.

    body   -- request body, expected to be a JSON document
    ctype  -- request content type (unused)
    action -- service action name (unused in the body)
    prop   -- path of the property holding the spatial entries

    Returns the document serialized as JSON, or a plain text error message
    with a 500 response code when the body cannot be parsed.
    """

    try:
        data = json.loads(body)
    except Exception:
        # "except Exception" instead of a bare "except:" so that
        # SystemExit and KeyboardInterrupt are not reported as a bad body.
        response.code = 500
        response.add_header('content-type', 'text/plain')
        return "Unable to parse body as JSON"

    if exists(data, prop):
        # geocode_region's return value is ignored, so it is presumably
        # expected to update the entry in place -- TODO confirm.
        for v in iterify(getprop(data, prop)):
            if is_region(v):
                geocode_region(v)

    return json.dumps(data)
コード例 #51
0
def test_enrich_dates_with_tildes_and_x():
    """Should remove tildes and x characters from dates

    The trailing "x" / "~" is dropped from begin and end, while displayDate
    keeps the value exactly as it was sent.
    """
    INPUT = [{"date": "1946-10x"}, {"date": "1946-10~"}]
    EXPECTED = [{
        "date": [{
            "begin": "1946-10",
            "end": "1946-10",
            "displayDate": "1946-10x"
        }]
    }, {
        "date": [{
            "begin": "1946-10",
            "end": "1946-10",
            "displayDate": "1946-10~"
        }]
    }]

    url = server() + "enrich_earliest_date?prop=date"

    for document, expected in zip(INPUT, EXPECTED):
        resp, content = H.request(url, "POST", body=json.dumps(document))
        # Parenthesised single-argument form: prints the same text under
        # the Python 2 print statement and is valid syntax on Python 3.
        print('CONTENT:{}'.format(content))
        assert str(resp.status).startswith("2")
        assert_same_jsons(expected, content)
コード例 #52
0
def cleanup_value(body,
                  ctype,
                  action="cleanup_value",
                  prop=",".join(DEFAULT_PROP + DONT_STRIP_DOT_END)):
    '''
    Service that accepts a JSON document and enriches the prop field of that document by:

    a) applying a set of regexps to do data cleanup

    body   -- request body, expected to be a JSON document
    ctype  -- request content type (unused)
    action -- service action name (unused in the body)
    prop   -- comma-separated property paths; each one is handed to
              ``convert``.  When empty/None an error is logged and the
              document is returned without any cleanup.

    Returns the document serialized as JSON, or a plain text error message
    with a 500 response code when the body cannot be parsed.
    '''

    # Parse before looking at ``prop``: the final json.dumps(data) needs
    # ``data`` on every path.  Previously a falsy ``prop`` skipped the
    # parse and the function died with an UnboundLocalError.
    try:
        data = json.loads(body)
    except Exception:
        # "except Exception" instead of a bare "except:" so that
        # SystemExit and KeyboardInterrupt are not reported as a bad body.
        response.code = 500
        response.add_header('content-type', 'text/plain')
        return "Unable to parse body as JSON"

    if prop:
        for p in prop.split(","):
            convert(data, p)
    else:
        logger.error("Prop param in None in %s" % __name__)

    return json.dumps(data)
コード例 #53
0
ファイル: test_geocode.py プロジェクト: mlhale7/ingestion
def test_geocode_name_search_context():
    """Use sibling feature names to disambiguate an ambiguous place name.

    "Portland" is sent together with state "Maine"; the expected result is
    the Maine city, returned as a one-element list even though the input
    spatial value was a single dict.
    """
    payload = {
        "id": "12345",
        "_id": "12345",
        "sourceResource": {
            "spatial": {"name": "Portland", "state": "Maine"}
        }
    }
    wanted = {
        "id": "12345",
        "_id": "12345",
        "sourceResource": {
            "spatial": [
                {
                    "city": "Portland",
                    "county": "Cumberland County",
                    "country": "United States",
                    "state": "Maine",
                    "name": "Portland",
                    "coordinates": "43.66147, -70.25533"
                }
            ]
        }
    }

    endpoint = server() + "geocode"
    reply, raw = H.request(endpoint, "POST", body=json.dumps(payload))

    assert reply.status == 200
    assert_same_jsons(wanted, json.loads(raw))
コード例 #54
0
def test_enrich_temporal_date():
    """Date-like spatial values move to temporal and are then enriched.

    Two services are chained: move_date_values on sourceResource/spatial,
    then enrich_date on the first response's body.  Only "North Carolina"
    is expected to remain in spatial.
    """

    payload = {
        "sourceResource": {
            "spatial": [
                "1901-1999",
                " 1901 - 1999 ",
                " 1901 / 01 / 01",
                "1905-04-12",
                "01/01/1901",
                "1901",
                "North Carolina"
            ]
        }
    }
    wanted = {
        "sourceResource": {
            "temporal": [
                {"begin": "1901", "end": "1999", "displayDate": "1901-1999"},
                {"begin": "1901", "end": "1999", "displayDate": "1901 - 1999"},
                {"begin": "1901", "end": "1901", "displayDate": "1901"},
                {"begin": "1901-01-01", "end": "1901-01-01", "displayDate": "1901 / 01 / 01"},
                {"begin": "1901-01-01", "end": "1901-01-01", "displayDate": "01/01/1901"},
                {"begin": "1905-04-12", "end": "1905-04-12", "displayDate": "1905-04-12"},
            ],
            "spatial": ["North Carolina"]
        }
    }

    # Step 1: pull the date-like values out of spatial.
    move_url = server() + "move_date_values?prop=sourceResource/spatial"
    reply, moved = H.request(move_url, "POST", body=json.dumps(payload))
    assert reply.status == 200

    # Step 2: feed the intermediate document straight into enrich_date.
    enrich_url = server() + "enrich_date"
    reply, enriched = H.request(enrich_url, "POST", body=moved)
    assert reply.status == 200
    assert_same_jsons(wanted, enriched)
コード例 #55
0
def test_capitalize_value():
    """Should upper-case the first letter of each listed property's value.

    Only the first character changes ("hi there" -> "Hi there"); "id" is
    not listed and must come back as sent.
    """

    payload = {
        "id": "123",
        "spatial": {"key1": "asheville", "key2": "north Carolina"},
        "subject": ["subject", "hi there", "hello"]
    }
    wanted = {
        "id": "123",
        "spatial": {"key1": "Asheville", "key2": "North Carolina"},
        "subject": ["Subject", "Hi there", "Hello"]
    }

    reply, raw = _get_server_response(
        json.dumps(payload), prop="spatial/key1,spatial/key2,subject")
    assert reply.status == 200

    fetched = json.loads(raw)
    # The diff is only computed when the comparison fails.
    assert fetched == wanted, DictDiffer(wanted, fetched).diff()
コード例 #56
0
def nypl_identify_object(body, ctype, list_sets=None):
    """
    Replace a document's "title" with the matching NYPL collection title.

    The URL in ``list_sets`` is fetched and parsed as XML; for every entry
    under nyplAPI/response/collection that has both "uuid" and "title", the
    document's "title" is overwritten with the collection title when the
    uuid equals the current "title" or occurs in the document's "@id".

    body      -- request body, expected to be a JSON document
    ctype     -- request content type; must be HTTP_TYPE_JSON
    list_sets -- URL of the collection listing to fetch

    Returns the document serialized as JSON.  A wrong content type or an
    unparsable body yields an error text with HTTP_INTERNAL_SERVER_ERROR;
    a non-2xx answer from ``list_sets`` is logged and ``body`` is returned
    unchanged.
    """

    try:
        if ctype.lower() != HTTP_TYPE_JSON:
            # Raised explicitly instead of via an ``assert`` statement so
            # the check is not stripped under ``python -O``; exception type
            # and message are the same as before.
            raise AssertionError("%s is not %s" % (
                HTTP_HEADER_TYPE, HTTP_TYPE_JSON))
        data = json.loads(body)
    except Exception as e:
        error_text = "Bad JSON: %s: %s" % (e.__class__.__name__, str(e))
        logger.exception(error_text)
        response.code = HTTP_INTERNAL_SERVER_ERROR
        response.add_header(HTTP_HEADER_TYPE, HTTP_TYPE_TEXT)
        return error_text

    H = httplib2.Http('/tmp/.cache')
    # Transport errors are reported as status codes instead of raising.
    H.force_exception_as_status_code = True
    resp, content = H.request(list_sets)
    if not resp[u'status'].startswith('2'):
        logger.error('  HTTP error (' + resp[u'status'] + ') resolving URL: ' +
                     list_sets)
        return body
    content_dict = xmltodict.parse(content,
                                   xml_attribs=True,
                                   attr_prefix='',
                                   force_cdata=False,
                                   ignore_whitespace_cdata=True)
    sets = content_dict["nyplAPI"]["response"]

    for r in sets:
        if "collection" == r:
            # NOTE(review): this assumes sets[r] is a list of dicts; confirm
            # what xmltodict yields when there is only one <collection>.
            for coll_dict in sets[r]:
                if "uuid" in coll_dict and "title" in coll_dict and (
                        coll_dict["uuid"] == data["title"]
                        or coll_dict["uuid"] in data["@id"]):
                    data["title"] = coll_dict["title"]

    return json.dumps(data)
コード例 #57
0
def test_enrich_format_cleanup_multiple():
    """Test format normalization and removal of non IMT formats.

    The input repeats the same six values twice plus two "images/jpeg"
    variants; the expected "format" list has six entries with the MIME
    types normalized, and "type" is expected to become image and sound.
    """
    payload = {
        "format": [
            "Still Images", "image/JPEG", "audio", "Images", 'application',
            "audio/mp3 (1.46 MB; 1 min., 36 sec.)", "Still Images",
            "image/JPEG", "audio", "Images", 'application',
            "audio/mp3 (1.46 MB; 1 min., 36 sec.)", "Images/jpeg",
            "images/jpeg"
        ]
    }
    wanted = {
        u'format': [
            "Still Images", 'image/jpeg', "audio", "Images", 'application',
            'audio/mpeg'
        ],
        u'type': ["image", "sound"]
    }

    endpoint = server() + "enrich-format?prop=format&type_field=type"
    reply, raw = H.request(endpoint, "POST", body=json.dumps(payload))

    # Content is checked before the status, as in the original ordering.
    assert_same_jsons(wanted, raw)
    assert str(reply.status).startswith("2")
コード例 #58
0
def dc_clean_invalid_dates(body,
                           ctype,
                           action="cleanup_value",
                           prop="sourceResource/date"):
    """
    Service that runs ``convert`` on each listed property that exists in
    the JSON document.

    body   -- request body, expected to be a JSON document
    ctype  -- request content type (unused)
    action -- service action name (unused in the body)
    prop   -- comma-separated property paths to clean; must not be None

    Returns the document serialized as JSON.  A None ``prop`` or an
    unparsable body yields a plain text error message with a 500 response
    code.
    """

    if prop is None:
        response.code = 500
        response.add_header('content-type', 'text/plain')
        msg = "Prop param is None"
        logger.error(msg)
        return msg

    try:
        data = json.loads(body)
    except Exception:
        # "except Exception" instead of a bare "except:" so that
        # SystemExit and KeyboardInterrupt are not reported as a bad body.
        response.code = 500
        response.add_header('content-type', 'text/plain')
        return "Unable to parse body as JSON"

    for p in prop.split(","):
        # Properties missing from the document are skipped silently.
        if exists(data, p):
            convert(data, p)

    return json.dumps(data)
def required_values_from_collection_registry(body, ctype, field, mode):
    '''Get values for the required fields sourceResource.rights &
    sourceResource.type from the collection registry data.
    Default mode is to fill in missing data.
    mode='overwrite' will overwrite existing data
    mode='append' will add the values

    body  -- request body, expected to be a JSON document
    ctype -- request content type (unused)
    field -- 'rights', 'type' or 'title'; selects which helper runs.  Any
             other value leaves the field data untouched.
    mode  -- forwarded to the rights/type helpers; not used for 'title'

    Whatever the field, "@context" is added when the document lacks it.

    Returns the document serialized as JSON, or a plain text error message
    with a 500 response code when the body cannot be parsed.
    '''
    try:
        data = json.loads(body)
    except Exception:
        # "except Exception" instead of a bare "except:" so that
        # SystemExit and KeyboardInterrupt are not reported as a bad body.
        response.code = 500
        response.add_header('content-type', 'text/plain')
        return "Unable to parse body as JSON"

    if field == 'rights':
        data = set_rights_from_collection(data, mode)
    elif field == 'type':
        data = set_type_from_collection(data, mode)
    elif field == 'title':
        data = set_title_for_object(data)
    #ensure "@context" is there
    if not exists(data, "@context"):
        data["@context"] = "http://dp.la/api/items/context"
    return json.dumps(data)
コード例 #60
0
def capitalize_value(body, ctype, prop=",".join(DEFAULT_PROP), exclude=None):
    """
    Service that accepts a JSON document and capitalizes the prop field of that document

    body    -- request body, expected to be a JSON document
    ctype   -- request content type (unused)
    prop    -- comma-separated property paths; each non-empty one is handed
               to ``capitalize``.  When empty/None an error is logged and
               the document is returned unmodified.
    exclude -- a single property path to drop from ``prop`` (first
               occurrence only)

    Returns the document serialized as JSON, or a plain text error message
    with a 500 response code when the body cannot be parsed.
    """

    # Parse before looking at ``prop``: the final json.dumps(data) needs
    # ``data`` on every path.  Previously a falsy ``prop`` skipped the
    # parse and the function died with an UnboundLocalError.
    try:
        data = json.loads(body)
    except Exception:
        # "except Exception" instead of a bare "except:" so that
        # SystemExit and KeyboardInterrupt are not reported as a bad body.
        response.code = 500
        response.add_header('content-type', 'text/plain')
        return "Unable to parse body as JSON"

    if prop:
        props = prop.split(",")
        if exclude in props:
            props.remove(exclude)

        for p in props:
            # Empty segments (e.g. from "a,,b") are ignored.
            if p:
                capitalize(data, p)
    else:
        logger.error("Prop param in None in %s" % __name__)

    return json.dumps(data)