def test_enrich_subject_one_char_string2():
    """The subjects "h" and "hi" are expected to be dropped, leaving []."""
    places = [{"name": "Asheville"}, {"name": "North Carolina"}]
    document = {"id": "123", "spatial": places, "subject": ["h", "hi"]}
    wanted = {"id": "123", "spatial": places, "subject": []}

    resp, content = _get_server_response(json.dumps(document))
    assert resp.status == 200

    decoded = json.loads(content)
    print(str(decoded))
    assert decoded == wanted
예제 #2
0
def test_physical_format_from_format_and_type():
    """Check that the type values end up appended to the format field.

    The document is posted to enrich-type, then that response body is posted
    to enrich-format; both responses are compared with the same expected
    document, in which "type" is gone and its values follow the formats.
    """
    formats = [
        "76.8 x 104 cm",
        "Oil on canvas",
        "7 1/4 x 6 inches (18.4 x 15.2 cm)",
        "Sheet: 9 1/2 x 12 1/8 inches (24.1 x 30.8 cm)",
    ]
    types = ["Paintings", "Painting"]
    payload = {"format": formats, "type": types}
    wanted = {"format": formats + types}

    type_url = server() + "enrich-type?prop=type&format_field=format"
    resp, content = H.request(type_url, "POST", body=json.dumps(payload))
    assert str(resp.status).startswith("2")
    fetched = json.loads(content)
    assert fetched == wanted, DictDiffer(wanted, fetched).diff()

    # Feed the first response straight into the second enrichment
    format_url = server() + "enrich-format?prop=format&type_field=type"
    resp, content = H.request(format_url, "POST", body=content)
    assert str(resp.status).startswith("2")
    fetched = json.loads(content)
    assert fetched == wanted, DictDiffer(wanted, fetched).diff()
예제 #3
0
def test_move_date_values_iterify_if_string():
    """String-valued spatial and temporal fields are expected back as lists."""
    prop = "sourceResource/spatial"
    source = {"sourceResource": {"spatial": "Asheville", "temporal": "1940"}}
    wanted = {"sourceResource": {"spatial": ["Asheville"], "temporal": ["1940"]}}

    resp, content = _get_server_response(json.dumps(source), prop=prop)
    assert resp.status == 200

    decoded = json.loads(content)
    # Dump both sides to stderr to ease debugging of a mismatch
    print >> sys.stderr, decoded
    print >> sys.stderr, wanted
    assert decoded == wanted
def test_enrich_subject_one_char_string1():
    """Only "subject" and "hello" are expected to survive, as capitalized names.

    The one- and two-character subjects ("a", "ab") are expected to be absent
    from the response.
    """
    places = [{"name": "Asheville"}, {"name": "North Carolina"}]
    document = {
        "id": "123",
        "spatial": places,
        "subject": ["subject", "a", "ab", "hello"],
    }
    wanted = {
        "id": "123",
        "spatial": places,
        "subject": [{"name": "Subject"}, {"name": "Hello"}],
    }

    resp, content = _get_server_response(json.dumps(document))
    assert resp.status == 200

    decoded = json.loads(content)
    print(str(decoded))
    assert decoded == wanted
예제 #5
0
def test_enrich_date_parse_century_date():
    """Century dates ('19th c.', '19th century') keep a displayDate only.

    For both inputs the expected begin and end are None.
    """
    url = server() + "enrich_earliest_date?prop=date"
    cases = [
        ("19th c.", "19th c"),  # period stripped assumed OK
        ("19th century", "19th century"),
    ]

    for raw_date, display_date in cases:
        wanted = {"begin": None, "end": None, "displayDate": display_date}
        resp, content = H.request(url, "POST",
                                  body=json.dumps({"date": raw_date}))
        got = json.loads(content)["date"]
        assert got == wanted, "%s != %s" % (got, wanted)
예제 #6
0
def dedup_value(body, ctype, action="dedup_value", prop=None):
    '''
    Service that accepts a JSON document and enriches the prop field of that document by:

    a) Removing duplicates

    prop is a comma-separated list of property paths. Only list values are
    processed. Two entries count as duplicates when they are equal after
    dropping spaces, periods, parens, brackets and braces and lower-casing;
    the first occurrence is kept and the original order is preserved.

    Returns the (possibly modified) document as a JSON string. When the body
    is not valid JSON the response code is set to 500 and an error message is
    returned. When prop is not supplied the document is returned unchanged.
    '''

    try:
        data = json.loads(body)
    except (ValueError, TypeError):
        response.code = 500
        response.add_header('content-type', 'text/plain')
        return "Unable to parse body as JSON"

    # Nothing to de-duplicate without a prop; hand the document back as-is
    if not prop:
        return json.dumps(data)

    # Remove spaces, periods, parens, brackets, braces before comparing
    noise = re.compile(r"[ \.\(\)\[\]\{\}]")

    for p in prop.split(","):
        if not exists(data, p):
            continue
        v = getprop(data, p)
        if not isinstance(v, list):
            continue

        seen = set()
        unique = []
        for s in v:
            key = noise.sub("", s).lower()
            if key not in seen:
                seen.add(key)
                unique.append(s)

        setprop(data, p, unique)

    return json.dumps(data)
예제 #7
0
def mix_freemix(body, ctype):
    """
    Merge the items of exactly two remote data sets into one JSON document.

    See: http://foundry.zepheira.com/issues/137#note-10

    The request body is JSON shaped like:

    {
      "datasets": {
        "dataset1": "http://recollection.zepheira.com/data/guide/data-profile-arthur-y-ford-photograph-albums-reprise-for-demo/data.json",
        "dataset2": "http://recollection.zepheira.com/data/guide/jean-thomas-collection/data.json"
      },
      "alignProperties": {
        "label": "Name",
        "dataset1": "Surname",
        "dataset2": "Name_of_Candidate"
      }
    }

    Each data set URL is fetched and its "items" are collected. When
    "alignProperties" is given, every item that has the data set's aligned
    property also gets a copy of it under the (sanitized) label. Finally each
    item receives an "id" of the form "_<position>", and the result is
    returned as an indented JSON string {"items": [...]}.

    Raises ValueError unless exactly two data sets are supplied.
    """
    # NOTE(review): credentials are hard-coded here -- confirm this is intended
    USER, PASSWD = "loc", "recollection"
    H = httplib2.Http(make_named_cache("mix.freemix.json"))
    if USER:
        H.add_credentials(USER, PASSWD)

    spec = json.loads(body)
    datasets = spec["datasets"]
    alignments = spec.get("alignProperties")

    if len(datasets) != 2:
        raise ValueError("You must provide Mixer exactly 2 data sets")

    mixed = []
    for dataset in datasets:
        # Replace the data set URL with the content
        source_url = datasets[dataset]
        logger.debug("Processing dataset: %s" % source_url)
        resp, content = H.request(source_url)
        items = json.loads(content)[u"items"]

        if not alignments:
            mixed += items
            continue

        prop = alignments[dataset]
        newprop_label = alignments["label"]
        # Potluck (the usual mixer client) seems to generate property names such as
        # "Activity / Activity" which Exhibit cannot handle. Work around that.
        # See: http://foundry.zepheira.com/issues/334
        newprop = UNSUPPORTED_IN_EXHIBITKEY.sub("_", newprop_label)
        logger.debug("Mapping: %s -> %s" % (prop, newprop))
        for item in items:
            if prop in item:
                item[newprop] = item[prop]
            mixed.append(item)

    # Re-number every item so ids are unique across both data sets
    for position, item in enumerate(mixed):
        item[u"id"] = u"_%i" % position

    return json.dumps({"items": mixed}, indent=4)
예제 #8
0
파일: enrich.py 프로젝트: dpla/ingestion
def enrich_storage(body, ctype):
    """Establishes a pipeline of services identified by an ordered list of URIs
       provided in request header "Pipeline-Item"

    The body is a JSON list of records. Each record is sent through pipe()
    with the comma-separated enrichments from the header, and the enriched
    result is parsed as JSON.

    Returns a JSON string holding:
    enriched_records -- enriched records keyed by their "_id"
    enriched_coll_count -- stored records whose ingestType is not "item"
    enriched_item_count -- stored records whose ingestType is "item"
    missing_id_count -- records without an "_id" (not stored)
    missing_source_resource_count -- item records without "sourceResource"
                                     (not stored)
    errors -- every error reported by pipe()
    """

    request_headers = copy_headers_to_dict(request.environ)
    rec_enrichments = request_headers.get(u"Pipeline-Item", "").split(",")

    records = json.loads(body)

    # Counts
    enriched_coll_count = 0
    enriched_item_count = 0
    missing_id_count = 0
    missing_source_resource_count = 0

    errors = []
    enriched_records = {}
    for record in records:
        error, enriched_record_text = pipe(record, ctype, rec_enrichments,
                                           "HTTP_PIPELINE_ITEM")
        if error:
            errors.append(error)

        enriched_record = json.loads(enriched_record_text)

        if enriched_record.get("_id", None):
            ingest_type = enriched_record.get("ingestType")
            # Item records should have sourceResource
            if (ingest_type == "item" and
                    "sourceResource" not in enriched_record):
                logger.error("Record %s does not have sourceResource: %s" %
                             (enriched_record["_id"], enriched_record))
                missing_source_resource_count += 1
            else:
                enriched_records[enriched_record["_id"]] = enriched_record
                if ingest_type == "item":
                    enriched_item_count += 1
                else:
                    enriched_coll_count += 1
        else:
            logger.error("Found a record without an _id %s" % enriched_record)
            missing_id_count += 1

    data = {
        "enriched_records": enriched_records,
        "enriched_coll_count": enriched_coll_count,
        "enriched_item_count": enriched_item_count,
        "missing_id_count": missing_id_count,
        "missing_source_resource_count": missing_source_resource_count,
        "errors": errors
    }

    return json.dumps(data)
예제 #9
0
def primotodpla(body,ctype,geoprop=None):
    """
    Convert output of JSON-ified PRIMO (MWDL) format into the DPLA JSON-LD format.

    Parameter "geoprop" specifies the property name containing lat/long coords;
    it is stored in the module-level GEOPROP before the transformers run.

    Returns the converted document as a JSON string, with top-level keys whose
    values are falsy removed. When the body is not valid JSON the response
    code is set to 500 and an error message is returned.
    """

    try :
        data = json.loads(body)
    except:
        response.code = 500
        response.add_header("content-type","text/plain")
        return "Unable to parse body as JSON"

    global GEOPROP
    GEOPROP = geoprop

    source_resource = {}
    out = {"@context": CONTEXT, "sourceResource": source_resource}

    # Single-field rules: one original property feeds one transformer
    for path in CHO_TRANSFORMER:
        if exists(data, path):
            source_resource.update(CHO_TRANSFORMER[path](data, path))
    for path in AGGREGATION_TRANSFORMER:
        if exists(data, path):
            out.update(AGGREGATION_TRANSFORMER[path](data, path))

    # Multi-field rules: the target depends on more than one original field
    source_resource.update(
        multi_transform(data, "spatial", ["display/lds08"], "list"))
    source_resource.update(
        multi_transform(data, "isPartOf", ["display/lds04"]))
    source_resource.update(
        multi_transform(data, "title", ["display/title", "display/lds10"]))

    out.update(multi_transform(data, "dataProvider", ["display/lds03"]))

    # Additional content not from original document
    if "HTTP_CONTRIBUTOR" in request.environ:
        try:
            encoded = request.environ["HTTP_CONTRIBUTOR"]
            out["provider"] = json.loads(base64.b64decode(encoded))
        except Exception as e:
            logger.debug("Unable to decode Contributor header value: "+request.environ["HTTP_CONTRIBUTOR"]+"---"+repr(e))

    # Keep only the top-level entries that carry a truthy value
    kept = {}
    for (k, v) in out.items():
        if v:
            kept[k] = v

    return json.dumps(kept)
예제 #10
0
파일: set_prop.py 프로젝트: dpla/ingestion
def set_prop(body, ctype, prop=None, value=None, condition_prop=None,
             condition_value=None, _dict=None):
    """Sets the value of prop.

    Keyword arguments:
    body -- the content to load
    ctype -- the type of content
    prop -- the prop to set
    value -- the value to set prop to
    condition_prop -- (optional) the field that must exist to set the prop
    condition_value -- (optional, if condition_prop set) the value that
                       condition_prop must have to set the prop
    _dict -- (optional) when truthy, value is parsed as JSON before being set

    Returns the document as a JSON string, updated when the value was set.
    If value is missing, or setting it fails, the error is logged and the
    document is returned without the change. If _dict is set and value is not
    valid JSON, the error is logged and body is returned untouched. If body is
    not valid JSON, the response code is set to 500 and an error message is
    returned.
    """

    try:
        data = json.loads(body)
    except (ValueError, TypeError):
        response.code = 500
        response.add_header('content-type', 'text/plain')
        return "Unable to parse body as JSON"

    if not value:
        logger.error("No value was supplied to set_prop.")
        return json.dumps(data)

    if _dict:
        try:
            value = json.loads(value)
        except Exception as e:
            logger.error("Unable to parse set_prop value: %s" % e)
            return body

    def _set_prop():
        """Returns true if

           1. The condition_prop is not set OR
           2. The condition_prop is set and exists and the condition_value
              is None OR
           3. The condition_prop is set and exists, the condition_value is
              set, and the value of condition_prop equals condition_value
        """
        return (not condition_prop or
                (exists(data, condition_prop) and
                 (not condition_value or
                  getprop(data, condition_prop) == condition_value)))

    if _set_prop():
        try:
            setprop(data, prop, value)
        except Exception as e:
            logger.error("Error in set_prop: %s" % e)

    # Always hand the (possibly updated) document back to the caller
    return json.dumps(data)
예제 #11
0
def oaisetname(body,ctype,sets_service=None):
    '''
    Service that accepts a JSON document and sets its "title" (and, when the
    set has one, its "description") by looking up the document's set in the
    list returned by the service passed in the 'sets_service' parameter.

    The set is identified from the document "_id": the part after the first
    "--", or the whole "_id" when there is no "--". The sets service is
    expected to return a JSON array of objects with "setSpec", "setName" and
    optionally "setDescription" keys. A relative sets_service is resolved
    against the scheme and host of the current request.

    Returns the document as a JSON string. On a missing sets_service, an
    unparseable body or an unparseable sets response, the response code is set
    to 500 and an error message is returned.
    '''

    if not sets_service:
        response.code = 500
        response.add_header('content-type','text/plain')
        return "No set service has been selected"

    try:
        data = json.loads(body)
    except (ValueError, TypeError):
        response.code = 500
        response.add_header('content-type','text/plain')
        return "Unable to parse body as JSON"

    if not is_absolute(sets_service):
        prefix = request.environ['wsgi.url_scheme'] + '://'
        prefix += request.environ['HTTP_HOST'] if request.environ.get('HTTP_HOST') else request.environ['SERVER_NAME']
        sets_service = prefix + sets_service

    H = httplib2.Http('/tmp/.cache')
    H.force_exception_as_status_code = True
    resp, content = H.request(sets_service)
    if not resp[u'status'].startswith('2'):
        # Only reported; the content is still handed to the JSON parser below
        print >> sys.stderr, '  HTTP error ('+resp[u'status']+') resolving URL: '+sets_service

    try:
        sets = json.loads(content)
    except (ValueError, TypeError):
        response.code = 500
        response.add_header('content-type','text/plain')
        return "Unable to parse sets service result as JSON: " + repr(content)

    setpos = data['_id'].find('--')
    match = data['_id'][setpos+2:] if setpos > -1 else data['_id']

    for s in sets:
        if match == s['setSpec']:
            data[u'title'] = s['setName']
            # A set may come without a description; do not fail on it
            description = s.get('setDescription')
            if description:
                data[u'description'] = description.strip()
            break

    return json.dumps(data)
예제 #12
0
def oaisetname(body,ctype,sets_service=None):
    '''
    Service that accepts a JSON document and sets its "title" (and, when the
    set has one, its "description") by looking up the set named in the
    request's Collection header (HTTP_COLLECTION) in the list returned by the
    service passed in the 'sets_service' parameter.

    The sets service is expected to return a JSON array of objects with
    "setSpec", "setName" and optionally "setDescription" keys.

    Returns the document as a JSON string. On a missing sets_service, an
    unparseable body, a missing Collection header or an unparseable sets
    response, the response code is set to 500 and an error message is
    returned.
    '''

    if not sets_service:
        response.code = 500
        response.add_header('content-type','text/plain')
        return "No set service has been selected"

    try:
        data = json.loads(body)
    except (ValueError, TypeError):
        response.code = 500
        response.add_header('content-type','text/plain')
        return "Unable to parse body as JSON"

    try:
        collection = request.environ['HTTP_COLLECTION']
    except KeyError:
        response.code = 500
        response.add_header('content-type','text/plain')
        return "No Collection header found"

    H = httplib2.Http('/tmp/.cache')
    H.force_exception_as_status_code = True
    resp, content = H.request(sets_service)
    if not resp[u'status'].startswith('2'):
        # Only reported; the content is still handed to the JSON parser below
        print >> sys.stderr, '  HTTP error ('+resp[u'status']+') resolving URL: '+sets_service

    try:
        sets = json.loads(content)
    except (ValueError, TypeError):
        response.code = 500
        response.add_header('content-type','text/plain')
        return "Unable to parse sets service result as JSON: " + repr(content)

    for s in sets:
        if s['setSpec'] == collection:
            data[u'title'] = s['setName']
            # A set may come without a description; do not fail on it
            description = s.get('setDescription')
            if description:
                data[u'description'] = description
            break

    return json.dumps(data)
예제 #13
0
def test_copy_prop_to_prop_create_dict_key1():
    """Copy two props, one request after the other, into a dict at to_prop.

    The first request is expected to create sourceResource/to_dict holding
    key1; the second, run on that result, is expected to add key2 to it.
    """
    to_prop = "sourceResource/to_dict"
    create = True

    def build(to_dict=None):
        # Same base document each time; optionally with the target dict added
        source_resource = {"key2": "value2", "key3": "value3"}
        if to_dict is not None:
            source_resource["to_dict"] = to_dict
        return {
            "key1": "value1",
            "sourceResource": source_resource,
            "key4": "value4",
        }

    original = build()
    after_first = build({"key1": "value1"})
    after_second = build({"key1": "value1", "key2": "value2"})

    resp, content = _get_server_response(
        json.dumps(original), prop="key1", to_prop=to_prop, key="key1",
        create=create)
    assert resp.status == 200
    assert json.loads(content) == after_first

    resp, content = _get_server_response(
        json.dumps(after_first), prop="sourceResource/key2", to_prop=to_prop,
        key="key2", create=create)
    assert resp.status == 200
    assert json.loads(content) == after_second
def test_description_transform2():
    """A MODS note "#text" is expected as the sourceResource description."""
    record = {"metadata": {"mods": {"note": {"#text": "A description"}}}}
    wanted = {"description": "A description"}

    resp, content = _get_server_response(json.dumps(record),
                                         provider="HARVARD")
    assert resp.status == 200

    source_resource = json.loads(content)["sourceResource"]
    assert_same_jsons(wanted, source_resource)
예제 #15
0
def capitalize_value(body, ctype, prop=",".join(DEFAULT_PROP), exclude=None):
    """
    Service that accepts a JSON document and calls capitalize() on it for
    each field named in prop.

    prop is a comma-separated list of field paths; empty entries are skipped
    and the first entry equal to exclude, if any, is left out. Returns the
    document as a JSON string. When prop is None or the body is not valid
    JSON, the response code is set to 500 and an error message is returned.
    """

    if prop is None:
        msg = "Prop param is None"
        response.code = 500
        response.add_header('content-type', 'text/plain')
        logger.error(msg)
        return msg

    try:
        data = json.loads(body)
    except Exception as e:
        response.code = 500
        response.add_header('content-type', 'text/plain')
        return "Unable to parse body as JSON\n" + str(e)

    fields = prop.split(",")
    if exclude in fields:
        fields.remove(exclude)

    for field in fields:
        if not field:
            continue
        capitalize(data, field)

    return json.dumps(data)
def test_convert_spatial_string_to_dictionary():
    """
    UIUC location enrichment: of the two spatial entries only
    "Honolulu, HI" is expected to remain; "1972 to Present" is expected
    to be removed.
    """
    honolulu = {"name": "Honolulu, HI"}
    date_range = {"name": "1972 to Present"}

    record = {
        "id": "12345",
        "sourceResource": {"spatial": [honolulu, date_range]},
        "creator": "David",
    }
    wanted = {
        "id": "12345",
        "sourceResource": {"spatial": [honolulu]},
        "creator": "David",
    }

    resp, content = H.request(server() + "uiuc_enrich_location", "POST",
                              body=json.dumps(record))
    assert resp.status == 200
    assert json.loads(content) == wanted
예제 #17
0
def test_enrich_location_after_provider_specific_enrich_location4():
    """
    Previous specific-provider location did not set state.

    A single spatial entry with semicolon-separated city and county values is
    expected to be split into two entries; country appears on the first only.
    """
    combined = {
        "city": "Asheville; La Jolla",
        "county": "Buncombe;San Diego",
        "country": "United States",
    }
    first = {"city": "Asheville", "county": "Buncombe", "country": "United States"}
    second = {"city": "La Jolla", "county": "San Diego"}

    record = {
        "id": "12345",
        "sourceResource": {"spatial": [combined]},
        "creator": "Miguel",
    }
    wanted = {
        "id": "12345",
        "sourceResource": {"spatial": [first, second]},
        "creator": "Miguel",
    }

    resp, content = H.request(server() + "enrich_location", "POST",
                              body=json.dumps(record))
    assert resp.status == 200
    assert json.loads(content) == wanted
def artstor_cleanup_creator(body, ctype, prop="sourceResource/creator"):
    """
    Service that accepts a JSON document and cleans the field at prop
    (sourceResource/creator by default) by removing, case-insensitively, any
    of the patterns in CLEANUP found at the beginning of each value.

    A field that ends up with exactly one value is stored as that single
    value, otherwise as a list. Returns the document as a JSON string; when
    the body is not valid JSON the response code is set to 500 and an error
    message is returned.
    """

    try:
        data = json.loads(body)
    except:
        response.code = 500
        response.add_header('content-type', 'text/plain')
        return "Unable to parse body as JSON"

    if not exists(data, prop):
        return json.dumps(data)

    creators = getprop(data, prop)
    if not isinstance(creators, list):
        creators = [creators]

    for position, creator in enumerate(creators):
        # Apply every pattern in turn, trimming before and after each pass
        for s in CLEANUP:
            creator = re.sub(r"(?i)^{0}".format(s), "", creator.strip()).lstrip()
        creators[position] = creator

    if len(creators) == 1:
        cleaned = creators[0]
    else:
        cleaned = creators
    setprop(data, prop, cleaned)

    return json.dumps(data)
예제 #19
0
def test_set_prop5():
    """The existing rights value is expected to be overwritten.

    condition_prop ("sourceResource") exists in the document, so the prop is
    expected to be set to the new value.
    """
    prop = "sourceResource/rights"
    value = "rights"
    condition_prop = "sourceResource"

    record = {
        "key1": "value1",
        "sourceResource": {"key1": "value1", "rights": "value2"},
        "key2": "value2",
    }
    wanted = {
        "key1": "value1",
        "sourceResource": {"key1": "value1", "rights": "rights"},
        "key2": "value2",
    }

    resp, content = _get_server_response(
        json.dumps(record), prop=prop, value=value,
        condition_prop=condition_prop)
    assert resp.status == 200
    assert json.loads(content) == wanted
예제 #20
0
def test_unset_prop2():
    """The all-digit rights value is expected to be removed (is_digit)."""
    action = "unset"
    prop = "sourceResource/rights"
    condition = "is_digit"

    record = {
        "_id": "12345",
        "key1": "value1",
        "sourceResource": {"key1": "value1", "rights": "20010983784"},
        "key2": "value2",
    }
    wanted = {
        "_id": "12345",
        "key1": "value1",
        "sourceResource": {"key1": "value1"},
        "key2": "value2",
    }

    resp, content = _get_server_response(
        json.dumps(record), action=action, prop=prop, condition=condition)
    assert resp.status == 200
    print_error_log()
    assert json.loads(content) == wanted
예제 #21
0
def test_unset_prop1():
    """With no condition given, sourceResource/rights is expected to be removed."""
    action = "unset"
    prop = "sourceResource/rights"

    record = {
        "_id": "12345",
        "key1": "value1",
        "sourceResource": {"key1": "value1", "rights": "value2"},
        "key2": "value2",
    }
    wanted = {
        "_id": "12345",
        "key1": "value1",
        "sourceResource": {"key1": "value1"},
        "key2": "value2",
    }

    resp, content = _get_server_response(
        json.dumps(record), action=action, prop=prop)
    assert resp.status == 200
    assert json.loads(content) == wanted
예제 #22
0
def test_enrich_list_of_dictionaries_and_strings():
    """Mixed spatial list: dicts are kept, strings become {"name": ...} dicts."""
    buncombe = {
        "country": "United States",
        "county": "Buncombe",
        "state": "North Carolina",
    }
    plain_names = ["Rushmore, Mount", "Mount Rushmore National Memorial"]

    record = {
        "id": "12345",
        "sourceResource": {"spatial": [buncombe] + plain_names},
    }
    wanted = {
        "id": "12345",
        "sourceResource": {
            "spatial": [buncombe] + [{"name": name} for name in plain_names],
        },
    }

    resp, content = H.request(server() + "enrich_location", "POST",
                              body=json.dumps(record))
    assert resp.status == 200
    assert json.loads(content) == wanted
예제 #23
0
def test_set_prop2():
    """sourceResource/rights is absent from the input and expected to be added."""
    prop = "sourceResource/rights"
    value = "rights"

    record = {
        "key1": "value1",
        "sourceResource": {"key1": "value1"},
        "key2": "value2",
    }
    wanted = {
        "key1": "value1",
        "sourceResource": {"key1": "value1", "rights": "rights"},
        "key2": "value2",
    }

    resp, content = _get_server_response(
        json.dumps(record), prop=prop, value=value)
    assert resp.status == 200
    assert json.loads(content) == wanted
def nypl_identify_object(body, ctype, download="True"):
    """
    Service that accepts a JSON document and sets its "object" field to the
    NYPL preview image URL built from originalRecord/tmp_image_id.

    It also records admin/object_status: PENDING when download is the string
    "True", IGNORE otherwise. When originalRecord or its tmp_image_id is
    missing, an error is logged and the body is returned unchanged. When the
    body is not valid JSON, the response code is set to 500 and an error
    message is returned.
    """

    try:
        data = json.loads(body)
    except:
        response.code = 500
        response.add_header('content-type', 'text/plain')
        return "Unable to parse body as JSON"

    original_document_key = u"originalRecord"
    original_preview_key = u"tmp_image_id"
    preview_format = "http://images.nypl.org/index.php?id={0}&t=t"

    if original_document_key not in data:
        logger.error("There is no '%s' key in JSON for doc [%s].", original_document_key, data[u'id'])
        return body

    original_record = data[original_document_key]
    if original_preview_key not in original_record:
        logger.error("There is no '%s/%s' key in JSON for doc [%s].", original_document_key, original_preview_key, data[u'id'])
        return body

    data["object"] = preview_format.format(original_record[original_preview_key])

    status = PENDING if download == "True" else IGNORE

    # Keep any other admin entries that are already present
    if "admin" not in data:
        data["admin"] = {"object_status": status}
    else:
        data["admin"]["object_status"] = status

    return json.dumps(data)
def scdl_enrich_location(body, ctype, action="scdl_enrich_location", prop="sourceResource/spatial"):
    """
    Service that accepts a JSON document and enriches the "spatial" field of that document.

    For every spatial entry the name is right-trimmed and passed through
    replace_state_abbreviations(). A name containing " county " (any case)
    yields a "county" taken from the text before "county"; otherwise a name
    containing "(S.C.)" yields a "city" taken from the text before it.

    For use with the scdl profiles
    """

    try:
        data = json.loads(body)
    except:
        response.code = 500
        response.add_header('content-type', 'text/plain')
        return "Unable to parse body as JSON"

    if not exists(data, prop):
        return json.dumps(data)

    for place in iterify(getprop(data, prop)):
        name = replace_state_abbreviations(place["name"].rstrip())
        place["name"] = name

        # Try to extract a County
        lowered = name.lower()
        if " county " in lowered:
            # "XXX County (S.C.)" => county: XXX
            place["county"] = name[:lowered.index("county")].strip()
        elif "(S.C.)" in name:
            # "XXX (S.C)" => city: XXX
            place["city"] = name[:name.index("(S.C.)")].strip()

    return json.dumps(data)
예제 #26
0
def uscsetdataprovider(body, ctype, prop="dataProvider"):
    """
    Service that accepts a JSON document and sets the "dataProvider"
    field of that document to:

    1. The first element of the current dataProvider value (per the original
       author, the originalRecord/source values placed there by the
       oai-to-dpla module) for the chs set (setSpec p15799coll65)
    2. The string "University of Southern California. Libraries" for all
       other sets

    The prop parameter is accepted but the body always works on
    "dataProvider".

    For primary use with USC documents
    """

    try:
        data = json.loads(body)
    except Exception:
        response.code = 500
        response.add_header('content-type','text/plain')
        return "Unable to parse body as JSON"

    data_provider = getprop(data, "dataProvider", True)
    is_chs_set = getprop(data, "originalRecord/setSpec") == "p15799coll65"

    if is_chs_set:
        new_value = data_provider[0]
    else:
        new_value = "University of Southern California. Libraries"
    setprop(data, "dataProvider", new_value)

    return json.dumps(data)
예제 #27
0
def test_unset_prop6():
    """_id is expected to be removed under the hathi_exclude condition.

    The condition is evaluated over two condition props, passed as one
    URL-encoded, comma-separated value.
    """
    action = "unset"
    prop = "_id"
    condition = "hathi_exclude"
    condition_prop = "dataProvider%2CsourceResource%2Ftype"

    providers = ["Hathitrust", "University of Minnesota"]
    record = {
        "_id": "12345",
        "dataProvider": providers,
        "sourceResource": {"type": "image"},
    }
    wanted = {
        "dataProvider": providers,
        "sourceResource": {"type": "image"},
    }

    resp, content = _get_server_response(
        json.dumps(record), action=action, prop=prop, condition=condition,
        condition_prop=condition_prop)
    print_error_log()
    assert resp.status == 200
    assert json.loads(content) == wanted
예제 #28
0
def test_removing_bracket():
    """The stray "[" before "Germany" is expected to be gone after splitting.

    One semicolon-separated spatial string is expected to come back as six
    separate {"name": ...} entries.
    """
    joined = "Charleston (S.C.); [Germany; Poland; Israel; New York (N.Y.); Georgia (U.S.)"
    names = [
        "Charleston (S.C.)",
        "Germany",
        "Poland",
        "Israel",
        "New York (N.Y.)",
        "Georgia (U.S.)",
    ]

    record = {
        "id": "12345",
        "sourceResource": {"spatial": [joined]},
        "creator": "Miguel",
    }
    wanted = {
        "id": "12345",
        "sourceResource": {"spatial": [{"name": name} for name in names]},
        "creator": "Miguel",
    }

    resp, content = H.request(server() + "enrich_location", "POST",
                              body=json.dumps(record))
    assert resp.status == 200
    assert json.loads(content) == wanted
예제 #29
0
def movedatevalues(body, ctype, action="move_date_values", prop=None,
                   to_prop="sourceResource/temporal"):
    """
    Service that accepts a JSON document and moves any dates found in the prop
    field to the temporal field.

    A value of prop counts as a date when, after dropping parens, periods and
    question marks, exactly one of the date patterns matches it once and
    nothing but whitespace is left around the match. The matched text is
    appended to to_prop (unless already there) and the value is removed from
    prop; prop is deleted when no value remains.

    Single (non-list) values in prop and to_prop are treated as one-element
    lists, so both fields are written back as lists.

    Returns the document as a JSON string. When prop is not given, an error is
    logged and body is returned unchanged. When the body is not valid JSON,
    the response code is set to 500 and an error message is returned.
    """

    if not prop:
        logger.error("Prop param is None in %s" % __name__)
        return body

    # Ordered from most to least specific; the first pattern that accounts
    # for the whole value wins. Compiled once, outside the value loop.
    REGSEARCH = [re.compile(pattern, re.I) for pattern in (
        r"\d{1,4}\s*[-/]\s*\d{1,4}\s*[-/]\s*\d{1,4}\s*[-/]\s*\d{1,4}\s*[-/]\s*\d{1,4}\s*[-/]\s*\d{1,4}",
        r"\d{1,2}\s*[-/]\s*\d{4}\s*[-/]\s*\d{1,2}\s*[-/]\s*\d{4}",
        r"\d{4}\s*[-/]\s*\d{1,2}\s*[-/]\s*\d{4}\s*[-/]\s*\d{1,2}",
        r"\d{1,4}\s*[-/]\s*\d{1,4}\s*[-/]\s*\d{1,4}",
        r"\d{4}\s*[-/]\s*\d{4}",
        r"\d{1,2}\s*[-/]\s*\d{4}",
        r"\d{4}\s*[-/]\s*\d{1,2}",
        r"\d{4}s?",
        r"\d{1,2}\s*(?:st|nd|rd|th)\s*century",
        r".*circa.*",
    )]

    def cleanup(s):
        s = re.sub(r"[\(\)\.\?]", "", s)
        return s.strip()

    try:
        data = json.loads(body)
    except (ValueError, TypeError):
        response.code = 500
        response.add_header('content-type', 'text/plain')
        return "Unable to parse body as JSON"

    if exists(data, prop):
        # Work on lists throughout: a bare string would otherwise be compared
        # by length and rebuilt character by character below
        values = getprop(data, prop)
        if not isinstance(values, list):
            values = [values]

        toprop = getprop(data, to_prop) if exists(data, to_prop) else []
        if toprop is None:
            toprop = []
        elif not isinstance(toprop, list):
            toprop = [toprop]

        remove = []
        for v in values:
            c = cleanup(v)
            for pattern in REGSEARCH:
                m = pattern.findall(c)
                # Remove the match as literal text; it may contain characters
                # that are special in a regular expression
                if len(m) == 1 and not c.replace(m[0], "").strip():
                    if m[0] not in toprop:
                        toprop.append(m[0])
                    # Append the non-cleaned value to remove
                    remove.append(v)
                    break

        if toprop:
            setprop(data, to_prop, toprop)
            if len(values) == len(remove):
                delprop(data, prop)
            else:
                setprop(data, prop, [v for v in values if v not in remove])

    return json.dumps(data)
예제 #30
0
def test_default_type():
    """The type value should be replaced by the supplied default"""
    payload = json.dumps({"id": "123", "sourceResource": {"type": "bananas"}})
    wanted = {"id": "123", "sourceResource": {"type": "image"}}

    resp, content = _get_server_response(payload, default="image")

    assert resp.status == 200
    assert_same_jsons(wanted, json.loads(content))
def enrichlocation(body, ctype, action="enrich_location",
                   prop="sourceResource/spatial"):
    """
    Service that accepts a JSON document and normalizes the field named by
    prop (the "spatial" field by default).

    Every entry of the field is turned into a dictionary: dictionaries keep
    their keys, anything else becomes {"name": <value>}. All values are passed
    through remove_space_around_semicolons. If any resulting value contains a
    semicolon, the list is handed to create_dictionaries and its result is
    stored instead.

    Returns the document serialized as JSON, or a plain-text error message
    (with a 500 response code) when body is not valid JSON.
    """

    try:
        data = json.loads(body)
    except:
        response.code = 500
        response.add_header('content-type', 'text/plain')
        return "Unable to parse body as JSON"

    if not exists(data, prop):
        return json.dumps(data)

    places = iterify(getprop(data, prop))

    # Normalize each entry in place so that everything is a dictionary.
    for idx, place in enumerate(places):
        if isinstance(place, dict):
            for key in place.keys():
                place[key] = remove_space_around_semicolons(place[key])
        else:
            places[idx] = {"name": remove_space_around_semicolons(place)}

    # Any semicolon left in a value means the entries have to be split into
    # multiple dictionaries.
    has_semicolons = None
    for place in places:
        if any(val and ';' in val for val in place.values()):
            has_semicolons = True

    setprop(data, prop,
            create_dictionaries(places) if has_semicolons else places)

    return json.dumps(data)
예제 #32
0
def test_usc_enrich_location_find_coordinates():
    """Only the lat/long coordinate value should be left in spatial"""
    names = [
        " 123 ",
        "-130.4560,,32.9870",
        "1234",
        "Asheville",
        "82.5542, 35.6008",
    ]
    doc = {"sourceResource": {"spatial": [{"name": n} for n in names]}}
    wanted = {"sourceResource": {"spatial": [{"name": "82.5542, 35.6008"}]}}

    resp, content = H.request(url, "POST", body=json.dumps(doc))

    assert resp["status"] == "200"
    assert_same_jsons(wanted, json.loads(content))
예제 #33
0
 def testMappings(self):
     # Run the flickr-doc.json fixture through the service and check the
     # mapped top-level and sourceResource fields.
     with open(path.join(DIR_FIXTURES, 'flickr-doc.json')) as fixture_file:
         raw_doc = fixture_file.read()
         resp, content = self._get_server_response(raw_doc)
     self.assertEqual(resp.status, 200)

     mapped = json.loads(content)
     shown_at = 'https://www.flickr.com/photos/sdasmarchives/34394586825/'
     self.assertEqual(mapped['isShownAt'], shown_at)
     shown_by = 'https://farm5.staticflickr.com/4169/34394586825_375e0b1706_z.jpg'
     self.assertEqual(mapped['isShownBy'], shown_by)

     source = mapped['sourceResource']
     self.assertEqual(source['title'], 'Ryan Aeronautical Image')
     description = "PictionID:42184448 - Title:Atlas 34, on Pad-------4-9-62; MT62-35498 ; UNCLASSIFIED , APR 9 1962 , ----NCS|ASTRONAUTICS/A DIVISION OF GENERAL DYNAMICS CORPORATION ; missile in image is numbered 34 - Catalog:14_001945 - Filename:14_001945.tif - - - - Image from the Convair/General Dynamics Astronautics Atlas Negative Collection---Please Tag these images so that the information can be permanently stored with the digital file.---Repository: San Diego Air and Space Museum"
     self.assertEqual(source['description'], description)
     self.assertNotIn('date', source)
     self.assertEqual(source['subject'], ['woo yay', 'Hoopla'])
     self.assertEqual(source['format'], "photo")
def test_removing_bracket():
    """A bracket at the beginning of a name should be removed"""
    raw_spatial = "Charleston (S.C.); [Germany; Poland; Israel; New York (N.Y.); Georgia (U.S.)"
    doc = {
        "id": "12345",
        "sourceResource": {"spatial": [raw_spatial]},
        "creator": "Miguel"
    }

    # One dictionary per semicolon-separated name, bracket stripped.
    names = [
        "Charleston (S.C.)",
        "Germany",
        "Poland",
        "Israel",
        "New York (N.Y.)",
        "Georgia (U.S.)",
    ]
    wanted = {
        "id": "12345",
        "sourceResource": {"spatial": [{"name": n} for n in names]},
        "creator": "Miguel"
    }

    endpoint = server() + "enrich_location"
    resp, content = H.request(endpoint, "POST", body=json.dumps(doc))

    assert resp.status == 200
    assert json.loads(content) == wanted
예제 #35
0
def test_usc_enrich_location_clean():
    """1-3 digit numbers and values containing 's.d' should be removed and
       the remaining values joined on whitespace
    """
    names = [
        " 123 ",
        "-130.4560,,32.9870",
        "s.d]",
        "s.d",
        "1234",
        "456",
        "s.d.",
        "Asheville",
        "789",
    ]
    doc = {"sourceResource": {"spatial": [{"name": n} for n in names]}}
    wanted = {
        "sourceResource": {
            "spatial": [{"name": "-130.4560,,32.9870 1234 Asheville"}]
        }
    }

    resp, content = H.request(url, "POST", body=json.dumps(doc))

    assert resp["status"] == "200"
    assert_same_jsons(wanted, json.loads(content))
예제 #36
0
def test_basic_forward_lookup():
    """
    Geocoding a plain place name should fill in the location details
    """
    doc = {
        "id": "12345",
        "_id": "12345",
        "sourceResource": {"spatial": [{"name": "Bakersfield, CA"}]},
        "creator": "David"
    }

    geocoded_place = {
        "name": "Bakersfield, CA",
        "city": "Bakersfield",
        "state": "California",
        "county": "Kern County",
        "country": "United States",
        "coordinates": "35.37329, -119.01871"
    }
    wanted = {
        "id": "12345",
        "_id": "12345",
        "sourceResource": {"spatial": [geocoded_place]},
        "creator": "David"
    }

    endpoint = server() + "geocode"
    resp, content = H.request(endpoint, "POST", body=json.dumps(doc))

    assert resp.status == 200
    assert_same_jsons(wanted, json.loads(content))
def test_lapl_oai_mapping():
    """Check the fields mapped from the lapl-oai.json fixture"""
    with open(path.join(DIR_FIXTURES, 'lapl-oai.json')) as fixture_file:
        raw_doc = fixture_file.read()
        resp, content = _get_server_response(raw_doc)
    assert str(resp.status).startswith("2"), str(resp) + "\n" + content

    record = json.loads(content)
    TC.assertIn(u'sourceResource', record)
    src = record[u'sourceResource']

    TC.assertIn(u'title', src)
    TC.assertEqual(src['title'][0], u'Olvera Street shop')

    TC.assertIn(u'description', src)
    TC.assertEqual(len(src['description']), 2)
    second_description = u'A man and two boys sit in front of an Olvera Street shop. A large sign on the right reads, "For Your Fortune Consult Princess Lorena - The Morning Star." Another sign posted above the doorway reads, "Chief Kut - Mescalero." It is not clear if the man sitting on the right is Chief Kut.'
    TC.assertEqual(src['description'][1], second_description)

    TC.assertIn(u'format', src)
    TC.assertEqual(src['format'][0], u'1 photographic print :b&w ;15 x 11 cm.')

    TC.assertIn(u'identifier', src)
    TC.assertEqual(len(src['identifier']), 5)
    TC.assertEqual(src['identifier'][2], u'N-011-201 8x10')

    TC.assertIn(u'isShownAt', record)
    shown_at = u'https://tessa.lapl.org/cdm/ref/collection/photos/id/36479'
    TC.assertEqual(record['isShownAt'], shown_at)

    TC.assertIn(u'isShownBy', record)
    shown_by = u'http://173.196.26.125/utils/ajaxhelper?CISOROOT=photos&CISOPTR=36479&action=2&DMHEIGHT=2000&DMWIDTH=2000&DMSCALE=100'
    TC.assertEqual(record['isShownBy'], shown_by)

    TC.assertEqual(len(src['subject']), 8)
    first_subject = {'name': 'Signs and signboards--California--Los Angeles.'}
    TC.assertEqual(src['subject'][0], first_subject)

    contributors = [
        'Made accessible through a grant from the John Randolph Haynes and Dora Haynes Foundation.'
    ]
    TC.assertEqual(src['contributor'], contributors)
    TC.assertEqual(src['creator'], ['Schultheis, Herman.'])
예제 #38
0
def test_shred9():
    """Semicolons inside parentheses must not be used as split points"""
    unsplittable = "Sheet: 9 1/2 x 12 1/8 inches (24.1 x 30.8 cm)"
    doc = {
        "p": "String one; (String two; two and a part of two); String three; String four; (abc dbf; sss;k)",
        "q": "d;e;f",
        "h": "String one; (String two; two and a part of two) String three; String four; (abc dbf; sss;k)",
        "m": "String one; Begin of two (String two; two and a part of two) String three; String four; (abc dbf; sss;k)",
        "g": "bananas",
        "a": unsplittable
    }
    wanted = {
        "p": [
            "String one",
            "(String two; two and a part of two)",
            "String three",
            "String four",
            "(abc dbf; sss;k)",
        ],
        "q": ["d", "e", "f"],
        "h": [
            'String one',
            '(String two; two and a part of two) String three',
            'String four',
            '(abc dbf; sss;k)',
        ],
        "m": [
            'String one',
            'Begin of two (String two; two and a part of two) String three',
            'String four',
            '(abc dbf; sss;k)',
        ],
        # Values without a top-level semicolon stay plain strings.
        "a": unsplittable,
        "g": "bananas"
    }

    endpoint = server() + "shred?prop=p,q,h,m,g,a"
    resp, content = H.request(endpoint, "POST", body=json.dumps(doc))
    assert str(resp.status).startswith("2")

    fetched = json.loads(content)
    assert fetched == wanted, DictDiffer(wanted, fetched).diff()
def test_map_oac_dc_meta():
    '''The DC meta values from the OAC fixture should end up in sourceResource'''
    with open(path.join(DIR_FIXTURES, 'oac-xml.json')) as fixture_file:
        raw_doc = fixture_file.read()
    resp, content = _get_server_response(raw_doc)
    TC.assertEqual(resp.status, 200)

    src = json.loads(content)['sourceResource']

    TC.assertEqual(len(src['format']), 1)  # suppresses q="x"
    TC.assertEqual(src['format'], ['painting: b&w ;'])
    TC.assertNotIn('relation', src)

    TC.assertEqual(len(src['subject']), 2)  # suppresses q="series"
    subjects = [{'name': u'Japanese Americans'}, {'name': u'Uchida'}]
    TC.assertEqual(src['subject'], subjects)

    TC.assertEqual(src['date'], ["7/21/42", "7/21/72"])
    TC.assertEqual(src['copyrightDate'], ["2011"])

    alternative_titles = [
        "[Chinese man sitting on top of dynamite and white labor, poised to explode brick wall of Public Opinion]",
        "Another alternate title",
    ]
    TC.assertEqual(src['alternativeTitle'], alternative_titles)
    TC.assertEqual(src['genre'], ["Hashira-e"])

    rights = [
        "Transmission or reproduction of materials protected by copyright beyond that allowed by fair use requires the written permission of the copyright owners. Works not in the public domain cannot be commercially exploited without permission of the copyright owner. Responsibility for any use rests exclusively with the user.",
        "The Bancroft Library--assigned",
        "All requests to reproduce, publish, quote from, or otherwise use collection materials must be submitted in writing to the Head of Public Services, The Bancroft Library, University of California, Berkeley 94720-6000. See: http://bancroft.berkeley.edu/reference/permissions.html",
        "The Bancroft Library University of California Berkeley, CA 94720-6000",
    ]
    TC.assertEqual(src['rights'], rights)

    places = ["San Francisco (Calif.)", "Chinatown (San Francisco, Calif.)."]
    TC.assertEqual(src['spatial'], places)

    periods = [
        "China -- History -- Warlord period, 1916-1928.",
        "China -- Politics and government -- 1912-1949.",
    ]
    TC.assertEqual(src['temporal'], periods)
예제 #40
0
def remove_list_values(body, ctype, prop=None, values=None):
    """Remove from the list stored at prop every item that equals one of the
       comma-separated entries of values.

       The document is left untouched when prop does not hold a list or when
       values is not given. When no item survives, prop is deleted instead of
       being stored as an empty list.

       Returns the document serialized as JSON, or a plain-text error message
       (with a 500 response code) when body is not valid JSON.
    """

    try:
        data = json.loads(body)
    except:
        response.code = 500
        response.add_header('content-type', 'text/plain')
        return "Unable to parse body as JSON"

    current = getprop(data, prop, True)

    # Nothing to filter: hand the document back as parsed.
    if not isinstance(current, list) or values is None:
        return json.dumps(data)

    unwanted = values.split(",")
    kept = []
    for item in current:
        if item not in unwanted:
            kept.append(item)

    if kept:
        setprop(data, prop, kept)
    else:
        delprop(data, prop)

    return json.dumps(data)
예제 #41
0
def test_copy_prop_no_overwrite1():
    """An existing to_prop must be left alone when no_overwrite is set"""
    doc = {
        "key1": "value1",
        "sourceResource": {"key2": "value2", "key3": "value3"},
        "key4": "value4"
    }
    params = {
        "prop": "sourceResource/key2",
        "to_prop": "sourceResource/key3",
        "create": True,
        "no_overwrite": True,
    }

    resp, content = _get_server_response(json.dumps(doc), **params)

    assert resp.status == 200
    # The document comes back exactly as it was sent.
    assert json.loads(content) == doc
예제 #42
0
def drop_long_values(body, ctype, field=None, max_length=150):
    ''' Look for long values in the sourceResource field specified.
    If value is longer than max_length, delete

    body       -- the JSON document, as a string
    ctype      -- content type of the request (unused here)
    field      -- name of the key under sourceResource to check
    max_length -- longest allowed length; anything int() accepts

    When the field holds a list, items longer than max_length are dropped
    (the field is kept, possibly as an empty list). When it holds a single
    value longer than max_length, the whole field is deleted. A document
    without sourceResource, or without the field, is returned unchanged.

    Returns the document serialized as JSON, or a plain-text error message
    (with a 500 response code) when body is not valid JSON.
    '''
    try:
        data = json.loads(body)
    except Exception:
        response.code = 500
        response.add_header('content-type', 'text/plain')
        return "Unable to parse body as JSON"

    source_resource = None
    if isinstance(data, dict):
        source_resource = data.get('sourceResource')
    if not isinstance(source_resource, dict):
        # No sourceResource to look into: nothing can be too long.
        return json.dumps(data)

    fieldvalues = source_resource.get(field)
    if fieldvalues is None:
        # Field missing (or null): len() would fail, and there is nothing
        # to drop anyway.
        return json.dumps(data)

    limit = int(max_length)
    if isinstance(fieldvalues, list):
        source_resource[field] = [
            item for item in fieldvalues if len(item) <= limit
        ]
    elif len(fieldvalues) > limit:  # scalar
        del source_resource[field]

    return json.dumps(data)
예제 #43
0
def test_drop_long_values():
    """Description values longer than max_length should be dropped"""
    keep_first = "could be 1928ish?"
    too_long = "this is a long string will blow up flake 8, should drop this"
    keep_last = "short"

    doc = {"sourceResource": {"description": [keep_first, too_long, keep_last]}}
    wanted = {"sourceResource": {"description": [keep_first, keep_last]}}

    endpoint = server() + "drop-long-values?field=description&max_length=20"
    resp, content = H.request(endpoint, "POST", body=json.dumps(doc))

    TC.assertEqual(resp.status, 200)
    TC.assertEqual(json.loads(content), wanted)
예제 #44
0
def test_move_date_values_spatial3():
    """
    The spatial field should disappear when its only element is a date.
    """
    date_field = "sourceResource/spatial"
    doc = {"sourceResource": {"spatial": [" 1901 - 1999 "]}}
    # The date arrives in temporal with its outer whitespace stripped.
    wanted = {"sourceResource": {"temporal": ["1901 - 1999"]}}

    resp, content = _get_server_response(json.dumps(doc), date_field)

    assert resp.status == 200
    assert json.loads(content) == wanted
예제 #45
0
def test_unset_prop8():
    """The prop must stay in place: the dataProvider value does not meet the
       hathi_exclude condition
    """
    doc = {
        "_id": "12345",
        "dataProvider": "Hathitrust",
        "sourceResource": {"type": "image"}
    }
    params = {
        "action": "unset",
        "prop": "_id",
        "condition": "hathi_exclude",
        # URL-encoded "dataProvider,sourceResource/type"
        "condition_prop": "dataProvider%2CsourceResource%2Ftype",
    }

    resp, content = _get_server_response(json.dumps(doc), **params)
    print_error_log()

    assert resp.status == 200
    assert json.loads(content) == doc
def test_calpoly_oai_dc_mapping():
    """Check the fields mapped from the caltech.json fixture"""
    with open(path.join(DIR_FIXTURES, 'caltech.json')) as fixture_file:
        raw_doc = fixture_file.read()
        # Sanity check on the raw fixture text before it is posted.
        TC.assertIn('id', raw_doc)
        resp, content = _get_server_response(raw_doc)
    TC.assertEqual(resp.status, 200)

    mapped = json.loads(content)
    TC.assertIn('sourceResource', mapped)
    TC.assertIn('originalRecord', mapped)
    src = mapped['sourceResource']

    shown_at = "http://maccready.library.caltech.edu/islandora/object/pbm%3A631"
    TC.assertEqual(mapped['isShownAt'], shown_at)
    shown_by = "http://maccready.library.caltech.edu/islandora/object/pbm%3A631/datastream/TN/view/Model%20airplanes%20and%20gliders%3A%20includes%20photographs%20of%20MacCready%20with%20award%20winning%20planes.%202%20Sheets.%20%2834%20items%29.jpg"
    TC.assertEqual(mapped['isShownBy'], shown_by)

    descriptions = [
        "ca.1937-1941,1945. Part of: Paul B. MacCready Papers ca. 1930-2002. Series 7: Audio-Visual material; Subseries 1: Photographic slides; Box 1, Folder 1"
    ]
    TC.assertEqual(src['description'], descriptions)
    formats = ["projected graphic", "Black and White 35mm slides"]
    TC.assertEqual(src['format'], formats)
예제 #47
0
def mdlstatelocatedin(body, ctype):
    """
    Service that accepts a JSON document and extracts the state from the
    address in the first dataProvider value.

    The address is searched for each entry of the module-level states
    mapping, by key or by value, surrounded by whitespace on both sides. The
    value of the first entry that matches is stored in
    sourceResource/stateLocatedIn. Nothing is set when dataProvider is
    missing, empty, or mentions no known state.

    body  -- the JSON document, as a string
    ctype -- content type of the request (unused here)

    Returns the document serialized as JSON, or a plain-text error message
    (with a 500 response code) when body is not valid JSON.
    """

    try:
        data = json.loads(body)
    except Exception:
        response.code = 500
        response.add_header('content-type', 'text/plain')
        return "Unable to parse body as JSON"

    prop = "dataProvider"
    if exists(data, prop):
        providers = iterify(getprop(data, prop))
        # An empty dataProvider list has no address to inspect.
        if providers:
            address = providers[0]
            for st, state in states.items():
                # Escape the names: they are literal text, not regex syntax
                # (a "." in a name must not match any character).
                if (re.search(r"\s+%s\s+" % re.escape(st), address)
                        or re.search(r"\s+%s\s+" % re.escape(state),
                                     address)):
                    setprop(data, "sourceResource/stateLocatedIn", state)
                    break

    return json.dumps(data)
예제 #48
0
 def testMappings(self):
     # Run the flickr-doc.json fixture through the service and check the
     # mapped top-level and sourceResource fields.
     with open(path.join(DIR_FIXTURES, 'flickr-doc.json')) as fixture_file:
         raw_doc = fixture_file.read()
         resp, content = self._get_server_response(raw_doc)
     self.assertEqual(resp.status, 200)

     mapped = json.loads(content)
     shown_at = 'https://www.flickr.com/photos/sdasmarchives/34394586825/'
     self.assertEqual(mapped['isShownAt'], shown_at)
     shown_by = 'https://farm5.staticflickr.com/4169/34394586825_375e0b1706_z.jpg'
     self.assertEqual(mapped['isShownBy'], shown_by)

     source = mapped['sourceResource']
     self.assertEqual(source['title'], 'Atlas 55D')
     description = 'Details: Prelaunch; Complex 12; AMR --Image from the Convair/General Dynamics Astronautics Atlas Negative Collection---Please Tag these images so that the information can be permanently stored with the digital file.---Repository: San Diego Air and Space Museum'
     self.assertEqual(source['description'], description)
     self.assertEqual(source['subject'], ['woo yay', 'Hoopla'])
     self.assertEqual(source['format'], "photo")
     identifiers = ["14_008096", "43829091", "14_008096.TIF"]
     self.assertEqual(source['identifier'], identifiers)
예제 #49
0
def test_ia_identify_object():
    """The Internet Archive thumbnail URL should be stored under object"""

    # Sent verbatim as the request body.
    request_body = """
    {
   "originalRecord": {
       "_id": "test_id",
       "files": {"gif": "test_id.gif"}
       }
    }
    """
    wanted_preview = "http://www.archive.org/download/test_id/test_id.gif"

    endpoint = server() + "ia_identify_object"
    resp, content = H.request(endpoint, "POST", body=request_body)
    assert str(resp.status).startswith("2"), str(resp) + "\n" + content

    record = json.loads(content)
    assert u"object" in record, "object path not found in document"

    fetched_preview = record[u"object"]
    assert fetched_preview == wanted_preview, "%s != %s" % (fetched_preview,
                                                            wanted_preview)
def test_copy_prop_str_to_str():
    """Copying a string onto a string should turn to_prop into a list
       holding both values
    """
    note = "This is a note"
    description = "This is a description"

    doc = {"note": note, "sourceResource": {"description": description}}
    wanted = {
        "note": note,
        "sourceResource": {"description": [description, note]}
    }

    resp, content = _get_server_response(
        json.dumps(doc), prop="note", to_prop="sourceResource/description")

    assert resp.status == 200
    assert json.loads(content) == wanted
예제 #51
0
def nara_enrich_location(body,
                         ctype,
                         action="nara_enrich_location",
                         prop="sourceResource/spatial"):
    """
    Service that accepts a NARA JSON document and replaces every entry of the
    field named by prop with the result of format_spatial for that entry.

    Returns the document serialized as JSON, or a plain-text error message
    (with a 500 response code) when body is not valid JSON.
    """
    try:
        data = json.loads(body)
    except:
        response.code = 500
        response.add_header('content-type', 'text/plain')
        return "Unable to parse body as JSON"

    if not exists(data, prop):
        return json.dumps(data)

    # Rebuild the whole field from the formatted entries.
    formatted = [format_spatial(entry)
                 for entry in iterify(getprop(data, prop))]
    setprop(data, prop, formatted)

    return json.dumps(data)
예제 #52
0
def test_texas_enrich_location4():
    """Values expressed as limits should be left as they are"""
    hierarchy = "Canada - British Columbia Province - Vancouver Island - Victoria"
    limits = "northlimit=34.25; eastlimit=-99.88; southlimit=34.13; westlimit=-100;"

    doc = {
        "id": "12345",
        "sourceResource": {
            "spatial": [hierarchy, "north=34.19; east=-99.94;", limits]
        }
    }
    wanted = {
        "id": "12345",
        "sourceResource": {
            "spatial": [
                {
                    "name": hierarchy,
                    "country": "Canada",
                    "state": "British Columbia Province",
                    "county": "Vancouver Island",
                    "city": "Victoria"
                },
                {"name": "34.19, -99.94"},
                {"name": limits}
            ]
        }
    }

    endpoint = server() + "texas_enrich_location"
    resp, content = H.request(endpoint, "POST", body=json.dumps(doc))

    assert resp.status == 200
    assert json.loads(content) == wanted
예제 #53
0
def test_geocode_coordinate_provided2():
    """Coordinates given in the coordinates property should be used"""
    place_name = "United States--Massachussetts"
    coordinates = "42.358631134, -71.0567016602"

    doc = {
        "id": "12345",
        "_id": "12345",
        "sourceResource": {
            "spatial": [{"name": place_name, "coordinates": coordinates}]
        },
        "creator": "David"
    }

    geocoded_place = {
        "county": "Suffolk County",
        "state": "Massachusetts",
        "country": "United States",
        "name": place_name,
        "coordinates": coordinates
    }
    wanted = {
        "id": "12345",
        "_id": "12345",
        "sourceResource": {"spatial": [geocoded_place]},
        "creator": "David"
    }

    endpoint = server() + "geocode"
    resp, content = H.request(endpoint, "POST", body=json.dumps(doc))

    assert resp.status == 200
    assert_same_jsons(wanted, json.loads(content))
예제 #54
0
def test_copy_prop_dict_to_list():
    """A dictionary copied onto a list should be appended to that list"""
    doc = {
        "key1": "value1",
        "sourceResource": {
            "key1": "value1",
            "from_dict": {"key1": "value1"},
            "to_list": ["a", "b", "c"],
            "key2": "value2"
        },
        "key2": "value2"
    }
    wanted = {
        "key1": "value1",
        "sourceResource": {
            "key1": "value1",
            "from_dict": {"key1": "value1"},
            "to_list": ["a", "b", "c", {"key1": "value1"}],
            "key2": "value2"
        },
        "key2": "value2"
    }

    resp, content = _get_server_response(
        json.dumps(doc),
        prop="sourceResource/from_dict",
        to_prop="sourceResource/to_list")

    assert resp.status == 200
    assert json.loads(content) == wanted
예제 #55
0
def scdl_enrich_location(body,
                         ctype,
                         action="scdl_enrich_location",
                         prop="sourceResource/spatial"):
    """
    Service that accepts a JSON document and enriches the entries of the field
    named by prop (the "spatial" field by default). For use with the scdl
    profiles.

    For each entry the name is right-stripped and passed through
    replace_state_abbreviations. Then:
      * a name containing " county " (any case) gets a "county" key holding
        the text before "county"; if it also contains "(S.C.)" the state and
        country are filled in;
      * otherwise a name containing "(S.C.)" gets a "city" key holding the
        text before "(S.C.)", plus state and country.

    Returns the document serialized as JSON, or a plain-text error message
    (with a 500 response code) when body is not valid JSON.
    """

    try:
        data = json.loads(body)
    except:
        response.code = 500
        response.add_header('content-type', 'text/plain')
        return "Unable to parse body as JSON"

    if not exists(data, prop):
        return json.dumps(data)

    sc_marker = "(S.C.)"
    for place in iterify(getprop(data, prop)):
        name = replace_state_abbreviations(place["name"].rstrip())
        place["name"] = name

        lowered = name.lower()
        in_south_carolina = sc_marker in name

        if " county " in lowered:
            # "XXX County (S.C.)" => county: XXX
            place["county"] = name[0:lowered.index("county")].strip()
        elif in_south_carolina:
            # "XXX (S.C.)" => city: XXX
            place["city"] = name[0:name.index(sc_marker)].strip()

        # Both shapes get the same state/country once "(S.C.)" is present.
        if in_south_carolina:
            place["state"] = "South Carolina"
            place["country"] = "United States"

    return json.dumps(data)
예제 #56
0
def test_unset_prop4():
    """A condition that is not in CONDITIONS should leave the document
       untouched (the resulting KeyError is expected to be caught)
    """
    doc = {
        "_id": "12345",
        "key1": "value1",
        "sourceResource": {"key1": "value1", "rights": "value2"},
        "key2": "value2"
    }
    params = {
        "action": "unset",
        "prop": "sourceResource/rights",
        "condition": "is_digits",
    }

    resp, content = _get_server_response(json.dumps(doc), **params)

    assert resp.status == 200
    assert json.loads(content) == doc
예제 #57
0
def augment_freemix(body, ctype):
    #See: http://foundry.zepheira.com/issues/133#note-4
    '''
    Run the augmentations requested by a data profile over a set of items.

    body is a JSON object with a 'data_profile' (whose "properties" list
    drives the augmentation) and the 'items' to augment. For every enabled
    property, the tags that start with PROP_TYPE_MARKER give the property
    types; each entry of AUGMENTATIONS whose name is among those types is
    invoked when the property has a "composite" key or the augmentation is
    'shredded_list'.

    Returns a JSON object (indented by 4) with the augmented 'items' and the
    'failed' ones, both as filled in by the augmentation handlers.
    '''
    obj = json.loads(body)
    profile = obj['data_profile']
    source = obj[u'items']
    augmented_items = []
    failed_items = {}

    for prop in profile["properties"]:
        if not prop["enabled"]:
            continue

        # Strip the marker prefix off the tags that carry a property type.
        prop_types = []
        for tag in prop["tags"]:
            if tag.startswith(PROP_TYPE_MARKER):
                prop_types.append(tag[PROP_TYPE_MARKER_LEN:])
        if not prop_types:
            continue

        for aug, sid in AUGMENTATIONS.items():
            # The proxy is looked up for every augmentation, used or not.
            handler = service_proxy(sid)
            if aug not in prop_types:
                continue
            if u"composite" in prop or aug == u'shredded_list':
                handler(source, prop, augmented_items, failed_items)

    result = {'items': augmented_items, 'failed': failed_items}
    return json.dumps(result, indent=4)
예제 #58
0
 def testMappings(self):
     # Run the eMuseum-xml.json fixture through the service and check the
     # identifiers, links and sourceResource fields of the result.
     with open(path.join(DIR_FIXTURES, 'eMuseum-xml.json')) as fixture_file:
         raw_doc = fixture_file.read()
         resp, content = self._get_server_response(raw_doc)
     self.assertEqual(resp.status, 200)

     mapped = json.loads(content)
     self.assertEqual(mapped['_id'], '26251--11529')
     self.assertEqual(mapped['id'], '748a227d50f2f9ea132f5748b8e89323')
     item_url = 'http://ucldc.cdlib.org/api/items/748a227d50f2f9ea132f5748b8e89323'
     self.assertEqual(mapped['@id'], item_url)

     shown_at = 'http://digitalcollections.hoover.org/objects/11529'
     self.assertEqual(mapped['isShownAt'], shown_at)
     shown_by = 'https://img.youtube.com/vi/qxVJVE9oKg4/default.jpg'
     self.assertEqual(mapped['isShownBy'], shown_by)

     source = mapped['sourceResource']
     self.assertEqual(source['date'], '1914/1918?')
     title = "Money is power.  A war saving certificate in every Canadian home.  Get yours now at Post Offices or banks."
     self.assertEqual(source['title'], title)
     self.assertEqual(source['type'], 'Image')
예제 #59
0
def test_capitalize_value():
    """The first letter of each listed property value should be capitalized"""

    doc = {
        "id": "123",
        "spatial": {"key1": "asheville", "key2": "north Carolina"},
        "subject": ["subject", "hi there", "hello"]
    }
    # Only the first character changes; "hi there" keeps its lowercase "t".
    wanted = {
        "id": "123",
        "spatial": {"key1": "Asheville", "key2": "North Carolina"},
        "subject": ["Subject", "Hi there", "Hello"]
    }

    targets = "spatial/key1,spatial/key2,subject"
    resp, content = _get_server_response(json.dumps(doc), prop=targets)
    assert resp.status == 200

    fetched = json.loads(content)
    assert fetched == wanted, DictDiffer(wanted, fetched).diff()
예제 #60
0
def scdl_geocode_regions(body,
                         ctype,
                         action="scdl_geocode_regions",
                         prop="sourceResource/spatial"):
    """
    Service that accepts a JSON document and calls geocode_region on every
    entry of the field named by prop for which is_region is true (forcing the
    coordinates of South Carolina regions). For use with the scdl profiles.

    Returns the document serialized as JSON, or a plain-text error message
    (with a 500 response code) when body is not valid JSON.
    """

    try:
        data = json.loads(body)
    except:
        response.code = 500
        response.add_header('content-type', 'text/plain')
        return "Unable to parse body as JSON"

    if not exists(data, prop):
        return json.dumps(data)

    for place in iterify(getprop(data, prop)):
        if is_region(place):
            geocode_region(place)

    return json.dumps(data)