Beispiel #1
0
def nypl_identify_object(body, ctype, list_sets=None):
    '''
    Service that accepts a JSON document and replaces its "title" with the
    title of the matching collection from the list served at 'list_sets'.

    The list is fetched over HTTP and parsed as XML; collections are read
    from nyplAPI/response/collection. A collection matches when its "uuid"
    equals the document's "title" or is contained in the document's "@id".

    body - the JSON document, as a string
    ctype - content type of the body (not used by this function)
    list_sets - URL of the service that lists the collections

    Returns the (possibly updated) document serialized as JSON. If the body
    cannot be parsed, the response code is set to 500 and a plain-text message
    is returned. If the list cannot be fetched, the error is logged and the
    body is returned unchanged. Raises KeyError if a candidate collection is
    found but the document lacks "title" or "@id".
    '''
    try:
        data = json.loads(body)
    except (TypeError, ValueError):
        # Only parse failures are reported as 500; a bare except would also
        # have swallowed KeyboardInterrupt and SystemExit.
        response.code = 500
        response.add_header('content-type', 'text/plain')
        return "Unable to parse body as JSON"

    H = httplib2.Http('/tmp/.cache')
    # The httplib2 flag is spelled force_exception_TO_status_code; the former
    # "..._as_..." spelling only created an unused attribute, so transport
    # errors were raised instead of reaching the status check below.
    H.force_exception_to_status_code = True
    resp, content = H.request(list_sets)
    if not resp[u'status'].startswith('2'):
        # %s arguments keep the log call from failing when list_sets is None
        logger.error('  HTTP error (%s) resolving URL: %s',
                     resp[u'status'], list_sets)
        return body
    content_dict = xmltodict.parse(content, xml_attribs=True, attr_prefix='',
                                   force_cdata=False,
                                   ignore_whitespace_cdata=True)
    sets = content_dict["nyplAPI"]["response"]

    collections = sets.get("collection") if isinstance(sets, dict) else None
    if isinstance(collections, dict):
        # xmltodict yields a dict (not a one-element list) for a single
        # <collection>; iterating it directly would walk its keys.
        collections = [collections]

    for coll_dict in collections or []:
        if not isinstance(coll_dict, dict):
            continue
        if "uuid" in coll_dict and "title" in coll_dict and (
                coll_dict["uuid"] == data["title"]
                or coll_dict["uuid"] in data["@id"]):
            data["title"] = coll_dict["title"]

    return json.dumps(data)
Beispiel #2
0
def mix_freemix(body, ctype):
    '''
    Merge the items of two remote data sets, optionally aligning one property.

    The request body is JSON shaped like this ("alignProperties" may be absent):

    {
      "datasets": {
        "dataset1": "http://recollection.zepheira.com/data/guide/data-profile-arthur-y-ford-photograph-albums-reprise-for-demo/data.json",
        "dataset2": "http://recollection.zepheira.com/data/guide/jean-thomas-collection/data.json"
      },
      "alignProperties": {
        "label": "Name",
        "dataset1": "Surname",
        "dataset2": "Name_of_Candidate"
      }
    }

    Each data set URL is fetched and the "items" of its JSON content are
    collected. When "alignProperties" is given, every item that has the
    property named for its data set also gets that value under the "label"
    name (after UNSUPPORTED_IN_EXHIBITKEY matches are replaced with "_").
    Finally each item receives a sequential "id" of the form "_N".

    body - the JSON request described above
    ctype - content type of the body (not used by this function)

    Returns the merged items as an indented JSON string. Raises ValueError
    unless exactly two data sets are supplied.
    '''
    #See: http://foundry.zepheira.com/issues/137#note-10
    # NOTE(review): the credentials are hard-coded in source - consider moving
    # them to configuration.
    USER, PASSWD = "loc", "recollection"
    http = httplib2.Http(make_named_cache('mix.freemix.json'))
    if USER:
        http.add_credentials(USER, PASSWD)

    spec = json.loads(body)
    datasets = spec['datasets']
    alignments = spec.get('alignProperties')

    if len(datasets) != 2:
        raise ValueError('You must provide Mixer exactly 2 data sets')

    mixed = []
    for name in datasets:
        #Replace the data set URL with the content
        source_url = datasets[name]
        logger.debug("Processing dataset: %s" % (source_url))
        resp, content = http.request(source_url)
        items = json.loads(content)[u'items']

        if not alignments:
            # Nothing to align: take the items as they are
            mixed += items
            continue

        source_prop = alignments[name]
        #Potluck (the usual mixer client) seems to generate property names such as
        #"Activity / Activity" which Exhibit cannot handle. Work around that.
        #See: http://foundry.zepheira.com/issues/334
        target_prop = UNSUPPORTED_IN_EXHIBITKEY.sub('_', alignments['label'])
        logger.debug("Mapping: %s -> %s" % (source_prop, target_prop))
        for entry in items:
            if source_prop in entry:
                entry[target_prop] = entry[source_prop]
            mixed.append(entry)

    # Give every merged item a sequential identifier
    for position, entry in enumerate(mixed):
        entry[u'id'] = u'_%i' % position

    return json.dumps({'items': mixed}, indent=4)
Beispiel #3
0
 def __init__(self, root, logger=logging, cachedir='/tmp/.cache'):
     '''
     Remember the service root and logger, and create the HTTP client.

     root - root of the OAI service endpoint, e.g. http://dspace.mit.edu/oai/request
     logger - stored as self.logger; defaults to the logging module
     cachedir - passed to httplib2.Http when creating self.h
     '''
     self.logger = logger
     self.root = root
     self.h = httplib2.Http(cachedir)
Beispiel #4
0
def listrecords(limit=100):
    '''
    Fetch the view at COUCH_DATABASE/_design/VIEW_APP/_view/VIEW_NAME.

    limit - value sent as the "limit" query parameter (default 100)

    Returns the response body when the status is 2xx; otherwise logs an error
    and returns None.
    '''
    h = httplib2.Http()
    # The httplib2 flag is spelled force_exception_TO_status_code; the former
    # "..._as_..." spelling had no effect, so connection errors were raised
    # instead of reaching the status check below.
    h.force_exception_to_status_code = True
    url = join(COUCH_DATABASE, '_design', VIEW_APP, '_view', VIEW_NAME)
    url += '?limit=' + str(limit)
    logger.debug(url)
    resp, content = h.request(url, "GET", headers=COUCH_AUTH_HEADER)
    # %s formatting so a non-str body cannot make the debug call itself fail
    logger.debug("Content: %s", content)
    if str(resp.status).startswith('2'):
        return content
    logger.error("Couldn't get documents via: " + repr(resp))
    return None
Beispiel #5
0
def oaisetname(body, ctype, sets_service=None):
    '''
    Service that accepts a JSON document and sets its "title" (and, when
    available, "description") by looking up the document's set in the list
    returned by the service passed in the 'sets_service' parameter.

    The set is identified by the part of the document's "_id" after the first
    "--" (or the whole "_id" if there is no "--"). The sets service is expected
    to return a JSON array of objects with "setSpec", "setName" and optionally
    "setDescription" keys.

    body - the JSON document, as a string
    ctype - content type of the body (not used by this function)
    sets_service - URL of the sets service; a non-absolute value is resolved
                   against the scheme and host of the current request

    Returns the (possibly updated) document serialized as JSON. When no
    service is given, or the body or the service result is not valid JSON,
    the response code is set to 500 and a plain-text message is returned.
    '''

    if not sets_service:
        response.code = 500
        response.add_header('content-type', 'text/plain')
        return "No set service has been selected"

    try:
        data = json.loads(body)
    except (TypeError, ValueError):
        # Only parse failures are reported as 500; a bare except would also
        # have swallowed KeyboardInterrupt and SystemExit.
        response.code = 500
        response.add_header('content-type', 'text/plain')
        return "Unable to parse body as JSON"

    if not is_absolute(sets_service):
        host = request.environ.get('HTTP_HOST') or request.environ['SERVER_NAME']
        sets_service = request.environ['wsgi.url_scheme'] + '://' + host + sets_service

    H = httplib2.Http('/tmp/.cache')
    # The httplib2 flag is spelled force_exception_TO_status_code; the former
    # "..._as_..." spelling had no effect, so transport errors were raised
    # instead of reaching the status check below.
    H.force_exception_to_status_code = True
    resp, content = H.request(sets_service)
    if not resp[u'status'].startswith('2'):
        # Report the failure and fall through: the body is still handed to the
        # JSON parser below, which rejects anything that is not valid JSON.
        sys.stderr.write('  HTTP error (' + resp[u'status'] +
                         ') resolving URL: ' + sets_service + '\n')

    try:
        sets = json.loads(content)
    except (TypeError, ValueError):
        response.code = 500
        response.add_header('content-type', 'text/plain')
        return "Unable to parse sets service result as JSON: " + repr(content)

    setpos = data['_id'].find('--')
    match = data['_id'][setpos + 2:] if setpos > -1 else data['_id']

    for s in sets:
        if match == s['setSpec']:
            data[u'title'] = s['setName']
            # The description may be absent or empty; only copy a real value
            description = s.get('setDescription')
            if description:
                data[u'description'] = description.strip()
            break

    return json.dumps(data)
Beispiel #6
0
def update_document(body, ctype):
    '''
    Store a JSON document with an HTTP PUT to the URL formed by joining
    COUCH_DATABASE with the document's "id".

    body - the JSON document, as a string; it is sent unchanged
    ctype - content type of the body (not used by this function)

    Returns the response body when the status is 2xx; otherwise logs an error
    and returns None. Raises KeyError if the document has no "id".
    '''
    logger.debug(body)
    # json.loads replaces the former StringIO + json.load round-trip, which
    # needed the Python-2-only StringIO module for no benefit.
    parsed_doc = json.loads(body)
    document_id = parsed_doc[u"id"]

    logger.debug("Storing the document: %s", document_id)
    h = httplib2.Http()
    # The httplib2 flag is spelled force_exception_TO_status_code; the former
    # "..._as_..." spelling had no effect, so connection errors were raised
    # instead of reaching the status check below.
    h.force_exception_to_status_code = True
    url = join(COUCH_DATABASE, document_id)
    resp, content = h.request(url, 'PUT', body=body, headers=COUCH_AUTH_HEADER)
    if str(resp.status).startswith('2'):
        return content
    logger.error("Couldn't store the document %s with the id: %s. " % (body, document_id, ) )
    return None
def nypl_identify_object(body, ctype, list_sets=None):
    '''
    Service that accepts a JSON document and replaces its "title" with the
    title of the matching collection from the list served at 'list_sets'.

    The list is fetched over HTTP and parsed as XML; collections are read
    from nyplAPI/response/collection. A collection matches when its "uuid"
    equals the document's "title" or is contained in the document's "@id".

    body - the JSON document, as a string
    ctype - content type of the body; must equal HTTP_TYPE_JSON ignoring case
    list_sets - URL of the service that lists the collections

    Returns the (possibly updated) document serialized as JSON. If the content
    type is wrong or the body cannot be parsed, the error is logged, the
    response code is set to HTTP_INTERNAL_SERVER_ERROR and the error text is
    returned. If the list cannot be fetched, the error is logged and the body
    is returned unchanged. Raises KeyError if a candidate collection is found
    but the document lacks "title" or "@id".
    '''
    try:
        if ctype.lower() != HTTP_TYPE_JSON:
            # Raised explicitly instead of via "assert" so the check is not
            # stripped under python -O; the reported error text is unchanged.
            raise AssertionError("%s is not %s" % (
                HTTP_HEADER_TYPE, HTTP_TYPE_JSON))
        data = json.loads(body)
    except Exception as e:
        error_text = "Bad JSON: %s: %s" % (e.__class__.__name__, str(e))
        logger.exception(error_text)
        response.code = HTTP_INTERNAL_SERVER_ERROR
        response.add_header(HTTP_HEADER_TYPE, HTTP_TYPE_TEXT)
        return error_text

    H = httplib2.Http('/tmp/.cache')
    # The httplib2 flag is spelled force_exception_TO_status_code; the former
    # "..._as_..." spelling only created an unused attribute, so transport
    # errors were raised instead of reaching the status check below.
    H.force_exception_to_status_code = True
    resp, content = H.request(list_sets)
    if not resp[u'status'].startswith('2'):
        # %s arguments keep the log call from failing when list_sets is None
        logger.error('  HTTP error (%s) resolving URL: %s',
                     resp[u'status'], list_sets)
        return body
    content_dict = xmltodict.parse(content,
                                   xml_attribs=True,
                                   attr_prefix='',
                                   force_cdata=False,
                                   ignore_whitespace_cdata=True)
    sets = content_dict["nyplAPI"]["response"]

    collections = sets.get("collection") if isinstance(sets, dict) else None
    if isinstance(collections, dict):
        # xmltodict yields a dict (not a one-element list) for a single
        # <collection>; iterating it directly would walk its keys.
        collections = [collections]

    for coll_dict in collections or []:
        if not isinstance(coll_dict, dict):
            continue
        if "uuid" in coll_dict and "title" in coll_dict and (
                coll_dict["uuid"] == data["title"]
                or coll_dict["uuid"] in data["@id"]):
            data["title"] = coll_dict["title"]

    return json.dumps(data)
import sys
from server_support import server, print_error_log
from amara.thirdparty import httplib2
from amara.thirdparty import json

# Request headers declaring a JSON body
CT_JSON = {"Content-Type": "application/json"}

# HTTP client shared by the helpers in this module
H = httplib2.Http()

def _get_server_response(body, prop=None):
    """POST body to the server's decode_html endpoint for the given prop.

    Returns whatever H.request returns (unpacked by callers as
    response, content).
    """
    base = server()
    target = base + "decode_html?prop=%s" % prop
    return H.request(target, "POST", body=body, headers=CT_JSON)

def test_decode_html():
    """The "subject" values should come back with ", &, < and > decoded."""
    payload = {
        "subject": ['a', 'b', '&quot;&amp;', 'c; &lt;', 'd', '&gt;;e'],
    }
    decoded = {
        "subject": ['a', 'b', '"&', 'c; <', 'd', '>;e'],
    }

    request_body = json.dumps(payload)
    resp, content = _get_server_response(request_body, prop="subject")
    assert resp.status == 200
    assert json.loads(content) == decoded

if __name__ == "__main__":
    # Not meant to be executed directly: exit with a hint for the user.
    sys.exit("Use nosetest")
Beispiel #9
0
def post(body, sink):
    """POST body to the URL sink with a JSON content type.

    Returns the (response, content) pair produced by httplib2.
    """
    client = httplib2.Http()
    json_headers = {'Content-type': 'application/json'}
    resp, content = client.request(
        sink, "POST", body=body, headers=json_headers)
    return resp, content
 def request(self, url, method, body, headers=HEADERS):
     '''
     Send an HTTP request through a newly created httplib2.Http object and
     return its result.

     url, method, body, headers - passed positionally to httplib2's request
     '''
     from amara.thirdparty import httplib2
     client = httplib2.Http()
     result = client.request(url, method, body, headers)
     return result