def expand_videos(msid, video):
    """Merge the glencoe media details for `video` into it and return it.

    `metadata(msid)` supplies a mapping of video id -> media details. The
    entry matching `video['id']` contributes `image` (renamed from
    `jpg_href`), `width`, `height` and a `sources` list. The `uri` key is
    removed and a `placeholder` entry for the thumbnail is added.

    `video` is modified in place and is also the return value.
    """
    known_media = metadata(msid)  # cached on first hit
    known_ids = ", ".join(known_media.keys())
    video_id = video['id']
    ensure(video_id in known_media,
           "glencoe doesn't know %r, only %r" % (video_id, known_ids))

    def source_entry(mtype):
        # one entry per media type: its SOURCES value plus glencoe's href for it
        return {
            'mediaType': SOURCES[mtype],
            'uri': known_media[video_id][mtype + "_href"]
        }

    # keep only the thumbnail and dimensions; the thumbnail is exposed as 'image'
    details = utils.subdict(known_media[video_id], ['jpg_href', 'width', 'height'])
    details = utils.renkeys(details, [('jpg_href', 'image')])
    details['sources'] = lmap(source_entry, SOURCES)

    video.update(details)
    del video['uri']  # returned by elife-tools, not part of spec

    # Add placeholder, the video thumbnail image
    thumbnail = {}
    video["placeholder"] = thumbnail
    thumbnail["uri"] = video["image"].split('/')[-1]  # filename part only
    thumbnail["alt"] = ""
    return video
def expand_videos(msid, video):
    """Merge the glencoe media details for `video` into it and return it.

    `metadata(msid)` supplies a mapping of video id -> media details. The
    entry matching `video['id']` contributes `image` (renamed from
    `jpg_href`), `width`, `height` and a `sources` list of ordered
    `mediaType`/`uri` pairs. The `uri` key is removed and a `placeholder`
    entry for the thumbnail is added.

    Both `ensure` calls guard the lookup: first that glencoe returned any
    media at all, then that it knows this particular video id.

    `video` is modified in place and is also the return value.
    """
    gc_data = metadata(msid)  # cached on first hit
    v_id = video['id']

    # check for "no media at all" *before* touching gc_data.keys(), and
    # actually interpolate the video id into the message
    ensure(gc_data, "glencoe doesn't know %r, it doesn't have any media" % (v_id,))
    gc_id_str = ", ".join(gc_data.keys())
    ensure(v_id in gc_data, "glencoe doesn't know %r, only %r" % (v_id, gc_id_str))

    video_data = gc_data[v_id]
    video_data = utils.subdict(video_data, ['jpg_href', 'width', 'height'])
    video_data = utils.renkeys(video_data, [('jpg_href', 'image')])

    def source(mtype):
        # ordered so 'mediaType' is always emitted before 'uri'
        return OrderedDict([
            ('mediaType', SOURCES[mtype]),
            ('uri', gc_data[v_id][mtype + "_href"])
        ])

    video_data['sources'] = lmap(source, SOURCES)
    video.update(video_data)
    del video['uri']  # returned by elife-tools, not part of spec

    # Add placeholder, the video thumbnail image
    video["placeholder"] = OrderedDict()
    video["placeholder"]["uri"] = video["image"].split('/')[-1]
    video["placeholder"]["alt"] = ""
    return video
def mkresponse(status, message, request={}, **kwargs):
    """Build the response packet for a request, log it and return it.

    The packet starts from `status`, `message`, empty `id`/`token` and the
    current time, then takes `id`, `token` and `action` from `request`, then
    any `kwargs` overrides. Keys are renamed to their response names and
    anything not listed in `conf.RESPONSE_SCHEMA['properties']` is dropped.

    The packet is logged at a level chosen by its status. A falsy `message`
    is removed from the result, and a `code` of 'already-published' turns
    the returned status into PUBLISHED (after logging).
    """
    reply = dict(status=status, message=message, id=None, token=None,
                 datetime=datetime.now())

    reply.update(subdict(request, ['id', 'token', 'action']))

    # merge in any explicit overrides
    reply.update(kwargs)

    # more response wrangling
    response_names = [("action", "requested-action"), ("dry-run", "validate-only")]
    reply = renkeys(reply, response_names)

    # remove any keys not supported in the schema
    reply = subdict(reply, conf.RESPONSE_SCHEMA['properties'].keys())

    # wrangle log context
    # presumably renamed because logging rejects an `extra` key called 'message'
    log_context = renkeys(reply, [("message", "status-message")])

    level_for_status = {
        INVALID: logging.ERROR,
        ERROR: logging.ERROR,
        VALIDATED: logging.INFO,
        INGESTED: logging.DEBUG,
        PUBLISHED: logging.DEBUG
    }
    reply_status = reply["status"]
    LOG.log(level_for_status[reply_status], "%s response", reply_status, extra=log_context)

    # success messages are None
    if not reply['message']:
        del reply['message']

    # double-publications are successful
    if kwargs.get('code') == 'already-published':
        reply['status'] = PUBLISHED

    return reply
def mkresponse(status, message, request={}, **kwargs):
    """Assemble, log and return the response for `request`.

    Precedence of values, lowest to highest: the defaults built here
    (`status`, `message`, `id`, `token`, `datetime`), then the `id`, `token`
    and `action` of `request`, then `kwargs`. The result only contains keys
    named in `conf.RESPONSE_SCHEMA['properties']`, and omits `message` when
    it is falsy.

    Logging uses the status as it was before the 'already-published'
    override, which rewrites the returned status to PUBLISHED.
    """
    defaults = {
        "status": status,
        "message": message,
        "id": None,
        "token": None,
        "datetime": datetime.now(),
    }
    from_request = subdict(request, ['id', 'token', 'action'])

    response = defaults
    for layer in (from_request, kwargs):
        # later layers win: request values first, then explicit overrides
        response.update(layer)

    # more response wrangling
    response = renkeys(response, [
        ("action", "requested-action"),
        ("dry-run", "validate-only")
    ])

    # remove any keys not supported in the schema
    schema_keys = conf.RESPONSE_SCHEMA['properties'].keys()
    response = subdict(response, schema_keys)

    # wrangle log context
    extra = renkeys(response, [("message", "status-message")])
    log_levels = {
        INVALID: logging.ERROR,
        ERROR: logging.ERROR,
        VALIDATED: logging.INFO,
        INGESTED: logging.DEBUG,
        PUBLISHED: logging.DEBUG
    }
    level = log_levels[response["status"]]
    LOG.log(level, "%s response", response['status'], extra=extra)

    # success messages are None
    has_message = bool(response['message'])
    if not has_message:
        del response['message']

    # double-publications are successful
    already_published = kwargs.get('code') == 'already-published'
    if already_published:
        response['status'] = PUBLISHED

    return response
def post_xml():
    """Upload JATS xml, generate article-json, validate it, send it to lax as a dry run.

    Inputs come from the current `request`: the uploaded file under
    `files['xml']`, zero or more `override` form values, and the article
    `id` and `version` query arguments.

    Returns a `(body, status)` pair. On success the status is 200 and the
    body is the lax response fields plus the generated `ajson` and the
    parsed `override`. Every failure handled here returns 400 with `status`,
    `code`, `message` and `trace`.
    """
    http_ensure('xml' in request.files, "xml file required", 400)

    try:
        override = scraper.deserialize_overrides(request.form.getlist('override'))
    except ValueError:
        return {
            'status': conf.ERROR,
            'code': conf.BAD_OVERRIDES,
            'message': 'an error occurred attempting to parse your given overrides.',
            'trace': traceback.format_exc()
        }, 400

    # upload
    try:
        xml = request.files['xml']
        # basename only, so the upload cannot choose a directory
        filename = os.path.basename(xml.filename)
        # NOTE(review): if http_ensure raises an Exception subclass it is caught
        # just below and reported as BAD_UPLOAD - confirm that is intended
        http_ensure(os.path.splitext(filename)[1] == '.xml', "file doesn't look like xml")
        path = join(upload_folder(), filename)
        xml.save(path)
    except Exception:
        return {
            'status': conf.ERROR,
            'code': conf.BAD_UPLOAD,
            'message': 'an error occured uploading the article xml to be processed',
            'trace': traceback.format_exc(),
        }, 400  # shouldn't this be a 500? everything is always the client's fault.

    # generate
    try:
        article_json = scraper.main(path, {
            'override': override,
            'fill-missing-image-dimensions': True
        })
        json_filename = filename + '.json'
        json_path = join(upload_folder(), json_filename)
        # context manager so the handle is flushed and closed before validation reads it
        with open(json_path, 'w') as json_file:
            json_file.write(article_json)
    except Exception as err:
        return {
            'status': conf.ERROR,
            'code': conf.BAD_SCRAPE,
            'message': str(err),
            'trace': traceback.format_exc()
        }, 400

    # validate
    try:
        if conf.API_PRE_VALIDATE:
            with open(json_path, 'r') as json_file:
                ajson_validate.main(json_file)
    except jsonschema.ValidationError as err:
        return {
            'status': conf.INVALID,
            'code': conf.ERROR_INVALID,
            'message': 'the generated article-json failed validation, see trace for details.',
            'trace': str(err),  # todo: any good?
        }, 400
    except Exception:
        return {
            'status': conf.ERROR,
            'code': conf.ERROR_VALIDATING,
            'message': 'an error occurred attempting to validate the generated article-json',
            'trace': traceback.format_exc()
        }, 400  # TODO: shouldn't this be a 500?

    # send to lax
    try:
        msid = request.args['id']
        version = request.args['version']
        token = str(uuid.uuid4())
        args = {
            # the *most* important parameter. don't modify lax.
            'dry_run': True,

            # a forced ingest by default
            'action': conf.INGEST,
            'force': True,

            # article details
            'msid': msid,
            'version': int(version),
            'article_json': article_json,

            'token': token,
        }
        lax_resp = adaptor.call_lax(**args)

        context = utils.renkeys(lax_resp, [("message", "lax-message")])
        LOG.info("lax response", extra=context)

        api_resp = utils.subdict(lax_resp, ['status', 'code', 'message', 'trace'])

        if api_resp['status'] in [conf.INVALID, conf.ERROR]:
            # failure
            return api_resp, 400

        # success
        # 'code', 'message' and 'trace' are not returned by lax on success, just 'status'
        api_resp['ajson'] = json.loads(article_json)['article']
        api_resp['override'] = override
        return api_resp, 200

    except Exception:
        # lax returned something indecipherable
        # NOTE(review): a missing/non-numeric 'id' or 'version' query argument
        # also ends up here and gets the same message
        return {
            'status': conf.ERROR,
            'code': conf.ERROR_COMMUNICATING,
            'message': "lax responded with something that couldn't be decoded",
            'trace': traceback.format_exc(),
        }, 400  # TODO: shouldn't this be a 500?
def handler(json_request, outgoing):
    """Validate a request, prepare the article-json if needed, and proxy it to lax.

    `json_request` is validated against `conf.REQUEST_SCHEMA`. For the
    INGEST and INGEST_PUBLISH actions the article xml is downloaded from the
    request's `location`, rendered to article data and serialized to
    article-json before lax is called.

    Every outcome is reported through `send_response(outgoing, ...)`. Early
    failures return the result of that call. If the call to lax itself
    raises, an ERROR response is sent and the exception is re-raised.
    """
    response = partial(send_response, outgoing)

    try:
        request = utils.validate(json_request, conf.REQUEST_SCHEMA)
    except ValueError:
        # bad data. who knows what it was. die
        return response(mkresponse(ERROR, "request could not be parsed: %s" % json_request))
    except ValidationError as err:
        # data is readable, but it's in an unknown/invalid format. die
        return response(mkresponse(ERROR, "request was incorrectly formed: %s" % str(err)))
    except Exception as err:
        # die
        msg = "unhandled error attempting to handle request: %s" % str(err)
        return response(mkresponse(ERROR, msg))

    # we have a valid request :)
    LOG.info("valid request")

    params = subdict(request, ['action', 'id', 'token', 'version', 'force', 'validate-only'])
    # rename to the keyword names call_lax is given elsewhere in this file
    # (`msid`, `dry_run`) so `call_lax(**params)` lines up with them
    params = renkeys(params, [('validate-only', 'dry_run'), ('id', 'msid')])

    # if we're to ingest/publish, then we expect a location to download article data
    if params['action'] in [INGEST, INGEST_PUBLISH]:
        try:
            article_xml = download(request['location'])
            if not article_xml:
                raise ValueError("no article content available")
        except AssertionError as err:
            msg = "refusing to download article xml: %s" % str(err)
            return response(mkresponse(ERROR, msg, request))
        except Exception as err:
            msg = "failed to download article xml from %r: %s" % (request['location'], str(err))
            return response(mkresponse(ERROR, msg, request))

        LOG.info("got xml")

        try:
            article_data = scraper.render_single(article_xml,
                                                 version=params['version'],
                                                 location=request['location'])
            LOG.info("rendered article data ")
        except Exception as err:
            msg = "failed to render article-json from article-xml: %s" % str(err)
            LOG.exception(msg, extra=params)
            return response(mkresponse(ERROR, msg, request))

        LOG.info("successful scrape")

        try:
            article_json = utils.json_dumps(article_data)
        except ValueError as err:
            msg = "failed to serialize article data to article-json: %s" % str(err)
            return response(mkresponse(ERROR, msg, request))

        LOG.info("successfully serialized article-data to article-json")

        # phew! gauntlet ran, we're now confident of passing this article-json to lax
        # lax may still reject the data as invalid, but we'll proxy that back if necessary
        params['article_json'] = article_json

    try:
        LOG.info("calling lax")
        lax_response = call_lax(**params)
        LOG.info("lax response: %r", lax_response)
        return response(mkresponse(**lax_response))
    except Exception as err:
        # lax didn't understand us or broke
        msg = "lax failed attempting to handle our request: %s" % str(err)
        response(mkresponse(ERROR, msg, request))
        # when lax fails, we fail
        raise
def post_xml():
    """Upload JATS xml, generate article-json, validate it, send it to lax as a dry run.

    Inputs come from the current `request`: the uploaded file under
    `files['xml']`, zero or more `override` form values, and the article
    `id` and `version` query arguments.

    Returns a `(body, status)` pair. On success the status is 200 and the
    body is the lax response fields plus the generated `ajson` and the
    parsed `override`. Every failure handled here returns 400 with `status`,
    `code`, `message` and `trace`.
    """
    http_ensure('xml' in request.files, "xml file required", 400)

    try:
        override = scraper.deserialize_overrides(request.form.getlist('override'))
    except ValueError:
        # unparseable overrides are a client error; return an explicit 400 like
        # every other failure branch instead of an error body with no status
        return {
            'status': conf.ERROR,
            'code': conf.BAD_OVERRIDES,
            'message': 'an error occurred attempting to parse your given overrides.',
            'trace': traceback.format_exc()
        }, 400

    # upload
    try:
        xml = request.files['xml']
        # basename only, so the upload cannot choose a directory
        filename = os.path.basename(xml.filename)
        # NOTE(review): if http_ensure raises an Exception subclass it is caught
        # just below and reported as BAD_UPLOAD - confirm that is intended
        http_ensure(os.path.splitext(filename)[1] == '.xml', "file doesn't look like xml")
        path = join(upload_folder(), filename)
        xml.save(path)
    except Exception:
        return {
            'status': conf.ERROR,
            'code': conf.BAD_UPLOAD,
            'message': 'an error occured uploading the article xml to be processed',
            'trace': traceback.format_exc(),
        }, 400  # shouldn't this be a 500?

    # generate
    try:
        article_json = scraper.main(path, {
            'override': override,
            'fill-missing-image-dimensions': True
        })
        json_filename = filename + '.json'
        json_path = join(upload_folder(), json_filename)
        # context manager so the handle is flushed and closed before validation reads it
        with open(json_path, 'w') as json_file:
            json_file.write(article_json)
    except Exception as err:
        return {
            'status': conf.ERROR,
            'code': conf.BAD_SCRAPE,
            'message': str(err),
            'trace': traceback.format_exc()
        }, 400

    # validate
    try:
        if conf.API_PRE_VALIDATE:
            with open(json_path, 'r') as json_file:
                ajson_validate.main(json_file)
    except jsonschema.ValidationError as err:
        return {
            'status': conf.INVALID,
            'code': conf.ERROR_INVALID,
            'message': 'the generated article-json failed validation, see trace for details.',
            'trace': str(err),  # todo: any good?
        }, 400
    except Exception:
        return {
            'status': conf.ERROR,
            'code': conf.ERROR_VALIDATING,
            'message': 'an error occurred attempting to validate the generated article-json',
            'trace': traceback.format_exc()
        }, 400  # TODO: shouldn't this be a 500?

    # send to lax
    try:
        msid = request.args['id']
        version = request.args['version']
        token = str(uuid.uuid4())
        args = {
            # the *most* important parameter. don't modify lax.
            'dry_run': True,

            # a forced ingest by default
            'action': conf.INGEST,
            'force': True,

            # article details
            'msid': msid,
            'version': int(version),
            'article_json': article_json,

            'token': token,
        }
        lax_resp = adaptor.call_lax(**args)

        context = utils.renkeys(lax_resp, [("message", "lax-message")])
        LOG.info("lax response", extra=context)

        api_resp = utils.subdict(lax_resp, ['status', 'code', 'message', 'trace'])

        if api_resp['status'] in [conf.INVALID, conf.ERROR]:
            # failure
            return api_resp, 400

        # success
        # 'code', 'message' and 'trace' are not returned by lax on success, just 'status'
        api_resp['ajson'] = json.loads(article_json)['article']
        api_resp['override'] = override
        return api_resp, 200

    except Exception:
        # lax returned something indecipherable
        # NOTE(review): a missing/non-numeric 'id' or 'version' query argument
        # also ends up here and gets the same message
        return {
            'status': conf.ERROR,
            'code': conf.ERROR_COMMUNICATING,
            'message': "lax responded with something that couldn't be decoded",
            'trace': traceback.format_exc(),
        }, 400  # TODO: shouldn't this be a 500?
def handler(json_request, outgoing):
    """Handle one incoming request and report the outcome via `outgoing`.

    Steps, each of which sends an ERROR response and stops on failure:
    validate `json_request` against `conf.REQUEST_SCHEMA`; for INGEST and
    INGEST_PUBLISH actions download the xml at the request's `location`,
    render it to article data and serialize that to article-json; finally
    call lax and relay its response.

    Returns whatever `send_response(outgoing, ...)` returns. A failure while
    calling lax is reported and then re-raised.
    """
    respond = partial(send_response, outgoing)

    try:
        request = utils.validate(json_request, conf.REQUEST_SCHEMA)
    except ValueError:
        # bad data. who knows what it was. die
        unparsed = mkresponse(ERROR, "request could not be parsed: %s" % json_request)
        return respond(unparsed)
    except ValidationError as err:
        # data is readable, but it's in an unknown/invalid format. die
        malformed = mkresponse(ERROR, "request was incorrectly formed: %s" % str(err))
        return respond(malformed)
    except Exception as err:
        # die
        reason = "unhandled error attempting to handle request: %s" % str(err)
        return respond(mkresponse(ERROR, reason))

    # we have a valid request :)
    LOG.info("valid request")

    wanted = ['action', 'id', 'token', 'version', 'force', 'validate-only']
    # these become the keyword arguments of call_lax, hence the renames
    params = renkeys(subdict(request, wanted), [('validate-only', 'dry_run'), ('id', 'msid')])

    # if we're to ingest/publish, then we expect a location to download article data
    if params['action'] in (INGEST, INGEST_PUBLISH):
        try:
            article_xml = download(request['location'])
            if not article_xml:
                raise ValueError("no article content available")
        except AssertionError as err:
            reason = "refusing to download article xml: %s" % str(err)
            return respond(mkresponse(ERROR, reason, request))
        except Exception as err:
            reason = "failed to download article xml from %r: %s" % (request['location'], str(err))
            return respond(mkresponse(ERROR, reason, request))

        LOG.info("got xml")

        try:
            article_data = scraper.render_single(
                article_xml, version=params['version'], location=request['location'])
            LOG.info("rendered article data ")
        except Exception as err:
            if hasattr(err, 'message'):
                error = str(err)
            else:
                error = err
            reason = "failed to render article-json from article-xml: %s" % error
            LOG.exception(reason, extra=params)
            return respond(mkresponse(ERROR, reason, request))

        LOG.info("successful scrape")

        try:
            article_json = utils.json_dumps(article_data)
        except ValueError as err:
            reason = "failed to serialize article data to article-json: %s" % str(err)
            return respond(mkresponse(ERROR, reason, request))

        LOG.info("successfully serialized article-data to article-json")

        # phew! gauntlet ran, we're now confident of passing this article-json to lax
        # lax may still reject the data as invalid, but we'll proxy that back if necessary
        params['article_json'] = article_json

    try:
        LOG.info("calling lax")
        lax_response = call_lax(**params)
        LOG.info("lax response: %r", lax_response)
        return respond(mkresponse(**lax_response))
    except Exception as err:
        # lax didn't understand us or broke
        reason = "lax failed attempting to handle our request: %s" % str(err)
        respond(mkresponse(ERROR, reason, request))
        # when lax fails, we fail
        raise