Beispiel #1
0
def main(doc, args=None):
    """Render `doc` and return the resulting article-json as a JSON string.

    `doc` is either something with a `name` attribute or the name itself;
    that name is handed to `utils.version_from_path` to obtain the msid and
    version. `args`, when given, is merged over the default render context
    (`version`, `override`, `fill-missing-image-dimensions`).

    Returns the output of `render_single` serialised with a 4-space indent.

    Any exception raised while rendering or serialising is logged and then
    re-raised unchanged: an `AssertionError` via `LOG.error` (with the
    overrides attached), anything else via `LOG.exception` (with the whole
    render context attached).
    """
    source_name = getattr(doc, 'name', doc)
    msid, version = utils.version_from_path(source_name)

    # defaults first; caller-supplied values take precedence
    ctx = dict([
        ('version', version),
        ('override', {}),
        ('fill-missing-image-dimensions', False),
    ])
    ctx.update(args or {})

    try:
        return json.dumps(render_single(doc, **ctx), indent=4)

    except Exception as err:
        # fields common to both reports; context needs to be json serializable
        log_ctx = {'doc': str(doc), 'msid': msid, 'version': version}

        if isinstance(err, AssertionError):
            # business error
            log_ctx['override'] = ctx['override']
            LOG.error("failed to scrape article", extra=log_ctx)
        else:
            # unhandled exception
            log_ctx['render-ctx'] = ctx
            LOG.exception("failed to scrape article", extra=log_ctx)
        raise
Beispiel #2
0
def main(doc, args=None):
    """Scrape `doc` into article-json and return it as an indented JSON string.

    The msid and version come from `utils.version_from_path`, applied to
    `doc.name` when `doc` has a `name` attribute and to `doc` itself
    otherwise. Entries in `args` (optional) replace the matching defaults of
    the context passed to `render_single` as keyword arguments.

    Failures are never swallowed: they are logged with "failed to scrape
    article" and re-raised. `AssertionError` is treated as a business error
    (`LOG.error`), everything else as unexpected (`LOG.exception`).
    """
    msid, version = utils.version_from_path(getattr(doc, 'name', doc))

    ctx = {'version': version, 'override': {}, 'fill-missing-image-dimensions': False}
    if args:
        ctx.update(args)

    try:
        rendered = render_single(doc, **ctx)
        return json.dumps(rendered, indent=4)

    except AssertionError:
        # business error
        # context needs to be json serializable, hence str(doc)
        failure = dict(doc=str(doc), msid=msid, version=version, override=ctx['override'])
        LOG.error("failed to scrape article", extra=failure)
        raise

    except Exception:
        # unhandled exception
        # context needs to be json serializable, hence str(doc)
        failure = dict(doc=str(doc), msid=msid, version=version)
        failure['render-ctx'] = ctx
        LOG.exception("failed to scrape article", extra=failure)
        raise
Beispiel #3
0
def main(doc, quiet=False):
    """Validate the article-json held in the open file `doc`.

    The document is parsed with `json.load` and its "article" value is
    validated against `conf.POA_SCHEMA` when `is_poa(contents)` is truthy,
    otherwise against `conf.VOR_SCHEMA`.

    Returns `(True, contents)` when validation passes. When
    `jsonschema.ValidationError` is raised the failure is logged and then
    either `(False, contents)` is returned (`quiet` truthy) or the error is
    re-raised (`quiet` falsy).
    """
    contents = json.load(doc)

    if is_poa(contents):
        schema = conf.POA_SCHEMA
    else:
        schema = conf.VOR_SCHEMA

    filename = os.path.basename(doc.name)
    msid, ver = utils.version_from_path(filename)
    log_context = dict([
        ('json-filename', filename),
        ('msid', msid),
        ('version', ver),
    ])

    try:
        jsonschema.validate(contents["article"], schema)
        LOG.info("validated %s", msid, extra=log_context)
        return True, contents

    except jsonschema.ValidationError as err:
        LOG.error("failed to validate %s: %s", msid, err, extra=log_context)
        if not quiet:
            raise
        return False, contents
def main(doc, quiet=False):
    """Check the article-json in file object `doc` against the matching schema.

    `doc` must be readable by `json.load` and expose a `name`; the base name
    of that path is passed to `utils.version_from_path` for the msid and
    version used in log records.

    Returns a `(success, contents)` pair. A `jsonschema.ValidationError` is
    always logged; it propagates to the caller unless `quiet` is truthy, in
    which case `(False, contents)` is returned instead.
    """
    contents = json.load(doc)
    poa = is_poa(contents)
    schema = conf.POA_SCHEMA if poa else conf.VOR_SCHEMA

    filename = os.path.basename(doc.name)
    msid, ver = utils.version_from_path(filename)
    log_context = {'json-filename': filename, 'msid': msid, 'version': ver}

    try:
        article = contents["article"]
        jsonschema.validate(article, schema)
        LOG.info("validated %s", msid, extra=log_context)
        outcome = (True, contents)

    except jsonschema.ValidationError as err:
        LOG.error("failed to validate %s: %s", msid, err, extra=log_context)
        if not quiet:
            raise
        outcome = (False, contents)

    return outcome
def mkreq(path, **overrides):
    """Build and validate an ingest request dict for the article at `path`.

    `path` must not start with 'http://'. Paths that do not start with
    'https://' are prefixed with 'file://'. The request's 'id' and 'version'
    come from `utils.version_from_path(path)`; any keyword `overrides`
    replace the corresponding default fields.

    Returns the request dict after it has been checked against
    `conf.REQUEST_SCHEMA` with `utils.validate`.
    """
    ensure(not path.startswith('http://'), "no insecure requests, please")

    # TODO: consider removing these two lines
    looks_relative_to_repo = path.lstrip('/').startswith('article-xml/articles/')
    if looks_relative_to_repo:
        path = os.path.abspath(path)

    if not path.startswith('https://'):
        path = 'file://' + path

    msid, ver = utils.version_from_path(path)

    request = {'action': conf.INGEST, 'location': path, 'id': msid, 'version': ver}
    request.update({
        'force': True,  # TODO: shouldn't this be False?
        'validate-only': False,
        'token': 'pants-party',
    })
    request.update(overrides)

    # don't ever generate an invalid request
    utils.validate(request, conf.REQUEST_SCHEMA)
    return request
Beispiel #6
0
def _error_response(code, message, status_code=400):
    """Return an `(error body, http status)` pair for the exception being handled.

    Must be called from inside an `except` block: the body's 'trace' value is
    the formatted traceback of the exception currently being handled.
    """
    return {
        'status': conf.ERROR,
        'code': code,
        'message': message,
        'trace': traceback.format_exc(),
    }, status_code


def post_xml():
    """upload jats xml, generate xml, validate, send to lax as a dry run

    Reads the uploaded file from `request.files['xml']` and any overrides from
    the 'override' form values, then runs four stages in order: save the
    upload, scrape it to article-json, validate the article-json, and send it
    to lax with `dry_run` set.

    Returns a `(body, http status)` pair. Every failure yields a body with
    'status', 'code', 'message' and 'trace' and a 400 status. On success the
    body is lax's 'status' plus 'ajson' (the generated article) and
    'override' (the parsed overrides), with a 200 status.
    """
    # NOTE(review): 404 for a missing upload field looks unusual - confirm it is intended
    http_ensure('xml' in request.files, "xml file required", 404)

    try:
        override = scraper.deserialize_overrides(request.form.getlist('override'))
    except ValueError:
        # previously returned without a status code, unlike every other
        # error branch in this handler
        return _error_response(
            conf.BAD_OVERRIDES,
            'an error occurred attempting to parse your given overrides.')

    # upload
    try:
        xml = request.files['xml']
        # the filename is supplied by the client: keep only its final path
        # component so it cannot point outside the upload folder.
        # assumes a name like 'elife-00000-v1.xml'
        filename = os.path.basename(xml.filename)
        http_ensure(os.path.splitext(filename)[1] == '.xml', "file doesn't look like xml")
        path = join(upload_folder(), filename)
        xml.save(path)
    except Exception:
        # everything is always the client's fault.
        return _error_response(
            conf.BAD_UPLOAD,
            'an error occured uploading the article xml to be processed')

    # generate
    try:
        article_json = scraper.main(path, {
            'override': override,
            'fill-missing-image-dimensions': True
        })
        json_filename = filename + '.json'
        json_path = join(upload_folder(), json_filename)
        with open(json_path, 'w') as json_file:
            json_file.write(article_json)
    except Exception as err:
        return _error_response(conf.BAD_SCRAPE, str(err))

    # validate
    try:
        with open(json_path, 'r') as json_file:
            ajson_validate.main(json_file)

    except jsonschema.ValidationError as err:
        return {
            'status': conf.INVALID,
            'code': conf.ERROR_INVALID,
            'message': 'the generated article-json failed validation, see trace for details.',
            'trace': str(err),  # todo: any good?
        }, 400
    except Exception:
        return _error_response(
            conf.ERROR_VALIDATING,
            'an error occurred attempting to validate the generated article-json')

    # send to lax
    try:
        msid, version = utils.version_from_path(filename)
        token = str(uuid.uuid4())
        args = {
            # the *most* important parameter. don't modify lax.
            'dry_run': True,

            # a forced ingest by default
            'action': conf.INGEST,
            'force': True,

            # article details
            'id': msid,
            'version': int(version),
            'article_json': article_json,

            'token': token,
        }
        lax_resp = adaptor.call_lax(**args)

        context = utils.renkeys(lax_resp, [("message", "lax-message")])
        LOG.info("lax response", extra=context)

        api_resp = utils.subdict(lax_resp, ['status', 'code', 'message', 'trace'])

        if api_resp['status'] in [conf.INVALID, conf.ERROR]:
            # failure
            return api_resp, 400

        # success
        # 'code', 'message' and 'trace' are not returned by lax on success, just 'status'
        api_resp['ajson'] = json.loads(article_json)['article']
        api_resp['override'] = override
        return api_resp, 200

    except Exception:
        # lax returned something indecipherable
        return _error_response(
            conf.ERROR_COMMUNICATING,
            "lax responded with something that couldn't be decoded")