def request_args(request, **overrides):
    """Extract and validate the paging arguments found in `request.GET`.

    Defaults come from `settings.API_OPTS`; any keyword given in `overrides`
    replaces the default of the same name. Returns whatever `render_item`
    produces for the 'page', 'per_page' and 'order' keys. Validation failures
    are reported through `ensure`.
    """
    options = dict(settings.API_OPTS)
    options.update(overrides)

    def ispositiveint(v):
        # accept anything `isint` recognises whose integer value is above zero
        is_valid = isint(v) and int(v) > 0
        ensure(is_valid, "expecting positive integer, got: %s" % v)
        return int(v)

    def inrange(minpp, maxpp):
        # builds a validator for the inclusive range [minpp, maxpp]
        def fn(v):
            within_bounds = minpp <= v <= maxpp
            ensure(within_bounds, "value must be between %s and %s" % (minpp, maxpp))
            return v
        return fn

    def asc_or_desc(val):
        # surrounding whitespace and letter case are ignored
        direction = val.strip().upper()
        ensure(direction in ('ASC', 'DESC'),
               "expecting either 'asc' or 'desc' for 'order' parameter, got: %s" % val)
        return direction

    # each entry is a pipeline: fetch the value (with a default), then validate it
    page_spec = [p('page', options['page_num']), ispositiveint]
    per_page_spec = [
        p('per-page', options['per_page']),
        ispositiveint,
        inrange(options['min_per_page'], options['max_per_page']),
    ]
    order_spec = [p('order', options['order_direction']), str, asc_or_desc]

    spec = {
        'page': page_spec,
        'per_page': per_page_spec,
        'order': order_spec,
    }
    return render_item(spec, request.GET)
def ajson_publish_events(av, force=False):
    """Render every PUBLISH_EVENTS description for `av` and hand them to `add_many`.

    Each description is rendered against a fresh context dict holding the
    article version's article and the `force` flag. Returns the result of
    `add_many(av.article, <rendered structs>, force)`.
    """
    rendered = []
    for event_desc in PUBLISH_EVENTS:
        # a new context per description, so one rendering cannot affect the next
        context = {'article': av.article, 'forced?': force}
        rendered.append(render.render_item(event_desc, context))
    return add_many(av.article, rendered, force)
def _ingest_objects(data, create, update, force, log_context):
    """ingest helper.

    Creates or updates the Article and ArticleVersion described by
    `data['article']` and records the article's ingest events.

    Returns a 4-tuple of `(article version, created, updated,
    previous_article_versions)`, where `created`/`updated` are the flags
    returned for the article *version* and `previous_article_versions` is a
    list ordered by version (earliest -> latest). The list is empty unless
    the article itself was updated.

    Raises `StateError` with `codes.PARSE_ERROR` if a `KeyError` occurs
    anywhere during the above.
    """
    # WARN: log_context is a mutable dict and is written to below

    # work on a private copy of the given data
    data = copy.deepcopy(data)

    # this *could* be scraped from the provided data, but we have no time to
    # normalize journal names so we sometimes get duplicate journals in the db.
    # safer to disable until needed.
    journal = logic.journal()

    try:
        article_data = data['article']

        article, _, article_updated = create_or_update(
            models.Article,
            render.render_item(ARTICLE, article_data),
            ['manuscript_id', 'journal'],
            create,
            update,
            journal=journal)
        log_context['article'] = article

        if article_updated:
            # earliest -> latest
            previous_article_versions = list(
                article.articleversion_set.all().order_by('version'))
        else:
            previous_article_versions = []

        av_struct = render.render_item(ARTICLE_VERSION, article_data)
        # this is an INGEST event and *not* a PUBLISH event. we don't touch the date published.
        del av_struct['datetime_published']

        # commit=False is passed through to create_or_update for the version
        av, created, updated = create_or_update(
            models.ArticleVersion,
            av_struct,
            ['article', 'version'],
            create,
            update,
            commit=False,
            article=article)
        log_context['article-version'] = av

        events.ajson_ingest_events(article, article_data, force)

        return av, created, updated, previous_article_versions

    except KeyError as err:
        raise StateError(
            codes.PARSE_ERROR,
            "failed to scrape article data, key not present: %s" % err)
def request_args(request, **overrides):
    """Extract and validate the paging arguments found in `request.GET`.

    Defaults come from `settings.API_OPTS`; any keyword given in `overrides`
    replaces the default of the same name. Returns whatever `render_item`
    produces for the 'page', 'per_page' and 'order' keys. Validation failures
    are reported through `ensure`, with messages that name the offending
    parameter but do not echo the submitted value.
    """
    options = dict(settings.API_OPTS)
    options.update(overrides)

    def ispositiveint(param):
        # builds a validator whose error message names the given parameter
        message = "expecting positive integer for %r parameter" % param

        def wrap(v):
            is_valid = isint(v) and int(v) > 0
            ensure(is_valid, message)
            return int(v)
        return wrap

    def inrange(minpp, maxpp):
        # builds a validator for the inclusive range [minpp, maxpp]
        def fn(v):
            within_bounds = minpp <= v <= maxpp
            ensure(
                within_bounds,
                "value must be between %s and %s for 'per-page' parameter" % (minpp, maxpp))
            return v
        return fn

    def asc_or_desc(val):
        # only the first four characters of the trimmed, uppercased value are considered
        direction = val.strip().upper()[:4]
        ensure(direction in ('ASC', 'DESC'),
               "expecting either 'asc' or 'desc' for 'order' parameter")
        return direction

    # each entry is a pipeline: fetch the value (with a default), then validate it
    page_spec = [p('page', options['page_num']), ispositiveint('page')]
    per_page_spec = [
        p('per-page', options['per_page']),
        ispositiveint('per-page'),
        inrange(options['min_per_page'], options['max_per_page']),
    ]
    order_spec = [p('order', options['order_direction']), str, asc_or_desc]

    spec = {
        'page': page_spec,
        'per_page': per_page_spec,
        'order': order_spec,
    }
    return render_item(spec, request.GET)
def request_args(request, **overrides):
    """Extract and validate the paging and period arguments found in `request.GET`.

    Defaults come from `settings.API_OPTS`; any keyword given in `overrides`
    replaces the default of the same name. Returns whatever `render_item`
    produces for the 'page', 'per_page', 'order' and 'period' keys.
    Validation failures are reported through `ensure`.
    """
    options = dict(settings.API_OPTS)
    options.update(overrides)

    def ispositiveint(v):
        # accept anything `isint` recognises whose integer value is above zero
        is_valid = isint(v) and int(v) > 0
        ensure(is_valid, "expecting positive integer, got: %s" % v)
        return int(v)

    def inrange(minpp, maxpp):
        # builds a validator for the inclusive range [minpp, maxpp]
        def fn(v):
            within_bounds = minpp <= v <= maxpp
            ensure(within_bounds, "value must be between %s and %s" % (minpp, maxpp))
            return v
        return fn

    def isin(lst):
        # builds a validator that only accepts members of `lst`
        def fn(val):
            is_member = val in lst
            ensure(is_member, "value is not in %r" % (lst, ))
            return val
        return fn

    # each entry is a pipeline: fetch the value (with a default), normalise, validate
    page_spec = [p('page', options['page_num']), ispositiveint]
    per_page_spec = [
        p('per-page', options['per_page']),
        ispositiveint,
        inrange(options['min_per_page'], options['max_per_page']),
    ]
    order_spec = [
        p('order', options['order_direction']),
        uppercase,
        isin(['ASC', 'DESC']),
    ]
    # the 'period' value is read from the 'by' query parameter, defaulting to 'day'
    period_spec = [p('by', 'day'), lowercase, isin(['day', 'month'])]

    spec = {
        'page': page_spec,
        'per_page': per_page_spec,
        'order': order_spec,
        'period': period_spec,
    }
    return render_item(spec, request.GET)
'vor': join(SCHEMA_PATH, 'model/article-vor.v1.json'), 'history': join(SCHEMA_PATH, 'model/article-history.v1.json'), 'list': join(SCHEMA_PATH, 'model/article-list.v1.json') } API_PATH = join(SCHEMA_PATH, 'api.raml') def _load_api_raml(path): # load the api.raml file, ignoring any "!include" commands yaml.add_multi_constructor('', lambda *args: '[disabled]') return yaml.load(open(path, 'r'))['traits']['paged']['queryParameters'] API_OPTS = render_item({ 'per_page': [p('per-page.default'), int], 'min_per_page': [p('per-page.minimum'), int], 'max_per_page': [p('per-page.maximum'), int], 'page_num': [p('page.default'), int], 'order_direction': [p('order.default')], }, _load_api_raml(API_PATH)) # KONG gateway options KONG_AUTH_HEADER = 'KONG-Authenticated' INTERNAL_NETWORK = '10.0.2.0/24' # # notification events # EVENT_BUS = {
def ejp_ingest_events(article, data, force=False):
    """scrapes and inserts events from ejp data.

    Every description in EJP_EVENTS is rendered against `data` and the
    resulting structs are passed to `add_many` with
    `skip_missing_datestamped=True`. Returns the result of `add_many`.

    NOTE: `data` is modified in place; its 'forced?' key is set to `force`
    before rendering.
    """
    data['forced?'] = force
    rendered = []
    for event_desc in EJP_EVENTS:
        rendered.append(render.render_item(event_desc, data))
    return add_many(article, rendered, force, skip_missing_datestamped=True)
def _ingest(data, force=False):
    """ingests article-json. returns a triple of (journal obj, article obj, article version obj)

    unpublished article-version data can be ingested multiple times UNLESS that article version has been published.
    published article-version data can be ingested only if force=True

    Raises `StateError` when a business rule is violated: the previous version
    is still unpublished, the version is out of sequence, or the version is
    already published and `force` is False. A `KeyError` raised outside the
    scraping step is also converted to `StateError`.

    A `KeyError` during scraping is converted to `ValueError`, which, like any
    other unexpected exception, is logged and re-raised."""

    data = copy.deepcopy(data) # we don't want to modify the given data

    create = update = True
    log_context = {}

    try:
        # this *could* be scraped from the provided data, but we have no time to
        # normalize journal names so we sometimes get duplicate journals in the db.
        # safer to disable until needed.
        journal = logic.journal()

        try:
            article_struct = render.render_item(ARTICLE, data['article'])
            article, created, updated = \
                create_or_update(models.Article, article_struct, ['manuscript_id', 'journal'], create, update, journal=journal)

            assert isinstance(article, models.Article)
            log_context['article'] = article

            # stays None unless the article already existed and was updated
            previous_article_versions = None
            if updated:
                previous_article_versions = list(article.articleversion_set.all().order_by('version')) # earliest -> latest

            av_struct = render.render_item(ARTICLE_VERSION, data['article'])
            # this is an INGEST event and *not* a PUBLISH event. we don't touch the date published.
            del av_struct['datetime_published']

            # commit=False: the version is only saved further down, after the rule checks
            av, created, updated = \
                create_or_update(models.ArticleVersion, av_struct, ['article', 'version'],
                                 create, update, commit=False, article=article)
        except KeyError as err:
            raise ValueError("failed to scrape article data, couldn't find key %s" % err)

        assert isinstance(av, models.ArticleVersion)
        log_context['article-version'] = av

        # only update the fragment if this article version has *not* been published *or* if force=True
        update_fragment = not av.published() or force
        merge_result = fragments.add(av, XML2JSON, data['article'], pos=0, update=update_fragment)
        # NOTE(review): the return value of merge_if_valid is discarded; validity
        # is judged from the result of fragments.add above - confirm this is intended
        fragments.merge_if_valid(av)
        invalid_ajson = not merge_result
        if invalid_ajson:
            # Logger.warn is a deprecated alias; Logger.warning is the supported name
            LOG.warning("this article failed to merge it's fragments into a valid result and cannot be PUBLISHed in it's current state.", extra=log_context)

        # enforce business rules

        if created:
            if previous_article_versions:
                last_version = previous_article_versions[-1]
                log_context['previous-version'] = last_version

                if not last_version.published():
                    # uhoh. we're attempting to create an article version before previous version of that article has been published.
                    msg = "refusing to ingest new article version when previous article version is still unpublished."
                    LOG.error(msg, extra=log_context)
                    raise StateError(msg)

                if not last_version.version + 1 == av.version:
                    # uhoh. we're attempting to create an article version out of sequence
                    msg = "refusing to ingest new article version out of sequence."
                    log_context.update({
                        'given-version': av.version,
                        'expected-version': last_version.version + 1})
                    LOG.error(msg, extra=log_context)
                    raise StateError(msg)

            # no other versions of article exist
            else:
                if not av.version == 1:
                    # uhoh. we're attempting to create our first article version and it isn't a version 1
                    msg = "refusing to ingest new article version out of sequence. no other article versions exist so I expect a v1"
                    log_context.update({
                        'given-version': av.version,
                        'expected-version': 1})
                    LOG.error(msg, extra=log_context)
                    raise StateError(msg)

        elif updated:
            # this version of the article already exists
            # this is only a problem if the article version has already been published
            if av.published():
                # uhoh. we've received an INGEST event for a previously published article version
                if not force:
                    # unless our arm is being twisted, die.
                    msg = "refusing to ingest new article data on an already published article version."
                    LOG.error(msg, extra=log_context)
                    raise StateError(msg)

        # passed all checks, save
        av.save()

        # notify event bus that article change has occurred
        transaction.on_commit(partial(events.notify, article))

        return journal, article, av

    except KeyError as err:
        # *probably* an error while scraping ...
        raise StateError("failed to scrape given article data: %s" % err)

    except StateError:
        # already logged where it was raised; pass it through untouched
        raise

    except Exception:
        LOG.exception("unhandled exception attempting to ingest article-json", extra=log_context)
        raise
} API_PATH = join(SCHEMA_PATH, 'api.raml') def _load_api_raml(path): # load the api.raml file, ignoring any "!include" commands yaml.add_multi_constructor('', lambda *args: '[disabled]') return yaml.load( open(path, 'r'), Loader=yaml.FullLoader)['traits']['paged']['queryParameters'] API_OPTS = render_item( { 'per_page': [p('per-page.default'), int], 'min_per_page': [p('per-page.minimum'), int], 'max_per_page': [p('per-page.maximum'), int], 'page_num': [p('page.default'), int], 'order_direction': [p('order.default')], }, _load_api_raml(API_PATH)) LOG_FILE = join(PROJECT_DIR, 'elife-metrics.log') if ENV != DEV: LOG_FILE = join('/var/log/', 'elife-metrics.log') DEBUG_LOG_FILE = join(PROJECT_DIR, 'debugme.log') # whereever our log files are, ensure they are writable before we do anything else. def writable(path): os.system('touch ' + path) # https://docs.python.org/2/library/os.html