def relate_using_msid(av, msid, quiet=False):
    """Relate the ArticleVersion `av` to the Article whose manuscript id is `msid`.

    Does nothing (returns None) when `settings.ENABLE_RELATIONS` is falsy.

    If no Article with that manuscript id exists, a stub Article is passed to
    `create_or_update`, which is only allowed to create it when
    `settings.RELATED_ARTICLE_STUBS` is set.

    Returns whatever `relate` returns when a target article is available.
    When no target article is available, raises `StateError` with
    `codes.NO_RECORD`, unless `quiet` is truthy, in which case the problem is
    logged as an error and None is returned.
    """
    if not settings.ENABLE_RELATIONS:
        return None

    try:
        target = models.Article.objects.get(manuscript_id=msid)
    except models.Article.DoesNotExist:
        # the article we want to relate this ArticleVersion to doesn't exist yet.
        # describe a stub article so the relationship has something to point at.
        stub_data = {
            'manuscript_id': msid,
            # re-using the journal of the given article version saves us
            # having to refer to logic.py and circular dependencies
            'journal': av.article.journal,
            'doi': msid2doi(msid),
        }
        target, _created, _updated = create_or_update(
            models.Article,
            stub_data,
            create=settings.RELATED_ARTICLE_STUBS,
            update=False,
        )

    if target:
        return relate(av, target)

    # nothing to relate to: either complain loudly or just log it
    msg = "article with msid %r not found (and not created) attempting to relate %r => %s" % (
        msid, av, msid)
    if quiet:
        LOG.error(msg)
        return None
    raise StateError(codes.NO_RECORD, msg)
def relate(av, a):
    """Create a relationship between an ArticleVersion `av` and an Article `a`.

    The relation is created if missing and never updated (`create=True`,
    `update=False`). Returns `first(...)` of the `create_or_update` result,
    presumably the `ArticleVersionRelation` object itself.
    """
    relation_data = {'articleversion': av, 'related_to': a}
    result = create_or_update(
        models.ArticleVersionRelation,
        relation_data,
        create=True,
        update=False,
    )
    return first(result)
def _ingest_objects(data, create, update, force, log_context):
    """Ingest helper that builds the Article and ArticleVersion for the given article-json.

    `data` is deep-copied before use, so the caller's structure is not modified.
    `create` and `update` are forwarded to `create_or_update` for both objects.
    `force` is forwarded to `events.ajson_ingest_events`.

    WARN: `log_context` is a mutable dict and is modified in place: the keys
    'article' and 'article-version' are set as those objects become available.

    Returns a 4-tuple of
    `(article version, created, updated, previous_article_versions)` where
    `created`/`updated` describe the article version and
    `previous_article_versions` is a list ordered by version (earliest first)
    that is only populated when the Article itself was updated.
    The article version is requested with `commit=False`; presumably the caller
    is responsible for saving it.

    Raises `StateError` with `codes.PARSE_ERROR` if a KeyError occurs while
    processing the data.
    """
    payload = copy.deepcopy(data)

    # this *could* be scraped from the provided data, but we have no time to
    # normalize journal names so we sometimes get duplicate journals in the db.
    # safer to disable until needed.
    journal = logic.journal()

    try:
        article_json = payload['article']

        article_fields = render.render_item(ARTICLE, article_json)
        article, _article_created, article_updated = create_or_update(
            models.Article,
            article_fields,
            ['manuscript_id', 'journal'],
            create,
            update,
            journal=journal,
        )
        log_context['article'] = article

        # earliest -> latest
        if article_updated:
            version_qs = article.articleversion_set.all().order_by('version')
            previous_article_versions = list(version_qs)
        else:
            previous_article_versions = []

        version_fields = render.render_item(ARTICLE_VERSION, article_json)
        # this is an INGEST event and *not* a PUBLISH event. we don't touch the date published.
        del version_fields['datetime_published']

        av, created, updated = create_or_update(
            models.ArticleVersion,
            version_fields,
            ['article', 'version'],
            create,
            update,
            commit=False,
            article=article,
        )
        log_context['article-version'] = av

        events.ajson_ingest_events(article, article_json, force)

        return av, created, updated, previous_article_versions

    except KeyError as err:
        raise StateError(
            codes.PARSE_ERROR,
            "failed to scrape article data, key not present: %s" % err)
def add(art, event, value=None, datetime_event=None):
    """Create or update an ArticleEvent for the article `art` and return it.

    `art` must be truthy (checked with `utils.ensure`).
    `datetime_event` falls back to `utils.utcnow()` when not given (or falsy).
    Events are keyed on article, event and datetime_event.
    """
    utils.ensure(art, "need art")

    if not datetime_event:
        datetime_event = utils.utcnow()

    event_data = {
        'event': event,
        # NOTE(review): a missing value is stored as the string 'None' because
        # of the str() call - confirm this is intended before changing it.
        'value': str(value),
        'datetime_event': datetime_event,
    }
    key_fields = ['article', 'event', 'datetime_event']

    # both creating and updating are permitted
    ae, _created, _updated = create_or_update(
        models.ArticleEvent, event_data, key_fields, True, True, article=art)
    return ae
def associate(av, citation):
    """Associate the ArticleVersion `av` with an external-link citation.

    `citation` must be a dict containing a 'uri' key; anything else fails the
    `ensure` check. The relation is keyed on (articleversion, uri) and is
    created if missing or updated if it already exists.

    Returns the first element of the `create_or_update` result, presumably the
    `ArticleVersionExtRelation` object.
    """
    is_valid = isinstance(citation, dict) and 'uri' in citation
    # wrap the value in a 1-tuple: '%r' % citation breaks when citation is
    # itself a tuple (it is unpacked as the format arguments), which would
    # raise a TypeError here instead of reporting the real validation failure.
    ensure(
        is_valid,
        "expecting a valid external-link type citation, got: %r" % (citation,))

    data = {
        'articleversion': av,
        'uri': citation['uri'],
        'citation': citation,
    }
    key = ['articleversion', 'uri']
    avr, _, _ = create_or_update(
        models.ArticleVersionExtRelation, data, key, create=True, update=True)
    return avr
def _ingest(data, force=False):
    """ingests article-json. returns a triple of (journal obj, article obj, article version obj)

    unpublished article-version data can be ingested multiple times UNLESS that
    article version has been published.

    published article-version data can be ingested only if force=True

    The given `data` is deep-copied and never modified.

    Raises `StateError` when a business rule is violated (previous version
    unpublished, version out of sequence, re-ingesting a published version
    without `force`) or when a KeyError escapes the later processing steps.
    A KeyError while building the article/article version is re-raised as a
    `ValueError`. Any exception other than KeyError and StateError is logged
    with its traceback and then re-raised unchanged.
    """
    data = copy.deepcopy(data)  # we don't want to modify the given data

    create = update = True
    log_context = {}

    try:
        # this *could* be scraped from the provided data, but we have no time to
        # normalize journal names so we sometimes get duplicate journals in the db.
        # safer to disable until needed.
        journal = logic.journal()

        try:
            article_struct = render.render_item(ARTICLE, data['article'])
            article, created, updated = \
                create_or_update(models.Article, article_struct, ['manuscript_id', 'journal'], create, update, journal=journal)

            assert isinstance(article, models.Article)
            log_context['article'] = article

            # only known when the article already existed and was updated
            previous_article_versions = None
            if updated:
                previous_article_versions = list(article.articleversion_set.all().order_by('version'))  # earliest -> latest

            av_struct = render.render_item(ARTICLE_VERSION, data['article'])
            # this is an INGEST event and *not* a PUBLISH event. we don't touch the date published.
            del av_struct['datetime_published']

            # from here on `created` and `updated` describe the article *version*.
            # commit=False: the version is only saved once the checks below pass.
            av, created, updated = \
                create_or_update(models.ArticleVersion, av_struct, ['article', 'version'],
                                 create, update, commit=False, article=article)
        except KeyError as err:
            raise ValueError("failed to scrape article data, couldn't find key %s" % err)

        assert isinstance(av, models.ArticleVersion)
        log_context['article-version'] = av

        # only update the fragment if this article version has *not* been published *or* if force=True
        update_fragment = not av.published() or force
        merge_result = fragments.add(av, XML2JSON, data['article'], pos=0, update=update_fragment)
        fragments.merge_if_valid(av)
        invalid_ajson = not merge_result
        if invalid_ajson:
            # Logger.warn is a deprecated alias (removed in Python 3.13); use Logger.warning
            LOG.warning("this article failed to merge it's fragments into a valid result and cannot be PUBLISHed in it's current state.", extra=log_context)

        # enforce business rules

        if created:
            if previous_article_versions:
                last_version = previous_article_versions[-1]
                log_context['previous-version'] = last_version

                if not last_version.published():
                    # uhoh. we're attempting to create an article version before previous version of that article has been published.
                    msg = "refusing to ingest new article version when previous article version is still unpublished."
                    LOG.error(msg, extra=log_context)
                    raise StateError(msg)

                if not last_version.version + 1 == av.version:
                    # uhoh. we're attempting to create an article version out of sequence
                    msg = "refusing to ingest new article version out of sequence."
                    log_context.update({
                        'given-version': av.version,
                        'expected-version': last_version.version + 1})
                    LOG.error(msg, extra=log_context)
                    raise StateError(msg)

            # no other versions of article exist
            else:
                if not av.version == 1:
                    # uhoh. we're attempting to create our first article version and it isn't a version 1
                    msg = "refusing to ingest new article version out of sequence. no other article versions exist so I expect a v1"
                    log_context.update({
                        'given-version': av.version,
                        'expected-version': 1})
                    LOG.error(msg, extra=log_context)
                    raise StateError(msg)

        elif updated:
            # this version of the article already exists
            # this is only a problem if the article version has already been published
            if av.published():
                # uhoh. we've received an INGEST event for a previously published article version
                if not force:
                    # unless our arm is being twisted, die.
                    msg = "refusing to ingest new article data on an already published article version."
                    LOG.error(msg, extra=log_context)
                    raise StateError(msg)

        # passed all checks, save
        av.save()

        # notify event bus that article change has occurred
        transaction.on_commit(partial(events.notify, article))

        return journal, article, av

    except KeyError as err:
        # *probably* an error while scraping ...
        raise StateError("failed to scrape given article data: %s" % err)

    except StateError:
        raise

    except Exception:
        LOG.exception("unhandled exception attempting to ingest article-json", extra=log_context)
        raise