def test_article_import_update_of_many_versions(self):
    "importing v1, v2 and v3 of one article yields a single article with three dated versions"
    fixture_root = join(self.fixture_dir, "ppp-09066")
    fixture_names = (
        "elife-09066-v1.json",
        "elife-09066-v2.json",
        "elife-09066-v3.json",
    )
    # import strictly in version order
    for fixture_path in [join(fixture_root, name) for name in fixture_names]:
        ingestor.import_article_from_json_path(self.journal, fixture_path)

    self.assertEqual(models.Article.objects.count(), 1)
    self.assertEqual(models.ArticleVersion.objects.count(), 3)

    # versions 1 and 2 are POA, version 3 is VOR
    avlist = [models.ArticleVersion.objects.get(version=n) for n in (1, 2, 3)]
    expected_pubdates = [
        "2015-12-19T00:00:00Z",
        "2015-12-23T00:00:00Z",
        "2016-02-04T00:00:00Z",
    ]
    for avobj, pubdate in zip(avlist, expected_pubdates):
        self.assertEqual(avobj.datetime_published, utils.todt(pubdate))

    # all three versions should hang off the same article, and that article's
    # publication date should be the date of the earliest article version
    earliest = avlist[0]
    for avobj in avlist:
        self.assertEqual(earliest.datetime_published, avobj.article.datetime_published)
def add_or_update_article(self, **adata):
    "builds a stub article + article-version from `adata` for use in tests and returns the article version"
    # legacy key names are renamed to the ones the article-json uses
    renkeys(adata, [
        ('pub-date', 'published'),
        ('update', 'versionDate'),
    ])

    # the id comes from the doi when one is given, otherwise from the manuscript id
    if 'doi' in adata:
        msid = utils.doi2msid(adata['doi'])
    else:
        msid = adata['manuscript_id']

    defaults = {
        'id': msid,
        'volume': 1,
        'type': 'research-article',
        'title': '[default]',
        'version': 1,
        'status': models.VOR,
        'published': '2012-01-01T00:00:00Z'
    }
    # anything the caller passed in wins over the defaults
    struct = dict(defaults, **adata)
    # 'journal' can't be serialized and isn't utilised anyway
    delall(struct, ['journal'])

    # with VALIDATE_FAILS_FORCE off, bad ajson won't fail the ingest
    with self.settings(VALIDATE_FAILS_FORCE=False):
        av = ajson_ingestor.ingest_publish({'article': struct}, force=True)
        # pin the publication date to the one in the stub
        av.datetime_published = utils.todt(struct['published'])
        av.save()
        return av
def test_article_publish_v1(self):
    "a v1 article that has been ingested but not published can be published"
    def assert_one_of_each():
        # exactly one journal, one article and one article version exist
        for model in (models.Journal, models.Article, models.ArticleVersion):
            self.assertEqual(model.objects.count(), 1)

    av = ajson_ingestor.ingest(self.ajson)
    assert_one_of_each()
    self.assertFalse(av.published())

    # publish
    av = ajson_ingestor.publish(self.msid, self.version)

    # publishing must not have created any further objects
    assert_one_of_each()
    self.assertTrue(av.published())
    self.assertTrue(isinstance(av.datetime_published, datetime))

    # the pubdate of a freshly published v1 article is the one found in the given json
    av = self.freshen(av)
    json_pubdate = utils.todt(self.ajson['article']['published'])
    expected_pubdate = utils.ymd(json_pubdate)
    self.assertEqual(expected_pubdate, utils.ymd(av.datetime_published))
def test_article_publish_succeeds_for_published_article_if_forced(self):
    "an already published article can be published again, but only when forced"
    # ingest hands back a triple here; only the article version (last item) is needed
    ingest_result = ajson_ingestor.ingest(self.ajson)
    _, _, av = ingest_result

    av = ajson_ingestor.publish(self.msid, self.version)
    av = self.freshen(av)
    original_pubdate = utils.todt(self.ajson['article']['published'])
    expected_pubdate = utils.ymd(original_pubdate)
    self.assertEqual(expected_pubdate, utils.ymd(av.datetime_published))

    # a forced re-publish of the same data leaves the pubdate untouched
    av = ajson_ingestor.publish(self.msid, self.version, force=True)
    av = self.freshen(av)
    self.assertEqual(expected_pubdate, utils.ymd(av.datetime_published))

    # ingesting a different pubdate and forcing publication changes it
    new_pubdate = utils.todt('2016-01-01')
    self.ajson['article']['published'] = new_pubdate
    ajson_ingestor.ingest_publish(self.ajson, force=True)
    av = self.freshen(av)
    self.assertEqual(utils.ymd(new_pubdate), utils.ymd(av.datetime_published))
def test_article_version_data(self):
    "the article version created by an import carries the expected field values"
    art, _ = ingestor.import_article_from_json_path(self.journal, self.json_fixture)
    # (attribute name, expected value) pairs, checked in this order
    expectations = [
        ('article', art),
        ('datetime_published', utils.todt('2012-12-10')),
        ('status', 'poa'),
        ('version', 1),
    ]
    avobj = models.ArticleVersion.objects.get(article=art, version=1)
    for attr, expected in expectations:
        actual = getattr(avobj, attr)
        self.assertEqual(actual, expected)
def test_article_updated(self):
    "importing with update=True changes the existing article rather than creating another"
    def assert_attrs(obj, expectations):
        # every (attribute, expected value) pair must match what is on `obj`
        for attr, expected in expectations:
            self.assertEqual(getattr(obj, attr), expected)

    self.assertEqual(0, models.Article.objects.count())

    # initial import
    art, ver = ingestor.import_article_from_json_path(self.journal, self.json_fixture)
    assert_attrs(ver, [
        ('title', "A meh life"),
        ('status', "poa"),
        ('version', 1),
        ("datetime_published", utils.todt("2012-12-10")),
    ])
    self.assertEqual(1, models.Article.objects.count())

    # attempt the update
    art, ver = ingestor.import_article_from_json_path(self.journal, self.update_fixture, update=True)
    assert_attrs(ver, [
        ('title', "A good life"),
        ('status', "vor"),
        ("datetime_published", utils.todt("2012-12-13")),
    ])
    # still just the one article
    self.assertEqual(1, models.Article.objects.count())
def test_todt(self):
    "date+time strings are parsed by `utils.todt` into utc datetimes"
    # a string without timezone information comes back as utc
    naive_case = (
        "2001-01-01",
        datetime(2001, 1, 1, tzinfo=pytz.utc),
    )
    # a string with a non-utc offset comes back shifted to utc
    offset_case = (
        "2001-01-01T23:30:30+09:30",
        datetime(2001, 1, 1, 14, 0, 30, tzinfo=pytz.utc),
    )
    for dtstr, expected in (naive_case, offset_case):
        actual = utils.todt(dtstr)
        self.assertEqual(actual, expected)
def test_article_publish_succeeds_for_published_article_if_forced(self):
    "an already published article can be published again, but only when forced"
    def pubdate_ymd(av):
        # year-month-day form of the article version's publication date
        return utils.ymd(av.datetime_published)

    av = ajson_ingestor.ingest(self.ajson)
    av = ajson_ingestor.publish(self.msid, self.version)
    av = self.freshen(av)
    original_pubdate = utils.todt(self.ajson['article']['published'])
    expected_pubdate = utils.ymd(original_pubdate)
    self.assertEqual(expected_pubdate, pubdate_ymd(av))

    # a forced re-publish of the same data leaves the pubdate untouched
    av = ajson_ingestor.publish(self.msid, self.version, force=True)
    av = self.freshen(av)
    self.assertEqual(expected_pubdate, pubdate_ymd(av))

    # ingesting a different pubdate and forcing publication changes it
    new_pubdate = utils.todt('2016-01-01')
    self.ajson['article']['published'] = new_pubdate
    ajson_ingestor.ingest_publish(self.ajson, force=True)
    av = self.freshen(av)
    self.assertEqual(utils.ymd(new_pubdate), pubdate_ymd(av))
def test_ingest_from_cli(self):
    "ingest script requires the --ingest flag and a source of data"
    args = [self.nom, '--ingest', '--id', self.msid, '--version', self.version, self.ajson_fixture1]
    errcode, stdout = self.call_command(*args)
    self.assertEqual(errcode, 0)

    # article has been ingested
    self.assertEqual(models.ArticleVersion.objects.count(), 1)

    # message returned is json encoded with all the right keys and values
    result = json.loads(stdout.getvalue())
    self.assertTrue(utils.has_all_keys(result, ['status', 'id', 'datetime']))
    self.assertEqual(result['status'], 'ingested')

    # the date and time is roughly the same as right now, ignoring microseconds
    expected_datetime = utils.utcnow()
    actual_datetime = utils.todt(result['datetime'])
    delta = expected_datetime - actual_datetime
    threshold = 2 # seconds
    # `timedelta.seconds` is only the seconds *component* of the difference: it ignores
    # whole days and is 86399 for a slightly negative delta. compare the total elapsed
    # time instead, truncated to whole seconds and regardless of sign.
    self.assertLessEqual(int(abs(delta.total_seconds())), threshold)
def setUp(self):
    "ingests and publishes a fixed set of article fixtures and records the expected POA/VOR counts"
    self.journal = publogic.journal()

    # '<msid>.<version>' pairs, one per fixture file
    import_all = [
        '00353.1', # discussion, VOR
        '00385.1', # commentary, VOR
        '01328.1', # correction, VOR
        '02619.1', # editorial, VOR

        '03401.1', # research, POA
        '03401.2', # POA
        '03401.3', # VOR

        '03665.1', # research, VOR

        '06250.1', # research, POA
        '06250.2', # POA
        '06250.3', # VOR

        '07301.1', # research, VOR

        '08025.1', # research, POA
        '08025.2', # VOR

        '09571.1', # research, POA
    ]
    for msid_version in import_all:
        # '03401.2' => 'elife-03401-v2.xml.json'
        fixture_name = "elife-%s.xml.json" % msid_version.replace('.', '-v')
        fixture_path = join(self.fixture_dir, 'ppp2', fixture_name)
        # NOTE(review): the original comment here read 'strip relations' -
        # presumably `load_ajson` does that, confirm.
        ajson = self.load_ajson(fixture_path)
        ajson_ingestor.ingest_publish(ajson)

    # we need to coerce the data of the non-v1 articles a little
    # as we removed the eif ingestor that bypassed business logic
    vor_pubdates = [
        (3401, 3, "2014-08-01"),
        (8025, 2, "2015-06-16"),
    ]
    for msid, ver, dtstr in vor_pubdates:
        av = models.ArticleVersion.objects.get(article__manuscript_id=msid, version=ver)
        av.datetime_published = utils.todt(dtstr)
        av.save()

    # totals the tests compare against, matching the fixture list above
    self.vor_version_count = 9
    self.poa_version_count = 6

    self.poa_art_count = 1
    self.vor_art_count = 9
def test_ingest_from_cli(self):
    "ingest script requires the --ingest flag and a source of data"
    args = [
        self.nom, '--ingest', '--id', self.msid, '--version', self.version,
        self.ajson_fixture1
    ]
    errcode, stdout = self.call_command(*args)
    self.assertEqual(errcode, 0)

    # article has been ingested
    self.assertEqual(models.ArticleVersion.objects.count(), 1)

    # message returned is json encoded with all the right keys and values
    result = json.loads(stdout)
    self.assertTrue(
        utils.has_all_keys(result, ['status', 'id', 'datetime']))
    self.assertEqual(result['status'], 'ingested')

    # the date and time is roughly the same as right now, ignoring microseconds
    expected_datetime = utils.utcnow()
    actual_datetime = utils.todt(result['datetime'])
    delta = expected_datetime - actual_datetime
    threshold = 2 # seconds
    # `timedelta.seconds` is only the seconds *component* of the difference: it ignores
    # whole days and is 86399 for a slightly negative delta. compare the total elapsed
    # time instead, truncated to whole seconds and regardless of sign.
    self.assertLessEqual(int(abs(delta.total_seconds())), threshold)
def test_article_publish_v1(self):
    "a v1 article that has been ingested but not published can be published"
    counted_models = (models.Journal, models.Article, models.ArticleVersion)

    # ingest hands back a triple here; only the article version (last item) is needed
    ingest_result = ajson_ingestor.ingest(self.ajson)
    _, _, av = ingest_result

    for model in counted_models:
        self.assertEqual(model.objects.count(), 1)
    self.assertFalse(av.published())

    # publish
    av = ajson_ingestor.publish(self.msid, self.version)

    # publishing must not have created any further objects
    for model in counted_models:
        self.assertEqual(model.objects.count(), 1)

    self.assertTrue(av.published())
    self.assertTrue(isinstance(av.datetime_published, datetime))

    # the pubdate of a freshly published v1 article is the one found in the given json
    av = self.freshen(av)
    json_pubdate = utils.todt(self.ajson['article']['published'])
    expected_pubdate = utils.ymd(json_pubdate)
    self.assertEqual(expected_pubdate, utils.ymd(av.datetime_published))
def _publish(msid, version, force=False):
    """stamps the article version `msid`/`version` with a `datetime_published` value and returns it.

    for a v1 the date is read from the 'published' field of the stored xml->json fragment.
    for later versions an existing pubdate is kept when force-publishing, otherwise RIGHT NOW is used.

    an already published article version is only published again when `force` is true.
    a missing article version, a missing fragment and invalid article-json are re-raised as `StateError`."""
    try:
        av = models.ArticleVersion.objects.get(article__manuscript_id=msid, version=version)

        if av.published() and not force:
            raise StateError("refusing to publish an already published article version")

        # NOTE: the xml->json fragment is the *only* fragment consulted
        # when determining the publication date.
        ajson = fragments.get(av, XML2JSON)

        if version == 1:
            # a v1 *will always* carry a published date in its json.
            # take it from the stored (but unpublished) article-json.
            pubdate = utils.todt(ajson.get('published'))
            if not pubdate:
                raise StateError("found 'published' value in article-json, but it's either null or unparsable as a datetime")

        elif av.published() and force:
            # a > v1 that is already published and is being force-published
            if False and 'versionDate' in ajson: # deliberately disabled for now
                # FUTURE CASE: use the 'versionDate' value when the article-json has one.
                # as of 2016-10-21 version history IS NOT captured in the xml, so the
                # bot-lax-adaptor won't parse it and it won't find its way here.
                pubdate = utils.todt(ajson['versionDate'])
                if not pubdate:
                    raise StateError("found 'versionDate' value in article-json, but it's either null or unparseable as a datetime")
            else:
                # CURRENT CASE
                # keep the pubdate lax already set and ignore anything in the ajson.
                # a changed pubdate has to come from the xml (see the case above)
                pubdate = av.datetime_published

        else:
            # CURRENT CASE
            # a > v1 that hasn't been published yet is published as of RIGHT NOW.
            pubdate = utils.utcnow()

        av.datetime_published = pubdate
        av.save()

        # merge the available fragments so they can be served.
        # errors are tolerated when the publish is being forced
        fragments.merge_if_valid(av, quiet=force)

        # tell the event bus the article changed, once the transaction commits
        notify_article_changed = partial(events.notify, av.article)
        transaction.on_commit(notify_article_changed)

        return av

    except ValidationError:
        raise StateError("refusing to publish an article '%sv%s' with invalid article-json" % (msid, version))

    except models.ArticleFragment.DoesNotExist:
        raise StateError("no 'xml->json' fragment found. being strict and failing this publish. please INGEST!")

    except models.ArticleVersion.DoesNotExist:
        # the article version being published doesn't exist ...
        raise StateError("refusing to publish an article '%sv%s' that doesn't exist" % (msid, version))
def _publish(msid, version, force=False) -> models.ArticleVersion:
    """stamps the article version `msid`/`version` with a `datetime_published` value and returns it.

    for a v1 the date is read from the 'published' field of the stored xml->json fragment.
    for later versions an existing pubdate is kept when force-publishing, otherwise RIGHT NOW is used.

    an already published article version is only published again when `force` is true.
    a missing article version, a missing fragment and invalid article-json are re-raised
    as a `StateError` carrying one of the `codes` values."""
    try:
        av = models.ArticleVersion.objects.get(article__manuscript_id=msid, version=version)

        if av.published() and not force:
            raise StateError(
                codes.ALREADY_PUBLISHED,
                "refusing to publish an already published article version")

        # NOTE: the xml->json fragment is the *only* fragment consulted
        # when determining the publication date.
        ajson = fragments.get(av, XML2JSON).fragment

        if version == 1:
            # a v1 *will always* carry a published date in its json.
            # take it from the stored (but unpublished) article-json.
            pubdate = utils.todt(ajson.get('published'))
            if not pubdate:
                raise StateError(
                    codes.PARSE_ERROR,
                    "found 'published' value in article-json, but it's either null or unparsable as a date+time"
                )

        elif av.published() and force:
            # a > v1 that is already published and is being force-published
            if False and 'versionDate' in ajson: # deliberately disabled for now
                # FUTURE CASE: use the 'versionDate' value when the article-json has one.
                # as of 2016-10-21 version history IS NOT captured in the xml, so the
                # bot-lax-adaptor won't parse it and it won't find its way here.
                pubdate = utils.todt(ajson['versionDate'])
                if not pubdate:
                    raise StateError(
                        codes.PARSE_ERROR,
                        "found 'versionDate' value in article-json, but it's either null or unparseable as a datetime"
                    )
            else:
                # CURRENT CASE
                # keep the pubdate lax already set and ignore anything in the ajson.
                # a changed pubdate has to come from the xml (see the case above)
                pubdate = av.datetime_published

        else:
            # CURRENT CASE
            # a > v1 that hasn't been published yet is published as of RIGHT NOW.
            pubdate = utils.utcnow()

        av.datetime_published = pubdate
        av.save()

        events.ajson_publish_events(av, force)

        # merge the available fragments so they can be served.
        # errors are tolerated when the publish is being forced,
        # unless the VALIDATE_FAILS_FORCE setting is on.
        if settings.VALIDATE_FAILS_FORCE:
            quiet = False
        else:
            quiet = force
        fragments.set_article_json(av, quiet=quiet)

        # tell the event bus the article changed, once the transaction commits
        notify_article_changed = partial(aws_events.notify_all, av)
        transaction.on_commit(notify_article_changed)

        return av

    except ValidationError as err:
        # the problem isn't that the ajson is invalid, it's that we've allowed invalid ajson into the system
        message = "refusing to publish an article '%sv%s' with invalid article-json: %s" % (msid, version, err)
        raise StateError(codes.INVALID, message, err)

    except models.ArticleFragment.DoesNotExist:
        raise StateError(
            codes.NO_RECORD,
            "no 'xml->json' fragment found. being strict and failing this publish. please INGEST!"
        )

    except models.ArticleVersion.DoesNotExist:
        # the article version being published doesn't exist ...
        raise StateError(
            codes.NO_RECORD,
            "refusing to publish an article '%sv%s' that doesn't exist" % (msid, version))