def handle(self, *args, **options):
    """management-command entry point.

    resolves `path` into a list of files, optionally asks the operator to
    confirm, then ingests each file with the importer selected by
    `import_type`, optionally inside a single database transaction.

    NOTE: converted from Python 2 (`print` statements, `raw_input`) to
    Python 3, matching the py3 sibling handler in this project."""
    path = options['path']
    create_articles = options['no_create']
    update_articles = options['no_update']
    import_type = options['import_type']
    atomic = options['no_atomic']
    path_list = utils.resolve_path(path)
    if not options['just_do_it']:
        # interactive confirmation; ctrl-c aborts cleanly.
        try:
            pprint.pprint(path_list)
            print()
            print(import_type.upper(), 'import of', len(path_list), 'files')
            print('create?', create_articles)
            print('update?', update_articles)
            print()
            input('continue? (ctrl-c to exit)')
        except KeyboardInterrupt:
            print()
            exit(0)
    # map each supported import type to its importer function.
    choices = {
        EIF: eif_ingestor.import_article_from_json_path,
        EJP: ejp_ingestor.import_article_list_from_json_path,
        PATCH: eif_ingestor.patch_handler,
        AJSON: None,
    }
    fn = partial(ingest, choices[import_type], logic.journal(),
                 create_articles, update_articles, path_list)
    if atomic:
        # all-or-nothing: roll every import back if any one fails.
        with transaction.atomic():
            fn()
    else:
        fn()
    exit(0)
def test_article_ingest_data(self):
    """ingesting article-json populates the Article and ArticleVersion
    models with the expected values."""
    ajson_ingestor.ingest(self.ajson)
    art = models.Article.objects.get(manuscript_id=20105)

    def check(obj, attr, expected):
        # compare one model attribute against its expected value
        actual = getattr(obj, attr)
        self.assertEqual(
            actual, expected,
            "expecting %r for %r got %r" % (expected, attr, actual))

    for attr, expected in [
            ('journal', logic.journal()),
            ('manuscript_id', 20105),
            ('volume', 5),
            ('doi', '10.7554/eLife.20105'),
            ('date_received', date(year=2016, month=7, day=27)),
            ('date_accepted', date(year=2016, month=10, day=3))]:
        check(art, attr, expected)

    av = art.articleversion_set.all()[0]
    for attr, expected in [
            ('article', art),
            ('title', 'An electrostatic selection mechanism controls sequential kinase signaling downstream of the T cell receptor'),
            ('version', 1),
            ('status', 'poa'),
            ('datetime_published', None)]:
        check(av, attr, expected)
def setUp(self):
    """create three published articles: two VOR (one recent, one old)
    and one recent POA."""
    self.c = Client()
    self.journal = logic.journal()
    an_hour_ago = utils.utcnow() - timedelta(hours=1)
    many_hours_ago = an_hour_ago - timedelta(hours=999)
    fmt = utils.ymdhms

    def article(title, status, doi, pubdate):
        # one article-data struct, always version 1 of this journal
        return {
            'title': title,
            'status': status,
            'version': 1,
            'doi': doi,
            'journal': self.journal,
            'pub-date': fmt(pubdate),
        }

    self.article_data_list = [
        article('foo', 'vor', "10.7554/eLife.00001", an_hour_ago),
        article('bar', 'vor', "10.7554/eLife.00002", many_hours_ago),
        article('baz', 'poa', "10.7554/eLife.00003", an_hour_ago),  # **
    ]
    for article_data in self.article_data_list:
        logic.add_or_update_article(**article_data)
def setUp(self):
    """collect the full path of every fixture file found under the
    'ppp' fixture directory into `self.fixture_list`."""
    self.fixture_list = []
    self.journal = logic.journal()
    for dirpath, _, files in os.walk(join(self.fixture_dir, 'ppp')):
        if not files:
            continue
        # generator expression instead of map+lambda: same behavior, clearer
        self.fixture_list.extend(os.path.join(dirpath, fname) for fname in files)
def handle(self, *args, **options):
    """management-command entry point.

    resolves `path` into a list of files, optionally asks the operator
    to confirm, then ingests each file with the importer selected by
    `import_type`, optionally inside a single database transaction."""
    create_articles = options['no_create']
    update_articles = options['no_update']
    import_type = options['import_type']
    atomic = options['no_atomic']
    path_list = utils.resolve_path(options['path'])

    if not options['just_do_it']:
        # interactive confirmation; ctrl-c aborts cleanly.
        try:
            pprint.pprint(path_list)
            print(import_type.upper(), 'import of', len(path_list), 'files')
            print('create?', create_articles)
            print('update?', update_articles)
            input('continue? (ctrl-c to exit)')
        except KeyboardInterrupt:
            exit(0)

    # map each supported import type to its importer function.
    choices = {
        EJP: ejp_ingestor.import_article_list_from_json_path,
    }
    fn = partial(ingest, choices[import_type], logic.journal(),
                 create_articles, update_articles, path_list)
    if atomic:
        # all-or-nothing: roll every import back if any one fails.
        with transaction.atomic():
            fn()
    else:
        fn()
    exit(0)
def setUp(self):
    """ingest and publish a fixed set of ppp2 fixtures covering several
    article types (discussion, commentary, correction, editorial,
    research) and statuses (POA, VOR)."""
    self.journal = logic.journal()
    import_all = [
        '00353.1',  # discussion, VOR
        '00385.1',  # commentary, VOR
        '01328.1',  # correction, VOR
        '02619.1',  # editorial, VOR
        '03401.1',  # research, POA
        '03401.2',  # POA
        '03401.3',  # VOR
        '03665.1',  # research, VOR
        '06250.1',  # research, POA
        '06250.2',  # POA
        '06250.3',  # VOR
        '07301.1',  # research, VOR
        '08025.1',  # research, POA
        '08025.2',  # VOR
        '09571.1',  # research, POA
    ]
    for msid_ver in import_all:
        # '03401.1' -> 'elife-03401-v1.xml.json'
        fixture_name = "elife-%s.xml.json" % msid_ver.replace('.', '-v')
        fixture_path = join(self.fixture_dir, 'ppp2', fixture_name)
        ajson_ingestor.ingest_publish(self.load_ajson(fixture_path))  # strip relations

    # expected totals for the fixture set above
    self.vor_version_count = 9
    self.poa_version_count = 6
    self.total_version_count = self.vor_version_count + self.poa_version_count
    self.poa_art_count = 1
    self.vor_art_count = 9
    self.total_art_count = self.poa_art_count + self.vor_art_count
def setUp(self):
    """a minimal article-data struct for a v1 article in this journal."""
    self.journal = logic.journal()
    self.article_data = dict(
        title="Molecular architecture of human polycomb repressive complex 2",
        version=1,
        doi="10.7554/eLife.00005",
        journal=self.journal)
def setUp(self):
    """a test client plus a minimal POA article-data struct with an
    explicit pub-date."""
    self.c = Client()
    self.journal = logic.journal()
    # build incrementally; key order matches the original literal
    data = {}
    data['title'] = "Molecular architecture of human polycomb repressive complex 2"
    data['version'] = 1
    data['status'] = 'poa'
    data['doi'] = "10.7554/eLife.00005"
    data['pub-date'] = '2000-01-01'
    data['journal'] = self.journal
    self.article_data = data
def test_unpublished_article_versions_list(self):
    "valid json content is returned"
    # the article history for this msid needs data only the EJP import provides
    ejp_fixture = join(self.fixture_dir, 'dummy-ejp-for-v2-api-fixtures.json')
    ejp_ingestor.import_article_list_from_json_path(
        logic.journal(), ejp_fixture, create=False, update=True)

    url = reverse('v2:article-version-list', kwargs={'id': self.msid2})
    resp = self.ac.get(url)
    self.assertEqual(resp.status_code, 200)
    self.assertEqual(resp.content_type, 'application/vnd.elife.article-history+json;version=1')

    data = json.loads(resp.content)

    # valid data: response must conform to the 'history' schema
    utils.validate(data, SCHEMA_IDX['history'])

    # correct data: this article has two *published*, one *unpublished*
    self.assertEqual(len(data['versions']), 3)
def setUp(self):
    """ingest+publish the ppp2 fixture set, then backfill publication
    dates for the non-v1 VOR versions."""
    self.journal = publogic.journal()
    import_all = [
        '00353.1',  # discussion, VOR
        '00385.1',  # commentary, VOR
        '01328.1',  # correction, VOR
        '02619.1',  # editorial, VOR
        '03401.1',  # research, POA
        '03401.2',  # POA
        '03401.3',  # VOR
        '03665.1',  # research, VOR
        '06250.1',  # research, POA
        '06250.2',  # POA
        '06250.3',  # VOR
        '07301.1',  # research, VOR
        '08025.1',  # research, POA
        '08025.2',  # VOR
        '09571.1',  # research, POA
    ]
    for msid_ver in import_all:
        # '03401.1' -> 'elife-03401-v1.xml.json'
        fixture_name = "elife-%s.xml.json" % msid_ver.replace('.', '-v')
        fixture_path = join(self.fixture_dir, 'ppp2', fixture_name)
        ajson_ingestor.ingest_publish(self.load_ajson(fixture_path))  # strip relations

    # we need to coerce the data of the non-v1 articles a little
    # as we removed the eif ingestor that bypassed business logic
    vor_cases = [
        (3401, 3, "2014-08-01"),
        (8025, 2, "2015-06-16"),
    ]
    for msid, version, datestr in vor_cases:
        av = models.ArticleVersion.objects.get(article__manuscript_id=msid, version=version)
        av.datetime_published = utils.todt(datestr)
        av.save()

    # expected totals for the fixture set above
    self.vor_version_count = 9
    self.poa_version_count = 6
    self.poa_art_count = 1
    self.vor_art_count = 9
def setUp(self):
    """import the full ppp fixture set via the EIF ingestor."""
    self.journal = logic.journal()
    import_all = [
        '00353.1',  # discussion, VOR
        '00385.1',  # commentary, VOR
        '01328.1',  # correction, VOR
        '02619.1',  # editorial, VOR
        '03401.1',  # research, POA
        '03401.2',  # POA
        '03401.3',  # VOR
        '03665.1',  # research, VOR
        '06250.1',  # research, POA
        '06250.2',  # POA
        '06250.3',  # VOR
        '07301.1',  # research, VOR
        '08025.1',  # research, POA
        '08025.2',  # VOR
        '09571.1',  # research, POA
    ]
    for msid_ver in import_all:
        # '03401.1' -> fixture 'ppp/03401.1/elife-03401-v1.json'
        fixture_name = "elife-%s.json" % msid_ver.replace('.', '-v')
        fixture_path = join(self.fixture_dir, 'ppp', msid_ver, fixture_name)
        eif_ingestor.import_article_from_json_path(self.journal, fixture_path)

    # expected totals for the fixture set above
    self.vor_version_count = 9
    self.poa_version_count = 6
    self.total_version_count = self.vor_version_count + self.poa_version_count
    self.poa_art_count = 1
    self.vor_art_count = 9
    self.total_art_count = self.poa_art_count + self.vor_art_count
    self.research_art_count = 6
def _ingest_objects(data, create, update, force, log_context):
    """ingest helper.

    renders and persists the Article and an *uncommitted* ArticleVersion
    from `data`, records ingest events, and returns a quad of
    (article version, created, updated, previous article versions).

    WARN: `log_context` is a mutable dict, updated in place."""
    data = copy.deepcopy(data)
    # this *could* be scraped from the provided data, but we have no time to
    # normalize journal names so we sometimes get duplicate journals in the db.
    # safer to disable until needed.
    journal = logic.journal()
    try:
        article_struct = render.render_item(ARTICLE, data['article'])
        article, created, updated = create_or_update(
            models.Article, article_struct, ['manuscript_id', 'journal'],
            create, update, journal=journal)
        log_context['article'] = article

        # earliest -> latest. empty unless an existing article was updated.
        previous_article_versions = []
        if updated:
            previous_article_versions = list(
                article.articleversion_set.all().order_by('version'))

        av_struct = render.render_item(ARTICLE_VERSION, data['article'])
        # this is an INGEST event and *not* a PUBLISH event. we don't touch the date published.
        del av_struct['datetime_published']
        av, created, updated = create_or_update(
            models.ArticleVersion, av_struct, ['article', 'version'],
            create, update, commit=False, article=article)
        log_context['article-version'] = av

        events.ajson_ingest_events(article, data['article'], force)
        return av, created, updated, previous_article_versions

    except KeyError as err:
        # scrape failed: the given data is missing a required key
        raise StateError(
            codes.PARSE_ERROR,
            "failed to scrape article data, key not present: %s" % err)
def _ingest(data, force=False):
    """ingests article-json. returns a triple of (journal obj, article obj, article version obj)

    unpublished article-version data can be ingested multiple times UNLESS that article version has been published.
    published article-version data can be ingested only if force=True"""
    data = copy.deepcopy(data) # we don't want to modify the given data
    create = update = True
    log_context = {}
    try:
        # this *could* be scraped from the provided data, but we have no time to
        # normalize journal names so we sometimes get duplicate journals in the db.
        # safer to disable until needed.
        journal = logic.journal()
        try:
            article_struct = render.render_item(ARTICLE, data['article'])
            article, created, updated = \
                create_or_update(models.Article, article_struct, ['manuscript_id', 'journal'], create, update, journal=journal)
            assert isinstance(article, models.Article)
            log_context['article'] = article

            # captured *before* the new version is saved so the sequencing
            # rules below can compare against the prior state.
            previous_article_versions = None
            if updated:
                previous_article_versions = list(article.articleversion_set.all().order_by('version')) # earliest -> latest

            av_struct = render.render_item(ARTICLE_VERSION, data['article'])
            # this is an INGEST event and *not* a PUBLISH event. we don't touch the date published.
            del av_struct['datetime_published']
            # commit=False: the ArticleVersion is only saved further down,
            # after all business rules have passed.
            av, created, updated = \
                create_or_update(models.ArticleVersion, av_struct, ['article', 'version'], create, update, commit=False, article=article)
        except KeyError as err:
            raise ValueError("failed to scrape article data, couldn't find key %s" % err)

        assert isinstance(av, models.ArticleVersion)
        log_context['article-version'] = av

        # only update the fragment if this article version has *not* been published *or* if force=True
        update_fragment = not av.published() or force
        merge_result = fragments.add(av, XML2JSON, data['article'], pos=0, update=update_fragment)
        fragments.merge_if_valid(av)
        invalid_ajson = not merge_result
        if invalid_ajson:
            LOG.warn("this article failed to merge it's fragments into a valid result and cannot be PUBLISHed in it's current state.", extra=log_context)

        # enforce business rules
        if created:
            # a brand new article version. it must follow on from the latest
            # existing version, and that version must already be published.
            if previous_article_versions:
                last_version = previous_article_versions[-1]
                log_context['previous-version'] = last_version

                if not last_version.published():
                    # uhoh. we're attempting to create an article version before previous version of that article has been published.
                    msg = "refusing to ingest new article version when previous article version is still unpublished."
                    LOG.error(msg, extra=log_context)
                    raise StateError(msg)

                if not last_version.version + 1 == av.version:
                    # uhoh. we're attempting to create an article version out of sequence
                    msg = "refusing to ingest new article version out of sequence."
                    log_context.update({
                        'given-version': av.version,
                        'expected-version': last_version.version + 1})
                    LOG.error(msg, extra=log_context)
                    raise StateError(msg)

            # no other versions of article exist
            else:
                if not av.version == 1:
                    # uhoh. we're attempting to create our first article version and it isn't a version 1
                    msg = "refusing to ingest new article version out of sequence. no other article versions exist so I expect a v1"
                    log_context.update({
                        'given-version': av.version,
                        'expected-version': 1})
                    LOG.error(msg, extra=log_context)
                    raise StateError(msg)

        elif updated:
            # this version of the article already exists
            # this is only a problem if the article version has already been published
            if av.published():
                # uhoh. we've received an INGEST event for a previously published article version
                if not force:
                    # unless our arm is being twisted, die.
                    msg = "refusing to ingest new article data on an already published article version."
                    LOG.error(msg, extra=log_context)
                    raise StateError(msg)

        # passed all checks, save
        av.save()

        # notify event bus that article change has occurred
        transaction.on_commit(partial(events.notify, article))

        return journal, article, av

    except KeyError as err:
        # *probably* an error while scraping ...
        raise StateError("failed to scrape given article data: %s" % err)

    except StateError:
        # business-rule violation: already logged above, propagate as-is
        raise

    except Exception:
        LOG.exception("unhandled exception attempting to ingest article-json", extra=log_context)
        raise
def setUp(self):
    """fixture paths: a scraped article-json doc plus its ppp update."""
    self.journal = logic.journal()
    fixture = 'elife00353.xml.json'
    self.json_fixture = os.path.join(self.this_dir, 'fixtures', fixture)
    self.update_fixture = join(self.fixture_dir, 'ppp', '00353.1', 'elife-00353-v1.json')
def test_fetch_journal(self):
    """logic.journal() lazily creates the journal record on first access,
    using the name configured in settings."""
    self.assertEqual(0, models.Journal.objects.count())
    journal = logic.journal()
    # the call above created exactly one Journal row
    self.assertEqual(1, models.Journal.objects.count())
    self.assertEqual(journal.name, settings.PRIMARY_JOURNAL['name'])
def setUp(self):
    """paths to a partial and a tiny ejp-to-lax report fixture."""
    self.journal = logic.journal()
    fixtures = self.fixture_dir
    self.partial_json_path = join(fixtures, 'partial-ejp-to-lax-report.json')
    self.tiny_json_path = join(fixtures, 'tiny-ejp-to-lax-report.json')
def test_todict(self):
    """utils.to_dict serialises a Journal model into a plain dict."""
    self.assertEqual(models.Journal.objects.count(), 0)
    journal = logic.journal()
    journal_data = utils.to_dict(journal)
    self.assertTrue(isinstance(journal_data, dict))
    self.assertEqual(journal_data['name'], settings.PRIMARY_JOURNAL['name'])
def setUp(self):
    """path to a scraped article-json fixture."""
    self.journal = logic.journal()
    fixture = 'elife00353.xml.json'
    self.json_fixture = os.path.join(self.this_dir, 'fixtures', fixture)