Example #1
0
    def handle(self, *args, **options):
        """Run an import over the paths resolved from options['path'].

        Unless options['just_do_it'] is truthy, the resolved paths and the
        chosen settings are printed and the user is prompted before anything
        runs; ctrl-c at the prompt exits with status 0.

        The import function is selected by options['import_type'] and handed
        to `ingest` together with the journal, the create/update flags and the
        path list. The call is wrapped in `transaction.atomic()` when
        options['no_atomic'] is truthy. The process exits with status 0 once
        the import returns.
        """
        path = options['path']
        # NOTE(review): these option keys are named `no_*` but their values are
        # used directly as the positive create/update/atomic flags -- presumably
        # the argument parser stores False when the flag is given; confirm.
        create_articles = options['no_create']
        update_articles = options['no_update']
        import_type = options['import_type']
        atomic = options['no_atomic']

        path_list = utils.resolve_path(path)

        if not options['just_do_it']:
            # interactive confirmation: show what is about to happen and wait
            try:
                pprint.pprint(path_list)
                print
                print import_type.upper(), 'import of', len(path_list), 'files'
                print 'create?', create_articles
                print 'update?', update_articles
                print
                raw_input('continue? (ctrl-c to exit)')
            except KeyboardInterrupt:
                # user declined; leave quietly with a success status
                print
                exit(0)

        # import type -> function handed to `ingest`
        choices = {
            EIF: eif_ingestor.import_article_from_json_path,
            EJP: ejp_ingestor.import_article_list_from_json_path,
            PATCH: eif_ingestor.patch_handler,
            # no handler is mapped for AJSON: `ingest` receives None for this type
            AJSON: None
        }
        # an import_type missing from `choices` raises KeyError on the lookup below
        fn = partial(ingest, choices[import_type], logic.journal(), create_articles, update_articles, path_list)
        if atomic:
            with transaction.atomic():
                fn()
        else:
            fn()
        exit(0)
Example #2
0
    def test_article_ingest_data(self):
        """Ingest `self.ajson`, then check attributes of the resulting Article
        and of the first ArticleVersion attached to it."""
        ajson_ingestor.ingest(self.ajson)

        def check_attrs(obj, expected_pairs):
            # compare each named attribute of `obj` with its expected value, in order
            for name, want in expected_pairs:
                got = getattr(obj, name)
                self.assertEqual(
                    got, want,
                    "expecting %r for %r got %r" % (want, name, got))

        expected_article = [
            ('journal', logic.journal()),
            ('manuscript_id', 20105),
            ('volume', 5),
            ('doi', '10.7554/eLife.20105'),
            ('date_received', date(year=2016, month=7, day=27)),
            ('date_accepted', date(year=2016, month=10, day=3)),
        ]
        art = models.Article.objects.get(manuscript_id=20105)
        check_attrs(art, expected_article)

        expected_version = [
            ('article', art),
            ('title', 'An electrostatic selection mechanism controls sequential kinase signaling downstream of the T cell receptor'),
            ('version', 1),
            ('status', 'poa'),
            ('datetime_published', None),
        ]
        first_version = art.articleversion_set.all()[0]
        check_attrs(first_version, expected_version)
Example #3
0
    def setUp(self):
        """Create a test client, fetch the journal and add three version-1 articles.

        'foo' and 'bar' have status 'vor' and 'baz' has status 'poa'; 'bar' is
        given a pub-date 999 hours earlier than the other two.
        """
        self.c = Client()
        self.journal = logic.journal()

        recent = utils.utcnow() - timedelta(hours=1)
        long_ago = recent - timedelta(hours=999)
        to_str = utils.ymdhms

        self.article_data_list = [
            {
                'title': 'foo',
                'status': 'vor',
                'version': 1,
                'doi': "10.7554/eLife.00001",
                'journal': self.journal,
                'pub-date': to_str(recent),
            },
            {
                'title': 'bar',
                'status': 'vor',
                'version': 1,
                'doi': "10.7554/eLife.00002",
                'journal': self.journal,
                'pub-date': to_str(long_ago),
            },
            {
                'title': 'baz',
                'version': 1,
                'status': 'poa',  # ** the only non-vor article
                'doi': "10.7554/eLife.00003",
                'journal': self.journal,
                'pub-date': to_str(recent),
            },
        ]

        # plain loop: the calls are made for their effect, the results are discarded
        for article_data in self.article_data_list:
            logic.add_or_update_article(**article_data)
Example #4
0
 def setUp(self):
     """Fetch the journal and collect the path of every file found beneath
     the 'ppp' directory of `self.fixture_dir`."""
     self.fixture_list = []
     self.journal = logic.journal()
     ppp_root = join(self.fixture_dir, 'ppp')
     for dirpath, _, filenames in os.walk(ppp_root):
         # directories without files contribute nothing to the list
         self.fixture_list.extend(os.path.join(dirpath, fname) for fname in filenames)
Example #5
0
    def handle(self, *args, **options):
        """Run an import over the paths resolved from ``options['path']``.

        Unless ``options['just_do_it']`` is truthy, the resolved paths and the
        chosen settings are printed and the user is prompted before anything
        runs; ctrl-c at the prompt exits with status 0.

        The import function is selected by ``options['import_type']`` and
        handed to ``ingest`` together with the journal, the create/update
        flags and the path list. The call is wrapped in
        ``transaction.atomic()`` when ``options['no_atomic']`` is truthy.

        Raises:
            KeyError: if ``options['import_type']`` has no entry in the
                dispatch table (only ``EJP`` is mapped here).
            SystemExit: always, with status 0, once the import has returned
                or the user has aborted at the prompt.
        """
        # function-scope import: `sys.exit` replaces the `exit` builtin, which is
        # injected by the `site` module for interactive use and is not guaranteed
        # to exist (e.g. when the interpreter is started with -S)
        import sys

        path = options['path']
        # NOTE(review): these option keys are named `no_*` but their values are
        # used directly as the positive create/update/atomic flags -- presumably
        # the argument parser stores False when the flag is given; confirm.
        create_articles = options['no_create']
        update_articles = options['no_update']
        import_type = options['import_type']
        atomic = options['no_atomic']

        path_list = utils.resolve_path(path)

        if not options['just_do_it']:
            # interactive confirmation: show what is about to happen and wait
            try:
                pprint.pprint(path_list)
                print(import_type.upper(), 'import of', len(path_list), 'files')
                print('create?', create_articles)
                print('update?', update_articles)
                input('continue? (ctrl-c to exit)')
            except KeyboardInterrupt:
                # user declined; leave quietly with a success status
                sys.exit(0)

        # import type -> function handed to `ingest`
        choices = {
            EJP: ejp_ingestor.import_article_list_from_json_path,
        }
        fn = partial(ingest, choices[import_type], logic.journal(), create_articles, update_articles, path_list)
        if atomic:
            with transaction.atomic():
                fn()
        else:
            fn()
        sys.exit(0)
Example #6
0
    def setUp(self):
        """Ingest and publish each listed 'ppp2' article-json fixture, then
        record the version and article counts the tests expect."""
        self.journal = logic.journal()

        # "<msid>.<version>" of each fixture, processed in this order
        fixture_versions = [
            '00353.1',  # discussion, VOR
            '00385.1',  # commentary, VOR
            '01328.1',  # correction, VOR
            '02619.1',  # editorial, VOR
            '03401.1',  # research, POA
            '03401.2',  # POA
            '03401.3',  # VOR
            '03665.1',  # research, VOR
            '06250.1',  # research, POA
            '06250.2',  # POA
            '06250.3',  # VOR
            '07301.1',  # research, VOR
            '08025.1',  # research, POA
            '08025.2',  # VOR
            '09571.1',  # research, POA
        ]
        for msid_version in fixture_versions:
            # '00353.1' -> 'elife-00353-v1.xml.json'
            fixture_name = "elife-%s.xml.json" % msid_version.replace('.', '-v')
            fixture_path = join(self.fixture_dir, 'ppp2', fixture_name)
            # original note: "strip relations" -- presumably done by load_ajson; confirm
            ajson_ingestor.ingest_publish(self.load_ajson(fixture_path))

        vor_versions, poa_versions = 9, 6
        self.vor_version_count = vor_versions
        self.poa_version_count = poa_versions
        self.total_version_count = vor_versions + poa_versions

        poa_articles, vor_articles = 1, 9
        self.poa_art_count = poa_articles
        self.vor_art_count = vor_articles
        self.total_art_count = poa_articles + vor_articles
Example #7
0
 def setUp(self):
     """Fetch the journal and prepare a version-1 article payload that refers to it."""
     journal = logic.journal()
     self.journal = journal
     self.article_data = dict(
         title="Molecular architecture of human polycomb repressive complex 2",
         version=1,
         doi="10.7554/eLife.00005",
         journal=journal,
     )
Example #8
0
 def setUp(self):
     """Create a test client, fetch the journal and prepare a 'poa' version-1
     article payload with a fixed pub-date."""
     self.c = Client()
     journal = logic.journal()
     self.journal = journal

     payload = {
         'title': "Molecular architecture of human polycomb repressive complex 2",
         'version': 1,
         'status': 'poa',
         'doi': "10.7554/eLife.00005",
         'pub-date': '2000-01-01',
         'journal': journal,
     }
     self.article_data = payload
Example #9
0
    def test_unpublished_article_versions_list(self):
        """The article-version-list response for `self.msid2` is a 200 with the
        history content type, validates against the history schema and lists
        three versions."""
        # some of the data needed here can only come from ejp
        ejp_fixture = join(self.fixture_dir, 'dummy-ejp-for-v2-api-fixtures.json')
        ejp_ingestor.import_article_list_from_json_path(logic.journal(), ejp_fixture, create=False, update=True)

        url = reverse('v2:article-version-list', kwargs={'id': self.msid2})
        resp = self.ac.get(url)
        self.assertEqual(resp.status_code, 200)
        self.assertEqual(resp.content_type, 'application/vnd.elife.article-history+json;version=1')

        history = json.loads(resp.content)

        # the payload must be schema-valid ...
        utils.validate(history, SCHEMA_IDX['history'])

        # ... and correct: this article has two *published* versions and one *unpublished*
        version_list = history['versions']
        self.assertEqual(len(version_list), 3)
Example #10
0
    def setUp(self):
        """Ingest and publish each listed 'ppp2' article-json fixture, override
        the publication date of two versions and record the expected counts."""
        self.journal = publogic.journal()

        # "<msid>.<version>" of each fixture, processed in this order
        fixture_versions = [
            '00353.1',  # discussion, VOR
            '00385.1',  # commentary, VOR
            '01328.1',  # correction, VOR
            '02619.1',  # editorial, VOR
            '03401.1',  # research, POA
            '03401.2',  # POA
            '03401.3',  # VOR
            '03665.1',  # research, VOR
            '06250.1',  # research, POA
            '06250.2',  # POA
            '06250.3',  # VOR
            '07301.1',  # research, VOR
            '08025.1',  # research, POA
            '08025.2',  # VOR
            '09571.1',  # research, POA
        ]
        for msid_version in fixture_versions:
            # '00353.1' -> 'elife-00353-v1.xml.json'
            fixture_name = "elife-%s.xml.json" % msid_version.replace('.', '-v')
            fixture_path = join(self.fixture_dir, 'ppp2', fixture_name)
            # original note: "strip relations" -- presumably done by load_ajson; confirm
            ajson_ingestor.ingest_publish(self.load_ajson(fixture_path))

        # the data of the non-v1 articles needs a little coercion, as the eif
        # ingestor that bypassed business logic was removed
        published_overrides = (
            # (manuscript id, version, date published) -- both are vor
            (3401, 3, "2014-08-01"),
            (8025, 2, "2015-06-16"),
        )
        for msid, version, published in published_overrides:
            av = models.ArticleVersion.objects.get(article__manuscript_id=msid, version=version)
            av.datetime_published = utils.todt(published)
            av.save()

        self.vor_version_count, self.poa_version_count = 9, 6
        self.poa_art_count, self.vor_art_count = 1, 9
Example #11
0
    def setUp(self):
        """Import each listed 'ppp' fixture through the eif ingestor and record
        the version and article counts the tests expect."""
        self.journal = logic.journal()

        # "<msid>.<version>"; each is also the name of the fixture's sub-directory
        fixture_versions = [
            '00353.1',  # discussion, VOR
            '00385.1',  # commentary, VOR
            '01328.1',  # correction, VOR
            '02619.1',  # editorial, VOR
            '03401.1',  # research, POA
            '03401.2',  # POA
            '03401.3',  # VOR
            '03665.1',  # research, VOR
            '06250.1',  # research, POA
            '06250.2',  # POA
            '06250.3',  # VOR
            '07301.1',  # research, VOR
            '08025.1',  # research, POA
            '08025.2',  # VOR
            '09571.1',  # research, POA
        ]
        for msid_version in fixture_versions:
            # '00353.1' -> <fixture_dir>/ppp/00353.1/elife-00353-v1.json
            fixture_name = "elife-%s.json" % msid_version.replace('.', '-v')
            fixture_path = join(self.fixture_dir, 'ppp', msid_version, fixture_name)
            eif_ingestor.import_article_from_json_path(self.journal, fixture_path)

        vor_versions, poa_versions = 9, 6
        self.vor_version_count = vor_versions
        self.poa_version_count = poa_versions
        self.total_version_count = vor_versions + poa_versions

        poa_articles, vor_articles = 1, 9
        self.poa_art_count = poa_articles
        self.vor_art_count = vor_articles
        self.total_art_count = poa_articles + vor_articles

        self.research_art_count = 6
Example #12
0
def _ingest_objects(data, create, update, force, log_context):
    """ingest helper. creates or updates the Article and ArticleVersion rendered from `data['article']`.

    returns a 4-tuple of `(article version, created, updated, previous article versions)`
    where `created` and `updated` are the flags from the ArticleVersion
    `create_or_update` call and the previous versions are a list ordered by
    `version`, empty unless the Article itself was updated.

    `log_context` is a mutable dict: the 'article' and 'article-version' keys are set on it as a side effect.
    a `KeyError` raised anywhere in the body is re-raised as a `StateError` carrying `codes.PARSE_ERROR`."""

    # work on a private copy of the given data
    data = copy.deepcopy(data)

    # this *could* be scraped from the provided data, but we have no time to
    # normalize journal names so we sometimes get duplicate journals in the db.
    # safer to disable until needed.
    journal = logic.journal()

    try:
        article_fields = render.render_item(ARTICLE, data['article'])
        article, _, article_updated = create_or_update(
            models.Article, article_fields, ['manuscript_id', 'journal'], create, update, journal=journal)

        log_context['article'] = article

        # versions are only looked up when the article was updated
        if article_updated:
            version_qs = article.articleversion_set.all().order_by('version')  # earliest -> latest
            previous_article_versions = list(version_qs)
        else:
            previous_article_versions = []

        version_fields = render.render_item(ARTICLE_VERSION, data['article'])
        # this is an INGEST event and *not* a PUBLISH event. we don't touch the date published.
        del version_fields['datetime_published']

        av, created, updated = create_or_update(
            models.ArticleVersion, version_fields, ['article', 'version'],
            create, update, commit=False, article=article)

        log_context['article-version'] = av

        events.ajson_ingest_events(article, data['article'], force)

        return av, created, updated, previous_article_versions

    except KeyError as err:
        raise StateError(
            codes.PARSE_ERROR,
            "failed to scrape article data, key not present: %s" % err)
Example #13
0
def _ingest(data, force=False):
    """ingests article-json. returns a triple of (journal obj, article obj, article version obj)
    unpublished article-version data can be ingested multiple times UNLESS that article version has been published.
    published article-version data can be ingested only if force=True

    the given `data` is deep-copied and never modified.

    the article version comes from `create_or_update(..., commit=False)` and `av.save()` is only called
    once every business rule below has passed. on success `events.notify` is registered for the article
    via `transaction.on_commit`.

    raises `StateError` when:
    * a new version is ingested while the previous version is unpublished
    * a new version is out of sequence (or is the first version and isn't a v1)
    * an already published version is re-ingested without `force`
    * a `KeyError` escapes the body (outside of the article scraping step)

    raises `ValueError` when a key is missing while scraping the article and article version.
    any exception other than `StateError` and `KeyError` is logged with `LOG.exception` and re-raised."""

    data = copy.deepcopy(data) # we don't want to modify the given data

    create = update = True
    log_context = {}

    try:
        # this *could* be scraped from the provided data, but we have no time to
        # normalize journal names so we sometimes get duplicate journals in the db.
        # safer to disable until needed.
        journal = logic.journal()

        try:
            article_struct = render.render_item(ARTICLE, data['article'])
            article, created, updated = \
                create_or_update(models.Article, article_struct, ['manuscript_id', 'journal'], create, update, journal=journal)

            assert isinstance(article, models.Article)
            log_context['article'] = article

            # only an *updated* article is checked for earlier versions
            previous_article_versions = None
            if updated:
                previous_article_versions = list(article.articleversion_set.all().order_by('version')) # earliest -> latest

            av_struct = render.render_item(ARTICLE_VERSION, data['article'])
            # this is an INGEST event and *not* a PUBLISH event. we don't touch the date published.
            del av_struct['datetime_published']

            # note: `created` and `updated` are rebound here and describe the article *version* from now on
            av, created, updated = \
                create_or_update(models.ArticleVersion, av_struct, ['article', 'version'],
                                 create, update, commit=False, article=article)
        except KeyError as err:
            # surfaces as a ValueError, which the outermost handler logs and re-raises
            raise ValueError("failed to scrape article data, couldn't find key %s" % err)

        assert isinstance(av, models.ArticleVersion)
        log_context['article-version'] = av

        # only update the fragment if this article version has *not* been published *or* if force=True
        update_fragment = not av.published() or force
        merge_result = fragments.add(av, XML2JSON, data['article'], pos=0, update=update_fragment)
        fragments.merge_if_valid(av)
        invalid_ajson = not merge_result
        if invalid_ajson:
            # `warning` rather than `warn`: `warn` is the deprecated alias in the stdlib `logging` module
            LOG.warning("this article failed to merge it's fragments into a valid result and cannot be PUBLISHed in it's current state.", extra=log_context)

        # enforce business rules

        if created:
            if previous_article_versions:
                last_version = previous_article_versions[-1]
                log_context['previous-version'] = last_version

                if not last_version.published():
                    # uhoh. we're attempting to create an article version before previous version of that article has been published.
                    msg = "refusing to ingest new article version when previous article version is still unpublished."
                    LOG.error(msg, extra=log_context)
                    raise StateError(msg)

                if not last_version.version + 1 == av.version:
                    # uhoh. we're attempting to create an article version out of sequence
                    msg = "refusing to ingest new article version out of sequence."
                    log_context.update({
                        'given-version': av.version,
                        'expected-version': last_version.version + 1})
                    LOG.error(msg, extra=log_context)
                    raise StateError(msg)

            # no other versions of article exist
            else:
                if not av.version == 1:
                    # uhoh. we're attempting to create our first article version and it isn't a version 1
                    msg = "refusing to ingest new article version out of sequence. no other article versions exist so I expect a v1"
                    log_context.update({
                        'given-version': av.version,
                        'expected-version': 1})
                    LOG.error(msg, extra=log_context)
                    raise StateError(msg)

        elif updated:
            # this version of the article already exists
            # this is only a problem if the article version has already been published
            if av.published():
                # uhoh. we've received an INGEST event for a previously published article version
                if not force:
                    # unless our arm is being twisted, die.
                    msg = "refusing to ingest new article data on an already published article version."
                    LOG.error(msg, extra=log_context)
                    raise StateError(msg)

        # passed all checks, save
        av.save()

        # notify event bus that article change has occurred
        transaction.on_commit(partial(events.notify, article))

        return journal, article, av

    except KeyError as err:
        # *probably* an error while scraping ...
        raise StateError("failed to scrape given article data: %s" % err)

    except StateError:
        # already logged where it was raised; pass it through untouched
        raise

    except Exception:
        LOG.exception("unhandled exception attempting to ingest article-json", extra=log_context)
        raise
Example #14
0
 def setUp(self):
     """Fetch the journal and build the paths of the two fixtures used by the tests."""
     self.journal = logic.journal()
     # article-json document kept in the 'fixtures' directory next to this module's dir
     self.json_fixture = os.path.join(self.this_dir, 'fixtures', 'elife00353.xml.json')
     # matching v1 fixture under the shared fixture dir
     ppp_parts = ('ppp', '00353.1', 'elife-00353-v1.json')
     self.update_fixture = join(self.fixture_dir, *ppp_parts)
Example #15
0
 def test_fetch_journal(self):
     """`logic.journal()` leaves exactly one Journal, named after the configured
     primary journal, when none existed before the call."""
     journals = models.Journal.objects
     self.assertEqual(0, journals.count())
     fetched = logic.journal()
     self.assertEqual(1, journals.count())
     expected_name = settings.PRIMARY_JOURNAL['name']
     self.assertEqual(fetched.name, expected_name)
 def setUp(self):
     """Fetch the journal and build the paths of the two ejp report fixtures."""
     self.journal = logic.journal()
     fixture_dir = self.fixture_dir
     self.partial_json_path = join(fixture_dir, 'partial-ejp-to-lax-report.json')
     self.tiny_json_path = join(fixture_dir, 'tiny-ejp-to-lax-report.json')
Example #17
0
 def test_todict(self):
     """`utils.to_dict` turns the journal into a dict whose 'name' matches the
     configured primary journal."""
     self.assertEqual(models.Journal.objects.count(), 0)
     as_dict = utils.to_dict(logic.journal())
     is_dict = isinstance(as_dict, dict)
     self.assertTrue(is_dict)
     expected_name = settings.PRIMARY_JOURNAL['name']
     self.assertEqual(as_dict['name'], expected_name)
 def setUp(self):
     """Fetch the journal and build the paths of the two ejp report fixtures."""
     self.journal = logic.journal()
     partial_name = 'partial-ejp-to-lax-report.json'
     tiny_name = 'tiny-ejp-to-lax-report.json'
     self.partial_json_path = join(self.fixture_dir, partial_name)
     self.tiny_json_path = join(self.fixture_dir, tiny_name)
Example #19
0
 def setUp(self):
     """Fetch the journal and build the path of the article-json fixture."""
     self.journal = logic.journal()
     fixture_parts = (self.this_dir, 'fixtures', 'elife00353.xml.json')
     self.json_fixture = os.path.join(*fixture_parts)
Example #20
0
 def test_fetch_journal(self):
     """Fetching the journal takes the Journal count from zero to one and the
     result carries the configured primary journal name."""
     count_before = models.Journal.objects.count()
     self.assertEqual(0, count_before)

     journal = logic.journal()

     count_after = models.Journal.objects.count()
     self.assertEqual(1, count_after)
     self.assertEqual(journal.name, settings.PRIMARY_JOURNAL['name'])