def test_send_robotupload_works_doesnt_fail_when_removing_references_and_no_references():
    """Robotupload in ``insert`` mode works for a record without references.

    ``record2marcxml`` is patched, so the test checks that the task returns
    ``None`` and that the patched serializer was last called with the record.
    """
    with requests_mock.Mocker() as requests_mocker:
        requests_mocker.register_uri(
            'POST', 'http://inspirehep.net/batchuploader/robotupload/insert',
            text='[INFO] foo bar baz'
        )

        config = {
            'LEGACY_ROBOTUPLOAD_URL': 'http://inspirehep.net',
            'PRODUCTION_MODE': True,
        }

        with patch.dict(current_app.config, config), \
                patch('inspirehep.modules.workflows.tasks.submission.record2marcxml') as mock_record2marcxml:
            data = {
                '$schema': 'http://localhost:5000/schemas/records/hep.json',
            }
            extra_data = {}

            obj = MockObj(data, extra_data)
            eng = MockEng()

            _send_robotupload = send_robotupload(
                mode='insert',
            )

            assert _send_robotupload(obj, eng) is None
            # ``called_with`` is not a Mock assertion: it is an auto-created
            # child mock and therefore always truthy, so the former
            # ``assert mock.called_with(...)`` could never fail.
            # ``assert_called_with`` really checks the last call's arguments.
            mock_record2marcxml.assert_called_with(data)
Example #2
0
def test_send_robotupload_works_doesnt_fail_when_removing_references_and_no_references(
):
    """Robotupload in ``insert`` mode works for a record without references.

    ``record2marcxml`` is patched, so the test checks that the task returns
    ``None`` and that the patched serializer was last called with the record.
    """
    with requests_mock.Mocker() as requests_mocker:
        requests_mocker.register_uri(
            'POST',
            'http://inspirehep.net/batchuploader/robotupload/insert',
            text='[INFO] foo bar baz')

        config = {
            'LEGACY_ROBOTUPLOAD_URL': 'http://inspirehep.net',
            'PRODUCTION_MODE': True,
        }

        with patch.dict(current_app.config, config), \
                patch('inspirehep.modules.workflows.tasks.submission.record2marcxml') as mock_record2marcxml:
            data = {
                '$schema': 'http://localhost:5000/schemas/records/hep.json',
            }
            extra_data = {}

            obj = MockObj(data, extra_data)
            eng = MockEng()

            _send_robotupload = send_robotupload(mode='insert', )

            assert _send_robotupload(obj, eng) is None
            # ``assert mock.called_with(...)`` only tested the truthiness of
            # an auto-created child mock and could never fail; use the real
            # assertion method so the call arguments are verified.
            mock_record2marcxml.assert_called_with(data)
Example #3
0
def test_send_robotupload_does_nothing_when_not_in_production_mode():
    """With ``PRODUCTION_MODE`` set to ``False`` the task returns ``None``.

    No URI is registered on the requests mocker for this test.
    """
    settings = {
        'LEGACY_ROBOTUPLOAD_URL': 'http://inspirehep.net',
        'PRODUCTION_MODE': False,
    }
    record = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'arxiv_eprints': [
            {
                'categories': ['hep-th'],
                'value': 'hep-th/9711200',
            },
        ],
    }

    with requests_mock.Mocker():
        eprints_schema = load_schema('hep')['properties']['arxiv_eprints']

        with patch.dict(current_app.config, settings):
            # Sanity check: the fixture itself must be schema-valid.
            assert validate(record['arxiv_eprints'], eprints_schema) is None

            workflow_obj = MockObj(record, {})
            engine = MockEng()

            task = send_robotupload(mode='insert')

            assert task(workflow_obj, engine) is None
Example #4
0
def test_send_robotupload_new_article_when_feature_flag_is_disabled():
    """A record without ``is-update`` is uploaded although the flag is off.

    ``FEATURE_FLAG_ENABLE_UPDATE_TO_LEGACY`` is ``False`` and the info log
    must still contain the full "sent" message sequence.
    """
    settings = {
        'LEGACY_ROBOTUPLOAD_URL': 'http://inspirehep.net',
        'PRODUCTION_MODE': True,
        'FEATURE_FLAG_ENABLE_UPDATE_TO_LEGACY': False
    }
    record = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
    }

    with requests_mock.Mocker() as mocker:
        mocker.register_uri(
            'POST',
            'http://inspirehep.net/batchuploader/robotupload/insert',
            text='[INFO] foo bar baz',
        )

        with patch.dict(current_app.config, settings):
            with patch('inspirehep.modules.workflows.tasks.submission.record2marcxml'):
                workflow_obj = MockObj(record, {})
                engine = MockEng()

                task = send_robotupload(mode='insert')
                assert task(workflow_obj, engine) is None

                logged = workflow_obj.log._info.getvalue()
                wanted = (
                    'Robotupload sent!'
                    '[INFO] foo bar baz'
                    'end of upload'
                )

                assert wanted == logged
def test_send_robotupload_does_nothing_when_not_in_production_mode():
    """The task returns ``None`` when ``PRODUCTION_MODE`` is ``False``.

    The requests mocker is active but has no URI registered.
    """
    app_config = {
        'LEGACY_ROBOTUPLOAD_URL': 'http://inspirehep.net',
        'PRODUCTION_MODE': False,
    }
    eprint = {
        'categories': [
            'hep-th',
        ],
        'value': 'hep-th/9711200',
    }
    payload = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'arxiv_eprints': [eprint],
    }

    with requests_mock.Mocker():
        hep_schema = load_schema('hep')
        eprints_subschema = hep_schema['properties']['arxiv_eprints']

        with patch.dict(current_app.config, app_config):
            # The fixture is validated before it is handed to the task.
            assert validate(payload['arxiv_eprints'], eprints_subschema) is None

            wf_object = MockObj(payload, {})
            wf_engine = MockEng()

            upload_task = send_robotupload(mode='insert')

            assert upload_task(wf_object, wf_engine) is None
Example #6
0
class Author(object):
    """Workflow ingesting authors into the HEPNames/Authors collection."""

    name = 'Author'
    data_type = 'authors'

    workflow = [
        # The schema has to be set so Holding Pen indexes the record properly.
        set_schema,
        # Record signals are emitted so the metadata can be enriched.
        emit_record_signals,
        IF_ELSE(
            is_marked('is-update'),
            # First branch: the object is marked as an update.
            [
                send_robotupload(
                    marcxml_processor=hepnames2marc,
                    mode='holdingpen',
                ),
                create_ticket(
                    template='authors/tickets/curator_update.html',
                    queue='Authors_cor_user',
                    context_factory=update_ticket_context,
                ),
            ],
            # Second branch: open tickets, then halt for approval.
            [
                create_ticket(
                    template='authors/tickets/curator_new.html',
                    queue='Authors_add_user',
                    context_factory=new_ticket_context,
                ),
                reply_ticket(
                    template='authors/tickets/user_new.html',
                    context_factory=reply_ticket_context,
                    keep_new=True,
                ),
                halt_record(
                    action='author_approval',
                    message='Accept submission?',
                ),
                IF_ELSE(
                    is_record_accepted,
                    [
                        send_robotupload(
                            marcxml_processor=hepnames2marc,
                            mode='insert',
                        ),
                        reply_ticket(
                            template='authors/tickets/user_accepted.html',
                            context_factory=reply_ticket_context,
                        ),
                        close_ticket(ticket_id_key='ticket_id'),
                        IF(
                            curation_ticket_needed,
                            [
                                create_ticket(
                                    template='authors/tickets/curation_needed.html',
                                    queue='AUTHORS_curation',
                                    context_factory=curation_ticket_context,
                                    ticket_id_key='curation_ticket_id',
                                ),
                            ],
                        ),
                    ],
                    [
                        close_ticket(ticket_id_key='ticket_id'),
                    ],
                ),
            ],
        ),
    ]
def test_send_robotupload_logs_on_error_response():
    """An ``[ERROR]`` response body makes the task raise and log an error.

    Both the exception message and the content of the error log are checked.
    """
    settings = {
        'LEGACY_ROBOTUPLOAD_URL': 'http://inspirehep.net',
        'PRODUCTION_MODE': True,
    }
    record = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'arxiv_eprints': [
            {
                'categories': ['hep-th'],
                'value': 'hep-th/9711200',
            },
        ],
    }

    with requests_mock.Mocker() as mocker:
        mocker.register_uri(
            'POST',
            'http://inspirehep.net/batchuploader/robotupload/insert',
            text='[ERROR] cannot use the service',
        )

        eprints_schema = load_schema('hep')['properties']['arxiv_eprints']

        with patch.dict(current_app.config, settings):
            assert validate(record['arxiv_eprints'], eprints_schema) is None

            workflow_obj = MockObj(record, {})
            engine = MockEng()

            task = send_robotupload(mode='insert')

            with pytest.raises(Exception) as raised:
                task(workflow_obj, engine)

            # First check: the message carried by the exception.
            wanted_message = (
                'Error while submitting robotupload: '
                '[ERROR] cannot use the service'
            )
            assert wanted_message == str(raised.value)

            # Second check: what ended up in the object's error log.
            wanted_log = (
                'Your IP is not in app.config_BATCHUPLOADER_WEB_ROBOT_RIGHTS on host: '
                '[ERROR] cannot use the service'
            )
            assert wanted_log == workflow_obj.log._error.getvalue()
def test_send_robotupload_logs_on_error_response():
    """An ``[ERROR]`` response body makes the task raise and log an error.

    ``HTTPretty.allow_net_connect`` is switched off for the test and switched
    back on in a ``finally`` clause, so a failing assertion does not leave the
    class-level flag set to ``False`` for whatever runs afterwards.
    """
    httpretty.HTTPretty.allow_net_connect = False
    try:
        httpretty.register_uri(
            httpretty.POST,
            'http://inspirehep.net/batchuploader/robotupload/insert',
            body='[ERROR] cannot use the service')

        schema = load_schema('hep')
        subschema = schema['properties']['arxiv_eprints']

        config = {
            'LEGACY_ROBOTUPLOAD_URL': 'http://inspirehep.net',
            'PRODUCTION_MODE': True,
        }

        with patch.dict(current_app.config, config):
            data = {
                'arxiv_eprints': [
                    {
                        'categories': [
                            'hep-th',
                        ],
                        'value': 'hep-th/9711200',
                    },
                ],
            }
            extra_data = {}
            assert validate(data['arxiv_eprints'], subschema) is None

            obj = MockObj(data, extra_data)
            eng = MockEng()

            _send_robotupload = send_robotupload(
                marcxml_processor=hep2marc,
                mode='insert',
            )

            with pytest.raises(Exception) as excinfo:
                _send_robotupload(obj, eng)

            # The exception message embeds the response body.
            expected = ('Error while submitting robotupload: '
                        '[ERROR] cannot use the service')
            result = str(excinfo.value)

            assert expected == result

            # The same body is expected in the object's error log.
            expected = (
                'Your IP is not in app.config_BATCHUPLOADER_WEB_ROBOT_RIGHTS on host: '
                '[ERROR] cannot use the service')
            result = obj.log._error.getvalue()

            assert expected == result
    finally:
        # Restore the global flag even when an assertion above fails.
        httpretty.HTTPretty.allow_net_connect = True
def test_send_robotupload_works_with_mode_insert_on_hep():
    """``insert`` mode on a hep record logs the upload and sets ``eng.msg``.

    The mocked endpoint answers with an ``[INFO]`` body, which is expected
    both in the info log and in the engine message.
    """
    settings = {
        'LEGACY_ROBOTUPLOAD_URL': 'http://inspirehep.net',
        'PRODUCTION_MODE': True,
    }
    record = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'arxiv_eprints': [
            {
                'categories': ['hep-th'],
                'value': 'hep-th/9711200',
            },
        ],
    }

    with requests_mock.Mocker() as mocker:
        mocker.register_uri(
            'POST',
            'http://inspirehep.net/batchuploader/robotupload/insert',
            text='[INFO] foo bar baz',
        )

        eprints_schema = load_schema('hep')['properties']['arxiv_eprints']

        with patch.dict(current_app.config, settings):
            assert validate(record['arxiv_eprints'], eprints_schema) is None

            workflow_obj = MockObj(record, {})
            engine = MockEng()

            task = send_robotupload(mode='insert')

            assert task(workflow_obj, engine) is None

            # Content of the info log after the upload.
            wanted_log = (
                'Robotupload sent!'
                '[INFO] foo bar baz'
                'end of upload'
            )
            assert wanted_log == workflow_obj.log._info.getvalue()

            # Message left on the engine.
            wanted_msg = 'Waiting for robotupload: [INFO] foo bar baz'
            assert wanted_msg == engine.msg
Example #10
0
def test_send_robotupload_logs_on_error_response():
    """The task raises and writes to the error log on an ``[ERROR]`` body."""
    app_config = {
        'LEGACY_ROBOTUPLOAD_URL': 'http://inspirehep.net',
        'PRODUCTION_MODE': True,
    }
    eprint = {
        'categories': [
            'hep-th',
        ],
        'value': 'hep-th/9711200',
    }
    payload = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'arxiv_eprints': [eprint],
    }

    with requests_mock.Mocker() as mocker:
        mocker.register_uri(
            'POST',
            'http://inspirehep.net/batchuploader/robotupload/insert',
            text='[ERROR] cannot use the service',
        )

        hep_schema = load_schema('hep')
        eprints_subschema = hep_schema['properties']['arxiv_eprints']

        with patch.dict(current_app.config, app_config):
            assert validate(payload['arxiv_eprints'], eprints_subschema) is None

            wf_object = MockObj(payload, {})
            wf_engine = MockEng()

            upload_task = send_robotupload(mode='insert')

            with pytest.raises(Exception) as error_info:
                upload_task(wf_object, wf_engine)

            # Message of the raised exception.
            raised_text = str(error_info.value)
            assert (
                'Error while submitting robotupload: '
                '[ERROR] cannot use the service'
            ) == raised_text

            # Content of the error log.
            logged_text = wf_object.log._error.getvalue()
            assert (
                'Your IP is not in app.config_BATCHUPLOADER_WEB_ROBOT_RIGHTS on host: '
                '[ERROR] cannot use the service'
            ) == logged_text
Example #11
0
def test_send_robotupload_works_with_mode_correct_and_extra_data_key():
    """``correct`` mode works when ``extra_data_key`` is given.

    ``obj.data`` is empty here; the record sits in ``extra_data`` under the
    ``update_payload`` key passed as ``extra_data_key``.
    """
    settings = {
        'LEGACY_ROBOTUPLOAD_URL': 'http://inspirehep.net',
        'PRODUCTION_MODE': True,
    }
    update_payload = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'arxiv_eprints': [
            {
                'categories': ['hep-th'],
                'value': 'hep-th/9711200',
            },
        ],
    }

    with requests_mock.Mocker() as mocker:
        mocker.register_uri(
            'POST',
            'http://inspirehep.net/batchuploader/robotupload/correct',
            text='[INFO] foo bar baz',
        )

        with patch.dict(current_app.config, settings):
            workflow_obj = MockObj({}, {'update_payload': update_payload})
            engine = MockEng()

            task = send_robotupload(
                mode='correct',
                extra_data_key='update_payload',
            )

            assert task(workflow_obj, engine) is None

            # Content of the info log after the upload.
            wanted_log = (
                'Robotupload sent!'
                '[INFO] foo bar baz'
                'end of upload'
            )
            assert wanted_log == workflow_obj.log._info.getvalue()

            # Message left on the engine.
            wanted_msg = 'Waiting for robotupload: [INFO] foo bar baz'
            assert wanted_msg == engine.msg
def test_send_robotupload_works_with_hep2marc_and_mode_insert():
    """``insert`` mode with the ``hep2marc`` processor logs and sets ``eng.msg``.

    ``HTTPretty.allow_net_connect`` is switched off for the test and switched
    back on in a ``finally`` clause, so a failing assertion does not leave the
    class-level flag set to ``False`` for whatever runs afterwards.
    """
    httpretty.HTTPretty.allow_net_connect = False
    try:
        httpretty.register_uri(
            httpretty.POST,
            'http://inspirehep.net/batchuploader/robotupload/insert',
            body='[INFO] foo bar baz')

        schema = load_schema('hep')
        subschema = schema['properties']['arxiv_eprints']

        config = {
            'LEGACY_ROBOTUPLOAD_URL': 'http://inspirehep.net',
            'PRODUCTION_MODE': True,
        }

        with patch.dict(current_app.config, config):
            data = {
                'arxiv_eprints': [
                    {
                        'categories': [
                            'hep-th',
                        ],
                        'value': 'hep-th/9711200',
                    },
                ],
            }
            extra_data = {}
            assert validate(data['arxiv_eprints'], subschema) is None

            obj = MockObj(data, extra_data)
            eng = MockEng()

            _send_robotupload = send_robotupload(
                marcxml_processor=hep2marc,
                mode='insert',
            )

            assert _send_robotupload(obj, eng) is None

            # Content of the info log after the upload.
            expected = ('Robotupload sent!' '[INFO] foo bar baz' 'end of upload')
            result = obj.log._info.getvalue()

            assert expected == result

            # Message left on the engine.
            expected = 'Waiting for robotupload: [INFO] foo bar baz'
            result = eng.msg

            assert expected == result
    finally:
        # Restore the global flag even when an assertion above fails.
        httpretty.HTTPretty.allow_net_connect = True
Example #13
0
class EditArticle(object):
    """Workflow used to edit records of the Literature collection."""

    name = 'edit_article'
    data_type = 'hep'

    # Tasks are listed in the order they appear in the workflow.
    workflow = [
        change_status_to_waiting,
        validate_record('hep'),
        update_record,
        send_robotupload(
            mode='replace',
            priority_config_key='LEGACY_ROBOTUPLOAD_PRIORITY_EDIT_ARTICLE',
        ),
        cleanup_pending_workflow,
    ]
Example #14
0
def test_send_robotupload_removes_references_if_feature_flag_disabled():
    """The serializer must receive the record without its ``references``.

    ``record2marcxml`` is patched; the test checks that its last call got
    ``data_without_references`` rather than the full record.
    """
    with requests_mock.Mocker() as requests_mocker:
        requests_mocker.register_uri(
            'POST',
            'http://inspirehep.net/batchuploader/robotupload/insert',
            text='[INFO] foo bar baz')

        schema = load_schema('hep')
        subschema = schema['properties']['references']

        config = {
            'LEGACY_ROBOTUPLOAD_URL': 'http://inspirehep.net',
            'PRODUCTION_MODE': True,
        }

        with patch.dict(current_app.config, config), \
                patch('inspirehep.modules.workflows.tasks.submission.record2marcxml') as mock_record2marcxml:
            data = {
                '$schema':
                'http://localhost:5000/schemas/records/hep.json',
                'references': [
                    {
                        'raw_refs': [
                            {
                                'schema':
                                'text',
                                'value':
                                '[1] J. Maldacena and A. Strominger, hep-th/9710014.',
                            },
                        ],
                    },
                ]
            }
            data_without_references = {
                '$schema': 'http://localhost:5000/schemas/records/hep.json',
            }
            extra_data = {}
            assert validate(data['references'], subschema) is None

            obj = MockObj(data, extra_data)
            eng = MockEng()

            _send_robotupload = send_robotupload(mode='insert', )

            assert _send_robotupload(obj, eng) is None
            # ``called_with`` is an auto-created (always truthy) child mock,
            # so ``assert mock.called_with(...)`` verified nothing.  The real
            # assertion method compares the arguments of the last call.
            mock_record2marcxml.assert_called_with(data_without_references)
Example #15
0
def test_send_robotupload_works_with_hepnames2marc_and_mode_insert():
    """``insert`` mode with ``hepnames2marc`` logs and sets ``eng.msg``."""
    settings = {
        'LEGACY_ROBOTUPLOAD_URL': 'http://inspirehep.net',
        'PRODUCTION_MODE': True,
    }
    record = {
        'arxiv_categories': ['hep-th'],
    }

    with requests_mock.Mocker() as mocker:
        mocker.register_uri(
            'POST',
            'http://inspirehep.net/batchuploader/robotupload/insert',
            text='[INFO] foo bar baz',
        )

        categories_schema = load_schema('authors')['properties']['arxiv_categories']

        with patch.dict(current_app.config, settings):
            assert validate(record['arxiv_categories'], categories_schema) is None

            workflow_obj = MockObj(record, {})
            engine = MockEng()

            task = send_robotupload(
                marcxml_processor=hepnames2marc,
                mode='insert',
            )

            assert task(workflow_obj, engine) is None

            # Content of the info log after the upload.
            wanted_log = (
                'Robotupload sent!'
                '[INFO] foo bar baz'
                'end of upload'
            )
            assert wanted_log == workflow_obj.log._info.getvalue()

            # Message left on the engine.
            wanted_msg = 'Waiting for robotupload: [INFO] foo bar baz'
            assert wanted_msg == engine.msg
def test_send_robotupload_removes_references_if_feature_flag_disabled():
    """The serializer must receive the record without its ``references``.

    ``record2marcxml`` is patched; the test checks that its last call got
    ``data_without_references`` rather than the full record.
    """
    with requests_mock.Mocker() as requests_mocker:
        requests_mocker.register_uri(
            'POST', 'http://inspirehep.net/batchuploader/robotupload/insert',
            text='[INFO] foo bar baz'
        )

        schema = load_schema('hep')
        subschema = schema['properties']['references']

        config = {
            'LEGACY_ROBOTUPLOAD_URL': 'http://inspirehep.net',
            'PRODUCTION_MODE': True,
        }

        with patch.dict(current_app.config, config), \
                patch('inspirehep.modules.workflows.tasks.submission.record2marcxml') as mock_record2marcxml:
            data = {
                '$schema': 'http://localhost:5000/schemas/records/hep.json',
                'references': [
                    {
                        'raw_refs': [
                            {
                                'schema': 'text',
                                'value': '[1] J. Maldacena and A. Strominger, hep-th/9710014.',
                            },
                        ],
                    },
                ]
            }
            data_without_references = {
                '$schema': 'http://localhost:5000/schemas/records/hep.json',
            }
            extra_data = {}
            assert validate(data['references'], subschema) is None

            obj = MockObj(data, extra_data)
            eng = MockEng()

            _send_robotupload = send_robotupload(
                mode='insert',
            )

            assert _send_robotupload(obj, eng) is None
            # ``assert mock.called_with(...)`` only tested the truthiness of
            # an auto-created child mock and could never fail; use the real
            # assertion method so the call arguments are verified.
            mock_record2marcxml.assert_called_with(data_without_references)
def test_send_robotupload_update_authors_when_feature_flag_is_enabled():
    """An author update is uploaded when the update-to-legacy flag is on.

    The object carries ``is-update`` in its extra data and the engine is
    created with ``data_type='authors'``.
    """
    settings = {
        'LEGACY_ROBOTUPLOAD_URL': 'http://inspirehep.net',
        'PRODUCTION_MODE': True,
        'FEATURE_FLAG_ENABLE_UPDATE_TO_LEGACY': True
    }
    author = {
        '$schema': 'http://localhost:5000/schemas/records/authors.json',
        'name': {
            'preferred_name': 'Jessica Jones',
            'value': 'Jones, Jessica'
        }
    }

    with requests_mock.Mocker() as mocker:
        mocker.register_uri(
            'POST',
            'http://inspirehep.net/batchuploader/robotupload/insert',
            text='[INFO] foo bar baz',
        )

        with patch.dict(current_app.config, settings):
            with patch('inspirehep.modules.workflows.tasks.submission.record2marcxml'):
                workflow_obj = MockObj(author, {'is-update': True})
                workflow_obj.workflow = MockWorkflow('author')
                engine = MockEng(data_type='authors')

                task = send_robotupload(mode='insert')

                assert task(workflow_obj, engine) is None

                logged = workflow_obj.log._info.getvalue()
                wanted = (
                    'Robotupload sent!'
                    '[INFO] foo bar baz'
                    'end of upload'
                )

                assert wanted == logged
def test_send_robotupload_works_with_mode_holdingpen_and_without_callback_url():
    """``holdingpen`` mode works when ``callback_url`` is ``None``."""
    settings = {
        'LEGACY_ROBOTUPLOAD_URL': 'http://inspirehep.net',
        'PRODUCTION_MODE': True,
    }
    record = {
        '$schema': 'http://localhost:5000/schemas/records/authors.json',
        'arxiv_categories': ['hep-th'],
    }

    with requests_mock.Mocker() as mocker:
        mocker.register_uri(
            'POST',
            'http://inspirehep.net/batchuploader/robotupload/holdingpen',
            text='[INFO] foo bar baz',
        )

        categories_schema = load_schema('authors')['properties']['arxiv_categories']

        with patch.dict(current_app.config, settings):
            assert validate(record['arxiv_categories'], categories_schema) is None

            workflow_obj = MockObj(record, {})
            engine = MockEng()

            task = send_robotupload(mode='holdingpen', callback_url=None)

            assert task(workflow_obj, engine) is None

            logged = workflow_obj.log._info.getvalue()
            wanted = (
                'Robotupload sent!'
                '[INFO] foo bar baz'
                'end of upload'
            )

            assert wanted == logged
Example #19
0
def test_send_robotupload_works_with_mode_holdingpen_and_without_callback_url():
    """The task succeeds in ``holdingpen`` mode with ``callback_url=None``."""
    app_config = {
        'LEGACY_ROBOTUPLOAD_URL': 'http://inspirehep.net',
        'PRODUCTION_MODE': True,
    }
    payload = {
        '$schema': 'http://localhost:5000/schemas/records/authors.json',
        'arxiv_categories': [
            'hep-th',
        ],
    }

    with requests_mock.Mocker() as mocker:
        mocker.register_uri(
            'POST',
            'http://inspirehep.net/batchuploader/robotupload/holdingpen',
            text='[INFO] foo bar baz',
        )

        authors_schema = load_schema('authors')
        categories_subschema = authors_schema['properties']['arxiv_categories']

        with patch.dict(current_app.config, app_config):
            assert validate(payload['arxiv_categories'], categories_subschema) is None

            wf_object = MockObj(payload, {})
            wf_engine = MockEng()

            upload_task = send_robotupload(
                callback_url=None,
                mode='holdingpen',
            )

            assert upload_task(wf_object, wf_engine) is None

            # The info log must hold the three messages back to back.
            assert (
                'Robotupload sent!'
                '[INFO] foo bar baz'
                'end of upload'
            ) == wf_object.log._info.getvalue()
def test_send_robotupload_works_with_mode_holdingpen_and_without_callback_url(
):
    """``holdingpen`` mode with ``hepnames2marc`` works without a callback URL.

    ``HTTPretty.allow_net_connect`` is switched off for the test and switched
    back on in a ``finally`` clause, so a failing assertion does not leave the
    class-level flag set to ``False`` for whatever runs afterwards.
    """
    httpretty.HTTPretty.allow_net_connect = False
    try:
        httpretty.register_uri(
            httpretty.POST,
            'http://inspirehep.net/batchuploader/robotupload/holdingpen',
            body='[INFO] foo bar baz')

        schema = load_schema('authors')
        subschema = schema['properties']['arxiv_categories']

        config = {
            'LEGACY_ROBOTUPLOAD_URL': 'http://inspirehep.net',
            'PRODUCTION_MODE': True,
        }

        with patch.dict(current_app.config, config):
            data = {
                'arxiv_categories': [
                    'hep-th',
                ],
            }
            extra_data = {}
            assert validate(data['arxiv_categories'], subschema) is None

            obj = MockObj(data, extra_data)
            eng = MockEng()

            _send_robotupload = send_robotupload(
                marcxml_processor=hepnames2marc,
                mode='holdingpen',
                callback_url=None,
            )

            assert _send_robotupload(obj, eng) is None

            # Content of the info log after the upload.
            expected = ('Robotupload sent!' '[INFO] foo bar baz' 'end of upload')
            result = obj.log._info.getvalue()

            assert expected == result
    finally:
        # Restore the global flag even when an assertion above fails.
        httpretty.HTTPretty.allow_net_connect = True
Example #21
0
def test_send_robotupload_update_authors_when_feature_flag_is_enabled():
    """With the flag enabled, an author marked ``is-update`` is uploaded."""
    app_config = {
        'LEGACY_ROBOTUPLOAD_URL': 'http://inspirehep.net',
        'PRODUCTION_MODE': True,
        'FEATURE_FLAG_ENABLE_UPDATE_TO_LEGACY': True
    }
    author_name = {
        'preferred_name': 'Jessica Jones',
        'value': 'Jones, Jessica'
    }
    payload = {
        '$schema': 'http://localhost:5000/schemas/records/authors.json',
        'name': author_name,
    }
    flags = {'is-update': True}

    with requests_mock.Mocker() as mocker:
        mocker.register_uri(
            'POST',
            'http://inspirehep.net/batchuploader/robotupload/insert',
            text='[INFO] foo bar baz',
        )

        with patch.dict(current_app.config, app_config):
            with patch('inspirehep.modules.workflows.tasks.submission.record2marcxml'):
                wf_object = MockObj(payload, flags)
                wf_object.workflow = MockWorkflow('author')
                wf_engine = MockEng(data_type='authors')

                upload_task = send_robotupload(mode='insert')

                assert upload_task(wf_object, wf_engine) is None

                # The info log must hold the three messages back to back.
                assert (
                    'Robotupload sent!'
                    '[INFO] foo bar baz'
                    'end of upload'
                ) == wf_object.log._info.getvalue()
def test_send_robotupload_new_article_when_feature_flag_is_disabled():
    """The upload is still logged as sent for a record without ``is-update``.

    ``FEATURE_FLAG_ENABLE_UPDATE_TO_LEGACY`` is ``False`` in this test.
    """
    app_config = {
        'LEGACY_ROBOTUPLOAD_URL': 'http://inspirehep.net',
        'PRODUCTION_MODE': True,
        'FEATURE_FLAG_ENABLE_UPDATE_TO_LEGACY': False
    }
    payload = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
    }

    with requests_mock.Mocker() as mocker:
        mocker.register_uri(
            'POST',
            'http://inspirehep.net/batchuploader/robotupload/insert',
            text='[INFO] foo bar baz',
        )

        with patch.dict(current_app.config, app_config):
            with patch('inspirehep.modules.workflows.tasks.submission.record2marcxml'):
                wf_object = MockObj(payload, {})
                wf_engine = MockEng()

                upload_task = send_robotupload(mode='insert')

                assert upload_task(wf_object, wf_engine) is None

                # The info log must hold the three messages back to back.
                assert (
                    'Robotupload sent!'
                    '[INFO] foo bar baz'
                    'end of upload'
                ) == wf_object.log._info.getvalue()
def test_send_robotupload_update_article_when_feature_flag_is_disabled():
    """An article update is skipped while the update-to-legacy flag is off.

    The object is marked ``is-update`` and the info log must contain the
    "skipping upload" message.
    """
    settings = {
        'LEGACY_ROBOTUPLOAD_URL': 'http://inspirehep.net',
        'PRODUCTION_MODE': True,
        'FEATURE_FLAG_ENABLE_UPDATE_TO_LEGACY': False
    }
    record = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
    }
    skip_message = 'skipping upload to legacy, feature flag ``FEATURE_FLAG_ENABLE_UPDATE_TO_LEGACY`` is disabled.'

    with patch.dict(current_app.config, settings):
        with patch('inspirehep.modules.workflows.tasks.submission.record2marcxml'):
            workflow_obj = MockObj(record, {'is-update': True})
            engine = MockEng()

            task = send_robotupload(mode='insert')

            assert task(workflow_obj, engine) is None
            assert skip_message in workflow_obj.log._info.getvalue()
def test_send_robotupload_does_nothing_when_not_in_production_mode():
    """``send_robotupload`` returns ``None`` when ``PRODUCTION_MODE`` is off.

    Real network connections are disallowed through httpretty for the
    duration of the test. The flag is restored in a ``finally`` clause so a
    failing assertion cannot leave networking disabled for later tests.
    """
    httpretty.HTTPretty.allow_net_connect = False
    try:
        schema = load_schema('hep')
        subschema = schema['properties']['arxiv_eprints']

        config = {
            'LEGACY_ROBOTUPLOAD_URL': 'http://inspirehep.net',
            'PRODUCTION_MODE': False,
        }

        with patch.dict(current_app.config, config):
            data = {
                'arxiv_eprints': [
                    {
                        'categories': [
                            'hep-th',
                        ],
                        'value': 'hep-th/9711200',
                    },
                ],
            }
            extra_data = {}
            # Sanity check: the fixture itself must be valid against the schema.
            assert validate(data['arxiv_eprints'], subschema) is None

            obj = MockObj(data, extra_data)
            eng = MockEng()

            _send_robotupload = send_robotupload(
                marcxml_processor=hep2marc,
                mode='insert',
            )

            assert _send_robotupload(obj, eng) is None
    finally:
        # Always re-enable network access, even if an assertion above failed.
        httpretty.HTTPretty.allow_net_connect = True
Example #25
0
from inspirehep.modules.workflows.tasks.submission import (close_ticket,
                                                           create_ticket,
                                                           reply_ticket,
                                                           send_robotupload)
from inspirehep.modules.workflows.tasks.upload import store_record, set_schema

from inspirehep.modules.authors.tasks import (
    curation_ticket_context,
    curation_ticket_needed,
    new_ticket_context,
    reply_ticket_context,
    update_ticket_context,
)

# Single-step task list: robotupload in "insert" mode.
SEND_TO_LEGACY = [send_robotupload(mode="insert")]

# Reply on the ticket with the "user accepted" template, then close it.
NOTIFY_ACCEPTED = [
    reply_ticket(
        template="authors/tickets/user_accepted.html",
        context_factory=reply_ticket_context,
    ),
    close_ticket(ticket_id_key="ticket_id"),
]

CLOSE_TICKET_IF_NEEDED = [
    IF(curation_ticket_needed, [
        create_ticket(template="authors/tickets/curation_needed.html",
                      queue="AUTHORS_curation",
                      context_factory=curation_ticket_context,
                      ticket_id_key="curation_ticket_id"),
    ]),
Example #26
0
    remove_references,
    set_refereed_and_fix_document_type,
    fix_submission_number,
]


# Objects marked 'is-update' are only flagged as skipped; all others go
# through robotupload in "replace" mode.
SEND_TO_LEGACY = [
    IF_ELSE(is_marked('is-update'), [
        # TODO: once we have the merger in place
        # send_robotupload(mode="replace")
        mark('skipped-robot-upload', True),
    ], [
        send_robotupload(mode="replace"),
    ]),
]


# wait_webcoll is scheduled only for objects not marked 'is-update'.
WAIT_FOR_LEGACY_WEBCOLL = [
    IF_NOT(is_marked('is-update'), wait_webcoll),
]


STOP_IF_EXISTING_SUBMISSION = [
    IF(
Example #27
0
from inspirehep.modules.workflows.tasks.submission import (close_ticket,
                                                           create_ticket,
                                                           reply_ticket,
                                                           send_robotupload)
from inspirehep.modules.workflows.tasks.upload import store_record, set_schema

from inspirehep.modules.authors.tasks import (
    curation_ticket_context,
    curation_ticket_needed,
    new_ticket_context,
    reply_ticket_context,
    update_ticket_context,
)

# Robotupload in "insert" mode, using hepnames2marc as the MARCXML processor.
SEND_TO_LEGACY = [
    send_robotupload(
        marcxml_processor=hepnames2marc,
        mode="insert",
    ),
]

# Reply on the ticket with the "user accepted" template, then close it.
NOTIFY_ACCEPTED = [
    reply_ticket(
        template="authors/tickets/user_accepted.html",
        context_factory=reply_ticket_context,
    ),
    close_ticket(ticket_id_key="ticket_id"),
]

CLOSE_TICKET_IF_NEEDED = [
    IF(curation_ticket_needed, [
        create_ticket(template="authors/tickets/curation_needed.html",
                      queue="AUTHORS_curation",
                      context_factory=curation_ticket_context,
                      ticket_id_key="curation_ticket_id"),
    ]),
Example #28
0
    send_robotupload
)
from inspirehep.modules.workflows.tasks.upload import store_record, set_schema

from inspirehep.modules.authors.tasks import (
    curation_ticket_context,
    curation_ticket_needed,
    new_ticket_context,
    reply_ticket_context,
    update_ticket_context,
)


# Robotupload in "insert" mode, using hepnames2marc as the MARCXML processor.
SEND_TO_LEGACY = [
    send_robotupload(marcxml_processor=hepnames2marc, mode="insert"),
]


# Reply on the ticket with the "user accepted" template, then close it.
NOTIFY_ACCEPTED = [
    reply_ticket(template="authors/tickets/user_accepted.html",
                 context_factory=reply_ticket_context),
    close_ticket(ticket_id_key="ticket_id"),
]


CLOSE_TICKET_IF_NEEDED = [
    IF(curation_ticket_needed, [
        create_ticket(
Example #29
0
            ticket_id_key='curation_ticket_id',
        ),
    ),
]

# Tasks applied to the record after enhancement, in this order.
POSTENHANCE_RECORD = [
    add_core,
    filter_keywords,
    prepare_keywords,
    remove_references,
]

# Updates: build the update payload and upload it in "correct" mode.
# Everything else: plain "insert" upload.
SEND_TO_LEGACY = [
    IF_ELSE(
        is_marked('is-update'),
        [
            prepare_update_payload(extra_data_key="update_payload"),
            send_robotupload(
                mode="correct",
                extra_data_key="update_payload",
            ),
        ],
        [
            send_robotupload(mode="insert"),
        ],
    ),
]

# wait_webcoll is scheduled only for objects not marked 'is-update'.
WAIT_FOR_LEGACY_WEBCOLL = [
    IF_NOT(is_marked('is-update'), wait_webcoll),
]

# For submissions marked 'is-update', run the NOTIFY_ALREADY_EXISTING tasks.
STOP_IF_EXISTING_SUBMISSION = [
    IF(
        is_submission,
        IF(
            is_marked('is-update'),
            NOTIFY_ALREADY_EXISTING,
        ),
    )
]
Example #30
0
    send_robotupload
)
from inspirehep.modules.workflows.tasks.upload import store_record, set_schema

from inspirehep.modules.workflows.tasks.author import (
    curation_ticket_context,
    curation_ticket_needed,
    new_ticket_context,
    reply_ticket_context,
    update_ticket_context,
)
from inspirehep.modules.workflows.utils import do_not_repeat


# Robotupload in "insert" mode; the priority is looked up through the
# 'LEGACY_ROBOTUPLOAD_PRIORITY_AUTHOR' config key passed to the task.
SEND_TO_LEGACY = [
    send_robotupload(
        mode="insert",
        priority_config_key='LEGACY_ROBOTUPLOAD_PRIORITY_AUTHOR',
    ),
]


# Reply on the ticket and close it; each task is wrapped in do_not_repeat
# under its own key.
NOTIFY_ACCEPTED = [
    do_not_repeat('reply_ticket_author_submission_accepted')(reply_ticket(
        template="authors/tickets/user_accepted.html",
        context_factory=reply_ticket_context)),
    do_not_repeat('close_ticket_author_submission_accepted')(
        close_ticket(ticket_id_key="ticket_id")),
]

Example #31
0
                                                           create_ticket,
                                                           reply_ticket,
                                                           send_robotupload)
from inspirehep.modules.workflows.tasks.upload import store_record, set_schema

from inspirehep.modules.workflows.tasks.author import (
    curation_ticket_context,
    curation_ticket_needed,
    new_ticket_context,
    reply_ticket_context,
    update_ticket_context,
)
from inspirehep.modules.workflows.utils import do_not_repeat

# Robotupload in "insert" mode; the priority is looked up through the
# 'LEGACY_ROBOTUPLOAD_PRIORITY_AUTHOR' config key passed to the task.
SEND_TO_LEGACY = [
    send_robotupload(
        mode="insert",
        priority_config_key='LEGACY_ROBOTUPLOAD_PRIORITY_AUTHOR',
    ),
]

# Reply on the ticket and close it; each task is wrapped in do_not_repeat
# under its own key.
NOTIFY_ACCEPTED = [
    do_not_repeat('reply_ticket_author_submission_accepted')(
        reply_ticket(
            template="authors/tickets/user_accepted.html",
            context_factory=reply_ticket_context,
        )
    ),
    do_not_repeat('close_ticket_author_submission_accepted')(
        close_ticket(ticket_id_key="ticket_id")
    ),
]

CLOSE_TICKET_IF_NEEDED = [
    IF(curation_ticket_needed, [
        do_not_repeat('create_ticket_author_submission_curation_needed')(
            create_ticket(template="authors/tickets/curation_needed.html",
                          queue="AUTHORS_curation",
Example #32
0
    filter_keywords,
    prepare_keywords,
    user_pdf_get,
    remove_references,
    prepare_files,
]


# Existing article: prepare the update payload and upload it in "correct"
# mode. New article: upload in "insert" mode, then run wait_webcoll.
SEND_TO_LEGACY_AND_WAIT = [
    IF_ELSE(article_exists, [
        prepare_update_payload(extra_data_key="update_payload"),
        send_robotupload(marcxml_processor=hep2marc,
                         mode="correct",
                         extra_data_key="update_payload"),
    ], [
        send_robotupload(marcxml_processor=hep2marc, mode="insert"),
        wait_webcoll,
    ]),
]

CHECK_IF_MERGE_AND_STOP_IF_SO = [
    IF(
        article_exists,
Example #33
0
from inspirehep.modules.workflows.tasks.submission import (close_ticket,
                                                           create_ticket,
                                                           reply_ticket,
                                                           send_robotupload)
from inspirehep.modules.workflows.tasks.upload import store_record, set_schema

from inspirehep.modules.authors.tasks import (
    curation_ticket_context,
    curation_ticket_needed,
    new_ticket_context,
    reply_ticket_context,
    update_ticket_context,
)

# Robotupload in "insert" mode, using hepnames2marc as the MARCXML processor.
SEND_TO_LEGACY = [
    send_robotupload(
        marcxml_processor=hepnames2marc,
        mode="insert",
    ),
]

# Reply on the ticket with the "user accepted" template, then close it.
NOTIFY_ACCEPTED = [
    reply_ticket(
        template="authors/tickets/user_accepted.html",
        context_factory=reply_ticket_context,
    ),
    close_ticket(ticket_id_key="ticket_id"),
]

CLOSE_TICKET_IF_NEEDED = [
    IF(curation_ticket_needed, [
        create_ticket(template="authors/tickets/curation_needed.html",
                      queue="AUTHORS_curation",
                      context_factory=curation_ticket_context,
                      ticket_id_key="curation_ticket_id"),
    ]),
Example #34
0
]

# Tasks applied to the record after enhancement, in this order.
POSTENHANCE_RECORD = [
    add_core,
    add_note_entry,
    filter_keywords,
    prepare_keywords,
    remove_references,
    prepare_files,
]

# Existing article: prepare the update payload and upload it in "correct"
# mode. New article: upload in "insert" mode, then run wait_webcoll.
SEND_TO_LEGACY_AND_WAIT = [
    IF_ELSE(
        article_exists,
        [
            prepare_update_payload(extra_data_key="update_payload"),
            send_robotupload(
                marcxml_processor=hep2marc,
                mode="correct",
                extra_data_key="update_payload",
            ),
        ],
        [
            send_robotupload(
                marcxml_processor=hep2marc,
                mode="insert",
            ),
            wait_webcoll,
        ],
    ),
]

CHECK_IF_MERGE_AND_STOP_IF_SO = [
    IF(
        article_exists,
        [
            IF_ELSE(
                is_submission,
                NOTIFY_ALREADY_EXISTING,
                [
Example #35
0
        ),
    ),
]


# Tasks applied to the record after enhancement, in this order.
POSTENHANCE_RECORD = [
    add_core,
    filter_keywords,
    prepare_keywords,
    set_refereed_and_fix_document_type,
    fix_submission_number,
]


# Single-step task list: robotupload in 'replace' mode.
SEND_TO_LEGACY = [send_robotupload(mode='replace')]


# wait_webcoll is scheduled only for objects not marked 'is-update'.
WAIT_FOR_LEGACY_WEBCOLL = [
    IF_NOT(is_marked('is-update'), wait_webcoll),
]


STOP_IF_EXISTING_SUBMISSION = [
    IF(
        is_submission,
        IF(
Example #36
0
    send_robotupload
)
from inspirehep.modules.workflows.tasks.upload import store_record, set_schema

from inspirehep.modules.authors.tasks import (
    curation_ticket_context,
    curation_ticket_needed,
    new_ticket_context,
    reply_ticket_context,
    update_ticket_context,
)


# Single-step task list: robotupload in "insert" mode.
SEND_TO_LEGACY = [
    send_robotupload(mode="insert"),
]


# Reply on the ticket with the "user accepted" template, then close it.
NOTIFY_ACCEPTED = [
    reply_ticket(template="authors/tickets/user_accepted.html",
                 context_factory=reply_ticket_context),
    close_ticket(ticket_id_key="ticket_id"),
]


CLOSE_TICKET_IF_NEEDED = [
    IF(curation_ticket_needed, [
        create_ticket(
Example #37
0
class Article(object):
    """Article ingestion workflow for Literature collection.

    ``workflow`` is an ordered list of tasks and control-flow helpers
    (``IF``, ``IF_ELSE``). In order it covers:

    1. schema setup and record signals,
    2. matching against existing records and the Holding Pen,
    3. arXiv-specific processing, classification and guessing steps,
    4. the approval halt or automatic rejection,
    5. for accepted records: upload via robotupload or local storage,
       plus ticket replies for submissions,
    6. closing the submission ticket.
    """
    name = "HEP"
    data_type = "hep"

    workflow = [
        # Make sure schema is set for proper indexing in Holding Pen
        set_schema,
        # Emit record signals to receive metadata enrichment
        emit_record_signals,
        # Query locally or via legacy search API to see if article
        # is already ingested and this is an update
        IF(article_exists, [
            mark('match-found', True),
        ]),
        IF_ELSE(
            is_submission,
            [
                # Article matching for submissions
                # ================================
                IF(pending_in_holding_pen, [
                    mark('already-in-holding-pen', True),
                ]),
                # Special RT integration for submissions
                # ======================================
                create_ticket(
                    template="literaturesuggest/tickets/curator_submitted.html",
                    queue="HEP_add_user",
                    context_factory=new_ticket_context,
                    ticket_id_key="ticket_id"),
                reply_ticket(
                    template="literaturesuggest/tickets/user_submitted.html",
                    context_factory=reply_ticket_context,
                    keep_new=True),
            ],
            [
                # Article matching for non-submissions
                # ====================================
                # Query holding pen to see if we already have this article ingested
                #
                # NOTE on updates:
                #     If the same article has been harvested before and the
                #     ingestion has been completed, process is continued
                #     to allow for updates.
                IF(pending_in_holding_pen, [
                    mark('already-in-holding-pen', True),
                    mark('delete', True),
                ]),
                IF(
                    is_arxiv_paper,
                    [
                        # FIXME: This filtering step should be removed when this
                        #        workflow includes arXiv CORE harvesting
                        IF(already_harvested, [
                            mark('already-ingested', True),
                            mark('stop', True),
                        ]),
                        # FIXME: This filtering step should be removed when:
                        #        old previously rejected records are treated
                        #        differently e.g. good auto-reject heuristics or better
                        #        time based filtering (5 days is quite random now).
                        IF(previously_rejected(), [
                            mark('already-ingested', True),
                            mark('stop', True),
                        ]),
                    ]),
                IF(is_marked('delete'),
                   [update_old_object, delete_self_and_stop_processing]),
                IF(is_marked('stop'), [stop_processing]),
            ]),
        #
        # Article Processing
        # ==================
        IF(is_arxiv_paper, [
            arxiv_fulltext_download,
            arxiv_plot_extract,
            arxiv_refextract,
            arxiv_author_list("authorlist2marcxml.xsl"),
        ]),
        extract_journal_info,
        classify_paper(
            taxonomy="HEPont.rdf",
            only_core_tags=False,
            spires=True,
            with_author_keywords=True,
        ),
        filter_core_keywords,
        guess_categories,
        IF(is_experimental_paper, [
            guess_experiments,
        ]),
        guess_keywords,
        # Predict action for a generic HEP paper based only on title
        # and abstract.
        guess_coreness,  # ("arxiv_skip_astro_title_abstract.pickle)
        # Check if we shall halt or auto-reject
        # =====================================
        # NOTE: User submissions are always relevant
        IF_ELSE(is_record_relevant, [
            halt_record(action="hep_approval"),
        ], [reject_record("Article automatically rejected"), stop_processing]),
        IF_ELSE(is_record_accepted, [
            IF(article_exists, [
                IF_ELSE(is_submission, [
                    reject_record('Article was already found on INSPIRE'),
                    # Notify the submitter and close the ticket *before*
                    # stopping. NOTE(review): assuming ``stop_processing``
                    # ends the run for this object, any task listed after it
                    # would never execute - confirm against its definition.
                    reply_ticket(
                        template="literaturesuggest/tickets/user_rejected_exists.html",
                        context_factory=reply_ticket_context),
                    close_ticket(ticket_id_key="ticket_id"),
                    stop_processing,
                ], [
                    halt_record(action="merge_approval"),
                ]),
            ]),
            add_core,
            add_note_entry,
            filter_keywords,
            user_pdf_get,
            IF_ELSE(shall_push_remotely, [
                # Existing article: send a "correct" upload built from the
                # prepared update payload; otherwise insert a new record.
                IF_ELSE(article_exists, [
                    prepare_update_payload(extra_data_key="update_payload"),
                    send_robotupload(marcxml_processor=hep2marc,
                                     mode="correct",
                                     extra_data_key="update_payload"),
                ], [
                    send_robotupload(marcxml_processor=hep2marc,
                                     mode="insert"),
                ])
            ], [store_record]),
            IF(is_submission, [
                IF(curation_ticket_needed, [
                    create_ticket(
                        template="literaturesuggest/tickets/curation_core.html",
                        queue="HEP_curation",
                        context_factory=curation_ticket_context,
                        ticket_id_key="curation_ticket_id")
                ]),
                reply_ticket(
                    template="literaturesuggest/tickets/user_accepted.html",
                    context_factory=reply_ticket_context),
            ]),
        ], [
            # Record not accepted: only submissions get a ticket reply.
            IF(is_submission,
               [reply_ticket(context_factory=reply_ticket_context)])
        ]),
        close_ticket(ticket_id_key="ticket_id")
    ]
Example #38
0
                                                           create_ticket,
                                                           reply_ticket,
                                                           send_robotupload)
from inspirehep.modules.workflows.tasks.upload import store_record, set_schema

from inspirehep.modules.workflows.tasks.author import (
    curation_ticket_context,
    curation_ticket_needed,
    new_ticket_context,
    reply_ticket_context,
    update_ticket_context,
)
from inspirehep.modules.workflows.utils import do_not_repeat

# Robotupload in 'replace' mode; the priority is looked up through the
# 'LEGACY_ROBOTUPLOAD_PRIORITY_AUTHOR' config key passed to the task.
SEND_TO_LEGACY = [
    send_robotupload(
        mode='replace',
        priority_config_key='LEGACY_ROBOTUPLOAD_PRIORITY_AUTHOR',
    ),
]

# Reply on the ticket and close it; each task is wrapped in do_not_repeat
# under its own key.
NOTIFY_ACCEPTED = [
    do_not_repeat('reply_ticket_author_submission_accepted')(
        reply_ticket(
            template="authors/tickets/user_accepted.html",
            context_factory=reply_ticket_context,
        )
    ),
    do_not_repeat('close_ticket_author_submission_accepted')(
        close_ticket(ticket_id_key="ticket_id")
    ),
]

CLOSE_TICKET_IF_NEEDED = [
    IF(curation_ticket_needed, [
        do_not_repeat('create_ticket_author_submission_curation_needed')(
            create_ticket(template="authors/tickets/curation_needed.html",
                          queue="AUTHORS_curation",