def test_send_robotupload_works_doesnt_fail_when_removing_references_and_no_references():
    """Run ``send_robotupload`` in insert mode on a record that has no references.

    The POST to the robotupload insert URL and ``record2marcxml`` are both
    mocked. The task must return ``None`` and ``record2marcxml`` must have
    been called with the record data.
    """
    with requests_mock.Mocker() as requests_mocker:
        requests_mocker.register_uri(
            'POST', 'http://inspirehep.net/batchuploader/robotupload/insert',
            text='[INFO] foo bar baz'
        )

        config = {
            'LEGACY_ROBOTUPLOAD_URL': 'http://inspirehep.net',
            'PRODUCTION_MODE': True,
        }

        with patch.dict(current_app.config, config), \
                patch('inspirehep.modules.workflows.tasks.submission.record2marcxml') as mock_record2marcxml:
            data = {
                '$schema': 'http://localhost:5000/schemas/records/hep.json',
            }
            extra_data = {}

            obj = MockObj(data, extra_data)
            eng = MockEng()

            _send_robotupload = send_robotupload(
                mode='insert',
            )

            assert _send_robotupload(obj, eng) is None
            # ``called_with`` is not a Mock assertion: it auto-creates a truthy
            # child mock, so ``assert mock.called_with(...)`` can never fail.
            # ``assert_called_with`` actually checks the last call's arguments.
            mock_record2marcxml.assert_called_with(data)
def test_send_robotupload_works_doesnt_fail_when_removing_references_and_no_references():
    """Check the insert-mode task on a record that carries no ``references`` key.

    ``record2marcxml`` is patched and the robotupload insert URL is mocked.
    The task is expected to return ``None`` and to pass the record data to
    ``record2marcxml``.
    """
    with requests_mock.Mocker() as requests_mocker:
        requests_mocker.register_uri(
            'POST',
            'http://inspirehep.net/batchuploader/robotupload/insert',
            text='[INFO] foo bar baz')

        config = {
            'LEGACY_ROBOTUPLOAD_URL': 'http://inspirehep.net',
            'PRODUCTION_MODE': True,
        }

        with patch.dict(current_app.config, config), \
                patch('inspirehep.modules.workflows.tasks.submission.record2marcxml') as mock_record2marcxml:
            data = {
                '$schema': 'http://localhost:5000/schemas/records/hep.json',
            }
            extra_data = {}

            obj = MockObj(data, extra_data)
            eng = MockEng()

            _send_robotupload = send_robotupload(mode='insert', )

            assert _send_robotupload(obj, eng) is None
            # The previous ``assert mock.called_with(data)`` was always true
            # (attribute access on a Mock yields another, truthy, Mock).
            # Use the real assertion helper so the arguments are verified.
            mock_record2marcxml.assert_called_with(data)
def test_send_robotupload_does_nothing_when_not_in_production_mode():
    """With ``PRODUCTION_MODE`` set to ``False`` the insert-mode task returns ``None``.

    No URI is registered on the ``requests_mock`` mocker in this test.
    """
    with requests_mock.Mocker():
        hep_schema = load_schema('hep')
        eprints_subschema = hep_schema['properties']['arxiv_eprints']

        app_config = {
            'LEGACY_ROBOTUPLOAD_URL': 'http://inspirehep.net',
            'PRODUCTION_MODE': False,
        }

        with patch.dict(current_app.config, app_config):
            record = {
                '$schema': 'http://localhost:5000/schemas/records/hep.json',
                'arxiv_eprints': [
                    {
                        'categories': ['hep-th'],
                        'value': 'hep-th/9711200',
                    },
                ],
            }
            # Make sure the fixture itself is valid against the subschema.
            assert validate(record['arxiv_eprints'], eprints_subschema) is None

            workflow_obj = MockObj(record, {})
            engine = MockEng()

            upload_task = send_robotupload(mode='insert')

            assert upload_task(workflow_obj, engine) is None
def test_send_robotupload_new_article_when_feature_flag_is_disabled():
    """A record without ``is-update`` is still uploaded when the update flag is off.

    ``FEATURE_FLAG_ENABLE_UPDATE_TO_LEGACY`` is ``False`` and ``extra_data`` is
    empty; the info log must contain the "Robotupload sent!" messages.
    """
    with requests_mock.Mocker() as mocker:
        mocker.register_uri(
            'POST',
            'http://inspirehep.net/batchuploader/robotupload/insert',
            text='[INFO] foo bar baz',
        )

        app_config = {
            'LEGACY_ROBOTUPLOAD_URL': 'http://inspirehep.net',
            'PRODUCTION_MODE': True,
            'FEATURE_FLAG_ENABLE_UPDATE_TO_LEGACY': False,
        }

        with patch.dict(current_app.config, app_config):
            with patch('inspirehep.modules.workflows.tasks.submission.record2marcxml'):
                record = {
                    '$schema': 'http://localhost:5000/schemas/records/hep.json',
                }

                workflow_obj = MockObj(record, {})
                engine = MockEng()

                upload_task = send_robotupload(mode='insert')
                assert upload_task(workflow_obj, engine) is None

                # The mocked log concatenates messages without separators.
                expected_log = (
                    'Robotupload sent!'
                    '[INFO] foo bar baz'
                    'end of upload'
                )
                assert workflow_obj.log._info.getvalue() == expected_log
def test_send_robotupload_does_nothing_when_not_in_production_mode():
    """The insert-mode task returns ``None`` when ``PRODUCTION_MODE`` is ``False``.

    The ``requests_mock`` mocker is active but has no URI registered.
    """
    with requests_mock.Mocker():
        subschema = load_schema('hep')['properties']['arxiv_eprints']

        settings = {
            'LEGACY_ROBOTUPLOAD_URL': 'http://inspirehep.net',
            'PRODUCTION_MODE': False,
        }

        with patch.dict(current_app.config, settings):
            eprint = {
                'categories': ['hep-th'],
                'value': 'hep-th/9711200',
            }
            payload = {
                '$schema': 'http://localhost:5000/schemas/records/hep.json',
                'arxiv_eprints': [eprint],
            }
            # The fixture must validate before it is fed to the task.
            assert validate(payload['arxiv_eprints'], subschema) is None

            wf_object = MockObj(payload, {})
            wf_engine = MockEng()

            task = send_robotupload(mode='insert')

            assert task(wf_object, wf_engine) is None
class Author(object):
    """Workflow definition for ingesting records into the HEPNames/Authors collection."""

    name = 'Author'
    data_type = 'authors'

    # The steps below are listed in execution order.
    workflow = [
        # Set the schema first so the record is indexed properly in the
        # Holding Pen.
        set_schema,
        # Emit record signals so that metadata enrichment can hook in.
        emit_record_signals,
        IF_ELSE(
            is_marked('is-update'),
            # Branch for records marked ``is-update``.
            [
                send_robotupload(
                    marcxml_processor=hepnames2marc,
                    mode='holdingpen',
                ),
                create_ticket(
                    template='authors/tickets/curator_update.html',
                    queue='Authors_cor_user',
                    context_factory=update_ticket_context,
                ),
            ],
            # Branch for records that are not marked ``is-update``.
            [
                create_ticket(
                    template='authors/tickets/curator_new.html',
                    queue='Authors_add_user',
                    context_factory=new_ticket_context,
                ),
                reply_ticket(
                    template='authors/tickets/user_new.html',
                    context_factory=reply_ticket_context,
                    keep_new=True,
                ),
                halt_record(
                    action='author_approval',
                    message='Accept submission?',
                ),
                IF_ELSE(
                    is_record_accepted,
                    [
                        send_robotupload(
                            marcxml_processor=hepnames2marc,
                            mode='insert',
                        ),
                        reply_ticket(
                            template='authors/tickets/user_accepted.html',
                            context_factory=reply_ticket_context,
                        ),
                        close_ticket(ticket_id_key='ticket_id'),
                        IF(
                            curation_ticket_needed,
                            [
                                create_ticket(
                                    template='authors/tickets/curation_needed.html',
                                    queue='AUTHORS_curation',
                                    context_factory=curation_ticket_context,
                                    ticket_id_key='curation_ticket_id',
                                ),
                            ],
                        ),
                    ],
                    [
                        close_ticket(ticket_id_key='ticket_id'),
                    ],
                ),
            ],
        ),
    ]
def test_send_robotupload_logs_on_error_response():
    """An ``[ERROR]`` response body makes the insert-mode task raise and log.

    Both the exception message and the text written to the error log are
    compared against their expected values.
    """
    with requests_mock.Mocker() as mocker:
        mocker.register_uri(
            'POST',
            'http://inspirehep.net/batchuploader/robotupload/insert',
            text='[ERROR] cannot use the service',
        )

        eprints_subschema = load_schema('hep')['properties']['arxiv_eprints']

        app_config = {
            'LEGACY_ROBOTUPLOAD_URL': 'http://inspirehep.net',
            'PRODUCTION_MODE': True,
        }

        with patch.dict(current_app.config, app_config):
            record = {
                '$schema': 'http://localhost:5000/schemas/records/hep.json',
                'arxiv_eprints': [
                    {
                        'categories': ['hep-th'],
                        'value': 'hep-th/9711200',
                    },
                ],
            }
            assert validate(record['arxiv_eprints'], eprints_subschema) is None

            workflow_obj = MockObj(record, {})
            engine = MockEng()

            upload_task = send_robotupload(mode='insert')

            with pytest.raises(Exception) as excinfo:
                upload_task(workflow_obj, engine)

            # Checked after the ``raises`` block, once the exception is captured.
            expected_message = (
                'Error while submitting robotupload: '
                '[ERROR] cannot use the service'
            )
            assert str(excinfo.value) == expected_message

            expected_log = (
                'Your IP is not in app.config_BATCHUPLOADER_WEB_ROBOT_RIGHTS on host: '
                '[ERROR] cannot use the service'
            )
            assert workflow_obj.log._error.getvalue() == expected_log
def test_send_robotupload_logs_on_error_response():
    """An ``[ERROR]`` response makes the ``hep2marc`` insert task raise and log.

    HTTPretty is told to refuse real network connections for the duration
    of the test; the setting is switched back in a ``finally`` clause so a
    failing assertion cannot leave the global flag disabled.
    """
    httpretty.HTTPretty.allow_net_connect = False
    try:
        httpretty.register_uri(
            httpretty.POST,
            'http://inspirehep.net/batchuploader/robotupload/insert',
            body='[ERROR] cannot use the service')

        schema = load_schema('hep')
        subschema = schema['properties']['arxiv_eprints']

        config = {
            'LEGACY_ROBOTUPLOAD_URL': 'http://inspirehep.net',
            'PRODUCTION_MODE': True,
        }

        with patch.dict(current_app.config, config):
            data = {
                'arxiv_eprints': [
                    {
                        'categories': [
                            'hep-th',
                        ],
                        'value': 'hep-th/9711200',
                    },
                ],
            }
            extra_data = {}
            assert validate(data['arxiv_eprints'], subschema) is None

            obj = MockObj(data, extra_data)
            eng = MockEng()

            _send_robotupload = send_robotupload(
                marcxml_processor=hep2marc,
                mode='insert',
            )

            with pytest.raises(Exception) as excinfo:
                _send_robotupload(obj, eng)

            expected = ('Error while submitting robotupload: '
                        '[ERROR] cannot use the service')
            result = str(excinfo.value)

            assert expected == result

            expected = (
                'Your IP is not in app.config_BATCHUPLOADER_WEB_ROBOT_RIGHTS on host: '
                '[ERROR] cannot use the service')
            result = obj.log._error.getvalue()

            assert expected == result
    finally:
        # Always re-enable network access, even when an assertion above fails.
        httpretty.HTTPretty.allow_net_connect = True
def test_send_robotupload_works_with_mode_insert_on_hep():
    """Insert-mode upload of a HEP record logs the response and sets the engine message.

    The robotupload insert URL is mocked to answer ``[INFO] foo bar baz``.
    """
    with requests_mock.Mocker() as mocker:
        mocker.register_uri(
            'POST',
            'http://inspirehep.net/batchuploader/robotupload/insert',
            text='[INFO] foo bar baz',
        )

        eprints_subschema = load_schema('hep')['properties']['arxiv_eprints']

        app_config = {
            'LEGACY_ROBOTUPLOAD_URL': 'http://inspirehep.net',
            'PRODUCTION_MODE': True,
        }

        with patch.dict(current_app.config, app_config):
            record = {
                '$schema': 'http://localhost:5000/schemas/records/hep.json',
                'arxiv_eprints': [
                    {
                        'categories': ['hep-th'],
                        'value': 'hep-th/9711200',
                    },
                ],
            }
            assert validate(record['arxiv_eprints'], eprints_subschema) is None

            workflow_obj = MockObj(record, {})
            engine = MockEng()

            upload_task = send_robotupload(mode='insert')
            assert upload_task(workflow_obj, engine) is None

            # The mocked log concatenates messages without separators.
            expected_log = (
                'Robotupload sent!'
                '[INFO] foo bar baz'
                'end of upload'
            )
            assert workflow_obj.log._info.getvalue() == expected_log

            assert engine.msg == 'Waiting for robotupload: [INFO] foo bar baz'
def test_send_robotupload_logs_on_error_response():
    """The insert-mode task raises and writes to the error log on an ``[ERROR]`` body.

    The mocked robotupload URL answers ``[ERROR] cannot use the service``.
    """
    error_body = '[ERROR] cannot use the service'

    with requests_mock.Mocker() as mocker:
        mocker.register_uri(
            'POST',
            'http://inspirehep.net/batchuploader/robotupload/insert',
            text=error_body,
        )

        hep_schema = load_schema('hep')
        subschema = hep_schema['properties']['arxiv_eprints']

        settings = {
            'LEGACY_ROBOTUPLOAD_URL': 'http://inspirehep.net',
            'PRODUCTION_MODE': True,
        }

        with patch.dict(current_app.config, settings):
            payload = {
                '$schema': 'http://localhost:5000/schemas/records/hep.json',
                'arxiv_eprints': [
                    {
                        'categories': ['hep-th'],
                        'value': 'hep-th/9711200',
                    },
                ],
            }
            assert validate(payload['arxiv_eprints'], subschema) is None

            wf_object = MockObj(payload, {})
            wf_engine = MockEng()

            task = send_robotupload(mode='insert')

            with pytest.raises(Exception) as excinfo:
                task(wf_object, wf_engine)

            # Message carried by the raised exception.
            raised_text = str(excinfo.value)
            assert ('Error while submitting robotupload: '
                    '[ERROR] cannot use the service') == raised_text

            # Message written to the object's error log.
            logged_text = wf_object.log._error.getvalue()
            assert ('Your IP is not in app.config_BATCHUPLOADER_WEB_ROBOT_RIGHTS on host: '
                    '[ERROR] cannot use the service') == logged_text
def test_send_robotupload_works_with_mode_correct_and_extra_data_key():
    """Correct-mode upload with the payload stored under an ``extra_data`` key.

    ``obj.data`` is empty; the record lives in ``extra_data['update_payload']``
    and the task is created with ``extra_data_key='update_payload'``.
    """
    with requests_mock.Mocker() as mocker:
        mocker.register_uri(
            'POST',
            'http://inspirehep.net/batchuploader/robotupload/correct',
            text='[INFO] foo bar baz',
        )

        app_config = {
            'LEGACY_ROBOTUPLOAD_URL': 'http://inspirehep.net',
            'PRODUCTION_MODE': True,
        }

        with patch.dict(current_app.config, app_config):
            update_payload = {
                '$schema': 'http://localhost:5000/schemas/records/hep.json',
                'arxiv_eprints': [
                    {
                        'categories': ['hep-th'],
                        'value': 'hep-th/9711200',
                    },
                ],
            }

            workflow_obj = MockObj({}, {'update_payload': update_payload})
            engine = MockEng()

            upload_task = send_robotupload(
                mode='correct',
                extra_data_key='update_payload',
            )
            assert upload_task(workflow_obj, engine) is None

            # The mocked log concatenates messages without separators.
            expected_log = (
                'Robotupload sent!'
                '[INFO] foo bar baz'
                'end of upload'
            )
            assert workflow_obj.log._info.getvalue() == expected_log

            assert engine.msg == 'Waiting for robotupload: [INFO] foo bar baz'
def test_send_robotupload_works_with_hep2marc_and_mode_insert():
    """Insert-mode upload with the ``hep2marc`` processor logs and sets the engine message.

    HTTPretty is told to refuse real network connections for the duration
    of the test; the setting is switched back in a ``finally`` clause so a
    failing assertion cannot leave the global flag disabled.
    """
    httpretty.HTTPretty.allow_net_connect = False
    try:
        httpretty.register_uri(
            httpretty.POST,
            'http://inspirehep.net/batchuploader/robotupload/insert',
            body='[INFO] foo bar baz')

        schema = load_schema('hep')
        subschema = schema['properties']['arxiv_eprints']

        config = {
            'LEGACY_ROBOTUPLOAD_URL': 'http://inspirehep.net',
            'PRODUCTION_MODE': True,
        }

        with patch.dict(current_app.config, config):
            data = {
                'arxiv_eprints': [
                    {
                        'categories': [
                            'hep-th',
                        ],
                        'value': 'hep-th/9711200',
                    },
                ],
            }
            extra_data = {}
            assert validate(data['arxiv_eprints'], subschema) is None

            obj = MockObj(data, extra_data)
            eng = MockEng()

            _send_robotupload = send_robotupload(
                marcxml_processor=hep2marc,
                mode='insert',
            )

            assert _send_robotupload(obj, eng) is None

            expected = ('Robotupload sent!'
                        '[INFO] foo bar baz'
                        'end of upload')
            result = obj.log._info.getvalue()

            assert expected == result

            expected = 'Waiting for robotupload: [INFO] foo bar baz'
            result = eng.msg

            assert expected == result
    finally:
        # Always re-enable network access, even when an assertion above fails.
        httpretty.HTTPretty.allow_net_connect = True
class EditArticle(object):
    """Workflow definition for editing records of the Literature collection."""

    name = 'edit_article'
    data_type = 'hep'

    # The steps below are listed in execution order.
    workflow = [
        change_status_to_waiting,
        validate_record('hep'),
        update_record,
        # The robotupload priority is looked up through this config key.
        send_robotupload(
            mode='replace',
            priority_config_key='LEGACY_ROBOTUPLOAD_PRIORITY_EDIT_ARTICLE',
        ),
        cleanup_pending_workflow,
    ]
def test_send_robotupload_removes_references_if_feature_flag_disabled():
    """``record2marcxml`` must receive the record with its ``references`` stripped.

    The record under test contains one raw reference; the expected argument
    to the patched ``record2marcxml`` is the same record without the
    ``references`` key.
    """
    with requests_mock.Mocker() as requests_mocker:
        requests_mocker.register_uri(
            'POST',
            'http://inspirehep.net/batchuploader/robotupload/insert',
            text='[INFO] foo bar baz')

        schema = load_schema('hep')
        subschema = schema['properties']['references']

        config = {
            'LEGACY_ROBOTUPLOAD_URL': 'http://inspirehep.net',
            'PRODUCTION_MODE': True,
        }

        with patch.dict(current_app.config, config), \
                patch('inspirehep.modules.workflows.tasks.submission.record2marcxml') as mock_record2marcxml:
            data = {
                '$schema': 'http://localhost:5000/schemas/records/hep.json',
                'references': [
                    {
                        'raw_refs': [
                            {
                                'schema': 'text',
                                'value': '[1] J. Maldacena and A. Strominger, hep-th/9710014.',
                            },
                        ],
                    },
                ]
            }
            data_without_references = {
                '$schema': 'http://localhost:5000/schemas/records/hep.json',
            }
            extra_data = {}
            assert validate(data['references'], subschema) is None

            obj = MockObj(data, extra_data)
            eng = MockEng()

            _send_robotupload = send_robotupload(mode='insert', )

            assert _send_robotupload(obj, eng) is None
            # ``assert mock.called_with(...)`` never fails because attribute
            # access on a Mock returns a truthy child mock. Use the real
            # assertion helper so the stripped record is actually verified.
            mock_record2marcxml.assert_called_with(data_without_references)
def test_send_robotupload_works_with_hepnames2marc_and_mode_insert():
    """Insert-mode upload of an authors record using the ``hepnames2marc`` processor.

    The info log and the engine message are compared with the mocked
    ``[INFO] foo bar baz`` response.
    """
    with requests_mock.Mocker() as mocker:
        mocker.register_uri(
            'POST',
            'http://inspirehep.net/batchuploader/robotupload/insert',
            text='[INFO] foo bar baz',
        )

        categories_subschema = load_schema('authors')['properties']['arxiv_categories']

        app_config = {
            'LEGACY_ROBOTUPLOAD_URL': 'http://inspirehep.net',
            'PRODUCTION_MODE': True,
        }

        with patch.dict(current_app.config, app_config):
            record = {
                'arxiv_categories': ['hep-th'],
            }
            assert validate(record['arxiv_categories'], categories_subschema) is None

            workflow_obj = MockObj(record, {})
            engine = MockEng()

            upload_task = send_robotupload(
                marcxml_processor=hepnames2marc,
                mode='insert',
            )
            assert upload_task(workflow_obj, engine) is None

            # The mocked log concatenates messages without separators.
            expected_log = (
                'Robotupload sent!'
                '[INFO] foo bar baz'
                'end of upload'
            )
            assert workflow_obj.log._info.getvalue() == expected_log

            assert engine.msg == 'Waiting for robotupload: [INFO] foo bar baz'
def test_send_robotupload_removes_references_if_feature_flag_disabled():
    """Check that the record reaches ``record2marcxml`` without its ``references``.

    ``record2marcxml`` is patched and the robotupload insert URL is mocked;
    the task must return ``None`` and call ``record2marcxml`` with the record
    minus the ``references`` key.
    """
    with requests_mock.Mocker() as requests_mocker:
        requests_mocker.register_uri(
            'POST', 'http://inspirehep.net/batchuploader/robotupload/insert',
            text='[INFO] foo bar baz'
        )

        schema = load_schema('hep')
        subschema = schema['properties']['references']

        config = {
            'LEGACY_ROBOTUPLOAD_URL': 'http://inspirehep.net',
            'PRODUCTION_MODE': True,
        }

        with patch.dict(current_app.config, config), \
                patch('inspirehep.modules.workflows.tasks.submission.record2marcxml') as mock_record2marcxml:
            data = {
                '$schema': 'http://localhost:5000/schemas/records/hep.json',
                'references': [
                    {
                        'raw_refs': [
                            {
                                'schema': 'text',
                                'value': '[1] J. Maldacena and A. Strominger, hep-th/9710014.',
                            },
                        ],
                    },
                ]
            }
            data_without_references = {
                '$schema': 'http://localhost:5000/schemas/records/hep.json',
            }
            extra_data = {}
            assert validate(data['references'], subschema) is None

            obj = MockObj(data, extra_data)
            eng = MockEng()

            _send_robotupload = send_robotupload(
                mode='insert',
            )

            assert _send_robotupload(obj, eng) is None
            # ``called_with`` is not an assertion method, so the old
            # ``assert mock.called_with(...)`` passed unconditionally.
            # ``assert_called_with`` compares the arguments of the last call.
            mock_record2marcxml.assert_called_with(data_without_references)
def test_send_robotupload_update_authors_when_feature_flag_is_enabled():
    """An authors record marked ``is-update`` is uploaded when the update flag is on.

    ``FEATURE_FLAG_ENABLE_UPDATE_TO_LEGACY`` is ``True``; the info log must
    contain the "Robotupload sent!" messages.
    """
    with requests_mock.Mocker() as mocker:
        mocker.register_uri(
            'POST',
            'http://inspirehep.net/batchuploader/robotupload/insert',
            text='[INFO] foo bar baz',
        )

        app_config = {
            'LEGACY_ROBOTUPLOAD_URL': 'http://inspirehep.net',
            'PRODUCTION_MODE': True,
            'FEATURE_FLAG_ENABLE_UPDATE_TO_LEGACY': True,
        }

        with patch.dict(current_app.config, app_config):
            with patch('inspirehep.modules.workflows.tasks.submission.record2marcxml'):
                record = {
                    '$schema': 'http://localhost:5000/schemas/records/authors.json',
                    'name': {
                        'preferred_name': 'Jessica Jones',
                        'value': 'Jones, Jessica',
                    },
                }

                workflow_obj = MockObj(record, {'is-update': True})
                workflow_obj.workflow = MockWorkflow('author')
                engine = MockEng(data_type='authors')

                upload_task = send_robotupload(mode='insert')
                assert upload_task(workflow_obj, engine) is None

                # The mocked log concatenates messages without separators.
                expected_log = (
                    'Robotupload sent!'
                    '[INFO] foo bar baz'
                    'end of upload'
                )
                assert workflow_obj.log._info.getvalue() == expected_log
def test_send_robotupload_works_with_mode_holdingpen_and_without_callback_url():
    """Holdingpen-mode upload with ``callback_url=None`` returns ``None`` and logs.

    The robotupload holdingpen URL is mocked to answer ``[INFO] foo bar baz``.
    """
    with requests_mock.Mocker() as mocker:
        mocker.register_uri(
            'POST',
            'http://inspirehep.net/batchuploader/robotupload/holdingpen',
            text='[INFO] foo bar baz',
        )

        categories_subschema = load_schema('authors')['properties']['arxiv_categories']

        app_config = {
            'LEGACY_ROBOTUPLOAD_URL': 'http://inspirehep.net',
            'PRODUCTION_MODE': True,
        }

        with patch.dict(current_app.config, app_config):
            record = {
                '$schema': 'http://localhost:5000/schemas/records/authors.json',
                'arxiv_categories': ['hep-th'],
            }
            assert validate(record['arxiv_categories'], categories_subschema) is None

            workflow_obj = MockObj(record, {})
            engine = MockEng()

            upload_task = send_robotupload(mode='holdingpen', callback_url=None)
            assert upload_task(workflow_obj, engine) is None

            # The mocked log concatenates messages without separators.
            expected_log = (
                'Robotupload sent!'
                '[INFO] foo bar baz'
                'end of upload'
            )
            assert workflow_obj.log._info.getvalue() == expected_log
def test_send_robotupload_works_with_mode_holdingpen_and_without_callback_url():
    """The task built with ``mode='holdingpen'`` and no callback URL succeeds.

    Success here means: the task returns ``None`` and the info log holds the
    three expected messages.
    """
    with requests_mock.Mocker() as mocker:
        mocker.register_uri(
            'POST',
            'http://inspirehep.net/batchuploader/robotupload/holdingpen',
            text='[INFO] foo bar baz',
        )

        authors_schema = load_schema('authors')
        subschema = authors_schema['properties']['arxiv_categories']

        settings = {
            'LEGACY_ROBOTUPLOAD_URL': 'http://inspirehep.net',
            'PRODUCTION_MODE': True,
        }

        with patch.dict(current_app.config, settings):
            payload = {
                '$schema': 'http://localhost:5000/schemas/records/authors.json',
                'arxiv_categories': ['hep-th'],
            }
            assert validate(payload['arxiv_categories'], subschema) is None

            wf_object = MockObj(payload, {})
            wf_engine = MockEng()

            task = send_robotupload(
                mode='holdingpen',
                callback_url=None,
            )
            assert task(wf_object, wf_engine) is None

            logged_text = wf_object.log._info.getvalue()
            assert ('Robotupload sent!'
                    '[INFO] foo bar baz'
                    'end of upload') == logged_text
def test_send_robotupload_works_with_mode_holdingpen_and_without_callback_url():
    """Holdingpen-mode upload with ``hepnames2marc`` and ``callback_url=None``.

    HTTPretty is told to refuse real network connections for the duration
    of the test; the setting is switched back in a ``finally`` clause so a
    failing assertion cannot leave the global flag disabled.
    """
    httpretty.HTTPretty.allow_net_connect = False
    try:
        httpretty.register_uri(
            httpretty.POST,
            'http://inspirehep.net/batchuploader/robotupload/holdingpen',
            body='[INFO] foo bar baz')

        schema = load_schema('authors')
        subschema = schema['properties']['arxiv_categories']

        config = {
            'LEGACY_ROBOTUPLOAD_URL': 'http://inspirehep.net',
            'PRODUCTION_MODE': True,
        }

        with patch.dict(current_app.config, config):
            data = {
                'arxiv_categories': [
                    'hep-th',
                ],
            }
            extra_data = {}
            assert validate(data['arxiv_categories'], subschema) is None

            obj = MockObj(data, extra_data)
            eng = MockEng()

            _send_robotupload = send_robotupload(
                marcxml_processor=hepnames2marc,
                mode='holdingpen',
                callback_url=None,
            )

            assert _send_robotupload(obj, eng) is None

            expected = ('Robotupload sent!'
                        '[INFO] foo bar baz'
                        'end of upload')
            result = obj.log._info.getvalue()

            assert expected == result
    finally:
        # Always re-enable network access, even when an assertion above fails.
        httpretty.HTTPretty.allow_net_connect = True
def test_send_robotupload_update_authors_when_feature_flag_is_enabled():
    """With the update flag on, an ``is-update`` authors record is sent.

    The object gets a ``MockWorkflow('author')`` and the engine is created
    with ``data_type='authors'``; the info log is then compared with the
    expected upload messages.
    """
    with requests_mock.Mocker() as mocker:
        mocker.register_uri(
            'POST',
            'http://inspirehep.net/batchuploader/robotupload/insert',
            text='[INFO] foo bar baz',
        )

        settings = {
            'LEGACY_ROBOTUPLOAD_URL': 'http://inspirehep.net',
            'PRODUCTION_MODE': True,
            'FEATURE_FLAG_ENABLE_UPDATE_TO_LEGACY': True,
        }

        config_patch = patch.dict(current_app.config, settings)
        marcxml_patch = patch('inspirehep.modules.workflows.tasks.submission.record2marcxml')

        with config_patch, marcxml_patch:
            author_name = {
                'preferred_name': 'Jessica Jones',
                'value': 'Jones, Jessica',
            }
            payload = {
                '$schema': 'http://localhost:5000/schemas/records/authors.json',
                'name': author_name,
            }

            wf_object = MockObj(payload, {'is-update': True})
            wf_object.workflow = MockWorkflow('author')
            wf_engine = MockEng(data_type='authors')

            task = send_robotupload(mode='insert')
            assert task(wf_object, wf_engine) is None

            logged_text = wf_object.log._info.getvalue()
            assert ('Robotupload sent!'
                    '[INFO] foo bar baz'
                    'end of upload') == logged_text
def test_send_robotupload_new_article_when_feature_flag_is_disabled():
    """With the update flag off, a record that is not an update is still sent.

    ``extra_data`` carries no ``is-update`` mark; the info log is compared
    with the expected upload messages.
    """
    with requests_mock.Mocker() as mocker:
        mocker.register_uri(
            'POST',
            'http://inspirehep.net/batchuploader/robotupload/insert',
            text='[INFO] foo bar baz',
        )

        settings = {
            'LEGACY_ROBOTUPLOAD_URL': 'http://inspirehep.net',
            'PRODUCTION_MODE': True,
            'FEATURE_FLAG_ENABLE_UPDATE_TO_LEGACY': False,
        }

        config_patch = patch.dict(current_app.config, settings)
        marcxml_patch = patch('inspirehep.modules.workflows.tasks.submission.record2marcxml')

        with config_patch, marcxml_patch:
            payload = {
                '$schema': 'http://localhost:5000/schemas/records/hep.json',
            }

            wf_object = MockObj(payload, {})
            wf_engine = MockEng()

            task = send_robotupload(mode='insert')
            assert task(wf_object, wf_engine) is None

            logged_text = wf_object.log._info.getvalue()
            assert ('Robotupload sent!'
                    '[INFO] foo bar baz'
                    'end of upload') == logged_text
def test_send_robotupload_update_article_when_feature_flag_is_disabled():
    """An ``is-update`` HEP record is skipped when the update flag is off.

    The task must return ``None`` and the info log must contain the
    "skipping upload to legacy" message.
    """
    app_config = {
        'LEGACY_ROBOTUPLOAD_URL': 'http://inspirehep.net',
        'PRODUCTION_MODE': True,
        'FEATURE_FLAG_ENABLE_UPDATE_TO_LEGACY': False,
    }

    with patch.dict(current_app.config, app_config):
        with patch('inspirehep.modules.workflows.tasks.submission.record2marcxml'):
            record = {
                '$schema': 'http://localhost:5000/schemas/records/hep.json',
            }

            workflow_obj = MockObj(record, {'is-update': True})
            engine = MockEng()

            upload_task = send_robotupload(mode='insert')

            skip_message = 'skipping upload to legacy, feature flag ``FEATURE_FLAG_ENABLE_UPDATE_TO_LEGACY`` is disabled.'

            assert upload_task(workflow_obj, engine) is None
            # Only a substring match: the log may hold other messages too.
            assert skip_message in workflow_obj.log._info.getvalue()
def test_send_robotupload_does_nothing_when_not_in_production_mode():
    """With ``PRODUCTION_MODE`` off, the ``hep2marc`` insert task returns ``None``.

    No URI is registered with HTTPretty, and real network connections are
    refused for the duration of the test. The setting is switched back in a
    ``finally`` clause so a failure cannot leave the global flag disabled.
    """
    httpretty.HTTPretty.allow_net_connect = False
    try:
        schema = load_schema('hep')
        subschema = schema['properties']['arxiv_eprints']

        config = {
            'LEGACY_ROBOTUPLOAD_URL': 'http://inspirehep.net',
            'PRODUCTION_MODE': False,
        }

        with patch.dict(current_app.config, config):
            data = {
                'arxiv_eprints': [
                    {
                        'categories': [
                            'hep-th',
                        ],
                        'value': 'hep-th/9711200',
                    },
                ],
            }
            extra_data = {}
            assert validate(data['arxiv_eprints'], subschema) is None

            obj = MockObj(data, extra_data)
            eng = MockEng()

            _send_robotupload = send_robotupload(
                marcxml_processor=hep2marc,
                mode='insert',
            )

            assert _send_robotupload(obj, eng) is None
    finally:
        # Always re-enable network access, even when an assertion above fails.
        httpretty.HTTPretty.allow_net_connect = True
from inspirehep.modules.workflows.tasks.submission import (close_ticket, create_ticket, reply_ticket, send_robotupload) from inspirehep.modules.workflows.tasks.upload import store_record, set_schema from inspirehep.modules.authors.tasks import ( curation_ticket_context, curation_ticket_needed, new_ticket_context, reply_ticket_context, update_ticket_context, ) SEND_TO_LEGACY = [ send_robotupload(mode="insert"), ] NOTIFY_ACCEPTED = [ reply_ticket(template="authors/tickets/user_accepted.html", context_factory=reply_ticket_context), close_ticket(ticket_id_key="ticket_id"), ] CLOSE_TICKET_IF_NEEDED = [ IF(curation_ticket_needed, [ create_ticket(template="authors/tickets/curation_needed.html", queue="AUTHORS_curation", context_factory=curation_ticket_context, ticket_id_key="curation_ticket_id"), ]),
remove_references, set_refereed_and_fix_document_type, fix_submission_number, ] SEND_TO_LEGACY = [ IF_ELSE( is_marked('is-update'), [ # TODO: once we have the merger in place # send_robotupload(mode="replace") mark('skipped-robot-upload', True) ], [ send_robotupload(mode="replace"), ] ), ] WAIT_FOR_LEGACY_WEBCOLL = [ IF_NOT( is_marked('is-update'), wait_webcoll, ), ] STOP_IF_EXISTING_SUBMISSION = [ IF(
from inspirehep.modules.workflows.tasks.submission import (close_ticket, create_ticket, reply_ticket, send_robotupload) from inspirehep.modules.workflows.tasks.upload import store_record, set_schema from inspirehep.modules.authors.tasks import ( curation_ticket_context, curation_ticket_needed, new_ticket_context, reply_ticket_context, update_ticket_context, ) SEND_TO_LEGACY = [ send_robotupload(marcxml_processor=hepnames2marc, mode="insert"), ] NOTIFY_ACCEPTED = [ reply_ticket(template="authors/tickets/user_accepted.html", context_factory=reply_ticket_context), close_ticket(ticket_id_key="ticket_id"), ] CLOSE_TICKET_IF_NEEDED = [ IF(curation_ticket_needed, [ create_ticket(template="authors/tickets/curation_needed.html", queue="AUTHORS_curation", context_factory=curation_ticket_context, ticket_id_key="curation_ticket_id"), ]),
send_robotupload ) from inspirehep.modules.workflows.tasks.upload import store_record, set_schema from inspirehep.modules.authors.tasks import ( curation_ticket_context, curation_ticket_needed, new_ticket_context, reply_ticket_context, update_ticket_context, ) SEND_TO_LEGACY = [ send_robotupload( marcxml_processor=hepnames2marc, mode="insert" ), ] NOTIFY_ACCEPTED = [ reply_ticket( template="authors/tickets/user_accepted.html", context_factory=reply_ticket_context), close_ticket(ticket_id_key="ticket_id"), ] CLOSE_TICKET_IF_NEEDED = [ IF(curation_ticket_needed, [ create_ticket(
ticket_id_key='curation_ticket_id', ), ), ] POSTENHANCE_RECORD = [ add_core, filter_keywords, prepare_keywords, remove_references, ] SEND_TO_LEGACY = [ IF_ELSE(is_marked('is-update'), [ prepare_update_payload(extra_data_key="update_payload"), send_robotupload(mode="correct", extra_data_key="update_payload"), ], [ send_robotupload(mode="insert"), ]), ] WAIT_FOR_LEGACY_WEBCOLL = [ IF_NOT( is_marked('is-update'), wait_webcoll, ), ] STOP_IF_EXISTING_SUBMISSION = [ IF(is_submission, IF(is_marked('is-update'), NOTIFY_ALREADY_EXISTING)) ]
send_robotupload ) from inspirehep.modules.workflows.tasks.upload import store_record, set_schema from inspirehep.modules.workflows.tasks.author import ( curation_ticket_context, curation_ticket_needed, new_ticket_context, reply_ticket_context, update_ticket_context, ) from inspirehep.modules.workflows.utils import do_not_repeat SEND_TO_LEGACY = [ send_robotupload(mode="insert", priority_config_key='LEGACY_ROBOTUPLOAD_PRIORITY_AUTHOR'), ] NOTIFY_ACCEPTED = [ do_not_repeat('reply_ticket_author_submission_accepted')( reply_ticket( template="authors/tickets/user_accepted.html", context_factory=reply_ticket_context) ), do_not_repeat('close_ticket_author_submission_accepted')( close_ticket(ticket_id_key="ticket_id") ), ]
create_ticket, reply_ticket, send_robotupload) from inspirehep.modules.workflows.tasks.upload import store_record, set_schema from inspirehep.modules.workflows.tasks.author import ( curation_ticket_context, curation_ticket_needed, new_ticket_context, reply_ticket_context, update_ticket_context, ) from inspirehep.modules.workflows.utils import do_not_repeat SEND_TO_LEGACY = [ send_robotupload(mode="insert", priority_config_key='LEGACY_ROBOTUPLOAD_PRIORITY_AUTHOR'), ] NOTIFY_ACCEPTED = [ do_not_repeat('reply_ticket_author_submission_accepted')(reply_ticket( template="authors/tickets/user_accepted.html", context_factory=reply_ticket_context)), do_not_repeat('close_ticket_author_submission_accepted')( close_ticket(ticket_id_key="ticket_id")), ] CLOSE_TICKET_IF_NEEDED = [ IF(curation_ticket_needed, [ do_not_repeat('create_ticket_author_submission_curation_needed')( create_ticket(template="authors/tickets/curation_needed.html", queue="AUTHORS_curation",
filter_keywords, prepare_keywords, user_pdf_get, remove_references, prepare_files, ] SEND_TO_LEGACY_AND_WAIT = [ IF_ELSE( article_exists, [ prepare_update_payload(extra_data_key="update_payload"), send_robotupload( marcxml_processor=hep2marc, mode="correct", extra_data_key="update_payload" ), ], [ send_robotupload( marcxml_processor=hep2marc, mode="insert" ), wait_webcoll, ] ), ] CHECK_IF_MERGE_AND_STOP_IF_SO = [ IF( article_exists,
] POSTENHANCE_RECORD = [ add_core, add_note_entry, filter_keywords, prepare_keywords, remove_references, prepare_files, ] SEND_TO_LEGACY_AND_WAIT = [ IF_ELSE(article_exists, [ prepare_update_payload(extra_data_key="update_payload"), send_robotupload(marcxml_processor=hep2marc, mode="correct", extra_data_key="update_payload"), ], [ send_robotupload(marcxml_processor=hep2marc, mode="insert"), wait_webcoll, ]), ] CHECK_IF_MERGE_AND_STOP_IF_SO = [ IF( article_exists, [ IF_ELSE( is_submission, NOTIFY_ALREADY_EXISTING, [
), ), ] POSTENHANCE_RECORD = [ add_core, filter_keywords, prepare_keywords, set_refereed_and_fix_document_type, fix_submission_number, ] SEND_TO_LEGACY = [ send_robotupload(mode='replace'), ] WAIT_FOR_LEGACY_WEBCOLL = [ IF_NOT( is_marked('is-update'), wait_webcoll, ), ] STOP_IF_EXISTING_SUBMISSION = [ IF( is_submission, IF(
send_robotupload ) from inspirehep.modules.workflows.tasks.upload import store_record, set_schema from inspirehep.modules.authors.tasks import ( curation_ticket_context, curation_ticket_needed, new_ticket_context, reply_ticket_context, update_ticket_context, ) SEND_TO_LEGACY = [ send_robotupload( mode="insert" ), ] NOTIFY_ACCEPTED = [ reply_ticket( template="authors/tickets/user_accepted.html", context_factory=reply_ticket_context), close_ticket(ticket_id_key="ticket_id"), ] CLOSE_TICKET_IF_NEEDED = [ IF(curation_ticket_needed, [ create_ticket(
class Article(object):
    """Article ingestion workflow for Literature collection.

    ``workflow`` is an ordered list of task callables and ``IF`` /
    ``IF_ELSE`` branches. It covers, in order: matching against existing
    records, processing and classification, the relevance/approval halt,
    pushing to legacy (or storing the record), and ticket handling.
    """
    name = "HEP"
    data_type = "hep"

    workflow = [
        # Make sure schema is set for proper indexing in Holding Pen
        set_schema,
        # Emit record signals to receive metadata enrichment
        emit_record_signals,
        # Query locally or via legacy search API to see if article
        # is already ingested and this is an update
        IF(article_exists, [
            mark('match-found', True),
        ]),
        IF_ELSE(
            is_submission,
            [
                # Article matching for submissions
                # ================================
                IF(pending_in_holding_pen, [
                    mark('already-in-holding-pen', True),
                ]),
                # Special RT integration for submissions
                # ======================================
                create_ticket(
                    template="literaturesuggest/tickets/curator_submitted.html",
                    queue="HEP_add_user",
                    context_factory=new_ticket_context,
                    ticket_id_key="ticket_id"),
                reply_ticket(
                    template="literaturesuggest/tickets/user_submitted.html",
                    context_factory=reply_ticket_context,
                    keep_new=True),
            ],
            [
                # Article matching for non-submissions
                # ====================================
                # Query holding pen to see if we already have this article ingested
                #
                # NOTE on updates:
                #     If the same article has been harvested before and the
                #     ingestion has been completed, process is continued
                #     to allow for updates.
                IF(pending_in_holding_pen, [
                    mark('already-in-holding-pen', True),
                    mark('delete', True),
                ]),
                IF(
                    is_arxiv_paper,
                    [
                        # FIXME: This filtering step should be removed when this
                        #        workflow includes arXiv CORE harvesting
                        IF(already_harvested, [
                            mark('already-ingested', True),
                            mark('stop', True),
                        ]),
                        # FIXME: This filtering step should be removed when:
                        #        old previously rejected records are treated
                        #        differently e.g. good auto-reject heuristics or better
                        #        time based filtering (5 days is quite random now).
                        IF(previously_rejected(), [
                            mark('already-ingested', True),
                            mark('stop', True),
                        ]),
                    ]),
                # Act on the 'delete' / 'stop' marks set by the checks above.
                IF(is_marked('delete'), [update_old_object, delete_self_and_stop_processing]),
                IF(is_marked('stop'), [stop_processing]),
            ]),
        #
        # Article Processing
        # ==================
        IF(is_arxiv_paper, [
            arxiv_fulltext_download,
            arxiv_plot_extract,
            arxiv_refextract,
            arxiv_author_list("authorlist2marcxml.xsl"),
        ]),
        extract_journal_info,
        classify_paper(
            taxonomy="HEPont.rdf",
            only_core_tags=False,
            spires=True,
            with_author_keywords=True,
        ),
        filter_core_keywords,
        guess_categories,
        IF(is_experimental_paper, [
            guess_experiments,
        ]),
        guess_keywords,
        # Predict action for a generic HEP paper based only on title
        # and abstract.
        guess_coreness,  # ("arxiv_skip_astro_title_abstract.pickle)
        # Check if we shall halt or auto-reject
        # =====================================
        # NOTE: User submissions are always relevant
        IF_ELSE(is_record_relevant, [
            halt_record(action="hep_approval"),
        ], [reject_record("Article automatically rejected"), stop_processing]),
        IF_ELSE(is_record_accepted, [
            IF(article_exists, [
                IF_ELSE(is_submission, [
                    reject_record('Article was already found on INSPIRE'),
                    # NOTE(review): ``reply_ticket`` and ``close_ticket`` are
                    # listed after ``stop_processing`` -- confirm they still
                    # run once processing has been stopped.
                    stop_processing,
                    reply_ticket(
                        template=
                        "literaturesuggest/tickets/user_rejected_exists.html",
                        context_factory=reply_ticket_context),
                    close_ticket(ticket_id_key="ticket_id"),
                ], [
                    halt_record(action="merge_approval"),
                ]),
            ]),
            add_core,
            add_note_entry,
            filter_keywords,
            user_pdf_get,
            # Either push to legacy via robotupload (a "correct" of the
            # prepared update payload when the article already exists, an
            # "insert" otherwise) or store the record.
            IF_ELSE(shall_push_remotely, [
                IF_ELSE(article_exists, [
                    prepare_update_payload(extra_data_key="update_payload"),
                    send_robotupload(marcxml_processor=hep2marc,
                                     mode="correct",
                                     extra_data_key="update_payload"),
                ], [
                    send_robotupload(marcxml_processor=hep2marc,
                                     mode="insert"),
                ])
            ], [store_record]),
            IF(is_submission, [
                IF(curation_ticket_needed, [
                    create_ticket(
                        template="literaturesuggest/tickets/curation_core.html",
                        queue="HEP_curation",
                        context_factory=curation_ticket_context,
                        ticket_id_key="curation_ticket_id")
                ]),
                reply_ticket(
                    template="literaturesuggest/tickets/user_accepted.html",
                    context_factory=reply_ticket_context),
            ]),
        ], [
            # Record not accepted: for submissions, reply on the ticket
            # (no template argument is passed here).
            IF(is_submission, [reply_ticket(context_factory=reply_ticket_context)])
        ]),
        close_ticket(ticket_id_key="ticket_id")
    ]
create_ticket, reply_ticket, send_robotupload) from inspirehep.modules.workflows.tasks.upload import store_record, set_schema from inspirehep.modules.workflows.tasks.author import ( curation_ticket_context, curation_ticket_needed, new_ticket_context, reply_ticket_context, update_ticket_context, ) from inspirehep.modules.workflows.utils import do_not_repeat SEND_TO_LEGACY = [ send_robotupload(mode='replace', priority_config_key='LEGACY_ROBOTUPLOAD_PRIORITY_AUTHOR'), ] NOTIFY_ACCEPTED = [ do_not_repeat('reply_ticket_author_submission_accepted')(reply_ticket( template="authors/tickets/user_accepted.html", context_factory=reply_ticket_context)), do_not_repeat('close_ticket_author_submission_accepted')( close_ticket(ticket_id_key="ticket_id")), ] CLOSE_TICKET_IF_NEEDED = [ IF(curation_ticket_needed, [ do_not_repeat('create_ticket_author_submission_curation_needed')( create_ticket(template="authors/tickets/curation_needed.html", queue="AUTHORS_curation",