def test_is_marked_returns_false_when_key_does_not_exist():
    """The 'foo' predicate is falsy when the mock carries no 'foo' entry."""
    # Both mappings handed to the mock are empty, so 'foo' cannot be found.
    workflow_object = MockObj({}, {})
    engine = MockEng()

    predicate = is_marked('foo')

    assert not predicate(workflow_object, engine)
def test_is_marked():
    """The 'foo' predicate is truthy when 'foo' maps to a truthy value."""
    # The second mapping given to the mock holds a non-empty string for 'foo'.
    workflow_object = MockObj({}, {'foo': 'bar'})
    engine = MockEng()

    predicate = is_marked('foo')

    assert predicate(workflow_object, engine)
def test_is_marked_returns_false_when_value_is_falsy():
    """The 'foo' predicate is falsy when 'foo' is present but set to False."""
    # The key exists here; only its value (False) should make the check fail.
    workflow_object = MockObj({}, {'foo': False})
    engine = MockEng()

    predicate = is_marked('foo')

    assert not predicate(workflow_object, engine)
def test_is_marked():
    """The 'foo' predicate is truthy when 'foo' maps to a truthy value."""
    # The second mapping given to the stub holds a non-empty string for 'foo'.
    workflow_object = StubObj({}, {'foo': 'bar'})
    engine = DummyEng()

    predicate = is_marked('foo')

    assert predicate(workflow_object, engine)
class Author(object):
    """Author ingestion workflow for HEPNames/Authors collection."""

    name = "Author"
    data_type = "authors"

    workflow = [
        # Make sure schema is set for proper indexing in Holding Pen
        set_schema,
        validate_record('authors'),
        IF_ELSE(
            is_marked('is-update'),
            # Steps used when the 'is-update' mark is set.
            SEND_UPDATE_NOTIFICATION,
            # Otherwise: review steps followed by the accept/reject decision.
            ASK_FOR_REVIEW + [
                IF_ELSE(
                    is_record_accepted,
                    SEND_TO_LEGACY
                    + NOTIFY_ACCEPTED
                    + [
                        # TODO: once legacy is out, this should become
                        # unconditional, and remove the SEND_TO_LEGACY
                        # steps
                        IF_NOT(in_production_mode, [store_record]),
                    ]
                    + CLOSE_TICKET_IF_NEEDED,
                    NOTIFY_NOT_ACCEPTED,
                ),
            ],
        ),
    ]
class Author(object):
    """Author ingestion workflow for HEPNames/Authors collection."""

    name = "Author"
    data_type = "authors"

    workflow = [
        # Make sure schema is set for proper indexing in Holding Pen
        set_schema,
        # Emit record signals to receive metadata enrichment
        emit_record_signals,
        IF_ELSE(
            is_marked('is-update'),
            # Steps used when the 'is-update' mark is set.
            [
                send_robotupload(
                    marcxml_processor=hepnames2marc,
                    mode="holdingpen",
                ),
                create_ticket(
                    template="authors/tickets/curator_update.html",
                    queue="Authors_cor_user",
                    context_factory=update_ticket_context,
                ),
            ],
            # Steps used otherwise: open tickets, halt for approval, then
            # branch on the decision.
            [
                create_ticket(
                    template="authors/tickets/curator_new.html",
                    queue="Authors_add_user",
                    context_factory=new_ticket_context,
                ),
                reply_ticket(
                    template="authors/tickets/user_new.html",
                    context_factory=reply_ticket_context,
                    keep_new=True,
                ),
                halt_record(
                    action="author_approval",
                    message="Accept submission?",
                ),
                IF_ELSE(
                    is_record_accepted,
                    [
                        send_robotupload(
                            marcxml_processor=hepnames2marc,
                            mode="insert",
                        ),
                        reply_ticket(
                            template="authors/tickets/user_accepted.html",
                            context_factory=reply_ticket_context,
                        ),
                        close_ticket(ticket_id_key="ticket_id"),
                        IF(
                            curation_ticket_needed,
                            [
                                create_ticket(
                                    template="authors/tickets/curation_needed.html",
                                    queue="AUTHORS_curation",
                                    context_factory=curation_ticket_context,
                                    ticket_id_key="curation_ticket_id",
                                ),
                            ],
                        ),
                    ],
                    [
                        close_ticket(ticket_id_key="ticket_id"),
                    ],
                ),
            ],
        ),
    ]
class Author(object):
    """Author ingestion workflow for HEPNames/Authors collection."""

    name = "Author"
    data_type = "authors"

    workflow = [
        load_from_source_data,
        # Make sure schema is set for proper indexing in Holding Pen
        set_schema,
        validate_record('authors'),
        IF_ELSE(
            is_marked('is-update'),
            # Steps used when the 'is-update' mark is set.
            [
                SEND_TO_LEGACY,
                SEND_UPDATE_NOTIFICATION,
            ],
            # Steps used otherwise: review, then branch on the decision.
            [
                ASK_FOR_REVIEW,
                IF_ELSE(
                    is_record_accepted,
                    [store_record]
                    + SEND_TO_LEGACY
                    + NOTIFY_ACCEPTED
                    + CLOSE_TICKET_IF_NEEDED,
                    NOTIFY_NOT_ACCEPTED,
                ),
            ],
        ),
    ]
keep_new=True ), ] ADD_INGESTION_MARKS = [ # Article matching for non-submissions # ==================================== # Query holding pen to see if we already have this article ingested # # NOTE on updates: # If the same article has been harvested before and the # ingestion has been completed, process is continued # to allow for updates. IF( is_marked('already-in-holding-pen'), [mark('delete', True)] ), IF( is_arxiv_paper, [ # FIXME: This filtering step should be removed when this # workflow includes arXiv CORE harvesting IF( already_harvested, [ mark('already-ingested', True), mark('stop', True), ] ), # FIXME: This filtering step should be removed when:
# time based filtering (5 days is quite random now). IF( previously_rejected(), [ mark('already-ingested', True), mark('stop', True), ] ), ] ), ] DELETE_AND_STOP_IF_NEEDED = [ IF( is_marked('delete'), [ update_existing_workflow_object, # TODO: Wen we get to fix refextract, we can remove the # following step as the references will be good enough to # trust delete_self_and_stop_processing ] ), IF( is_marked('stop'), [stop_processing] ), ]
NOTIFY_ACCEPTED = [ IF( is_submission, reply_ticket( template='literaturesuggest/tickets/user_accepted.html', context_factory=reply_ticket_context, ), ), ] NOTIFY_CURATOR_IF_CORE = [ IF_NOT( is_marked('is-update'), IF( curation_ticket_needed, create_ticket( template='literaturesuggest/tickets/curation_core.html', queue='HEP_curation', context_factory=curation_ticket_context, ticket_id_key='curation_ticket_id', ), ), ), ] POSTENHANCE_RECORD = [ add_core,
NOTIFY_ACCEPTED = [ IF( is_submission, do_not_repeat('reply_ticket_user_submission_accepted')( reply_ticket( template='literaturesuggest/tickets/user_accepted.html', context_factory=reply_ticket_context, ), ), ), ] NOTIFY_CURATOR_IF_NEEDED = [ IF_NOT( is_marked('is-update'), [ IF_ELSE( jlab_ticket_needed, do_not_repeat('create_ticket_jlab_curation')( create_ticket( template='literaturesuggest/tickets/curation_jlab.html', queue='HEP_curation_jlab', context_factory=curation_ticket_context, ticket_id_key='curation_ticket_id', ), ), IF( curation_ticket_needed, do_not_repeat('create_ticket_curator_core_curation')( create_ticket(
mark('stop', True), ]), # FIXME: This filtering step should be removed when: # old previously rejected records are treated # differently e.g. good auto-reject heuristics or better # time based filtering (5 days is quite random now). IF(previously_rejected(), [ mark('already-ingested', True), mark('stop', True), ]), ]), ] DELETE_AND_STOP_IF_NEEDED = [ IF( is_marked('delete'), [ update_existing_workflow_object, # TODO: Wen we get to fix refextract, we can remove the # following step as the references will be good enough to # trust delete_self_and_stop_processing ]), IF(is_marked('stop'), [stop_processing]), ] ENHANCE_RECORD = [ # Article Processing # ================== IF(is_arxiv_paper, [ arxiv_fulltext_download,
stop_processing, ] NOTIFY_ACCEPTED = [ IF( is_submission, do_not_repeat('reply_ticket_user_submission_accepted')(reply_ticket( template='literaturesuggest/tickets/user_accepted.html', context_factory=reply_ticket_context, ), ), ), ] NOTIFY_CURATOR_IF_NEEDED = [ IF_NOT( is_marked('is-update'), [ IF_ELSE( jlab_ticket_needed, do_not_repeat('create_ticket_jlab_curation')(create_ticket( template='literaturesuggest/tickets/curation_jlab.html', queue='HEP_curation_jlab', context_factory=curation_ticket_context, ticket_id_key='curation_ticket_id', ), ), IF( curation_ticket_needed, # if core IF_ELSE( check_source_publishing, # if it is coming from publisher, create ticket in hep_publisher queue do_not_repeat('create_ticket_curator_core_publisher') (create_ticket(
class Article(object):
    """Article ingestion workflow for Literature collection.

    ``workflow`` is an ordered list of steps. The visible stages are, in
    order: schema setup and signal emission, matching against existing
    records (with separate branches for submissions and non-submissions),
    arXiv-specific extraction, classification and guessing steps, the
    halt-or-auto-reject decision, and finally the accepted / not-accepted
    branches followed by closing the ticket stored under ``ticket_id``.
    """

    name = "HEP"
    data_type = "hep"

    workflow = [
        # Make sure schema is set for proper indexing in Holding Pen
        set_schema,
        # Emit record signals to receive metadata enrichment
        emit_record_signals,
        # Query locally or via legacy search API to see if article
        # is already ingested and this is an update
        IF(article_exists, [
            mark('match-found', True),
        ]),
        IF_ELSE(
            is_submission,
            [
                # Article matching for submissions
                # ================================
                IF(pending_in_holding_pen, [
                    mark('already-in-holding-pen', True),
                ]),
                # Special RT integration for submissions
                # ======================================
                create_ticket(
                    template="literaturesuggest/tickets/curator_submitted.html",
                    queue="HEP_add_user",
                    context_factory=new_ticket_context,
                    ticket_id_key="ticket_id"),
                reply_ticket(
                    template="literaturesuggest/tickets/user_submitted.html",
                    context_factory=reply_ticket_context,
                    keep_new=True),
            ],
            [
                # Article matching for non-submissions
                # ====================================
                # Query holding pen to see if we already have this article ingested
                #
                # NOTE on updates:
                #       If the same article has been harvested before and the
                #       ingestion has been completed, process is continued
                #       to allow for updates.
                IF(pending_in_holding_pen, [
                    mark('already-in-holding-pen', True),
                    mark('delete', True),
                ]),
                IF(
                    is_arxiv_paper,
                    [
                        # FIXME: This filtering step should be removed when this
                        #        workflow includes arXiv CORE harvesting
                        IF(already_harvested, [
                            mark('already-ingested', True),
                            mark('stop', True),
                        ]),
                        # FIXME: This filtering step should be removed when:
                        #        old previously rejected records are treated
                        #        differently e.g. good auto-reject heuristics or better
                        #        time based filtering (5 days is quite random now).
                        IF(previously_rejected(), [
                            mark('already-ingested', True),
                            mark('stop', True),
                        ]),
                    ]),
                # Act on the 'delete' / 'stop' marks set by the checks above.
                IF(is_marked('delete'), [update_old_object,
                                         delete_self_and_stop_processing]),
                IF(is_marked('stop'), [stop_processing]),
            ]),
        #
        # Article Processing
        # ==================
        IF(is_arxiv_paper, [
            arxiv_fulltext_download,
            arxiv_plot_extract,
            arxiv_refextract,
            arxiv_author_list("authorlist2marcxml.xsl"),
        ]),
        extract_journal_info,
        classify_paper(
            taxonomy="HEPont.rdf",
            only_core_tags=False,
            spires=True,
            with_author_keywords=True,
        ),
        filter_core_keywords,
        guess_categories,
        IF(is_experimental_paper, [
            guess_experiments,
        ]),
        guess_keywords,
        # Predict action for a generic HEP paper based only on title
        # and abstract.
        guess_coreness,  # ("arxiv_skip_astro_title_abstract.pickle)
        # Check if we shall halt or auto-reject
        # =====================================
        # NOTE: User submissions are always relevant
        IF_ELSE(is_record_relevant, [
            halt_record(action="hep_approval"),
        ], [
            reject_record("Article automatically rejected"),
            stop_processing,
        ]),
        IF_ELSE(is_record_accepted, [
            IF(article_exists, [
                IF_ELSE(is_submission, [
                    reject_record('Article was already found on INSPIRE'),
                    reply_ticket(
                        template="literaturesuggest/tickets/user_rejected_exists.html",
                        context_factory=reply_ticket_context),
                    close_ticket(ticket_id_key="ticket_id"),
                    # stop_processing is deliberately the LAST step of this
                    # branch: it presumably halts the engine (it is the
                    # terminal step everywhere else in this workflow), so any
                    # step listed after it would not run. It used to sit
                    # before reply_ticket/close_ticket, which would have left
                    # the submitter un-notified and the ticket open.
                    # NOTE(review): confirm against the workflow engine.
                    stop_processing,
                ], [
                    halt_record(action="merge_approval"),
                ]),
            ]),
            add_core,
            add_note_entry,
            filter_keywords,
            user_pdf_get,
            IF_ELSE(shall_push_remotely, [
                IF_ELSE(article_exists, [
                    prepare_update_payload(extra_data_key="update_payload"),
                    send_robotupload(marcxml_processor=hep2marc,
                                     mode="correct",
                                     extra_data_key="update_payload"),
                ], [
                    send_robotupload(marcxml_processor=hep2marc,
                                     mode="insert"),
                ])
            ], [store_record]),
            IF(is_submission, [
                IF(curation_ticket_needed, [
                    create_ticket(
                        template="literaturesuggest/tickets/curation_core.html",
                        queue="HEP_curation",
                        context_factory=curation_ticket_context,
                        ticket_id_key="curation_ticket_id")
                ]),
                reply_ticket(
                    template="literaturesuggest/tickets/user_accepted.html",
                    context_factory=reply_ticket_context),
            ]),
        ], [
            # Not accepted: submissions still get a reply (no template given).
            IF(is_submission, [reply_ticket(context_factory=reply_ticket_context)])
        ]),
        close_ticket(ticket_id_key="ticket_id")
    ]