class Author(object):
    """Author ingestion workflow for HEPNames/Authors collection.

    The class is purely declarative: ``name`` and ``data_type`` label the
    workflow and ``workflow`` is the ordered list of steps to execute.
    Upper-case names (``ASK_FOR_REVIEW``, ``SEND_TO_LEGACY``, ...) are step
    lists defined outside this class and spliced in with ``+``.
    """
    name = "Author"
    data_type = "authors"

    workflow = [
        # Make sure schema is set for proper indexing in Holding Pen
        set_schema,
        validate_record('authors'),
        # IF_ELSE takes (condition, steps-if-true, steps-if-false):
        # records marked 'is-update' only get the update notification,
        # everything else goes through review first.
        IF_ELSE(
            is_marked('is-update'),
            SEND_UPDATE_NOTIFICATION,
            ASK_FOR_REVIEW + [
                # Outcome of the review decides between the accepted
                # chain and NOTIFY_NOT_ACCEPTED.
                IF_ELSE(
                    is_record_accepted,
                    (
                        SEND_TO_LEGACY +
                        NOTIFY_ACCEPTED +
                        [
                            # TODO: once legacy is out, this should become
                            # unconditional, and remove the SEND_TO_LEGACY
                            # steps
                            IF_NOT(in_production_mode, [store_record]),
                        ] +
                        CLOSE_TICKET_IF_NEEDED),
                    NOTIFY_NOT_ACCEPTED),
            ],
        ),
    ]
class Article(object):
    """Article ingestion workflow for Literature collection.

    Declarative definition: ``workflow`` is one flat list built by
    concatenating step lists defined outside this class, followed by the
    accept/reject branch and the ticket-closing step.
    """
    name = "HEP"
    data_type = "hep"

    workflow = (
        # Steps run in the order they are concatenated here.
        PRE_PROCESSING +
        NOTIFY_IF_SUBMISSION +
        MARK_IF_MATCH_IN_HOLDINGPEN +
        CHECK_IS_UPDATE +
        STOP_IF_EXISTING_SUBMISSION +
        CHECK_AUTO_APPROVE +
        PROCESS_HOLDINGPEN_MATCHES +
        ENHANCE_RECORD +
        HALT_FOR_APPROVAL_IF_NEW_OR_STOP_IF_NOT_RELEVANT +
        [
            # IF_ELSE takes (condition, steps-if-true, steps-if-false).
            IF_ELSE(
                is_record_accepted,
                (
                    POSTENHANCE_RECORD +
                    STORE_RECORD +
                    SEND_TO_LEGACY +
                    WAIT_FOR_LEGACY_WEBCOLL +
                    NOTIFY_ACCEPTED +
                    NOTIFY_CURATOR_IF_CORE
                ),
                NOTIFY_NOT_ACCEPTED,
            ),
            # Placed after the accept/reject branch, so it is not tied to
            # either outcome; it only depends on is_submission.
            IF(
                is_submission,
                close_ticket(ticket_id_key="ticket_id"),
            )
        ]
    )
def test_current_taskname_resolution(self):
    """Check what ``current_taskname`` reports for different callbacks.

    Three cases are covered: a callback produced by ``m('test')``, a bare
    lambda, and an ``IF_ELSE`` construct, whose name is the ``repr`` of the
    nested callback list.
    """
    workflow = [m('test')]
    self.wfe.callbacks.replace(workflow, self.key)
    self.wfe.process(self.tokens)
    assert self.wfe.current_taskname == 'string appender'

    workflow = [lambda obj, eng: 1]
    self.wfe.callbacks.replace(workflow, self.key)
    self.wfe.process(self.tokens)
    assert self.wfe.current_taskname == '<lambda>'

    workflow = [
        IF_ELSE(
            lambda obj, eng: True,
            [lambda obj, eng: 1],
            [lambda obj, eng: 2],
        )
    ]
    self.wfe.callbacks.replace(workflow, self.key)

    # Function reprs embed a memory address. Match real hex digits only:
    # the former `[0-f]` was an ASCII *range* that also accepted
    # punctuation and the letters G-Z. Upper-case digits stay allowed
    # because some platforms print addresses in upper case.
    address = r'0x[0-9a-fA-F]+'

    # This test will break if someone changes IF_ELSE. TODO: Mock
    # Note: Python3 has much stronger introspection, thus the `.*`.
    expected = (
        r'\[<function IF_ELSE.* at ' + address + r'>, '
        r'\[<function .*<lambda> at ' + address + r'>\], '
        r'<function BREAK.* at ' + address + r'>, '
        r'\[<function .*<lambda> at ' + address + r'>\]\]'
    )
    assert re.match(expected, self.wfe.current_taskname)
class Article(object):
    """Article ingestion workflow for Literature collection.

    Declarative definition: ``workflow`` concatenates step lists defined
    outside this class and ends with the accept/reject branch and the
    ticket-closing step for submissions.
    """
    name = "HEP"
    data_type = "hep"

    workflow = (
        [
            # Make sure schema is set for proper indexing in Holding Pen
            set_schema,
        ] +
        ADD_MARKS +
        DELETE_AND_STOP_IF_NEEDED +
        ENHANCE_RECORD +
        # TODO: Once we have a way to resolve merges, we should
        # use that instead of stopping
        CHECK_IF_MERGE_AND_STOP_IF_SO +
        CHECK_IF_SUBMISSION_AND_ASK_FOR_APPROVAL +
        [
            # IF_ELSE takes (condition, steps-if-true, steps-if-false).
            IF_ELSE(
                is_record_accepted,
                (
                    POSTENHANCE_RECORD +
                    SEND_TO_LEGACY_AND_WAIT +
                    NOTIFY_USER_OR_CURATOR +
                    [
                        # TODO: once legacy is out, this should become
                        # unconditional, and remove the SEND_TO_LEGACY_AND_WAIT
                        # steps
                        IF_NOT(in_production_mode, [store_record]),
                    ]),
                NOTIFY_NOT_ACCEPTED,
            ),
            # Sits after the accept/reject branch, so it only depends on
            # is_submission, not on the outcome above.
            IF(
                is_submission,
                [close_ticket(ticket_id_key="ticket_id")],
            )
        ])
class Author(object):
    """Author ingestion workflow for HEPNames/Authors collection.

    Declarative definition with every step spelled out inline. The top-level
    ``IF_ELSE`` separates records marked ``'is-update'`` from new
    submissions; only new submissions are halted for approval.
    """
    name = "Author"
    data_type = "authors"

    workflow = [
        # Make sure schema is set for proper indexing in Holding Pen
        set_schema,
        # Emit record signals to receive metadata enrichment
        emit_record_signals,
        # IF_ELSE takes (condition, steps-if-true, steps-if-false).
        IF_ELSE(is_marked('is-update'), [
            # Update: upload in "holdingpen" mode and open a curator ticket.
            send_robotupload(marcxml_processor=hepnames2marc,
                             mode="holdingpen"),
            create_ticket(
                template="authors/tickets/curator_update.html",
                queue="Authors_cor_user",
                context_factory=update_ticket_context,
            ),
        ], [
            # New record: open a curator ticket, reply to the user, then
            # halt with the "author_approval" action.
            create_ticket(template="authors/tickets/curator_new.html",
                          queue="Authors_add_user",
                          context_factory=new_ticket_context),
            reply_ticket(template="authors/tickets/user_new.html",
                         context_factory=reply_ticket_context,
                         keep_new=True),
            halt_record(action="author_approval",
                        message="Accept submission?"),
            IF_ELSE(is_record_accepted, [
                # Accepted: upload in "insert" mode, tell the user, close
                # the ticket stored under "ticket_id".
                send_robotupload(marcxml_processor=hepnames2marc,
                                 mode="insert"),
                reply_ticket(template="authors/tickets/user_accepted.html",
                             context_factory=reply_ticket_context),
                close_ticket(ticket_id_key="ticket_id"),
                # The follow-up ticket is stored under its own key
                # ("curation_ticket_id"), separate from "ticket_id".
                IF(curation_ticket_needed, [
                    create_ticket(
                        template="authors/tickets/curation_needed.html",
                        queue="AUTHORS_curation",
                        context_factory=curation_ticket_context,
                        ticket_id_key="curation_ticket_id"),
                ]),
            ], [
                # Not accepted: the ticket is closed without a reply step.
                close_ticket(ticket_id_key="ticket_id"),
            ]),
        ]),
    ]
class Author(object):
    """Author ingestion workflow for HEPNames/Authors collection.

    Declarative definition: ``workflow`` is the ordered list of steps.
    Upper-case names are step lists defined outside this class.
    """
    name = "Author"
    data_type = "authors"

    workflow = [
        load_from_source_data,
        # Make sure schema is set for proper indexing in Holding Pen
        set_schema,
        validate_record('authors'),
        # IF_ELSE takes (condition, steps-if-true, steps-if-false).
        IF_ELSE(
            is_marked('is-update'),
            [
                # NOTE(review): SEND_TO_LEGACY is nested here as a list
                # element but concatenated with `+` in the accepted branch
                # below -- presumably the engine accepts nested step lists;
                # confirm.
                SEND_TO_LEGACY,
                SEND_UPDATE_NOTIFICATION,
            ],
            [
                ASK_FOR_REVIEW,
                # In this branch the record is stored before being sent
                # to legacy.
                IF_ELSE(is_record_accepted,
                        ([store_record] +
                         SEND_TO_LEGACY +
                         NOTIFY_ACCEPTED +
                         CLOSE_TICKET_IF_NEEDED),
                        NOTIFY_NOT_ACCEPTED),
            ],
        ),
    ]
class Article(object):
    """Article ingestion workflow for Literature collection.

    Declarative definition: ``workflow`` is one flat list built by
    concatenating step lists defined outside this class, followed by the
    accept/reject branch and the ticket-closing step.
    """
    name = "HEP"
    data_type = "hep"

    workflow = (
        # Steps run in the order they are concatenated here.
        PRE_PROCESSING +
        STOP_IF_ALREADY_HARVESTED_ON_LEGACY_OR_TOO_OLD +
        NOTIFY_IF_SUBMISSION +
        MARK_IF_MATCH_IN_HOLDINGPEN +
        PROCESS_HOLDINGPEN_MATCHES +
        MARK_IF_UPDATE +
        ENHANCE_RECORD +
        STOP_IF_EXISTING_SUBMISSION +
        HALT_FOR_APPROVAL +
        [
            # IF_ELSE takes (condition, steps-if-true, steps-if-false).
            IF_ELSE(
                is_record_accepted,
                (POSTENHANCE_RECORD +
                 STORE_RECORD +
                 SEND_TO_LEGACY_AND_WAIT +
                 NOTIFY_ACCEPTED +
                 NOTIFY_CURATOR_IF_CORE),
                NOTIFY_NOT_ACCEPTED,
            ),
            # Placed after the accept/reject branch, so it is not tied to
            # either outcome; it only depends on is_submission.
            IF(
                is_submission,
                close_ticket(ticket_id_key="ticket_id"),
            )
        ])
do_not_repeat('reply_ticket_user_new_submission')( reply_ticket( template="literaturesuggest/tickets/user_submitted.html", context_factory=reply_ticket_context, keep_new=True ), ) ] CHECK_AUTO_APPROVE = [ IF_ELSE( is_submission, mark('auto-approved', False), IF_ELSE( auto_approve, [ mark('auto-approved', True), set_core_in_extra_data, ], mark('auto-approved', False), ), ), ] ENHANCE_RECORD = [ IF( is_arxiv_paper, [ populate_arxiv_document, arxiv_package_download, arxiv_plot_extract, arxiv_derive_inspire_categories,
schema_data = requests_retry_session().get(obj.data['$schema']).content schema_data = json.loads(schema_data) try: validate(obj.data, schema_data) except ValidationError as err: __halt_and_notify('Invalid record: %s' % err, eng) except SchemaError as err: __halt_and_notify('SchemaError during record validation! %s' % err, eng) STORE_REC = [ IF_ELSE(is_record_in_db, [ update_record, ], [ store_record, ]), ] class ArticlesUpload(object): """Article ingestion workflow for Records collection.""" name = "HEP" data_type = "harvesting" workflow = [ delete_older_workflows, set_schema, add_arxiv_category, add_nations, remove_orcid_prefix,
class Article(object):
    """Article ingestion workflow for Literature collection.

    Declarative definition with every step spelled out inline. The list
    runs in order through four phases:

    1. schema setup and matching against existing/pending records,
    2. enrichment (arXiv extraction, classification, guessing),
    3. relevance check and halt for curator approval,
    4. upload/store plus user and curator notification.
    """
    name = "HEP"
    data_type = "hep"

    workflow = [
        # Make sure schema is set for proper indexing in Holding Pen
        set_schema,
        # Emit record signals to receive metadata enrichment
        emit_record_signals,
        # Query locally or via legacy search API to see if article
        # is already ingested and this is an update
        IF(article_exists, [
            mark('match-found', True),
        ]),
        IF_ELSE(
            is_submission,
            [
                # Article matching for submissions
                # ================================
                IF(pending_in_holding_pen, [
                    mark('already-in-holding-pen', True),
                ]),
                # Special RT integration for submissions
                # ======================================
                create_ticket(
                    template="literaturesuggest/tickets/curator_submitted.html",
                    queue="HEP_add_user",
                    context_factory=new_ticket_context,
                    ticket_id_key="ticket_id"),
                reply_ticket(
                    template="literaturesuggest/tickets/user_submitted.html",
                    context_factory=reply_ticket_context,
                    keep_new=True),
            ],
            [
                # Article matching for non-submissions
                # ====================================
                # Query holding pen to see if we already have this article
                # ingested
                #
                # NOTE on updates:
                #     If the same article has been harvested before and the
                #     ingestion has been completed, process is continued
                #     to allow for updates.
                IF(pending_in_holding_pen, [
                    mark('already-in-holding-pen', True),
                    mark('delete', True),
                ]),
                IF(
                    is_arxiv_paper,
                    [
                        # FIXME: This filtering step should be removed when
                        #        this workflow includes arXiv CORE harvesting
                        IF(already_harvested, [
                            mark('already-ingested', True),
                            mark('stop', True),
                        ]),
                        # FIXME: This filtering step should be removed when:
                        #        old previously rejected records are treated
                        #        differently e.g. good auto-reject heuristics
                        #        or better time based filtering (5 days is
                        #        quite random now).
                        IF(previously_rejected(), [
                            mark('already-ingested', True),
                            mark('stop', True),
                        ]),
                    ]),
                # Act on the marks set above.
                IF(is_marked('delete'), [
                    update_old_object,
                    delete_self_and_stop_processing,
                ]),
                IF(is_marked('stop'), [stop_processing]),
            ]),
        #
        # Article Processing
        # ==================
        IF(is_arxiv_paper, [
            arxiv_fulltext_download,
            arxiv_plot_extract,
            arxiv_refextract,
            arxiv_author_list("authorlist2marcxml.xsl"),
        ]),
        extract_journal_info,
        classify_paper(
            taxonomy="HEPont.rdf",
            only_core_tags=False,
            spires=True,
            with_author_keywords=True,
        ),
        filter_core_keywords,
        guess_categories,
        IF(is_experimental_paper, [
            guess_experiments,
        ]),
        guess_keywords,
        # Predict action for a generic HEP paper based only on title
        # and abstract.
        guess_coreness,  # ("arxiv_skip_astro_title_abstract.pickle)
        # Check if we shall halt or auto-reject
        # =====================================
        # NOTE: User submissions are always relevant
        IF_ELSE(is_record_relevant, [
            halt_record(action="hep_approval"),
        ], [
            reject_record("Article automatically rejected"),
            stop_processing,
        ]),
        IF_ELSE(is_record_accepted, [
            IF(article_exists, [
                IF_ELSE(is_submission, [
                    reject_record('Article was already found on INSPIRE'),
                    reply_ticket(
                        template=
                        "literaturesuggest/tickets/user_rejected_exists.html",
                        context_factory=reply_ticket_context),
                    close_ticket(ticket_id_key="ticket_id"),
                    # stop_processing must be the last step of this branch:
                    # it used to sit right after reject_record, which left
                    # the user reply and the ticket closing unreachable.
                    # NOTE(review): assumes stop_processing ends the run
                    # immediately -- confirm against its definition.
                    stop_processing,
                ], [
                    halt_record(action="merge_approval"),
                ]),
            ]),
            add_core,
            add_note_entry,
            filter_keywords,
            user_pdf_get,
            # Either push to the remote system (update vs. insert depending
            # on whether the article already exists) or store locally.
            IF_ELSE(shall_push_remotely, [
                IF_ELSE(article_exists, [
                    prepare_update_payload(extra_data_key="update_payload"),
                    send_robotupload(marcxml_processor=hep2marc,
                                     mode="correct",
                                     extra_data_key="update_payload"),
                ], [
                    send_robotupload(marcxml_processor=hep2marc,
                                     mode="insert"),
                ])
            ], [store_record]),
            IF(is_submission, [
                IF(curation_ticket_needed, [
                    create_ticket(
                        template="literaturesuggest/tickets/curation_core.html",
                        queue="HEP_curation",
                        context_factory=curation_ticket_context,
                        ticket_id_key="curation_ticket_id")
                ]),
                reply_ticket(
                    template="literaturesuggest/tickets/user_accepted.html",
                    context_factory=reply_ticket_context),
            ]),
        ], [
            IF(is_submission,
               [reply_ticket(context_factory=reply_ticket_context)])
        ]),
        close_ticket(ticket_id_key="ticket_id")
    ]
ticket_id_key="ticket_id" ), reply_ticket( template="literaturesuggest/tickets/user_submitted.html", context_factory=reply_ticket_context, keep_new=True ), ] CHECK_AUTO_APPROVE = [ IF_ELSE( is_submission, mark('auto-approved', False), IF_ELSE( auto_approve, [ mark('auto-approved', True), set_core_in_extra_data, ], mark('auto-approved', False), ), ), ] ENHANCE_RECORD = [ IF( is_arxiv_paper, [ populate_arxiv_document, arxiv_package_download, arxiv_plot_extract, arxiv_derive_inspire_categories,
), filter_core_keywords, guess_categories, IF(is_experimental_paper, [guess_experiments]), guess_keywords, # Predict action for a generic HEP paper based only on title # and abstract. guess_coreness, # ("arxiv_skip_astro_title_abstract.pickle) # Check if we shall halt or auto-reject # ===================================== ] CHECK_IF_SUBMISSION_AND_ASK_FOR_APPROVAL = [ IF_ELSE(is_record_relevant, [ halt_record( action="hep_approval", message="Submission halted for curator approval.", ) ], [reject_record("Article automatically rejected"), stop_processing]), ] NOTIFY_NOT_ACCEPTED = [ IF(is_submission, [reply_ticket(context_factory=reply_ticket_context)]) ] NOTIFY_ALREADY_EXISTING = [ reject_record('Article was already found on INSPIRE'), stop_processing, reply_ticket(template=("literaturesuggest/tickets/" "user_rejected_exists.html"), context_factory=reply_ticket_context), close_ticket(ticket_id_key="ticket_id"),
class AuthorNew(WorkflowBase):
    """Workflow for new author information.

    ``workflow`` is the ordered list of steps; the static methods below
    render a workflow object (``bwo``) for display.
    """

    object_type = "Author New"

    workflow = [
        convert_data_to_model(),
        create_marcxml_record(),
        create_curator_ticket_new(template="authors/tickets/curator_new.html",
                                  queue="Authors_add_user"),
        reply_ticket(template="authors/tickets/user_new.html",
                     keep_new=True),
        halt_record_with_action(action="author_approval",
                                message="Accept submission?"),
        # IF_ELSE takes (condition, steps-if-true, steps-if-false).
        IF_ELSE(shall_upload_record,
                [
                    IF(recreate_data,
                       [convert_data_to_model(),
                        create_marcxml_record()]),
                    send_robotupload(mode="insert"),
                    reply_ticket(template="authors/tickets/user_accepted.html"),
                    close_ticket(ticket_id_key="ticket_id"),
                    IF(curation_ticket_needed,
                       [
                           create_curation_ticket(
                               template="authors/tickets/curation_needed.html",
                               queue="AUTHORS_curation",
                               ticket_id_key="curation_ticket_id"),
                       ]),
                ],
                [
                    close_ticket(ticket_id_key="ticket_id"),
                ])
    ]

    @staticmethod
    def _get_user_email(bwo):
        """Return the e-mail of the user owning ``bwo``, or ``''``.

        An ``AttributeError`` during the lookup (for instance when the
        query yields ``None``) results in the empty string.
        """
        try:
            return User.query.get(bwo.id_user).email
        except AttributeError:
            return ''

    @staticmethod
    def get_title(bwo):
        """Return title of object."""
        return u"New Author by: {0}".format(AuthorNew._get_user_email(bwo))

    @staticmethod
    def get_description(bwo):
        """Return description of object.

        Falls back to ``"No name found"`` when ``name.preferred_name`` is
        missing from ``bwo.data``.
        """
        return bwo.data.get("name", {}).get("preferred_name", "No name found")

    @staticmethod
    def formatter(bwo, **kwargs):
        """Return formatted data of object.

        :param bwo: workflow object to render.
        :param kwargs: ``of`` selects the output format; ``"xm"`` returns
            the stored MARCXML, anything else (default ``"hp"``) renders
            the HTML template.
        """
        of = kwargs.get("of", "hp")
        if of == "xm":
            # The XML output needs neither the user nor the ticket, so
            # return before doing the user lookup.
            return bwo.extra_data.get("marcxml")

        ticket_id = bwo.extra_data.get("ticket_id")
        ticket_url = "https://rt.inspirehep.net/Ticket/Display.html?id={}".format(
            ticket_id)
        # FIXME add a template for the author display in the HP
        return render_template("authors/workflows/authorupdate.html",
                               record=bwo.data,
                               user_email=AuthorNew._get_user_email(bwo),
                               ticket_url=ticket_url,
                               comments=bwo.extra_data.get("comments"))
from workflow.patterns.controlflow import (FOR, IF_ELSE, CMP)

# Broker URL for Celery. REDIS_HOST falls back to "localhost" so that an
# unset variable no longer fails at import time with a TypeError
# (str + None).
redisUrl = "redis://" + os.getenv('REDIS_HOST', 'localhost') + ":6379/0"
app = Celery('core', broker=redisUrl)

# Workflow definition: initialise the store, fetch the agent, then branch
# on the agent's 'sex' field -- log the result when it equals 'Male',
# otherwise fetch the current time -- and finally log the result.
flow = [
    library.utils.initializeStore,
    library.agents.getAgent,
    IF_ELSE(CMP(lambda o, e: o['data']['agent']['sex'], 'Male', '=='),
            [library.logger.logResult],
            [library.clock.getCurrentTime]),
    library.logger.logResult
]


@app.task
def runWorkflow(data):
    """Run ``flow`` over ``data`` with a fresh workflow engine.

    :param data: objects handed to ``GenericWorkflowEngine.process``; the
        script entry point below passes a list holding one dict.
    """
    from workflow.engine import GenericWorkflowEngine

    wfe = GenericWorkflowEngine()
    wfe.setWorkflow(flow)
    wfe.process(data)


if __name__ == "__main__":
    runWorkflow.delay([{"start": True}])
with_author_keywords=True, ), filter_core_keywords, guess_categories, IF( is_experimental_paper, guess_experiments, ), guess_keywords, guess_coreness, IF_ELSE( is_submission, mark('auto-approved', False), IF_ELSE( belongs_to_relevant_category, [ mark('auto-approved', True), set_coreness_in_extra_data, ], mark('auto-approved', False), ), ), ] NOTIFY_NOT_ACCEPTED = [ IF( is_submission, reply_ticket(context_factory=reply_ticket_context), ) ] NOTIFY_ALREADY_EXISTING = [