class test_workflow_workflows_errors(object):
    """Workflow fixture used by the unit tests.

    The task list starts "test_workflow_error" through
    ``start_async_workflow`` inside a first ``simple_for`` block, calls
    ``wait_for_a_workflow_to_complete`` inside a second one, and ends with
    ``workflows_reviews``.
    """

    workflow = [
        # First block: presumably repeated for 0..5 in steps of 1, with "X"
        # as the loop variable name -- confirm against simple_for.
        simple_for(0, 5, 1, "X"),
        [start_async_workflow("test_workflow_error")],
        end_for,
        # Second block: same bounds, no variable name given.
        # NOTE(review): 0.1 looks like a delay/timeout -- confirm the unit.
        simple_for(0, 5, 1),
        [wait_for_a_workflow_to_complete(0.1)],
        end_for,
        # NOTE(review): the meaning of the two positional flags is not
        # visible from here -- check the workflows_reviews signature.
        workflows_reviews(False, False),
    ]
# Example no. 2
# 0
class test_workflow_workflows_errors_C(object):
    """Workflow fixture used by the unit tests.

    The task list starts "test_workflow_workflows_errors_B" through
    ``start_async_workflow`` inside a ``simple_for`` block, then lists
    ``wait_for_workflows_to_complete`` and ``workflows_reviews``.
    """

    workflow = [
        # Presumably repeated for 0..5 in steps of 1 with "X" as the loop
        # variable name -- confirm against simple_for.
        simple_for(0, 5, 1, "X"),
        [start_async_workflow("test_workflow_workflows_errors_B")],
        end_for,
        # Listed as the task object itself, not as the result of a call.
        wait_for_workflows_to_complete,
        # NOTE(review): the meaning of the two positional flags is not
        # visible from here -- check the workflows_reviews signature.
        workflows_reviews(True, False),
    ]
class oaiharvest_harvest_repositories(RecordWorkflow):
    """A workflow for use with OAI harvesting in BibSched.

    The task list iterates over ``get_repositories_list()``, harvests each
    repository, and for every record read from the harvested files that
    passes ``filtering_oai_pmh_identifier`` starts an asynchronous
    sub-workflow. It then waits on the started workflows, reviews them and
    calls ``update_last_update``.

    The static methods render a summary of a workflow object with the
    "workflows/styles/harvesting_description.html" template.
    """

    object_type = "workflow"
    # NOTE(review): presumably the workflow that
    # get_workflow_from_engine_definition resolves for each record --
    # confirm against that helper.
    record_workflow = "oaiharvest_record_post_process"

    workflow = [
        init_harvesting,
        foreach(get_repositories_list(), "repository"),
        [
            write_something_generic("Harvesting",
                                    [task_update_progress, write_message]),
            harvest_records,
            foreach(get_obj_extra_data_key("harvested_files_list")),
            [
                write_something_generic("Starting sub-workflows for file",
                                        [task_update_progress, write_message]),
                foreach(get_records_from_file()),
                [
                    workflow_if(filtering_oai_pmh_identifier),
                    [
                        # Branch on num_workflow_running_greater(10) with
                        # neg=True (presumably "not more than 10 running").
                        workflow_if(num_workflow_running_greater(10),
                                    neg=True),
                        [
                            start_async_workflow(
                                preserve_data=True,
                                preserve_extra_data_keys=[
                                    "repository", "oai_identifier"
                                ],
                                get_workflow_from=
                                get_workflow_from_engine_definition,
                            ),
                        ],
                        workflow_else,
                        [
                            # Other branch: report, wait on one workflow,
                            # then start the sub-workflow the same way.
                            write_something_generic(
                                ["Waiting for workflows to finish"],
                                [task_update_progress, write_message]),
                            wait_for_a_workflow_to_complete(10.0),
                            start_async_workflow(
                                preserve_data=True,
                                preserve_extra_data_keys=[
                                    "repository", "oai_identifier"
                                ],
                                get_workflow_from=
                                get_workflow_from_engine_definition,
                            ),
                        ],
                    ],
                ], end_for
            ], end_for
        ], end_for,
        write_something_generic(
            ["Processing: ", get_nb_workflow_created, " records"],
            [task_update_progress, write_message]),
        # One wait per created workflow, reporting progress after each.
        simple_for(0, get_nb_workflow_created, 1),
        [
            wait_for_a_workflow_to_complete(1.0),
            write_something_generic([get_workflows_progress, "%% complete"],
                                    [task_update_progress, write_message]),
        ], end_for,
        workflows_reviews(stop_if_error=True),
        update_last_update(get_repositories_list())
    ]

    @staticmethod
    def get_description(bwo):
        """Return the rendered harvesting description of the object.

        :param bwo: workflow object providing ``get_extra_data()`` and
            ``get_tasks_results()``.
        :return: output of the "harvesting_description.html" template.
        :raises KeyError: if the extra data has no '_last_task_name' entry.
        """
        from flask import render_template

        extra_data = bwo.get_extra_data()

        # Identifiers are optional: only set when options carry them.
        identifiers = None
        if 'options' in extra_data and 'identifiers' in extra_data["options"]:
            identifiers = extra_data["options"]["identifiers"]

        results = bwo.get_tasks_results()

        # Only the first 'review_workflow' result is displayed.
        if 'review_workflow' in results:
            result_progress = results['review_workflow'][0]['result']
        else:
            result_progress = {}

        current_task = extra_data['_last_task_name']

        return render_template("workflows/styles/harvesting_description.html",
                               identifiers=identifiers,
                               result_progress=result_progress,
                               current_task=current_task)

    @staticmethod
    def get_title(bwo):
        """Return title of object.

        :param bwo: workflow object whose extra data holds a "repository"
            mapping with a "name" entry.
        :raises KeyError: if "repository" or its "name" is missing.
        """
        return "Summary of OAI harvesting from: {0}".format(
            bwo.get_extra_data()["repository"]["name"])

    @staticmethod
    def formatter(bwo, **kwargs):
        """Return the rendered description plus the spawned objects.

        Renders the same template as ``get_description`` and additionally
        passes ``related_objects``: one ``(id, title)`` pair per id found
        under "objects_spawned" in the extra data. The title is ``None``
        when the object cannot be loaded and "No title" when no title can
        be produced for it.

        :param bwo: workflow object providing ``get_extra_data()`` and
            ``get_tasks_results()``.
        :param kwargs: accepted and ignored, so callers may pass extra
            formatting options.
        :return: output of the "harvesting_description.html" template.
        :raises KeyError: if the extra data has no '_last_task_name' entry.
        """
        from flask import render_template
        from invenio.modules.workflows.models import BibWorkflowObject
        from invenio.modules.workflows.registry import workflows

        extra_data = bwo.get_extra_data()

        # Identifiers are optional: only set when options carry them.
        identifiers = None
        if 'options' in extra_data and 'identifiers' in extra_data["options"]:
            identifiers = extra_data["options"]["identifiers"]

        results = bwo.get_tasks_results()

        # Only the first 'review_workflow' result is displayed.
        if 'review_workflow' in results:
            result_progress = results['review_workflow'][0]['result']
        else:
            result_progress = {}

        current_task = extra_data['_last_task_name']

        related_objects = []
        for id_object in extra_data.get("objects_spawned", []):
            spawned_object = BibWorkflowObject.query.get(id_object)
            if not spawned_object:
                # Object no longer loadable: keep the id, no title.
                related_objects.append((id_object, None))
                continue

            # The registry lookup may find nothing for the stored name;
            # fall back to the placeholder title instead of failing on None.
            definition = workflows.get(spawned_object.get_workflow_name())
            title = None
            if definition is not None:
                title = definition.get_title(spawned_object)
            related_objects.append((spawned_object.id, title or "No title"))

        return render_template("workflows/styles/harvesting_description.html",
                               identifiers=identifiers,
                               result_progress=result_progress,
                               current_task=current_task,
                               related_objects=related_objects)
class ingestion_arxiv_math(WorkflowBase):
    """Supervising workflow for harvesting arXiv via OAI-PMH (oaiharvester).

    For each record read from the harvested files that passes
    ``filtering_oai_pmh_identifier``, the task list starts a
    "process_record_arxiv" workflow asynchronously; progress messages are
    sent to ``task_update_progress`` and ``write_message`` along the way.
    """

    object_type = "workflow"
    workflow = [
        write_something_generic(
            "Initialization", [task_update_progress, write_message]
        ),
        init_harvesting,
        write_something_generic(
            "Starting", [task_update_progress, write_message]
        ),
        foreach(get_repositories_list(), "repository"),
        [
            write_something_generic(
                "Harvesting", [task_update_progress, write_message]
            ),
            harvest_records,
            write_something_generic(
                "Reading Files", [task_update_progress, write_message]
            ),
            foreach(get_obj_extra_data_key("harvested_files_list")),
            [
                write_something_generic(
                    "Creating Workflows",
                    [task_update_progress, write_message],
                ),
                foreach(get_records_from_file()),
                [
                    workflow_if(filtering_oai_pmh_identifier),
                    [
                        # Branch on num_workflow_running_greater(10) with
                        # neg=True (presumably "not more than 10 running").
                        workflow_if(
                            num_workflow_running_greater(10), neg=True
                        ),
                        [
                            start_async_workflow(
                                "process_record_arxiv",
                                preserve_data=True,
                                preserve_extra_data_keys=["repository"],
                            ),
                            write_something_generic(
                                [
                                    "Workflow started: ",
                                    get_nb_workflow_created,
                                ],
                                [task_update_progress, write_message],
                            ),
                        ],
                        workflow_else,
                        [
                            write_something_generic(
                                [
                                    "Max simultaneous workflows reached: ",
                                    "Waiting for one to finish",
                                ],
                                [task_update_progress, write_message],
                            ),
                            wait_for_a_workflow_to_complete(0.05),
                            start_async_workflow(
                                "process_record_arxiv",
                                preserve_data=True,
                                preserve_extra_data_keys=["repository"],
                            ),
                            write_something_generic(
                                [
                                    "Workflow started :",
                                    get_nb_workflow_created,
                                    " ",
                                ],
                                [task_update_progress, write_message],
                            ),
                        ],
                    ],
                ],
                end_for,
            ],
            end_for,
        ],
        end_for,
        write_something_generic(
            ["Processing : ", get_nb_workflow_created, " records"],
            [task_update_progress, write_message],
        ),
        # One wait per created workflow, reporting progress after each.
        simple_for(0, get_nb_workflow_created, 1),
        [
            wait_for_a_workflow_to_complete(),
            write_something_generic(
                [get_workflows_progress, " % Complete"],
                [task_update_progress, write_message],
            ),
        ],
        end_for,
        write_something_generic(
            "Finishing", [task_update_progress, write_message]
        ),
        workflows_reviews(stop_if_error=True),
        update_last_update(get_repositories_list()),
    ]

    @staticmethod
    def get_description(bwo):
        """Render the harvesting description template for the object.

        Passes the optional identifiers from the extra data, the first
        'review_workflow' task result (or an empty dict) and the
        '_last_task_name' entry to the template.
        """
        from flask import render_template

        data = bwo.get_extra_data()

        # Identifiers stay None unless options are present and carry them.
        identifiers = None
        if 'options' in data:
            options = data["options"]
            if 'identifiers' in options:
                identifiers = options["identifiers"]

        task_results = bwo.get_tasks_results()
        progress = (task_results['review_workflow'][0]['result']
                    if 'review_workflow' in task_results else {})

        last_task = data['_last_task_name']

        return render_template(
            "workflows/styles/harvesting_description.html",
            identifiers=identifiers,
            result_progress=progress,
            current_task=last_task,
        )

    @staticmethod
    def get_title(bwo):
        """Build the title from the repository name in the extra data."""
        repository = bwo.get_extra_data()["repository"]
        return "Supervising harvesting of {0}".format(repository["name"])

    @staticmethod
    def formatter(bwo, **kwargs):
        """Return the formatted object; delegates to ``get_description``.

        Extra keyword arguments are accepted and not used.
        """
        return ingestion_arxiv_math.get_description(bwo)
class oaiharvest_harvest_repositories(RecordWorkflow):
    """Workflow definition for OAI harvesting run from BibSched.

    The task list harvests every repository from
    ``get_repositories_list()`` and starts an asynchronous sub-workflow for
    each record that passes ``filtering_oai_pmh_identifier``, then waits on
    the started workflows, reviews them and calls ``update_last_update``.
    """

    # NOTE(review): a class with this same name is defined earlier in this
    # file; this later definition rebinds the name -- confirm which one is
    # meant to be used.

    object_type = "workflow"
    record_workflow = "oaiharvest_record_post_process"

    workflow = [
        init_harvesting,
        foreach(get_repositories_list(), "repository"),
        [
            write_something_generic(
                "Harvesting", [task_update_progress, write_message]
            ),
            harvest_records,
            foreach(get_obj_extra_data_key("harvested_files_list")),
            [
                write_something_generic(
                    "Starting sub-workflows for file",
                    [task_update_progress, write_message],
                ),
                foreach(get_records_from_file()),
                [
                    workflow_if(filtering_oai_pmh_identifier),
                    [
                        # Branch on num_workflow_running_greater(10) with
                        # neg=True (presumably "not more than 10 running").
                        workflow_if(
                            num_workflow_running_greater(10), neg=True
                        ),
                        [
                            start_async_workflow(
                                preserve_data=True,
                                preserve_extra_data_keys=[
                                    "repository",
                                    "oai_identifier",
                                ],
                                get_workflow_from=(
                                    get_workflow_from_engine_definition
                                ),
                            ),
                        ],
                        workflow_else,
                        [
                            write_something_generic(
                                ["Waiting for workflows to finish"],
                                [task_update_progress, write_message],
                            ),
                            wait_for_a_workflow_to_complete(10.0),
                            start_async_workflow(
                                preserve_data=True,
                                preserve_extra_data_keys=[
                                    "repository",
                                    "oai_identifier",
                                ],
                                get_workflow_from=(
                                    get_workflow_from_engine_definition
                                ),
                            ),
                        ],
                    ],
                ],
                end_for,
            ],
            end_for,
        ],
        end_for,
        write_something_generic(
            ["Processing: ", get_nb_workflow_created, " records"],
            [task_update_progress, write_message],
        ),
        # One wait per created workflow, reporting progress after each.
        simple_for(0, get_nb_workflow_created, 1),
        [
            wait_for_a_workflow_to_complete(1.0),
            write_something_generic(
                [get_workflows_progress, "%% complete"],
                [task_update_progress, write_message],
            ),
        ],
        end_for,
        workflows_reviews(stop_if_error=True),
        update_last_update(get_repositories_list()),
    ]