Ejemplo n.º 1
0
    def test_delete_archived_tasks(self, swh_scheduler):
        """Deleting archived tasks drops both the tasks and their task runs.

        One run is scheduled per created task and ended as "eventful"; a
        random subset of those is then deleted, and the remaining task and
        task-run counts are checked against the number of deletions.
        """
        self._create_task_types(swh_scheduler)
        now = utcnow()
        git_tasks = tasks_from_template(TEMPLATES["git"], now, 12)
        hg_tasks = tasks_from_template(TEMPLATES["hg"], now, 12)
        total_tasks = len(git_tasks) + len(hg_tasks)
        created_tasks = swh_scheduler.create_tasks(git_tasks + hg_tasks)

        # One scheduled run per created task
        scheduled_runs = []
        for created in created_tasks:
            scheduled_runs.append({
                "task": created["id"],
                "backend_id": str(uuid.uuid4()),
                "scheduled": utcnow(),
            })
        swh_scheduler.mass_schedule_task_runs(scheduled_runs)

        # Random threshold deciding which share of the ended runs gets deleted
        threshold = random.randint(0, 100)
        to_delete = []
        for run in scheduled_runs:
            ended_run = swh_scheduler.end_task_run(run["backend_id"],
                                                   status="eventful")
            if random.randint(0, 100) <= threshold:
                to_delete.append({
                    "task_id": ended_run["task"],
                    "task_run_id": ended_run["id"],
                })

        swh_scheduler.delete_archived_tasks(to_delete)

        remaining_ids = [task["id"] for task in swh_scheduler.search_tasks()]
        remaining_runs = swh_scheduler.get_task_runs(remaining_ids)
        expected_remaining = total_tasks - len(to_delete)

        assert len(remaining_ids) == expected_remaining
        assert len(remaining_runs) == expected_remaining
Ejemplo n.º 2
0
    def test_origin_visit_stats_upsert_cardinality_failing(
            self, swh_scheduler) -> None:
        """A batch upsert holding the same (url, visit_type) twice must fail.

        The call is expected to raise a SchedulerException whose message
        matches "CardinalityViolation".
        """
        with pytest.raises(SchedulerException, match="CardinalityViolation"):
            # Two entries targeting the same origin url and visit type
            duplicated_stats = [
                OriginVisitStats(
                    url="foo",
                    visit_type="git",
                    last_eventful=None,
                    last_uneventful=utcnow(),
                    last_notfound=None,
                    last_failed=None,
                    last_snapshot=None,
                ) for _ in range(2)
            ]
            swh_scheduler.origin_visit_stats_upsert(duplicated_stats)
def test_journal_client_origin_visit_status_from_journal_ignored_status(
        swh_scheduler):
    """Visit statuses that are not final ("created", "ongoing") are ignored.

    Processing only such statuses must not trigger any call to
    origin_visit_stats_upsert().
    """
    # Wrap the scheduler so that the calls it receives can be inspected
    swh_scheduler = Mock(wraps=swh_scheduler)

    # (origin, status, visit type) of each non-final visit status
    non_final = (
        ("foo", "created", "git"),
        ("bar", "ongoing", "svn"),
    )
    visit_statuses = [{
        "origin": origin,
        "visit": 1,
        "status": status,
        "date": utcnow(),
        "type": visit_type,
        "snapshot": None,
    } for origin, status, visit_type in non_final]

    journal_objects = {"origin_visit_status": visit_statuses}
    process_journal_objects(journal_objects, scheduler=swh_scheduler)

    # Every message was ignored, so nothing has been upserted
    swh_scheduler.origin_visit_stats_upsert.assert_not_called()
Ejemplo n.º 4
0
    def test_origin_visit_stats_upsert_batch(self, swh_scheduler) -> None:
        """Upserting several distinct origin visit stats in one call works.

        Two entries with different urls are upserted together and read back;
        none of the returned entries may be None.
        """
        eventful_stats = OriginVisitStats(
            url="foo",
            visit_type="git",
            last_eventful=utcnow(),
            last_uneventful=None,
            last_failed=None,
            last_notfound=None,
            last_snapshot=hash_to_bytes(
                "d81cc0710eb6cf9efd5b920a8453e1e07157b6cd"),
        )
        uneventful_stats = OriginVisitStats(
            url="bar",
            visit_type="git",
            last_eventful=None,
            last_uneventful=utcnow(),
            last_notfound=None,
            last_failed=None,
            last_snapshot=hash_to_bytes(
                "fffcc0710eb6cf9efd5b920a8453e1e07157bfff"),
        )
        visit_stats = [eventful_stats, uneventful_stats]

        swh_scheduler.origin_visit_stats_upsert(visit_stats)

        lookup_keys = [(vs.url, vs.visit_type) for vs in visit_stats]
        stored_stats = swh_scheduler.origin_visit_stats_get(lookup_keys)
        assert all(stored is not None for stored in stored_stats)
Ejemplo n.º 5
0
    def test_get_task_runs_with_executed(self, swh_scheduler):
        """get_task_runs() reflects the start and the end of an executed run.

        One run is scheduled per created task. The first run is then started
        and later ended as "eventful"; after each step, the single run
        returned for that task must carry the expected timestamps, status and
        metadata (the metadata given at start and at end both show up).
        """
        self._create_task_types(swh_scheduler)
        _time = utcnow()
        recurring = tasks_from_template(TEMPLATES["git"], _time, 12)
        oneshots = tasks_from_template(TEMPLATES["hg"], _time, 12)
        pending_tasks = swh_scheduler.create_tasks(recurring + oneshots)
        backend_tasks = [{
            "task": task["id"],
            "backend_id": str(uuid.uuid4()),
            "scheduled": utcnow(),
        } for task in pending_tasks]
        swh_scheduler.mass_schedule_task_runs(backend_tasks)

        btask = backend_tasks[0]

        # Step 1: the run is started
        ts = utcnow()
        swh_scheduler.start_task_run(btask["backend_id"],
                                     metadata={"something": "stupid"},
                                     timestamp=ts)
        runs = swh_scheduler.get_task_runs(task_ids=[btask["task"]])
        assert len(runs) == 1
        # excl must be a real tuple: ("id") is only the string "id", which
        # would turn the exclusion into a substring test
        assert subdict(runs[0], excl=("id", )) == {
            "task": btask["task"],
            "backend_id": btask["backend_id"],
            "scheduled": btask["scheduled"],
            "started": ts,
            "ended": None,
            "metadata": {
                "something": "stupid"
            },
            "status": "started",
        }

        # Step 2: the same run is ended as eventful, with extra metadata
        ts2 = utcnow()
        swh_scheduler.end_task_run(
            btask["backend_id"],
            metadata={"other": "stuff"},
            timestamp=ts2,
            status="eventful",
        )
        runs = swh_scheduler.get_task_runs(task_ids=[btask["task"]])
        assert len(runs) == 1
        assert subdict(runs[0], excl=("id", )) == {
            "task": btask["task"],
            "backend_id": btask["backend_id"],
            "scheduled": btask["scheduled"],
            "started": ts,
            "ended": ts2,
            "metadata": {
                "something": "stupid",
                "other": "stuff"
            },
            "status": "eventful",
        }
Ejemplo n.º 6
0
    def test_create_tasks(self, swh_scheduler):
        """create_tasks() is idempotent and returns the expected task fields.

        The same tasks are submitted several times (including duplicated
        within one call); the returned ids must be identical each time. Every
        returned task is then checked field by field against the task that
        was submitted, and the per-priority counts are verified.
        """
        self._create_task_types(swh_scheduler)
        num_git = 100
        git_tasks = tasks_from_template(TEMPLATES["git"], utcnow(), num_git)
        hg_tasks = tasks_from_template(TEMPLATES["hg"],
                                       utcnow(),
                                       num_priorities=NUM_PRIORITY_TASKS)
        tasks = git_tasks + hg_tasks

        # tasks are returned only once with their ids
        first_batch = swh_scheduler.create_tasks(tasks + tasks)
        first_ids = {t["id"] for t in first_batch}

        # creating the same set results in the same ids
        second_batch = swh_scheduler.create_tasks(tasks)
        second_ids = {t["id"] for t in second_batch}

        # Idempotence results
        assert second_ids == first_ids
        assert len(second_batch) == len(first_batch)

        seen_ids = set()
        actual_priorities = defaultdict(int)
        # Fields removed from the returned task before comparing it with the
        # submitted one
        stripped_fields = ("id", "status", "current_interval", "retries_left")

        for created, submitted in zip(second_batch, tasks):
            created = copy.deepcopy(created)
            task_type = TASK_TYPES[submitted["type"].split("-")[-1]]
            assert created["id"] not in seen_ids
            assert created["status"] == "next_run_not_scheduled"
            assert created["current_interval"] == task_type["default_interval"]
            assert created["policy"] == submitted.get("policy", "recurring")
            actual_priorities[created.get("priority")] += 1

            assert created["retries_left"] == (task_type["num_retries"] or 0)
            seen_ids.add(created["id"])
            for field in stripped_fields:
                del created[field]
            if "policy" not in submitted:
                del created["policy"]
            if "priority" not in submitted:
                del created["priority"]
                # NOTE(review): this comparison only runs for tasks submitted
                # without a priority; confirm whether it was meant to apply
                # to every task
                assert created == submitted

        expected_priorities = NUM_PRIORITY_TASKS.copy()
        expected_priorities[None] += num_git
        assert dict(actual_priorities) == expected_priorities
Ejemplo n.º 7
0
 def try_perform_actions(actions=actions):
     """Flush the pending actions when the queue is long or old enough.

     Nothing happens while the queue is empty. Otherwise perform_actions()
     is called when the queue holds more than ACTION_QUEUE_MAX_LENGTH entries
     or when more than ACTION_SEND_DELAY elapsed since the last send.
     """
     logger.debug("Try perform pending actions")
     if not actions["queue"]:
         return

     queue_is_full = len(actions["queue"]) > ACTION_QUEUE_MAX_LENGTH
     # The delay is only evaluated when the queue is not full yet
     if queue_is_full or utcnow() - actions["last_send"] > ACTION_SEND_DELAY:
         perform_actions(actions)
Ejemplo n.º 8
0
    def test_peek_ready_tasks_returns_only_no_priority_tasks(
            self, swh_scheduler):
        """peek_ready_tasks() only returns tasks that have no priority set."""
        self._create_task_types(swh_scheduler)
        now = utcnow()
        task_type = TEMPLATES["git"]["type"]
        # Mix of tasks with and without priorities
        tasks = tasks_from_template(
            TEMPLATES["git"],
            now,
            num_priorities=NUM_PRIORITY_TASKS,
        )

        count_priority = sum(1 for task in tasks
                             if task.get("priority") is not None)
        assert count_priority > 0, "Some created tasks should have some priority"

        random.shuffle(tasks)
        swh_scheduler.create_tasks(tasks)

        # Peek at every ready task of that type
        ready_tasks = swh_scheduler.peek_ready_tasks(task_type)

        # Exactly the tasks without priority are expected
        assert len(ready_tasks) == len(tasks) - count_priority

        # None of the peeked tasks may carry a priority
        assert all(task.get("priority") is None for task in ready_tasks)
Ejemplo n.º 9
0
    def test_metrics_origins_never_visited(self, swh_scheduler,
                                           listed_origins):
        """update_metrics() counts origins without recorded visit stats.

        Visit stats are recorded for exactly one listed origin; the metrics
        for its visit type must then report one visited origin, while every
        other visit type must report none.
        """
        swh_scheduler.record_listed_origins(listed_origins)

        # Pretend that a visit has been recorded for the first origin
        visited_origin = listed_origins[0]
        recorded_visit = OriginVisitStats(
            url=visited_origin.url,
            visit_type=visited_origin.visit_type,
            last_eventful=utcnow(),
            last_uneventful=None,
            last_failed=None,
            last_notfound=None,
            last_snapshot=hash_to_bytes(
                "d81cc0710eb6cf9efd5b920a8453e1e07157b6cd"),
        )
        swh_scheduler.origin_visit_stats_upsert([recorded_visit])

        metrics = swh_scheduler.update_metrics(
            lister_id=visited_origin.lister_id)
        for metric in metrics:
            same_visit_type = metric.visit_type == visited_origin.visit_type
            if not same_visit_type:
                # None of the origins of the other visit types was visited
                assert metric.origins_known == metric.origins_never_visited
                continue
            # Exactly one origin of this visit type was visited
            assert metric.origins_known - metric.origins_never_visited == 1
Ejemplo n.º 10
0
def respawn_tasks(ctx, task_ids: List[str], next_run: datetime.datetime):
    """Respawn tasks.

    Respawn tasks given by their ids (see the 'task list' command to
    find task ids) at the given date (immediately by default).

    Eg.

       swh-scheduler task respawn 1 3 12
    """
    # NOTE(review): the docstring above looks like CLI help text and is kept
    # verbatim — confirm against the command decorators.
    from swh.scheduler.utils import utcnow

    scheduler = ctx.obj["scheduler"]
    if not scheduler:
        raise ValueError("Scheduler class (local/remote) must be instantiated")

    # Without an explicit date, the tasks are respawned right away
    when = utcnow() if next_run is None else next_run

    # Ids are given as strings and converted to integers for the scheduler
    numeric_ids = list(map(int, task_ids))
    scheduler.set_status_tasks(numeric_ids,
                               status="next_run_not_scheduled",
                               next_run=when)

    click.echo("Respawn tasks %s\n" % (numeric_ids, ))
Ejemplo n.º 11
0
    def perform_actions(actions, backend=backend):
        """Apply every queued action in one database transaction, then ack.

        Each queued action names a backend method ("start_task_run" or
        "end_task_run") together with its arguments; all of them are called
        with the same cursor. The transaction is committed when every action
        succeeded, and rolled back as soon as one of them raises. In both
        cases (best effort) the messages of the attempted actions are
        acknowledged, the queue is emptied and "last_send" is refreshed.

        Args:
            actions (dict): shared state; its "queue" entry is read and reset,
                its "last_send" entry is overwritten.
            backend: object providing get_db(), put_db(), start_task_run()
                and end_task_run().
        """
        logger.info("Perform %s pending actions", len(actions["queue"]))
        action_map = {
            "start_task_run": backend.start_task_run,
            "end_task_run": backend.end_task_run,
        }

        messages = []
        db = backend.get_db()
        try:
            cursor = db.cursor(None)
            for action in actions["queue"]:
                messages.append(action["message"])
                function = action_map[action["action"]]
                args = action.get("args", ())
                # Work on a copy so the queued action is not mutated
                kwargs = dict(action.get("kwargs", {}), cur=cursor)
                function(*args, **kwargs)

        except Exception:
            # Still best effort, but the failure must not go unnoticed
            logger.exception(
                "Failed to perform pending actions, rolling back")
            db.conn.rollback()
        else:
            db.conn.commit()
        finally:
            backend.put_db(db)

        # Acknowledge the messages of every attempted action
        for message in messages:
            if not message.acknowledged:
                message.ack()
        actions["queue"] = []
        actions["last_send"] = utcnow()
Ejemplo n.º 12
0
    def schedule_task_run(self,
                          task_id,
                          backend_id,
                          metadata=None,
                          timestamp=None,
                          db=None,
                          cur=None):
        """Record that a task got scheduled by adding a task_run entry.

        Args:
            task_id (int): the identifier for the task being scheduled
            backend_id (str): the identifier of the job in the backend
            metadata (dict): metadata to add to the task_run entry; an empty
                dict is used when None
            timestamp (datetime.datetime): the instant the event occurred;
                utcnow() is used when None
            db: not used by this body
            cur: cursor used to run the query

        Returns:
            the row fetched after calling swh_scheduler_schedule_task_run()

        """
        run_metadata = {} if metadata is None else metadata
        event_time = utcnow() if timestamp is None else timestamp

        query = "select * from swh_scheduler_schedule_task_run(%s, %s, %s, %s)"
        cur.execute(query, (task_id, backend_id, run_metadata, event_time))

        return cur.fetchone()
Ejemplo n.º 13
0
    def start_task_run(self,
                       backend_id,
                       metadata=None,
                       timestamp=None,
                       db=None,
                       cur=None):
        """Record that a task run has started by updating its task_run entry.

        Args:
            backend_id (str): the identifier of the job in the backend
            metadata (dict): metadata to add to the task_run entry; an empty
                dict is used when None
            timestamp (datetime.datetime): the instant the event occurred;
                utcnow() is used when None
            db: not used by this body
            cur: cursor used to run the query

        Returns:
            the row fetched after calling swh_scheduler_start_task_run()

        """
        run_metadata = {} if metadata is None else metadata
        event_time = utcnow() if timestamp is None else timestamp

        query = "select * from swh_scheduler_start_task_run(%s, %s, %s)"
        cur.execute(query, (backend_id, run_metadata, event_time))

        return cur.fetchone()
def test_journal_client_origin_visit_status_from_journal_last_successful(
        swh_scheduler):
    """Visit statuses carrying a snapshot end up as a successful last visit.

    Several statuses are processed for origin "foo"; the stored stats must
    match the expected entry built from the visit dated DATE3.
    """
    # (origin, visit, status, date, snapshot id as hex)
    rows = [
        ("bar", 1, "partial", utcnow(),
         "d81cc0710eb6cf9efd5b920a8453e1e07157b6cd"),
        ("foo", 1, "full", DATE1,
         "eeecc0710eb6cf9efd5b920a8453e1e07157bfff"),
        ("foo", 2, "partial", DATE2,
         "aaacc0710eb6cf9efd5b920a8453e1e07157baaa"),
        ("foo", 3, "full", DATE3,
         "dddcc0710eb6cf9efd5b920a8453e1e07157bddd"),
    ]
    visit_statuses = [{
        "origin": origin,
        "visit": visit,
        "status": status,
        "date": date,
        "type": "git",
        "snapshot": hash_to_bytes(snapshot_hex),
    } for origin, visit, status, date, snapshot_hex in rows]

    journal_objects = {"origin_visit_status": visit_statuses}
    process_journal_objects(journal_objects, scheduler=swh_scheduler)

    stored_stats = swh_scheduler.origin_visit_stats_get([("foo", "git")])
    expected_stats = OriginVisitStats(
        url="foo",
        visit_type="git",
        last_successful=DATE3,
        last_visit=DATE3,
        last_visit_status=LastVisitStatus.successful,
        last_snapshot=hash_to_bytes(
            "dddcc0710eb6cf9efd5b920a8453e1e07157bddd"),
        next_position_offset=0,
        successive_visits=3,
    )
    assert_visit_stats_ok(stored_stats[0], expected_stats)
Ejemplo n.º 15
0
    def test_get_task_runs_with_scheduled(self, swh_scheduler):
        """get_task_runs() returns the runs of scheduled, not yet started tasks.

        Also checks that a task id beyond the number of created tasks yields
        nothing, and that ``limit`` caps the number of returned runs.
        """
        self._create_task_types(swh_scheduler)
        now = utcnow()
        git_tasks = tasks_from_template(TEMPLATES["git"], now, 12)
        hg_tasks = tasks_from_template(TEMPLATES["hg"], now, 12)
        total_tasks = len(git_tasks) + len(hg_tasks)
        created_tasks = swh_scheduler.create_tasks(git_tasks + hg_tasks)

        # One scheduled run per created task
        backend_tasks = []
        for created in created_tasks:
            backend_tasks.append({
                "task": created["id"],
                "backend_id": str(uuid.uuid4()),
                "scheduled": utcnow(),
            })
        swh_scheduler.mass_schedule_task_runs(backend_tasks)

        assert not swh_scheduler.get_task_runs(task_ids=[total_tasks + 1])

        # A single task id gives back its single, still untouched, run
        first = backend_tasks[0]
        runs = swh_scheduler.get_task_runs(task_ids=[first["task"]])
        assert len(runs) == 1

        expected_run = {
            "task": first["task"],
            "backend_id": first["backend_id"],
            "scheduled": first["scheduled"],
            "started": None,
            "ended": None,
            "metadata": None,
            "status": "scheduled",
        }
        assert subdict(runs[0], excl=("id", )) == expected_run

        all_task_ids = [bt["task"] for bt in backend_tasks]

        runs = swh_scheduler.get_task_runs(task_ids=all_task_ids, limit=2)
        assert len(runs) == 2

        runs = swh_scheduler.get_task_runs(task_ids=all_task_ids)
        assert len(runs) == total_tasks

        # Once ordered by task id, the runs must match what was scheduled
        keys = ("task", "backend_id", "scheduled")
        trimmed_runs = [subdict(run, keys) for run in runs]
        trimmed_runs.sort(key=lambda run: run["task"])
        assert trimmed_runs == backend_tasks
Ejemplo n.º 16
0
    def _check_grab_next_visit(self, swh_scheduler, visit_type, policy,
                               expected):
        """Check what grab_next_visits() returns for the given policy.

        The first call must return exactly the expected origins, in order,
        and set their last_scheduled date between the instants taken around
        the call. An immediate second call must return nothing, whereas a
        call using a timestamp seven days later must return the same origins
        again (in any order).
        """
        assert len(expected) != 0

        # Request one more than expected to check that no extra origin is returned
        grab_args = {
            "visit_type": visit_type,
            "count": len(expected) + 1,
            "policy": policy,
        }

        before = utcnow()
        ret = swh_scheduler.grab_next_visits(**grab_args)
        after = utcnow()

        assert ret == expected
        lookup_keys = [(origin.url, origin.visit_type) for origin in expected]
        visit_stats_list = swh_scheduler.origin_visit_stats_get(lookup_keys)
        assert len(visit_stats_list) == len(expected)
        for visit_stats in visit_stats_list:
            # Check that last_scheduled got updated
            assert before <= visit_stats.last_scheduled <= after

        # They should not be scheduled again
        ret = swh_scheduler.grab_next_visits(**grab_args)
        assert ret == [], "grab_next_visits returned already-scheduled origins"

        # But a week later, they should
        a_week_later = after + datetime.timedelta(days=7)
        ret = swh_scheduler.grab_next_visits(timestamp=a_week_later,
                                             **grab_args)
        # We need to sort them because their 'last_scheduled' field is updated to
        # exactly the same value, so the order is not deterministic
        assert sorted(ret) == sorted(
            expected), "grab_next_visits didn't reschedule visits after a week"
Ejemplo n.º 17
0
    def test_update_metrics_twice(self, swh_scheduler, listed_origins):
        """Each update_metrics() call stamps the metrics with its timestamp.

        The metrics are updated twice, one second apart; every returned
        metric must carry the timestamp of the call that produced it.
        """
        swh_scheduler.record_listed_origins(listed_origins)

        first_ts = utcnow()
        for stamp in (first_ts, first_ts + datetime.timedelta(seconds=1)):
            metrics = swh_scheduler.update_metrics(timestamp=stamp)
            assert all(metric.last_update == stamp for metric in metrics)
Ejemplo n.º 18
0
    def test_search_tasks(self, swh_scheduler):
        """search_tasks() without arguments returns the created tasks."""
        self._create_task_types(swh_scheduler)
        now = utcnow()
        templates = tasks_from_template(TEMPLATES["git"], now, 100)
        created = swh_scheduler.create_tasks(templates)

        # RealDictRow is not a real dict: convert rows before comparing
        found = [dict(row.items()) for row in swh_scheduler.search_tasks()]
        expected = [dict(row.items()) for row in created]
        assert found == expected
Ejemplo n.º 19
0
    def task_failed(event, message):
        """Queue an "end_task_run" action flagging the task run as failed.

        Args:
            event: event payload; its "uuid" entry becomes the positional
                argument of the queued action.
            message: message the event came with; it is stored alongside the
                queued action.
        """
        # Lazy logging arguments, as in task_started: nothing is formatted
        # unless DEBUG is enabled, and a tuple payload cannot break the
        # formatting
        logger.debug("task_failed: event: %s", event)
        logger.debug("             message: %s", message)

        queue_action(
            {
                "action": "end_task_run",
                "args": [event["uuid"]],
                "kwargs": {"timestamp": utcnow(), "status": "failed"},
                "message": message,
            }
        )
def test_journal_client_origin_visit_status_from_journal_last_failed(
        swh_scheduler):
    """Visit statuses without snapshot end up as a failed last visit.

    Three "full" statuses without snapshot are processed for origin "bar";
    the stored stats must match the expected entry built from the visit
    dated DATE3.
    """
    # (origin, visit, status, date); none of them carries a snapshot
    rows = [
        ("foo", 1, "partial", utcnow()),
        ("bar", 1, "full", DATE1),
        ("bar", 2, "full", DATE2),
        ("bar", 3, "full", DATE3),
    ]
    visit_statuses = [{
        "origin": origin,
        "visit": visit,
        "status": status,
        "date": date,
        "type": "git",
        "snapshot": None,
    } for origin, visit, status, date in rows]

    journal_objects = {"origin_visit_status": visit_statuses}
    process_journal_objects(journal_objects, scheduler=swh_scheduler)

    stored_stats = swh_scheduler.origin_visit_stats_get([("bar", "git")])
    expected_stats = OriginVisitStats(
        url="bar",
        visit_type="git",
        last_visit=DATE3,
        last_visit_status=LastVisitStatus.failed,
        next_position_offset=6,
        successive_visits=3,
    )
    assert_visit_stats_ok(stored_stats[0], expected_stats)
def test_journal_client_origin_visit_status_after_grab_next_visits(
        swh_scheduler, stored_lister):
    """Stats entries created by grab_next_visits() survive journal upserts.

    grab_next_visits() is called for the "git" visit type before the journal
    objects are processed; afterwards the "git" entries must still hold the
    last_scheduled date set by that call, and the "hg" entries must have
    none.
    """

    def stats_for(url, visit_type):
        # Stored visit stats for one (url, visit type) pair
        return swh_scheduler.origin_visit_stats_get([(url, visit_type)])

    origin_keys = {(v["origin"], v["type"]) for v in VISIT_STATUSES_2}
    listed_origins = [
        ListedOrigin(lister_id=stored_lister.id,
                     url=url,
                     visit_type=visit_type)
        for (url, visit_type) in origin_keys
    ]
    swh_scheduler.record_listed_origins(listed_origins)

    before = utcnow()
    swh_scheduler.grab_next_visits(visit_type="git",
                                   count=10,
                                   policy="oldest_scheduled_first")
    after = utcnow()

    # Only the grabbed visit type has a stats entry so far
    assert stats_for("cavabarder", "hg") == []
    assert stats_for("cavabarder", "git")[0] is not None

    journal_objects = {"origin_visit_status": VISIT_STATUSES_2}
    process_journal_objects(journal_objects, scheduler=swh_scheduler)

    for url in ("cavabarder", "iciaussi"):
        git_stats = stats_for(url, "git")[0]
        assert before <= git_stats.last_scheduled <= after

        hg_stats = stats_for(url, "hg")[0]
        assert hg_stats.last_scheduled is None

    latest_visit = DATE1 + 5 * ONE_DAY
    ovs = stats_for("cavabarder", "git")[0]
    assert ovs.last_successful == latest_visit
    assert ovs.last_visit == latest_visit
    assert ovs.last_visit_status == LastVisitStatus.successful
    assert ovs.last_snapshot == hash_to_bytes(
        "5555555555555555555555555555555555555555")
Ejemplo n.º 22
0
    def task_started(event, message):
        """Queue a "start_task_run" action for the task the event refers to.

        Args:
            event: event payload; its "type", "uuid" and "hostname" entries
                are read, as well as the optional "name" one.
            message: message the event came with; it is stored alongside the
                queued action.
        """
        event_type = event["type"]
        event_name = event.get("name", "N/A")
        logger.debug("task_started: %s %s", event_type, event_name)

        task_uuid = event["uuid"]
        run_kwargs = {
            "timestamp": utcnow(),
            # The worker is the host which emitted the event
            "metadata": {"worker": event["hostname"]},
        }
        action = {
            "action": "start_task_run",
            "args": [task_uuid],
            "kwargs": run_kwargs,
            "message": message,
        }
        queue_action(action)
Ejemplo n.º 23
0
    def test_get_task_runs_no_task_executed(self, swh_scheduler):
        """Without any scheduled run, get_task_runs() returns nothing.

        Tasks are created but no run is scheduled for them, so every lookup
        (with or without ids, with or without limit) must come back empty.
        """
        self._create_task_types(swh_scheduler)
        now = utcnow()
        git_tasks = tasks_from_template(TEMPLATES["git"], now, 12)
        hg_tasks = tasks_from_template(TEMPLATES["hg"], now, 12)
        swh_scheduler.create_tasks(git_tasks + hg_tasks)

        lookups = (
            {"task_ids": ()},
            {"task_ids": (1, 2, 3)},
            {"task_ids": (1, 2, 3), "limit": 10},
        )
        for lookup in lookups:
            assert not swh_scheduler.get_task_runs(**lookup)
Ejemplo n.º 24
0
    def test_get_tasks(self, swh_scheduler):
        """get_tasks() returns exactly the tasks whose ids were requested.

        The created tasks are shuffled, then consumed by random-sized chunks;
        each chunk is fetched by id and compared with the original tasks.
        """
        self._create_task_types(swh_scheduler)
        now = utcnow()
        templates = tasks_from_template(TEMPLATES["git"], now, 100)
        tasks = swh_scheduler.create_tasks(templates)
        random.shuffle(tasks)

        def by_id(task):
            return task["id"]

        # The loop stops once a single task is left
        while len(tasks) > 1:
            chunk_size = random.randrange(1, len(tasks))
            expected_chunk = sorted(tasks[:chunk_size], key=by_id)
            del tasks[:chunk_size]

            fetched = swh_scheduler.get_tasks(task["id"]
                                              for task in expected_chunk)
            # result is not guaranteed to be sorted
            fetched.sort(key=by_id)
            assert fetched == expected_chunk
Ejemplo n.º 25
0
def process_event(event, scheduler_backend):
    """Translate one event into the matching task run update.

    Events without a "uuid" are skipped. For the others, a statsd counter
    tagged with the event type is incremented, then:

    - "task-started" starts the task run, recording the event "hostname" as
      the worker;
    - "task-result" ends the task run with a status derived from the result;
    - "task-failed" ends the task run with the "failed" status.

    Any other event type only increments the counter.

    Args:
        event: mapping describing the event ("uuid", "type", ...)
        scheduler_backend: object providing start_task_run() and
            end_task_run()
    """
    uuid = event.get("uuid")
    if not uuid:
        return

    event_type = event["type"]
    statsd.increment("swh_scheduler_listener_handled_event_total",
                     tags={"event_type": event_type})

    if event_type == "task-started":
        scheduler_backend.start_task_run(
            uuid,
            timestamp=utcnow(),
            metadata={"worker": event.get("hostname")},
        )
        return

    if event_type == "task-failed":
        scheduler_backend.end_task_run(uuid,
                                       timestamp=utcnow(),
                                       status="failed")
        return

    if event_type != "task-result":
        return

    result = event["result"]

    # Status explicitly reported by the task, if any
    reported = result.get("status") if isinstance(result, dict) else None

    if reported == "success":
        # A success is refined through the "eventful" entry of the result
        status = "eventful" if result.get("eventful") else "uneventful"
    elif reported is None:
        # No reported status: rely on the truthiness of the result itself
        status = "eventful" if result else "uneventful"
    else:
        status = reported

    scheduler_backend.end_task_run(uuid,
                                   timestamp=utcnow(),
                                   status=status,
                                   result=result)
Ejemplo n.º 26
0
def test_schedule_tasks(swh_scheduler):
    """The "task schedule" command creates one task per line of a CSV file.

    A two-line, semicolon-delimited CSV file is fed to the command; the
    command must succeed and its output must describe both created tasks.
    """
    # Each row is completed with its own timestamp and a newline
    row_prefixes = (
        b'swh-test-ping;[["arg1", "arg2"]];{"key": "value"};',
        b'swh-test-ping;[["arg3", "arg4"]];{"key": "value"};',
    )
    csv_data = b"".join(prefix + utcnow().isoformat().encode() + b"\n"
                        for prefix in row_prefixes)

    with tempfile.NamedTemporaryFile(suffix=".csv") as csv_fd:
        csv_fd.write(csv_data)
        csv_fd.seek(0)
        command = ["task", "schedule", "-d", ";", csv_fd.name]
        result = invoke(swh_scheduler, False, command)

    expected = r"""
Created 2 tasks

Task 1
  Next run: today \(.*\)
  Interval: 1 day, 0:00:00
  Type: swh-test-ping
  Policy: recurring
  Args:
    \['arg1', 'arg2'\]
  Keyword args:
    key: 'value'

Task 2
  Next run: today \(.*\)
  Interval: 1 day, 0:00:00
  Type: swh-test-ping
  Policy: recurring
  Args:
    \['arg3', 'arg4'\]
  Keyword args:
    key: 'value'

""".lstrip()
    assert result.exit_code == 0, result.output
    assert re.fullmatch(expected, result.output, re.MULTILINE), result.output
Ejemplo n.º 27
0
def schedule_task(ctx, type, options, policy, priority, next_run):
    """Schedule one task from arguments.

    The first argument is the name of the task type, further ones are
    positional and keyword argument(s) of the task, in YAML format.
    Keyword args are of the form key=value.

    Usage sample:

    swh-scheduler --database 'service=swh-scheduler' \
        task add list-pypi

    swh-scheduler --database 'service=swh-scheduler' \
        task add list-debian-distribution --policy=oneshot distribution=stretch

    Note: if the priority is not given, the task won't have the priority set,
    which is considered as the lowest priority level.
    """
    # NOTE(review): the docstring above looks like CLI help text and is kept
    # verbatim — confirm against the command decorators.
    from swh.scheduler.utils import utcnow

    from .utils import parse_options

    scheduler = ctx.obj["scheduler"]
    if not scheduler:
        raise ValueError("Scheduler class (local/remote) must be instantiated")

    now = utcnow()

    positional, keywords = parse_options(options)
    new_task = dict(
        type=type,
        policy=policy,
        priority=priority,
        arguments=dict(args=positional, kwargs=keywords),
        # Run as soon as possible unless a date was given
        next_run=next_run or now,
    )
    created = scheduler.create_tasks([new_task])

    # Summary line first, then one description per created task
    lines = ["Created %d tasks\n" % len(created)]
    lines.extend(pretty_print_task(created_task) for created_task in created)

    click.echo("\n".join(lines))
Ejemplo n.º 28
0
 def grab_ready_priority_tasks(
     self,
     task_type: str,
     timestamp: Optional[datetime.datetime] = None,
     num_tasks: Optional[int] = None,
     db=None,
     cur=None,
 ) -> List[Dict]:
     """Call swh_scheduler_grab_any_ready_priority_tasks() and return its rows.

     Args:
         task_type: first argument given to the SQL function
         timestamp: second argument given to the SQL function; utcnow() is
             used when None
         num_tasks: third argument given to the SQL function (cast to
             bigint); may be None
         db: not used by this body
         cur: cursor used to run the query

     Returns:
         every row fetched from the cursor
     """
     when = utcnow() if timestamp is None else timestamp
     query = """select * from swh_scheduler_grab_any_ready_priority_tasks(
              %s, %s, %s :: bigint)"""
     cur.execute(query, (task_type, when, num_tasks))
     logger.debug("GRAB %s => %s", task_type, cur.rowcount)
     return cur.fetchall()
Ejemplo n.º 29
0
    def test_grab_ready_priority_tasks(self, swh_scheduler):
        """Peeking and grabbing ready priority tasks give matching results.

        Tasks with "high", "low" and "normal" priorities are created; the
        tasks peeked and then grabbed for the git task type must be equal
        apart from their status, and all carry a priority.
        """
        self._create_task_types(swh_scheduler)
        now = utcnow()
        task_type = TEMPLATES["git"]["type"]
        num_tasks = 100

        # (template name, priority) of each batch of created tasks
        batches = (("git", "high"), ("hg", "low"), ("hg", "normal"))
        tasks = []
        for template_name, priority in batches:
            tasks.extend(
                tasks_with_priority_from_template(
                    TEMPLATES[template_name],
                    now,
                    num_tasks,
                    priority,
                ))

        random.shuffle(tasks)
        swh_scheduler.create_tasks(tasks)

        peeked_tasks = swh_scheduler.peek_ready_priority_tasks(task_type,
                                                               num_tasks=50)
        grabbed_tasks = swh_scheduler.grab_ready_priority_tasks(task_type,
                                                                num_tasks=50)

        for peeked, grabbed in zip(peeked_tasks, grabbed_tasks):
            # Only the status differs between a peeked and a grabbed task
            assert peeked["status"] == "next_run_not_scheduled"
            del peeked["status"]
            assert grabbed["status"] == "next_run_scheduled"
            del grabbed["status"]
            assert peeked == grabbed
            assert peeked["priority"] == grabbed["priority"]
            assert peeked["priority"] is not None
Ejemplo n.º 30
0
    def test_origin_visit_stats_upsert_with_snapshot(self,
                                                     swh_scheduler) -> None:
        """Upserted visit stats holding a snapshot are read back unchanged.

        The entry must be returned for its own (url, visit type) pair only:
        the same url with another visit type gives nothing.
        """
        url = "https://github.com/666/test"
        eventful_date = utcnow()

        visit_stats = OriginVisitStats(
            url=url,
            visit_type="git",
            last_eventful=eventful_date,
            last_uneventful=None,
            last_failed=None,
            last_notfound=None,
            last_snapshot=hash_to_bytes(
                "d81cc0710eb6cf9efd5b920a8453e1e07157b6cd"),
        )
        swh_scheduler.origin_visit_stats_upsert([visit_stats])

        stored_git = swh_scheduler.origin_visit_stats_get([(url, "git")])
        assert stored_git == [visit_stats]

        stored_svn = swh_scheduler.origin_visit_stats_get([(url, "svn")])
        assert stored_svn == []