Ejemplo n.º 1
0
 def get_worker_jobs(self, queue, worker_type, worker):
     """Fetch the queue API record for a single worker.

     The URL's four path slots are filled, in order, with
     ``self.provisioner``, ``queue``, ``worker_type`` and ``worker``.

     NOTE(review): ``queue`` lands in the ``worker-types`` slot and
     ``worker_type`` in the slot right after ``workers`` -- presumably the
     worker group; confirm against callers.

     Returns:
         Whatever ``utils.get_jsonc`` returns for the built URL.
     """
     # TODO: need to get worker-group...
     # legacy endpoint, kept for reference:
     # "https://queue.taskcluster.net/v1/provisioners/%s/worker-types/%s/workers/%s/%s"
     path_values = (self.provisioner, queue, worker_type, worker)
     worker_url = (
         "https://firefox-ci-tc.services.mozilla.com/api/queue/v1/provisioners/%s/worker-types/%s/workers/%s/%s"
         % path_values
     )
     return utils.get_jsonc(worker_url, self.verbosity)
Ejemplo n.º 2
0
 def get_worker_types(self, provisioner):
     """Fetch the worker-type listing (limit 100) for ``provisioner``.

     Returns:
         Whatever ``utils.get_jsonc`` returns for the listing URL.
     """
     # legacy endpoint, kept for reference:
     # https://queue.taskcluster.net/v1/provisioners/proj-autophone/worker-types?limit=100
     listing_url = (
         "https://firefox-ci-tc.services.mozilla.com/api/queue/v1/provisioners/%s/worker-types?limit=100"
         % provisioner
     )
     return utils.get_jsonc(listing_url, self.verbosity)
Ejemplo n.º 3
0
 def set_current_worker_types(self):
     """Append the known worker types to ``self.tc_current_worker_types``.

     Requests up to ``MAX_WORKER_TYPES`` worker types of the
     ``proj-autophone`` provisioner and appends the ``workerType`` of every
     returned entry to ``self.tc_current_worker_types``.

     When the response carries no ``workerTypes`` key, a warning is logged
     and nothing is appended (instead of failing with ``KeyError``).
     """
     # get the queues with data
     # legacy endpoint, kept for reference:
     # https://queue.taskcluster.net/v1/provisioners/proj-autophone/worker-types?limit=100
     provisioner_id = "proj-autophone"
     url = (
         "https://firefox-ci-tc.services.mozilla.com/api/queue/v1/provisioners/%s/worker-types/?limit=%s"
         % (provisioner_id, MAX_WORKER_TYPES)
     )
     response = utils.get_jsonc(url, self.verbosity)
     if "workerTypes" not in response:
         # same policy as the other fetchers here: warn and carry on
         logger.warning(
             "failed to get worker types for provisioner '%s'", provisioner_id
         )
         return
     self.tc_current_worker_types.extend(
         [entry["workerType"] for entry in response["workerTypes"]]
     )
Ejemplo n.º 4
0
 def set_queue_counts(self):
     """Store the pending-task count of every devicepool queue.

     For each queue in ``self.devicepool_queues_and_workers`` the pending
     endpoint of the ``proj-autophone`` provisioner is queried and the
     ``pendingTasks`` value is saved in ``self.tc_queue_counts[queue]``.
     A response without ``pendingTasks`` only produces a warning.
     """
     # legacy endpoint, kept for reference:
     # "https://queue.taskcluster.net/v1/pending/proj-autophone/%s"
     for queue_name in self.devicepool_queues_and_workers:
         pending_url = (
             "https://firefox-ci-tc.services.mozilla.com/api/queue/v1/pending/%s/%s"
             % ("proj-autophone", queue_name)
         )
         response = utils.get_jsonc(pending_url, self.verbosity)
         if "pendingTasks" not in response:
             logger.warning("failed to get counts for queue '%s'", queue_name)
             continue
         self.tc_queue_counts[queue_name] = response["pendingTasks"]
Ejemplo n.º 5
0
    def simple_worker_report(self,
                             worker_type,
                             worker_prefix="packet-",
                             worker_count=60):
        """Print which expected workers are missing from the queue listing.

        Expected worker ids are ``worker_prefix`` followed by each integer in
        ``range(worker_count)``. Seen worker ids are the ``workerId`` values
        of the ``workers`` entries returned for ``worker_type`` under
        ``self.provisioner`` (the request asks for at most 100 workers).

        Prints the missing ids (expected but not seen) with their count, then
        the total number of expected workers. If the fetch raises, the
        exception is printed and every expected worker is reported missing.
        """
        listing_url = (
            "https://firefox-ci-tc.services.mozilla.com/api/queue/v1/provisioners/%s/worker-types/%s/workers?limit=100"
            % (self.provisioner, worker_type))
        try:
            listing = utils.get_jsonc(listing_url, self.verbosity)
        except Exception as err:
            # best effort: show the failure and continue with no data
            listing = []
            print(err)

        expected_ids = {"%s%s" % (worker_prefix, n) for n in range(worker_count)}

        if "workers" in listing:
            seen_ids = {entry["workerId"] for entry in listing["workers"]}
        else:
            seen_ids = set()

        # only ids that were expected but never reported count as missing
        absent_ids = expected_ids - seen_ids
        print("missing workers (%s): %s" % (len(absent_ids), sorted(absent_ids)))
        print("%s workers total" % worker_count)
Ejemplo n.º 6
0
    def set_current_workers(self):
        """Collect workers, quarantined workers and last task start times.

        For every worker type in ``self.tc_current_worker_types``:

        - list up to ``MAX_WORKER_COUNT`` workers of the ``proj-autophone``
          provisioner, re-querying up to two times while the listing is empty;
        - store the worker ids in ``self.tc_workers[worker_type]``;
        - append ids that carry ``quarantineUntil`` to
          ``self.quarantined_workers``;
        - fetch the status of each worker's ``latestTask`` and keep the
          greatest ``started`` value seen for the task's last run in
          ``self.tc_current_worker_last_started[worker_id]``.

        A listing without a ``workers`` key is logged and treated as empty
        instead of raising ``KeyError``. A task status whose ``runs`` list is
        empty is skipped like a run that has no ``started`` field, instead of
        raising ``IndexError``.
        """
        # get the workers and count of workers
        # legacy endpoints, kept for reference:
        # https://queue.taskcluster.net/v1/provisioners/proj-autophone/worker-types/gecko-t-ap-unit-p2/workers?limit=15
        # "https://queue.taskcluster.net/v1/task/%s/status"
        last_started = self.tc_current_worker_last_started
        for worker_type in self.tc_current_worker_types:
            url = (
                "https://firefox-ci-tc.services.mozilla.com/api/queue/v1/provisioners/%s/worker-types/%s/workers?limit=%s"
                % ("proj-autophone", worker_type, MAX_WORKER_COUNT)
            )
            listing = utils.get_jsonc(url, self.verbosity)
            if self.verbosity > 2:
                print("")
                print("%s (%s)" % (worker_type, url))
                self.pp.pprint(listing)

            # tc can sometimes return empty results for this query, retry a few times
            retries_left = 2
            while retries_left and "workers" in listing and listing["workers"] == []:
                listing = utils.get_jsonc(url, self.verbosity)
                retries_left -= 1

            if "workers" in listing:
                workers = listing["workers"]
            else:
                logger.warning(
                    "failed to get workers for worker type '%s'", worker_type
                )
                workers = []

            self.tc_workers[worker_type] = []
            for worker in workers:
                worker_id = worker["workerId"]
                self.tc_workers[worker_type].append(worker_id)
                # TODO: quarantine data
                if "quarantineUntil" in worker:
                    self.quarantined_workers.append(worker_id)
                if "latestTask" not in worker:
                    # worker has no latestTask... brand new or tc restart?
                    # TODO: eventually alert if this persists
                    continue

                status_url = (
                    "https://firefox-ci-tc.services.mozilla.com/api/queue/v1/task/%s/status"
                    % worker["latestTask"]["taskId"]
                )
                task_status = utils.get_jsonc(status_url, self.verbosity)
                if self.verbosity > 2:
                    print("%s result2: " % worker_id)
                    self.pp.pprint(task_status)

                # if a quarantined host's last job is old it will
                # expire and we can't look at it
                if "code" in task_status and task_status["code"] == "ResourceNotFound":
                    continue

                if "status" not in task_status or "runs" not in task_status["status"]:
                    logger.warning(
                        "strange json_result2 for worker %s: %s",
                        worker_id,
                        task_status,
                    )
                    continue

                # look at the last record for the task, could be rescheduled
                try:
                    last_run = task_status["status"]["runs"][-1]
                except (IndexError, KeyError):
                    # no run recorded yet: nothing to remember for this worker
                    continue
                if "started" not in last_run:
                    # workers that just started won't have a 'started'
                    continue

                started_time = last_run["started"]
                if worker_id not in last_started or last_started[worker_id] < started_time:
                    last_started[worker_id] = started_time
Ejemplo n.º 7
0
    def main(self, provisioner, worker_type, worker_id):
        """Print fitness report lines and a closing summary.

        The arguments select one of three modes:

        - host mode (``worker_type`` and ``worker_id`` both truthy): report on
          that single worker; prints "no data" and returns early when the
          worker listing for the type is empty;
        - queue mode (only ``worker_type`` truthy): report on every worker of
          that worker type;
        - provisioner mode (otherwise): report on every worker type returned
          by ``self.get_worker_types(provisioner)``.

        With ``self.args.only_show_alerting`` set, queue and provisioner mode
        print only results that contain ``alerts``; all results still count
        towards the summary. The summary line is skipped when no worker was
        counted.
        """
        # TODO: show when worker last started a task (taskStarted in TC)
        # - aws metal nodes has quarantined nodes that have been deleted that never drop off from worker-data

        start = timer()
        worker_count = 0
        working_count = 0
        # TODO: for this calculation, should we use a count of hosts that are reporting (vs all)?
        sr_total = 0

        if worker_type and worker_id:
            ## host mode
            worker_count = 1
            self.get_pending_tasks_multi([worker_type])
            # legacy endpoint, kept for reference:
            # "https://queue.taskcluster.net/v1/provisioners/%s/worker-types/%s/workers?limit=5"
            listing_url = (
                "https://firefox-ci-tc.services.mozilla.com/api/queue/v1/provisioners/%s/worker-types/%s/workers?limit=5"
                % (self.provisioner, worker_type))
            listing = utils.get_jsonc(listing_url, self.verbosity)
            listed_workers = listing["workers"]
            if len(listed_workers) == 0:
                print("%s.%s: %s" % (worker_type, worker_id, "no data"))
                return
            # the first listed worker's group is used for the requested worker
            worker_group = listed_workers[0]["workerGroup"]
            _worker, res_obj, _e = self.device_fitness_report(
                worker_type, worker_group, worker_id)
            res_obj["worker_id"] = worker_id
            sr_total += res_obj["sr"]
            report_line = self.format_workertype_fitness_report_result(res_obj)
            print("%s.%s" % (worker_type, report_line))
        else:
            if worker_type:
                ### queue mode
                selected_types = [worker_type]
            else:
                ### provisioner mode
                type_listing = self.get_worker_types(provisioner)
                if "workerTypes" in type_listing:
                    selected_types = [
                        entry["workerType"]
                        for entry in type_listing["workerTypes"]
                    ]
                else:
                    selected_types = []
                    logger.warning(
                        "error fetching workerTypes, results are incomplete!")
            self.get_pending_tasks_multi(selected_types)

            # TODO: process and then display? padding of worker_id is not consistent for whole provisioner report
            # - because we haven't scanned the potentially longest worker_ids when we display the first worker_group's data
            for current_type in selected_types:
                wt, reports, _e = self.workertype_fitness_report(current_type)
                for report in reports:
                    worker_count += 1
                    sr_total += report["sr"]
                    state = report.get("state")
                    if state and "working" in state:
                        working_count += 1
                    # in alert-only mode, results without alerts are counted but not shown
                    if self.args.only_show_alerting and "alerts" not in report:
                        continue
                    report_line = self.format_workertype_fitness_report_result(
                        report)
                    print("%s.%s" % (wt, report_line))

        # guard against divide by 0 (happens on request failures)
        if not worker_count:
            return
        # TODO: show alerting count
        elapsed_seconds = round((timer() - start), 2)
        average_sr = round((sr_total / worker_count * 100), 2)
        print(
            "%s workers queried in %s seconds (%s working), average SR %s%%"
            % (worker_count, elapsed_seconds, working_count, average_sr))