コード例 #1
0
ファイル: software.py プロジェクト: xlhtc007/pyfarm-agent
        def mark_software_available(software, version):
            """
            Tell the master that this agent has the given software version.

            The POST is repeated for as long as the request itself raises or
            the master answers with a status of INTERNAL_SERVER_ERROR or
            above; each retry first waits for the number of seconds returned
            by ``http_retry_delay()``.  An OK response ends the loop, and so
            does any other status, which is logged as an error and not
            retried.

            When ``self.testing`` is set, ``self.operation_deferred`` is
            fired with ``None`` once the loop has finished.

            :param software:
                Value sent as the ``software`` field of the request

            :param version:
                Value sent as the ``version`` field of the request
            """
            url = "{master_api}/agents/{agent}/software/".format(
                master_api=config.get("master_api"),
                agent=config.get("agent_id"))

            while True:
                try:
                    response = yield post_direct(
                        url, data={"software": software, "version": version})
                except Exception as error:
                    # The request never produced a response: wait, then
                    # start over from the top of the loop.
                    wait = http_retry_delay()
                    logger.error(
                        "Failed to post availability of software %s, "
                        "version %s to master: %r. Will retry in %s "
                        "seconds.",
                        software, version, error, wait)
                    pause = Deferred()
                    reactor.callLater(wait, pause.callback, None)
                    yield pause
                    continue

                # The body is always read, even though it is only used when
                # reporting an unexpected status.
                body = yield treq.content(response)
                status = response.code

                if status == OK:
                    logger.info("Posted availability of software %s, "
                                "version %s to master.",
                                software, version)
                    break

                if status < INTERNAL_SERVER_ERROR:
                    # Neither success nor a server side failure, retrying
                    # is not attempted for these.
                    logger.error(
                        "Failed to post availability of software %s, "
                        "version %s: "
                        "Unexpected status from server %s. Data: %s",
                        software, version, status, body)
                    break

                wait = http_retry_delay()
                logger.warning(
                    "Could not post availability of software %s, "
                    "version %s. The master responded with "
                    "INTERNAL_SERVER_ERROR.  Retrying in %s "
                    "seconds.", software, version, wait)
                pause = Deferred()
                reactor.callLater(wait, pause.callback, None)
                yield pause

            if self.testing:
                self.operation_deferred.callback(None)
コード例 #2
0
ファイル: software.py プロジェクト: xlhtc007/pyfarm-agent
        def mark_software_not_available(software, version):
            """
            Ask the master to drop the given software version from this
            agent.

            The DELETE is repeated for as long as the request itself raises
            or the master answers with a status of INTERNAL_SERVER_ERROR or
            above; each retry first waits for the number of seconds returned
            by ``http_retry_delay()``.  OK, ACCEPTED and NO_CONTENT count as
            success.  Any other status is logged as an error and not
            retried.

            When ``self.testing`` is set, ``self.operation_deferred`` is
            fired with ``None`` once the loop has finished.

            :param software:
                Name of the software, used to build the request url

            :param version:
                Version of the software, used to build the request url
            """
            url = ("{master_api}/agents/{agent}/software/{software}/"
                   "versions/{version}").format(
                       master_api=config.get("master_api"),
                       agent=config.get("agent_id"),
                       software=software,
                       version=version)

            while True:
                try:
                    response = yield delete_direct(url)
                except Exception as error:
                    # The request never produced a response: wait, then
                    # start over from the top of the loop.
                    wait = http_retry_delay()
                    logger.error(
                        "Failed to remove software %s, version %s from this "
                        "agent on master: %r. Will retry in %s seconds.",
                        software, version, error, wait)
                    pause = Deferred()
                    reactor.callLater(wait, pause.callback, None)
                    yield pause
                    continue

                # The body is always read, even though it is only used when
                # reporting an unexpected status.
                body = yield treq.content(response)
                status = response.code

                if status in (OK, ACCEPTED, NO_CONTENT):
                    logger.info("Removed software %s, version %s from this "
                                "agent on master.", software, version)
                    break

                if status < INTERNAL_SERVER_ERROR:
                    # Neither success nor a server side failure, retrying
                    # is not attempted for these.
                    logger.error(
                        "Failed to remove software %s, version %s from "
                        "this agent: "
                        "Unexpected status from server %s. Data: %s",
                        software, version, status, body)
                    break

                wait = http_retry_delay()
                logger.warning(
                    "Could not remove software %s, version %s from "
                    "this agent. The master responded with "
                    "INTERNAL_SERVER_ERROR.  Retrying in %s "
                    "seconds.", software, version, wait)
                pause = Deferred()
                reactor.callLater(wait, pause.callback, None)
                yield pause

            if self.testing:
                self.operation_deferred.callback(None)
コード例 #3
0
ファイル: internals.py プロジェクト: xlhtc007/pyfarm-agent
    def _get_uid_gid_value(self, value, value_name, func_name, module, module_name):
        """
        Shared implementation behind the user and group id lookups.

        A string ``value`` is looked up by name and the matching numeric
        id is returned.  An integer ``value`` is looked up by id and is
        returned unchanged when an entry exists.  ``module_name`` selects
        the database: ``"pwd"`` for users, ``"grp"`` for groups.

        ``None`` is returned when ``module`` is ``NotImplemented`` and
        when a lookup fails while the ``jobtype_ignore_id_mapping_errors``
        configuration value is set.

        :raises KeyError:
            The lookup failed and ``jobtype_ignore_id_mapping_errors`` is
            not set

        :raises ValueError:
            ``module_name`` is neither ``"pwd"`` nor ``"grp"``

        :raises TypeError:
            ``value`` is neither a string nor an integer
        """
        # Nothing can be looked up if the platform lacks the module
        if module is NotImplemented:
            logger.warning(
                "This platform does not implement the %r module, skipping "
                "%s()", module_name, func_name)
            return None

        by_name = isinstance(value, STRING_TYPES)
        if not by_name and not isinstance(value, INTEGER_TYPES):
            raise TypeError("Expected an integer or string for `%s`" % value_name)

        try:
            if module_name == "pwd":
                if by_name:
                    return pwd.getpwnam(value).pw_uid
                pwd.getpwuid(value)

            elif module_name == "grp":
                if by_name:
                    return grp.getgrnam(value).gr_gid
                grp.getgrgid(value)

            else:
                raise ValueError(
                    "Internal error, failed to get module to use for "
                    "conversion.  Was given %r" % module)

            # An integer id which has an entry is handed back as it came in
            return value

        except KeyError:
            if by_name:
                logger.error(
                    "Failed to convert %s to a %s",
                    value, func_name.split("_")[1])
            else:
                logger.error("%s %s does not seem to exist", value_name, value)

            if not config.get("jobtype_ignore_id_mapping_errors"):
                raise

        return None
コード例 #4
0
    def test_get_result(self):
        """
        Render ``Status`` for a GET request and compare the JSON it wrote
        against a dictionary assembled from the same configuration values.

        ``time.time`` and the ``memory`` functions are patched while
        rendering so the values in the response are predictable.
        """
        this_process = psutil.Process()
        child_count = len(this_process.children(recursive=False))
        descendant_count = len(this_process.children(recursive=True))
        grandchild_count = descendant_count - child_count

        # Time since the master was last contacted (if ever)
        since_contact = config.master_contacted(update=False)
        if isinstance(since_contact, datetime):
            since_contact = datetime.utcnow() - since_contact

        # Time since we last announced ourselves to the master (if ever)
        since_announce = config.get("last_announce", None)
        if isinstance(since_announce, datetime):
            since_announce = datetime.utcnow() - since_announce

        # The clock is patched to this value below which pins uptime
        fake_now = config["start"] + 30
        agent_ram = memory.process_memory()
        consumed = memory.total_consumption()

        expected = {
            "state": config["state"],
            "agent_hostname": config["agent_hostname"],
            "agent_process_ram": agent_ram,
            "consumed_ram": consumed,
            "child_processes": child_count,
            "grandchild_processes": grandchild_count,
            "pids": config["pids"],
            "agent_id": str(config["agent_id"]),
            "last_master_contact": since_contact,
            "last_announce": since_announce,
            "agent_lock_file": config["agent_lock_file"],
            "free_ram": 4242,
            "uptime": total_seconds(
                timedelta(seconds=fake_now - config["start"])),
            "jobs": list(config["jobtypes"].keys())}

        request = self.get()
        resource = Status()

        patches = [
            mock.patch.object(memory, "free_ram", return_value=4242),
            mock.patch.object(time, "time", return_value=fake_now),
            mock.patch.object(
                memory, "process_memory", return_value=agent_ram),
            mock.patch.object(
                memory, "total_consumption", return_value=consumed),
        ]
        with nested(*patches):
            result = resource.render(request)

        self.assertEqual(result, NOT_DONE_YET)
        self.assertTrue(request.finished)
        self.assertEqual(request.responseCode, OK)
        self.assertEqual(len(request.written), 1)
        self.assertEqual(loads(request.written[0]), expected)
コード例 #5
0
    def setUp(self):
        """
        Build a fake master API, serve it on a local port and point
        ``config["master_api"]`` at it.

        Two branches hang off ``self.resource``:

            * software/example_sw/versions/1.0/discovery_code
            * agents/<agent_id>/software/example_sw/versions/1.0

        The fake resources tests need to inspect are kept as attributes
        on the test case.
        """
        super(TestCheckSoftware, self).setUp()

        def mount(parent, segment, child):
            # Register ``child`` below ``parent`` and hand it back so the
            # next level can be attached to it.
            parent.putChild(segment, child)
            return child

        self.resource = Resource()

        # software/example_sw/versions/1.0/discovery_code
        software_root = mount(self.resource, "software", Resource())
        example_sw = mount(software_root, "example_sw", Resource())
        version_index = mount(example_sw, "versions", Resource())

        self.fake_version_api = FakeSoftwareVersionAPI()
        mount(version_index, "1.0", self.fake_version_api)

        self.fake_discovery_code_api = FakeSoftwareVersionCodeAPI()
        mount(self.fake_version_api,
              "discovery_code", self.fake_discovery_code_api)

        # agents/<agent_id>/software/example_sw/versions/1.0
        agents_root = mount(self.resource, "agents", AgentAPIRoot())

        agent_node = FakeAgentAPI()
        mount(agents_root, str(config.get("agent_id")), agent_node)

        self.fake_agent_software_api = FakeAgentSoftwareAPI()
        mount(agent_node, "software", self.fake_agent_software_api)

        agent_example_sw = mount(
            self.fake_agent_software_api, "example_sw", Resource())
        agent_version_index = mount(
            agent_example_sw, "versions", Resource())

        self.agent_example_sw_version_1_0_api = FakeAgentSoftwareVersionAPI()
        mount(agent_version_index,
              "1.0", self.agent_example_sw_version_1_0_api)

        # Serve the tree and make the agent talk to it
        self.site = Site(self.resource)
        self.server = reactor.listenTCP(random_port(), self.site)
        config["master_api"] = "http://127.0.0.1:%s" % self.server.port
コード例 #6
0
ファイル: state.py プロジェクト: xlhtc007/pyfarm-agent
    def get(self, **_):
        """
        Return the agent's current status serialized with ``dumps``.

        The document contains process counts, memory figures, uptime, the
        configured job types and several values read straight from the
        configuration.  ``farm_name`` is only included when it is set to
        something truthy.  Keyword arguments are accepted and ignored.
        """
        # Count direct children and everything below them separately
        list_children = psutil.Process().children
        child_count = len(list_children(recursive=False))
        descendant_count = len(list_children(recursive=True))

        # Time since the master was last contacted (if ever)
        since_contact = config.master_contacted(update=False)
        if isinstance(since_contact, datetime):  # pragma: no cover
            since_contact = datetime.utcnow() - since_contact

        # Time since we last announced ourselves to the master (if ever)
        since_announce = config.get("last_announce", None)
        if isinstance(since_announce, datetime):  # pragma: no cover
            since_announce = datetime.utcnow() - since_announce

        status = {
            "state": config["state"],
            "agent_hostname": config["agent_hostname"],
            "free_ram": memory.free_ram(),
            "agent_process_ram": memory.process_memory(),
            "consumed_ram": memory.total_consumption(),
            "child_processes": child_count,
            "grandchild_processes": descendant_count - child_count,
            "pids": config["pids"],
            "agent_id": config["agent_id"],
            "last_master_contact": since_contact,
            "last_announce": since_announce,
            "agent_lock_file": config["agent_lock_file"],
            "uptime": total_seconds(
                timedelta(seconds=time.time() - config["start"])),
            "jobs": list(config["jobtypes"].keys()),
        }

        farm_name = config["farm_name"]
        if farm_name:
            status["farm_name"] = farm_name

        return dumps(status)
コード例 #7
0
ファイル: service.py プロジェクト: xlhtc007/pyfarm-agent
    def system_data(self, requery_timeoffset=False):
        """
        Returns a dictionary of data containing information about the
        agent.  This is the information that is also passed along to
        the master.

        :param bool requery_timeoffset:
            When true the ntp server is queried again even if a time
            offset has already been stored in the configuration.
        """
        # "auto" means the offset has not been determined yet
        if config["agent_time_offset"] == "auto":
            config["agent_time_offset"] = None

        # Querying the ntp server is typically a blocking operation so the
        # result is stored in the configuration and reused afterwards.
        if requery_timeoffset or config["agent_time_offset"] is None:
            ntp_server = config["agent_ntp_server"]
            ntplog.info("Querying ntp server %r for current time", ntp_server)

            ntp_client = NTPClient()
            try:
                pool_time = ntp_client.request(
                    ntp_server, version=config["agent_ntp_server_version"])

            except Exception as e:
                # Best effort only, the stored offset is left untouched
                ntplog.warning("Failed to determine network time: %s", e)

            else:
                offset = int(pool_time.tx_time - time.time())
                config["agent_time_offset"] = offset

                # format the network time for logging purposes
                network_time = datetime.utcfromtimestamp(pool_time.tx_time)
                ntplog.debug(
                    "network time: %s (local offset: %r)",
                    network_time.isoformat(), offset)

                if offset != 0:
                    ntplog.warning(
                        "Agent is %r second(s) off from ntp server at %r",
                        offset, ntp_server)

        data = {
            "id": config["agent_id"],
            "hostname": config["agent_hostname"],
            "version": config.version,
            "os_class": system.operating_system(),
            "os_fullname": platform(),
            "ram": int(config["agent_ram"]),
            "cpus": config["agent_cpus"],
            "cpu_name": cpu.cpu_name(),
            "port": config["agent_api_port"],
            "free_ram": memory.free_ram(),
            "time_offset": config["agent_time_offset"] or 0,
            "state": config["state"],
            "mac_addresses": list(network.mac_addresses()),
            "current_assignments": config.get(
                "current_assignments", {}),  # may not be set yet
            "disks": disks.disks(as_dict=True),
        }

        # Graphics card names are optional, leave the key out on failure
        try:
            data["gpus"] = graphics.graphics_cards()
        except graphics.GPULookupError:
            pass

        if "remote_ip" in config:
            data["remote_ip"] = config["remote_ip"]

        farm_name = config["farm_name"]
        if farm_name:
            data["farm_name"] = farm_name

        return data
コード例 #8
0
ファイル: service.py プロジェクト: xlhtc007/pyfarm-agent
    def reannounce(self, force=False):
        """
        Method which is used to periodically contact the master.  This
        method is generally called as part of a scheduled task.

        The reannounce lock is held for the duration of the announcement
        and is always released again, including when an unexpected
        exception escapes from this method.

        :param bool force:
            When true the announcement is sent even if
            ``should_reannounce()`` says it is not needed, and unexpected
            request errors are retried after a delay instead of ending
            the attempt.

        :return:
            Passes the decoded json body of the last response received
            from the master to ``returnValue``.  ``None`` is passed if the
            lock could not be acquired in time, if no announcement was
            necessary or if no response was ever received.
        """
        # Attempt to acquire the reannounce lock but fail after 70%
        # of the total time between reannouncements elapses.  This should
        # help prevent an accumulation of requests in the event the master
        # is having issues.
        try:
            yield self.reannounce_lock.acquire(
                config["agent_master_reannounce"] * .70
            )
        except utility.LockTimeoutError:
            svclog.debug("Timed out while waiting to acquire reannounce_lock")
            returnValue(None)

        data = None

        # From here on the lock is held.  Everything which could raise
        # (should_reannounce, decoding the response, posting as a new
        # agent, ...) runs inside try/finally so a failure can never leave
        # the lock acquired, which would make every later call time out.
        try:
            if self.should_reannounce() or force:
                svclog.debug(
                    "Announcing %s to master", config["agent_hostname"])
                num_retry_errors = 0
                while True:  # for retries
                    try:
                        response = yield post_direct(
                            self.agent_api(),
                            data={
                                "state": config["state"],
                                "current_assignments": config.get(
                                    "current_assignments", {}  # may not be set yet
                                ),
                                "free_ram": memory.free_ram(),
                                "disks": disks.disks(as_dict=True)
                            }
                        )

                    except (ResponseNeverReceived,
                            RequestTransmissionFailed) as error:
                        # Broken connections are retried immediately, but
                        # only a limited number of times.
                        num_retry_errors += 1
                        if num_retry_errors > config["broken_connection_max_retry"]:
                            svclog.error(
                                "Failed to announce self to the master, "
                                "caught try-again type errors %s times in a row.",
                                num_retry_errors)
                            break
                        else:
                            svclog.debug("While announcing self to master, caught "
                                         "%s. Retrying immediately.",
                                         error.__class__.__name__)
                    except Exception as error:
                        if force:
                            delay = http_retry_delay()
                            svclog.error(
                                "Failed to announce self to the master: %s.  Will "
                                "retry in %s seconds.", error, delay)
                            deferred = Deferred()
                            reactor.callLater(delay, deferred.callback, None)
                            yield deferred
                        else:
                            # Don't retry because reannounce is called periodically
                            svclog.error(
                                "Failed to announce self to the master: %s.  This "
                                "request will not be retried.", error)
                            break

                    else:
                        data = yield treq.json_content(response)
                        if response.code == OK:
                            config.master_contacted(announcement=True)
                            svclog.info("Announced self to the master server.")
                            break

                        elif response.code >= INTERNAL_SERVER_ERROR:
                            if not self.shutting_down:
                                delay = http_retry_delay()
                                svclog.warning(
                                    "Could not announce self to the master server, "
                                    "internal server error: %s.  Retrying in %s "
                                    "seconds.", data, delay)

                                deferred = Deferred()
                                reactor.callLater(delay, deferred.callback, None)
                                yield deferred
                            else:
                                svclog.warning(
                                    "Could not announce to master. Not retrying "
                                    "because of pending shutdown.")
                                break

                        elif response.code == NOT_FOUND:
                            svclog.warning("The master says it does not know about our "
                                           "agent id. Posting as a new agent.")
                            yield self.post_agent_to_master()
                            break

                        # If this is a client problem retrying the request
                        # is unlikely to fix the issue so we stop here
                        elif response.code >= BAD_REQUEST:
                            svclog.error(
                                "Failed to announce self to the master, bad "
                                "request: %s.  This request will not be retried.",
                                data)
                            break

                        else:
                            svclog.error(
                                "Unhandled error when posting self to the "
                                "master: %s (code: %s).  This request will not be "
                                "retried.", data, response.code)
                            break
        finally:
            yield self.reannounce_lock.release()

        returnValue(data)
コード例 #9
0
ファイル: service.py プロジェクト: xlhtc007/pyfarm-agent
 def shutting_down(self):
     """
     Return the ``shutting_down`` configuration value, ``False`` if it
     has not been set.
     """
     pending_shutdown = config.get("shutting_down", False)
     return pending_shutdown
コード例 #10
0
ファイル: software.py プロジェクト: xlhtc007/pyfarm-agent
def get_software_version_data(software, version):
    """
    Asynchronously retrieve what the master knows about one version of a
    piece of software.

    The request is repeated for as long as it raises or the master
    answers with a status of INTERNAL_SERVER_ERROR or above; each retry
    first waits for the number of seconds returned by
    ``http_retry_delay()``.

    :param str software:
        The name of the software to get data for

    :param str version:
        The name of the version to get data for

    :raises VersionNotFound:
        The master answered with NOT_FOUND

    :raises Exception:
        The master answered with any other status that is neither OK nor
        a server error

    :return:
        Passes the decoded json body of the OK response to
        ``returnValue``
    """
    url = "{master_api}/software/{software}/versions/{version}".format(
        master_api=config.get("master_api"),
        software=software,
        version=version)

    while True:
        try:
            response = yield get_direct(url)

        except Exception as error:
            # The request never produced a response: wait, then start
            # over from the top of the loop.
            wait = http_retry_delay()
            logger.error(
                "Failed to get data about software %s, version %s: %r.  Will "
                "retry in %s seconds.", software, version, error, wait)
            pause = Deferred()
            reactor.callLater(wait, pause.callback, None)
            yield pause
            continue

        status = response.code

        if status == OK:
            payload = yield treq.json_content(response)
            returnValue(payload)

        if status >= INTERNAL_SERVER_ERROR:
            wait = http_retry_delay()
            logger.warning(
                "Could not get data for software %s, version %s, server "
                "responded with INTERNAL_SERVER_ERROR.  Retrying in %s "
                "seconds.", software, version, wait)
            pause = Deferred()
            reactor.callLater(wait, pause.callback, None)
            yield pause
            continue

        if status == NOT_FOUND:
            logger.error("Got 404 NOT FOUND from server on getting data "
                         "for software %s, version %s", software, version)
            raise VersionNotFound("This software version was not found or "
                                  "has no discovery code.")

        # Anything else is unexpected and is not retried
        logger.error(
            "Failed to get data for software %s, version %s: "
            "Unexpected status from server %s", software, version,
            status)
        raise Exception("Unknown return code from master: %s" % status)