Ejemplo n.º 1
0
    def start_battleserver(self):
        """Spawn one battleserver process and register it with the backend.

        The command is built by ``get_battleserver_command`` from the manifest
        of ``self.ref``, the configured command line and ``self.tenant``.
        The child's stdout is pumped line by line into a queue by a daemon
        thread; the string ``"ProcessExit"`` is queued once the stream ends.

        Returns:
            tuple: ``(pid, queue, battleserver_resource)`` for the new process.

        Raises:
            Exception: whatever ``subprocess.Popen`` raised (after the
                resource status was set to "popen failed"), or whatever the
                registering ``put`` raised (after the freshly spawned process
                was terminated so it is not left running untracked).
        """
        repo = config.BUILD_PATH
        build_info = get_manifest(self.ref)

        def enqueue_output(out, queue):
            # Runs on the reader thread. The sentinel and the close happen in
            # a 'finally' so that consumers of the queue still learn about the
            # end of the stream when reading fails half-way.
            try:
                for line in iter(out.readline, b''):
                    queue.put(line)
            finally:
                queue.put("ProcessExit")
                out.close()

        #! get command line from config
        command_line = config_file["command-line"]
        build_path = build_info["build"]
        executable_path = build_info["executable_path"]
        command, battleserver_resource = get_battleserver_command(
            build_path, executable_path, command_line, self.tenant)

        logger.debug("Spawning process with command: %s", command)

        try:
            p = subprocess.Popen(command, stdout=subprocess.PIPE, bufsize=1)
        except Exception as e:
            logger.exception("Spawning failed.")
            battleserver_resource.set_status("popen failed", {"error": str(e)})
            raise

        pid = p.pid

        # The process is already running at this point. If registering it
        # fails, the caller never learns the pid, so terminate it here instead
        # of leaving an untracked process behind. A flag + 'finally' is used
        # (rather than except/raise) so the original exception propagates
        # untouched even though another exception may be handled meanwhile.
        registered = False
        try:
            battleserver_resource.put({
                "repository": repo,
                "ref": self.ref,
                "build": build_path,
                "build_number": build_info["build_number"],
                "target_platform": build_info["target_platform"],
                "build_info": build_info,
                "status": "starting",
                "pid": pid,
                "details": {
                    "ref": self.ref,
                    "repository": repo,
                    "build_path": build_path
                }
            })
            registered = True
        finally:
            if not registered:
                logger.error(
                    "Registering process with pid %s failed. Terminating it.",
                    pid)
                try:
                    p.terminate()
                except OSError:
                    # The process exited on its own in the meantime.
                    logger.debug("Process with pid %s was already gone", pid)

        logger.info("Spawned process with pid %s", pid)
        q = Queue()
        t = Thread(target=enqueue_output, args=(p.stdout, q))
        t.daemon = True  # thread dies with the program
        t.start()
        return pid, q, battleserver_resource
Ejemplo n.º 2
0
def download_latest_builds(force=False):
    """Download and install the current build of every local ref.

    For each ``(ref, tenant)`` pair returned by ``get_local_refs`` the
    manifest is looked up, the build archive is downloaded and then
    installed. Refs without a manifest are skipped, and so are builds that
    are already installed unless ``force`` is set.

    Args:
        force: When true, download and install even if the build is already
            installed or the archive was downloaded before.

    Exits the process with status 1 when no build artifacts are configured
    for the current product.
    """
    ts = get_ts()
    product_name = config.product_name

    # Only used to verify that build artifacts are configured for this
    # product at all; the archive location itself comes from each manifest.
    rows = ts.get_table('ue4-build-artifacts').find({'product_name': product_name})
    if not rows:
        logger.error("No UE4 build artifacts configured for product '%s'", product_name)
        sys.exit(1)

    refs = get_local_refs()
    if refs:
        logger.info('Syncing builds for the following refs: %s', repr(refs))

    for ref, tenant in refs:
        build_info = get_manifest(ref)
        if build_info is None:
            logger.info("Build %s not found. Ignoring ref.", ref)
            continue
        build_name = build_info["build"]
        archive = build_info["archive"]
        logger.info("Checking out build '%s'", build_name)
        if not force and is_build_installed(build_name, build_info["executable_path"]):
            logger.info("Build '%s' already installed", build_name)
            continue

        # The same details dict is reported with every event of this ref and
        # is extended with the local file name once the download is done.
        log_details = {"archive": archive}
        log_event("download_build", "Downloading build for ref '%s'" % ref,
                  details=log_details, tenant_name=tenant)

        local_filename = download_build(archive, ignore_if_exists=(not force))
        log_details["local_filename"] = local_filename
        log_event("download_build_complete", "Finished downloading build for ref '%s'" % ref,
                  details=log_details, tenant_name=tenant)
        logger.info("Done downloading '%s' to %s", archive, local_filename)

        install_build(local_filename)

        log_event("install_build_complete", "Finished installing build for ref '%s'" % ref,
                  details=log_details, tenant_name=tenant)
Ejemplo n.º 3
0
def kill_processes_by_ref(ref, tenant):
    """Terminate all running processes of any build of 'ref' for 'tenant'.

    A process matches when its executable path contains the build name of
    the ref with the build number stripped (so every build number matches)
    and its command line contains the exact argument ``-tenant=<tenant>``.

    Matching processes are asked to terminate and are given 10 seconds to
    exit; after that they are killed forcibly. When at least one process was
    handled, a 'processes_killed' event is logged.

    Args:
        ref: The ref whose manifest identifies the build to look for.
        tenant: Tenant name that must appear as ``-tenant=<tenant>``.
    """
    logger.info("kill_processes_by_ref '%s', '%s'", ref, tenant)
    build_info = get_manifest(ref)
    if not build_info:
        # Without a manifest there is no build name to match against.
        logger.warning("  No manifest found for ref '%s'. Nothing to kill.", ref)
        return

    partial_build = build_info["build"].replace(
        str(build_info["build_number"]), "").lower()
    logger.info("  Finding partial path '%s'...", partial_build)
    tenant_arg = "-tenant=%s" % tenant
    killed_processes = []
    for p in psutil.process_iter():
        try:
            raw_exe = p.exe()
            cmd = p.cmdline()
        except psutil.AccessDenied:
            # Only the pid is logged: querying the name may be denied too.
            logger.debug("  Got AccessDenied for pid %s", p.pid)
            continue
        except psutil.NoSuchProcess:
            # The process exited while the process list was being walked.
            continue

        exe = raw_exe.replace("\\", "/").lower()
        if partial_build not in exe or tenant_arg not in cmd:
            continue

        logger.info("  Killing pid %s: '%s'", p.pid, raw_exe)
        try:
            p.terminate()
        except psutil.NoSuchProcess:
            # Already gone; nothing was killed by us.
            continue
        killed_processes.append({'pid': p.pid, 'exe': exe, 'cmd': cmd})

        try:
            p.wait(timeout=10)
        except psutil.TimeoutExpired:
            logger.warning(
                "  pid %s did not exit within 10 seconds. Killing it forcibly",
                p.pid)
            try:
                p.kill()
            except psutil.NoSuchProcess:
                # Exited between the timeout and the kill.
                logger.debug("  pid %s exited before it could be killed", p.pid)

    if killed_processes:
        log_event('processes_killed',
                  'Killed %s processes' % len(killed_processes),
                  details={'processes': killed_processes},
                  severity='WARNING',
                  ref=ref,
                  tenant_name=tenant)

    logger.info(
        "Done killing processes for ref='%s', tenant='%s'. Killed %s processes",
        ref, tenant, len(killed_processes))
Ejemplo n.º 4
0
def main():
    """Command-line entry point of the server daemon.

    Parses the sub-command, configures logging, syncs the index file (a
    failure there is logged and execution continues) and dispatches:

    * ``run``            - run battleservers for ``--ref`` / ``--tenant``
    * ``syncbuilds``     - fetch and install the latest builds (``--force``)
    * ``clean``          - delete old builds from the machine
    * ``cleanall``       - delete all builds from the machine
    * ``cleanlogs``      - clean logs and move them to S3
    * ``cleans3``        - delete old builds from S3
    * ``heartbeat``      - heartbeat this machine on all registered tenants
    * ``updateruntasks`` - set up run tasks for all registered refs

    Exits with status 1 when ``run`` is requested for a ref without a build.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-v",
                        "--verbose",
                        help="increase output verbosity",
                        action="store_true")
    subparsers = parser.add_subparsers(help='sub-command help', dest="cmd")

    parser_run = subparsers.add_parser('run', help='Run a battleserver')
    parser_run.add_argument("-r", "--ref", help='The server ref to run')
    parser_run.add_argument(
        "-t",
        "--tenant",
        help='Backend tenant to connect to. Overrides tenant from config')

    subparsers.add_parser('clean', help='Delete old builds from the machine')
    subparsers.add_parser('cleanall',
                          help='Delete all builds from the machine')
    subparsers.add_parser('cleanlogs', help='Clean logs and move to S3')
    subparsers.add_parser('cleans3', help='Delete old builds from S3')
    subparsers.add_parser(
        'heartbeat', help='Heartbeat this machine on all registered tenants')
    subparsers.add_parser('updateruntasks',
                          help='Set up run tasks for all registered refs')

    parser_deploy = subparsers.add_parser(
        'syncbuilds', help='Fetch and install the latest builds from S3')
    parser_deploy.add_argument(
        "-f",
        "--force",
        action="store_true",
        help='Always download file (even if it already exists)')

    args = parser.parse_args()
    if not args.cmd:
        # argparse on Python 3 does not require a sub-command by default.
        parser.error("a sub-command is required")

    logname = args.cmd
    # Only the 'run' sub-parser defines these two options.
    tenant_name = getattr(args, "tenant", None)
    ref = getattr(args, "ref", None)

    logsetup.args_ref = ref
    logsetup.args_tenant_name = tenant_name
    logsetup.args_cmd = logname

    #! we have multiple ongoing run commands at once and we can't use the same logfile
    if args.cmd == "run":
        logname = "%s_%s" % (args.cmd, os.getpid())
    setup_logging(logname)
    if args.verbose:
        logger.setLevel(logging.DEBUG)

    logger.info(
        "serverdaemon starting. cmd = '%s'. Product = '%s', Tenant = '%s', Repository = '%s'"
        % (args.cmd, config.product_name, tenant_name, config.BUILD_PATH))

    # start by syncing down the index file
    try:
        sync_index()
    except Exception as e:
        # Not fatal: log once (with traceback) and carry on with the command.
        logger.exception("Error! %s", e)

    if args.cmd == "run":
        build_info = get_manifest(ref)
        if not build_info:
            logger.error("Build not found for ref '%s'" % ref)
            sys.exit(1)
        logger.info("RUNNING build %s", build_info)
        d = daemon.Daemon(ref, tenant_name)
        d.run()
        logger.info("Exiting")

    elif args.cmd == "syncbuilds":
        download_latest_builds(args.force)
    elif args.cmd == "clean":
        delete_old_builds()
    elif args.cmd == "cleanall":
        delete_all_builds()
    elif args.cmd == "cleanlogs":
        upload_logs()
    elif args.cmd == "cleans3":
        cleanup_s3()
    elif args.cmd == "heartbeat":
        heartbeat_all_tenants()
    elif args.cmd == "updateruntasks":
        update_tasks()
Ejemplo n.º 5
0
    def run(self):
        """Supervise the battleserver processes of ``self.ref``.

        Returns immediately (after logging a 'build_not_installed' event)
        when the executable of the build is not on disk. Otherwise loops
        forever and on every pass:

        1. re-reads the configured process count and terminates surplus
           servers, preferring ones whose resource is not 'running';
        2. spawns servers through ``start_battleserver`` until the count is
           reached;
        3. drops the first tracked pid that no longer exists, so it gets
           replaced on the next pass;
        4. calls ``shutdown_servers_and_exit`` when the manifest of the ref
           differs from the one in the index read at start-up;
        5. drains the stdout queues, marking servers 'started' and handling
           the "ProcessExit" sentinel;
        6. applies the start-up and heartbeat timeouts and processes pending
           backend commands (every 10th pass for started servers).

        ``KeyboardInterrupt`` and any other ``Exception`` end in a call to
        ``shutdown_servers_and_exit``.
        """

        def terminate_quietly(pid):
            # Best-effort terminate of a server that is being replaced. The
            # process may already be gone, which must not abort the loop.
            try:
                psutil.Process(pid).terminate()
            except Exception:
                logger.debug("Could not terminate pid %s", pid, exc_info=True)

        try:
            build_info = get_manifest(self.ref)

            # Index as of start-up; compared on every pass to detect changes.
            index_file = get_index()

            executable = os.path.join(config.BSD_BATTLESERVER_FOLDER,
                                      build_info["build"],
                                      build_info["executable_path"])
            if not os.path.exists(executable):
                log_event(
                    "build_not_installed",
                    "Build '%s' not installed. Cannot start daemon." %
                    build_info["build"])
                return

            start_time = time.time()
            loop_cnt = 0
            while 1:
                loop_cnt += 1
                # NOTE(review): seconds since this daemon started, not since
                # an individual server was spawned. The 60 second checks
                # below therefore also apply to servers spawned later on -
                # confirm that this is intended.
                diff = (time.time() - start_time)

                config_num_processes = get_num_processes(self.ref, self.tenant)
                if config_num_processes != self.num_processes:
                    txt = "Number of processes in config for ref '%s' has changed from %s to %s" % (
                        self.ref, self.num_processes, config_num_processes)
                    logger.warning(txt)
                    log_event("num_processes_changed", txt)
                    # if we should run more processes: no problem, we'll add them in automatically
                    # but if we should run fewer processes we need to kill some
                    self.num_processes = config_num_processes

                    if len(self.battleserver_instances) > self.num_processes:
                        servers_killed = []
                        while len(self.battleserver_instances) > self.num_processes:
                            logger.info(
                                "I am running %s battleservers but should be running %s. Killing servers..."
                                % (len(self.battleserver_instances),
                                   self.num_processes))
                            # try to find a server that is not 'running'. If no such servers are found then kill a running one
                            for pid, (_q, battleserver_resource,
                                      _status) in self.battleserver_instances.items():
                                resource_status = battleserver_resource.get_status()
                                if resource_status != "running":
                                    logger.info(
                                        "Found battleserver in state '%s' to kill: %s"
                                        % (resource_status,
                                           battleserver_resource))
                                    pid_to_kill = pid
                                    break
                            else:
                                logger.warning(
                                    "Found no battleserver to kill that was not 'running'. I will kill a running one"
                                )
                                pid_to_kill = next(
                                    iter(self.battleserver_instances))

                            try:
                                p = psutil.Process(pid_to_kill)
                                battleserver_resource = self.battleserver_instances[
                                    pid_to_kill][1]
                                logger.info("Killing server with pid %s" %
                                            pid_to_kill)
                                p.terminate()
                                servers_killed.append(str(pid_to_kill))
                                battleserver_resource.set_status(
                                    "killed",
                                    {"status-reason": "Scaling down"})
                            except psutil.NoSuchProcess:
                                logger.info(
                                    "Cannot kill %s because it's already dead",
                                    pid_to_kill)

                            del self.battleserver_instances[pid_to_kill]
                            time.sleep(5.0)
                        txt = "Done killing servers for ref '%s'. Killed servers %s and am now running %s servers" % (
                            self.ref, ", ".join(servers_killed),
                            len(self.battleserver_instances))
                        log_event("servers_killed", txt)

                if self.num_processes == 0:
                    logger.info("Running zero processes")
                    time.sleep(10)
                    continue

                if len(self.battleserver_instances) < self.num_processes:
                    num_added = 0
                    while len(self.battleserver_instances) < self.num_processes:
                        logger.info(
                            "I am running %s battleservers but should be running %s. Adding servers..."
                            % (len(self.battleserver_instances),
                               self.num_processes))
                        pid, q, battleserver_resource = self.start_battleserver()
                        self.battleserver_instances[pid] = (
                            q, battleserver_resource, "starting")
                        num_added += 1
                        time.sleep(5.0)
                    logger.info(
                        "Done adding servers. Running instances: %s" %
                        ",".join([
                            str(key) for key in self.battleserver_instances.keys()
                        ]))

                    txt = "Done adding servers for ref '%s'. Added %s servers and am now running %s servers" % (
                        self.ref, num_added, len(self.battleserver_instances))
                    log_event("servers_added", txt)

                # Drop the first tracked pid that no longer exists; it is
                # replaced by the "adding servers" step of the next pass.
                # Iterates over a snapshot because an entry gets deleted.
                for pid, (_q, battleserver_resource,
                          _status) in list(self.battleserver_instances.items()):
                    try:
                        psutil.Process(pid)
                    except psutil.NoSuchProcess:
                        logger.info("Process %s running server '%s' has died",
                                    pid, battleserver_resource)
                        resource_status = battleserver_resource.get_status()
                        if resource_status == "starting":
                            battleserver_resource.set_status(
                                "abnormalexit",
                                {"status-reason": "Failed to start"})
                        if resource_status == "running":
                            battleserver_resource.set_status(
                                "abnormalexit",
                                {"status-reason": "Died prematurely"})
                        # else the instance has updated the status
                        time.sleep(5.0)
                        logger.info("Restarting UE4 Server (1)...")
                        del self.battleserver_instances[pid]
                        break

                new_index_file = get_index()
                old_manifest = find_build_manifest(index_file, self.ref)
                new_manifest = find_build_manifest(new_index_file, self.ref)

                if old_manifest != new_manifest:
                    logger.info("Index file has changed. Reloading")
                    self.shutdown_servers_and_exit("New build is available")

                # Drain the stdout queues. Keeps going for as long as at
                # least one server produced a line during the last sweep.
                while 1:
                    if not self.battleserver_instances:
                        break
                    empty = True
                    for pid, (q, battleserver_resource,
                              status) in list(self.battleserver_instances.items()):
                        try:
                            line = q.get(timeout=.1)
                        except Empty:
                            print("%s..." % pid)
                            time.sleep(1.0)
                        else:  # got line
                            empty = False
                            logger.debug("stdout: %s", line)
                            if "Game Engine Initialized." in line:
                                logger.info("Game server has started up!")
                                status = "started"
                                self.battleserver_instances[pid] = (
                                    q, battleserver_resource, status)
                            if line == "ProcessExit":
                                logger.info("UE4 Process has exited")
                                resource_status = battleserver_resource.get_status()
                                if resource_status == "starting":
                                    battleserver_resource.set_status(
                                        "abnormalexit",
                                        {"status-reason": "Failed to start"})
                                # else the instance has updated the status
                                time.sleep(5.0)
                                logger.info("Restarting UE4 Server (2)...")
                                terminate_quietly(pid)
                                del self.battleserver_instances[pid]
                                empty = True
                                break
                    if empty:
                        time.sleep(1.0)
                        break

                # Health checks. Iterates over a snapshot because unhealthy
                # servers are removed from the dictionary inside the loop.
                for pid, (_q, battleserver_resource,
                          status) in list(self.battleserver_instances.items()):
                    if status == "starting" and diff > 60.0:
                        logger.error(
                            "Server still hasn't started after %.0f seconds!" %
                            diff)
                        # NOTE(review): SystemExit is not an Exception, so the
                        # handlers below do not run shutdown_servers_and_exit
                        # for this exit - confirm that this is intended.
                        sys.exit(-1)
                    elif status == "started" and loop_cnt % 10 == 0:
                        resp = battleserver_resource.get().json()
                        for cmd in resp["pending_commands"]:
                            logger.warning(
                                "I should execute the following command: '%s'",
                                cmd["command"])
                            # Shallow copy of the server resource that is
                            # pointed at the command's url to update it.
                            command_resource = copy.copy(battleserver_resource)
                            command_resource.location = cmd["url"]
                            command_resource.patch(data={"status": "running"})

                            if cmd["command"] == "kill":
                                logger.error(
                                    "External command to kill servers!")
                                self.shutdown_servers_and_exit(
                                    "Received command to kill all")

                        resource_status = resp["status"]
                        if diff > 60.0 and resource_status == "starting":
                            logger.error(
                                "Server is still in status '%s' after %.0f seconds!"
                                % (resource_status, diff))
                            battleserver_resource.set_status(
                                "killed", {
                                    "status-reason":
                                    "Failed to reach 'started' status"
                                })
                            time.sleep(5.0)
                            logger.info("Restarting UE4 Server (4)...")
                            terminate_quietly(pid)
                            del self.battleserver_instances[pid]
                        else:
                            # The timestamp is made naive so it can be
                            # subtracted from the naive utcnow() value.
                            heartbeat_date = dateutil.parser.parse(
                                resp["heartbeat_date"]).replace(tzinfo=None)
                            heartbeat_diff = (datetime.datetime.utcnow() -
                                              heartbeat_date).total_seconds()
                            if heartbeat_diff > 60:
                                logger.error(
                                    "Server heartbeat is %s seconds old. The process must be frozen",
                                    heartbeat_diff)
                                battleserver_resource.set_status(
                                    "killed",
                                    {"status-reason": "Heartbeat timeout"})
                                time.sleep(5.0)
                                logger.info("Restarting UE4 Server (5)...")
                                terminate_quietly(pid)
                                del self.battleserver_instances[pid]

        except KeyboardInterrupt:
            logger.info("User exiting...")
            self.shutdown_servers_and_exit("User exit")
        except Exception as e:
            # unhandled exception
            logger.exception(
                "Fatal error occurred in run_battleserver_loop. Exiting")
            self.shutdown_servers_and_exit(
                "Fatal error, '%s' occurred in run_battleserver_loop" % e)