def get_index():
    """Load and return the synced repository index.

    The index is read from ``config/<config.BUILD_PATH>/index.json``,
    resolved relative to the current working directory.

    Returns:
        The parsed JSON content of the index file.

    Raises:
        RuntimeError: If the index file does not exist.
    """
    folder = "config/{repo}/".format(repo=config.BUILD_PATH)
    local_filename = os.path.join(folder, "index.json")
    logger.debug("Loading index from '%s'", local_filename)
    if not os.path.exists(local_filename):
        raise RuntimeError("Repository has not been synced")
    # Close the handle deterministically instead of leaving it to the
    # garbage collector.
    with open(local_filename) as index_file:
        return json.load(index_file)
def start_battleserver(self):
    """Spawn one battleserver process for ``self.ref`` and register it.

    The command line is built by ``get_battleserver_command``, the process
    is started with its stdout piped, and a daemon thread forwards every
    stdout line into a queue. When stdout is exhausted the thread posts
    the string ``"ProcessExit"`` to the queue.

    Returns:
        A ``(pid, queue, battleserver_resource)`` tuple.

    Raises:
        Exception: Whatever ``subprocess.Popen`` raised; the failure is
            recorded on the battleserver resource before re-raising.
    """
    def pump_stdout(stream, sink):
        # Forward stdout line by line, then post the exit sentinel.
        for chunk in iter(stream.readline, b''):
            sink.put(chunk)
        sink.put("ProcessExit")
        stream.close()

    repo = config.BUILD_PATH
    build_info = get_manifest(self.ref)

    #! get command line from config
    command_line = config_file["command-line"]

    build_path = build_info["build"]
    command, battleserver_resource = get_battleserver_command(
        build_path,
        build_info["executable_path"],
        command_line,
        self.tenant,
    )

    logger.debug("Spawning process with command: %s", command)
    try:
        process = subprocess.Popen(
            command, stdout=subprocess.PIPE, bufsize=1)
    except Exception as e:
        logger.exception("Spawning failed.")
        battleserver_resource.set_status("popen failed", {"error": str(e)})
        raise

    pid = process.pid
    details = {
        "ref": self.ref,
        "repository": repo,
        "build_path": build_path,
    }
    battleserver_resource.put({
        "repository": repo,
        "ref": self.ref,
        "build": build_path,
        "build_number": build_info["build_number"],
        "target_platform": build_info["target_platform"],
        "build_info": build_info,
        "status": "starting",
        "pid": pid,
        "details": details,
    })
    logger.info("Spawned process with pid %s" % pid)

    output_queue = Queue()
    reader = Thread(target=pump_stdout, args=(process.stdout, output_queue))
    reader.daemon = True  # thread dies with the program
    reader.start()
    return pid, output_queue, battleserver_resource
def install_build(zipfile_name, ignore_if_exists=False):
    """Install a server build on the local drive.

    'zipfile_name' is the name of the zip file in 'BSD_TEMP_FOLDER'.
    The function returns the folder name of the battleserver build image.

    If 'ignore_if_exists' is True, the function returns immediately if the
    build is already installed on the local drive.

    Details:
    The build is installed into a subfolder of BSD_BATTLESERVER_FOLDER using
    the same name as the zip file (sans the .zip ending). The contents of
    the zip file are first extracted to a staging folder, then that folder
    is renamed to the final name, so the build is published in one rename.
    If the target folder already exists, it is removed first.

    Raises:
        RuntimeError: If the zip file is not found in BSD_TEMP_FOLDER.
    """
    archive_name = os.path.basename(zipfile_name)
    image_name = os.path.splitext(archive_name)[0]

    # The final destination of the build
    dest_folder = os.path.abspath(
        os.path.join(config.BSD_BATTLESERVER_FOLDER, image_name))
    if ignore_if_exists and os.path.exists(dest_folder):
        return image_name

    zipfile_path = os.path.abspath(
        os.path.join(config.BSD_TEMP_FOLDER, zipfile_name))
    if not os.path.exists(zipfile_path):
        raise RuntimeError("Zipfile '{}' not found!".format(zipfile_path))

    staging_folder = dest_folder + ".temp"

    with ZipFile(zipfile_path) as archive:
        update_state(
            state='PROGRESS',
            meta={'file': archive_name, 'step': 'unzipping'},
        )

        # A staging folder can survive a run that was killed before its
        # cleanup ran. Extracting on top of it would mix stale files into
        # the new build, so start from a clean slate.
        if os.path.exists(staging_folder):
            logger.info("Removing stale staging folder %s", staging_folder)
            shutil.rmtree(staging_folder)

        try:
            logger.info("Unzipping %s to %s", zipfile_path, staging_folder)
            archive.extractall(staging_folder)

            # Publish the build
            update_state(
                state='PROGRESS',
                meta={'file': archive_name, 'step': 'publishing'},
            )
            if os.path.exists(dest_folder):
                logger.info("Removing previous install at %s", dest_folder)
                shutil.rmtree(dest_folder, ignore_errors=False)
            logger.info("Publishing %s to %s", staging_folder, dest_folder)
            os.rename(staging_folder, dest_folder)
        finally:
            # Remove staging folder, if needed (it is gone after a
            # successful rename).
            if os.path.exists(staging_folder):
                logger.debug("Removing staging folder %s", staging_folder)
                shutil.rmtree(staging_folder)

    return image_name
def is_build_installed(build_name, executable_path):
    """Return True if the executable of build 'build_name' exists on disk.

    'executable_path' is the path of the executable relative to the build
    folder, which is itself a subfolder of BSD_BATTLESERVER_FOLDER.

    A warning is logged when the build folder exists but the executable is
    missing from it.
    """
    build_path = os.path.join(config.BSD_BATTLESERVER_FOLDER, build_name)
    executable_path = os.path.join(build_path, executable_path)
    if os.path.exists(executable_path):
        logger.debug("Build '%s' is installed", build_name)
        return True

    logger.info("Build '%s' is not installed", build_name)
    # Check the real build folder; the original tested the literal string
    # "build_path", so this warning did not reflect the actual folder.
    if os.path.exists(build_path):
        logger.warning("Folder '%s exists but no .exe found!", build_path)
    return False
def kill_processes_by_ref(ref, tenant):
    """Find all running processes of any version of 'ref' and terminate them.

    A process is selected when its executable path contains the build name
    of 'ref' with the build number stripped out, and its command line
    contains the argument ``-tenant=<tenant>``.

    Each selected process is terminated and waited on for up to 10 seconds.
    A 'processes_killed' event is logged if at least one process was
    terminated.

    Raises:
        psutil.TimeoutExpired: If a terminated process is still alive after
            the 10 second wait.
    """
    logger.info("kill_processes_by_ref '%s', '%s'", ref, tenant)
    build_info = get_manifest(ref)
    partial_build = build_info["build"].replace(
        str(build_info["build_number"]), "").lower()
    tenant_arg = "-tenant=%s" % tenant
    logger.info(" Finding partial path '%s'..." % partial_build)

    killed_processes = []
    for p in psutil.process_iter():
        try:
            raw_exe = p.exe()
            cmd = p.cmdline()
        except psutil.AccessDenied:
            logger.debug(" Got AccessDenied for '%s'" % p.name())
            continue
        except psutil.NoSuchProcess:
            # The process exited while we were inspecting it.
            continue

        exe = raw_exe.replace("\\", "/").lower()
        if partial_build not in exe or tenant_arg not in cmd:
            continue

        logger.info(" Killing pid %s: '%s'", p.pid, raw_exe)
        try:
            p.terminate()
            p.wait(timeout=10)
        except psutil.NoSuchProcess:
            # Already gone; nothing was killed by us.
            logger.debug(" Process %s exited before it could be killed", p.pid)
            continue
        killed_processes.append({'pid': p.pid, 'exe': exe, 'cmd': cmd})

    if killed_processes:
        log_event('processes_killed',
                  'Killed %s processes' % len(killed_processes),
                  details={'processes': killed_processes},
                  severity='WARNING',
                  ref=ref,
                  tenant_name=tenant)
    logger.info(
        "Done killing processes for ref='%s', tenant='%s'. Killed %s processes",
        ref, tenant, len(killed_processes))
def collect_processes():
    """Return info dicts for processes running out of the battleserver folder.

    A process is included when its lower-cased, forward-slash-normalized
    executable path starts with the lower-cased BSD_BATTLESERVER_FOLDER.
    Processes that cannot be inspected (access denied, or gone by the time
    they are queried) are skipped.

    Returns:
        A list of dicts with the keys 'create_time', 'cpu_percent',
        'memory_mb', 'username', 'name', 'pid', 'cmd' and 'exe'.
    """
    # NOTE(review): 'exe' has backslashes replaced by '/', but the folder
    # prefix is only lower-cased -- confirm the config value uses '/'.
    folder_prefix = config.BSD_BATTLESERVER_FOLDER.lower()

    ret = []
    for p in psutil.process_iter():
        with p.oneshot():
            try:
                name = p.name()
                exe = p.exe().replace("\\", "/").lower()
                cmd = p.cmdline()
            except psutil.AccessDenied:
                logger.debug(" Got AccessDenied for '%s'" % p.name())
                continue
            except psutil.NoSuchProcess:
                # The process exited while we were inspecting it.
                continue

            if not exe.startswith(folder_prefix):
                logger.debug("Not collecting process '%s'" % exe)
                continue

            try:
                proc_info = {
                    'create_time': datetime.datetime.fromtimestamp(
                        p.create_time()).strftime("%Y-%m-%d %H:%M:%S"),
                    'cpu_percent': p.cpu_percent(),
                    'memory_mb': p.memory_info().vms / 1024 / 1024,
                    'username': p.username(),
                    'name': name,
                    'pid': p.pid,
                    'cmd': cmd,
                    'exe': exe,
                }
            except (psutil.AccessDenied, psutil.NoSuchProcess):
                # These queries can fail just like the ones above; skip the
                # process instead of aborting the whole collection.
                logger.debug("Could not collect details for pid %s", p.pid)
                continue
            ret.append(proc_info)
    return ret
def get_battleserver_command(image_name, executable_path, command_line, tenant, **kw):
    """Build the battleserver command line and register the server.

    Resolves the executable inside BSD_BATTLESERVER_FOLDER, looks up the
    machine resource for 'tenant', allocates a port, creates a server
    resource from that information and finally stores the complete command
    line on the server resource.

    'command_line' is a list of custom arguments (None means no custom
    arguments). 'tenant' falls back to "default" when falsy. Extra keyword
    arguments are accepted and ignored.

    Returns:
        A ``(command, battleserver_resource)`` tuple, where 'command' is the
        argument list to spawn.

    Raises:
        RuntimeError: If the executable does not exist or 'command_line' is
            not a list.
    """
    command_line = command_line or []
    tenant = tenant or "default"

    executable = os.path.join(
        config.BSD_BATTLESERVER_FOLDER, image_name, executable_path)
    if not os.path.exists(executable):
        raise RuntimeError(
            "Executable '%s' not found. Build might not be installed" %
            executable)
    logger.info("Absolute path of executable: %s" % executable)

    # Make an explicit parameter type check so we can fail with a sensible error
    if not isinstance(command_line, list):
        raise RuntimeError("Argument 'command_line' must be a list.")

    # Register machine and server info
    session = get_battle_api(tenant)
    api_host = get_root_endpoint(tenant)
    machine = get_machine_resource(session, api_host, tenant)
    logger.info("Machine resource: %s", machine)
    public_ip = machine.data.get("public_ip")

    # Construct a command line
    command = [executable]
    command.extend(command_line)
    if public_ip:
        command.append("-publicIP=%s" % public_ip)

    jti_token = get_auth_token(tenant, "battleserver")["jti"]

    # See UE4 command line arguments at http://tinyurl.com/oygdwy3
    port = _get_available_port(MIN_PORT, MAX_PORT)

    battleserver_info = {
        "status": "pending",
        "image_name": image_name,
        "command_line": " ".join(command),
        "command_line_custom": " ".join(command_line),
        "machine_id": machine.data["machine_id"],
    }
    if public_ip:
        battleserver_info["public_ip"] = public_ip
    # NOTE(review): the port is registered unconditionally here; confirm it
    # was not meant to sit under the public_ip check above.
    battleserver_info["port"] = port

    battleserver_resource = ServerResource(session, tenant, battleserver_info)
    logger.debug("Battleserver resource: %s", battleserver_resource)
    server_id = battleserver_resource.data["server_id"]
    token = battleserver_resource.data["token"]

    command.extend([
        "-drift_url={}".format(api_host),
        "-server_url={}".format(battleserver_resource.location),
        "-token={}".format(token),
    ])
    command.extend([
        "-server",
        "-port={}".format(port),
        "-FORCELOGFLUSH",  # Force a log flush after each line.
        "-unattended",  # Disable anything requiring feedback from user.
        "-tenant={}".format(tenant),  # Select this tenant
        "-jti={}".format(jti_token),  # Access token for REST API calls.
        "-abslog={}/{}/server_{}.log".format(
            _get_logfolder(), tenant, server_id),
        "-CrashForUAT",
    ])

    battleserver_resource.put({
        "status": "pending",
        "command_line": " ".join(command),
    })
    return command, battleserver_resource
def delete_old_builds():
    """Delete outdated 'users/' builds from the local drive.

    For every ref in the index whose name starts with "users/" and whose
    target platform is "WindowsServer", the build number is parsed from the
    manifest name. If a ref appears more than once, the smallest build
    number is used as the threshold. Build folders in
    BSD_BATTLESERVER_FOLDER and zip files in BSD_TEMP_FOLDER whose names
    contain the ref (with '/' replaced by '.') and carry a lower build
    number are deleted. Other refs are left alone.

    On-disk entries whose names carry no parseable build number are skipped
    rather than aborting the cleanup.
    """
    def extract_build_from_filename(filename):
        # The build number is the last dotted component, or the one before
        # it when the last component is not numeric (e.g. an extension).
        lst = filename.split(".")
        try:
            return int(lst[-2])
        except ValueError:
            return int(lst[-1])

    def purge(entries, base_folder, ref_name, ref_filename, threshold,
              remove, message):
        # Remove matching entries older than 'threshold'; return the count.
        deleted = 0
        for entry in entries:
            if ref_filename not in entry:
                continue
            try:
                this_build_number = extract_build_from_filename(entry)
            except (ValueError, IndexError):
                logger.debug(
                    "Skipping '%s': no build number found in its name", entry)
                continue
            if this_build_number < threshold:
                path = os.path.join(base_folder, entry)
                logger.info(message, path, ref_name, this_build_number,
                            threshold)
                remove(path)
                deleted += 1
        return deleted

    repo = config.BUILD_PATH
    logger.info("Deleting old user builds for repo '%s'...", repo)
    index_file = get_index()
    build_folders = os.listdir(config.BSD_BATTLESERVER_FOLDER)
    zip_files = os.listdir(config.BSD_TEMP_FOLDER)

    build_number_by_ref = {}
    for ref in index_file["refs"]:
        ref_name = ref["ref"]
        target_platform = ref["target_platform"]
        if ref_name.startswith("users/") and target_platform == "WindowsServer":
            build_number = extract_build_from_filename(ref["build_manifest"])
            if ref_name in build_number_by_ref:
                build_number = min(build_number_by_ref[ref_name], build_number)
            build_number_by_ref[ref_name] = build_number

    num_deleted_folders = 0
    num_deleted_files = 0
    # .items() instead of .iteritems(): works on Python 2 and Python 3.
    for ref_name, latest_build_number in build_number_by_ref.items():
        logger.debug("Latest build for ref '%s' is %s", ref_name,
                     latest_build_number)
        ref_filename = ref_name.replace("/", ".")
        num_deleted_folders += purge(
            build_folders, config.BSD_BATTLESERVER_FOLDER, ref_name,
            ref_filename, latest_build_number, shutil.rmtree,
            "Deleting folder '%s' for ref '%s' because %s < %s")
        num_deleted_files += purge(
            zip_files, config.BSD_TEMP_FOLDER, ref_name,
            ref_filename, latest_build_number, os.remove,
            "Deleting zip file '%s' for ref '%s' because %s < %s")

    if num_deleted_folders or num_deleted_files:
        logger.info("Deleted %s build folders and %s zip files",
                    num_deleted_folders, num_deleted_files)
    else:
        logger.info("No old builds to delete")
def run(self):
    """Run the supervision loop for the battleservers of ``self.ref``.

    Returns early (after logging a 'build_not_installed' event) when the
    build executable is not on disk. Otherwise loops forever and, on each
    pass:

    1. Re-reads the configured process count and scales the set of running
       servers down or up to match it.
    2. Drops servers whose process has disappeared.
    3. Shuts everything down when the build manifest for the ref changed.
    4. Drains the stdout queues, tracking startup and process exit.
    5. Every 10th pass polls started servers for pending commands, startup
       timeouts and stale heartbeats.

    KeyboardInterrupt and any other Exception lead to
    ``self.shutdown_servers_and_exit``.
    """
    def terminate_quietly(pid):
        # Best-effort termination; the process may already be gone.
        try:
            psutil.Process(pid).terminate()
        except Exception:
            # Narrower than the previous bare 'except', so that
            # KeyboardInterrupt/SystemExit are no longer swallowed here.
            logger.debug("Could not terminate pid %s", pid, exc_info=True)

    try:
        build_info = get_manifest(self.ref)
        build_path = build_info["build"]
        index_file = get_index()
        # Not used below; the lookup is kept as in the original code.
        command_line = config_file["command-line"]
        executable = os.path.join(config.BSD_BATTLESERVER_FOLDER,
                                  build_info["build"],
                                  build_info["executable_path"])
        if not os.path.exists(executable):
            log_event(
                "build_not_installed",
                "Build '%s' not installed. Cannot start daemon." %
                build_info["build"])
            return

        start_time = time.time()
        loop_cnt = 0
        # read line without blocking
        while 1:
            loop_cnt += 1
            diff = (time.time() - start_time)

            config_num_processes = get_num_processes(self.ref, self.tenant)
            if config_num_processes != self.num_processes:
                txt = "Number of processes in config for ref '%s' has changed from %s to %s" % (
                    self.ref, self.num_processes, config_num_processes)
                logger.warning(txt)
                log_event("num_processes_changed", txt)
                # if we should run more processes: no problem, we'll add them in automatically
                # but if we should run fewer processes we need to kill some
                self.num_processes = config_num_processes

            if len(self.battleserver_instances) > self.num_processes:
                servers_killed = []
                while len(self.battleserver_instances) > self.num_processes:
                    logger.info(
                        "I am running %s battleservers but should be running %s. Killing servers..."
                        % (len(self.battleserver_instances),
                           self.num_processes))
                    # try to find a server that is not 'running'. If no such servers are found then kill a running one
                    for pid, (q, battleserver_resource,
                              status) in self.battleserver_instances.items():
                        resource_status = battleserver_resource.get_status()
                        if resource_status != "running":
                            logger.info(
                                "Found battleserver in state '%s' to kill: %s"
                                % (resource_status, battleserver_resource))
                            pid_to_kill = pid
                            break
                    else:
                        logger.warning(
                            "Found no battleserver to kill that was not 'running'. I will kill a running one"
                        )
                        # First key; portable replacement for keys()[0].
                        pid_to_kill = next(iter(self.battleserver_instances))
                    try:
                        p = psutil.Process(pid_to_kill)
                        q, battleserver_resource, status = self.battleserver_instances[
                            pid_to_kill]
                        logger.info("Killing server with pid %s" % pid_to_kill)
                        p.terminate()
                        servers_killed.append(str(pid_to_kill))
                        battleserver_resource.set_status(
                            "killed", {"status-reason": "Scaling down"})
                    except psutil.NoSuchProcess:
                        # The format argument was missing in the original.
                        logger.info(
                            "Cannot kill %s because it's already dead",
                            pid_to_kill)
                    del self.battleserver_instances[pid_to_kill]
                    time.sleep(5.0)
                txt = "Done killing servers for ref '%s'. Killed servers %s and am now running %s servers" % (
                    self.ref, ", ".join(servers_killed),
                    len(self.battleserver_instances))
                log_event("servers_killed", txt)

            if self.num_processes == 0:
                logger.info("Running zero processes")
                time.sleep(10)
                continue

            if len(self.battleserver_instances) < self.num_processes:
                num_added = 0
                while len(self.battleserver_instances) < self.num_processes:
                    logger.info(
                        "I am running %s battleservers but should be running %s. Adding servers..."
                        % (len(self.battleserver_instances),
                           self.num_processes))
                    pid, q, battleserver_resource = self.start_battleserver()
                    self.battleserver_instances[pid] = (
                        q, battleserver_resource, "starting")
                    num_added += 1
                    time.sleep(5.0)
                logger.info(
                    "Done adding servers. Running instances: %s" % ",".join(
                        [str(k) for k in self.battleserver_instances.keys()]))
                txt = "Done adding servers for ref '%s'. Added %s servers and am now running %s servers" % (
                    self.ref, num_added, len(self.battleserver_instances))
                log_event("servers_added", txt)

            # Iterate over a snapshot: entries are deleted inside the loops
            # below.
            for pid, (q, battleserver_resource,
                      status) in list(self.battleserver_instances.items()):
                try:
                    p = psutil.Process(pid)
                except psutil.NoSuchProcess:
                    logger.info("Process %s running server '%s' has died",
                                pid, battleserver_resource)
                    resource_status = battleserver_resource.get_status()
                    if resource_status == "starting":
                        battleserver_resource.set_status(
                            "abnormalexit",
                            {"status-reason": "Failed to start"})
                    if resource_status == "running":
                        battleserver_resource.set_status(
                            "abnormalexit",
                            {"status-reason": "Died prematurely"})
                    # else the instance has updated the status
                    time.sleep(5.0)
                    logger.info("Restarting UE4 Server (1)...")
                    del self.battleserver_instances[pid]
                    break

            new_index_file = get_index()
            old_manifest = find_build_manifest(index_file, self.ref)
            new_manifest = find_build_manifest(new_index_file, self.ref)
            if old_manifest != new_manifest:
                build_info = get_manifest(self.ref)
                build_path = build_info["build"]
                logger.info("Index file has changed. Reloading")
                self.shutdown_servers_and_exit("New build is available")

            while 1:
                if not self.battleserver_instances:
                    break
                empty = True
                for pid, (q, battleserver_resource, status) in list(
                        self.battleserver_instances.items()):
                    try:
                        line = q.get(timeout=.1)
                    except Empty:
                        # Same output as the former print statement, but
                        # valid syntax on Python 2 and Python 3.
                        sys.stdout.write("%s...\n" % pid)
                        time.sleep(1.0)
                    else:  # got line
                        empty = False
                        logger.debug("stdout: %s", line)
                        if "Game Engine Initialized." in line:
                            logger.info("Game server has started up!")
                            status = "started"
                            self.battleserver_instances[pid] = (
                                q, battleserver_resource, status)
                        if line == "ProcessExit":
                            logger.info("UE4 Process has exited")
                            resource_status = battleserver_resource.get_status()
                            if resource_status == "starting":
                                battleserver_resource.set_status(
                                    "abnormalexit",
                                    {"status-reason": "Failed to start"})
                            # else the instance has updated the status
                            time.sleep(5.0)
                            logger.info("Restarting UE4 Server (2)...")
                            terminate_quietly(pid)
                            del self.battleserver_instances[pid]
                            empty = True
                            break
                if empty:
                    time.sleep(1.0)
                    break

            for pid, (q, battleserver_resource,
                      status) in list(self.battleserver_instances.items()):
                # NOTE(review): 'diff' is measured from the start of run(),
                # not from when this particular server was spawned --
                # confirm that is the intended startup timeout.
                if status == "starting" and diff > 60.0:
                    logger.error(
                        "Server still hasn't started after %.0f seconds!" %
                        diff)
                    sys.exit(-1)
                elif status == "started" and loop_cnt % 10 == 0:
                    resp = battleserver_resource.get().json()
                    if len(resp["pending_commands"]) > 0:
                        for cmd in resp["pending_commands"]:
                            logger.warning(
                                "I should execute the following command: '%s'",
                                cmd["command"])
                            command_resource = copy.copy(battleserver_resource)
                            command_resource.location = cmd["url"]
                            command_resource.patch(data={"status": "running"})
                            if cmd["command"] == "kill":
                                logger.error(
                                    "External command to kill servers!")
                                self.shutdown_servers_and_exit(
                                    "Received command to kill all")

                    resource_status = resp["status"]
                    if diff > 60.0 and resource_status == "starting":
                        logger.error(
                            "Server is still in status '%s' after %.0f seconds!"
                            % (resource_status, diff))
                        battleserver_resource.set_status(
                            "killed", {
                                "status-reason":
                                "Failed to reach 'started' status"
                            })
                        time.sleep(5.0)
                        logger.info("Restarting UE4 Server (4)...")
                        terminate_quietly(pid)
                        del self.battleserver_instances[pid]
                    else:
                        heartbeat_date = dateutil.parser.parse(
                            resp["heartbeat_date"]).replace(tzinfo=None)
                        heartbeat_diff = (datetime.datetime.utcnow() -
                                          heartbeat_date).total_seconds()
                        if heartbeat_diff > 60:
                            logger.error(
                                "Server heartbeat is %s seconds old. The process must be frozen",
                                heartbeat_diff)
                            battleserver_resource.set_status(
                                "killed",
                                {"status-reason": "Heartbeat timeout"})
                            time.sleep(5.0)
                            logger.info("Restarting UE4 Server (5)...")
                            terminate_quietly(pid)
                            del self.battleserver_instances[pid]
    except KeyboardInterrupt:
        logger.info("User exiting...")
        self.shutdown_servers_and_exit("User exit")
    except Exception as e:
        # unhandled exception
        logger.exception(
            "Fatal error occurred in run_battleserver_loop. Exiting")
        self.shutdown_servers_and_exit(
            "Fatal error, '%s' occurred in run_battleserver_loop" % e)
def cb(num_bytes, total):
    """Log download progress at debug level, with thousands separators."""
    progress = "{:,} bytes of {:,} downloaded".format(num_bytes, total)
    logger.debug(progress)