def _send_cpplint_result_to_comments(self, issue_id):
    """Post cpplint findings as review comments on the given issue.

    Builds a JSON comment file from ``self.cpplint_err_msgs`` (each message
    formatted as ``file:line:message``) and sends it with the tcr.py tool.

    :param issue_id: id of the review issue to comment on.
    """
    # get trunk url of current dir
    cmd = ["svn", "info"]
    svn_info, returncode = common.run_shell(cmd)
    if returncode:
        error_exit("failed to run '%s'" % " ".join(cmd))
    # NOTE(review): assumes line 3 of "svn info" output is "URL: <url>" —
    # confirm against the svn client in use
    current_trunk_dir = svn_info.split('\n')[2].split()[1]
    comment = []
    for msg in self.cpplint_err_msgs:
        # split into at most 3 parts so colons inside the message body are
        # preserved (a plain split(":") silently truncated such messages)
        err_info = msg.split(":", 2)
        if len(err_info) < 3:
            continue
        temp_dict = {}
        temp_dict["file"] = os.path.join(current_trunk_dir, err_info[0])
        temp_dict["line"] = err_info[1]
        temp_dict["type"] = "3"
        temp_dict["comment"] = "CPPLINT:" + err_info[2]
        comment.append(temp_dict)
    summary = ('cpplint check result: %d new errors found' %
               len(self.cpplint_err_msgs))
    json_content = {"comments": comment, "summary": summary}
    (fd, comment_file) = tempfile.mkstemp(suffix=".json")
    # close the descriptor returned by mkstemp instead of leaking it (the
    # old code shadowed 'fd' with the file object and never closed it)
    os.close(fd)
    with open(comment_file, 'w') as out:
        json.dump(json_content, out)
    # send comment
    cmd = ["%s/app/qzap/common/tool/tcr.py" % self.current_source_dir]
    cmd.extend(["-i", issue_id, "--comment", comment_file])
    output, ret = common.run_shell(cmd)
    if ret:
        common.warning("Failed to send comments! ret=%d" % ret)
    os.remove(comment_file)
def update_slurm_conf(update_cmpnode_res=False, nodes_change=False):
    """Regenerate slurm.conf from the current live copy.

    :param update_cmpnode_res: refresh the NodeName resource entries.
    :param nodes_change: recompute queue info for added/removed nodes.
    """
    # keep a backup of the previous configuration, if one exists
    if os_path.exists(SLURM_CONF):
        backup(BACKUP_SLURM_CONF_CMD)
    # edit a temporary copy so the live conf is swapped in atomically
    staged = "{}/slurm.conf.tmp".format(APP_HOME)
    run_shell("cp -f {} {}".format(SLURM_CONF, staged))
    if update_cmpnode_res:
        # refresh NodeName entries with current compute-node resources
        update_cmpnodes_res_conf(staged)
    # current partition/queue details
    queues_info = get_queues_details()
    if nodes_change:
        # adjust queue info for nodes that joined or left the cluster
        update_queues_info_with_nodes_change(queues_info)
    # write the Partition section and publish the new configuration
    update_queues_conf_file(staged, queues_info)
    run_shell("mv {} {}".format(staged, SLURM_CONF))
def generate_hosts():
    """Rebuild the hosts file.

    Keeps the original entries up to and including the line containing
    "metadata", then appends the cluster host list with compute hostnames
    zero-padded to three digits (e.g. node1 -> node001).
    """
    backup(BACKUP_HOSTS_CMD)
    # collect the head of the current hosts file
    ori_hosts = []
    with open(HOSTS, "r") as old:
        line = old.readline()
        while line:
            ori_hosts.append(line)
            if "metadata" in line:
                break
            line = old.readline()
    tmp_hosts = "{}/hosts".format(APP_HOME)
    with open(tmp_hosts, "w") as hosts:
        hosts.writelines(ori_hosts)
        with open(HOSTS_INFO_FILE, "r") as ehpc_hosts:
            hosts_lines = ehpc_hosts.readlines()
        # zero-pad compute hostnames, e.g. replace node1 with node001
        # (enumerate instead of the old range(len(...)) index loop)
        for i, hline in enumerate(hosts_lines):
            if COMPUTE_HOSTNAME_PREFIX in hline:
                node = hline.split()[1]
                sid = int(node.split(COMPUTE_HOSTNAME_PREFIX)[1])
                new_node = "%s%03d" % (COMPUTE_HOSTNAME_PREFIX, sid)
                hosts_lines[i] = hline.replace(node, new_node)
        hosts.writelines(hosts_lines)
    run_shell("mv {} {}".format(tmp_hosts, HOSTS))
def stop():
    """Stop every service belonging to this node's role.

    :return: 0 on success, 1 when the role is unknown.
    """
    role = get_role()
    if role not in ROLE_SERVICES:
        logger.error("Un-support role[%s].", role)
        return 1
    for svc in ROLE_SERVICES[role]:
        run_shell("systemctl stop {}".format(svc))
    return 0
def modify_user_password(username, old_password, password):
    """Set a new login password for *username* via ``usermod -p``.

    :param username: account to update.
    :param old_password: unused here — presumably kept for API symmetry
        with callers; verify before removing.
    :param password: new plaintext password; it is hashed locally.
    """
    # build a 16-char salt from the crypt alphabet using random bytes.
    # NOTE(review): assumes Python 2 — on Python 3 iterating os.urandom()
    # yields ints and ord() would raise; confirm the target interpreter.
    salt = ''.join(
        map(
            lambda x: './0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz'[
                ord(x) % 64], os.urandom(16)))
    # "$6$<salt>" selects SHA-512 crypt
    pwd = crypt.crypt(password, "$6$%s" % salt)
    # escape '$' for the shell command below ("\$" is backslash + dollar,
    # not a Python escape sequence)
    pwd = pwd.replace("$", "\$")
    # NOTE(review): passing the hash on a command line exposes it to `ps`;
    # consider feeding chpasswd via stdin instead.
    run_shell("usermod -p %s %s" % (pwd, username))
def restart():
    """Restart all services for this node's role.

    :return: 0 on success, 1 when the role is unknown.
    """
    role = get_role()
    if role not in ROLE_SERVICES:
        logger.error("Un-support role[%s].", role)
        return 1
    for svc in ROLE_SERVICES[role]:
        run_shell("systemctl restart {}".format(svc))
    logger.info("%s re-started.", role)
    return 0
def _compress_sids(sids):
    """Build the body of a slurm node range expression from sorted sids.

    e.g. [1, 2, 3, 4, 5, 6, 8, 12, 13, 14, 15] -> "1-6,8,12-15"

    :param sids: non-empty sorted list of ints.
    :return: comma-separated run string (no brackets, no prefix).
    """
    parts = []
    start = last = sids[0]
    for sid in sids[1:]:
        if sid - last > 1:
            # close the current run and start a new one at this sid
            parts.append(str(start) if last == start
                         else "{}-{}".format(start, last))
            start = sid
        last = sid
    # close the final run
    parts.append(str(start) if last == start
                 else "{}-{}".format(start, last))
    return ",".join(parts)


def generate_conf():
    """Generate slurm.conf from the template.

    Fills in the cluster name, the controller/compute resource lines read
    from RESOURCE_INFO_FILE, and the default NodeName range expression
    (e.g. node[1-6,8,12-15]) computed from the cluster sid list.
    """
    # get cluster_name
    cluster_info = get_cluster_info()
    cls_name = cluster_info["cluster_name"]
    ctl_resource = ""  # the first line in resource.info
    cmp_resource = ""  # the last line in resource.info
    with open(RESOURCE_INFO_FILE, "r") as info:
        lines = info.readlines()
        for line in lines:
            if line:
                if not ctl_resource:
                    ctl_resource = line
                else:
                    cmp_resource = line
    # generate default NodeName, eg: node[1-6,8,12-15]
    sids = [int(s) for s in cluster_info["sids"].split(",") if s]
    sids.sort()
    node_name = "{}[{}]".format(COMPUTE_HOSTNAME_PREFIX,
                                _compress_sids(sids))
    # backup last slurm configuration
    if os_path.exists(SLURM_CONF):
        backup(BACKUP_SLURM_CONF_CMD)
    # render slurm.conf.template into a temp file, then swap it in
    tmp_file = "{}/slurm.conf.tmp".format(APP_HOME)
    with open(tmp_file, "w") as conf:
        with open(SLURM_CONF_TMPL, "r") as tmpl:
            for line in tmpl.readlines():
                if line:
                    line = line.format(CLUSTER_NAME=cls_name,
                                       CONTROLLER_RESOURCE=ctl_resource,
                                       COMPUTE_RESOURCE=cmp_resource,
                                       DEFAULT_NODE_NAME=node_name)
                conf.write(line)
    run_shell("mv {} {}".format(tmp_file, SLURM_CONF))
def update_queues_conf(queues_info):
    """Write *queues_info* into the slurm.conf Partition section.

    :param queues_info: queue/partition details to persist.
    :return: 0 always.
    """
    # preserve the previous configuration before touching it
    if os_path.exists(SLURM_CONF):
        backup(BACKUP_SLURM_CONF_CMD)
    # edit a temp copy and swap it in so the live file is never partial
    staged = "{}/slurm.conf.tmp".format(APP_HOME)
    run_shell("cp -f {} {}".format(SLURM_CONF, staged))
    update_queues_conf_file(staged, queues_info)
    run_shell("mv {} {}".format(staged, SLURM_CONF))
    return 0
def metadata_reload():
    """Re-apply cluster metadata.

    Refreshes the hosts file on every node; on the controller it also
    regenerates slurm.conf and asks slurmctld to reconfigure.

    :return: 0 always.
    """
    logger.info("generate hosts for reloading..")
    generate_hosts()
    if get_role() == ROLE_CONTROLLER:
        logger.info("update slurm conf for reloading metadata..")
        update_slurm_conf()
        # TODO: with multiple controller nodes, run this on one master only
        logger.info("re-config slurm configuration for cluster..")
        run_shell("scontrol reconfigure")
    return 0
def gcloud_upload(app_platform, app_path, gcloud_storage_keyword,
                  success_keyword):
    """Run the app through Firebase Test Lab game-loop testing.

    :param app_platform: platform argument passed to `gcloud firebase test`.
    :param app_path: path of the package to upload.
    :param gcloud_storage_keyword: marker that precedes the results URL in
        gcloud's stderr output.
    :param success_keyword: string that marks a successful device log.
    :return: (succeeded, total) device-run counts.
    """
    res = common.run_shell([
        'gcloud', 'beta', 'firebase', 'test', app_platform, 'run',
        '--type game-loop',
        '--app %s' % app_path,
        '--scenario-numbers 1',
        '--format="json"',
    ])
    # scan stderr for the storage URL that holds the per-device logs
    gcloud_storage_url = ''
    for raw in res.stderr.readlines():
        text = raw.decode('UTF-8').strip()
        if not text:
            continue
        print('stderr:%s' % text)
        if gcloud_storage_keyword in text:
            bracketed = re.findall(r'\[(.*?)\]', text)
            gcloud_storage_url = bracketed[0][len(gcloud_storage_keyword):]
    # stdout carries a JSON array with one entry per device run
    total = 0
    succeeded = 0
    output = res.stdout.read().decode('UTF-8')
    print('upload package output:%s' % output)
    for result in json.loads(output):
        total += 1
        if check_firebase_log(app_platform, gcloud_storage_url,
                              result['axis_value'], success_keyword):
            succeeded += 1
    return succeeded, total
def list_slice(table):
    """List all slices of *table* via the Cli tool.

    :param table: table name to query.
    :return: [slice_list, state->count, state->slice_list], or None when
        parsing fails (the exception is logged by print_exc_plus).
    """
    # [[slice_list], state->num, state->slice_list]
    slice_and_state_count = [[], {}, {}]
    # g_slice_states is only read here, so no `global` statement is needed
    for state in g_slice_states:
        slice_and_state_count[1][state] = 0
        slice_and_state_count[2][state] = []
    try:
        shell_out = common.run_shell(
            'echo "list slice on table %s;" | ./Cli -s' % table)
        output_lines = shell_out.split('\n')
        n = len(output_lines)
        if len(output_lines) > 7:
            # column 3 of the 8th line carries the command status
            status = output_lines[7].split('|')[3].strip()
            if status != 'ok':
                pass  # TODO
            else:
                # data rows start at line 13, one row every other line
                for line_idx in range(13, n, 2):
                    stmp = output_lines[line_idx]
                    if stmp != '':
                        # renamed from `slice`, which shadowed the builtin
                        slc = BailingSlice.Slice(stmp)
                        slice_and_state_count[1][slc.state] += 1
                        slice_and_state_count[2][slc.state].append(slc)
                        slice_and_state_count[0].append(slc)
        return slice_and_state_count
    except:
        common.print_exc_plus()
def _uninstall(software, software_home, uninstaller=None):
    """Uninstall *software* from *software_home*.

    :param software: software name (installed at software_home/software).
    :param software_home: root directory holding installed software.
    :param uninstaller: optional uninstall script path; when absent, use
        the package's uninstall.sh if present, else remove the directory.
    :return: 0 on success, 1 on failure.
    """
    logger.info("Do uninstall software[%s]..", software)
    software_dir = "{}/{}".format(software_home, software)
    if uninstaller:
        uninstaller = "bash {}".format(uninstaller)
    else:
        # was: os.path.exists("bash <dir>/uninstall.sh") — the "bash "
        # prefix made the existence check always False, so the script was
        # never used; test the script path itself
        script = "{}/uninstall.sh".format(software_dir)
        uninstaller = "bash {}".format(script) if os.path.exists(script) \
            else "rm -rf {}".format(software_dir)
    try:
        # uninstall with SOFTWARE_HOME exported for the script
        run_shell("export SOFTWARE_HOME={} && {}".format(software_home,
                                                         uninstaller))
    except Exception as e:
        # was "Failed to run install cmd" — this is the uninstall path;
        # str(e) also works where the py2-only e.message does not
        logger.error("Failed to run uninstall cmd: %s", str(e))
        logger.error("Error: %s", traceback.format_exc())
        return 1
    return 0
def generate_hosts():
    """Rebuild the hosts file: keep everything up to (and including) the
    line containing "metadata", then append the ehpc host entries."""
    backup(BACKUP_HOSTS_CMD)
    # collect the head of the current hosts file
    kept = []
    with open(HOSTS, "r") as old:
        for line in old:
            kept.append(line)
            if "metadata" in line:
                break
    staged = "{}/hosts".format(APP_HOME)
    with open(staged, "w") as hosts:
        hosts.writelines(kept)
        with open(HOSTS_INFO_FILE, "r") as ehpc_hosts:
            hosts.writelines(ehpc_hosts.readlines())
    run_shell("mv {} {}".format(staged, HOSTS))
def check_dependency_between_issues(self):
    """Warn when the issue being submitted depends on another open issue.

    Runs `blade query --deps` over the BUILD paths touched by this issue's
    file list and, for every other tracked issue, prompts the user to
    confirm when a dependency relation is detected. Closed/submitted
    issues encountered along the way are dropped from self.issue_dict.

    NOTE(review): Python 2 only (has_key, raw_input, urllib.urlopen,
    side-effecting map) — confirm the target interpreter.
    """
    if not self.issue_dict.has_key(self.issue) or len(self.issue_dict) < 2:
        return
    build_path = set()
    current_dir = get_cwd()
    # an issue entry is either a plain set of files or a dict holding a
    # "filelist" key
    file_list = []
    if isinstance(self.issue_dict[self.issue], set):
        file_list = self.issue_dict[self.issue]
    else:
        file_list = self.issue_dict[self.issue]["filelist"]
    for f in file_list:
        ext = os.path.splitext(f)[1]
        # BUILD has no .h dependency specification, so not check .h
        if ext in (".c", ".cpp", ".hpp", ".C", ".cxx", ".cc"):
            find_path = self._find_build_path_contain_filename_bottom_up(f)
            if find_path:
                build_path.add(os.path.relpath(find_path, current_dir))
    if not build_path:
        # some issues have no associated BUILD file(eg. modify script files)
        return
    blade = find_file_bottom_up('blade')
    cmd = "%s query --deps %s" % (blade, " ".join(build_path))
    (output, ret) = run_shell(cmd.split())
    if ret:
        warning('failed to run %s' % cmd)
        return
    submit_issue_deps = set(output.splitlines())
    issues_to_pop = []
    for issue in self.issue_dict.keys():
        if issue == self.issue:
            continue
        # fetch the remote issue's state and title from the review service
        issue_info_url = "%s%s" % (DEFAULT_REVIEW_INFO_URL, issue)
        issue_info = json.load(urllib.urlopen(issue_info_url))
        if not issue_info['successfully']:
            warning('failed to get issue_info for issue %s' % issue)
            continue
        issue_info_detail = issue_info["requestsWithPagerInfo"][
            "requests"][0]
        issue_state = issue_info_detail['state']
        if (issue_state == _ISSUE_STATE.CLOSED or
                issue_state == _ISSUE_STATE.SUBMITED):
            # issue already closed or submited
            issues_to_pop.append(issue)
            continue
        if self._has_dependency_relation(submit_issue_deps, issue):
            # interactive confirmation before proceeding with submission
            warning('the submit issue may depends on the issue %s with'
                    ' title \"%s\"' % (issue, issue_info_detail['name']))
            answer = raw_input('Continue?(y/N) ').strip()
            if answer != 'y':
                error_exit('Exit')
    if issues_to_pop:
        # drop closed/submitted issues from the tracking dict
        map(self.issue_dict.pop, issues_to_pop)
def install(software_list, ignore_exist=False):
    """Install every software package in *software_list*.

    :param software_list: list of dicts with "name", "source" and an
        optional "installer" key.
    :param ignore_exist: when False, an already-installed package aborts
        with code 55; when True it is logged and skipped.
    :return: 0 on success, 55 when a package exists and ignore_exist is
        False, or the failing _install return code.
    """
    logger.info("install software[%s]..", software_list)
    software_home = SOFTWARE_HOME_FMT.format(get_admin_user())
    if software_list:
        run_shell("mkdir -p {}".format(software_home))
    # first pass: detect packages that are already installed
    exist_info = {}
    for s in software_list:
        # software exist
        if os.path.exists("{}/{}".format(software_home, s["name"])):
            exist_info[s["name"]] = True
            logger.error("The software[%s] already exist!", s["name"])
            if not ignore_exist:
                return 55
    # second pass: install the missing ones
    for s in software_list:
        if not exist_info.get(s["name"], False):
            ret = _install(s["name"], s["source"], software_home,
                           s.get("installer"))
            # was `ret is not 0`: identity comparison on ints relies on
            # CPython's small-int caching; compare by value instead
            if ret != 0:
                return ret
    return 0
def show_slice(table, slice_no):
    """Show one slice of *table* via the Cli tool.

    :param table: table name.
    :param slice_no: slice number (coerced to int).
    :return: [Slice, unit_list, snapshot_list], or None on parse failure
        (the exception is logged by print_exc_plus).

    NOTE(review): the magic line offsets (7, 13, 19, +4, step 2) assume a
    fixed Cli table layout — confirm against the Cli output format.
    """
    ret_list = []
    try:
        shell_out = common.run_shell(
            'echo "show slice %d on table %s;" | ./Cli -s' % (int(slice_no),
                                                              table))
        output_lines = shell_out.split('\n')
        n = len(output_lines)
        if len(output_lines) > 7:
            # column 3 of the 8th line carries the command status
            status = output_lines[7].split('|')[3].strip()
            if status != 'ok':
                pass  # TODO
            else:
                # line 13 holds the slice row itself
                sli = BailingSlice.Slice(output_lines[13])
                ret_list.append(sli)
                # locate the section headers of the two trailing lists
                rep_base = snapshot_base = 0
                for idx, line in enumerate(output_lines):
                    line = line.strip()
                    if line == 'Replication List:':
                        rep_base = idx
                    elif line == 'Snapshot List:' or line == 'Snapshot list:':
                        snapshot_base = idx
                # replication addresses, one data row every other line
                rep_list = []
                for line_idx in range(rep_base + 4, snapshot_base, 2):
                    stmp = output_lines[line_idx]
                    if stmp != '':
                        rep_list.append(stmp.split('|')[1].strip())
                # units, annotated with their order in the replication list
                unit_list = []
                for line_idx in range(19, rep_base, 2):
                    stmp = output_lines[line_idx]
                    if stmp != '':
                        unit = BailingSlice.Unit(stmp)
                        try:
                            unit.rep_order = rep_list.index(unit.repl_addr)
                        except:
                            # address missing from the list: sort it last
                            unit.rep_order = 999
                        unit_list.append(unit)
                ret_list.append(unit_list)
                # snapshots listed after the Snapshot List header
                snapshot_list = []
                for line_idx in range(snapshot_base + 4, len(output_lines), 2):
                    stmp = output_lines[line_idx]
                    if stmp != '':
                        snapshot_list.append(BailingSlice.Snapshot(stmp))
                ret_list.append(snapshot_list)
        return ret_list
    except:
        common.print_exc_plus()
def generate_slurm_conf():
    """Render slurm.conf from its template with the current cluster name,
    controller machine, resource lines and compute node list."""
    # cluster identity
    cluster_info = get_cluster_info()
    cls_name = cluster_info["cluster_name"]
    # resource.info: the first non-empty line describes the controller,
    # the last non-empty line describes the compute nodes
    ctl_resource = ""
    cmp_resource = ""
    with open(RESOURCE_INFO_FILE, "r") as info:
        for line in info.readlines():
            if not line:
                continue
            if not ctl_resource:
                ctl_resource = line
            else:
                cmp_resource = line
    node_list = get_slurm_cmpnode_list(cluster_info)
    ctl_machine = get_ctl_machine()
    # keep a backup of the previous configuration, if any
    if os_path.exists(SLURM_CONF):
        backup(BACKUP_SLURM_CONF_CMD)
    # render into a temp file, then swap it in
    staged = "{}/slurm.conf.tmp".format(APP_HOME)
    with open(staged, "w") as conf, open(SLURM_CONF_TMPL, "r") as tmpl:
        for line in tmpl.readlines():
            if line:
                line = line.format(CLUSTER_NAME=cls_name,
                                   CONTROL_MACHINE=ctl_machine,
                                   CONTROLLER_RESOURCE=ctl_resource,
                                   COMPUTE_RESOURCE=cmp_resource,
                                   DEFAULT_NODE_NAME=node_list)
            conf.write(line)
    run_shell("mv {} {}".format(staged, SLURM_CONF))
def list_server():
    """Query the Cli for all servers.

    :return: list of MachineManager.Machine, or None on parse failure
        (the exception is logged by print_exc_plus).
    """
    machine_list = []
    try:
        shell_out = common.run_shell('echo "list server;" | ./Cli -s')
        rows = shell_out.split('\n')
        # the 8th line holds the command status in its 4th pipe column
        if len(rows) > 7 and rows[7].split('|')[3].strip() == 'ok':
            # data rows start at line 13, one every other line
            for idx in range(13, len(rows), 2):
                if rows[idx] != '':
                    machine_list.append(MachineManager.Machine(rows[idx]))
        return machine_list
    except:
        common.print_exc_plus()
def list_table():
    """Query the Cli for all tables.

    :return: list of BailingRepository.Repository, or None on parse
        failure (the exception is logged by print_exc_plus).
    """
    table_list = []
    try:
        raw = common.run_shell('echo "list table;" | ./Cli -s')
        rows = raw.split('\n')
        # 8th line, 4th pipe-separated column: command status
        if len(rows) > 7 and rows[7].split('|')[3].strip() == 'ok':
            # table rows begin at line 13, every other line
            for idx in range(13, len(rows), 2):
                entry = rows[idx]
                if entry != '':
                    table_list.append(BailingRepository.Repository(entry))
        return table_list
    except:
        common.print_exc_plus()
def show_snapshot(table, ss):
    """Show snapshot *ss* of *table* via the Cli tool.

    :param table: table name.
    :param ss: snapshot identifier.
    :return: list of pipe-separated row fields (leading/trailing empty
        cells dropped), or None on parse failure (exception is logged).
    """
    ret_list = []
    try:
        shell_out = common.run_shell(
            'echo "show snapshot %s on table %s;" | ./Cli -s' % (ss, table))
        output_lines = shell_out.split('\n')
        # the 8th line carries the command status in its 4th column
        # (the unused local `n` was removed)
        if len(output_lines) > 7:
            status = output_lines[7].split('|')[3].strip()
            if status != 'ok':
                pass  # TODO
            else:
                # data rows start at line 13, one every other line
                for line_idx in range(13, len(output_lines), 2):
                    stmp = output_lines[line_idx]
                    if stmp != '':
                        # drop the empty cells around the pipe-formatted row
                        ret_list.append(stmp.split('|')[1:-1])
        return ret_list
    except:
        common.print_exc_plus()
def _install(software, source, software_home, installer=None):
    """
    :param source: full url to download software, eg: root@xxx/aa/bb/cc.tar.gz
    :param software_home: the home that software would to be installed
    :param software: software name (install dir: software_home/software)
    :param installer: install script
    :return: 0 on success, 1 on any failure (logged with a traceback)
    """
    logger.info("Do install software[%s] from source[%s]..", software, source)
    package = source.split("/")[-1]
    workdir = SOFTWARE_WORKDIR_FMT.format(get_admin_user(), software)
    package_path = "{}/{}".format(workdir, package)
    try:
        # download
        _download(source, workdir)
        # un-tar
        run_shell("tar -zxf {}".format(package_path), cwd=workdir,
                  timeout=180)
        # find the directory the archive extracted into
        un_tar_dir = ""
        for r in os.listdir(workdir):
            if os.path.isdir("{}/{}".format(workdir, r)):
                un_tar_dir = "{}/{}".format(workdir, r)
                break
        if not un_tar_dir:
            # fail fast with a clear message instead of building a broken
            # shell command; still caught below -> return 1
            raise Exception("no directory extracted from %s" % package)
        if installer:
            installer = "bash {}/{}".format(un_tar_dir, installer)
        else:
            # was: os.path.exists("bash <dir>/install.sh") — the "bash "
            # prefix made the check always False; test the script path
            script = "{}/install.sh".format(un_tar_dir)
            installer = "bash {}".format(script) if os.path.exists(script) \
                else "mv {} {}/".format(un_tar_dir, software_home)
        # install with SOFTWARE_HOME exported for the script
        run_shell("export SOFTWARE_HOME={} && {}".format(
            software_home, installer), timeout=120)
        # clean
        if os.path.exists(workdir):
            run_shell("rm -rf {}".format(workdir))
    except Exception:
        logger.error("Failed to install software[%s]: \n%s", software,
                     traceback.format_exc())
        return 1
    return 0
def add_user(username, password, uid, gid, nas_mount_path=None):
    """Create a login account (and matching group) with sudo via wheel.

    :param username: account and group name to create.
    :param password: plaintext password; hashed locally with SHA-512 crypt.
    :param uid: numeric user id.
    :param gid: numeric group id.
    :param nas_mount_path: unused here — presumably for a NAS home dir;
        verify against callers before removing.
    """
    run_shell("groupadd -g %s %s" % (gid, username))
    # 16-char salt drawn from the crypt alphabet.
    # NOTE(review): assumes Python 2 — on Python 3 iterating os.urandom()
    # yields ints and ord() would raise; confirm the target interpreter.
    salt = ''.join(
        map(
            lambda x: './0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz'[
                ord(x) % 64], os.urandom(16)))
    # "$6$<salt>" selects SHA-512 crypt
    pwd = crypt.crypt(password, "$6$%s" % salt)
    # escape '$' for the shell ("\$" is backslash + dollar, not an escape)
    pwd = pwd.replace("$", "\$")
    # NOTE(review): the hash is visible in `ps` output; consider chpasswd.
    run_shell("useradd -u %s -g %s -p %s %s" % (uid, gid, pwd, username))
    # run_shell("chmod 755 /etc/sudoers")
    # run_shell("cat >> /etc/sudoers <<EOF\n%s ALL=(ALL) NOPASSWD: ALL\nEOF" %
    #           username)
    # run_shell("chmod 755 /etc/sudoers")
    # grant sudo through the wheel group instead of editing sudoers
    run_shell("usermod -aG wheel %s" % username)
def generate_hpcmodulefiles():
    """Install the ehpc modulefiles: copy the default module file onto
    the NAS and render the modulefiles template with the mount point."""
    nas_mount_point = get_nas_mount_point()
    # prepare path for ehpc cluster modulefiles
    run_shell("mkdir -p {}/opt/modulefiles/".format(nas_mount_point))
    default_target = "{}/opt/modulefiles/default".format(nas_mount_point)
    run_shell("cp {} {}".format(HPC_DEFAULT_MODULE_FILE, default_target))
    # render the template into a temp file, then swap it in
    staged = "{}/hpcmodulefiles.tmp".format(APP_HOME)
    with open(staged, "w") as conf, open(HPC_MODULE_FILES_TMPL, "r") as tmpl:
        for line in tmpl.readlines():
            if line and "NAS_MOUNT_POINT" in line:
                line = line.format(NAS_MOUNT_POINT=nas_mount_point)
            conf.write(line)
    run_shell("mv {} {}".format(staged, HPC_MODULE_FILES))
def expand_depended_targets(self, build_targets):
    """Optionally expand *build_targets* with everything that depends on
    them (via `blade query --depended`), when --build-dependeds is set.

    :param build_targets: space-separated blade target string.
    :return: the (possibly expanded) space-separated target string.

    NOTE(review): os.remove(dot_file) raises if the blade query failed and
    the dot file was never written — confirm whether that is intended.
    """
    if not self.options.build_dependeds:
        return build_targets
    blade = find_file_bottom_up('blade')
    dot_file = 'depended_targets'
    cmd = "%s query --depended --output-to-dot %s %s" % (blade, dot_file,
                                                         build_targets)
    (output, ret) = run_shell(cmd.split())
    all_targets = set()
    if not ret:
        with open(dot_file) as f:
            next(f)  # skip the starting line "digraph blade..."
            for line in f:
                # node lines carry a label; the first edge line ends them
                if line.find('[label =') == -1:
                    break
                # line: "path:target" [label = "path:target"]
                all_targets.add(line.split()[0][1:-1])  # skip double quote
    if all_targets:
        # rebuild the target string as absolute blade-root paths
        build_targets = ''
        for target in sorted(list(all_targets)):
            build_targets += " %s/%s" % (self.blade_root_dir, target)
    os.remove(dot_file)
    return build_targets
def check_service_status(service):
    """Return the `systemctl is-active` exit code for *service*.

    :return: 0 when the service is active; the non-zero code (logged)
        otherwise.
    """
    retcode = run_shell("systemctl is-active {}".format(service),
                        without_log=True)
    if retcode == 0:
        return retcode
    logger.error("the service[%s] is not health[code: %s].", service,
                 retcode)
    return retcode
def _download(source, workdir):
    """Fetch *source* into *workdir*: http(s) URLs via wget, anything
    else (e.g. rsync/ssh specs) via rsync."""
    if not source.startswith("http"):
        run_shell("rsync -q -aP {} {}/".format(source, workdir),
                  timeout=600)
        return
    run_shell("wget -P {} -t 5 -w 60 -c {}".format(workdir, source),
              timeout=600)
json_message['attachments'][0]['actions'].append(deployment_button) headers = {'Content-Type': 'application/json', 'Accept': 'text/plain'} res = requests.post(slack_webhook_url, data=json.dumps(json_message), headers=headers) return res.text == 'ok' if __name__ == '__main__': common.log('generate-slack-webhook-url') cmds = [ 'imp-ci', 'secrets', 'read', '--environment=production', '--buildkite-org=improbable', '--secret-type=slack-webhook', '--secret-name=unreal-gdk-slack-web-hook' ] res = common.run_shell(cmds) for line in res.stderr.readlines(): utf8 = line.decode('UTF-8').strip() if len(utf8) > 0: print('%s' % utf8) output = res.stdout.read().decode('UTF-8') slack_webhook_url = json.loads(output)['url'] common.log('slack-notify') slack_channel = common.get_environment_variable('SLACK_CHANNEL', '#unreal-gdk-builds') result = slack_notify(slack_channel, slack_webhook_url) exit_value = 0 if result == True else 1 exit(exit_value)
def start():
    """Start this node's role services.

    Prepares the shared slurm state directory first; on the master
    controller it also creates admin dirs, the admin user and the initial
    software set after the services are up.

    :return: 0 on success, 1 for an unknown role, or a non-zero
        init_software() code on the master controller.
    """
    role = get_role()
    nas_mount_point = get_nas_mount_point()
    cluster_name = get_cluster_name()
    # mkdir /nas_mount_point/opt/slurm/state_save_loc for StateSaveLocation
    run_shell("mkdir -p {}/opt/slurm/state_save_loc/{}/".format(
        nas_mount_point, cluster_name))
    run_shell("ln -sf {}/opt/slurm/ /opt/slurm".format(nas_mount_point))
    # start before
    if role == ROLE_CONTROLLER:
        logger.info("Generating slurm configurations...")
        generate_slurm_conf()
    else:
        # stale state files prevent a clean daemon start
        for f in clear_files[role]:
            if path.exists(f):
                run_shell("rm {}".format(f))
    # start service
    if role in ROLE_SERVICES:
        for service in ROLE_SERVICES[role]:
            logger.info("Start service {}".format(service))
            run_shell("systemctl start {}".format(service))
    else:
        logger.error("Un-support role[%s].", role)
        return 1
    # start post (removed a redundant second get_nas_mount_point() call)
    cluster_info = get_cluster_info()
    if role == ROLE_CONTROLLER and \
            int(cluster_info["sid"]) == MASTER_CONTROLLER_SID:
        logger.info("create admin dirs..")
        run_shell("mkdir -p {}/opt".format(nas_mount_point))
        run_shell("mkdir -p {}/home/".format(nas_mount_point))
        run_shell("mkdir -p {}/data/".format(nas_mount_point))
        # create admin user
        add_admin_user()
        # install software; was an unconditional early return that skipped
        # the "started" log below — now only failures return early
        ret = init_software()
        if ret:
            return ret
    logger.info("%s started.", role)
    return 0
def set_hostname():
    """Apply this node's configured hostname via hostnamectl."""
    run_shell("hostnamectl set-hostname {}".format(get_hostname()))