def get_info_file_info(self):
    """
    Return the parsed content of the package's info.txt file.

    Strips a UTF-8 BOM if present (and rewrites the file without it),
    then parses the remaining bytes as JSON.

    :return: dict parsed from info.txt, or {} when the file is absent,
             empty, or unparsable.
    """
    info_data = {}
    if self.exist_info_file:
        info_file_full_path = path.join(self.package_path, "info.txt")
        with open(info_file_full_path, "rb+") as f:
            try:
                info_con = f.read()
                # Strip a UTF-8 BOM if present and rewrite the file clean.
                if info_con[:3] == codecs.BOM_UTF8:
                    info_con = info_con[3:]
                    f.seek(0)
                    f.truncate()
                    f.write(info_con)
                if len(info_con.strip()) > 0:
                    info_data = json.loads(info_con.decode())
                else:
                    # The original had a dead `pass` before this warning.
                    cli.warning("\ninfo 文件是空的")
            except Exception:
                # Narrowed from a bare except; keep best-effort behavior.
                cli.error("\n %s 包的 info文件信息解析出错了" % self.package_name)
    return info_data
def mv_unzip_analysis(file_path: str):
    """Move, unzip and analyse a zip archive uploaded through FTP."""
    is_zip_file = path.isfile(file_path) and file_path.endswith(".zip")
    if not is_zip_file:
        cli.error(
            "{} 不是文件 or 不是 zip 压缩文件,请传入需要处理的 zip 压缩包的路径".format(file_path))
        exit()
    helper = FtpServerHelperStep1(path.abspath(file_path))
    cli.warning("Task Start...")
    helper.ls_lah(helper.zipfile_path)
    # Step 0: derive metadata from the archive path.
    helper.show_zipfile_info()
    # Step 1: move the file into Configs['ftp_data_tmp'], renaming it to a
    # normalized name so irregular filenames cannot cause data loss.
    helper.mv()
    helper.show_tree()
    # Step 2: extract the archive.
    helper.unzip()
    # Step 3: analyse the packages, repair data and produce the Excel report.
    cli.info("正在针对解压后的目录进行数据解密,数据清洗工作")
    helper.package_analysis()
    cli.warning("Task Finished!")
def __init__(self, zipfile_path):
    """
    Capture paths and FTP metadata for one uploaded zip archive.

    :param str zipfile_path: path of the uploaded .zip file
    """
    self.zipfile_path = zipfile_path
    # Root folder name the archive extracts into; fatal if undetectable.
    self.zipfile_root_foldername = self.get_zipfile_rootpath(zipfile_path)
    if not self.zipfile_root_foldername:
        cli.error("无法获取压缩包的根目录")
        exit()
    # FTP metadata parsed from the upload directory path.
    self.ftp_info = FtpInfo(path.dirname(self.zipfile_path))
    # Directory the file was uploaded to.
    self.zip_uploads_path = path.dirname(self.zipfile_path)
    # Temporary processing directory for the uploaded data.
    self.zip_process_path = path.join(ftp_data_tmp_path(), self.ftp_info.project_code,
                                      self.ftp_info.company_code + "_fid_" + self.ftp_info.ftp_id
                                      )
    # New timestamped archive name used after the move.
    # NOTE(review): the separator here is "_fid" while zip_process_path uses
    # "_fid_" — looks inconsistent; confirm which spelling downstream expects.
    self.zipfile_newname = "{}_{}_{}.zip".format(self.ftp_info.project_code,
                                                 self.ftp_info.company_code + "_fid" + self.ftp_info.ftp_id,
                                                 time.strftime('%Y%m%d%H%M%S', time.localtime(time.time()))
                                                 )
    # Connection instance for running local linux commands.
    self.local_connection = Connection("localhost")
def init(self):
    """Split self.target_path into project code, company code and FTP id."""
    segments = self.target_path.strip("/").split("/")
    if len(segments) == 5:
        self.project_code = segments[2]
        self.company_code = segments[3]
        # Last segment looks like "<prefix>_<id>"; keep the id part.
        self.ftp_id = str(segments[4]).split("_")[1]
    else:
        cli.error("路径分析结果不匹配,请核对路径")
        raise AttributeError("路径拆分个数({})不等于5".format(len(segments)))
def upp_version(upp_root):
    """
    Detect the UPP version from the source tree at upp_root.

    Checks arch/version first, then a top-level version file.
    :return: parsed version object (via parse)
    """
    if os.path.isfile(f'{upp_root}/arch/version'):
        # `with` closes the handle deterministically (the original leaked it).
        with open(f'{upp_root}/arch/version', 'r') as f:
            line = f.readline()
        match = re.search(r'UPPV(\d+\.\d+(\.\d+)?)', line)[1]
    elif os.path.isfile(f'{upp_root}/version'):
        with open(f'{upp_root}/version', 'r') as f:
            line = f.readline()
        match = re.search(r'(\d+\.\d+(\.\d+)?)', line)[1]
    else:
        # cli.error is expected to abort; if it returned, `match` would be
        # unbound below — same as the original behavior.
        cli.error('Could not find UPP version!')
    return parse(match)
def check_files(expected_files, fatal=False):
    """
    Check that every path in expected_files is an existing regular file.

    :param expected_files: a path or list of paths
    :param fatal: when True, report the first missing file via cli.error
    :return: True if all files exist, False at the first missing one
    """
    # Accept a single path for convenience (isinstance, not type ==).
    if isinstance(expected_files, str):
        expected_files = [expected_files]
    for file_path in expected_files:
        if not os.path.isfile(file_path):
            if fatal:
                cli.error(f'File {file_path} has not been generated!')
            # Stop at the first missing file, as the original `break` did.
            return False
    return True
def wrf_version(wrf_root):
    """
    Detect the WRF/WRFDA/WRFPLUS version from the README files in wrf_root.

    :return: parsed version object (via parse)
    """
    if os.path.isfile(f'{wrf_root}/README'):
        # `with` closes the handles deterministically (the original leaked them).
        with open(f'{wrf_root}/README', 'r') as f:
            line = f.readline()
    elif os.path.isfile(f'{wrf_root}/README.DA'):
        with open(f'{wrf_root}/README.DA', 'r') as f:
            line = ' '.join(f.readlines())
    elif os.path.isfile(f'{wrf_root}/README.WRFPLUS'):
        with open(f'{wrf_root}/README.WRFPLUS', 'r') as f:
            line = ' '.join(f.readlines())
    else:
        # cli.error is expected to abort; otherwise `line` is unbound below,
        # matching the original behavior.
        cli.error('Could not find WRF version!')
    match = re.search(r'Version (\d+\.\d+(\.\d+)?)', line)[1]
    return parse(match)
def get_value(x, keys, default=None):
    """
    Fetch a nested value x[keys[0]][keys[1]]... for any key depth.

    Generalized from the original hard-coded depths 1-5, which silently
    returned None for deeper paths even when the key existed.

    :param x: nested mapping
    :param keys: sequence of keys, outermost first
    :param default: returned when the path is missing; None makes it fatal
    """
    if has_key(x, keys):
        value = x
        for key in keys:
            value = value[key]
        return value
    elif default is not None:
        # Identity check replaces `default != None`.
        return default
    else:
        cli.error(f'No {keys}!')
def src_exist_on_dst_packages(src, dst):
    """
    Print the list of package names under src that also exist under dst.

    Both directories must exist; otherwise an error is reported and
    nothing is returned. (A stray dead `pass` was removed.)
    """
    src = src if path.isabs(src) else path.abspath(src)
    dst = dst if path.isabs(dst) else path.abspath(dst)
    if not path.exists(src) or not path.exists(dst):
        info = "源和目标目录必须得存在 %s %s %s" % (src, "__" * 5, dst)
        cli.error(info)
        return
    diff_packages = [sp for sp in os.listdir(src) if path.exists(path.join(dst, sp))]
    cli.info(str(diff_packages))
def result_echo_no_via_pager(self):
    """
    Print the analysis result without click's echo_via_pager.

    Normal packages are printed through cli.info, abnormal ones through
    cli.error.
    :return: None
    """
    all_packages = (self.normal_packages + self.abnormal_packages)
    print_normal_list = []
    print_abnormal_list = []
    for package in all_packages:
        if package.is_normal_package:
            print_normal_list.append(package.get_formatted_package_info())
        else:
            # Plain else replaces the redundant `elif not is_normal_package`.
            print_abnormal_list.append(package.get_formatted_abnormal_reason())
    cli.info("\n".join(print_normal_list))
    cli.error("\n".join(print_abnormal_list))
def decrypt_endswith_enc_file(self):
    """
    Decrypt every file in self.unknown_files whose extension is ".enc",
    then refresh the package info if anything was processed.

    NOTE: the original docstring claimed this deletes skip/sk/pk files,
    which did not match the code — only ".enc" files are decrypted.
    :return: None
    """
    update_flag = False
    for file in self.unknown_files:
        file_full_path = path.join(self.package_path, file)
        file_suffix = file[file.rfind(".") + 1:]
        if file_suffix == "enc":
            try:
                decrypt_file(file_full_path)
            except Exception:
                # Narrowed from a bare except; best-effort, keep processing.
                cli.error("\n 当解密(%s)文件的时候出错了" % file_full_path)
            # Set even when decryption failed, matching original behavior.
            update_flag = True
    if update_flag:
        self.update_info()
def get_package_metadata_info(packages_path, keys):
    """
    Read one metadata file per package under packages_path and print the
    values of the given keys; at least one key is required.
    """
    if not path.isabs(packages_path):
        # Normalise to an absolute path without trailing slashes
        # (rstrip("//") stripped the same character set as rstrip("/")).
        packages_path = (path.abspath(packages_path)).rstrip("/")
    if not keys:
        cli.error("Warning:keys 必须得有一个")
        return
    cli.warning("Task Starting...")
    for package in os.listdir(packages_path):
        package_path = path.join(packages_path, package)
        key_values = get_package_metadata_file_con_by_key(package_path, keys)
        cli.info("%s 包 %s" % (package, str(key_values)))
    cli.warning("Task Finish!")
def tar_upload(packages_folder_path: str):
    """Compress a verified packages directory and upload it to the remote server."""
    if not path.isdir(packages_folder_path):
        cli.error("{} 必须是一个目录".format(packages_folder_path))
        exit()
    helper = FtpServerHelperStep2(path.abspath(packages_folder_path))
    cli.warning("Task Start...")
    # Step 1: create the tar.gz under the data_sync_backup directory.
    helper.compress_folder()
    # Step 2: push the archive to the Microsoft server.
    helper.upload_2_weiruan()
    cli.warning("Task Finished!")
def compare_file(file_a, file_b, compare_type):
    """
    Line-based comparison of two files; use --help for details.

    compare_type selects the output:
      a_and_b -- all (deduplicated) lines of both files
      not_a   -- lines of file_b not present in file_a
      not_b   -- lines of file_a not present in file_b
    """
    if not compare_type:
        cli.error("请用 -t 参数指定比较类型,更多信息用 --help 获取")
        # Explicit return so we never proceed with a missing compare type,
        # even if cli.error does not terminate the process.
        return
    a_lines = get_file_lines(file_a)  # deduplicated
    b_lines = get_file_lines(file_b)  # deduplicated
    lines = []
    if compare_type == "a_and_b":
        lines.extend(a_lines)
        lines.extend(b_lines)
    elif compare_type == "not_a":
        # Set membership instead of an O(n*m) list scan; order preserved.
        a_set = set(a_lines)
        lines = [line for line in b_lines if line not in a_set]
    elif compare_type == "not_b":
        b_set = set(b_lines)
        lines = [line for line in a_lines if line not in b_set]
    print("\n".join(lines))
def edit_file(filepath, changes, return_on_first_match=False):
    """
    Edit filepath in place, applying a list of changes.

    Each change is a pair (selector, replacement): an int selector replaces
    that 1-based line wholesale; any other selector is treated as a regex
    (case-insensitive) and substituted within the line.

    :param return_on_first_match: stop applying changes after the first
        regex match. NOTE(review): only regex changes set the matched flag;
        line-number replacements do not — preserved from the original.
    """
    try:
        matched = False
        with fileinput.FileInput(filepath, inplace=True) as file:
            # enumerate replaces the hand-rolled line counter.
            for line_number, line in enumerate(file, start=1):
                changed_line = line
                if not return_on_first_match or not matched:
                    for change in changes:
                        if isinstance(change[0], int):
                            if line_number == change[0]:
                                changed_line = change[1]
                        elif re.search(change[0], changed_line, flags=re.IGNORECASE):
                            changed_line = re.sub(change[0], change[1], changed_line, flags=re.IGNORECASE)
                            matched = True
                # stdout is redirected into the file by FileInput(inplace=True).
                print(changed_line, end='')
    except Exception as e:
        cli.error(f'Failed to edit file {filepath}! {e}')
def check_array_size(config, keys, size):
    """
    Verify that the nested list at config[keys[0]][keys[1]]... has exactly
    `size` elements; a mismatch is reported via cli.error.

    Generalized from the original hard-coded 1/2/3-key branches, so any
    key depth now works. Missing paths are silently accepted, as before.
    """
    if not has_key(config, keys):
        return
    value = config
    for key in keys:
        value = value[key]
    if len(value) != size:
        cli.error(f'Parameter max_dom is {config["domains"]["max_dom"]}, but {keys} only has {len(value)} elements!')
import cli
import os

# Machine/queue configuration read from the environment. `queue` and
# `ntasks_per_node` stay None when no scheduler queue is configured, in
# which case executables run on the current node.
queue = None
ntasks_per_node = None
if 'WRF_SCRIPTS_QUEUE' not in os.environ:
    cli.warning('Environment WRF_SCRIPTS_QUEUE is not set. Will run executable in current node!')
elif 'WRF_SCRIPTS_NTASKS_PER_NODE' not in os.environ:
    cli.error('Environment WRF_SCRIPTS_NTASKS_PER_NODE should be set by you!')
else:
    queue = os.environ['WRF_SCRIPTS_QUEUE']
    ntasks_per_node = os.environ['WRF_SCRIPTS_NTASKS_PER_NODE']
if queue is not None:
    # Allow user to set multiple queues with descending priority.
    queue = queue.split(',')
    ntasks_per_node = [int(x) for x in ntasks_per_node.split(',')]
if __name__ == '__main__':
    # NOTE: this script is Python 2 (print statements, `except E, e` syntax).
    # Values and helpers
    class Modes(object):
        # Enum of available CLI commands, generated from cli.command_list.
        __metaclass__ = ancillary.Enum
        __elements__ = cli.command_list

    def error(msg):
        # Print a fatal message and terminate with a non-zero exit status.
        print "Error: %s" % msg
        sys.exit(1)

    # Parse command line
    try:
        mode, _ = cli.parse(sys.argv[1:])
    except cli.CommandParsingError, e:
        cli.error(e)

    # No command given
    if mode is None:
        cli.error("must specify command")

    # Trace contour of the old world
    if mode == Modes.trace:
        print "Getting existing world contour..."
        contour = get_contour(cli.contour_reset)
        print "Tracing world contour..."
        try:
            contour.trace_combine(cli.world_dir, cli.contour_combine, cli.merge_types, cli.contour_select, cli.contour_join)
        except (EnvironmentError, ContourLoadError), e:
            error('could not read world contour: %s' % e)
def check_mandatory_params(config, keys):
    """Report via cli.error every key in keys that is missing from config."""
    for key in keys:
        # `key not in config` replaces the unidiomatic `not key in config`.
        if key not in config:
            cli.error(f'Parameter {key} is not in configuration file!')
def parse_config(config_json):
    """
    Parse a WRF run configuration from a JSON file path or a raw JSON string.

    Fills in defaults for single-domain runs, coerces per-domain parameters
    to arrays and validates their sizes against max_dom, derives nesting
    parameters (parent_grid_ratio, grid_id, parent_id), converts custom
    start/end times to pendulum datetimes, copies grid parameters into the
    geogrid section, and picks a time_step from the outer-domain dx.

    :return: the normalized config dict
    """
    config = {}
    if os.path.isfile(config_json):
        # `with` closes the handle deterministically (original leaked it).
        with open(config_json, 'r') as f:
            config = json.loads(f.read())
    else:
        try:
            config = json.loads(config_json)
        except Exception:
            # Narrowed from a bare except.
            cli.error(f'{cli.red(config_json)} is not a JSON file or text!')
    # Set defaults.
    if not has_key(config, 'share'): config['share'] = {}
    if not has_key(config, 'domains'): config['domains'] = {}
    check_mandatory_params(config['domains'], ('max_dom', 'dx', 'dy', 'e_we', 'e_sn'))
    # - parent_grid_ratio, parent_time_step_ratio, i_parent_start, j_parent_start
    if config['domains']['max_dom'] == 1:
        config['domains']['parent_grid_ratio'] = [1]
        config['domains']['parent_time_step_ratio'] = [1]
        config['domains']['i_parent_start'] = [1]
        config['domains']['j_parent_start'] = [1]
    else:
        if has_key(config, ('custom', 'start_time')):
            check_mandatory_params(config['domains'], ['parent_time_step_ratio'])
        check_mandatory_params(config['domains'], ('i_parent_start', 'j_parent_start'))
    # Change to array.
    change_to_array(config, ('domains', 'dx'))
    change_to_array(config, ('domains', 'dy'))
    change_to_array(config, ('domains', 'e_we'))
    change_to_array(config, ('domains', 'e_sn'))
    change_to_array(config, ('domains', 'e_vert'))
    change_to_array(config, ('domains', 'parent_time_step_ratio'))
    change_to_array(config, ('domains', 'i_parent_start'))
    change_to_array(config, ('domains', 'j_parent_start'))
    # Check dimension.
    check_array_size(config, ('domains', 'dx'), config['domains']['max_dom'])
    check_array_size(config, ('domains', 'dy'), config['domains']['max_dom'])
    check_array_size(config, ('domains', 'e_we'), config['domains']['max_dom'])
    check_array_size(config, ('domains', 'e_sn'), config['domains']['max_dom'])
    check_array_size(config, ('domains', 'e_vert'), config['domains']['max_dom'])
    check_array_size(config, ('domains', 'parent_time_step_ratio'), config['domains']['max_dom'])
    check_array_size(config, ('domains', 'i_parent_start'), config['domains']['max_dom'])
    check_array_size(config, ('domains', 'j_parent_start'), config['domains']['max_dom'])
    if not has_key(config, ('domains', 'parent_grid_ratio')):
        # Derive nest ratios from successive dx values.
        config['domains']['parent_grid_ratio'] = [1]
        for i in range(1, len(config['domains']['dx'])):
            config['domains']['parent_grid_ratio'].append(
                int(config['domains']['dx'][i - 1] / config['domains']['dx'][i]))
    config['domains']['grid_id'] = [i + 1 for i in range(config['domains']['max_dom'])]
    if not has_key(config, ('domains', 'parent_id')):
        config['domains']['parent_id'] = [i for i in range(config['domains']['max_dom'])]
    # Transform parameters.
    if has_key(config, ('custom', 'start_time')):
        config['custom']['start_time'] = pendulum.from_format(
            config['custom']['start_time'], 'YYYYMMDDHH')
        if not has_key(config, ('custom', 'end_time')):
            config['custom']['end_time'] = config['custom']['start_time'].add(
                hours=config['custom']['forecast_hours'])
        else:
            config['custom']['end_time'] = pendulum.from_format(
                config['custom']['end_time'], 'YYYYMMDDHH')
    # Redundant after change_to_array above, but kept as a safety net.
    if not isinstance(config['domains']['dx'], list):
        config['domains']['dx'] = [config['domains']['dx']]
    if not isinstance(config['domains']['dy'], list):
        config['domains']['dy'] = [config['domains']['dy']]
    # Copy grid parameters into the geogrid section; make sure it exists
    # first (the original raised KeyError when it was absent).
    if not has_key(config, 'geogrid'):
        config['geogrid'] = {}
    for key in ('e_we', 'e_sn', 'parent_id', 'parent_grid_ratio', 'i_parent_start', 'j_parent_start'):
        config['geogrid'][key] = config['domains'][key]
    config['geogrid']['dx'] = config['domains']['dx'][0]
    config['geogrid']['dy'] = config['domains']['dy'][0]
    # Set time step if not set yet, based on the outer-domain dx (meters).
    if not has_key(config, ('domains', 'time_step')):
        if config['domains']['dx'][0] >= 30000:
            config['domains']['time_step'] = 120
        elif 25000 <= config['domains']['dx'][0] < 30000:
            config['domains']['time_step'] = 120
        elif 20000 <= config['domains']['dx'][0] < 25000:
            config['domains']['time_step'] = 120
        elif 15000 <= config['domains']['dx'][0] < 20000:
            config['domains']['time_step'] = 90
        elif 10000 <= config['domains']['dx'][0] < 15000:
            config['domains']['time_step'] = 60
        elif 5000 <= config['domains']['dx'][0] < 10000:
            config['domains']['time_step'] = 30
        elif 2500 <= config['domains']['dx'][0] < 5000:
            # BUGFIX: the original chained comparison was
            # `2500 <= dx >= 5000`, which is never true here, so dx in
            # [2500, 5000) wrongly fell through to time_step = 5.
            config['domains']['time_step'] = 10
        else:
            config['domains']['time_step'] = 5
    if not has_key(config, 'time_control'):
        config['time_control'] = {}
    # wrfvar* sections
    for tag in range(1, 23):
        section = f'wrfvar{tag}'
        if section not in config:
            config[section] = {}
    if not has_key(config, ('wrfvar3', 'ob_format')):
        config['wrfvar3']['ob_format'] = 2
    if config['wrfvar3']['ob_format'] == 1 and not has_key(config, ('custom', 'wrfda', 'prepbufr_source')):
        # Create intermediate dicts so the assignment cannot KeyError.
        if not has_key(config, 'custom'): config['custom'] = {}
        if not has_key(config, ('custom', 'wrfda')): config['custom']['wrfda'] = {}
        config['custom']['wrfda']['prepbufr_source'] = 'gdas'
    return config
def _watch_job_log(args, job_id, logfile):
    """Poll a scheduler job, tailing logfile, until it finishes.

    Prints each new last log line (truncated to 80 chars); Ctrl-C kills
    the job and exits. Extracted from the duplicated slurm/pbs wait loops.
    """
    cli.notice(f'Wait for job {job_id}.')
    try:
        last_line = None
        while job_running(args, job_id):
            sleep(10)
            if not os.path.isfile(logfile):
                if job_pending(args, job_id):
                    cli.notice(f'Job {job_id} is still pending.')
                continue
            line = subprocess.run(['tail', '-n', '1', logfile], stdout=subprocess.PIPE).stdout.decode('utf-8').strip()
            if last_line != line and line != '':
                last_line = line
                print(f'{cli.cyan("==>")} {last_line if len(last_line) <= 80 else last_line[:80]}')
    except KeyboardInterrupt:
        kill_job(args, job_id)
        exit(1)

def submit_job(cmd, ntasks, config, args, logfile='rsl.out.0000', wait=False, queue_idx=0):
    """
    Run cmd with mpiexec, either through Slurm, PBS, or directly.

    :param cmd: executable command to run under mpiexec
    :param ntasks: total number of MPI tasks
    :param config: parsed run configuration (config["tag"] names the job)
    :param args: CLI args (slurm/pbs flags, ntasks_per_node override)
    :param logfile: log file to remove before the run and tail during it
    :param wait: block until the scheduler job finishes
    :param queue_idx: index into mach.queue for Slurm retry on failure
    :return: the scheduler job id (Slurm/PBS), else None
    """
    if logfile: run(f'rm -f {logfile}')
    ntasks_per_node = None
    if args.ntasks_per_node is not None:
        ntasks_per_node = args.ntasks_per_node
    elif mach.ntasks_per_node:
        ntasks_per_node = mach.ntasks_per_node[queue_idx]
    if ntasks_per_node is not None and ntasks < ntasks_per_node:
        cli.warning(f'Change ntasks_per_node from {ntasks_per_node} to {ntasks}.')
        ntasks_per_node = ntasks
    if args.slurm:
        # `with` closes submit.sh deterministically (original leaked it).
        with open('submit.sh', 'w') as f:
            f.write(f'''\
#!/bin/bash
#SBATCH --job-name {config["tag"]}
#SBATCH --comment WRF
#SBATCH --partition {mach.queue[queue_idx]}
#SBATCH --time 24:00:00
#SBATCH --ntasks {ntasks}
#SBATCH --ntasks-per-node {ntasks_per_node}
#SBATCH --nodes {int(ntasks / ntasks_per_node)}
mpiexec -np {ntasks} {cmd}
''')
        stdout = run('sbatch < submit.sh', stdout=True)
        match = re.search(r'Submitted batch job (\w+)', stdout)
        if not match:
            if queue_idx < len(mach.queue) - 1:
                cli.warning(f'Failed to submit to queue {mach.queue[queue_idx]}, try queue {mach.queue[queue_idx+1]}.')
                # BUGFIX: propagate the job id from the retry (the original
                # discarded it and returned None).
                return submit_job(cmd, ntasks, config, args, logfile, wait, queue_idx+1)
            else:
                cli.error(f'Failed to submit job!')
        job_id = match[1]
        cli.notice(f'Job {job_id} submitted running {ntasks} tasks.')
        if wait:
            _watch_job_log(args, job_id, logfile)
        return job_id
    elif args.pbs:
        # NOTE(review): this writes the whole mach.queue (a list when several
        # queues are configured) instead of mach.queue[queue_idx] — confirm.
        with open('submit.sh', 'w') as f:
            f.write(f'''\
#!/bin/bash
#PBS -N {config["tag"]}
#PBS -q {mach.queue}
#PBS -l nodes={int(ntasks / ntasks_per_node)}:ppn={ntasks_per_node}
cd $PBS_O_WORKDIR
mpiexec -np {ntasks} -machinefile $PBS_NODEFILE {cmd}
''')
        stdout = run('qsub < submit.sh', stdout=True)
        match = re.search(r'(\w+)', stdout)
        if not match:
            cli.error(f'Failed to parse job id from {stdout}')
        job_id = match[1]
        cli.notice(f'Job {job_id} submitted running {ntasks} tasks.')
        if wait:
            _watch_job_log(args, job_id, logfile)
        return job_id
    else:
        # Run locally in the background and tail the log until done.
        proc = run(f'mpiexec -np {ntasks} {cmd}', bg=True)
        try:
            while proc.poll() is None:
                sleep(10)
                if not os.path.isfile(logfile):
                    continue
                res = subprocess.run(['tail', '-n', '1', logfile], stdout=subprocess.PIPE)
                last_line = res.stdout.decode("utf-8").strip()
                print(f'{cli.cyan("==>")} {last_line if len(last_line) <= 80 else last_line[:80]}')
        except KeyboardInterrupt:
            cli.warning('Ended by user!')
            proc.kill()
            exit(1)