def prepare_sal_release():
    try:
        os.mkdir(sal_rel_dir)
        print('SAL release directory {} created.'.format(sal_rel_dir))
    except FileExistsError:
        print('SAL release directory {} already exists, recreating.'.format(sal_rel_dir))
        delete_files(sal_rel_dir)
        os.mkdir(sal_rel_dir)
    sal_home = get_env_var(constants.sal_home_env_var_name)
    shutil.copytree(sal_home + '/include/', sal_rel_dir + '/include')
    shutil.copytree(sal_home + '/build', sal_rel_dir + '/build')
    shutil.copytree(sal_home + '/lib', sal_rel_dir + '/lib')
    shutil.copytree(sal_home + '/scripts', sal_rel_dir + '/scripts')
    shutil.copytree(sal_home + '/config', sal_rel_dir + '/config')
    shutil.copytree(sal_home + '/proto', sal_rel_dir + '/proto')
    os.mkdir(sal_rel_dir + '/test')
    shutil.copyfile(sal_home + '/test/sal_service_test_direct.py',
                    sal_rel_dir + '/test/sal_service_test_direct.py')
    shutil.copyfile(sal_home + '/sal_services_direct_pb2.py',
                    sal_rel_dir + '/sal_services_direct_pb2.py')
    shutil.copyfile(sal_home + '/sal_services_direct_pb2_grpc.py',
                    sal_rel_dir + '/sal_services_direct_pb2_grpc.py')
    shutil.copyfile(sal_home + '/sal_services_pb2.py',
                    sal_rel_dir + '/sal_services_pb2.py')
    shutil.copyfile(sal_home + '/sal_services_pb2_grpc.py',
                    sal_rel_dir + '/sal_services_pb2_grpc.py')
    print('SAL release is available at {}'.format(sal_rel_dir))
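# `delete_files` is used throughout this section but its implementation is not
# shown here. A minimal sketch, assuming it accepts a path or glob pattern and
# removes matching files and directory trees (the test further below exercises
# the glob form; clean_sal() passes directories):
import glob
import os
import shutil


def delete_files(path_or_pattern):
    """Delete all files/directories matching a path or glob pattern (sketch)."""
    for match in glob.glob(path_or_pattern):
        if os.path.isdir(match):
            shutil.rmtree(match, ignore_errors=True)
        else:
            os.remove(match)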
def prepare_bsp_pkg():
    bsp_dev_abs = get_bsp_dev_abs_path()
    earliest_commit_hash = execute_cmd_n_get_output_2(
        'git --git-dir {0}/.git rev-list --max-parents=0 HEAD'.format(bsp_dev_abs))
    latest_commit_hash = execute_cmd_n_get_output_2(
        'git --git-dir {0}/.git rev-parse HEAD'.format(bsp_dev_abs))
    os.chdir(bsp_dev_abs)
    execute_cmd_n_get_output_2(
        'git --git-dir {0}/.git diff {1} {2} -- \':!./platforms/apsn/\' \':!.idea/\' \':!.gitignore\' > {3}'
        .format(bsp_dev_abs, earliest_commit_hash, latest_commit_hash,
                bsp_dev_abs + '/' + diff_file))
    latest_commit_hash_short = execute_cmd_n_get_output_2(
        'git --git-dir {0}/.git rev-parse --short HEAD'.format(bsp_dev_abs))
    bsp_name = '/' + os.path.basename(bsp_dev_abs) + '_' + latest_commit_hash_short
    bsp_rel_dir = release_dir + bsp_name
    try:
        os.mkdir(bsp_rel_dir)
        print('BSP release directory {} created.'.format(bsp_rel_dir))
    except FileExistsError:
        print('BSP release directory {} already exists, recreating.'.format(bsp_rel_dir))
        delete_files(bsp_rel_dir)
        os.mkdir(bsp_rel_dir)
    shutil.move(bsp_dev_abs + '/' + diff_file, bsp_rel_dir + '/' + diff_file)
    shutil.copytree(bsp_dev_abs + '/platforms/apsn/', bsp_rel_dir + '/apsn')
    shutil.make_archive(bsp_rel_dir, 'zip', bsp_rel_dir)
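# `execute_cmd_n_get_output_2` is an external helper; judging by its use above
# it runs a shell command and returns stdout as a stripped string (the commit
# hashes are substituted directly into later commands, and the diff command
# relies on shell redirection). A minimal sketch under that assumption:
import subprocess


def execute_cmd_n_get_output_2(cmd):
    """Run a shell command and return its stdout as a stripped string (sketch)."""
    return subprocess.check_output(cmd, shell=True).decode().strip()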
def clean_sal():
    print('Cleaning SAL...')
    sal_home = get_env_var(constants.sal_home_env_var_name)
    to_delete = [sal_home + f for f in
                 ['/bin', '/build', '/logs/', '/CMakeCache.txt', '/Makefile',
                  '/CMakeFiles', '/cmake-build-debug']]
    os.system('make -C {} clean'.format(sal_home))
    for file in to_delete:
        print('Deleting {}'.format(file))
        delete_files(file)
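# `get_env_var` presumably resolves an environment variable such as SAL_HOME
# and fails loudly when it is unset. A minimal sketch of that assumption:
import os


def get_env_var(name):
    """Return the value of an environment variable, failing if unset (sketch)."""
    value = os.environ.get(name)
    if value is None:
        raise RuntimeError('Required environment variable {} is not set.'.format(name))
    return value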
def clean_bsp():
    print('Cleaning BSP...')
    to_delete = [get_aps_bsp_pkg_abs_path() + f for f in
                 ['/CMakeCache.txt', '/Makefile', '/CMakeFiles',
                  '/cmake-build-debug']]
    execute_cmd('make -C {} clean'.format(get_aps_bsp_pkg_abs_path()))
    for file in to_delete:
        print('Deleting {}'.format(file))
        delete_files(file)
    return True
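# `execute_cmd`, unlike `execute_cmd_n_get_output_2` above, is used only for
# its side effects. A minimal sketch, assuming it runs a shell command and
# raises on a non-zero exit status:
import subprocess


def execute_cmd(cmd):
    """Run a shell command, raising CalledProcessError on failure (sketch)."""
    subprocess.run(cmd, shell=True, check=True)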
def test_delete_files():
    setup_test_files()
    # set the readonly.txt file to read/write so it can be deleted
    readonly_file_name = f'{test_folder_path}/readonly.txt'
    if is_file(readonly_file_name):
        os.chmod(readonly_file_name, S_IWUSR | S_IREAD)
    # delete both the readonly.txt and the readwrite.txt files
    delete_files(f'{test_folder_path}/*.txt')
    # assert both files have been deleted
    assert is_file(f'{test_folder_path}/readwrite.txt') is False
    assert is_file(f'{test_folder_path}/readonly.txt') is False
    teardown_test_files()
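# `setup_test_files` and `teardown_test_files` are fixtures referenced by the
# test above; their bodies are not shown. A minimal sketch, assuming they
# create and remove a scratch folder holding one writable and one read-only
# file (S_IREAD/S_IWUSR come from the `stat` module):
import os
import shutil
from stat import S_IREAD, S_IWUSR


def setup_test_files():
    os.makedirs(test_folder_path, exist_ok=True)
    with open(f'{test_folder_path}/readwrite.txt', 'w') as f:
        f.write('readwrite')
    readonly_file_name = f'{test_folder_path}/readonly.txt'
    with open(readonly_file_name, 'w') as f:
        f.write('readonly')
    os.chmod(readonly_file_name, S_IREAD)  # make the file read-only


def teardown_test_files():
    shutil.rmtree(test_folder_path, ignore_errors=True)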
def main():
    duration_list = ['hour', 'day']

    # parse args
    task_name_list = config.task_list.keys()
    start_time, stop_time, task_name, duration = common.parse_args(
        task_name_list, duration_list)
    task = config.task_list[task_name]

    # init log
    common.init_log(task['log']['filename'], task['log']['debug'])
    logging.info('======\n%s\n' %
                 datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S %Z'))
    logging.info(" TASK [%s] START ..." % task_name)

    # init t_timedelta and summary table suffix from duration
    if duration == 'hour':
        t_timedelta = datetime.timedelta(hours=1)
        summary_suffix_format = '_hourly_%Y%m%d'
    elif duration == 'day':
        t_timedelta = datetime.timedelta(days=1)
        summary_suffix_format = '_daily_%Y%m'
    else:
        logging.info('main():bad param `duration`:%s' % duration)
        return

    # make tmp sql file
    _prefix = task['tmp_summary_sql_file_name']
    _suffix = task['tmp_summary_sql_file_name_suffix']
    global_tmp_sql_file = common.make_file_name(_prefix, _suffix)
    logging.info('Global Tmp Sql File:%s' % global_tmp_sql_file)
    # delete any old file with the same name
    common.delete_files(global_tmp_sql_file)

    summary_dimension_1_name_list = task['summary_dimension_1_name_list']
    summary_dimension_2_name_list = task['summary_dimension_2_name_list']
    summary_metric_name_list = task['summary_metric_name_list']

    # reference to the current module
    this_module = sys.modules[__name__]

    # create dimension table
    create_table.create_table_dimension_enum(global_tmp_sql_file,
                                             task['dimension_table_name'])

    # resolve summary functions by name
    create_summary_table_function = getattr(
        create_table, task['create_summary_table_function'])
    compute_summary_function = getattr(this_module,
                                       task['compute_summary_function'])

    # main loop
    # save summary_table_name
    dump_table_name_list = [task['dimension_table_name']]
    # foreach time range
    p = start_time
    sql_list = []
    while p < stop_time:
        # prepare origin_table_name
        _prefix = task['raw_data_table_name']
        _suffix = task['raw_data_table_name_suffix']
        _format_suffix = p.strftime(_suffix)
        origin_table_name = '%s%s' % (_prefix, _format_suffix)
        print 'origin_table_name', origin_table_name

        # prepare summary_table_name
        _prefix = task['summary_data_table_name']
        _format_suffix = p.strftime(summary_suffix_format)
        summary_table_name = '%s%s' % (_prefix, _format_suffix)
        # save summary_table_name
        dump_table_name_list.append(summary_table_name)

        # create summary table
        tmp_sql = create_summary_table_function(None, summary_table_name)
        sql_list.append(tmp_sql)

        # summary compute sql
        tmp_sql_list = compute_summary_function(p, duration,
                                                origin_table_name,
                                                summary_table_name,
                                                summary_dimension_1_name_list,
                                                summary_dimension_2_name_list,
                                                summary_metric_name_list)
        sql_list += tmp_sql_list

        # compute ctr
        tmp_sql = update_summary_ctr(summary_table_name)
        sql_list.append(tmp_sql)

        # extract dimension_enum
        category = task['category']
        dimension_name_list = list(
            set(summary_dimension_1_name_list + summary_dimension_2_name_list))
        tmp_sql_list = compute_dimension_enum(origin_table_name,
                                              task['dimension_table_name'],
                                              dimension_name_list, category)
        sql_list += tmp_sql_list

        # next
        p += t_timedelta
    # end while

    # filter out duplicate or None sql
    good_sql_list = []
    for sql in sql_list:
        if not sql or not isinstance(sql, (unicode, str)) \
                or sql in good_sql_list:
            continue
        good_sql_list.append(sql)

    # write sql to file
    with open(global_tmp_sql_file, 'a') as f:
        big_sql = '\n'.join(good_sql_list)
        f.write(big_sql)
        f.write('\n')

    # execute SQL
    local_mysql_auth = config.local_mysql_auth
    common.execute_mysql_sql(local_mysql_auth['host'],
                             local_mysql_auth['port'],
                             local_mysql_auth['user'],
                             local_mysql_auth['passwd'],
                             local_mysql_auth['dbname'],
                             global_tmp_sql_file)

    # clean up the tmp sql file
    if os.path.exists(global_tmp_sql_file):
        if task['keep_summary_sql_file']:
            logging.info("keep the tmp sql file @ %s" % global_tmp_sql_file)
        else:
            try:
                os.remove(global_tmp_sql_file)
            except Exception as e:
                logging.error("main():Delete the tmp sql file: %s:[%s]" %
                              (global_tmp_sql_file, e))
    else:
        logging.warning("There is no tmp sql file:%s" % global_tmp_sql_file)

    # dump summary data to the remote server
    _prefix = task['mysql_dump_file_name']
    _suffix_fmt = task['mysql_dump_file_name_suffix']
    dump_file_name = common.make_file_name(_prefix, _suffix_fmt)
    common.delete_files(dump_file_name)
    local_mysql_auth = config.local_mysql_auth
    dump_table_name_list = list(set(dump_table_name_list))
    # dump tables into dump file
    for dump_table_name in dump_table_name_list:
        common.execute_mysql_dump(local_mysql_auth['host'],
                                  local_mysql_auth['port'],
                                  local_mysql_auth['user'],
                                  local_mysql_auth['passwd'],
                                  local_mysql_auth['dbname'],
                                  dump_table_name, dump_file_name)

    # execute dump SQL on the remote server
    remote_mysql_auth = config.remote_mysql_auth
    common.execute_mysql_sql(remote_mysql_auth['host'],
                             remote_mysql_auth['port'],
                             remote_mysql_auth['user'],
                             remote_mysql_auth['passwd'],
                             remote_mysql_auth['dbname'],
                             dump_file_name)
    if task['keep_mysql_dump_file']:
        logging.info("keep the dump sql file @ %s" % dump_file_name)
    else:
        common.delete_files(dump_file_name)
    # end
    return
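# `common.execute_mysql_sql` feeds a generated SQL file to MySQL; its
# implementation is not part of this section and could equally use a Python
# driver. A minimal sketch, assuming it shells out to the mysql client:
import os


def execute_mysql_sql(host, port, user, passwd, dbname, sql_file):
    """Pipe a SQL file into the mysql CLI (sketch)."""
    cmd = 'mysql -h%s -P%s -u%s -p%s %s < %s' % (
        host, port, user, passwd, dbname, sql_file)
    return os.system(cmd)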
def process_table(self, db, db_engine, schema_name, table_name, table_object,
                  table_history, current_timestamp):
    """Process a specific table."""
    # skip default table and ignored tables
    if table_name == 'default':
        return
    elif table_object.ignore_table:
        logger.info(f'Skipping table: {table_name} (ignore_table=1)')
        return
    elif table_object.drop_table:
        logger.info(f'Skipping table: {table_name} (drop_table=1)')
        return

    # initialize table history's last timestamp to first timestamp if not set yet
    if not table_history.last_timestamp:
        # default first timestamp to 1900-01-01 if project has no first timestamp
        if not table_object.first_timestamp:
            table_object.first_timestamp = '1900-01-01'
        table_history.last_timestamp = iso_to_datetime(table_object.first_timestamp)

    # skip table if last timestamp > current timestamp, e.g. tables pre-configured for the future
    if table_history.last_timestamp > current_timestamp:
        explanation = f'first/last timestamp {table_history.last_timestamp} > current timestamp {current_timestamp}'
        logger.info(f'Skipping table: {table_name} ({explanation})')
        return

    # if we're here then we have a legit last timestamp value to use for CDC
    last_timestamp = table_history.last_timestamp
    self.stats.start(table_name, 'table')
    # logger.info(f'Processing {table_name} ...')

    # create a fresh cursor for each table
    cursor = db.conn.cursor()

    # save table object for stage
    output_stream = open(f'{self.work_folder_name}/{table_name}.table', 'wb')
    pickle.dump(table_object, output_stream)
    output_stream.close()

    # discover table schema
    table_schema = db_engine.select_table_schema(schema_name, table_name)

    # remove ignored columns from table schema
    if table_object.ignore_columns:
        # find columns to ignore (remove) based on ignore column names/glob-style patterns
        ignore_columns = []
        for column_name in table_schema.columns:
            for pattern in split(table_object.ignore_columns):
                # use fnmatch() to provide glob style matching
                if fnmatch.fnmatch(column_name.lower(), pattern.lower()):
                    ignore_columns.append(column_name)

        # delete ignored columns from our table schema
        for column_name in ignore_columns:
            logger.info(f'Ignore_column: {table_name}.{column_name}')
            table_schema.columns.pop(column_name)

    # save table schema for stage to use
    output_stream = open(f'{self.work_folder_name}/{table_name}.schema', 'wb')
    pickle.dump(table_schema, output_stream)
    output_stream.close()

    # save table pk for stage to use
    pk_columns = db_engine.select_table_pk(schema_name, table_name)
    if not pk_columns and table_object.primary_key:
        pk_columns = table_object.primary_key
    output_stream = open(f'{self.work_folder_name}/{table_name}.pk', 'w')
    output_stream.write(pk_columns)
    output_stream.close()

    # clear cdc if it doesn't match timestamp/rowversion
    table_object.cdc = table_object.cdc.lower()
    if not table_object.cdc or table_object.cdc not in ('timestamp', 'rowversion'):
        table_object.cdc = ''

    # if no pk_columns, then clear table cdc
    if not pk_columns:
        if table_object.cdc and table_object.cdc != 'none':
            logger.info(f'Warning: {table_name} cdc={table_object.cdc} but table has no pk column(s)')
            table_object.cdc = 'none'
        # we still keep timestamp because it's required for filtering first_timestamp - current_timestamp
        # if table_object.timestamp:
        #     logger.info(f'Warning: {table_name} timestamp={table_object.timestamp} but table has no pk column(s)')
        #     table_object.timestamp = ''

    # update table object properties for cdc select build
    column_names = list(table_schema.columns.keys())
    table_object.schema_name = schema_name
    table_object.table_name = table_name
    table_object.column_names = column_names
    select_cdc = cdc_select.SelectCDC(table_object)
    sql = select_cdc.select(self.job_id, current_timestamp, last_timestamp)
    # logger.info(f'Capture SQL:\n{sql}\n')

    # run sql here vs via db_engine.capture_select
    # cursor = db_engine.capture_select(schema_name, table_name, column_names, last_timestamp, current_timestamp)
    cursor.execute(sql)

    # capture rows in fixed size batches to support unlimited size record counts
    # Note: Batching on capture side allows stage to insert multiple batches in parallel.
    if self.project.batch_size:
        batch_size = int(self.project.batch_size)
        # logger.info(f'Using project specific batch size: {self.project.batch_size}')
    else:
        batch_size = 1_000_000

    batch_number = 0
    row_count = 0
    file_size = 0
    while True:
        batch_number += 1
        rows = cursor.fetchmany(batch_size)
        if not rows:
            break
        logger.info(f'Table({table_name}): batch={batch_number} using batch size {batch_size:,}')

        # flatten rows to list of column values
        json_rows = [list(row) for row in rows]
        output_file = f'{self.work_folder_name}/{table_name}#{batch_number:04}.json'
        with open(output_file, 'w') as output_stream:
            # indent=2 for debugging
            json.dump(json_rows, output_stream, indent=2, default=json_serializer)

        # track stats
        row_count += len(json_rows)
        file_size += pathlib.Path(output_file).stat().st_size

    # if no cdc but order is set, hash the output files to see if they are identical to the last run
    if (not table_object.cdc or table_object.cdc == 'none') and table_object.order:
        print(f'Checking {table_name} file hash based on cdc={table_object.cdc} and order={table_object.order}')
        table_data_files = f'{self.work_folder_name}/{table_name}#*.json'
        current_filehash = hash_files(table_data_files)
        if table_history.last_filehash == current_filehash:
            # suppress this update
            print(f'Table({table_name}): identical file hash, update suppressed')
            logger.info(f'Table({table_name}): identical file hash, update suppressed')
            row_count = 0
            file_size = 0
            # delete exported json files
            delete_files(table_data_files)
        else:
            print(f'Table({table_name}): {table_history.last_filehash} != {current_filehash}')
            table_history.last_filehash = current_filehash

    # update table history with new last timestamp value
    table_history.last_timestamp = current_timestamp

    # track total row count and file size across all of a table's batched json files
    self.stats.stop(table_name, row_count, file_size)

    # save interim state of stats for diagnostics
    self.stats.save()
    self.job_row_count += row_count
    self.job_file_size += file_size

    # explicitly close cursor when finished
    # cursor.close()
    return
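# `hash_files` drives the update-suppression logic above: when a table has no
# CDC but a stable order, identical output files mean the update can be
# skipped. A minimal sketch, assuming it hashes the contents of all files
# matching a glob pattern in sorted (deterministic) order:
import glob
import hashlib


def hash_files(pattern):
    """Return a hex digest over the contents of all files matching pattern (sketch)."""
    digest = hashlib.sha256()
    for file_name in sorted(glob.glob(pattern)):
        with open(file_name, 'rb') as f:
            for chunk in iter(lambda: f.read(65536), b''):
                digest.update(chunk)
    return digest.hexdigest()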
def main():
    global global_fail_count
    global global_success_count
    global global_file_list

    # parse args
    task_name_list = config.task_list.keys()
    start_time, stop_time, task_name, _ = common.parse_args(task_name_list)
    task = config.task_list[task_name]

    # init task app local data
    task['app'] = {}

    # init log
    common.init_log(task['log']['filename'], task['log']['debug'])
    logging.info('======\n%s\n' %
                 datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S %Z'))
    logging.info(" TASK [%s] START ..." % task_name)

    # init parser json function
    task['app']['parser_json'] = getattr(parser, task['parser_function_name'])

    # make tmp sql file
    _prefix = task['tmp_sql_file_name']
    _suffix = task['tmp_sql_file_name_suffix']
    global_tmp_sql_file = common.make_file_name(_prefix, _suffix)
    logging.info('Global Tmp Sql File:%s' % global_tmp_sql_file)
    # save global_tmp_sql_file to task
    task['app']['global_tmp_sql_file'] = global_tmp_sql_file
    # delete any old file with the same name
    common.delete_files(global_tmp_sql_file)

    # init s3 helper, download s3 file to local
    amazon_s3_auth = task['amazon_s3_auth']
    s3 = S3Helper(**amazon_s3_auth)
    aws_appid_list = task['aws_appid_list']
    tmp_local_file_dir = task['tmp_local_file_dir']
    keep_s3_file = task['keep_s3_file']

    # main loop
    # foreach time range
    p = start_time
    while p < stop_time:
        # prepare raw data table name
        _prefix = task['raw_data_table_name']
        _suffix = task['raw_data_table_name_suffix']
        _format_suffix = p.strftime(_suffix)
        table_name = '%s%s' % (_prefix, _format_suffix)
        # save table_name to task
        task['app']['table_name'] = table_name

        # create table if it does not exist
        create_raw_data_table_function = task['create_raw_data_table_function']
        create_table_function = getattr(create_table,
                                        create_raw_data_table_function)
        create_table_function(global_tmp_sql_file, table_name)

        # foreach aws app id
        for aws_appid in aws_appid_list:
            key_path = 'awsma/events/%s/' % aws_appid
            local_s3_file_list = common.download_from_s3(
                s3, key_path, p, tmp_local_file_dir)
            # main_loop(task, local_s3_file_list)
            # delete local s3 files
            if not keep_s3_file:
                common.delete_files(local_s3_file_list)

        p += datetime.timedelta(hours=1)
    # exit()

    # execute SQL
    local_mysql_auth = config.local_mysql_auth
    common.execute_mysql_sql(local_mysql_auth['host'],
                             local_mysql_auth['port'],
                             local_mysql_auth['user'],
                             local_mysql_auth['passwd'],
                             local_mysql_auth['dbname'],
                             global_tmp_sql_file)

    # clean up the tmp sql file
    if os.path.exists(global_tmp_sql_file):
        if task['keep_sql_file']:
            logging.info("keep the tmp sql file @ %s" % global_tmp_sql_file)
        else:
            try:
                os.remove(global_tmp_sql_file)
            except Exception as e:
                logging.error("main():Delete the tmp sql file: %s:[%s]" %
                              (global_tmp_sql_file, e))
    else:
        logging.warning("There is no tmp sql file:%s" % global_tmp_sql_file)
    return
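# `common.make_file_name` appears in both pipelines above with a prefix and a
# strftime-style suffix (tmp sql and mysql dump file names). A minimal sketch,
# assuming the suffix format is expanded against the current time:
import datetime


def make_file_name(prefix, suffix_format):
    """Build a file name as prefix + now.strftime(suffix_format) (sketch)."""
    return '%s%s' % (prefix, datetime.datetime.now().strftime(suffix_format))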
def prepare_sal_release():
    try:
        os.mkdir(sal_rel_dir)
        print('SAL release directory {} created.'.format(sal_rel_dir))
    except FileExistsError:
        print('SAL release directory {} already exists, recreating.'.format(sal_rel_dir))
        delete_files(sal_rel_dir)
        os.mkdir(sal_rel_dir)
    sal_home = get_env_var(constants.sal_home_env_var_name)
    shutil.copytree(sal_home + '/include/', sal_rel_dir + '/include')
    shutil.copytree(sal_home + '/src/include/', sal_rel_dir + '/src/include')
    shutil.copytree(sal_home + '/build', sal_rel_dir + '/build')
    shutil.copytree(sal_home + '/lib', sal_rel_dir + '/lib')
    shutil.copytree(sal_home + '/scripts', sal_rel_dir + '/scripts')
    shutil.copytree(sal_home + '/config', sal_rel_dir + '/config')
    shutil.copytree(sal_home + '/proto', sal_rel_dir + '/proto')
    if get_from_advance_setting_dict(constants.sal_sw_attr_node,
                                     constants.build_third_party_node):
        tp_install = get_env_var(constants.tp_install_env_var_name)
        shutil.copytree(tp_install + '/lib', sal_rel_dir + '/install/lib')
        shutil.copytree(tp_install + '/include', sal_rel_dir + '/install/include')
        shutil.copytree(tp_install + '/bin', sal_rel_dir + '/install/bin')
        shutil.copytree(tp_install + '/share', sal_rel_dir + '/install/share')
    os.mkdir(sal_rel_dir + '/test')
    shutil.copyfile(sal_home + '/README.md', sal_rel_dir + '/README.md')
    shutil.copyfile(sal_home + '/test/sal_service_test_BF6064.py',
                    sal_rel_dir + '/test/sal_service_test_BF6064.py')
    shutil.copyfile(sal_home + '/test/sal_service_test_BF2556.py',
                    sal_rel_dir + '/test/sal_service_test_BF2556.py')
    shutil.copyfile(sal_home + '/test/TestUtil.py',
                    sal_rel_dir + '/test/TestUtil.py')
    # copy the generated Python gRPC bindings
    shutil.copyfile(sal_home + '/sal_services_pb2.py',
                    sal_rel_dir + '/sal_services_pb2.py')
    shutil.copyfile(sal_home + '/sal_services_pb2_grpc.py',
                    sal_rel_dir + '/sal_services_pb2_grpc.py')
    # copy the generated C++ gRPC/protobuf sources
    shutil.copyfile(sal_home + '/sal_services.grpc.pb.cc',
                    sal_rel_dir + '/sal_services.grpc.pb.cc')
    shutil.copyfile(sal_home + '/sal_services.grpc.pb.h',
                    sal_rel_dir + '/sal_services.grpc.pb.h')
    shutil.copyfile(sal_home + '/sal_services.pb.cc',
                    sal_rel_dir + '/sal_services.pb.cc')
    shutil.copyfile(sal_home + '/sal_services.pb.h',
                    sal_rel_dir + '/sal_services.pb.h')
    prepare_sal_pkg()
    return True
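# `prepare_sal_pkg` is called at the end of prepare_sal_release() but is not
# shown in this section. By analogy with prepare_bsp_pkg() above, it
# presumably zips the assembled release directory; a minimal sketch of that
# assumption:
import shutil


def prepare_sal_pkg():
    """Archive the SAL release directory as <sal_rel_dir>.zip (sketch)."""
    shutil.make_archive(sal_rel_dir, 'zip', sal_rel_dir)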