def _run_stage(base_config_dir, project_name, sub_project_name,
               root_config_path, config_name, stage_name, stage_fn):
    """Load a stage config merged with common.yml, run the stage, log its result.

    :param config_name: basename (without ``.yml``) of the stage config file
    :param stage_name: suffix used for the per-stage logger name
    :param stage_fn: callable ``fn(config, root_config_path=..., logger=...)``
    """
    sub_dir = os.path.join(base_config_dir, project_name, sub_project_name)
    common_config = load_yaml(os.path.join(sub_dir, 'common.yml'))
    process_config = load_yaml(os.path.join(sub_dir, config_name + '.yml'))
    # common.yml values override the stage-specific ones, as in the original flow
    process_config.update(common_config)
    logger = initlog(project_name + '_' + sub_project_name + '_' + stage_name)
    result = stage_fn(process_config,
                      root_config_path=root_config_path,
                      logger=logger)
    logger.info(result)


def process_prediction(base_config_dir, project_name, sub_project_name,
                       root_config_path):
    """Run the model-prediction stage followed by the result-saving stage.

    Each stage loads its own YAML config, merges ``common.yml`` over it,
    gets a dedicated logger, and logs the stage's return value.
    """
    _run_stage(base_config_dir, project_name, sub_project_name,
               root_config_path, 'model_prediction',
               'process_model_prediction', process_model_prediction)
    _run_stage(base_config_dir, project_name, sub_project_name,
               root_config_path, 'result_saving',
               'process_result_saving', process_result_saving)
def process_label_extraction_only(base_config_dir, project_name,
                                  sub_project_name, root_config_path):
    """Run only the label-extraction step for one sub-project.

    Loads ``label_extraction.yml`` merged with ``common.yml`` (common wins),
    runs ``process_label_extraction``, and logs its return value.
    """
    config_dir = os.path.join(base_config_dir, project_name, sub_project_name)
    process_config = load_yaml(
        os.path.join(config_dir, 'label_extraction.yml'))
    process_config.update(load_yaml(os.path.join(config_dir, 'common.yml')))
    logger = initlog('_'.join(
        [project_name, sub_project_name, 'process_label_extraction']))
    result = process_label_extraction(process_config,
                                      root_config_path=root_config_path,
                                      logger=logger)
    logger.info(result)
def process_entity_set_preview_only(base_config_dir, project_name,
                                    sub_project_name, root_config_path):
    """Run only the entity-set preview step for one sub-project.

    Loads ``entity_set_desc.yml`` merged with ``common.yml`` (common wins),
    runs ``process_entity_set_preview``, and logs its return value.
    """
    config_dir = os.path.join(base_config_dir, project_name, sub_project_name)
    process_config = load_yaml(
        os.path.join(config_dir, 'entity_set_desc.yml'))
    process_config.update(load_yaml(os.path.join(config_dir, 'common.yml')))
    logger = initlog('_'.join(
        [project_name, sub_project_name, 'entity_set_desc']))
    result = process_entity_set_preview(process_config,
                                        root_config_path=root_config_path,
                                        logger=logger)
    logger.info(result)
# Ensure we run from the a_run directory so the relative config path below
# resolves. Fixed: use os.path.basename instead of splitting on '/', which
# broke on platforms with a different path separator.
if 'a_run' not in os.path.basename(os.getcwd()):
    os.chdir('./a_run')
sys.path.append(os.path.join(os.getcwd(), '../'))

from utils.utils_common import load_yaml
from utils.utils_tools_init import initlog
# NOTE(review): 'comment_anlysis' is misspelled, but it matches the name
# exported by the project module — do not rename here.
from comment_analysis.comment_analysis import comment_anlysis

if __name__ == '__main__':
    # Argument parsing: optional backfill date range
    ap = argparse.ArgumentParser()
    ap.add_argument('-begin_date', '--begin_date', default='0000-00-00',
                    help='补跑开始日期')
    ap.add_argument('-end_date', '--end_date', default='0000-00-00',
                    help='补跑结束日期')
    args = ap.parse_args()
    begin_date = args.begin_date
    end_date = args.end_date

    # Resolve the root configuration
    root_config = load_yaml('../config/comment_analysis/root_config.yml')

    # Logger
    logger = initlog('comment_analysis')

    comment_anlysis(logger, root_config, begin_date, end_date)
def _resolve_root_config(self):
    """Load the root YAML config and cache its settings as instance attributes.

    Required keys raise ``KeyError`` if missing; optional ones fall back to
    the defaults shown below, exactly as in the original lookup order.
    """
    cfg = load_yaml(self.root_config_path)
    self.root_config = cfg

    # top-level settings
    self.product_label_column_name_sep = cfg['product_label_column_name_sep']
    self.standard_result_colname_dic = cfg['standard_result_colname_dic']
    self.problem_type_vs_label_type = cfg['problem_type_vs_label_type']
    self.ana_type_vs_data_type = cfg['ana_type_vs_data_type']
    self.default_time_col_name = cfg.get('default_time_col_name', 'time')

    # directory layout
    dirs = cfg['dir']
    self.base_project_dir = dirs['base_project_dir']
    self.sub_project_models_dir = dirs['sub_project_models_dir']
    self.sub_project_data_dir = dirs['sub_project_data_dir']
    self.sub_project_srcdata_dir = dirs['sub_project_srcdata_dir']
    self.sub_project_models_prediction_data_dir = dirs[
        'sub_project_models_prediction_data_dir']
    self.base_project_dir_deploying = dirs['base_project_dir_deploying']

    # artifact file names: each key is optional with a default; the cached
    # attribute is the key name plus a '_file' suffix
    file_map = cfg['file_map']
    for key, default in (
            ('label_extraction_model', 'label_extraction_model.pkl'),
            ('label_result_df', 'label_result_df.csv'),
            ('label_statistics_df', 'label_statistics_df.csv'),
            ('feature_result_df', 'feature_result_df.faturedf'),
            ('feature_statistics_df', 'feature_statistics_df.csv'),
            ('feature_extraction_para_model',
             'feature_extraction_para_model.pkl'),
            ('model_training_model', 'model_training_model.pkl'),
            ('prediction_result_df', 'prediction_result_df.csv'),
            ('entity_set_data_dfs_dic', 'entity_set_data_dfs_dic.pkl'),
    ):
        setattr(self, key + '_file', file_map.get(key, default))

    # model training
    training = cfg['model_training']
    self.model_eval_key_value = training.get('model_eval_key_value', {})
    self.model_split_key_value = training.get('model_split_key_value', {})

    # run parameters
    run_para = cfg['run_para']
    self.is_dask_mode = run_para['is_dask_mode']
    self.test_mode = run_para.get('test_mode', None)
    self.test_nrows = run_para.get('test_nrows', None)

    # dask settings
    self.partitioned_chunksize = cfg['dask']['partitioned_chunksize']
    self.num_workers = cfg['dask'].get('num_workers', None)

    # row cap only applies in test mode
    self.nrows = self.test_nrows if self.test_mode else None