Ejemplo n.º 1
0
def process_prediction(base_config_dir, project_name, sub_project_name,
                       root_config_path):
    """Run the model-prediction stage, then the result-saving stage.

    Both stages read their own YAML file from
    ``<base_config_dir>/<project_name>/<sub_project_name>/``, have the
    contents of ``common.yml`` merged into that config via ``update()``,
    get a stage-specific logger from ``initlog`` and log the value returned
    by the stage function.

    Args:
        base_config_dir: Root directory of the per-project config files.
        project_name: Project directory name; also used in the logger name.
        sub_project_name: Sub-project directory name; also used in the
            logger name.
        root_config_path: Passed through unchanged to each stage function.

    Returns:
        None.
    """
    config_dir = os.path.join(base_config_dir, project_name,
                              sub_project_name)
    shared_settings = load_yaml(os.path.join(config_dir, 'common.yml'))

    def run_stage(yaml_name, stage_name, stage_func):
        # Load the stage config, overlay the shared settings, run and log.
        stage_config = load_yaml(os.path.join(config_dir, yaml_name))
        # update() makes common.yml values replace same-named stage values
        # (assuming load_yaml returns a plain dict).
        stage_config.update(shared_settings)
        stage_logger = initlog(
            '_'.join((project_name, sub_project_name, stage_name)))
        outcome = stage_func(stage_config,
                             root_config_path=root_config_path,
                             logger=stage_logger)
        stage_logger.info(outcome)

    run_stage('model_prediction.yml', 'process_model_prediction',
              process_model_prediction)
    run_stage('result_saving.yml', 'process_result_saving',
              process_result_saving)
Ejemplo n.º 2
0
def process_label_extraction_only(base_config_dir, project_name,
                                  sub_project_name, root_config_path):
    """Run only the label-extraction stage for one sub-project.

    Loads ``label_extraction.yml`` from
    ``<base_config_dir>/<project_name>/<sub_project_name>/``, merges the
    contents of ``common.yml`` into it via ``update()``, calls
    ``process_label_extraction`` and logs its return value.

    Args:
        base_config_dir: Root directory of the per-project config files.
        project_name: Project directory name; also used in the logger name.
        sub_project_name: Sub-project directory name; also used in the
            logger name.
        root_config_path: Passed through unchanged to the stage function.

    Returns:
        None.
    """
    config_dir = os.path.join(base_config_dir, project_name,
                              sub_project_name)
    shared_settings = load_yaml(os.path.join(config_dir, 'common.yml'))

    stage_config = load_yaml(
        os.path.join(config_dir, 'label_extraction.yml'))
    # update() makes common.yml values replace same-named stage values
    # (assuming load_yaml returns a plain dict).
    stage_config.update(shared_settings)

    stage_logger = initlog('_'.join(
        (project_name, sub_project_name, 'process_label_extraction')))
    outcome = process_label_extraction(stage_config,
                                       root_config_path=root_config_path,
                                       logger=stage_logger)
    stage_logger.info(outcome)
Ejemplo n.º 3
0
def process_entity_set_preview_only(base_config_dir, project_name,
                                    sub_project_name, root_config_path):
    """Run only the entity-set preview stage for one sub-project.

    Loads ``entity_set_desc.yml`` from
    ``<base_config_dir>/<project_name>/<sub_project_name>/``, merges the
    contents of ``common.yml`` into it via ``update()``, calls
    ``process_entity_set_preview`` and logs its return value.

    Args:
        base_config_dir: Root directory of the per-project config files.
        project_name: Project directory name; also used in the logger name.
        sub_project_name: Sub-project directory name; also used in the
            logger name.
        root_config_path: Passed through unchanged to the stage function.

    Returns:
        None.
    """
    config_dir = os.path.join(base_config_dir, project_name,
                              sub_project_name)
    shared_settings = load_yaml(os.path.join(config_dir, 'common.yml'))

    stage_config = load_yaml(
        os.path.join(config_dir, 'entity_set_desc.yml'))
    # update() makes common.yml values replace same-named stage values
    # (assuming load_yaml returns a plain dict).
    stage_config.update(shared_settings)

    # The logger suffix here is the config name, not the function name.
    stage_logger = initlog('_'.join(
        (project_name, sub_project_name, 'entity_set_desc')))
    outcome = process_entity_set_preview(stage_config,
                                         root_config_path=root_config_path,
                                         logger=stage_logger)
    stage_logger.info(outcome)
Ejemplo n.º 4
0
# Switch into the ``a_run`` directory unless the current directory's name
# already contains 'a_run'. os.path.basename() replaces the former
# split('/')[-1], which only isolated the last path component on systems
# whose separator is '/'; on POSIX the result is unchanged.
# NOTE(review): this is a substring match, so a directory such as
# 'data_run' also counts -- confirm whether an exact match is intended.
if 'a_run' not in os.path.basename(os.getcwd()):
    os.chdir('./a_run')

# Add the parent of the working directory to the import search path,
# presumably so the project-local packages imported below can be found.
sys.path.append(os.path.join(os.getcwd(), '../'))

from utils.utils_common import load_yaml
from utils.utils_tools_init import initlog
from comment_analysis.comment_analysis import comment_anlysis

if __name__ == '__main__':
    # Read command-line parameters. Both options default to the placeholder
    # '0000-00-00'; the (Chinese) help texts read "backfill start date" and
    # "backfill end date".
    parser = argparse.ArgumentParser()
    date_options = (
        ('-begin_date', '--begin_date', '补跑开始日期'),
        ('-end_date', '--end_date', '补跑结束日期'),
    )
    for short_flag, long_flag, help_text in date_options:
        parser.add_argument(short_flag,
                            long_flag,
                            default='0000-00-00',
                            help=help_text)
    cli_args = parser.parse_args()
    begin_date, end_date = cli_args.begin_date, cli_args.end_date

    # Parse the root configuration (path is relative to the working dir).
    root_config = load_yaml('../config/comment_analysis/root_config.yml')

    # Logger for this job.
    logger = initlog('comment_analysis')

    comment_anlysis(logger, root_config, begin_date, end_date)
Ejemplo n.º 5
0
    def _resolve_root_config(self):
        """Load the root YAML config and copy its settings onto ``self``.

        Reads the file at ``self.root_config_path`` with ``load_yaml``,
        stores the parsed object as ``self.root_config`` and then exposes
        individual entries as instance attributes.

        Entries read with ``[...]`` are mandatory; entries read with
        ``.get`` fall back to the default given here. The sections
        ``dir``, ``file_map``, ``model_training``, ``run_para`` and
        ``dask`` must all be present. A missing mandatory key presumably
        surfaces as ``KeyError`` (assuming ``load_yaml`` returns plain
        dicts).

        ``self.nrows`` ends up as ``self.test_nrows`` when
        ``self.test_mode`` is truthy, otherwise ``None``.
        """
        root = load_yaml(self.root_config_path)
        self.root_config = root

        # Top-level settings
        self.product_label_column_name_sep = (
            root['product_label_column_name_sep'])
        self.standard_result_colname_dic = (
            root['standard_result_colname_dic'])
        self.problem_type_vs_label_type = root['problem_type_vs_label_type']
        self.ana_type_vs_data_type = root['ana_type_vs_data_type']
        self.default_time_col_name = root.get('default_time_col_name', 'time')

        # Directory definitions (all mandatory)
        dirs = root['dir']
        self.base_project_dir = dirs['base_project_dir']
        self.sub_project_models_dir = dirs['sub_project_models_dir']
        self.sub_project_data_dir = dirs['sub_project_data_dir']
        self.sub_project_srcdata_dir = dirs['sub_project_srcdata_dir']
        self.sub_project_models_prediction_data_dir = (
            dirs['sub_project_models_prediction_data_dir'])
        self.base_project_dir_deploying = dirs['base_project_dir_deploying']

        # Artifact file names (each optional, with a built-in default)
        files = root['file_map']
        self.label_extraction_model_file = files.get(
            'label_extraction_model', 'label_extraction_model.pkl')
        self.label_result_df_file = files.get(
            'label_result_df', 'label_result_df.csv')
        self.label_statistics_df_file = files.get(
            'label_statistics_df', 'label_statistics_df.csv')
        self.feature_result_df_file = files.get(
            'feature_result_df', 'feature_result_df.faturedf')
        self.feature_statistics_df_file = files.get(
            'feature_statistics_df', 'feature_statistics_df.csv')
        self.feature_extraction_para_model_file = files.get(
            'feature_extraction_para_model',
            'feature_extraction_para_model.pkl')
        self.model_training_model_file = files.get(
            'model_training_model', 'model_training_model.pkl')
        self.prediction_result_df_file = files.get(
            'prediction_result_df', 'prediction_result_df.csv')
        self.entity_set_data_dfs_dic_file = files.get(
            'entity_set_data_dfs_dic', 'entity_set_data_dfs_dic.pkl')

        # Model-training options (default to empty dicts)
        training = root['model_training']
        self.model_eval_key_value = training.get('model_eval_key_value', {})
        self.model_split_key_value = training.get('model_split_key_value', {})

        # Run parameters
        run_para = root['run_para']
        self.is_dask_mode = run_para['is_dask_mode']
        self.test_mode = run_para.get('test_mode', None)
        self.test_nrows = run_para.get('test_nrows', None)

        # Dask settings
        dask_cfg = root['dask']
        self.partitioned_chunksize = dask_cfg['partitioned_chunksize']
        self.num_workers = dask_cfg.get('num_workers', None)

        # The row limit only applies while test mode is switched on.
        self.nrows = self.test_nrows if self.test_mode else None