Python ConfigManager.save_config 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: config_manager

클래스/타입: ConfigManager

메소드/함수: save_config

hotexamples.com에서의 예제들: 2

Python ConfigManager.save_config - 2개의 예제가 발견되었습니다. 아래는 오픈소스 프로젝트에서 추출한, Python의 config_manager.ConfigManager.save_config 사용 예제 중 가장 높은 평가를 받은 실제 코드입니다. 예제를 평가해 주시면 예제 품질을 개선하는 데 도움이 됩니다.

자주 사용되는 메소드들

보기 숨기기

ConfigManager(30)

db_path(30)

get(20)

get_instance(19)

logger_path(19)

get_config(11)

from_file(9)

get_value(7)

set_value(4)

instance(4)

load_config(3)

get_item(2)

print_config(2)

save_config(2)

set(2)

get_group_mac_address(2)

get_group_degrees(2)

get_model(2)

getFtpHostname(1)

as_list(1)

load(1)

decode_configuration(1)

load_model(1)

create_remove_dirs(1)

merge(1)

create_config(1)

read_serial_config(1)

reload_yaml(1)

remove_from_blacklist(1)

remove_from_whitelist(1)

save_columns(1)

save_configuration(1)

init(1)

save_tables(1)

append_whitelist_files(1)

set_blacklist_files(1)

set_config(1)

set_config_settings_value(1)

set_config_value(1)

set_settings(1)

append_blacklist_files(1)

set_whitelist_files(1)

show_filter_status(1)

switch_blacklist_enabled(1)

switch_whitelist_enabled(1)

decode_connection_info(1)

has_valid_config(1)

getFtpPassword(1)

find_table(1)

getFtpUsername(1)

예제 #1

파일 보기

    def save_transformers(self,
                          dst_dir='./.models',
                          child_dir=None,
                          transformers_name='transformers.pkl.cmp'):
        """Dump the preprocessing / feature-engineering transformers to disk.

        ``self.transformers`` is written as one compressed joblib file
        (the original author notes it is expected to be a single dict
        bundling all transformers). The path relative to ``dst_dir`` is
        then stored under ``self.config['transformers_path']`` and the
        config is written back to ``self.config_path``.

        Parameters
        ----------
        dst_dir : str, optional
            Parent directory for saved artifacts, by default './.models'
        child_dir : str, optional
            Sub-directory under ``dst_dir`` (e.g. an experiment/task name).
            When None, the HEAD commit hash of the active branch is used,
            by default None
        transformers_name : str, optional
            File name of the dumped transformers,
            by default 'transformers.pkl.cmp'

        Returns
        -------
        None
            Returns early, saving nothing, when ``self.transformers`` is
            falsy.

        TODO
        ----
        Largely overlaps with ``save_model`` in model.py; consider merging.
        """
        if not self.transformers:
            print('モデルが学習またはロードされていないので保存しない')
            return

        base_dir = Path(dst_dir).resolve()
        if child_dir is None:
            # Name the sub-directory after the HEAD commit of the active
            # branch. The repository root is taken to be the ancestor at
            # index 6 of this file's resolved path.
            repo_root = Path(__file__).resolve().parents[6]
            child_dir = Repo(repo_root).active_branch.commit.hexsha
        dst_path = base_dir / child_dir / transformers_name

        os.makedirs(dst_path.parent, exist_ok=True)

        joblib.dump(self.transformers, dst_path, compress=True)
        print(dst_path, 'に前処理・特徴量エンジニアリング用モデル等を保存')

        # Record only the path below dst_dir (used at inference time).
        relative_path = Path(child_dir) / transformers_name
        self.config['transformers_path'] = relative_path
        ConfigManager().save_config(self.config, self.config_path)
        print(f'モデル保存先を設定ファイル{self.config_path}に上書き')

예제 #2

파일 보기

파일: model.py 프로젝트: ababa831/sagemaker_mlops_demo

class Model(object):
    """Config-driven wrapper around a ``LogisticRegression`` classifier.

    Covers hyper-parameter validation, cross-validated training,
    persistence via joblib and inference. The configuration is loaded
    and saved through ``ConfigManager``.
    """

    # Required hyper-parameter keys and the type each value must have.
    _HYPER_PARAM_TYPES = {
        'random_state': int,
        'solver': str,
        'class_weight': str,
        'n_jobs': int,
    }

    # Fallback used when the supplied hyper-parameters are missing/invalid.
    _DEFAULT_HYPER_PARAMS = {
        'random_state': 0,
        'solver': 'lbfgs',
        'class_weight': 'balanced',
        'n_jobs': -1,
    }

    def __init__(self, config_path, mode):
        """Load the configuration for the given mode.

        Parameters
        ----------
        config_path : path-like
            Location of the config file passed to ``ConfigManager``.
        mode : str
            Either 'train' or 'pred'. In 'pred' mode the config is
            loaded with 'model_path' and 'hyper_params' as expected keys.

        Raises
        ------
        ValueError
            If ``mode`` is neither 'train' nor 'pred'.
        """
        self.config_path = config_path
        self.clf = None
        self.cm = ConfigManager()

        if mode not in ['train', 'pred']:
            raise ValueError('modeに"train", "pred"を指定してない．')
        self.mode = mode
        expected_keys = []
        if mode == 'pred':
            expected_keys = ['model_path', 'hyper_params']
        self.config = \
            self.cm.load_config(config_path, expected_keys)

    def _validate_dataset(self, dataset):
        """Check that ``dataset`` is a dict holding the required arrays.

        'X' is always required; 'y' is required only in 'train' mode.

        Raises
        ------
        TypeError
            If ``dataset`` is not a dict or a required value is not a
            ``np.ndarray``.
        KeyError
            If a required key is missing.
        """
        if not isinstance(dataset, dict):
            raise TypeError('入力データセットがdictでない．')
        if 'X' not in dataset:
            raise KeyError('データセットに key: "X" が含まれていない')
        if self.mode == 'train' and 'y' not in dataset:
            raise KeyError('データセットに key: "y" が含まれていない')
        if not isinstance(dataset['X'], np.ndarray):
            raise TypeError('Xのvalueがarrayでない')
        if self.mode == 'train' and not isinstance(dataset['y'], np.ndarray):
            raise TypeError('yのvalueがarrayでない')

    @classmethod
    def _check_hyper_parameter_types(cls, hyper_parameters):
        """Raise TypeError if a required hyper-parameter has the wrong type.

        A missing key surfaces as KeyError from the lookup itself.
        """
        for key, expected_type in cls._HYPER_PARAM_TYPES.items():
            if not isinstance(hyper_parameters[key], expected_type):
                raise TypeError(
                    f'hyper_parameters["{key}"]が'
                    f'{expected_type.__name__}でない．')

    def _config_hyper_params(self):
        """Return the config's 'hyper_params' entry, or {} when absent."""
        if 'hyper_params' in self.config:
            return self.config['hyper_params']
        return {}

    def init_model(self, hyper_parameters=None):
        """Create ``self.clf`` from validated hyper-parameters.

        Parameters
        ----------
        hyper_parameters : dict, optional
            Must contain 'random_state' (int), 'solver' (str),
            'class_weight' (str) and 'n_jobs' (int). When None, the
            config's 'hyper_params' entry is used.

        Notes
        -----
        If the hyper-parameters are missing, not a dict, lack a required
        key or have a wrongly typed value, the traceback is printed and
        the built-in defaults are used instead of raising.
        """
        try:
            # Read from the config inside the try so that a config without
            # 'hyper_params' also takes the default fallback below.
            if hyper_parameters is None:
                hyper_parameters = self.config['hyper_params']
            if not isinstance(hyper_parameters, dict):
                raise TypeError(f'{hyper_parameters}がdictでない．')
            expected_keys = list(self._HYPER_PARAM_TYPES)
            Utils.validate_dict(hyper_parameters, expected_keys)
            # The original evaluated isinstance() and discarded the result;
            # the types are now actually enforced.
            self._check_hyper_parameter_types(hyper_parameters)
        except (TypeError, KeyError):
            # The config has no usable 'hyper_params' entry (missing key,
            # missing expected keys or wrong value types): use defaults.
            traceback.print_exc()
            hyper_parameters = dict(self._DEFAULT_HYPER_PARAMS)

        # Initialise the model.
        self.clf = LogisticRegression(
            random_state=hyper_parameters['random_state'],
            solver=hyper_parameters['solver'],
            class_weight=hyper_parameters['class_weight'],
            n_jobs=hyper_parameters['n_jobs'])

    def train_with_cv(self, dataset, cv=4, return_train_score=True):
        """Train with cross-validation and keep the best-scoring estimator.

        Parameters
        ----------
        dataset : dict
            Must hold ``np.ndarray`` values under 'X' and 'y'.
        cv : optional
            Forwarded to ``cross_validate``; overridden by
            ``config['hyper_params']['cv']`` when present, by default 4
        return_train_score : bool, optional
            Forwarded to ``cross_validate``; overridden by
            ``config['hyper_params']['return_train_score']`` when present,
            by default True

        Raises
        ------
        TypeError
            If the model has not been initialised or loaded, or the
            dataset is malformed.
        KeyError
            If the dataset lacks a required key.
        """
        # Validate the objects used below.
        self._validate_dataset(dataset)
        if self.clf is None:
            raise TypeError('モデルが初期化またはロードされていない．')

        # Values given in the config take precedence over the arguments.
        # A config without 'hyper_params' leaves the arguments untouched.
        config_params = self._config_hyper_params()
        if 'cv' in config_params:
            cv = config_params['cv']
        if 'return_train_score' in config_params:
            return_train_score = config_params['return_train_score']

        # Training (cross-validation).
        self.scores = cross_validate(self.clf,
                                     dataset['X'],
                                     dataset['y'],
                                     cv=cv,
                                     return_train_score=return_train_score,
                                     return_estimator=True)

        # For simplicity, adopt the estimator with the best validation
        # score across folds; other strategies (e.g. averaging) may suit
        # other tasks better.
        best_idx = self.scores['test_score'].argmax()
        self.clf = self.scores['estimator'][best_idx]

    def save_model(self,
                   dst_dir='./.models',
                   child_dir=None,
                   model_name='logistic_regression.pkl.cmp'):
        """Dump ``self.clf`` with joblib and record its path in the config.

        Parameters
        ----------
        dst_dir : str, optional
            Parent directory for saved models, by default './.models'
        child_dir : str, optional
            Sub-directory under ``dst_dir``. When None, the HEAD commit
            hash of the active branch is used, by default None
        model_name : str, optional
            File name of the dumped model,
            by default 'logistic_regression.pkl.cmp'

        Returns
        -------
        None
            Returns early, saving nothing, when no model is set.
        """
        self.dst_dir = dst_dir
        # Identity check (consistent with train_with_cv): truthiness would
        # invoke __len__ on estimators that define it.
        if self.clf is None:
            print('モデルが学習またはロードされていないので保存しない')
            return

        dst_dir = Path(dst_dir).resolve()
        if child_dir is None:
            # Name the sub-directory after the HEAD commit of the active
            # branch. The repository root is taken to be the ancestor at
            # index 6 of this file's resolved path.
            repo_abspath = Path(__file__).resolve().parents[6]
            repo = Repo(repo_abspath)
            child_dir = repo.active_branch.commit.hexsha
        dst_path = dst_dir.joinpath(child_dir, model_name)

        # exist_ok avoids the race between an existence check and creation.
        os.makedirs(dst_path.parent, exist_ok=True)

        joblib.dump(self.clf, dst_path, compress=True)
        print(dst_path, 'にモデルを保存')

        # Record only the path below dst_dir (used at inference time).
        self.config['model_path'] = \
            Path(child_dir).joinpath(model_name)
        self.cm.save_config(self.config, self.config_path)
        print(f'モデル保存先を設定ファイル{self.config_path}を更新')

    def predict(self, dataset):
        """Predict 'y' for the dataset's 'X' and return the dataset.

        The model file is loaded from the fixed directory
        '/opt/ml/model' (presumably the SageMaker model directory; confirm
        against the deployment), using only the file name of
        ``config['model_path']``.

        Parameters
        ----------
        dataset : dict
            Preprocessed / feature-engineered dataset; 'X' must be an
            ``np.ndarray``. The original note gives the shapes as
            {'X': (n_samples, n_features), 'y': (n_samples,)}.

        Returns
        -------
        dict
            The same dataset object with 'y' set to the predictions.
        """
        prefix = '/opt/ml/model'
        filename = Path(self.config['model_path']).name
        model_path_for_pred = Path(prefix).joinpath(filename)

        self._validate_dataset(dataset)

        self.clf = joblib.load(model_path_for_pred)
        dataset['y'] = self.clf.predict(dataset['X'])

        return dataset