Exemple #1
0
    def __init__(self, config_path, mode, label='Survived'):
        """Prepare the instance for either training or prediction.

        Parameters
        ----------
        config_path :
            Path handed to ``ConfigManager.load_config``.
        mode : str
            Either ``'train'`` or ``'pred'``.
        label : str
            Stored as ``self.label`` (presumably the target column name;
            confirm against the rest of the class).

        Raises
        ------
        ValueError
            If ``mode`` is neither ``'train'`` nor ``'pred'``.
        """
        self.label = label
        self.config_path = config_path  # configuration file used at training time
        self.dataset = {'X': None, 'y': None}

        if mode not in ('train', 'pred'):
            raise ValueError('modeに"train", "pred"を指定してない.')
        self.mode = mode

        # Only prediction requires a key to already be present in the config.
        predicting = mode == 'pred'
        required_keys = ['transformers_path'] if predicting else []
        self.config = ConfigManager().load_config(config_path, required_keys)

        if predicting:
            # Reuse the transformers referenced by the loaded configuration.
            self.transformers = self._load_transformers(self.config)
            return

        # Training starts from empty transformer containers.
        self.transformers = {
            'fillna_vals': {},
            'onehot_encoders': {},
            'count_corresp_tables': {},
            'minmax_scaler': None,
        }
Exemple #2
0
class Father:
    """Supervisor that owns the queues, the configuration and both workers.

    It creates one ``Client`` and one ``Bot`` (objects exposing ``start()``
    and ``is_alive()``), shares the configuration object with them and
    reacts to requests arriving on ``queue_to_father``.
    """

    def __init__(self):
        self.queue_to_cli = Queue()
        self.queue_to_bot = Queue()
        self.queue_to_father = Queue()
        self.config = ConfigManager()

        # Both workers receive the configuration and the cli/bot queues;
        # only the bot can additionally talk back to the father.
        worker_queues = (self.queue_to_cli, self.queue_to_bot)
        self.cli = Client(self.config, *worker_queues)
        self.bot = Bot(self.config, *worker_queues, self.queue_to_father)

        # Credentials stay unset until run() copies them from the config.
        self.bot_token = self.api_id = self.api_hash = self.phone = None

    def run(self):
        """Start the workers and supervise them forever.

        The whole body is guarded by ``__name__ == "__main__"``: when the
        defining module is imported rather than executed as a script, this
        method does nothing and returns ``None``.
        """

        def refresh_credentials():
            # Copy the credential entries from the config onto the instance.
            print("Cargando conf father")
            settings = self.config.conf_dict
            for field in ('bot_token', 'api_id', 'api_hash', 'phone'):
                setattr(self, field, settings[field])
            return True

        def dispatch(request):
            # Act on a single request taken from the father queue.
            print("Father receive: ", request)

            if request.request_code == cons.CLIENT_START:
                # A fresh Client object is built before starting it.
                self.cli = Client(self.config, self.queue_to_cli,
                                  self.queue_to_bot)
                self.cli.start()
                return
            if request.request_code == cons.RELOAD_CONF:
                refresh_credentials()
                return
            if request.request_code == cons.CLIENT_STATUS:
                # Answer through the bot queue, reusing the same packet.
                request.reply_code = self.cli.is_alive()
                self.queue_to_bot.put(request)
                return
            print("WARNING Father receives unkown code: ", request)

        def poll_requests():
            # Wait up to the poll timeout for one request; an empty queue
            # is the normal idle case and is ignored.
            try:
                dispatch(self.queue_to_father.get(
                    True, cons.FATHER_QUEUE_POLL_TIMEOUT))
            except Empty:
                pass

        def revive_bot():
            # Only the bot is checked and restarted here; the client is
            # not restarted by this check.
            if self.bot.is_alive() is not False:
                return
            self.bot = Bot(self.config, self.queue_to_cli,
                           self.queue_to_bot, self.queue_to_father)
            self.bot.start()

        def launch_client():
            credentials = (self.api_id, self.api_hash, self.phone)
            if any(value is None for value in credentials):
                # Report the incomplete client settings on stdout and to the bot.
                error = "".join([
                    "Hay parametros del cliente no configurados, revisalos:\nAPI_ID: ",
                    str(self.api_id),
                    "\nAPI_HASH: ",
                    str(self.api_hash),
                    "\nPhone: ",
                    str(self.phone),
                    "\nUsa /set_api_id, /set_api_hash o /set_phone y luego /client_launch",
                ])
                print(error)
                self.queue_to_bot.put(Packet(cons.SEND_MSG, error))
                return
            if self.cli.is_alive() is True:
                self.queue_to_bot.put(
                    Packet(cons.SEND_MSG, "El cliente YA estaba ejecutandose"))
                return
            self.cli.start()

        #####################################################
        if __name__ != "__main__":
            return

        print("Ejecutando Padre")
        self.config.load_config()
        refresh_credentials()

        # Start the bot and (if fully configured) the client.
        self.bot.start()
        launch_client()

        # Communication handling and worker health checks.
        while True:
            poll_requests()
            revive_bot()
class Model(object):
    """Logistic-regression wrapper configured through a config file.

    The configuration returned by ``ConfigManager.load_config`` is accessed
    like a mapping (``'hyper_params'`` and ``'model_path'`` entries).
    """

    # Hyper-parameters used when none are supplied or validation fails.
    _DEFAULT_HYPER_PARAMS = {
        'random_state': 0,
        'solver': 'lbfgs',
        'class_weight': 'balanced',
        'n_jobs': -1,
    }

    def __init__(self, config_path, mode):
        """Load the configuration for the requested mode.

        Parameters
        ----------
        config_path :
            Path handed to ``ConfigManager.load_config`` / ``save_config``.
        mode : str
            ``'train'`` or ``'pred'``. In ``'pred'`` mode the configuration
            is loaded with ``'model_path'`` and ``'hyper_params'`` as
            expected keys.

        Raises
        ------
        ValueError
            If ``mode`` is neither ``'train'`` nor ``'pred'``.
        """
        self.config_path = config_path
        self.clf = None
        self.cm = ConfigManager()

        if mode not in ['train', 'pred']:
            raise ValueError('modeに"train", "pred"を指定してない.')
        self.mode = mode
        expected_keys = []
        if mode == 'pred':
            expected_keys = ['model_path', 'hyper_params']
        self.config = \
            self.cm.load_config(config_path, expected_keys)

    def _validate_dataset(self, dataset):
        """Check that ``dataset`` has the structure this class expects.

        ``dataset`` must be a dict whose ``'X'`` value is a numpy array; in
        ``'train'`` mode a numpy array under ``'y'`` is required as well.

        Raises
        ------
        TypeError
            If ``dataset`` is not a dict or a required value is not an array.
        KeyError
            If a required key is missing.
        """
        if not isinstance(dataset, dict):
            raise TypeError('入力データセットがdictでない.')
        if 'X' not in dataset:
            raise KeyError('データセットに key: "X" が含まれていない')
        if self.mode == 'train' and 'y' not in dataset:
            raise KeyError('データセットに key: "y" が含まれていない')
        if not isinstance(dataset['X'], np.ndarray):
            raise TypeError('Xのvalueがarrayでない')
        if self.mode == 'train' and not isinstance(dataset['y'], np.ndarray):
            raise TypeError('yのvalueがarrayでない')

    def _resolve_hyper_parameters(self, hyper_parameters):
        """Return validated hyper-parameters, or a copy of the defaults.

        When ``hyper_parameters`` is ``None`` the ``'hyper_params'`` entry of
        the configuration is used. If that entry is missing, the value is
        not a dict, a required key is absent, or a value has an unsupported
        type, the traceback is printed and the defaults are returned.
        """
        # Accepted types follow what scikit-learn documents for
        # LogisticRegression, so previously valid values keep working.
        expected_types = {
            'random_state': (int, np.random.RandomState, type(None)),
            'solver': (str,),
            'class_weight': (str, dict, type(None)),
            'n_jobs': (int, type(None)),
        }
        try:
            # Looked up inside the try so that a configuration without
            # 'hyper_params' falls back to the defaults instead of crashing.
            if hyper_parameters is None:
                hyper_parameters = self.config['hyper_params']
            if not isinstance(hyper_parameters, dict):
                raise TypeError(f'{hyper_parameters}がdictでない.')
            expected_keys = [
                'random_state', 'solver', 'class_weight', 'n_jobs'
            ]
            Utils.validate_dict(hyper_parameters, expected_keys)
            for key in expected_keys:
                value = hyper_parameters[key]
                if not isinstance(value, expected_types[key]):
                    raise TypeError(
                        f'ハイパーパラメータ"{key}"の型が不正: '
                        f'{type(value).__name__}')
        except (TypeError, KeyError):
            # Reached when 'hyper_params' is missing from the config or the
            # dict does not satisfy the expected keys / types.
            traceback.print_exc()
            return dict(self._DEFAULT_HYPER_PARAMS)
        return hyper_parameters

    def init_model(self, hyper_parameters=None):
        """Create ``self.clf`` as a ``LogisticRegression``.

        Parameters
        ----------
        hyper_parameters : dict, optional
            Must provide ``random_state``, ``solver``, ``class_weight`` and
            ``n_jobs``. When omitted, the configuration's ``'hyper_params'``
            entry is used; invalid or missing values fall back to defaults.
        """
        hyper_parameters = self._resolve_hyper_parameters(hyper_parameters)

        # Model initialisation.
        self.clf = LogisticRegression(
            random_state=hyper_parameters['random_state'],
            solver=hyper_parameters['solver'],
            class_weight=hyper_parameters['class_weight'],
            n_jobs=hyper_parameters['n_jobs'])

    def train_with_cv(self, dataset, cv=4, return_train_score=True):
        """Cross-validate ``self.clf`` and keep the best-scoring estimator.

        Parameters
        ----------
        dataset : dict
            ``{'X': ndarray, 'y': ndarray}``.
        cv, return_train_score :
            Forwarded to ``cross_validate``; overridden by same-named
            entries of the configuration's ``'hyper_params'`` when present.

        Raises
        ------
        TypeError
            If the model has not been initialised, or the dataset is invalid.
        KeyError
            If the dataset lacks a required key.
        """
        # Validate the objects we are about to use.
        self._validate_dataset(dataset)
        if self.clf is None:
            raise TypeError('モデルが初期化またはロードされていない.')

        # The configuration may legitimately lack 'hyper_params' (init_model
        # then used defaults), so treat a missing/non-dict entry as empty.
        config_params = {}
        if 'hyper_params' in self.config:
            config_params = self.config['hyper_params']
        if not isinstance(config_params, dict):
            config_params = {}

        # Options given in the configuration take precedence.
        if 'cv' in config_params:
            cv = config_params['cv']
        if 'return_train_score' in config_params:
            return_train_score = config_params['return_train_score']

        # Training (cross-validation).
        self.scores = cross_validate(self.clf,
                                     dataset['X'],
                                     dataset['y'],
                                     cv=cv,
                                     return_train_score=return_train_score,
                                     return_estimator=True)

        # For simplicity, adopt the estimator with the best validation score
        # among the folds; other strategies (e.g. averaging) may suit other
        # tasks better.
        best_idx = self.scores['test_score'].argmax()
        self.clf = self.scores['estimator'][best_idx]

    def save_model(self,
                   dst_dir='./.models',
                   child_dir=None,
                   model_name='logistic_regression.pkl.cmp'):
        """Dump ``self.clf`` and record its relative path in the config.

        Parameters
        ----------
        dst_dir : str
            Root directory for saved models.
        child_dir : str, optional
            Sub-directory below ``dst_dir``. Defaults to the HEAD commit id
            of the repository's active branch.
        model_name : str
            File name of the dumped model.

        Nothing is saved (a message is printed) when no model is set.
        """
        self.dst_dir = dst_dir
        # Identity check, consistent with train_with_cv; truthiness of an
        # estimator object is not a reliable "is set" test.
        if self.clf is None:
            print('モデルが学習またはロードされていないので保存しない')
            return

        dst_dir = Path(dst_dir).resolve()
        if child_dir is None:
            # Name the directory after the active branch's HEAD commit id.
            # NOTE(review): parents[6] hard-codes this file's depth inside
            # the repository - confirm it matches the project layout.
            repo_abspath = Path(__file__).resolve().parents[6]
            repo = Repo(repo_abspath)
            child_dir = repo.active_branch.commit.hexsha
        dst_path = dst_dir.joinpath(child_dir, model_name)

        # Race-free equivalent of "create the directory if it is missing".
        dst_path.parent.mkdir(parents=True, exist_ok=True)

        joblib.dump(self.clf, dst_path, compress=True)
        print(dst_path, 'にモデルを保存')

        # Record the path below dst_dir (used at inference time).
        self.config['model_path'] = \
            Path(child_dir).joinpath(model_name)
        self.cm.save_config(self.config, self.config_path)
        print(f'モデル保存先を設定ファイル{self.config_path}を更新')

    def predict(self, dataset, model_dir='/opt/ml/model'):
        """Attach predictions for ``dataset['X']`` to the dataset.

        The model file is loaded from ``model_dir`` using only the file name
        of the configuration's ``'model_path'`` entry.

        Parameters
        ----------
        dataset : dict
            Pre-processed / feature-engineered dataset,
            ``{'X': shape (n_samples, n_features), 'y': shape (n_samples,)}``.
        model_dir : str
            Directory that contains the dumped model file.

        Returns
        -------
        dict
            The same dataset with ``'y'`` replaced by the predictions.
        """
        filename = Path(self.config['model_path']).name
        model_path_for_pred = Path(model_dir).joinpath(filename)

        self._validate_dataset(dataset)

        self.clf = joblib.load(model_path_for_pred)
        dataset['y'] = self.clf.predict(dataset['X'])

        return dataset