Code example #1
from sklearn.linear_model import SGDRegressor
from sklearn.multioutput import MultiOutputRegressor


def test_multi_target_sample_weight_partial_fit():
    # weighted regressor
    X = [[1, 2, 3], [4, 5, 6]]
    y = [[3.141, 2.718], [2.718, 3.141]]
    w = [2., 1.]
    rgr_w = MultiOutputRegressor(SGDRegressor(random_state=0, max_iter=5))
    rgr_w.partial_fit(X, y, w)

    # same data, different weights
    w = [2., 2.]
    rgr = MultiOutputRegressor(SGDRegressor(random_state=0, max_iter=5))
    rgr.partial_fit(X, y, w)

    assert rgr.predict(X)[0][0] != rgr_w.predict(X)[0][0]
Code example #2
from sklearn.linear_model import SGDRegressor
from sklearn.multioutput import MultiOutputRegressor


def test_multi_target_sample_weight_partial_fit():
    # weighted regressor
    X = [[1, 2, 3], [4, 5, 6]]
    y = [[3.141, 2.718], [2.718, 3.141]]
    w = [2., 1.]
    rgr_w = MultiOutputRegressor(SGDRegressor(random_state=0))
    rgr_w.partial_fit(X, y, w)

    # same data, different weights
    w = [2., 2.]
    rgr = MultiOutputRegressor(SGDRegressor(random_state=0))
    rgr.partial_fit(X, y, w)

    assert rgr.predict(X)[0][0] != rgr_w.predict(X)[0][0]
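The two tests above differ only in whether max_iter is pinned on SGDRegressor
(some scikit-learn versions warn when it is left unset); both check that
sample weights passed to partial_fit reach the per-target regressors. Outside
a test, the same API supports streaming mini-batches. A minimal sketch,
assuming only scikit-learn and NumPy; the data is made up for illustration:

import numpy as np
from sklearn.linear_model import SGDRegressor
from sklearn.multioutput import MultiOutputRegressor

rng = np.random.RandomState(0)
model = MultiOutputRegressor(SGDRegressor(random_state=0))

for _ in range(10):            # stream of mini-batches
    X = rng.rand(4, 3)         # 4 samples, 3 features
    y = rng.rand(4, 2)         # 2 targets per sample
    model.partial_fit(X, y)    # one incremental SGDRegressor per target

print(model.predict(X[:1]))    # shape (1, 2): one prediction per target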
Code example #3
File: cartpole.py  Project: andreigann/cartpole_try
class DQNSolver:
    # EXPLORATION_MAX, EXPLORATION_MIN, EXPLORATION_DECAY, MEMORY_SIZE,
    # BATCH_SIZE, GAMMA and get_regressor() are module-level definitions
    # elsewhere in cartpole.py.

    def __init__(self, action_space, is_partial_fit: bool = False):
        self.exploration_rate = EXPLORATION_MAX

        self.action_space = action_space
        self.memory = deque(maxlen=MEMORY_SIZE)
        self._is_partial_fit = is_partial_fit

        regressor = get_regressor()
        self.model = MultiOutputRegressor(regressor)

        self.isFit = False

    def remember(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))

    def act(self, state):
        # explore: with probability exploration_rate, pick a random action
        if np.random.rand() < self.exploration_rate:
            return random.randrange(self.action_space)

        # exploit: pick the action with the highest predicted Q-value
        if self.isFit:
            q_values = self.model.predict(state)
        else:
            q_values = np.zeros(self.action_space).reshape(1, -1)
        return np.argmax(q_values[0])

    def experience_replay(self):
        if len(self.memory) < BATCH_SIZE:
            return
        if self._is_partial_fit:
            batch = list(self.memory)
            # With an incremental model we don't have to keep the whole
            # memory: we train on just the last batch, and the rest of the
            # history is stored indirectly in the model through learning.
            self.memory = deque(maxlen=BATCH_SIZE)
        else:
            # non-incremental path: refit on the entire remembered history
            batch = random.sample(self.memory, len(self.memory))
        X = []
        targets = []
        for state, action, reward, state_next, terminal in batch:
            # Q-learning target: r + GAMMA * max_a' Q(s', a') for
            # non-terminal transitions; plain reward otherwise (or before
            # the model has been fit for the first time)
            q_update = reward
            if not terminal and self.isFit:
                q_update = reward + GAMMA * np.amax(
                    self.model.predict(state_next)[0])
            if self.isFit:
                q_values = self.model.predict(state)
            else:
                q_values = np.zeros(self.action_space).reshape(1, -1)
            q_values[0][action] = q_update

            X.append(list(state[0]))
            targets.append(q_values[0])

        if self._is_partial_fit:
            self.model.partial_fit(X, targets)
        else:
            self.model.fit(X, targets)

        self.isFit = True
        self.exploration_rate *= EXPLORATION_DECAY
        self.exploration_rate = max(EXPLORATION_MIN, self.exploration_rate)
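The replay loop above builds regression targets in the standard Q-learning
way: start from the model's current predictions for state, then overwrite
only the entry for the action actually taken with the bootstrapped return.
A condensed sketch of that target construction, with made-up numbers:

import numpy as np

GAMMA = 0.95                       # discount factor, as in the example
q_values = np.array([[0.2, 0.7]])  # current model prediction for `state`
reward, action = 1.0, 0            # transition actually experienced
q_next = np.array([[0.4, 0.1]])    # model prediction for `state_next`

q_update = reward + GAMMA * np.amax(q_next[0])  # 1.0 + 0.95 * 0.4 = 1.38
q_values[0][action] = q_update     # only the taken action's target changes
print(q_values)                    # [[1.38 0.7 ]]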
Code example #4
File: cartpole2.py  Project: andreigann/cartpole_try
class DQNSolver:
    def __init__(self, action_space, is_partial_fit: bool = False):
        self.exploration_rate = EXPLORATION_MAX

        self.action_space = action_space
        self.memory = deque(maxlen=MEMORY_SIZE)
        self._is_partial_fit = is_partial_fit

        if is_partial_fit:
            # Only incremental models can be used here:
            # https://scikit-learn.org/0.18/modules/scaling_strategies.html
            regressor = SGDRegressor()
            self.model = MultiOutputRegressor(regressor)
        else:
            # Here you can use any regression model, incremental or not.
            # The sklearn regression models can be found by searching for
            # "regress" at https://scikit-learn.org/stable/modules/classes.html

            # Ex:
            #regressor = RandomForestRegressor(max_depth=2, random_state=0, n_estimators=100)
            #regressor = LGBMRegressor(n_estimators=100, n_jobs=-1)

            regressor = AdaBoostRegressor(n_estimators=10)
            self.model = MultiOutputRegressor(regressor)

        self.isFit = False

    def remember(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))

    def act(self, state):
        if np.random.rand() < self.exploration_rate:
            return random.randrange(self.action_space)
        if self.isFit:
            q_values = self.model.predict(state)
        else:
            q_values = np.zeros(self.action_space).reshape(1, -1)
        return np.argmax(q_values[0])

    def experience_replay(self):
        if len(self.memory) < BATCH_SIZE:
            return
        if self._is_partial_fit:
            batch = list(self.memory)
            # With an incremental model we don't have to keep the whole
            # memory: we train on just the last batch, and the rest of the
            # history is stored indirectly in the model through learning.
            self.memory = deque(maxlen=BATCH_SIZE)
        else:
            # non-incremental path: refit on the entire remembered history
            batch = random.sample(self.memory, len(self.memory))
        X = []
        targets = []
        for state, action, reward, state_next, terminal in batch:
            # Q-learning target: r + GAMMA * max_a' Q(s', a') for
            # non-terminal transitions; plain reward otherwise
            q_update = reward
            if not terminal and self.isFit:
                q_update = (
                    reward +
                    GAMMA * np.amax(self.model.predict(state_next)[0])
                )
            if self.isFit:
                q_values = self.model.predict(state)
            else:
                q_values = np.zeros(self.action_space).reshape(1, -1)
            q_values[0][action] = q_update

            X.append(list(state[0]))
            targets.append(q_values[0])

        if self._is_partial_fit:
            self.model.partial_fit(X, targets)
        else:
            self.model.fit(X, targets)

        self.isFit = True
        self.exploration_rate *= EXPLORATION_DECAY
        self.exploration_rate = max(EXPLORATION_MIN, self.exploration_rate)
Code example #5
class GBoostModel(BaseModel):
    def build_model(self,
                    model_type: str = "xgboost",
                    scale_data: bool = False):

        self.scale_data = scale_data
        if model_type == "xgboost":
            self.single_model = XGBRegressor(objective="reg:squarederror")
        elif model_type == "lightgbm":
            self.single_model = LGBMRegressor()
        else:
            raise NotImplementedError("Unknown model selected")

        self.model = MultiOutputRegressor(self.single_model)
        self.model_type = model_type

    def fit(self, X, y, fit_separate: bool = False):

        if self.scale_data:
            X, y = self.scalar(X, y)

        self.separate_models = fit_separate

        if self.separate_models:
            self.models = []
            for i in range(y.shape[1]):

                if self.model_type == "xgboost":
                    boost_model = XGBRegressor(objective="reg:squarederror")
                elif self.model_type == "lightgbm":
                    boost_model = LGBMRegressor()
                else:
                    raise ValueError("Unknown model type")

                logger.info(f"Fitting model {i+1} of {y.shape[1]}")
                self.models.append(boost_model.fit(X, y[:, i]))
        else:
            self.model.fit(X, y)

    def partial_fit(self, X, y):

        if not self.model:
            raise NotFittedError("No model found")
        else:
            # Note: MultiOutputRegressor.partial_fit requires the wrapped
            # estimator to implement partial_fit, which neither XGBRegressor
            # nor LGBMRegressor does, so this call raises for both model types
            self.model.partial_fit(X, y)

    def predict(self, X: np.ndarray):

        if len(X.shape) == 1:
            X = X.reshape(1, -1)

        if self.scale_data:
            X = self.xscalar.transform(X)

        if self.separate_models:
            pred = []
            for i in range(len(self.models)):
                logger.debug(f"Predicting with model {i+1} of {len(self.models)}")
                pred.append(self.models[i].predict(X))

            preds = np.array(pred).transpose()
        else:
            preds = self.model.predict(X)
        if self.scale_data:
            preds = self.yscalar.inverse_transform(preds)

        # preds_df = pd.DataFrame(preds)
        # preds_df.columns = label_col_names

        return preds

    def save_model(self, filename):

        if self.scale_data:
            logger.info(f"Scale transformations used, saving to {filename}")
            if not self.separate_models:
                if not any([s in filename for s in [".pkl", ".pickle"]]):
                    filename += ".pkl"
                parent_dir = pathlib.Path(filename).parent
                if not parent_dir.exists():
                    parent_dir.mkdir(parents=True, exist_ok=True)
                path_name = str(parent_dir)
            else:
                path_name = filename
            pickle.dump(self.xscalar,
                        open(os.path.join(path_name, "xscalar.pkl"), "wb"))
            pickle.dump(self.yscalar,
                        open(os.path.join(path_name, "yscalar.pkl"), "wb"))

        if self.separate_models:
            if not pathlib.Path(filename).exists():
                pathlib.Path(filename).mkdir(parents=True, exist_ok=True)
            for i in range(len(self.models)):
                pickle.dump(
                    self.models[i],
                    open(os.path.join(filename, f"model{i}.pkl"), "wb"))
        else:
            parent_dir = pathlib.Path(filename).parent
            if not parent_dir.exists():
                parent_dir.mkdir(parents=True, exist_ok=True)
            pickle.dump(self.model, open(filename, "wb"))

    # def load_model(
    #     self, filename: str, scale_data: bool = False, separate_models: bool = False
    # ):

    #     self.scale_data = scale_data
    #     self.separate_models = separate_models
    #     if self.separate_models:
    #         all_models = os.listdir(filename)
    #         all_models = natsorted(all_models)
    #         if self.scale_data:
    #             all_models = all_models[:-2]
    #         num_models = len(all_models)
    #         models = []
    #         for i in range(num_models):
    #             models.append(
    #                 pickle.load(open(os.path.join(filename, all_models[i]), "rb"))
    #             )
    #         self.models = models
    #     else:
    #         if not any([s in filename for s in [".pkl", ".pickle"]]):
    #             filename += ".pkl"
    #         self.model = pickle.load(open(filename, "rb"))

    #     if scale_data:
    #         if not separate_models:
    #             path_name = str(pathlib.Path(filename).parent)
    #         else:
    #             path_name = filename
    #         self.xscalar = pickle.load(
    #             open(os.path.join(path_name, "xscalar.pkl"), "rb")
    #         )
    #         self.yscalar = pickle.load(
    #             open(os.path.join(path_name, "yscalar.pkl"), "rb")
    #         )

    def sweep(self, params: Dict, X, y):

        tune_search = TuneSearchCV(
            self.model,
            param_distributions=params,
            n_trials=3,
            # early_stopping=True,
            # use_gpu=True
        )

        tune_search.fit(X, y)

        return tune_search
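One caveat on sweep above: self.model is a MultiOutputRegressor, so the keys
in params must use sklearn's nested-parameter syntax to reach the wrapped
booster. A hypothetical param_distributions dict (names and values are
illustrative, not from the original project):

# The `estimator__` prefix routes each setting to the regressor wrapped by
# MultiOutputRegressor, following sklearn's get_params/set_params convention.
params = {
    "estimator__n_estimators": [50, 100, 200],
    "estimator__max_depth": [2, 4, 6],
    "estimator__learning_rate": [0.01, 0.1, 0.3],
}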
Code example #6
class GBoostModel(BaseModel):
    def build_model(
        self,
        model_type: str = "xgboost",
        scale_data: bool = False,
        halt_model: bool = False,
        objective: str = "reg:squarederror",
        fit_separate: bool = False,
        n_estimators: int = 100,
        learning_rate: float = 0.3,
        max_depth: int = 6,
    ):

        self.scale_data = scale_data
        if model_type == "xgboost":
            self.single_model = XGBRegressor(
                objective=objective,
                n_estimators=n_estimators,
                max_depth=max_depth,
                learning_rate=learning_rate,
            )
        elif model_type == "lightgbm":
            self.single_model = LGBMRegressor()
        else:
            raise NotImplementedError("Unknown model selected")

        if halt_model:
            logger.info(
                f"Halt model specified, using same model_type for halt classifier: {model_type}"
            )
            if model_type == "xgboost":
                self.halt_model = XGBClassifier()
            elif model_type == "lightgbm":
                self.halt_model = LGBMClassifier()

        self.model = MultiOutputRegressor(self.single_model)
        self.model_type = model_type
        self.separate_models = fit_separate

    def fit(self, X, y):

        if self.scale_data:
            X, y = self.scalar(X, y)

        if self.separate_models:
            logger.warning(f"Fitting {y.shape[1]} separate models for each output")
            self.models = []
            for i in range(y.shape[1]):
                # clone (from sklearn.base) gives each output its own
                # unfitted copy; reusing self.single_model directly would
                # refit one shared object and keep only the last target's fit
                boost_model = clone(self.single_model)

                logger.info(f"Fitting model {i+1} of {y.shape[1]}")
                self.models.append(boost_model.fit(X, y[:, i]))
        else:
            self.model.fit(X, y)

    def partial_fit(self, X, y):

        if not self.model:
            raise NotFittedError("No model found")
        else:
            self.model.partial_fit(X, y)

    def predict(self, X: np.ndarray):

        if len(X.shape) == 1:
            X = X.reshape(1, -1)

        if self.scale_data:
            X = self.xscalar.transform(X)

        if self.separate_models:
            pred = []
            for i in range(len(self.models)):
                logger.debug(f"Predicting with model {i+1} of {len(self.models)}")
                pred.append(self.models[i].predict(X))

            preds = np.array(pred).transpose()
        else:
            preds = self.model.predict(X)
        if self.scale_data:
            preds = self.yscalar.inverse_transform(preds)

        # preds_df = pd.DataFrame(preds)
        # preds_df.columns = label_col_names

        return preds

    def save_model(self, filename):

        if not self.separate_models:
            if not any([s in filename for s in [".pkl", ".pickle"]]):
                filename += ".pkl"
            parent_dir = pathlib.Path(filename).parent
            if not parent_dir.exists():
                parent_dir.mkdir(parents=True, exist_ok=True)
            path_name = str(parent_dir)
        else:
            file_dir = pathlib.Path(filename)
            if not file_dir.exists():
                logger.info(f"Creating new directories at {file_dir}")
                file_dir.mkdir(parents=True, exist_ok=True)
            path_name = file_dir
        if self.scale_data:
            logger.info(f"Scale transformations used, saving to {filename}")
            pickle.dump(
                self.xscalar, open(os.path.join(path_name, "xscalar.pkl"), "wb")
            )
            pickle.dump(
                self.yscalar, open(os.path.join(path_name, "yscalar.pkl"), "wb")
            )

        if self.separate_models:
            if not pathlib.Path(filename).exists():
                pathlib.Path(filename).mkdir(parents=True, exist_ok=True)
            for i in range(len(self.models)):
                pickle.dump(
                    self.models[i], open(os.path.join(filename, f"model{i}.pkl"), "wb")
                )
        else:
            parent_dir = pathlib.Path(filename).parent
            if not parent_dir.exists():
                parent_dir.mkdir(parents=True, exist_ok=True)
            pickle.dump(self.model, open(filename, "wb"))
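Note that MultiOutputRegressor.partial_fit delegates to partial_fit on each
wrapped estimator, and neither XGBRegressor nor LGBMRegressor implements
partial_fit, so the partial_fit method above fails at runtime for both
supported model types. A defensive variant, sketched here as a hypothetical
change rather than the project's actual code:

def partial_fit(self, X, y):
    if not self.model:
        raise NotFittedError("No model found")
    # Gradient-boosting regressors have no partial_fit; fail with a clear
    # message instead of an error from inside sklearn.
    if not hasattr(self.single_model, "partial_fit"):
        raise NotImplementedError(
            f"{type(self.single_model).__name__} does not support partial_fit"
        )
    self.model.partial_fit(X, y)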
Code example #7
class BaseValueFunction(ABC):
    """
    Abstract base class for value functions.

    MODELTYPES
    ----------
    0 : s     -->  V(s)
    1 : s, a  -->  Q(s, a)  for either discrete or continuous action spaces
    2 : s     -->  Q(s, .)  for discrete action spaces
    3 : s     -->  Q(s, .)  for continuous action spaces (not yet implemented)

    TODO
    ----
    add batch_eval_typeIII and model type 3

    """
    MODELTYPES = (0, 1, 2)

    def __init__(self,
                 env,
                 regressor,
                 transformer=None,
                 attempt_fit_transformer=False):
        self.env = env
        self.regressor = regressor
        self.transformer = transformer
        self.attempt_fit_transformer = attempt_fit_transformer
        self._init_model()

    @abstractmethod
    def __call__(self, *args):
        """
        Compute the value for a state observation or state-action pair
        (depending on model type).

        Parameters
        ----------
        args
            Either state or state-action pair, depending on model type.

        Returns
        -------
        v, q_I, q_II or q_III : float, float, array of floats, or func
            The estimated value. For a state value function (type 0) as well
            as for a type I model this returns a single float. For a type II
            model this returns an array of Q-values. For a type III model,
            this returns a callable object (function) that maps
            :math:`a\\mapsto Q(s,a)`.

        """
        pass

    @abstractmethod
    def X(self, *args):
        """
        Create a feature vector from a state observation or state-action pair.
        This is the design matrix that is fed into the regressor, i.e. function
        approximator.

        Parameters
        ----------
        args
            Either state or state-action pair, depending on model type.

        Returns
        -------
        X : 2d-array, shape = [1, num_features]
            A sklearn-style design matrix of a single data point.

        """
        pass

    def update(self, X, Y):
        """
        Update the value function. This method will call :term:`partial_fit` on
        the underlying sklearn regressor.

        Parameters
        ----------
        X : 2d-array, shape = [batch_size, num_features]
            A sklearn-style design matrix of a batch of data points.

        Y : 1d- or 2d-array, depends on model type
            A sklearn-style label array. The shape depends on the model type.
            For a type-I model, the output shape is `[batch_size]` and for a
            type-II model the shape is `[batch_size, num_actions]`.

        """
        self.regressor.partial_fit(X, Y)

    def _init_model(self):
        # n is needed to create dummy output Y
        try:
            n = self.env.action_space.n
        except AttributeError:
            raise NotImplementedError(
                "can only do discrete action spaces for now")

        # create dummy input X
        s = self.env.observation_space.sample()
        if isinstance(s, np.ndarray):
            s = np.random.rand(*s.shape)  # otherwise we get overflow

        if self.MODELTYPE == 0:
            X = self.X(s)
            Y = np.zeros(1)
        elif self.MODELTYPE == 1:
            a = self.env.action_space.sample()
            X = self.X(s, a)
            Y = np.zeros(1)
        elif self.MODELTYPE == 2:
            X = self.X(s)
            Y = np.zeros((1, n))
        elif self.MODELTYPE == 3:
            raise NotImplementedError("MODELTYPE == 3")
        else:
            raise ValueError("bad MODELTYPE")

        try:
            self.regressor.partial_fit(X, Y)
        except ValueError as e:
            expected_failure = (
                e.args[0].startswith("bad input shape") and  # Y has bad shape
                self.MODELTYPE == 2 and  # type II model
                not isinstance(self.regressor, MultiOutputRegressor)
            )  # not yet wrapped
            if not expected_failure:
                raise
            self.regressor = MultiOutputRegressor(self.regressor)
            self.regressor.partial_fit(X, Y)

    def _transform(self, X):
        if self.transformer is not None:
            try:
                X = self.transformer.transform(X)
            except NotFittedError:
                if not self.attempt_fit_transformer:
                    raise NotFittedError(
                        "transformer needs to be fitted; setting "
                        "attempt_fit_transformer=True will fit the "
                        "transformer on one data point")
                print("attemting to fit transformer", file=sys.stderr)
                X = self.transformer.fit_transform(X)
        return X
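The _init_model probe above is a reusable pattern: call partial_fit once on
dummy data, and only wrap the regressor in MultiOutputRegressor if the bare
estimator rejects the 2-D target. A standalone sketch of the same idea,
assuming scikit-learn (ensure_multioutput is a hypothetical helper, not part
of the original class):

import numpy as np
from sklearn.linear_model import SGDRegressor
from sklearn.multioutput import MultiOutputRegressor

def ensure_multioutput(regressor, num_features, num_actions):
    # Probe with a dummy batch; a single-output regressor raises ValueError
    # on the (1, num_actions) target and gets wrapped before retrying.
    X = np.zeros((1, num_features))
    Y = np.zeros((1, num_actions))
    try:
        regressor.partial_fit(X, Y)
    except ValueError:
        regressor = MultiOutputRegressor(regressor)
        regressor.partial_fit(X, Y)
    return regressor

model = ensure_multioutput(SGDRegressor(), num_features=4, num_actions=2)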
Code example #8
File: cartpole.py  Project: asuiu/cartpole
class DQNSolver:
    def __init__(self, action_space, is_partial_fit: bool = False):
        self.exploration_rate = EXPLORATION_MAX

        self.action_space = action_space
        self.memory = deque(maxlen=MEMORY_SIZE)
        self._is_partial_fit = is_partial_fit

        if is_partial_fit:
            # Only incremental models can be used here:
            # https://scikit-learn.org/0.18/modules/scaling_strategies.html
            regressor = SGDRegressor()
            self.model = MultiOutputRegressor(regressor)
        else:
            # Here you can use any regression model, incremental or not.
            # The sklearn regression models can be found by searching for
            # "regress" at https://scikit-learn.org/stable/modules/classes.html

            # Ex:
            #regressor = RandomForestRegressor(max_depth=2, random_state=0, n_estimators=100)
            regressor = LGBMRegressor(n_estimators=100, n_jobs=-1)

            #regressor = AdaBoostRegressor(n_estimators=10)
            self.model = MultiOutputRegressor(regressor)

        self.isFit = False

    def remember(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))

    def act(self, state):
        if np.random.rand() < self.exploration_rate:
            return random.randrange(self.action_space)
        if self.isFit:
            q_values = self.model.predict(state)
        else:
            q_values = np.zeros(self.action_space).reshape(1, -1)
        return np.argmax(q_values[0])

    def experience_replay(self):
        if len(self.memory) < BATCH_SIZE:
            return
        X = []
        targets = []
        if self._is_partial_fit:
            batch = random.sample(self.memory, BATCH_SIZE)
        else:
            # non-incremental path: refit on the entire remembered history
            batch = random.sample(self.memory, len(self.memory))
        if len(self.memory) % 1000 == 0 and len(self.memory) < MEMORY_SIZE:
            print(f"Memory size: {len(self.memory)}")
        for state, action, reward, state_next, terminal in batch:
            q_update = reward
            if self.isFit:
                if not terminal:
                    q_update = (
                        reward +
                        GAMMA * np.amax(self.model.predict(state_next)[0]))
                q_values = self.model.predict(state)[0]
            else:
                q_values = np.zeros(self.action_space)
            q_values[action] = q_update

            if self._is_partial_fit:
                self.model.partial_fit([list(state[0])], [q_values])
            else:
                X.append(list(state[0]))
                targets.append(q_values)

        if not self._is_partial_fit:
            self.model.fit(X, targets)

        self.isFit = True
        self.exploration_rate *= EXPLORATION_DECAY
        self.exploration_rate = max(EXPLORATION_MIN, self.exploration_rate)