def test_multi_target_sample_weight_partial_fit():
    # weighted regressor
    X = [[1, 2, 3], [4, 5, 6]]
    y = [[3.141, 2.718], [2.718, 3.141]]
    w = [2., 1.]
    rgr_w = MultiOutputRegressor(SGDRegressor(random_state=0, max_iter=5))
    rgr_w.partial_fit(X, y, w)

    # weighted with different weights
    w = [2., 2.]
    rgr = MultiOutputRegressor(SGDRegressor(random_state=0, max_iter=5))
    rgr.partial_fit(X, y, w)

    assert_not_equal(rgr.predict(X)[0][0], rgr_w.predict(X)[0][0])
def test_multi_target_sample_weight_partial_fit():
    # weighted regressor
    X = [[1, 2, 3], [4, 5, 6]]
    y = [[3.141, 2.718], [2.718, 3.141]]
    w = [2., 1.]
    rgr_w = MultiOutputRegressor(SGDRegressor(random_state=0))
    rgr_w.partial_fit(X, y, w)

    # weighted with different weights
    w = [2., 2.]
    rgr = MultiOutputRegressor(SGDRegressor(random_state=0))
    rgr.partial_fit(X, y, w)

    assert_not_equal(rgr.predict(X)[0][0], rgr_w.predict(X)[0][0])
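# The tests above exercise sample weights through a single call. As a minimal,
# self-contained sketch (synthetic data, not from the original test suite),
# MultiOutputRegressor.partial_fit can also stream mini-batches into an
# incremental base estimator, forwarding sample_weight to each per-output
# SGDRegressor:
import numpy as np
from sklearn.linear_model import SGDRegressor
from sklearn.multioutput import MultiOutputRegressor

rng = np.random.RandomState(0)
X_stream = rng.rand(100, 3)
Y_stream = X_stream @ rng.rand(3, 2)  # two regression targets

model = MultiOutputRegressor(SGDRegressor(random_state=0))
for start in range(0, len(X_stream), 10):  # ten rows per mini-batch
    batch = slice(start, start + 10)
    weights = np.full(10, 2.0)  # per-sample weights, one per row in the batch
    model.partial_fit(X_stream[batch], Y_stream[batch], sample_weight=weights)

print(model.predict(X_stream[:2]))  # one prediction column per target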
class DQNSolver:

    def __init__(self, action_space, is_partial_fit: bool = False):
        self.exploration_rate = EXPLORATION_MAX
        self.action_space = action_space
        self.memory = deque(maxlen=MEMORY_SIZE)
        self._is_partial_fit = is_partial_fit
        regressor = get_regressor()
        self.model = MultiOutputRegressor(regressor)
        self.isFit = False

    def remember(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))

    def act(self, state):
        if np.random.rand() < self.exploration_rate:
            return random.randrange(self.action_space)
        if self.isFit:
            q_values = self.model.predict(state)
        else:
            q_values = np.zeros(self.action_space).reshape(1, -1)
        return np.argmax(q_values[0])

    def experience_replay(self):
        if len(self.memory) < BATCH_SIZE:
            return
        if self._is_partial_fit:
            batch = list(self.memory)
            # With an incremental model we don't have to keep the whole memory:
            # we train on the most recent transitions only, and the rest of the
            # history is stored within the model, indirectly, through learning.
            self.memory = deque(maxlen=BATCH_SIZE)
        else:
            # replay over the entire memory
            batch = random.sample(self.memory, len(self.memory))
        X = []
        targets = []
        for state, action, reward, state_next, terminal in batch:
            q_update = reward
            if not terminal and self.isFit:
                # bootstrap from the maximum predicted Q-value of the next state
                q_update = reward + GAMMA * np.amax(self.model.predict(state_next)[0])
            if self.isFit:
                q_values = self.model.predict(state)
            else:
                q_values = np.zeros(self.action_space).reshape(1, -1)
            q_values[0][action] = q_update
            X.append(list(state[0]))
            targets.append(q_values[0])
        if self._is_partial_fit:
            self.model.partial_fit(X, targets)
        else:
            self.model.fit(X, targets)
        self.isFit = True
        self.exploration_rate *= EXPLORATION_DECAY
        self.exploration_rate = max(EXPLORATION_MIN, self.exploration_rate)
class DQNSolver:

    def __init__(self, action_space, is_partial_fit: bool = False):
        self.exploration_rate = EXPLORATION_MAX
        self.action_space = action_space
        self.memory = deque(maxlen=MEMORY_SIZE)
        self._is_partial_fit = is_partial_fit
        if is_partial_fit:
            # Here you can only use incremental models:
            # https://scikit-learn.org/0.18/modules/scaling_strategies.html
            regressor = SGDRegressor()
            self.model = MultiOutputRegressor(regressor)
        else:
            # Here you can use whatever regression model you want, simple or incremental.
            # The sklearn regression models can be found by searching for "regress" at
            # https://scikit-learn.org/stable/modules/classes.html
            # Ex:
            # regressor = RandomForestRegressor(max_depth=2, random_state=0, n_estimators=100)
            # regressor = LGBMRegressor(n_estimators=100, n_jobs=-1)
            regressor = AdaBoostRegressor(n_estimators=10)
            self.model = MultiOutputRegressor(regressor)
        self.isFit = False

    def remember(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))

    def act(self, state):
        if np.random.rand() < self.exploration_rate:
            return random.randrange(self.action_space)
        if self.isFit:
            q_values = self.model.predict(state)
        else:
            q_values = np.zeros(self.action_space).reshape(1, -1)
        return np.argmax(q_values[0])

    def experience_replay(self):
        if len(self.memory) < BATCH_SIZE:
            return
        if self._is_partial_fit:
            batch = list(self.memory)
            # With an incremental model we don't have to keep the whole memory:
            # we train on the most recent transitions only, and the rest of the
            # history is stored within the model, indirectly, through learning.
            self.memory = deque(maxlen=BATCH_SIZE)
        else:
            # replay over the entire memory
            batch = random.sample(self.memory, len(self.memory))
        X = []
        targets = []
        for state, action, reward, state_next, terminal in batch:
            q_update = reward
            if not terminal and self.isFit:
                # bootstrap from the maximum predicted Q-value of the next state
                q_update = reward + GAMMA * np.amax(self.model.predict(state_next)[0])
            if self.isFit:
                q_values = self.model.predict(state)
            else:
                q_values = np.zeros(self.action_space).reshape(1, -1)
            q_values[0][action] = q_update
            X.append(list(state[0]))
            targets.append(q_values[0])
        if self._is_partial_fit:
            self.model.partial_fit(X, targets)
        else:
            self.model.fit(X, targets)
        self.isFit = True
        self.exploration_rate *= EXPLORATION_DECAY
        self.exploration_rate = max(EXPLORATION_MIN, self.exploration_rate)
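# A hypothetical driver loop for the solver above (not part of the original
# snippet): it assumes a classic Gym environment with the pre-0.26 reset/step
# API and the module-level constants (EXPLORATION_*, BATCH_SIZE, MEMORY_SIZE,
# GAMMA) defined elsewhere in the file.
import gym
import numpy as np

def train(episodes: int = 100, is_partial_fit: bool = True):
    env = gym.make("CartPole-v1")
    observation_size = env.observation_space.shape[0]
    solver = DQNSolver(env.action_space.n, is_partial_fit=is_partial_fit)
    for episode in range(episodes):
        state = np.reshape(env.reset(), (1, observation_size))
        done = False
        total_reward = 0.0
        while not done:
            action = solver.act(state)
            state_next, reward, done, _ = env.step(action)
            state_next = np.reshape(state_next, (1, observation_size))
            solver.remember(state, action, reward, state_next, done)
            solver.experience_replay()
            state = state_next
            total_reward += reward
        print(f"episode {episode}: reward={total_reward}")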
class GBoostModel(BaseModel):

    def build_model(self, model_type: str = "xgboost", scale_data: bool = False):
        self.scale_data = scale_data
        if model_type == "xgboost":
            self.single_model = XGBRegressor(objective="reg:squarederror")
        elif model_type == "lightgbm":
            self.single_model = LGBMRegressor()
        else:
            raise NotImplementedError("Unknown model selected")
        self.model = MultiOutputRegressor(self.single_model)
        self.model_type = model_type

    def fit(self, X, y, fit_separate: bool = False):
        if self.scale_data:
            X, y = self.scalar(X, y)
        self.separate_models = fit_separate
        if self.separate_models:
            self.models = []
            for i in range(y.shape[1]):
                if self.model_type == "xgboost":
                    boost_model = XGBRegressor(objective="reg:squarederror")
                elif self.model_type == "lightgbm":
                    boost_model = LGBMRegressor()
                else:
                    raise ValueError("Unknown model type")
                logger.info(f"Fitting model {i+1} of {y.shape[1]}")
                self.models.append(boost_model.fit(X, y[:, i]))
        else:
            self.model.fit(X, y)

    def partial_fit(self, X, y):
        # Note: MultiOutputRegressor.partial_fit only works when the wrapped
        # estimator itself implements partial_fit, which XGBRegressor and
        # LGBMRegressor do not.
        if not self.model:
            raise NotFittedError("No model found")
        else:
            self.model.partial_fit(X, y)

    def predict(self, X: np.ndarray):
        if len(X.shape) == 1:
            X = X.reshape(1, -1)
        if self.scale_data:
            X = self.xscalar.transform(X)
        if self.separate_models:
            pred = []
            for i in range(len(self.models)):
                logger.debug(f"Predicting model {i} of {len(self.models)}")
                pred.append(self.models[i].predict(X))
            preds = np.array(pred).transpose()
        else:
            preds = self.model.predict(X)
        if self.scale_data:
            preds = self.yscalar.inverse_transform(preds)
        # preds_df = pd.DataFrame(preds)
        # preds_df.columns = label_col_names
        return preds

    def save_model(self, filename):
        if self.scale_data:
            logger.info(f"Scale transformations used, saving to {filename}")
            if not self.separate_models:
                if not any([s in filename for s in [".pkl", ".pickle"]]):
                    filename += ".pkl"
                parent_dir = pathlib.Path(filename).parent
                if not parent_dir.exists():
                    parent_dir.mkdir(parents=True, exist_ok=True)
                path_name = str(parent_dir)
            else:
                path_name = filename
            pickle.dump(self.xscalar, open(os.path.join(path_name, "xscalar.pkl"), "wb"))
            pickle.dump(self.yscalar, open(os.path.join(path_name, "yscalar.pkl"), "wb"))
        if self.separate_models:
            if not pathlib.Path(filename).exists():
                pathlib.Path(filename).mkdir(parents=True, exist_ok=True)
            for i in range(len(self.models)):
                pickle.dump(
                    self.models[i],
                    open(os.path.join(filename, f"model{i}.pkl"), "wb"))
        else:
            parent_dir = pathlib.Path(filename).parent
            if not parent_dir.exists():
                parent_dir.mkdir(parents=True, exist_ok=True)
            pickle.dump(self.model, open(filename, "wb"))

    # def load_model(
    #     self, filename: str, scale_data: bool = False, separate_models: bool = False
    # ):
    #     self.scale_data = scale_data
    #     self.separate_models = separate_models
    #     if self.separate_models:
    #         all_models = os.listdir(filename)
    #         all_models = natsorted(all_models)
    #         if self.scale_data:
    #             all_models = all_models[:-2]
    #         num_models = len(all_models)
    #         models = []
    #         for i in range(num_models):
    #             models.append(
    #                 pickle.load(open(os.path.join(filename, all_models[i]), "rb"))
    #             )
    #         self.models = models
    #     else:
    #         if not any([s in filename for s in [".pkl", ".pickle"]]):
    #             filename += ".pkl"
    #         self.model = pickle.load(open(filename, "rb"))
    #     if scale_data:
    #         if not separate_models:
    #             path_name = str(pathlib.Path(filename).parent)
    #         else:
    #             path_name = filename
    #         self.xscalar = pickle.load(
    #             open(os.path.join(path_name, "xscalar.pkl"), "rb")
    #         )
    #         self.yscalar = pickle.load(
    #             open(os.path.join(path_name, "yscalar.pkl"), "rb")
    #         )

    def sweep(self, params: Dict, X, y):
        tune_search = TuneSearchCV(
            self.model,
            param_distributions=params,
            n_trials=3,
            # early_stopping=True,
            # use_gpu=True
        )
        tune_search.fit(X, y)
        return tune_search
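# A sketch of driving GBoostModel end to end (synthetic data; the parameter
# grid is illustrative, not from the original repo). Because self.model is a
# MultiOutputRegressor, the wrapped booster's hyperparameters are addressed
# with the estimator__ prefix in the sweep:
import numpy as np

X_train = np.random.rand(200, 5)
y_train = np.random.rand(200, 2)  # two outputs -> one booster per output

gbm = GBoostModel()
gbm.build_model(model_type="xgboost", scale_data=False)
gbm.fit(X_train, y_train, fit_separate=False)
preds = gbm.predict(X_train[:5])  # shape (5, 2)

search = gbm.sweep(
    {"estimator__n_estimators": [50, 100], "estimator__max_depth": [3, 6]},
    X_train,
    y_train,
)
print(search.best_params_)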
class GBoostModel(BaseModel):

    def build_model(
        self,
        model_type: str = "xgboost",
        scale_data: bool = False,
        halt_model: bool = False,
        objective: str = "reg:squarederror",
        fit_separate: bool = False,
        n_estimators: int = 100,
        learning_rate: float = 0.3,
        max_depth: int = 6,
    ):
        self.scale_data = scale_data
        if model_type == "xgboost":
            self.single_model = XGBRegressor(
                objective=objective,
                n_estimators=n_estimators,
                max_depth=max_depth,
                learning_rate=learning_rate,
            )
        elif model_type == "lightgbm":
            self.single_model = LGBMRegressor()
        else:
            raise NotImplementedError("Unknown model selected")
        if halt_model:
            logger.info(
                f"Halt model specified, using same model_type for halt classifier: {model_type}"
            )
            if model_type == "xgboost":
                self.halt_model = XGBClassifier()
            elif model_type == "lightgbm":
                self.halt_model = LGBMClassifier()
        self.model = MultiOutputRegressor(self.single_model)
        self.model_type = model_type
        self.separate_models = fit_separate

    def fit(self, X, y):
        if self.scale_data:
            X, y = self.scalar(X, y)
        if self.separate_models:
            logger.warning(f"Fitting {y.shape[1]} separate models for each output")
            self.models = []
            for i in range(y.shape[1]):
                # sklearn.base.clone gives every output its own unfitted copy of
                # the template; reusing self.single_model directly would append
                # the same object repeatedly, fitted only on the last target.
                boost_model = clone(self.single_model)
                logger.info(f"Fitting model {i+1} of {y.shape[1]}")
                self.models.append(boost_model.fit(X, y[:, i]))
        else:
            self.model.fit(X, y)

    def partial_fit(self, X, y):
        # Note: MultiOutputRegressor.partial_fit only works when the wrapped
        # estimator itself implements partial_fit, which XGBRegressor and
        # LGBMRegressor do not.
        if not self.model:
            raise NotFittedError("No model found")
        else:
            self.model.partial_fit(X, y)

    def predict(self, X: np.ndarray):
        if len(X.shape) == 1:
            X = X.reshape(1, -1)
        if self.scale_data:
            X = self.xscalar.transform(X)
        if self.separate_models:
            pred = []
            for i in range(len(self.models)):
                logger.debug(f"Predicting model {i} of {len(self.models)}")
                pred.append(self.models[i].predict(X))
            preds = np.array(pred).transpose()
        else:
            preds = self.model.predict(X)
        if self.scale_data:
            preds = self.yscalar.inverse_transform(preds)
        # preds_df = pd.DataFrame(preds)
        # preds_df.columns = label_col_names
        return preds

    def save_model(self, filename):
        if not self.separate_models:
            if not any([s in filename for s in [".pkl", ".pickle"]]):
                filename += ".pkl"
            parent_dir = pathlib.Path(filename).parent
            if not parent_dir.exists():
                parent_dir.mkdir(parents=True, exist_ok=True)
            path_name = str(parent_dir)
        else:
            file_dir = pathlib.Path(filename)
            if not file_dir.exists():
                logger.info(f"Creating new directories at {file_dir}")
                file_dir.mkdir(parents=True, exist_ok=True)
            path_name = file_dir
        if self.scale_data:
            logger.info(f"Scale transformations used, saving to {filename}")
            pickle.dump(
                self.xscalar, open(os.path.join(path_name, "xscalar.pkl"), "wb")
            )
            pickle.dump(
                self.yscalar, open(os.path.join(path_name, "yscalar.pkl"), "wb")
            )
        if self.separate_models:
            if not pathlib.Path(filename).exists():
                pathlib.Path(filename).mkdir(parents=True, exist_ok=True)
            for i in range(len(self.models)):
                pickle.dump(
                    self.models[i], open(os.path.join(filename, f"model{i}.pkl"), "wb")
                )
        else:
            parent_dir = pathlib.Path(filename).parent
            if not parent_dir.exists():
                parent_dir.mkdir(parents=True, exist_ok=True)
            pickle.dump(self.model, open(filename, "wb"))
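# A minimal demonstration (synthetic, not from the original repo) of why the
# per-output loop above clones the template estimator: sklearn.base.clone
# returns a fresh unfitted copy, whereas reusing one instance would leave every
# list entry pointing at the same object, fitted only on the last target column.
import numpy as np
from sklearn.base import clone
from sklearn.linear_model import LinearRegression

X_demo = np.random.rand(50, 3)
y_demo = np.random.rand(50, 2)

template = LinearRegression()
models = [clone(template).fit(X_demo, y_demo[:, i]) for i in range(y_demo.shape[1])]
assert models[0] is not models[1]  # independent fitted estimators, one per output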
class BaseValueFunction(ABC):
    """
    Abstract base class for value functions.

    MODELTYPES
    ----------
    0 : s --> V(s)

    1 : s, a --> Q(s, a)  for either discrete or continuous action spaces

    2 : s --> Q(s, .)  for discrete action spaces

    3 : s --> Q(s, .)  for continuous action spaces (not yet implemented)

    TODO
    ----
    add batch_eval_typeIII and model type 3

    """
    MODELTYPES = (0, 1, 2)

    def __init__(self, env, regressor, transformer=None,
                 attempt_fit_transformer=False):
        self.env = env
        self.regressor = regressor
        self.transformer = transformer
        self.attempt_fit_transformer = attempt_fit_transformer
        self._init_model()

    @abstractmethod
    def __call__(self, *args):
        """
        Compute the value for a state observation or state-action pair
        (depending on model type).

        Parameters
        ----------
        args
            Either state or state-action pair, depending on model type.

        Returns
        -------
        v, q_I, q_II or q_III : float, float, array of floats, or func
            The estimated (action) value. For a state value function (type 0)
            as well as for a type I model this returns a single float. For a
            type II model this returns an array of Q-values. For a type III
            model, this returns a callable object (function) that maps
            :math:`a\\mapsto Q(s,a)`.

        """
        pass

    @abstractmethod
    def X(self, *args):
        """
        Create a feature vector from a state observation or state-action pair.
        This is the design matrix that is fed into the regressor, i.e. function
        approximator.

        Parameters
        ----------
        args
            Either state or state-action pair, depending on model type.

        Returns
        -------
        X : 2d-array, shape = [1, num_features]
            A sklearn-style design matrix of a single data point.

        """
        pass

    def update(self, X, Y):
        """
        Update the value function. This method will call :term:`partial_fit` on
        the underlying sklearn regressor.

        Parameters
        ----------
        X : 2d-array, shape = [batch_size, num_features]
            A sklearn-style design matrix of a batch of data points.

        Y : 1d- or 2d-array, depends on model type
            A sklearn-style label array. The shape depends on the model type.
            For a type-I model, the output shape is `[batch_size]` and for a
            type-II model the shape is `[batch_size, num_actions]`.

        """
        self.regressor.partial_fit(X, Y)

    def _init_model(self):
        # n is needed to create dummy output Y
        try:
            n = self.env.action_space.n
        except AttributeError:
            raise NotImplementedError(
                "can only do discrete action spaces for now")

        # create dummy input X
        s = self.env.observation_space.sample()
        if isinstance(s, np.ndarray):
            s = np.random.rand(*s.shape)  # otherwise we get overflow

        if self.MODELTYPE == 0:
            X = self.X(s)
            Y = np.zeros(1)
        elif self.MODELTYPE == 1:
            a = self.env.action_space.sample()
            X = self.X(s, a)
            Y = np.zeros(1)
        elif self.MODELTYPE == 2:
            X = self.X(s)
            Y = np.zeros((1, n))
        elif self.MODELTYPE == 3:
            raise NotImplementedError("MODELTYPE == 3")
        else:
            raise ValueError("bad MODELTYPE")

        try:
            self.regressor.partial_fit(X, Y)
        except ValueError as e:
            expected_failure = (
                e.args[0].startswith("bad input shape") and  # Y has bad shape
                self.MODELTYPE == 2 and                      # type II model
                not isinstance(self.regressor, MultiOutputRegressor))  # not yet wrapped
            if not expected_failure:
                raise
            self.regressor = MultiOutputRegressor(self.regressor)
            self.regressor.partial_fit(X, Y)

    def _transform(self, X):
        if self.transformer is not None:
            try:
                X = self.transformer.transform(X)
            except NotFittedError:
                if not self.attempt_fit_transformer:
                    raise NotFittedError(
                        "transformer needs to be fitted; setting "
                        "attempt_fit_transformer=True will fit the "
                        "transformer on one data point")
                print("attempting to fit transformer", file=sys.stderr)
                X = self.transformer.fit_transform(X)
        return X
class DQNSolver:

    def __init__(self, action_space, is_partial_fit: bool = False):
        self.exploration_rate = EXPLORATION_MAX
        self.action_space = action_space
        self.memory = deque(maxlen=MEMORY_SIZE)
        self._is_partial_fit = is_partial_fit
        if is_partial_fit:
            # Here you can only use incremental models:
            # https://scikit-learn.org/0.18/modules/scaling_strategies.html
            regressor = SGDRegressor()
            self.model = MultiOutputRegressor(regressor)
        else:
            # Here you can use whatever regression model you want, simple or incremental.
            # The sklearn regression models can be found by searching for "regress" at
            # https://scikit-learn.org/stable/modules/classes.html
            # Ex:
            # regressor = RandomForestRegressor(max_depth=2, random_state=0, n_estimators=100)
            regressor = LGBMRegressor(n_estimators=100, n_jobs=-1)
            # regressor = AdaBoostRegressor(n_estimators=10)
            self.model = MultiOutputRegressor(regressor)
        self.isFit = False

    def remember(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))

    def act(self, state):
        if np.random.rand() < self.exploration_rate:
            return random.randrange(self.action_space)
        if self.isFit:
            q_values = self.model.predict(state)
        else:
            q_values = np.zeros(self.action_space).reshape(1, -1)
        return np.argmax(q_values[0])

    def experience_replay(self):
        if len(self.memory) < BATCH_SIZE:
            return
        X = []
        targets = []
        if self._is_partial_fit:
            batch = random.sample(self.memory, BATCH_SIZE)
        else:
            # replay over the entire memory
            batch = random.sample(self.memory, len(self.memory))
        if len(self.memory) % 1000 == 0 and len(self.memory) < MEMORY_SIZE:
            print(f"Memory size: {len(self.memory)}")
        for state, action, reward, state_next, terminal in batch:
            q_update = reward
            if self.isFit:
                if not terminal:
                    # bootstrap from the maximum predicted Q-value of the next state
                    q_update = reward + GAMMA * np.amax(self.model.predict(state_next)[0])
                q_values = self.model.predict(state)[0]
            else:
                q_values = np.zeros(self.action_space)
            q_values[action] = q_update
            if self._is_partial_fit:
                # incremental update, one transition at a time
                self.model.partial_fit([list(state[0])], [q_values])
            else:
                X.append(list(state[0]))
                targets.append(q_values)
        if not self._is_partial_fit:
            self.model.fit(X, targets)
        self.isFit = True
        self.exploration_rate *= EXPLORATION_DECAY
        self.exploration_rate = max(EXPLORATION_MIN, self.exploration_rate)
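# Sketch contrasting the two update styles used above (synthetic shapes, not
# from the original loop): an incremental model accepts a single transition per
# partial_fit call, while the batch path needs the accumulated arrays in one
# fit call that retrains from scratch.
import numpy as np
from sklearn.linear_model import SGDRegressor
from sklearn.multioutput import MultiOutputRegressor

incremental = MultiOutputRegressor(SGDRegressor(random_state=0))
state_features = np.random.rand(1, 4)  # one observation, shaped [1, n_features]
q_targets = np.random.rand(1, 2)       # one Q-vector, shaped [1, n_actions]
incremental.partial_fit(state_features, q_targets)  # valid even for a single sample

batch_model = MultiOutputRegressor(SGDRegressor(random_state=0))
X_batch = np.random.rand(32, 4)
Y_batch = np.random.rand(32, 2)
batch_model.fit(X_batch, Y_batch)  # non-incremental: refits on the whole batch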