def optimal_machines_grid(self, X, y, line_points=200, info=False):
    """
    Find the optimal epsilon and machine-combination for a single query point
    for the COBRA predictor.

    Every non-empty combination of the machines in
    ``self.aggregate.estimators_`` is evaluated at every epsilon of a linearly
    spaced grid. For each (combination, epsilon) pair a fresh ``Cobra``
    machine is fitted on ``self.aggregate.X_`` / ``self.aggregate.y_`` and the
    squared error of its prediction for ``X`` against ``y`` is recorded.

    Parameters
    ----------
    X: array-like, [n_features]
        Vector for which we want optimal machines and epsilon values.
        It is reshaped to a single row with ``X.reshape(1, -1)``, so it has
        to provide ``reshape`` (e.g. a numpy array).

    y: float
        Target value for query to compare.

    line_points: integer, optional
        Number of epsilon values to traverse the grid.

    info: bool, optional
        If True, return the full MSE dictionary instead of the optimum.

    Returns
    -------
    MSE: dictionary mapping (machine combination, epsilon) to the squared
        error of the prediction. Returned on its own when ``info`` is True.

    opt, MSE[opt]: the (machine combination, epsilon) key with the smallest
        error together with that error. Returned when ``info`` is False.

    Raises
    ------
    ValueError
        If ``self.aggregate.all_predictions_`` holds fewer than two values,
        because no epsilon grid can be derived from them. Also raised (by
        ``min``) when there is no machine to evaluate and ``info`` is False.
    """
    # The grid runs from the smallest gap between two neighbouring (sorted)
    # predictions up to the full spread of the predictions.
    # NOTE(review): assumes all_predictions_ is a flat sequence of scalars.
    predictions = sorted(self.aggregate.all_predictions_)
    if len(predictions) < 2:
        raise ValueError(
            "at least two predictions are required to build the epsilon grid")
    gaps = [upper - lower
            for lower, upper in zip(predictions, predictions[1:])]
    emin = min(gaps)
    emax = predictions[-1] - predictions[0]
    erange = np.linspace(emin, emax, line_points)

    # The machine combinations and the reshaped query do not depend on
    # epsilon, so they are built once instead of once per grid point.
    machine_names = list(self.aggregate.estimators_.keys())
    combinations = [
        combination
        for num in range(1, len(machine_names) + 1)
        for combination in itertools.combinations(machine_names, num)
    ]
    query = X.reshape(1, -1)

    MSE = {}
    for epsilon in erange:
        for combination in combinations:
            machine = Cobra(random_state=self.random_state, epsilon=epsilon)
            machine.fit(self.aggregate.X_, self.aggregate.y_, default=False)
            machine.split_data()
            machine.load_default(machine_list=combination)
            machine.load_machine_predictions()
            result = machine.predict(query)
            MSE[(combination, epsilon)] = np.square(y - result)

    if info:
        return MSE
    opt = min(MSE, key=MSE.get)
    return opt, MSE[opt]
def optimal_machines(self, X, y, single=False, epsilon=None, info=False):
    """
    Search for the machine combination with the lowest error for the COBRA
    predictor at a fixed epsilon.

    Every non-empty combination of the machines in
    ``self.aggregate.estimators_`` gets its own freshly fitted ``Cobra``
    machine (trained on ``self.aggregate.X_`` / ``self.aggregate.y_``), whose
    prediction for ``X`` is scored against ``y``.

    Parameters
    ----------
    X: array-like
        Testing data, or a single [n_features] query vector when ``single``
        is True (it is then reshaped with ``X.reshape(1, -1)``).

    y: float or array-like
        Target value(s) the predictions are compared with.

    single: boolean, optional
        If True, score a single query point with the squared error;
        otherwise score with ``mean_squared_error(y, predictions)``.

    epsilon: float, optional
        Fixed epsilon used for every machine. Falls back to
        ``self.aggregate.epsilon`` when None.

    info: bool, optional
        If True, return the error dictionary for all combinations instead
        of the optimum.

    Returns
    -------
    MSE: dictionary mapping each machine combination to its error.
        Returned on its own when ``info`` is True.

    opt, MSE[opt]: the combination with the smallest error and that error.
        Returned when ``info`` is False.
    """
    eps = self.aggregate.epsilon if epsilon is None else epsilon

    errors = {}
    for size in range(1, len(self.aggregate.estimators_) + 1):
        names = self.aggregate.estimators_.keys()
        for subset in itertools.combinations(names, size):
            # A new machine per combination, restricted to this subset.
            candidate = Cobra(random_state=self.random_state, epsilon=eps)
            candidate.fit(self.aggregate.X_, self.aggregate.y_, default=False)
            candidate.split_data()
            candidate.load_default(machine_list=subset)
            candidate.load_machine_predictions()

            if not single:
                predictions = candidate.predict(X)
                errors[subset] = mean_squared_error(y, predictions)
            else:
                prediction = candidate.predict(X.reshape(1, -1))
                errors[subset] = np.square(y - prediction)

    if info:
        return errors
    best = min(errors, key=errors.get)
    return best, errors[best]
def optimal_split(self, X, y, split=None, epsilon=None, info=False, graph=False):
    """
    Search for the data split (D_k, D_l) with the lowest mean squared error
    for the COBRA predictor at a fixed epsilon.

    For every ``(k, l)`` pair a fresh ``Cobra`` machine is fitted on
    ``self.aggregate.X_`` / ``self.aggregate.y_`` and ``split_data`` is called
    with ``int(k * n)`` and ``int((k + l) * n)``, where ``n`` is
    ``len(self.aggregate.X_)``. The machine's predictions for ``X`` are then
    scored against ``y`` with ``mean_squared_error``.

    Parameters
    ----------
    X: array-like
        Testing data used to score each split.

    y: array-like
        Target values the predictions are compared with.

    split: list, optional
        ``(k, l)`` pairs describing the D_k, D_l break-up. Defaults to
        ``[(0.20, 0.80), (0.40, 0.60), (0.50, 0.50), (0.60, 0.40),
        (0.80, 0.20)]``.

    epsilon: float, optional
        Fixed epsilon used for every machine. Falls back to
        ``self.aggregate.epsilon`` when None.

    info: bool, optional
        If True, return the MSE dictionary for all splits instead of the
        optimum.

    graph: bool, optional
        If True, plot MSE against the first component of each split with
        matplotlib.

    Returns
    -------
    MSE: dictionary mapping each ``(k, l)`` split to its mean squared error.
        Returned on its own when ``info`` is True.

    opt, MSE[opt]: the split with the smallest error and that error.
        Returned when ``info`` is False.
    """
    eps = self.aggregate.epsilon if epsilon is None else epsilon
    if split is None:
        split = [(0.20, 0.80), (0.40, 0.60), (0.50, 0.50),
                 (0.60, 0.40), (0.80, 0.20)]

    errors = {}
    for k, l in split:
        total = len(self.aggregate.X_)
        first_cut = int(k * total)
        second_cut = int((k + l) * total)

        candidate = Cobra(random_state=self.random_state, epsilon=eps)
        candidate.fit(self.aggregate.X_, self.aggregate.y_, default=False)
        candidate.split_data(first_cut, second_cut)
        candidate.load_default()
        candidate.load_machine_predictions()
        errors[(k, l)] = mean_squared_error(y, candidate.predict(X))

    if graph:
        # Imported lazily so matplotlib is only needed when plotting.
        import matplotlib.pyplot as plt
        ratios = [pair[0] for pair in split]
        scores = [errors[pair] for pair in split]
        plt.plot(ratios, scores)

    if info:
        return errors
    best = min(errors, key=errors.get)
    return best, errors[best]
# is so we can walk you through what happens when COBRA is set up, instead # of the default settings being used. # ###################################################################### # We're now going to split our dataset into two parts, and shuffle data # points. # cobra.split_data(D1, D1 + D2, shuffle_data=True) ###################################################################### # Let's load the default machines to COBRA. # cobra.load_default() ###################################################################### # We note here that further machines can be loaded using either the # ``loadMachine()`` or ``loadSKMachine()`` methods. The only prerequisite # is that the machine has a valid ``predict()`` method. # ###################################################################### # Using COBRA's machines # ---------------------- # # We've created our random dataset and now we're going to use the default # scikit-learn machines to see what the results look like. #