Exemple #1
0
    def optimal_machines_grid(self, X, y, line_points=200, info=False):
        """
        Find the optimal epsilon and machine-combination for a single query point for the COBRA predictor.

        Every non-empty combination of the aggregate's machines is evaluated
        at every epsilon of an evenly spaced grid. For each pair a fresh
        ``Cobra`` is fitted on the aggregate's data, restricted to that
        combination, and the squared error of its prediction for ``X``
        against ``y`` is recorded.

        Parameters
        ----------

        X: array-like, [n_features]
            Vector for which we want optimal machines and epsilon values.
            It is reshaped to a single row before predicting.

        y: float
            Target value for query to compare.

        line_points: integer, optional
            Number of epsilon values to traverse the grid.

        info: bool, optional
            If True, return only the error dictionary for every
            (machine combination, epsilon) pair.

        Returns
        -------

        MSE: dictionary mapping (machine combination, epsilon) to the squared
            error of the prediction. Returned alone when ``info`` is True.
        opt, MSE[opt]: the (machine combination, epsilon) key with the
            smallest squared error together with that error. Returned when
            ``info`` is False.

        Raises
        ------

        ValueError
            If the aggregate holds fewer than two predictions, because no
            gap between predictions exists to bound the epsilon grid.

        """

        # The grid runs from the smallest gap between consecutive sorted
        # predictions up to the full spread of the predictions.
        ordered = sorted(self.aggregate.all_predictions_)
        if len(ordered) < 2:
            raise ValueError(
                "at least two aggregate predictions are needed to build "
                "the epsilon grid")
        emin = min(upper - lower for lower, upper in zip(ordered, ordered[1:]))
        emax = ordered[-1] - ordered[0]
        erange = np.linspace(emin, emax, line_points)

        # The candidate combinations do not depend on epsilon, so build them
        # once instead of once per grid point. Order (by size, then
        # itertools order) matches the nested loops this replaces.
        machine_names = list(self.aggregate.estimators_.keys())
        candidates = [
            combination
            for num in range(1, len(machine_names) + 1)
            for combination in itertools.combinations(machine_names, num)
        ]

        MSE = {}
        for epsilon in erange:
            for combination in candidates:
                machine = Cobra(random_state=self.random_state,
                                epsilon=epsilon)
                machine.fit(self.aggregate.X_,
                            self.aggregate.y_,
                            default=False)
                machine.split_data()
                machine.load_default(machine_list=combination)
                machine.load_machine_predictions()
                result = machine.predict(X.reshape(1, -1))
                MSE[(combination, epsilon)] = np.square(y - result)

        if info:
            return MSE
        opt = min(MSE, key=MSE.get)
        return opt, MSE[opt]
Exemple #2
0
    def optimal_machines(self, X, y, single=False, epsilon=None, info=False):
        """
        Search every non-empty subset of the aggregate's machines for the one
        with the lowest error on the given data, at a fixed epsilon.

        For each subset a new ``Cobra`` is fitted on the aggregate's data,
        loaded with only that subset of machines, and scored against ``y``.

        Parameters
        ----------

        X: array-like
            Query data. With ``single=True`` it is treated as one point and
            reshaped to a single row; otherwise it is passed to ``predict``
            unchanged.

        y: float or array-like
            Target value(s) to compare the predictions with.

        single: boolean, optional
            Score a single query point (squared error) instead of a test
            set (``mean_squared_error``).

        epsilon: float, optional
            Fixed epsilon used for every candidate. Defaults to the
            aggregate's own epsilon when None.

        info: bool, optional
            If True, return only the error dictionary.

        Returns
        -------

        MSE: dictionary mapping each machine combination to its error.
            Returned alone when ``info`` is True.
        opt, MSE[opt]: the combination with the smallest error and that
            error. Returned when ``info`` is False.

        """
        eps = self.aggregate.epsilon if epsilon is None else epsilon

        estimators = self.aggregate.estimators_
        errors = {}
        for size in range(1, len(estimators) + 1):
            for combo in itertools.combinations(estimators.keys(), size):
                candidate = Cobra(random_state=self.random_state, epsilon=eps)
                candidate.fit(self.aggregate.X_,
                              self.aggregate.y_,
                              default=False)
                candidate.split_data()
                candidate.load_default(machine_list=combo)
                candidate.load_machine_predictions()

                if not single:
                    predictions = candidate.predict(X)
                    errors[combo] = mean_squared_error(y, predictions)
                    continue

                # Single query point: predict on a one-row matrix.
                prediction = candidate.predict(X.reshape(1, -1))
                errors[combo] = np.square(y - prediction)

        if info:
            return errors
        best = min(errors, key=errors.get)
        return best, errors[best]
Exemple #3
0
    def optimal_split(self,
                      X,
                      y,
                      split=None,
                      epsilon=None,
                      info=False,
                      graph=False):
        """
        Find the optimal combination split (D_k, D_l) for fixed epsilon value for the COBRA predictor.

        For every candidate split a fresh ``Cobra`` is fitted on the
        aggregate's data, its data is divided according to the split, the
        default machines are loaded, and the mean squared error of its
        predictions for ``X`` against ``y`` is recorded.

        Parameters
        ----------

        X: array-like
            Data for which we want the optimal split; passed to ``predict``
            unchanged.

        y: array-like
            Target values to compare the predictions with.

        split: list, optional.
            (D_k, D_l) fractions to evaluate. Each pair may be a tuple or a
            list. Defaults to (0.20, 0.80), (0.40, 0.60), (0.50, 0.50),
            (0.60, 0.40), (0.80, 0.20).

        epsilon: float, optional.
            Fixed epsilon value used for every split. Defaults to the
            aggregate's own epsilon when None.

        info: bool, optional.
            If True, return only the MSE dictionary for each split.

        graph: bool, optional.
            Plots graph of MSE vs the D_k fraction of each split.

        Returns
        -------

        MSE: dictionary mapping each (D_k, D_l) split to its mean squared
            error. Returned alone when ``info`` is True.
        opt, MSE[opt]: the split with the smallest mean squared error and
            that error. Returned when ``info`` is False.

        """
        if epsilon is None:
            epsilon = self.aggregate.epsilon

        if split is None:
            split = [(0.20, 0.80), (0.40, 0.60), (0.50, 0.50), (0.60, 0.40),
                     (0.80, 0.20)]

        n_samples = len(self.aggregate.X_)
        MSE = {}
        for d_k, d_l in split:
            machine = Cobra(random_state=self.random_state, epsilon=epsilon)
            machine.fit(self.aggregate.X_, self.aggregate.y_, default=False)
            # Fractions are converted to sample indices: the first part ends
            # at d_k * n, the second at (d_k + d_l) * n.
            machine.split_data(int(d_k * n_samples),
                               int((d_k + d_l) * n_samples))
            machine.load_default()
            machine.load_machine_predictions()
            results = machine.predict(X)
            MSE[(d_k, d_l)] = mean_squared_error(y, results)

        if graph:
            import matplotlib.pyplot as plt
            # MSE is keyed by tuples; normalise each pair so that splits
            # supplied as lists do not fail the lookup as unhashable keys.
            pairs = [tuple(value) for value in split]
            ratio = [pair[0] for pair in pairs]
            mse = [MSE[pair] for pair in pairs]
            plt.plot(ratio, mse)

        if info:
            return MSE
        opt = min(MSE, key=MSE.get)
        return opt, MSE[opt]
# is so we can walk you through what happens when COBRA is set up, instead
# of the default settings being used.
#

######################################################################
# We're now going to split our dataset into two parts, and shuffle data
# points.
#

# D1 and D1 + D2 are passed positionally as the two split boundaries, and
# shuffle_data=True requests shuffling.
# NOTE(review): presumably these are the end indices of the first and second
# part of the data -- confirm against Cobra.split_data.
cobra.split_data(D1, D1 + D2, shuffle_data=True)

######################################################################
# Let's load the default machines to COBRA.
#

cobra.load_default()

######################################################################
# We note here that further machines can be loaded using either the
# ``loadMachine()`` or the ``loadSKMachine()`` method. The only prerequisite
# is that the machine has a valid ``predict()`` method.
#

######################################################################
# Using COBRA's machines
# ----------------------
#
# We've created our random dataset and now we're going to use the default
# scikit-learn machines to see what the results look like.
#