Example #1
0
    def setUp(self):
        """
        Fit the three aggregates used by the tests on a synthetic data-set.

        800 samples with 20 uniform features are generated from a seeded
        generator. The first D1 + D2 rows are used for fitting and the last
        D4 rows are stored as test data. Fitted ``Cobra``, ``Ewa`` and
        ``KernelCobra`` objects are stored on the test case.
        """
        # seeded generator so that every run sees the same data
        rng = np.random.RandomState(42)

        # D1 = train machines; D2 = create COBRA; D3 = calibrate epsilon, alpha; D4 = testing
        n_features = 20
        D1, D2, D3, D4 = 200, 200, 200, 200
        n_samples = D1 + D2 + D3 + D4
        features = rng.uniform(-1, 1, n_samples * n_features).reshape(n_samples, n_features)
        # the response only involves features 1, 3 and 10
        response = np.power(features[:, 1], 2) + np.power(features[:, 3], 3) + np.exp(features[:, 10])

        # partition boundaries: training is [0, train_end), testing is [test_begin, test_begin + D4)
        train_end = D1 + D2
        test_begin = train_end + D3
        X_train, Y_train = features[:train_end], response[:train_end]
        X_test = features[test_begin:test_begin + D4]

        fitted_cobra = Cobra(random_state=0, epsilon=0.5)
        fitted_cobra.fit(X_train, Y_train)

        fitted_ewa = Ewa(random_state=0)
        fitted_ewa.fit(X_train, Y_train)

        fitted_kernel = KernelCobra(random_state=0)
        fitted_kernel.fit(X_train, Y_train)

        # expose the fixtures only once every aggregate has been fitted
        self.test_data = X_test
        self.cobra = fitted_cobra
        self.ewa = fitted_ewa
        self.kernelcobra = fitted_kernel
Example #2
0
    def setUp(self):
        """
        Build Cobra and Ewa visualisation fixtures on a synthetic data-set.

        800 samples with 20 uniform features are generated from a seeded
        generator. The first D1 + D2 rows are used for fitting; the last D4
        rows (and their responses) are stored as test data. The first four
        test rows feed the ``Visualisation`` objects and ``indice_info``.
        """
        # seeded generator so that every run sees the same data
        rng = np.random.RandomState(42)

        # D1 = train machines; D2 = create COBRA; D3 = calibrate epsilon, alpha; D4 = testing
        n_features = 20
        D1, D2, D3, D4 = 200, 200, 200, 200
        n_samples = D1 + D2 + D3 + D4
        features = rng.uniform(-1, 1, n_samples * n_features).reshape(n_samples, n_features)
        # the response only involves features 1, 3 and 10
        response = np.power(features[:, 1], 2) + np.power(features[:, 3], 3) + np.exp(features[:, 10])

        # partition boundaries: training is [0, train_end), testing is [test_begin, test_begin + D4)
        train_end = D1 + D2
        test_begin = train_end + D3
        X_train, Y_train = features[:train_end], response[:train_end]
        X_test = features[test_begin:test_begin + D4]
        Y_test = response[test_begin:test_begin + D4]

        # COBRA fixtures
        fitted_cobra = Cobra(random_state=0, epsilon=0.5)
        fitted_cobra.fit(X_train, Y_train)
        self.test_data = X_test
        self.test_response = Y_test
        self.cobra = fitted_cobra
        self.cobra_vis = Visualisation(self.cobra, self.test_data[0:4], self.test_response[0:4])
        self.indices, self.mse = self.cobra_vis.indice_info(
            self.test_data[0:4],
            self.test_response[0:4],
            epsilon=self.cobra.epsilon,
        )

        # EWA fixtures
        fitted_ewa = Ewa(random_state=0)
        fitted_ewa.fit(X_train, Y_train)
        self.ewa = fitted_ewa
        self.ewa_vis = Visualisation(self.ewa, self.test_data[0:4], self.test_response[0:4])
Example #3
0
    def optimal_alpha_grid(self, X, y, line_points=200, info=False):
        """
        Find the optimal epsilon and alpha for a single query point for the COBRA predictor.

        Parameters
        ----------

        X: array-like, [n_features]
            Vector for which we want optimal alpha and epsilon values

        y: float
            Target value for query to compare.

        line_points: integer, optional
            Number of epsilon values to traverse the grid.

        info: bool, optional
            Returns MSE dictionary for each epsilon/alpha value

        Returns
        -------

        MSE: dictionary mapping (alpha, epsilon) with squared errors
            Returned on its own when ``info`` is True.
        opt, MSE[opt]: optimal (alpha, epsilon) combination and its squared error
            Returned when ``info`` is False.

        """

        # The epsilon grid runs from the smallest gap between two consecutive
        # sorted predictions up to the full spread of the predictions.
        a = sorted(self.aggregate.all_predictions_)
        res = [upper - lower for lower, upper in zip(a, a[1:])]
        emin = min(res)
        emax = max(a) - min(a)
        erange = np.linspace(emin, emax, line_points)
        n_machines = np.arange(1, len(self.aggregate.estimators_) + 1)

        # the query is a single point: present it to predict as one row
        query = np.asarray(X).reshape(1, -1)
        MSE = {}

        # looping over epsilon and alpha values
        for epsilon in erange:
            # The fit receives epsilon but never alpha, so one fitted model
            # per epsilon serves every alpha instead of refitting each time.
            machine = Cobra(random_state=self.random_state, epsilon=epsilon)
            machine.fit(self.aggregate.X_, self.aggregate.y_)
            for num in n_machines:
                result = machine.predict(query, alpha=num)
                MSE[(num, epsilon)] = np.square(y - result)

        if info:
            return MSE

        opt = min(MSE, key=MSE.get)
        return opt, MSE[opt]
Example #4
0
    def optimal_epsilon(self, X, y, line_points=200, info=False):
        """
        Search a grid of epsilon values for the one with the lowest MSE.

        Parameters
        ----------

        X: array-like
            Data on which each candidate epsilon is evaluated.

        y: array-like
            Target values compared against the predictions for ``X``.

        line_points: integer, optional
            Number of epsilon values on the grid.

        info: bool, optional
            When True, return the whole epsilon -> MSE dictionary.

        Returns
        -------

        MSE: dictionary mapping epsilon with mean squared errors
            Returned on its own when ``info`` is True.
        opt, MSE[opt]: optimal epsilon value and its mean squared error
            Returned when ``info`` is False.

        """

        # grid bounds: smallest gap between neighbouring sorted predictions
        # and the total spread of the predictions
        ordered = sorted(self.aggregate.all_predictions_)
        gaps = [upper - lower for lower, upper in zip(ordered, ordered[1:])]
        smallest_gap = min(gaps)
        spread = max(ordered) - min(ordered)

        errors = {}
        for candidate in np.linspace(smallest_gap, spread, line_points):
            model = Cobra(random_state=self.random_state, epsilon=candidate)
            model.fit(self.aggregate.X_, self.aggregate.y_)
            errors[candidate] = mean_squared_error(y, model.predict(X))

        if info:
            return errors

        best = min(errors, key=errors.get)
        return best, errors[best]
Example #5
0
def set_up():
    """
    Return a ``Visualisation`` built on a Cobra fitted to synthetic data.

    800 samples with 20 uniform features are generated from a seeded
    generator; the first D1 + D2 rows fit the Cobra and the first four rows
    of the last D4 rows are handed to the ``Visualisation``.
    """
    # seeded generator so that every call sees the same data
    rng = np.random.RandomState(42)

    # D1 = train machines; D2 = create COBRA; D3 = calibrate epsilon, alpha; D4 = testing
    n_features = 20
    D1, D2, D3, D4 = 200, 200, 200, 200
    n_samples = D1 + D2 + D3 + D4
    features = rng.uniform(-1, 1, n_samples * n_features).reshape(n_samples, n_features)
    # the response only involves features 1, 3 and 10
    response = np.power(features[:, 1], 2) + np.power(features[:, 3], 3) + np.exp(features[:, 10])

    # partition boundaries: training is [0, train_end), testing is [test_begin, test_begin + D4)
    train_end = D1 + D2
    test_begin = train_end + D3
    test_data = features[test_begin:test_begin + D4]
    test_response = response[test_begin:test_begin + D4]

    model = Cobra(random_state=0, epsilon=0.5)
    model.fit(features[:train_end], response[:train_end])

    return Visualisation(model, test_data[0:4], test_response[0:4])
Example #6
0
    def optimal_alpha(self, X, y, single=False, epsilon=None, info=False):
        """
        Find the optimal alpha for testing data for the COBRA predictor.

        Parameters
        ----------

        X: array-like
            Data for which we want the optimal alpha value. With
            ``single=True`` this is one query vector, [n_features].

        y: float or array-like
            Target value(s) to compare the predictions with.

        single: boolean, optional
            Option to calculate optimal alpha for a single query point instead.

        epsilon: float, optional
            fixed epsilon value to help determine optimal alpha. Defaults to
            the epsilon of the aggregate.

        info: bool, optional
            Returns MSE dictionary for each alpha value

        Returns
        -------

        MSE: dictionary mapping alpha with (mean) squared errors
            Returned on its own when ``info`` is True.
        opt, MSE[opt]: optimal alpha and its error
            Returned when ``info`` is False.

        """
        if epsilon is None:
            epsilon = self.aggregate.epsilon

        # A single query point is presented to predict as one row, matching
        # what optimal_alpha_grid and optimal_machines do for single queries.
        query = np.asarray(X).reshape(1, -1) if single else X

        # The fit receives epsilon but never alpha, so a single fitted model
        # serves every alpha instead of refitting inside the loop.
        machine = Cobra(random_state=self.random_state, epsilon=epsilon)
        machine.fit(self.aggregate.X_, self.aggregate.y_)

        MSE = {}
        for alpha in range(1, len(self.aggregate.estimators_) + 1):
            prediction = machine.predict(query, alpha=alpha)
            if single:
                # for a single data point
                MSE[alpha] = np.square(y - prediction)
            else:
                MSE[alpha] = mean_squared_error(y, prediction)

        if info:
            return MSE
        opt = min(MSE, key=MSE.get)
        return opt, MSE[opt]
Example #7
0
# (name, index) pairs of the denoising machines, in load order. Each pair is
# forwarded to ``machine(name, index, patch_size)``. 'bm3d' (index 9) is
# deliberately left out, as it was disabled in the original configuration.
_DENOISING_MACHINES = (
    ('bilateral', 0),
    ('nlmeans', 1),
    ('gauss', 2),
    ('median', 3),
    ('TVchambolle', 4),
    ('richardson_lucy', 5),
    ('inpainting', 6),
    ('ksvd', 7),
    ('lee', 8),
)


def _load_denoising_machines(cobra, patch_size):
    """Load every machine listed in ``_DENOISING_MACHINES`` into ``cobra``."""
    for name, index in _DENOISING_MACHINES:
        cobra.load_machine(name, machine(name, index, patch_size))


def define_cobra_model(train_names,
                       training_noise_kind,
                       patch_size=1,
                       optimi=True,
                       verbose=False):
    """
    Train a cobra model for denoising task

    INPUT :
    train_names : list containing the name of images used to train the model
    training_noise_kind : noise specification forwarded to load_training_data
    patch_size : use patch of size (2*patch_size+1)*(2*patch_size+1) as features
    optimi : if True, search for the best epsilon and alpha with Diagnostics
             and rebuild the model with them
    verbose : print or not information during the training

    OUTPUT :
    (cobra, alpha, epsilon) : the trained model and the parameters it was
    built with. When optimi is True these are the optimised values, not the
    initial ones.
    """
    #initial cobra parameters
    alpha = 4  #how many machines must agree
    epsilon = 0.2  # confidence parameter

    print("Training cobra model...")
    Xtrain, Xtrain1, Xtrain2, Ytrain = load_training_data(
        train_names, training_noise_kind, patch_size)
    cobra = Cobra(epsilon=epsilon, machines=alpha)  # create a cobra machine
    cobra.fit(Xtrain, Ytrain)  # fit the cobra machine with our data

    print("Loading machines...")
    _load_denoising_machines(cobra, patch_size)

    print("Loading machine predictions...")
    cobra.load_machine_predictions()  #aggregate
    if verbose:
        print(cobra.machine_predictions_)

    if optimi:
        print("Parameter optimisation")
        cobra_diagnostics = Diagnostics(cobra, Xtrain, Ytrain)
        # epsilon is optimised first, then alpha for that epsilon
        epsilon, _ = cobra_diagnostics.optimal_epsilon(Xtrain,
                                                       Ytrain,
                                                       line_points=100,
                                                       info=False)
        alpha, _ = cobra_diagnostics.optimal_alpha(Xtrain,
                                                   Ytrain,
                                                   epsilon=epsilon,
                                                   info=False)
        if verbose:
            print("epsilon = ", epsilon)
            print("alpha = ", alpha)

        print("Training cobra model again...")
        cobra = Cobra(epsilon=epsilon, machines=alpha)
        cobra.fit(Xtrain,
                  Ytrain,
                  default=False,
                  X_k=Xtrain1,
                  X_l=Xtrain2,
                  y_k=Ytrain,
                  y_l=Ytrain)
        _load_denoising_machines(cobra, patch_size)
        cobra.load_machine_predictions()
        if verbose:
            print("Loading machine predictions...")
            print(cobra.machine_predictions_)

    # Report the parameters the returned model was actually built with; the
    # previous code always returned the initial 4 / 0.2 even after optimising.
    return (cobra, alpha, epsilon)
Example #8
0
    def boxplot(self, reps=100, info=False):
        """
        Draw a boxplot of the errors of each machine and of the aggregate.

        For ``Cobra`` and ``Ewa`` aggregates the error is the mean squared
        error; for ``ClassifierCobra`` it is ``1 - accuracy``. The experiment
        (shuffle, refit, predict on the shuffled test data) is repeated
        ``reps`` times.

        Parameters
        ----------
        reps: int, optional
            Number of times to repeat experiments for boxplot.

        info: boolean, optional
            When True, return the plotted data: one list of errors per
            machine, with the aggregate last.

        """
        aggregate = self.aggregate
        aggregate_type = type(aggregate)

        if aggregate_type is Cobra:

            scores = {name: [] for name in aggregate.machines_}
            scores["COBRA"] = []
            for _ in range(reps):
                cobra = Cobra(random_state=self.random_state, epsilon=aggregate.epsilon)
                X, y = shuffle(aggregate.X_, aggregate.y_, random_state=aggregate.random_state)
                cobra.fit(X, y, default=False)
                cobra.split_data(shuffle_data=True)

                # the aggregate's own machines are refitted on the new split
                for name, estimator in aggregate.machines_.items():
                    estimator.fit(cobra.X_k_, cobra.y_k_)
                    cobra.load_machine(name, estimator)

                cobra.load_machine_predictions()
                X_test, y_test = shuffle(self.X_test, self.y_test, random_state=aggregate.random_state)

                for name, estimator in cobra.machines_.items():
                    scores[name].append(mean_squared_error(y_test, estimator.predict(X_test)))
                scores["COBRA"].append(mean_squared_error(y_test, cobra.predict(X_test)))

            data, labels = list(scores.values()), list(scores)

        elif aggregate_type is Ewa:

            scores = {name: [] for name in aggregate.machines_}
            scores["EWA"] = []
            for _ in range(reps):
                ewa = Ewa(random_state=self.random_state, beta=aggregate.beta)
                X, y = shuffle(aggregate.X_, aggregate.y_, random_state=aggregate.random_state)
                ewa.fit(X, y, default=False)
                ewa.split_data(shuffle_data=True)

                # the aggregate's own machines are refitted on the new split
                for name, estimator in aggregate.machines_.items():
                    estimator.fit(ewa.X_k_, ewa.y_k_)
                    ewa.load_machine(name, estimator)

                ewa.load_machine_weights(aggregate.beta)
                X_test, y_test = shuffle(self.X_test, self.y_test, random_state=aggregate.random_state)
                for name, estimator in ewa.machines_.items():
                    scores[name].append(mean_squared_error(y_test, estimator.predict(X_test)))
                scores["EWA"].append(mean_squared_error(y_test, ewa.predict(X_test)))

            data, labels = list(scores.values()), list(scores)

        elif aggregate_type is ClassifierCobra:

            scores = {name: [] for name in aggregate.machines_}
            scores["ClassifierCobra"] = []
            for _ in range(reps):
                cc = ClassifierCobra(random_state=self.random_state)
                X, y = shuffle(aggregate.X_, aggregate.y_, random_state=aggregate.random_state)
                cc.fit(X, y, default=False)
                cc.split_data(shuffle_data=True)

                # the aggregate's own machines are refitted on the new split
                for name, estimator in aggregate.machines_.items():
                    estimator.fit(cc.X_k_, cc.y_k_)
                    cc.load_machine(name, estimator)

                cc.load_machine_predictions()
                X_test, y_test = shuffle(self.X_test, self.y_test, random_state=aggregate.random_state)
                for name, estimator in cc.machines_.items():
                    scores[name].append(1 - accuracy_score(y_test, estimator.predict(X_test)))
                scores["ClassifierCobra"].append(1 - accuracy_score(y_test, cc.predict(X_test)))

            data, labels = list(scores.values()), list(scores)

        plt.figure(figsize=(self.plot_size, self.plot_size))
        plt.boxplot(data, labels=labels)
        plt.show()

        if info:
            return data
Example #9
0
# total number of samples over the four partitions
# (D1..D4, rng and n_features are presumably defined earlier in the script)
D = D1 + D2 + D3 + D4
# D * n_features uniform draws between -1 and 1, arranged as a (D, n_features) matrix
X = rng.uniform(-1, 1, D * n_features).reshape(D, n_features)
# alternative response, kept for reference:
# Y = np.power(X[:,1], 2) + np.power(X[:,3], 3) + np.exp(X[:,10]) 
# the response used here only depends on the first two features
Y = np.power(X[:,0], 2) + np.power(X[:,1], 3)

# features: training rows (first D1 + D2), testing rows (last D4) and the
# D3 rows in between (the *_eps partition)
X_train = X[:D1 + D2]
X_test = X[D1 + D2 + D3:D1 + D2 + D3 + D4]
X_eps = X[D1 + D2:D1 + D2 + D3]
# responses, split with exactly the same row boundaries as the features
Y_train = Y[:D1 + D2]
Y_test = Y[D1 + D2 + D3:D1 + D2 + D3 + D4]
Y_eps = Y[D1 + D2:D1 + D2 + D3]

# set up our COBRA machine with a fixed epsilon and fit it on the training rows
cobra = Cobra(epsilon=0.5)
cobra.fit(X_train, Y_train)


######################################################################
# Plotting COBRA
# ~~~~~~~~~~~~~~
# 
# We use the Visualisation class to plot our results and to produce
# various other visualisations.
# 

# the visualisation object is built from the fitted machine and the test partition
cobra_vis = Visualisation(cobra, X_test, Y_test)

# To plot our machines, we need a linspace as input. This is the 'scale' of the plot and should cover the range of the results.
# Since our data ranges from -1 to 1, the linspace does too - and we space it out to a hundred points.
Example #10
0
# features of the D3 partition, used below to choose epsilon
X_eps = X[D1 + D2:D1 + D2 + D3]
# responses, split with the same row boundaries as the features
Y_train = Y[:D1 + D2]
Y_test = Y[D1 + D2 + D3:D1 + D2 + D3 + D4]
Y_eps = Y[D1 + D2:D1 + D2 + D3]

######################################################################
# Similar to other scikit-learn estimators, we set up our machine by
# creating an object and then fitting it. Since we are not passing an
# epsilon value when instantiating our object, we call ``set_epsilon``
# with data from which an optimal epsilon value is found. The optimal
# epsilon is found through the scikit-learn ``GridSearchCV``. The
# ``grid_points`` parameter decides how many possible epsilon values
# must be traversed.
#

cobra = Cobra()

cobra.set_epsilon(X_epsilon=X_eps, y_epsilon=Y_eps, grid_points=5)

# bare expression: shows the chosen epsilon in an interactive session and
# has no effect when the file is run as a plain script
cobra.epsilon

cobra.fit(X_train, Y_train)

######################################################################
# We now see if our object can fit into the scikit-learn pipeline and
# GridSearch - and it can!
#

from sklearn.utils.estimator_checks import check_estimator
# the check itself is left disabled; the original author reports that it passes
# check_estimator(Cobra) #passes
Example #11
0
    def optimal_split(self,
                      X,
                      y,
                      split=None,
                      epsilon=None,
                      info=False,
                      graph=False):
        """
        Find the optimal combination split (D_k, D_l) for fixed epsilon value for the COBRA predictor.

        Parameters
        ----------

        X: array-like
            Data on which each candidate split is evaluated.

        y: array-like
            Target values compared against the predictions for ``X``.

        split: list, optional.
            (D_k, D_l) fractions to evaluate. Defaults to five splits from
            (0.20, 0.80) to (0.80, 0.20).

        epsilon: float, optional.
            Fixed epsilon value used for every split. Defaults to the
            epsilon of the aggregate.

        info: bool, optional.
            Returns MSE dictionary for each split.

        graph: bool, optional.
            Plots graph of MSE vs split

        Returns
        -------

        MSE: dictionary mapping split with mean squared errors
            Returned on its own when ``info`` is True.
        opt, MSE[opt]: optimal split and its mean squared error
            Returned when ``info`` is False.

        """
        if epsilon is None:
            epsilon = self.aggregate.epsilon

        if split is None:
            split = [(0.20, 0.80), (0.40, 0.60), (0.50, 0.50), (0.60, 0.40),
                     (0.80, 0.20)]

        errors = {}
        for frac_k, frac_l in split:
            n_rows = len(self.aggregate.X_)
            model = Cobra(random_state=self.random_state, epsilon=epsilon)
            model.fit(self.aggregate.X_, self.aggregate.y_, default=False)
            # the fractions are turned into the two row indices handed to split_data
            model.split_data(int(frac_k * n_rows), int((frac_k + frac_l) * n_rows))
            model.load_default()
            model.load_machine_predictions()
            errors[(frac_k, frac_l)] = mean_squared_error(y, model.predict(X))

        if graph:
            import matplotlib.pyplot as plt
            # x axis: D_k fraction of each split; y axis: its MSE
            ratio = [candidate[0] for candidate in split]
            mse = [errors[candidate] for candidate in split]
            plt.plot(ratio, mse)

        if info:
            return errors

        best = min(errors, key=errors.get)
        return best, errors[best]
Example #12
0
    def optimal_machines(self, X, y, single=False, epsilon=None, info=False):
        """
        Find the optimal combination of machines for testing data for the COBRA predictor.

        Every non-empty combination of the aggregate's estimators is tried.

        Parameters
        ----------

        X: array-like
            Data for which we want the optimal machine combination. With
            ``single=True`` this is one query vector, [n_features].

        y: float or array-like
            Target value(s) to compare the predictions with.

        single: boolean, optional
            Option to calculate optimal machine combinations for a single query point instead.

        epsilon: float, optional
            Fixed epsilon value used for every combination. Defaults to the
            epsilon of the aggregate.

        info: bool, optional
            Returns MSE dictionary for each machine combination value

        Returns
        -------

        MSE: dictionary mapping machine combinations with (mean) squared errors
            Returned on its own when ``info`` is True.
        opt, MSE[opt]: optimal machine combination and its error
            Returned when ``info`` is False.

        """
        if epsilon is None:
            epsilon = self.aggregate.epsilon

        errors = {}
        # combinations are visited by increasing size: 1, 2, ..., all machines
        for subset_size in range(1, len(self.aggregate.estimators_) + 1):
            names = self.aggregate.estimators_.keys()
            for subset in itertools.combinations(names, subset_size):
                model = Cobra(random_state=self.random_state,
                              epsilon=epsilon)
                model.fit(self.aggregate.X_,
                          self.aggregate.y_,
                          default=False)
                model.split_data()
                model.load_default(machine_list=subset)
                model.load_machine_predictions()
                if not single:
                    errors[subset] = mean_squared_error(y, model.predict(X))
                else:
                    # a single query point is presented as one row
                    point_prediction = model.predict(X.reshape(1, -1))
                    errors[subset] = np.square(y - point_prediction)

        if info:
            return errors

        best = min(errors, key=errors.get)
        return best, errors[best]
Example #13
0
def cobraModelInit(trainNames, noiseType, imShape, patchSize=1, best=True):
    """
    Initialise and train a cobra model.

    A Cobra with epsilon 0.2 and 3 machines is fitted first and loaded with
    one ``CobraMachine`` per entry of ``denoiseMethods()``. When ``best`` is
    True, ``Diagnostics`` is used to pick epsilon and then the number of
    machines, and a second model is built with those values.

    ``imShape`` is accepted but not used by this function.

    Returns the model, the number of machines and the epsilon it was built
    with.
    """
    print("Making training data ready")
    features, features_k, features_l, targets = loadTrainingData(
        trainNames, noiseType, patchSize)
    methods = denoiseMethods()

    def attach_denoisers(model):
        # one CobraMachine per denoising method, then their predictions
        for method in methods:
            model.load_machine(method, CobraMachine(method, patchSize))
        model.load_machine_predictions()

    epsilon, machines = 0.2, 3
    cobra = Cobra(epsilon=epsilon, machines=machines)
    print("Training model")
    cobra.fit(features, targets)
    attach_denoisers(cobra)

    if not best:
        return cobra, machines, epsilon

    print("Running Diagnostics")
    cobra_diagnostics = Diagnostics(cobra,
                                    features,
                                    targets,
                                    load_MSE=False)
    print("epsilon")
    epsilon, _ = cobra_diagnostics.optimal_epsilon(features,
                                                   targets,
                                                   line_points=100,
                                                   info=False)
    print("machines")
    machines, _ = cobra_diagnostics.optimal_alpha(features,
                                                  targets,
                                                  epsilon=epsilon,
                                                  info=False)

    # rebuild the model with the selected parameters and the explicit split
    cobra = Cobra(epsilon=epsilon, machines=machines)
    print("fit")
    cobra.fit(features,
              targets,
              default=False,
              X_k=features_k,
              X_l=features_l,
              y_k=targets,
              y_l=targets)
    attach_denoisers(cobra)

    return cobra, machines, epsilon
Example #14
0
    def optimal_machines_grid(self, X, y, line_points=200, info=False):
        """
        Find the optimal epsilon and machine-combination for a single query point for the COBRA predictor.

        Parameters
        ----------

        X: array-like, [n_features]
            Vector for which we want optimal machines and epsilon values

        y: float
            Target value for query to compare.

        line_points: integer, optional
            Number of epsilon values to traverse the grid.

        info: bool, optional
            Returns MSE dictionary for each epsilon/machine value.

        Returns
        -------

        MSE: dictionary mapping (machine combination, epsilon) with squared errors
            Returned on its own when ``info`` is True.
        opt, MSE[opt]: optimal (machine combination, epsilon) pair and its squared error
            Returned when ``info`` is False.

        """

        # grid bounds: smallest gap between neighbouring sorted predictions
        # and the total spread of the predictions
        ordered = sorted(self.aggregate.all_predictions_)
        gaps = [upper - lower for lower, upper in zip(ordered, ordered[1:])]
        smallest_gap = min(gaps)
        spread = max(ordered) - min(ordered)
        n_available = len(self.aggregate.machines_)

        errors = {}
        for candidate in np.linspace(smallest_gap, spread, line_points):
            # combinations are visited by increasing size: 1, 2, ..., all machines
            for subset_size in range(1, n_available + 1):
                names = self.aggregate.machines_.keys()
                for subset in itertools.combinations(names, subset_size):
                    model = Cobra(random_state=self.random_state,
                                  epsilon=candidate)
                    model.fit(self.aggregate.X_,
                              self.aggregate.y_,
                              default=False)
                    model.split_data()
                    model.load_default(machine_list=subset)
                    model.load_machine_predictions()
                    # the single query point is presented as one row
                    point_prediction = model.predict(X.reshape(1, -1))
                    errors[(subset, candidate)] = np.square(y - point_prediction)

        if info:
            return errors

        best = min(errors, key=errors.get)
        return best, errors[best]
Example #15
0
    def boxplot(self, reps=100, info=False, dataframe=None, kind="normal"):
        """
        Plots boxplots of machines.

        Parameters
        ----------
        reps: int, optional
            Number of times to repeat experiments for boxplot.

        info: boolean, optional
            Returns data 

        """

        kwargs = self.kwargs
        if dataframe is None:
            if type(self.aggregate) is Cobra:

                MSE = {k: [] for k, v in self.estimators.items()}
                MSE["Cobra"] = []
                for i in range(0, reps):
                    cobra = Cobra(epsilon=self.aggregate.epsilon)
                    X, y = shuffle(self.aggregate.X_, self.aggregate.y_)
                    cobra.fit(X, y, default=False)
                    cobra.split_data(shuffle_data=True)

                    for machine in self.aggregate.estimators_:
                        self.aggregate.estimators_[machine].fit(cobra.X_k_, cobra.y_k_)
                        cobra.load_machine(machine, self.aggregate.estimators_[machine])

                    cobra.load_machine_predictions()

                    for machine in self.estimators:
                        if "Cobra" in machine:
                            self.estimators[machine].fit(X, y)
                        else:
                            self.estimators[machine].fit(cobra.X_k_, cobra.y_k_)
                        try:
                            if type(self.estimators[machine]) == KernelCobra:
                                preds = self.estimators[machine].predict(self.X_test, bandwidth=kwargs["bandwidth_kernel"])
                            else:
                                preds = self.estimators[machine].predict(self.X_test)
                        except KeyError:
                            preds = self.estimators[machine].predict(self.X_test)                      
                        
                        MSE[machine].append(mean_squared_error(self.y_test, preds))

                    MSE["Cobra"].append(mean_squared_error(self.y_test, cobra.predict(self.X_test)))

                try:
                    dataframe = pd.DataFrame(data=MSE)
                except ValueError:
                    return MSE

            if type(self.aggregate) is KernelCobra:

                MSE = {k: [] for k, v in self.estimators.items()}
                MSE["KernalCobra"] = []
                for i in range(0, reps):
                    kernel = KernelCobra()
                    X, y = shuffle(self.aggregate.X_, self.aggregate.y_)
                    kernel.fit(X, y, default=False)
                    kernel.split_data(shuffle_data=True)

                    for machine in self.aggregate.estimators_:
                        self.aggregate.estimators_[machine].fit(kernel.X_k_, kernel.y_k_)
                        kernel.load_machine(machine, self.aggregate.estimators_[machine])

                    kernel.load_machine_predictions()

                    for machine in self.estimators:
                        if "Cobra" in machine:
                            self.estimators[machine].fit(X, y)
                        else:
                            self.estimators[machine].fit(cobra.X_k_, cobra.y_k_)
                        
                        try:
                            if type(self.estimators[machine]) == KernelCobra:
                                preds = self.estimators[machine].predict(self.X_test, bandwidth=kwargs["bandwidth_kernel"])
                            else:
                                preds = self.estimators[machine].predict(self.X_test)
                        except KeyError:
                            preds = self.estimators[machine].predict(self.X_test)

                        MSE[machine].append(mean_squared_error(self.y_test, preds))

                    MSE["KernelCobra"].append(mean_squared_error(self.y_test, kernel.predict(self.X_test, bandwidth=kwargs[bandwidth_kernel])))

                try:
                    dataframe = pd.DataFrame(data=MSE)
                except ValueError:
                    return MSE


            if type(self.aggregate) is Ewa:

                MSE = {k: [] for k, v in self.aggregate.estimators_.items()}
                MSE["EWA"] = []
                for i in range(0, reps):
                    ewa = Ewa(random_state=self.random_state, beta=self.aggregate.beta)
                    X, y = shuffle(self.aggregate.X_, self.aggregate.y_, random_state=self.aggregate.random_state)
                    ewa.fit(X, y, default=False)
                    ewa.split_data(shuffle_data=True)

                    for machine in self.estimators:
                        self.aggregate.estimators_[machine].fit(ewa.X_k_, ewa.y_k_)
                        ewa.load_machine(machine, self.aggregate.estimators_[machine])

                    ewa.load_machine_weights(self.aggregate.beta)
                    X_test, y_test = shuffle(self.X_test, self.y_test, random_state=self.aggregate.random_state)
                    for machine in self.estimators:
                        if "EWA" in machine:
                            self.estimators[machine].fit(X, y)
                        else:
                            self.estimators[machine].fit(ewa.X_k_, ewa.y_k_)
                        try:
                            if type(self.estimators[machine]) == KernelCobra:
                                preds = self.estimators[machine].predict(self.X_test, bandwidth=kwargs["bandwidth_kernel"])
                            else:
                                preds = self.estimators[machine].predict(self.X_test)
                        except KeyError:
                            preds = self.estimators[machine].predict(self.X_test)                      
                        MSE[machine].append(mean_squared_error(y_test, preds))
                    
                    MSE["EWA"].append(mean_squared_error(y_test, ewa.predict(X_test)))

                try:
                    dataframe = pd.DataFrame(data=MSE)
                except ValueError:
                    return MSE

            if type(self.aggregate) is ClassifierCobra:

                errors = {k: [] for k, v in self.aggregate.estimators_.items()}
                errors["ClassifierCobra"] = []
                for i in range(0, reps):
                    cc = ClassifierCobra(random_state=self.random_state)
                    X, y = shuffle(self.aggregate.X_, self.aggregate.y_, random_state=self.aggregate.random_state)
                    cc.fit(X, y, default=False)
                    cc.split_data(shuffle_data=True)

                    for machine in self.aggregate.estimators_:
                        self.aggregate.estimators_[machine].fit(cc.X_k_, cc.y_k_)
                        cc.load_machine(machine, self.aggregate.estimators_[machine])

                    cc.load_machine_predictions()
                    X_test, y_test = shuffle(self.X_test, self.y_test, random_state=self.aggregate.random_state)
                    for machine in self.estimators: 
                        errors[machine].append(1 - accuracy_score(y_test, self.estimators[machine].predict(X_test)))
                    errors["ClassifierCobra"].append(1 - accuracy_score(y_test, cc.predict(X_test)))

                try:
                    dataframe = pd.DataFrame(data=errors)
                except ValueError:
                    return errors
        


        # code for different boxplot styles using the python graph gallery tutorial:
        # https://python-graph-gallery.com/39-hidden-data-under-boxplot/

        sns.set(style="whitegrid")

        if kind == "normal":
            sns.boxplot(data=dataframe)
            plt.title("Boxplot")

        if kind == "violin":
            sns.violinplot(data=dataframe)
            plt.title("Violin Plot")

        if kind == "jitterplot":
            ax = sns.boxplot(data=dataframe)
            ax = sns.stripplot(data=dataframe, color="orange", jitter=0.2, size=2.5)
            plt.title("Boxplot with jitter", loc="left")

        plt.ylabel("Mean Squared Errors")
        plt.xlabel("Estimators")
        plt.figure(figsize=(self.plot_size, self.plot_size))
        plt.show()

        
        if info:
            return dataframe
Example #16
0
# Split the features by row: the first D1 + D2 rows are the training set,
# the following D3 rows go to X_eps, and the D4 rows after that are held
# out as the test set.
X_train, X_eps, X_test = (
    X[:D1 + D2],
    X[D1 + D2:D1 + D2 + D3],
    X[D1 + D2 + D3:D1 + D2 + D3 + D4],
)
# Split the responses at exactly the same row boundaries as the features.
Y_train, Y_eps, Y_test = (
    Y[:D1 + D2],
    Y[D1 + D2:D1 + D2 + D3],
    Y[D1 + D2 + D3:D1 + D2 + D3 + D4],
)

######################################################################
# Setting up COBRA
# ~~~~~~~~~~~~~~~~
#
# Let's set up our COBRA machine with the data.
#

# Create the COBRA aggregate with a fixed seed and epsilon=0.5, then fit it on
# the training split; default=False is passed so that the default set-up is
# not used.
cobra = Cobra(random_state=0, epsilon=0.5)
cobra.fit(X_train, Y_train, default=False)

######################################################################
# When we are fitting, we initialise COBRA with an epsilon value of
# :math:`0.5` - this is because we are aware of the distribution and 0.5
# is a fair guess of what would be a "good" epsilon value, because the
# data varies from :math:`-1` to :math:`1`.
#
# If we do not pass the :math:`\epsilon` parameter, we perform a CV on the
# training data for an optimised epsilon.
#
# Note that the ``default`` parameter is set to ``False``: this is so we
# can walk you through what happens when COBRA is set up, instead of the
# default settings being used.
#