Ejemplo n.º 1
0
def _get_error_rate_and_constraints(preds, labels, didi, I):
    """
    Score a set of predictions: error plus fairness-constraint violation.

    :param preds: predictions to be scored.
    :param labels: ground-truth values, forwarded to `utils.mean_squared_error`.
    :param didi: reference DIDI value; 20% of it is the allowed bound.
    :param I: indicator structure forwarded to `utils.didi_r`.
    :return: tuple `(error, [violation])`, where `violation` is the DIDI of
        `preds` minus `0.2 * didi` (non-positive when the bound is respected).
    """
    mse = utils.mean_squared_error(preds, labels)
    didi_of_preds = utils.didi_r(preds, I)
    allowed_didi = 0.2 * didi
    # The violation is wrapped in a list: one entry per constraint.
    return mse, [didi_of_preds - allowed_didi]
Ejemplo n.º 2
0
    def _training_generator(self,
                            x,
                            y,
                            minibatch_size,
                            num_iterations_per_loop=1,
                            num_loops=1):
        """
        Train on minibatches and yield the model output after every loop.

        A single random permutation of the row indices is drawn up front.
        Successive minibatches are successive slices of that permutation; when
        the end is reached the slice wraps around to the beginning.

        :param x: input array, indexed by row.
        :param y: target array, indexed by row like `x`.
        :param minibatch_size: rows per training step (capped at the number of
            rows of `x`).
        :param num_iterations_per_loop: training steps executed in each loop.
        :param num_loops: number of loops, i.e. number of yielded values.
        :return: generator yielding, once per loop, the value of
            `self.predictions_tensor` evaluated on the whole of `x`.
        """
        total_rows = x.shape[0]
        batch_size = min(minibatch_size, total_rows)
        shuffled_rows = list(range(total_rows))
        random.shuffle(shuffled_rows)

        cursor = 0  # position in `shuffled_rows` where the next slice starts
        for _loop in xrange(num_loops):
            for _step in xrange(num_iterations_per_loop):
                # Collect `batch_size` positions, wrapping around if needed.
                positions = []
                while len(positions) < batch_size:
                    stop = cursor + batch_size - len(positions)
                    if stop < total_rows:
                        positions.extend(range(cursor, stop))
                        cursor = stop
                    else:
                        positions.extend(range(cursor, total_rows))
                        cursor = 0

                # Map positions to actual (shuffled) row indices once and
                # reuse them for the inputs, targets and indicator arrays.
                batch_rows = [shuffled_rows[pos] for pos in positions]
                self.session.run(
                    self.train_op,
                    feed_dict=self._feed_dict_helper(
                        x[batch_rows],
                        y[batch_rows],
                        [indicator[batch_rows]
                         for indicator in self.I_train.values()]))

            # Diagnostics on the full data set. Their values are not used
            # further in this method (they were only fed to debug prints).
            constraint_values = self.session.run(
                self.mp.constraints(),
                feed_dict=self._feed_dict_helper(
                    x,
                    y,
                    list(self.I_train.values()))
            )

            p = self.session.run(
                self.predictions_tensor,
                feed_dict=self._feed_dict_helper(x)
            )

            # Map the sign of the raw output onto {0, 0.5, 1}.
            hard_preds = (1 + np.sign(p)) / 2
            didi_ratio = utils.didi_r(hard_preds, self.I_train) / self.didi_tr

            yield p
Ejemplo n.º 3
0
    def cst_info(self, x, y):
        """
        Return the DIDI of `y` relative to the reference DIDI of its data split.

        The split (train or test) is inferred by comparing `len(x)` with the
        length of entry 0 of the stored indicator matrices; the training split
        is checked first.

        :param x: inputs; only their number is used.
        :param y: values whose DIDI is computed through `utils.didi_r`.
        :return: dict with the single key 'DIDI perc. index'.
        :raises ValueError: if `len(x)` matches neither split.
        """
        n_points = len(x)
        n_train = len(self.I_train[0])
        if n_points == n_train:
            indicators, reference = self.I_train, self.didi_tr
        else:
            n_test = len(self.I_test[0])
            if n_points != n_test:
                raise ValueError(
                    "Cannot infer indicator matrix from input data. Input array has "
                    "shape %d, with matrices having shape %d and %d" %
                    (n_points, n_train, n_test))
            indicators, reference = self.I_test, self.didi_ts

        return {'DIDI perc. index': utils.didi_r(y, indicators) / reference}
Ejemplo n.º 4
0
def cross_val():
    """
    Run a 5-fold cross-validation of `TFCOFairCls` on the adult data set.

    For every fold the model is trained, every per-loop iterate is scored on
    the train and test split (accuracy and DIDI relative to the split's
    reference DIDI), and three solutions are reported: the last iterate, the
    best iterate and the m-stochastic solution. Mean and standard deviation of
    each metric over the folds are printed at the end.

    NOTE(review): `_get_error_rate_and_didi` is not defined in the visible
    code; it is expected to be provided elsewhere in the module.
    """

    def evaluate(iterates, targets, didi_ref, indicators):
        # Score every iterate; returns (errors, violations, didis, accs).
        errors, violations, didis, accs = [], [], [], []
        for p in iterates:
            # Map the sign of the raw output onto {0, 0.5, 1}.
            p_class = (1 + np.sign(p)) / 2
            err, viol = _get_error_rate_and_didi(p, targets.reshape(-1, 1),
                                                 didi_ref, indicators)
            acc = accuracy_score(targets, p_class)
            didi = utils.didi_r(p_class, indicators) / didi_ref
            errors.append(err)
            violations.append(viol)
            didis.append(didi)
            accs.append(acc)
        return errors, violations, didis, accs

    # New class implementation.
    xnp, xp, y = data_gen.load_adult()

    # Keys: '<Last|Best|Stoch>_iterate_<train|test>_<acc|ct>'.
    results = {
        f'{iterate}_iterate_{split}_{metric}': []
        for iterate in ('Last', 'Best', 'Stoch')
        for metric in ('acc', 'ct')
        for split in ('train', 'test')
    }

    def record(iterate, tr_acc, ts_acc, tr_ct, ts_ct):
        # Store the four metrics of one solution type for the current fold.
        results[f'{iterate}_iterate_train_acc'].append(tr_acc)
        results[f'{iterate}_iterate_test_acc'].append(ts_acc)
        results[f'{iterate}_iterate_train_ct'].append(tr_ct)
        results[f'{iterate}_iterate_test_ct'].append(ts_ct)

    nfolds = 5
    fsize = int(np.ceil(len(xnp) / nfolds))
    # Full index set; it does not depend on the fold, so build it once.
    idx = np.arange(len(xnp))
    for fidx in range(nfolds):
        print(f'\n### Processing fold {fidx}')

        # Separate index sets: fold `fidx` is the test set, the rest is train.
        tridx = np.hstack((idx[:fidx * fsize], idx[(fidx + 1) * fsize:]))
        tsidx = idx[fidx * fsize:(fidx + 1) * fsize]

        # Separate training and test data
        xptr = xp[tridx]
        ytr = y[tridx]
        xpts = xp[tsidx]
        yts = y[tsidx]

        # Scale the non-protected features; the scaler is fitted on train only.
        scl = MinMaxScaler()
        xnptr = scl.fit_transform(xnp[tridx])
        xnpts = scl.transform(xnp[tsidx])

        # Add protected features.
        xtr = np.hstack([xnptr, xptr])
        xts = np.hstack([xnpts, xpts])

        # Scale the targets with a separate scaler, again fitted on train only.
        scl = MinMaxScaler()
        ytr = scl.fit_transform(ytr)
        yts = scl.transform(yts)

        print("Computing indicator matrices.")
        I_train = utils.compute_indicator_matrix_c(xptr)
        I_test = utils.compute_indicator_matrix_c(xpts)
        didi_tr = utils.didi_c(ytr, I_train)
        didi_ts = utils.didi_c(yts, I_test)

        tfco_model = TFCOFairCls(input_dim=xtr.shape[1],
                                 output_dim=1,
                                 I_train=I_train,
                                 didi_tr=didi_tr)

        minibatch_size = 200
        iterations_per_loop = 200
        loops = 100

        train_pred, test_pred = tfco_model._full_training(
            xtr, xts, ytr, minibatch_size, iterations_per_loop, loops)

        train_errors, train_violations, train_didi, train_acc = evaluate(
            train_pred, ytr, didi_tr, I_train)
        # Test errors/violations are computed for symmetry but not used below.
        test_errors, test_violations, test_didi, test_acc = evaluate(
            test_pred, yts, didi_ts, I_test)

        train_violations = np.array(train_violations)
        print("Train Acc.", train_acc[-1])
        print("Train DIDI.", train_didi[-1])

        print("Test Acc.", test_acc[-1])
        print("Test DIDI.", test_didi[-1])

        print("Improving using Best Iterate instead of Last Iterate.")
        #
        # As discussed in [[CotterEtAl18b]](https://arxiv.org/abs/1809.04198), the last iterate may not be the best
        # choice; the paper suggests a simple heuristic to choose the best iterate out of the ones found after each
        # epoch. The heuristic ranks each of the solutions by accuracy and by fairness separately with respect to the
        # training data. Any solutions which satisfy the constraints are equally ranked top in terms of fairness.
        # Each solution thus has two ranks, and the chosen solution is the one with the smallest maximum of the two.
        #
        # This solution can be calculated using find_best_candidate_index given the list of training errors and
        # violations associated with each of the epochs.

        best_cand_index = tfco.find_best_candidate_index(
            train_errors, train_violations)

        print("Train Acc.", train_acc[best_cand_index])
        print("Train DIDI.", train_didi[best_cand_index])

        print("Test Acc.", test_acc[best_cand_index])
        # Fixed: this line used to print the test accuracy a second time.
        print("Test DIDI.", test_didi[best_cand_index])

        print("m-stochastic solution.")
        # [[CoJiSr19]](https://arxiv.org/abs/1804.06500) presents a method which shrinks the T-stochastic solution
        # down to one that is supported on at most (m+1) points, where m is the number of constraints, and is
        # guaranteed to be at least as good as the T-stochastic solution.
        #
        # This solution can be computed using find_best_candidate_distribution by passing in the training errors and
        # violations found at each epoch; it returns the weight of each constituent.

        cand_dist = tfco.find_best_candidate_distribution(
            train_errors, train_violations)
        print(cand_dist)

        # Expected metrics under the candidate distribution.
        m_stoch_train_acc = np.dot(cand_dist, train_acc)
        m_stoch_train_didi = np.dot(cand_dist, train_didi)
        m_stoch_test_acc = np.dot(cand_dist, test_acc)
        m_stoch_test_didi = np.dot(cand_dist, test_didi)

        print("Train Acc", m_stoch_train_acc)
        print("Train DIDI", m_stoch_train_didi)
        print("Test Acc", m_stoch_test_acc)
        print("Test DIDI", m_stoch_test_didi)

        record('Last', train_acc[-1], test_acc[-1],
               train_didi[-1], test_didi[-1])
        record('Best', train_acc[best_cand_index], test_acc[best_cand_index],
               train_didi[best_cand_index], test_didi[best_cand_index])
        record('Stoch', m_stoch_train_acc, m_stoch_test_acc,
               m_stoch_train_didi, m_stoch_test_didi)

    for k, val in results.items():
        print(k, np.mean(val), np.std(val))
Ejemplo n.º 5
0
    def adjust_targets(self, y, p, alpha, beta, use_prob):
        """
        Build and solve the optimization model that yields adjusted targets
        satisfying the fairness constraint.

        :param y: true targets; flattened with `reshape(-1)`.
        :param p: current model predictions; flattened with `reshape(-1)`.
        :param alpha: weight of the prediction-distance term, used as
            `1 / alpha` in the objective of the regularised branch.
        :param beta: radius of the ball around `p` used when the predictions
            are already feasible and `beta >= 0`.
        :param use_prob: not used by this method.
        :return: numpy array with the solution value of every target variable.

        NOTE(review): the assertion tolerates `p is None` when `alpha == 0`,
        yet `p.reshape` is called unconditionally, and `1.0 / alpha` is
        evaluated in the regularised branch -- confirm the intended contract.
        """
        assert (alpha == 0 or p is not None)

        # Work on flat vectors.
        y = y.reshape(-1)
        p = p.reshape(-1)

        # Do the current predictions already respect the fairness bound?
        preds_feasible = (utils.didi_r(p, self.I_train) <= self.constraint_value)

        model = CPModel('Fairness Reg Problem')
        model.parameters.timelimit = _CPLEX_TIME_LIMIT

        # One continuous variable in [0, 1] per data point.
        n_points = len(y)
        rows = list(range(n_points))
        targets = model.continuous_var_list(keys=rows, lb=0.0, ub=1.0, name='y')

        # Fairness is imposed as a hard constraint (sum of the absolute
        # deviations below a threshold) rather than as an objective penalty.
        deviations = model.continuous_var_list(keys=self.I_train.keys())
        for key, val in self.I_train.items():
            group_size = np.sum(val)
            if group_size > 0:
                overall_mean = (1.0 / n_points) * model.sum(targets)
                group_mean = (1.0 / group_size) * model.sum(
                    [val[j] * targets[j] for j in rows])
                gap = overall_mean - group_mean
                # Linearization of |gap|: deviations[key] bounds it from above.
                model.add_constraint(deviations[key] >= gap)
                model.add_constraint(deviations[key] >= -gap)

        total_deviation = .0
        total_deviation += model.sum(deviations)
        model.add_constraint(total_deviation <= self.constraint_value,
                             ctname='fairness_cnst')

        def mean_squared_gap(reference):
            # Mean squared distance between `reference` and the variables.
            return (1.0 / n_points) * model.sum(
                [(reference[i] - targets[i]) * (reference[i] - targets[i])
                 for i in rows])

        y_loss = mean_squared_gap(y)
        p_loss = mean_squared_gap(p)

        if preds_feasible and beta >= 0:
            # Search inside a ball of radius `beta` around the predictions.
            model.add(p_loss <= beta)
            model.minimize(y_loss)
        else:
            # Regularise instead, so that the new targets do not drift too far
            # from the actual network output.
            model.minimize(y_loss + (1.0 / alpha) * p_loss)

        model.solve()

        # Validate the solver outcome before reading the solution.
        self._check_solution(model)

        return np.array([var.solution_value for var in targets])
Ejemplo n.º 6
0
    def validate(self):
        """
        Run the MACS procedure once per fold and collect the logged results.

        For each of the `self.nfolds` folds the method splits the stored
        training data with `self.get_train_val_index`, scales it, builds the
        master and the learner that match `self.dataset`, `self.mtype` and
        `self.ltype`, fits a `macs.MACS` instance and stores the logger's
        results in `self.results[f'fold_{ii}']`.

        :raises ValueError: on an unknown master or learner type.
        :raises NotImplementedError: for the 'cnd' learner on 'adult'.

        NOTE(review): a dataset that is neither in BALANCE_DATASET nor
        'adult' nor 'crime' matches no construction branch, so
        `self.master` / `self.learner` keep whatever value they had before.
        """

        # Load data.
        # self.load_data()

        for ii in range(self.nfolds):

            # TRAIN TEST SPLIT
            if self.dataset in BALANCE_DATASET:
                # Balance data sets: no protected features are read here.
                train_idx, test_idx = self.get_train_val_index(ii)
                xnp_train, y_train = self.xnp_tr[train_idx], self.y_tr[train_idx]
                xnp_test, y_test = self.xnp_tr[test_idx], self.y_tr[test_idx]

                # STANDARDIZATION.
                # Standardize train set.
                x_train = self.scaler.fit_transform(xnp_train)
                x_test = self.scaler.transform(xnp_test)

                # Targets are left unscaled in this branch.
                # y_train = self.scaler.fit_transform(y_train)
                # y_test = self.scaler.transform(y_test)

            else:
                train_idx, test_idx = self.get_train_val_index(ii)
                xnp_train, xp_train, y_train = self.xnp_tr[train_idx], self.xp_tr[train_idx], self.y_tr[train_idx]
                xnp_test, xp_test, y_test = self.xnp_tr[test_idx], self.xp_tr[test_idx], self.y_tr[test_idx]

                # STANDARDIZATION.
                xnp_train = self.scaler.fit_transform(xnp_train)
                xnp_test = self.scaler.transform(xnp_test)

                # Add protected features.
                x_train = np.hstack([xnp_train, xp_train])
                x_test = np.hstack([xnp_test, xp_test])

                # The same scaler object is fitted again, now on the targets;
                # this must happen after the feature transforms above.
                # NOTE(review): presumably an sklearn-style scaler whose
                # fit_transform replaces the previous fit -- confirm.
                y_train = self.scaler.fit_transform(y_train)
                y_test = self.scaler.transform(y_test)

            if self.dataset in BALANCE_DATASET:
                # Data shapes.
                input_dim = x_train.shape[1]
                output_dim = len(np.unique(y_train))

                # Build the master
                if self.mtype == 'balance':
                    nclasses = len(np.unique(y_train))
                    self.master = BalancedCountsMaster(nclasses=nclasses)
                else:
                    raise ValueError(f'Unknown master type "{self.mtype}"')

                # Start the main process
                if self.ltype == 'cvx':
                    self.learner = cls.BalanceMultiLogRegressor(self.alpha)

                elif self.ltype == 'sbrnn':
                    self.learner = cls.SBRNN(input_dim, output_dim, self.alpha)

                elif self.ltype == 'lbrf':
                    self.learner = cls.LowBiasRandomForestLearner(input_dim, output_dim)

                elif self.ltype == 'lr':
                    self.learner = cls.LogisticRegressionLearner(input_dim, output_dim)

                elif self.ltype == 'rf':
                    self.learner = cls.RandomForestLearner(input_dim, output_dim)

                elif self.ltype == 'nn':
                    self.learner = cls.NeuralNetworkLearner(input_dim, output_dim)

                else:
                    raise ValueError(f'Unknown learner type "{self.ltype}"')

            elif self.dataset == 'adult':
                # Classification with fairness constraint: the `_c` variants
                # of the indicator-matrix / DIDI helpers are used.
                print("Computing indicator matrices.")
                I_train = utils.compute_indicator_matrix_c(xp_train)
                I_test = utils.compute_indicator_matrix_c(xp_test)
                didi_tr = utils.didi_c(y_train, I_train)
                didi_ts = utils.didi_c(y_test, I_test)

                # Build the master
                if self.mtype == 'fairness':
                    self.master = FairnessClsMaster(I_train, I_test, didi_tr, didi_ts)
                else:
                    raise ValueError(f'Unknown master type "{self.mtype}"')

                input_dim = x_train.shape[1]
                output_dim = len(np.unique(y_train))

                # Start the main process
                if self.ltype == 'cvx':
                    self.learner = cls.FairBinLogRegressor(self.alpha, I_train)

                elif self.ltype == 'cnd':
                    # Kamiran and Calders method.
                    # learner = cls.CND(xnptr, xptr, ytr)
                    raise NotImplementedError

                elif self.ltype == 'tfco':
                    # The TFCO classifier is built with a single output unit,
                    # overriding the class-count based output_dim above.
                    input_dim = x_train.shape[1]
                    output_dim = 1
                    self.learner = tfco_cls.TFCOFairCls(input_dim, output_dim, I_train, didi_tr)

                elif self.ltype == 'lbrf':
                    self.learner = cls.LowBiasRandomForestLearner(input_dim, output_dim)

                elif self.ltype == 'lr':
                    self.learner = cls.LogisticRegressionLearner(input_dim, output_dim)

                elif self.ltype == 'rf':
                    self.learner = cls.RandomForestLearner(input_dim, output_dim)

                elif self.ltype == 'nn':
                    self.learner = cls.NeuralNetworkLearner(input_dim, output_dim)

                else:
                    raise ValueError(f'Unknown learner type "{self.ltype}"')

            elif self.dataset == 'crime':
                # Regression with fairness constraint: the `_r` variants of
                # the indicator-matrix / DIDI helpers are used.
                print("Computing indicator matrices.")
                I_train = utils.compute_indicator_matrix_r(xp_train)
                I_test = utils.compute_indicator_matrix_r(xp_test)
                didi_tr = utils.didi_r(y_train, I_train)
                didi_ts = utils.didi_r(y_test, I_test)

                # Build the master
                if self.mtype == 'fairness':
                    self.master = FairnessRegMaster(I_train, I_test, didi_tr, didi_ts)
                else:
                    raise ValueError(f'Unknown master type "{self.mtype}"')

                # Build the learner.
                if self.ltype == 'cvx':
                    self.learner = rgs.FairRegressor(self.alpha, I_train)

                elif self.ltype == 'tfco':
                    input_dim = x_train.shape[1]
                    output_dim = 1
                    self.learner = tfco_reg.TFCOFairReg(input_dim, output_dim, I_train, didi_tr)

                elif self.ltype == 'lbrf':
                    self.learner = rgs.LowBiasRandomForestLearner()

                elif self.ltype == 'lr':
                    self.learner = rgs.LRegressor()

                elif self.ltype == 'gb':
                    self.learner = rgs.GBTree()

                elif self.ltype == 'nn':
                    self.learner = rgs.Net((x_train.shape[1],), 1)

                else:
                    raise ValueError(f'Unknown learner type "{self.ltype}"')

            # Loggers
            # Both loggers are combined so that one `self.logger` feeds the
            # per-fold results stored below.
            params = dict(fold=ii, alpha=self.alpha, beta=self.beta, init=self.initial_step, use_prob=self.use_prob)
            wb_log = WandBLogger(self.learner, self.master, x_train, y_train, x_test, y_test, params, f'{self.dataset}')
            cst_log = CustomLogger(self.learner, self.master, x_train, y_train, nfold=ii, x_test=x_test, y_test=y_test)
            self.logger = MultiLogger([cst_log, wb_log])
            # Start the MACS process
            mp = macs.MACS(self.learner, self.master, self.logger)
            mp.fit(x_train, y_train, self.iterations, self.alpha, self.beta, self.initial_step, use_prob=self.use_prob)
            self.results[f'fold_{ii}'] = self.logger.results
Ejemplo n.º 7
0
def test():
    """
    Train `TFCOFairReg` on an 80/20 split of the crime data set and report
    the results.

    Every per-loop iterate is scored on both splits (R2 and DIDI relative to
    the split's reference DIDI). Three solutions are printed: the last
    iterate, the best iterate and the m-stochastic solution.
    """

    # Data with our preprocessing routines.
    from sklearn.preprocessing import MinMaxScaler

    def evaluate(iterates, targets, didi_ref, indicators):
        # Score every iterate; returns (errors, violations, didis, r2s).
        errors, violations, didis, r2s = [], [], [], []
        for p in iterates:
            err, viol = _get_error_rate_and_constraints(
                p, targets.reshape(-1, 1), didi_ref, indicators)
            r2 = r2_score(targets, p)
            didi = utils.didi_r(p, indicators) / didi_ref
            errors.append(err)
            violations.append(viol)
            didis.append(didi)
            r2s.append(r2)
        return errors, violations, didis, r2s

    xnp, xp, y = data_gen.load_crime()
    scl = MinMaxScaler()
    # First 80% of the rows for training, the rest for testing.
    train_pts = int(0.8 * len(xnp))
    xnptr = scl.fit_transform(xnp[:train_pts])
    xnpts = scl.transform(xnp[train_pts:])
    xptr = xp[:train_pts]
    xpts = xp[train_pts:]
    ytr = y[:train_pts]
    yts = y[train_pts:]

    # Add protected features.
    xtr = np.hstack([xnptr, xptr])
    xts = np.hstack([xnpts, xpts])

    # Scale the targets with a separate scaler, fitted on train only.
    scl = MinMaxScaler()
    ytr = scl.fit_transform(ytr)
    yts = scl.transform(yts)

    I_train = utils.compute_indicator_matrix_r(xptr)
    I_test = utils.compute_indicator_matrix_r(xpts)
    didi_tr = utils.didi_r(ytr, I_train)
    didi_ts = utils.didi_r(yts, I_test)

    tfco_model = TFCOFairReg(input_dim=xtr.shape[1], output_dim=1,
                             I_train=I_train, didi_tr=didi_tr)

    minibatch_size = 200
    iterations_per_loop = 200
    loops = 80

    train_pred, test_pred = tfco_model._full_training(xtr, xts, ytr,
                                                      minibatch_size, iterations_per_loop, loops)

    train_errors, train_violations, train_didi, train_r2 = evaluate(
        train_pred, ytr, didi_tr, I_train)
    # Test errors/violations are computed for symmetry but not used below.
    test_errors, test_violations, test_didi, test_r2 = evaluate(
        test_pred, yts, didi_ts, I_test)

    train_violations = np.array(train_violations)
    print("Train R2", train_r2[-1])
    print("Train DIDI", train_didi[-1])

    # Fixed: these two lines used to be labelled "Train ..." although they
    # report the test-set metrics.
    print("Test R2", test_r2[-1])
    print("Test DIDI", test_didi[-1])

    print("Improving using Best Iterate instead of Last Iterate.")
    #
    # As discussed in [[CotterEtAl18b]](https://arxiv.org/abs/1809.04198), the last iterate may not be the best choice;
    # the paper suggests a simple heuristic to choose the best iterate out of the ones found after each epoch.
    # The heuristic ranks each of the solutions by accuracy and by fairness separately with respect to the training
    # data. Any solutions which satisfy the constraints are equally ranked top in terms of fairness.
    # Each solution thus has two ranks, and the chosen solution is the one with the smallest maximum of the two ranks.
    #
    # This solution can be calculated using find_best_candidate_index given the list of training errors and violations
    # associated with each of the epochs.

    best_cand_index = tfco.find_best_candidate_index(train_errors, train_violations)

    print("Train R2", train_r2[best_cand_index])
    print("Train DIDI", train_didi[best_cand_index])

    print("Test R2", test_r2[best_cand_index])
    print("Test DIDI", test_didi[best_cand_index])

    print("m-stochastic solution.")
    # [[CoJiSr19]](https://arxiv.org/abs/1804.06500) presents a method which shrinks the T-stochastic solution down
    # to one that is supported on at most (m+1) points, where m is the number of constraints, and is guaranteed to be
    # at least as good as the T-stochastic solution.
    #
    # This solution can be computed using find_best_candidate_distribution by passing in the training errors and
    # violations found at each epoch; it returns the weight of each constituent.

    cand_dist = tfco.find_best_candidate_distribution(train_errors, train_violations)
    print(cand_dist)

    # Expected metrics under the candidate distribution.
    m_stoch_train_r2 = np.dot(cand_dist, train_r2)
    m_stoch_train_didi = np.dot(cand_dist, train_didi)
    m_stoch_test_r2 = np.dot(cand_dist, test_r2)
    m_stoch_test_didi = np.dot(cand_dist, test_didi)

    print("Train R2", m_stoch_train_r2)
    print("Train DIDI", m_stoch_train_didi)
    print("Test R2", m_stoch_test_r2)
    print("Test DIDI", m_stoch_test_didi)