    def get_predictions(self, target_data):
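        '''
        Return (y_t, y_s, y_true) as 2-D column arrays: leave-one-out
        predictions from the target learner, predictions from the source
        learner (leave-one-out when self.source_loo is set), and the true
        labels. Only the regression branch is currently maintained.
        '''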
        o = self.target_learner.predict_loo(target_data)
        if self.source_loo:
            o_source = self.source_learner.train_predict_loo(target_data)
        else:
            o_source = self.source_learner.predict(target_data)
        is_labeled = target_data.is_labeled

        target_labels = self.configs.target_labels
        if self.use_estimated_f:
            o = self.target_learner.predict_loo(target_data.get_subset(is_labeled))
        if target_data.is_regression:
            y_t = array_functions.vec_to_2d(o.fu)
            if self.source_loo:
                y_s = array_functions.vec_to_2d(o_source.fu)
            else:
                y_s = array_functions.vec_to_2d(o_source.fu[is_labeled])
            y_true = array_functions.vec_to_2d(o.true_y)
        else:
            assert False, 'Update this?'
            y_t = o.fu[:,target_labels]
            y_s = o_source.fu[:,target_labels]
            y_s = y_s[is_labeled,:]
            y_true = array_functions.make_label_matrix(o.true_y)[:,target_labels]
            y_true = array_functions.try_toarray(y_true)
        return (y_t, y_s, y_true)
    def train(self, data):
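        # Fit the per-point transfer weights g plus an optional bias by
        # minimizing the objective built from create_eval/create_gradient under
        # box (and optionally fused lasso) constraints, then train self.g_nw on
        # the resulting g values.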
        '''
        self.C = 1
        self.C2 = 10
        self.k = 1
        '''
        #self.C = 100
        #self.configs.use_fused_lasso = False
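        # Oracle-style initialization: g starts at g_max where the first
        # feature is below 0.5 and at 0 elsewhere.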
        g_max = 2
        g0_oracle = np.zeros(data.n)
        g0_oracle[data.x[:, 0] < .5] = g_max
        f = self.create_eval(data, self.C)
        g = self.create_gradient(data, self.C)
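        # Box constraints: each g_i is limited to [0, g_max]; a leading slot is
        # prepended for the bias (unbounded if include_bias, pinned to 0 otherwise).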
        bounds = list((0, g_max) for i in range(data.n))
        #bounds = list((i, i) for i in range(data.n))
        #bounds = list((0, 0) for i in range(data.n))
        #bounds[0] = (0,None)
        #self.include_bias = False
        if self.include_bias:
            bounds = [(None, None)] + bounds
            #bounds = [(10, 10)] + bounds
        else:
            bounds = [(0, 0)] + bounds
        n = data.n + 1

        g0 = np.zeros(n)
        g0[1:] = g0_oracle
        #g0[:] = 1
        x = data.x
        y_s = np.squeeze(data.y_s[:, 0])
        y_t = np.squeeze(data.y_t[:, 0])
        y = data.y
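        # Neighborhood weights from the negated kNN graph Laplacian; consumed by
        # the fused lasso term below.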
        W = -array_functions.make_laplacian_kNN(data.x, self.k,
                                                self.configs.metric)
        #W = array_functions.make_graph_radius(data.x, self.radius, self.configs.metric)
        #W = array_functions.make_graph_adjacent(data.x, self.configs.metric)
        W = array_functions.try_toarray(W)
        if not data.is_regression:
            y = array_functions.make_label_matrix(
                data.y)[:, data.classes].toarray()
            y = y[:, 0]
        reg = self.create_reg(data.x)
        reg2 = self.create_reg2(data.x)
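        # Two regimes: with the fused lasso, fused_lasso(g, W) <= C is imposed
        # as an SLSQP inequality constraint (and the corresponding weight in
        # args is 0); otherwise the term is weighted by C inside the objective
        # and L-BFGS-B handles the box constraints alone.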
        if self.configs.use_fused_lasso:
            method = 'SLSQP'
            max_iter = 10000
            maxfun = 10000
            fused_lasso = ScipyOptNonparametricHypothesisTransfer.fused_lasso
            lasso = lambda x: self.C - fused_lasso(x, W)
            constraints = [{'type': 'ineq', 'fun': lasso}]
            if self.configs.no_reg:
                constraints = ()
            args = (x, y, y_s, y_t, 0, reg, self.C2, reg2)
        else:
            method = 'L-BFGS-B'
            max_iter = np.inf
            maxfun = np.inf
            constraints = ()
            args = (x, y, y_s, y_t, self.C, reg, self.C2, reg2)

        if self.g_supervised:
            x = np.squeeze(data.x)
            assert x.ndim == 1
            min_i = x.argmin()
            max_i = x.argmax()
            # Offset by 1 to skip the bias slot prepended to bounds above.
            bounds[min_i + 1] = (1, None)
            bounds[max_i + 1] = (0, 0)

        options = {
            'disp': False,
            'maxiter': max_iter,
            'maxfun': maxfun,
            #'pgtol': 1e-8
        }
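        # Main solve with the analytic gradient; a fallback run without it
        # follows if this one fails (or when compare_results is enabled).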
        results = optimize.minimize(
            f,
            g0,
            method=method,
            bounds=bounds,
            jac=g,
            options=options,
            constraints=constraints,
            args=args,
        )
        compare_results = False
        if compare_results or not results.success:
            options['disp'] = False
            # No jac is passed below, so scipy.optimize.minimize falls back to a
            # finite-difference gradient ('approx_grad' is an fmin_l_bfgs_b
            # argument, not a recognized minimize option, so it is dropped here).
            results2 = optimize.minimize(f,
                                         g0,
                                         method=method,
                                         bounds=bounds,
                                         options=options,
                                         constraints=constraints,
                                         args=args)
        if compare_results:
            err = results.x - results2.x
            if norm(results2.x[1:]) == 0:
                print 'All zeros - using absolute error'
                print 'Abs Error - g: ' + str(norm(err[1:]))
            else:
                print 'Rel Error - g: ' + str(
                    norm(err[1:]) / norm(results2.x[1:]))
            if not self.include_bias:
                if norm(results2.x[0]) == 0:
                    print 'Abs Error - b: ' + str(norm(err[0]))
                else:
                    print 'Rel Error - b: ' + str(
                        norm(err[0]) / norm(results2.x[0]))
            rel_error = norm(results.fun - results2.fun) / norm(results2.fun)
            print 'Rel Error - f(g*): ' + str(rel_error)
            if rel_error > .001 and norm(results2.x) > 0:
                print 'Big error: C=' + str(self.C) + ' C2=' + str(self.C2)
        if not results.success:
            results = results2
        self.g = results.x[1:]
        self.bias = results.x[0]
        if not results.success:
            self.g[:] = 0
            self.bias = 0
            #print 'Failed: ' + results.message
        '''
        I = data.arg_sort()
        x = (data.x[I,:])
        g = array_functions.vec_to_2d(results.x[I])
        v = np.hstack((x,g))
        print v
        print ''
        '''
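        # Build a diagnostic tag from the hyperparameters; g is zeroed out if
        # the optimization failed or produced negative entries.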
        s = 'C=' + str(self.C) + ',C2=' + str(self.C2) + ',k=' + str(
            self.k) + '-'
        if not results.success:
            s += 'Opt failed - '
        has_negative = (self.g[1:] < -1e-6).any()
        if has_negative:
            s += 'Negative g - min value: ' + str(self.g.min())
        if not results.success or has_negative:
            print s + ': ' + results.message
            self.g[:] = 0
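        # Wrap the learned g values in a Data object and train self.g_nw on them.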
        g_data = data_lib.Data()
        g_data.x = data.x
        g_data.y = results.x[1:]
        g_data.is_regression = True
        g_data.set_train()
        g_data.set_target()
        g_data.set_true_y()
        self.g_nw.train_and_test(g_data)