    def predict(self, data):
        # d = data_lib.Data(np.expand_dims(data.source_y_pred, 1), data.y)
        y_pred_source = data.source_y_pred
        I = np.arange(y_pred_source.size)
        if self.predict_sample is not None and self.predict_sample < y_pred_source.size:
            I = np.random.choice(y_pred_source.size, self.predict_sample, replace=False)
        #L = array_functions.make_laplacian(y_pred_source[I], self.sigma_tr)
        #W = array_functions.make_rbf(self.transform.transform(self.x), self.sigma_nw, x2=self.transform.transform(data.x[I,:])).T
        # k-NN graph Laplacian over the (sampled) source predictions
        k_L = int(self.sigma_tr*I.size)
        L = array_functions.make_laplacian_kNN(y_pred_source[I], k_L)
        # k-NN weights between the sampled points and the training data, turned into a smoothing matrix
        k_W = int(self.sigma_nw*self.x.shape[0])
        W = array_functions.make_knn(self.transform.transform(data.x[I, :]), k_W, x2=self.transform.transform(self.x))
        S = array_functions.make_smoothing_matrix(W)

        # solve the Laplacian-regularized system (I + C*L) f = S y
        A = np.eye(I.size) + self.C*L
        try:
            f = np.linalg.lstsq(A, S.dot(self.y))[0]
        except Exception:
            print 'GraphTransferNW:predict failed, returning mean'
            f = self.y.mean() * np.ones(data.true_y.shape)

        o = results.Output(data)
        if self.predict_sample is not None:
            # only a subsample was solved for; spread f to all points with the NW learner
            nw_data = data_lib.Data(data.x[I,:], f)
            self.nw_learner.train_and_test(nw_data)
            nw_output = self.nw_learner.predict(data)
            o.y = nw_output.y
            o.fu = nw_output.y
        else:
            o.y = f
            o.fu = f

        return o
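The solve above computes f = (I + C*L)^{-1} S y: the weighted-average estimate S y is further regularized over the k-NN graph built on the source predictions. A minimal sketch of just that linear-algebra step with plain numpy on toy matrices (the project-specific `array_functions` helpers are assumed, not reproduced):

import numpy as np

n = 4
S = np.full((n, n), 1.0 / n)                        # toy smoothing matrix (rows sum to 1)
W = np.diag(np.ones(n - 1), 1) + np.diag(np.ones(n - 1), -1)
L = np.diag(W.sum(axis=1)) - W                      # chain-graph Laplacian
y = np.array([0.0, 1.0, 2.0, 3.0])
C = 0.5

A = np.eye(n) + C * L
f = np.linalg.lstsq(A, S.dot(y))[0]                 # Laplacian-smoothed estimate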
Example 2
    def create_reg(self, x):
        # regularizer built from the k-NN graph Laplacian of x
        L = array_functions.make_laplacian_kNN(x, self.k, self.metric)
        r = lambda g: ScipyOptNonparametricHypothesisTransfer.reg(g, L)
        return r
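All of these examples lean on `array_functions.make_laplacian_kNN`, which is not shown here. For orientation, a minimal sketch of a k-NN graph Laplacian under the usual definition L = D - W, assuming a symmetrized connectivity graph (the project's weighting, sparsity, and metric handling may differ):

import numpy as np
from sklearn.neighbors import kneighbors_graph

def knn_laplacian(x, k, metric='euclidean'):
    # x: (n_samples, n_features); symmetrized k-NN adjacency, then L = D - W
    W = kneighbors_graph(x, k, mode='connectivity', metric=metric).toarray()
    W = np.maximum(W, W.T)
    D = np.diag(W.sum(axis=1))
    return D - W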
Example 3
    def train(self, data):
        '''
        self.C = 1
        self.C2 = 10
        self.k = 1
        '''
        #self.C = 100
        #self.configs.use_fused_lasso = False
        g_max = 2
        # oracle initialization: g = g_max wherever the first feature is below .5
        g0_oracle = np.zeros(data.n)
        g0_oracle[data.x[:, 0] < .5] = g_max
        f = self.create_eval(data, self.C)
        g = self.create_gradient(data, self.C)
        bounds = list((0, g_max) for i in range(data.n))
        #bounds = list((i, i) for i in range(data.n))
        #bounds = list((0, 0) for i in range(data.n))
        #bounds[0] = (0,None)
        #self.include_bias = False
        if self.include_bias:
            bounds = [(None, None)] + bounds
            #bounds = [(10, 10)] + bounds
        else:
            bounds = [(0, 0)] + bounds
        n = data.n + 1

        g0 = np.zeros(n)
        g0[1:] = g0_oracle
        #g0[:] = 1
        x = data.x
        y_s = np.squeeze(data.y_s[:, 0])
        y_t = np.squeeze(data.y_t[:, 0])
        y = data.y
        W = -array_functions.make_laplacian_kNN(data.x, self.k,
                                                self.configs.metric)
        #W = array_functions.make_graph_radius(data.x, self.radius, self.configs.metric)
        #W = array_functions.make_graph_adjacent(data.x, self.configs.metric)
        W = array_functions.try_toarray(W)
        if not data.is_regression:
            y = array_functions.make_label_matrix(
                data.y)[:, data.classes].toarray()
            y = y[:, 0]
        reg = self.create_reg(data.x)
        reg2 = self.create_reg2(data.x)
        if self.configs.use_fused_lasso:
            # SLSQP enforces the fused-lasso penalty as the constraint fused_lasso(g, W) <= C
            method = 'SLSQP'
            max_iter = 10000
            maxfun = 10000
            fused_lasso = ScipyOptNonparametricHypothesisTransfer.fused_lasso
            lasso = lambda g_vec: self.C - fused_lasso(g_vec, W)
            constraints = [{'type': 'ineq', 'fun': lasso}]
            if self.configs.no_reg:
                constraints = ()
            args = (x, y, y_s, y_t, 0, reg, self.C2, reg2)
        else:
            method = 'L-BFGS-B'
            max_iter = np.inf
            maxfun = np.inf
            constraints = ()
            args = (x, y, y_s, y_t, self.C, reg, self.C2, reg2)

        if self.g_supervised:
            x = np.squeeze(data.x)
            assert x.ndim == 1
            min_i = x.argmin()
            max_i = x.argmax()
            bounds[min_i] = (1, None)
            bounds[max_i] = (0, 0)

        options = {
            'disp': False,
            'maxiter': max_iter,
            'maxfun': maxfun,
            #'pgtol': 1e-8
        }
        results = optimize.minimize(
            f,
            g0,
            method=method,
            bounds=bounds,
            jac=g,
            options=options,
            constraints=constraints,
            args=args,
        )
        compare_results = False
        if compare_results or not results.success:
            options['disp'] = False
            # jac is omitted below, so scipy approximates the gradient numerically
            # ('approx_grad' belongs to fmin_l_bfgs_b and is not recognized by optimize.minimize)
            results2 = optimize.minimize(f,
                                         g0,
                                         method=method,
                                         bounds=bounds,
                                         options=options,
                                         constraints=constraints,
                                         args=args)
        if compare_results:
            err = results.x - results2.x
            if norm(results2.x[1:]) == 0:
                print 'All zeros - using absolute error'
                print 'Abs Error - g: ' + str(norm(err[1:]))
            else:
                print 'Rel Error - g: ' + str(
                    norm(err[1:]) / norm(results2.x[1:]))
            if not self.include_bias:
                if norm(results2.x[0]) == 0:
                    print 'Abs Error - b: ' + str(norm(err[0]))
                else:
                    print 'Rel Error - b: ' + str(
                        norm(err[0]) / norm(results2.x[0]))
            rel_error = norm(results.fun - results2.fun) / norm(results2.fun)
            print 'Rel Error - f(g*): ' + str(rel_error)
            if rel_error > .001 and norm(results2.x) > 0:
                print 'Big error: C=' + str(self.C) + ' C2=' + str(self.C2)
        if not results.success:
            results = results2
        self.g = results.x[1:]
        self.bias = results.x[0]
        if not results.success:
            self.g[:] = 0
            self.bias = 0
            #print 'Failed: ' + results.message
        '''
        I = data.arg_sort()
        x = (data.x[I,:])
        g = array_functions.vec_to_2d(results.x[I])
        v = np.hstack((x,g))
        print v
        print ''
        '''
        s = 'C=' + str(self.C) + ',C2=' + str(self.C2) + ',k=' + str(
            self.k) + '-'
        if not results.success:
            s += 'Opt failed - '
        has_negative = (self.g[1:] < -1e-6).any()
        if has_negative:
            s += 'Negative g - min value: ' + str(self.g.min())
        if not results.success or has_negative:
            print s + ': ' + results.message
            self.g[:] = 0
        else:
            pass
        g_data = data_lib.Data()
        g_data.x = data.x
        g_data.y = results.x[1:]
        g_data.is_regression = True
        g_data.set_train()
        g_data.set_target()
        g_data.set_true_y()
        self.g_nw.train_and_test(g_data)
        if results.success:
            pass
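In the SLSQP branch of train, the fused-lasso regularizer enters as the inequality constraint C - fused_lasso(g, W) >= 0, i.e. fused_lasso(g, W) <= C, while the L-BFGS-B branch instead passes C through args as a penalty weight. A sketch of a graph fused-lasso (total-variation) penalty of the form sum_ij W_ij * |g_i - g_j|; whether this matches `ScipyOptNonparametricHypothesisTransfer.fused_lasso` exactly is an assumption:

import numpy as np

def fused_lasso_value(g, W):
    # graph total variation: 0.5 * sum_ij W_ij * |g_i - g_j| (each edge counted once)
    diffs = np.abs(g[:, None] - g[None, :])
    return 0.5 * np.sum(W * diffs)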
Example 4
    def create_reg(self, x):
        L = array_functions.make_laplacian_kNN(x, self.k, self.metric)
        r = lambda g: ScipyOptNonparametricHypothesisTransfer.reg(g, L)
        return r
Example 5
    def train(self, data):

        '''
        self.C = 1
        self.C2 = 10
        self.k = 1
        '''
        #self.C = 100
        #self.configs.use_fused_lasso = False
        g_max = 2
        g0_oracle = np.zeros(data.n)
        g0_oracle[data.x[:,0] < .5] = g_max
        f = self.create_eval(data, self.C)
        g = self.create_gradient(data, self.C)
        bounds = list((0, g_max) for i in range(data.n))
        #bounds = list((i, i) for i in range(data.n))
        #bounds = list((0, 0) for i in range(data.n))
        #bounds[0] = (0,None)
        #self.include_bias = False
        if self.include_bias:
            bounds = [(None, None)] + bounds
            #bounds = [(10, 10)] + bounds
        else:
            bounds = [(0, 0)] + bounds
        n = data.n + 1

        g0 = np.zeros(n)
        g0[1:] = g0_oracle
        #g0[:] = 1
        x = data.x
        y_s = np.squeeze(data.y_s[:,0])
        y_t = np.squeeze(data.y_t[:,0])
        y = data.y
        W = -array_functions.make_laplacian_kNN(data.x,self.k,self.configs.metric)
        #W = array_functions.make_graph_radius(data.x, self.radius, self.configs.metric)
        #W = array_functions.make_graph_adjacent(data.x, self.configs.metric)
        W = array_functions.try_toarray(W)
        if not data.is_regression:
            y = array_functions.make_label_matrix(data.y)[:,data.classes].toarray()
            y = y[:,0]
        reg = self.create_reg(data.x)
        reg2 = self.create_reg2(data.x)
        if self.configs.use_fused_lasso:
            method = 'SLSQP'
            max_iter = 10000
            maxfun = 10000
            fused_lasso = ScipyOptNonparametricHypothesisTransfer.fused_lasso
            lasso = lambda x : self.C - fused_lasso(x,W)
            constraints = [{
                'type': 'ineq',
                'fun': lasso
            }]
            if self.configs.no_reg:
                constraints = ()
            args = (x,y,y_s,y_t,0,reg,self.C2,reg2)
        else:
            method = 'L-BFGS-B'
            max_iter = np.inf
            maxfun = np.inf
            constraints = ()
            args = (x,y,y_s,y_t,self.C,reg,self.C2,reg2)

        if self.g_supervised:
            x = np.squeeze(data.x)
            assert x.ndim == 1
            min_i = x.argmin()
            max_i = x.argmax()
            bounds[min_i] = (1,None)
            bounds[max_i] = (0,0)

        options = {
            'disp': False,
            'maxiter':max_iter,
            'maxfun': maxfun,
            #'pgtol': 1e-8
        }
        results = optimize.minimize(
            f,
            g0,
            method=method,
            bounds=bounds,
            jac=g,
            options=options,
            constraints=constraints,
            args=args,
        )
        compare_results = False
        if compare_results or not results.success:
            options['disp'] = False
            # jac is omitted below, so scipy approximates the gradient numerically
            # ('approx_grad' belongs to fmin_l_bfgs_b and is not recognized by optimize.minimize)
            results2 = optimize.minimize(
                f,
                g0,
                method=method,
                bounds=bounds,
                options=options,
                constraints=constraints,
                args=args
            )
        if compare_results:
            err = results.x - results2.x
            if norm(results2.x[1:]) == 0:
                print 'All zeros - using absolute error'
                print 'Abs Error - g: ' + str(norm(err[1:]))
            else:
                print 'Rel Error - g: ' + str(norm(err[1:])/norm(results2.x[1:]))
            if not self.include_bias:
                if norm(results2.x[0]) == 0:
                    print 'Abs Error - b: ' + str(norm(err[0]))
                else:
                    print 'Rel Error - b: ' + str(norm(err[0])/norm(results2.x[0]))
            rel_error = norm(results.fun-results2.fun)/norm(results2.fun)
            print 'Rel Error - f(g*): ' + str(rel_error)
            if rel_error > .001 and norm(results2.x) > 0:
                print 'Big error: C=' + str(self.C) + ' C2=' + str(self.C2)
        if not results.success:
            results = results2
        self.g = results.x[1:]
        self.bias = results.x[0]
        if not results.success:
            self.g[:] = 0
            self.bias = 0
            #print 'Failed: ' + results.message
        '''
        I = data.arg_sort()
        x = (data.x[I,:])
        g = array_functions.vec_to_2d(results.x[I])
        v = np.hstack((x,g))
        print v
        print ''
        '''
        s = 'C=' + str(self.C) + ',C2=' + str(self.C2) + ',k=' + str(self.k) + '-'
        if not results.success:
            s += 'Opt failed - '
        has_negative = (self.g[1:] < -1e-6).any()
        if has_negative:
            s += 'Negative g - min value: ' + str(self.g.min())
        if not results.success or has_negative:
            print s + ': ' + results.message
            self.g[:] = 0
        else:
            pass
        g_data = data_lib.Data()
        g_data.x = data.x
        g_data.y = results.x[1:]
        g_data.is_regression = True
        g_data.set_train()
        g_data.set_target()
        g_data.set_true_y()
        self.g_nw.train_and_test(g_data)
        if results.success:
            pass
    def train_g_nonparametric(self, target_data):
        y_t, y_s, y_true = self.get_predictions(target_data)

        is_labeled = target_data.is_labeled
        labeled_inds = is_labeled.nonzero()[0]
        n_labeled = len(labeled_inds)
        g = cvx.Variable(n_labeled)
        '''
        L = array_functions.make_laplacian_uniform(target_data.x[labeled_inds,:],self.radius,metric) \
            + .0001*np.identity(n_labeled)
        '''
        L = array_functions.make_laplacian_kNN(target_data.x[labeled_inds,:],self.k,self.metric) \
            + .0001*np.identity(n_labeled)
        if self.use_fused_lasso:
            reg = cvx_functions.create_fused_lasso(-L, g)
        else:
            reg = cvx.quad_form(g,L)
        loss = cvx.sum_entries(
            cvx.power(
                cvx.mul_elemwise(y_s[:,0], g) + cvx.mul_elemwise(y_t[:,0], (1-g)) - y_true[:,0],
                2
            )
        )
        constraints = [g >= 0, g <= .5]
        #constraints += [g[0] == .5, g[-1] == 0]
        obj = cvx.Minimize(loss + self.C*reg)
        prob = cvx.Problem(obj,constraints)

        assert prob.is_dcp()
        try:
            prob.solve()
            g_value = np.reshape(np.asarray(g.value),n_labeled)
        except Exception:
            k = 0
            #assert prob.status is None
            print 'CVX problem: setting g = ' + str(k)
            print '\tsigma=' + str(self.sigma)
            print '\tC=' + str(self.C)
            print '\tradius=' + str(self.radius)
            g_value = k*np.ones(n_labeled)
        if self.should_plot_g and enable_plotting and target_data.x.shape[1] == 1:
            array_functions.plot_2d(target_data.x[labeled_inds,:],g_value)

        labeled_train_data = target_data.get_subset(labeled_inds)
        assert labeled_train_data.y.shape == g_value.shape
        g_nw = method.NadarayaWatsonMethod(copy.deepcopy(self.configs))
        labeled_train_data.is_regression = True
        labeled_train_data.y = g_value
        labeled_train_data.true_y = g_value
        g_nw.configs.loss_function = loss_function.MeanSquaredError()

        g_nw.tune_loo(labeled_train_data)
        g_nw.train(labeled_train_data)
        '''
        a =np.hstack((g_value[labeled_train_data.x.argsort(0)], np.sort(labeled_train_data.x,0)))
        print str(a)
        print 'g_nw sigma: ' + str(g_nw.sigma)
        print 'C:' + str(self.C)
        '''
        target_data.is_regression = True
        self.g = g_nw.predict(target_data).fu
        self.g[labeled_inds] = g_value
        assert not np.any(np.isnan(self.g))
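train_g_nonparametric solves for a per-point mixing weight g in [0, .5]: the blended prediction g*y_s + (1-g)*y_t is fit to the true labels, with either a fused-lasso or a quadratic Laplacian penalty keeping g smooth over the k-NN graph, and the learned g is then extended from the labeled points to the rest of the data by a Nadaraya-Watson model. The example above uses the pre-1.0 cvxpy API (sum_entries, mul_elemwise); a small self-contained sketch of the quadratic-penalty case with the current cvxpy API and synthetic stand-in data, for reference only:

import numpy as np
import cvxpy as cvx

n = 5
y_s = np.linspace(0.0, 1.0, n)   # source predictions
y_t = np.zeros(n)                # target predictions
y_true = 0.3 * y_s               # observed labels
L = np.eye(n)                    # stand-in for the k-NN graph Laplacian
C = 1.0

g = cvx.Variable(n)
loss = cvx.sum(cvx.square(cvx.multiply(y_s, g) + cvx.multiply(y_t, 1 - g) - y_true))
prob = cvx.Problem(cvx.Minimize(loss + C * cvx.quad_form(g, L)),
                   [g >= 0, g <= .5])
prob.solve()
g_value = np.asarray(g.value).ravel()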