def get_predictions(self, target_data):
    assert target_data.is_regression
    o = self.source_learner.predict(target_data)
    is_labeled = target_data.is_labeled
    y_s = array_functions.vec_to_2d(o.fu[is_labeled])
    y_true = array_functions.vec_to_2d(o.true_y[is_labeled])
    return (y_s, y_true)
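Every example on this page leans on array_functions.vec_to_2d. Its source is not shown here; judging purely from how it is used, it appears to promote a 1-D vector to an (n, 1) column while passing 2-D input through. A minimal stand-in under that assumption:

import numpy as np

def vec_to_2d(v):
    # Assumed behavior, inferred from usage: promote a 1-D vector to an (n, 1) column.
    v = np.asarray(v)
    if v.ndim == 1:
        return v.reshape(-1, 1)
    return v

y = np.array([1.0, 2.0, 3.0])
print(vec_to_2d(y).shape)  # (3, 1)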
    def get_predictions(self, target_data):
        '''
        o = self.target_learner.predict_loo(target_data)
        o_source = self.source_learner.predict(target_data)
        is_labeled = target_data.is_labeled

        target_labels = self.configs.target_labels
        if self.use_estimated_f:
            o = self.target_learner.predict_loo(target_data.get_subset(is_labeled))
        if target_data.is_regression:
            y_t = array_functions.vec_to_2d(o.fu)
            y_s = array_functions.vec_to_2d(o_source.fu[is_labeled])
            y_true = array_functions.vec_to_2d(o.true_y)
        else:
            y_t = o.fu[:,target_labels]
            y_s = o_source.fu[:,target_labels]
            y_s = y_s[is_labeled,:]
            y_true = array_functions.make_label_matrix(o.true_y)[:,target_labels]
            y_true = array_functions.try_toarray(y_true)
        return (y_t, y_s, y_true)
        '''
        assert target_data.is_regression
        o = self.source_learner.predict(target_data)
        is_labeled = target_data.is_labeled
        y_s = array_functions.vec_to_2d(o.fu[is_labeled])
        y_true = array_functions.vec_to_2d(o.true_y[is_labeled])
        return (y_s, y_true)
Example #3
def combine_predictions(self,x,y_source,y_target):
    data = data_lib.Data()
    data.x = x
    data.is_regression = True
    g = self.g_nw.predict(data).fu
    a_t = 1 / (1 + g)
    b_s = g / (1 + g)
    if y_source.ndim > 1:
        a_t = array_functions.vec_to_2d(a_t)
        b_s = array_functions.vec_to_2d(b_s)
        fu = a_t*y_target + b_s * (y_source + self.bias)
    else:
        fu = np.multiply(a_t, y_target) + np.multiply(b_s, y_source + self.bias)
    return fu
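The weights above form a convex combination: for any g >= 0, a_t + b_s = 1, so the returned fu interpolates pointwise between the target prediction and the bias-shifted source prediction, with larger g(x) shifting weight toward the source. A quick numeric check (the g values here are made up for illustration):

import numpy as np

g = np.array([0.0, 1.0, 3.0])   # hypothetical g(x) values
a_t = 1 / (1 + g)               # weight on the target prediction
b_s = g / (1 + g)               # weight on the source prediction
print(a_t + b_s)                # [1. 1. 1.]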
Example #4
def combine_predictions(self, x, y_source, y_target):
    data = data_lib.Data()
    data.x = x
    data.is_regression = True
    g = self.g_nw.predict(data).fu
    a_t = 1 / (1 + g)
    b_s = g / (1 + g)
    if y_source.ndim > 1:
        a_t = array_functions.vec_to_2d(a_t)
        b_s = array_functions.vec_to_2d(b_s)
        fu = a_t * y_target + b_s * (y_source + self.bias)
    else:
        fu = np.multiply(a_t, y_target) + np.multiply(
            b_s, y_source + self.bias)
    return fu
Example #5
def viz_features(x, y, domain_ids, feature_names=None, alpha=.1, learner=None):
    #y = array_functions.normalize(y)
    x = array_functions.vec_to_2d(x)
    for i in range(x.shape[1]):
        xi = x[:, i]
        xi_train = xi
        yi = y
        ids_i = domain_ids
        title = str(i)
        density = None
        if feature_names is not None:
            title = str(i) + ': ' + feature_names[i]
        if learner is not None:
            xi, yi, ids_i, density = train_on_data(xi, yi, domain_ids, learner)
            density = density * 100 + 1
            I = array_functions.is_invalid(density)
            density[I] = 200
            alpha = 1
        array_functions.plot_2d_sub(xi,
                                    yi,
                                    alpha=alpha,
                                    title=title,
                                    data_set_ids=ids_i,
                                    sizes=density)
        k = 1
        array_functions.plot_histogram(xi_train, 100)
        k = 1
Example #6
def train(self, data):
    x = data.x
    x = array_functions.vec_to_2d(x)
    self.model = sm.nonparametric.KDEMultivariate(
        x, var_type='c' * x.shape[1], bw='cv_ls')
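For context, statsmodels' KDEMultivariate takes a var_type string with one character per column ('c' for continuous) and a bandwidth selection rule; 'cv_ls' is least-squares cross-validation. A self-contained sketch with synthetic data (the shapes and values are illustrative only):

import numpy as np
import statsmodels.api as sm

rng = np.random.RandomState(0)
x = rng.normal(size=(100, 2))                      # 100 samples, 2 continuous features

kde = sm.nonparametric.KDEMultivariate(
    x, var_type='c' * x.shape[1], bw='cv_ls')      # one 'c' per column, least-squares CV bandwidth
grid = rng.normal(size=(5, 2))
print(kde.pdf(grid))                               # estimated density at the query points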
Example #7
    def train(self, data):
        I = data.is_train & data.is_labeled
        x = data.x[I,:]
        n = x.shape[0]
        p = x.shape[1]
        y = data.y[I]

        x_labeled_transform = self.transform.fit_transform(x)
        x_all_transform = self.transform.transform(data.x)

        x_bias = np.hstack((x_labeled_transform,np.ones((n,1))))
        x_all_bias = np.hstack((x_all_transform,np.ones((x_all_transform.shape[0],1))))
        O = np.eye(p+1)
        O[p,p] = 0
        x_L = x_all_bias
        if x_L.shape[0] > self.max_n_L:
            I_L = np.random.choice(x_L.shape[0], self.max_n_L, replace = False)
            x_L = x_L[I_L,:]
        L = self.create_laplacian(x_L)
        XX = x_bias.T.dot(x_bias)
        XLX = x_L.T.dot(L).dot(x_L)
        A = XX + self.C*O + self.C2*XLX
        v = np.linalg.lstsq(A,x_bias.T.dot(y))
        w_anal = array_functions.vec_to_2d(v[0][0:p])
        b_anal = v[0][p]
        self.w = w_anal
        self.b = b_anal
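The solve above is the closed form for Laplacian-regularized (manifold-regularized) least squares: A = XX + C*O + C2*XLX in the variable names of train(), where O excludes the bias weight from the ridge penalty and L is a graph Laplacian over points that may be unlabeled. create_laplacian is not shown; the sketch below assumes a standard unnormalized Laplacian L = D - W built from an RBF affinity, which may differ from the author's choice:

import numpy as np

def rbf_laplacian(X, sigma=1.0):
    # Unnormalized graph Laplacian L = D - W with an RBF affinity (assumed form of create_laplacian).
    d2 = ((X[:, None, :] - X[None, :, :]) ** 2).sum(axis=2)
    W = np.exp(-d2 / (2 * sigma ** 2))
    np.fill_diagonal(W, 0)
    return np.diag(W.sum(axis=1)) - W

rng = np.random.RandomState(0)
x_bias = np.hstack((rng.rand(20, 2), np.ones((20, 1))))   # labeled points with a bias column
y = rng.rand(20)
x_L = np.hstack((rng.rand(50, 2), np.ones((50, 1))))      # points used only in the Laplacian term
C, C2 = 1e-2, 1e-3
O = np.eye(x_bias.shape[1])
O[-1, -1] = 0                                             # do not penalize the bias weight
L = rbf_laplacian(x_L)
A = x_bias.T.dot(x_bias) + C * O + C2 * x_L.T.dot(L).dot(x_L)
v = np.linalg.lstsq(A, x_bias.T.dot(y), rcond=None)[0]
w, b = v[:-1], v[-1]                                      # weights and bias, as in train() above
print(w, b)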
Example #8
def viz(pc, fig=None, show_histogram=False, show=True):
    import create_data_set
    from methods import method
    source_learner = method.NadarayaWatsonMethod()
    target_learner = method.NadarayaWatsonMethod()
    #pc = configs_lib.ProjectConfigs()
    data = helper_functions.load_object('../' + pc.data_file).data
    data.set_train()
    source_data = data.get_transfer_subset(pc.source_labels)
    source_data.set_target()
    target_data = data.get_transfer_subset(pc.target_labels)
    target_data.set_target()
    source_learner.train_and_test(source_data)
    target_learner.train_and_test(target_data)
    source_learner.sigma = 10
    target_learner.sigma = 10
    x = array_functions.vec_to_2d(np.linspace(data.x.min(), data.x.max(), 100))
    test_data = data_lib.Data()
    test_data.x = x
    test_data.is_regression = True
    y_s = source_learner.predict(test_data).fu
    y_t = target_learner.predict(test_data).fu

    #array_functions.plot_line(x,y_t-y_s,pc.data_set,y_axes=np.asarray([-5,5]))
    y = y_t-y_s
    #y = y - y.mean()
    array_functions.plot_line(x, y, title=None, fig=fig, show=show)
    if show_histogram:
        array_functions.plot_histogram(data.x,20)
    x=1
Example #9
def make_uniform_data():
    X = np.linspace(0, 1, 100)
    Y = np.zeros(X.size)
    Y[X < .5] = 0
    Y[X >= .5] = 1
    X = array_functions.vec_to_2d(X)
    return X, Y
Example #10
    def train(self, data):
        I = data.is_train & data.is_labeled
        x = data.x[I, :]
        n = x.shape[0]
        p = x.shape[1]
        y = data.y[I]

        x_labeled_transform = self.transform.fit_transform(x)
        x_all_transform = self.transform.transform(data.x)

        x_bias = np.hstack((x_labeled_transform, np.ones((n, 1))))
        x_all_bias = np.hstack(
            (x_all_transform, np.ones((x_all_transform.shape[0], 1))))
        O = np.eye(p + 1)
        O[p, p] = 0
        x_L = x_all_bias
        if x_L.shape[0] > self.max_n_L:
            I_L = np.random.choice(x_L.shape[0], self.max_n_L, replace=False)
            x_L = x_L[I_L, :]
        L = self.create_laplacian(x_L)
        XX = x_bias.T.dot(x_bias)
        XLX = x_L.T.dot(L).dot(x_L)
        A = XX + self.C * O + self.C2 * XLX
        v = np.linalg.lstsq(A, x_bias.T.dot(y))
        w_anal = array_functions.vec_to_2d(v[0][0:p])
        b_anal = v[0][p]
        self.w = w_anal
        self.b = b_anal
def plot_g(self):
    x = np.linspace(0,1)
    x = array_functions.vec_to_2d(x)
    g_orig = self.g_learner.predict_g(x)
    g = 1 / (1+g_orig)
    array_functions.plot_2d(x,g)
    pass

def plot_target(self):
    x = np.linspace(0,1)
    x = array_functions.vec_to_2d(x)
    d = data_lib.Data()
    d.x = x
    d.y = np.nan*np.ones(x.shape[0])
    d.is_regression = True
    o = self.target_learner.predict(d)
    array_functions.plot_2d(x, o.y)
Example #13
def train_on_data(x,y,domain_ids,learner):
    domain_ids = np.squeeze(domain_ids)
    data = data_class.Data()
    data.is_regression = True
    data.x = array_functions.vec_to_2d(x)
    data.y = y
    data.set_train()
    data.set_true_y()
    data.set_target()
    x_plot = np.zeros((0,1))
    y_plot = np.zeros(0)
    ids_plot = np.zeros(0)
    density_plot = np.zeros(0)
    x_test = scipy.linspace(x.min(),x.max(),100)
    x_test = array_functions.vec_to_2d(x_test)
    data_test = data_class.Data()
    data_test.is_regression = True
    data_test.x = x_test
    data_test.y = np.zeros(x_test.shape[0])
    data_test.y[:] = np.nan

    from methods import density
    kde = density.KDE()

    max_n = 200.0
    for i in np.unique(domain_ids):
        I = domain_ids == i
        data_i = data.get_subset(I)
        if data_i.n > max_n:
            data_i = data_i.rand_sample(max_n/data_i.n)
        learner.train_and_test(data_i)
        o = learner.predict(data_test)
        x_plot = np.vstack((x_plot,x_test))
        y_plot = np.hstack((y_plot,o.y))
        ids_plot = np.hstack((ids_plot,np.ones(100)*i))
        '''
        kde.train_and_test(data_i)
        dens = kde.predict(data_test)
        dens.y = dens.y / dens.y.max()
        den_y = dens.y
        '''
        dens_y = np.ones(data_test.n)
        density_plot = np.hstack((density_plot,dens_y))
    return x_plot,y_plot,ids_plot,density_plot
Example #14
def train_on_data(x, y, domain_ids, learner):
    domain_ids = np.squeeze(domain_ids)
    data = data_class.Data()
    data.is_regression = True
    data.x = array_functions.vec_to_2d(x)
    data.y = y
    data.set_train()
    data.set_true_y()
    data.set_target()
    x_plot = np.zeros((0, 1))
    y_plot = np.zeros(0)
    ids_plot = np.zeros(0)
    density_plot = np.zeros(0)
    x_test = scipy.linspace(x.min(), x.max(), 100)
    x_test = array_functions.vec_to_2d(x_test)
    data_test = data_class.Data()
    data_test.is_regression = True
    data_test.x = x_test
    data_test.y = np.zeros(x_test.shape[0])
    data_test.y[:] = np.nan

    from methods import density
    kde = density.KDE()

    max_n = 200.0
    for i in np.unique(domain_ids):
        I = domain_ids == i
        data_i = data.get_subset(I)
        if data_i.n > max_n:
            data_i = data_i.rand_sample(max_n / data_i.n)
        learner.train_and_test(data_i)
        o = learner.predict(data_test)
        x_plot = np.vstack((x_plot, x_test))
        y_plot = np.hstack((y_plot, o.y))
        ids_plot = np.hstack((ids_plot, np.ones(100) * i))
        '''
        kde.train_and_test(data_i)
        dens = kde.predict(data_test)
        dens.y = dens.y / dens.y.max()
        den_y = dens.y
        '''
        dens_y = np.ones(data_test.n)
        density_plot = np.hstack((density_plot, dens_y))
    return x_plot, y_plot, ids_plot, density_plot
Example #15
def create_and_save_data(x, y, domain_ids, file):
    data = data_class.Data()
    data.x = array_functions.vec_to_2d(x)
    data.y = y
    data.set_train()
    data.set_target()
    data.set_true_y()
    data.is_regression = True
    data.data_set_ids = domain_ids
    helper_functions.save_object(file, data)
Example #17
def set_synthetic_classification(self):
    self.loss_function = loss_function.ZeroOneError()
    self.data_dir = 'data_sets/synthetic_classification'
    self.data_name = 'synthetic_classification'
    self.data_set_file_name = 'split_data.pkl'
    self.results_dir = 'synthetic_classification'
    self.target_labels = np.asarray([1, 2])
    #self.target_labels = array_functions.vec_to_2d(self.target_labels).T
    self.source_labels = np.asarray([3, 4])
    self.source_labels = array_functions.vec_to_2d(self.source_labels).T
    self.cv_loss_function = loss_function.LogLoss()

def set_synthetic_classification(self):
    self.loss_function = loss_function.ZeroOneError()
    self.data_dir = 'data_sets/synthetic_classification'
    self.data_name = 'synthetic_classification'
    self.data_set_file_name = 'split_data.pkl'
    self.results_dir = 'synthetic_classification'
    self.target_labels = np.asarray([1,2])
    #self.target_labels = array_functions.vec_to_2d(self.target_labels).T
    self.source_labels = np.asarray([3,4])
    self.source_labels = array_functions.vec_to_2d(self.source_labels).T
    self.cv_loss_function = loss_function.LogLoss()
    def predict(self, data):
        o = self.target_learner.predict(data)
        is_target = data.is_target
        o_source = self.source_learner.predict(data.get_subset(is_target))
        if not data.is_regression:
            assert o.fu.ndim == 2
        else:
            assert np.squeeze(o.fu).ndim == 1
            assert np.squeeze(o_source.fu).ndim == 1
            o.fu = o.fu.reshape((o.fu.size,1))
            o_source.fu = o_source.fu.reshape((o_source.fu.size,1))
        for i in range(o.fu.shape[1]):
            fu_t = o.fu[is_target,i]
            fu_s = o_source.fu[:,i]
            if self.g_learner is not None:
                pred = self.g_learner.combine_predictions(data.x[is_target,:],fu_s,fu_t)
                if data.x.shape[1] == 1:
                    x = scipy.linspace(data.x.min(),data.x.max(),100)
                    x = array_functions.vec_to_2d(x)
                    g = self.g_learner.predict_g(x)
                    o.x = x
                    o.g = g
            else:
                pred = np.multiply(fu_t,1-self.g) + np.multiply(fu_s,self.g)
            o.fu[is_target,i] = pred
            #o.fu[is_target] = np.multiply(o.fu[is_target],(1-self.g)) + np.multiply(self.g,o_source.fu)
        if data.is_regression:
            o.y = o.fu
        else:
            fu = array_functions.replace_invalid(o.fu,0,1)
            fu = array_functions.normalize_rows(fu)
            o.fu = fu
            o.y = fu.argmax(1)
        if data.x.shape[1] == 1:
            x = array_functions.vec_to_2d(scipy.linspace(data.x.min(),data.x.max(),100))
            o.linspace_x = x
            o.linspace_g = self.g_learner.predict_g(x)

        assert not (np.isnan(o.y)).any()
        assert not (np.isnan(o.fu)).any()
        return o
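In the classification branch of predict(), replace_invalid and normalize_rows are not shown; the apparent intent is to zero out non-finite class scores, renormalize each row into a distribution, and take the argmax as the label. A small stand-in under that assumption:

import numpy as np

fu = np.array([[0.2, 0.6],
               [np.nan, 1.0],
               [3.0, 1.0]])
fu = np.where(np.isfinite(fu), fu, 0.0)       # stand-in for array_functions.replace_invalid(fu, 0, 1)
fu = fu / fu.sum(axis=1, keepdims=True)       # stand-in for array_functions.normalize_rows(fu)
y = fu.argmax(1)                              # predicted class index per row
print(fu)
print(y)                                      # [1 1 0]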
Example #20
def create_concrete(transfer=False):
    file = 'concrete/Concrete_Data.csv'
    used_field_names, concrete_data = load_csv(file)

    data = data_class.Data()
    t = ''
    if transfer:
        feat_ind = 0
        domain_ind = (used_field_names == 'age').nonzero()[0][0]
        ages = concrete_data[:, domain_ind]
        domain_ids = np.zeros(ages.shape)
        domain_ids[ages < 10] = 1
        domain_ids[(ages >= 10) & (ages <= 28)] = 2
        domain_ids[ages > 75] = 3
        data.x = concrete_data[:, 0:(concrete_data.shape[1] - 2)]
        #0,3,5
        #data.x = preprocessing.scale(data.x)
        if concrete_num_feats == 1:
            data.x = array_functions.vec_to_2d(data.x[:, feat_ind])
            t = '-feat=' + str(feat_ind)
        elif concrete_num_feats >= data.x.shape[1]:
            t = '-' + str(min(data.x.shape[1], concrete_num_feats))
        else:
            assert False
        data.data_set_ids = domain_ids
    else:
        data.x = concrete_data[:, 0:-1]

    data.y = concrete_data[:, -1]
    data.set_train()
    data.set_target()
    data.set_true_y()
    data.is_regression = True

    viz = False
    if viz:
        to_use = domain_ids > 0
        domain_ids = domain_ids[to_use]
        concrete_data = concrete_data[to_use, :]
        concrete_data = np.delete(concrete_data, domain_ind, 1)  # np.delete is not in-place
        viz_features(concrete_data, concrete_data[:, -1], domain_ids,
                     used_field_names)

        return
    data.x = array_functions.standardize(data.x)
    #viz_features(data.x,data.y,data.data_set_ids)

    s = concrete_file % t
    helper_functions.save_object(s, data)
Example #21
def create_concrete(transfer=False):
    file = "concrete/Concrete_Data.csv"
    used_field_names, concrete_data = load_csv(file)

    data = data_class.Data()
    t = ""
    if transfer:
        feat_ind = 0
        domain_ind = (used_field_names == "age").nonzero()[0][0]
        ages = concrete_data[:, domain_ind]
        domain_ids = np.zeros(ages.shape)
        domain_ids[ages < 10] = 1
        domain_ids[(ages >= 10) & (ages <= 28)] = 2
        domain_ids[ages > 75] = 3
        data.x = concrete_data[:, 0 : (concrete_data.shape[1] - 2)]
        # 0,3,5
        # data.x = preprocessing.scale(data.x)
        if concrete_num_feats == 1:
            data.x = array_functions.vec_to_2d(data.x[:, feat_ind])
            t = "-feat=" + str(feat_ind)
        elif concrete_num_feats >= data.x.shape[1]:
            t = "-" + str(min(data.x.shape[1], concrete_num_feats))
        else:
            assert False
        data.data_set_ids = domain_ids
    else:
        data.x = concrete_data[:, 0:-1]

    data.y = concrete_data[:, -1]
    data.set_train()
    data.set_target()
    data.set_true_y()
    data.is_regression = True

    viz = False
    if viz:
        to_use = domain_ids > 0
        domain_ids = domain_ids[to_use]
        concrete_data = concrete_data[to_use, :]
        concrete_data = np.delete(concrete_data, domain_ind, 1)  # np.delete is not in-place
        viz_features(concrete_data, concrete_data[:, -1], domain_ids, used_field_names)

        return
    data.x = array_functions.standardize(data.x)
    # viz_features(data.x,data.y,data.data_set_ids)

    s = concrete_file % t
    helper_functions.save_object(s, data)
Example #22
def viz_features(x,y,domain_ids,feature_names=None,alpha=.1,learner=None):
    #y = array_functions.normalize(y)
    x = array_functions.vec_to_2d(x)
    for i in range(x.shape[1]):
        xi = x[:,i]
        xi_train = xi
        yi = y
        ids_i = domain_ids
        title = str(i)
        density = None
        if feature_names is not None:
            title = str(i) + ': ' + feature_names[i]
        if learner is not None:
            xi,yi,ids_i,density = train_on_data(xi,yi,domain_ids,learner)
            density = density*100 + 1
            I = array_functions.is_invalid(density)
            density[I] = 200
            alpha = 1
        array_functions.plot_2d_sub(xi,yi,alpha=alpha,title=title,data_set_ids=ids_i,sizes=density)
        k = 1
        array_functions.plot_histogram(xi_train,100)
        k=1
Example #23
def create_bike_sharing():
    file = "bike_sharing/day.csv"
    columns = [0] + range(2, 16)
    all_field_names = pd.read_csv(file, nrows=1, dtype="string")
    all_field_names = np.asarray(all_field_names.keys())
    used_field_names = all_field_names[columns]
    bike_data = np.loadtxt(file, skiprows=1, delimiter=",", usecols=columns)
    domain_ind = used_field_names == "yr"
    domain_ids = np.squeeze(bike_data[:, domain_ind])
    # inds_to_keep = (used_field_names == 'temp') | (used_field_names == 'atemp')
    # bike_data = bike_data[:,inds_to_keep]
    # used_field_names = used_field_names[inds_to_keep]

    viz = True
    to_use = np.asarray([8, 9, 10, 11])
    x = bike_data[:, to_use]
    used_field_names = used_field_names[to_use]
    y = bike_data[:, -1]
    if viz:
        # learner = make_learner()
        learner = None
        viz_features(x, y, domain_ids, used_field_names, learner=learner)
    field_to_use = 1
    x = x[:, field_to_use]

    data = data_class.Data()
    data.is_regression = True
    data.x = array_functions.vec_to_2d(x)
    data.x = array_functions.standardize(data.x)
    data.y = y
    data.y = array_functions.normalize(data.y)
    data.set_defaults()
    data.data_set_ids = domain_ids

    s = bike_file % ("-feat=" + str(field_to_use))
    helper_functions.save_object(s, data)

    pass
Example #24
def create_boston_housing(file_dir=""):
    boston_data = datasets.load_boston()
    data = data_class.Data()
    data.x = boston_data.data
    data.y = boston_data.target
    data.feature_names = list(boston_data.feature_names)

    data.set_train()
    data.set_target()
    data.set_true_y()
    data.is_regression = True
    s = boston_housing_raw_data_file
    x = data.x
    y = data.y
    if create_transfer_data:
        x_ind = 5
        domain_ind = 12
        domain_ids = np.ones(x.shape[0])
        domain_ids = array_functions.bin_data(x[:, domain_ind], num_bins=4)
        x = np.delete(x, domain_ind, 1)
        # viz_features(x,y,domain_ids,boston_data.feature_names)
        data.data_set_ids = domain_ids

        if boston_num_feats == 1:
            data.x = data.x[:, x_ind]
            data.x = array_functions.vec_to_2d(data.x)
            s = s % ""
        elif boston_num_feats >= data.x.shape[1]:
            data.x = array_functions.standardize(data.x)
            p = min(boston_num_feats, data.x.shape[1])
            s = s % ("-" + str(p))
        else:
            assert False
    else:
        s %= ""
    if file_dir != "":
        s = file_dir + "/" + s
    helper_functions.save_object(s, data)
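array_functions.bin_data is used above to turn a continuous feature into integer domain ids. Its implementation is not shown; a rough stand-in using equal-frequency (quantile) bins is sketched below, though the real function may bin by equal width instead:

import numpy as np

def bin_data(v, num_bins=4):
    # Hypothetical stand-in: equal-frequency bins via quantile edges.
    edges = np.quantile(v, np.linspace(0, 1, num_bins + 1)[1:-1])
    return np.digitize(v, edges) + 1          # ids in 1..num_bins

v = np.random.RandomState(0).rand(12)
print(bin_data(v, num_bins=4))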
Example #25
def create_bike_sharing():
    file = 'bike_sharing/day.csv'
    columns = [0] + range(2, 16)
    all_field_names = pd.read_csv(file, nrows=1, dtype='string')
    all_field_names = np.asarray(all_field_names.keys())
    used_field_names = all_field_names[columns]
    bike_data = np.loadtxt(file, skiprows=1, delimiter=',', usecols=columns)
    domain_ind = used_field_names == 'yr'
    domain_ids = np.squeeze(bike_data[:, domain_ind])
    #inds_to_keep = (used_field_names == 'temp') | (used_field_names == 'atemp')
    #bike_data = bike_data[:,inds_to_keep]
    #used_field_names = used_field_names[inds_to_keep]

    viz = True
    to_use = np.asarray([8, 9, 10, 11])
    x = bike_data[:, to_use]
    used_field_names = used_field_names[to_use]
    y = bike_data[:, -1]
    if viz:
        #learner = make_learner()
        learner = None
        viz_features(x, y, domain_ids, used_field_names, learner=learner)
    field_to_use = 1
    x = x[:, field_to_use]

    data = data_class.Data()
    data.is_regression = True
    data.x = array_functions.vec_to_2d(x)
    data.x = array_functions.standardize(data.x)
    data.y = y
    data.y = array_functions.normalize(data.y)
    data.set_defaults()
    data.data_set_ids = domain_ids

    s = bike_file % ('-feat=' + str(field_to_use))
    helper_functions.save_object(s, data)

    pass
Example #26
def create_boston_housing(file_dir=''):
    boston_data = datasets.load_boston()
    data = data_class.Data()
    data.x = boston_data.data
    data.y = boston_data.target
    data.feature_names = list(boston_data.feature_names)

    data.set_train()
    data.set_target()
    data.set_true_y()
    data.is_regression = True
    s = boston_housing_raw_data_file
    x = data.x
    y = data.y
    create_transfer_data = False
    create_y_split = True
    if create_y_split:
        from base import transfer_project_configs as configs_lib
        pc = configs_lib.ProjectConfigs()
        main_configs = configs_lib.MainConfigs(pc)
        learner = main_configs.learner
        learner.quiet = True
        learner.target_learner[0].quiet = True
        learner.source_learner.quiet = True
        learner.g_learner.quiet = False
        domain_ids = array_functions.bin_data(data.y, num_bins=2)
        data.data_set_ids = domain_ids
        data.is_train[:] = True
        corrs = []
        for i in range(x.shape[1]):
            corrs.append(scipy.stats.pearsonr(x[:, i], y)[0])
        learner.train_and_test(data)
        print('Just playing with data - not meant to save it')
        for i, name in enumerate(data.feature_names):
            v = learner.g_learner.g[i]
            if abs(v) < 1e-6:
                v = 0
            print(name + ': ' + str(v))
        exit()
    elif create_transfer_data:
        x_ind = 5
        domain_ind = 12
        domain_ids = np.ones(x.shape[0])
        domain_ids = array_functions.bin_data(x[:, domain_ind], num_bins=4)
        x = np.delete(x, domain_ind, 1)
        #viz_features(x,y,domain_ids,boston_data.feature_names)
        data.data_set_ids = domain_ids

        if boston_num_feats == 1:
            data.x = data.x[:, x_ind]
            data.x = array_functions.vec_to_2d(data.x)
            s = s % ''
        elif boston_num_feats >= data.x.shape[1]:
            data.x = array_functions.standardize(data.x)
            p = min(boston_num_feats, data.x.shape[1])
            s = s % ('-' + str(p))
        else:
            assert False
    else:
        s %= ''
    if file_dir != '':
        s = file_dir + '/' + s
    helper_functions.save_object(s, data)
Example #27
def predict(self, data):
    x = array_functions.vec_to_2d(data.x)
    y = self.model.pdf(x)
    o = results.Output(data, y)
    return o