Example #1
def estimate_gradients(x, y, I):
    x_range = x.max(0) - x.min(0)  # per-feature range; avoid shadowing the builtin 'range'
    #x = (x - x.min(0)) / x_range
    x = x[I, :]
    y = y[I]
    data = data_lib.Data(x, y)
    data.set_train()
    data.is_regression = True
    nw = method.NadarayaWatsonMethod()
    nw.cv_params = {}
    nw.sigma = 1000
    nw.train_and_test(data)

    num_x0 = 40
    num_x1 = int(num_x0 * x_range[1] / x_range[0])

    x0_vals = np.linspace(x[:, 0].min(), x[:, 0].max(), num_x0)
    x1_vals = np.linspace(x[:, 1].min(), x[:, 1].max(), num_x1)
    x1_vals = x1_vals[::-1]
    v = np.zeros((x1_vals.size, x0_vals.size))

    for idx0, x0 in enumerate(x0_vals):
        for idx1, x1 in enumerate(x1_vals):
            xi = np.asarray([x0, x1])
            d = data_lib.Data(xi[np.newaxis, :], np.asarray([np.nan]))
            v[idx1, idx0] = nw.predict(d).y
        print('')
    gradients = np.gradient(v)
    g = gradients[0]**2 + gradients[1]**2
    g = np.sqrt(g)
    return g, v
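A minimal driver for estimate_gradients with hypothetical inputs, assuming the repo's data and method modules are importable: x must have exactly two columns, since the grid above is built from features 0 and 1, and I selects the rows used to fit the smoother.

import numpy as np
x = np.random.uniform(0, 10, (500, 2))
y = np.sin(x[:, 0]) + x[:, 1]
I = np.arange(x.shape[0])           # keep every row
g, v = estimate_gradients(x, y, I)  # g: gradient magnitudes, v: smoothed grid values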
Example #2
def viz(pc, fig=None, show_histogram=False, show=True):
    import create_data_set
    from methods import method
    source_learner = method.NadarayaWatsonMethod()
    target_learner = method.NadarayaWatsonMethod()
    #pc = configs_lib.ProjectConfigs()
    data = helper_functions.load_object('../' + pc.data_file).data
    data.set_train()
    source_data = data.get_transfer_subset(pc.source_labels)
    source_data.set_target()
    target_data = data.get_transfer_subset(pc.target_labels)
    target_data.set_target()
    source_learner.train_and_test(source_data)
    target_learner.train_and_test(target_data)
    source_learner.sigma = 10
    target_learner.sigma = 10
    x = array_functions.vec_to_2d(np.linspace(data.x.min(), data.x.max(), 100))
    test_data = data_lib.Data()
    test_data.x = x
    test_data.is_regression = True
    y_s = source_learner.predict(test_data).fu
    y_t = target_learner.predict(test_data).fu

    #array_functions.plot_line(x,y_t-y_s,pc.data_set,y_axes=np.asarray([-5,5]))
    y = y_t - y_s
    #y = y - y.mean()
    array_functions.plot_line(x, y, title=None, fig=fig, show=show)
    if show_histogram:
        array_functions.plot_histogram(data.x, 20)
def create_synthetic_linear_classification(n=500,
                                           p=50,
                                           sigma=1,
                                           w=None,
                                           x=None):
    data = data_class.Data()
    if x is None:
        x = np.random.uniform(0, 1, (n, p))
    data.x = x
    if w is None:
        w = np.random.normal(0, sigma, p)
    data.y = data.x.dot(w)
    data.y -= data.y.mean()
    data.y += np.random.normal(0, sigma, n)
    y = data.y

    #data.y = np.sign(data.y)
    #w_eff = inv(x.T.dot(x)).dot(x.T).dot(y)
    #data.is_regression = False

    data.is_regression = True
    w_eff = w

    data.set_true_y()
    data.set_train()
    return data, w_eff
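A quick sanity check of the generator, assuming only numpy: since data.y is the mean-centered linear response plus noise, the residual against w_eff should have roughly the noise scale.

data, w_eff = create_synthetic_linear_classification(n=200, p=10, sigma=.5)
residual = data.y - (data.x.dot(w_eff) - data.x.dot(w_eff).mean())
print(residual.std())  # should be close to sigma = .5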
def create_synthetic_regression_transfer(target_fun_or_list,
                                         source_fun_or_list,
                                         n_target=100,
                                         n_source=100,
                                         sigma=.5):
    try:
        source_funcs = list(source_fun_or_list)
    except TypeError:  # a single callable is not iterable
        source_funcs = [source_fun_or_list]
    try:
        target_funcs = list(target_fun_or_list)
    except TypeError:
        target_funcs = [target_fun_or_list]
    num_target_funcs = len(target_funcs)
    num_source_funcs = len(source_funcs)
    n = n_target * num_target_funcs + n_source * num_source_funcs
    data = data_class.Data()
    data.x = np.random.uniform(0, 1, (n, 1))
    assert n_target == n_source
    data.data_set_ids = create_ids(n_target,
                                   num_source_funcs + num_target_funcs)
    data.y = np.zeros(n)
    all_funcs = target_funcs + source_funcs
    for ds_id, f in enumerate(all_funcs):  # 'ds_id' avoids shadowing the builtin 'id'
        I = data.data_set_ids == ds_id
        data.y[I] = f(data.x[I])
    data.y += np.random.normal(0, sigma, n)
    data.set_true_y()
    data.set_train()
    data.is_regression = True
    return data
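The target/source arguments accept either a single callable or a list of callables (the try/except above normalizes both cases). A hedged example with one task each; the lambdas squeeze their output so it assigns cleanly into data.y:

import numpy as np
target_f = lambda x: np.squeeze(5 * x)
source_f = lambda x: np.squeeze(5 * x + 1)
data = create_synthetic_regression_transfer(target_f, source_f)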
def create_synthetic_piecewise_transfer(file_dir='', dim=1):
    n_target = 150
    n_source = 150
    n = n_target + n_source
    sigma = .2
    data = data_class.Data()
    data.x = np.random.uniform(0, 1, (n, dim))
    data.data_set_ids = np.zeros(n)
    data.data_set_ids[n_target:] = 1
    data.y = np.zeros(n)
    # later masks overwrite earlier ones: x in (.66, 1] -> 1, (.33, .66] -> 0, [0, .33] -> 2
    data.y[(data.data_set_ids == 0) & (data.x[:, 0] <= 1)] = 1
    data.y[(data.data_set_ids == 0) & (data.x[:, 0] <= .66)] = 0
    data.y[(data.data_set_ids == 0) & (data.x[:, 0] <= .33)] = 2

    data.y[(data.data_set_ids == 1) & (data.x[:, 0] <= 1)] = 0
    data.y[(data.data_set_ids == 1) & (data.x[:, 0] <= .66)] = 2
    data.y[(data.data_set_ids == 1) & (data.x[:, 0] <= .33)] = 1
    data.y += np.random.normal(0, sigma, n)
    data.set_train()
    data.set_true_y()
    data.is_regression = True
    if dim == 1:
        array_functions.plot_2d(data.x, data.y, data.data_set_ids)
    s = synthetic_piecewise_file
    if dim > 1:
        s = synthetic_piecewise_file % dim
    if file_dir != '':
        s = file_dir + '/' + s
    helper_functions.save_object(s, data)
def create_kc_housing():
    file = 'kc_housing/processed_data.pkl'
    x, y = helper_functions.load_object(file)
    data = data_class.Data(x, y)
    data.is_regression = True
    s = kc_housing_file
    helper_functions.save_object(s, data)
Example #7
def combine_data(x1, y1, x2, y2):
    x = np.vstack((x1, x2))
    y = np.concatenate((y1, y2))
    data_set_ids = np.concatenate((np.zeros(y1.size), np.ones(y2.size)))
    data = data_lib.Data(x, y)
    data.data_set_ids = data_set_ids
    data.is_regression = True
    return data
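A small usage sketch, assuming the two feature matrices have the same number of columns; rows from the first pair get data_set_id 0 and rows from the second get 1:

import numpy as np
x1, y1 = np.random.rand(50, 3), np.random.rand(50)
x2, y2 = np.random.rand(80, 3), np.random.rand(80)
merged = combine_data(x1, y1, x2, y2)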
def create_ds2_data(target_skill_idx=0):
    ds2_data = pickle.load(open('DS2-processed/DS2-dict.pkl', 'rb'))  # binary mode for pickle
    x = ds2_data['skill_estimates']
    y = ds2_data['skill_estimates'][:, target_skill_idx]
    x = np.delete(x, target_skill_idx, 1)
    data = data_class.Data(x, y)
    s = 'DS2-processed/raw_data.pkl'
    helper_functions.save_object(s, data)
def create_covtype():
    covtype_data = datasets.fetch_covtype()
    print(covtype_data.__dict__)
    data = data_class.Data()
    data.x = covtype_data.data
    data.y = covtype_data.target
    helper_functions.save_object('data_sets/covtype/raw_data.pkl', data)
Example #10
def train_on_data(x, y, domain_ids, learner):
    domain_ids = np.squeeze(domain_ids)
    data = data_class.Data()
    data.is_regression = True
    data.x = array_functions.vec_to_2d(x)
    data.y = y
    data.set_train()
    data.set_true_y()
    data.set_target()
    x_plot = np.zeros((0, 1))
    y_plot = np.zeros(0)
    ids_plot = np.zeros(0)
    density_plot = np.zeros(0)
    x_test = np.linspace(x.min(), x.max(), 100)  # scipy.linspace was removed; use numpy
    x_test = array_functions.vec_to_2d(x_test)
    data_test = data_class.Data()
    data_test.is_regression = True
    data_test.x = x_test
    data_test.y = np.zeros(x_test.shape[0])
    data_test.y[:] = np.nan

    from methods import density
    kde = density.KDE()

    max_n = 200.0
    for i in np.unique(domain_ids):
        I = domain_ids == i
        data_i = data.get_subset(I)
        if data_i.n > max_n:
            data_i = data_i.rand_sample(max_n / data_i.n)
        learner.train_and_test(data_i)
        o = learner.predict(data_test)
        x_plot = np.vstack((x_plot, x_test))
        y_plot = np.hstack((y_plot, o.y))
        ids_plot = np.hstack((ids_plot, np.ones(x_test.shape[0]) * i))
        '''
        kde.train_and_test(data_i)
        dens = kde.predict(data_test)
        dens.y = dens.y / dens.y.max()
        dens_y = dens.y
        '''
        dens_y = np.ones(data_test.n)
        density_plot = np.hstack((density_plot, dens_y))
    return x_plot, y_plot, ids_plot, density_plot
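A hedged driver for train_on_data with synthetic inputs, reusing the repo's NadarayaWatsonMethod (seen in Example #1) as the learner; x and domain_ids are 1-d arrays of equal length:

import numpy as np
from methods import method
x = np.random.uniform(0, 1, 200)
y = np.sin(4 * x) + np.random.normal(0, .1, 200)
domain_ids = (x > .5).astype(float)
x_plot, y_plot, ids_plot, dens = train_on_data(x, y, domain_ids, method.NadarayaWatsonMethod())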
def create_and_save_data(x, y, domain_ids, file):
    data = data_class.Data()
    data.x = array_functions.vec_to_2d(x)
    data.y = y
    data.set_train()
    data.set_target()
    data.set_true_y()
    data.is_regression = True
    data.data_set_ids = domain_ids
    helper_functions.save_object(file, data)
Example #12
 def __init__(self):
     self.my_name = 'model'
     self.username = ''
     self.password = ''
     self.hostname = 'apclara1.dl.ac.uk'
     self.port = 22
     self.pathscript = '/opt/ControlRoomApps/OnlineModel/'
     self.path_exists = False
     self.data = data.Data()
     self.client = SSHClient()
     self.generator_params = ['number_of_particles', 'dist_x', 'dist_y', 'dist_z', 'sig_x', 'sig_y', 'sig_z']
     self.scan_progress = -1
def create_spatial_data(dir='climate-month'):
    file = dir + '/processed_data.pkl'
    locs, y, ids = helper_functions.load_object(file)
    y = y.T
    is_missing_loc = (~np.isfinite(locs)).any(1)
    locs = locs[~is_missing_loc, :]
    y = y[~is_missing_loc, :]
    ids = ids[~is_missing_loc]
    data = data_class.Data(locs, y)
    data.multilabel_to_multisource()
    s = dir + '/raw_data.pkl'
    helper_functions.save_object(s, data)
Example #14
def create_transfer_data(locations, pricing_data, I, apply_log=False):
    x_all = np.vstack((locations[I, :], locations[I, :]))
    y_all = np.concatenate((pricing_data[I, 0], pricing_data[I, 1]))
    if apply_log:
        y_all = np.log(y_all)
    else:
        print('not taking log of labels!')
    #y_all /= y_all[np.isfinite(y_all)].max()
    data_set_ids = np.concatenate((np.zeros(I.sum()), np.ones(I.sum())))
    data = data_lib.Data(x_all, y_all)
    data.data_set_ids = data_set_ids
    data.is_regression = True
    return data
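Here I is a boolean row mask over locations, and the two pricing columns are stacked as data-set ids 0 and 1 over the same rows. A sketch with synthetic inputs (apply_log=True assumes strictly positive prices):

import numpy as np
locations = np.random.rand(300, 2)
pricing_data = np.random.uniform(1e4, 1e6, (300, 2))
I = np.ones(300, dtype=bool)  # keep every row
data = create_transfer_data(locations, pricing_data, I, apply_log=True)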
def create_concrete(transfer=False):
    file = 'concrete/Concrete_Data.csv'
    used_field_names, concrete_data = load_csv(file)

    data = data_class.Data()
    t = ''
    if transfer:
        feat_ind = 0
        domain_ind = (used_field_names == 'age').nonzero()[0][0]
        ages = concrete_data[:, domain_ind]
        domain_ids = np.zeros(ages.shape)
        domain_ids[ages < 10] = 1
        domain_ids[(ages >= 10) & (ages <= 28)] = 2
        domain_ids[ages > 75] = 3  # ages in (28, 75] keep the default id 0
        data.x = concrete_data[:, 0:(concrete_data.shape[1] - 2)]
        #0,3,5
        #data.x = preprocessing.scale(data.x)
        if concrete_num_feats == 1:
            data.x = array_functions.vec_to_2d(data.x[:, feat_ind])
            t = '-feat=' + str(feat_ind)
        elif concrete_num_feats >= data.x.shape[1]:
            t = '-' + str(min(data.x.shape[1], concrete_num_feats))
        else:
            assert False
        data.data_set_ids = domain_ids
    else:
        data.x = concrete_data[:, 0:-1]

    data.y = concrete_data[:, -1]
    data.set_train()
    data.set_target()
    data.set_true_y()
    data.is_regression = True

    viz = False
    if viz:
        to_use = domain_ids > 0
        domain_ids = domain_ids[to_use]
        concrete_data = concrete_data[to_use, :]
        concrete_data = np.delete(concrete_data, domain_ind, 1)  # np.delete returns a copy
        viz_features(concrete_data, concrete_data[:, -1], domain_ids,
                     used_field_names)

        return
    data.x = array_functions.standardize(data.x)
    #viz_features(data.x,data.y,data.data_set_ids)

    s = concrete_file % t
    helper_functions.save_object(s, data)
Example #16
 def combine_predictions(self, x, y_source, y_target):
     data = data_lib.Data()
     data.x = x
     data.is_regression = True
     g = self.g_nw.predict(data).fu
     a_t = 1 / (1 + g)  # target weight
     b_s = g / (1 + g)  # source weight; a_t + b_s = 1
     if y_source.ndim > 1:
         a_t = array_functions.vec_to_2d(a_t)
         b_s = array_functions.vec_to_2d(b_s)
         fu = a_t * y_target + b_s * (y_source + self.bias)
     else:
         fu = np.multiply(a_t, y_target) + np.multiply(
             b_s, y_source + self.bias)
     return fu
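Since a_t + b_s = 1 for any g >= 0, fu is a convex combination of the target prediction and the bias-corrected source prediction. A quick numeric check, assuming only numpy:

import numpy as np
g = np.array([0., 1., 9.])
a_t, b_s = 1 / (1 + g), g / (1 + g)
print(a_t + b_s)  # [1. 1. 1.]: g = 0 recovers y_target, large g leans on the source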
def create_drosophila():
    data = helper_functions.load_object('drosophilia/processed_data.pkl')
    x, y = data
    y = np.reshape(y, y.shape[0])
    I = np.random.choice(x.shape[0], size=500, replace=False)
    x = x[I, :]
    y = y[I]
    data = data_class.Data()
    data.x = x
    data.y = y
    data.set_train()
    data.set_target()
    data.set_true_y()
    data.is_regression = True

    helper_functions.save_object(drosophila_file, data)
Example #18
def make_linear_data(n, p):
    #X = np.random.uniform(0, 1, (n,p))
    X = np.random.normal(0, scale=.1, size=(n,p))
    idx = int(n/2)
    X[:idx, :] += 1
    X[idx:, :] -= 1
    if p == 1:
        X = np.sort(X, 0)
    w = np.random.randn(p)
    w = np.abs(w)
    w[:] = 1           # debug override: force all-ones weights
    noise = np.random.normal(0, scale=1, size=n)
    noise[:] = 0       # debug override: disable noise, so Y is exactly X.dot(w)
    Y = X.dot(w)
    data = data_lib.Data(X, Y + noise)
    return data, w
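A quick check of make_linear_data, assuming data_lib.Data exposes x and y: with w forced to ones and the noise zeroed above, Y is exactly the row sums of X.

import numpy as np
data, w = make_linear_data(100, 3)
assert np.allclose(data.y, data.x.sum(1))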
Example #19
    def train(self, data):
        I = data.is_train & data.is_labeled
        x = data.x[I, :]
        n_L = x.shape[0]
        y_L = np.expand_dims(data.y[I], 1)

        x_L = x
        x_U = data.x
        if x_U.shape[0] > self.max_n_L:
            to_keep = I.nonzero()[0]
            to_sample = (~I).nonzero()[0]
            num_to_sample = self.max_n_L - to_keep.size
            sampled = np.random.choice(to_sample, num_to_sample, replace=False)
            to_use = np.hstack((to_keep, sampled))
            x_U = x_U[to_use, :]

        x_U_not_transformed = x_U
        x_L_not_transformed = x_L
        x_U = self.graph_transform.fit_transform(x_U)

        n_U = x_U.shape[0]
        L_UU = self.create_laplacian(x_U, x_U)
        L_inv = np.linalg.inv(L_UU + self.C * np.eye(n_U))
        D_L_inv = np.diag(1 / L_inv.sum(1))
        S_SSL = D_L_inv.dot(L_inv)
        S_SSL_UL = S_SSL[:, :n_L]
        S_SSL_UL = self.fix_matrix_rows(S_SSL_UL, 1.0 / n_L)
        '''
        x_L = self.graph_transform.transform(x_L)
        W_UL = self.create_similarity_matrix(x_U, x_L)
        D_W = np.diag(1 / W_UL.sum(1))
        S_NW = (1-self.C)*D_W.dot(W_UL)
        S_NW = self.fix_matrix_rows(S_NW, 1.0/n_L)
        f = (S_SSL_UL + S_NW).dot(y_L)
        '''
        f = np.squeeze(S_SSL_UL.dot(y_L))
        nw_data = data_lib.Data(x_U_not_transformed, f)
        nw_data.is_regression = True

        tune_loo = False
        if tune_loo:
            I = nw_data.is_labeled
            self.nw_method.x = nw_data.x
            self.nw_method.y = nw_data.y
            self.nw_method.tune_loo(nw_data)
        else:
            self.nw_method.train_and_test(nw_data)
Example #20
 def combine_predictions(self, x, y_source, y_target, data_set_ids):
     data = data_lib.Data()
     data.x = x
     data.is_regression = True
     if self.constant_b:
         g = self.g
     elif self.linear_b:
         x = self.transform.transform(data.x)
         g = x.dot(self.g)
         if self.clip_b:
             g = array_functions.clip(g, self.g_min, self.g_max)
         g = g + self.b
     else:
         g = self.g_nw.predict(data).fu
     fu = self.C3 * y_target + (1 - self.C3) * (y_source + g)
     #fu = y_source + g
     return fu
Example #21
 def combine_predictions(self, x, y_source, y_target, data_set_ids):
     assert x.shape[0] == data_set_ids.shape[0]
     data = data_lib.Data()
     data.x = x
     data.is_regression = True
     if self.linear_b:
         unique_ids = np.unique(data_set_ids)
         x = self.transform.transform(data.x)
         g = np.zeros(x.shape[0])
         for i, ds_id in enumerate(unique_ids):  # 'ds_id' avoids shadowing the builtin 'id'
             I = data_set_ids == ds_id
             g[I] = x[I].dot(self.g[i])
             g[I] = g[I] + self.b[i]
         if self.clip_b:
             g = array_functions.clip(g, self.g_min, self.g_max)
     fu = self.C3 * y_target + (1 - self.C3) * (y_source + g)
     #fu = y_source + g
     return fu
def create_synthetic_classification(file_dir='', local=True):
    dim = 1
    n_target = 200
    n_source = 200
    n = n_target + n_source
    data = data_class.Data()
    data.x = np.random.uniform(0, 1, (n, dim))
    data.data_set_ids = np.zeros(n)
    data.data_set_ids[n_target:] = 1
    data.y = np.zeros(n)
    x, ids = data.x, data.data_set_ids
    I = array_functions.in_range(x, 0, .25)
    I2 = array_functions.in_range(x, .25, .5)
    I3 = array_functions.in_range(x, .5, .75)
    I4 = array_functions.in_range(x, .75, 1)
    id0 = ids == 0
    id1 = ids == 1
    data.y[I & id0] = 1
    data.y[I2 & id0] = 2
    data.y[I3 & id0] = 1
    data.y[I4 & id0] = 2

    data.y[I & id1] = 3
    data.y[I2 & id1] = 4
    data.y[I3 & id1] = 3
    data.y[I4 & id1] = 4
    if local:
        data.y[I3 & id1] = 4
        data.y[I4 & id1] = 3
    data.set_true_y()
    data.set_train()
    data.is_regression = False
    noise_rate = 0
    #data.add_noise(noise_rate)
    data.add_noise(noise_rate, id0, np.asarray([1, 2]))
    data.add_noise(noise_rate, id1, np.asarray([3, 4]))
    s = synthetic_classification_file
    if local:
        s = synthetic_classification_local_file
    i = id1
    array_functions.plot_2d(data.x[i, :], data.y[i])
    if file_dir != '':
        s = file_dir + '/' + s
    helper_functions.save_object(s, data)
def create_synthetic_linear_regression(n=500,
                                       p=50,
                                       sigma=1,
                                       num_non_zero=None):
    data = data_class.Data()
    data.x = np.random.uniform(0, sigma, (n, p))
    w = np.random.normal(0, 1, p)
    #w = np.ones(p)
    if num_non_zero is not None:
        w[num_non_zero:] = 0
    data.y = data.x.dot(w)
    data.y += np.random.normal(0, sigma, n)
    data.is_regression = True
    data.set_true_y()
    data.set_train()
    suffix = str(n) + '-' + str(p) + '-' + str(sigma)
    data.metadata = dict()
    data.metadata['true_w'] = w.T
    if num_non_zero is not None:
        suffix += '-nnz=' + str(num_non_zero)
    s = synthetic_linear_reg_file % suffix
    helper_functions.save_object(s, data)
def create_synthetic_step_linear_transfer(file_dir=''):
    n_target = 100
    n_source = 100
    n = n_target + n_source
    sigma = .5
    data = data_class.Data()
    data.x = np.random.uniform(0, 1, (n, 1))
    data.data_set_ids = np.zeros(n)
    data.data_set_ids[n_target:] = 1
    data.y = np.reshape(data.x * 5, data.x.shape[0])
    data.y[(data.data_set_ids == 1) & (data.x[:, 0] >= .5)] += 4
    data.y += np.random.normal(0, sigma, n)
    data.set_defaults()
    data.is_regression = True
    array_functions.plot_2d(data.x,
                            data.y,
                            data.data_set_ids,
                            title='Linear Step Data Set')
    s = synthetic_step_linear_transfer_file
    if file_dir != '':
        s = file_dir + '/' + s
    helper_functions.save_object(s, data)
    def start(self):
        # setup logger
        gv.logger = logger.Logger()
        
        gv.sm = self.ids['screen_manager']

        self.menu_utama = self.add_screen(menu_utama.MenuUtama(), False)

        self.info_ngabandungan = self.add_screen(info_bdg.InfoNgabandungan())
        self.peta = self.add_screen(peta.Peta())
        self.data = self.add_screen(data.Data())
        self.info_perkembangan_rumah = self.add_screen(info_perkembangan_rumah.InfoPerkembanganRumah())
        self.info_tematik = self.add_screen(info_tematik.InfoTematik())

        gv.sm.current = "Menu Utama"
        # gv.sm.current = "Info Tematik"

        # auto standby
        #   return to main menu after a period of inactivity
        Clock.schedule_interval(self.standby_count_down, 10)
        self.bind(on_touch_down = self.reset_standby)
        self.bind(on_touch_up = self.log_touch_up)
def create_bike_sharing():
    file = 'bike_sharing/day.csv'
    columns = [0] + list(range(2, 16))
    all_field_names = pd.read_csv(file, nrows=1, dtype='string')
    all_field_names = np.asarray(all_field_names.keys())
    used_field_names = all_field_names[columns]
    bike_data = np.loadtxt(file, skiprows=1, delimiter=',', usecols=columns)
    domain_ind = used_field_names == 'yr'
    domain_ids = np.squeeze(bike_data[:, domain_ind])
    #inds_to_keep = (used_field_names == 'temp') | (used_field_names == 'atemp')
    #bike_data = bike_data[:,inds_to_keep]
    #used_field_names = used_field_names[inds_to_keep]

    viz = True
    to_use = np.asarray([8, 9, 10, 11])
    x = bike_data[:, to_use]
    used_field_names = used_field_names[to_use]
    y = bike_data[:, -1]
    if viz:
        #learner = make_learner()
        learner = None
        viz_features(x, y, domain_ids, used_field_names, learner=learner)
    field_to_use = 1
    x = x[:, field_to_use]

    data = data_class.Data()
    data.is_regression = True
    data.x = array_functions.vec_to_2d(x)
    data.x = array_functions.standardize(data.x)
    data.y = y
    data.y = array_functions.normalize(data.y)
    data.set_defaults()
    data.data_set_ids = domain_ids

    s = bike_file % ('-feat=' + str(field_to_use))
    helper_functions.save_object(s, data)
Example #27
import numpy as np
import scipy
from data_sets import create_data_set
from data import data as data_lib
from utility import helper_functions


file = 'SAheart.data.txt'
all_field_names, data = create_data_set.load_csv(file, has_field_names=True, dtype='string', delim=',')
data[data == 'Present'] = '1'
data[data == 'Absent'] = '0'
data = data[:, 1:]
data = data.astype(np.float64)  # the np.float alias was removed in NumPy 1.24
data = data_lib.Data(data[:, :-1], data[:, -1])
data.set_train()
data.set_target()
helper_functions.save_object('raw_data.pkl', data)
Example #28
 def predict_g(self, x, data_set_ids=None):
     data = data_lib.Data()
     data.x = x
     data.is_regression = True
     g = self.g_nw.predict(data).fu
     return g
Example #29
                                 sizes=dot_size,
                                 alpha=1,
                                 subtract_min=False,
                                 fig=fig2)
    pl.title('Values 2')
    array_functions.move_fig(fig1, 500, 500, 2000, 100)
    array_functions.move_fig(fig2, 500, 500, 2600, 100)
    pl.show(block=True)

    data = (x, y)
    x = np.vstack((x[I1, :], x[I2, :]))
    data_set_ids = np.hstack((np.zeros(I1.sum()), np.ones(I2.sum())))

    y = np.hstack((y[I1], y[I2]))

    data = data_lib.Data(x, y)
    data.x[:, 0] = array_functions.normalize(data.x[:, 0])
    data.x[:, 1] = array_functions.normalize(data.x[:, 1])
    data.data_set_ids = data_set_ids
    print('n-all: ' + str(data.y.size))
    if save_data:
        s = '../kc-housing-spatial'
        if suffix != '':
            s += '-' + suffix
        helper_functions.save_object(s + '/raw_data.pkl', data)

else:
    feats_to_clear = ['id', 'date', 'yr_renovated', 'zipcode', 'lat', 'long']
    clear_idx = array_functions.find_set(feat_names, feats_to_clear + [y_name])
    x = data[:, ~clear_idx]
    x = array_functions.remove_quotes(x)
Example #30
    def train(self, data):
        '''
        self.C = 1
        self.C2 = 10
        self.k = 1
        '''
        #self.C = 100
        #self.configs.use_fused_lasso = False
        g_max = 2
        g0_oracle = np.zeros(data.n)
        g0_oracle[data.x[:, 0] < .5] = g_max
        f = self.create_eval(data, self.C)
        g = self.create_gradient(data, self.C)
        bounds = list((0, g_max) for i in range(data.n))
        #bounds = list((i, i) for i in range(data.n))
        #bounds = list((0, 0) for i in range(data.n))
        #bounds[0] = (0,None)
        #self.include_bias = False
        if self.include_bias:
            bounds = [(None, None)] + bounds
            #bounds = [(10, 10)] + bounds
        else:
            bounds = [(0, 0)] + bounds
        n = data.n + 1

        g0 = np.zeros(n)
        g0[1:] = g0_oracle
        #g0[:] = 1
        x = data.x
        y_s = np.squeeze(data.y_s[:, 0])
        y_t = np.squeeze(data.y_t[:, 0])
        y = data.y
        W = -array_functions.make_laplacian_kNN(data.x, self.k,
                                                self.configs.metric)
        #W = array_functions.make_graph_radius(data.x, self.radius, self.configs.metric)
        #W = array_functions.make_graph_adjacent(data.x, self.configs.metric)
        W = array_functions.try_toarray(W)
        if not data.is_regression:
            y = array_functions.make_label_matrix(
                data.y)[:, data.classes].toarray()
            y = y[:, 0]
        reg = self.create_reg(data.x)
        reg2 = self.create_reg2(data.x)
        if self.configs.use_fused_lasso:
            method = 'SLSQP'
            max_iter = 10000
            maxfun = 10000
            fused_lasso = ScipyOptNonparametricHypothesisTransfer.fused_lasso
            lasso = lambda x: self.C - fused_lasso(x, W)
            constraints = [{'type': 'ineq', 'fun': lasso}]
            if self.configs.no_reg:
                constraints = ()
            args = (x, y, y_s, y_t, 0, reg, self.C2, reg2)
        else:
            method = 'L-BFGS-B'
            max_iter = np.inf
            maxfun = np.inf
            constraints = ()
            args = (x, y, y_s, y_t, self.C, reg, self.C2, reg2)

        if self.g_supervised:
            x = np.squeeze(data.x)
            assert x.ndim == 1
            min_i = x.argmin()
            max_i = x.argmax()
            bounds[min_i] = (1, None)
            bounds[max_i] = (0, 0)

        options = {
            'disp': False,
            'maxiter': max_iter,
            'maxfun': maxfun,
            #'pgtol': 1e-8
        }
        results = optimize.minimize(
            f,
            g0,
            method=method,
            bounds=bounds,
            jac=g,
            options=options,
            constraints=constraints,
            args=args,
        )
        compare_results = False
        if compare_results or not results.success:
            options['disp'] = False
            # no 'jac' is passed below, so the gradient is approximated numerically
            results2 = optimize.minimize(f,
                                         g0,
                                         method=method,
                                         bounds=bounds,
                                         options=options,
                                         constraints=constraints,
                                         args=args)
        if compare_results:
            err = results.x - results2.x
            if norm(results2.x[1:]) == 0:
                print('All zeros - using absolute error')
                print('Abs Error - g: ' + str(norm(err[1:])))
            else:
                print('Rel Error - g: ' + str(
                    norm(err[1:]) / norm(results2.x[1:])))
            if not self.include_bias:
                if norm(results2.x[0]) == 0:
                    print('Abs Error - b: ' + str(norm(err[0])))
                else:
                    print('Rel Error - b: ' + str(
                        norm(err[0]) / norm(results2.x[0])))
            rel_error = norm(results.fun - results2.fun) / norm(results2.fun)
            print('Rel Error - f(g*): ' + str(rel_error))
            if rel_error > .001 and norm(results2.x) > 0:
                print('Big error: C=' + str(self.C) + ' C2=' + str(self.C2))
        if not results.success:
            results = results2
        self.g = results.x[1:]
        self.bias = results.x[0]
        if not results.success:
            self.g[:] = 0
            self.bias = 0
            #print 'Failed: ' + results.message
        '''
        I = data.arg_sort()
        x = (data.x[I,:])
        g = array_functions.vec_to_2d(results.x[I])
        v = np.hstack((x,g))
        print v
        print ''
        '''
        s = 'C=' + str(self.C) + ',C2=' + str(self.C2) + ',k=' + str(
            self.k) + '-'
        if not results.success:
            s += 'Opt failed - '
        has_negative = (self.g[1:] < -1e-6).any()
        if has_negative:
            s += 'Negative g - min value: ' + str(self.g.min())
        if not results.success or has_negative:
            print(s + ': ' + results.message)
            self.g[:] = 0
        g_data = data_lib.Data()
        g_data.x = data.x
        g_data.y = results.x[1:]
        g_data.is_regression = True
        g_data.set_train()
        g_data.set_target()
        g_data.set_true_y()
        self.g_nw.train_and_test(g_data)