Example #1
def mult_test(*args):
    print 'mult'
    size = (2000, 2000)
    X = np.random.uniform(-1, 1, size)
    tic()
    XX = X.T.dot(X)
    toc()
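The tic()/toc() pair used throughout these examples (sometimes through a timer module, as in Example #2) is a project utility and is not defined in the snippets. Below is a minimal sketch of a MATLAB-style timer that matches how it is called here, with toc() both printing and returning the elapsed seconds; the module-level variable and output format are assumptions, not the project's actual implementation.

import time

_tic_time = None  # start time recorded by the last tic() (assumed layout)

def tic():
    # Record the current wall-clock time.
    global _tic_time
    _tic_time = time.time()

def toc():
    # Print and return the seconds elapsed since the last tic().
    elapsed = time.time() - _tic_time
    print('elapsed: %.3f seconds' % elapsed)
    return elapsed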
Example #2
def _run_experiment_args(self, results_file, data_and_splits, method_results, i_labels, split):
    num_labels = self.configs.num_labels[i_labels]
    s = str(num_labels) + '-' + str(split)
    curr_results = _load_temp_split_file(results_file, num_labels, split)
    if curr_results:
        return curr_results
    if mpi_utility.is_master():
        timer.tic()
    #print 'num_labels-split: ' + s
    temp_file_name = _temp_split_file_name(results_file, num_labels, split)
    temp_dir_root = helper_functions.remove_suffix(temp_file_name, '.pkl')
    temp_dir = temp_dir_root + '/CV-temp/'
    curr_data = data_and_splits.get_split(split, num_labels)
    learner = self.configs.learner
    curr_learner = copy.deepcopy(learner)
    curr_learner.split_idx_str = s
    curr_learner.temp_dir = temp_dir
    curr_results = curr_learner.train_and_test(curr_data)
    if mpi_utility.is_group_master():
        helper_functions.save_object(_temp_split_file_name(results_file,num_labels,split),curr_results)
        helper_functions.delete_dir_if_exists(temp_dir_root)
    instance_subset = learner.configs.instance_subset
    results_features = learner.configs.results_features
    test_error_to_print = 'is_train'
    if mpi_utility.is_group_master():
        if hasattr(curr_learner, 'best_params'):
            print s + '-' + str(curr_learner.best_params) + ' Error: ' + \
                  str(curr_results.compute_error(self.configs.loss_function, results_features, test_error_to_print))
        else:
            print s + ' Done'
    if mpi_utility.is_master():
        timer.toc()
    return curr_results
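The helpers _load_temp_split_file and _temp_split_file_name are not shown here; they cache the results of each (num_labels, split) pair so that an interrupted experiment can resume without recomputing finished splits. A rough sketch of what such helpers could look like follows; the path scheme is a guess (only the .pkl suffix is implied by the remove_suffix call above), so treat it as illustrative rather than the project's actual code.

import os
import pickle

def _temp_split_file_name(results_file, num_labels, split):
    # Hypothetical scheme: one pickle per (num_labels, split) pair,
    # stored in a temp directory derived from the final results file.
    base = os.path.splitext(results_file)[0]
    return base + '-temp/' + str(num_labels) + '-' + str(split) + '.pkl'

def _load_temp_split_file(results_file, num_labels, split):
    # Return the cached results if present, otherwise None.
    file_name = _temp_split_file_name(results_file, num_labels, split)
    if not os.path.isfile(file_name):
        return None
    with open(file_name, 'rb') as f:
        return pickle.load(f)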
Example #3
def mult_test(*args):
    print 'mult'
    size = (2000, 2000)
    X = np.random.uniform(-1, 1, size)
    tic()
    XX = X.T.dot(X)
    toc()
Example #4
def nystrom_woodbury_laplacian(X,
                               lamb,
                               perc_columns,
                               W=None,
                               C=None,
                               D=None,
                               v=None):
    lamb = float(lamb)
    timing_test = False
    if timing_test:
        tic()
    if W is None or C is None:
        W, C = nystrom(X, perc_columns)
    #W_inv = np.linalg.pinv(W)
    #X_n = X.shape[0]
    d = X.sum(1)
    dl_inv = 1 / (d + lamb)

    inv_approx = None
    vProd = None
    fast_solver = True
    if fast_solver:
        CTA = C.T * dl_inv
        B_inv = np.linalg.pinv(-W + CTA.dot(C))
        if v is not None:
            assert False, 'Make sure this works'
            v1 = CTA.dot(v)
            v2 = B_inv.dot(v1)
            v3 = -C.dot(v2)
            v4 = v3 + v
            v5 = dl_inv * v4
            vProd = v5
        else:
            T = -C.dot(B_inv).dot(CTA)
            T[np.diag_indices_from(T)] += 1
            inv_approx = dl_inv[:, None] * T
        '''
        vProd = inv_approx.dot(v)
        err = norm(vProd - v5) / norm(vProd)
        print str(err)
        print ''
        '''
    else:
        A_inv = np.diag(1 / (d + lamb))
        CTA = C.T.dot(A_inv)
        B_inv = np.linalg.pinv(-W + CTA.dot(C))
        inv_approx = A_inv - A_inv.dot(C).dot(B_inv).dot(CTA)
    #inv_approx = A_inv.dot(np.eye(A_inv.shape[0]) - C.dot(B_inv).dot(CTA))
    '''

    '''
    #print 'optimized approx error: ' + str(norm(inv_approx-inv_approx2))
    if timing_test:
        toc()
        tic()
        inv_actual = np.linalg.inv(lamb * np.eye(X.shape[0]) + np.diag(d) - X)
        print 'Nystrom-Woodbury error: ' + str(
            norm(inv_approx - inv_actual) / norm(inv_actual))
        toc()
    return inv_approx, vProd
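The fast_solver branch applies the Woodbury identity to the Nystrom factorization X ≈ C·pinv(W)·C^T: with the diagonal matrix A = diag(d) + lamb*I, (A - C·pinv(W)·C^T)^-1 = A^-1 - A^-1·C·(C^T·A^-1·C - W)^-1·C^T·A^-1, so only a small m x m system is ever inverted. The sketch below checks that identity on a small, well-conditioned random problem; it is independent of the function above and purely illustrative.

import numpy as np

np.random.seed(0)
n, m = 50, 10
a = np.random.uniform(2.0, 4.0, n)         # diagonal entries of A
C = 0.1 * np.random.randn(n, m)            # tall Nystrom column block
G = np.random.randn(m, m)
W = G.dot(G.T) + np.eye(m)                 # symmetric, invertible

# Direct inverse of A - C W^{-1} C^T.
M = np.diag(a) - C.dot(np.linalg.inv(W)).dot(C.T)
direct = np.linalg.inv(M)

# Woodbury form: only the m x m matrix is inverted (as in fast_solver above).
a_inv = 1.0 / a
CTA = C.T * a_inv                          # C^T A^{-1}, exploiting diagonal A
B_inv = np.linalg.inv(-W + CTA.dot(C))
woodbury = a_inv[:, None] * (np.eye(n) - C.dot(B_inv).dot(CTA))

print(np.linalg.norm(direct - woodbury) / np.linalg.norm(direct))  # near machine precision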
Example #5
def normal_test(*args):
    n = 5000
    p = 2000
    X = np.random.uniform(-1, 1, (n, p))
    C = 1e-3
    y = np.random.uniform(-1, 1, n)
    tic()
    A = X.T.dot(X) + C * np.eye(p)
    k = X.T.dot(y)
    w = np.linalg.solve(A, k)
    toc()
Example #6
def normal_test(*args):
    n = 5000
    p = 2000
    X = np.random.uniform(-1, 1, (n, p))
    C = 1e-3
    y = np.random.uniform(-1, 1, n)
    tic()
    A = X.T.dot(X) + C*np.eye(p)
    k = X.T.dot(y)
    w = np.linalg.solve(A, k)
    toc()
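normal_test times the closed-form ridge regression solve: w = (X^T X + C·I)^-1 X^T y is the minimizer of ||Xw - y||^2 + C·||w||^2. As a sanity check (not part of the snippets), the same solution can be reproduced with scikit-learn, assuming it is installed; fit_intercept=False because the snippet fits no intercept.

import numpy as np
from sklearn.linear_model import Ridge

n, p = 500, 50
X = np.random.uniform(-1, 1, (n, p))
y = np.random.uniform(-1, 1, n)
C = 1e-3

# Normal-equation solve, as in the snippet above (smaller sizes for speed).
w_normal = np.linalg.solve(X.T.dot(X) + C * np.eye(p), X.T.dot(y))

# Ridge regression with the same penalty and no intercept.
w_ridge = Ridge(alpha=C, fit_intercept=False).fit(X, y).coef_

print(np.abs(w_normal - w_ridge).max())  # differences should be negligible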
Example #7
def cvx_test(*args):
    n = 5000
    p = 100
    X = np.random.uniform(-1, 1, (n, p))
    C = 1e-3
    y = np.random.uniform(-1, 1, n)
    w = cvx.Variable(p)
    loss = cvx.sum_entries(cvx.square(X * w - y))
    reg = cvx.norm2(w)**2
    obj = cvx.Minimize(loss + C * reg)
    prob = cvx.Problem(obj, [])
    tic()
    prob.solve(solver=cvx.SCS, verbose=False)
    toc()
Example #8
def cvx_test(*args):
    n = 5000
    p = 100
    X = np.random.uniform(-1,1,(n,p))
    C = 1e-3
    y = np.random.uniform(-1,1, n)
    w = cvx.Variable(p)
    loss = cvx.sum_entries(cvx.square(X*w - y))
    reg = cvx.norm2(w)**2
    obj = cvx.Minimize(loss + C*reg)
    prob = cvx.Problem(obj, [])
    tic()
    prob.solve(solver=cvx.SCS, verbose=False)
    toc()
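cvx_test minimizes the same ridge objective as normal_test, ||Xw - y||^2 + C·||w||^2, but hands it to the SCS solver through CVXPY. The snippets use the pre-1.0 CVXPY API (sum_entries, and * for matrix multiplication); under CVXPY 1.x the same formulation would look roughly like the sketch below, shown only as a translation of the problem and not as the project's code.

import numpy as np
import cvxpy as cvx

n, p = 5000, 100
X = np.random.uniform(-1, 1, (n, p))
y = np.random.uniform(-1, 1, n)
C = 1e-3

w = cvx.Variable(p)
loss = cvx.sum_squares(X @ w - y)   # replaces sum_entries(square(X * w - y))
reg = cvx.sum_squares(w)            # ||w||_2^2, replaces norm2(w)**2
prob = cvx.Problem(cvx.Minimize(loss + C * reg))
prob.solve(solver=cvx.SCS, verbose=False)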
Example #9
def run_main(num_labels=None, split_idx=None, no_viz=None, configs=None, comm=None):
    import argparse
    import sys
    #print sys.argv
    parser = argparse.ArgumentParser()
    parser.add_argument('-num_labels', type=int)
    parser.add_argument('-split_idx', type=int)
    parser.add_argument('-no_viz', action='store_true')
    arguments = parser.parse_args(sys.argv[1:])
    if num_labels is not None:
        arguments.num_labels = num_labels
    if split_idx is not None:
        arguments.split_idx = split_idx
    if no_viz is not None:
        arguments.no_viz = no_viz

    configs_lib.comm = comm
    if test_mpi:
        from mpi4py import MPI
        print str(MPI.COMM_WORLD.Get_rank()) + '-' + str(arguments.num_labels) + '-' + str(arguments.split_idx)
        return

    configs_lib.arguments = arguments
    import warnings
    #print 'Ignoring Deprecation Warnings'
    warnings.filterwarnings("ignore",category=DeprecationWarning)

    from mpi4py import MPI
    comm = MPI.COMM_WORLD
    if MPI.COMM_WORLD.Get_size() > 1:
        if mpi_utility.is_group_master():
            print '(' + socket.gethostname() + ') Process ' + str(comm.Get_rank()) + ': Starting experiments...'
    else:
        print 'Starting experiments...'
    if mpi_utility.is_group_master():
        timer.tic()
    if configs_lib.run_experiments:
        run_experiments(configs)
    if mpi_utility.is_group_master():
        timer.toc()
    if helper_functions.is_laptop():
        import winsound
        winsound.Beep(440, 1000)
    if helper_functions.is_laptop() and not arguments.no_viz and MPI.COMM_WORLD.Get_size() == 1:
        vis_configs = configs_lib.VisualizationConfigs()
        if vis_configs.vis_table:
            create_table()
        else:
            run_visualization()
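run_main doubles as the command-line entry point: the keyword arguments, when not None, override the flags parsed from sys.argv, and the MPI-specific branches only matter when the script is launched with more than one rank. Typical invocations might look like the following; the module/file name main is an assumption.

# Hypothetical launch examples; the module/file name `main` is a guess.

# From a shell, across 4 MPI ranks, using the argparse flags defined above:
#   mpiexec -n 4 python main.py -num_labels 10 -split_idx 0 -no_viz

# In-process, single rank; keyword arguments override the parsed flags:
import main
main.run_main(num_labels=10, split_idx=0, no_viz=True)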
Example #10
    A = X.T.dot(X) + C * np.eye(p)
    k = X.T.dot(y)
    w = np.linalg.solve(A, k)
    toc()


def cvx_test(*args):
    n = 5000
    p = 100
    X = np.random.uniform(-1, 1, (n, p))
    C = 1e-3
    y = np.random.uniform(-1, 1, n)
    w = cvx.Variable(p)
    loss = cvx.sum_entries(cvx.square(X * w - y))
    reg = cvx.norm2(w)**2
    obj = cvx.Minimize(loss + C * reg)
    prob = cvx.Problem(obj, [])
    tic()
    prob.solve(solver=cvx.SCS, verbose=False)
    toc()


if __name__ == '__main__':
    comm = MPI.COMM_WORLD
    is_master = comm.Get_rank() == 0
    if is_master:
        tic()
    run_test()
    if is_master:
        toc()
Example #11
    def predict(self, data):
        # d = data_lib.Data(np.expand_dims(data.source_y_pred, 1), data.y)
        y_pred_source = data.source_y_pred
        I = np.arange(y_pred_source.size)
        if self.predict_sample is not None and self.predict_sample < y_pred_source.size:
            I = np.random.choice(y_pred_source.size,
                                 self.predict_sample,
                                 replace=False)
        if self.use_rbf:
            #L = array_functions.make_laplacian(y_pred_source[I], self.sigma_tr)
            W_source_pred = array_functions.make_rbf(y_pred_source[I],
                                                     self.sigma_tr)
            if self.oracle_guidance is not None:
                y = data.true_y[I]

                n_y = y.size
                num_to_sample = math.ceil(self.oracle_guidance * n_y**2)
                rand_index1 = np.random.choice(n_y,
                                               int(num_to_sample),
                                               replace=True)
                rand_index2 = np.random.choice(n_y,
                                               int(num_to_sample),
                                               replace=True)
                if self.oracle_guidance_binary:
                    target_distances = array_functions.make_graph_distance(y)
                    distance_threshold = .2 * (y.max() - y.min())
                    W_source_pred[rand_index1, rand_index2] = target_distances[
                        rand_index1, rand_index2] <= distance_threshold
                    W_source_pred[rand_index2, rand_index1] = target_distances[
                        rand_index2, rand_index1] <= distance_threshold
                else:
                    y_scaled = array_functions.normalize(y) * (
                        y_pred_source.max() - y_pred_source.min())
                    W_oracle_pred = array_functions.make_rbf(
                        y_scaled, self.sigma_tr)
                    W_source_pred[rand_index1,
                                  rand_index2] = W_oracle_pred[rand_index1,
                                                               rand_index2]
                    W_source_pred[rand_index2,
                                  rand_index1] = W_oracle_pred[rand_index2,
                                                               rand_index1]
            W = array_functions.make_rbf(self.transform.transform(self.x),
                                         self.sigma_nw,
                                         x2=self.transform.transform(
                                             data.x[I, :])).T

        else:
            assert self.oracle_guidance is None
            k_L = int(self.sigma_tr * I.size)
            #L = array_functions.make_laplacian_kNN(y_pred_source[I], k_L)
            W_source_pred = array_functions.make_knn(y_pred_source[I], k_L)
            k_W = int(self.sigma_nw * self.x.shape[0])
            W = array_functions.make_knn(self.transform.transform(
                data.x[I, :]),
                                         k_W,
                                         x2=self.transform.transform(self.x))
        sparsify_prediction_graph = False
        if self.use_prediction_graph_radius:
            sparsify_prediction_graph = True
            W_sparse = array_functions.make_graph_radius(
                self.transform.transform(data.x[I, :]),
                radius=self.radius,
            )
        if self.use_prediction_graph_sparsification:
            sparsify_prediction_graph = True
            W_sparse = array_functions.make_knn(self.transform.transform(
                data.x[I, :]),
                                                self.k_sparsification,
                                                normalize_entries=False)
            #W_L = array_functions.make_knn(y_pred_source[I], k_L)
        if sparsify_prediction_graph:
            W_source_pred = W_source_pred * W_sparse
        S = array_functions.make_smoothing_matrix(W)
        timing_test = False
        C = self.C * self.x.shape[0] / W_source_pred[:].sum()
        if self.nystrom_percentage > 0 or timing_test:
            if timing_test:
                tic()
            Sy = S.dot(self.y)
            if C != 0:
                lamb = 1 / float(C)
                f = None
                tic()
                inv_approx, _ = array_functions.nystrom_woodbury_laplacian(
                    W_source_pred, lamb, self.nystrom_percentage)
                self.predict_time = toc()
                #_, f2 = array_functions.nystrom_woodbury_laplacian(W_source_pred, lamb, self.nystrom_percentage, v=Sy)
                if f is not None:
                    f *= lamb
                else:
                    inv_approx *= lamb
                    f = inv_approx.dot(Sy)
            else:
                f = Sy
            if timing_test:
                toc()
        if self.nystrom_percentage == 0 or self.nystrom_percentage is None or timing_test:
            if timing_test:
                tic()
            L = array_functions.make_laplacian_with_W(W_source_pred,
                                                      normalized=False)
            A = np.eye(I.size) + C * L
            try:
                tic()
                f = np.linalg.lstsq(A, S.dot(self.y))[0]
                self.predict_time = toc()
            except:
                print 'GraphTransferNW:predict failed, returning mean'
                f = self.y.mean() * np.ones(data.true_y.shape)
            if timing_test:
                toc()
        if timing_test:
            A_inv = np.linalg.inv(A)
            print 'approx error: ' + str(
                norm(inv_approx - A_inv) / norm(A_inv))
        o = results.Output(data)
        if self.predict_sample is not None:
            nw_data = data_lib.Data(data.x[I, :], f)
            self.nw_learner.train_and_test(nw_data)
            nw_output = self.nw_learner.predict(data)
            o.y = nw_output.y
            o.fu = nw_output.y
        else:
            o.y = f
            o.fu = f

        return o
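At its core, predict performs a graph-regularized smoothing step: S maps the training labels onto the sampled points via Nadaraya-Watson weights, and the system (I + C·L)·f = S·y, with L the unnormalized Laplacian of the source-prediction graph W_source_pred, smooths those estimates along that graph (either exactly, or through the Nystrom-Woodbury approximation above). The toy sketch below solves the same kind of system; it collapses the two graphs into one for brevity, and the two helper implementations are assumptions standing in for the project's array_functions versions.

import numpy as np

def make_laplacian_with_W(W, normalized=False):
    # Unnormalized graph Laplacian L = D - W (assumed behaviour).
    return np.diag(W.sum(axis=1)) - W

def make_smoothing_matrix(W):
    # Row-normalized weights, i.e. Nadaraya-Watson smoothing (assumed behaviour).
    return W / W.sum(axis=1, keepdims=True)

np.random.seed(0)
n = 20
x = np.random.rand(n, 1)
y = np.sin(4 * x[:, 0]) + 0.1 * np.random.randn(n)

W = np.exp(-np.square(x - x.T))            # RBF similarity graph
S = make_smoothing_matrix(W)
L = make_laplacian_with_W(W)
C = 1.0
f = np.linalg.solve(np.eye(n) + C * L, S.dot(y))   # smoothed predictions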
Example #12
        comm = MPI.COMM_WORLD
        for c in batch_configs.config_list:
            if results_exist(c):
                if comm.Get_rank() == 0:
                    print 'Skipping: ' + c.results_file
                continue
            if comm.Get_rank() == 0:
                timer.tic()
            num_labels_list = list(itertools.product(c.num_labels, range(c.num_splits)))
            no_viz = False
            pool.map(mpi_run_main_args, [n + (no_viz, c, ) for n in num_labels_list])
            pool.close()

            if comm.Get_rank() == 0:
                print 'TOTAL TIME:'
                timer.toc()
                main.run_main(configs=c)
    else:
        if use_multiprocessing_pool:
            pool = multiprocessing_utility.LoggingPool(processes=pool_size)
            pool.map(launch_subprocess_args, num_labels_list)
        else:
            for i in num_labels_list:
                launch_subprocess_args(i)
        comm = MPI.COMM_WORLD
        if comm.Get_rank() == 0:
            print 'TOTAL TIME:'
            timer.toc()
            main.run_main()
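This last fragment is the MPI batch driver: rank 0 times each configuration while a worker pool (created earlier in the file and not shown here) maps mpi_run_main_args over every (num_labels, split) combination, and rank 0 then aggregates via main.run_main(configs=c). The same fan-out pattern is sketched below with mpi4py's built-in futures pool as a stand-in for whatever pool the project actually uses; run_one and the work list are placeholders.

# Run with, e.g.:  mpiexec -n 8 python -m mpi4py.futures batch_driver.py
import itertools
from mpi4py.futures import MPIPoolExecutor

def run_one(args):
    num_labels, split = args
    # ... train and evaluate one (num_labels, split) combination ...
    return num_labels, split

if __name__ == '__main__':
    work = list(itertools.product([10, 20, 40], range(5)))
    with MPIPoolExecutor() as pool:
        for result in pool.map(run_one, work):
            print(result)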