def run(params, project_dir):
    # Per-layer median regularization values can also arrive as a dict:
    # medianLayer0 = params['ml1'][0]
    # medianLayer1 = params['ml2'][0]
    # medianLayer2 = params['ml3'][0]
    # medianLayer3 = params['ml4'][0]
    genoutput(project_dir)
    medianLayer0 = params[0]
    medianLayer1 = params[1]
    medianLayer2 = params[2]
    medianLayer3 = params[3]
    # Hard-coded values used for debugging:
    # medianLayer0 = 0.3
    # medianLayer1 = 1.3
    # medianLayer2 = 2.3
    # medianLayer3 = 3.3

    RS = RandomState((seed, "top_rs"))
    data = loadData.loadMnist()
    train_data, tests_data = loadData.load_data_as_dict(data, classNum)
    train_data_subclass = loadSubsetData(train_data, RS, N_train, clientNum)
    print "training samples {0}: testing samples: {1}".format(N_train, N_tests)

    w_parser, pred_fun, loss_fun, frac_err = make_nn_funs(layer_sizes)
    N_weights = w_parser.vect.size

    # Initialization scales: 1/sqrt(fan-in) for weights, 1.0 for biases.
    init_scales = w_parser.new_vect(np.zeros(N_weights))
    for i in range(N_layers):
        init_scales[('weights', i)] = 1 / np.sqrt(layer_sizes[i])
        init_scales[('biases', i)] = 1.0
    init_scales = init_scales.vect

    def process_reg(t_vect):
        # Remove the redundancy due to sharing regularization within units.
        all_r = w_parser.new_vect(t_vect)
        new_r = np.zeros((0,))
        for i in range(N_layers):
            layer = all_r[('weights', i)]
            assert np.all(layer[:, 0] == layer[:, 1])
            cur_r = layer[:, 0]
            new_r = np.concatenate((new_r, cur_r))
        return new_r

    all_regs, all_tests_loss = [], []

    def train_reg(reg_0, constraint, N_meta_iter, i_top):
        # reg is the vector of per-weight regularization hyperparameters.
        def hyperloss(reg, i_hyper, cur_train_data, cur_valid_data):
            RS = RandomState((seed, i_top, i_hyper, "hyperloss"))
            w_vect_0 = RS.randn(N_weights) * init_scales
            w_vect_final = train_z(loss_fun, cur_train_data, w_vect_0, reg)
            # fraction_error = frac_err(w_vect_final, **cur_valid_data)
            return loss_fun(w_vect_final, **cur_valid_data)
        hypergrad = grad(hyperloss)

        cur_reg = reg_0
        for i_hyper in range(N_meta_iter):
            if i_hyper % N_meta_thin == 0:
                tests_loss = hyperloss(cur_reg, i_hyper, train_data, tests_data)
                all_tests_loss.append(tests_loss)
                all_regs.append(cur_reg.copy())
                print "Hyper iter {0}, test loss {1}".format(i_hyper, all_tests_loss[-1])
                # print "Cur_reg", np.mean(cur_reg)
                print "Cur_reg", cur_reg
            # Each client computes a hypergradient on its own train/valid
            # split; updates are sign-based and scaled by 1/clientNum.
            for client_i in range(clientNum):
                RS = RandomState((seed, i_top, i_hyper, "hyperloss"))
                cur_split = random_partition(train_data_subclass[client_i], RS,
                                             [N_train - N_valid, N_valid])
                raw_grad = hypergrad(cur_reg, i_hyper, *cur_split)
                constrained_grad = constrain_reg(w_parser, raw_grad, constraint)
                # Alternative update rules:
                # cur_reg -= constrained_grad / np.abs(constrained_grad + 1e-8) * meta_alpha
                # cur_reg -= constrained_grad * meta_alpha / clientNum
                cur_reg -= np.sign(constrained_grad) * meta_alpha / clientNum
                # print "constrained_grad", constrained_grad
            print "\n"
        return cur_reg

    def new_hyperloss(reg, i_hyper, cur_train_data, cur_valid_data):
        RS = RandomState((seed, i_hyper, "hyperloss"))
        w_vect_0 = RS.randn(N_weights) * init_scales
        w_vect_final = train_z(loss_fun, cur_train_data, w_vect_0, reg)
        return loss_fun(w_vect_final, **cur_valid_data)

    # Sanity check over a range of initial regularization scales:
    # t_scale = [-1, 0, 1]
    # cur_split = random_partition(train_data, RS, [N_train - N_valid, N_valid])
    # for s in t_scale:
    #     reg = np.ones(N_weights) * log_L2_init + s
    #     loss = new_hyperloss(reg, 0, *cur_split)
    #     print "Results: s= {0}, loss = {1}".format(s, loss)
    # reg = np.ones(N_weights) * log_L2_init

    # Initialize the per-weight regularization from the per-layer medians.
    shape0, shape1, shape2, shape3 = layer_sizes[0], layer_sizes[1], layer_sizes[2], layer_sizes[3]
    l1 = np.ones(shape0 * shape1) * medianLayer0
    l2 = np.ones(shape1 * shape2 + shape1) * medianLayer1
    l3 = np.ones(shape2 * shape3 + shape2) * medianLayer2
    l4 = np.ones(shape3) * medianLayer3
    reg = np.concatenate([l1, l2, l3, l4])

    constraints = ['universal', 'layers', 'units']
    for i_top, (N_meta_iter, constraint) in enumerate(zip(all_N_meta_iter, constraints)):
        print "Top level iter {0}".format(i_top)
        reg = train_reg(reg, constraint, N_meta_iter, i_top)

    all_L2_regs = np.array(zip(*map(process_reg, all_regs)))
    # return all_L2_regs, all_tests_loss
    return all_tests_loss[-1]
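# A minimal standalone sketch (not part of the original code) of the update
# rule inside train_reg above: each client contributes a sign-based step
# scaled by 1/clientNum. In the real loop each client's hypergradient is
# recomputed at the current cur_reg; here the gradients are precomputed, so
# one call moves cur_reg by sum_i sign(g_i) * meta_alpha / n. All names
# below are illustrative.
def _sketch_per_client_sign_update(cur_reg, client_grads, meta_alpha):
    n = len(client_grads)
    for g in client_grads:
        cur_reg = cur_reg - np.sign(g) * meta_alpha / n
    return cur_reg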
def run():
    RS = RandomState((seed, "top_rs"))
    data = loadData.loadMnist()
    train_data, tests_data = loadData.load_data_as_dict(data, classNum)
    train_data = random_partition(train_data, RS, [N_train_Full])[0]
    tests_data = random_partition(tests_data, RS, [N_tests])[0]
    train_data_subclass = loadData.loadSubsetData(train_data, RS, N_train, clientNum)
    print "training samples {0}: testing samples: {1}".format(N_train, N_tests)

    w_parser, pred_fun, loss_fun, frac_err = make_nn_funs(layer_sizes)
    N_weights = w_parser.vect.size

    def transform_weights(z_vect, transform):
        return z_vect * np.exp(transform)

    def regularization(z_vect):
        return np.dot(z_vect, z_vect) * np.exp(log_L2)

    def constrain_reg(t_vect, name):
        all_t = w_parser.new_vect(t_vect)
        # Biases are not regularized.
        for i in range(N_layers):
            all_t[('biases', i)] = 0.0
        if name == 'universal':
            t_mean = np.mean([np.mean(all_t[('weights', i)])
                              for i in range(N_layers)])
            for i in range(N_layers):
                all_t[('weights', i)] = t_mean
        elif name == 'layers':
            for i in range(N_layers):
                all_t[('weights', i)] = np.mean(all_t[('weights', i)])
        elif name == 'units':
            for i in range(N_layers):
                all_t[('weights', i)] = np.mean(all_t[('weights', i)], axis=1, keepdims=True)
        else:
            raise Exception("Unknown constraint: {0}".format(name))
        return all_t.vect

    def process_transform(t_vect):
        # Remove the redundancy due to sharing transformations within units.
        all_t = w_parser.new_vect(t_vect)
        new_t = np.zeros((0,))
        for i in range(N_layers):
            layer = all_t[('weights', i)]
            assert np.all(layer[:, 0] == layer[:, 1])
            cur_t = log_L2 - 2 * layer[:, 0]
            new_t = np.concatenate((new_t, cur_t))
        return new_t

    def train_z(data, z_vect_0, transform):
        N_data = data['X'].shape[0]

        def primal_loss(z_vect, transform, i_primal, record_results=False):
            RS = RandomState((seed, i_primal, "primal"))
            idxs = RS.randint(N_data, size=batch_size)
            minibatch = dictslice(data, idxs)
            w_vect = transform_weights(z_vect, transform)
            loss = loss_fun(w_vect, **minibatch)
            reg = regularization(z_vect)
            if record_results and i_primal % N_thin == 0:
                print "Iter {0}: train: {1}".format(i_primal, getval(loss))
            return loss + reg

        return sgd(grad(primal_loss), transform, z_vect_0, alpha, beta, N_iters)

    all_transforms, all_tests_loss, all_tests_rates, all_avg_regs = [], [], [], []

    def train_reg(reg_0, constraint, N_meta_iter, i_top):
        def hyperloss(transform, i_hyper, cur_train_data, cur_valid_data):
            RS = RandomState((seed, i_top, i_hyper, "hyperloss"))
            z_vect_0 = RS.randn(N_weights) * np.exp(log_init_scale)
            z_vect_final = train_z(cur_train_data, z_vect_0, transform)
            w_vect_final = transform_weights(z_vect_final, transform)
            return loss_fun(w_vect_final, **cur_valid_data)
        hypergrad = grad(hyperloss)

        def error_rate(transform, i_hyper, cur_train_data, cur_valid_data):
            RS = RandomState((seed, i_top, i_hyper, "hyperloss"))
            z_vect_0 = RS.randn(N_weights) * np.exp(log_init_scale)
            z_vect_final = train_z(cur_train_data, z_vect_0, transform)
            w_vect_final = transform_weights(z_vect_final, transform)
            return frac_err(w_vect_final, **cur_valid_data)

        cur_reg = reg_0
        for i_hyper in range(N_meta_iter):
            if i_hyper % N_meta_thin == 0:
                test_rate = error_rate(cur_reg, i_hyper, train_data, tests_data)
                all_tests_rates.append(test_rate)
                all_transforms.append(cur_reg.copy())
                all_avg_regs.append(np.mean(cur_reg))
                print "Hyper iter {0}, error rate {1}".format(i_hyper, all_tests_rates[-1])
                print "Cur_transform", np.mean(cur_reg)
            # Average the constrained hypergradients over all clients, then
            # take a single sign-based step on the shared transform.
            tempConstrained_grad = np.zeros(N_weights)
            for client_i in range(clientNum):
                RS = RandomState((seed, i_top, i_hyper, "hyperloss"))
                cur_split = random_partition(train_data_subclass[client_i], RS,
                                             [N_train - N_valid, N_valid])
                raw_grad = hypergrad(cur_reg, i_hyper, *cur_split)
                constrained_grad = constrain_reg(raw_grad, constraint)
                tempConstrained_grad += constrained_grad / clientNum
            cur_reg -= np.sign(tempConstrained_grad) * meta_alpha
        return cur_reg

    reg = np.zeros(N_weights) + 0.2
    constraints = ['universal', 'layers', 'units']
    for i_top, (N_meta_iter, constraint) in enumerate(zip(all_N_meta_iter, constraints)):
        print "Top level iter {0}".format(i_top)
        reg = train_reg(reg, constraint, N_meta_iter, i_top)

    all_L2_regs = np.array(zip(*map(process_transform, all_transforms)))
    return all_L2_regs, all_tests_rates, all_avg_regs
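# Sketch (illustrative only) contrasting this variant's aggregation with the
# first run() above: here the constrained hypergradients are averaged across
# clients first and a single sign step is taken, i.e. sign(mean_i g_i) *
# meta_alpha, rather than summing per-client sign steps.
def _sketch_averaged_sign_update(cur_reg, client_grads, meta_alpha):
    mean_grad = sum(client_grads) / float(len(client_grads))
    return cur_reg - np.sign(mean_grad) * meta_alpha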
def run():
    RS = RandomState((seed, "top_rs"))
    all_data = loadData.loadMnist()
    train_data, tests_data = loadData.load_data_as_dict(all_data, 10)
    train_data = random_partition(train_data, RS, [N_train])[0]
    tests_data = random_partition(tests_data, RS, [N_tests])[0]
    print "training samples {0}: testing samples: {1}".format(N_train, N_tests)

    w_parser, pred_fun, loss_fun, frac_err = make_nn_funs(layer_sizes)
    N_weights = w_parser.vect.size

    def transform_weights(z_vect, transform):
        return z_vect * np.exp(transform)

    def regularization(z_vect):
        return np.dot(z_vect, z_vect) * np.exp(log_L2)

    def constrain_reg(t_vect, name):
        all_t = w_parser.new_vect(t_vect)
        # Biases are not regularized.
        for i in range(N_layers):
            all_t[('biases', i)] = 0.0
        if name == 'universal':
            t_mean = np.mean([np.mean(all_t[('weights', i)])
                              for i in range(N_layers)])
            for i in range(N_layers):
                all_t[('weights', i)] = t_mean
        elif name == 'layers':
            for i in range(N_layers):
                all_t[('weights', i)] = np.mean(all_t[('weights', i)])
        elif name == 'units':
            for i in range(N_layers):
                all_t[('weights', i)] = np.mean(all_t[('weights', i)], axis=1, keepdims=True)
        else:
            raise Exception("Unknown constraint: {0}".format(name))
        return all_t.vect

    def process_transform(t_vect):
        # Remove the redundancy due to sharing transformations within units.
        all_t = w_parser.new_vect(t_vect)
        new_t = np.zeros((0,))
        for i in range(N_layers):
            layer = all_t[('weights', i)]
            assert np.all(layer[:, 0] == layer[:, 1])
            cur_t = log_L2 - 2 * layer[:, 0]
            new_t = np.concatenate((new_t, cur_t))
        return new_t

    def train_z(data, z_vect_0, transform):
        N_data = data['X'].shape[0]

        def primal_loss(z_vect, transform, i_primal, record_results=False):
            RS = RandomState((seed, i_primal, "primal"))
            idxs = RS.randint(N_data, size=batch_size)
            minibatch = dictslice(data, idxs)
            w_vect = transform_weights(z_vect, transform)
            loss = loss_fun(w_vect, **minibatch)
            reg = regularization(z_vect)
            if record_results and i_primal % N_thin == 0:
                print "Iter {0}: train: {1}".format(i_primal, getval(loss))
            return loss + reg

        return sgd(grad(primal_loss), transform, z_vect_0, alpha, beta, N_iters)

    all_transforms, all_tests_loss, all_tests_rates, all_avg_regs = [], [], [], []

    def train_reg(reg_0, constraint, N_meta_iter, i_top):
        def hyperloss(transform, i_hyper, cur_train_data, cur_valid_data):
            RS = RandomState((seed, i_top, i_hyper, "hyperloss"))
            z_vect_0 = RS.randn(N_weights) * np.exp(log_init_scale)
            z_vect_final = train_z(cur_train_data, z_vect_0, transform)
            w_vect_final = transform_weights(z_vect_final, transform)
            return loss_fun(w_vect_final, **cur_valid_data)
        hypergrad = grad(hyperloss)

        def error_rate(transform, i_hyper, cur_train_data, cur_valid_data):
            RS = RandomState((seed, i_top, i_hyper, "hyperloss"))
            z_vect_0 = RS.randn(N_weights) * np.exp(log_init_scale)
            z_vect_final = train_z(cur_train_data, z_vect_0, transform)
            w_vect_final = transform_weights(z_vect_final, transform)
            return frac_err(w_vect_final, **cur_valid_data)

        cur_reg = reg_0
        for i_hyper in range(N_meta_iter):
            if i_hyper % N_meta_thin == 0:
                test_rate = error_rate(cur_reg, i_hyper, train_data, tests_data)
                all_tests_rates.append(test_rate)
                all_transforms.append(cur_reg.copy())
                all_avg_regs.append(np.mean(cur_reg))
                print "Hyper iter {0}, error rate {1}".format(i_hyper, all_tests_rates[-1])
                print "Cur_transform", np.mean(cur_reg)
            # Single-client variant: one sign-based hypergradient step per
            # meta-iteration on a fresh train/valid split.
            RS = RandomState((seed, i_top, i_hyper, "hyperloss"))
            cur_split = random_partition(train_data, RS, [N_train - N_valid, N_valid])
            raw_grad = hypergrad(cur_reg, i_hyper, *cur_split)
            constrained_grad = constrain_reg(raw_grad, constraint)
            cur_reg -= np.sign(constrained_grad) * meta_alpha
        return cur_reg

    reg = np.zeros(N_weights) + 0.2
    constraints = ['universal', 'layers', 'units']
    for i_top, (N_meta_iter, constraint) in enumerate(zip(all_N_meta_iter, constraints)):
        print "Top level iter {0}".format(i_top)
        reg = train_reg(reg, constraint, N_meta_iter, i_top)

    all_L2_regs = np.array(zip(*map(process_transform, all_transforms)))
    return all_L2_regs, all_tests_rates, all_avg_regs
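# Numerical sanity check (a sketch, not part of the original file) of the
# reparameterization used above: with w = z * exp(t), the fixed penalty
# exp(log_L2) * z.z equals an elementwise penalty exp(log_L2 - 2*t) on w**2,
# which is exactly the per-unit quantity that process_transform reports via
# log_L2 - 2 * layer[:, 0].
def _check_effective_L2(z_vect, t_vect, log_L2):
    w_vect = z_vect * np.exp(t_vect)
    penalty_z = np.exp(log_L2) * np.dot(z_vect, z_vect)
    penalty_w = np.sum(np.exp(log_L2 - 2 * t_vect) * w_vect ** 2)
    assert np.allclose(penalty_z, penalty_w)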