def test_setattr(self):
    nd = NamedDict(a=1, b=2, c=3)
    nd.a = 'a'
    self.assertEqual(nd.a, 'a')
    self.assertEqual(nd['a'], 'a')
    nd.d = 4
    self.assertEqual(nd.d, 4)
    self.assertEqual(nd['d'], 4)
def get_stats_dict(self):
    stats = NamedDict(
        train_losses=self.train_losses, val_losses=self.val_losses, test_losses=self.test_losses,
        train_errors=self.train_errors, val_errors=self.val_errors, test_errors=self.test_errors,
        grads=self.grads,
        w_norms=self.w_norms
    )
    if self.dynamic_stats_storer is not None:
        stats = NamedDict(stats, **self.dynamic_stats_storer)
    return stats
def test_create(self):
    nd = NamedDict({'a': 1, 'b': 2})
    self.assertIsInstance(nd, NamedDict)
    self.assertTrue(hasattr(nd, 'a'))
    self.assertTrue(hasattr(nd, 'b'))
    nd = NamedDict(a=1, b=2)
    self.assertIsInstance(nd, NamedDict)
    self.assertTrue(hasattr(nd, 'a'))
    self.assertTrue(hasattr(nd, 'b'))
    nd = NamedDict([('a', 1), ('b', 2)])
    self.assertIsInstance(nd, NamedDict)
    self.assertTrue(hasattr(nd, 'a'))
    self.assertTrue(hasattr(nd, 'b'))
def get_stats_dict(self):
    ## TODO: loop through fields?
    stats = NamedDict(
        train_losses=self.train_losses, val_losses=self.val_losses, test_losses=self.test_losses,
        train_errors=self.train_errors, val_errors=self.val_errors, test_errors=self.test_errors,
        train_accs=self.train_accs, val_accs=self.val_accs, test_accs=self.test_accs,
        grads=self.grads,
        w_norms=self.w_norms,
        perturbations_norms=self.perturbations_norms,
        ref_train_losses=self.ref_train_losses, ref_val_losses=self.ref_val_losses, ref_test_losses=self.ref_test_losses,
        ref_train_errors=self.ref_train_errors, ref_val_errors=self.ref_val_errors, ref_test_errors=self.ref_test_errors,
        ref_train_accs=self.ref_train_accs, ref_val_accs=self.ref_val_accs, ref_test_accs=self.ref_test_accs,
        all_train_losses=self.all_train_losses, all_val_losses=self.all_val_losses, all_test_losses=self.all_test_losses,
        all_train_errors=self.all_train_errors, all_val_errors=self.all_val_errors, all_test_errors=self.all_test_errors,
        all_train_accs=self.all_train_accs, all_val_accs=self.all_val_accs, all_test_accs=self.all_test_accs,
        rs=self.rs,
        random_dirs=self.random_dirs
    )
    return stats
def test_getattr(self):
    nd = NamedDict(a=1, b=2, c=3)
    self.assertEqual(nd['a'], 1)
    self.assertEqual(nd['b'], 2)
    self.assertEqual(nd['c'], 3)
    self.assertEqual(nd.a, 1)
    self.assertEqual(nd.b, 2)
    self.assertEqual(nd.c, 3)
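# Editor's illustration: the tests above only exercise NamedDict's behavior; the class itself
# is defined elsewhere in the repo. A minimal self-contained sketch of a dict subclass that
# would satisfy test_create/test_getattr/test_setattr is given below -- this is an assumption
# about the interface inferred from the tests, not the project's actual implementation.
class _NamedDictSketch(dict):
    ''' A dict whose keys are also readable and writable as attributes. '''

    def __getattr__(self, name):
        try:
            return self[name]
        except KeyError:
            raise AttributeError(name)

    def __setattr__(self, name, value):
        self[name] = value

# Example usage of the sketch:
#   nd = _NamedDictSketch(a=1, b=2); nd.c = 3
#   nd.a == nd['a'] == 1 and nd.c == nd['c'] == 3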
def save2matlab_flatness_expt(path_to_filename, stats_collector, other_stats={}):
    '''
    Saves the current results from the flatness experiment.

    path_to_filename = full path (including the file name) where the .mat file is written,
        e.g. somewhere under './test_runs_flatness'
    '''
    stats = stats_collector.get_stats_dict()
    experiment_results = NamedDict(stats, **other_stats)
    ##
    scipy.io.savemat(path_to_filename, experiment_results)
def return_attributes(self):
    attributes = [attribute for attribute in dir(self)
                  if not attribute.startswith('__') and not callable(getattr(self, attribute))]
    results = {attribute: getattr(self, attribute) for attribute in attributes}
    return NamedDict(results)
def return_results(self):
    ''' Returns the recorded results: all data attributes whose name contains the string "all". '''
    ''' get list of attributes '''
    attributes = [attribute for attribute in dir(self)
                  if not attribute.startswith('__') and not callable(getattr(self, attribute))]
    ''' get results '''
    ## all results must have the string "all" in their attribute name
    results = {attribute: getattr(self, attribute) for attribute in attributes if 'all' in attribute}
    return NamedDict(results)
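# Editor's note (illustrative only): the two helpers above harvest an object's data attributes
# via dir(), filtering out dunders and callables; return_results() additionally keeps only the
# attributes whose name contains 'all'. The self-contained demo below shows that filtering
# pattern; _ToyCollector and its fields are made up for illustration.
def _demo_attribute_harvesting():
    class _ToyCollector:
        def __init__(self):
            self.all_train_losses = [0.9, 0.5]
            self.all_test_losses = [1.1, 0.8]
            self.current_epoch = 2  # no 'all' in the name, so return_results-style filtering drops it

    toy = _ToyCollector()
    attributes = [a for a in dir(toy) if not a.startswith('__') and not callable(getattr(toy, a))]
    # attributes == ['all_test_losses', 'all_train_losses', 'current_epoch']
    all_results = {a: getattr(toy, a) for a in attributes if 'all' in a}
    # all_results == {'all_test_losses': [1.1, 0.8], 'all_train_losses': [0.9, 0.5]}
    return all_results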
def save2matlab_flatness_expt(results_root, expt_path, matlab_file_name, stats_collector, other_stats={}):
    '''
    Saves the current results from the flatness experiment.

    results_root = location of the main folder where results are stored, e.g. './test_runs_flatness'
    expt_path = path of the experiment subfolder inside results_root
    matlab_file_name = name of the .mat file to write
    '''
    stats = stats_collector.get_stats_dict()
    experiment_results = NamedDict(stats, **other_stats)
    ##
    path_to_save = os.path.join(results_root, expt_path)
    utils.make_and_check_dir(path_to_save)
    path_to_save = os.path.join(path_to_save, matlab_file_name)
    scipy.io.savemat(path_to_save, experiment_results)
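# Editor's illustration: the save helpers above ultimately hand a flat dict of array-like
# values to scipy.io.savemat. The self-contained sketch below shows that round trip; the
# file path and field names are placeholders, not the experiment's real ones.
def _demo_savemat_roundtrip(path='/tmp/results_demo.mat'):
    import numpy as np
    import scipy.io

    stats = {'train_losses': np.array([0.9, 0.5, 0.1]), 'test_losses': np.array([1.0, 0.7, 0.4])}
    other_stats = {'eta': 0.2, 'nb_iter': 3}
    experiment_results = {**stats, **other_stats}
    scipy.io.savemat(path, experiment_results)
    loaded = scipy.io.loadmat(path)
    # loaded['train_losses'] comes back as a (1, 3) array, i.e. a MATLAB-style row vector
    return loaded['train_losses']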
def main(**kwargs): print(f'torch.get_rng_state={torch.get_rng_state}') #torch.manual_seed() ## #MDL_2_TRAIN='WP' #MDL_2_TRAIN='SP' #MDL_2_TRAIN='PERT' #MDL_2_TRAIN='TRIG_PERT' MDL_2_TRAIN = 'logistic_regression_mdl' ## start_time = time.time() np.set_printoptions( suppress=True ) #Whether or not suppress printing of small floating point values using scientific notation (default False). ##dtype = torch.cuda.FloatTensor # Uncomment this to run on GPU dtype = torch.FloatTensor dtype_x = dtype dtype_y = torch.LongTensor ## today_obj = date.today( ) # contains datetime.date(year, month, day); accessible via .day etc day = today_obj.day month = calendar.month_name[today_obj.month] ## Data file names truth_filename = '' data_filename = '' ## data_filename = 'classification_manual' ## Folder for experiment experiment_name = 'unit_logistic_regression' ## Regularization #reg_type = 'tikhonov' #reg_type = 'VW' #reg_type = 'V2W_D3' reg_type = '' ## config params ## LAMBDAS # expt_type = 'LAMBDAS' # N_lambdas = 50 # lb,ub = 0.01,10000 # one_over_lambdas = np.linspace(lb,ub,N_lambdas) # lambdas = list( 1/one_over_lambdas ) # lambdas = N_lambdas*[0.0] # nb_iterations = [int(1.4*10**6)] # nb_iterations = [int(8*10**4)] # nb_iterations = [int(100*1000)] # repetitions = len(lambdas)*[15] ## ITERATIONS # expt_type = 'ITERATIONS' # N_iterations = 30 # lb,ub = 1,60*10**4 # lambdas = [0] # nb_iterations = [ int(i) for i in np.linspace(lb,ub,N_iterations)] # repetitions = len(nb_iterations)*[10] ## SP DEGREE/MONOMIALS expt_type = 'SP_fig4' step_deg = 1 lb_deg, ub_deg = 1, 100 degrees = list(range(lb_deg, ub_deg + 1, step_deg)) st() lambdas = [0] #nb_iter = 1600*1000 #nb_iter = 10*1000*1000 #nb_iter = int(125*1000) nb_iter = int(10000) # sbatch nb_iterations = [nb_iter] repetitions = len(degrees) * [1] ## #debug, debug_sgd = True, False ## Hyper Params SGD weight parametrization M = 11 #eta = 0.00000000001 # eta = 1e-6 eta = 0.2 A = 0.0 ## pick the right hyper param if expt_type == 'LAMBDAS': degrees = [] reg_lambda = get_hp_to_run(hyper_params=lambdas, repetitions=repetitions, satid=SLURM_ARRAY_TASK_ID) nb_iter = nb_iterations[0] prefix_experiment = f'it_{nb_iter}/lambda_{reg_lambda}_reg_{reg_type}' elif expt_type == 'ITERATIONS': degrees = [] reg_lambda = lambdas[0] nb_iter = get_hp_to_run(hyper_params=nb_iterations, repetitions=repetitions, satid=SLURM_ARRAY_TASK_ID) prefix_experiment = f'lambda_{reg_lambda}/it_{nb_iter}_reg_{reg_type}' elif expt_type == 'SP_fig4': reg_lambda = lambdas[0] Degree_mdl = get_hp_to_run(hyper_params=degrees, repetitions=repetitions, satid=SLURM_ARRAY_TASK_ID) prefix_experiment = f'fig4_expt_lambda_{reg_lambda}_it_{nb_iter}/deg_{Degree_mdl}' else: raise ValueError( f'Experiment type expt_type={expt_type} does not exist, try a different expt_type.' 
) print('reg_lambda = ', reg_lambda) print('nb_iter = ', nb_iter) #### Get Data set if truth_filename != '': mdl_truth_dict = torch.load('./data/' + truth_filename) D_layers_truth = extract_list_filename(truth_filename) ## load data if data_filename == 'regression_manual': # use hand made data set D0 = 1 lb, ub = -1, 1 freq_sin = 4 #2.3 #f_target = lambda x: np.sin(2*np.pi*freq_sin*x) freq1, freq2 = 3, 2 f_target = lambda x: np.sin(2 * np.pi * freq1 * x + 2 * np.pi * freq2 * x) # N_train = 30 #X_train = np.linspace(lb,ub,N_train).reshape(N_train,D0) X_train = get_chebyshev_nodes(lb, ub, N_train).reshape(N_train, D0) Y_train = f_target(X_train).reshape(N_train, 1) # eps_test = 0.0 lb_test, ub_test = lb + eps_test, ub - eps_test N_test = 100 X_test = np.linspace(lb, ub, N_test).reshape(N_test, D0) #X_test = get_chebyshev_nodes(lb,ub,N_test).reshape(N_test,D0) Y_test = f_target(X_test).reshape(N_test, 1) # data = { 'X_train': X_train, 'Y_train': Y_train, 'X_test': X_test, 'Y_test': Y_test } data_lb, data_ub = lb, ub elif data_filename == 'classification_manual': D0 = 1 lb, ub = -1, 1 N_train = 50 N_test = 600 ## target function freq_sin = 4 #f_target = lambda x: np.sin(2*np.pi*freq_sin*x) #f_target = lambda x: (x-0.25)*(x-0.75)*(x+0.25)*(x+0.75) def f_target(x): poly_feat = PolynomialFeatures(degree=2) x_feature = poly_feat.fit_transform(x) # N x D, [1, x, x^2] normal = np.zeros((1, x_feature.shape[1])) # 1 x D normal[:, [0, 1, 2]] = [0, 1, -2] score = np.dot(normal, x_feature.T) label = score > 0 return label.astype(int) ## define x X_train = np.linspace(lb, ub, N_train).reshape((N_train, D0)) X_test = np.linspace(lb, ub, N_test).reshape((N_test, D0)) ## get y's Y_train = f_target(X_train) Y_test = f_target(X_test) ## data = { 'X_train': X_train, 'Y_train': Y_train, 'X_test': X_test, 'Y_test': Y_test } data_lb, data_ub = lb, ub else: data = np.load('./data/{}'.format(data_filename)) if 'lb' and 'ub' in data: data_lb, data_ub = data['lb'], data['ub'] else: data_lb, data_ub = 0, 1 #TODO change! 
## X_train, Y_train = data['X_train'], data['Y_train'] X_test, Y_test = data['X_test'], data['Y_test'] D_data = X_test.shape[1] ## get nb data points D0 = D_data N_train, _ = X_train.shape N_test, _ = X_test.shape print(f'N_train={N_train}, N_test={N_test}') ## activation function if MDL_2_TRAIN == 'WP': print('--->training WP mdl') adegree = 2 ax = np.concatenate((np.linspace(-20, 20, 100), np.linspace(-10, 10, 1000))) aX = np.concatenate((ax, np.linspace(-2, 2, 100000))) act, c_pinv_relu = get_relu_poly_act2( aX, degree=adegree) # ax**2+bx+c, #[1, x^1, ..., x^D] print('c_pinv_relu = ', c_pinv_relu) #act = relu #act = lambda x: x #act.__name__ = 'linear' # plot_activation_func(act,lb=-20,ub=20,N=1000) # plt.show() #### 2-layered mdl H1 = 12 D0, D1, D2 = D0, H1, 1 D_layers, act = [D0, D1, D2], act # H1,H2 = 20,20 # D0,D1,D2,D3 = D0,H1,H2,1 # D_layers,act = [D0,D1,D2,D3], act # H1,H2,H3 = 15,15,15 # D0,D1,D2,D3,D4 = D0,H1,H2,H3,1 # D_layers,act = [D0,D1,D2,D3,D4], act # H1,H2,H3,H4 = 25,25,25,25 # D0,D1,D2,D3,D4,D5 = D0,H1,H2,H3,H4,1 # D_layers,act = [D0,D1,D2,D3,D4,D5], act nb_layers = len( D_layers ) - 1 #the number of layers include the last layer (the regression layer) biases = [None] + [ True ] + (nb_layers - 1) * [False] #bias only in first layer #biases = [None] + (nb_layers)*[True] # biases in every layer ## mdl degree and D nb_hidden_layers = nb_layers - 1 #note the last "layer" is a summation layer for regression and does not increase the degree of the polynomial Degree_mdl = adegree**( nb_hidden_layers) # only hidden layers have activation functions ## Lift data/Kernelize data poly_feat = PolynomialFeatures(degree=Degree_mdl) Kern_train, Kern_test = poly_feat.fit_transform( X_train), poly_feat.fit_transform(X_test) ## LA models if D0 == 1: c_pinv = np.polyfit(X_train.reshape((N_train, )), Y_train.reshape((N_train, )), Degree_mdl)[::-1] else: ## TODO: https://stackoverflow.com/questions/10988082/multivariate-polynomial-regression-with-numpy c_pinv = np.dot(np.linalg.pinv(Kern_train), Y_train) ## inits init_config = Maps({ 'w_init': 'w_init_normal', 'mu': 0.0, 'std': 0.01, 'bias_init': 'b_fill', 'bias_value': 0.01, 'biases': biases, 'nb_layers': len(D_layers) }) w_inits_sgd, b_inits_sgd = get_initialization(init_config) init_config_standard_sgd = Maps({ 'mu': 0.0, 'std': 0.001, 'bias_value': 0.01 }) mdl_stand_initializer = lambda mdl: lifted_initializer( mdl, init_config_standard_sgd) ## SGD models if truth_filename: mdl_truth = NN(D_layers=D_layers_truth, act=act, w_inits=w_inits_sgd, b_inits=b_inits_sgd, biases=biases) mdl_truth.load_state_dict(mdl_truth_dict) mdl_sgd = NN(D_layers=D_layers, act=act, w_inits=w_inits_sgd, b_inits=b_inits_sgd, biases=biases) ## data to TORCH data = get_data_struct(X_train, Y_train, X_test, Y_test, Kern_train, Kern_test, dtype) ## nb_monomials = int(scipy.misc.comb(D0 + Degree_mdl, Degree_mdl)) ## logging_freq = 20 nb_terms = c_pinv.shape[0] legend_mdl = f'SGD solution weight parametrization, number of monomials={nb_terms}, batch-size={M}, iterations={nb_iter}, step size={eta}' elif MDL_2_TRAIN == 'SP': print('--->training SP mdl') ## Lift data/Kernelize data poly_feat = PolynomialFeatures(degree=Degree_mdl) Kern_train, Kern_test = poly_feat.fit_transform( X_train), poly_feat.fit_transform(X_test) #Kern_train, Kern_test = hermvander(X_train,Degree_mdl), hermvander(X_test,Degree_mdl) #Kern_train, Kern_test = Kern_train.reshape(N_train,Kern_train.shape[2]), Kern_test.reshape(N_test,Kern_test.shape[2]) ## LA models if D0 == 1: #c_pinv = np.polyfit( 
X_train.reshape((N_train,)) , Y_train.reshape((N_train,)) , Degree_mdl )[::-1] #pdb.set_trace() c_pinv = np.dot(np.linalg.pinv(Kern_train), Y_train) else: ## TODO: https://stackoverflow.com/questions/10988082/multivariate-polynomial-regression-with-numpy c_pinv = np.dot(np.linalg.pinv(Kern_train), Y_train) mdl_sgd = get_sequential_lifted_mdl(nb_monomials=c_pinv.shape[0], D_out=1, bias=False) mdl_sgd[0].weight.data.fill_(0) ## data = get_data_struct(X_train, Y_train, X_test, Y_test, Kern_train, Kern_test, dtype) data.X_train, data.X_test = data.Kern_train, data.Kern_test ## nb_monomials = int(scipy.misc.comb(D0 + Degree_mdl, Degree_mdl)) ## logging_freq = 20 nb_terms = c_pinv.shape[0] legend_mdl = f'SGD solution standard parametrization, number of monomials={nb_terms}, batch-size={M}, iterations={nb_iter}, step size={eta}' elif MDL_2_TRAIN == 'PERT': print(f'--->training {MDL_2_TRAIN} mdl') ## no activation functions act = lambda x: x act.__name__ = 'linear' ## Lift data/Kernelize data poly_feat = PolynomialFeatures(degree=Degree_mdl) Kern_train, Kern_test = poly_feat.fit_transform( X_train), poly_feat.fit_transform(X_test) #Kern_train, Kern_test = hermvander(X_train,Degree_mdl), hermvander(X_test,Degree_mdl) #Kern_train,_ = np.linalg.qr(Kern_train) #Kern_test,_ = np.linalg.qr(Kern_test) ## c_pinv = np.dot( np.linalg.pinv(Kern_train), Y_train ) ## TODO: https://stackoverflow.com/questions/10988082/multivariate-polynomial-regression-with-numpy nb_terms = c_pinv.shape[0] #### multiple layered mdl D_layers, act = [nb_terms, 1], act ## W1x = y #D_layers,act = [nb_terms,H1,1], act ## W2W1x = y nb_layers = len( D_layers ) - 1 #the number of layers include the last layer (the regression layer) biases = [None] + [False] + (nb_layers - 1) * [ False ] #bias not even in the first layer, note: its already there via parametrization of kernel ## LA models c_pinv = np.dot(np.linalg.pinv(Kern_train), Y_train) ## inits #0.00001 init_config = Maps({ 'w_init': 'w_init_normal', 'mu': 0.0, 'std': 0.01, 'bias_init': 'b_fill', 'bias_value': 0.01, 'biases': biases, 'nb_layers': len(D_layers) }) w_inits_sgd, b_inits_sgd = get_initialization(init_config) ## SGD models if truth_filename: mdl_truth = NN(D_layers=D_layers_truth, act=act, w_inits=w_inits_sgd, b_inits=b_inits_sgd, biases=biases) mdl_truth.load_state_dict(mdl_truth_dict) mdl_sgd = NN(D_layers=D_layers, act=act, w_inits=w_inits_sgd, b_inits=b_inits_sgd, biases=biases) mdl_sgd.linear_layers[1].weight.data.fill_(0) #pdb.set_trace() ## data to TORCH data = get_data_struct(X_train, Y_train, X_test, Y_test, Kern_train, Kern_test, dtype) ##1560.0 data = get_data_struct(X_train, Y_train, X_test, Y_test, Kern_train, Kern_test, dtype) data.X_train, data.X_test = data.Kern_train, data.Kern_test ## legend_mdl = f'SGD solution y=W_L...W1phi(X), number of monomials={nb_terms}, batch-size={M}, iterations={nb_iter}, step size={eta}' ## nb_monomials = int(scipy.misc.comb(D0 + Degree_mdl, Degree_mdl)) ## #frac_norm = 0.6 frac_norm = 0.0 logging_freq = 1 perturbation_freq = 4000 elif MDL_2_TRAIN == 'TRIG_PERT': Kern_train, Kern_test = trig_kernel_matrix( X_train, Degree_mdl), trig_kernel_matrix(X_test, Degree_mdl) c_pinv = np.dot( np.linalg.pinv(Kern_train), Y_train ) ## TODO: https://stackoverflow.com/questions/10988082/multivariate-polynomial-regression-with-numpy nb_terms = c_pinv.shape[0] #pdb.set_trace() ## no activation functions act = lambda x: x act.__name__ = 'linear' #### multiple layered mdl D_layers, act = [nb_terms, 1], act ## W1x = y #D_layers,act = 
[nb_terms,H1,1], act ## W2W1x = y nb_layers = len( D_layers ) - 1 #the number of layers include the last layer (the regression layer) biases = [None] + [False] + (nb_layers - 1) * [ False ] #bias not even in the first layer, note: its already there via parametrization of kernel ## LA models c_pinv = np.dot(np.linalg.pinv(Kern_train), Y_train) ## inits #0.00001 init_config = Maps({ 'w_init': 'w_init_normal', 'mu': 0.0, 'std': 0.00001, 'bias_init': 'b_fill', 'bias_value': 0.01, 'biases': biases, 'nb_layers': len(D_layers) }) w_inits_sgd, b_inits_sgd = get_initialization(init_config) ## SGD models if truth_filename: mdl_truth = NN(D_layers=D_layers_truth, act=act, w_inits=w_inits_sgd, b_inits=b_inits_sgd, biases=biases) mdl_truth.load_state_dict(mdl_truth_dict) mdl_sgd = NN(D_layers=D_layers, act=act, w_inits=w_inits_sgd, b_inits=b_inits_sgd, biases=biases) mdl_sgd.linear_layers[1].weight.data.fill_(0) #pdb.set_trace() ## data to TORCH data = get_data_struct(X_train, Y_train, X_test, Y_test, Kern_train, Kern_test, dtype) data.X_train, data.X_test = data.Kern_train, data.Kern_test ## legend_mdl = f'SGD solution y=W_L...W1phi(X), number of terms={nb_terms}, batch-size={M}, iterations={nb_iter}, step size={eta}' ## poly_feat = NamedDict( fit_transform=lambda x: trig_kernel_matrix(x, Degree_mdl)) #pdb.set_trace() nb_monomials = int(2 * Degree_mdl + 1) ## #frac_norm = 0.6 frac_norm = 0.1 logging_freq = 1 perturbation_freq = 200 elif MDL_2_TRAIN == 'logistic_regression_mdl': ## Y_train, Y_test = Y_train.reshape((N_train, )), Y_test.reshape( (N_test, )) ## poly_feat = PolynomialFeatures(degree=Degree_mdl) Kern_train, Kern_test = poly_feat.fit_transform( X_train), poly_feat.fit_transform(X_test) # N by D nb_terms = Kern_train.shape[1] ## get model bias = False # cuz the kernel/feature vector has a 1 [..., 1] n_classes = 2 mdl_sgd = torch.nn.Sequential( torch.nn.Linear(Kern_train.shape[1], n_classes, bias=bias)) loss = torch.nn.CrossEntropyLoss(size_average=True) optimizer = torch.optim.SGD(mdl_sgd.parameters(), lr=eta, momentum=0.98) ## data to TORCH data = get_data_struct_classification(X_train, Y_train, X_test, Y_test, Kern_train, Kern_test, dtype_x, dtype_y) data.X_train, data.X_test = data.Kern_train, data.Kern_test ## nb_monomials = int(scipy.misc.comb(D0 + Degree_mdl, Degree_mdl)) ## legend_mdl = 'logistic_regression_mdl' ## reg_lambda = 0 #frac_norm = 0.6 frac_norm = 0.0 logging_freq = 1 perturbation_freq = 600 ## c_pinv = None else: raise ValueError(f'Not implemented yet. 
{MDL_2_TRAIN}') ## check number of monomials print(f'nb_monomials={nb_monomials} \nnb_terms={nb_terms}') if nb_terms != nb_monomials: raise ValueError( f'nb of monomials dont match D0={D0},Degree_mdl={Degree_mdl}, number of monimials fron pinv={nb_terms}, number of monomials analyticall = {nb_monomials}' ) ######################################################################################################################################################## ## some debugging print statements print('nb_iter = ', nb_iter) print('reg_lambda = ', reg_lambda) print('reg_type = ', reg_type) ## arg = Maps(reg_type=reg_type) keep_training = True if MDL_2_TRAIN == 'PERT' or MDL_2_TRAIN == 'TRIG_PERT': train_loss_list_WP, test_loss_list_WP, grad_list_weight_sgd, func_diff_weight_sgd, erm_lamdas_WP, nb_module_params, w_norms = train_SGD_with_perturbations( arg, mdl_sgd, data, M, eta, nb_iter, A, logging_freq, dtype, c_pinv, reg_lambda, perturbation_freq, frac_norm) elif MDL_2_TRAIN == 'logistic_regression_mdl': train_loss_list_WP, test_loss_list_WP, grad_list_weight_sgd, func_diff_weight_sgd, erm_lamdas_WP, nb_module_params, w_norms, train_accs, test_accs = train_SGD_with_perturbations_optim( arg, mdl_sgd, data, optimizer, loss, M, eta, nb_iter, A, logging_freq, dtype_x, dtype_y, perturbation_freq, frac_norm) else: train_loss_list_WP, test_loss_list_WP, grad_list_weight_sgd, func_diff_weight_sgd, erm_lamdas_WP, nb_module_params = train_SGD( arg, mdl_sgd, data, M, eta, nb_iter, A, logging_freq, dtype, c_pinv, reg_lambda) ## print(mdl_sgd[0].weight.data) if MDL_2_TRAIN != 'logistic_regression_mdl': ## errors for PINV mdls train_error_pinv = (1 / N_train) * ( np.linalg.norm(Y_train - np.dot(Kern_train, c_pinv))**2) test_error_pinv = (1 / N_test) * ( np.linalg.norm(Y_test - np.dot(Kern_test, c_pinv))**2) ## errors for MDL_SGD train_error_WP = (1 / N_train) * (mdl_sgd.forward( data.X_train) - data.Y_train).pow(2).sum().data.numpy() test_error_WP = (1 / N_test) * ( mdl_sgd.forward(data.X_test) - Variable(torch.FloatTensor(Y_test))).pow(2).sum().data.numpy() reg = get_regularizer_term(arg, mdl_sgd, reg_lambda, X=data.X_train, Y=data.Y_train, l=2) erm_reg_WP = (1 / N_train) * (mdl_sgd.forward( data.X_train) - data.Y_train).pow(2).sum() + reg_lambda * reg ## condition_number_hessian = np.linalg.cond( np.dot(Kern_train.T, Kern_train)) ## if len(D_layers) <= 2: c_WP = list(mdl_sgd.parameters())[0].data.numpy() c_WP = c_WP.transpose() else: c_WP = np.zeros(c_pinv.shape) ## TODO print('WARNING NEED TO IMPLEMENT C_WP') ## print('----') print( f'condition_number_hessian=np.linalg.cond( np.dot(Kern_train.T,Kern_train))' ) print(f'condition_number_hessian={condition_number_hessian}') print(f'data_filename={data_filename} \n') print(f'train_error_pinv={train_error_pinv}') print(f'test_error_pinv={test_error_pinv}') print() print(f'train_error_WP={train_error_WP}') print(f'test_error_WP={test_error_WP}') print(f'erm_reg_WP={erm_reg_WP}') print() print('||c_WP - c_pinv||^2_2 = ', np.linalg.norm(c_WP - c_pinv, 2)) print(f'c_WP={c_WP}') print(f'c_pinv={c_pinv}') print('----') ## REPORT TIMES seconds = (time.time() - start_time) minutes = seconds / 60 hours = minutes / 60 print("--- %s seconds ---" % seconds) print("--- %s minutes ---" % minutes) print("--- %s hours ---" % hours) print('\a') if kwargs['save_bulk_experiment']: print('saving expt') path_to_save = f'./test_runs/{experiment_name}_reg_{reg_type}_expt_type_{expt_type}_N_train_{N_train}_M_{M}' experiment_results = dict(SLURM_JOBID=SLURM_JOBID, 
SLURM_ARRAY_TASK_ID=SLURM_ARRAY_TASK_ID, reg_type=reg_type, reg_lambda=reg_lambda, nb_iter=nb_iter, Degree_mdl=Degree_mdl, lambdas=lambdas, nb_iterations=nb_iterations, repetitions=repetitions, degrees=degrees, seconds=seconds, minutes=minutes, hours=hours, truth_filename=truth_filename, data_filename=data_filename, expt_type=expt_type, MDL_2_TRAIN=MDL_2_TRAIN, M=M, eta=eta, A=A) if MDL_2_TRAIN == 'PERT' or MDL_2_TRAIN == 'TRIG_PERT': experiment_results['w_norms'] = w_norms experiment_results['train_loss_list_WP'] = train_loss_list_WP experiment_results['test_loss_list_WP'] = test_loss_list_WP experiment_results['grad_list_weight_sgd'] = grad_list_weight_sgd experiment_results['frac_norm'] = frac_norm experiment_results['logging_freq'] = logging_freq experiment_results['perturbation_freq'] = perturbation_freq path_to_save = f'{path_to_save}_frac_norm_{frac_norm}_logging_freq_{logging_freq}_perturbation_freq_{perturbation_freq}' ## path_to_save = f'{path_to_save}/{prefix_experiment}' make_and_check_dir(path_to_save) path_to_save = f'{path_to_save}/satid_{SLURM_ARRAY_TASK_ID}_sid_{SLURM_JOBID}_{month}_{day}' scipy.io.savemat(path_to_save, experiment_results) ## print(f'plotting={kwargs}') print(f'lb_test=') if kwargs['plotting']: print('going to print') if D0 == 1 and MDL_2_TRAIN != 'logistic_regression_mdl': print(f'print D0={D0}') #f_sgd = lambda x: f_mdl_eval(x,mdl_sgd,dtype) plot_1D_stuff( NamedDict(data_lb=data_lb, data_ub=data_ub, dtype=dtype, poly_feat=poly_feat, mdl_sgd=mdl_sgd, data=data, legend_mdl=legend_mdl, c_pinv=c_pinv, X_train=X_train, f_target=f_target)) ## get iterations start = 0 iterations_axis = np.arange(1, nb_iter + 1, step=logging_freq)[start:] ## iterations vs ALL errors legend_comments = f'M={M},eta={eta},nb_iterations={nb_iter},reg_lambda={reg_lambda}' title_comments = f'#linear_layers = {len(D_layers)-1},N_train={N_train},nb_monomials={nb_monomials}, fraction of noise={frac_norm},Recordings:perturbation_freq={perturbation_freq},logging_freq={logging_freq}' plot_iter_vs_train_test_errors(iterations_axis=iterations_axis, train_loss_list=train_loss_list_WP, test_loss_list=test_loss_list_WP, title_comments=title_comments, legend_comments=legend_comments, error_type='Loss') #plot_iter_vs_all_errors(iterations_axis=iterations_axis, train_loss_list=train_loss_list_WP,test_loss_list=test_loss_list_WP,erm_lamdas=erm_lamdas_WP, reg_lambda=reg_lambda) ## iterations vs gradient norm layer = 0 grads = grad_list_weight_sgd[layer] plot_iter_vs_grads_norm2_4_current_layer( iterations_axis=iterations_axis, grads=grads, layer=layer) ## plt.figure() plt_w_norm, = plt.plot(iterations_axis, w_norms[0], color='b') plt_w_norm_legend = f'W.norm(2) = ||W||' plt.legend([plt_w_norm], [plt_w_norm_legend]) ## plt.show() elif D0 == 1 and MDL_2_TRAIN == 'logistic_regression_mdl': ## get iterations start = 0 iterations_axis = np.arange(1, nb_iter + 1, step=logging_freq)[start:] legend_comments = f'M={M},eta={eta},nb_iterations={nb_iter},reg_lambda={reg_lambda}' title_comments = f'#logistic_regression, N_train={N_train},nb_monomials={nb_monomials}, fraction of noise={frac_norm},Recordings:perturbation_freq={perturbation_freq},logging_freq={logging_freq}' ## plot_iter_vs_train_test_errors(iterations_axis=iterations_axis, train_loss_list=train_loss_list_WP, test_loss_list=test_loss_list_WP, title_comments=title_comments, legend_comments=legend_comments, error_type='Loss') plot_iter_vs_train_test_errors(iterations_axis=iterations_axis, train_loss_list=train_accs, test_loss_list=test_accs, 
title_comments=title_comments, legend_comments=legend_comments, error_type='Accuracy') ## iterations vs gradient norm layer = 0 grads = grad_list_weight_sgd[layer] plot_iter_vs_grads_norm2_4_current_layer( iterations_axis=iterations_axis, grads=grads, layer=layer) ## plt.figure() plt_w_norm, = plt.plot(iterations_axis, w_norms[0], color='b') plt_w_norm_legend = f'W.norm(2) = ||W||' plt.legend([plt_w_norm], [plt_w_norm_legend]) ## plt.show()
def save2matlab(path_to_save, stats_collector, other_stats):
    stats = stats_collector.get_stats_dict()
    experiment_results = NamedDict(stats, **other_stats)
    ##
    scipy.io.savemat(path_to_save, experiment_results)
def extract_hist(self, path_to_folder_expts):
    '''
    Extracts a single histogram sample.

    :param path_to_folder_expts: path to the folder holding the experiment's nets and .mat files
    '''
    ## normalized
    train_losses_norm, train_errors_norm = [], []
    test_losses_norm, test_errors_norm = [], []
    hist_train_norm = []
    hist_test_norm = []
    ## unnormalized
    train_losses_unnorm, train_errors_unnorm = [], []
    test_losses_unnorm, test_errors_unnorm = [], []
    hist_train_un = []
    hist_test_un = []
    ## other stats
    epoch_numbers = []
    corruption_probs = []
    stds_inits = []
    ''' get un/normalized net results for all experiments '''
    print(f'os.listdir(path_to_folder_expts) = {os.listdir(path_to_folder_expts)}')
    net_filenames = [filename for filename in os.listdir(path_to_folder_expts) if 'net_' in filename]
    matlab_filenames = [filename for filename in os.listdir(path_to_folder_expts) if '.mat' in filename]
    nb_zero_train_error = 0
    for j, net_filename in enumerate(net_filenames):  # loop through all the nets that were trained
        print('------- part of the loop -------')
        print(f'>jth NET = {j}')
        print(f'>path_to_folder_expts = {path_to_folder_expts}')
        print(f'>net_filename = {net_filename}')
        ''' get matlab file '''
        seed = net_filename.split('seed_')[1].split('_')[0]
        matlab_filename = [filename for filename in matlab_filenames if seed in filename][0]
        matlab_path = os.path.join(path_to_folder_expts, matlab_filename)
        mat_contents = sio.loadmat(matlab_path)
        ''' get results of normalized net if train_error == 0 '''
        train_errors = mat_contents['train_errors'][0]
        corruption_prob = self.get_corruption_prob(path_to_folder_expts)
        print(f'>train_errors final epoch = {train_errors[-1]}')
        print(f'---> corruption_prob={corruption_prob}')
        if train_errors[-1] == 0:
            nb_zero_train_error += 1
            std = mat_contents['stds'][0][0]
            corruption_prob = self.get_corruption_prob(path_to_folder_expts)
            epoch = len(train_errors)
            ''' get results from normalized net '''
            hist_norm, hist_un = self.get_hist_last_layer_activations(net_filename, path_to_folder_expts, corruption_prob)
            results = self.get_results_of_net(net_filename, path_to_folder_expts, corruption_prob)
            ## extract results
            normalized_results, unnormalized_results = results
            train_loss_norm, train_error_norm, test_loss_norm, test_error_norm = normalized_results
            train_loss_un, train_error_un, test_loss_un, test_error_un = unnormalized_results
            print(f'>normalized_results = {normalized_results}')
            print(f'>unnormalized_results = {unnormalized_results}')
            ## extract histograms
            current_hist_train_norm, current_hist_test_norm = hist_norm
            current_hist_train_un, current_hist_test_un = hist_un
            ''' catch error if the train performances do not match '''
            if train_error_norm != 0 or train_error_un != 0:
                print()
                print(f'---> ERROR: train_error_norm != 0 or train_error_un != 0, values are train_error_norm={train_error_norm}, train_error_un={train_error_un}; they should be zero.')
                print(f'path_to_folder_expts = {path_to_folder_expts}\nnet_filename = {net_filename}')
                print(f'seed = {seed}\nmatlab_filename = {matlab_filename}')
                st()
            ''' append results '''
            ## normalized
            train_losses_norm.append(train_loss_norm), train_errors_norm.append(train_error_norm)
            test_losses_norm.append(test_loss_norm), test_errors_norm.append(test_error_norm)
            hist_train_norm.append(current_hist_train_norm)
            hist_test_norm.append(current_hist_test_norm)
            ## unnormalized
            train_losses_unnorm.append(train_loss_un), train_errors_unnorm.append(train_error_un)
            test_losses_unnorm.append(test_loss_un), test_errors_unnorm.append(test_error_un)
            hist_train_un.append(current_hist_train_un)
            hist_test_un.append(current_hist_test_un)
            ''' append stats '''
            epoch_numbers.append(epoch)
            corruption_probs.append(corruption_prob)
            stds_inits.append(std)
    ''' report how many nets reached zero train error '''
    print(f'-------------> # of nets trained = {len(net_filenames)}')
    print(f'-------------> nb_zero_train_error = {nb_zero_train_error}')
    print(f'-------------> frac zero train error = {nb_zero_train_error}/{len(net_filenames)} = {nb_zero_train_error/len(net_filenames)}')
    if nb_zero_train_error != 0:
        ''' organize/collect results '''
        ## IMPORTANT: adding things to this dict is not enough to return it to matlab, also edit collect_all
        results = NamedDict(train_losses_norm=train_losses_norm, train_errors_norm=train_errors_norm,
                            test_losses_norm=test_losses_norm, test_errors_norm=test_errors_norm,
                            train_losses_unnorm=train_losses_unnorm, train_errors_unnorm=train_errors_unnorm,
                            test_losses_unnorm=test_losses_unnorm, test_errors_unnorm=test_errors_unnorm,
                            epoch_numbers=epoch_numbers, corruption_probs=corruption_probs, stds_inits=stds_inits,
                            hist_train_norm=hist_train_norm, hist_test_norm=hist_test_norm,
                            hist_train_un=hist_train_un, hist_test_un=hist_test_un)
        return results
    else:
        return -1
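# Editor's note (illustrative sketch): the `[0]` / `[0][0]` indexing on mat_contents above is
# needed because scipy.io round-trips 1-D vectors and scalars as 2-D MATLAB-style arrays
# ((1, N) and (1, 1) respectively). The self-contained demo below shows that behavior; the
# file name and field values are placeholders.
def _demo_loadmat_shapes(path='/tmp/loadmat_shape_demo.mat'):
    import numpy as np
    import scipy.io as sio

    sio.savemat(path, {'train_errors': np.array([0.3, 0.1, 0.0]), 'stds': 0.01})
    mat_contents = sio.loadmat(path)
    train_errors = mat_contents['train_errors'][0]  # (1, 3) array -> (3,) vector
    std = mat_contents['stds'][0][0]                # (1, 1) array -> scalar
    return train_errors[-1], std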
def extract_results_with_target_loss(self, path_to_folder_expts, target_loss):
    '''
    Extracts specific results of the current experiment, given a specific train loss.

    :param path_to_folder_expts: path to the folder holding the experiment's .mat files
    :param target_loss: train loss to match
    :return: NamedDict with the collected results
    '''
    #### natural labels
    train_losses_norm, train_errors_norm = [], []
    test_losses_norm, test_errors_norm = [], []
    #
    train_losses_unnorm, train_errors_unnorm = [], []
    test_losses_unnorm, test_errors_unnorm = [], []
    #### random labels
    train_losses_norm_rand, train_errors_norm_rand = [], []
    test_losses_norm_rand, test_errors_norm_rand = [], []
    #
    train_losses_unnorm_rand, train_errors_unnorm_rand = [], []
    test_losses_unnorm_rand, test_errors_unnorm_rand = [], []
    ##
    epoch_numbers = []
    corruption_probs = []
    ''' go through results and get the ones with the specific target loss '''
    matlab_filenames = [filename for filename in os.listdir(path_to_folder_expts) if '.mat' in filename]
    for matlab_filename in matlab_filenames:  # essentially looping through all the nets that were trained
        matlab_path = os.path.join(path_to_folder_expts, matlab_filename)
        mat_contents = sio.loadmat(matlab_path)
        ''' choose which epoch/net to read results from '''
        #epoch, seed_id, actual_train_loss = self.match_zero_train_error(mat_contents)
        #epoch, seed_id, actual_train_loss = self.match_train_loss(target_loss, mat_contents)
        epoch, seed_id, actual_train_loss = self.final_train_error(mat_contents)
        if seed_id != -1:  # if matching the train error actually matched something
            #normalized_results, unnormalized_results = self.get_results_from_normalized_net(epoch-1, seed_id, path_to_folder_expts)
            # note the -1 is because files were labeled with 0 as the first epoch (ending at 299, the last one),
            # while the train-errors array has a 0th entry for the virgin (untrained) net
            normalized_results, unnormalized_results, normalized_results_rand, unnormalized_results_rand = self.get_results_from_normalized_net(epoch - 1, seed_id, path_to_folder_expts)
            ## extract natural labels results
            train_loss_norm, train_error_norm, test_loss_norm, test_error_norm = normalized_results
            train_loss_un, train_error_un, test_loss_un, test_error_un = unnormalized_results
            ## extract random labels results
            train_loss_norm_rand, train_error_norm_rand, test_loss_norm_rand, test_error_norm_rand = normalized_results_rand
            train_loss_un_rand, train_error_un_rand, test_loss_un_rand, test_error_un_rand = unnormalized_results_rand
            ''' '''
            corruption_prob = self.get_corruption_prob(path_to_folder_expts)
            ''' append results '''
            #### natural label
            train_losses_norm.append(train_loss_norm), train_errors_norm.append(train_error_norm)
            test_losses_norm.append(test_loss_norm), test_errors_norm.append(test_error_norm)
            #
            train_losses_unnorm.append(train_loss_un), train_errors_unnorm.append(train_error_un)
            test_losses_unnorm.append(test_loss_un), test_errors_unnorm.append(test_error_un)
            #### random label
            train_losses_norm_rand.append(train_loss_norm_rand), train_errors_norm_rand.append(train_error_norm_rand)
            test_losses_norm_rand.append(test_loss_norm_rand), test_errors_norm_rand.append(test_error_norm_rand)
            #
            train_losses_unnorm_rand.append(train_loss_un_rand), train_errors_unnorm_rand.append(train_error_un_rand)
            test_losses_unnorm_rand.append(test_loss_un_rand), test_errors_unnorm_rand.append(test_error_un_rand)
            ##
            epoch_numbers.append(epoch)
            ##
            corruption_probs.append(corruption_prob)
    ''' organize/collect results '''
    results = NamedDict(train_losses_norm=train_losses_norm, train_errors_norm=train_errors_norm,
                        test_losses_norm=test_losses_norm, test_errors_norm=test_errors_norm,
                        train_losses_unnorm=train_losses_unnorm, train_errors_unnorm=train_errors_unnorm,
                        test_losses_unnorm=test_losses_unnorm, test_errors_unnorm=test_errors_unnorm,
                        train_losses_norm_rand=train_losses_norm_rand, train_errors_norm_rand=train_errors_norm_rand,
                        test_losses_norm_rand=test_losses_norm_rand, test_errors_norm_rand=test_errors_norm_rand,
                        train_losses_unnorm_rand=train_losses_unnorm_rand, train_errors_unnorm_rand=train_errors_unnorm_rand,
                        test_losses_unnorm_rand=test_losses_unnorm_rand, test_errors_unnorm_rand=test_errors_unnorm_rand,
                        epoch_numbers=epoch_numbers, corruption_probs=corruption_probs)
    return results
def main(plotting=False,save=False): ''' setup''' start_time = time.time() np.set_printoptions(suppress=True) #Whether or not suppress printing of small floating point values using scientific notation (default False). ##dtype = torch.cuda.FloatTensor # Uncomment this to run on GPU ''' pytorch dtype setup ''' # dtype_y = torch.LongTensor dtype_x = torch.FloatTensor dtype_y = torch.FloatTensor # dtype_x = torch.cuda.FloatTensor # dtype_y = torch.cuda.FloatTensor ''' date parameters setup''' today_obj = date.today() # contains datetime.date(year, month, day); accessible via .day etc day = today_obj.day month = calendar.month_name[today_obj.month] ''' Model to train setup param ''' #MDL_2_TRAIN='logistic_regression_vec_mdl' #MDL_2_TRAIN='logistic_regression_poly_mdl' MDL_2_TRAIN = 'regression_poly_mdl' #MDL_2_TRAIN = 'HBF' ''' data file names ''' truth_filename='' data_filename='' #data_filename = 'classification_manual' data_filename = 'regression_manual' ''' Folder for experiment ''' experiment_name = 'RedoFig5_Cheby' ########## ''' Regularization ''' ## #reg_type = 'VW' #reg_type = 'V2W_D3' reg_type = '' reg = 0 ''' Experiment LAMBDA experiment params ''' # expt_type = 'LAMBDAS' # N_lambdas = 50 # lb,ub = 0.01,10000 # one_over_lambdas = np.linspace(lb,ub,N_lambdas) # lambdas = list( 1/one_over_lambdas ) # lambdas = N_lambdas*[0.0] # nb_iterations = [int(1.4*10**6)] # repetitions = len(lambdas)*[15] ''' Experiment ITERATIONS experiment params ''' # expt_type = 'ITERATIONS' # N_iterations = 30 # lb,ub = 1,60*10**4 # lambdas = [0] # nb_iterations = [ int(i) for i in np.linspace(lb,ub,N_iterations)] # repetitions = len(nb_iterations)*[10] ''' Experiment DEGREE/MONOMIALS ''' expt_type='DEGREES' step_deg=1 lb_deg,ub_deg = 39,39 degrees = list(range(lb_deg,ub_deg+1,step_deg)) lambdas = [0] #nb_iterations = [int(2500000)] #nb_iterations = [int(1000000)] #nb_iterations = [int(5 * 10**6)] #nb_iterations = [int(1.1 * 10 ** 7)] repetitions = len(degrees)*[30] ''' Experiment Number of vector elements''' expt_type='NB_VEC_ELEMENTS' step=1 lb_vec,ub_vec = 30,30 nb_elements_vecs = list(range(lb_vec,ub_vec+1,step)) lambdas = [0] nb_iterations = [int(250000)] #nb_iterations = [int(2500)] repetitions = len(nb_elements_vecs)*[1] ''' Get setup for process to run ''' ps_params = NamedDict() # process params if expt_type == 'LAMBDAS': ps_params.degrees=[] ps_params.reg_lambda = dispatcher_code.get_hp_to_run(hyper_params=lambdas,repetitions=repetitions,satid=satid) ps_params.nb_iter = nb_iterations[0] #ps_params.prefix_experiment = f'it_{nb_iter}/lambda_{reg_lambda}_reg_{reg_type}' elif expt_type == 'ITERATIONS': ps_params.degrees=[] ps_params.reg_lambda = lambdas[0] ps_params.nb_iter = dispatcher_code.get_hp_to_run(hyper_params=nb_iterations,repetitions=repetitions,satid=satid) #ps_params.prefix_experiment = f'lambda_{reg_lambda}/it_{nb_iter}_reg_{reg_type}' elif expt_type == 'DEGREES': ps_params.reg_lambda = lambdas[0] ps_params.degree_mdl = dispatcher_code.get_hp_to_run(hyper_params=degrees,repetitions=repetitions,satid=satid) #ps_params.prefix_experiment = f'fig4_expt_lambda_{reg_lambda}_it_{nb_iter}/deg_{Degree_mdl}' hp_param = ps_params.degree_mdl elif expt_type == 'NB_VEC_ELEMENTS': ps_params.reg_lambda = lambdas[0] ps_params.nb_elements_vec = dispatcher_code.get_hp_to_run(hyper_params=nb_elements_vecs,repetitions=repetitions,satid=satid) ps_params.nb_iter = nb_iterations[0] #ps_params.prefix_experiment = f'it_{ps_params.nb_iter}/lambda_{ps_params.reg_lambda}_reg_{reg_type}' else: raise 
ValueError(f'Experiment type expt_type={expt_type} does not exist, try a different expt_type.') print(f'ps_params={ps_params}') ######## data set ''' Get data set''' if data_filename == 'classification_manual': N_train,N_val,N_test = 81,100,500 lb,ub = -1,1 w_target = np.array([1,1]) f_target = lambda x: np.int64( (np.dot(w_target,x) > 0).astype(int) ) Xtr,Ytr, Xv,Yv, Xt,Yt = data_class.get_2D_classification_data(N_train,N_val,N_test,lb,ub,f_target) elif data_filename == 'regression_manual': N_train,N_val,N_test = 9,81,100 lb,ub = -1,1 f_target = lambda x: np.sin(2*np.pi*4*x) Xtr,Ytr, Xv,Yv, Xt,Yt = data_reg.get_2D_regression_data_chebyshev_nodes(N_train,N_val,N_test,lb,ub,f_target) else: data = np.load( './data/{}'.format(data_filename) ) if 'lb' and 'ub' in data: data_lb, data_ub = data['lb'],data['ub'] else: raise ValueError('Error, go to code and fix lb and ub') N_train,N_test = Xtr.shape[0], Xt.shape[0] print(f'N_train={N_train}, N_test={N_test}') ######## ''' SGD params ''' #optimizer_mode = 'SGD_AND_PERTURB' optimizer_mode = 'SGD_train_then_pert' M = int(Xtr.shape[0]) #M = int(81) eta = 0.2 momentum = 0.0 nb_iter = nb_iterations[0] A = 0.0 ## logging_freq = 1 ''' MODEL ''' if MDL_2_TRAIN=='logistic_regression_vec_mdl': in_features=31 n_classes=1 bias=False mdl = mdl_lreg.get_logistic_regression_mdl(in_features,n_classes,bias) loss = torch.nn.CrossEntropyLoss(size_average=True) ''' stats collector ''' loss_collector = lambda mdl,X,Y: calc_loss(mdl,loss,X,Y) acc_collector = calc_accuracy acc_collector = calc_error stats_collector = tr_alg.StatsCollector(mdl, loss_collector,acc_collector) ''' make features for data ''' poly = PolynomialFeatures(in_features-1) Xtr,Xv,Xt = poly.fit_transform(Xtr), poly.fit_transform(Xv), poly.fit_transform(Xt) elif MDL_2_TRAIN == 'regression_poly_mdl': in_features = degrees[0]+1 mdl = mdl_lreg.get_logistic_regression_mdl(in_features, 1, bias=False) loss = torch.nn.MSELoss(size_average=True) ''' stats collector ''' loss_collector = lambda mdl, X, Y: calc_loss(mdl, loss, X, Y) acc_collector = loss_collector acc_collector = loss_collector stats_collector = tr_alg.StatsCollector(mdl, loss_collector, acc_collector) ''' make features for data ''' poly = PolynomialFeatures(in_features - 1) Xtr, Xv, Xt = poly.fit_transform(Xtr), poly.fit_transform(Xv), poly.fit_transform(Xt) elif MDL_2_TRAIN=='HBF': bias=True D_in, D_out = Xtr.shape[0], Ytr.shape[1] ## RBF std = (Xtr[1] - Xtr[0])/ 0.8 # less than half the avg distance #TODO use np.mean centers=Xtr mdl = hkm.OneLayerHBF(D_in,D_out, centers=centers,std=std, train_centers=False,train_std=False) mdl[0].weight.data.fill_(0) mdl[0].bias.data.fill_(0) loss = torch.nn.MSELoss(size_average=True) ''' stats collector ''' loss_collector = lambda mdl,X,Y: tr_alg.calc_loss(mdl,loss,X,Y) acc_collector = loss_collector ''' dynamic stats collector ''' c_pinv = hkm.get_rbf_coefficients(X=Xtr,centers=Xtr,Y=Ytr,std=std) def diff_GD_vs_PINV(storer, i, mdl, Xtr,Ytr,Xv,Yv,Xt,Yt): c_pinv_torch = torch.FloatTensor( c_pinv ) diff_GD_pinv = (mdl.C.weight.data.t() - c_pinv_torch).norm(2) storer.append(diff_GD_pinv) dynamic_stats = NamedDict(diff_GD_vs_PINV=([],diff_GD_vs_PINV)) ## stats_collector = tr_alg.StatsCollector(mdl, loss_collector,acc_collector,dynamic_stats=dynamic_stats) else: raise ValueError(f'MDL_2_TRAIN={MDL_2_TRAIN}') ''' TRAIN ''' perturbfreq = 1.1 * 10**5 perturb_magnitude = 0.45 if optimizer_mode =='SGD_AND_PERTURB': ## momentum = 0.0 optim = torch.optim.SGD(mdl.parameters(), lr=eta, momentum=momentum) ## reg_lambda = 
ps_params.reg_lambda tr_alg.SGD_perturb(mdl, Xtr,Ytr,Xv,Yv,Xt,Yt, optim,loss, M,eta,nb_iter,A ,logging_freq, dtype_x,dtype_y, perturbfreq,perturb_magnitude, reg=reg,reg_lambda=reg_lambda, stats_collector=stats_collector) elif optimizer_mode == 'SGD_train_then_pert': iterations_switch_mode = 1 # never perturb #iterations_switch_mode = nb_iter # always perturb iterations_switch_mode = nb_iter/2 # perturb for half print(f'iterations_switch_mode={iterations_switch_mode}') ## optimizer = torch.optim.SGD(mdl.parameters(), lr=eta, momentum=momentum) ## reg_lambda = ps_params.reg_lambda tr_alg.SGD_pert_then_train(mdl, Xtr,Ytr,Xv,Yv,Xt,Yt, optimizer,loss, M,nb_iter ,logging_freq ,dtype_x,dtype_y, perturbfreq,perturb_magnitude, iterations_switch_mode, reg,reg_lambda, stats_collector) else: raise ValueError(f'MDL_2_TRAIN={MDL_2_TRAIN} not implemented') seconds,minutes,hours = utils.report_times(start_time) ''' Plots and Print statements''' print('\n----\a\a') print(f'some SGD params: batch_size={M}, eta={eta}, nb_iterations={nb_iter}') if save: ''' save experiment results to maltab ''' experiment_results=stats_collector.get_stats_dict() experiment_results=NamedDict(seconds=seconds,minutes=minutes,hours=hours,**experiment_results) save2matlab.save_experiment_results_2_matlab(experiment_results=experiment_results, root_path=f'./test_runs_flatness3', experiment_name=experiment_name, training_config_name=f'nb_iterations_{nb_iterations[0]}_N_train_{Xtr.shape[0]}_N_test_{Xt.shape[0]}_batch_size_{M}_perturb_freq_{perturbfreq}_perturb_magnitude_{perturb_magnitude}_momentum_{momentum}_iterations_switch_mode_{iterations_switch_mode}', main_experiment_params=f'{expt_type}_lambda_{ps_params.reg_lambda}_it_{nb_iter}_reg_{reg_type}', expt_type=f'expt_type_{expt_type}_{hp_param}', matlab_file_name=f'satid_{satid}_sid_{sj}_{month}_{day}' ) if MDL_2_TRAIN=='HBF': ''' print statements R/HBF''' print(f'distance_btw_data_points={Xtr[1] - Xtr[0]}') print(f'std={std}') print(f'less than half the average distance?={(std < (Xtr[1] - Xtr[0])/2)}') beta = (1.0/std)**2 rank = np.linalg.matrix_rank( np.exp( -beta*hkm.euclidean_distances_manual(x=Xtr,W=centers.T) ) ) print(f'rank of Kernel matrix = Rank(K) = {rank}') ''' plots for R/HBF''' f_mdl = lambda x: mdl( Variable(torch.FloatTensor(x),requires_grad=False) ).data.numpy() f_pinv = lambda x: hkm.f_rbf(x,c=c_pinv,centers=Xtr,std=std) f_target = f_target iterations = np.array(range(0,nb_iter)) N_denseness = 1000 legend_hyper_params=f'N_train={Xtr.shape[0]},N_test={Xt.shape[0]},batch-size={M},learning step={eta},# iterations = {nb_iter} momentum={momentum}, Model=Gaussian, # centers={centers.shape[0]}, std={std[0]}' ''' PLOT ''' ## plots plot_utils.plot_loss_errors(iterations,stats_collector,test_error_pinv=data_utils.l2_np_loss(f_pinv(Xt),Yt),legend_hyper_params=legend_hyper_params) plot_utils.visualize_1D_reconstruction(lb,ub,N_denseness, f_mdl,f_target=f_target,f_pinv=f_pinv,X=Xtr,Y=Ytr,legend_data_set='Training data points') plot_utils.plot_sgd_vs_pinv_distance_during_training(iterations,stats_collector) #plot_utils.print_gd_vs_pinv_params(mdl,c_pinv) plt.show() elif MDL_2_TRAIN=='logistic_regression_vec_mdl': ''' arguments for plotting things ''' f_mdl = lambda x: mdl( Variable(torch.FloatTensor(x),requires_grad=False) ).data.numpy() f_target = lambda x: -1*(w_target[0]/w_target[1])*x iterations = np.array(range(0,nb_iter)) N_denseness = 1000 legend_hyper_params=f'N_train={Xtr.shape[0]},N_test={Xt.shape[0]},batch-size={M},learning step={eta},# iterations = {nb_iter} 
momentum={momentum}, Model=Logistic Regression' ''' PLOT ''' ## plots plot_utils.plot_loss_errors(iterations,stats_collector,legend_hyper_params=legend_hyper_params) plot_utils.plot_loss_classification_errors(iterations,stats_collector,legend_hyper_params=legend_hyper_params) plot_utils.visualize_classification_data_learned_planes_2D(lb,ub,N_denseness,Xtr,Ytr,f_mdl,f_target) plot_utils.plot_weight_norm_vs_iterations(iterations,stats_collector.w_norms[0]) plt.show() if plotting: legend_hyper_params = f'N_train={Xtr.shape[0]},N_test={Xt.shape[0]},batch-size={M},learning step={eta},# iterations = {nb_iter} momentum={momentum}, Model=Regression' iterations = np.array(range(0, nb_iter)) plot_utils.plot_loss_errors(iterations, stats_collector, legend_hyper_params=legend_hyper_params) plot_utils.plot_weight_norm_vs_iterations(iterations, stats_collector.w_norms[0]) plt.show()
    #R_x = Y_train_R_x
    R_x = R_a_mdl(a)
    R_x = R_x.view(1, D)
    ''' compute x.^2 = [..., |x_i|^2, ...] '''
    x_2 = x**2
    x_2 = x_2.view(D, 1)
    ''' Regularization R(f) = <R_x, x.^2> '''
    R_f = R_x.mm(x_2)
    return R_f

#R_a_mdl = lambda a_norm: 1/Y_train_R_x
#R_a_mdl = lambda a_norm: 1/Variable(torch.Tensor(x_real))
#R_a_mdl = lambda a_norm: 1/Variable(torch.Tensor(x_real**2))
R_a_mdl = lambda a_norm: 1 / Variable(torch.Tensor(x_real / sum(x_real)))
R_x_params = NamedDict(a=a, R_a_mdl=R_a_mdl)
R_x = fix_softmax
# R_x_params = NamedDict(a=a_norm,R_a_mdl=R_a_mdl)
# R_x = get_reg_softmax
# R_x_params = NamedDict({'a':a,'A_param':A_param,'t_param':t_param,'sigma_param':sigma_param})
# R_x = get_reg
''' SGD mdl '''
bias = False
mdl_sgd = torch.nn.Sequential(torch.nn.Linear(D, D_out, bias=bias))
#mdl_sgd[0].weight.data.fill_(0)
#print(f'mdl_sgd[0].weight.data = {mdl_sgd[0].weight.data.numpy().shape}')
#mdl_sgd[0].weight.data = torch.FloatTensor(x_pinv.reshape(1,D))
''' train SGD mdl '''
M = int(N / 4)
eta = 0.000001
eta_R_x = 0.000000
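# Editor's illustration: the regularizer above computes R(f) = <R_x, x.^2> as a (1, D) row
# vector times a (D, 1) column vector. A minimal self-contained sketch of that shape
# bookkeeping with plain tensors (D and the values are arbitrary placeholders):
def _demo_regularizer_inner_product(D=5):
    import torch

    R_x = torch.rand(1, D)       # row vector of per-coordinate regularization weights
    x = torch.rand(D)
    x_2 = (x ** 2).view(D, 1)    # column vector [..., |x_i|^2, ...]
    R_f = R_x.mm(x_2)            # (1, D) @ (D, 1) -> (1, 1) scalar regularizer value
    return R_f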