def train_on_dataset(dataset, reload=False):
    # Noiseless training split for the requested dataset.
    train_dict = split_count_and_lensing_maps_by_dataset(dataset,
                                                         config=config,
                                                         order=order,
                                                         noiseless_m=True,
                                                         noiseless_kg=True,
                                                         scramble=True,
                                                         gaussian=gaussian)
    train = LabeledDataset(train_dict["x"], train_dict["y"])
    val_dict = split_count_and_lensing_maps_by_dataset("TESTLITE",
                                                       config=config,
                                                       order=order,
                                                       noiseless_m=True,
                                                       noiseless_kg=True,
                                                       scramble=True,
                                                       gaussian=gaussian)
    val = LabeledDataset(val_dict["x"], val_dict["y"])
    model = model_v3(exp_name="simple-{0}-{1}-{2}".format(name, config, sys.argv[4]),
                     gc_depth=12,
                     input_channels=channels,
                     num_epochs=4,
                     nsides=[1024, 1024, 512, 512, 256, 256, 128, 128, 64, 32, 16, 8, 4],
                     filters=[32] * 6 + [64] * 6,
                     var_k=[5] * 6 + [10] * 6,
                     fc_layers=[128],
                     learning_rate=lr)
    # With reload=True, continue training in the existing session instead of
    # starting from scratch.
    if reload:
        accuracy_validation, loss_validation, loss_training, t_step = model.fit(
            train, val, session=model._get_session())
    else:
        accuracy_validation, loss_validation, loss_training, t_step = model.fit(train, val)
    np.savez_compressed(
        "../metrics/v3-simple-{0}-{1}-{2}-{3}-noiseless.npz".format(name, config, sys.argv[4], dataset),
        lval=loss_validation,
        ltrain=loss_training,
        t=t_step)
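# Usage sketch for train_on_dataset: one fresh 4-epoch run, then a second call
# that resumes from the checkpointed session via reload=True. The split name
# "TRAINLITE" is borrowed from the other scripts here and is an assumption,
# not taken from this one.
train_on_dataset("TRAINLITE")
train_on_dataset("TRAINLITE", reload=True)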
def train_one_noisy_quartile_epoch(quartile, lr, iteration):
    # Small fixed validation set: the first 64 noisy maps of TEST.
    val_dict = split_count_and_lensing_maps_by_dataset("TEST", config=config, order=order, scramble=True)
    val_dict["x"] = val_dict["x"][:64]
    val_dict["y"] = val_dict["y"][:64]
    val = LabeledDataset(val_dict["x"], val_dict["y"])
    train_dict = split_count_and_lensing_maps_by_dataset(quartile, config=config, order=order, scramble=True)
    train = LabeledDataset(train_dict["x"], train_dict["y"])
    model = model_by_architecture("data1",
                                  num_epochs=1,
                                  learning_rate=lr,
                                  input_channels=channels,
                                  nmaps=45,
                                  order=order,
                                  exp_name="final2-mixed",
                                  nfilters=k)
    # Only the very first epoch starts from scratch; every later epoch resumes
    # from the checkpointed session.
    if iteration == 1 and quartile == "Q1":
        accuracy_validation, loss_validation, loss_training, t_step = model.fit(train, val)
    else:
        accuracy_validation, loss_validation, loss_training, t_step = model.fit(
            train, val, session=model._get_session())
    np.savez_compressed("../metrics/vdata1-final2-mixed-metrics-{0}-{1}.npz".format(iteration, quartile),
                        lval=loss_validation,
                        ltrain=loss_training,
                        t=t_step)
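# A minimal driver sketch for train_one_noisy_quartile_epoch: sweep the four
# quartiles once per outer iteration, starting fresh only at (1, "Q1") as the
# function expects. The iteration count and the decay schedule below are
# assumptions, not the original configuration.
for iteration in range(1, 17):
    lr = 1e-4 * 0.9 ** (iteration - 1)  # hypothetical decay schedule
    for quartile in ["Q1", "Q2", "Q3", "Q4"]:
        train_one_noisy_quartile_epoch(quartile, lr, iteration)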
def train_one_epoch(lr, noise_level, iteration): train_dict = split_count_and_lensing_maps_by_dataset("TRAINLITE", config=config, order=order, density_kg=density_kg_by_iter(noise_level), noiseless_kg=noiseless_kg_by_iter(noise_level), scramble=True) train = LabeledDataset(train_dict["x"], train_dict["y"]) val_dict = split_count_and_lensing_maps_by_dataset("TESTLITE", config=config, order=order, density_kg=density_kg_by_iter(noise_level), noiseless_kg=noiseless_kg_by_iter(noise_level), scramble=True) val = LabeledDataset(val_dict["x"], val_dict["y"]) model = model_by_architecture("data1", num_epochs=1, learning_rate=lr, decay_factor=1, input_channels=channels, nmaps=8, order=order, exp_name="adaptive1-1", nfilters=k) if noise_level == 0 and iteration == 0: accuracy_validation, loss_validation, loss_training, t_step = model.fit(train, val) else: accuracy_validation, loss_validation, loss_training, t_step = model.fit(train, val, session=model._get_session()) np.savez_compressed("../metrics/adaptive1-1-metrics-{0}-{1}.npz".format(noise_level, iteration), lval=loss_validation, ltrain=loss_training, t=t_step)
def train_one_epoch(lr, noise_level, iteration):
    train_dict = split_count_and_lensing_maps_by_dataset(
        "TRAINLITE",
        config=config,
        order=order,
        density_m=density_count_by_iter(noise_level, nlevels=noise_levels),
        noiseless_m=noiseless_count_by_iter(noise_level),
        density_kg=density_kg_by_iter(noise_level, nlevels=noise_levels),
        noiseless_kg=noiseless_kg_by_iter(noise_level),
        scramble=True,
        gaussian=(gaussian == "GAUSS"))
    train = LabeledDataset(train_dict["x"], train_dict["y"])
    val_dict = split_count_and_lensing_maps_by_dataset(
        "TESTLITE",
        config=config,
        order=order,
        density_m=density_count_by_iter(noise_level, nlevels=noise_levels),
        noiseless_m=noiseless_count_by_iter(noise_level),
        density_kg=density_kg_by_iter(noise_level, nlevels=noise_levels),
        noiseless_kg=noiseless_kg_by_iter(noise_level),
        scramble=True,
        gaussian=(gaussian == "GAUSS"))
    val = LabeledDataset(val_dict["x"], val_dict["y"])
    model = model_v3(exp_name="{0}-{1}-{2}".format(name, config, gaussian),
                     gc_depth=16,
                     input_channels=channels,
                     nsides=[1024, 1024, 512, 512, 256, 256, 128, 128,
                             64, 64, 32, 32, 16, 16, 8, 8, 4],
                     filters=[32] * 8 + [64] * 8,
                     var_k=[5] * 8 + [10] * 8,
                     fc_layers=[128, 128],
                     learning_rate=lr)
    if noise_level == 0 and iteration == 0:
        accuracy_validation, loss_validation, loss_training, t_step = model.fit(train, val)
    else:
        accuracy_validation, loss_validation, loss_training, t_step = model.fit(
            train, val, session=model._get_session())
    np.savez_compressed("../metrics/v3-{0}-{1}-{2}-{3}-{4}.npz".format(
        name, config, gaussian, noise_level, iteration),
                        lval=loss_validation,
                        ltrain=loss_training,
                        t=t_step)
    print("-----------------END OF EPOCH-----------------")
    print("NOISE LEVEL: {0}, ITERATION: {1}, LR: {2}".format(noise_level, iteration, lr))
    print("VALIDATION LOSS (used for curriculum strategy): ", loss_validation)
    print("----------------------------------")
    return np.mean(loss_validation)
def train_one_epoch(lr, noise_level, iteration):
    train_dict = split_count_and_lensing_maps_by_dataset(
        "TRAINLITE",
        config=config,
        order=order,
        density_kg=density_kg_by_iter(noise_level),
        noiseless_kg=noiseless_kg_by_iter(noise_level),
        scramble=True)
    train = LabeledDataset(train_dict["x"], train_dict["y"])
    val_dict = split_count_and_lensing_maps_by_dataset(
        "TESTLITE",
        config=config,
        order=order,
        density_kg=density_kg_by_iter(noise_level),
        noiseless_kg=noiseless_kg_by_iter(noise_level),
        scramble=True)
    val = LabeledDataset(val_dict["x"], val_dict["y"])
    model = model_v2_biasless(
        exp_name="1-fid-leaky-relu-{0}-{1}-{2}-{3}".format(
            noise_levels, ilr, decay_noise, decay_train),
        gc_depth=12,
        nsides=[1024, 1024, 512, 512, 256, 256, 128, 128, 64, 32, 16, 8, 4],
        filters=[32] * 6 + [64] * 6,
        var_k=[5] * 6 + [10] * 6,
        fc_layers=[128],
        learning_rate=lr,
        activation_func="leaky_relu")
    if noise_level == 0 and iteration == 0:
        accuracy_validation, loss_validation, loss_training, t_step = model.fit(train, val)
    else:
        accuracy_validation, loss_validation, loss_training, t_step = model.fit(
            train, val, session=model._get_session())
    np.savez_compressed(
        "../metrics/v2-biasless-1-fid-leaky-relu-{0}-{1}-{2}-{3}-{4}-{5}.npz".format(
            noise_levels, ilr, decay_noise, decay_train, noise_level, iteration),
        lval=loss_validation,
        ltrain=loss_training,
        t=t_step)
    print("-----------------END OF EPOCH-----------------")
    print("NOISE LEVEL: {0}, ITERATION: {1}, LR: {2}".format(noise_level, iteration, lr))
    print("VALIDATION LOSS (used for curriculum strategy): ", loss_validation)
    print("----------------------------------")
    return np.mean(loss_validation)
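# A sketch of the curriculum driver implied by the two train_one_epoch variants
# above: keep training at one noise level until the mean validation loss stops
# improving, then advance. noise_levels, ilr, decay_noise and decay_train all
# appear in the experiment names above; the plateau test and the per-level cap
# here are assumptions, not the original schedule.
lr = ilr
for noise_level in range(noise_levels):
    best_loss = np.inf
    for iteration in range(20):  # hypothetical per-level cap
        loss = train_one_epoch(lr, noise_level, iteration)
        if loss >= best_loss:
            break  # validation loss plateaued: move to the next noise level
        best_loss = loss
        lr *= decay_train  # decay within a noise level
    lr *= decay_noise      # extra decay when the noise level changes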
def single_experiment(sigma, order, sigma_noise, experiment_type):
    ename = '_' + experiment_type
    Nside = 1024
    EXP_NAME = '40sim_{}sides_{}noise_{}order_{}sigma{}'.format(
        Nside, sigma_noise, order, sigma, ename)

    x_raw_train, labels_raw_train, x_raw_std = experiment_helper.get_training_data(sigma, order)
    x_raw_test, labels_test, _ = experiment_helper.get_testing_data(
        sigma, order, sigma_noise, x_raw_std)

    ret = experiment_helper.data_preprossing(
        x_raw_train, labels_raw_train, x_raw_test, sigma_noise, feature_type=None)
    features_train, labels_train, features_validation, labels_validation, features_test = ret

    training = LabeledDatasetWithNoise(features_train, labels_train, end_level=sigma_noise)
    validation = LabeledDataset(features_validation, labels_validation)

    params = get_params(training.N, EXP_NAME, order, Nside, experiment_type)
    model = models.deepsphere(**params)

    # Cleanup before running again.
    shutil.rmtree('summaries/{}/'.format(EXP_NAME), ignore_errors=True)
    shutil.rmtree('checkpoints/{}/'.format(EXP_NAME), ignore_errors=True)

    model.fit(training, validation)

    error_validation = experiment_helper.model_error(model, features_validation, labels_validation)
    print('The validation error is {}%'.format(error_validation * 100), flush=True)

    error_test = experiment_helper.model_error(model, features_test, labels_test)
    print('The testing error is {}%'.format(error_test * 100), flush=True)

    return error_test
def single_experiment(sigma, order, sigma_noise, experiment_type, new, n_neighbors):
    ename = '_' + experiment_type
    Nside = 1024
    if Nside == 1024:
        data_path = '/mnt/scratch/lts2/mdeff/deepsphere/data/same_psd/'
    else:
        data_path = 'data/same_psd/'
    EXP_NAME = 'cosmo' if new else 'oldgraph'
    EXP_NAME += '_{}sides_{}noise_{}order_{}sigma_{}neighbor{}_fold3'.format(
        Nside, sigma_noise, order, sigma, n_neighbors, ename)

    x_raw_train, labels_raw_train, x_raw_std = experiment_helper.get_training_data(
        sigma, order, data_path=data_path)
    x_raw_test, labels_test, _ = experiment_helper.get_testing_data(
        sigma, order, sigma_noise, x_raw_std, data_path=data_path[:-9])

    ret = experiment_helper.data_preprossing(
        x_raw_train, labels_raw_train, x_raw_test, sigma_noise, feature_type=None)
    features_train, labels_train, features_validation, labels_validation, features_test = ret

    training = LabeledDatasetWithNoise(features_train, labels_train, end_level=sigma_noise)
    validation = LabeledDataset(features_validation, labels_validation)

    # Cleanup before running again.
    shutil.rmtree('summaries/{}/'.format(EXP_NAME), ignore_errors=True)
    shutil.rmtree('checkpoints/{}/'.format(EXP_NAME), ignore_errors=True)

    params = hyperparameters.get_params(training.N, EXP_NAME, order, Nside, experiment_type)
    model = models.deepsphere(**params, new=new, n_neighbors=n_neighbors)
    accuracy_validation, loss_validation, loss_training, t_step, t_batch = model.fit(
        training, validation)
    print("inference time: ", t_batch / params["batch_size"])

    error_validation = experiment_helper.model_error(
        model, features_validation[:, :, np.newaxis], labels_validation)
    print('The validation error is {}%'.format(error_validation * 100), flush=True)

    error_test = experiment_helper.model_error(
        model, features_test[:, :, np.newaxis], labels_test)
    print('The testing error is {}%'.format(error_test * 100), flush=True)

    return error_test, t_batch
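# A sketch of how single_experiment might be swept to compare the new graph
# construction against the old one across neighbor counts; every grid value
# below (sigma, order, noise levels, neighbor counts, experiment type) is an
# assumption for illustration, not the original configuration.
results = {}
for new in [True, False]:
    for n_neighbors in [8, 20, 40]:
        for sigma_noise in [0.5, 1, 2]:
            error_test, t_batch = single_experiment(3, 2, sigma_noise, 'FCN',
                                                    new, n_neighbors)
            results[(new, n_neighbors, sigma_noise)] = (error_test, t_batch)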
def dataset_global(datas, lon=None, lat=None, alt=None, w_days=None, add_feat=True, ratio=0.7):
    n_days = datas.shape[0]
    limit = int(ratio * n_days)
    mean = datas.mean(axis=(0, 1))[0]
    std = datas.std(axis=(0, 1))[0]
    x_train = np.atleast_3d((datas[:limit, :, 0] - mean) / std)
    labels_train = w_days[:limit]
    x_val = np.atleast_3d((datas[limit:, :, 0] - mean) / std)
    labels_val = w_days[limit:]
    if add_feat:
        # location of stations
        coords_v = np.stack([lon, lat], axis=-1)
        coords_v = (coords_v - coords_v.mean(axis=0)) / coords_v.std(axis=0)
        # altitude of stations
        alt_v = alt
        alt_v = (alt_v - alt_v.mean()) / alt_v.std()
        x_train = np.dstack([
            x_train,
            np.repeat(coords_v[np.newaxis, :], x_train.shape[0], axis=0),
            np.repeat(alt_v[np.newaxis, :], x_train.shape[0], axis=0),
            np.repeat(w_days[:limit, np.newaxis], x_train.shape[1], axis=1)
        ])
        x_val = np.dstack([
            x_val,
            np.repeat(coords_v[np.newaxis, :], x_val.shape[0], axis=0),
            np.repeat(alt_v[np.newaxis, :], x_val.shape[0], axis=0),
            np.repeat(w_days[limit:, np.newaxis], x_val.shape[1], axis=1)
        ])
    training = LabeledDataset(x_train, labels_train)
    validation = LabeledDataset(x_val, labels_val)
    return training, validation
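# Quick usage sketch for dataset_global with synthetic inputs; the shapes
# (365 days, 3000 stations, 1 feature) are assumptions chosen only to
# exercise the function.
rng = np.random.default_rng(0)
datas = rng.normal(size=(365, 3000, 1))
lon = rng.uniform(-180, 180, size=3000)
lat = rng.uniform(-90, 90, size=3000)
alt = rng.uniform(0, 3000, size=3000)
w_days = rng.integers(0, 7, size=365).astype(float)
training, validation = dataset_global(datas, lon, lat, alt, w_days, add_feat=True)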
def single_experiment(sigma, order, sigma_noise, experiment_type):
    ename = '_' + experiment_type
    Nside = 1024
    data_path = '../../data/same_psd/'
    EXP_NAME = '40sim_{}sides_{}noise_{}order_{}sigma{}'.format(
        Nside, sigma_noise, order, sigma, ename)

    x_raw_train, labels_raw_train, x_raw_std = experiment_helper.get_training_data(
        sigma, order, data_path=data_path)
    x_raw_test, labels_test, _ = experiment_helper.get_testing_data(
        sigma, order, sigma_noise, x_raw_std, data_path=data_path[:-9])

    ret = experiment_helper.data_preprossing(
        x_raw_train, labels_raw_train, x_raw_test, sigma_noise, feature_type=None)
    features_train, labels_train, features_validation, labels_validation, features_test = ret

    # Reorder the HEALPix samples into 2-D images for the CNN.
    nx = Nside // order
    nlevels = np.round(np.log2(nx)).astype(int)
    index = build_index(nlevels).astype(int)

    features_train = features_train[:, index]
    features_validation = features_validation[:, index]

    shuffle = np.random.permutation(len(features_test))
    features_test = features_test[:, index]
    features_test = features_test[shuffle]
    labels_test = labels_test[shuffle]

    training = LabeledDatasetWithNoise(features_train, labels_train, end_level=sigma_noise)
    validation = LabeledDataset(features_validation, labels_validation)

    # Better implementation, but it doesn't work for some reason.
    # params = hyperparameters.get_params_CNN2D(training.N, EXP_NAME, order, Nside, experiment_type)
    # model = Healpix2CNN(**params)
    params = get_params(training.N, EXP_NAME, order, Nside, experiment_type)
    model = models.cnn2d(**params)

    # Cleanup before running again.
    shutil.rmtree('summaries/{}/'.format(EXP_NAME), ignore_errors=True)
    shutil.rmtree('checkpoints/{}/'.format(EXP_NAME), ignore_errors=True)

    model.fit(training, validation)

    error_validation = experiment_helper.model_error(model, features_validation, labels_validation)
    print('The validation error is {}%'.format(error_validation * 100), flush=True)

    error_test = experiment_helper.model_error(model, features_test, labels_test)
    print('The testing error is {}%'.format(error_test * 100), flush=True)

    return error_test
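# build_index is not defined in this snippet. A plausible sketch, assuming the
# intent is to map a 2**nlevels x 2**nlevels grid onto HEALPix NEST ordering,
# which within a patch is the standard z-order quadtree; the orientation used
# by the original helper may differ.
def build_index(nlevels):
    """Return a 2-D array whose (i, j) entry is the nested index of pixel (i, j)."""
    if nlevels == 0:
        return np.array([[0]])
    sub = build_index(nlevels - 1)
    n = sub.size
    return np.block([[sub, sub + n], [sub + 2 * n, sub + 3 * n]])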
config = "k" channels = total_channels(config) order = 2 k = 64 step = int(sys.argv[1]) val_dict = split_count_and_lensing_maps_by_dataset("TEST", config=config, order=order, scramble=True) val_dict["x"] = val_dict["x"][:64] val_dict["y"] = val_dict["y"][:64] val = LabeledDataset(val_dict["x"], val_dict["y"]) def train_one_quartile_epoch(quartile, lr, iteration): train_dict = split_count_and_lensing_maps_by_dataset(quartile, config=config, order=order, scramble=True) train = LabeledDataset(train_dict["x"], train_dict["y"]) model = model_by_architecture("data1", num_epochs=1, learning_rate=lr, input_channels=channels, nmaps=45,
k = 64
epochs1 = 20
epochs2 = 30
lr1 = 1e-4
lr2 = 1e-4

# Noiseless validation set: the first 64 maps of TEST.
val_dict = split_count_and_lensing_maps_by_dataset("TEST",
                                                   config=config,
                                                   order=order,
                                                   scramble=True,
                                                   noiseless_m=True,
                                                   noiseless_kg=True)
val_dict["x"] = val_dict["x"][:64]
val_dict["y"] = val_dict["y"][:64]
val = LabeledDataset(val_dict["x"], val_dict["y"])

# Noiseless Q1 training set, cut to the first nmaps maps
# (12 * order**2 patches per map).
train_dict = split_count_and_lensing_maps_by_dataset("Q1",
                                                     config=config,
                                                     noiseless_m=True,
                                                     noiseless_kg=True,
                                                     order=order,
                                                     scramble=True)
train_dict["x"] = train_dict["x"][:nmaps * 12 * order * order]
train_dict["y"] = train_dict["y"][:nmaps * 12 * order * order]
train = LabeledDataset(train_dict["x"], train_dict["y"])

# The source breaks off after learning_rate=lr1; the remaining keyword
# arguments are assumed to match the other model_by_architecture calls.
model = model_by_architecture("data1",
                              num_epochs=epochs1,
                              learning_rate=lr1,
                              input_channels=channels,
                              nmaps=nmaps,
                              order=order,
                              nfilters=k)
import os

import numpy as np

from deepsphere.data import LabeledDataset

# Run on GPU.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

PATH_TO_OUTPUT = "/pylon5/ch4s8kp/adiraj21/flaskwrapper/output/"
PATH_TO_VAL = "/pylon5/ch4s8kp/adiraj21/DeepSphere/validation_101.npz"

# Cosmology labels for each dataset split.
y = {}
for dataset_name in dataset_names():
    y[dataset_name] = cosmologies_list(dataset_name)

val = LabeledDataset(
    np.load(PATH_TO_VAL)['arr_0'][:, :, 0],
    np.load(PATH_TO_VAL)['arr_1'])

model = model_by_architecture("v11", num_epochs=1, learning_rate=1e-4, eval_frequency=6)

for epoch in range(16):
    for i in range(4):
        for j in range(5):
            if (5 * i + j) % 4 == 0:
                model_by_architecture("v11", num_epochs=1, learning_rate=1e-4)
            y_train = y["Q{}".format(i + 1)][4 * j:4 * (j + 1)]
            # The source breaks off mid-call here; the remaining keyword
            # arguments of split_poisson_maps_by_vals are unknown.
            train = split_poisson_maps_by_vals(y_train, noiseless=True)
import numpy as np

from deepsphere.data import LabeledDataset

# Run on GPU.
# os.environ["CUDA_VISIBLE_DEVICES"] = "0"

# First stage: noiseless Q1 maps.
data = split_poisson_maps_by_dataset("Q1",
                                     path_to_output="data/flask101/output/",
                                     scramble=True,
                                     noiseless=True,
                                     deepsphere_dataset=True)

# val = LabeledDataset(np.load("$SCRATCH/DeepSphere/validation_101.npz")['arr_0'][:, :, 0],
#                      np.load("$SCRATCH/DeepSphere/validation_101.npz")['arr_1'])
val = LabeledDataset(
    np.load("validation_101.npz")['arr_0'][:, :, 0],
    np.load("validation_101.npz")['arr_1'])

model = model_by_architecture("v11",
                              path_to_checkpoints="checkpoints/",
                              num_epochs=16,
                              learning_rate=1e-4)
accuracy_validation, loss_validation, loss_training, t_step = model.fit(data, val)

# Second stage: the same maps with noise at density=0.4.
data = split_poisson_maps_by_dataset("Q1",
                                     path_to_output="data/flask101/output/",
                                     scramble=True,
                                     density=0.4,
                                     deepsphere_dataset=True)
# The source breaks off mid-call here; the remaining keyword arguments are
# assumed to mirror the first model_by_architecture call above.
model = model_by_architecture("v11",
                              path_to_checkpoints="checkpoints/",
                              num_epochs=16,
                              learning_rate=1e-4)
def dataset_reg(datas, lon=None, lat=None, alt=None, w_days=None, add_feat=False, days_pred=5, ratio=0.7):
    n_days, n_stations, n_feature = datas.shape
    limit = int(ratio * (n_days - days_pred))
    # Stack days_pred shifted copies so every sample sees the past days_pred days.
    dataset_x = np.vstack([np.roll(datas, -i, axis=0) for i in range(days_pred)])
    dataset_x = dataset_x.reshape(days_pred, n_days, n_stations, n_feature).transpose((1, 2, 3, 0))
    # days_x = np.vstack([np.roll(w_days, -i, axis=0) for i in range(days_pred)])
    # days_x = days_x.reshape(days_pred, n_days).transpose()
    x_train = dataset_x[:limit, :, :, :].transpose(0, 2, 1, 3).reshape(-1, n_stations, days_pred)
    labels_train = datas[days_pred:limit + days_pred, :, :].transpose(0, 2, 1).reshape(-1, n_stations)
    x_val = dataset_x[limit:n_days - days_pred, :, :, :].transpose(0, 2, 1, 3).reshape(-1, n_stations, days_pred)
    labels_val = datas[days_pred + limit:, :, :].transpose(0, 2, 1).reshape(-1, n_stations)
    if add_feat:
        # location of stations
        coords_v = np.stack([lon, lat], axis=-1)
        coords_v = (coords_v - coords_v.mean(axis=0)) / coords_v.std(axis=0)
        # altitude of stations
        alt_v = alt
        alt_v = (alt_v - alt_v.mean()) / alt_v.std()
        x_train = np.dstack([
            x_train,
            # np.broadcast_to(month_x[:n_days-days_pred,np.newaxis, :], x_train.shape),
            np.repeat(coords_v[np.newaxis, :], x_train.shape[0], axis=0),
            np.repeat(alt_v[np.newaxis, :], x_train.shape[0], axis=0),
            np.tile(np.repeat(w_days[:limit, np.newaxis], x_train.shape[1], axis=1), (2, 1))
        ])
        # np.broadcast_to(days_x[:n_days-days_pred,np.newaxis, :], x_train.shape)])
        x_val = np.dstack([
            x_val,
            # np.broadcast_to(month_x[:n_days-days_pred,np.newaxis, :], x_val.shape),
            np.repeat(coords_v[np.newaxis, :], x_val.shape[0], axis=0),
            np.repeat(alt_v[np.newaxis, :], x_val.shape[0], axis=0),
            np.tile(np.repeat(w_days[limit:n_days - days_pred, np.newaxis], x_val.shape[1], axis=1), (2, 1))
        ])
        # np.broadcast_to(days_x[:n_days-days_pred,np.newaxis, :], x_val.shape)])
    training = LabeledDataset(x_train, labels_train)
    validation = LabeledDataset(x_val, labels_val)
    return training, validation
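# Usage sketch for dataset_reg, mirroring the dataset_global example above;
# the shapes are assumptions. Each sample carries days_pred consecutive days
# per station, and the label is the value on the following day.
rng = np.random.default_rng(0)
datas = rng.normal(size=(365, 3000, 1))
w_days = rng.integers(0, 7, size=365).astype(float)
training, validation = dataset_reg(datas, w_days=w_days, add_feat=False, days_pred=5)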
params['regularization'] = 0   # Amount of L2 regularization over the weights (will be divided by the number of weights).
params['dropout'] = 0.5        # Percentage of neurons to keep.

# Training.
params['num_epochs'] = 12      # Number of passes through the training data.
params['batch_size'] = 16      # Number of samples per training batch. Should be a power of 2 for greater speed.
params['eval_frequency'] = 15  # Frequency of model evaluations during training (influences training time).
params['scheduler'] = lambda step: 1e-1  # Constant learning rate.
params['optimizer'] = lambda lr: tf.train.GradientDescentOptimizer(lr)
# params['optimizer'] = lambda lr: tf.train.MomentumOptimizer(lr, momentum=0.5)
# params['optimizer'] = lambda lr: tf.train.AdamOptimizer(lr, beta1=0.9, beta2=0.999, epsilon=1e-8)

model = models.deepsphere(**params)

# Cleanup before running again.
shutil.rmtree('summaries/{}/'.format(EXP_NAME), ignore_errors=True)
shutil.rmtree('checkpoints/{}/'.format(EXP_NAME), ignore_errors=True)

training = LabeledDataset(x_raw_train, labels_train)
testing = LabeledDataset(x_raw_test, labels_test)
accuracy_validation, loss_validation, loss_training, t_step = model.fit(training, testing)

error_train = experiment_helper.model_error(model, x_raw_train, labels_train)
error_test = experiment_helper.model_error(model, x_raw_test, labels_test)
print('The training error is: {:.2%}'.format(error_train))
print('The testing error is: {:.2%}'.format(error_test))