Python prepare_tf_record Examples, ovejero.model_trainer.prepare_tf_record Python Examples

Example #1

0

Show file

File: forward_modeling_tests.py Project: swagnercarena/ovejero

    def test_plot_posterior_contours(self):
        """Smoke test: plotting the posterior contours must not raise.

        Builds a ForwardModel from the test config, selects an image, runs a
        short sampling chain, regenerates the TFRecord in 'train' mode and
        then asks the model to draw the posterior contours. Nothing is
        asserted; the test passes as long as no call raises.
        """
        forward_model = forward_modeling.ForwardModel(self.cfg)

        # Pick the image with index 4 without blocking on the figure.
        forward_model.select_image(4, block=False)
        plt.close('all')

        # Short chain: 3 walkers per parameter, 20 sampling steps.
        chain_file = self.root_path + 'test_chains.h5'
        forward_model.initialize_sampler(3, chain_file)
        forward_model.run_sampler(20)

        # Throw away the existing TFRecord and rebuild it in 'train' mode
        # with the lens parameters before plotting.
        os.remove(self.tf_record_path)
        model_trainer.prepare_tf_record(
            self.cfg, self.root_path, self.tf_record_path, self.lens_params,
            'train')

        # No burn-in, 20 samples, low dpi to keep the figure cheap.
        forward_model.plot_posterior_contours(0, 20, dpi=20, block=False)
        plt.close('all')

        # The chains file is the only artifact this test removes itself.
        os.remove(chain_file)

Example #2

0

Show file

File: bnn_inference_tests.py Project: swagnercarena/ovejero

    def setUp(self):
        """Load the test config and stage the state shared by the tests.

        Reads ``test.json`` from the ``test_data`` directory next to this
        file, records the paths and parameter lists the tests use, overrides
        a few config entries, and runs ``prepare_tf_record`` in 'train' mode
        once to simulate training before deleting the resulting TFRecord.
        Finally both the NumPy and TensorFlow seeds are fixed to 2.
        """
        test_dir = os.path.dirname(os.path.abspath(__file__)) + '/test_data/'
        self.root_path = test_dir

        with open(test_dir + 'test.json', 'r') as config_file:
            self.cfg = json.load(config_file)
        training_cfg = self.cfg['training_params']
        self.batch_size = training_cfg['batch_size']

        # File locations inside the test data directory.
        self.normalized_param_path = test_dir + 'normed_metadata.csv'
        self.normalization_constants_path = test_dir + 'norm.csv'
        self.lens_params_path = test_dir + 'metadata.csv'

        self.lens_params = [
            'external_shear_gamma_ext',
            'external_shear_psi_ext',
            'lens_mass_center_x',
            'lens_mass_center_y',
            'lens_mass_e1',
            'lens_mass_e2',
            'lens_mass_gamma',
            'lens_mass_theta_E',
        ]
        self.num_params = len(self.lens_params)

        # Config overrides applied for every test in this class.
        self.cfg['dataset_params']['normalization_constants_path'] = 'norm.csv'
        training_cfg['final_params'] = self.lens_params
        training_cfg['bnn_type'] = 'diag'
        self.tf_record_path = (
            test_dir + self.cfg['validation_params']['tf_record_path'])

        # Simulate training: build the record once with the final
        # parameters, then drop the record itself.
        self.final_params = [
            'external_shear_g1',
            'external_shear_g2',
            'lens_mass_center_x',
            'lens_mass_center_y',
            'lens_mass_e1',
            'lens_mass_e2',
            'lens_mass_gamma',
            'lens_mass_theta_E_log',
        ]
        model_trainer.prepare_tf_record(
            self.cfg, self.root_path, self.tf_record_path, self.final_params,
            'train')
        os.remove(self.tf_record_path)

        # Make the random draws reproducible.
        np.random.seed(2)
        tf.random.set_seed(2)

Example #3

0

Show file

File: forward_modeling_tests.py Project: swagnercarena/ovejero

    def setUp(self):
        """Load the ovejero and baobab test configs and build the TFRecord.

        Paths are resolved relative to the ``test_data`` directory next to
        this file. The TFRecord is generated in 'train' mode with the final
        parameter list, which is meant to create the normalization file that
        training would have produced.
        """
        test_dir = os.path.dirname(os.path.abspath(__file__)) + '/test_data/'
        self.root_path = test_dir
        self.config_path = test_dir + 'test.json'
        self.baobab_cfg_path = test_dir + 'test_baobab_cfg.py'

        # ovejero config first, then the baobab config.
        self.cfg = model_trainer.load_config(self.config_path)
        self.baobab_cfg = configs.BaobabConfig.from_file(self.baobab_cfg_path)

        # Values mirrored from the bnn_inference tests that are reused here.
        self.lens_params = [
            'external_shear_gamma_ext',
            'external_shear_psi_ext',
            'lens_mass_center_x',
            'lens_mass_center_y',
            'lens_mass_e1',
            'lens_mass_e2',
            'lens_mass_gamma',
            'lens_mass_theta_E',
        ]
        self.normalization_constants_path = test_dir + 'norms.csv'
        self.tf_record_path = test_dir + 'tf_record_test'

        # Stand in for training by writing the record with the final
        # parameters in 'train' mode.
        self.final_params = [
            'external_shear_g1',
            'external_shear_g2',
            'lens_mass_center_x',
            'lens_mass_center_y',
            'lens_mass_e1',
            'lens_mass_e2',
            'lens_mass_gamma',
            'lens_mass_theta_E_log',
        ]
        model_trainer.prepare_tf_record(
            self.cfg, self.root_path, self.tf_record_path, self.final_params,
            'train')

Example #4

0

Show file

    def setUp(self):
        """Build the HierarchicalClass instance shared by the tests.

        Loads ``test.json`` from the ``test_data`` directory next to this
        file, records the config paths and parameter lists, writes a TFRecord
        in 'train' mode, constructs ``self.hclass`` with ``lite_class=True``
        and an e1/e2 -> phi/q parameter map, and then removes the TFRecord.
        """
        test_dir = os.path.dirname(os.path.abspath(__file__)) + '/test_data/'
        self.root_path = test_dir
        with open(test_dir + 'test.json', 'r') as config_file:
            self.cfg = json.load(config_file)

        # Config files describing the interim and target distributions.
        self.interim_baobab_omega_path = test_dir + 'test_baobab_cfg.py'
        self.target_ovejero_omega_path = test_dir + 'test_emp_cfg_prior.py'
        self.target_baobab_omega_path = test_dir + 'test_emp_cfg.py'

        self.lens_params = self.cfg['dataset_params']['lens_params']
        self.num_params = len(self.lens_params)
        self.batch_size = 20

        self.normalized_param_path = test_dir + 'new_metadata.csv'
        self.normalization_constants_path = test_dir + 'norm.csv'
        training_cfg = self.cfg['training_params']
        self.final_params = training_cfg['final_params']
        self.cfg['dataset_params']['normalization_constants_path'] = 'norm.csv'
        training_cfg['bnn_type'] = 'diag'
        self.tf_record_path = (
            test_dir + self.cfg['validation_params']['tf_record_path'])

        # The record must exist while the class is constructed; it is
        # deleted again at the bottom of this method.
        model_trainer.prepare_tf_record(
            self.cfg,
            self.root_path,
            self.tf_record_path,
            self.final_params,
            train_or_test='train')

        # Mapping from the training parameterization to the test one.
        self.train_to_test_param_map = {
            'orig_params': ['lens_mass_e1', 'lens_mass_e2'],
            'transform_func': ellipticity2phi_q,
            'new_params': ['lens_mass_phi', 'lens_mass_q'],
        }

        self.hclass = hierarchical_inference.HierarchicalClass(
            self.cfg,
            self.interim_baobab_omega_path,
            self.target_ovejero_omega_path,
            self.root_path,
            self.tf_record_path,
            self.target_baobab_omega_path,
            self.train_to_test_param_map,
            lite_class=True)

        os.remove(self.tf_record_path)

Example #5

0

Show file

File: model_trainer_tests.py Project: swagnercarena/ovejero

    def test_prepare_tf_record(self):
        """Exercise prepare_tf_record in both 'train' and 'test' modes.

        A 'test' call made before any 'train' call is expected to raise
        ValueError. After that the record is written in 'train' mode and
        then in 'test' mode; each time the file must exist and its parsed
        content is handed to ``dataset_comparison`` together with the number
        of ``X*.npy`` files found in the test directory.
        """
        with open(self.root_path + 'test.json', 'r') as config_file:
            cfg = json.load(config_file)

        batch_size = 10

        def parse_image(example):
            # Feature spec: the image and its bookkeeping fields plus one
            # float per lens parameter.
            data_features = {
                'image': tf.io.FixedLenFeature([], tf.string),
                'height': tf.io.FixedLenFeature([], tf.int64),
                'width': tf.io.FixedLenFeature([], tf.int64),
                'index': tf.io.FixedLenFeature([], tf.int64),
            }
            data_features.update({
                lens_param: tf.io.FixedLenFeature([], tf.float32)
                for lens_param in self.lens_params
            })
            return tf.io.parse_single_example(example, data_features)

        def check_record():
            # The record must exist and its content must pass the shared
            # comparison against the npy files on disk.
            num_npy = len(glob.glob(self.root_path + 'X*.npy'))
            self.assertTrue(os.path.exists(self.tf_record_path))
            raw_dataset = tf.data.TFRecordDataset(self.tf_record_path)
            dataset = raw_dataset.map(parse_image).batch(batch_size)
            dataset_comparison(self, dataset, batch_size, num_npy)

        # 'test' mode before 'train' mode has run must be rejected.
        with self.assertRaises(ValueError):
            model_trainer.prepare_tf_record(
                cfg, self.root_path, self.tf_record_path, self.lens_params,
                'test')

        # Write and verify in 'train' mode, then again in 'test' mode.
        for train_or_test in ('train', 'test'):
            model_trainer.prepare_tf_record(
                cfg, self.root_path, self.tf_record_path, self.lens_params,
                train_or_test)
            check_record()

        # Remove everything this test left on disk.
        for leftover in (self.tf_record_path,
                         self.root_path + 'new_metadata.csv',
                         self.root_path + 'norms.csv'):
            os.remove(leftover)

Example #6

0

Show file

File: bnn_inference_tests.py Project: swagnercarena/ovejero

    def test_specify_test_set_path(self):
        """Check InferenceClass behaviour when test_set_path is supplied.

        With a valid path a new TFRecord named ``tf_record_test_val`` must
        appear under that path. With a freshly created, empty directory the
        constructor must raise FileNotFoundError.
        """
        test_set_path = self.root_path
        expected_record = test_set_path + 'tf_record_test_val'

        # The record must not be lying around from an earlier run.
        self.assertFalse(os.path.isfile(expected_record))

        # Simulate training again so that the normalization path the
        # inference class wants is present, then drop the training record.
        model_trainer.prepare_tf_record(
            self.cfg, self.root_path, self.tf_record_path, self.final_params,
            'train')
        os.remove(self.tf_record_path)

        bnn_inference.InferenceClass(
            self.cfg, test_set_path=test_set_path, lite_class=True)

        # Constructing the class should have written the record.
        self.assertTrue(os.path.isfile(expected_record))

        # An empty directory has nothing to build a record from.
        fake_test_path = self.root_path + 'fake_data'
        os.mkdir(fake_test_path)
        with self.assertRaises(FileNotFoundError):
            bnn_inference.InferenceClass(
                self.cfg, test_set_path=fake_test_path, lite_class=True)

        # Test cleanup
        os.rmdir(fake_test_path)
        os.remove(expected_record)
        os.remove(self.root_path + 'new_metadata.csv')
        os.remove(self.normalization_constants_path)

Example #7

0

Show file

File: bnn_inference.py Project: swagnercarena/ovejero

    def __init__(self, cfg, lite_class=False, test_set_path=None):
        """
        Set up the inference state described by the configuration dict.

        Parameters:
        cfg (dict): The dictionary attained from reading the json config
            file. Note that it is stored by reference and, when
            test_set_path is given, its validation root path is
            overwritten in place.
        lite_class (bool): If True, the BNN model and loss are not built
            and both attributes are set to None. This saves memory, but
            will cause an error if the BNN samples have not already been
            drawn.
        test_set_path (str): The path to the set of images that the
            forward modeling image will be pulled from. If None, the
            validation root path already in cfg is used.
        """
        self.cfg = cfg
        self.lite_class = lite_class

        # A user-supplied test set replaces the validation root path.
        if test_set_path is not None:
            self.cfg['validation_params']['root_path'] = test_set_path

        # Only the full class pays for building the model and its loss.
        if not self.lite_class:
            self.model, self.loss = model_trainer.model_loss_builder(
                cfg, verbose=True)
        else:
            self.model = None
            self.loss = None

        # Location of the TFRecord holding the validation set.
        validation_cfg = cfg['validation_params']
        self.tf_record_path_v = os.path.join(
            validation_cfg['root_path'], validation_cfg['tf_record_path'])

        # Parameters and batch size needed for the computation.
        training_cfg = cfg['training_params']
        self.final_params = training_cfg['final_params']
        self.final_params_print_names = cfg['inference_params'][
            'final_params_print_names']
        self.num_params = len(self.final_params)
        self.batch_size = training_cfg['batch_size']
        self.norm_images = training_cfg['norm_images']
        self.baobab_config_path = training_cfg['baobab_config_path']

        # Reuse an existing record, otherwise write one in 'test' mode.
        if os.path.exists(self.tf_record_path_v):
            print('TFRecord found at %s' % (self.tf_record_path_v))
        else:
            print('Generating new TFRecord at %s' % (self.tf_record_path_v))
            model_trainer.prepare_tf_record(
                cfg,
                validation_cfg['root_path'],
                self.tf_record_path_v,
                self.final_params,
                train_or_test='test')

        self.tf_dataset_v = data_tools.build_tf_dataset(
            self.tf_record_path_v,
            self.final_params,
            self.batch_size,
            1,
            self.baobab_config_path,
            norm_images=self.norm_images)

        self.bnn_type = cfg['training_params']['bnn_type']

        # Flip matrices, following the LensingLossFunctions initializer.
        # The first entry is the identity, i.e. "no flip"; each flip pair
        # adds a diagonal matrix with -1 at the flipped positions.
        self.flip_pairs = cfg['training_params']['flip_pairs']
        self.flip_mat_list = [np.diag(np.ones(self.num_params))]
        for flip_pair in self.flip_pairs:
            diagonal_signs = np.ones(self.num_params)
            diagonal_signs[flip_pair] = -1
            self.flip_mat_list.append(np.diag(diagonal_signs))

        self.loss_class = bnn_alexnet.LensingLossFunctions(
            self.flip_pairs, self.num_params)

        # Placeholders that stay empty until samples have been generated.
        self.y_pred = None
        self.y_cov = None
        self.y_std = None
        self.y_test = None
        self.predict_samps = None
        self.samples_init = False

Example #8

0

Show file

File: hierarchical_inference.py Project: swagnercarena/ovejero

	def __init__(self,cfg,interim_baobab_omega_path,target_ovejero_omega_path,
		test_dataset_path,test_dataset_tf_record_path,
		target_baobab_omega_path=None,train_to_test_param_map=None,
		lite_class=False):
		"""
		Set up the state needed for hierarchical inference on a test set.

		Parameters:
		cfg (dict): The configuration dictionary. It is stored by reference
			and its training batch size is overwritten with the number of
			X*.npy files found in test_dataset_path.
		interim_baobab_omega_path (str): Path handed to
			configs.BaobabConfig.from_file to load the interim config.
		target_ovejero_omega_path (str): Path handed to load_prior_config to
			load the target config.
		test_dataset_path (str): Directory that is searched for X*.npy files
			and used as the root path when a TFRecord has to be written.
		test_dataset_tf_record_path (str): Path of the test set TFRecord. It
			is generated in 'test' mode if it does not exist yet.
		target_baobab_omega_path (str): Optional path of the baobab config
			for the test set. It is forwarded to build_tf_dataset and, when
			not None, also used to extract the true hyperparameter values.
		train_to_test_param_map (dict): Optional mapping whose 'orig_params'
			entries are replaced, position by position, with its
			'new_params' entries in the test parameter list.
		lite_class (bool): Forwarded to bnn_inference.InferenceClass.
		"""
		self.cfg = cfg

		# Training parameter names come from the config; the test names
		# start as a copy and are edited below when a map is supplied.
		dataset_cfg = cfg['dataset_params']
		self.lens_params_train = dataset_cfg['lens_params']
		self.lens_params_test = copy.deepcopy(self.lens_params_train)

		if train_to_test_param_map is not None:
			# Remember where each swapped parameter sits and rename it in
			# the test list.
			self.lens_params_change_ind = []
			for map_pos, train_name in enumerate(
				train_to_test_param_map['orig_params']):
				train_index = self.lens_params_train.index(train_name)
				self.lens_params_change_ind.append(train_index)
				self.lens_params_test[train_index] = (
					train_to_test_param_map['new_params'][map_pos])

		self.lens_params_log = dataset_cfg['lens_params_log']
		self.gampsi = dataset_cfg['gampsi']
		self.final_params = cfg['training_params']['final_params']

		# Load the interim and target configs and turn each into an
		# evaluation dictionary.
		self.interim_baobab_omega = configs.BaobabConfig.from_file(
			interim_baobab_omega_path)
		self.target_baobab_omega = load_prior_config(target_ovejero_omega_path)
		self.interim_eval_dict = build_eval_dict(self.interim_baobab_omega,
			self.lens_params_train,baobab_config=True)
		self.target_eval_dict = build_eval_dict(self.target_baobab_omega,
			self.lens_params_test,baobab_config=False)
		self.train_to_test_param_map = train_to_test_param_map

		# One batch covers the whole test set, so the batch size in the
		# config is replaced by the number of npy files.
		self.num_params = len(self.lens_params_train)
		self.norm_images = cfg['training_params']['norm_images']
		npy_pattern = os.path.join(test_dataset_path,'X*.npy')
		n_npy_files = len(glob.glob(npy_pattern))
		self.cfg['training_params']['batch_size'] = n_npy_files

		# The inference class is what generates the samples.
		self.infer_class = bnn_inference.InferenceClass(self.cfg,lite_class)

		# The inference class loads the validation set named in the config,
		# which is not the set wanted here, so its dataset is replaced with
		# one built from the test set record.
		if os.path.exists(test_dataset_tf_record_path):
			print('TFRecord found at %s'%(test_dataset_tf_record_path))
		else:
			print('Generating new TFRecord at %s'%(test_dataset_tf_record_path))
			model_trainer.prepare_tf_record(cfg,test_dataset_path,
				test_dataset_tf_record_path,self.final_params,
				train_or_test='test')
		self.tf_dataset = data_tools.build_tf_dataset(
			test_dataset_tf_record_path,self.final_params,n_npy_files,1,
			target_baobab_omega_path,norm_images=self.norm_images)
		self.infer_class.tf_dataset_v = self.tf_dataset

		# The sampler has not been initialized yet.
		self.sampler_init = False

		self.prob_class = ProbabilityClass(self.target_eval_dict,
			self.interim_eval_dict,self.lens_params_train,self.lens_params_test)

		if target_baobab_omega_path is None:
			self.true_hyp_values = None
		else:
			# Build an eval dict from the baobab config of the test set and
			# look up, by name, the value of every hyperparameter that the
			# target eval dict lists.
			temp_config = configs.BaobabConfig.from_file(
				target_baobab_omega_path)
			temp_eval_dict = build_eval_dict(temp_config,self.lens_params_test,
				baobab_config=True)
			self.true_hyp_values = [
				temp_eval_dict['hyp_values'][
					temp_eval_dict['hyp_names'].index(name)]
				for name in self.target_eval_dict['hyp_names']]

Example #9

0

Show file

    def test_gen_samples(self):
        """Check gen_samples when a train-to-test parameter map is in use.

        The real model is swapped for a toy model that returns seeded random
        draws. After generating samples, the last predicted parameter must be
        the log of the last lens sample, and the predictions at indices 4 and
        5, pushed through ``ellipticity2phi_q``, must match the lens samples
        at those indices.
        """

        class ToyModel():
            """Stand-in model whose predictions are seeded random draws."""

            def __init__(self, mean, covariance, batch_size, al_std):
                # Seed here so the draws are the same on every run.
                np.random.seed(4)
                self.mean = mean
                self.covariance = covariance
                self.batch_size = batch_size
                self.al_std = al_std

            def predict(self, image):
                # The image is ignored; it is only accepted so the call
                # signature matches what gen_samples uses.
                mean_draws = np.random.multivariate_normal(
                    self.mean, self.covariance, self.batch_size)
                al_block = np.zeros(
                    (self.batch_size, len(self.mean))) + self.al_std
                return tf.constant(
                    np.concatenate([mean_draws, al_block], axis=-1),
                    tf.float32)

        # Identity covariance around a mean of 2 for every parameter.
        toy_mean = np.ones(self.num_params) * 2
        toy_covariance = np.diag(np.ones(self.num_params))
        diag_model = ToyModel(toy_mean, toy_covariance, self.batch_size,
                              -1000)

        # Leave only the identity in the flip list so nothing is flipped.
        self.hclass.infer_class.flip_mat_list = [
            np.diag(np.ones(self.num_params))
        ]

        # Write fake normalization constants (two rows, 0.0 and 1.0, per
        # parameter) and a TFRecord. The record's content is not checked,
        # but it has to exist so that images can be pulled from it.
        fake_norms = pd.DataFrame(data={
            lens_param: np.array([0.0, 1.0])
            for lens_param in self.final_params + self.lens_params
        })
        fake_norms.to_csv(self.normalization_constants_path, index=False)
        model_trainer.prepare_tf_record(
            self.cfg, self.root_path, self.tf_record_path, self.final_params,
            'test')

        # Install the toy model and clear the lite flag so that sample
        # generation does not complain.
        self.hclass.infer_class.model = diag_model
        self.hclass.infer_class.lite_class = False

        self.hclass.gen_samples(100)

        predicted = self.hclass.predict_samps
        lens_samps = hierarchical_inference.lens_samps

        # The last predicted parameter is the log of the last lens sample.
        log_gap = np.abs(predicted[:, :, -1] - np.log(lens_samps[-1]))
        self.assertAlmostEqual(np.max(log_gap), 0)

        # Predictions 4 and 5 (e1, e2) map onto lens samples 4 and 5
        # (phi, q).
        phi, q = ellipticity2phi_q(predicted[:, :, 4], predicted[:, :, 5])
        np.testing.assert_almost_equal(phi, lens_samps[4])
        np.testing.assert_almost_equal(q, lens_samps[5])

        # Clean up the files we generated
        for generated in (self.normalization_constants_path,
                          self.normalized_param_path,
                          self.tf_record_path):
            os.remove(generated)

Example #10

0

Show file

    def test_gen_samples(self):
        """Check that gen_samples produces, saves, reloads and subsamples.

        The real model is swapped for a toy model that returns seeded random
        draws. The test then checks that every lens sample row was filled,
        that the log and shear transformations between lens samples and
        predicted samples hold, that ``pt_omegai`` matches the interim
        distribution, that samples written to a save path are reused on the
        next call, and that subsampling returns a prefix of the saved
        samples.
        """

        class ToyModel():
            """Stand-in model whose predictions are seeded random draws."""

            def __init__(self, mean, covariance, batch_size, al_std):
                # Seed here so the draws are the same on every run.
                np.random.seed(4)
                self.mean = mean
                self.covariance = covariance
                self.batch_size = batch_size
                self.al_std = al_std

            def predict(self, image):
                # The image is ignored; it is only accepted so the call
                # signature matches what gen_samples uses.
                mean_draws = np.random.multivariate_normal(
                    self.mean, self.covariance, self.batch_size)
                al_block = np.zeros(
                    (self.batch_size, len(self.mean))) + self.al_std
                return tf.constant(
                    np.concatenate([mean_draws, al_block], axis=-1),
                    tf.float32)

        # Start with a simple covariance matrix example.
        mean = np.ones(self.num_params) * 2
        covariance = np.diag(np.ones(self.num_params))
        al_std = -1000
        diag_model = ToyModel(mean, covariance, self.batch_size, al_std)

        # We don't want any flipping going on
        self.hclass.infer_class.flip_mat_list = [
            np.diag(np.ones(self.num_params))
        ]

        # Write fake normalization constants (two rows, 0.0 and 1.0, per
        # parameter) and a TFRecord. The record's content is not checked,
        # but it has to exist so that images can be pulled from it.
        fake_norms = pd.DataFrame(data={
            lens_param: np.array([0.0, 1.0])
            for lens_param in self.final_params + self.lens_params
        })
        fake_norms.to_csv(self.normalization_constants_path, index=False)
        model_trainer.prepare_tf_record(self.cfg, self.root_path,
                                        self.tf_record_path, self.final_params,
                                        'test')

        # Replace the real model with our fake model and clear the lite flag
        # so the system doesn't complain when we try to generate samples.
        self.hclass.infer_class.model = diag_model
        self.hclass.infer_class.lite_class = False

        self.hclass.gen_samples(100)

        # First, make sure all of the values of lens_samps were filled out.
        for param_index in range(self.num_params):
            self.assertGreater(
                np.sum(hierarchical_inference.lens_samps[param_index]), 0)

        # Check that the parameters that got changed did so in the right way.
        # The last predicted parameter is the log of the last lens sample.
        self.assertAlmostEqual(
            np.max(
                np.abs(self.hclass.predict_samps[:, :, -1] -
                       np.log(hierarchical_inference.lens_samps[-1]))), 0)
        # Now check the polar to cartesian conversion for the shears: lens
        # samples 0 and 1 (magnitude, angle) against predictions 0 and 1.
        gamma = hierarchical_inference.lens_samps[0]
        ang = hierarchical_inference.lens_samps[1]
        g1 = gamma * np.cos(2 * ang)
        g2 = gamma * np.sin(2 * ang)
        self.assertAlmostEqual(
            np.max(np.abs(self.hclass.predict_samps[:, :, 0] - g1)), 0)
        self.assertAlmostEqual(
            np.max(np.abs(self.hclass.predict_samps[:, :, 1] - g2)), 0)

        # Just make sure this is set and set using the interim dict.
        np.testing.assert_almost_equal(
            self.hclass.prob_class.pt_omegai,
            hierarchical_inference.log_p_xi_omega(
                hierarchical_inference.lens_samps,
                self.hclass.interim_eval_dict['hyp_values'],
                self.hclass.interim_eval_dict, self.hclass.lens_params_train))

        # Now check that if we offer a save path it gets used: the second
        # call with the same path must reproduce the first call's samples.
        save_path = self.root_path + 'save_samps/'
        self.hclass.gen_samples(100, save_path)
        orig_samps = np.copy(hierarchical_inference.lens_samps)
        orig_bnn_samps = np.copy(self.hclass.infer_class.predict_samps)
        self.hclass.gen_samples(100, save_path)

        np.testing.assert_almost_equal(orig_samps,
                                       hierarchical_inference.lens_samps)
        np.testing.assert_almost_equal(orig_bnn_samps,
                                       self.hclass.infer_class.predict_samps)

        # Finally check that subsampling doesn't cause any issues. The
        # expected slice is tied to `subsample` so the two cannot drift
        # apart if the value is ever changed.
        subsample = 10
        self.hclass.gen_samples(100, save_path, subsample=subsample)
        np.testing.assert_almost_equal(hierarchical_inference.lens_samps,
                                       orig_samps[:, :, :subsample])

        # Clean up the files we generated
        os.remove(self.normalization_constants_path)
        os.remove(self.normalized_param_path)
        os.remove(self.tf_record_path)
        for saved_name in ('lens_samps.npy', 'pred.npy', 'al_cov.npy',
                           'images.npy', 'y_test.npy'):
            os.remove(save_path + saved_name)
        os.rmdir(save_path)