Esempio n. 1
0
def distributed_strategy(args):
    """Simulate this worker's share of lensed images and write them to a TFRecord shard.

    Every iteration of the main loop:
      1. takes a batch of COSMOS galaxies and tapers it with a 2D Tukey window,
      2. draws one noise RMS and one PSF FWHM per example from truncated normals,
      3. draws random lens parameters (position, ellipticity, orientation,
         Einstein radius) and rasterises them with ``NISGenerator.kappa_field``,
      4. runs ``PhysicalModel.noisy_forward`` and serialises the batch with
         ``encode_examples``.

    Args:
        args: Parsed command-line namespace. Attributes read here include the
            field-of-view / pixel settings (``kappa_fov``, ``source_fov``,
            ``image_fov``, ``kappa_pixels``, ``lens_pixels``, ``src_pixels``),
            the redshifts (``z_source``, ``z_lens``), the COSMOS input options
            (``cosmos_dir``, ``compression_type``, ``shuffle_cosmos``,
            ``buffer_size``, ``batch_size``), the noise / PSF truncated-normal
            parameters (``noise_rms_*``, ``psf_fwhm_*``, ``psf_cutout_size``),
            the lens parameter ranges (``max_shift``, ``max_ellipticity``,
            ``min_theta_e``, ``max_theta_e``), ``tukey_alpha``, ``len_dataset``
            and ``output_dir``.

    Side effects:
        Writes ``data_{THIS_WORKER}.tfrecords`` into ``args.output_dir`` and
        prints a start and a finish timestamp.
    """
    kappa_gen = NISGenerator(  # only used to generate pixelated kappa fields
        kappa_fov=args.kappa_fov,
        src_fov=args.source_fov,
        pixels=args.kappa_pixels,
        z_source=args.z_source,
        z_lens=args.z_lens
    )

    # Einstein radius range defaults to a fraction of the image field of view.
    min_theta_e = 0.1 * args.image_fov if args.min_theta_e is None else args.min_theta_e
    max_theta_e = 0.45 * args.image_fov if args.max_theta_e is None else args.max_theta_e

    cosmos_files = glob.glob(os.path.join(args.cosmos_dir, "*.tfrecords"))
    cosmos = tf.data.TFRecordDataset(cosmos_files, compression_type=args.compression_type)
    cosmos = cosmos.map(decode_image).map(preprocess_image)
    if args.shuffle_cosmos:
        cosmos = cosmos.shuffle(buffer_size=args.buffer_size, reshuffle_each_iteration=True)
    cosmos = cosmos.batch(args.batch_size)

    # Separable 2D Tukey taper applied to every galaxy cutout.
    window = tukey(args.src_pixels, alpha=args.tukey_alpha)
    window = np.outer(window, window)
    phys = PhysicalModel(
        image_fov=args.image_fov,
        kappa_fov=args.kappa_fov,
        src_fov=args.source_fov,
        pixels=args.lens_pixels,
        kappa_pixels=args.kappa_pixels,
        src_pixels=args.src_pixels,
        method="conv2d"
    )
    # scipy's truncnorm takes its clipping bounds in units of standard
    # deviations from the mean, hence the standardisation.
    noise_a = (args.noise_rms_min - args.noise_rms_mean) / args.noise_rms_std
    noise_b = (args.noise_rms_max - args.noise_rms_mean) / args.noise_rms_std
    psf_a = (args.psf_fwhm_min - args.psf_fwhm_mean) / args.psf_fwhm_std
    psf_b = (args.psf_fwhm_max - args.psf_fwhm_mean) / args.psf_fwhm_std

    options = tf.io.TFRecordOptions(compression_type=args.compression_type)
    with tf.io.TFRecordWriter(os.path.join(args.output_dir, f"data_{THIS_WORKER}.tfrecords"), options) as writer:
        print(f"Started worker {THIS_WORKER} at {datetime.now().strftime('%y-%m-%d_%H-%M-%S')}")
        # Workers stride through the dataset in interleaved batches; the start
        # offset suggests THIS_WORKER is 1-based -- TODO confirm against the launcher.
        for _ in range((THIS_WORKER - 1) * args.batch_size, args.len_dataset, N_WORKERS * args.batch_size):
            # Take the first batch of a fresh pass over the dataset. A new
            # random batch is only obtained when args.shuffle_cosmos is set
            # (the shuffle buffer is re-drawn on every pass).
            for galaxies in cosmos:
                break
            galaxies = window[np.newaxis, ..., np.newaxis] * galaxies

            # Use the size of the batch actually drawn for every per-example
            # quantity, so noise, PSF and lens parameters always line up even
            # if the dataset yields fewer than args.batch_size galaxies.
            batch_size = galaxies.shape[0]

            noise_rms = truncnorm.rvs(noise_a, noise_b, loc=args.noise_rms_mean, scale=args.noise_rms_std, size=batch_size)
            fwhm = truncnorm.rvs(psf_a, psf_b, loc=args.psf_fwhm_mean, scale=args.psf_fwhm_std, size=batch_size)
            psf = phys.psf_models(fwhm, cutout_size=args.psf_cutout_size)

            # Lens centre drawn in polar coordinates (radius up to max_shift).
            _r = tf.random.uniform(shape=[batch_size, 1, 1], minval=0, maxval=args.max_shift)
            _theta = tf.random.uniform(shape=[batch_size, 1, 1], minval=-np.pi, maxval=np.pi)
            x0 = _r * tf.math.cos(_theta)
            y0 = _r * tf.math.sin(_theta)
            ellipticity = tf.random.uniform(shape=[batch_size, 1, 1], minval=0., maxval=args.max_ellipticity)
            phi = tf.random.uniform(shape=[batch_size, 1, 1], minval=-np.pi, maxval=np.pi)
            einstein_radius = tf.random.uniform(shape=[batch_size, 1, 1], minval=min_theta_e, maxval=max_theta_e)

            kappa = kappa_gen.kappa_field(x0, y0, ellipticity, phi, einstein_radius)

            lensed_images = phys.noisy_forward(galaxies, kappa, noise_rms=noise_rms, psf=psf)

            records = encode_examples(
                kappa=kappa,
                galaxies=galaxies,
                lensed_images=lensed_images,
                z_source=args.z_source,
                z_lens=args.z_lens,
                image_fov=phys.image_fov,
                kappa_fov=phys.kappa_fov,
                source_fov=args.source_fov,
                noise_rms=noise_rms,
                psf=psf,
                fwhm=fwhm
            )
            for record in records:
                writer.write(record)
    print(f"Finished work at {datetime.now().strftime('%y-%m-%d_%H-%M-%S')}")
Esempio n. 2
0
def distributed_strategy(args):
    """Benchmark test-time re-optimization of a RIM on VAE-simulated lenses.

    For each of ``args.size // N_WORKERS`` examples, this worker:
      1. restores the U-Net weights from the latest checkpoint,
      2. samples a convergence map and a source from the two VAEs and
         simulates a noisy observation with the physical model,
      3. records the baseline RIM prediction,
      4. fine-tunes the U-Net for ``args.re_optimize_steps`` steps on VAE
         samples drawn in a latent-space ball around the baseline prediction,
      5. keeps the prediction whose final chi squared is closest to 1, and
      6. writes inputs, predictions and metrics to an HDF5 file.

    Args:
        args: Parsed command-line namespace. Attributes read here: ``model``,
            ``l2_amp``, ``kappa_vae``, ``source_vae``, ``experiment_name``,
            ``size``, ``sample_size``, ``re_optimize_steps``,
            ``learning_rate``, ``decay_rate``, ``decay_steps``, ``staircase``,
            ``source_vae_ball_size``, ``kappa_vae_ball_size`` and the three
            ``*_coherence_bins`` settings.

    Side effects:
        Reads model hyperparameters and checkpoints below
        ``$CENSAI_PATH/models`` and creates
        ``$CENSAI_PATH/results/<experiment_name>_<model>_<worker>.h5``.
    """
    psf_pixels = 20
    pixels = 128
    model = os.path.join(os.getenv('CENSAI_PATH'), "models", args.model)

    ps_observation = PowerSpectrum(bins=args.observation_coherence_bins,
                                   pixels=pixels)
    ps_source = PowerSpectrum(bins=args.source_coherence_bins, pixels=pixels)
    ps_kappa = PowerSpectrum(bins=args.kappa_coherence_bins, pixels=pixels)

    phys = PhysicalModel(
        pixels=pixels,
        kappa_pixels=pixels,
        src_pixels=pixels,
        image_fov=7.68,
        kappa_fov=7.68,
        src_fov=3.,
        method="fft",
    )

    # --- RIM (U-Net + recurrent inference wrapper) ---
    with open(os.path.join(model, "unet_hparams.json")) as f:
        unet_params = json.load(f)
    unet_params["kernel_l2_amp"] = args.l2_amp
    unet = Model(**unet_params)
    ckpt = tf.train.Checkpoint(net=unet)
    checkpoint_manager = tf.train.CheckpointManager(ckpt, model, 1)
    checkpoint_manager.checkpoint.restore(
        checkpoint_manager.latest_checkpoint).expect_partial()
    with open(os.path.join(model, "rim_hparams.json")) as f:
        rim_params = json.load(f)
    rim_params["source_link"] = "relu"
    rim = RIM(phys, unet, **rim_params)

    # --- Convergence (kappa) VAE ---
    kvae_path = os.path.join(os.getenv('CENSAI_PATH'), "models",
                             args.kappa_vae)
    with open(os.path.join(kvae_path, "model_hparams.json"), "r") as f:
        kappa_vae_hparams = json.load(f)
    kappa_vae = VAE(**kappa_vae_hparams)
    ckpt1 = tf.train.Checkpoint(step=tf.Variable(1), net=kappa_vae)
    checkpoint_manager1 = tf.train.CheckpointManager(ckpt1, kvae_path, 1)
    checkpoint_manager1.checkpoint.restore(
        checkpoint_manager1.latest_checkpoint).expect_partial()

    # --- Source VAE ---
    svae_path = os.path.join(os.getenv('CENSAI_PATH'), "models",
                             args.source_vae)
    with open(os.path.join(svae_path, "model_hparams.json"), "r") as f:
        source_vae_hparams = json.load(f)
    source_vae = VAE(**source_vae_hparams)
    ckpt2 = tf.train.Checkpoint(step=tf.Variable(1), net=source_vae)
    checkpoint_manager2 = tf.train.CheckpointManager(ckpt2, svae_path, 1)
    checkpoint_manager2.checkpoint.restore(
        checkpoint_manager2.latest_checkpoint).expect_partial()

    model_name = os.path.split(model)[-1]
    # Per-pixel weights proportional to sqrt(kappa), normalised to sum to 1
    # over each map; used to weight the kappa term of the fine-tuning loss.
    wk = tf.keras.layers.Lambda(lambda k: tf.sqrt(k) / tf.reduce_sum(
        tf.sqrt(k), axis=(1, 2, 3), keepdims=True))

    steps = args.re_optimize_steps
    # Standard deviation of the latent-space ball the fine-tuning samples are
    # drawn from. With simulated data one could use the L1 distance to the
    # ground-truth latent code instead; a user-defined value is what would be
    # available with real data.
    z_source_std = args.source_vae_ball_size
    z_kappa_std = args.kappa_vae_ball_size

    with h5py.File(
            os.path.join(
                os.getenv("CENSAI_PATH"), "results", args.experiment_name +
                "_" + model_name + f"_{THIS_WORKER:02d}.h5"), 'w') as hf:
        data_len = args.size // N_WORKERS

        # (name, shape) of every float32 dataset stored in the output file.
        image_shape = [data_len, phys.pixels, phys.pixels, 1]
        source_shape = [data_len, phys.src_pixels, phys.src_pixels, 1]
        kappa_shape = [data_len, phys.kappa_pixels, phys.kappa_pixels, 1]
        series_shape = [data_len, steps]
        dataset_shapes = [
            ("observation", image_shape),
            ("psf", [data_len, psf_pixels, psf_pixels, 1]),
            ("psf_fwhm", [data_len]),
            ("noise_rms", [data_len]),
            ("source", source_shape),
            ("kappa", kappa_shape),
            ("observation_pred", image_shape),
            ("observation_pred_reoptimized", image_shape),
            ("source_pred",
             [data_len, rim.steps, phys.src_pixels, phys.src_pixels, 1]),
            ("source_pred_reoptimized", source_shape),
            ("kappa_pred",
             [data_len, rim.steps, phys.kappa_pixels, phys.kappa_pixels, 1]),
            ("kappa_pred_reoptimized", kappa_shape),
            ("chi_squared", [data_len, rim.steps]),
            ("chi_squared_reoptimized", [data_len, rim.steps]),
            ("chi_squared_reoptimized_series", [data_len, rim.steps, steps]),
            ("sampled_chi_squared_reoptimized_series", series_shape),
            ("source_optim_mse", [data_len]),
            ("source_optim_mse_series", series_shape),
            ("sampled_source_optim_mse_series", series_shape),
            ("kappa_optim_mse", [data_len]),
            ("kappa_optim_mse_series", series_shape),
            ("sampled_kappa_optim_mse_series", series_shape),
            ("latent_kappa_gt_distance_init",
             [data_len, kappa_vae.latent_size]),
            ("latent_source_gt_distance_init",
             [data_len, source_vae.latent_size]),
            ("latent_kappa_gt_distance_end",
             [data_len, kappa_vae.latent_size]),
            ("latent_source_gt_distance_end",
             [data_len, source_vae.latent_size]),
            ("source_coherence_spectrum",
             [data_len, args.source_coherence_bins]),
            ("source_coherence_spectrum_reoptimized",
             [data_len, args.source_coherence_bins]),
            ("observation_coherence_spectrum",
             [data_len, args.observation_coherence_bins]),
            ("observation_coherence_spectrum_reoptimized",
             [data_len, args.observation_coherence_bins]),
            ("kappa_coherence_spectrum",
             [data_len, args.kappa_coherence_bins]),
            ("kappa_coherence_spectrum_reoptimized",
             [data_len, args.kappa_coherence_bins]),
            ("observation_frequencies", [args.observation_coherence_bins]),
            ("source_frequencies", [args.source_coherence_bins]),
            ("kappa_frequencies", [args.kappa_coherence_bins]),
            ("kappa_fov", [1]),
            ("source_fov", [1]),
            ("observation_fov", [1]),
        ]
        for name, shape in dataset_shapes:
            hf.create_dataset(name=name, shape=shape, dtype=np.float32)

        for i in range(data_len):
            checkpoint_manager.checkpoint.restore(
                checkpoint_manager.latest_checkpoint).expect_partial(
                )  # reset model weights

            # Produce an observation
            kappa = 10**kappa_vae.sample(1)
            source = tf.nn.relu(source_vae.sample(1))
            source /= tf.reduce_max(source, axis=(1, 2, 3), keepdims=True)
            noise_rms = 10**tf.random.uniform(shape=[1],
                                              minval=-2.5,
                                              maxval=-1)
            fwhm = tf.random.uniform(shape=[1], minval=0.06, maxval=0.3)
            psf = phys.psf_models(fwhm, cutout_size=psf_pixels)
            observation = phys.noisy_forward(source, kappa, noise_rms, psf)

            # Loop invariants of the optimization below.
            log_kappa = log_10(kappa)
            tiled_psf = tf.tile(psf, [args.sample_size, 1, 1, 1])

            # RIM predictions for kappa and source
            source_pred, kappa_pred, chi_squared = rim.predict(
                observation, noise_rms, psf)
            observation_pred = phys.forward(source_pred[-1], kappa_pred[-1],
                                            psf)
            source_o = source_pred[-1]
            kappa_o = kappa_pred[-1]

            # Latent code of model predictions
            z_source, _ = source_vae.encoder(source_o)
            z_kappa, _ = kappa_vae.encoder(log_10(kappa_o))

            # Ground truth latent code for oracle metrics
            z_source_gt, _ = source_vae.encoder(source)
            z_kappa_gt, _ = kappa_vae.encoder(log_kappa)

            # Re-optimize weights of the model. The optimizer is rebuilt for
            # every example so no optimizer state leaks between examples.
            learning_rate_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
                initial_learning_rate=args.learning_rate,
                decay_rate=args.decay_rate,
                decay_steps=args.decay_steps,
                staircase=args.staircase)
            optim = tf.keras.optimizers.RMSprop(
                learning_rate=learning_rate_schedule)

            chi_squared_series = tf.TensorArray(DTYPE, size=steps)
            source_mse = tf.TensorArray(DTYPE, size=steps)
            kappa_mse = tf.TensorArray(DTYPE, size=steps)
            sampled_chi_squared_series = tf.TensorArray(DTYPE, size=steps)
            sampled_source_mse = tf.TensorArray(DTYPE, size=steps)
            sampled_kappa_mse = tf.TensorArray(DTYPE, size=steps)

            # Start from the baseline prediction. ``kappa_best`` is kept in
            # linear space (it is fed to ``phys.forward`` below), so it must be
            # brought to log10 space before comparing with ``log_kappa``; all
            # other kappa errors in this function are measured in log10 space.
            best = chi_squared
            source_best = source_pred[-1]
            kappa_best = kappa_pred[-1]
            source_mse_best = tf.reduce_mean((source_best - source)**2)
            kappa_mse_best = tf.reduce_mean(
                (log_10(kappa_best) - log_kappa)**2)

            # ===================== Optimization ==============================
            for current_step in tqdm(range(steps)):
                # ===================== VAE SAMPLING ==============================
                # Sample latent code, then decode and forward
                z_s = tf.random.normal(
                    shape=[args.sample_size, source_vae.latent_size],
                    mean=z_source,
                    stddev=z_source_std)
                z_k = tf.random.normal(
                    shape=[args.sample_size, kappa_vae.latent_size],
                    mean=z_kappa,
                    stddev=z_kappa_std)
                sampled_source = tf.nn.relu(source_vae.decode(z_s))
                sampled_source /= tf.reduce_max(sampled_source,
                                                axis=(1, 2, 3),
                                                keepdims=True)
                sampled_kappa = kappa_vae.decode(z_k)  # output in log_10 space
                sampled_observation = phys.noisy_forward(
                    sampled_source, 10**sampled_kappa, noise_rms, tiled_psf)
                with tf.GradientTape() as tape:
                    tape.watch(unet.trainable_variables)
                    s, k, chi_sq = rim.call(sampled_observation,
                                            noise_rms,
                                            tiled_psf,
                                            outer_tape=tape)
                    # ``k`` is compared to the log10 VAE output, i.e. it is
                    # treated as log10(kappa) throughout this loop.
                    _kappa_mse = tf.reduce_sum(wk(10**sampled_kappa) *
                                               (k - sampled_kappa)**2,
                                               axis=(2, 3, 4))
                    cost = tf.reduce_mean(_kappa_mse)
                    cost += tf.reduce_mean((s - sampled_source)**2)
                    cost += tf.reduce_sum(rim.unet.losses)  # weight decay

                grads = tape.gradient(cost, unet.trainable_variables)
                optim.apply_gradients(zip(grads, unet.trainable_variables))

                # Record performance on sampled dataset
                sampled_chi_squared_series = sampled_chi_squared_series.write(
                    index=current_step,
                    value=tf.squeeze(tf.reduce_mean(chi_sq[-1])))
                sampled_source_mse = sampled_source_mse.write(
                    index=current_step,
                    value=tf.reduce_mean((s[-1] - sampled_source)**2))
                sampled_kappa_mse = sampled_kappa_mse.write(
                    index=current_step,
                    value=tf.reduce_mean((k[-1] - sampled_kappa)**2))
                # Record model prediction on data
                s, k, chi_sq = rim.call(observation, noise_rms, psf)
                chi_squared_series = chi_squared_series.write(
                    index=current_step, value=tf.squeeze(chi_sq))
                source_o = s[-1]
                kappa_o = k[-1]  # log10 space, see above
                # oracle metrics, remove when using real data
                source_mse = source_mse.write(index=current_step,
                                              value=tf.reduce_mean(
                                                  (source_o - source)**2))
                kappa_mse = kappa_mse.write(index=current_step,
                                            value=tf.reduce_mean(
                                                (kappa_o - log_kappa)**2))

                # Keep the prediction whose final chi squared is closest to 1.
                if abs(chi_sq[-1, 0] - 1) < abs(best[-1, 0] - 1):
                    source_best = tf.nn.relu(source_o)
                    kappa_best = 10**kappa_o
                    best = chi_sq
                    source_mse_best = tf.reduce_mean((source_best - source)**2)
                    # Compare in log10 space (kappa_o), not the linear
                    # kappa_best, to match the kappa_mse series above.
                    kappa_mse_best = tf.reduce_mean((kappa_o - log_kappa)**2)

            source_o = source_best
            kappa_o = kappa_best
            y_pred = phys.forward(source_o, kappa_o, psf)

            chi_sq_series = tf.transpose(chi_squared_series.stack())
            source_mse = source_mse.stack()
            kappa_mse = kappa_mse.stack()
            sampled_chi_squared_series = sampled_chi_squared_series.stack()
            sampled_source_mse = sampled_source_mse.stack()
            sampled_kappa_mse = sampled_kappa_mse.stack()

            # Latent code of optimized model predictions
            z_source_opt, _ = source_vae.encoder(tf.nn.relu(source_o))
            z_kappa_opt, _ = kappa_vae.encoder(log_10(kappa_o))

            # Compute Power spectrum of converged predictions
            _ps_observation = ps_observation.cross_correlation_coefficient(
                observation[..., 0], observation_pred[..., 0])
            _ps_observation2 = ps_observation.cross_correlation_coefficient(
                observation[..., 0], y_pred[..., 0])
            _ps_kappa = ps_kappa.cross_correlation_coefficient(
                log_kappa[..., 0],
                log_10(kappa_pred[-1])[..., 0])
            _ps_kappa2 = ps_kappa.cross_correlation_coefficient(
                log_kappa[..., 0], log_10(kappa_o)[..., 0])
            _ps_source = ps_source.cross_correlation_coefficient(
                source[..., 0], source_pred[-1][..., 0])
            _ps_source2 = ps_source.cross_correlation_coefficient(
                source[..., 0], source_o[..., 0])

            # save results
            hf["observation"][i] = observation.numpy().astype(np.float32)
            hf["psf"][i] = psf.numpy().astype(np.float32)
            hf["psf_fwhm"][i] = fwhm.numpy().astype(np.float32)
            hf["noise_rms"][i] = noise_rms.numpy().astype(np.float32)
            hf["source"][i] = source.numpy().astype(np.float32)
            hf["kappa"][i] = kappa.numpy().astype(np.float32)
            hf["observation_pred"][i] = observation_pred.numpy().astype(
                np.float32)
            hf["observation_pred_reoptimized"][i] = y_pred.numpy().astype(
                np.float32)
            hf["source_pred"][i] = tf.transpose(
                source_pred, perm=(1, 0, 2, 3, 4)).numpy().astype(np.float32)
            hf["source_pred_reoptimized"][i] = source_o.numpy().astype(
                np.float32)
            hf["kappa_pred"][i] = tf.transpose(
                kappa_pred, perm=(1, 0, 2, 3, 4)).numpy().astype(np.float32)
            hf["kappa_pred_reoptimized"][i] = kappa_o.numpy().astype(
                np.float32)
            hf["chi_squared"][i] = tf.squeeze(chi_squared).numpy().astype(
                np.float32)
            hf["chi_squared_reoptimized"][i] = tf.squeeze(best).numpy().astype(
                np.float32)
            hf["chi_squared_reoptimized_series"][i] = chi_sq_series.numpy(
            ).astype(np.float32)
            # NOTE(review): the factor 2 is kept from the original code; its
            # justification is not visible here -- confirm against rim.call.
            hf["sampled_chi_squared_reoptimized_series"][
                i] = 2 * sampled_chi_squared_series.numpy().astype(np.float32)
            hf["source_optim_mse"][i] = source_mse_best.numpy().astype(
                np.float32)
            hf["source_optim_mse_series"][i] = source_mse.numpy().astype(
                np.float32)
            hf["sampled_source_optim_mse_series"][
                i] = sampled_source_mse.numpy().astype(np.float32)
            hf["kappa_optim_mse"][i] = kappa_mse_best.numpy().astype(
                np.float32)
            hf["kappa_optim_mse_series"][i] = kappa_mse.numpy().astype(
                np.float32)
            hf["sampled_kappa_optim_mse_series"][i] = sampled_kappa_mse.numpy(
            ).astype(np.float32)
            hf["latent_source_gt_distance_init"][i] = tf.abs(
                z_source - z_source_gt).numpy().squeeze().astype(np.float32)
            hf["latent_kappa_gt_distance_init"][i] = tf.abs(
                z_kappa - z_kappa_gt).numpy().squeeze().astype(np.float32)
            hf["latent_source_gt_distance_end"][i] = tf.abs(
                z_source_opt - z_source_gt).numpy().squeeze().astype(
                    np.float32)
            hf["latent_kappa_gt_distance_end"][i] = tf.abs(
                z_kappa_opt - z_kappa_gt).numpy().squeeze().astype(np.float32)
            hf["observation_coherence_spectrum"][i] = _ps_observation
            hf["observation_coherence_spectrum_reoptimized"][
                i] = _ps_observation2
            hf["source_coherence_spectrum"][i] = _ps_source
            hf["source_coherence_spectrum_reoptimized"][i] = _ps_source2
            hf["kappa_coherence_spectrum"][i] = _ps_kappa
            hf["kappa_coherence_spectrum_reoptimized"][i] = _ps_kappa2

            if i == 0:
                # Run-level metadata is identical for every example, so it is
                # stored once: bin centres of the coherence spectra and the
                # fields of view of the physical model.
                for name, n_pix, spectrum in (
                        ("observation_frequencies", phys.pixels,
                         ps_observation),
                        ("source_frequencies", phys.src_pixels, ps_source),
                        ("kappa_frequencies", phys.kappa_pixels, ps_kappa)):
                    _, edges = np.histogram(
                        np.fft.fftfreq(n_pix)[:n_pix // 2],
                        bins=spectrum.bins)
                    hf[name][:] = (edges[:-1] + edges[1:]) / 2
                hf["kappa_fov"][0] = phys.kappa_fov
                hf["source_fov"][0] = phys.src_fov
                # This dataset is created above and was otherwise left empty.
                hf["observation_fov"][0] = phys.image_fov
Esempio n. 3
0
def main(args):
    """Match VAE-simulated observations to their nearest dataset neighbours.

    ``args.sample_size`` (kappa, source) pairs are sampled from the two VAEs
    and lensed (noise-free) through a ``PhysicalModel`` configured from the
    first record of the dataset. Each simulated observation is then compared
    to every observation in the dataset with an L2 norm, and the ``args.k``
    closest records are written, together with the simulation itself, to a
    group ``sample_XX`` of ``$CENSAI_PATH/results/<args.output_name>.h5``.

    Args:
        args: Parsed command-line namespace. Attributes read here: ``dataset``,
            ``compression_type``, ``kappa_vae``, ``source_vae``,
            ``sample_size``, ``k`` and ``output_name``.
    """
    psf_cutout = 20

    shard_paths = glob.glob(os.path.join(args.dataset, "*.tfrecords"))
    shards = tf.data.Dataset.from_tensor_slices(shard_paths)
    dataset = shards.interleave(
        lambda x: tf.data.TFRecordDataset(
            x, compression_type=args.compression_type),
        block_length=1,
        num_parallel_calls=tf.data.AUTOTUNE)
    # Only the first record is needed to learn the physical model settings.
    for physical_params in dataset.map(decode_physical_model_info):
        break
    dataset = dataset.map(decode_train)

    # Convergence VAE, restored from its latest checkpoint.
    kappa_hparams_file = os.path.join(args.kappa_vae, "model_hparams.json")
    with open(kappa_hparams_file, "r") as f:
        kappa_vae_hparams = json.load(f)
    kappa_vae = VAE(**kappa_vae_hparams)
    kappa_ckpt = tf.train.Checkpoint(step=tf.Variable(1), net=kappa_vae)
    kappa_manager = tf.train.CheckpointManager(kappa_ckpt, args.kappa_vae, 1)
    kappa_manager.checkpoint.restore(
        kappa_manager.latest_checkpoint).expect_partial()

    # Source VAE, restored from its latest checkpoint.
    source_hparams_file = os.path.join(args.source_vae, "model_hparams.json")
    with open(source_hparams_file, "r") as f:
        source_vae_hparams = json.load(f)
    source_vae = VAE(**source_vae_hparams)
    source_ckpt = tf.train.Checkpoint(step=tf.Variable(1), net=source_vae)
    source_manager = tf.train.CheckpointManager(source_ckpt, args.source_vae,
                                                1)
    source_manager.checkpoint.restore(
        source_manager.latest_checkpoint).expect_partial()

    # PhysicalModel keyword -> key of the decoded dataset metadata.
    phys_keys = {
        "pixels": "pixels",
        "kappa_pixels": "kappa pixels",
        "src_pixels": "src pixels",
        "image_fov": "image fov",
        "kappa_fov": "kappa fov",
        "src_fov": "source fov",
    }
    phys = PhysicalModel(
        method="fft",
        **{kw: physical_params[key].numpy()
           for kw, key in phys_keys.items()})

    # simulate observations
    kappa = 10**kappa_vae.sample(args.sample_size)
    source = preprocess_image(source_vae.sample(args.sample_size))
    fwhm = tf.random.normal(shape=[args.sample_size],
                            mean=1.5 * phys.image_fov / phys.pixels,
                            stddev=0.5 * phys.image_fov / phys.pixels)
    psf = phys.psf_models(fwhm, cutout_size=psf_cutout)
    y_vae = phys.forward(source, kappa, psf)

    # (name, shape) of the per-sample datasets holding the k best matches.
    matched_layout = (
        ("matched_source", [args.k, phys.src_pixels, phys.src_pixels]),
        ("matched_kappa", [args.k, phys.kappa_pixels, phys.kappa_pixels]),
        ("matched_obs", [args.k, phys.pixels, phys.pixels]),
        ("matched_psf", [args.k, psf_cutout, psf_cutout]),
        ("matched_noise_rms", [args.k]),
        ("obs_L2_distance", [args.k]),
    )

    output_file = os.path.join(os.getenv("CENSAI_PATH"), "results",
                               args.output_name + ".h5")
    with h5py.File(output_file, 'w') as hf:
        # rank these observations against the dataset with L2 norm
        for i in tqdm(range(args.sample_size)):
            target = y_vae[i][None, ...]
            distances = [
                tf.sqrt(tf.reduce_sum(
                    (y_d - target)**2)).numpy().astype(np.float32)
                for y_d, _, _, _, _ in dataset
            ]
            k_indices = np.argsort(distances)[:args.k]

            # save results
            g = hf.create_group(f"sample_{i:02d}")
            for name, shape in matched_layout:
                g.create_dataset(name=name, shape=shape, dtype=np.float32)
            g["vae_source"] = source[i, ..., 0].numpy().astype(np.float32)
            g["vae_kappa"] = kappa[i, ..., 0].numpy().astype(np.float32)
            g["vae_obs"] = y_vae[i, ..., 0].numpy().astype(np.float32)
            g["vae_psf"] = psf[i, ..., 0].numpy().astype(np.float32)

            for rank, j in enumerate(k_indices):
                # fetch back the matched observation: the record at position j
                # is the first one left after skipping j records
                for y_d, source_d, kappa_d, noise_rms_d, psf_d in dataset.skip(
                        j):
                    break
                g["matched_source"][rank] = source_d[..., 0].numpy().astype(
                    np.float32)
                g["matched_kappa"][rank] = kappa_d[..., 0].numpy().astype(
                    np.float32)
                g["matched_obs"][rank] = y_d[..., 0].numpy().astype(np.float32)
                g["matched_noise_rms"][rank] = noise_rms_d.numpy().astype(
                    np.float32)
                g["matched_psf"][rank] = psf_d[..., 0].numpy().astype(
                    np.float32)
                g["obs_L2_distance"][rank] = distances[j]
Esempio n. 4
0
def distributed_strategy(args):
    """Lens COSMOS galaxies with pre-computed kappa maps and write a TFRecord shard.

    Kappa maps and galaxies are each read from a weighted mixture of TFRecord
    directories. Every iteration pairs one batch of galaxies (tapered with a
    2D Tukey window) with one batch of kappa maps, draws a noise RMS and a PSF
    FWHM per example from truncated normals, runs the noisy forward model and
    serialises the result with ``encode_examples``.

    Args:
        args: Parsed command-line namespace. Attributes read here:
            ``kappa_datasets`` / ``kappa_datasets_weights``,
            ``cosmos_datasets`` / ``cosmos_datasets_weights``,
            ``compression_type``, ``block_length``, ``buffer_size``,
            ``batch_size``, ``tukey_alpha``, ``source_fov``, ``lens_pixels``,
            the ``noise_rms_*`` and ``psf_fwhm_*`` settings,
            ``psf_cutout_size``, ``z_source``, ``z_lens``, ``len_dataset``
            and ``output_dir``.

    Side effects:
        Writes ``data_{THIS_WORKER}.tfrecords`` into ``args.output_dir`` and
        prints a start and a finish timestamp.
    """
    timestamp_format = '%y-%m-%d_%H-%M-%S'

    def _weighted_mixture(directories, weights):
        # One shuffled stream of raw records per directory, mixed by weight.
        streams = []
        for directory in directories:
            shard_paths = glob.glob(os.path.join(directory, "*.tfrecords"))
            shards = tf.data.Dataset.from_tensor_slices(shard_paths).shuffle(
                len(shard_paths), reshuffle_each_iteration=True)
            records = shards.interleave(
                lambda x: tf.data.TFRecordDataset(
                    x, compression_type=args.compression_type),
                block_length=args.block_length,
                num_parallel_calls=tf.data.AUTOTUNE)
            streams.append(
                records.shuffle(args.buffer_size,
                                reshuffle_each_iteration=True))
        return tf.data.experimental.sample_from_datasets(streams,
                                                         weights=weights)

    def _standardize(bound, mean, std):
        # truncnorm expects its bounds in units of std around the mean.
        return (bound - mean) / std

    kappa_dataset = _weighted_mixture(args.kappa_datasets,
                                      args.kappa_datasets_weights)
    # Read off global parameters from first example in dataset
    for example in kappa_dataset.map(decode_kappa_info):
        kappa_fov = example["kappa fov"].numpy()
        kappa_pixels = example["kappa pixels"].numpy()
        break
    kappa_dataset = kappa_dataset.map(decode_kappa).batch(args.batch_size)

    cosmos_dataset = _weighted_mixture(args.cosmos_datasets,
                                       args.cosmos_datasets_weights)
    # Read off global parameters from first example in dataset
    for src_pixels in cosmos_dataset.map(decode_cosmos_info):
        src_pixels = src_pixels.numpy()
        break
    cosmos_dataset = cosmos_dataset.map(decode_cosmos)
    cosmos_dataset = cosmos_dataset.map(preprocess_cosmos)
    cosmos_dataset = cosmos_dataset.batch(args.batch_size)

    # Separable 2D Tukey taper, broadcastable over batch and channel axes.
    taper = tukey(src_pixels, alpha=args.tukey_alpha)
    window = tf.constant(np.outer(taper, taper)[np.newaxis, ..., np.newaxis],
                         dtype=DTYPE)

    # The image field of view is taken to be that of the kappa maps.
    phys = PhysicalModel(image_fov=kappa_fov,
                         src_fov=args.source_fov,
                         pixels=args.lens_pixels,
                         kappa_pixels=kappa_pixels,
                         src_pixels=src_pixels,
                         kappa_fov=kappa_fov,
                         method="conv2d")

    noise_a = _standardize(args.noise_rms_min, args.noise_rms_mean,
                           args.noise_rms_std)
    noise_b = _standardize(args.noise_rms_max, args.noise_rms_mean,
                           args.noise_rms_std)
    psf_a = _standardize(args.psf_fwhm_min, args.psf_fwhm_mean,
                         args.psf_fwhm_std)
    psf_b = _standardize(args.psf_fwhm_max, args.psf_fwhm_mean,
                         args.psf_fwhm_std)

    shard_path = os.path.join(args.output_dir,
                              f"data_{THIS_WORKER}.tfrecords")
    options = tf.io.TFRecordOptions(compression_type=args.compression_type)
    with tf.io.TFRecordWriter(shard_path, options) as writer:
        started = datetime.now().strftime(timestamp_format)
        print(f"Started worker {THIS_WORKER} at {started}")
        first_example = (THIS_WORKER - 1) * args.batch_size
        stride = N_WORKERS * args.batch_size
        for _ in range(first_example, args.len_dataset, stride):
            # Each fresh pass over a dataset is reshuffled, so taking its
            # first batch selects a random batch.
            for galaxies in cosmos_dataset:
                break
            for kappa in kappa_dataset:
                break
            galaxies = window * galaxies
            noise_rms = truncnorm.rvs(noise_a,
                                      noise_b,
                                      loc=args.noise_rms_mean,
                                      scale=args.noise_rms_std,
                                      size=args.batch_size)
            fwhm = truncnorm.rvs(psf_a,
                                 psf_b,
                                 loc=args.psf_fwhm_mean,
                                 scale=args.psf_fwhm_std,
                                 size=args.batch_size)
            psf = phys.psf_models(fwhm, cutout_size=args.psf_cutout_size)
            lensed_images = phys.noisy_forward(galaxies,
                                               kappa,
                                               noise_rms=noise_rms,
                                               psf=psf)
            for record in encode_examples(kappa=kappa,
                                          galaxies=galaxies,
                                          lensed_images=lensed_images,
                                          z_source=args.z_source,
                                          z_lens=args.z_lens,
                                          image_fov=phys.image_fov,
                                          kappa_fov=phys.kappa_fov,
                                          source_fov=args.source_fov,
                                          noise_rms=noise_rms,
                                          psf=psf,
                                          fwhm=fwhm):
                writer.write(record)
    finished = datetime.now().strftime(timestamp_format)
    print(f"Finished work at {finished}")