Example #1
class TestNeffCriterion(tf.test.TestCase):
    def setUp(self):
        self.weights = tf.constant([[1 / 3, 1 / 3, 1 / 3], [0.05, 0.05, 0.9]])

        self.log_weights = tf.math.log(self.weights)

        self.state = MockState(self.weights)

        self._scaled_weights = 3. * self.weights
        self._scaled_log_weights = self.log_weights + math.log(3)

        self.neff_log_instance = NeffCriterion(0.5, True, True, True)
        self.neff_instance = NeffCriterion(0.5, True, False, True)

    def test_neff_normalized(self):
        flag, _ = neff(self.weights, True, False, 0.5 * 3)
        flag_log, _ = neff(self.log_weights, True, True, 0.5 * 3)

        self.assertAllEqual(flag, flag_log)
        self.assertAllEqual(flag, [False, True])

    def test_neff_unnormalized(self):
        flag, _ = neff(self._scaled_weights, False, False, 0.5 * 3)
        flag_log, _ = neff(self._scaled_log_weights, False, True, 0.5 * 3)

        self.assertAllEqual(flag, flag_log)
        self.assertAllEqual(flag, [False, True])

    def test_neff(self):
        log_flags, _ = self.neff_log_instance.apply(self.state)
        flags, _ = self.neff_instance.apply(self.state)

        self.assertAllEqual(log_flags, flags)
        self.assertAllEqual(flags, [False, True])
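
Why both tests expect [False, True]: for normalized weights the effective sample size is Neff = 1 / sum_i w_i**2, and the criterion fires when Neff falls below the threshold (here the relative threshold 0.5 times n_particles = 3, i.e. 1.5). A minimal NumPy sketch, independent of the library, reproduces the flags:

import numpy as np

weights = np.array([[1 / 3, 1 / 3, 1 / 3], [0.05, 0.05, 0.9]])
neff_values = 1. / np.sum(weights ** 2, axis=-1)  # [3.0, 1.227...]
flags = neff_values < 0.5 * 3                     # resample once Neff drops below 1.5
print(flags)                                      # [False  True]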
Example #2
    def setUp(self):
        self.weights = tf.constant([[1 / 3, 1 / 3, 1 / 3], [0.05, 0.05, 0.9]])

        self.log_weights = tf.math.log(self.weights)

        self.state = MockState(self.weights)

        self._scaled_weights = 3. * self.weights
        self._scaled_log_weights = self.log_weights + math.log(3)

        self.neff_log_instance = NeffCriterion(0.5, True, True, True)
        self.neff_instance = NeffCriterion(0.5, True, False, True)
Example #3
    def setUp(self):
        N = 10
        n_particles = tf.constant(N)
        dimension = tf.constant(1)
        batch_size = tf.constant(4)

        weights = tf.ones((batch_size, n_particles), dtype=float) / tf.cast(
            n_particles, float)
        initial_particles = tf.random.uniform(
            (batch_size, n_particles, dimension), -1, 1)
        log_likelihoods = tf.zeros((batch_size), dtype=float)
        self.initial_state = State(particles=initial_particles,
                                   log_weights=tf.math.log(weights),
                                   weights=weights,
                                   log_likelihoods=log_likelihoods,
                                   ancestor_indices=None,
                                   resampling_correction=None)

        error_variance = tf.constant([0.5], dtype=tf.float32)
        error_rv = tfp.distributions.MultivariateNormalDiag(
            tf.constant([0.]), error_variance)

        noise_variance = tf.constant([0.5])
        noise_rv = tfp.distributions.MultivariateNormalDiag(
            tf.constant([0.]), noise_variance)

        observation_model = LinearObservationModel(tf.constant([[1.]]),
                                                   error_rv)

        transition_matrix = tf.constant([[1.]])
        transition_model = RandomWalkModel(transition_matrix, noise_rv)

        bootstrap = BootstrapProposalModel(transition_model)
        resampling_criterion = NeffCriterion(tf.constant(0.5),
                                             is_relative=tf.constant(True))
        systematic_resampling_method = SystematicResampler()

        self.bootstrap_filter = SMC(observation_model, transition_model,
                                    bootstrap, resampling_criterion,
                                    systematic_resampling_method)

        # TODO: Let's change this using an instance of StateSpaceModel
        self.n = 100
        observation = np.array([[[0.]]]).astype(np.float32)
        observations = []
        for _ in range(self.n):
            observations.append(observation)
            observation = observation + np.random.normal(0., 1., [1, 1, 1])
        self.observation_dataset = tf.data.Dataset.from_tensor_slices(
            observations)
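
The observation loop above grows a Gaussian random walk one append at a time; an equivalent vectorized construction (a sketch in pure NumPy, matching the [n, 1, 1, 1] shape the loop produces) is:

import numpy as np

n = 100
steps = np.random.normal(0., 1., [n, 1, 1, 1]).astype(np.float32)
steps[0] = 0.                            # the walk starts exactly at 0, like the loop
observations = np.cumsum(steps, axis=0)  # observations[k] is the sum of the first k steps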
Example #4
def main(resampling_method_value, resampling_neff, resampling_kwargs=None, T=150, batch_size=50, n_particles=25,
         data_seed=0, filter_seed=555, savefig=False):
    transition_matrix = 0.5 * np.eye(2, dtype=np.float32)
    transition_covariance = np.eye(2, dtype=np.float32)
    observation_matrix = np.eye(2, dtype=np.float32)
    observation_covariance = 0.1 * np.eye(2, dtype=np.float32)

    resampling_method_enum = ResamplingMethodsEnum(resampling_method_value)

    np_random_state = np.random.RandomState(seed=data_seed)
    data, kf = get_data(transition_matrix, observation_matrix, transition_covariance, observation_covariance, T,
                        np_random_state)
    observation_dataset = tf.data.Dataset.from_tensor_slices(data)

    if resampling_method_enum == ResamplingMethodsEnum.KALMAN:
        return kalman_main(kf, data, savefig)

    if resampling_kwargs is None:
        resampling_kwargs = {}

    if resampling_neff == 0.:
        resampling_criterion = NeverResample()
    elif resampling_neff == 1.:
        resampling_criterion = AlwaysResample()
    else:
        resampling_criterion = NeffCriterion(resampling_neff, True)

    if resampling_method_enum == ResamplingMethodsEnum.MULTINOMIAL:
        resampling_method = MultinomialResampler()
    elif resampling_method_enum == ResamplingMethodsEnum.SYSTEMATIC:
        resampling_method = SystematicResampler()
    elif resampling_method_enum == ResamplingMethodsEnum.STRATIFIED:
        resampling_method = StratifiedResampler()
    elif resampling_method_enum == ResamplingMethodsEnum.REGULARIZED:
        resampling_method = RegularisedTransform(**resampling_kwargs)
    elif resampling_method_enum == ResamplingMethodsEnum.VARIANCE_CORRECTED:
        regularized_resampler = RegularisedTransform(**resampling_kwargs)
        resampling_method = PartiallyCorrectedRegularizedTransform(regularized_resampler)
    elif resampling_method_enum == ResamplingMethodsEnum.OPTIMIZED:
        lr = resampling_kwargs.pop('lr', resampling_kwargs.pop('learning_rate', 0.1))

        loss = SinkhornLoss(**resampling_kwargs, symmetric=True)
        optimizer = SGD(loss, lr=lr, decay=0.95)
        regularized_resampler = RegularisedTransform(**resampling_kwargs)

        resampling_method = OptimizedPointCloud(optimizer, intermediate_resampler=regularized_resampler)
    else:
        raise ValueError(f'resampling_method_name {resampling_method_enum} is not a valid ResamplingMethodsEnum')

    observation_matrix = tf.convert_to_tensor(observation_matrix)
    transition_covariance_chol = tf.linalg.cholesky(transition_covariance)
    observation_covariance_chol = tf.linalg.cholesky(observation_covariance)

    initial_particles = np_random_state.normal(0., 1., [batch_size, n_particles, 2]).astype(np.float32)
    initial_state = State(tf.constant(initial_particles))

    smc = make_filter(observation_matrix, transition_matrix, observation_covariance_chol,
                      transition_covariance_chol, resampling_method, resampling_criterion)

    states = get_states(smc,
                        initial_state,
                        observation_dataset,
                        tf.constant(T),
                        tf.constant(filter_seed))

    stddevs = std(states, keepdims=False).numpy()
    stddevs_df = stddevs
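
The resampling_neff dispatch above reappears verbatim in the later examples. A small helper (a sketch, not part of the library, reusing the NeverResample, AlwaysResample and NeffCriterion classes these examples import) makes the convention explicit:

def make_resampling_criterion(resampling_neff):
    # 0. -> never resample, 1. -> always resample,
    # anything in between -> relative Neff threshold
    if resampling_neff == 0.:
        return NeverResample()
    if resampling_neff == 1.:
        return AlwaysResample()
    return NeffCriterion(resampling_neff, True)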
Example #5
def main(resampling_method_value,
         resampling_neff,
         learning_rates=(1e-4, 1e-3),
         resampling_kwargs=None,
         alpha=0.42,
         dx=10,
         dy=3,
         observation_covariance=1.,
         dense=False,
         T=20,
         batch_size=1,
         n_particles=25,
         data_seed=0,
         n_data=50,
         n_iter=50,
         savefig=False,
         filter_seed=0,
         use_xla=False,
         change_seed=True):
    transition_matrix = get_transition_matrix(alpha, dx)
    transition_covariance = get_transition_covariance(dx)
    observation_matrix = get_observation_matrix(dx, dy, dense)
    observation_covariance = get_observation_covariance(
        observation_covariance, dy)

    resampling_method_enum = ResamplingMethodsEnum(resampling_method_value)

    np_random_state = np.random.RandomState(seed=data_seed)

    observation_matrix = tf.convert_to_tensor(observation_matrix)
    transition_covariance_chol = tf.linalg.cholesky(transition_covariance)
    observation_covariance_chol = tf.linalg.cholesky(observation_covariance)

    initial_particles = np_random_state.normal(
        0., 1., [batch_size, n_particles, dx]).astype(np.float32)
    initial_state = State(initial_particles)

    if resampling_neff == 0.:
        resampling_criterion = NeverResample()
    elif resampling_neff == 1.:
        resampling_criterion = AlwaysResample()
    else:
        resampling_criterion = NeffCriterion(resampling_neff, True)

    optimal_smc = make_optimal_filter(observation_matrix, transition_matrix,
                                      observation_covariance_chol,
                                      transition_covariance_chol,
                                      MultinomialResampler(),
                                      resampling_criterion)

    if resampling_kwargs is None:
        resampling_kwargs = {}

    resampling_method = resampling_method_factory(resampling_method_enum,
                                                  resampling_kwargs)

    datas = []
    lls = []
    observation_datasets = []
    optimal_lls = []

    log_phi_x_0 = tf.ones(dx)
    phi_y_0 = tf.zeros(dy)

    for _ in range(n_data):
        data, ll = get_data(transition_matrix, observation_matrix,
                            transition_covariance, observation_covariance, T,
                            np_random_state)
        datas.append(data)
        lls.append(ll / T)
        observation_dataset = tf.data.Dataset.from_tensor_slices(data)
        observation_datasets.append(observation_dataset)
        final_state = optimal_smc(initial_state, observation_dataset, T, None,
                                  True, filter_seed)
        optimal_lls.append(final_state.log_likelihoods.numpy().mean() / T)

    log_phi_x = tf.Variable(log_phi_x_0, trainable=True)
    phi_y = tf.Variable(phi_y_0, trainable=True)

    smc = make_filter(observation_matrix, transition_matrix,
                      observation_covariance_chol, transition_covariance_chol,
                      resampling_method, resampling_criterion, log_phi_x,
                      phi_y)

    def optimizer_maker(learning_rate):
        # tf.function doesn't like creating variables. This is a way to create them outside the graph
        # We can't reuse the same optimizer because it would be giving a warmed-up momentum to the ones run later
        optimizer = tf.optimizers.Adam(learning_rate=learning_rate)
        return optimizer

    initial_values = [log_phi_x_0, phi_y_0]
    losses_list = []
    ess_profiles_list = []
    mean_errors = []
    for observation_dataset in observation_datasets:
        try:
            losses, ess_profiles = compare_learning_rates(
                smc, initial_state, observation_dataset, T, log_phi_x, phi_y,
                initial_values, n_iter, optimizer_maker, learning_rates,
                filter_seed, use_xla, change_seed)
        except Exception:
            print('one dataset failed, ignoring')
            continue
        losses_df = pd.DataFrame(np.stack(losses).T,
                                 columns=np.log10(learning_rates))
        ess_df = pd.DataFrame(np.stack(ess_profiles).T,
                              columns=np.log10(learning_rates))

        losses_df.columns.name = 'log learning rate'
        losses_df.index.name = 'epoch'

        ess_df.columns.name = 'log learning rate'
        ess_df.index.name = 'epoch'

        losses_list.append(losses_df)
        ess_profiles_list.append(ess_df)

        delta_phi_m_1 = tf.linalg.diag(tf.exp(-log_phi_x))
        diff_cov = optimal_smc._proposal_model._sigma - delta_phi_m_1 @ transition_covariance
        approx_error = tf.linalg.diag_part(diff_cov).numpy()
        mean_error = np.sqrt(np.nanmean(approx_error**2))
        mean_errors.append(mean_error)

    losses_data = pd.concat(losses_list, axis=1)
    ess_data = pd.concat(ess_profiles_list, axis=1)

    mean_data = pd.DataFrame([[np.mean(mean_errors)]],
                             index=pd.MultiIndex.from_tuples([(batch_size,
                                                               n_particles)]),
                             columns=pd.MultiIndex.from_tuples([
                                 (resampling_method_enum.name, change_seed)
                             ]))

    losses_data = losses_data.groupby(axis=1, level=0).mean()
    ess_data = ess_data.groupby(axis=1, level=0).mean()

    # plot_losses(losses_df, resampling_method_enum.name, savefig, dx, dy, dense, T, change_seed)
    plot_losses_vs_ess(losses_data, ess_data, resampling_method_enum.name,
                       savefig, dx, dy, dense, T, n_particles,
                       change_seed, batch_size, np.mean(optimal_lls),
                       np.mean(lls), n_iter, mean_data, n_data)
    print(tf.exp(log_phi_x))
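
The pd.concat / groupby step above averages the loss and ESS curves across datasets by column label. A hedged toy example of that exact mechanic:

import numpy as np
import pandas as pd

a = pd.DataFrame(np.ones((4, 2)), columns=[-4., -3.])      # curves for one dataset
b = pd.DataFrame(3 * np.ones((4, 2)), columns=[-4., -3.])  # curves for another
merged = pd.concat([a, b], axis=1)                         # duplicated column labels
means = merged.groupby(axis=1, level=0).mean()             # every entry becomes 2.0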
Example #6
def main(resampling_method_value,
         resampling_neff,
         resampling_kwargs=None,
         T=150,
         batch_size=50,
         n_particles=25,
         data_seed=0,
         values=(0.25, 0.5, 0.75),
         filter_seed=555,
         savefig=False):
    transition_matrix = 0.5 * np.eye(2, dtype=np.float32)
    transition_covariance = np.eye(2, dtype=np.float32)
    observation_matrix = np.eye(2, dtype=np.float32)
    observation_covariance = 0.1 * np.eye(2, dtype=np.float32)

    values = np.array(list(zip(values, values)), dtype=np.float32)

    resampling_method_enum = ResamplingMethodsEnum(resampling_method_value)

    np_random_state = np.random.RandomState(seed=data_seed)
    data, kf = get_data(transition_matrix, observation_matrix,
                        transition_covariance, observation_covariance, T,
                        np_random_state)
    observation_dataset = tf.data.Dataset.from_tensor_slices(data)

    if resampling_method_enum == ResamplingMethodsEnum.KALMAN:
        return kalman_main(kf, data, values, T, savefig)

    if resampling_kwargs is None:
        resampling_kwargs = {}

    if resampling_neff == 0.:
        resampling_criterion = NeverResample()
    elif resampling_neff == 1.:
        resampling_criterion = AlwaysResample()
    else:
        resampling_criterion = NeffCriterion(resampling_neff, True)

    if resampling_method_enum == ResamplingMethodsEnum.MULTINOMIAL:
        resampling_method = MultinomialResampler()
    elif resampling_method_enum == ResamplingMethodsEnum.SYSTEMATIC:
        resampling_method = SystematicResampler()
    elif resampling_method_enum == ResamplingMethodsEnum.STRATIFIED:
        resampling_method = StratifiedResampler()
    elif resampling_method_enum == ResamplingMethodsEnum.REGULARIZED:
        resampling_method = RegularisedTransform(**resampling_kwargs)
    elif resampling_method_enum == ResamplingMethodsEnum.VARIANCE_CORRECTED:
        regularized_resampler = RegularisedTransform(**resampling_kwargs)
        resampling_method = PartiallyCorrectedRegularizedTransform(
            regularized_resampler)
    elif resampling_method_enum == ResamplingMethodsEnum.OPTIMIZED:
        lr = resampling_kwargs.pop('lr',
                                   resampling_kwargs.pop('learning_rate', 0.1))

        loss = SinkhornLoss(**resampling_kwargs, symmetric=True)
        optimizer = SGD(loss, lr=lr, decay=0.95)
        regularized_resampler = RegularisedTransform(**resampling_kwargs)

        resampling_method = OptimizedPointCloud(
            optimizer, intermediate_resampler=regularized_resampler)
    else:
        raise ValueError(
            f'resampling_method_name {resampling_method_enum} is not a valid ResamplingMethodsEnum'
        )

    init_transition_matrix = (0.5 * np.eye(2) +
                              0.1 * np_random_state.randn(2, 2)).astype(
                                  np.float32)
    modifiable_transition_matrix = tf.Variable(init_transition_matrix,
                                               trainable=True)
    observation_matrix = tf.convert_to_tensor(observation_matrix)
    transition_covariance_chol = tf.linalg.cholesky(transition_covariance)
    observation_covariance_chol = tf.linalg.cholesky(observation_covariance)

    initial_particles = np_random_state.normal(
        0., 1., [batch_size, n_particles, 2]).astype(np.float32)
    initial_state = State(tf.constant(initial_particles))

    smc = make_filter(observation_matrix, modifiable_transition_matrix,
                      observation_covariance_chol, transition_covariance_chol,
                      resampling_method, resampling_criterion)

    elbos = get_elbos(smc, initial_state, observation_dataset, tf.constant(T),
                      modifiable_transition_matrix, tf.constant(values),
                      tf.constant(filter_seed))

    elbos_df = pd.DataFrame(
        elbos.numpy(), pd.Index(values[:, 0], name=r'$\theta_1$, $\theta_2$'))

    elbos_df = elbos_df.T.describe().T[['mean', 'std']].reset_index()

    if savefig:
        filename = f"{resampling_method_enum.name}_batchsize_{batch_size}_N_{n_particles}_epsilon_{resampling_kwargs.get('epsilon')}_likelihoods_values.tex"
        elbos_df.to_latex(buf=os.path.join('./tables/', filename),
                          float_format='{:,.3f}'.format,
                          escape=False,
                          index=False)
    else:
        print(
            elbos_df.to_latex(float_format='{:,.3f}'.format,
                              escape=False,
                              index=False))
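
The elbos_df post-processing above is compact; a hedged toy version shows what the transpose/describe/transpose chain produces, namely one row per parameter value with the mean and standard deviation across runs:

import numpy as np
import pandas as pd

toy = pd.DataFrame(np.random.randn(3, 5),
                   index=pd.Index([0.25, 0.5, 0.75], name='theta'))
summary = toy.T.describe().T[['mean', 'std']].reset_index()
print(summary)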
Example #7
def main(resampling_method_value,
         resampling_neff,
         resampling_kwargs=None,
         T=100,
         batch_size=1,
         n_particles=25,
         phi=0.5,
         data_seed=0,
         filter_seed=1,
         learning_rate=0.001,
         n_iter=50,
         savefig=False,
         use_xla=False,
         batch_data=1,
         assume_differentiable=False,
         change_seed=False):
    transition_matrix = phi * np.eye(2, dtype=np.float32)
    transition_covariance = 0.5 * np.eye(2, dtype=np.float32)
    observation_matrix = np.eye(2, dtype=np.float32)
    observation_covariance = 0.1 * np.eye(2, dtype=np.float32)

    resampling_method_enum = ResamplingMethodsEnum(resampling_method_value)

    np_random_state = np.random.RandomState(seed=data_seed)
    data = []
    np_data = []

    assert batch_data > 0
    for _ in range(batch_data):
        a_data, kf = get_data(transition_matrix, observation_matrix,
                              transition_covariance, observation_covariance, T,
                              np_random_state)
        data.append(tf.data.Dataset.from_tensor_slices(a_data))
        np_data.append(a_data)

    if resampling_kwargs is None:
        resampling_kwargs = {}

    if resampling_neff == 0.:
        resampling_criterion = NeverResample()
    elif resampling_neff == 1.:
        resampling_criterion = AlwaysResample()
    else:
        resampling_criterion = NeffCriterion(resampling_neff, True)

    if resampling_method_enum == ResamplingMethodsEnum.MULTINOMIAL:
        resampling_method = MultinomialResampler()
    elif resampling_method_enum == ResamplingMethodsEnum.SYSTEMATIC:
        resampling_method = SystematicResampler()
    elif resampling_method_enum == ResamplingMethodsEnum.STRATIFIED:
        resampling_method = StratifiedResampler()
    elif resampling_method_enum == ResamplingMethodsEnum.REGULARIZED:
        resampling_method = RegularisedTransform(**resampling_kwargs)
    elif resampling_method_enum == ResamplingMethodsEnum.VARIANCE_CORRECTED:
        regularized_resampler = RegularisedTransform(**resampling_kwargs)
        resampling_method = PartiallyCorrectedRegularizedTransform(
            regularized_resampler)
    elif resampling_method_enum == ResamplingMethodsEnum.OPTIMIZED:
        lr = resampling_kwargs.pop('lr',
                                   resampling_kwargs.pop('learning_rate', 0.1))

        loss = SinkhornLoss(**resampling_kwargs, symmetric=True)
        optimizer = SGD(loss, lr=lr, decay=0.95)
        regularized_resampler = RegularisedTransform(**resampling_kwargs)

        resampling_method = OptimizedPointCloud(
            optimizer, intermediate_resampler=regularized_resampler)
    else:
        raise ValueError(
            f'resampling_method_name {resampling_method_enum} is not a valid ResamplingMethodsEnum'
        )

    modifiable_transition_matrix = tf.Variable(transition_matrix,
                                               trainable=True)
    observation_matrix = tf.convert_to_tensor(observation_matrix)
    transition_covariance_chol = tf.linalg.cholesky(transition_covariance)
    observation_covariance_chol = tf.linalg.cholesky(observation_covariance)

    initial_particles = np_random_state.normal(
        0., 1., [batch_size, n_particles, 2]).astype(np.float32)
    initial_state = State(initial_particles)

    smc = make_filter(observation_matrix, modifiable_transition_matrix,
                      observation_covariance_chol, transition_covariance_chol,
                      resampling_method, resampling_criterion)

    x0 = np.array([2 * phi / 3] * 2).astype(np.float32)
    print(x0)

    if resampling_method.DIFFERENTIABLE or assume_differentiable:
        loss_fun = lambda x, observation_dataset, seed: values_and_gradient(
            x, modifiable_transition_matrix, smc, initial_state,
            observation_dataset, T, seed)
    else:
        loss_fun = lambda x, observation_dataset, seed: values_and_gradient_finite_diff(
            x, modifiable_transition_matrix, smc, initial_state,
            observation_dataset, T, seed)

    final_values = []
    losses = []
    kalman_params = []

    def kf_likelihood_fun(val, data):
        import copy
        kf_copy = copy.copy(kf)
        kf_copy.transition_matrices = np.diag(val)
        return -kf_loglikelihood(kf_copy, data)

    fun = tf.function(loss_fun, experimental_compile=use_xla)

    for observation_dataset, np_dataset in tqdm(zip(data, np_data),
                                                total=batch_data):
        final_value, loss = gradient_descent(fun, x0, observation_dataset,
                                             tf.constant(learning_rate),
                                             tf.constant(n_iter),
                                             tf.constant(filter_seed),
                                             tf.constant(change_seed))
        final_values.append(final_value.numpy())
        losses.append(loss.numpy())
        kf_params = minimize(kf_likelihood_fun, x0, args=(np_dataset, ))
        kalman_params.append(kf_params.x)
    losses = np.array(losses).T
    plt.plot(losses)
    plt.show()
    final_values = np.vstack(final_values)
    kalman_params = np.vstack(kalman_params)

    df = pd.DataFrame(final_values - kalman_params,
                      columns=[r'$\theta_1$', r'$\theta_2$'])
    parameters_diff = np.mean(np.square(df), 0)
    if savefig:
        filename = f'theta_diff_{resampling_method_enum.name}_batch_size_{batch_size}_N_{n_particles}_batch_data_{batch_data}_changeseed_{change_seed}.csv'
        df.to_csv(os.path.join('./tables/', filename), float_format='%.5f')
    else:
        print(parameters_diff.to_latex(float_format='%.5f'))
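
When the resampler is not differentiable and assume_differentiable is off, the example falls back to values_and_gradient_finite_diff. Its implementation is not shown here; a minimal central-difference sketch of the underlying idea (loss_fn and the step size epsilon are assumed placeholders):

import numpy as np

def finite_diff_gradient(loss_fn, x, epsilon=1e-1):
    # estimate d loss / d x_i by perturbing one coordinate at a time
    x = np.asarray(x, dtype=np.float32)
    grad = np.zeros_like(x)
    for i in range(x.size):
        e = np.zeros_like(x)
        e.flat[i] = epsilon
        grad.flat[i] = (loss_fn(x + e) - loss_fn(x - e)) / (2. * epsilon)
    return loss_fn(x), grad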
Example #8
def main(run_method,
         latent_size=10,
         latent_encoded_size=32,
         batch_size=1,
         n_particles=25,
         epsilon=0.5,
         scaling=0.9,
         neff=0.9,
         max_iter=1000,
         additional_variables_are_state=False,
         convergence_threshold=1e-3,
         n_iter=100,
         initial_lr=0.01,
         decay=0.5,
         steps=100,
         warmup=100,
         data_seed=0,
         filter_seed=1,
         fixed_seed=False,
         out_dir='./',
         data_fp='../data/data/piano_data/jsb.pkl'):
    inputs_tensor, targets_tensor, lens, mean = create_pianoroll_dataset(data_fp, split='train', batch_size=1)

    T = targets_tensor.shape.as_list()[0]
    observation_size = targets_tensor.shape.as_list()[-1]

    encoded_data_size = latent_size
    rnn_hidden_size = latent_size // 2

    latent_encoder_layers = [32]

    latent_encoder = snt.nets.MLP(
        output_sizes=latent_encoder_layers + [latent_encoded_size],
        name="latent_encoder")

    # store observations

    dimension = latent_size
    inputs_tensor = tf.expand_dims(inputs_tensor, 1)
    targets_tensor = tf.expand_dims(targets_tensor, 1)

    obs_data = tf.data.Dataset.from_tensor_slices(targets_tensor)
    inputs_data = tf.data.Dataset.from_tensor_slices(inputs_tensor)
    transition_model = VRNNTransitionModel(rnn_hidden_size, latent_encoder, latent_size)
    observation_model = VRNNBernoulliObservationModel(latent_encoder, observation_size)
    proposal_model = VRNNProposalModel(rnn_hidden_size, latent_encoder, latent_size)

    test_transition_model = TESTVRNNTransitionModel(rnn_hidden_size, latent_encoder, latent_size)
    test_proposal_model = TESTVRNNProposalModel(rnn_hidden_size, latent_encoder, latent_size)

    # initial state
    tf.random.set_seed(data_seed)
    normal_dist = tfp.distributions.Normal(0., 1.)
    initial_latent_state = tf.zeros([batch_size, n_particles, dimension])
    initial_latent_state = tf.cast(initial_latent_state, dtype=float)
    latent_encoded = transition_model.latent_encoder(initial_latent_state)

    # initial rnn_state
    initial_rnn_state = [normal_dist.sample([batch_size, n_particles, rnn_hidden_size], seed=data_seed)] * 2
    initial_rnn_state = tf.concat(initial_rnn_state, axis=-1)

    # rnn_out
    initial_rnn_out = tf.zeros([batch_size, n_particles, rnn_hidden_size])

    initial_weights = tf.ones((batch_size, n_particles), dtype=float) / tf.cast(n_particles, float)
    log_likelihoods = tf.zeros(batch_size, dtype=float)
    init_state = VRNNState(particles=initial_latent_state,
                           log_weights=tf.math.log(initial_weights),
                           weights=initial_weights,
                           obs_likelihood=log_likelihoods,
                           log_likelihoods=log_likelihoods,
                           rnn_state=initial_rnn_state,
                           rnn_out=initial_rnn_out,
                           latent_encoded=latent_encoded)

    # record loss
    LARGE_B = 50
    N = 25

    # initial state
    large_initial_latent_state = tf.zeros([LARGE_B, N, dimension])
    large_initial_latent_state = tf.cast(large_initial_latent_state, dtype=float)
    large_latent_encoded = transition_model.latent_encoder(large_initial_latent_state)

    # initial rnn_state
    large_initial_rnn_state = [normal_dist.sample([LARGE_B, N, rnn_hidden_size])] * 2
    large_initial_rnn_state = tf.concat(large_initial_rnn_state, axis=-1)

    # rnn_out
    large_initial_rnn_out = tf.zeros([LARGE_B, N, rnn_hidden_size])
    obs_likelihood = tf.zeros(LARGE_B, dtype=float)
    large_init_state = VRNNState(particles=large_initial_latent_state,
                                 obs_likelihood=obs_likelihood,
                                 rnn_state=large_initial_rnn_state,
                                 rnn_out=large_initial_rnn_out,
                                 latent_encoded=large_latent_encoded)
    ## Check variables

    # snt networks initiated on first call
    t_samp = transition_model.sample(init_state, inputs_tensor[0], seed=data_seed)
    obs_samp = observation_model.sample(init_state, seed=data_seed)

    # for var in transition_model.variables:
    #    print(var.name)

    # for var in observation_model.variables:
    #    print(var.name)

    ## Particle Filter

    trainable_variables = transition_model.variables + observation_model.variables
    init_values = [v.value() for v in trainable_variables]

    resampling_criterion = NeffCriterion(tf.constant(neff), tf.constant(True))
    # resampling_criterion = AlwaysResample()
    resampling_method = MultinomialResampler()

    epsilon = tf.constant(epsilon)
    scaling = tf.constant(scaling)

    regularized = RegularisedTransform(epsilon,
                                       scaling=scaling,
                                       max_iter=max_iter,
                                       convergence_threshold=convergence_threshold,
                                       additional_variables_are_state=additional_variables_are_state)

    multinomial_smc = VRNNSMC(observation_model, transition_model, proposal_model, resampling_criterion, MultinomialResampler())
    regularized_smc = VRNNSMC(observation_model, transition_model, proposal_model, resampling_criterion, regularized)
    test_reg = VRNNSMC(observation_model, test_transition_model, test_proposal_model, resampling_criterion, regularized)
    test_mul = VRNNSMC(observation_model, test_transition_model, test_proposal_model, resampling_criterion, MultinomialResampler())

    def run_smc(smc, optimizer, n_iter, seed=filter_seed):
        # print(optimizer.weights)# check
        @tf.function
        def smc_routine(smc, state, use_correction_term=False, seed=seed):
            final_state = smc(state, obs_data, n_observations=T, inputs_series=inputs_data, return_final=True,
                              seed=seed)
            res = tf.reduce_mean(final_state.log_likelihoods)
            obs_likelihood = tf.reduce_mean(final_state.obs_likelihood)
            ess = final_state.ess
            if use_correction_term:
                return res, ess, tf.reduce_mean(final_state.resampling_correction), obs_likelihood
            return res, ess, tf.constant(0.), obs_likelihood

        @tf.function
        def run_one_step(smc, use_correction_term, init_state, seed=seed):
            with tf.GradientTape() as tape:
                tape.watch(trainable_variables)
                real_ll, ess, correction, obs_likelihood = smc_routine(smc, init_state, use_correction_term, seed)
                loss = -(real_ll + correction)
            grads_loss = tape.gradient(loss, trainable_variables)
            return real_ll, grads_loss, ess, obs_likelihood

        @tf.function
        def train_one_step(smc, use_correction_term, seed=seed):
            real_ll, grads_loss, ess, obs_likelihood = run_one_step(smc, use_correction_term, init_state, seed)
            capped_gvs = [tf.clip_by_value(grad, -500., 500.) for grad in grads_loss]
            optimizer.apply_gradients(zip(capped_gvs, trainable_variables))
            return -real_ll, capped_gvs, ess, obs_likelihood

        @tf.function
        def train_niter(smc, num_steps=100, use_correction_term=False, reset=True, seed=seed, fixed_seed=fixed_seed):
            if reset:
                reset_operations = [v.assign(init) for v, init in zip(trainable_variables, init_values)]
            else:
                reset_operations = []
            obs_lik_tensor_array = tf.TensorArray(dtype=tf.float32, size=num_steps, dynamic_size=False,
                                                  element_shape=[])
            multi_loss_tensor_array = tf.TensorArray(dtype=tf.float32, size=num_steps, dynamic_size=False,
                                                     element_shape=[])
            test_reg_tensor_array = tf.TensorArray(dtype=tf.float32, size=num_steps, dynamic_size=False,
                                                   element_shape=[])
            test_mul_tensor_array = tf.TensorArray(dtype=tf.float32, size=num_steps, dynamic_size=False,
                                                   element_shape=[])
            loss_tensor_array = tf.TensorArray(dtype=tf.float32, size=num_steps, dynamic_size=False, element_shape=[])
            ess_tensor_array = tf.TensorArray(dtype=tf.float32, size=num_steps, dynamic_size=False, element_shape=[])
            grad_tensor_array = tf.TensorArray(dtype=tf.float32, size=num_steps, dynamic_size=False, element_shape=[])
            time_tensor_array = tf.TensorArray(dtype=tf.float64, size=num_steps, dynamic_size=False, element_shape=[])
            with tf.control_dependencies(reset_operations):
                toc = tf.constant(0., dtype=tf.float64)
                tic = tf.timestamp()
                for step in tf.range(1, num_steps + 1):
                    if not fixed_seed:
                        seed = step
                    tic_loss = tf.timestamp()
                    with tf.control_dependencies([tic_loss]):
                        loss, grads, ess_run, obs_likelihood = train_one_step(smc, use_correction_term, seed)
                    with tf.control_dependencies([loss]):
                        toc_loss = tf.timestamp()
                        multi_loss_state = multinomial_smc(large_init_state, obs_data,
                                                           n_observations=T, inputs_series=inputs_data,
                                                           return_final=True, seed=seed)

                        test_reg_state = test_reg(large_init_state, obs_data,
                                                  n_observations=T, inputs_series=inputs_data,
                                                  return_final=True, seed=seed)

                        test_mul_state = test_mul(large_init_state, obs_data,
                                                  n_observations=T, inputs_series=inputs_data,
                                                  return_final=True, seed=seed)
                        test_reg_loss = -tf.reduce_mean(test_reg_state.log_likelihoods)
                        test_mul_loss = -tf.reduce_mean(test_mul_state.log_likelihoods)
                        multi_loss = -tf.reduce_mean(multi_loss_state.log_likelihoods)
                        ess = multi_loss_state.ess
                    toc += toc_loss - tic_loss

                    max_grad = tf.reduce_max([tf.reduce_max(tf.abs(grad)) for grad in grads])

                    print_step = tf.maximum(num_steps // 10, 1)  # avoid modulo-by-zero for short runs
                    if step % print_step == 0:
                        tf.print('Step', step, '/', num_steps,
                                 ', obs_likelihood = ', obs_likelihood,
                                 ', loss = ', loss,
                                 ', test_reg = ', test_reg_loss,
                                 ', test_mul = ', test_mul_loss,
                                 ', multi_loss= ', multi_loss,
                                 ': ms per step= ', 1000. * toc / tf.cast(step, tf.float64),
                                 end='\r')

                    test_reg_tensor_array = test_reg_tensor_array.write(step - 1, test_reg_loss)
                    test_mul_tensor_array = test_mul_tensor_array.write(step - 1, test_mul_loss)
                    obs_lik_tensor_array = obs_lik_tensor_array.write(step - 1, obs_likelihood)
                    multi_loss_tensor_array = multi_loss_tensor_array.write(step - 1, multi_loss)
                    ess_tensor_array = ess_tensor_array.write(step - 1, ess[0])
                    loss_tensor_array = loss_tensor_array.write(step - 1, loss)
                    grad_tensor_array = grad_tensor_array.write(step - 1, max_grad)
                    time_tensor_array = time_tensor_array.write(step - 1, toc)

            return (loss_tensor_array.stack(), grad_tensor_array.stack(),
                    time_tensor_array.stack(), ess_tensor_array.stack(),
                    multi_loss_tensor_array.stack(), obs_lik_tensor_array.stack(), 
                    test_reg_tensor_array.stack(), test_mul_tensor_array.stack())

        return train_niter(smc, tf.constant(n_iter))

    def run_block(smc, method, n_iter, initial_lr, decay, steps, out_dir, col='blue', warmup=100, force=False,
                  data_name=None):
        if not os.path.isdir(out_dir):
            os.mkdir(out_dir)
        optimizer = make_optimizer(initial_learning_rate=initial_lr,
                                   decay_steps=steps, decay_rate=decay, staircase=True)
        key = fn_identifier(initial_lr, decay, steps, method, data_name)
        filename = "vrnn_loss_{0}.pkl".format(key)
        filepath = os.path.join(out_dir, filename)

        print("\n {0}".format(method))

        print(key)

        (loss_array,
         grad_array,
         time_array,
         ess_array,
         multi_loss_array,
         obs_lik_array,
         test_reg_array, test_mul_array) = run_smc(smc, optimizer, n_iter, seed=filter_seed)

        obs_lik_array = obs_lik_array.numpy()
        loss_array = loss_array.numpy()
        grad_array = grad_array.numpy()
        time_array = time_array.numpy()
        ess_array = ess_array.numpy()
        test_reg_array = test_reg_array.numpy()
        test_mul_array = test_mul_array.numpy()
        multi_loss_array = multi_loss_array.numpy()

        pickle_obj(loss_array, os.path.join(out_dir, filename))

        filename_test_loss = "vrnn_reg_tloss_{0}.pkl".format(key)
        pickle_obj(test_reg_array, os.path.join(out_dir, filename_test_loss))

        filename_test_loss = "vrnn_mul_tloss_{0}.pkl".format(key)
        pickle_obj(test_mul_array, os.path.join(out_dir, filename_test_loss))

        filename_olik = "vrnn_olik_{0}.pkl".format(key)
        pickle_obj(obs_lik_array, os.path.join(out_dir, filename_olik))

        filename_mloss = "vrnn_mloss_{0}.pkl".format(key)
        pickle_obj(multi_loss_array, os.path.join(out_dir, filename_mloss))

        filename_ess = "vrnn_ess_{0}.pkl".format(key)
        pickle_obj(ess_array, os.path.join(out_dir, filename_ess))
        filename_grad = "vrnn_grad_{0}.pkl".format(key)
        pickle_obj(grad_array, os.path.join(out_dir, filename_grad))

        fig, ax = plt.subplots(figsize=(10, 5))
        ax.plot(ess_array, color=col)
        fig.savefig(os.path.join(out_dir, 'vrnn_ess_{0}.png'.format(key)))
        plt.close()

        fig, ax = plt.subplots(figsize=(10, 5))
        ax.plot(grad_array, color=col)
        fig.savefig(os.path.join(out_dir, 'vrnn_grad_{0}.png'.format(key)))
        plt.close()

        # fig, ax = plt.subplots(figsize=(10, 5))
        # ax.plot(loss_array[warmup:], color=col)
        # fig.savefig(os.path.join(out_dir, 'vrnn_loss_{0}.png'.format(key)))
        # plt.close()

        return multi_loss_array

    print(run_method)
    data_name = os.path.splitext(os.path.basename(data_fp))[0]

    if run_method == 'mult':
        multi_array = run_block(multinomial_smc, 'mult', n_iter, initial_lr, decay, steps, out_dir, col='blue',
                                data_name=data_name)

    if run_method == 'reg':
        print(resampling_method)
        reg_array = run_block(regularized_smc, 'reg', n_iter, initial_lr, decay, steps, out_dir, col='green',
                              data_name=data_name)
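
train_niter above records every per-step scalar through tf.TensorArray writes and stacks them on exit, the idiomatic way to accumulate values inside a tf.function loop. A self-contained mini-example of the same pattern:

import tensorflow as tf

@tf.function
def record_squares(num_steps):
    ta = tf.TensorArray(tf.float32, size=num_steps, dynamic_size=False, element_shape=[])
    for step in tf.range(num_steps):
        ta = ta.write(step, tf.cast(step, tf.float32) ** 2)
    return ta.stack()

print(record_squares(tf.constant(5)))  # [ 0.  1.  4.  9. 16.]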
Example #9
def main(resampling_method_value,
         resampling_neff,
         resampling_kwargs=None,
         T=100,
         batch_size=1,
         n_particles=25,
         data_seed=0,
         filter_seed=1,
         mesh_size=10,
         savefig=True,
         use_tqdm=False,
         use_xla=False,
         diff_epsilon=1e-1,
         optimal_proposal=False):

    v = 1.
    t = .1
    transition_matrix = np.array([[1., 1.], [0., 1.]], dtype=np.float32)

    transition_covariance = v**2 * np.array([[1 / 3, 1 / 2], [1 / 2, 1.]],
                                            dtype=np.float32)

    observation_matrix = np.array([[1., 0]], dtype=np.float32)
    observation_covariance = np.array([[t**2]], dtype=np.float32)

    resampling_method_enum = ResamplingMethodsEnum(resampling_method_value)

    x_linspace = np.linspace(0.95, 1., mesh_size).astype(np.float32)
    y_linspace = np.linspace(0.95, 1., mesh_size).astype(np.float32)
    mesh = np.asanyarray([(x, y) for x in x_linspace for y in y_linspace])

    np_random_state = np.random.RandomState(seed=data_seed)
    data, kf = get_data(transition_matrix, observation_matrix,
                        transition_covariance, observation_covariance, T,
                        np_random_state)

    if resampling_method_enum == ResamplingMethodsEnum.KALMAN:
        return kalman_main(kf, data, mesh, mesh_size, 1e-2, use_tqdm, savefig)

    observation_dataset = tf.data.Dataset.from_tensor_slices(data)

    if resampling_kwargs is None:
        resampling_kwargs = {}

    if resampling_neff == 0.:
        resampling_criterion = NeverResample()
    elif resampling_neff == 1.:
        resampling_criterion = AlwaysResample()
    else:
        resampling_criterion = NeffCriterion(resampling_neff, True)

    if resampling_method_enum == ResamplingMethodsEnum.MULTINOMIAL:
        resampling_method = MultinomialResampler()
    elif resampling_method_enum == ResamplingMethodsEnum.SYSTEMATIC:
        resampling_method = SystematicResampler()
    elif resampling_method_enum == ResamplingMethodsEnum.STRATIFIED:
        resampling_method = StratifiedResampler()
    elif resampling_method_enum == ResamplingMethodsEnum.REGULARIZED:
        resampling_method = RegularisedTransform(**resampling_kwargs)
    elif resampling_method_enum == ResamplingMethodsEnum.VARIANCE_CORRECTED:
        regularized_resampler = RegularisedTransform(**resampling_kwargs)
        resampling_method = PartiallyCorrectedRegularizedTransform(
            regularized_resampler)
    elif resampling_method_enum == ResamplingMethodsEnum.OPTIMIZED:
        lr = resampling_kwargs.pop('lr',
                                   resampling_kwargs.pop('learning_rate', 0.1))

        loss = SinkhornLoss(**resampling_kwargs, symmetric=True)
        optimizer = SGD(loss, lr=lr, decay=0.95)
        regularized_resampler = RegularisedTransform(**resampling_kwargs)

        resampling_method = OptimizedPointCloud(
            optimizer, intermediate_resampler=regularized_resampler)
    else:
        raise ValueError(
            f'resampling_method_name {resampling_method_enum} is not a valid ResamplingMethodsEnum'
        )

    modifiable_transition_matrix = tf.Variable(transition_matrix,
                                               trainable=False)
    observation_matrix = tf.convert_to_tensor(observation_matrix)
    transition_covariance_chol = tf.linalg.cholesky(transition_covariance)
    observation_covariance_chol = tf.linalg.cholesky(observation_covariance)

    initial_particles = np_random_state.normal(
        0., .01, [batch_size, n_particles, 2]).astype(np.float32)
    initial_state = State(initial_particles)

    smc = make_filter(observation_matrix,
                      modifiable_transition_matrix,
                      observation_covariance_chol,
                      transition_covariance_chol,
                      resampling_method,
                      resampling_criterion,
                      optimal_proposal=optimal_proposal)

    # if resampling_method.DIFFERENTIABLE:
    get_method = tf.function(get_surface, experimental_compile=use_xla)
    # else:
    #     fun = partial(get_surface_finite_difference, diff_epsilon=diff_epsilon)
    #     get_method = tf.function(fun, experimental_compile=use_xla)

    log_likelihoods, gradients = get_method(mesh, modifiable_transition_matrix,
                                            smc, initial_state, False,
                                            observation_dataset, T,
                                            filter_seed, use_tqdm)

    plot_surface(mesh, mesh_size, log_likelihoods.numpy(),
                 resampling_method_enum.name, resampling_kwargs, n_particles,
                 savefig)
    plot_vector_field(mesh, mesh_size, log_likelihoods.numpy(),
                      gradients.numpy(), resampling_method_enum.name,
                      resampling_kwargs, n_particles, savefig)
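
The mesh above is built with a nested comprehension; an equivalent vectorized construction (a sketch; indexing='ij' preserves the x-outer, y-inner ordering of the comprehension):

import numpy as np

mesh_size = 10
x = np.linspace(0.95, 1., mesh_size).astype(np.float32)
y = np.linspace(0.95, 1., mesh_size).astype(np.float32)
xx, yy = np.meshgrid(x, y, indexing='ij')
mesh = np.stack([xx.ravel(), yy.ravel()], axis=-1)  # shape (mesh_size**2, 2)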
Example #10
def main(resampling_method_value,
         resampling_neff,
         learning_rates=(1e-4, 1e-3),
         resampling_kwargs=None,
         currencies=('EUR', 'GBP', 'CAD'),
         batch_size=1,
         n_particles=25,
         api_key='',
         start_date="2019-09-02",
         end_date="2020-01-02",
         n_iter=50,
         savefig=False,
         filter_seed=0,
         use_xla=False,
         change_seed=True):
    data = get_data(currencies, api_key, start_date, end_date)
    M = len(currencies)

    T = len(data)

    resampling_method_enum = ResamplingMethodsEnum(resampling_method_value)

    observation_dataset = tf.data.Dataset.from_tensor_slices(data)

    if resampling_kwargs is None:
        resampling_kwargs = {}

    if resampling_neff == 0.:
        resampling_criterion = NeverResample()
    elif resampling_neff == 1.:
        resampling_criterion = AlwaysResample()
    else:
        resampling_criterion = NeffCriterion(resampling_neff, True)

    if resampling_method_enum == ResamplingMethodsEnum.MULTINOMIAL:
        resampling_method = MultinomialResampler()
    elif resampling_method_enum == ResamplingMethodsEnum.SYSTEMATIC:
        resampling_method = SystematicResampler()
    elif resampling_method_enum == ResamplingMethodsEnum.STRATIFIED:
        resampling_method = StratifiedResampler()
    elif resampling_method_enum == ResamplingMethodsEnum.REGULARIZED:
        resampling_method = RegularisedTransform(**resampling_kwargs)
    elif resampling_method_enum == ResamplingMethodsEnum.VARIANCE_CORRECTED:
        regularized_resampler = RegularisedTransform(**resampling_kwargs)
        resampling_method = PartiallyCorrectedRegularizedTransform(
            regularized_resampler)
    elif resampling_method_enum == ResamplingMethodsEnum.OPTIMIZED:
        lr = resampling_kwargs.pop('lr',
                                   resampling_kwargs.pop('learning_rate', 0.1))

        loss = SinkhornLoss(**resampling_kwargs, symmetric=True)
        optimizer = SGD(loss, lr=lr, decay=0.95)
        regularized_resampler = RegularisedTransform(**resampling_kwargs)

        resampling_method = OptimizedPointCloud(
            optimizer, intermediate_resampler=regularized_resampler)
    elif resampling_method_enum == ResamplingMethodsEnum.CORRECTED:
        resampling_method = CorrectedRegularizedTransform(**resampling_kwargs)
    else:
        raise ValueError(
            f'resampling_method_name {resampling_method_enum} is not a valid ResamplingMethodsEnum'
        )

    np_random_state = np.random.RandomState(seed=555)

    initial_particles = np_random_state.normal(
        1., 0.5, [batch_size, n_particles, M]).astype(np.float32)
    initial_state = State(initial_particles)

    large_initial_particles = np_random_state.normal(
        1., 0.5, [25, n_particles, M]).astype(np.float32)
    large_initial_state = State(large_initial_particles)

    mu_init = -5. * tf.ones(M)
    F_init = 0.9 * tf.eye(M)
    transition_cov_init = 0.35 * tf.eye(M)
    observation_cov_init = 1. * tf.eye(M)

    mu = tf.Variable(mu_init, trainable=True)
    F = tf.Variable(F_init, trainable=True)
    transition_cov = tf.Variable(transition_cov_init, trainable=True)
    observation_cov = tf.Variable(observation_cov_init, trainable=False)

    smc = make_filter(mu, F, transition_cov, observation_cov,
                      resampling_method, resampling_criterion)
    surrogate_smc = make_filter(mu, F, transition_cov, observation_cov,
                                SystematicResampler(), resampling_criterion)

    def optimizer_maker(learning_rate):
        # tf.function doesn't like creating variables. This is a way to create them outside the graph
        # We can't reuse the same optimizer because it would be giving a warmed-up momentum to the ones run later
        optimizer = tf.optimizers.Adam(learning_rate=learning_rate)
        return optimizer

    variables = [mu, F, transition_cov]
    initial_values = [mu_init, F_init, transition_cov_init]
    losses, ess_profiles = compare_learning_rates(
        smc, initial_state, observation_dataset, T, variables, initial_values,
        n_iter, optimizer_maker, learning_rates, filter_seed, change_seed,
        large_initial_state, surrogate_smc)

    losses_df = pd.DataFrame(np.stack(losses).T,
                             columns=np.log10(learning_rates))
    ess_df = pd.DataFrame(np.stack(ess_profiles).T,
                          columns=np.log10(learning_rates))

    losses_df.columns.name = 'log learning rate'
    losses_df.index.name = 'epoch'

    ess_df.columns.name = 'log learning rate'
    ess_df.index.name = 'epoch'

    # plot_losses(losses_df, resampling_method_enum.name, savefig, dx, dy, dense, T, change_seed)
    plot_losses_vs_ess(losses_df, ess_df, resampling_method_enum.name, savefig,
                       M, n_particles, change_seed, batch_size, n_iter,
                       resampling_kwargs.get("epsilon"))

    print(mu)
    print(F)
    print(transition_cov)

    print(mu_init)
    print(F_init)
    print(transition_cov_init)
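
Only mu, F and transition_cov are created with trainable=True above; observation_cov is held fixed. A tiny check of how tf.GradientTape treats that split (by default the tape watches only trainable variables):

import tensorflow as tf

v_train = tf.Variable(1., trainable=True)
v_fixed = tf.Variable(1., trainable=False)
with tf.GradientTape() as tape:
    loss = v_train * v_fixed
print(tape.gradient(loss, [v_train, v_fixed]))  # [1.0, None]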