Code example #1
File: dae.py  Project: gyom/denoising_autoencoder
import sys
import numpy as np

# The isotropic_gaussian_noise_* helpers come from the surrounding project
# (dae.py and its neighbours) and are not shown here.


def get_noisy_X_and_importance_weights(X, e, walkback_vector_func=None):
    # Dispatch on the keys present in the experiment configuration dict `e`
    # to pick the corruption scheme and the matching importance weights.
    if 'target' in e and 'sampled' in e:
        sys.stdout.write("    Using stddev (sampled, target) = (%f, %f),\n" %
                         (e['sampled'], e['target']))
        (noisy_X, importance_sampling_weights
         ) = isotropic_gaussian_noise_and_importance_sampling_weights(
             X, e['sampled'], e['target'])

    elif 'sampled' in e and 'kicking' in e and 'kicking_param_p' in e:
        noisy_X = isotropic_gaussian_noise_with_kicking(
            X, e['sampled'], e['kicking'], e['kicking_param_p'])
        importance_sampling_weights = np.ones((noisy_X.shape[0], ))
    elif 'sampled' in e and 'walkback_param_p' in e:
        assert walkback_vector_func is not None
        noisy_X = isotropic_gaussian_noise_with_walkback(X,
                                                         e['sampled'],
                                                         walkback_vector_func,
                                                         e['walkback_param_p'],
                                                         min_steps=0,
                                                         cutoff=10)
        importance_sampling_weights = np.ones((noisy_X.shape[0], ))
    else:
        print("Error: unrecognized setup.")
        print(e)
        sys.exit(1)

    return (noisy_X, importance_sampling_weights)
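
The snippet above relies on isotropic_gaussian_noise_and_importance_sampling_weights, a helper defined elsewhere in the project and not shown here. As a rough guide to what such a helper could return, here is a minimal sketch assuming the weights are the usual importance-sampling density ratio between a target Gaussian (stddev target_stddev) and the broader sampling Gaussian (stddev sampled_stddev) actually used to corrupt X; the function name and the numbers below are illustrative, not taken from the project.

import numpy as np


def gaussian_noise_and_importance_weights_sketch(X, sampled_stddev, target_stddev,
                                                 rng=np.random):
    """Corrupt X with isotropic Gaussian noise drawn at `sampled_stddev` and
    weight each row by p_target(noise) / p_sampled(noise) (assumed contract)."""
    noise = rng.normal(size=X.shape, scale=sampled_stddev)
    d = X.shape[1]
    squared_norms = (noise ** 2).sum(axis=1)
    # log density ratio of N(0, target_stddev^2 I) over N(0, sampled_stddev^2 I)
    log_w = (d * np.log(sampled_stddev / target_stddev)
             + 0.5 * squared_norms * (1.0 / sampled_stddev ** 2
                                      - 1.0 / target_stddev ** 2))
    return X + noise, np.exp(log_w)


X = np.random.normal(size=(5, 3))
noisy_X, w = gaussian_noise_and_importance_weights_sketch(X, 4.0 * 0.1, 0.1)
print(noisy_X.shape, w.shape)  # (5, 3) (5,)
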
Code example #2
File: dae.py  Project: gyom/denoising_autoencoder
    def fit_with_decreasing_noise(
        self,
        X,
        list_of_train_stddev,
        optimization_args,
        early_termination_args={},
        X_valid=None,
        list_of_additional_valid_stddev=None,
    ):
        """
        The 'optimization_args' are passed through to the 'fit' function
        almost unchanged.

        There is a special provision for its 'maxiter' entry when we are
        given a list: in that case we use one value of maxiter from the
        list for each value of list_of_train_stddev.

        The 'early_termination_args' is optional. It provides a way to
        stop the training if we determine that we started in a state
        that was irredeemable and would only lead to a bad local minimum.
        We can keep the r(x) = x solution in mind as a benchmark and
        observe that, with r(x) = x, the loss roughly equals
            d * train_stddev**2, where d is the dimension of the data.

        The 'early_termination_args' dict has one key for now:
            early_termination_args['stop_if_loss_greater_than'] = [...]
                or
            early_termination_args['stop_if_loss_greater_than'] = "auto"

        If X_valid is not None, we also return the values of the
        objective function evaluated on those validation samples.
        Those values are the ones used to decide whether to stop the
        descent through the train_stddev values.
        """

        # If we were passed the argument "auto", we have to replace the
        # value with an array of corresponding values.
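        # For illustration (hypothetical numbers, not from the original source):
        # with d = X.shape[1] and list_of_train_stddev = [1.0, 0.5, 0.1],
        # "auto" expands to the thresholds [d * 1.0**2, d * 0.5**2, d * 0.1**2].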
        if (
            "stop_if_loss_greater_than" in early_termination_args
            and isinstance(early_termination_args["stop_if_loss_greater_than"], str)
        ):
            if early_termination_args["stop_if_loss_greater_than"] == "auto":
                early_termination_args["stop_if_loss_greater_than"] = [
                    X.shape[1] * train_stddev ** 2 for train_stddev in list_of_train_stddev
                ]
                print("early termination with losses : ")
                print(early_termination_args["stop_if_loss_greater_than"])
            else:
                print("Wrong value for early_termination_args. The only valid string is 'auto'.")
                print("Exiting.")
                sys.exit(1)

        # at some point we might want to decide to
        # record all the best_q for the sequence
        seq_train_mean_best_U_q = []
        seq_valid_mean_best_U_q = []
        i = 0
        progress_logger = make_progress_logger("Training")

        for train_stddev in list_of_train_stddev:

            sys.stdout.write("    Using train_stddev %f, " % train_stddev)
            (noisy_X, importance_sampling_weights) = isotropic_gaussian_noise_and_importance_sampling_weights(
                X, 4.0 * train_stddev, train_stddev
            )
            # noisy_X = X + np.random.normal(size = X.shape, scale = train_stddev)

            if "maxiter" in optimization_args and isinstance(optimization_args["maxiter"], (list, np.ndarray)):
                assert len(optimization_args["maxiter"]) == len(list_of_train_stddev)
                optimization_args0 = conj(optimization_args, "maxiter", optimization_args["maxiter"][i])
            else:
                optimization_args0 = optimization_args
            (best_q, train_U_best_q_) = self.fit(X, noisy_X, optimization_args0)
            # (best_q, train_U_best_q_) = self.fit(X, noisy_X, optimization_args)

            train_U_best_q = self.q_loss(best_q, X, noisy_X, importance_sampling_weights).sum()
            # sanity check to make sure that we're evaluating this right
            assert abs(train_U_best_q - train_U_best_q_) < 1e-8

            train_mean_U_best_q = train_U_best_q / X.shape[0]
            seq_train_mean_best_U_q.append(train_mean_U_best_q)
            sys.stdout.write("train mean loss is %f, " % (train_mean_U_best_q,))

            if X_valid is not None:
                (noisy_X_valid, importance_sampling_weights) = isotropic_gaussian_noise_and_importance_sampling_weights(
                    X_valid, 4.0 * train_stddev, train_stddev
                )

                # noisy_X_valid = X_valid + np.random.normal(size = X_valid.shape, scale = train_stddev)
                valid_U_best_q = self.q_loss(best_q, X_valid, noisy_X_valid, importance_sampling_weights).sum()
                valid_mean_U_best_q = valid_U_best_q / X_valid.shape[0]
                seq_valid_mean_best_U_q.append(valid_mean_U_best_q)
                sys.stdout.write("valid mean loss is %f." % (valid_mean_U_best_q,))

                # if we're dealing with a validation set, it will be the one used
                # to determine the stopping point
                if (
                    "stop_if_loss_greater_than" in early_termination_args
                    and early_termination_args["stop_if_loss_greater_than"][i] < valid_mean_U_best_q
                ):
                    break
            else:
                # if we don't have a validation set, then we'll use the train
                # mean loss (train_mean_U_best_q) for the termination condition

                if (
                    "stop_if_loss_greater_than" in early_termination_args
                    and early_termination_args["stop_if_loss_greater_than"][i] < train_mean_U_best_q
                ):
                    break

            print ""
            progress_logger(1.0 * i / len(list_of_train_stddev))
            i += 1
        # end for

        # might as well pad the rest of the list to
        # signify that we terminated early
        while len(seq_train_mean_best_U_q) < len(list_of_train_stddev):
            seq_train_mean_best_U_q.append(np.nan)
        while len(seq_valid_mean_best_U_q) < len(list_of_train_stddev):
            seq_valid_mean_best_U_q.append(np.nan)

        # Now we want to recompute the model losses for all the values of
        # the train_stddev, but using the final parameters best_q.
        # This will be used as an additional quality evaluation to determine
        # how the DAE treats data that's relatively far from the manifold
        # once it's done training.
        # It might be even more informative than the validation losses.

        seq_valid_mean_U_final_best_q = None
        seq_alt_valid_mean_U_final_best_q = None
        if X_valid is not None:
            nreps = 10
            # The corrupted data has to be regenerated for every noise level and
            # every repetition; otherwise all the averaged entries would be identical.
            seq_valid_mean_U_final_best_q = []
            for train_stddev in list_of_train_stddev:
                losses = []
                for _ in range(nreps):
                    (noisy_X_valid, importance_sampling_weights) = isotropic_gaussian_noise_and_importance_sampling_weights(
                        X_valid, 4.0 * train_stddev, train_stddev
                    )
                    losses.append(
                        self.q_loss(best_q, X_valid, noisy_X_valid, importance_sampling_weights).sum()
                        / X_valid.shape[0]
                    )
                seq_valid_mean_U_final_best_q.append(np.array(losses).mean())

            if (list_of_additional_valid_stddev is not None) and len(list_of_additional_valid_stddev) > 0:
                # TODO : use some kind of tool to generate the importance_sampling_weights
                seq_alt_valid_mean_U_final_best_q = [
                    np.array(
                        [
                            self.q_loss(
                                best_q, X_valid, X_valid + np.random.normal(size=X_valid.shape, scale=alt_valid_stddev)
                            ).sum()
                            / X_valid.shape[0]
                            for _ in range(nreps)
                        ]
                    ).mean()
                    for alt_valid_stddev in list_of_additional_valid_stddev
                ]
        # end if

        return (
            seq_train_mean_best_U_q,
            seq_valid_mean_best_U_q,
            seq_valid_mean_U_final_best_q,
            seq_alt_valid_mean_U_final_best_q,
        )
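
For context, here is a minimal sketch of how this method might be driven. It only sets up the arguments; `dae` stands in for an instance of the surrounding DAE class from dae.py (not constructed here), and the data shapes, schedule, and iteration counts are made-up illustrative values. The "auto" setting expands to the d * train_stddev**2 thresholds described in the docstring.

import numpy as np

# Hypothetical data; dimensions are arbitrary.
X_train = np.random.normal(size=(1000, 10))
X_valid = np.random.normal(size=(200, 10))

# A decreasing noise schedule, one call to 'fit' per stddev level.
list_of_train_stddev = [1.0 * (0.5 ** k) for k in range(6)]

# One 'maxiter' per noise level; when 'maxiter' is a list, the method picks
# the entry matching the current level.
optimization_args = {"maxiter": [200] * len(list_of_train_stddev)}

# "auto" expands to d * train_stddev**2 per level (the loss of r(x) = x).
early_termination_args = {"stop_if_loss_greater_than": "auto"}

# The actual call, assuming `dae` is an instance of the class above:
# (seq_train, seq_valid, seq_valid_final, seq_alt_valid_final) = dae.fit_with_decreasing_noise(
#     X_train, list_of_train_stddev, optimization_args,
#     early_termination_args=early_termination_args, X_valid=X_valid)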