    def fit(self, xs, ys, log=True):
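        """Fit the regressor to (xs, ys).

        Optionally subsamples the data, refits the input/output
        normalization constants, then optimizes the network over shuffled
        minibatches; diagnostics (losses and, with a trust region, mean KL)
        are logged when ``log`` is True.
        """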

        if self._subsample_factor < 1:
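            # draw a random subset of indices (uniformly, with replacement)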
            num_samples_tot = xs.shape[0]
            idx = np.random.randint(
                0, num_samples_tot,
                int(num_samples_tot * self._subsample_factor))
            xs, ys = xs[idx], ys[idx]

        if self._normalize_inputs:
            # recompute normalizing constants for inputs
            self._x_mean_var.set_value(
                np.mean(xs, axis=0,
                        keepdims=True).astype(theano.config.floatX))
            self._x_std_var.set_value((np.std(xs, axis=0, keepdims=True) +
                                       1e-8).astype(theano.config.floatX))
        if self._normalize_outputs:
            # recompute normalizing constants for outputs
            self._y_mean_var.set_value(
                np.mean(ys, axis=0,
                        keepdims=True).astype(theano.config.floatX))
            self._y_std_var.set_value((np.std(ys, axis=0, keepdims=True) +
                                       1e-8).astype(theano.config.floatX))
        if self._name:
            prefix = self._name + "_"
        else:
            prefix = ""
        # FIXME: needs batch computation to avoid OOM.
        loss_before, loss_after, mean_kl, batch_count = 0., 0., 0., 0
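        # optimize over shuffled minibatches, accumulating per-batch diagnostics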
        for batch in iterate_minibatches_generic(input_lst=[xs, ys],
                                                 batchsize=self._batchsize,
                                                 shuffle=True):
            batch_count += 1
            _xs, _ys = batch
            if self._use_trust_region:
                old_means, old_log_stds = self._f_pdists(_xs)
                inputs = [_xs, _ys, old_means, old_log_stds]
            else:
                inputs = [_xs, _ys]
            loss_before += self._optimizer.loss(inputs)

            self._optimizer.optimize(inputs)
            loss_after += self._optimizer.loss(inputs)
            if self._use_trust_region:
                mean_kl += self._optimizer.constraint_val(inputs)

        if log:
            logger.record_tabular(prefix + 'LossBefore',
                                  loss_before / batch_count)
            logger.record_tabular(prefix + 'LossAfter',
                                  loss_after / batch_count)
            logger.record_tabular(prefix + 'dLoss',
                                  (loss_before - loss_after) / batch_count)
            if self._use_trust_region:
                logger.record_tabular(prefix + 'MeanKL', mean_kl / batch_count)
    def fit(self, xs, ys):
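        """Fit the regressor to (xs, ys).

        Same procedure as the variant above (subsample, refit normalization
        constants, optimize over shuffled minibatches), but diagnostics are
        always logged.
        """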

        if self._subsample_factor < 1:
            num_samples_tot = xs.shape[0]
            idx = np.random.randint(0, num_samples_tot, int(num_samples_tot * self._subsample_factor))
            xs, ys = xs[idx], ys[idx]

        if self._normalize_inputs:
            # recompute normalizing constants for inputs
            self._x_mean_var.set_value(
                np.mean(xs, axis=0, keepdims=True).astype(theano.config.floatX))
            self._x_std_var.set_value(
                (np.std(xs, axis=0, keepdims=True) + 1e-8).astype(theano.config.floatX))
        if self._normalize_outputs:
            # recompute normalizing constants for outputs
            self._y_mean_var.set_value(
                np.mean(ys, axis=0, keepdims=True).astype(theano.config.floatX))
            self._y_std_var.set_value(
                (np.std(ys, axis=0, keepdims=True) + 1e-8).astype(theano.config.floatX))
        if self._name:
            prefix = self._name + "_"
        else:
            prefix = ""
        # FIXME: needs batch computation to avoid OOM.
        loss_before, loss_after, mean_kl, batch_count = 0., 0., 0., 0
        for batch in iterate_minibatches_generic(input_lst=[xs, ys], batchsize=self._batchsize, shuffle=True):
            batch_count += 1
            _xs, _ys = batch
            if self._use_trust_region:
                old_means, old_log_stds = self._f_pdists(_xs)
                inputs = [_xs, _ys, old_means, old_log_stds]
            else:
                inputs = [_xs, _ys]
            loss_before += self._optimizer.loss(inputs)

            self._optimizer.optimize(inputs)
            loss_after += self._optimizer.loss(inputs)
            if self._use_trust_region:
                mean_kl += self._optimizer.constraint_val(inputs)

        logger.record_tabular(prefix + 'LossBefore', loss_before / batch_count)
        logger.record_tabular(prefix + 'LossAfter', loss_after / batch_count)
        logger.record_tabular(prefix + 'dLoss', (loss_before - loss_after) / batch_count)
        if self._use_trust_region:
            logger.record_tabular(prefix + 'MeanKL', mean_kl / batch_count)
    def fit(self, xs, ys):
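        """Fit the regressor using multiple parallel workers.

        Each worker reports its sample count and, when normalizing, writes
        its local sums and sums of squares into shared arrays; barriers
        synchronize the workers before the aggregated mean/std are pushed
        into the normalization variables. Optimization then proceeds over
        this worker's minibatches as in the serial version.
        """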
        shareds, barriers = self._par_objs

        # report sample sizes
        n_steps_collected = xs.shape[0]
        shareds.n_steps_collected[self.rank] = n_steps_collected
        self._optimizer.set_avg_fac(n_steps_collected)
        total_n_steps_collected = np.sum(shareds.n_steps_collected)

        if self._subsample_factor < 1:
            idx = np.random.randint(
                0, n_steps_collected,
                int(n_steps_collected * self._subsample_factor))
            xs, ys = xs[idx], ys[idx]
        """ the only change in parallel version """
        if self._normalize_inputs:
            # each worker computes its statistics
            input_dim = np.prod(self.input_shape)
            x_sum = np.sum(xs, axis=0, keepdims=False).reshape(input_dim)
            x_square_sum = np.sum(xs**2, axis=0,
                                  keepdims=False).reshape(input_dim)
            shareds.x_sum_2d[:, self.rank] = x_sum
            shareds.x_square_sum_2d[:, self.rank] = x_square_sum
            barriers.normalize_inputs[0].wait()

            # sum up statistics from different workers
            # currently this is performed by all workers
            x_mean = np.sum(shareds.x_sum_2d, axis=1) / total_n_steps_collected
            x_square_mean = np.sum(shareds.x_square_sum_2d,
                                   axis=1) / total_n_steps_collected
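            # Var[x] = E[x^2] - (E[x])^2; 1e-8 keeps the sqrt argument positive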
            x_std = np.sqrt(x_square_mean - x_mean**2 + 1e-8)

            # prepare for NN to use
            self._x_mean_var.set_value(
                x_mean.reshape((1, -1)).astype(theano.config.floatX))
            self._x_std_var.set_value(
                x_std.reshape((1, -1)).astype(theano.config.floatX))
            barriers.normalize_inputs[1].wait()

        if self._normalize_outputs:
            # each worker computes its statistics
            output_dim = self.output_dim
            y_sum = np.sum(ys, axis=0, keepdims=False).reshape(output_dim)
            y_square_sum = np.sum(ys**2, axis=0,
                                  keepdims=False).reshape(output_dim)
            shareds.y_sum_2d[:, self.rank] = y_sum
            shareds.y_square_sum_2d[:, self.rank] = y_square_sum
            barriers.normalize_outputs[0].wait()

            # sum up statistics from different workers
            # currently this is performed by all workers
            y_mean = np.sum(shareds.y_sum_2d, axis=1) / total_n_steps_collected
            y_square_mean = np.sum(shareds.y_square_sum_2d,
                                   axis=1) / total_n_steps_collected
            y_std = np.sqrt(y_square_mean - y_mean**2 + 1e-8)

            # prepare for NN to use
            self._y_mean_var.set_value(
                y_mean.reshape((1, -1)).astype(theano.config.floatX))
            self._y_std_var.set_value(
                y_std.reshape((1, -1)).astype(theano.config.floatX))
            # DEBUG: check whether the normalization is correct
            # print(y_sum / n_steps_collected, y_mean,np.std(ys), y_std)
            barriers.normalize_outputs[1].wait()
        """""" """""" """""" """""" """""" """"""

        if self._name:
            prefix = self._name + "_"
        else:
            prefix = ""

        # FIXME: needs batch computation to avoid OOM.
        loss_before, loss_after, mean_kl, batch_count = 0., 0., 0., 0
        for batch in iterate_minibatches_generic(input_lst=[xs, ys],
                                                 batchsize=self._batchsize,
                                                 shuffle=True):
            batch_count += 1
            _xs, _ys = batch
            if self._use_trust_region:
                old_means, old_log_stds = self._f_pdists(_xs)
                inputs = [_xs, _ys, old_means, old_log_stds]
            else:
                inputs = [_xs, _ys]
            loss_before += self._optimizer._loss(inputs, extra_inputs=None)

            self._optimizer.optimize(inputs)
            loss_after += self._optimizer._loss(inputs, extra_inputs=None)
            if self._use_trust_region:
                mean_kl += self._optimizer._constraint_val(inputs,
                                                           extra_inputs=None)

        logger.record_tabular(prefix + 'LossBefore', loss_before / batch_count)
        logger.record_tabular(prefix + 'LossAfter', loss_after / batch_count)
        logger.record_tabular(prefix + 'dLoss',
                              (loss_before - loss_after) / batch_count)
        if self._use_trust_region:
            logger.record_tabular(prefix + 'MeanKL', mean_kl / batch_count)
    def fit(self, xs, ys):
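        """Fit the regressor to (xs, ys) with NaN protection.

        Same procedure as the serial variants, but the network parameters
        are snapshotted before each minibatch update and restored (and the
        batch appended to ``self.error_file``) whenever the post-update
        loss is NaN or infinite.
        """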

        if self._subsample_factor < 1:
            num_samples_tot = xs.shape[0]
            idx = np.random.randint(
                0, num_samples_tot,
                int(num_samples_tot * self._subsample_factor))
            xs, ys = xs[idx], ys[idx]

        if self._normalize_inputs:
            # recompute normalizing constants for inputs
            self._x_mean_var.set_value(
                np.mean(xs, axis=0,
                        keepdims=True).astype(theano.config.floatX))
            self._x_std_var.set_value((np.std(xs, axis=0, keepdims=True) +
                                       1e-8).astype(theano.config.floatX))
        if self._normalize_outputs:
            # recompute normalizing constants for outputs
            self._y_mean_var.set_value(
                np.mean(ys, axis=0,
                        keepdims=True).astype(theano.config.floatX))
            self._y_std_var.set_value((np.std(ys, axis=0, keepdims=True) +
                                       1e-8).astype(theano.config.floatX))
        if self._name:
            prefix = self._name + "_"
        else:
            prefix = ""
        # FIXME: needs batch computation to avoid OOM.
        loss_before, loss_after, mean_kl, batch_count = 0., 0., 0., 0
        for batch in iterate_minibatches_generic(input_lst=[xs, ys],
                                                 batchsize=self._batchsize,
                                                 shuffle=True):
            batch_count += 1
            _xs, _ys = batch
            if self._use_trust_region:
                old_means, old_log_stds = self._f_pdists(_xs)
                inputs = [_xs, _ys, old_means, old_log_stds]
            else:
                inputs = [_xs, _ys]

            loss_before_cur = self._optimizer.loss(inputs)

            # save the current parameters so they can be restored if the update produces NaNs
            params_before_optimization = copy.deepcopy(
                self._optimizer._target.get_param_values(
                    trainable=True).astype('float64'))
            self._optimizer.optimize(inputs)
            loss_after_cur = self._optimizer.loss(inputs)

            if (not math.isnan(loss_after_cur)) and \
               (not math.isinf(loss_after_cur)):
                loss_after += loss_after_cur
                loss_before += loss_before_cur
                if self._use_trust_region:
                    mean_kl += self._optimizer.constraint_val(inputs)
            else:  # the update produced a NaN/inf loss; revert this batch
                batch_count -= 1  # exclude the failed batch from the averages
                batch_str = str(inputs) + '\n'
                # document the failing batch for later inspection
                with open(self.error_file, "a") as myfile:
                    myfile.write(batch_str)
                # restore the pre-optimization parameters
                self._optimizer._target.set_param_values(
                    params_before_optimization, trainable=True)
                print('ERROR: NaNs appeared; parameters reverted to:',
                      params_before_optimization)

        if batch_count == 0:
            # avoid division by zero (and NaNs) when every batch failed
            batch_count = 1

        logger.record_tabular(prefix + 'LossBefore', loss_before / batch_count)
        logger.record_tabular(prefix + 'LossAfter', loss_after / batch_count)
        logger.record_tabular(prefix + 'dLoss',
                              (loss_before - loss_after) / batch_count)
        if self._use_trust_region:
            logger.record_tabular(prefix + 'MeanKL', mean_kl / batch_count)