Example #1
import random

import numpy as np
import pandas as pd
from sklearn.neighbors import KernelDensity


def replace_missing_with_kde_samples(data_frame, attribute):
    """ Replace missing values based on samples from KDE function

    :param data_frame: Pandas dataframe holding the attribute
    :type data_frame: pandas.DataFrame
    :param attribute: The attribute for which missing values should be replaced
    :type attribute: str
    """
    minimum = data_frame[attribute].min()
    maximum = data_frame[attribute].max()
    values = np.array(data_frame[attribute].dropna())
    kde = KernelDensity(kernel='gaussian', bandwidth=0.2).fit(
        values.reshape(-1, 1))
    missing_values = data_frame.loc[
        data_frame[attribute].isnull(), attribute]
    samples = [num for num in
               kde.sample(n_samples=len(values))[:, 0]
               if minimum <= num <= maximum]
    while len(samples) < 2*len(missing_values):
        samples.extend([num for num in
                        kde.sample(n_samples=len(values))[:, 0]
                        if minimum <= num <= maximum])
    samples = [samples[i] for i in
               sorted(random.sample(range(len(samples)), len(missing_values)))]

    # Assign by position: missing_values keeps the DataFrame labels of the
    # missing rows, so label-based indexing with 0..n-1 would be wrong.
    for i, value in enumerate(samples):
        missing_values.iloc[i] = value

    data_frame.update(pd.DataFrame(missing_values))
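
The same pattern can be exercised end to end on toy data. A minimal, self-contained sketch (the column name "height", the bandwidth, and the 4x oversampling factor are illustrative assumptions, not taken from the example above):

import numpy as np
import pandas as pd
from sklearn.neighbors import KernelDensity

rng = np.random.default_rng(0)
df = pd.DataFrame({"height": rng.normal(170.0, 10.0, 200)})
df.loc[df.sample(frac=0.1, random_state=0).index, "height"] = np.nan  # knock out 10%

observed = df["height"].dropna().to_numpy().reshape(-1, 1)
kde = KernelDensity(kernel="gaussian", bandwidth=2.0).fit(observed)

mask = df["height"].isna()
lo, hi = observed.min(), observed.max()
draws = kde.sample(4 * mask.sum(), random_state=0)[:, 0]   # oversample, then keep
draws = draws[(draws >= lo) & (draws <= hi)][:mask.sum()]  # in-range draws (re-sample if too few survive)
df.loc[mask, "height"] = draws                             # fill the missing cells
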
Example #2
def kde_sampler_life(enc,
                     X,
                     y,
                     batch_size,
                     bandwidth=0.5,
                     nn_subset_size=None):

    while True:

        log_time("get_z_enc")
        if nn_subset_size is None:
            imgs = X
        else:
            rand_idxs = np.random.randint(0, len(X), nn_subset_size)
            imgs = X[rand_idxs]
        z_enc = ld_gan.utils.model_handler.apply_model(enc,
                                                       imgs,
                                                       batch_size=500)
        log_time("get_z_enc")

        batch_idxs = np.random.randint(0, len(z_enc), batch_size)
        img_batch = imgs[batch_idxs]
        y_batch = y[batch_idxs]
        kde = KernelDensity(bandwidth=bandwidth).fit(z_enc)
        z_batch = kde.sample(batch_size)

        yield img_batch, y_batch, z_batch, z_batch
Example #3
def find_max_density(point_list):
    point_list, _ = remove_nan(point_list)
    if point_list.shape[0] == 0:
        return [float('nan'), float('nan'), float('nan')]
    kde = KernelDensity(kernel='gaussian', bandwidth=0.2).fit(point_list)
    points = kde.sample(100000)
    prob_list = kde.score_samples(points)
    max_point = points[np.argmax(prob_list)]
    # print "max", max_point
    return max_point
def train_rlos(data, show_chart=False):
    """Train LOS estimator: patient LOS for each triplet (sex, age, sline)."""
    freq = {}
    for row in data:
        sex = int(row["sex"])
        age = fp.split_age(int(row["age"]))
        sline = row["sline"]
        rlos = int(row["rlos"])

        if rlos == 0:
            print "RLOS equals zero for sex %d, age %d, SL %s" % (sex, age, sline)

        key = (sex, age, sline)  # 'key' instead of shadowing the built-in tuple
        freq.setdefault(key, [])
        freq[key].append(rlos)

    result = {}
    for key, train_data in freq.items():
        (sex, age, sline) = key
        if len(train_data) < training_threshold:
            print("Too small training set (<%d) for sex %d, age %d, SL %s. Data will be skipped." %
                  (training_threshold, sex, age, sline))
            continue

        X = np.array([train_data]).transpose()
        kde = KernelDensity(kernel='tophat', bandwidth=0.5).fit(X)
        kdef = lambda size: [round(l[0]) for l in kde.sample(size).tolist()]
        result[key] = kde

        if show_chart:
            # print "Sex=%d, Age=%d, SL=%s" % (sex, age, sline)
            # print_freq(ages)
            samples = kdef(len(train_data)) if len(train_data) < 500 else kdef(500)
            # print_freq(samples)

            # hist for train data
            plt.subplot(211)
            plt.title("RLOS train data for Sex=%d, Age=%d, SL=%s" % (sex, age, sline))
            plt.ylabel('freq')
            plt.xlabel('RLOS')
            plt.hist(train_data)

            # estimated density
            plt.subplot(212)
            plt.title("Estimated density Sex=%d, Age=%d, SL=%s" % (sex, age, sline))
            plt.ylabel('freq')
            plt.xlabel('RLOS')
            plt.hist(samples)

            plt.show()

    return result
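
The tophat kernel with bandwidth 0.5 is what makes the round() trick above work: each integer LOS value is smeared over +/-0.5, so rounding the draws recovers an integer-valued sampler. A self-contained sketch with toy LOS data (the values and sample count are illustrative):

import numpy as np
from sklearn.neighbors import KernelDensity

rlos_train = np.array([1, 1, 2, 2, 2, 3, 4, 4, 7])  # toy lengths of stay, in days
kde = KernelDensity(kernel='tophat', bandwidth=0.5).fit(rlos_train.reshape(-1, 1))

# sample() supports the gaussian and tophat kernels; round back to integers
samples = np.round(kde.sample(1000, random_state=0)[:, 0]).astype(int)
print(np.bincount(samples))  # empirical frequencies of the sampled LOS values
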
def get_numerical_signature(values, S):
    '''
    Learns a distribution of the values
    Then generates a sample of size S
    '''
    # Transform data to numpy array
    Xnumpy = np.asarray(values)
    X = Xnumpy.reshape(-1, 1)
    # Learn kernel
    kde = KernelDensity(kernel=C.kd["kernel"],
                        bandwidth=C.kd["bandwidth"]).fit(X)
    sig_v = [kde.sample()[0][0] for x in range(S)]
    return sig_v
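
kde.sample() is called once per signature value above; since sample() takes an n_samples argument, the S draws can be taken in a single call, which is statistically equivalent and much faster. A sketch on toy data (the bandwidth is an assumption; the original reads kernel and bandwidth from its C.kd config):

import numpy as np
from sklearn.neighbors import KernelDensity

values = np.random.default_rng(1).exponential(5.0, size=300)  # toy attribute values
kde = KernelDensity(kernel='gaussian', bandwidth=0.5).fit(values.reshape(-1, 1))

S = 50
sig_v = kde.sample(S, random_state=0)[:, 0].tolist()  # one call instead of S calls
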
def train_admit_count(data, show_chart=False):
    """Train patient admittance number for triplet (sex, age, sline)"""
    freq = {}
    for row in data:
        sex = int(row["sex"])
        age = fp.split_age(int(row["age"]))
        sline = row["sline"]
        admit = row["admit"]

        key = (sex, age, sline)  # 'key' instead of shadowing the built-in tuple
        freq.setdefault(key, {})
        freq[key].setdefault(admit, 0)
        freq[key][admit] += 1

    result = {}
    for key, days in freq.items():
        (sex, age, sline) = key
        train_data = list(days.values())  # materialize the view for len() and np.array()
        if len(train_data) < training_threshold:
            print("Too small training set (<%d) for sex %d, age %d, SL %s. Data will be skipped." %
                  (training_threshold, sex, age, sline))
            continue

        X = np.array([train_data]).transpose()
        kde = KernelDensity(kernel='tophat', bandwidth=0.5).fit(X)
        kdef = lambda size: [int(round(l[0])) for l in kde.sample(size).tolist()]
        result[key] = kde

        if show_chart:
            # print "Sex=%d, Age=%d, SL=%s" % (sex, age, sline)
            # print_freq(ages)
            samples = kdef(len(train_data)) if len(train_data) < 500 else kdef(500)
            # print_freq(samples)

            # hist for train data
            plt.subplot(211)
            plt.title("Admit count train data for Sex=%d, Age=%d, SL=%s" % (sex, age, sline))
            plt.ylabel('freq')
            plt.xlabel('admittance count')
            plt.hist(train_data)

            # estimated density
            plt.subplot(212)
            plt.title("Estimated density Sex=%d, Age=%d, SL=%s" % (sex, age, sline))
            plt.ylabel('freq')
            plt.xlabel('admittance count')
            plt.hist(samples)

            plt.show()

    return result
Example #7
class KDEModel(object):
    """
    Wrapper class for Scikit Learn's Kernel Density Estimation model.

    Attributes
    ----------
    model : KernelDensity
        Wrapped class model.
    """
    def __init__(self, kernel='gaussian', bandwidth=.001):
        # use the kernel argument rather than hard-coding 'gaussian'
        self.model = KernelDensity(kernel=kernel, bandwidth=bandwidth)

    def fit(self, train_X):
        """
        Wrapper method for fit() method of Kernel Density model.

        Parameters
        ----------
        train_X : {array-like, sparse matrix}, shape = [n_samples, n_features]
        """
        self.model.fit(train_X)

    def generate_samples(self, n_samples):
        """
        Generates the random samples according to the fitted distribution.

        Returns
        -------
        list
            List of numpy arrays of randomly generated observations.

        """
        points = self.model.sample(n_samples)
        return points

    def score_samples(self, X):
        """
        Predicts the log likelihood score of the samples in X.

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape = [n_samples, n_features]
        """
        return self.model.score_samples(X)
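
A usage sketch for the wrapper, assuming the KDEModel class above is in scope; the toy data and bandwidth are illustrative:

import numpy as np

X = np.random.default_rng(0).normal(size=(500, 2))  # toy 2-D training data
model = KDEModel(bandwidth=0.25)
model.fit(X)
new_points = model.generate_samples(10)       # 10 draws from the fitted density
log_probs = model.score_samples(new_points)   # log-likelihood of each draw
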
def get_kde_pdf(X, bandwidth=2, step=.1, num_samples=200, optimize=False):
    """
    return kde and pdf from a data sample
    """
    if len(X) == 0:
        return [], np.array([]), []
    if optimize:
        bandwidths = 10**np.linspace(-1, 1, 10)
        grid = GridSearchCV(KernelDensity(kernel='gaussian'),
                            {'bandwidth': bandwidths},
                            cv=LeaveOneOut())
        grid.fit(X[:, None])
        kde = KernelDensity(kernel='gaussian',
                            bandwidth=grid.best_params_['bandwidth']).fit(
                                X[:, None])
    else:
        kde = KernelDensity(kernel='gaussian', bandwidth=bandwidth).fit(X[:, None])
    pdf = np.exp(kde.score_samples(np.arange(0, 100, step)[:, None]))
    samples = kde.sample(num_samples)
    return kde, np.array(pdf), samples
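
The optimize branch above selects the bandwidth by leave-one-out cross-validated log-likelihood; a self-contained sketch of the same grid search on toy data (grid limits and data are illustrative):

import numpy as np
from sklearn.model_selection import GridSearchCV, LeaveOneOut
from sklearn.neighbors import KernelDensity

X = np.random.default_rng(0).normal(50.0, 10.0, size=120)
grid = GridSearchCV(KernelDensity(kernel='gaussian'),
                    {'bandwidth': 10 ** np.linspace(-1, 1, 10)},
                    cv=LeaveOneOut())          # one fold per point: 120 x 10 fits
grid.fit(X[:, None])
kde = KernelDensity(kernel='gaussian',
                    bandwidth=grid.best_params_['bandwidth']).fit(X[:, None])
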
Example #9
def get_sampled_feature(dict_ids,
                        article_ids,
                        ndim=100,
                        type_features='second',
                        features=['intervals'],
                        bw=1):
    '''
    Each article has 'intervals' (second) between two adjacent retweets.
    ndim : feature dimension. Thus, the number of samplings
    '''
    X = np.zeros((len(article_ids), ndim * len(features)))
    for i, id_ in enumerate(article_ids):
        article_id = id_
        stats = dict_ids[id_]
        for find, feature in enumerate(features):
            raw_data = change_resolution(stats[feature], type_features)
            kde = KernelDensity(kernel='gaussian',
                                bandwidth=bw).fit(raw_data.reshape(-1, 1))
            X[i,
              find * ndim:(find + 1) * ndim] = np.sort(kde.sample(ndim)[:, 0])
    return X
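
Sorting a fixed number of KDE draws, as above, turns a variable-length series of intervals into a fixed-length, approximately quantile-like feature vector. A self-contained sketch on toy inter-event gaps (sizes and bandwidth are illustrative):

import numpy as np
from sklearn.neighbors import KernelDensity

intervals = np.random.default_rng(0).exponential(30.0, size=75)  # toy retweet gaps, seconds
kde = KernelDensity(kernel='gaussian', bandwidth=1.0).fit(intervals.reshape(-1, 1))

ndim = 100
feature = np.sort(kde.sample(ndim, random_state=0)[:, 0])  # fixed-length signature
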
def resample_state(D, w):
    w_norm = np.sum(w)  # Normalization factor for weights
    w_ecdf = np.cumsum(w) / w_norm  # New weight given the new measurement
    # Resample the points
    D_new, ind = np.empty_like(D), np.empty_like(D)
    for i, q in enumerate(D):
        ind[i] = bisect.bisect_left(w_ecdf, np.random.uniform(
            0, 1))  # Indexes for new samples
        D_new[i] = D[int(
            ind[i]
        )]  # New weighted particles (samples) from previous step given new measuremnt
    # Regularize it!


#    std = np.std(D_new)
    bandwidth = 0.05  #1.06*std*len(D_new)**-0.2  ## used to be 0.08
    kde = KernelDensity(
        bandwidth=bandwidth, kernel='gaussian', algorithm='ball_tree'
    )  # Bandwidth = 0.006 is calculated based on Silverman's Rule of Thumb
    kde.fit(D_new[:, np.newaxis])
    return kde.sample(num_particles).flatten(), ind
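
resample_state inverts the weight ECDF with bisect_left one particle at a time; np.searchsorted does the same inversion for the whole batch, followed by the same KDE regularization. A self-contained sketch (toy particles and weights; num_particles is a module-level global in the original and a local here):

import numpy as np
from sklearn.neighbors import KernelDensity

rng = np.random.default_rng(0)
D = rng.normal(0.0, 1.0, size=200)             # particle positions
w = np.exp(-0.5 * ((D - 0.3) / 0.2) ** 2)      # toy measurement likelihoods as weights

w_ecdf = np.cumsum(w) / np.sum(w)
idx = np.searchsorted(w_ecdf, rng.uniform(size=D.size))  # vectorized ECDF inversion
D_new = D[idx]                                 # resampled particle set

kde = KernelDensity(bandwidth=0.05, kernel='gaussian').fit(D_new[:, None])
resampled = kde.sample(D.size, random_state=0).flatten()  # regularized particles
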
class DensityEstimator:
    def __init__(self,
                 training_set,
                 method_name,
                 n_components=None,
                 log_dir=None,
                 second_stage_beta=None):
        self.log_dir = log_dir
        self.training_set = training_set
        self.fitting_done = False
        self.method_name = method_name
        self.second_density_mdl = None
        self.skip_fitting_and_sampling = False
        if method_name == "GMM_Dirichlet":
            self.model = mixture.BayesianGaussianMixture(
                n_components=n_components,
                covariance_type='full',
                weight_concentration_prior=1.0 / n_components)
        elif method_name == "GMM":
            self.model = mixture.GaussianMixture(n_components=n_components,
                                                 covariance_type='full',
                                                 max_iter=2000,
                                                 verbose=2,
                                                 tol=1e-3)
        elif method_name == "GMM_1":
            self.model = mixture.GaussianMixture(n_components=1,
                                                 covariance_type='full',
                                                 max_iter=2000,
                                                 verbose=2,
                                                 tol=1e-3)
        elif method_name == "GMM_10":
            self.model = mixture.GaussianMixture(n_components=10,
                                                 covariance_type='full',
                                                 max_iter=2000,
                                                 verbose=2,
                                                 tol=1e-3)
        elif method_name == "GMM_20":
            self.model = mixture.GaussianMixture(n_components=20,
                                                 covariance_type='full',
                                                 max_iter=2000,
                                                 verbose=2,
                                                 tol=1e-3)
        elif method_name == "GMM_100":
            self.model = mixture.GaussianMixture(n_components=100,
                                                 covariance_type='full',
                                                 max_iter=2000,
                                                 verbose=2,
                                                 tol=1e-3)
        elif method_name == "GMM_200":
            self.model = mixture.GaussianMixture(n_components=200,
                                                 covariance_type='full',
                                                 max_iter=2000,
                                                 verbose=2,
                                                 tol=1e-3)

        elif method_name.find("aux_vae") >= 0:
            have_2nd_density_est = False
            if method_name[8:] != "":
                self.second_density_mdl = method_name[8:]
                have_2nd_density_est = True
            self.model = VaeModelWrapper(
                input_shape=(training_set.shape[-1], ),
                latent_space_dim=training_set.shape[-1],
                have_2nd_density_est=have_2nd_density_est,
                log_dir=self.log_dir,
                sec_stg_beta=second_stage_beta)

        elif method_name == "given_zs":
            files = os.listdir(log_dir)
            for z_smpls in files:
                if z_smpls.endswith('.npy'):
                    break
            self.z_smps = np.load(os.path.join(log_dir, z_smpls))
            self.skip_fitting_and_sampling = True

        elif method_name.upper() == "KDE":
            self.model = KernelDensity(kernel='gaussian', bandwidth=0.425)
            # self.model = KernelDensity(kernel='tophat', bandwidth=15)
        else:
            raise NotImplementedError("Method specified : " +
                                      str(method_name) +
                                      " doesn't have an implementation yet.")

    def fitorload(self, file_name=None):
        if not self.skip_fitting_and_sampling:
            if file_name is None:
                self.model.fit(self.training_set, self.second_density_mdl)
            else:
                self.model.load(file_name)

        self.fitting_done = True

    def score(self, X, y=None):
        if self.method_name.upper().find(
                "AUX_VAE") >= 0 or self.skip_fitting_and_sampling:
            raise NotImplementedError(
                "Log likelihood evaluation for VAE is difficult. or skipped")
        else:
            return self.model.score(X, y)

    def save(self, file_name):
        if not self.skip_fitting_and_sampling:
            if self.method_name.find('vae') >= 0:
                self.model.save(file_name)
            else:
                with open(file_name, 'wb') as f:
                    pickle.dump(self.model, f)

    def reconstruct(self, input_batch):
        if self.method_name.upper().find("AUX_VAE") < 0:
            raise ValueError("Non autoencoder style density estimator: " +
                             self.method_name)
        return self.model.reconstruct(input_batch)

    def get_samples(self, n_samples):
        if not self.skip_fitting_and_sampling:
            if not self.fitting_done:
                self.fitorload()
            scrmb_idx = np.array(range(n_samples))
            np.random.shuffle(scrmb_idx)
            if self.log_dir is not None:
                pickle_path = os.path.join(self.log_dir,
                                           self.method_name + '_mdl.pkl')
                with open(pickle_path, 'wb') as f:
                    pickle.dump(self.model, f)
            if self.method_name.upper() == "GMM_DIRICHLET" or self.method_name.upper() == "AUX_VAE" \
                    or self.method_name.upper() == "GMM" or self.method_name.upper() == "GMM_1" \
                    or self.method_name.upper() == "GMM_10" or self.method_name.upper() == "GMM_20" \
                    or self.method_name.upper() == "GMM_100" or self.method_name.upper() == "GMM_200"\
                    or self.method_name.upper().find("AUX_VAE") >= 0:
                return self.model.sample(n_samples)[0][scrmb_idx, :]
            else:
                return np.random.shuffle(
                    self.model.sample(n_samples))[scrmb_idx, :]
        else:
            return self.z_smps
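
A usage sketch for the KDE branch, assuming the DensityEstimator class above and its imports (numpy, pickle, sklearn's mixture and KernelDensity) are in scope; the data is a toy stand-in:

import numpy as np

train = np.random.default_rng(0).normal(size=(1000, 4))  # toy training set
est = DensityEstimator(train, method_name='KDE')
est.fitorload()                  # fits the KernelDensity model on the training set
samples = est.get_samples(100)   # 100 shuffled draws from the fitted density
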
Example #12
def estimate(X):
    kde = KernelDensity(kernel="gaussian", bandwidth=0.1).fit(X)
    # draw one random sample from the fitted density
    return kde.sample()
Example #13

# In[34]:

pca1 = PCA(n_components=n1, whiten=True)
dt = pca1.fit_transform(digits.data)
# print(dt.shape)

# In[35]:

kde_model = KernelDensity(kernel='gaussian', bandwidth=bandwidth)
kde_model.fit(dt)

# In[38]:

d1new = kde_model.sample(n_samples=100, random_state=0)
digits1k_new = pca1.inverse_transform(d1new)
plot_digits(digits1k_new)

# In[40]:

n_comps = np.arange(50, 210, 10)
clf_gauss_models = [
    GaussianMixture(n_components=n, covariance_type='full', random_state=0)
    for n in n_comps
]
aics = [model.fit(dt).aic(dt) for model in clf_gauss_models]
lbd = aics.index(min(aics))
print("Optimal Number of Components for GMM =", n_comps[lbd])

# In[44]:
Example #14
def kernel_smoother(all_data, bandwidth, sample_size):
    X = np.array(all_data)
    kde = KernelDensity(kernel='gaussian', bandwidth=bandwidth).fit(X)
    return kde.sample(sample_size)
def train_age(data, show_chart=False):
    """Train age estimator for each SL"""
    def print_freq(data):
        freq = {}
        length = float(len(data))
        for x in data:
            xcat = fp.split_age(x)
            freq.setdefault(xcat, 0)
            freq[xcat] += 1
        for x in sorted(freq.keys()):
            print "%d: %.2f" % (x, round(freq[x]/length, 2)),
        print

    sline_ages = {}
    bad_sl = set()
    for row in data:
        sline = row['sline']
        age = int(row["age"])

        if age <= 0:
            bad_sl.add(sline)
            continue

        sline_ages.setdefault(sline, [])
        sline_ages[sline].append(age)

    for sl in bad_sl:
        print "SL=%s has age values equal or less than zero. Values were ignored" % sl


    for sline, ages in list(sline_ages.items()):  # copy: entries may be deleted below
        if len(ages) < alert_count:
            print("SL=%s has fewer (%d) than %d samples and will be excluded" % (sline, len(ages), alert_count))
            del sline_ages[sline]

    result = {}
    for sline, ages in sline_ages.items():
        X = np.array([ages]).transpose()
        kde = KernelDensity(kernel='tophat', bandwidth=1.0).fit(X)
        kdef = lambda size: [round(l[0]) for l in kde.sample(size).tolist()]
        result[sline] = kdef

        if show_chart:

            print "SL=%s" % sline
            print_freq(ages)
            samples = kdef(len(ages)) if len(ages) < 500 else kdef(500)
            print_freq(samples)

            # hist for train data
            plt.subplot(211)
            plt.title("Age train data for SL=%s" %(sline))
            plt.ylabel('freq')
            plt.xlabel('age category')
            plt.hist(ages)

            # estimated density
            plt.subplot(212)
            plt.title("Estimated density %s" % sline)
            plt.ylabel('freq')
            plt.xlabel('age category')
            plt.hist(samples)

            plt.show()
    return result
# In[25]:


get_ipython().run_line_magic('pinfo', 'kde.sample')  # i.e. "kde.sample?": show the method's docstring


# Basically, that means we can use this model to predict what the next outputs of the 3 arms (constituting the Gaussian problem) will be.
# 
# Let's see this with one example.

# In[26]:


np.random.seed(1)
one_sample = kde.sample()
one_sample


# In[27]:


one_draw = M.draw_each()
one_draw


# Of course, the next random rewards from the arms have no reason to be close to the predicted ones...
# 
# But maybe we can use the prediction to choose the arm with the highest sample?
# And hopefully this will be the best arm, *at least on average*!
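
That idea, drawing one KDE sample per arm and playing the argmax, fits in a few lines. A self-contained toy version with three Gaussian arms (the notebook's kde and M objects are not reproduced here; the means and bandwidth are illustrative):

import numpy as np
from sklearn.neighbors import KernelDensity

rng = np.random.default_rng(1)
history = [rng.normal(mu, 0.5, size=50) for mu in (0.1, 0.5, 0.9)]  # past rewards per arm

kdes = [KernelDensity(kernel='gaussian', bandwidth=0.2).fit(h[:, None]) for h in history]
predicted = [k.sample(random_state=0)[0, 0] for k in kdes]  # one predicted reward per arm
best_arm = int(np.argmax(predicted))  # play the arm with the highest sampled reward
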
Example #17
class CSGM(torch.nn.Module):
    def __init__(self,
                 target,
                 filter,
                 G,
                 num_samples,
                 BS=64,
                 init_threshold=1e-2,
                 threshold=0.05,
                 bandwidth=0.1,
                 lr=1e-2):
        super(CSGM, self).__init__()
        self.target = torch.FloatTensor(target).cuda()
        self.A = torch.FloatTensor(filter).cuda()
        self.num_samples = num_samples
        self.G = G
        self.n_pixels = np.sum(filter)
        self.threshold = threshold
        self.init_threshold = init_threshold
        self.BS = BS
        # determine the points for KDE
        self.z, self.init_samples, self.init_bg = reconstruct_batch(
            target,
            filter,
            self.n_pixels,
            G,
            num_samples,
            threshold=init_threshold,
            lr=lr)
        self.Dz = KernelDensity(kernel='gaussian', bandwidth=bandwidth).fit(
            self.z.reshape(num_samples, 100))

    def update_sampler(self, bandwidth):
        self.Dz = KernelDensity(kernel='gaussian', bandwidth=bandwidth).fit(
            self.z.reshape(-1, 100))

    def sample(self, num_samples):
        count = 0
        z_samples = []
        gen_samples = []
        bg_samples = []
        while count < num_samples:
            Z = self.Dz.sample(self.BS)
            Z = torch.FloatTensor(Z).cuda().view(-1, 100, 1, 1)
            gen = self.G(Z).view(Z.shape[0], 28, 28)
            yhat = gen * self.A
            error = i_se(yhat,
                         self.target.unsqueeze(0).repeat(Z.shape[0], 1,
                                                         1)) / self.n_pixels

            Z = Z[error <= self.threshold]
            gen = gen[error <= self.threshold]
            end = min(Z.shape[0], num_samples - count)
            bg = gen[:end] * (1 - self.A)
            z_samples.append(Z[:end].data.cpu().numpy())
            gen_samples.append(gen[:end].data.cpu().numpy())
            bg_samples.append(bg.data.cpu().numpy())
            count += end

        z_samples = np.concatenate(z_samples, axis=0)
        gen_samples = np.concatenate(gen_samples, axis=0)
        bg_samples = np.concatenate(bg_samples, axis=0)

        return z_samples, gen_samples, bg_samples
class SkewExploreKDE(SkewExploreBase):
    """
    Class for state density estimation using sklearn kernel density estimator, goal proposing and result plotting.
    The goal proposing distribution is computed using Skew-fit algorithm (https://arxiv.org/abs/1903.03698) 
        with a sklearn kernel density estimation. 

    :param env: the environment, used to access environment properties such as state range,
        pass proposed goals and pass the on-line mean and standard deviation for state normalization
    :param args: additional arguments for configuring result plotting
    """
    def __init__(self, env, args):
        super().__init__(env, args)
        self.density_estimator = KernelDensity(kernel='gaussian',
                                               bandwidth=self.bandwidth)
        self.density_estimator_raw = KernelDensity(kernel='gaussian',
                                                   bandwidth=0.1)
        self.sample_prob = []

        ## for coverage computation
        self.obs_in_use = None
        self.num_points_estimator = 50000  #70000

    def fit_model(self):
        """
        fit the kernel density model
        """
        self.count += 1
        logging.info('Activate buffer')
        self.init_buffer = True

        selected_index = np.random.randint(len(self.obs_hist),
                                           size=self.num_points_estimator)
        self.obs_in_use = self.obs_hist[selected_index]

        # only yumi environments need to normalize the observation states on-line
        if self.args.use_auto_scale:
            if self.args.env == 'yumi' or self.args.env == 'yumi_box_pick' or self.args.env == 'yumi_door_button':
                if self.count % 2 == 0:
                    self.obs_mean = self.obs_rms.mean[
                        0]  #np.mean(self.obs_in_use, axis=0)
                    self.obs_std = np.sqrt(
                        self.obs_rms.var[0]
                    ) + 1e-8  #np.std(self.obs_in_use, axis=0) + 0.000000001

        self.obs_normalized = (self.obs_in_use - self.obs_mean) / self.obs_std
        self.density_estimator.fit(self.obs_normalized)

        # scale the observation for entropy computation
        self.obs_scaled = (self.obs_in_use -
                           self.entropy_shift) / self.entropy_scale
        self.density_estimator_raw.fit(self.obs_scaled)

        if self.plot_density:
            if self.args.env == 'yumi':
                self.xy_estimator.fit(self.obs_normalized[:, 0:2])
                self.doorangle_estimator.fit(self.obs_normalized[:, -1:])
            elif self.args.env == 'yumi_box_pick' or self.args.env == 'yumi_door_button':
                self.xy_estimator.fit(self.obs_normalized[:, 0:2])
                self.doorangle_estimator.fit(self.obs_normalized[:, -2:])

    def get_samples_and_density(self, sample_num):
        """
        Sample states from the density model and compute the sample density
        """
        samples = self.density_estimator.sample(sample_num)
        samples_density = np.exp(self.density_estimator.score_samples(samples))
        return samples, samples_density

    def get_log_density(self, obs_test):
        """
        Compute log density
        """
        log_density = self.density_estimator.score_samples(obs_test)
        return log_density

    def get_density(self, obs_test):
        """
        Compute density
        """
        density = np.exp(self.density_estimator.score_samples(obs_test))
        return density
class Texture3D:
    def __init__(self, size):
        self.rocks = []
        self.size = size
        self.data = []

    '''
	add a rock to the texture if the rock doesn't intersect any rock in self.rocks
	returns bool
	'''

    def add(self, rock):
        if not self.intersect(rock):
            self.rocks.append(rock)
            self.data.append(rock.data())
            return True
        return False

    '''
	Create the tree for the kernelDensity
	'''

    def learn(self):
        self.kde = KernelDensity(kernel='gaussian',
                                 bandwidth=0.04).fit(self.data)

    '''
	samples rocks from the kde one by one, making sure there are no intersections
	returns a Texture
	'''

    def sample(self, n_rocks=None):
        length = n_rocks
        if length is None:
            length = len(self.data)

        mtexture = Texture3D(self.size)
        i = 0
        while i < length:
            new_rock = rock.dataToRock3D(
                self.kde.sample(1, random_state=None)[0])

            if mtexture.add(new_rock):
                i = i + 1

        return mtexture

    '''
	compute the distance between two points
	return float
	'''

    def __distance(self, center1, center2):
        center1 = np.array(center1)
        center2 = np.array(center2)

        sub = center1 - center2
        sub = sub**2

        return math.sqrt(np.sum(sub))

    '''
	returns a string defining the texture. (for saving)
	format:
	size
	c1,c2,c3#rad1,rad2,rad3#col1,col2,col3#rot1,rot2,rot3
	c1,c2,c3#rad1,rad2,rad3#col1,col2,col3#rot1,rot2,rot3
	'''

    def toString(self):
        result = str(self.size) + '\n'
        for rock in self.rocks:
            center = '' + str(rock.center[0]) + ',' + str(
                rock.center[1]) + ',' + str(rock.center[2])
            radius = '' + str(rock.radius[0]) + ',' + str(
                rock.radius[1]) + ',' + str(rock.radius[2])
            color = '' + str(rock.color[0]) + ',' + str(
                rock.color[1]) + ',' + str(rock.color[2]) + ',' + str(
                    rock.color[3])
            rotation = '' + str(rock.rotation[0]) + ',' + str(
                rock.rotation[1]) + ',' + str(rock.rotation[2])

            result = result + center + '#' + radius + '#' + color + '#' + rotation + '\n'
        return result

    '''
	Test if two rocks intersect. 
	returns bool
	'''

    def __intersect(self, rock1, rock2):
        return self.__distance(
            rock1.center, rock2.center) < max(rock1.radius) + max(rock2.radius)

    '''
	Test if a rock intersects another rock in self.rocks
	'''

    def intersect(self, rock):
        for r in self.rocks:
            if self.__intersect(rock, r):
                return True

        return False
Example #20
import numpy as np
import matplotlib.pyplot as plt
from sklearn.neighbors import KernelDensity

#load data
xtrain = np.genfromtxt('../../contest_data/train.csv', delimiter=',')[1:, 1:-1]
ytrain = np.genfromtxt('../../contest_data/train.csv', delimiter=',')[1:, -1]
ytrain = np.asmatrix(ytrain).T
xtrain_linear_imputed = np.genfromtxt(
    '../../contest_data/xtrain_linear_imputed.csv', delimiter=',')

#imputing by sampling from class conditioned density estimate
#class conditional density estimate of column 1
for k in range(500):
    finite = np.isfinite(xtrain[:, k])
    nans = np.isnan(xtrain[:, k])
    y = np.array(ytrain[finite].T)
    X = xtrain[finite, k][:, np.newaxis]
    print(k)
    for i in range(29):
        X_plot = np.linspace(0, 1, 1000)[:, np.newaxis]
        ind = y == float(i)
        kde = KernelDensity(kernel='gaussian', bandwidth=0.1).fit(X[ind[0]])
        nans_i = np.isnan(xtrain[:, k]) * np.array((ytrain == float(i)).T)
        xtrain[nans_i[0],
               k] = np.array(kde.sample(sum(nans_i[0]), random_state=0).T)
        log_dens = kde.score_samples(X_plot)
        #dens=np.exp(log_dens)
        #plt.plot(X_plot,dens)
    #plt.show()
class SkewExploreBase():
    """
    Class for state density estimation, goal proposing and result plotting.
    The goal proposing distribution is computed using Skew-fit algorithm (https://arxiv.org/abs/1903.03698) 
        with a sklearn kernel density estimation. 

    :param env: the environment, used to access environment properties such as state range,
        pass proposed goals and pass the on-line mean and standard deviation for state normalization
    :param args: additional arguments for configuring result plotting
    """
    def __init__(self, env, args):
        self.density_estimator_raw = None  #KernelDensity(kernel='gaussian', bandwidth=0.1)
        self.density_estimator = None
        self.args = args
        self.env = env
        self.obs_rms = None  #RunningMeanStd(shape=env.observation_space)

        self.skew_sample_num = 10000  #25000
        self.skew_alpha = args.skew_alpha  #-2.5 #-2.3 #-2.1
        self.goal_sampling_num = 100

        self.init_buffer = False
        self.obs_hist = None
        self.obs_next_hist = None
        self.dones = None

        self.obs_in_use = None
        self.obs_new = None

        self.plot_coverage = False
        self.plot_density = False
        self.plot_overall_coverage = False
        self.plot_entropy = False

        self.count = 0
        self.coverages = []
        self.entropy = []
        self.task_reward = []

        self.obs_mean = None
        self.obs_std = None

        self.plot_coverage = self.args.plot_coverage
        self.plot_density = self.args.plot_density
        self.plot_overall_coverage = self.args.plot_overall_coverage
        self.plot_entropy = self.args.plot_entropy

        # for coverage plotting
        if self.args.env == 'maze':
            self.bandwidth = 0.1
            self.init_maze_plotting_params()
            sigma = 0.1
        elif self.args.env == 'yumi':
            # self.bandwidth = 0.003
            self.bandwidth = 0.1
            self.init_door_plotting_params()
            sigma = 0.1
        elif self.args.env == 'yumi_box_pick' or self.args.env == 'yumi_door_button':
            self.bandwidth = 0.11
            self.init_boxpick_plotting_params()
            sigma = 0.005

        self.beta = 1 / (sigma**2 * 2)

    def init_maze_plotting_params(self):
        """
        Initialize parameters to evaluate and plot results of point maze environment
        """
        xbins = 50j
        ybins = 50j
        x_start = -6
        x_end = 6
        y_start = -12
        y_end = 4

        self.xx, self.yy = np.mgrid[x_start:x_end:xbins, y_start:y_end:ybins]
        self.eval_sample = np.vstack([self.yy.ravel(), self.xx.ravel()]).T
        self.eval_sample_min_dist = np.ones(len(self.eval_sample))

        self.skewed_estimator = KernelDensity(kernel='gaussian',
                                              bandwidth=self.bandwidth)

        self.entropy_shift = np.array([y_start, x_start])
        self.entropy_scale = np.array([(y_end - y_start, x_end - x_start)])

    def init_door_plotting_params(self):
        """
        Initialize parameters to evaluate and plot results of yumi door opening environment
        """
        xbins, ybins, zbins, gbins, dbins = 10j, 10j, 10j, 2j, 10j
        self.x_start, self.y_start, self.z_start = self.env.xyz_start
        self.x_end, self.y_end, self.z_end = self.env.xyz_end
        self.g_start, self.g_end = self.env.gripper_start, self.env.gripper_end
        self.d_start, self.d_end = self.env.door_start, self.env.door_end

        # for xy and door angle plotting
        self.mesh_xx, self.mesh_yy = np.mgrid[self.x_start:self.x_end:xbins,
                                              self.y_start:self.y_end:ybins]
        self.dd = np.mgrid[self.d_start:self.d_end:dbins]
        self.xy_eval_sample = np.vstack(
            [self.mesh_xx.ravel(), self.mesh_yy.ravel()]).T
        self.door_eval_sample = np.vstack([self.dd.ravel()]).T
        self.door_eval_sample_min_dist = np.ones(len(self.door_eval_sample))

        # for coverage plotting
        self.xx, self.yy, self.zz, self.gg, self.dd = np.mgrid[
            self.x_start:self.x_end:xbins, self.y_start:self.y_end:ybins,
            self.z_start:self.z_end:zbins, self.g_start:self.g_end:gbins,
            self.d_start:self.d_end:dbins]
        self.eval_sample = np.vstack([
            self.xx.ravel(),
            self.yy.ravel(),
            self.zz.ravel(),
            self.gg.ravel(),
            self.dd.ravel()
        ]).T
        self.eval_sample_min_dist = np.ones(len(self.eval_sample))

        self.xy_estimator = KernelDensity(kernel='gaussian',
                                          bandwidth=self.bandwidth)
        self.doorangle_estimator = KernelDensity(kernel='gaussian',
                                                 bandwidth=self.bandwidth)

        self.skewed_estimator = KernelDensity(kernel='gaussian',
                                              bandwidth=self.bandwidth)

        self.entropy_shift = np.array([
            self.x_start, self.y_start, self.z_start, self.g_start,
            self.d_start
        ])
        self.entropy_scale = np.array([
            self.x_start - self.x_end, self.y_start - self.y_end,
            self.z_start - self.z_end, self.g_start - self.g_end,
            self.d_start - self.d_end
        ])

    def init_boxpick_plotting_params(self):
        """
        Initialize parameters to evaluate and plot results of yumi door button environment
        """
        xbins, ybins, zbins, gbins, dlbins, drbins = 1j, 1j, 1j, 1j, 10j, 5j
        self.x_start, self.y_start, self.z_start = self.env.xyz_start
        self.x_end, self.y_end, self.z_end = self.env.xyz_end
        self.g_start, self.g_end = self.env.gripper_start, self.env.gripper_end
        self.dl_start, self.dl_end = self.env.door_l_start, self.env.door_l_end
        self.dr_start, self.dr_end = self.env.door_r_start, self.env.door_r_end

        # for xy and door angle plotting
        self.mesh_xx, self.mesh_yy = np.mgrid[self.x_start:self.x_end:xbins,
                                              self.y_start:self.y_end:ybins]
        self.xy_eval_sample = np.vstack(
            [self.mesh_xx.ravel(), self.mesh_yy.ravel()]).T

        self.mesh_ld, self.mesh_rd = np.mgrid[self.dl_start:self.dl_end:dlbins,
                                              self.dr_start:self.dr_end:drbins]
        self.door_eval_sample = np.vstack(
            [self.mesh_ld.ravel(), self.mesh_rd.ravel()]).T
        self.door_eval_sample_min_dist = np.ones(len(self.door_eval_sample))

        # for coverage plotting
        self.xx, self.yy, self.zz, self.gg, self.dl, self.dr = np.mgrid[
            self.x_start:self.x_end:xbins, self.y_start:self.y_end:ybins,
            self.z_start:self.z_end:zbins, self.g_start:self.g_end:gbins,
            self.dl_start:self.dl_end:dlbins, self.dr_start:self.dr_end:drbins]
        self.eval_sample = np.vstack([
            self.xx.ravel(),
            self.yy.ravel(),
            self.zz.ravel(),
            self.gg.ravel(),
            self.dl.ravel(),
            self.dr.ravel()
        ]).T
        self.eval_sample_min_dist = np.ones(len(self.eval_sample))

        self.xy_estimator = KernelDensity(kernel='gaussian',
                                          bandwidth=self.bandwidth)
        self.doorangle_estimator = KernelDensity(kernel='gaussian',
                                                 bandwidth=self.bandwidth)

        self.skewed_estimator = KernelDensity(kernel='gaussian',
                                              bandwidth=self.bandwidth)

        self.entropy_shift = np.array([
            self.x_start, self.y_start, self.z_start, self.g_start,
            self.dl_start, self.dr_start
        ])
        self.entropy_scale = np.array([
            self.x_start - self.x_end, self.y_start - self.y_end,
            self.z_start - self.z_end, self.g_start - self.g_end,
            self.dl_start - self.dl_end, self.dr_start - self.dr_end
        ])

    def plot_maze_metrics(self):
        """
        Plot intermediate result for point maze environment
        """
        fig, (ax1, ax2, ax3, ax4, ax5) = plt.subplots(5, 1, figsize=(5, 20))

        if self.plot_entropy or self.plot_density:
            eval_sample_log_density = self.get_log_density(self.eval_sample)
            eval_sample_density = np.exp(eval_sample_log_density)

        if self.plot_density:
            zz_density = np.reshape(eval_sample_density, self.xx.shape)
            im = ax1.pcolormesh(self.yy, self.xx, zz_density)

        if self.plot_entropy:
            entropy = self.compute_entropy(eval_sample_density,
                                           eval_sample_log_density)
            self.entropy.append(entropy)
            ax3.plot(self.entropy)
            # np.save(self.args.save_path + '/entropy.npy', self.entropy)

        if self.plot_coverage:
            z_coverage = self.get_coverage()
            zz_coverage = np.reshape(z_coverage, self.xx.shape)
            im = ax2.pcolormesh(self.yy, self.xx, zz_coverage, vmin=0, vmax=1)

        # if the use_extrinsic_reward flag is true, it will only plot the curve of task_reward
        if self.args.use_extrinsic_reward:
            ax4.plot(self.task_reward)
        elif self.plot_overall_coverage:
            self.coverages.append(z_coverage.mean())
            ax4.plot(self.coverages)
            # np.save(self.args.save_path + '/coverage.npy', self.coverages)

        sample_goal = self.sample_goal(200)
        ax5.scatter(sample_goal[:, 0], sample_goal[:, 1], s=10, color='red')
        ax5.set_xlim([-12, 4])
        ax5.set_ylim([-6, 6])

        if self.plot_density or self.plot_coverage or self.plot_overall_coverage or self.plot_entropy:
            plt.savefig(self.args.save_path + '/coverage_' + str(self.count) +
                        '.svg')
        plt.close()

    def plot_door_metrics(self):
        """
        Plot intermediate result for door opening environment
        """
        fig, ax = plt.subplots(3, 2, figsize=(10, 15))

        if self.plot_density:
            xy_eval_sample_norm = (self.xy_eval_sample -
                                   self.obs_mean[:2]) / self.obs_std[:2]
            xy_sample_density = np.exp(
                self.xy_estimator.score_samples(xy_eval_sample_norm))
            xy_density = np.reshape(xy_sample_density, self.mesh_xx.shape)

            door_eval_sample_norm = (self.door_eval_sample -
                                     self.obs_mean[-1]) / self.obs_std[-1]
            door_sample_density = np.exp(
                self.doorangle_estimator.score_samples(door_eval_sample_norm))

            im = ax[0][0].pcolormesh(self.mesh_xx, self.mesh_yy, xy_density)
            im = ax[0][1].scatter(self.door_eval_sample, door_sample_density)
            ax[0][0].set_xlim([self.x_start - 0.05, self.x_end + 0.05])
            ax[0][0].set_ylim([self.y_start - 0.05, self.y_end + 0.05])
            ax[0][1].set_xlim([self.d_start - 0.05, self.d_end + 0.05])
            ax[0][1].set_ylim([-0.05, 1])

        if self.plot_coverage:
            door_sample_coverage = self.get_door_coverage(
                self.door_eval_sample, -1)
            ax[2][1].scatter(self.door_eval_sample, door_sample_coverage)

        # if the use_extrinsic_reward flag is true, it will only plot the curve of task_reward
        if self.args.use_extrinsic_reward:
            ax[2][0].plot(self.task_reward)
        elif self.plot_overall_coverage:
            eval_sample_coverage = self.get_coverage()
            self.coverages.append(eval_sample_coverage.mean())
            ax[2][0].plot(self.coverages)

        if self.plot_entropy:
            eval_sample_scaled = (self.eval_sample -
                                  self.entropy_shift) / self.entropy_scale
            eval_sample_log_density = self.density_estimator_raw.score_samples(
                eval_sample_scaled)  #self.get_log_density(eval_sample_norm)
            eval_sample_density = np.exp(eval_sample_log_density)
            entropy = self.compute_entropy(eval_sample_density,
                                           eval_sample_log_density)
            self.entropy.append(entropy)
            ax[0][0].plot(self.entropy)
            np.save(self.args.save_path + '/entropy', np.array(self.entropy))

        sample_goal = self.sample_goal(200)
        ax[1][0].scatter(sample_goal[:, 0],
                         sample_goal[:, 1],
                         s=10,
                         color='red')
        ax[1][0].set_xlim([self.x_start - 0.05, self.x_end + 0.05])
        ax[1][0].set_ylim([self.y_start - 0.05, self.y_end + 0.05])

        ax[1][1].scatter(sample_goal[:, -1],
                         np.ones(len(sample_goal)),
                         s=1,
                         color='red')
        ax[1][1].set_xlim([self.d_start - 0.05, self.d_end + 0.05])

        if self.plot_density or self.plot_coverage or self.plot_overall_coverage or self.plot_entropy:
            plt.savefig(self.args.save_path + '/coverage_' + str(self.count) +
                        '.svg')
        plt.close()

    def plot_boxpick_metrics(self):
        """
        Plot intermediate result for door button environment
        """
        fig, ax = plt.subplots(3, 2, figsize=(15, 15))

        if self.plot_density:
            xy_eval_sample_norm = (self.xy_eval_sample -
                                   self.obs_mean[:2]) / self.obs_std[:2]
            xy_sample_density = np.exp(
                self.xy_estimator.score_samples(xy_eval_sample_norm))
            xy_density = np.reshape(xy_sample_density, self.mesh_xx.shape)

            door_eval_sample_norm = (self.door_eval_sample -
                                     self.obs_mean[-2:]) / self.obs_std[-2:]
            door_sample_density = np.exp(
                self.doorangle_estimator.score_samples(door_eval_sample_norm))
            door_density = np.reshape(door_sample_density, self.mesh_ld.shape)

            im = ax[0][0].pcolormesh(self.mesh_xx, self.mesh_yy, xy_density)
            im = ax[0][1].pcolormesh(self.mesh_ld, self.mesh_rd, door_density)

            ax[0][0].set_xlim([self.x_start - 0.05, self.x_end + 0.05])
            ax[0][0].set_ylim([self.y_start - 0.05, self.y_end + 0.05])
            ax[0][1].set_xlim([self.dl_start, self.dl_end])
            ax[0][1].set_ylim([self.dr_start, self.dr_end])

        # if the use_extrinsic_reward flag is true, it will only plot the curve of task_reward
        if self.args.use_extrinsic_reward:
            ax[2][0].plot(self.task_reward)
        elif self.plot_overall_coverage:
            eval_sample_coverage = self.get_coverage()
            self.coverages.append(eval_sample_coverage.mean())
            ax[2][0].plot(self.coverages)

        if self.plot_coverage:
            door_sample_coverage = self.get_door_coverage(
                self.door_eval_sample, -2)
            door_coverage = np.reshape(door_sample_coverage,
                                       self.mesh_ld.shape)
            ax[2][1].pcolormesh(self.mesh_ld,
                                self.mesh_rd,
                                door_coverage,
                                vmin=0,
                                vmax=1)

        if self.plot_entropy:
            eval_sample_norm = (self.eval_sample -
                                self.obs_mean) / self.obs_std
            eval_sample_log_density = self.get_log_density(eval_sample_norm)
            eval_sample_density = np.exp(eval_sample_log_density)
            entropy = self.compute_entropy(eval_sample_density,
                                           eval_sample_log_density)
            self.entropy.append(entropy)
            np.save(self.args.save_path + '/entropy', np.array(self.entropy))

        sample_goal = self.sample_goal(200)
        ax[1][0].scatter(sample_goal[:, 0],
                         sample_goal[:, 1],
                         s=10,
                         color='red')
        ax[1][0].set_xlim([self.x_start - 0.05, self.x_end + 0.05])
        ax[1][0].set_ylim([self.y_start - 0.05, self.y_end + 0.05])

        ax[1][1].scatter(sample_goal[:, -2],
                         sample_goal[:, -1],
                         s=1,
                         color='red')
        ax[1][1].set_xlim([self.dl_start, self.dl_end])
        ax[1][1].set_ylim([self.dr_start, self.dr_end])

        if self.plot_density or self.plot_coverage or self.plot_overall_coverage or self.plot_entropy:
            plt.savefig(self.args.save_path + '/coverage_' + str(self.count) +
                        '.svg')
        plt.close()

    def activate_buffer(self):
        """
        Update the history buffer, 
        update the state density estimation model
        update the goal proposing distribution model 
        """
        start_time = time.time()
        if self.obs_hist is None:
            self.obs_hist = self.obs_new
            # self.obs_next_hist = obs_next
            # self.done_hist = dones
        else:
            self.obs_hist = np.concatenate((self.obs_hist, self.obs_new),
                                           axis=0)

        self.fit_model()
        fitmodel_time = time.time()
        logging.info("fit model time cost: %f" % (fitmodel_time - start_time))

        self.train_skew_generator()
        fitskew_time = time.time()
        logging.info("fit skew-model time cost: %f" %
                     (fitskew_time - start_time))

        # update goal samples in the environment and update the obs mean and std
        if self.args.use_auto_scale:
            self.env.update_reward_scale(self.obs_mean, self.obs_std)
        sampled_goal = self.sample_goal(self.goal_sampling_num)
        self.env.set_goals(sampled_goal)
        self.env.set_density_estimator(self.density_estimator)

        # compute task_reward
        if self.args.use_extrinsic_reward:
            # dones = self.dones.astype(int)
            task_reward = self.env.get_extrinsic_reward(
                self.obs_new)  # * dones
            self.task_reward.append(task_reward.mean())

        # plotting
        if self.plot_density or self.plot_coverage or self.plot_overall_coverage or self.plot_entropy:
            if self.args.env == 'maze':
                self.plot_maze_metrics()
            elif self.args.env == 'yumi':
                self.plot_door_metrics()
            elif self.args.env == 'yumi_box_pick' or self.args.env == 'yumi_door_button':
                self.plot_boxpick_metrics()
        self.obs_new = None
        self.dones = None
        finish_time = time.time()
        logging.info('time cost: %f' % (finish_time - start_time))

        np.save(self.args.save_path + '/entropy', np.array(self.entropy))
        np.save(self.args.save_path + '/coverage', np.array(self.coverages))
        np.save(self.args.save_path + '/task_reward',
                np.array(self.task_reward))
        logging.info('end of activate buffer')

    def train_skew_generator(self):
        """
        Update the goal proposing distribution using the Skew-fit algorithm
        (https://arxiv.org/abs/1903.03698) 
        """
        # NOTE: The skewed samples are sampled from density estimator
        self.skew_samples, skew_samples_density = self.get_samples_and_density(
            self.skew_sample_num)

        # self.skew_samples = self.density_estimator.sample(self.skew_sample_num)
        # skew_samples_density = np.exp(self.density_estimator.score_samples(self.skew_samples))
        skew_unnormalized_weights = skew_samples_density * skew_samples_density**self.skew_alpha

        skew_zeta_alpha = np.sum(skew_unnormalized_weights)
        self.skew_weights = skew_unnormalized_weights / skew_zeta_alpha

        self.skewed_estimator.fit(self.skew_samples,
                                  sample_weight=self.skew_weights)

    def sample_goal(self, goal_num):
        """
        Sample goal states from the goal proposing distribution
        """
        sampled_data = self.skewed_estimator.sample(goal_num)
        sampled_data = sampled_data * self.obs_std + self.obs_mean
        return sampled_data  #sampled_data[goal_index]

    def get_samples_and_density(self, sample_num):
        raise NotImplementedError()

    def fit_model(self):
        raise NotImplementedError()

    def get_pvisited(self, obs_test):
        raise NotImplementedError()

    def get_log_density(self, obs_test):
        raise NotImplementedError()

    def get_coverage(self):
        """
        Compute the current coverage of the states used for evaluation
        """
        p_coverage = np.zeros(len(self.eval_sample))

        for i in range(len(self.eval_sample)):
            obs = self.eval_sample[i]
            obs_diff = self.obs_new - obs

            diff_norm = LA.norm(obs_diff, axis=1)
            min_dist = diff_norm.min()

            current_min_dist = self.eval_sample_min_dist[i]
            new_min_dist = np.minimum(current_min_dist, min_dist)
            self.eval_sample_min_dist[i] = new_min_dist

            pv = np.exp(-new_min_dist * new_min_dist * self.beta)
            p_coverage[i] = 1 - pv

        return p_coverage

    def get_door_coverage(self, door_eval_sample, index):
        """
        Compute the current coverage of the door states used for evaluation
        """
        p_coverage = np.zeros(len(door_eval_sample))

        for i in range(len(door_eval_sample)):
            obs = door_eval_sample[i]
            obs_diff = self.obs_new[:, index:] - obs

            diff_norm = LA.norm(obs_diff, axis=1)
            min_dist = diff_norm.min()

            current_min_dist = self.door_eval_sample_min_dist[i]
            new_min_dist = np.minimum(current_min_dist, min_dist)
            self.door_eval_sample_min_dist[i] = new_min_dist

            pv = np.exp(-new_min_dist * new_min_dist * self.beta)
            p_coverage[i] = pv

        return p_coverage

    def compute_entropy(self, density, log_density):
        """
        Compute the entropy
        """
        d_mul_logd = density * log_density
        entropy = -np.sum(d_mul_logd)
        return entropy

    def get_preach(self, obs_from):
        raise NotImplementedError()

    def get_preal(self, obs_test):
        raise NotImplementedError()

    def compute_reward(self, obs_test, use_sampling=False):
        raise NotImplementedError()

    def update_history(self, obs, dones):
        """
        Save the new states in the self.obs_new buffer.
        the self.obs_new buffer will be merged to the self.obs buffer
        in activate_buffer() function.
        """
        if self.args.use_index:
            obs = obs[:, :-1]
        if self.obs_mean is None:
            self.obs_mean = np.zeros_like(obs)[0]
            self.obs_std = np.ones_like(obs)[0]
            self.obs_rms = RunningMeanStd(shape=obs.shape)

        if self.obs_new is None:
            self.obs_new = obs
            # self.obs_next_hist = obs_next
            self.dones = dones
        else:
            self.obs_new = np.concatenate((self.obs_new, obs), axis=0)
            # self.obs_next_hist = np.concatenate((self.obs_next_hist, obs_next), axis=0)
            self.dones = np.concatenate((self.dones, dones), axis=0)
        self.obs_rms.update(obs)
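
The Skew-fit step in train_skew_generator reweights the estimator's own samples by density**alpha (alpha < 0 upweights rarely visited states) and refits a weighted KDE to propose goals. A self-contained sketch (toy states; alpha and bandwidth are illustrative, in the ranges mentioned in the comments above):

import numpy as np
from sklearn.neighbors import KernelDensity

rng = np.random.default_rng(0)
states = rng.normal(0.0, 1.0, size=(5000, 2))               # visited states
estimator = KernelDensity(kernel='gaussian', bandwidth=0.1).fit(states)

alpha = -2.3                                                # skew_alpha < 0 favors rare states
samples = estimator.sample(2000, random_state=0)
density = np.exp(estimator.score_samples(samples))
weights = density * density ** alpha                        # i.e. density ** (1 + alpha)
weights /= weights.sum()

skewed = KernelDensity(kernel='gaussian', bandwidth=0.1)
skewed.fit(samples, sample_weight=weights)                  # weighted KDE (scikit-learn >= 0.20)
goals = skewed.sample(100, random_state=0)                  # proposed exploration goals
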
class GAE():
    def __init__(self,
                 img_shape=(48, 96, 96, 1),
                 encoded_dim=8,
                 optimizer=SGD(0.001, momentum=.9),
                 optimizer_discriminator=SGD(0.0001, momentum=.9),
                 optimizer_autoencoder=Adam(0.0001)):
        self.encoded_dim = encoded_dim
        self.optimizer = optimizer
        self.optimizer_discriminator = optimizer_discriminator
        self.optimizer_autoencoder = optimizer_autoencoder
        self.img_shape = img_shape
        self.initializer = RandomNormal(mean=0., stddev=1.)
        self._initAndCompileFullModel(img_shape, encoded_dim)

    def _genEncoderModel(self, img_shape, encoded_dim):
        """ Build Encoder Model Based on Paper Configuration
        Args:
            img_shape (tuple) : shape of input image
            encoded_dim (int) : number of latent variables
        Return:
            A sequential keras model
        """
        encoder = Sequential()
        encoder.add(
            keras.layers.Conv3D(input_shape=img_shape,
                                filters=16,
                                kernel_size=3,
                                strides=(1, ) * 3,
                                padding="SAME",
                                activation='relu'))
        encoder.add(keras.layers.Dropout(0.2))
        encoder.add(
            keras.layers.Conv3D(filters=16,
                                kernel_size=3,
                                strides=(2, ) * 3,
                                padding="SAME",
                                activation='relu'))
        #encoder.add(keras.layers.MaxPool3D(pool_size=(2,)*3, padding="SAME"))

        encoder.add(
            keras.layers.Conv3D(filters=32,
                                kernel_size=3,
                                strides=(1, ) * 3,
                                padding="SAME",
                                activation='relu'))
        encoder.add(keras.layers.Dropout(0.2))
        encoder.add(
            keras.layers.Conv3D(filters=32,
                                kernel_size=3,
                                strides=(2, ) * 3,
                                padding="SAME",
                                activation='relu'))
        #encoder.add(keras.layers.MaxPool3D(pool_size=(2,)*3, padding="SAME"))

        encoder.add(
            keras.layers.Conv3D(filters=64,
                                kernel_size=3,
                                strides=(1, ) * 3,
                                padding="SAME",
                                activation='relu'))
        encoder.add(keras.layers.Dropout(0.2))
        encoder.add(
            keras.layers.Conv3D(filters=64,
                                kernel_size=3,
                                strides=(2, ) * 3,
                                padding="SAME",
                                activation='relu'))
        #encoder.add(keras.layers.MaxPool3D(pool_size=(2,)*3, padding="SAME"))
        encoder.add(keras.layers.GlobalAvgPool3D())
        encoder.add(keras.layers.Flatten())
        encoder.add(Dense(encoded_dim))
        encoder.summary()
        return encoder

    def _getDecoderModel(self, encoded_dim, img_shape):
        """ Build Decoder Model Based on Paper Configuration
        Args:
            encoded_dim (int) : number of latent variables
            img_shape (tuple) : shape of target images
        Return:
            A sequential keras model
        """
        decoder = Sequential()
        decoder.add(Dense(128, activation='relu', input_dim=encoded_dim))
        decoder.add(Reshape((128, 1)))
        decoder.add(
            keras.layers.Conv1D(filters=108,
                                kernel_size=3,
                                strides=1,
                                padding="SAME",
                                activation='relu'))
        decoder.add(Reshape([3, 6, 6, 128]))  # 128 * 108 == 3*6*6*128 == 13824, so the reshape is valid
        decoder.add(
            Conv3DTranspose(filters=64,
                            kernel_size=3,
                            strides=(2, ) * 3,
                            padding="SAME",
                            activation='relu'))
        decoder.add(
            Conv3DTranspose(filters=32,
                            kernel_size=3,
                            strides=(2, ) * 3,
                            padding="SAME",
                            activation='relu'))
        decoder.add(
            Conv3DTranspose(filters=16,
                            kernel_size=3,
                            strides=(2, ) * 3,
                            padding="SAME",
                            activation='relu'))
        decoder.add(
            Conv3DTranspose(filters=1,
                            kernel_size=3,
                            strides=(2, ) * 3,
                            padding="SAME",
                            activation='relu'))

        #decoder.add(Dense(1000, activation='relu'))
        #decoder.add(Dense(np.prod(img_shape), activation='sigmoid'))
        decoder.summary()
        return decoder

    def _getDescriminator(self, img_shape):
        """ Build Descriminator Model Based on Paper Configuration
        Args:
            encoded_dim (int) : number of latent variables
        Return:
            A sequential keras model
        """
        discriminator = Sequential()
        discriminator.add(
            keras.layers.Conv3D(input_shape=img_shape,
                                filters=16,
                                kernel_size=3,
                                strides=(1, ) * 3,
                                padding="SAME",
                                activation='relu'))
        discriminator.add(
            keras.layers.MaxPool3D(pool_size=(2, ) * 3, padding="SAME"))
        discriminator.add(
            keras.layers.Conv3D(input_shape=img_shape,
                                filters=32,
                                kernel_size=3,
                                strides=(1, ) * 3,
                                padding="SAME",
                                activation='relu'))
        discriminator.add(
            keras.layers.MaxPool3D(pool_size=(2, ) * 3, padding="SAME"))
        discriminator.add(
            keras.layers.Conv3D(input_shape=img_shape,
                                filters=64,
                                kernel_size=3,
                                strides=(1, ) * 3,
                                padding="SAME",
                                activation='relu'))
        discriminator.add(
            keras.layers.MaxPool3D(pool_size=(2, ) * 3, padding="SAME"))
        discriminator.add(
            keras.layers.Conv3D(input_shape=img_shape,
                                filters=128,
                                kernel_size=3,
                                strides=(1, ) * 3,
                                padding="SAME",
                                activation='relu'))
        discriminator.add(
            keras.layers.MaxPool3D(pool_size=(2, ) * 3, padding="SAME"))
        discriminator.add(keras.layers.GlobalAvgPool3D())
        discriminator.add(keras.layers.Flatten())
        discriminator.add(Dense(32, activation="relu"))
        discriminator.add(Dense(1, activation="sigmoid"))
        discriminator.summary()
        return discriminator

    def _initAndCompileFullModel(self, img_shape, encoded_dim):
        self.encoder = self._genEncoderModel(img_shape, encoded_dim)
        self.decoder = self._getDecoderModel(encoded_dim, img_shape)
        self.discriminator = self._getDescriminator(img_shape)
        img = Input(shape=img_shape)
        encoded_repr = self.encoder(img)
        gen_img = self.decoder(encoded_repr)
        self.autoencoder = Model(img, gen_img)
        self.autoencoder.compile(optimizer=self.optimizer_autoencoder,
                                 loss='mse')
        self.discriminator.compile(optimizer=self.optimizer_discriminator,
                                   loss='binary_crossentropy',
                                   metrics=['accuracy'])
        for layer in self.discriminator.layers:
            layer.trainable = False

        is_real = self.discriminator(gen_img)
        self.autoencoder_discriminator = Model(img, is_real)
        self.autoencoder_discriminator.compile(optimizer=self.optimizer,
                                               loss='binary_crossentropy',
                                               metrics=['accuracy'])

    def imagegrid(self, epochnumber):
        # Note: this grid assumes encoded_dim == 2 and a 2-D img_shape, which the
        # volumetric defaults above do not satisfy.
        fig = plt.figure(figsize=[20, 20])
        for i in range(-5, 5):
            for j in range(-5, 5):
                topred = np.array((i * 0.5, j * 0.5))
                topred = topred.reshape((1, 2))
                img = self.decoder.predict(topred)
                img = img.reshape(self.img_shape)
                ax = fig.add_subplot(10, 10, (i + 5) * 10 + j + 5 + 1)
                ax.set_axis_off()
                ax.imshow(img, cmap="gray")
        fig.savefig(str(epochnumber) + ".png")
        plt.show()
        plt.close(fig)

    def train(self, x_train, batch_size=4, epochs=5):
        self.autoencoder.fit(x_train, x_train, epochs=1)
        for epoch in range(epochs):
            #---------------Train Discriminator -------------
            # Select a random half batch of images
            idx = np.random.randint(0, x_train.shape[0], batch_size)
            imgs_real = x_train[idx]
            idx = np.random.randint(0, x_train.shape[0], batch_size)
            imgs_real2 = x_train[idx]
            # Generate a half batch of new images
            #gen_imgs = self.decoder.predict(latent_fake)
            imgs_fake = self.autoencoder.predict(imgs_real2)
            valid = np.ones((batch_size, 1))
            fake = np.zeros((batch_size, 1))
            # Train the discriminator
            d_loss_real = self.discriminator.train_on_batch(imgs_real, valid)
            d_loss_fake = self.discriminator.train_on_batch(imgs_fake, fake)
            d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)
            #d_loss = (0,0)
            idx = np.random.randint(0, x_train.shape[0], batch_size)
            imgs_real = x_train[idx]
            # Generator wants the discriminator to label the generated representations as valid
            valid_y = np.ones((batch_size, 1))
            # Train generator
            g_logg_similarity = self.autoencoder_discriminator.train_on_batch(
                imgs_real, valid_y)
            # Plot the progress
            print(
                "%d [D loss = %.2f, accuracy: %.2f] [G loss = %.2f, accuracy: %.2f]"
                % (epoch, d_loss[0], d_loss[1], g_logg_similarity[0],
                   g_logg_similarity[1]))


#            if(epoch % save_interval == 0):
#                self.imagegrid(epoch)
        codes = self.encoder.predict(x_train)
        #        params = {'bandwidth': [3.16]}#np.logspace(0, 2, 5)}
        #        grid = GridSearchCV(KernelDensity(), params, n_jobs=4)
        #        grid.fit(codes)
        #        print grid.best_params_
        #        self.kde = grid.best_estimator_
        self.kde = KernelDensity(kernel='gaussian', bandwidth=3.16).fit(codes)

    def generate(self, n=10000):
        codes = self.kde.sample(n)
        images = self.decoder.predict(codes)
        return images

    def autoEncode(self, image):
        codes = self.encoder(image)
        gen_image = self.decoder(codes)
        return gen_image

    def generateAndPlot(self, x_train, n=10, fileName="generated.png"):
        # Note: imshow assumes a 2-D img_shape; volumetric outputs would need slicing first.
        fig = plt.figure(figsize=[20, 20])
        images = self.generate(n * n)
        index = 1
        for image in images:
            image = image.reshape(self.img_shape)
            ax = fig.add_subplot(n, n + 1, index)
            index = index + 1
            ax.set_axis_off()
            ax.imshow(image, cmap="gray")
            if index % (n + 1) == 0:
                nearest = helpers.findNearest(x_train, image)
                ax = fig.add_subplot(n, n + 1, index)
                index = index + 1
                ax.imshow(nearest, cmap="gray")
        fig.savefig(fileName)
        plt.show()

    def meanLogLikelihood(self, x_test):
        # Encode the test volumes, fit a Parzen-window KDE, and return the mean log-density.
        codes = self.encoder.predict(x_test)
        kde = KernelDensity(kernel='gaussian', bandwidth=0.2).fit(codes)
        return np.mean(kde.score_samples(codes))
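
A minimal usage sketch for the volumetric GAE above. The random training volumes and the sample counts are illustrative assumptions, not values from the original project:

import numpy as np

# Hypothetical toy data: 20 random volumes matching the default img_shape.
x_train = np.random.rand(20, 48, 96, 96, 1).astype("float32")

gae = GAE(img_shape=(48, 96, 96, 1), encoded_dim=8)
gae.train(x_train, batch_size=4, epochs=5)  # also fits self.kde on the latent codes
volumes = gae.generate(n=4)                 # decodes KDE-sampled latent codes
print(volumes.shape)                        # expected: (4, 48, 96, 96, 1)
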
class Texture:
    def __init__(self):
        self.rocks = []
        self.data = []

    '''
	add a rock to the texture if the rock doesn't intersect any rock in self.rocks
	returns bool
	'''

    def add(self, rock):
        if not self.intersect(rock):
            self.rocks.append(rock)
            self.data.append(rock.data())
            return True
        return False

    '''
	Fit the KernelDensity estimator (which builds its internal tree) on the rock data
	'''

    def learn(self):
        self.kde = KernelDensity(kernel='gaussian',
                                 bandwidth=0.02).fit(self.data)

    '''
	samples rocks from the KDE one by one, making sure there are no intersections
	returns a Texture
	'''

    def sample(self, n_rocks=None):
        length = n_rocks
        if length is None:
            length = len(self.data)

        mtexture = Texture()
        i = 0
        while i < length:
            new_rock = rock.dataToRock(
                self.kde.sample(1, random_state=None)[0])

            if mtexture.add(new_rock):
                i = i + 1

        return mtexture

    '''
	compute the Euclidean distance between two center points
	returns float
	'''

    def __distance(self, center1, center2):
        center1 = np.array(center1)
        center2 = np.array(center2)

        sub = center1 - center2
        sub = sub**2

        return math.sqrt(np.sum(sub))

    '''
	Test if two rocks intersect. 
	returns bool
	'''

    def __intersect(self, rock1, rock2):
        return self.__distance(rock1.center,
                               rock2.center) < rock1.radius + rock2.radius

    '''
	Test if a rock intersects another rock in self.rocks
	'''

    def intersect(self, rock):
        for r in self.rocks:
            if self.__intersect(rock, r):
                return True
        return False
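
A minimal sketch of the intended Texture workflow. The `rock` module used by `sample` belongs to the original project; the `SimpleRock` stand-in below is hypothetical and merely assumes a rock exposes `center`, `radius`, and `data()`:

import numpy as np

class SimpleRock:
    def __init__(self, center, radius):
        self.center, self.radius = center, radius

    def data(self):
        # Feature vector the KDE is fit on: center coordinates plus radius.
        return list(self.center) + [self.radius]

texture = Texture()
for _ in range(50):
    texture.add(SimpleRock(np.random.rand(2), 0.02))  # silently rejected on overlap
texture.learn()  # fit the KDE on the accepted rocks' feature vectors
# texture.sample(25) would now draw non-intersecting rocks via rock.dataToRock.
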
Example #24
0
class skde(object):
    r"""
    
    Custom wrapper around `sklearn.neighbors.kde.KernelDensity` to conform
    to our prefered syntax calling (following scipy conventions)

    """

    def __init__(self, data, mirror=False, **kwds):
        self.mirror = mirror
        if not kwds:  # **kwds collects an empty dict (never None) when no options are given
            if self.mirror:
                self.kde_object = KernelDensity(kernel='gaussian').fit( np.vstack([-data, data]) )
            else:
                self.kde_object = KernelDensity(kernel='gaussian').fit(data)
        else:
            if self.mirror:
                self.kde_object = KernelDensity(**kwds).fit( np.vstack([-data, data]) )
            else:
                self.kde_object = KernelDensity(**kwds).fit(data)
        try:
            self.d = data.shape[1]
        except IndexError:
            self.d = 1
        self.n = data.shape[0]
        
    
    def rvs(self, size=1):
        r"""
        Generates random variables from a kde object. Wrapper function for 
        `sklearn.neighbors.kde.KernelDensity.sample`.
        
        :param int size: number of random samples to generate
        :param tuple size: number of samples is taken to be the first argument
        """
        if type(size) is tuple: 
            size=size[0]
                        
        if self.mirror: # rejection-sample until `size` positive draws are collected
            num_samps = 0
            samps = []
            while num_samps < size:
                samp_proposal = self.kde_object.sample()
                if samp_proposal > 0:
                    samps.append(samp_proposal)
                    num_samps += 1
            samps = np.array(samps).reshape(size,self.d)
        else:
            samps = self.kde_object.sample(size)
            
        return samps
        #TODO write a test that makes sure this returns the correct shape
    
    def pdf(self, eval_points):
        r"""
        Generates random variables from a kde object. Wrapper function for 
        `sklearn.neighbors.kde.KernelDensity.score_samples`.
        
        :param eval_points: points on which to evaluate the density.
        :type eval_points: :class:`numpy.ndarray` of shape (num, dim)
        """
        
        #: TODO write a test that makes sure this returns the correct shape
        num_samples = eval_points.shape[0]
        if self.mirror:
            p = 2*np.exp( self.kde_object.score_samples( eval_points ) ).reshape(num_samples)
        else:
            try:
                p = np.exp( self.kde_object.score_samples( eval_points ) )
            except ValueError:
                p = np.exp( self.kde_object.score_samples( eval_points.reshape(-1,1) ) )
        return p
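
A short usage sketch of the skde wrapper with its scipy-style interface. The non-negative 1-D data is illustrative; keyword options such as `bandwidth` are forwarded to KernelDensity through **kwds:

import numpy as np

data = np.abs(np.random.randn(500, 1))        # non-negative 1-D data
kde = skde(data, mirror=True, bandwidth=0.3)  # mirroring keeps support on x >= 0
draws = kde.rvs(size=100)                     # shape (100, 1), all positive
dens = kde.pdf(np.linspace(0.0, 3.0, 50).reshape(-1, 1))
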
class GAE():
    def __init__(self, img_shape=(28, 28), encoded_dim=2):
        self.encoded_dim = encoded_dim
        self.optimizer = Adam(0.001)
        self.optimizer_discriminator = Adam(0.00001)
        self._initAndCompileFullModel(img_shape, encoded_dim)
        self.img_shape = img_shape

    def _genEncoderModel(self, img_shape, encoded_dim):
        """ Build Encoder Model Based on Paper Configuration
        Args:
            img_shape (tuple) : shape of input image
            encoded_dim (int) : number of latent variables
        Return:
            A sequential keras model
        """
        encoder = Sequential()
        encoder.add(Flatten(input_shape=img_shape))
        encoder.add(Dense(1000, activation='relu'))
        encoder.add(Dense(1000, activation='relu'))
        encoder.add(Dense(encoded_dim))
        encoder.summary()
        return encoder

    def _getDecoderModel(self, encoded_dim, img_shape):
        """ Build Decoder Model Based on Paper Configuration
        Args:
            encoded_dim (int) : number of latent variables
            img_shape (tuple) : shape of target images
        Return:
            A sequential keras model
        """
        decoder = Sequential()
        decoder.add(Dense(1000, activation='relu', input_dim=encoded_dim))
        decoder.add(Dense(1000, activation='relu'))
        decoder.add(Dense(np.prod(img_shape), activation='sigmoid'))
        decoder.add(Reshape(img_shape))
        decoder.summary()
        return decoder

    def _getDescriminator(self, img_shape):
        """ Build Descriminator Model Based on Paper Configuration
        Args:
            encoded_dim (int) : number of latent variables
        Return:
            A sequential keras model
        """
        discriminator = Sequential()
        # `initializer` is assumed to be defined at module level in the original script.
        discriminator.add(Flatten(input_shape=img_shape))
        discriminator.add(
            Dense(1000,
                  activation='relu',
                  kernel_initializer=initializer,
                  bias_initializer=initializer))
        discriminator.add(
            Dense(1000,
                  activation='relu',
                  kernel_initializer=initializer,
                  bias_initializer=initializer))
        discriminator.add(
            Dense(1,
                  activation='sigmoid',
                  kernel_initializer=initializer,
                  bias_initializer=initializer))
        discriminator.summary()
        return discriminator

    def _initAndCompileFullModel(self, img_shape, encoded_dim):
        self.encoder = self._genEncoderModel(img_shape, encoded_dim)
        self.decoder = self._getDecoderModel(encoded_dim, img_shape)
        self.discriminator = self._getDescriminator(img_shape)
        img = Input(shape=img_shape)
        encoded_repr = self.encoder(img)
        gen_img = self.decoder(encoded_repr)
        self.autoencoder = Model(img, gen_img)
        self.autoencoder.compile(optimizer=self.optimizer, loss='mse')
        self.discriminator.compile(optimizer=self.optimizer,
                                   loss='binary_crossentropy',
                                   metrics=['accuracy'])
        for layer in self.discriminator.layers:
            layer.trainable = False

        latent = Input(shape=(encoded_dim, ))
        gen_image_from_latent = self.decoder(latent)
        is_real = self.discriminator(gen_image_from_latent)
        self.decoder_discriminator = Model(latent, is_real)
        self.decoder_discriminator.compile(
            optimizer=self.optimizer_discriminator,
            loss='binary_crossentropy',
            metrics=['accuracy'])

    def imagegrid(self, epochnumber):
        fig = plt.figure(figsize=[20, 20])
        for i in range(-5, 5):
            for j in range(-5, 5):
                topred = np.array((i * 0.5, j * 0.5))
                topred = topred.reshape((1, 2))
                img = self.decoder.predict(topred)
                img = img.reshape(self.img_shape)
                ax = fig.add_subplot(10, 10, (i + 5) * 10 + j + 5 + 1)
                ax.set_axis_off()
                ax.imshow(img, cmap="gray")
        fig.savefig(str(epochnumber) + ".png")
        plt.show()
        plt.close(fig)

    def train(self, x_train, batch_size=32, epochs=5):
        # Match the checkpoint pattern used below so training can resume.
        fileNames = glob.glob('models/GAE/weights_autoencoder.*')
        fileNames.sort()
        if (len(fileNames) != 0):
            savedEpoch = int(fileNames[-1].split('.')[1])
            self.autoencoder.load_weights(fileNames[-1])
        else:
            savedEpoch = -1
        if (savedEpoch < epochs - 1):
            self.autoencoder.fit(
                x_train,
                x_train,
                batch_size=batch_size,
                epochs=epochs,
                callbacks=[
                    keras.callbacks.ModelCheckpoint(
                        'models/GAE/weights_autoencoder.{epoch:02d}.hdf5',
                        verbose=0,
                        save_best_only=False,
                        save_weights_only=False,
                        mode='auto',
                        period=1)
                ])
        print "Training KDE"
        codes = self.encoder.predict(x_train)
        #        params = {'bandwidth': [3.16]}#np.logspace(0, 2, 5)}
        #        grid = GridSearchCV(KernelDensity(), params, n_jobs=4)
        #        grid.fit(codes)
        #        print grid.best_params_
        #        self.kde = grid.best_estimator_
        self.kde = KernelDensity(kernel='gaussian', bandwidth=3.16).fit(codes)
        print "Initial Training of discriminator"
        # Match the checkpoint pattern used below so training can resume.
        fileNames = glob.glob('models/GAE/weights_discriminator.*')
        fileNames.sort()
        if (len(fileNames) != 0):
            savedEpoch = int(fileNames[-1].split('.')[1])
            self.discriminator.load_weights(fileNames[-1])
        else:
            savedEpoch = -1
        if (savedEpoch < epochs - 1):
            imgs_fake = self.generate(n=len(x_train))
            #gen_imgs = self.decoder.predict(latent_fake)
            valid = np.ones((len(x_train), 1))
            fake = np.zeros((len(x_train), 1))
            labels = np.vstack([valid, fake])
            images = np.vstack([x_train, imgs_fake])
            # Train the discriminator
            self.discriminator.fit(
                images,
                labels,
                epochs=epochs,
                batch_size=batch_size,
                shuffle=True,
                callbacks=[
                    keras.callbacks.ModelCheckpoint(
                        'models/GAE/weights_discriminator.{epoch:02d}.hdf5',
                        verbose=0,
                        save_best_only=False,
                        save_weights_only=False,
                        mode='auto',
                        period=1)
                ])

        print "Training GAN"
        self.generateAndPlot(x_train, fileName="before_gan.png")
        self.trainGAN(x_train,
                      epochs=len(x_train) // batch_size,
                      batch_size=batch_size)
        self.generateAndPlot(x_train, fileName="after_gan.png")

    def trainGAN(self, x_train, epochs=1000, batch_size=32):
        half_batch = batch_size // 2
        for epoch in range(epochs):
            #---------------Train Discriminator -------------
            # Select a random half batch of images
            idx = np.random.randint(0, x_train.shape[0], half_batch)
            imgs_real = x_train[idx]
            # Generate a half batch of new images
            imgs_fake = self.generate(n=half_batch)
            #gen_imgs = self.decoder.predict(latent_fake)
            valid = np.ones((half_batch, 1))
            fake = np.zeros((half_batch, 1))
            # Train the discriminator
            d_loss_real = self.discriminator.train_on_batch(imgs_real, valid)
            d_loss_fake = self.discriminator.train_on_batch(imgs_fake, fake)
            d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)
            #d_loss = (0,0)
            codes = self.kde.sample(batch_size)
            # Generator wants the discriminator to label the generated representations as valid
            valid_y = np.ones((batch_size, 1))
            # Train generator
            g_logg_similarity = self.decoder_discriminator.train_on_batch(
                codes, valid_y)
            # Plot the progress
            print("%d [D accuracy: %.2f] [G accuracy: %.2f]" %
                  (epoch, d_loss[1], g_logg_similarity[1]))


#            if(epoch % save_interval == 0):
#                self.imagegrid(epoch)

    def generate(self, n=10000):
        codes = self.kde.sample(n)
        images = self.decoder.predict(codes)
        return images

    def generateAndPlot(self, x_train, n=10, fileName="generated.png"):
        fig = plt.figure(figsize=[20, 20])
        images = self.generate(n * n)
        index = 1
        for image in images:
            image = image.reshape(self.img_shape)
            ax = fig.add_subplot(n, n + 1, index)
            index = index + 1
            ax.set_axis_off()
            ax.imshow(image, cmap="gray")
            if index % (n + 1) == 0:
                nearest = helpers.findNearest(x_train, image)
                ax = fig.add_subplot(n, n + 1, index)
                index = index + 1
                ax.imshow(nearest, cmap="gray")
        fig.savefig(fileName)
        plt.show()

    def meanLogLikelihood(self, x_test):
        # Encode the test images, fit a Parzen-window KDE, and return the mean log-density.
        codes = self.encoder.predict(x_test)
        kde = KernelDensity(kernel='gaussian', bandwidth=0.2).fit(codes)
        return np.mean(kde.score_samples(codes))
class KDEestimator:
    """
    An interface for generating random numbers according
    to a given Kernel Density Estimation (KDE) parametrization based on the 
    data. 
    """
    def __init__(self, bandwidth=1.0):
        from sklearn.neighbors.kde import KernelDensity
        self.bandwidth = bandwidth
        self.model = KernelDensity(bandwidth=self.bandwidth)

    def _botev_fixed_point(self, t, M, I, a2):
        # Find the largest float available for this numpy
        if hasattr(np, 'float128'):
            large_float = np.float128
        elif hasattr(np, 'float96'):
            large_float = np.float96
        else:
            large_float = np.float64

        l = 7
        I = large_float(I)
        M = large_float(M)
        a2 = large_float(a2)
        f = 2 * np.pi**(2 * l) * np.sum(I**l * a2 * np.exp(-I * np.pi**2 * t))
        for s in range(l, 1, -1):
            K0 = np.prod(np.arange(1, 2 * s, 2)) / np.sqrt(2 * np.pi)
            const = (1 + (1 / 2)**(s + 1 / 2)) / 3
            time = (2 * const * K0 / M / f)**(2 / (3 + 2 * s))
            f = 2 * np.pi ** (2 * s) * \
                np.sum(I ** s * a2 * np.exp(-I * np.pi ** 2 * time))
        return t - (2 * M * np.sqrt(np.pi) * f)**(-2 / 5)

    def finite(self, val):
        """ Checks if a value is finite or not """
        return val is not None and np.isfinite(val)

    def botev_bandwidth(self, data):
        """ Implementation of the KDE bandwidth selection method outline in:
            
        Z. I. Botev, J. F. Grotowski, and D. P. Kroese. *Kernel density estimation via diffusion.* The Annals of Statistics, 38(5):2916-2957, 2010.

        Based on the implementation of Daniel B. Smith, PhD. The object is a callable returning the bandwidth for a 1D kernel.
        
        Forked from the package `PyQT_fit <https://code.google.com/archive/p/pyqt-fit/>`_. 
        
        :param data: 1D array containing the data to model with a 1D KDE. 
        :type data: numpy.ndarray
        :returns: Optimal bandwidth according to the data. 
        """
        from scipy import fftpack, optimize
        #    def __init__(self, N=None, **kword):
        #        if 'lower' in kword or 'upper' in kword:
        #            print("Warning, using 'lower' and 'upper' for botev bandwidth is "
        #                  "deprecated. Argument is ignored")
        #        self.N = N
        #
        #    def __call__(self, data):#, model):
        #        """
        #        Returns the optimal bandwidth based on the data
        #        """
        N = 2**10  #if self.N is None else int(2 ** np.ceil(np.log2(self.N)))
        #        lower = getattr(model, 'lower', None)
        #        upper = getattr(model, 'upper', None)
        #        if not finite(lower) or not finite(upper):
        minimum = np.min(data)
        maximum = np.max(data)
        span = maximum - minimum
        lower = minimum - span / 10  #if not finite(lower) else lower
        upper = maximum + span / 10  #if not finite(upper) else upper
        # Range of the data
        span = upper - lower

        # Histogram of the data to get a crude approximation of the density
        #        weights = model.weights
        #        if not weights.shape:
        weights = None
        M = len(data)
        DataHist, bins = np.histogram(data,
                                      bins=N,
                                      range=(lower, upper),
                                      weights=weights)
        DataHist = DataHist / M
        DCTData = fftpack.dct(DataHist, norm=None)

        I = np.arange(1, N, dtype=int)**2
        SqDCTData = (DCTData[1:] / 2)**2
        guess = 0.1

        try:
            t_star = optimize.brentq(self._botev_fixed_point,
                                     0,
                                     guess,
                                     args=(M, I, SqDCTData))
        except ValueError:
            t_star = .28 * N**(-.4)

        return np.sqrt(t_star) * span

    def fit(self, x):
        self.bandwidth = self.botev_bandwidth(x.flatten())
        self.model.set_params(**{'bandwidth': self.bandwidth})
        self.model.fit(x.reshape(-1, 1))

    def sample(self, n_samples=1):
        return self.model.sample(n_samples)

    def pdf(self, x):
        # score_samples returns log-density; exponentiate to return the pdf
        return np.exp(self.model.score_samples(x))
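
A brief usage sketch of KDEestimator with the automatic Botev bandwidth; the bimodal toy data is an illustrative assumption:

import numpy as np

x = np.concatenate([np.random.normal(-2, 0.5, 400),
                    np.random.normal(3, 1.0, 600)])
est = KDEestimator()
est.fit(x)                          # selects the Botev bandwidth, then fits the KDE
print(est.bandwidth)                # data-driven bandwidth
draws = est.sample(1000)            # shape (1000, 1)
dens = est.pdf(x.reshape(-1, 1))    # density at the training points
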
Example #27
0
class GAE:
    def __init__(self, img_shape=(28, 28), encoded_dim=2):
        self.img_shape = img_shape
        self.encoded_dim = encoded_dim
        self.optimizer = Adam(0.001)
        self.optimizer_discriminator = Adam(0.00001)
        self.discriminator = self.get_discriminator_model(img_shape)
        self.decoder = self.get_decoder_model(encoded_dim, img_shape)
        self.encoder = self.get_encoder_model(img_shape, encoded_dim)
        # Initialize Autoencoder
        img = Input(shape=self.img_shape)
        encoded_repr = self.encoder(img)
        gen_img = self.decoder(encoded_repr)
        self.autoencoder = Model(img, gen_img)
        # Initialize Discriminator
        latent = Input(shape=(encoded_dim,))
        gen_image_from_latent = self.decoder(latent)
        is_real = self.discriminator(gen_image_from_latent)
        self.decoder_discriminator = Model(latent, is_real)
        # Finally compile models
        self.initialize_full_model(encoded_dim)

    def initialize_full_model(self, encoded_dim):
        self.autoencoder.compile(optimizer=self.optimizer, loss='mse')
        self.discriminator.compile(optimizer=self.optimizer,
                                   loss='binary_crossentropy',
                                   metrics=['accuracy'])
        # Default start discriminator is not trainable
        for layer in self.discriminator.layers:
            layer.trainable = False

        self.decoder_discriminator.compile(optimizer=self.optimizer_discriminator,
                                           loss='binary_crossentropy',
                                           metrics=['accuracy'])

    @staticmethod
    def get_encoder_model(img_shape, encoded_dim):
        encoder = Sequential()
        encoder.add(Flatten(input_shape=img_shape))
        encoder.add(Dense(1000, activation='relu'))
        encoder.add(Dense(1000, activation='relu'))
        encoder.add(Dense(encoded_dim))
        encoder.summary()
        return encoder

    @staticmethod
    def get_decoder_model(encoded_dim, img_shape):
        decoder = Sequential()
        decoder.add(Dense(1000, activation='relu', input_dim=encoded_dim))
        decoder.add(Dense(1000, activation='relu'))
        decoder.add(Dense(np.prod(img_shape), activation='sigmoid'))
        decoder.add(Reshape(img_shape))
        decoder.summary()
        return decoder

    @staticmethod
    def get_discriminator_model(img_shape):
        discriminator = Sequential()
        # `initializer` is assumed to be defined at module level in the original script.
        discriminator.add(Flatten(input_shape=img_shape))
        discriminator.add(Dense(1000, activation='relu',
                                kernel_initializer=initializer,
                                bias_initializer=initializer))
        discriminator.add(Dense(1000, activation='relu', kernel_initializer=initializer,
                                bias_initializer=initializer))
        discriminator.add(Dense(1, activation='sigmoid', kernel_initializer=initializer,
                                bias_initializer=initializer))
        discriminator.summary()
        return discriminator

    def imagegrid(self, epochnumber):
        fig = plt.figure(figsize=[20, 20])
        for i in range(-5, 5):
            for j in range(-5, 5):
                topred = np.array((i * 0.5, j * 0.5))
                topred = topred.reshape((1, 2))
                img = self.decoder.predict(topred)
                img = img.reshape(self.img_shape)
                ax = fig.add_subplot(10, 10, (i + 5) * 10 + j + 5 + 1)
                ax.set_axis_off()
                ax.imshow(img)
        fig.savefig(str(epochnumber) + ".png")
        plt.show()
        plt.close(fig)

    def train(self, x_train_input, batch_size=128, epochs=5):
        # Match the checkpoint pattern used below so training can resume.
        fileNames = glob.glob('models/weights_autoencoder.*')
        fileNames.sort()
        if len(fileNames) != 0:
            saved_epoch = int(fileNames[-1].split('.')[1])
            self.autoencoder.load_weights(fileNames[-1])
        else:
            saved_epoch = -1
        if saved_epoch < epochs - 1:
            self.autoencoder.fit(x_train_input, x_train_input, batch_size=batch_size,
                                 epochs=epochs,
                                 callbacks=[
                                     keras.callbacks.ModelCheckpoint('models/weights_autoencoder.{epoch:02d}.hdf5',
                                                                     verbose=0,
                                                                     save_best_only=False,
                                                                     save_weights_only=False,
                                                                     mode='auto',
                                                                     period=1),
                                     keras.callbacks.EarlyStopping(monitor='loss', patience=3, min_delta=1e-4,
                                                                   restore_best_weights=True)])
        print("Training KDE")
        codes = self.encoder.predict(x_train_input)
        self.kde = KernelDensity(kernel='gaussian', bandwidth=3.16).fit(codes)
        print("Initial Training of discriminator")
        fileNames = glob.glob('models/weights_discriminator.*')
        fileNames.sort()
        if len(fileNames) != 0:
            saved_epoch = int(fileNames[-1].split('.')[1])
            self.discriminator.load_weights(fileNames[-1])
        else:
            saved_epoch = -1

        train_count = len(x_train_input)
        if saved_epoch < epochs - 1:
            # Combine real and fake images for discriminator training
            imgs_fake = self.generate(n=train_count)
            valid = np.ones((train_count, 1))  # result for training images
            fake = np.zeros((train_count, 1))  # result for generated fakes
            labels = np.vstack([valid, fake])  # combine together
            images = np.vstack([x_train_input, imgs_fake])
            # Train the discriminator
            self.discriminator.fit(images, labels, epochs=epochs, batch_size=batch_size, shuffle=True,
                                   callbacks=[
                                       keras.callbacks.ModelCheckpoint(
                                           'models/weights_discriminator.{epoch:02d}.hdf5',
                                           verbose=0,
                                           save_best_only=False,
                                           save_weights_only=False,
                                           mode='auto',
                                           period=1),
                                       keras.callbacks.EarlyStopping(monitor='loss', patience=3, min_delta=1e-4,
                                                                     restore_best_weights=True)])

        print("Training GAN")
        self.generateAndPlot(x_train_input, fileName="before_gan.png")
        self.trainGAN(x_train_input, epochs=int(train_count / batch_size), batch_size=batch_size)
        self.generateAndPlot(x_train_input, fileName="after_gan.png")

    def trainGAN(self, x_train_input, epochs=1000, batch_size=128):
        half_batch = int(batch_size / 2)
        for epoch in range(epochs):
            # ---------------Train Discriminator -------------
            # Select a random half batch of images
            idx = np.random.randint(0, x_train_input.shape[0], half_batch)
            imgs_real = x_train_input[idx]
            # Generate a half batch of new images
            imgs_fake = self.generate(n=half_batch)
            valid = np.ones((half_batch, 1))
            fake = np.zeros((half_batch, 1))
            # Train the discriminator
            d_loss_real = self.discriminator.train_on_batch(imgs_real, valid)
            d_loss_fake = self.discriminator.train_on_batch(imgs_fake, fake)
            d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)
            codes = self.kde.sample(batch_size)
            # Generator wants the discriminator to label the generated representations as valid
            valid_y = np.ones((batch_size, 1))
            # Train generator
            g_logg_similarity = self.decoder_discriminator.train_on_batch(codes, valid_y)
            # Plot the progress
            if epoch % 50 == 0:
                print("epoch %d [D accuracy: %.2f] [G accuracy: %.2f]" % (epoch, d_loss[1], g_logg_similarity[1]))

    def generate(self, n=10000):
        codes = self.kde.sample(n)
        images = self.decoder.predict(codes)
        return images

    def generateAndPlot(self, x_train_input, n=10, fileName="generated.png"):
        fig = plt.figure(figsize=[20, 20])
        images = self.generate(n * n)
        index = 1
        for image in images:
            image = image.reshape(self.img_shape)
            ax = fig.add_subplot(n, n + 1, index)
            index = index + 1
            ax.set_axis_off()
            ax.imshow(image)
            if index % (n + 1) == 0:
                nearest = findNearest(x_train_input, image)
                ax = fig.add_subplot(n, n + 1, index)
                index = index + 1
                ax.imshow(nearest)
        fig.savefig(fileName)
        plt.show()

    @staticmethod
    def mean_log_likelihood(x_test_input):
        # Flatten to 2-D, fit a Parzen-window KDE, and return the mean log-density.
        flat = x_test_input.reshape(len(x_test_input), -1)
        kde = KernelDensity(kernel='gaussian', bandwidth=0.2).fit(flat)
        return np.mean(kde.score_samples(flat))
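
The helper above fits the Parzen-window KDE on the test set itself. A common alternative for evaluating generative models, sketched here under assumed array shapes rather than taken from the original code, is to fit the KDE on generated samples and score held-out test data:

import numpy as np
from sklearn.neighbors import KernelDensity

def parzen_mean_log_likelihood(generated, x_test, bandwidth=0.2):
    # Fit on flattened generated samples, then evaluate the real test points.
    gen = generated.reshape(len(generated), -1)
    test = x_test.reshape(len(x_test), -1)
    kde = KernelDensity(kernel='gaussian', bandwidth=bandwidth).fit(gen)
    return np.mean(kde.score_samples(test))
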
Example #28
0
# Assumes `counts` and a DataFrame `data` with 'Date' and 'Amount' columns are
# defined earlier in the original script.
counts = np.array(counts).reshape(-1, 1)
data['day'] = [s[:2] for s in data.Date]
day = np.array(data['day']).reshape(-1, 1)
trans = np.array(data.Amount).reshape(-1, 1)

X1 = np.linspace(0, 120, 1000)[:, np.newaxis]
X2 = np.linspace(0, 32, 1000)[:, np.newaxis]
X3 = np.linspace(min(trans), max(trans)+1, 1000)[:, np.newaxis]
for kernel in ['gaussian', 'tophat']:
    kde1 = KernelDensity(kernel=kernel, bandwidth=5).fit(counts)
    kde2 = KernelDensity(kernel=kernel, bandwidth=5).fit(day)
    kde3 = KernelDensity(kernel=kernel, bandwidth=5).fit(trans) 
    log_dens1 = kde1.score_samples(X1)
    log_dens2 = kde2.score_samples(X2)
    log_dens3 = kde3.score_samples(X3)
    n_trans = int(kde1.sample(1)[0][0])
    print('There are', n_trans, 'transactions', '\n')
    num_days = kde2.sample(n_trans)
    for m in range(len(num_days)):
        num_days[m] = int(round(num_days[m][0]))
        while num_days[m] <= 0 or num_days[m] > 31:
            num_days[m] = int(round(kde2.sample(1)[0][0]))           
    print('The days are:')
    print(num_days, '\n')
    num_trans = kde3.sample(n_trans)
    print('The transactions are:')
    print(num_trans, '\n')

    #Plotting density of number of transactions in a month
    plt.plot(X1[:, 0], np.exp(log_dens1), '-',
            label="kernel = '{0}'".format(kernel))
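
The resampling loop above redraws any day that falls outside [1, 31]. The same idea as a small reusable helper, a sketch that is not part of the original snippet:

import numpy as np
from sklearn.neighbors import KernelDensity

def sample_in_range(kde, n, lo, hi, max_tries=1000):
    # Draw from the fitted KDE, keeping only in-range samples, until n are accepted.
    accepted = []
    for _ in range(max_tries):
        draws = kde.sample(n)
        good = draws[(draws[:, 0] >= lo) & (draws[:, 0] <= hi)]
        accepted.extend(good[:, 0].tolist())
        if len(accepted) >= n:
            return np.array(accepted[:n])
    raise RuntimeError("too few in-range samples after max_tries draws")
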
Example #29
0
# Assumes `labels`, `colors`, `bandwidths`, and a matplotlib axes `ax` are defined
# earlier in the original script.
for i, label, color, bw in zip([64, 16, 4, 1], labels, colors, bandwidths):
    print(i)
    data = np.loadtxt(
        'STP_09_lm55_t{:02d}_joint_Lframe_resampled.dat'.format(i))
    ra, dec = data[:, 8], data[:, 9]
    #ra -= np.pi

    # create the KDE estimator
    # we could use grid-search cross validation to estimate the bandwidth here
    radec = np.vstack((dec, ra)).T
    kde = KernelDensity(kernel='tophat', bandwidth=bw,
                        metric='haversine').fit(radec)

    # find the density levels corresponding to 1, 2, 3 sigmas
    n = 10000
    sample = kde.sample(n)
    sample_densities = np.sort(np.exp(kde.score_samples(sample)))
    # Note: are those levels appropriate for 2d ?
    levels = [
        sample_densities[int(n * (1 - p))] for p in [0.9973, 0.9545, 0.6827]
    ]

    gra = np.linspace(-np.pi, np.pi, 300)
    gdec = np.linspace(-np.pi / 2, np.pi / 2, 300)
    ggra, ggdec = np.meshgrid(gra, gdec)
    d = np.exp(kde.score_samples(np.vstack((ggdec.ravel(), ggra.ravel())).T))
    d = np.reshape(d, ggra.shape)
    cs = ax.contour(gra,
                    gdec,
                    d,
                    colors=color,
                    levels=levels)

    transformer = RobustScaler(quantile_range=(25, 75))
    transformer.fit(X)
    X = transformer.transform(X)

    from sklearn.model_selection import GridSearchCV

    bandwidths = 10**np.linspace(-2, 0, 20)

    grid = GridSearchCV(KernelDensity(kernel='gaussian'),
                        {'bandwidth': bandwidths},
                        cv=2)
    grid.fit(X)

    # Note: the grid-search result above is left unused; a fixed bandwidth is applied instead.
    kde = KernelDensity(kernel='gaussian', bandwidth=.021)
    kde.fit(X)
    samples = kde.sample(n_samples=45 * 198)
    samples = transformer.inverse_transform(samples)
    x_int = r_0_g * np.cos(phi_0_g)
    y_int = r_0_g * np.sin(phi_0_g)
    X_int = sp.interpolate.griddata(samples[:, 1:],
                                    samples[:, 0],
                                    (x_int.flatten(), y_int.flatten()),
                                    method='linear')

im = plt.contourf(x_int, y_int, np.reshape(X_int, r_0_g.shape), cmap='jet')
plt.colorbar(im)

with open('K_CNR.pkl', 'wb') as writer:
    pickle.dump(K_CNR, writer)
#####Dataframes