def compute_reconstructions(rbm, v_data, fit, n_recon=10, vertical=False, num_to_avg=1):
    """
    Pair randomly chosen data rows with their averaged model reconstructions.

    Each attempt builds a state from the data, runs one update of a
    fit.DrivenSequentialMC sampler, applies one deterministic iteration of
    the model, and accumulates the resulting visible units. The accumulated
    values are divided by num_to_avg.

    Args:
        rbm: the model; must provide deterministic_iteration.
        v_data: the visible data to reconstruct.
        fit: object providing the DrivenSequentialMC sampler class.
        n_recon (int): number of rows to draw (without replacement).
        vertical (bool): if True the first axis indexes the drawn rows and
            the second axis indexes (data, reconstruction); otherwise the
            two axes are swapped.
        num_to_avg (int): number of reconstruction attempts to average.

    Returns:
        grid (numpy array): the (data, reconstruction) pairs.

    Raises:
        ValueError: if num_to_avg is smaller than 1.

    """
    # with no attempts `visible` would never be bound and the average below
    # would divide by zero, so reject the request explicitly
    if num_to_avg < 1:
        raise ValueError("num_to_avg must be at least 1")

    v_model = be.zeros_like(v_data)

    # Average over n reconstruction attempts
    for _ in range(num_to_avg):
        data_state = State.from_visible(v_data, rbm)
        visible = data_state.units[0]

        reconstructions = fit.DrivenSequentialMC(rbm)
        reconstructions.set_state(data_state)
        dropout_scale = State.dropout_rescale(rbm)
        reconstructions.update_state(1, dropout_scale)
        v_model += rbm.deterministic_iteration(
            1, reconstructions.state, dropout_scale).units[0]

    v_model /= num_to_avg

    idx = numpy.random.choice(range(len(v_model)), n_recon, replace=False)
    grid = numpy.array(
        [[be.to_numpy_array(visible[i]), be.to_numpy_array(v_model[i])]
         for i in idx])
    if vertical:
        return grid
    return grid.swapaxes(0, 1)
def compute_reconstructions(rbm, v_data, fit):
    """
    Pair five randomly chosen data rows with their model reconstructions.

    A fit.DrivenSequentialMC sampler is seeded with the data, updated once,
    and its state is passed through rbm.deterministic_step.

    Args:
        rbm: the model; must provide deterministic_step.
        v_data: the visible data to reconstruct.
        fit: object providing the DrivenSequentialMC sampler class.

    Returns:
        numpy array: one [data, reconstruction] pair per drawn row.

    """
    mc = fit.DrivenSequentialMC(rbm)
    mc.set_state(v_data)
    mc.update_state(1)
    reconstructed = rbm.deterministic_step(mc.state)

    # draw five distinct row indices
    chosen = numpy.random.choice(numpy.arange(len(reconstructed)), 5, replace=False)

    pairs = []
    for row in chosen:
        pairs.append([be.to_numpy_array(v_data[row]),
                      be.to_numpy_array(reconstructed[row])])
    return numpy.array(pairs)
def compute_reconstructions(rbm, v_data, fit):
    """
    Pair five randomly chosen data rows with their model reconstructions.

    The data is wrapped in a State, installed as the positive state of a
    fit.DrivenSequentialMC sampler and updated once; the visible units of
    one deterministic iteration of the model are used as reconstructions.

    Args:
        rbm: the model; must provide deterministic_iteration.
        v_data: the visible data to reconstruct.
        fit: object providing the DrivenSequentialMC sampler class.

    Returns:
        numpy array: one [data, reconstruction] pair per drawn row.

    """
    mc = fit.DrivenSequentialMC(rbm)
    mc.set_positive_state(State.from_visible(v_data, rbm))
    mc.update_positive_state(1)

    iterated = rbm.deterministic_iteration(1, mc.pos_state)
    reconstructed = iterated.units[0]

    # draw five distinct row indices
    chosen = numpy.random.choice(numpy.arange(len(reconstructed)), 5, replace=False)

    pairs = []
    for row in chosen:
        pairs.append([be.to_numpy_array(v_data[row]),
                      be.to_numpy_array(reconstructed[row])])
    return numpy.array(pairs)
def compute_reconstructions(rbm, v_data, n_recon=10, vertical=False, num_to_avg=1):
    """
    Pair randomly chosen data rows with their averaged model reconstructions.

    Args:
        rbm: the model; must provide compute_reconstructions.
        v_data: the visible data to reconstruct.
        n_recon (int): number of rows to draw (without replacement).
        vertical (bool): if True the first axis indexes the drawn rows and
            the second axis indexes (data, reconstruction); otherwise the
            two axes are swapped.
        num_to_avg (int): number of reconstruction attempts to average.

    Returns:
        grid (numpy array): the (data, reconstruction) pairs.

    """
    averaged = be.zeros_like(v_data)

    # every attempt contributes an equal share to the running average
    for _ in range(num_to_avg):
        attempt = rbm.compute_reconstructions(v_data)
        averaged += attempt.get_visible() / num_to_avg

    chosen = np.random.choice(np.arange(len(averaged)), n_recon, replace=False)

    pairs = []
    for row in chosen:
        pairs.append([be.to_numpy_array(v_data[row]),
                      be.to_numpy_array(averaged[row])])
    grid = np.array(pairs)

    return grid if vertical else grid.swapaxes(0, 1)
def compute_one_hot_reconstructions(rbm, fit, level, n_recon, num_to_avg=1):
    """
    Compute visible unit activations obtained by clamping one layer to
    one-hot states.

    The chosen layer is set to the output of its onehot(n) method, clamped,
    and a fit.SequentialMC sampler using the 'mean_field_iteration' updater
    is run for 10 updates. The visible units are accumulated over
    num_to_avg runs and divided by num_to_avg.

    Args:
        rbm: the model; its layers must provide len and onehot.
        fit: object providing the SequentialMC sampler class.
        level (int): index of the layer to clamp.
        n_recon (int): requested number of reconstructions. It is capped at
            the size of the clamped layer and floored to the nearest perfect
            square so that the result always fills a square grid.
        num_to_avg (int): number of runs to average.

    Returns:
        recons (numpy array): array of shape (grid_size, grid_size, -1).

    """
    n = rbm.layers[level].len

    # can't sample more than what we've got
    n_recon = min(n_recon, n)
    # floor to the nearest square below; otherwise the final reshape either
    # fails or silently merges several reconstructions into one grid cell
    grid_size = int(sqrt(n_recon))
    n_recon = grid_size ** 2

    one_hotz = rbm.layers[level].onehot(n)
    v_model = be.zeros((n, rbm.layers[0].len))

    for _ in range(num_to_avg):
        # set up the initial state
        state = State.from_model(n, rbm)
        state.units[level] = one_hotz
        dropout_scale = State.dropout_rescale(rbm)

        # set up a sampler and update the state
        reconstructions = fit.SequentialMC(rbm, clamped=[level],
                                           updater='mean_field_iteration')
        reconstructions.set_state(state)
        reconstructions.update_state(10, dropout_scale)
        v_model += reconstructions.state.units[0]

    v_model /= num_to_avg

    # select the visible unit activations to return
    idx = numpy.random.choice(range(len(v_model)), n_recon, replace=False)
    recons = numpy.array([be.to_numpy_array(v_model[i]) for i in idx])
    return recons.reshape(grid_size, grid_size, -1)
def plot_image_grid(image_array, shape, vmin=0, vmax=1, filename=None, show=True):
    """
    Draw a grid of images as seaborn heatmaps.

    Args:
        image_array: array whose first two axes index the grid rows and
            columns; the last axis holds the flattened pixels of each image.
        shape (tuple): shape each flattened image is reshaped to.
        vmin (float): lower bound of the color scale.
        vmax (float): upper bound of the color scale.
        filename (str or None): if given, the figure is saved to this path.
        show (bool): whether to display the figure.

    Returns:
        None

    """
    array = be.to_numpy_array(image_array)
    nrows, ncols = array.shape[:-1]

    f = plt.figure(figsize=(2 * ncols, 2 * nrows))
    grid = gs.GridSpec(nrows, ncols)
    axes = [[plt.subplot(grid[i, j]) for j in range(ncols)]
            for i in range(nrows)]

    for i in range(nrows):
        for j in range(ncols):
            sns.heatmap(numpy.reshape(array[i][j], shape),
                        ax=axes[i][j], cmap="gray_r", cbar=False,
                        vmin=vmin, vmax=vmax)
            axes[i][j].set(yticks=[])
            axes[i][j].set(xticks=[])

    plt.tight_layout(pad=0.5, h_pad=0.2, w_pad=0.2)

    if show:
        # pyplot.show takes no figure argument (its only parameter, `block`,
        # is keyword-only), so the figure must not be passed positionally
        plt.show()
    if filename is not None:
        f.savefig(filename)
    plt.close(f)
def plot_image(image_vector, shape, vmin=0, vmax=1, filename=None, show=True,
               cmap=cm.gray_r, nan_color='red'):
    """
    Plot a single image, drawing NaN entries in a dedicated color.

    Args:
        image_vector: the flattened image.
        shape (tuple): shape the image is reshaped to.
        vmin (float): lower bound of the color scale.
        vmax (float): upper bound of the color scale.
        filename (str or None): if given, the figure is saved to this path.
        show (bool): whether to display the figure.
        cmap: matplotlib colormap used for the image.
        nan_color: color used for NaN entries.

    Returns:
        None

    """
    import copy

    f, ax = plt.subplots(figsize=(4, 4))

    # reshape the data and cast to a numpy array
    img = numpy.reshape(be.to_numpy_array(image_vector), shape)

    # construct a masked numpy array from the data in case of nan
    img = numpy.ma.array(img, mask=numpy.isnan(img))

    # set the nan color on a private copy: the default colormap is a shared
    # object, and changing it in place would leak into every other plot
    cmap = copy.copy(cmap)
    cmap.set_bad(nan_color, 1.)

    # make the plot
    ax.imshow(img, interpolation='none', cmap=cmap, vmin=vmin, vmax=vmax)
    ax.set(yticks=[])
    ax.set(xticks=[])

    if show:
        # pyplot.show takes no figure argument (its only parameter, `block`,
        # is keyword-only), so the figure must not be passed positionally
        plt.show()
    if filename is not None:
        f.savefig(filename)
    plt.close(f)
def plot_image_grid(image_array, shape, vmin=0, vmax=1, filename=None, show=True,
                    cmap=cm.gray, nan_color='red'):
    """
    Draw a grid of images, drawing NaN entries in a dedicated color.

    Args:
        image_array: array whose first two axes index the grid rows and
            columns; the last axis holds the flattened pixels of each image.
        shape (tuple): shape each flattened image is reshaped to.
        vmin (float): lower bound of the color scale.
        vmax (float): upper bound of the color scale.
        filename (str or None): if given, the figure is saved to this path.
        show (bool): whether to display the figure.
        cmap: matplotlib colormap used for the images.
        nan_color: color used for NaN entries.

    Returns:
        None

    """
    import copy

    # cast to a numpy array
    img_array = be.to_numpy_array(image_array)

    # construct a masked numpy array from the data in case of nan
    img_array = numpy.ma.array(img_array, mask=numpy.isnan(img_array))
    nrows, ncols = img_array.shape[:-1]

    # nan_color used to be accepted but ignored; apply it to a private copy
    # so the shared default colormap is left untouched
    cmap = copy.copy(cmap)
    cmap.set_bad(nan_color, 1.)

    f = plt.figure(figsize=(2 * ncols, 2 * nrows))
    grid = gs.GridSpec(nrows, ncols)
    axes = [[plt.subplot(grid[i, j]) for j in range(ncols)]
            for i in range(nrows)]

    for i in range(nrows):
        for j in range(ncols):
            axes[i][j].imshow(numpy.reshape(img_array[i][j], shape),
                              cmap=cmap, interpolation='none',
                              vmin=vmin, vmax=vmax)
            axes[i][j].set(yticks=[])
            axes[i][j].set(xticks=[])

    plt.tight_layout(pad=0.5, h_pad=0.2, w_pad=0.2)

    if show:
        # pyplot.show takes no figure argument (its only parameter, `block`,
        # is keyword-only), so the figure must not be passed positionally
        plt.show()
    if filename is not None:
        f.savefig(filename)
    plt.close(f)
def test_hdf_table_batch():
    """
    Check that batch.HDFtable returns the stored rows batch by batch.

    A DataFrame of consecutive integers is appended to an HDF store under
    the key "train" and read back through batch.HDFtable over three full
    passes. Every batch must equal the matching slice of the original data,
    and each pass must end with StopIteration after exactly
    num_rows // batch_size batches.

    """
    # create data
    num_rows = 10000
    num_cols = 10
    df_A = pd.DataFrame(
        np.arange(num_rows * num_cols).reshape(num_rows, num_cols))

    batch_size = 1000
    num_train_batches = num_rows // batch_size

    # the temporary storage file; the context manager closes (and removes)
    # it even when one of the assertions below fails
    with tempfile.NamedTemporaryFile() as store_file:
        # save it
        with pd.HDFStore(store_file.name, mode="w", format="table") as store:
            store.append("train", df_A)

        # read it back with the HDFtable
        data = batch.HDFtable(store_file.name, "train", batch_size)

        # loop through thrice, checking the data
        for _ in range(3):
            i_batch = 0
            while True:
                # get the data
                try:
                    batch_data = data.get()
                except StopIteration:
                    assert i_batch == num_train_batches
                    break

                # check it
                start = i_batch * batch_size
                expected = df_A.values[start: start + batch_size]
                assert np.all(be.to_numpy_array(batch_data) == expected)
                i_batch += 1
def example_plot(grid, show_plot, Filename=None, dim=56, vmin=0, vmax=1,
                 cmap=plotting.cm.gray):
    """
    Plot a grid of vectors as strictly upper-triangular dim x dim images.

    Each vector grid[x, y, :] is written into the entries above the main
    diagonal of a zero dim x dim matrix, which is then flattened and handed
    to plotting.plot_image_grid.

    Args:
        grid: array indexed as grid[x, y, :].
        show_plot (bool): whether to display the figure.
        Filename (str or None): if truthy, the figure is also rendered
            without being shown and saved to this path.
        dim (int): side length of the square images.
        vmin (float): lower bound of the color scale.
        vmax (float): upper bound of the color scale.
        cmap: matplotlib colormap used for the images.

    Returns:
        None

    """
    n_rows = grid.shape[0]
    n_cols = grid.shape[1]
    images = np.zeros((n_rows, n_cols, dim * dim))

    # positions strictly above the main diagonal
    upper = np.triu_indices(dim, 1)

    for x, y in np.ndindex(n_rows, n_cols):
        square = np.zeros((dim, dim))
        square[upper] = grid[x, y, :]
        images[x, y, :] = square.flatten()

    numpy_grid = be.to_numpy_array(images)
    image_shape = (dim, dim)

    if show_plot:
        plotting.plot_image_grid(numpy_grid, image_shape, vmin, vmax, cmap=cmap)
    if Filename:
        plotting.plot_image_grid(numpy_grid, image_shape, vmin, vmax, cmap=cmap,
                                 show=False, filename=Filename)
def plot_image(image_vector, shape):
    """
    Show a single image as a seaborn heatmap.

    Args:
        image_vector: the flattened image.
        shape (tuple): shape the image is reshaped to.

    Returns:
        None

    """
    f, ax = plt.subplots(figsize=(4, 4))
    array = be.to_numpy_array(image_vector)
    hm = sns.heatmap(numpy.reshape(array, shape), ax=ax, cmap="gray_r",
                     cbar=False)
    hm.set(yticks=[])
    hm.set(xticks=[])

    # pyplot.show takes no figure argument (its only parameter, `block`,
    # is keyword-only), so the figure must not be passed positionally
    plt.show()
    plt.close(f)
def example_plot(grid, show_plot, dim=28, vmin=0, vmax=1, cmap=plotting.cm.gray_r):
    """
    Plot a grid of dim x dim images with plotting.plot_image_grid.

    Args:
        grid: the grid of flattened images.
        show_plot (bool): nothing is plotted unless this is truthy.
        dim (int): side length of the square images.
        vmin (float): lower bound of the color scale.
        vmax (float): upper bound of the color scale.
        cmap: matplotlib colormap used for the images.

    Returns:
        None

    """
    images = be.to_numpy_array(grid)
    if not show_plot:
        return
    plotting.plot_image_grid(images, (dim, dim), vmin, vmax, cmap=cmap)
def compute_fantasy_particles(rbm, v_data, fit):
    """
    Draw five fantasy particles from the model.

    A fit.DrivenSequentialMC sampler is seeded with rbm.random(v_data),
    updated 1000 times, and its state is passed through
    rbm.deterministic_step.

    Args:
        rbm: the model; must provide random and deterministic_step.
        v_data: visible data handed to rbm.random.
        fit: object providing the DrivenSequentialMC sampler class.

    Returns:
        numpy array: one single-element list per drawn particle.

    """
    mc = fit.DrivenSequentialMC(rbm)
    mc.set_state(rbm.random(v_data))
    mc.update_state(1000)
    particles = rbm.deterministic_step(mc.state)

    # draw five distinct row indices
    chosen = numpy.random.choice(numpy.arange(len(particles)), 5, replace=False)

    rows = []
    for row in chosen:
        rows.append([be.to_numpy_array(particles[row])])
    return numpy.array(rows)
def compute_fantasy_particles(rbm, v_data, fit):
    """
    Draw five fantasy particles from the model.

    A State built from rbm.random(v_data) is installed as the negative
    state of a fit.DrivenSequentialMC sampler and updated 1000 times; the
    visible units of one deterministic iteration of the model are returned.

    Args:
        rbm: the model; must provide random and deterministic_iteration.
        v_data: visible data handed to rbm.random.
        fit: object providing the DrivenSequentialMC sampler class.

    Returns:
        numpy array: one single-element list per drawn particle.

    """
    start = State.from_visible(rbm.random(v_data), rbm)

    mc = fit.DrivenSequentialMC(rbm)
    mc.set_negative_state(start)
    mc.update_negative_state(1000)

    iterated = rbm.deterministic_iteration(1, mc.neg_state)
    particles = iterated.units[0]

    # draw five distinct row indices
    chosen = numpy.random.choice(numpy.arange(len(particles)), 5, replace=False)

    rows = []
    for row in chosen:
        rows.append([be.to_numpy_array(particles[row])])
    return numpy.array(rows)
def compute_weights(rbm, n_weights=25, l=0):
    """
    Collect randomly chosen weight columns into a square grid.

    Args:
        rbm: the model; rbm.weights[l] must provide shape and W().
        n_weights (int): number of columns to draw (without replacement);
            must be a perfect square.
        l (int): index into rbm.weights.

    Returns:
        numpy array: array of shape (grid_size, grid_size, -1) where
            grid_size is the square root of n_weights.

    """
    grid_size = int(sqrt(n_weights))
    is_square = grid_size == sqrt(n_weights)
    assert is_square, "n_weights must be the square of an integer"

    n_columns = rbm.weights[l].shape[1]
    chosen = numpy.random.choice(numpy.arange(n_columns), n_weights, replace=False)

    columns = []
    for col in chosen:
        columns.append(be.to_numpy_array(rbm.weights[l].W()[:, col]))

    return numpy.array(columns).reshape(grid_size, grid_size, -1)
def to_dataframe(self):
    """
    Build a DataFrame describing the state of this object.

    Args:
        None

    Returns:
        df (DataFrame): an empty DataFrame when self.num is None; otherwise
            one row per entry of self.mean with the columns "num", "mean",
            "var" and "square".

    """
    if self.num is None:
        return pandas.DataFrame(None)

    frame = pandas.DataFrame(None, index=range(len(self.mean)))

    # self.num is a constant, but it still has to fill a whole column
    count_column = self.num * be.ones((len(self.mean),), dtype=be.Long)
    frame["num"] = count_column

    frame["mean"] = be.to_numpy_array(self.mean)
    frame["var"] = be.to_numpy_array(self.var)
    frame["square"] = be.to_numpy_array(self.square)
    return frame
def plot_image(image_vector, shape, filename=None, show=True):
    """
    Plot a single image as a seaborn heatmap.

    Args:
        image_vector: the flattened image.
        shape (tuple): shape the image is reshaped to.
        filename (str or None): if given, the figure is saved to this path.
        show (bool): whether to display the figure.

    Returns:
        None

    """
    f, ax = plt.subplots(figsize=(4, 4))
    array = be.to_numpy_array(image_vector)
    hm = sns.heatmap(numpy.reshape(array, shape), ax=ax, cmap="gray_r",
                     cbar=False)
    hm.set(yticks=[])
    hm.set(xticks=[])

    if show:
        # pyplot.show takes no figure argument (its only parameter, `block`,
        # is keyword-only), so the figure must not be passed positionally
        plt.show()
    if filename is not None:
        f.savefig(filename)
    plt.close(f)
def plot_image_grid(image_array, shape, vmin=0, vmax=1):
    """
    Show a grid of images as seaborn heatmaps.

    Args:
        image_array: array whose first two axes index the grid rows and
            columns; the last axis holds the flattened pixels of each image.
        shape (tuple): shape each flattened image is reshaped to.
        vmin (float): lower bound of the color scale.
        vmax (float): upper bound of the color scale.

    Returns:
        None

    """
    array = be.to_numpy_array(image_array)
    nrows, ncols = array.shape[:-1]

    f = plt.figure(figsize=(2 * ncols, 2 * nrows))
    grid = gs.GridSpec(nrows, ncols)
    axes = [[plt.subplot(grid[i, j]) for j in range(ncols)]
            for i in range(nrows)]

    for i in range(nrows):
        for j in range(ncols):
            sns.heatmap(numpy.reshape(array[i][j], shape),
                        ax=axes[i][j], cmap="gray_r", cbar=False,
                        vmin=vmin, vmax=vmax)
            axes[i][j].set(yticks=[])
            axes[i][j].set(xticks=[])

    # pyplot.show takes no figure argument (its only parameter, `block`,
    # is keyword-only), so the figure must not be passed positionally
    plt.show()
    plt.close(f)
def compute_fantasy_particles(rbm, n_fantasy=5, fantasy_steps=100, beta_std=0.6,
                              run_mean_field=True):
    """
    Generate an n_fantasy x n_fantasy grid of fantasy particles.

    The particles come from samplers.SequentialMC.generate_fantasy_state,
    driven by a schedules.Linear schedule that starts at 1.0 and uses a
    delta of 1 / (fantasy_steps - 1).

    Args:
        rbm: the model; must provide mean_field_iteration.
        n_fantasy (int): side length of the grid; n_fantasy**2 particles
            are generated.
        fantasy_steps (int): number of steps passed to the sampler.
        beta_std (float): passed through to generate_fantasy_state.
        run_mean_field (bool): whether to apply one mean field iteration to
            the generated state.

    Returns:
        numpy array: array of shape (n_fantasy, n_fantasy, -1).

    """
    # a single-step run leaves no interval to divide by; fall back to a
    # delta of 1.0 instead of raising ZeroDivisionError
    # NOTE(review): confirm 1.0 is the desired delta for fantasy_steps == 1
    span = fantasy_steps - 1
    delta = 1 / span if span != 0 else 1.0

    schedule = schedules.Linear(initial=1.0, delta=delta)
    fantasy = samplers.SequentialMC.generate_fantasy_state(
        rbm, n_fantasy * n_fantasy, fantasy_steps,
        schedule=schedule, beta_std=beta_std, beta_momentum=0.0)

    if run_mean_field:
        fantasy = rbm.mean_field_iteration(1, fantasy)

    v_model = fantasy[0]
    grid = np.array([be.to_numpy_array(v) for v in v_model])
    return grid.reshape(n_fantasy, n_fantasy, -1)
def weight_norm_histogram(rbm, show_plot=False, filename=None):
    """
    Plot the distribution of the weight norms of every connection.

    For each connection the norm of the weight matrix is taken along
    axis 0 and divided by the square root of the connection's first shape
    entry; the distributions are drawn on a shared axis, labelled by the
    connection index.

    Args:
        rbm: the model; must provide num_connections and connections.
        show_plot (bool): whether to display the figure.
        filename (str or None): if given, the figure is saved to this path.

    Returns:
        None

    """
    import matplotlib.pyplot as plt
    import seaborn as sns

    fig, ax = plt.subplots()
    for l in range(rbm.num_connections):
        connection = rbm.connections[l]
        num_inputs = connection.shape[0]
        norm = be.to_numpy_array(
            be.norm(connection.weights.W(), axis=0) / sqrt(num_inputs))
        sns.distplot(norm, ax=ax, label=str(l))
    ax.legend()

    if show_plot:
        # a bare `fig` expression does nothing inside a function; the figure
        # has to be displayed explicitly
        plt.show()
    if filename is not None:
        fig.savefig(filename)
    plt.close(fig)
def compute_weights(rbm, n_weights=25, l=0, random=True):
    """
    Collect columns of the product of the weight matrices of connections
    0 through l into a square grid.

    Args:
        rbm: the model; each entry of rbm.connections must provide shape
            and weights.W().
        n_weights (int): requested number of columns. It is capped at the
            second shape entry of connection l and floored to the nearest
            perfect square.
        l (int): index of the last connection included in the product.
        random (bool): if True the columns are drawn at random without
            replacement; otherwise the first n_weights columns are used.

    Returns:
        numpy array: array of shape (grid_size, grid_size, -1).

    """
    n_columns = rbm.connections[l].shape[1]

    # never ask for more columns than exist, then round down to a square
    grid_size = int(sqrt(min(n_weights, n_columns)))
    n_weights = grid_size**2

    if random:
        chosen = np.random.choice(np.arange(n_columns), n_weights, replace=False)
    else:
        chosen = np.arange(n_weights)

    # chain the weight matrices from the first connection up to connection l
    product = rbm.connections[0].weights.W()
    for depth in range(1, l + 1):
        product = be.dot(product, rbm.connections[depth].weights.W())

    columns = []
    for col in chosen:
        columns.append(be.to_numpy_array(product[:, col]))
    return np.array(columns).reshape(grid_size, grid_size, -1)
def compute_fantasy_particles(rbm, v_data, fit, n_fantasy=25):
    """
    Draw a square grid of fantasy particles from the model.

    A State built from rbm.random(v_data) seeds a fit.DrivenSequentialMC
    sampler using a schedules.PowerLawDecay schedule; the sampler is
    updated 1000 times and the visible units of one deterministic
    iteration of the model are sampled without replacement.

    Args:
        rbm: the model; must provide random and deterministic_iteration.
        v_data: visible data handed to rbm.random.
        fit: object providing the DrivenSequentialMC sampler class.
        n_fantasy (int): number of particles to return; must be a perfect
            square.

    Returns:
        numpy array: array of shape (grid_size, grid_size, -1) where
            grid_size is the square root of n_fantasy.

    """
    grid_size = int(sqrt(n_fantasy))
    is_square = grid_size == sqrt(n_fantasy)
    assert is_square, "n_fantasy must be the square of an integer"

    start = State.from_visible(rbm.random(v_data), rbm)

    decay = schedules.PowerLawDecay(initial=1.0, coefficient=0.5)
    mc = fit.DrivenSequentialMC(rbm, schedule=decay)
    dropout_scale = State.dropout_rescale(rbm)
    mc.set_state(start)
    mc.update_state(1000, dropout_scale)

    iterated = rbm.deterministic_iteration(1, mc.state, dropout_scale)
    particles = iterated.units[0]

    chosen = numpy.random.choice(numpy.arange(len(particles)), n_fantasy,
                                 replace=False)

    rows = []
    for row in chosen:
        rows.append(be.to_numpy_array(particles[row]))
    return numpy.array(rows).reshape(grid_size, grid_size, -1)
def plot_image(image_vector, shape, vmin=0, vmax=1, filename=None, show=True,
               cmap=cm.gray, nan_color='red'):
    """
    Plot a single image, drawing NaN entries in a dedicated color.

    Args:
        image_vector: the flattened image.
        shape (tuple): shape the image is reshaped to.
        vmin (float): lower bound of the color scale.
        vmax (float): upper bound of the color scale.
        filename (str or None): if given, the figure is saved to this path.
        show (bool): whether to display the figure.
        cmap: matplotlib colormap used for the image.
        nan_color: color used for NaN entries.

    Returns:
        None

    """
    import copy

    f, ax = plt.subplots(figsize=(4, 4))

    # reshape the data and cast to a numpy array
    img = numpy.reshape(be.to_numpy_array(image_vector), shape)

    # construct a masked numpy array from the data in case of nan
    img = numpy.ma.array(img, mask=numpy.isnan(img))

    # nan_color used to be accepted but ignored; apply it to a private copy
    # so the shared default colormap is left untouched
    cmap = copy.copy(cmap)
    cmap.set_bad(nan_color, 1.)

    # make the plot
    ax.imshow(img, interpolation='none', cmap=cmap, vmin=vmin, vmax=vmax)
    ax.set(yticks=[])
    ax.set(xticks=[])

    if show:
        # pyplot.show takes no figure argument (its only parameter, `block`,
        # is keyword-only), so the figure must not be passed positionally
        plt.show()
    if filename is not None:
        f.savefig(filename)
    plt.close(f)
def plot_image_grid(image_array, shape, vmin=0, vmax=1, filename=None, show=True,
                    cmap=cm.gray_r, nan_color='red'):
    """
    Draw a grid of images, drawing NaN entries in a dedicated color.

    Args:
        image_array: array whose first two axes index the grid rows and
            columns; the last axis holds the flattened pixels of each image.
        shape (tuple): shape each flattened image is reshaped to.
        vmin (float): lower bound of the color scale.
        vmax (float): upper bound of the color scale.
        filename (str or None): if given, the figure is saved to this path.
        show (bool): whether to display the figure.
        cmap: matplotlib colormap used for the images.
        nan_color: color used for NaN entries.

    Returns:
        None

    """
    import copy

    # cast to a numpy array
    img_array = be.to_numpy_array(image_array)

    # construct a masked numpy array from the data in case of nan
    img_array = numpy.ma.array(img_array, mask=numpy.isnan(img_array))
    nrows, ncols = img_array.shape[:-1]

    # set the nan color on a private copy: the default colormap is a shared
    # object, and changing it in place would leak into every other plot
    cmap = copy.copy(cmap)
    cmap.set_bad(nan_color, 1.)

    f = plt.figure(figsize=(2 * ncols, 2 * nrows))
    grid = gs.GridSpec(nrows, ncols)
    axes = [[plt.subplot(grid[i, j]) for j in range(ncols)]
            for i in range(nrows)]

    for i in range(nrows):
        for j in range(ncols):
            axes[i][j].imshow(numpy.reshape(img_array[i][j], shape),
                              cmap=cmap, interpolation='none',
                              vmin=vmin, vmax=vmax)
            axes[i][j].set(yticks=[])
            axes[i][j].set(xticks=[])

    plt.tight_layout(pad=0.5, h_pad=0.2, w_pad=0.2)

    if show:
        # pyplot.show takes no figure argument (its only parameter, `block`,
        # is keyword-only), so the figure must not be passed positionally
        plt.show()
    if filename is not None:
        f.savefig(filename)
    plt.close(f)
cd.train(opt, num_epochs, method=fit.pcd, mcsteps=mc_steps, beta_std=beta_std, burn_in=1) # evaluate the model util.show_metrics(rbm, cd.monitor) valid = data.get('validate') util.show_reconstructions(rbm, valid, show_plot, n_recon=10, vertical=False) util.show_fantasy_particles(rbm, valid, show_plot, n_fantasy=5) util.show_weights(rbm, show_plot, n_weights=100) # close the HDF5 store data.close() print("Done") return rbm if __name__ == "__main__": rbm = run(show_plot = True) import seaborn import matplotlib.pyplot as plt for conn in rbm.connections: c = be.corr(conn.weights.W(), conn.weights.W()) fig, ax = plt.subplots() seaborn.heatmap(be.to_numpy_array(c), vmin=-1, vmax=1, ax=ax) fig n = be.norm(conn.weights.W(), axis=0) fig, ax = plt.subplots() seaborn.distplot(be.to_numpy_array(n), ax=ax) fig
util.show_reconstructions(rbm, valid, show_plot, num_to_avg=10) util.show_fantasy_particles(rbm, valid, show_plot, n_fantasy=10, beta_std=beta_std, fantasy_steps=100) util.show_weights(rbm, show_plot, n_weights=16) print("Norms of the weights after training") util.weight_norm_histogram(rbm, show_plot=show_plot) # close the HDF5 store data.close() print("Done") return rbm if __name__ == "__main__": rbm = run(show_plot=True) import seaborn import matplotlib.pyplot as plt for conn in rbm.connections: c = be.corr(conn.weights.W(), conn.weights.W()) fig, ax = plt.subplots() seaborn.heatmap(be.to_numpy_array(c), vmin=-1, vmax=1, ax=ax) #fig.show()
def compute_weights(rbm):
    """
    Collect five randomly chosen columns of the first weight matrix.

    Args:
        rbm: the model; rbm.weights[0] must provide shape and W().

    Returns:
        numpy array: one single-element list per drawn column.

    """
    n_columns = rbm.weights[0].shape[1]

    # draw five distinct column indices
    chosen = numpy.random.choice(numpy.arange(n_columns), 5, replace=False)

    columns = []
    for col in chosen:
        columns.append([be.to_numpy_array(rbm.weights[0].W()[:, col])])
    return numpy.array(columns)