def fit(self, trainset):
    """Train an RBM on the ratings in ``trainset`` and cache predicted ratings.

    Each rating ``r`` is one-hot encoded into slot ``int(2 * r) - 1`` of a
    10-wide vector per (user, item), so ratings 0.5 .. 5.0 map to slots 0 .. 9.
    After training, the RBM output for every user is turned back into a rating
    by taking the softmax-weighted average slot and mapping it through
    ``(slot + 1) * 0.5``.

    Args:
        trainset: object exposing ``n_users``, ``n_items`` and
            ``all_ratings()`` yielding ``(uid, iid, rating)`` triples
            (presumably a Surprise ``Trainset`` -- confirm).

    Returns:
        ``self``, with ``self.predictedRatings`` filled as a
        ``(n_users, n_items)`` float32 array.
    """
    # NOTE(review): the statement right after the signature was garbled in
    # this copy of the file (only ", trainset)" survived). It is restored as
    # the base-class call Surprise requires from custom algorithms; confirm
    # the base class name against the enclosing class definition.
    AlgoBase.fit(self, trainset)

    numUsers = trainset.n_users
    numItems = trainset.n_items

    # One-hot encode every known rating: (user, item, rating slot).
    trainingMatrix = np.zeros([numUsers, numItems, 10], dtype=np.float32)
    for (uid, iid, rating) in trainset.all_ratings():
        adjustedRating = int(float(rating) * 2.0) - 1
        trainingMatrix[int(uid), int(iid), adjustedRating] = 1

    # Flatten to 2-D: one row per user, items * 10 visible units per row.
    trainingMatrix = np.reshape(trainingMatrix, [trainingMatrix.shape[0], -1])

    rbm = RBM(trainingMatrix.shape[1],
              hiddenDimensions=self.hiddenDim,
              learningRate=self.learningRate,
              batchSize=self.batchSize)
    rbm.Train(trainingMatrix)

    self.predictedRatings = np.zeros([numUsers, numItems], dtype=np.float32)
    ratingSlots = np.arange(10)  # loop-invariant, built once
    for uiid in range(numUsers):
        if uiid % 50 == 0:
            print("Procissing user ", uiid)
        recs = rbm.GetRecommendations([trainingMatrix[uiid]])
        recs = np.reshape(recs, [numItems, 10])

        for itemID, rec in enumerate(recs):
            # Expected slot under the softmax distribution over the 10 slots.
            normalized = self.softmax(rec)
            rating = np.average(ratingSlots, weights=normalized)
            self.predictedRatings[uiid, itemID] = (rating + 1) * 0.5

    return self
def train(self):
    """Train one RBM per column of ``training_X`` and save the averaged parameters.

    Every RBM's full weights and biases are appended to ``self.all_weights``,
    ``self.all_bv`` and ``self.all_bh``; the three collections are then
    averaged, stored in ``self.wb`` under their file-name keys, and saved.
    """
    # self.wb maps each storage file name to the averaged matrix kept there.
    self.wb = {}

    # One RBM is created per user (one column of the training matrix).
    n_columns = self.dataset.training_X.shape[1]
    for column in range(n_columns):
        user_ratings = self.dataset.training_X[:, column]
        user_rbm = RBM(hidden_layer_n=HIDDEN_LAYER_N,
                       iterations=ITERATIONS,
                       dataset=user_ratings)

        # Collect this RBM's parameters alongside those of the earlier ones.
        self.all_weights.append(user_rbm.full_weights)
        self.all_bv.append(user_rbm.full_bv)
        self.all_bh.append(user_rbm.full_bh)
        print("RBM number: " + str(column))

    # Average across all per-user RBMs.
    collected = (
        (WEIGHTS_FILE, self.all_weights),
        (VISIBLE_BIAS_FILE, self.all_bv),
        (HIDDEN_BIAS_FILE, self.all_bh),
    )
    for file_key, matrices in collected:
        self.wb[file_key] = self.average_matrices(matrices)

    # Training can take a long time, so the averaged parameters are saved.
    for file_key, _ in collected:
        self.save_matrix(file_key)
def pre_train(self, train_round, size, rate, dr, scale): #for each pair of neighbor layers, training RBM for k in range(self.depth - 1): rbm = RBM(self.layers[k], self.layers[k+1], self.weightLayers[k], rate) # mini-batch training times for tr in range(train_round): #mini-batch size for i in range(size): index = random.randint(0, scale) im = dr.readOne(index) for p in range(len(im)): if im[p] >= 128: self.layers[0].outs[p] = 1.0 else: self.layers[0].outs[p] = 0.0 for j in range(k): toHiddenLayer(self.layers[j], self.weightLayers[j], self.layers[j+1]) #self.layers[j+1].outs = sample(self.layers[j+1].outs) sample(self.layers[j+1].outs) rbm.update_Parameters(size) #if tr%100 == 0: #print tr print "Layer: ", k+1, " Completed!" return
def train_DBN(self, x):
    """Train one RBM per entry of ``self.layers`` and store its parameters.

    For layer ``index`` the visible size is ``self.input_size`` (first layer)
    or the previous entry of ``self.layers``; the hidden size is the current
    entry. The trained ``W``, ``hb`` and ``vb`` are moved to CPU and stored in
    ``self.layer_parameters[index]``.

    Args:
        x: training data; it is passed through
            ``self.generate_input_for_layer(index, x)`` before each RBM is
            trained.
    """
    for index, hn in enumerate(self.layers):
        vn = self.input_size if index == 0 else self.layers[index - 1]

        rbm = RBM(vn, hn, epochs=100, mode='bernoulli', lr=0.0005, k=10,
                  batch_size=128, gpu=True, optimizer='adam',
                  early_stopping_patience=10)
        x_dash = self.generate_input_for_layer(index, x)
        rbm.train(x_dash)

        self.layer_parameters[index]['W'] = rbm.W.cpu()
        self.layer_parameters[index]['hb'] = rbm.hb.cpu()
        self.layer_parameters[index]['vb'] = rbm.vb.cpu()
        print("Finished Training Layer:", index, "to", index + 1)

    if self.savefile is not None:
        # NOTE(review): this statement was garbled in this copy of the file
        # (only ", self.savefile)" survived). Restored as a torch.save of the
        # collected parameters; confirm against the original source. Its
        # position (after the loop vs. inside it) is also a reconstruction.
        torch.save(self.layer_parameters, self.savefile)
def recommend(self, u, q):
    """Predict and print the rating user ``u`` would give item ``q``.

    The averaged weights and biases are reloaded from their files, the user's
    column of ``training_X`` is converted to softmax units, and the RBM is
    sampled once for each candidate rating ``1 .. RATING_MAX``. The expected
    value of the resulting probabilities is printed.
    """
    self.average_weights = self._load_matrix(WEIGHTS_FILE)
    self.average_bv = self._load_matrix(VISIBLE_BIAS_FILE)
    self.average_bh = self._load_matrix(HIDDEN_BIAS_FILE)

    # Per the original notes: the visible vector is the user being predicted,
    # and the weights / visible biases are restricted to the items that user
    # rated.
    user_column = self.dataset.training_X[:, u]
    visible, weights, visible_bias = self._ratings_to_softmax_units(user_column, q)

    # Hidden biases are shared by every user.
    hidden_bias = self.average_bh

    # The query index changes once unrated items are removed; the query's
    # units hold placeholders until a candidate rating is written below.
    visible, query_index = self._new_index(visible)

    samples = []
    for candidate in range(1, RATING_MAX + 1):
        visible = self._set_rating(visible, query_index, candidate)
        predictor = RBM(hidden_layer_n=HIDDEN_LAYER_N,
                        iterations=ITERATIONS,
                        dataset=visible,
                        training=False)
        samples.append(
            predictor.run_prediction(visible, weights, hidden_bias, visible_bias))

    # Turn the per-rating samples into an expected rating.
    probabilities = self._get_probabilities(samples, query_index)
    prediction = self._expected_value(self.softmax(probabilities))
    print("Prediction: user " + str(u) + " will give movie: " + str(q) + " rating: " + str(prediction))
def load_RBM(self, file_path, layer_sizes): ''' load the autoencoder via the RBMs ''' RBM = Autoencoder(layer_sizes) RBM = RBM.pretrained_from_file(file_path) self.model, self.encoder, self.decoder = RBM.unroll() self.set_compiler() self.encoder.compile(loss={'encoded': self.kl_loss}, optimizer=Adam(
def main(): learningRate = float(sys.argv[1]) if len(sys.argv) >= 2 else 0.0001 maxIterations = int(sys.argv[2]) if len(sys.argv) >= 3 else 300 # load data data ='data/usps_resampled.mat') train_patterns = data['train_patterns'] train_labels = data['train_labels'] test_patterns = data['test_patterns'] test_labels = data['test_labels'] # initialize and train RBM rbm = RBM(192, train_patterns, learningRate=learningRate, verbose=True) iterationsCompleted = rbm.train(convThreshold=0.03, maxIterations=maxIterations) print 'Autoencoding. . . ' hidden_patterns = rbm.translate(train_patterns) ae_patterns = rbm.invert(hidden_patterns) print 'Finished.' while True: while True: try: sampleImage = raw_input("Pick a sample image from [0-" + str(train_patterns.shape[1] - 1) + "] (q to quit): ") if sampleImage == 'q': y = raw_input("Save this classifier (y)? ") fn = 'data/RBM_' + str( (learningRate, 192, iterationsCompleted)) if y in ['y', '']: f = open(fn, 'w') pickle.dump(TrainedRBM(rbm.W, rbm.b_h, rbm.b_v), f) print "RBM saved as " + fn sys.exit(0) sampleImage = int(sampleImage) if sampleImage not in range(train_patterns.shape[1]): raise ValueError except ValueError: continue except EOFError: sys.exit(0) except KeyboardInterrupt: sys.exit(0) break # show example image plt.matshow(train_patterns[:, sampleImage].reshape(16, 16)) plt.matshow(ae_patterns[:, sampleImage].reshape(16, 16))
def __init__(self, input=None, label=None, n_ins=2, hidden_layer_sizes=[3, 3], n_outs=2, rng=None):
    """Stack sigmoid hidden layers with weight-sharing RBMs and a logistic output.

    Args:
        input: data fed to the first layer; stored as ``self.x``.
        label: targets handed to the logistic-regression layer; stored as
            ``self.y``.
        n_ins: size of the first layer's input.
        hidden_layer_sizes: size of each hidden layer, bottom to top (must
            not be empty).
        n_outs: size of the output layer.
        rng: random state; defaults to ``numpy.random.RandomState(1234)``.
    """
    self.x = input
    self.y = label

    self.sigmoid_layers = []
    self.rbm_layers = []
    self.n_layers = len(hidden_layer_sizes)

    if rng is None:
        rng = numpy.random.RandomState(1234)

    assert self.n_layers > 0

    for depth in xrange(self.n_layers):
        # The first layer reads the raw input; deeper layers read a sample
        # drawn from the layer directly below.
        if depth == 0:
            fan_in = n_ins
            below = self.x
        else:
            fan_in = hidden_layer_sizes[depth - 1]
            below = self.sigmoid_layers[-1].sample_h_given_v()
        fan_out = hidden_layer_sizes[depth]

        hidden = HiddenLayer(input=below,
                             n_in=fan_in,
                             n_out=fan_out,
                             rng=rng,
                             activation=sigmoid)
        self.sigmoid_layers.append(hidden)

        # The RBM shares W and the hidden bias with its sigmoid layer.
        self.rbm_layers.append(RBM(input=below,
                                   n_visible=fan_in,
                                   n_hidden=fan_out,
                                   W=hidden.W,
                                   hbias=hidden.b))

    # Output layer: logistic regression on a sample from the top hidden layer.
    top_sample = self.sigmoid_layers[-1].sample_h_given_v()
    self.log_layer = LogisticRegression(input=top_sample,
                                        label=self.y,
                                        n_in=hidden_layer_sizes[-1],
                                        n_out=n_outs)

    # Fine-tuning cost: negative log likelihood of the logistic layer.
    self.finetune_cost = self.log_layer.negative_log_likelihood()
def pretrain_model(self, x):
    """Build the greedy pre-training graph: one RBM and one optimizer per layer pair.

    Args:
        x: input tensor fed to the bottom RBM.

    Returns:
        ``(costs, optimizers)``: two lists with one entry per RBM, bottom to
        top.
    """
    with tf.variable_scope('pre-train_dbn'):
        # One RBM sits between each pair of neighbouring DBN layers so the
        # layers can be trained greedily.
        with tf.variable_scope('make_rbms'):
            layer_inputs = [x]
            rbms = []
            n_rbms = len(self.dbn_sizes) - 1
            for level in range(n_rbms):
                rbms.append(RBM(self.W[level], self.B[level], self.B[level + 1]))
                activation = tf.sigmoid(
                    tf.matmul(layer_inputs[-1], self.W[level]) + self.B[level + 1])
                layer_inputs.append(sample(activation))

        # One cost / optimizer pair per RBM. zip() stops at the last RBM, so
        # the extra top-level entry of layer_inputs is never used.
        with tf.variable_scope('pre-train_ops'):
            costs = []
            optimizers = []
            for rbm, rbm_input in zip(rbms, layer_inputs):
                cost, _ = rbm.free_energy_cost(rbm_input, 1)
                train_op = tf.train.AdamOptimizer().minimize(cost)
                costs.append(cost)
                optimizers.append(train_op)

        return costs, optimizers
def __init__(self, visible_units=256, hidden_units=[64, 100], k=2, learning_rate=1e-5, learning_rate_decay=False, xavier_init=False, increase_to_cd_k=False, use_gpu=False):
    # Builds one RBM per entry of ``hidden_units`` (the first RBM's visible
    # size is ``visible_units``, each later one uses the previous hidden size)
    # and then wraps per-layer parameters as ``nn.Parameter`` objects that are
    # registered on the module.
    super(DBN, self).__init__()
    self.n_layers = len(hidden_units)
    self.rbm_layers = []
    self.rbm_nodes = []
    # Creating different RBM layers
    for i in range(self.n_layers):
        input_size = 0
        if i == 0:
            input_size = visible_units
        else:
            input_size = hidden_units[i - 1]
        rbm = RBM(visible_units=input_size, hidden_units=hidden_units[i], k=k, learning_rate=learning_rate, learning_rate_decay=learning_rate_decay, xavier_init=xavier_init, increase_to_cd_k=increase_to_cd_k, use_gpu=use_gpu)
        self.rbm_layers.append(rbm)
    # rbm_layers = [RBM(rbn_nodes[i-1] , rbm_nodes[i],use_gpu=use_cuda) for i in range(1,len(rbm_nodes))]
    # NOTE(review): the seven nn.Parameter(...) statements below are damaged
    # in this copy of the file: the attribute read from each RBM and the
    # closing parenthesis are missing, so they do not parse. The missing
    # attribute names cannot be recovered from here; restore them from the
    # original source.
    self.W_rec = [ nn.Parameter(self.rbm_layers[i] for i in range(self.n_layers - 1) ]
    self.W_gen = [ nn.Parameter(self.rbm_layers[i] for i in range(self.n_layers - 1) ]
    self.bias_rec = [ nn.Parameter(self.rbm_layers[i] for i in range(self.n_layers - 1) ]
    self.bias_gen = [ nn.Parameter(self.rbm_layers[i] for i in range(self.n_layers - 1) ]
    self.W_mem = nn.Parameter(self.rbm_layers[-1]
    self.v_bias_mem = nn.Parameter(self.rbm_layers[-1]
    self.h_bias_mem = nn.Parameter(self.rbm_layers[-1]
    # Register the per-layer parameters of every RBM except the top one under
    # indexed names.
    for i in range(self.n_layers - 1):
        self.register_parameter('W_rec%i' % i, self.W_rec[i])
        self.register_parameter('W_gen%i' % i, self.W_gen[i])
        self.register_parameter('bias_rec%i' % i, self.bias_rec[i])
        self.register_parameter('bias_gen%i' % i, self.bias_gen[i])
def __init__(self, layer_sizes):
    """Build the DBN as a plain list of stacked RBMs.

    Args:
        layer_sizes: layer sizes from bottom to top. Each neighbouring pair
            ``(layer_sizes[i], layer_sizes[i + 1])`` becomes one RBM, so the
            resulting number of layers (``nb_couches``) should be an
            int > 0.
    """
    self.nb_couches = len(layer_sizes) - 1

    # A DBN is a stack of RBMs: one per pair of neighbouring sizes.
    self.model = [
        RBM(n_below, n_above)
        for n_below, n_above in zip(layer_sizes, layer_sizes[1:])
    ]
def __init__(self, numpy_rng, theano_rng=None, n_ins=784, hidden_layers_sizes=[500, 500], n_outs=10):
    """Assemble the DBN graph: sigmoid layers, weight-sharing RBMs, logistic output.

    Args:
        numpy_rng: numpy random state used for the hidden layers and, when
            ``theano_rng`` is not given, to seed the Theano stream.
        theano_rng: Theano random stream; a ``RandomStreams`` is created when
            this is falsy.
        n_ins: size of the input layer.
        hidden_layers_sizes: size of each hidden layer, bottom to top (must
            not be empty).
        n_outs: size of the output layer.
    """
    self.sigmoid_layers = []
    self.rbm_layers = []
    self.params = []
    self.n_layers = len(hidden_layers_sizes)

    assert self.n_layers > 0

    if not theano_rng:
        theano_rng = RandomStreams(numpy_rng.randint(2**30))

    # Symbolic inputs: data matrix and integer label vector.
    self.x = T.matrix('x')
    self.y = T.ivector('y')

    # Walk up the stack, carrying the input size and symbolic input of the
    # layer about to be built.
    fan_in = n_ins
    below = self.x
    for fan_out in hidden_layers_sizes:
        hidden = HiddenLayer(rng=numpy_rng,
                             input=below,
                             n_in=fan_in,
                             n_out=fan_out,
                             activation=T.nnet.sigmoid)
        self.sigmoid_layers.append(hidden)
        self.params.extend(hidden.params)

        # The RBM reuses the sigmoid layer's W and hidden bias.
        self.rbm_layers.append(RBM(numpy_rng=numpy_rng,
                                   theano_rng=theano_rng,
                                   input=below,
                                   n_visible=fan_in,
                                   n_hidden=fan_out,
                                   W=hidden.W,
                                   hbias=hidden.b))

        fan_in = fan_out
        below = hidden.output

    self.logLayer = LogisticRegression(
        input=self.sigmoid_layers[-1].output,
        n_in=hidden_layers_sizes[-1],
        n_out=n_outs)
    self.params.extend(self.logLayer.params)

    # NOTE(review): the method name is spelled "likehood" here -- confirm it
    # matches the LogisticRegression class this file imports.
    self.finetune_cost = self.logLayer.negative_log_likehood(self.y)
    self.errors = self.logLayer.errors(self.y)
def train_model(self, x):
    """Build the RNN-DBN training graph.

    The RNN is unrolled over ``x``; its states, shifted back by one step,
    condition the visible and hidden biases of one RBM per pair of DBN layers.
    Each RBM gets a gradient-descent op with gradients clipped to [-10, 10].

    Returns:
        ``(costs, optimizers, loglikelihoods, summaries)``: four lists with
        one entry per RBM; each ``summaries`` entry is
        ``[cost_summary, log_likelihood_summary]``.
    """
    with tf.variable_scope('train_rnn_dbn'):
        with tf.variable_scope('propagate_states'):
            states = self.__unroll_rnn(x)
            state0 = self.rnn_s0
            if self.num_rnn_cells > 1:
                # With several stacked cells only the last one is used.
                states = states[-1]
                state0 = state0[-1]
            u_t = tf.reshape(states.c, [-1, self.s_size])
            q_t = tf.reshape(states.h, [-1, self.s_size])
            # Prepend the initial state and drop the last row: states at t-1.
            u_tm1 = tf.concat([state0.c, u_t], 0)[:-1, :]
            q_tm1 = tf.concat([state0.h, q_t], 0)[:-1, :]

        def state_bias(idx):
            # Bias of DBN layer ``idx`` conditioned on the previous RNN state.
            return (tf.matmul(u_tm1, self.Wu[idx])
                    + tf.matmul(q_tm1, self.Wq[idx])
                    + self.B[idx])

        # One RBM between each pair of DBN layers, so every layer can be
        # trained greedily.
        with tf.variable_scope('make_rbms'):
            layer_inputs = [x]
            rbms = []
            for level in range(len(self.dbn_sizes) - 1):
                bv = state_bias(level)
                bh = state_bias(level + 1)
                rbms.append(RBM(self.W[level], bv, bh))
                # The upward pass uses the static bias B, not the
                # state-conditioned one.
                activation = tf.sigmoid(
                    tf.matmul(layer_inputs[-1], self.W[level]) + self.B[level + 1])
                layer_inputs.append(sample(activation))

        # One set of training ops per RBM.
        with tf.variable_scope('train_ops'):
            costs, optimizers, loglikelihoods, summaries = [], [], [], []
            for rbm, rbm_input in zip(rbms, layer_inputs):
                cost, loglikelihood = rbm.free_energy_cost(rbm_input, 15)
                cost_summary = tf.summary.scalar('free_energy', cost)
                ll_summary = tf.summary.scalar('log_likelihood', loglikelihood)

                sgd = tf.train.GradientDescentOptimizer(learning_rate=0.001)
                clipped = [(tf.clip_by_value(grad, -10.0, 10.0), var)
                           for grad, var in sgd.compute_gradients(cost)
                           if grad is not None]
                train_op = sgd.apply_gradients(clipped)

                costs.append(cost)
                optimizers.append(train_op)
                loglikelihoods.append(loglikelihood)
                summaries.append([cost_summary, ll_summary])

        return costs, optimizers, loglikelihoods, summaries
def evaluation_RBM(self, dr, scale, k):
    """Accumulate and print the error of the RBM between layers ``k`` and ``k+1``.

    Args:
        dr: reader exposing ``readOne(index)``, which returns a sequence of
            pixel values.
        scale: number of samples to read (indices ``0 .. scale-1``).
        k: index of the lower layer of the RBM being evaluated.
    """
    # NOTE(review): original indentation was lost in this copy; the debugging
    # image is placed after the sample loop here -- confirm.
    rbm = RBM(self.layers[k], self.layers[k+1], self.weightLayers[k], 0)
    for i in range(scale):
        im = dr.readOne(i)
        # Binarise the image into the RBM's visible layer.
        for p in range(len(im)):
            if im[p] >= 128:
                rbm.v_layer.outs[p] = 1.0
            else:
                rbm.v_layer.outs[p] = 0.0
        rbm.update_total_error()

    # just for testing: render layer k as a 28x28 greyscale image
    size = [28, 28]
    # NOTE(review): the constructor call was garbled in this copy (only
    # '"L", size)' survived). Restored as PIL's Image.new, which matches the
    # putpixel calls below; confirm the imported name.
    image = Image.new("L", size)
    for p in range(len(self.layers[k].outs)):
        # Floor division keeps the row an integer on Python 2 and Python 3.
        xy = (p % 28, p // 28)
        if self.layers[k].outs[p] == 1.0:
            image.putpixel(xy, 255)
        else:
            image.putpixel(xy, 0)

    rbm.print_average_error(k)
def main(args):
    # Trains an RBM on the data found in ``args.in_dir``, plots the
    # training / validation error, reports the reconstruction error on the
    # test set, writes reconstructed images, and pickles the learned
    # parameters into ``args.out_dir``.
    train, test = read_data(args.in_dir)
    train, val = split_data(train)
    print('\n\nTrain: ', train.shape)
    print('Val: ', val.shape)
    print('Test: ', test.shape)
    # RBM object
    # NOTE(review): an argument is missing between the two commas in this
    # call (extraction damage), so the line does not parse. Restore the
    # missing argument from the original source.
    rbm = RBM(args.num_hidden, val.shape[1],, args.n, args.batch_size, args.epochs)
    # Train RBM
    train_loss, val_loss = rbm.Train(train, val)
    # Create output dir if it doesn't exist
    if not os.path.exists(args.out_dir):
        os.makedirs(args.out_dir)
    # Plot error
    plot_error(train_loss, val_loss, args.out_dir)
    # Performance on Test set (note the transpose of ``test``)
    error_test = rbm.reconstruction_error(test.T)
    print("\n\n\nReconstruction error...\n")
    print("Train : ", train_loss[-1])
    print("Val : ", val_loss[-1])
    print("Test : ", error_test)
    # For viewing reconstruction
    reconstruct_images(rbm, test, (args.image_height, args.image_width), args.out_dir)
    # Saving the model learned weights
    # NOTE(review): the path is built with a literal backslash separator, and
    # the file handle opened here is never closed explicitly.
    pickle.dump([rbm.W, rbm.b_h, rbm.b_v], open(args.out_dir + '\\weights.pkl', 'wb'))
    print(f"\n\nRBM weights saved in {args.out_dir}\\weights.pkl")
def pretrain_autoencoder(net, x, x_val, rbm_lr=0.001, rbm_use_gauss_visible=False, rbm_use_gauss_hidden=True, rbm_mom=0.5, rbm_weight_decay=0.0000, rbm_lr_decay=0.0, rbm_batch_size=100, rbm_epochs=100, rbm_patience=-1, verbose=1): final_arch = net.arch[1:math.ceil(len(net.arch) / 2.0)] # without input layer n_dense_layers = len(final_arch) rbm_list = [] #loop for training the RBMs for i in range(n_dense_layers): print("\nFine tuning layer number " + str(i)) if (i == 0): x_new = x x_val_new = x_val else: x_new = rbm_list[-1].get_h(x_new) x_val_new = rbm_list[-1].get_h(x_val_new) rbm = RBM(x_new.shape[1], final_arch[i], use_gaussian_visible_sampling=rbm_use_gauss_visible, use_gaussian_hidden_sampling=rbm_use_gauss_hidden, use_sample_vis_for_learning=False) rbm.set_lr( rbm_lr, rbm_lr_decay, momentum=rbm_mom, weight_decay=rbm_weight_decay, ), x_val_new, batch_size=rbm_batch_size, epochs=rbm_epochs, patience=rbm_patience) rbm_list.append(rbm) rbm_iterator = 0 rbm_iterator_direction = 1 #loop to copy the weights from rbm to NN for n_layer in range(len(net.layers)): if (net.layers[n_layer].ID == "Dense"): copy_dense_weights_from_rbm(rbm_list[rbm_iterator], net.layers[n_layer], rbm_iterator_direction) if (rbm_iterator == len(rbm_list) - 1 and rbm_iterator_direction == 1): rbm_iterator_direction = -1 else: rbm_iterator += rbm_iterator_direction print("Pre training finished!") return rbm_list
def calcul_softmax(rbm: "RBM", data: np.ndarray) -> np.ndarray:
    """Return softmax probabilities over the output units of ``rbm`` for ``data``.

    The input is passed through ``rbm.entree_sortie`` and the result through
    ``softmax``. Marked as unused in the original notes.

    Args:
        rbm (RBM): the RBM whose ``entree_sortie`` method is applied.
        data (np.ndarray): input data.

    Returns:
        np.ndarray: probabilities over the output units.
    """
    # Call the method on the instance that was passed in; the previous code
    # called it on the RBM class, which ignored the ``rbm`` argument.
    return softmax(rbm.entree_sortie(data))
def fit(self, X, transform=False):
    """Fit the RBM stack greedily, one layer at a time.

    Each layer is fitted on the previous layer's ``predict`` output, with
    sizes taken from ``self.params`` and iteration counts from
    ``self.n_iter``.

    Args:
        X: training data; ``X.shape[1]`` is the first layer's input size.
        transform: when true, return ``self.predict(X)`` after fitting.

    Returns:
        ``self.predict(X)`` if ``transform`` is true, otherwise ``None``.
    """
    self.layer = []
    features = X
    n_in = features.shape[1]
    self.input_shape = n_in

    for depth in range(self.n_layers):
        n_out = self.params[depth]
        print(n_in)
        self.layer.append(RBM(n_in, n_out))
        n_in = n_out

        current = self.layer[depth]
        current.fit(features, self.n_iter[depth])
        # The next layer trains on this layer's output.
        features = current.predict(features)

    if not transform:
        return None
    return self.predict(X)
def test_RBM(train_X, train_Y, test_X, test_Y):
    """Score RBM features with a logistic-regression classifier.

    Every sample is mapped through ``rbm.inference``; a logistic regression
    is fitted on the transformed training set and its accuracy is printed for
    both the training and the test data.
    """
    # Create the RBM (784 visible, 500 hidden units). No explicit training
    # call is visible here; presumably get_filters() prepares the model --
    # confirm.
    rbm = RBM(28 * 28, 500)
    rbm.get_filters()

    train_X = np.array([rbm.inference(x) for x in train_X])
    test_X = np.array([rbm.inference(x) for x in test_X])

    logreg = LogisticRegression(max_iter=10)
    # NOTE(review): this call was garbled in this copy of the file (only
    # ", train_Y)" survived). Restored as the fit on the transformed training
    # set, which the predict() calls below require.
    logreg.fit(train_X, train_Y)

    predict_Y = logreg.predict(train_X)
    print("Accuracy on training data")
    print(accuracy_score(train_Y, predict_Y))

    predict_Y = logreg.predict(test_X)
    print("Accuracy on test data")
    print(accuracy_score(test_Y, predict_Y))
def __init__(self, hidden_units, visible_units=256, output_units=1, k=2, learning_rate=1e-5, learning_rate_decay=False, increase_to_cd_k=False, device='cpu'):
    """Build a stack of RBMs topped by a linear output layer.

    Args:
        hidden_units: hidden size of each RBM, bottom to top.
        visible_units: visible size of the first RBM.
        output_units: output size of the final linear layer.
        k, learning_rate, learning_rate_decay, increase_to_cd_k: forwarded
            unchanged to every RBM.
        device: device passed to each RBM and used for the linear layer.
    """
    super(DBN, self).__init__()

    self.n_layers = len(hidden_units)
    self.rbm_layers = []
    self.rbm_nodes = []
    self.device = device
    self.is_pretrained = False
    self.is_finetune = False

    # Each RBM's visible size is the hidden size of the one below it.
    for depth in range(self.n_layers):
        n_visible = visible_units if depth == 0 else hidden_units[depth - 1]
        self.rbm_layers.append(RBM(visible_units=n_visible,
                                   hidden_units=hidden_units[depth],
                                   k=k,
                                   learning_rate=learning_rate,
                                   learning_rate_decay=learning_rate_decay,
                                   increase_to_cd_k=increase_to_cd_k,
                                   device=device))

    # Expose every RBM's weight and hidden bias as parameters of this module.
    self.W_rec = [layer.weight for layer in self.rbm_layers]
    self.bias_rec = [layer.h_bias for layer in self.rbm_layers]
    for depth, (weight, bias) in enumerate(zip(self.W_rec, self.bias_rec)):
        self.register_parameter('W_rec%i' % depth, weight)
        self.register_parameter('bias_rec%i' % depth, bias)

    self.bpnn = torch.nn.Linear(hidden_units[-1], output_units).to(self.device)
def fit(self, X, transform=False, load=False, name=None, save=True):
    # Either loads a previously stored model (``load=True``) or fits the RBM
    # stack greedily layer by layer and builds ``self.model`` from it.
    # Returns ``self.predict(X)`` when ``transform`` is true.
    # NOTE(review): original indentation was lost in this copy; the nesting
    # below is a reconstruction.
    if load:
        self.model = self.load()
    else:
        self.layer = []
        X_input = X
        input_size = X_input.shape[1]
        self.input_shape = input_size
        for i in range(self.n_layers):
            output_size = self.params[i]
            print(input_size)
            self.layer.append(RBM(input_size, output_size))
            input_size = output_size
            self.layer[i].fit(X_input, self.n_iter[i])
            # The next layer trains on this layer's output.
            X_input = self.layer[i].predict(X_input)
        self.model = self.get_greed_model()
    # NOTE(review): the body of ``if save:`` is missing in this copy of the
    # file (extraction damage), so the block does not parse. Restore the save
    # call from the original source; ``name`` is otherwise unused here.
    if save:
    if transform:
        return self.predict(X)
def train(self, trainingData): rbmList = [] # list RBM's weights tempData = trainingData # start RBM's training and get the respective weights for n in range(self.nEncoderLayers): if (n == 0 or n == (self.nEncoderLayers - 1)): rbm = RBM(tempData, self.sEncoderLayers[n], rbmType='GBRBM') else: rbm = RBM(tempData, self.sEncoderLayers[n], rbmType='BBRBM') print('Start %d RBM training' % (n + 1)) rbm.train(batchSize=100) [weights, visBias, hidBias] = rbm.getWeights() rbmList.append(RBM_Weights(weights, visBias, hidBias)) data = tf.convert_to_tensor(tempData, dtype=tf.float32, name='data') probHid = tf.sigmoid(tf.matmul(data, weights) + hidBias) hid = tf.cast(tf.greater( probHid, tf.random_uniform(tf.shape(probHid), minval=0, maxval=1, dtype=tf.float32)), dtype=tf.float32) with tf.Session() as sess: if ((self.nEncoderLayers - 1) == n): tempData = else: tempData = # start the fine tuning process return self.fineTuning(rbmList, trainingData)
def main():
    # Demo script (Python 2): trains a 1600-visible / 1000-hidden RBM on
    # random blob images with a decaying learning rate, and every ``every``
    # iterations plots the current data sample and its reconstruction.
    # NOTE(review): original indentation was lost in this copy; the nesting
    # below is a reconstruction -- confirm against the original file.
    random.seed(0)
    size = 40
    rbm = RBM(size * size, 1000)
    slp = 2
    #for blob, params in randomBlobs(10, size, size):
    #    rbm.v = blob.flatten()
    #    plot(rbm, blob)
    #    sleep(slp)
    #
    #    result = reshape(rbm.v, (size, size))
    #    plot(rbm, result)
    #    sleep(slp)
    #
    #    rbm.v2h()
    #    plot(rbm, blob)
    #    sleep(slp)
    #
    #    rbm.h2v()
    #    result = reshape(rbm.v, (size, size))
    #    plot(rbm, result)
    #    sleep(slp)
    every = 2000
    #m = log(.01/.1) / log(10000)
    NN = 20001
    # Decay constant: exp(bb * ii) falls from 1.0 towards .001 as ii -> NN.
    bb = 1. / NN * log(.001 / 1.0)
    elapsed = array([])
    for ii in range(NN):
        # A fresh random blob is drawn every 100 iterations and reused in
        # between.
        if ii % 100 == 0:
            blob, params = randomBlobs(10, size, size).next()
            #print params
        #epsilon = .1 * (ii+1) ** m
        #epsilon = .3 * exp(bb * ii)
        # Learning rate: exponential decay, capped at .1.
        epsilon = min(.1, 1.0 * exp(bb * ii))
        #time0 = time()
        rbm.learn1(blob.flatten(), epsilon=epsilon, activationH2V='gaussianReal', param=1)
        #elapsed = hstack((elapsed, time() - time0))
        if ii % every == 0:
            # Trailing comma: the next print continues on the same line.
            print '%d: epsilon is %f' % (ii, epsilon),
            rbm.v = blob.flatten()
            result = reshape(rbm.v, (size, size))
            plot(rbm, result, 'Iteration %d' % ii, 'Data')
            # Both branches sleep for the same duration.
            sleep(.1) if ii == 0 else sleep(.1)
            # One up-down pass to obtain the reconstruction.
            rbm.v2h()
            rbm.h2v(activation='gaussianReal', param=0)
            result = reshape(rbm.v, (size, size))
            plot(rbm, result, 'Iteration %d' % ii, 'Reconstruction')
            sleep(.5) if ii == 0 else sleep(.5)
            print 'mean of last 50 errors is', mean(rbm.reconErrorNorms[-50:])
def fit(self, X, Y):
    # Trains an RBM (Python 2 / Theano) on the horizontally stacked
    # ``[Y, X]`` data, keeps the parameters of the epoch with the highest
    # mean cost value, and optionally saves a training report.
    # NOTE(review): original indentation was lost in this copy; the nesting
    # below is a reconstruction -- confirm against the original file.

    # Create a report to be saved at the end of execution
    # (when running on the remote server)
    if self.do_report:
        report = {"learning_rate":self.learning_rate,
                  "training_epochs":self.training_epochs,
                  "batch_size":self.batch_size,
                  "n_chains":self.n_chains,
                  "n_samples":self.n_samples,
                  "n_hidden":self.n_hidden,
                  "k":self.k,
                  "costs":np.zeros(self.training_epochs),
                  # "accuracy":np.zeros(self.training_epochs),
                  "pretraining_time":0}
    # Labels come first, then the features.
    train_data = np.hstack([Y,X])
    n_visible = train_data.shape[1]
    # Building of theano format datasets
    train_set = shared_dataset(train_data)
    # compute number of minibatches for training, validation and testing
    # (integer division under Python 2 when both operands are ints)
    n_train_batches = train_set.get_value(borrow=True).shape[0] / \
        self.batch_size
    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')  # the data
    rng = np.random.RandomState(123)
    theano_rng = RandomStreams(rng.randint(2 ** 30))
    # initialize storage for the persistent chain (state = hidden
    # layer of chain)
    persistent_chain = theano.shared(np.zeros((self.batch_size,
                                               self.n_hidden),
                                              dtype=theano.config.floatX),
                                     borrow=True)
    # construct the RBM class
    self.rbm = RBM(input=x, n_visible=n_visible, n_labels=self.n_labels, n_hidden=self.n_hidden, np_rng=rng, theano_rng=theano_rng)
    # get the cost and the gradient corresponding to one step of CD-k
    cost, updates = self.rbm.get_cost_updates(lr=self.learning_rate,
                                              persistent=persistent_chain,
                                              k=self.k)
    # accuracy = self.rbm.get_cv_error()
    #%%====================================================================
    # Training the RBM
    #======================================================================
    # it is ok for a theano function to have no output
    # the purpose of train_rbm is solely to update the RBM parameters
    train_rbm = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            x: train_set[index * self.batch_size: \
                         (index + 1) * self.batch_size]
        },
        name='train_rbm'
    )
    start_time = timeit.default_timer()
    max_score = -np.inf
    # Second RBM with the same configuration; it receives a copy of the best
    # model seen so far via ``clone``.
    argmax_score = RBM(input=x, n_visible=n_visible, n_labels=self.n_labels, n_hidden=self.n_hidden, np_rng=rng, theano_rng=theano_rng)
    # count = 0
    ## go through training epochs
    for epoch in xrange(self.training_epochs):
        # go through the training set
        mean_cost = []
        for batch_index in xrange(n_train_batches):
            mean_cost += [train_rbm(batch_index)]
        print 'Training epoch %d, cost is ' % epoch, np.mean(mean_cost)
        # The epoch's score is its mean cost; the highest score wins.
        score = np.mean(mean_cost)
        if score>max_score:
            max_score = score
            argmax_score.clone(self.rbm)
        # acc = accuracy.eval()
        #
        # if self.scoring=='cost':
        #     score = np.mean(mean_cost)
        # elif self.scoring=='accuracy':
        #     score = acc
        # else:
        #     raise Warning('''scoring must be cost or accuracy,
        #                      set to accuracy''')
        #     score = acc
        #
        # if score>max_score:
        #     max_score = score
        #     argmax_score.clone(self.rbm)
        #     count = 0
        # else:
        #     count += 1
        #
        # if count>2:
        #     break
        if self.do_report:
            report["costs"][epoch] = np.mean(mean_cost)
            # report["accuracy"][epoch] = acc
    end_time = timeit.default_timer()
    pretraining_time = (end_time - start_time)
    # NOTE(review): ``report`` is only created when ``self.do_report`` is
    # true; if this assignment really sits at function level it raises
    # NameError when reporting is disabled -- confirm the original nesting.
    report['pretraining_time'] = pretraining_time
    # Keep the best-scoring model.
    self.rbm = argmax_score
    if self.do_report:
        # NOTE(review): both save statements below are damaged in this copy
        # of the file (the call name and the start of its path argument are
        # missing), so the block does not parse. The visible intent is: try
        # to save the report, and on OSError create ``self.report_folder``
        # and try again. Restore the calls from the original source.
        try:'/'+self.report_name, report)
        except OSError:
            os.mkdir(self.report_folder)'/'+self.report_name, report)
mnist.test.labels Nv = 784 v_shape = (28, 28) Nh = 100 h1_shape = (10, 10) batch_size = 100 epochs = 5 n_samples = mnist.train.num_examples total_batch = int(n_samples / batch_size) * epochs X = tf.placeholder("float", [None, 784]) with tf.variable_scope("l1"): rbm1 = RBM(X, 100) with tf.variable_scope("l2"): rbm2 = RBM(rbm1.out_prob, 784, inbias=rbm1.b_out) rbm2.create_rec() rbm1.create_rec(rbm2.rec) with tf.Session() as sess:, tf.transpose(rbm1.W))) for i in range(total_batch): batch, label = mnist.train.next_batch(batch_size) err, _ =[rbm1.mse, rbm1.train_op], feed_dict={X: batch}) if i % 100 == 0:
def __init__(self, n_visible=6, hidden_layer_sizes=[3, 3], sample_copies=1, sampler=None, optimizer=None, continuous_output=False, verbose=0, device=None):
    '''Create the DBN as two parallel stacks of RBMs.

    For every hidden layer one generative RBM and one inference RBM are
    built with identical settings.

    Arguments:

        :param n_visible: Number of nodes in the visible layer
        :type n_visible: int
        :param hidden_layer_sizes: Number of nodes in each hidden layer
        :type hidden_layer_sizes: list of int
        :param sample_copies: How many samples are drawn from a hidden layer
                              to train the next layer
        :type sample_copies: int
        :param sampler: Method used to draw samples from the model (required)
        :type sampler: :class:`samplers`
        :param optimizer: Optimizer used for parameter updates (required)
        :type optimizer: :class:`optimizers`
        :param continuous_output: Whether the visible layer is
                                  continuous-valued
        :type continuous_output: bool
        :param verbose: Verbosity mode
        :type verbose: int
        :param device: Device on which to compute. None means CPU.
        :type device: torch.device
    '''
    self.sample_copies = sample_copies
    self.verbose = verbose
    self.continuous_output = continuous_output

    self.gen_layers = []
    self.inference_layers = []
    self.n_layers = len(hidden_layer_sizes)
    assert self.n_layers > 0, 'You must specify at least one hidden layer'

    self.device = torch.device('cpu') if device is None else device
    device = self.device

    if optimizer is None:
        raise Exception('You must provide an appropriate optimizer')
    self.optimizer = optimizer

    if sampler is None:
        raise Exception('You must provide an appropriate sampler')
    self.sampler = sampler

    # Construct the DBN out of RBMs: layer sizes chain from the visible
    # layer upwards.
    layer_inputs = [n_visible] + list(hidden_layer_sizes[:-1])
    for n_in, n_out in zip(layer_inputs, hidden_layer_sizes):
        gen_layer = RBM(n_visible=n_in,
                        n_hidden=n_out,
                        sampler=sampler,
                        optimizer=optimizer,
                        verbose=verbose,
                        device=device)
        inf_layer = RBM(n_visible=n_in,
                        n_hidden=n_out,
                        sampler=sampler,
                        optimizer=optimizer,
                        verbose=verbose,
                        device=device)
        self.gen_layers.append(gen_layer)
        self.inference_layers.append(inf_layer)
def main(args): dbh = SQLHelper("database.sqlite") rbm = RBM(visibleSize=784, hiddenSize=500) cur = dbh.query("select * from weight where current=1;") if len(cur) > 0: if cur[0]['weight'] is not None: params = cur[0]['hiddenBias'], cur[0]['visibleBias'], cur[0][ 'weight'] rbm.setParams(params) if len(sys.argv) > 1: for arg in sys.argv: if arg == '-t': testSet = getTestSet(digits=[2]) if VISUALIZE: # n = X[0] > np.random.rand(*X[0].shape) # n = n.reshape(28,28) n = testSet[0].reshape(28, 28) strline = "" for l in n: for b in l: if b: strline += "*" else: strline += " " strline += "\n" print strline sys.exit() rbm.trainNetwork(testSet, dbh, 100) # for i in xrange(10): # testSet = getTestSet(digits=[i]) # if VISUALIZE: # n = X[0] > np.random.rand(*X[0].shape) # n = n.reshape(28,28) # strline = "" # for l in n: # for b in l: # if b: # strline += "*" # else: # strline += " " # strline += "\n" # print strline # sys.exit() # rbm.trainNetwork(testSet, dbh, 100) return if arg == '-w': #sqlite reg 830 i = 0 for weight in rbm.Weights: retval = rbm.sigmoid(weight.copy()) saveImage((retval * 255), "images/weight-" + str(i) + ".png") i += 1 #testSet = getTestSet(randint(0,9)) for i in range(0, 10): testSet = getTestSet(digits=[i], Bernoulli=False, dataSet='testing') retval = testSet[random.randint(0, len(testSet))].copy() saveImage(retval, "images/original-" + str(i) + ".png") retval = np.array((np.random.rand(*retval.shape) < retval), dtype=float) retval = rbm.check(retval) retval = retval.reshape(rbm.visibleLayerSize) saveImage((255 * retval), "images/result-" + str(i) + ".png") return return 0
image = axis.imshow(opt_W1[index, :].reshape(vis_patch_side, vis_patch_side), cmap =, interpolation = 'nearest') axis.set_frame_on(False) axis.set_axis_off() index += 1 """ Show the obtained plot """ train = data[:, 1:]/255 train[np.where(train>0)]=1 #works well for binary images, but features don't get properly extraced for non binary images. ones = train[np.where(np.where(data[:,0] == 5)[0]<100000)[0]] rbm = RBM(784, 196) m = 0.5 for i in range(10): if i > 5: m = 0.9 n = 10 for j in range(1000): rbm.train(train[j*10:j*10+9], momentum=m, w_cost=0.0001) w = rbm.w#.flatten() visualizeW1(w.T, 28, 14) # # plt.imshow(np.reshape(ones[20], (-1,28))) # #
def main(): random.seed(0) size = 2 rbm = RBM(size*2, 1) every = 40000 #every = 100 #m = log(.01/.1) / log(10000) NN = 40001 bb = 1. / NN * log(.001 / 1.0) elapsed = array([]) dataGenerator = randomSimpleLR(-1, size) for ii in range(NN): #if ii % 100 == 0: data, params = #print params #epsilon = .1 * (ii+1) ** m #epsilon = .3 * exp(bb * ii) #epsilon = min(.1, 1.0 * exp(bb * ii)) epsilon = .1 #time0 = time() #rbm.learn1(datablob.flatten(), epsilon = epsilon, activationH2V = 'gaussianReal', param = 1) #elapsed = hstack((elapsed, time() - time0)) if ii % every == 0: print 'Iteration %d' % ii #print '%d: epsilon is %f' % (ii, epsilon), rbm.v = data plot(rbm, 'Iteration %d' % ii, '0. Data') print 'Visible set to:' print rbm.v.T pause() #sleep(1) if ii == 0 else sleep(.1) rbm.v2h() plot(rbm, 'Iteration %d' % ii, '1. To hidden') print 'W * visible (then sampled) =' print dot(rbm._W, rbm._v).T[:,1:] print rbm.h.T pause() rbm.h2v(activation = 'logisticBinary') plot(rbm, 'Iteration %d' % ii, '2. To visible') print 'W.T * hidden (then sampled) =' print dot(rbm._W.T, rbm._h).T[:,1:] print rbm.v.T pause() print #sleep(.5) if ii == 0 else sleep(.5) #print 'mean of last 50 errors is', mean(rbm.reconErrorNorms[-50:]) #print 'average elapsed is:', mean(elapsed) #elapsed = array([]) rbm.learn1(data, epsilon = epsilon, activationH2V = 'logisticBinary')
test_idx = permutation[:test_n] np_test_set = data[test_idx,:] train_idx = permutation[test_n:] np_train_set = data[train_idx,:] del data # compute number of minibatches for training, validation and testing n_train_batches = len(np_train_set) / batch_size rng = np.random.RandomState(123) # construct the RBM class rbm = RBM(n_visible=n_visible, n_labels=n_labels, n_hidden=n_hidden, dropout_rate=dropout_rate, batch_size=batch_size, np_rng=rng) #%%======================================================================== # Training the RBM #========================================================================== max_score = -np.inf argmax_score = RBM(n_visible=n_visible, n_labels=n_labels, n_hidden=n_hidden, dropout_rate=dropout_rate, batch_size=batch_size, np_rng=rng) start_time = timeit.default_timer()
import torch
import numpy as np
import pandas as pd
import os

from RBM import RBM
from load_dataset import MNIST

if __name__ == '__main__':
    # Load the dataset and report the mean of the training inputs.
    mnist = MNIST()
    train_x, train_y, test_x, test_y = mnist.load_dataset()
    print('MAE for all 0 selection:', torch.mean(train_x))

    # Layer sizes: one visible unit per input column, 2500 hidden units.
    vn = train_x.shape[1]
    hn = 2500

    # Training configuration forwarded to the RBM constructor.
    rbm_options = {
        'epochs': 100,
        'mode': 'bernoulli',
        'lr': 0.0005,
        'k': 10,
        'batch_size': 128,
        'gpu': True,
        'optimizer': 'adam',
        'savefile': '',
        'early_stopping_patience': 10,
    }
    rbm = RBM(vn, hn, **rbm_options)
    rbm.train(train_x)
# Data-set locations come from the "RBM" section of the configuration.
TrainingSetFile = Config.get("RBM", "trainingSetFile")
ValidationSetFile = Config.get("RBM", "validationSetFile")
ValidationFromTrainingSetFile = Config.get("RBM", "validationFromTrainingSetFile")
TestSetFile = Config.get("RBM", "testSetFile")

dataLoader = DataLoader(trainingSetFile=TrainingSetFile,
                        validationSetFile=ValidationSetFile,
                        validationFromTrainingSetFile=ValidationFromTrainingSetFile,
                        testSetFile=TestSetFile,
                        K=K,
                        M=M,
                        batchSizeForOneThread=batchSizeForOneThread,
                        threadsNumber=threadsNumber,
                        verbose=Verbose)

# Cap the update frequencies at updateFrequencyMAX.
whereUpdateMax = np.where(dataLoader.updateFrequency > updateFrequencyMAX)
dataLoader.updateFrequency[whereUpdateMax] = updateFrequencyMAX

# Raise every visible-bias initial value below the floor up to the floor.
minVBias = np.float64(0.1e-100)
dataLoader.vBiasesInitialization[dataLoader.vBiasesInitialization < minVBias] = minVBias

momentum = 0.5
rbm = RBM(M, K, F, learningRate, momentum, wDecay,
          dataLoader.vBiasesInitialization, dataLoader.updateFrequency)

# NOTE(review): the numerator of this expression was missing from the
# source (only "/ (threadsNumber * batchSizeForOneThread)))" survived).
# `int(np.floor(dataLoader.trainingSetSize ...` is a reconstruction by
# analogy with `validationSetSize` below -- confirm the attribute name and
# the rounding function against the original file.
numberOfMiniSets = int(np.floor(dataLoader.trainingSetSize
                                / (threadsNumber * batchSizeForOneThread)))

# Append the epoch-0 RMSE on the validation set.
# `GetVisiableLayer` and `setSize` are not passed to computeRMSE, so they
# are presumably read by it as module globals -- keep the names as they are.
with open("Outs/"+sys.argv[1]+"_validation_RMSE.txt", "a") as rmsesFile:
    dataLoader.StartNewValidationSet()
    GetVisiableLayer = dataLoader.GiveVisibleLayerForValidation
    setSize = dataLoader.validationSetSize
    rmsesFile.write("Epoch {0}, RMSE {1}\n".format(0, computeRMSE(verbose=Verbose)))
    rmsesFile.flush()

# Append the epoch-0 RMSE on the validation-from-training set.
with open("Outs/"+sys.argv[1]+"_training_RMSE.txt", "a") as rmsesFile:
    dataLoader.StartNewValidationFromTrainingSet()
    GetVisiableLayer = dataLoader.GiveVisibleLayerForValidationFromTraining
    setSize = dataLoader.validationFromTrainingSetSize
    rmsesFile.write("Epoch {0}, RMSE {1}\n".format(0, computeRMSE(verbose=Verbose)))
    rmsesFile.flush()
train_idx = permutation[test_n:]
train_set = data[train_idx, :]
del data

# The first `n_labels` columns hold the label scores; argmax turns each
# row into a single class index.
test_labels = np.argmax(test_set[:, :n_labels], axis=1)
train_labels = np.argmax(train_set[:, :n_labels], axis=1)

# Cut the non-label columns of the training set into consecutive
# minibatches of at most `batch_size` rows.
batches = [
    train_set[i:i + batch_size, n_labels:]
    for i in range(0, train_set.shape[0], batch_size)
]

rng = np.random.RandomState(123)

# construct the RBM class
rbm = RBM(
    n_visible=n_visible,
    n_hidden=n_hidden,
    dropout_rate=dropout_rate,
    batch_size=batch_size,
    np_rng=rng,
)

#%%============================================================================
# Training the RBM
#==============================================================================
start_time = timeit.default_timer()
accuracies = []
argmax_acc = 0

for epoch in xrange(training_epochs):
    epoch_time = timeit.default_timer()
    mean_cost = []
import numpy as np

from preprocessing import create_submission
from RBM import RBM
from preprocessing import read_data
from sklearn.preprocessing import LabelEncoder


def sigmoid(x):
    """Return the element-wise logistic function ``1 / (1 + exp(-x))``."""
    return 1/(1+np.exp(-x))


load_saved = True
train_data = np.load('train_data.npy')

if load_saved:
    # report.npy stores a pickled dict.  NumPy >= 1.16.3 refuses to unpickle
    # unless allow_pickle=True is passed explicitly.
    # NOTE(review): unpickling executes arbitrary code -- only load report
    # files from a trusted source.
    report = np.load("report.npy", allow_pickle=True).item()
    # NOTE(review): len(train_data) is the number of rows; confirm that this
    # is what RBM expects as its first argument.
    rbm = RBM(len(train_data), report["n_hidden"], report["batch_size"])
    rbm.W = report["W"]
    rbm.hbias = report["hbias"]
    rbm.vbias = report["vbias"]
# NOTE(review): `rbm` is only defined when load_saved is True; the code
# below fails with NameError otherwise.

# The first 20 columns hold the label scores, the rest are the features.
Y = np.argmax(train_data[:, :20], axis=1)
train_data = train_data[:, 20:]

# NOTE(review): the call inside sigmoid() was truncated in the source
# ("sigmoid(, rbm.W) + rbm.hbias)"); np.dot(train_data, rbm.W) is the
# reconstruction -- confirm.
X = sigmoid(np.dot(train_data, rbm.W) + rbm.hbias)
#X = train_data

# NOTE(review): `lr` is not imported in the visible part of the file.
classifier = lr(0.01, solver='lbfgs', multi_class='multinomial')
# NOTE(review): truncated in the source (", Y)"); restored as a fit call.
classifier.fit(X, Y)

test_data = np.load('test_data.npy')
# NOTE(review): truncated in the source in the same way as X above.
test_X = sigmoid(np.dot(test_data, rbm.W) + rbm.hbias)
def __init__(self, shapes, queue, noise=None):
    """Build the symbolic graph for a stack of RBM layers, then compile it.

    Parameters
    ----------
    shapes : sequence of 3-item entries
        The first item of ``shapes[0]`` is the input size.  Every later
        entry ``s`` adds one layer built as
        ``RBM(nv, s[0], output, v_unit=s[2], unit_type=s[1])``.
    queue :
        Communication queue for the log; stored as ``self.queue``.
    noise : str or None
        Noise applied to the code before the fine-tuning decode.
        ``'MASK'`` multiplies by a binomial mask with ``p = 1 - cl``,
        ``'GAUSS'`` adds normal noise with ``std = cl``; both results are
        clipped to [0, 1].  Any other value leaves the code unchanged.
    """
    # Communication queue for the log
    self.queue = queue

    # Symbolic variables for the input, corruption level and learning rate
    X = T.matrix('X')
    self.inputs = X
    cl = T.scalar(dtype=theano.config.floatX, name='corruption level')
    # NOTE(review): the assignment target was missing from the source
    # ("= cl"); `self.cl` is a guess -- confirm the attribute name.
    self.cl = cl
    lr = T.scalar(dtype=theano.config.floatX, name='learning rate')
    # NOTE(review): target missing from the source ("= lr"); guessed.
    self.lr = lr

    # Random number generators used for the noise
    np_rng = np.random.RandomState()
    theano_rng = RandomStreams(np_rng.randint(2**30))

    # Layer bookkeeping
    self.layers = []
    self.mask = []
    self.shapes = shapes
    (nv, _, _) = shapes[0]
    self.params = []
    self.params_ft = []
    output = X
    sample_up = X

    # Dropout paths: `dropout_out` is multiplied by a random binary mask,
    # `rec_do` is scaled by the keep probability instead.
    p_do = 0.5
    self.p_do = p_do
    dropout_out = X
    rec_do = X

    # Build the layers, linking each one to the next, and collect params
    for i, s in enumerate(shapes[1:]):
        lay = RBM(nv, s[0], output, v_unit=s[2], unit_type=s[1])
        self.layers.append(lay)
        self.params += lay.params
        self.params_ft += lay.params_ft
        nv = s[0]
        output = lay.up(output)
        sample_up = lay.sample_h_given_v(sample_up)
        if i != 0:
            # No dropout on the raw input, only between layers
            mask = theano_rng.binomial(size=dropout_out.shape, n=1, p=p_do)
            dropout_out *= mask
            rec_do *= p_do
        dropout_out = lay.up(dropout_out)
        rec_do = lay.up(rec_do)

    # Upward results
    self.code = output
    self.sample_up = sample_up

    # Starting points for the downward pass
    self.N = len(self.layers)
    recstr = output
    decode = X
    sample_down = X
    sample = sample_up

    # Add noise to the code for the fine-tuning path
    self.noise = noise
    if self.noise == 'MASK':
        keep = theano_rng.binomial(size=output.shape, n=1, p=1-cl)
        fine_tune = T.clip(output * keep, 0., 1.)
    elif self.noise == 'GAUSS':
        jitter = theano_rng.normal(size=output.shape, std=cl)
        fine_tune = T.clip(output + jitter, 0., 1.)
    else:
        fine_tune = output

    # Push every variable back down through the layers, top layer first
    for i in range(1, self.N+1):
        lay = self.layers[self.N-i]
        recstr = lay.down(recstr)
        decode = lay.down(decode)
        fine_tune = lay.down(fine_tune)
        # Fixed: the original assigned to the misspelt `sample_dowm`, so
        # `sample_down` never went through any layer.
        sample_down = lay.sample_v_given_h(sample_down)
        sample = lay.sample_v_given_h(sample)
        if i != self.N:
            rec_do *= p_do
            mask = theano_rng.binomial(size=dropout_out.shape, n=1, p=p_do)
            dropout_out *= mask
        dropout_out = lay.down(dropout_out)
        rec_do = lay.down(rec_do)

    # Expose the sampling and decoding expressions
    self.recstr = recstr
    self.decode = decode
    self.ft = fine_tune
    # NOTE(review): target missing from the source ("= dropout_out");
    # `self.dropout` is a guess -- confirm the attribute name.
    self.dropout = dropout_out
    self.sample_down = sample_down
    self.sample = sample

    self.compile()
def initialize_variables(self):
    """Create the sigmoid layers, their weight-sharing RBMs and the output layer.

    For each of the ``self.n_layers`` levels a ``HiddenLayer`` is appended
    to ``self.sigmoid_layers`` and an ``RBM`` that receives that layer's
    ``W`` and ``b`` is appended to ``self.rbm_layers``.  A
    ``LogisticRegression`` layer is then placed on the last sigmoid
    layer's output.  ``self.params`` collects the sigmoid-layer and
    logistic-layer parameters only.
    """
    for depth in range(self.n_layers):
        # The first layer reads the network input; every later layer reads
        # the output of the sigmoid layer below it.
        if depth == 0:
            n_units_below = self.n_in
            below = self.input
        else:
            n_units_below = self.hidden_layers_sizes[depth - 1]
            below = self.sigmoid_layers[-1].output

        hidden = HiddenLayer(
            rng=self.numpy_rng,
            input=below,
            n_in=n_units_below,
            n_out=self.hidden_layers_sizes[depth],
            activation=T.nnet.sigmoid,
        )
        self.sigmoid_layers.append(hidden)

        # Only the sigmoid layers' parameters are registered here; nothing
        # from the RBM constructed below is added to self.params.
        self.params.extend(hidden.params)

        # RBM built on the same input and handed this layer's W and b
        shared_rbm = RBM(
            numpy_rng=self.numpy_rng,
            theano_rng=self.theano_rng,
            input=below,
            n_visible=n_units_below,
            n_hidden=self.hidden_layers_sizes[depth],
            W=hidden.W,
            hbias=hidden.b,
        )
        self.rbm_layers.append(shared_rbm)

    # Logistic layer on top of the last sigmoid layer
    top_activation = self.sigmoid_layers[-1].output
    self.logistic_regression_layer = LogisticRegression(
        input=top_activation,
        n_in=self.hidden_layers_sizes[-1],
        n_out=self.n_out,
    )
    self.params.extend(self.logistic_regression_layer.params)
# Symbolic inputs
index = T.lscalar()  # index to a [mini]batch
x = T.matrix("x")  # the data

# Seeded NumPy generator; it also seeds the Theano random stream.
rng = np.random.RandomState(123)
theano_rng = RandomStreams(rng.randint(2 ** 30))

# Shared storage for the persistent chain, initialised to zeros with one
# row per batch element and one column per hidden unit.
persistent_chain = theano.shared(
    np.zeros((batch_size, n_hidden), dtype=theano.config.floatX),
    borrow=True,
)

# construct the RBM class
rbm = RBM(
    input=x,
    validation=test_set,
    n_visible=n_visible,
    n_labels=n_labels,
    n_hidden=n_hidden,
    np_rng=rng,
    theano_rng=theano_rng,
)

# Cost and parameter updates for `k` sampling steps on the persistent chain
cost, updates = rbm.get_cost_updates(
    lr=learning_rate,
    persistent=persistent_chain,
    k=k,
)

accuracy = rbm.get_cv_error()

# Make a prediction for an unlabelled sample.
t_unlabelled = T.tensor3("unlabelled")
label, confidence = rbm.predict(t_unlabelled)

#%%========================================================================
# Training the RBM
MA_label = dataset['ylab']
MA_data, MA_label = shuffle(MA_data, MA_label)

# Centre and scale the data; np.mean / np.std are called without an axis.
MA_data = (MA_data - np.mean(MA_data)) / np.std(MA_data)

# Each hidden layer is `feature2hiddenRatio` times the size of the one
# below it, truncated to an integer.
n_feature = np.shape(MA_data)[1]
n_hidden1 = int(feature2hiddenRatio * n_feature)
n_hidden2 = int(feature2hiddenRatio * n_hidden1)
n_hidden3 = int(feature2hiddenRatio * n_hidden2)
# n_hidden4 = int(feature2hiddenRatio * n_hidden3)

with tf.device("/gpu:0"):
    # The first RBM takes the real-valued features ("gaussian"); the
    # upper ones are declared "binary".
    rbm1 = RBM(n_hidden=n_hidden1, n_visible=n_feature,
               alpha=0.0001, datatype="gaussian")
    rbm2 = RBM(n_hidden=n_hidden2, n_visible=n_hidden1,
               alpha=0.0001, datatype="binary")
    rbm3 = RBM(n_hidden=n_hidden3, n_visible=n_hidden2,
               alpha=0.0001, datatype="binary")
    # rbm4 = RBM(n_hidden=n_hidden4, n_visible=n_hidden3, alpha=0.0001, datatype="binary")

    # NOTE(review): the collapsed source does not show whether this loop
    # was inside the `with` block -- confirm against the original file.
    for num in range(Epoch):
        new_w, new_hb, new_vb, ReconErr = rbm1.train(MA_data)
        print("Epoch: {}, Reconstruction Error: {}".format(num, ReconErr))
