def main():
    """Train an ``ffnet.FFNet`` on the GPU and run it on the test set.

    The data come from ``data/mnist49.dat``. Both the training and the test
    matrix are divided by 255 and have their axis-1 mean subtracted before
    being copied to the device.
    """
    # util.load() is handed the module namespace; the four arrays below are
    # then read from, and rebound in, module scope.
    global dat_train, dat_test, lbl_train, lbl_test

    # Training hyper-parameters.
    epsilon = 0.01
    momentum = 0.9
    num_epochs = 30
    batch_size = 128
    num_batches = 92

    # Model dimensions.
    dim_in = 784
    dim_out = 1
    num_hid = 1024

    # Bring up CUBLAS before any device matrix is created.
    cm.cublas_init()

    util.load('data/mnist49.dat', globals())

    # Training set: scale, centre, copy to the GPU.
    # NOTE(review): the training mean is offset by 1e-8 while the test mean
    # below is not -- confirm whether that asymmetry is intended.
    dat_train = dat_train/255.
    dat_train = dat_train - (np.mean(dat_train, 1)+10**-8)[:, np.newaxis]
    gpu_train = cm.CUDAMatrix(dat_train)
    gpu_labels = cm.CUDAMatrix(lbl_train)

    model = ffnet.FFNet(epsilon, momentum, num_epochs, batch_size,
                        num_batches, dim_in, dim_out, num_hid)
    model.train(gpu_train, gpu_labels)

    # Test set: same scaling and centring, then copy to the GPU.
    dat_test = dat_test/255.
    dat_test = dat_test - np.mean(dat_test, 1)[:, np.newaxis]
    gpu_test = cm.CUDAMatrix(dat_test)
    gpu_labels = cm.CUDAMatrix(lbl_test)

    # Resize the network's test buffers to the second dimension of the test
    # matrix before evaluating.
    model.reinitTestStorage(dat_test.shape[1])
    model.test(gpu_test, gpu_labels)

    cm.cublas_shutdown()
# Evaluate the neural network on the test data.

# Scale and centre the test matrix, then copy it and its labels to the GPU.
dat_test = dat_test/255.
dat_test = dat_test - np.mean(dat_test, 1)[:, np.newaxis]
dev_test = cm.CUDAMatrix(dat_test)
dev_lbl = cm.CUDAMatrix(lbl_test)

# Initialize temporary device storage for the hidden and output activations.
h = cm.empty((num_hid, dat_test.shape[1]))
out = cm.empty((dim_out, dat_test.shape[1]))

# Forward pass, hidden layer: h = sigmoid(w_w1.T * x + w_b1).
cm.dot(w_w1.T, dev_test, target=h)
h.add_col_vec(w_b1)
h.apply_sigmoid()

# Forward pass, output layer: out = sigmoid(w_w2.T * h + w_b2).
cm.dot(w_w2.T, h, target=out)
out.add_col_vec(w_b2)
out.apply_sigmoid()

# Error: a case counts as misclassified when |output - label| exceeds 0.5.
out.subtract(dev_lbl)
print("Testing misclassification rate: " + str(np.mean(np.abs(out.asarray())>0.5)))

cm.cublas_shutdown()
def FreeGPU():
    """Shut down CUBLAS through cudamat."""
    cm.cublas_shutdown()
def FreeGPU(board):
    """Shut down CUBLAS through cudamat.

    ``board`` is accepted but not used by this function.
    """
    cm.cublas_shutdown()
# Build the two input vectors.
A = np.ones(N, dtype=dtype)
B = np.ones(N, dtype=dtype)

# Time VectorAdd, then show the elapsed time and the first ten results.
start = timer()
C = VectorAdd(A, B)
vactoradd_time = timer() - start
print("Tempo %f segundos " % vactoradd_time)
print(C[:10])

# Same measurement for cuVectorAdd.
start = timer()
C = cuVectorAdd(A, B)
vactoradd_time = timer() - start
print("Tempo %f segundos " % vactoradd_time)
print(C[:10])

import cudamat as cm

# Host-side matrix product with NumPy; the result is discarded.
n = int(2e3)
p = int(40e3)
A = np.random.randn(n, p)
B = np.random.randn(p, n)
A @ B

# The same shapes on the device: random-filled cudamat matrices and their
# product (also discarded), bracketed by CUBLAS init and shutdown.
cm.cublas_init()
cm.CUDAMatrix.init_random()
A_cm = cm.empty((n, p)).fill_with_randn()
B_cm = cm.empty((p, n)).fill_with_randn()
A_cm.dot(B_cm)
cm.cublas_shutdown()
def teardown():
    """Shut down CUBLAS through cudamat."""
    cm.cublas_shutdown()
def FreeGPU(board):
    """Free the board.

    Shuts down CUBLAS, then hands ``board`` to ``gpu_lock.free_lock``.
    """
    cm.cublas_shutdown()
    gpu_lock.free_lock(board)
def calc_output_legacy(self, data, batch_size):
    """Calculate the output (probabilities) of the network for ``data``.

    Rows are processed in slices of ``batch_size``. Every full slice is
    pushed through the sigmoid layers with ``rbm_cudamat``; any rows left
    over after the last full slice are pushed through the same layers with
    ``rbm_numpy``.

    Args:
        data: 2-D array indexed as ``data[rows, :]``.
        batch_size: Number of rows handed to ``rbm_cudamat`` per call.

    Returns:
        Array with ``data.shape[0]`` rows and as many columns as the
        ``node_count`` of the last entry in ``self.arch``.
    """
    import cudamat as cm
    import rbm_numpy
    import rbm_cudamat

    # Select the card BEFORE CUBLAS and the random state are initialised, so
    # that both are set up on the requested device rather than the default.
    if self.legacy_card_number != 0:
        cm.cuda_set_device(self.legacy_card_number)

    cm.cublas_init()
    try:
        cm.CUDAMatrix.init_random(1)

        # Output buffer sized from the last layer's node count.
        last_layer = self.arch[self.layer_count - 1]
        output = np.empty((data.shape[0], last_layer['node_count']))

        # Full batches on the GPU (integer division drops the remainder).
        processed = 0
        for j in range(data.shape[0] // batch_size):
            start = j * batch_size
            stop = (j + 1) * batch_size
            curr_data = data[start:stop, :]
            for i in range(1, self.layer_count):
                # Only sigmoid ('S') layers transform the data.
                if self.arch[i]['node_type'] == 'S':
                    curr_data = rbm_cudamat.calc_hidden_probs(
                        curr_data,
                        self.weights[i]['w'],
                        self.weights[i]['hb'],
                        batch_size)
            output[start:stop, :] = curr_data[:, :]
            processed = processed + batch_size

        # Rows that did not fill a whole batch go through the NumPy path.
        if processed != data.shape[0]:
            curr_data = data[processed:, :]
            for i in range(1, self.layer_count):
                if self.arch[i]['node_type'] == 'S':
                    curr_data = rbm_numpy.calc_hidden_probs(
                        curr_data,
                        self.weights[i]['w'],
                        self.weights[i]['hb'])
            output[processed:, :] = curr_data[:, :]
    finally:
        # Shut CUBLAS down even when a batch raises, so a failed call does
        # not leave the library initialised for the next one.
        cm.cublas_shutdown()

    return output
def FreeGPU(board):
    """Shut down CUBLAS, then hand ``board`` to ``gpu_lock.free_lock``."""
    cm.cublas_shutdown()
    # Optional.
    gpu_lock.free_lock(board)
# Initialise CUBLAS.
cb.cublas_init()

# Create the reservoir.
reservoir = Oger.nodes.CUDAReservoirNode(
    input_dim=digitImport.nbInputs,
    output_dim=rc_Size,
    input_scaling=rc_InputScaling,
    spectral_radius=rc_SpectralRadius)

# Ridge-regression readout with one output per digit.
readout = Oger.nodes.RidgeRegressionNode(output_dim=rc_nbDigits, dtype='float64')

# Classifier node configured from the image layout reported by digitImport.
classifier = DigitClassifierNode(
    mnist_space=digitImport.interImagesSpace,
    label_space_ratio=digitImport.interImagesRatio,
    digit_space_ratio=digitImport.digitImageRatio,
    image_size=digitImport.imagesSize,
    nb_digit=rc_nbDigits,
    method="average",
    input_dim=rc_nbDigits,
    dtype='float64')

# Fetch part of the training set (with its labels) and part of the test set.
inputs, outputs = digitImport.getTrainingSet(length=rc_TrainingLength)
inputs_test, outputs_test = digitImport.getTestSet(length=rc_TestLength)

# One training-data entry per node in the flow; only the readout gets data.
data = [None, [(inputs, outputs)], None]

# Build the flow.
flow = mdp.Flow([reservoir, readout, classifier], verbose=0)

# Train the network.
flow.train(data)

# Apply the trained network to the test set.
testout, out = flow(inputs_test)

# Digit error rate.
# NOTE(review): `count` and `der` are not assigned in this fragment; they
# presumably come from the surrounding script -- confirm that `der` is the
# value meant to be printed here.
count += float(digitImport.digitErrorRate(testout))
print("Digit Error Rate : {}".format(der))

# Shut down CUBLAS.
cb.cublas_shutdown()
def train_finalize(self):
    """Copy the GPU weights back to the host and shut CUBLAS down.

    After the copy, ``self.W`` refers to the host-side ``numpy_array`` of
    ``self.Wgpu``.
    """
    gpu_weights = self.Wgpu
    gpu_weights.copy_to_host()
    self.W = gpu_weights.numpy_array

    print("CUDA try shutdown")
    cm.cublas_shutdown()