def test_column_set_get(): m = 256 n = 128 data = np.random.randn(m, n) cm_data = cm.CUDAMatrix(cm.reformat(data)) indices = permutation(n) cm_indices = cm.CUDAMatrix(cm.reformat(indices.reshape(1, -1))) start = 0 end = 10 cm_columns = cm_data.get_column_vectors(cm_indices, start, end) get_error = np.sum((cm_columns.asarray() - data[:, indices[start:end]])**2) data_set = np.random.randn(m, end - start) cm_columns.free_device_memory() cm_columns = cm.CUDAMatrix(cm.reformat(data_set)) cm_data.set_column_vectors(cm_indices, start, end, cm_columns) data[:, indices[start:end]] = data_set set_error = np.sum((cm_data.asarray() - data)**2) print "Get Error = ", get_error print "Set Error = ", set_error assert get_error < 10**-2 or set_error < 10**-2, \ "Error in CUDAMatrix.get_column_vectors exceeded threshold"
def load_params(self, param_file):
    """Load the weight matrix and both bias vectors from ``param_file``.

    ``util.load`` is handed an empty dict, which is afterwards read for the
    keys 'W', 'biasesHid' and 'biasesVis' (presumably filled in by
    ``util.load``).  Each entry is passed through ``cm.reformat`` and
    wrapped in a ``cm.CUDAMatrix``; ``input_dim`` and ``num_units`` are then
    taken from the shape of the weight matrix.
    """
    loaded = {}
    util.load(param_file, loaded, verbose=False)

    def to_device(key):
        # Wrap one loaded array as a CUDAMatrix.
        return cm.CUDAMatrix(cm.reformat(loaded[key]))

    self.cmW = to_device('W')
    self.cmBiasesHid = to_device('biasesHid')
    self.cmBiasesVis = to_device('biasesVis')

    weight_shape = self.cmW.shape
    self.input_dim, self.num_units = weight_shape
def test_softmax(): m = 2000 n = 128 data = np.random.randn(m, n) prob = data - data.max(axis=0).reshape(1, -1) prob = np.exp(prob) / np.exp(prob).sum(axis=0).reshape(1, -1) cm_data = cm.CUDAMatrix(cm.reformat(data)) cm_prob = cm.CUDAMatrix(cm.reformat(np.zeros(data.shape))) cm_data.compute_softmax(cm_prob) error = np.sum((cm_prob.asarray() - prob)**2) print "Error = ", error assert error < 10**-2, "Error in CUDAMatrix.compute_softmax exceeded threshold"
def permute_indices_for_loaded_data(self):
    '''
    Shuffle the indices of the data that is already loaded.

    Useful when all the data was loaded at once and should not be
    reloaded: only the index row is re-ordered and re-uploaded as a
    CUDAMatrix, the batch position is rewound to 0 and the object is
    marked as set up.
    '''
    shuffled_order = permutation(self._data_indices.size)
    # _data_indices is indexed as a single row; keep it as a (1, N) array.
    reordered_row = self._data_indices[0, shuffled_order]
    self._data_indices = reordered_row.reshape((1, -1))
    self._cm_indices_matrix = cm.CUDAMatrix(cm.reformat(self._data_indices))
    self._batch_index = 0
    self._is_setup = True
def test_columnwise_dot():
    """Compare cm.columnwise_dot with a NumPy per-column dot product.

    Two random float32, Fortran-ordered matrices are multiplied
    element-wise and summed over axis 0 on the host; the summed absolute
    difference to the GPU result must stay below 10**-2.
    """
    rows, cols = 64, 64
    a = np.array(np.random.randn(rows, cols), dtype=np.float32, order='F')
    b = np.array(np.random.randn(rows, cols), dtype=np.float32, order='F')

    # Host reference: one dot product per column, kept as a (1, cols) row.
    expected = (a * b).sum(axis=0).reshape(1, -1)

    gpu_a = cm.CUDAMatrix(a)
    gpu_b = cm.CUDAMatrix(b)
    gpu_result = cm.CUDAMatrix(cm.reformat(np.zeros(expected.shape)))
    cm.columnwise_dot(gpu_a, gpu_b, gpu_result)

    err = np.abs(expected - gpu_result.asarray()).sum()
    assert err < 10**-2, "Error in cudamat_ext.columnwise_dot exceeded threshold"
def get_iterator(self, batch_size, return_labels=True):
    """Generator yielding one mini-batch per iteration from the loaded data.

    Parameters:
        batch_size: number of columns in each yielded data matrix.
        return_labels: when true each item is the pair
            (data_batch, targets_batch); otherwise only data_batch.

    Yields:
        ``self._cm_data_for_batch`` (and ``self._cm_targets_for_batch``).
        The very same buffer objects are yielded on every iteration, so a
        caller that needs a batch after advancing the iterator must copy it.

    Raises:
        Exception: if the ``_is_setup`` attribute has never been set.

    When the generator runs to completion ``_is_setup`` is reset to False,
    which makes the next call re-run permute_file_indices_for_loading().
    """
    if not hasattr(self, '_is_setup'):
        raise Exception, "Call setup_data or permute_indices first"
    if not self._is_setup:
        self.permute_file_indices_for_loading()

    # Per-batch GPU buffers, allocated once per call and reused below.
    self._cm_data_for_batch = cm.empty((self._data_dim, batch_size))
    # The targets buffer is viewed in two shapes: one column per output
    # frame (target_shape) while it is being filled, and one column per
    # data point (multi_target_shape) when it is handed to the caller.
    target_shape = ((self.get_label_dim(),
                     self._num_outputs_per_pt * batch_size))
    multi_target_shape = ((self.get_label_dim() * self._num_outputs_per_pt,
                           batch_size))
    self._cm_targets_for_batch = cm.empty(target_shape)
    self._cm_data_indices_for_batch = cm.empty((1, batch_size))
    self._cm_data_indices_with_frames = cm.empty(
                      (self._num_frames_per_pt, batch_size))
    self._cm_target_indices_with_frames = cm.empty(
                      (self._num_outputs_per_pt, batch_size))
    self._cm_target_indices_for_batch = cm.empty(
                      (1, self._num_outputs_per_pt * batch_size))
    # Column vectors 0..num_frames_per_pt-1 and 0..num_outputs_per_pt-1,
    # used as per-frame offsets added to each batch index.
    self._cm_range_frames = cm.CUDAMatrix(cm.reformat(arange(\
                      self._num_frames_per_pt).reshape((-1,1))))
    self._cm_range_target_frames = cm.CUDAMatrix(cm.reformat(arange(\
                      self._num_outputs_per_pt).reshape((-1,1))))
    # Identity matrix of size label_dim; its columns are selected by label
    # index further down (presumably to produce one-hot target vectors).
    self._cm_target_vectors_matrix = cm.CUDAMatrix(\
                      eye(self.get_label_dim()))

    while True:
        # Not enough frames left for a full batch: stop if every file has
        # been consumed, otherwise load more data.
        # NOTE(review): assumes load_next_data() resets _batch_index -- confirm.
        if self._batch_index + batch_size > self._num_frames_for_training:
            if self._start_file_num >= self._num_files:
                break
            self.load_next_data()

        # Starting indices of the data points in this batch.
        self._cm_indices_matrix.get_col_slice(
            self._batch_index, self._batch_index + batch_size,
            self._cm_data_indices_for_batch)

        # Build frame offset + start index for every input frame, then
        # flatten the result to a single row of column indices.
        self._cm_data_indices_with_frames.reshape(
            (self._num_frames_per_pt, batch_size))
        self._cm_data_indices_with_frames.assign(0)
        self._cm_data_indices_with_frames.add_col_vec(\
            self._cm_range_frames)
        self._cm_data_indices_with_frames.add_row_vec(\
            self._cm_data_indices_for_batch)
        self._cm_data_indices_with_frames.reshape(
            (1, self._num_frames_per_pt * batch_size))

        # Same construction for the output frames, additionally shifted by
        # self.label_offset.
        self._cm_target_indices_with_frames.reshape(
            (self._num_outputs_per_pt, batch_size))
        self._cm_target_indices_with_frames.assign(0)
        self._cm_target_indices_with_frames.add_col_vec(\
            self._cm_range_target_frames)
        self._cm_target_indices_with_frames.add_row_vec(\
            self._cm_data_indices_for_batch)
        self._cm_target_indices_with_frames.add(self.label_offset)
        self._cm_target_indices_with_frames.reshape(
            (1, self._num_outputs_per_pt * batch_size))

        # Gather the selected frames into the data buffer and view it as
        # one column per data point.
        self._cm_data_matrix.select_columns(\
            self._cm_data_indices_with_frames, self._cm_data_for_batch)
        self._cm_data_for_batch.reshape((self._data_dim, batch_size))

        if self.dropout_rate != 0:
            self._cm_data_for_batch.dropout(self.dropout_rate)
            # Rescale by 1 / (1 - rate) after applying dropout.
            self._cm_data_for_batch.mult(1. / (1 - self.dropout_rate))

        self._batch_index += batch_size

        if return_labels:
            # First look up the target entry for every output frame ...
            self._cm_targets_matrix.select_columns(\
                self._cm_target_indices_with_frames,
                self._cm_target_indices_for_batch)
            # ... then use those entries as column indices into the
            # identity matrix, and regroup to one column per data point.
            self._cm_targets_for_batch.reshape(target_shape)
            self._cm_target_vectors_matrix.select_columns(\
                self._cm_target_indices_for_batch,
                self._cm_targets_for_batch)
            self._cm_targets_for_batch.reshape(multi_target_shape)
            yield self._cm_data_for_batch, self._cm_targets_for_batch
        else:
            yield self._cm_data_for_batch

    # Data exhausted: force a fresh permutation on the next get_iterator().
    self._is_setup = False
import GPULock GPULock.GetGPULock() import cudamat_ext as cm cm.cublas_init() cm.CUDAMatrix.init_random(42) import numpy as np from pylab import log, sum m = 299 n = 128 target = np.random.rand(m, n) > 0.5 prob = np.random.rand(m, n) cm_target = cm.CUDAMatrix(cm.reformat(target)) cm_prob = cm.CUDAMatrix(cm.reformat(prob)) cm_log_prob = cm.empty((1,n)) cm.compute_logistic_log_prob(cm_prob, cm_target, cm_log_prob) lg_prob = sum(target*log(1e-8+prob) + (1-target)*log(1-prob+1e-8), axis=0).reshape((1,-1)) error = np.sum((cm_log_prob.asarray() - lg_prob)**2) print "Error = ", error, " sum_lg_prob = ", str(sum(lg_prob))