def test_column_set_get():
    """Check CUDAMatrix.get_column_vectors / set_column_vectors against NumPy.

    A random (m, n) matrix is uploaded together with a random permutation of
    its column indices.  Columns ``indices[start:end]`` are first read back
    with ``get_column_vectors`` and then overwritten with
    ``set_column_vectors``; each result is compared with the same operation
    done on the host with NumPy indexing.

    Both squared errors are printed, and the test fails if *either* of them
    reaches the threshold.
    """
    m = 256
    n = 128
    data = np.random.randn(m, n)
    cm_data = cm.CUDAMatrix(cm.reformat(data))
    indices = permutation(n)
    cm_indices = cm.CUDAMatrix(cm.reformat(indices.reshape(1, -1)))
    start = 0
    end = 10

    # Read a slice of permuted columns and compare with host-side indexing.
    cm_columns = cm_data.get_column_vectors(cm_indices, start, end)
    get_error = np.sum((cm_columns.asarray() - data[:, indices[start:end]])**2)

    # Overwrite the same columns with fresh values on both device and host.
    data_set = np.random.randn(m, end - start)
    cm_columns.free_device_memory()
    cm_columns = cm.CUDAMatrix(cm.reformat(data_set))
    cm_data.set_column_vectors(cm_indices, start, end, cm_columns)
    data[:, indices[start:end]] = data_set
    set_error = np.sum((cm_data.asarray() - data)**2)

    # Single-argument print() produces the same text under Python 2 and 3.
    print("Get Error =  %s" % (get_error,))
    print("Set Error =  %s" % (set_error,))

    # Each operation has to pass on its own; the previous combined check used
    # `or` and therefore let a broken get (or set) slip through.
    assert get_error < 10**-2, \
        "Error in CUDAMatrix.get_column_vectors exceeded threshold"
    assert set_error < 10**-2, \
        "Error in CUDAMatrix.set_column_vectors exceeded threshold"
def load_params(self, param_file):
    """Read model parameters from ``param_file`` and upload them.

    ``util.load`` is handed an empty dict to fill; its 'W', 'biasesHid' and
    'biasesVis' entries are then wrapped in CUDAMatrix objects.  Finally
    ``input_dim`` and ``num_units`` are taken from the shape of the weight
    matrix.
    """
    loaded = {}
    util.load(param_file, loaded, verbose=False)

    def as_cuda(key):
        # Reformat the host array and wrap it in a CUDAMatrix.
        return cm.CUDAMatrix(cm.reformat(loaded[key]))

    self.cmW = as_cuda('W')
    self.cmBiasesHid = as_cuda('biasesHid')
    self.cmBiasesVis = as_cuda('biasesVis')

    weight_shape = self.cmW.shape
    self.input_dim, self.num_units = weight_shape
def get_iterator(self, batch_size, return_labels=False):
    """Yield batches of columns read from the binary file ``self.data_file``.

    The file is read as raw float32 values (4 bytes each), ``self._data_dim``
    values per data point.  Data is read from disk in chunks of up to 1000
    batches; each chunk is uploaded as one CUDAMatrix and its columns are
    visited in the order given by a random permutation, ``batch_size``
    columns at a time.

    Args:
        batch_size: number of columns per yielded batch.  Only
            ``self.num_pts // batch_size`` full batches are produced.
        return_labels: accepted but not used by this implementation.

    Yields:
        The same ``(self._data_dim, batch_size)`` matrix object every time;
        it is passed as the target of ``select_columns`` before each yield
        and is freed when the generator finishes or is closed.

    Raises:
        IOError: if the file ends before the expected number of batches has
            been read.
    """
    data_dim = self._data_dim
    # Floor division: same result as Python 2 `/` on ints, and still an
    # integer batch count under Python 3.
    num_batches = self.num_pts // batch_size
    num_batches_per_load = 1000
    num_bytes_per_batch = 4 * data_dim * batch_size
    num_bytes_per_load = num_batches_per_load * num_bytes_per_batch

    cm_data = None
    cm_data_big = None
    cm_indices = None
    self.f = open(self.data_file, 'rb')
    try:
        cm_data = CM.empty((data_dim, batch_size))
        batch_num = 0
        batch_num_since_last_load = 0
        num_batches_loaded = 0
        while batch_num < num_batches:
            if batch_num_since_last_load == num_batches_loaded:
                # Current chunk is used up (or nothing loaded yet): read more.
                cur_data_str = self.f.read(num_bytes_per_load)
                num_batches_loaded = len(cur_data_str) // num_bytes_per_batch
                if num_batches_loaded == 0:
                    # Without this check the code would go on to select
                    # columns from an empty chunk.
                    raise IOError(
                        "%s ended after %d of %d expected batches"
                        % (self.data_file, batch_num, num_batches))
                num_pts_read = num_batches_loaded * batch_size
                cur_data = zeros((data_dim, num_pts_read), 'float32')
                for k in range(num_batches_loaded):
                    byte_s = k * num_bytes_per_batch
                    byte_e = byte_s + num_bytes_per_batch
                    col_s = k * batch_size
                    # frombuffer replaces the deprecated binary mode of
                    # fromstring; the slice assignment below copies the
                    # values into cur_data.
                    data_arr = numpy.frombuffer(cur_data_str[byte_s:byte_e],
                                                dtype='float32')
                    cur_data[:, col_s:(col_s + batch_size)] = \
                        data_arr.reshape((data_dim, batch_size), order='F')
                # Release the previous chunk before uploading the new one.
                if cm_data_big is not None:
                    cm_data_big.free_device_memory()
                    cm_data_big = None
                if cm_indices is not None:
                    cm_indices.free_device_memory()
                    cm_indices = None
                cm_data_big = CM.CUDAMatrix(cur_data)
                cm_indices = CM.CUDAMatrix(
                    permutation(num_pts_read).reshape(1, -1))
                batch_num_since_last_load = 0
                cur_data_str = None
            start = batch_num_since_last_load * batch_size
            cm_data_big.select_columns(
                cm_indices.slice(start, start + batch_size), cm_data)
            batch_num_since_last_load += 1
            batch_num += 1
            yield cm_data
    finally:
        # Runs on normal exhaustion, on errors, and when the consumer stops
        # early (generator close), so neither GPU memory nor the file handle
        # is left behind.  Matrices that were never created are skipped.
        try:
            for mat in (cm_data, cm_data_big, cm_indices):
                if mat is not None:
                    mat.free_device_memory()
            cm_data, cm_data_big, cm_indices = None, None, None
        finally:
            self.f.close()
            self.f = None
def test_softmax(): m = 2000 n = 128 data = np.random.randn(m, n) prob = data - data.max(axis=0).reshape(1, -1) prob = np.exp(prob) / np.exp(prob).sum(axis=0).reshape(1, -1) cm_data = cm.CUDAMatrix(cm.reformat(data)) cm_prob = cm.CUDAMatrix(cm.reformat(np.zeros(data.shape))) cm_data.compute_softmax(cm_prob) error = np.sum((cm_prob.asarray() - prob)**2) print "Error = ", error assert error < 10**-2, "Error in CUDAMatrix.compute_softmax exceeded threshold"
def test_columnwise_dot():
    """Compare cm.columnwise_dot with per-column dot products from NumPy."""
    m = 64
    n = 64

    def random_f32():
        # Column-major float32 matrix of standard-normal samples.
        return np.array(np.random.randn(m, n), dtype=np.float32, order='F')

    a = random_f32()
    b = random_f32()

    # Reference: dot product of matching columns, as a single row.
    res = (a * b).sum(axis=0).reshape(1, -1)

    m1 = cm.CUDAMatrix(a)
    m2 = cm.CUDAMatrix(b)
    cm_res = cm.CUDAMatrix(cm.reformat(np.zeros(res.shape)))
    cm.columnwise_dot(m1, m2, cm_res)

    err = np.abs(res - cm_res.asarray()).sum()
    assert err < 10**-2, "Error in cudamat_ext.columnwise_dot exceeded threshold"
def permute_indices_for_loaded_data(self):
    '''
    Draws a new random order for the indices of the data that is currently
    loaded, uploads the reordered indices and rewinds the batch pointer.
    Can be used if we load all the data at once and don't want to reload it.
    '''
    order = permutation(self._data_indices.size)
    shuffled = self._data_indices[0, order]
    self._data_indices = shuffled.reshape((1, -1))

    host_indices = cm.reformat(self._data_indices)
    self._cm_indices_matrix = cm.CUDAMatrix(host_indices)

    # Start batching from the beginning and mark the object as ready.
    self._batch_index = 0
    self._is_setup = True
def test_softmax_sample():
    """Estimate column distributions with SampleSoftMax and plot the result.

    A random (dim, num_pts) matrix with a boosted diagonal is normalised so
    every column sums to one.  ``SampleSoftMax`` is called ``num_draws``
    times on the log-probabilities; the accumulated draws divided by
    ``num_draws`` give the estimated probabilities.  Timing and the mean
    absolute error are printed, true and estimated probabilities are
    plotted, and the function waits for the user to press enter.

    Returns:
        (est_probs, probs): estimated and true probability matrices.
    """
    # pyplot is only needed by this interactive test, hence the local import.
    # `plot` is imported here as well because the function uses it.
    from matplotlib.pyplot import subplot, imshow, draw, plot

    dim, num_pts = 160, 128
    num_draws = 10000
    probs = rand(dim, num_pts)
    for i in range(min(dim, num_pts)):
        probs[i, i] = 2.0
    probs = probs / probs.sum(axis=0).reshape(1, -1)

    cm_prob = cm.CUDAMatrix(log(probs))
    # NOTE(review): cm_data is not zeroed before the first SampleSoftMax call
    # (only after each draw) -- confirm SampleSoftMax overwrites its target.
    cm_data = cm.empty(probs.shape)
    cm_rands = cm.empty(probs.shape)
    cm_counts = cm.empty(probs.shape).assign(0)

    s = datetime.datetime.now()
    for _ in range(num_draws):
        cm_rands.fill_with_rand()
        cm_prob.SampleSoftMax(cm_rands, cm_data)
        cm_counts.add(cm_data)
        cm_data.assign(0)
    e = datetime.datetime.now()
    diff = e - s
    # timedelta.microseconds is only the sub-second component; combine all
    # fields to report the whole elapsed time.
    elapsed_us = (diff.days * 86400 + diff.seconds) * 10**6 + diff.microseconds

    cm_counts.divide(num_draws)
    est_probs = cm_counts.asarray().copy()

    print("Total time for %d draws = %d microseconds\n" % (num_draws, elapsed_us))
    print("Average case error = %.5f \n" % (np.mean(abs(est_probs - probs))))

    subplot(311)
    imshow(probs, aspect='auto', interpolation='nearest')
    subplot(312)
    imshow(est_probs, aspect='auto', interpolation='nearest')
    # Estimated and true distribution of the first column on the same axes.
    subplot(313)
    plot(est_probs[:, 0])
    subplot(313)
    plot(probs[:, 0])
    draw()
    time.sleep(0.2)
    raw_input('enter to finish')
    return est_probs, probs
def load_next_data(self):
    """Load the next group of files and upload their frames and labels.

    Files ``self._file_indices[self._start_file_num:last_file]`` are fetched
    from ``self._data_src`` (at most ``self._num_files_per_load`` of them).
    Their spectrogram columns and labels are concatenated into
    ``self._data_matrix`` / ``self._label_matrix``, and a row of column
    offsets into the concatenated matrix is stored in ``self._data_indices``.

    Per file the offsets are either

    * every position ``0 .. n_frames - self._num_frames_per_pt``
      (default), or
    * with ``self._borders_only``: frame 0 plus the frames on either side of
      each label change, shifted by ``self.label_offset`` and restricted to
      the same valid range.

    Afterwards any previously uploaded matrices are freed, the new data and
    labels are uploaded, ``self._start_file_num`` is advanced and
    ``permute_indices_for_loaded_data`` is called.
    """
    last_file = min(self._start_file_num + self._num_files_per_load,
                    self._num_files)
    data_lst = []
    label_lst = []
    indices_lst = []
    num_frames = 0
    num_indices = 0
    for file_index in range(self._start_file_num, last_file):
        file_num = self._file_indices[file_index]
        data, cur_labels = self._data_src.get_spectrogram_and_labels(
            file_num, self._speaker_cmn,
            self._speaker_cmvn, self._normalize)
        if self._skip_borders != 0:
            # Drop the same number of frames from both ends.
            data = data[:, self._skip_borders:(-self._skip_borders)]
            cur_labels = cur_labels[self._skip_borders:(-self._skip_borders)]

        # Number of valid offsets for this file (may be <= 0 for short files).
        num_valid = data.shape[1] - self._num_frames_per_pt + 1
        if self._borders_only:
            change_pts = flatnonzero(cur_labels[1:] != cur_labels[:-1])
            # A file without any label change gives an empty array; checking
            # the size first avoids an IndexError on change_pts[0] and keeps
            # the rule that frame 0 is always a candidate.
            if change_pts.size == 0 or change_pts[0] != 0:
                indices = concatenate(([0], change_pts, change_pts + 1))
            else:
                indices = concatenate((change_pts, change_pts + 1))
            indices.sort()
            indices = unique(indices)
            indices -= int(self.label_offset)
            indices = indices[indices < num_valid]
            indices = indices[indices >= 0]
        else:
            indices = arange(max(0, num_valid))

        data_lst.append(data)
        label_lst.append(cur_labels.copy())
        indices_lst.append(indices.copy())
        num_frames += data.shape[1]
        num_indices += indices.size

    # NOTE(review): _num_frames is reset to 0 here rather than set to
    # num_frames -- kept as in the original; confirm this is intended.
    self._num_frames = 0
    self._num_frames_for_training = num_indices
    self._data_matrix = zeros((self._frame_dim, num_frames))
    self._label_matrix = zeros((1, num_frames))
    self._data_indices = zeros((1, num_indices))

    num_frames_so_far = 0
    num_indices_so_far = 0
    for (cur_data, cur_labels, cur_indices) in zip(data_lst, label_lst,
                                                   indices_lst):
        num_frames_cur = cur_data.shape[1]
        frames_end = num_frames_so_far + num_frames_cur
        # Slice assignment copies the values, so no explicit copy is needed.
        self._data_matrix[:, num_frames_so_far:frames_end] = cur_data
        self._label_matrix[0, num_frames_so_far:frames_end] = cur_labels

        num_indices_cur = cur_indices.size
        # Shift per-file offsets so they address the concatenated matrix.
        self._data_indices[0, num_indices_so_far:(
            num_indices_so_far + num_indices_cur)] = \
            cur_indices + num_frames_so_far

        num_frames_so_far += num_frames_cur
        num_indices_so_far += num_indices_cur
    assert (num_indices_so_far == num_indices)
    assert (num_frames_so_far == num_frames)

    # Free whatever was uploaded by an earlier load.  Each attribute is
    # checked on its own, so one missing attribute (first call) does not
    # prevent the others from being released.
    for attr in ('_cm_data_matrix', '_cm_targets_matrix',
                 '_cm_indices_matrix'):
        old_matrix = getattr(self, attr, None)
        if old_matrix is not None:
            old_matrix.free_device_memory()

    self._cm_data_matrix = cm.CUDAMatrix(self._data_matrix)
    self._cm_targets_matrix = cm.CUDAMatrix(self._label_matrix)
    self._start_file_num = last_file
    self.permute_indices_for_loaded_data()
def get_iterator(self, batch_size, return_labels=True):
    """Yield batches of stacked frames (and optionally their targets).

    Work matrices for one batch are allocated up front and stored on
    ``self``.  For every batch, ``batch_size`` entries are sliced from
    ``self._cm_indices_matrix``; adding ``0 .. _num_frames_per_pt - 1`` to
    each gives the data columns to select, and adding
    ``0 .. _num_outputs_per_pt - 1`` plus ``self.label_offset`` gives the
    label columns to select.  Selected labels are used in turn to select
    columns of an identity matrix of size ``get_label_dim()``.

    When fewer than ``batch_size`` indices remain, ``load_next_data`` is
    called; iteration ends once all files have been loaded.  At the end
    ``self._is_setup`` is set to False.

    Args:
        batch_size: number of data points per batch.
        return_labels: if true yield ``(data, targets)`` pairs, otherwise
            only the data matrix.

    Yields:
        ``self._cm_data_for_batch`` (reshaped to ``(_data_dim, batch_size)``)
        and, with ``return_labels``, ``self._cm_targets_for_batch`` (reshaped
        to ``(label_dim * _num_outputs_per_pt, batch_size)``).  The same
        matrix objects are reused for every batch.

    Raises:
        Exception: if neither setup nor permutation has been run yet
            (``_is_setup`` attribute missing).
    """
    if not hasattr(self, '_is_setup'):
        # Call form of raise: valid in Python 2 and 3, same exception/message.
        raise Exception("Call setup_data or permute_indices first")
    if not self._is_setup:
        self.permute_file_indices_for_loading()

    label_dim = self.get_label_dim()
    self._cm_data_for_batch = cm.empty((self._data_dim, batch_size))
    target_shape = (label_dim, self._num_outputs_per_pt * batch_size)
    multi_target_shape = (label_dim * self._num_outputs_per_pt, batch_size)
    self._cm_targets_for_batch = cm.empty(target_shape)
    self._cm_data_indices_for_batch = cm.empty((1, batch_size))
    self._cm_data_indices_with_frames = cm.empty(
        (self._num_frames_per_pt, batch_size))
    self._cm_target_indices_with_frames = cm.empty(
        (self._num_outputs_per_pt, batch_size))
    self._cm_target_indices_for_batch = cm.empty(
        (1, self._num_outputs_per_pt * batch_size))
    # Column vectors 0..k-1 that are added to the base indices below.
    self._cm_range_frames = cm.CUDAMatrix(cm.reformat(
        arange(self._num_frames_per_pt).reshape((-1, 1))))
    self._cm_range_target_frames = cm.CUDAMatrix(cm.reformat(
        arange(self._num_outputs_per_pt).reshape((-1, 1))))
    # Identity matrix: selecting column c gives the one-hot vector for c.
    self._cm_target_vectors_matrix = cm.CUDAMatrix(eye(label_dim))

    while True:
        if self._batch_index + batch_size > self._num_frames_for_training:
            if self._start_file_num >= self._num_files:
                break
            # NOTE(review): assumes the freshly loaded data provides at
            # least batch_size indices -- confirm for small loads.
            self.load_next_data()

        self._cm_indices_matrix.get_col_slice(
            self._batch_index, self._batch_index + batch_size,
            self._cm_data_indices_for_batch)

        # Data columns: base index + frame offset, flattened to one row.
        self._cm_data_indices_with_frames.reshape(
            (self._num_frames_per_pt, batch_size))
        self._cm_data_indices_with_frames.assign(0)
        self._cm_data_indices_with_frames.add_col_vec(self._cm_range_frames)
        self._cm_data_indices_with_frames.add_row_vec(
            self._cm_data_indices_for_batch)
        self._cm_data_indices_with_frames.reshape(
            (1, self._num_frames_per_pt * batch_size))

        # Target columns: base index + output-frame offset + label_offset.
        self._cm_target_indices_with_frames.reshape(
            (self._num_outputs_per_pt, batch_size))
        self._cm_target_indices_with_frames.assign(0)
        self._cm_target_indices_with_frames.add_col_vec(
            self._cm_range_target_frames)
        self._cm_target_indices_with_frames.add_row_vec(
            self._cm_data_indices_for_batch)
        self._cm_target_indices_with_frames.add(self.label_offset)
        self._cm_target_indices_with_frames.reshape(
            (1, self._num_outputs_per_pt * batch_size))

        self._cm_data_matrix.select_columns(
            self._cm_data_indices_with_frames, self._cm_data_for_batch)
        self._cm_data_for_batch.reshape((self._data_dim, batch_size))
        if self.dropout_rate != 0:
            # After dropout, rescale by 1 / (1 - rate).
            self._cm_data_for_batch.dropout(self.dropout_rate)
            self._cm_data_for_batch.mult(1. / (1 - self.dropout_rate))
        self._batch_index += batch_size

        if return_labels:
            self._cm_targets_matrix.select_columns(
                self._cm_target_indices_with_frames,
                self._cm_target_indices_for_batch)
            self._cm_targets_for_batch.reshape(target_shape)
            self._cm_target_vectors_matrix.select_columns(
                self._cm_target_indices_for_batch,
                self._cm_targets_for_batch)
            self._cm_targets_for_batch.reshape(multi_target_shape)
            yield self._cm_data_for_batch, self._cm_targets_for_batch
        else:
            yield self._cm_data_for_batch
    self._is_setup = False
data_dim = 5 target_dim = 15 batch_size=20 nn_def_file = "params/nn_def_20_10.txt" nn_train = nnet_train.nn() nn_train.create_nnet_from_def(nn_def_file, data_dim = data_dim, target_dim = target_dim) nn_train.create_activations_and_probs(batch_size) lst_num_hid = list(nn_train._lst_num_hid) data, targets, lst_hids, lst_wts = init_data(lst_num_hid, data_dim, target_dim, batch_size) set_nn_wts(nn_train, lst_wts) cm_data = cm.CUDAMatrix(data) cm_targets = cm.CUDAMatrix(targets) nn_train.fwd_prop(cm_data) check_hid_activities(nn_train, lst_hids) lgprob_orig, lgprob_orig_cpu = check_lg_probs(cm_data, cm_targets, nn_train) nn_train.fwd_prop(cm_data) cm_predictions = nn_train._lst_outputs[-1] cm_targets.subtract(cm_predictions, nn_train._lst_activations_grad[-1]) nn_train.back_prop(cm_data) layer_grads = [layer._wts_grad.asarray().copy() for layer in \
def compute_predictions_for_sentence_multi(db, nnet_model, fileNum,
                                           use_sum=False, get_labels=False,
                                           decoding_context=-1):
    """Predict per-frame log-probabilities for one file with a multi-frame model.

    The file's data is striped with ``db.get_num_frames_per_pt()`` frames per
    point and passed to ``nnet_model.predict``.  The predictions are then
    reduced in one of two ways:

    * more than one output frame per point and ``decoding_context == -1``:
      the predictions are un-striped over all output frames and the border
      columns are trimmed;
    * otherwise: only the rows belonging to the output frames within
      ``decoding_context`` of the middle frame are kept (and un-striped and
      trimmed when ``decoding_context != 0``).

    With ``use_sum`` false the (unnormalised) predictions are turned into
    log-softmax values on the GPU; otherwise ``log(predictions + 1e-35)`` is
    taken on the host.

    Args:
        db: data source providing ``get_data_for_file``,
            ``get_num_frames_per_pt`` and ``get_num_outputs_frames_per_pt``.
        nnet_model: model providing ``predict``.
        fileNum: identifier of the file to decode.
        use_sum: selects normalised predictions and the host-side log.
        get_labels: also return the predicted classes and the labels.
        decoding_context: number of output frames used on each side of the
            middle frame; -1 selects the first reduction above.

    Returns:
        ``(predictions, num_correct, log_probs)``, extended by
        ``(pred_class, labels.reshape(-1))`` when ``get_labels`` is true.
        ``log_probs`` and ``num_correct`` are computed with the module-level
        ``sum``.
    """
    data, labels = db.get_data_for_file(fileNum, return_labels=True)
    data_striped = StripeData(data, db.get_num_frames_per_pt(), append=True)
    predictions = nnet_model.predict(data_striped, unnormalized=not use_sum)
    num_out_frames_per_pt = db.get_num_outputs_frames_per_pt()

    # Disabled code kept from the original:
    #if use_sum:
        #wts = hamming(num_out_frames_per_pt)[newaxis,:]
        #wts = tile(wts, (predictions.shape[0]/num_out_frames_per_pt,1)).reshape(-1,1, order='F')
        #prob = 0.1
        #predictions *= (prob*wts + (1-prob))

    # All slice bounds below are explicit ints: floor() and `/` can produce
    # floats, which are not valid array indices.
    if num_out_frames_per_pt != 1 and decoding_context == -1:
        predictions = UnStripeData(predictions, num_out_frames_per_pt)
        extra_left = int((num_out_frames_per_pt - 1) // 2)
        extra_right = int(num_out_frames_per_pt - 1 - extra_left)
        predictions = predictions[:, extra_left:-extra_right]
    else:
        frame_dim = int(predictions.shape[0] // num_out_frames_per_pt)
        mid_frame = int(num_out_frames_per_pt // 2)
        start_frame = mid_frame - decoding_context
        end_frame = mid_frame + decoding_context + 1
        predictions = predictions[(frame_dim * start_frame):
                                  (frame_dim * end_frame), :]
        if decoding_context != 0:
            predictions = UnStripeData(predictions, decoding_context * 2 + 1)
            predictions = predictions[:, decoding_context:-decoding_context]

    # Predicted class per column, taken before the log transform.
    pred_class = predictions.argmax(axis=0)

    if not use_sum:
        cm_pred = cm.CUDAMatrix(predictions)
        cm_probs = cm.empty(cm_pred.shape).assign(0)
        cm_pred.compute_softmax(cm_probs)
        cm.log(cm_probs)
        predictions = cm_probs.asarray().copy()
        cm_probs.free_device_memory()
        cm_pred.free_device_memory()
        cm_probs, cm_pred = None, None
    else:
        # Small constant guards against log(0).
        predictions = log(predictions + 1e-35)

    # Column j of class_matrix is the one-hot vector of labels[j].
    ones_matrix = eye(predictions.shape[0])
    class_matrix = ones_matrix[:, labels]
    log_probs = sum(predictions * class_matrix)
    num_correct = sum(pred_class == labels.reshape(-1))

    if get_labels:
        return (predictions, num_correct, log_probs, pred_class,
                labels.reshape(-1))
    else:
        return predictions, num_correct, log_probs
def copy_params_from_dict(self, params_dict):
    """Upload this layer's weights and biases taken from ``params_dict``.

    The entries are looked up under ``self.name`` followed by ``_wts`` and
    ``_b`` and stored as CUDAMatrix objects in ``self._wts`` and ``self._b``.
    """
    # Weights first, then biases.
    for suffix in ("_wts", "_b"):
        host_value = params_dict[self.name + suffix]
        setattr(self, suffix, cm.CUDAMatrix(host_value))
def set_input_mask(self, mask):
    """Wrap ``mask`` in a CUDAMatrix and store it as ``self._wt_mask``."""
    device_mask = cm.CUDAMatrix(mask)
    self._wt_mask = device_mask
def copy_params_from_dict(self, params_dict):
    """Restore weights, biases and the softmax count from ``params_dict``.

    Entries are looked up under ``self.name`` followed by ``_wts``, ``_b``
    and ``_num_softmaxes``.  ``num_units`` is then recomputed as
    ``num_hid / num_softmaxes``.
    """
    def entry(suffix):
        # Parameter stored under "<layer name><suffix>".
        return params_dict[self.name + suffix]

    self._wts = cm.CUDAMatrix(entry("_wts"))
    self._b = cm.CUDAMatrix(entry("_b"))

    softmax_count = entry("_num_softmaxes")
    self.num_softmaxes = softmax_count
    self.num_units = self.num_hid / self.num_softmaxes
# Script: compare cm.compute_logistic_log_prob with a host-side reference
# and print the squared error.

# NOTE(review): GetGPULock() is called before cudamat_ext is imported --
# presumably it reserves a GPU for this process; confirm against GPULock.
import GPULock
GPULock.GetGPULock()
import cudamat_ext as cm
cm.cublas_init()
# Fixed seed for the CUDAMatrix random generator.
cm.CUDAMatrix.init_random(42)
import numpy as np
from pylab import log, sum

m = 299
n = 128
# Random boolean targets and random values in [0, 1) used as probabilities.
target = np.random.rand(m, n) > 0.5
prob = np.random.rand(m, n)
cm_target = cm.CUDAMatrix(cm.reformat(target))
cm_prob = cm.CUDAMatrix(cm.reformat(prob))
# One output value per column.
cm_log_prob = cm.empty((1,n))
cm.compute_logistic_log_prob(cm_prob, cm_target, cm_log_prob)
# Host reference: per-column sum of t*log(p) + (1-t)*log(1-p); the 1e-8
# terms keep the logarithm away from zero.
lg_prob = sum(target*log(1e-8+prob) + (1-target)*log(1-prob+1e-8), axis=0).reshape((1,-1))
error = np.sum((cm_log_prob.asarray() - lg_prob)**2)
print "Error = ", error, " sum_lg_prob = ", str(sum(lg_prob))