def gpu_grads(cm_data, cm_targets, nn_train, eps=1e-6):
    """Estimate weight gradients numerically with forward differences.

    Every weight of every layer in ``nn_train._lst_layers`` is perturbed by
    ``eps`` in turn, the network is run forward again, and the change in the
    summed ``cm_probs`` output of ``cm.compute_softmax_accuraccy`` is divided
    by ``eps`` and by the batch size.

    Args:
        cm_data: input matrix; ``cm_data.shape[1]`` is used as batch size.
        cm_targets: targets handed to ``cm.compute_softmax_accuraccy``.
        nn_train: network exposing ``fwd_prop``, ``_lst_layers`` and
            ``_lst_outputs``.
        eps: size of the perturbation applied to each weight.

    Returns:
        A list with one host array per layer, shaped like that layer's
        weights, holding the finite-difference estimates.

    The layer weights are restored to their original values before
    returning. The cost is one forward pass per weight.
    """
    batch_size = cm_data.shape[1]
    cm_probs = cm.empty((1, batch_size))
    cm_correct = cm.empty((1, batch_size))

    # Host-side snapshot of the unperturbed weights.  The original body used
    # `lst_wts` without ever defining it; it is built here the same way the
    # sibling check_lg_probs() builds it.
    lst_wts = [layer._wts.asarray().copy() for layer in nn_train._lst_layers]

    # Baseline value at the unperturbed weights.
    nn_train.fwd_prop(cm_data)
    cm.compute_softmax_accuraccy(nn_train._lst_outputs[-1], cm_targets,
                                 cm_probs, cm_correct)
    lg_prob = sum(cm_probs.asarray())

    lst_grads = []
    for layer, wts in zip(nn_train._lst_layers, lst_wts):
        wts_grad = zeros(wts.shape)
        for i in range(wts.shape[0]):
            for j in range(wts.shape[1]):
                # Perturb a single entry of a fresh copy so that earlier
                # perturbations never accumulate.
                wts_cpy = wts.copy()
                wts_cpy[i, j] += eps
                layer._wts.load_matrix(wts_cpy)

                nn_train.fwd_prop(cm_data)
                # Re-read the output after each pass rather than relying on
                # a reference captured before the first perturbation.
                cm.compute_softmax_accuraccy(nn_train._lst_outputs[-1],
                                             cm_targets, cm_probs,
                                             cm_correct)
                lg_prob_cur = sum(cm_probs.asarray())
                wts_grad[i, j] = (lg_prob_cur - lg_prob) / eps / batch_size

        # reset wt to original
        layer._wts.load_matrix(wts.copy())
        lst_grads.append(wts_grad)
    return lst_grads
def create_gradients(self):
    """Allocate zero-filled increment matrices for the model parameters.

    Creates ``cmWInc``, ``cmBiasesHidInc`` and ``cmBiasesVisInc``, each with
    the shape of the parameter it accompanies (``cmW``, ``cmBiasesHid`` and
    ``cmBiasesVis`` respectively).
    """
    pairs = (
        ("cmWInc", "cmW"),
        ("cmBiasesHidInc", "cmBiasesHid"),
        ("cmBiasesVisInc", "cmBiasesVis"),
    )
    for inc_attr, param_attr in pairs:
        increment = cm.empty(getattr(self, param_attr).shape)
        setattr(self, inc_attr, increment)
        # cm.empty() does not initialise the contents; clear them explicitly.
        increment.assign(0)
def init_from_config(self, config_def):
    """Create the weight and bias matrices described by ``config_def``.

    Reads ``input_dim``, ``num_units``, ``wt_sigma``, ``vis_bias`` and
    ``hid_bias`` from ``config_def``.  Weights are filled by
    ``fill_with_randn`` and multiplied by ``wt_sigma``; each bias column is
    set to its configured constant.
    """
    num_vis = config_def.input_dim
    num_hid = config_def.num_units
    self.input_dim = num_vis
    self.num_units = num_hid

    # Each attribute is cleared before the new matrix is created, as in the
    # original -- presumably so an old matrix is released first; confirm.
    self.cmW = None
    weights = cm.empty((num_vis, num_hid))
    self.cmW = weights
    weights.fill_with_randn()
    weights.mult(config_def.wt_sigma)

    self.cmBiasesVis = None
    vis_biases = cm.empty((num_vis, 1))
    self.cmBiasesVis = vis_biases
    vis_biases.assign(config_def.vis_bias)

    self.cmBiasesHid = None
    hid_biases = cm.empty((num_hid, 1))
    self.cmBiasesHid = hid_biases
    hid_biases.assign(config_def.hid_bias)
def add_gradients_transposed(self, src):
    """Add the transpose of ``src._wts_grad`` to this object's ``_wts_grad``.

    A scratch matrix shaped like ``self._wts`` is created on first use and
    kept on the instance as ``_wts_grad_cpy`` for later calls.
    """
    if not hasattr(self, '_wts_grad_cpy'):
        self._wts_grad_cpy = cm.empty(self._wts.shape)

    scratch = self._wts_grad_cpy
    src._wts_grad.transpose(scratch)
    self._wts_grad.add(scratch)
def allocate_activations(self, batch_size):
    """Allocate the nine work matrices and return them as a tuple.

    The matrices come straight from ``cm.empty`` and are returned in this
    order: recon, hidprobs, hidstates, posprods, negprods, poshidacts,
    neghidacts, posvisacts, negvisacts.
    """
    num_vis, num_hid = self.input_dim, self.num_units
    shapes = (
        (num_vis, batch_size),   # cm_recon
        (num_hid, batch_size),   # cm_hidprobs
        (num_hid, batch_size),   # cm_hidstates
        (num_vis, num_hid),      # cm_posprods
        (num_vis, num_hid),      # cm_negprods
        (num_hid, 1),            # cm_poshidacts
        (num_hid, 1),            # cm_neghidacts
        (num_vis, 1),            # cm_posvisacts
        (num_vis, 1),            # cm_negvisacts
    )
    return tuple([cm.empty(shape) for shape in shapes])
def get_iterator(self, batch_size, return_labels=False):
    """Yield shuffled mini-batches read from the binary file ``self.data_file``.

    The file is read as consecutive float32 values, ``self._data_dim`` per
    data point.  It is consumed in chunks of up to 1000 batches; each chunk
    is copied to the GPU, and its columns are visited in a random order
    drawn once per chunk.

    Args:
        batch_size: number of columns in every yielded matrix.
        return_labels: accepted but never read by this implementation.

    Yields:
        The same ``(data_dim, batch_size)`` GPU matrix on every iteration,
        overwritten in place with the next batch.

    Raises:
        IOError: if the file ends before ``self.num_pts // batch_size``
            batches have been produced.

    The file handle and all GPU buffers are released when the generator
    finishes, is closed early, or fails.
    """
    data_dim = self._data_dim
    # Floor division keeps the counts integral on every Python version.
    num_batches = self.num_pts // batch_size
    num_batches_per_load = 1000
    num_bytes_per_batch = 4 * data_dim * batch_size
    num_bytes_per_load = num_batches_per_load * num_bytes_per_batch

    cm_data = cm_data_big = cm_indices = None
    self.f = open(self.data_file, 'rb')
    try:
        cm_data = CM.empty((data_dim, batch_size))
        batch_num = 0
        batch_num_since_last_load = 0
        num_batches_loaded = 0
        while batch_num < num_batches:
            if batch_num_since_last_load == num_batches_loaded:
                # Current chunk is used up (or nothing loaded yet).
                cur_data_str = self.f.read(num_bytes_per_load)
                num_batches_loaded = len(cur_data_str) // num_bytes_per_batch
                if num_batches_loaded == 0:
                    raise IOError(
                        "%s ended after %d of %d batches"
                        % (self.data_file, batch_num, num_batches))
                num_pts_read = num_batches_loaded * batch_size

                # Each point is data_dim consecutive floats, so a single
                # column-major reshape gives one point per column.  Bytes
                # past the last whole batch are ignored via `count`.
                cur_data = zeros((data_dim, num_pts_read), 'float32')
                cur_data[:] = numpy.frombuffer(
                    cur_data_str, dtype='float32',
                    count=num_pts_read * data_dim).reshape(
                        (data_dim, num_pts_read), order='F')
                cur_data_str = None

                # Release the previous chunk before uploading the new one.
                if cm_data_big is not None:
                    cm_data_big.free_device_memory()
                    cm_data_big = None
                if cm_indices is not None:
                    cm_indices.free_device_memory()
                    cm_indices = None
                cm_data_big = CM.CUDAMatrix(cur_data)
                cm_indices = CM.CUDAMatrix(
                    permutation(num_pts_read).reshape(1, -1))
                batch_num_since_last_load = 0

            start = batch_num_since_last_load * batch_size
            cm_data_big.select_columns(
                cm_indices.slice(start, start + batch_size), cm_data)
            batch_num_since_last_load += 1
            batch_num += 1
            yield cm_data
    finally:
        try:
            for cm_mat in (cm_data, cm_data_big, cm_indices):
                if cm_mat is not None:
                    cm_mat.free_device_memory()
        finally:
            self.f.close()
            self.f = None
def check_lg_probs(cm_data, cm_targets, nn_train):
    """Compare the GPU value with the host value from ``calc_lg_prob``.

    The host value is computed from copies of the data, targets and layer
    weights.  The GPU value is the sum of the ``cm_probs`` output of
    ``cm.compute_softmax_accuraccy`` applied to the network's last output.
    No forward pass is run here, so ``nn_train._lst_outputs`` must already
    hold the outputs for ``cm_data``.

    A failure line is written to stdout when the per-case absolute
    difference exceeds 1e-6.

    Returns:
        ``(lgprob_gpu, lgprob_cpu)``.
    """
    host_weights = []
    for layer in nn_train._lst_layers:
        host_weights.append(layer._wts.asarray().copy())
    lgprob_cpu = calc_lg_prob(cm_data.asarray().copy(),
                              cm_targets.asarray().copy(),
                              host_weights)

    num_cases = cm_data.shape[1]
    cm_probs = cm.empty((1, num_cases))
    cm_correct = cm.empty((1, num_cases))
    cm.compute_softmax_accuraccy(nn_train._lst_outputs[-1], cm_targets,
                                 cm_probs, cm_correct)
    lgprob_gpu = sum(cm_probs.asarray())

    tolerance = 1e-6
    per_case_err = abs(lgprob_gpu - lgprob_cpu) / num_cases
    if per_case_err > tolerance:
        values = " cpu, gpu = %.4f, %.4f\n" % (lgprob_cpu, lgprob_gpu)
        sys.stdout.write("FAILED test for log probs. " + values)
    return lgprob_gpu, lgprob_cpu
def compute_recon_error_for_db(self, data_src, batch_size):
    """Return the root-mean-square reconstruction error over ``data_src``.

    Each batch is mapped to hidden sigmoid activations and back to a
    reconstruction; the squared Euclidean norm of
    ``reconstruction - batch`` is accumulated.  The result is the square
    root of that total divided by ``input_dim * number_of_points``.
    """
    cm_hidprobs = None
    cm_recon = None
    sq_err_total = 0
    pts_seen = 0

    for batch_data in data_src.get_iterator(batch_size,
                                            return_labels=False):
        # Work buffers are created on the first batch and reused after.
        if cm_hidprobs is None:
            cm_hidprobs = cm.empty((self.num_units, batch_size))
            cm_recon = cm.empty(batch_data.shape).assign(0)

        # Up: hidden activations from the data.
        cm.dot(self.cmW.T, batch_data, cm_hidprobs)
        cm_hidprobs.add_col_vec(self.cmBiasesHid)
        cm_hidprobs.apply_sigmoid()

        # Down: reconstruction from the hidden activations.
        cm.dot(self.cmW, cm_hidprobs, target=cm_recon)
        cm_recon.add_col_vec(self.cmBiasesVis)

        cm_recon.subtract(batch_data)
        sq_err_total = cm_recon.euclid_norm() ** 2 + sq_err_total
        pts_seen += batch_size

    return sqrt(sq_err_total * 1. / (self.input_dim * pts_seen))
def test_softmax_sample():
    """Compare empirical frequencies from ``SampleSoftMax`` with the inputs.

    Builds a random 160 x 128 matrix whose columns sum to one (diagonal
    entries boosted before normalising), runs ``SampleSoftMax`` 10000 times
    on its log, accumulates the outputs, and compares the averaged counts
    with the original probabilities.  Prints the elapsed time and the mean
    absolute error, shows three plots, and waits for the user to press
    enter.

    Returns:
        ``(est_probs, probs)``: estimated and true probability matrices.
    """
    dim, num_pts = 160, 128
    num_draws = 10000

    probs = rand(dim, num_pts)
    for i in range(min(dim, num_pts)):
        probs[i, i] = 2.0
    probs = probs / probs.sum(axis=0).reshape(1, -1)

    cm_prob = cm.CUDAMatrix(log(probs))
    cm_data = cm.empty(probs.shape)
    cm_rands = cm.empty(probs.shape)
    cm_counts = cm.empty(probs.shape).assign(0)

    s = datetime.datetime.now()
    for _ in range(num_draws):
        cm_rands.fill_with_rand()
        cm_prob.SampleSoftMax(cm_rands, cm_data)
        cm_counts.add(cm_data)
        cm_data.assign(0)
    e = datetime.datetime.now()

    diff = e - s
    # `diff.microseconds` is only the sub-second component of the
    # timedelta; fold days and seconds in to get the true total.
    elapsed_us = ((diff.days * 86400 + diff.seconds) * 10 ** 6
                  + diff.microseconds)

    cm_counts.divide(num_draws)
    est_probs = cm_counts.asarray().copy()

    # A single parenthesised argument prints identically under the Python 2
    # print statement and the Python 3 print function.
    print("Total time for %d draws = %d microseconds\n"
          % (num_draws, elapsed_us))
    print("Average case error = %.5f \n"
          % (np.mean(abs(est_probs - probs))))

    # `plot` is imported here as well so the plotting section does not
    # depend on a module-level import.
    from matplotlib.pyplot import subplot, imshow, draw, plot
    subplot(311)
    imshow(probs, aspect='auto', interpolation='nearest')
    subplot(312)
    imshow(est_probs, aspect='auto', interpolation='nearest')
    subplot(313)
    plot(est_probs[:, 0])
    subplot(313)
    plot(probs[:, 0])
    draw()
    time.sleep(0.2)
    raw_input('enter to finish')
    return est_probs, probs
def create_from_params(self, layer_def):
    """Allocate and initialise this layer's parameters from ``layer_def``.

    Announces the layer on stderr and in the log, then creates:

    * ``_wts``: ``(input_dim, num_units)``, filled by ``fill_with_randn``
      and multiplied by ``wt_sigma / sqrt(input_dim)``;
    * ``_b``: one entry per unit, filled by ``fill_with_rand``, multiplied
      by ``biases_max - biases_min`` and offset by ``biases_min``;
    * zero-filled ``_wts_grad``, ``_wts_inc``, ``_b_grad`` and ``_b_inc``
      with the shapes of the matching parameter.
    """
    banner = "Initializing layer: %s of type %s\n" % (
        layer_def.name, type(self).__name__)
    sys.stderr.write(banner)
    logging.info(banner)

    def zeros_like(matrix):
        # Fresh matrix of the same shape with its contents cleared.
        return cm.empty(matrix.shape).assign(0)

    init_params = layer_def.init_params
    fan_in = layer_def.input_dim

    self._wts = cm.empty((fan_in, layer_def.num_units))
    self._wts.fill_with_randn()
    self._wts.mult(init_params.wt_sigma * 1. / sqrt(fan_in))

    self._b = cm.empty((self._wts.shape[1], 1))
    bias_span = init_params.biases_max - init_params.biases_min
    self._b.fill_with_rand().mult(bias_span).add(init_params.biases_min)

    self._wts_grad = zeros_like(self._wts)
    self._wts_inc = zeros_like(self._wts)
    self._b_grad = zeros_like(self._b)
    self._b_inc = zeros_like(self._b)
def get_iterator(self, batch_size, return_labels=True):
    """Yield mini-batches of data, optionally with target matrices.

    Per-batch GPU buffers are allocated up front and stored on the
    instance.  For every batch, column indices are built by adding a
    per-frame offset column to the batch's base indices, and those indices
    select the data columns (and, if requested, the targets).  Further
    files are loaded through ``load_next_data`` as the current one runs
    out.

    Args:
        batch_size: number of points per batch.
        return_labels: when true, yield ``(data, targets)``; otherwise
            yield only the data matrix.

    Raises:
        Exception: if ``_is_setup`` has never been set on the instance.

    The yielded matrices are instance buffers that are overwritten on the
    next iteration.  ``_is_setup`` is reset to False once the data is
    exhausted.
    """
    if not hasattr(self, '_is_setup'):
        raise Exception("Call setup_data or permute_indices first")
    if not self._is_setup:
        self.permute_file_indices_for_loading()

    def column_range(count):
        # GPU column vector holding 0 .. count-1.
        return cm.CUDAMatrix(cm.reformat(arange(count).reshape((-1, 1))))

    # ---- buffers reused for every batch ----
    self._cm_data_for_batch = cm.empty((self._data_dim, batch_size))
    label_dim = self.get_label_dim()
    target_shape = (label_dim, self._num_outputs_per_pt * batch_size)
    multi_target_shape = (label_dim * self._num_outputs_per_pt, batch_size)
    self._cm_targets_for_batch = cm.empty(target_shape)
    self._cm_data_indices_for_batch = cm.empty((1, batch_size))
    self._cm_data_indices_with_frames = cm.empty(
        (self._num_frames_per_pt, batch_size))
    self._cm_target_indices_with_frames = cm.empty(
        (self._num_outputs_per_pt, batch_size))
    self._cm_target_indices_for_batch = cm.empty(
        (1, self._num_outputs_per_pt * batch_size))
    self._cm_range_frames = column_range(self._num_frames_per_pt)
    self._cm_range_target_frames = column_range(self._num_outputs_per_pt)
    self._cm_target_vectors_matrix = cm.CUDAMatrix(eye(label_dim))

    while True:
        batch_end = self._batch_index + batch_size
        if batch_end > self._num_frames_for_training:
            # Not enough frames left in what is loaded: stop if no files
            # remain, otherwise load more.
            if self._start_file_num >= self._num_files:
                break
            self.load_next_data()

        # Base index of every point in this batch.
        self._cm_indices_matrix.get_col_slice(
            self._batch_index, self._batch_index + batch_size,
            self._cm_data_indices_for_batch)

        # Data indices: base index plus 0..frames-1, then flattened.
        frame_grid = (self._num_frames_per_pt, batch_size)
        self._cm_data_indices_with_frames.reshape(frame_grid)
        self._cm_data_indices_with_frames.assign(0)
        self._cm_data_indices_with_frames.add_col_vec(
            self._cm_range_frames)
        self._cm_data_indices_with_frames.add_row_vec(
            self._cm_data_indices_for_batch)
        self._cm_data_indices_with_frames.reshape(
            (1, self._num_frames_per_pt * batch_size))

        # Target indices: same construction, shifted by label_offset.
        output_grid = (self._num_outputs_per_pt, batch_size)
        self._cm_target_indices_with_frames.reshape(output_grid)
        self._cm_target_indices_with_frames.assign(0)
        self._cm_target_indices_with_frames.add_col_vec(
            self._cm_range_target_frames)
        self._cm_target_indices_with_frames.add_row_vec(
            self._cm_data_indices_for_batch)
        self._cm_target_indices_with_frames.add(self.label_offset)
        self._cm_target_indices_with_frames.reshape(
            (1, self._num_outputs_per_pt * batch_size))

        self._cm_data_matrix.select_columns(
            self._cm_data_indices_with_frames, self._cm_data_for_batch)
        self._cm_data_for_batch.reshape((self._data_dim, batch_size))

        if self.dropout_rate != 0:
            self._cm_data_for_batch.dropout(self.dropout_rate)
            # Rescale by 1 / (1 - rate) after dropout.
            self._cm_data_for_batch.mult(1. / (1 - self.dropout_rate))

        self._batch_index += batch_size

        if not return_labels:
            yield self._cm_data_for_batch
        else:
            # Look up the label ids, then select the matching columns of
            # the identity matrix as target vectors.
            self._cm_targets_matrix.select_columns(
                self._cm_target_indices_with_frames,
                self._cm_target_indices_for_batch)
            self._cm_targets_for_batch.reshape(target_shape)
            self._cm_target_vectors_matrix.select_columns(
                self._cm_target_indices_for_batch,
                self._cm_targets_for_batch)
            self._cm_targets_for_batch.reshape(multi_target_shape)
            yield self._cm_data_for_batch, self._cm_targets_for_batch

    self._is_setup = False
def compute_predictions_for_sentence_multi(db, nnet_model, fileNum,
                                           use_sum=False, get_labels=False,
                                           decoding_context=-1):
    """Compute log predictions for one file and score them against labels.

    The file's data is striped with ``StripeData``, run through
    ``nnet_model.predict`` (unnormalised unless ``use_sum`` is set), and
    reduced in one of two ways:

    * more than one output frame per point and ``decoding_context == -1``:
      all output frames are un-striped and the edge columns are trimmed;
    * otherwise: only the rows for output frames within
      ``decoding_context`` of the middle frame are kept and, when
      ``decoding_context`` is non-zero, un-striped and trimmed.

    Args:
        db: data source providing ``get_data_for_file``,
            ``get_num_frames_per_pt`` and ``get_num_outputs_frames_per_pt``.
        nnet_model: model providing ``predict``.
        fileNum: identifier passed to ``db.get_data_for_file``.
        use_sum: if true, take ``log(predictions + 1e-35)`` on the host;
            otherwise apply a GPU softmax followed by a log.
        get_labels: if true, also return predicted classes and labels.
        decoding_context: frames kept on each side of the middle output
            frame; ``-1`` selects the first reduction above.

    Returns:
        ``(predictions, num_correct, log_probs)``, extended with
        ``pred_class`` and ``labels.reshape(-1)`` when ``get_labels`` is
        true.
    """
    data, labels = db.get_data_for_file(fileNum, return_labels=True)
    data_striped = StripeData(data, db.get_num_frames_per_pt(), append=True)
    predictions = nnet_model.predict(data_striped, unnormalized=not use_sum)
    num_out_frames_per_pt = db.get_num_outputs_frames_per_pt()

    # Slice bounds must be integers: floor() returns a float, which NumPy
    # does not accept as an index, so integer arithmetic is used instead.
    if num_out_frames_per_pt != 1 and decoding_context == -1:
        predictions = UnStripeData(predictions, num_out_frames_per_pt)
        extra_left = int((num_out_frames_per_pt - 1) // 2)
        extra_right = int(num_out_frames_per_pt - 1 - extra_left)
        predictions = predictions[:, extra_left:-extra_right]
    else:
        # NOTE(review): with a single output frame and the default
        # decoding_context of -1 this branch computes start_frame = 1 and
        # end_frame = 0, i.e. an empty row slice.  Callers presumably pass
        # decoding_context=0 in that configuration -- confirm.
        frame_dim = int(predictions.shape[0] // num_out_frames_per_pt)
        mid_frame = int(num_out_frames_per_pt // 2)
        start_frame = mid_frame - decoding_context
        end_frame = mid_frame + decoding_context + 1
        predictions = predictions[
            (frame_dim * start_frame):(frame_dim * end_frame), :]
        if decoding_context != 0:
            predictions = UnStripeData(predictions, decoding_context * 2 + 1)
            predictions = predictions[:, decoding_context:-decoding_context]

    # Classes are chosen before the values are turned into logs.
    pred_class = predictions.argmax(axis=0)

    if not use_sum:
        cm_pred = cm.CUDAMatrix(predictions)
        try:
            cm_probs = cm.empty(cm_pred.shape).assign(0)
            try:
                cm_pred.compute_softmax(cm_probs)
                cm.log(cm_probs)
                predictions = cm_probs.asarray().copy()
            finally:
                # Release GPU memory even if a step above fails.
                cm_probs.free_device_memory()
        finally:
            cm_pred.free_device_memory()
    else:
        predictions = log(predictions + 1e-35)

    # Columns of the identity matrix, picked by label, mask the log
    # predictions down to the labelled class of every frame.
    ones_matrix = eye(predictions.shape[0])
    class_matrix = ones_matrix[:, labels]
    log_probs = sum(predictions * class_matrix)
    num_correct = sum(pred_class == labels.reshape(-1))

    if get_labels:
        return (predictions, num_correct, log_probs, pred_class,
                labels.reshape(-1))
    return predictions, num_correct, log_probs
import GPULock GPULock.GetGPULock() import cudamat_ext as cm cm.cublas_init() cm.CUDAMatrix.init_random(42) import numpy as np from pylab import log, sum m = 299 n = 128 target = np.random.rand(m, n) > 0.5 prob = np.random.rand(m, n) cm_target = cm.CUDAMatrix(cm.reformat(target)) cm_prob = cm.CUDAMatrix(cm.reformat(prob)) cm_log_prob = cm.empty((1,n)) cm.compute_logistic_log_prob(cm_prob, cm_target, cm_log_prob) lg_prob = sum(target*log(1e-8+prob) + (1-target)*log(1-prob+1e-8), axis=0).reshape((1,-1)) error = np.sum((cm_log_prob.asarray() - lg_prob)**2) print "Error = ", error, " sum_lg_prob = ", str(sum(lg_prob))