Exemple #1
0
def gpu_grads(cm_data, cm_targets, nn_train, eps=1e-6):
    """Estimate per-weight gradients with one-sided finite differences.

    A baseline value is obtained by running ``nn_train.fwd_prop`` on
    ``cm_data`` and summing the ``cm_probs`` output of
    ``cm.compute_softmax_accuraccy`` (named ``lg_prob`` here; presumably
    per-example log-probabilities -- TODO confirm).  Every weight of every
    layer is then bumped by ``eps`` in turn, the value is recomputed, and
    the difference is divided by ``eps`` and by the batch size.

    Args:
        cm_data: input batch handed to ``nn_train.fwd_prop``; ``shape[1]``
            is used as the batch size.
        cm_targets: targets handed to ``cm.compute_softmax_accuraccy``.
        nn_train: network exposing ``fwd_prop``, ``_lst_layers`` and
            ``_lst_outputs``.
        eps: size of the perturbation applied to each weight.

    Returns:
        A list with one host array per layer, each shaped like that
        layer's weight matrix.
    """
    batch_size = cm_data.shape[1]
    cm_probs = cm.empty((1, batch_size))
    cm_correct = cm.empty((1, batch_size))

    def summed_probs():
        # Re-read the output matrix after every forward pass so the result
        # is right whether or not fwd_prop reuses the same output object.
        nn_train.fwd_prop(cm_data)
        cm.compute_softmax_accuraccy(nn_train._lst_outputs[-1],
                                     cm_targets, cm_probs, cm_correct)
        return sum(cm_probs.asarray())

    lg_prob = summed_probs()

    # Host-side snapshot of the unperturbed weights of every layer.
    lst_wts = [layer._wts.asarray().copy()
               for layer in nn_train._lst_layers]

    lst_grads = []
    for layer, wts in zip(nn_train._lst_layers, lst_wts):
        wts_grad = zeros(wts.shape)
        try:
            for i in range(wts.shape[0]):
                for j in range(wts.shape[1]):
                    wts_cpy = wts.copy()
                    wts_cpy[i, j] += eps
                    layer._wts.load_matrix(wts_cpy)

                    lg_prob_cur = summed_probs()
                    wts_grad[i, j] = (lg_prob_cur - lg_prob) / eps / batch_size
        finally:
            # Always put the original weights back, even if a forward
            # pass fails half-way through the layer.
            layer._wts.load_matrix(wts.copy())
        lst_grads.append(wts_grad)
    return lst_grads
Exemple #2
0
    def create_gradients(self):
        """Create zero-filled increment matrices for the weights and biases.

        For each of ``cmW``, ``cmBiasesHid`` and ``cmBiasesVis`` a matrix of
        the same shape is allocated, stored under the same name with an
        ``Inc`` suffix, and set to zero.
        """
        for param_name in ('cmW', 'cmBiasesHid', 'cmBiasesVis'):
            increment = cm.empty(getattr(self, param_name).shape)
            setattr(self, param_name + 'Inc', increment)
            increment.assign(0)
Exemple #3
0
    def init_from_config(self, config_def):
        """Set up dimensions, weights and biases from ``config_def``.

        Reads ``input_dim``, ``num_units``, ``wt_sigma``, ``vis_bias`` and
        ``hid_bias`` from ``config_def``.  The weights are filled by
        ``fill_with_randn`` and multiplied by ``wt_sigma``; each bias
        column is set to its configured constant.
        """
        n_visible = config_def.input_dim
        n_hidden = config_def.num_units
        self.input_dim = n_visible
        self.num_units = n_hidden

        # Each attribute is cleared before its replacement is allocated
        # (presumably so a previous matrix can be released first).
        self.cmW = None
        self.cmW = cm.empty((n_visible, n_hidden))
        self.cmW.fill_with_randn()
        self.cmW.mult(config_def.wt_sigma)

        bias_specs = (
            ('cmBiasesVis', n_visible, 'vis_bias'),
            ('cmBiasesHid', n_hidden, 'hid_bias'),
        )
        for attr_name, num_rows, config_key in bias_specs:
            setattr(self, attr_name, None)
            setattr(self, attr_name, cm.empty((num_rows, 1)))
            getattr(self, attr_name).assign(getattr(config_def, config_key))
Exemple #4
0
 def add_gradients_transposed(self, src):
     """Add the transpose of ``src._wts_grad`` to ``self._wts_grad``.

     A scratch matrix shaped like ``self._wts`` is created the first time
     this is called and kept in ``self._wts_grad_cpy`` for later calls.
     """
     if not hasattr(self, '_wts_grad_cpy'):
         self._wts_grad_cpy = cm.empty(self._wts.shape)
     scratch = self._wts_grad_cpy
     src._wts_grad.transpose(scratch)
     self._wts_grad.add(scratch)
Exemple #5
0
    def allocate_activations(self, batch_size):
        """Allocate the nine work matrices used for one batch.

        Returns:
            A tuple ``(recon, hidprobs, hidstates, posprods, negprods,
            poshidacts, neghidacts, posvisacts, negvisacts)`` of matrices
            created with ``cm.empty``; shapes are listed below.
        """
        n_vis, n_hid = self.input_dim, self.num_units
        shapes = (
            (n_vis, batch_size),  # recon
            (n_hid, batch_size),  # hidprobs
            (n_hid, batch_size),  # hidstates
            (n_vis, n_hid),       # posprods
            (n_vis, n_hid),       # negprods
            (n_hid, 1),           # poshidacts
            (n_hid, 1),           # neghidacts
            (n_vis, 1),           # posvisacts
            (n_vis, 1),           # negvisacts
        )
        return tuple(cm.empty(shape) for shape in shapes)
Exemple #6
0
    def get_iterator(self, batch_size, return_labels=False):
        """Yield shuffled mini-batches read from ``self.data_file``.

        The file is read as raw float32 values (4 bytes each) in chunks of
        up to 1000 batches.  Each chunk is uploaded together with a random
        permutation of its column indices, and batches are drawn from it
        by column selection into one reusable ``(data_dim, batch_size)``
        matrix.  The *same* matrix object is yielded every time with new
        contents, and it is released once the iterator is exhausted.

        Args:
            batch_size: number of points per batch.
            return_labels: accepted but not used by this implementation.

        Yields:
            The reusable batch matrix holding the current batch.
        """
        data_dim = self._data_dim
        # Floor division keeps these counts integral on Python 2 and 3.
        num_batches = self.num_pts // batch_size
        num_batches_per_load = 1000
        num_vals_per_batch = data_dim * batch_size
        num_bytes_per_batch = 4 * num_vals_per_batch
        num_bytes_per_load = num_batches_per_load * num_bytes_per_batch

        batch_num = 0
        batch_num_since_last_load = 0
        num_batches_loaded = 0
        cm_data, cm_data_big, cm_indices = None, None, None

        self.f = open(self.data_file, 'rb')
        try:
            cm_data = CM.empty((data_dim, batch_size))

            while batch_num < num_batches:
                if batch_num_since_last_load == num_batches_loaded:
                    cur_data_str = self.f.read(num_bytes_per_load)
                    num_batches_loaded = (len(cur_data_str) //
                                          num_bytes_per_batch)
                    if num_batches_loaded == 0:
                        # The file holds fewer complete batches than
                        # self.num_pts promised; stop instead of selecting
                        # columns from an empty buffer.
                        break
                    num_pts_read = num_batches_loaded * batch_size
                    cur_data = zeros((data_dim, num_pts_read), 'float32')
                    for b in range(0, num_pts_read, batch_size):
                        # Each batch is stored column-major in the file.
                        data_arr = numpy.frombuffer(
                            cur_data_str, dtype='float32',
                            count=num_vals_per_batch,
                            offset=b * 4 * data_dim)
                        cur_data[:, b:(b + batch_size)] = data_arr.reshape(
                            (data_dim, batch_size), order='F')

                    # Release the previous chunk before uploading the next.
                    if cm_data_big is not None:
                        cm_data_big.free_device_memory()
                        cm_indices.free_device_memory()
                        cm_data_big, cm_indices = None, None
                    cm_data_big = CM.CUDAMatrix(cur_data)
                    cm_indices = CM.CUDAMatrix(
                        permutation(num_pts_read).reshape(1, -1))
                    batch_num_since_last_load = 0
                    cur_data_str = None

                start = batch_num_since_last_load * batch_size
                cm_data_big.select_columns(
                    cm_indices.slice(start, start + batch_size), cm_data)
                batch_num_since_last_load += 1
                batch_num += 1
                yield cm_data

            # Only on normal exhaustion: a consumer that stopped early may
            # still be holding the last yielded batch.
            cm_data.free_device_memory()
        finally:
            # Internal buffers and the file are released however the
            # generator ends (exhausted, closed early, or on error).
            for gpu_buf in (cm_data_big, cm_indices):
                if gpu_buf is not None:
                    gpu_buf.free_device_memory()
            cm_data, cm_data_big, cm_indices = None, None, None
            self.f.close()
            self.f = None
Exemple #7
0
def check_lg_probs(cm_data, cm_targets, nn_train):
    """Compare the summed log-prob from ``calc_lg_prob`` with the GPU value.

    The CPU value comes from ``calc_lg_prob`` on host copies of the data,
    targets and layer weights.  The GPU value is the sum of the
    ``cm_probs`` output of ``cm.compute_softmax_accuraccy`` applied to
    ``nn_train._lst_outputs[-1]``; no forward pass is run here, so the
    outputs of whatever pass ran last are used.  A message is written to
    stdout when the two differ by more than 1e-6 per example.

    Returns:
        ``(lgprob_gpu, lgprob_cpu)``.
    """
    host_weights = [lyr._wts.asarray().copy()
                    for lyr in nn_train._lst_layers]
    cpu_total = calc_lg_prob(cm_data.asarray().copy(),
                             cm_targets.asarray().copy(),
                             host_weights)

    num_examples = cm_data.shape[1]
    cm_probs = cm.empty((1, num_examples))
    cm_correct = cm.empty((1, num_examples))
    cm.compute_softmax_accuraccy(nn_train._lst_outputs[-1], cm_targets,
                                 cm_probs, cm_correct)

    gpu_total = sum(cm_probs.asarray())
    per_example_gap = abs(gpu_total - cpu_total) / num_examples
    if per_example_gap > 1e-6:
        report = ("FAILED test for log probs. " +
                  " cpu, gpu  = %.4f, %.4f\n" % (cpu_total, gpu_total))
        sys.stdout.write(report)

    return gpu_total, cpu_total
Exemple #8
0
    def compute_recon_error_for_db(self, data_src, batch_size):
        """Return the root-mean-square reconstruction error over ``data_src``.

        Each batch from ``data_src.get_iterator`` is mapped to hidden
        probabilities (weights transposed, hidden biases, sigmoid) and back
        to a reconstruction (weights, visible biases).  The squared
        Euclidean norm of reconstruction minus input is accumulated, then
        divided by ``input_dim`` times the number of points seen, and the
        square root is returned.
        """
        total_sq_err = 0
        pts_seen = 0
        hid_probs = None
        recon = None

        batches = data_src.get_iterator(batch_size, return_labels=False)
        for batch in batches:
            # Work matrices are created on the first batch and reused.
            if hid_probs is None:
                hid_probs = cm.empty((self.num_units, batch_size))
                recon = cm.empty(batch.shape).assign(0)

            # Visible -> hidden.
            cm.dot(self.cmW.T, batch, hid_probs)
            hid_probs.add_col_vec(self.cmBiasesHid)
            hid_probs.apply_sigmoid()

            # Hidden -> visible, then residual against the input.
            cm.dot(self.cmW, hid_probs, target=recon)
            recon.add_col_vec(self.cmBiasesVis)
            recon.subtract(batch)

            total_sq_err = recon.euclid_norm() ** 2 + total_sq_err
            pts_seen = pts_seen + batch_size

        return sqrt(total_sq_err * 1. / (self.input_dim * pts_seen))
Exemple #9
0
def test_softmax_sample():
    """Draw repeatedly with ``SampleSoftMax`` and compare with the targets.

    A random 160 x 128 matrix with the leading diagonal boosted to 2.0 is
    normalised so every column sums to one.  Its logarithm is uploaded and
    sampled 10000 times; the accumulated draws divided by the number of
    draws give the estimated probabilities.  Timing and the mean absolute
    error are printed, both matrices are plotted, and the function waits
    for the user to press enter.

    Returns:
        ``(est_probs, probs)`` as host arrays.
    """
    dim, num_pts = 160, 128
    num_draws = 10000

    probs = rand(dim, num_pts)
    for i in range(min(dim, num_pts)):
        probs[i, i] = 2.0

    # Normalise each column to sum to one.
    probs = probs / probs.sum(axis=0).reshape(1, -1)

    cm_prob = cm.CUDAMatrix(log(probs))
    cm_data = cm.empty(probs.shape)
    cm_rands = cm.empty(probs.shape)
    cm_counts = cm.empty(probs.shape).assign(0)

    s = datetime.datetime.now()
    for _ in range(num_draws):
        cm_rands.fill_with_rand()
        cm_prob.SampleSoftMax(cm_rands, cm_data)
        cm_counts.add(cm_data)
        cm_data.assign(0)
    e = datetime.datetime.now()
    # timedelta.microseconds is only the sub-second component; use the
    # full duration so runs longer than one second are reported correctly.
    elapsed_us = (e - s).total_seconds() * 1e6
    cm_counts.divide(num_draws)
    est_probs = cm_counts.asarray().copy()

    print("Total time for %d draws = %d microseconds\n" % (num_draws,
                                                           elapsed_us))
    print("Average case error = %.5f \n" % (np.mean(abs(est_probs - probs))))

    from matplotlib.pyplot import subplot, imshow, draw, plot
    subplot(311)
    imshow(probs, aspect='auto', interpolation='nearest')
    subplot(312)
    imshow(est_probs, aspect='auto', interpolation='nearest')
    # Overlay estimated and target probabilities for the first column.
    subplot(313)
    plot(est_probs[:, 0])
    subplot(313)
    plot(probs[:, 0])
    draw()
    time.sleep(0.2)

    try:
        wait_for_enter = raw_input  # Python 2
    except NameError:
        wait_for_enter = input  # Python 3
    wait_for_enter('enter to finish')
    return est_probs, probs
Exemple #10
0
    def create_from_params(self, layer_def):
        """Allocate and initialise this layer's parameters from ``layer_def``.

        Weights have shape ``(input_dim, num_units)``; they are filled by
        ``fill_with_randn`` and scaled by ``wt_sigma / sqrt(input_dim)``.
        Biases form a column with one row per unit; they are filled by
        ``fill_with_rand``, scaled by ``biases_max - biases_min`` and
        shifted by ``biases_min``.  Zeroed gradient and increment matrices
        are created for both.  The start of initialisation is reported on
        stderr and through ``logging.info``.
        """
        banner = "Initializing  layer: %s of type %s\n" % (
            layer_def.name, type(self).__name__)
        sys.stderr.write(banner)
        logging.info(banner)

        init_params = layer_def.init_params
        fan_in = layer_def.input_dim

        self._wts = cm.empty((fan_in, layer_def.num_units))
        self._wts.fill_with_randn()
        self._wts.mult(init_params.wt_sigma * 1. / sqrt(fan_in))

        self._b = cm.empty((self._wts.shape[1], 1))
        bias_span = init_params.biases_max - init_params.biases_min
        self._b.fill_with_rand().mult(bias_span).add(init_params.biases_min)

        # Zeroed companions: gradient and increment for weights, then biases.
        for prefix in ('_wts', '_b'):
            shape = getattr(self, prefix).shape
            setattr(self, prefix + '_grad', cm.empty(shape).assign(0))
            setattr(self, prefix + '_inc', cm.empty(shape).assign(0))
Exemple #11
0
    def get_iterator(self, batch_size, return_labels=True):
        """Yield mini-batches assembled from the currently loaded data.

        All work matrices are allocated once per call and reused, so every
        iteration yields the same matrix objects with fresh contents.
        More data is loaded through ``load_next_data`` whenever fewer than
        ``batch_size`` frames remain, and iteration stops once
        ``_start_file_num`` has reached ``_num_files``.  After the loop
        ends ``_is_setup`` is reset to False.

        Args:
            batch_size: number of points per yielded batch.
            return_labels: when true, yield ``(data, targets)`` pairs;
                otherwise yield only ``data``.

        Raises:
            Exception: if ``_is_setup`` has never been set on this object.
        """
        if not hasattr(self, '_is_setup'):
            raise Exception("Call setup_data or permute_indices first")
        if not self._is_setup:
            self.permute_file_indices_for_loading()

        self._cm_data_for_batch = cm.empty((self._data_dim, batch_size))

        # Targets are gathered in target_shape and handed out reshaped to
        # multi_target_shape.
        target_shape = (self.get_label_dim(),
                        self._num_outputs_per_pt * batch_size)
        multi_target_shape = (self.get_label_dim() * self._num_outputs_per_pt,
                              batch_size)

        self._cm_targets_for_batch = cm.empty(target_shape)
        self._cm_data_indices_for_batch = cm.empty((1, batch_size))
        self._cm_data_indices_with_frames = cm.empty(
            (self._num_frames_per_pt, batch_size))
        self._cm_target_indices_with_frames = cm.empty(
            (self._num_outputs_per_pt, batch_size))
        self._cm_target_indices_for_batch = cm.empty(
            (1, self._num_outputs_per_pt * batch_size))

        # Column vectors 0..k-1 used as per-frame offsets, and an identity
        # matrix whose columns are selected by index to form the targets.
        self._cm_range_frames = cm.CUDAMatrix(cm.reformat(
            arange(self._num_frames_per_pt).reshape((-1, 1))))
        self._cm_range_target_frames = cm.CUDAMatrix(cm.reformat(
            arange(self._num_outputs_per_pt).reshape((-1, 1))))
        self._cm_target_vectors_matrix = cm.CUDAMatrix(
            eye(self.get_label_dim()))

        while True:
            if self._batch_index + batch_size > self._num_frames_for_training:
                if self._start_file_num >= self._num_files:
                    break
                self.load_next_data()

            self._cm_indices_matrix.get_col_slice(
                self._batch_index, self._batch_index + batch_size,
                self._cm_data_indices_for_batch)

            # Data indices: frame offsets (column vector) plus the batch's
            # point indices (row vector), then flattened to a single row.
            self._cm_data_indices_with_frames.reshape(
                (self._num_frames_per_pt, batch_size))
            self._cm_data_indices_with_frames.assign(0)
            self._cm_data_indices_with_frames.add_col_vec(
                self._cm_range_frames)
            self._cm_data_indices_with_frames.add_row_vec(
                self._cm_data_indices_for_batch)
            self._cm_data_indices_with_frames.reshape(
                (1, self._num_frames_per_pt * batch_size))

            # Target indices: built the same way, shifted by label_offset.
            self._cm_target_indices_with_frames.reshape(
                (self._num_outputs_per_pt, batch_size))
            self._cm_target_indices_with_frames.assign(0)
            self._cm_target_indices_with_frames.add_col_vec(
                self._cm_range_target_frames)
            self._cm_target_indices_with_frames.add_row_vec(
                self._cm_data_indices_for_batch)
            self._cm_target_indices_with_frames.add(self.label_offset)
            self._cm_target_indices_with_frames.reshape(
                (1, self._num_outputs_per_pt * batch_size))

            self._cm_data_matrix.select_columns(
                self._cm_data_indices_with_frames,
                self._cm_data_for_batch)
            self._cm_data_for_batch.reshape((self._data_dim, batch_size))
            if self.dropout_rate != 0:
                # Rescale after dropout by 1 / (1 - rate).
                self._cm_data_for_batch.dropout(self.dropout_rate)
                self._cm_data_for_batch.mult(1. / (1 - self.dropout_rate))

            self._batch_index += batch_size

            if return_labels:
                # Look up the label index of every selected target frame,
                # then turn each index into a column of the identity matrix.
                self._cm_targets_matrix.select_columns(
                    self._cm_target_indices_with_frames,
                    self._cm_target_indices_for_batch)

                self._cm_targets_for_batch.reshape(target_shape)
                self._cm_target_vectors_matrix.select_columns(
                    self._cm_target_indices_for_batch,
                    self._cm_targets_for_batch)
                self._cm_targets_for_batch.reshape(multi_target_shape)

                yield self._cm_data_for_batch, self._cm_targets_for_batch
            else:
                yield self._cm_data_for_batch

        self._is_setup = False
def compute_predictions_for_sentence_multi(db,
                                           nnet_model,
                                           fileNum,
                                           use_sum=False,
                                           get_labels=False,
                                           decoding_context=-1):
    """Compute frame-level log-predictions and accuracy for one file.

    The file's data is striped with ``StripeData`` and fed to
    ``nnet_model.predict``.  When the database has more than one output
    frame per point and ``decoding_context`` is -1, the predictions are
    un-striped over all output frames and the partial edge columns are
    trimmed.  Otherwise only the rows for the middle output frame plus or
    minus ``decoding_context`` frames are kept (and un-striped when the
    context is non-zero).

    Args:
        db: data source providing ``get_data_for_file``,
            ``get_num_frames_per_pt`` and ``get_num_outputs_frames_per_pt``.
        nnet_model: model providing ``predict``.
        fileNum: file identifier passed to ``db.get_data_for_file``.
        use_sum: when false the model is asked for unnormalised outputs,
            which are passed through softmax and log here; when true the
            model's outputs are only logged (offset by 1e-35).
        get_labels: also return predicted classes and flattened labels.
        decoding_context: number of output frames kept on each side of the
            middle frame; -1 selects the all-frames path described above.

    Returns:
        ``(predictions, num_correct, log_probs)``, extended with
        ``pred_class`` and ``labels.reshape(-1)`` when ``get_labels`` is
        true.
    """
    data, labels = db.get_data_for_file(fileNum, return_labels=True)
    data_striped = StripeData(data, db.get_num_frames_per_pt(), append=True)

    predictions = nnet_model.predict(data_striped, unnormalized=not use_sum)

    num_out_frames_per_pt = db.get_num_outputs_frames_per_pt()

    # Floor division keeps every slice bound an integer; floor() returns a
    # float, which is not a valid index.
    if num_out_frames_per_pt != 1 and decoding_context == -1:
        predictions = UnStripeData(predictions, num_out_frames_per_pt)
        extra_left = (num_out_frames_per_pt - 1) // 2
        extra_right = num_out_frames_per_pt - 1 - extra_left
        predictions = predictions[:, extra_left:-extra_right]
    else:
        # NOTE(review): with decoding_context == -1 (only reachable here
        # when there is a single output frame) start_frame ends up one
        # past end_frame, so the row slice below is empty -- confirm that
        # callers never rely on this combination.
        frame_dim = predictions.shape[0] // num_out_frames_per_pt
        mid_frame = num_out_frames_per_pt // 2
        start_frame = mid_frame - decoding_context
        end_frame = mid_frame + decoding_context + 1
        predictions = predictions[frame_dim * start_frame:
                                  frame_dim * end_frame, :]
        if decoding_context != 0:
            predictions = UnStripeData(predictions, decoding_context * 2 + 1)
            predictions = predictions[:, decoding_context:-decoding_context]

    pred_class = predictions.argmax(axis=0)

    if not use_sum:
        # Normalise on the GPU: softmax followed by an in-place log.
        cm_pred = cm.CUDAMatrix(predictions)
        cm_probs = cm.empty(cm_pred.shape).assign(0)
        cm_pred.compute_softmax(cm_probs)
        cm.log(cm_probs)
        predictions = cm_probs.asarray().copy()

        cm_probs.free_device_memory()
        cm_pred.free_device_memory()
        cm_probs, cm_pred = None, None
    else:
        predictions = log(predictions + 1e-35)

    # One-hot columns for the labels pick out the log-prob of the
    # labelled class in every frame.
    ones_matrix = eye(predictions.shape[0])
    class_matrix = ones_matrix[:, labels]
    log_probs = sum(predictions * class_matrix)

    num_correct = sum(pred_class == labels.reshape(-1))

    if get_labels:
        return (predictions, num_correct, log_probs, pred_class,
                labels.reshape(-1))
    return predictions, num_correct, log_probs
# Check cm.compute_logistic_log_prob against a NumPy reference computation.
#
# NOTE: the statement order below is kept as-is; presumably the GPU lock has
# to be taken before cudamat_ext is imported and initialised.
import GPULock
GPULock.GetGPULock()
import cudamat_ext as cm
cm.cublas_init()
cm.CUDAMatrix.init_random(42)
import numpy as np
from pylab import log, sum

m = 299
n = 128
# Offset added inside both logarithms of the reference so log(0) never occurs.
log_eps = 1e-8

target = np.random.rand(m, n) > 0.5
prob = np.random.rand(m, n)

cm_target = cm.CUDAMatrix(cm.reformat(target))
cm_prob = cm.CUDAMatrix(cm.reformat(prob))

cm_log_prob = cm.empty((1, n))
cm.compute_logistic_log_prob(cm_prob, cm_target, cm_log_prob)

# Reference: per-column sum of the Bernoulli log-likelihood.
lg_prob = sum(target * log(log_eps + prob) +
              (1 - target) * log(1 - prob + log_eps),
              axis=0).reshape((1, -1))
error = np.sum((cm_log_prob.asarray() - lg_prob) ** 2)
# Single-argument print behaves the same on Python 2 and 3; the spacing
# reproduces what the comma-separated print statement emitted.
print("Error =  %s  sum_lg_prob =  %s" % (error, str(sum(lg_prob))))