Example #1
def test_column_set_get():
    m = 256
    n = 128

    data = np.random.randn(m, n)
    cm_data = cm.CUDAMatrix(cm.reformat(data))

    indices = permutation(n)
    cm_indices = cm.CUDAMatrix(cm.reformat(indices.reshape(1, -1)))

    start = 0
    end = 10
    cm_columns = cm_data.get_column_vectors(cm_indices, start, end)

    get_error = np.sum((cm_columns.asarray() - data[:, indices[start:end]])**2)

    data_set = np.random.randn(m, end - start)
    cm_columns.free_device_memory()
    cm_columns = cm.CUDAMatrix(cm.reformat(data_set))
    cm_data.set_column_vectors(cm_indices, start, end, cm_columns)

    data[:, indices[start:end]] = data_set
    set_error = np.sum((cm_data.asarray() - data)**2)

    print "Get Error = ", get_error
    print "Set Error = ", set_error
    assert get_error < 10**-2 and set_error < 10**-2,  \
             "Error in CUDAMatrix.get_column_vectors/set_column_vectors exceeded threshold"
Example #2
    def load_params(self, param_file):
        targetDict = {}
        util.load(param_file, targetDict, verbose=False)
        self.cmW = cm.CUDAMatrix(cm.reformat(targetDict['W']))
        self.cmBiasesHid = cm.CUDAMatrix(cm.reformat(targetDict['biasesHid']))
        self.cmBiasesVis = cm.CUDAMatrix(cm.reformat(targetDict['biasesVis']))

        self.input_dim, self.num_units = self.cmW.shape
Example #3
    def get_iterator(self, batch_size, return_labels=False):
        self.f = open(self.data_file, 'rb')
        cm_data = CM.empty((self._data_dim, batch_size))
        batch_num = 0
        data_dim = self._data_dim
        num_batches = self.num_pts / batch_size
        num_batches_per_load = 1000
        num_bytes_per_batch = 4 * data_dim * batch_size
        num_bytes_per_load = num_batches_per_load * num_bytes_per_batch
        batch_num_since_last_load = 0
        num_batches_loaded = 0

        while batch_num < num_batches:
            if batch_num_since_last_load == num_batches_loaded:
                cur_data_str = self.f.read(num_bytes_per_load)
                num_batches_loaded = len(cur_data_str) / num_bytes_per_batch
                num_pts_read = num_batches_loaded * batch_size
                cur_data = zeros((data_dim, num_pts_read), 'float32')
                for b in arange(0, num_pts_read, batch_size):
                    str_s = b * 4 * data_dim
                    str_e = str_s + 4 * data_dim * batch_size
                    data_arr = numpy.fromstring(cur_data_str[str_s:str_e],
                                                dtype='float32')
                    cur_data[:,b:(b+batch_size)] = data_arr.reshape(\
                                     (data_dim, batch_size), order='F')

                try:
                    cm_data_big.free_device_memory()
                    cm_indices.free_device_memory()
                    cm_data_big, cm_indices = None, None
                except NameError:
                    pass
                cm_data_big = CM.CUDAMatrix(cur_data)
                cm_indices = CM.CUDAMatrix(
                    permutation(num_pts_read).reshape(1, -1))
                batch_num_since_last_load = 0
                cur_data_str = None

            start = batch_num_since_last_load * batch_size
            cm_data_big.select_columns(
                cm_indices.slice(start, start + batch_size), cm_data)
            batch_num_since_last_load += 1
            batch_num += 1
            yield cm_data

        cm_data.free_device_memory()
        cm_data_big.free_device_memory()
        cm_indices.free_device_memory()
        cm_data, cm_data_big, cm_indices = None, None, None

        self.f.close()
        self.f = None
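
For reference, a minimal CPU-only sketch of the pattern used by get_iterator above (read a large block of float32 columns, permute its columns once, then yield contiguous slices of that permutation) might look like the following. The helper name and the assumed file layout are illustrative only, not part of the class above.

import numpy as np

def iterate_batches_cpu(path, data_dim, batch_size, batches_per_load=1000):
    # CPU sketch of the chunked, shuffled batch iterator (hypothetical helper).
    bytes_per_batch = 4 * data_dim * batch_size
    with open(path, 'rb') as f:
        while True:
            raw = f.read(batches_per_load * bytes_per_batch)
            num_loaded = len(raw) // bytes_per_batch
            if num_loaded == 0:
                break
            chunk = np.frombuffer(raw, dtype='float32',
                                  count=num_loaded * batch_size * data_dim)
            chunk = chunk.reshape((data_dim, num_loaded * batch_size), order='F')
            # Shuffle once per load, then hand out contiguous slices of the permutation.
            order = np.random.permutation(chunk.shape[1])
            for b in range(num_loaded):
                yield chunk[:, order[b * batch_size:(b + 1) * batch_size]]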
Example #4
def test_softmax():
    m = 2000
    n = 128

    data = np.random.randn(m, n)
    prob = data - data.max(axis=0).reshape(1, -1)
    prob = np.exp(prob) / np.exp(prob).sum(axis=0).reshape(1, -1)

    cm_data = cm.CUDAMatrix(cm.reformat(data))
    cm_prob = cm.CUDAMatrix(cm.reformat(np.zeros(data.shape)))

    cm_data.compute_softmax(cm_prob)

    error = np.sum((cm_prob.asarray() - prob)**2)
    print "Error = ", error
    assert error < 10**-2, "Error in CUDAMatrix.compute_softmax exceeded threshold"
Example #5
def test_columnwise_dot():
    m = 64
    n = 64
    a = np.array(np.random.randn(m, n), dtype=np.float32, order='F')
    b = np.array(np.random.randn(m, n), dtype=np.float32, order='F')

    res = np.sum(a * b, axis=0).reshape(1, -1)

    m1 = cm.CUDAMatrix(a)
    m2 = cm.CUDAMatrix(b)
    cm_res = cm.CUDAMatrix(cm.reformat(np.zeros(res.shape)))

    cm.columnwise_dot(m1, m2, cm_res)

    err = np.sum(np.abs(res - cm_res.asarray()))
    assert err < 10**-2, "Error in cudamat_ext.columnwise_dot exceeded threshold"
Example #6
 def permute_indices_for_loaded_data(self):
     ''' Re-permutes indices for the currently loaded data. Can be used if
         we load all the data at once and don't want to reload it.
     '''
     data_permutation = permutation(self._data_indices.size)
     self._data_indices = self._data_indices[0,\
                               data_permutation].reshape((1,-1))
     self._cm_indices_matrix = cm.CUDAMatrix(\
                                 cm.reformat(self._data_indices))
     self._batch_index = 0
     self._is_setup = True
Example #7
def test_softmax_sample():
    dim, num_pts = 160, 128
    num_draws = 10000

    probs = rand(dim, num_pts)
    for i in range(min(dim, num_pts)):
        probs[i, i] = 2.0

    probs = probs / probs.sum(axis=0).reshape(1, -1)

    cm_prob = cm.CUDAMatrix(log(probs))
    cm_data = cm.empty(probs.shape)
    cm_rands = cm.empty(probs.shape)
    cm_counts = cm.empty(probs.shape).assign(0)

    s = datetime.datetime.now()
    for draw in range(num_draws):
        cm_rands.fill_with_rand()
        cm_prob.SampleSoftMax(cm_rands, cm_data)
        cm_counts.add(cm_data)
        cm_data.assign(0)
    e = datetime.datetime.now()
    diff = e - s
    cm_counts.divide(num_draws)
    est_probs = cm_counts.asarray().copy()

    print "Total time for %d draws = %d microseconds\n" % (num_draws,
                                                           diff.microseconds)
    print "Average case error = %.5f \n" % (np.mean(abs(est_probs - probs)))

    from matplotlib.pyplot import subplot, imshow, plot, draw
    subplot(311), imshow(probs, aspect='auto', interpolation='nearest')
    subplot(312), imshow(est_probs, aspect='auto', interpolation='nearest')
    subplot(313), plot(est_probs[:, 0])
    subplot(313), plot(probs[:, 0])
    draw(), time.sleep(0.2)
    raw_input('enter to finish')
    return est_probs, probs
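
As a CPU cross-check of what SampleSoftMax is expected to do here (draw one one-hot sample per column from that column's softmax distribution), a small NumPy-only sketch, with an illustrative function name, could be:

import numpy as np

def sample_softmax_cpu(log_probs, rng=np.random):
    # Draw one one-hot sample per column of softmax(log_probs); CPU sketch only.
    shifted = log_probs - log_probs.max(axis=0).reshape(1, -1)
    probs = np.exp(shifted) / np.exp(shifted).sum(axis=0).reshape(1, -1)
    # Inverse-CDF sampling: pick the first row whose cumulative mass exceeds the draw.
    u = rng.rand(1, probs.shape[1])
    choices = (probs.cumsum(axis=0) > u).argmax(axis=0)
    samples = np.zeros(probs.shape)
    samples[choices, np.arange(probs.shape[1])] = 1.0
    return samples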
Example #8
    def load_next_data(self):
        last_file = min(self._start_file_num + self._num_files_per_load,
                        self._num_files)
        data_lst = []
        label_lst = []
        indices_lst = []
        num_frames = 0
        num_indices = 0
        for file_index in range(self._start_file_num, last_file):
            file_num = self._file_indices[file_index]
            data, cur_labels = self._data_src.get_spectrogram_and_labels(\
                                   file_num, self._speaker_cmn,\
                                   self._speaker_cmvn, self._normalize)
            if self._skip_borders != 0:
                data = data[:, self._skip_borders:(-self._skip_borders)]
                cur_labels = cur_labels[self._skip_borders:(
                    -self._skip_borders)]

            if self._borders_only:
                I = flatnonzero(cur_labels[1:] != cur_labels[:-1])
                if I[0] != 0:
                    indices = concatenate(([0], I, I + 1))
                else:
                    indices = concatenate((I, I + 1))
                indices.sort()
                indices = unique(indices)
                indices -= int(self.label_offset)
                indices = indices[indices < (data.shape[1] -
                                             self._num_frames_per_pt + 1)]
                indices = indices[indices >= 0]
            else:
                indices = arange(
                    max(0, data.shape[1] - self._num_frames_per_pt + 1))

            data_lst.append(data)
            label_lst.append(cur_labels.copy())
            indices_lst.append(indices.copy())

            num_frames += data.shape[1]
            num_indices += indices.size

        self._num_frames = 0
        self._num_frames_for_training = num_indices

        self._data_matrix = zeros((self._frame_dim, num_frames))
        self._label_matrix = zeros((1, num_frames))
        self._data_indices = zeros((1, num_indices))

        num_frames_so_far = 0
        num_indices_so_far = 0

        for (cur_data, cur_labels, cur_indices) in zip(data_lst, label_lst,
                                                       indices_lst):
            num_frames_cur = cur_data.shape[1]
            self._data_matrix[:, num_frames_so_far:(
                num_frames_so_far + num_frames_cur)] = cur_data.copy()
            self._label_matrix[0, num_frames_so_far:(
                num_frames_so_far + num_frames_cur)] = cur_labels.copy()

            num_indices_cur = cur_indices.size
            self._data_indices[0, num_indices_so_far:(
                num_indices_so_far +
                num_indices_cur)] = cur_indices + num_frames_so_far

            num_frames_so_far += num_frames_cur
            num_indices_so_far += num_indices_cur

        assert (num_indices_so_far == num_indices)
        assert (num_frames_so_far == num_frames)

        try:
            self._cm_data_matrix.free_device_memory()
            self._cm_targets_matrix.free_device_memory()
            self._cm_indices_matrix.free_device_memory()
        except AttributeError:
            pass

        self._cm_data_matrix = cm.CUDAMatrix(self._data_matrix)
        self._cm_targets_matrix = cm.CUDAMatrix(self._label_matrix)
        self._start_file_num = last_file
        self.permute_indices_for_loaded_data()
Example #9
    def get_iterator(self, batch_size, return_labels=True):
        if not hasattr(self, '_is_setup'):
            raise Exception, "Call setup_data or permute_indices first"
        if not self._is_setup:
            self.permute_file_indices_for_loading()

        self._cm_data_for_batch = cm.empty((self._data_dim, batch_size))

        target_shape = ((self.get_label_dim(),
                         self._num_outputs_per_pt * batch_size))
        multi_target_shape = ((self.get_label_dim() * self._num_outputs_per_pt,
                               batch_size))

        self._cm_targets_for_batch = cm.empty(target_shape)
        self._cm_data_indices_for_batch = cm.empty((1, batch_size))
        self._cm_data_indices_with_frames = cm.empty(
            (self._num_frames_per_pt, batch_size))
        self._cm_target_indices_with_frames = cm.empty(
            (self._num_outputs_per_pt, batch_size))
        self._cm_target_indices_for_batch = cm.empty(
            (1, self._num_outputs_per_pt * batch_size))

        self._cm_range_frames = cm.CUDAMatrix(cm.reformat(arange(\
                                  self._num_frames_per_pt).reshape((-1,1))))
        self._cm_range_target_frames = cm.CUDAMatrix(cm.reformat(arange(\
                                             self._num_outputs_per_pt).reshape((-1,1))))
        self._cm_target_vectors_matrix = cm.CUDAMatrix(\
                                                  eye(self.get_label_dim()))

        while True:
            if self._batch_index + batch_size > self._num_frames_for_training:
                if self._start_file_num >= self._num_files:
                    break
                self.load_next_data()

            self._cm_indices_matrix.get_col_slice(
                self._batch_index, self._batch_index + batch_size,
                self._cm_data_indices_for_batch)

            self._cm_data_indices_with_frames.reshape(
                (self._num_frames_per_pt, batch_size))
            self._cm_data_indices_with_frames.assign(0)
            self._cm_data_indices_with_frames.add_col_vec(\
                                            self._cm_range_frames)
            self._cm_data_indices_with_frames.add_row_vec(\
                                          self._cm_data_indices_for_batch)
            self._cm_data_indices_with_frames.reshape(
                (1, self._num_frames_per_pt * batch_size))

            self._cm_target_indices_with_frames.reshape(
                (self._num_outputs_per_pt, batch_size))
            self._cm_target_indices_with_frames.assign(0)
            self._cm_target_indices_with_frames.add_col_vec(\
                                            self._cm_range_target_frames)
            self._cm_target_indices_with_frames.add_row_vec(\
                                          self._cm_data_indices_for_batch)
            self._cm_target_indices_with_frames.add(self.label_offset)
            self._cm_target_indices_with_frames.reshape(
                (1, self._num_outputs_per_pt * batch_size))

            self._cm_data_matrix.select_columns(\
                                       self._cm_data_indices_with_frames,
                                                 self._cm_data_for_batch)
            self._cm_data_for_batch.reshape((self._data_dim, batch_size))
            if self.dropout_rate != 0:
                self._cm_data_for_batch.dropout(self.dropout_rate)
                self._cm_data_for_batch.mult(1. / (1 - self.dropout_rate))

            self._batch_index += batch_size

            if return_labels:
                self._cm_targets_matrix.select_columns(\
                                   self._cm_target_indices_with_frames,
                                   self._cm_target_indices_for_batch)

                self._cm_targets_for_batch.reshape(target_shape)
                self._cm_target_vectors_matrix.select_columns(\
                                       self._cm_target_indices_for_batch,
                                       self._cm_targets_for_batch)
                self._cm_targets_for_batch.reshape(multi_target_shape)

                yield self._cm_data_for_batch, self._cm_targets_for_batch
            else:
                yield self._cm_data_for_batch

        self._is_setup = False
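
A CPU sketch of the indexing trick used above (broadcast per-window frame offsets against per-example start indices, gather those columns, then view the result as stacked context windows) might look like this; the names are illustrative, and the column-major reshape mirrors cudamat's layout.

import numpy as np

def gather_context_windows(data, starts, frames_per_pt):
    # data:   (frame_dim, num_frames) array of frames.
    # starts: (batch_size,) start frame index for each example.
    # Returns (frame_dim * frames_per_pt, batch_size), one window per column.
    starts = np.asarray(starts).reshape(1, -1)
    offsets = np.arange(frames_per_pt).reshape(-1, 1)
    cols = offsets + starts                       # same shape as the GPU index matrix
    gathered = data[:, cols.ravel(order='F')]     # (frame_dim, frames_per_pt * batch)
    return gathered.reshape((-1, starts.size), order='F')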
Example #10
data_dim = 5
target_dim = 15
batch_size = 20
nn_def_file = "params/nn_def_20_10.txt"
nn_train = nnet_train.nn()
nn_train.create_nnet_from_def(nn_def_file,
                              data_dim = data_dim,
                              target_dim = target_dim)
nn_train.create_activations_and_probs(batch_size)

lst_num_hid = list(nn_train._lst_num_hid)
data, targets, lst_hids, lst_wts = init_data(lst_num_hid,
                                    data_dim, target_dim, 
                                    batch_size)
set_nn_wts(nn_train, lst_wts)
cm_data = cm.CUDAMatrix(data)
cm_targets = cm.CUDAMatrix(targets)


nn_train.fwd_prop(cm_data)
check_hid_activities(nn_train, lst_hids)

lgprob_orig, lgprob_orig_cpu = check_lg_probs(cm_data, cm_targets, 
                                                      nn_train)

nn_train.fwd_prop(cm_data)
cm_predictions = nn_train._lst_outputs[-1]
cm_targets.subtract(cm_predictions,
                       nn_train._lst_activations_grad[-1])
nn_train.back_prop(cm_data)
layer_grads = [layer._wts_grad.asarray().copy() for layer in \
Example #11
def compute_predictions_for_sentence_multi(db,
                                           nnet_model,
                                           fileNum,
                                           use_sum=False,
                                           get_labels=False,
                                           decoding_context=-1):
    data, labels = db.get_data_for_file(fileNum, return_labels=True)
    data_striped = StripeData(data, db.get_num_frames_per_pt(), append=True)
    dataDim, numFrames = data_striped.shape

    predictions = nnet_model.predict(data_striped, unnormalized=not use_sum)

    num_out_frames_per_pt = db.get_num_outputs_frames_per_pt()
    #if use_sum:
    #wts = hamming(num_out_frames_per_pt)[newaxis,:]
    #wts = tile(wts, (predictions.shape[0]/num_out_frames_per_pt,1)).reshape(-1,1, order='F')
    #prob = 0.1
    #predictions *= (prob*wts + (1-prob))

    if db.get_num_outputs_frames_per_pt() != 1 and decoding_context == -1:
        predictions = UnStripeData(predictions,
                                   db.get_num_outputs_frames_per_pt())
        extra_left = floor((db.get_num_outputs_frames_per_pt() - 1) / 2)
        extra_right = db.get_num_outputs_frames_per_pt() - 1 - extra_left
        predictions = predictions[:, extra_left:-extra_right]
    else:
        frame_dim = predictions.shape[0] / db.get_num_outputs_frames_per_pt()
        mid_frame = floor(db.get_num_outputs_frames_per_pt() / 2)
        start_frame = mid_frame - decoding_context
        end_frame = mid_frame + decoding_context + 1
        predictions = predictions[(frame_dim * (start_frame)):(frame_dim *
                                                               (end_frame)), :]
        if decoding_context != 0:
            predictions = UnStripeData(predictions, decoding_context * 2 + 1)
            predictions = predictions[:, decoding_context:-decoding_context]

    pred_class = predictions.argmax(axis=0)

    if not use_sum:
        cm_pred = cm.CUDAMatrix(predictions)
        cm_probs = cm.empty(cm_pred.shape).assign(0)
        cm_pred.compute_softmax(cm_probs)
        cm.log(cm_probs)
        predictions = cm_probs.asarray().copy()

        cm_probs.free_device_memory()
        cm_pred.free_device_memory()
        cm_probs, cm_pred = None, None
    else:
        predictions = log(predictions + 1e-35)

    ones_matrix = eye(predictions.shape[0])
    class_matrix = ones_matrix[:, labels]
    log_probs = sum(predictions * class_matrix)

    num_correct = sum(pred_class == labels.reshape(-1))

    if get_labels:
        return predictions, num_correct, log_probs, pred_class, labels.reshape(
            -1)
    else:
        return predictions, num_correct, log_probs
Example #12
 def copy_params_from_dict(self, params_dict):
     self._wts = cm.CUDAMatrix(params_dict[self.name + "_wts"])
     self._b = cm.CUDAMatrix(params_dict[self.name + "_b"])
Example #13
 def set_input_mask(self, mask):
     self._wt_mask = cm.CUDAMatrix(mask)
Example #14
 def copy_params_from_dict(self, params_dict):
     self._wts = cm.CUDAMatrix(params_dict[self.name + "_wts"])
     self._b = cm.CUDAMatrix(params_dict[self.name + "_b"])
     self.num_softmaxes = params_dict[self.name + "_num_softmaxes"]
     self.num_units = self.num_hid / self.num_softmaxes
Example #15
import GPULock
GPULock.GetGPULock()
import cudamat_ext as cm
cm.cublas_init()
cm.CUDAMatrix.init_random(42)
import numpy as np
from pylab import log, sum

m = 299
n = 128

target = np.random.rand(m, n) > 0.5
prob = np.random.rand(m, n)

cm_target = cm.CUDAMatrix(cm.reformat(target))
cm_prob = cm.CUDAMatrix(cm.reformat(prob))

cm_log_prob = cm.empty((1,n))
cm.compute_logistic_log_prob(cm_prob, cm_target, cm_log_prob)

lg_prob = sum(target*log(1e-8+prob) + (1-target)*log(1-prob+1e-8), axis=0).reshape((1,-1))
error =  np.sum((cm_log_prob.asarray() - lg_prob)**2)
print "Error = ", error, " sum_lg_prob = ", str(sum(lg_prob))