Example #1
def nn_save(w, b, path="./", file_prefix=""):
    """Temporary function for saving neural network weights to disk
    """
    layer_count = len(w)
    for i in range(layer_count):
        np.save(path + file_prefix + "L" + repr(i + 1) + "_w.npy", gnp.as_numpy_array(w[i]))
        np.save(path + file_prefix + "L" + repr(i + 1) + "_b.npy", gnp.as_numpy_array(b[i]))
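The matching loader is not shown on this page; a minimal sketch of the inverse, assuming the same path/file_prefix naming convention and a known layer count (nn_load is a hypothetical name):

import numpy as np

def nn_load(layer_count, path="./", file_prefix=""):
    # Hypothetical counterpart to nn_save: reads the per-layer .npy files
    # back as plain NumPy arrays (wrap with gnp.garray to move them to the GPU).
    w = [np.load(path + file_prefix + "L" + repr(i + 1) + "_w.npy")
         for i in range(layer_count)]
    b = [np.load(path + file_prefix + "L" + repr(i + 1) + "_b.npy")
         for i in range(layer_count)]
    return w, b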
Example #2
 def partition_function(self, batch_size, prec):
     """The exact value of Z calculated with precision prec. 
     Only feasible for small number of hidden units."""
     with decimal.localcontext() as ctx:
         if prec != 0:
             ctx.prec = prec
         batches = ml.common.util.pack_in_batches(all_states(self.n_hid),
                                               batch_size)
         if prec != 0:
             s = decimal.Decimal(0)
         else:
             allfhes = np.array([])
         seen_samples = 0L
         total_samples = 2L**self.n_hid
         for hid in batches:
             print >>stderr, "%i / %i           \r" % (seen_samples, total_samples),
             fhes = self.free_hidden_energy(hid)
             if prec != 0:
                 for fhe in gp.as_numpy_array(fhes):
                     p = decimal.Decimal(-fhe).exp()
                     s += p
             else:
                 allfhes = np.concatenate((allfhes, 
                                           -gp.as_numpy_array(fhes)))
             seen_samples += hid.shape[0]
         if prec != 0:
             return s
         else:
             return logsum(allfhes)
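A hedged usage sketch (rbm here is a hypothetical instance of the class this method belongs to): prec > 0 accumulates Z exactly as a decimal.Decimal, while prec=0 returns log Z via the numerically stable log-sum-exp path.

# exact partition function with 50 significant digits
Z = rbm.partition_function(batch_size=1024, prec=50)
# log of the partition function via logsum over the free energies
log_Z = rbm.partition_function(batch_size=1024, prec=0)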
Example #3
def tuple_to_array(t):
    x = t[0]
    y = t[1]
    out = np.zeros((x.shape[0], x.shape[1], x.shape[2], 2))
    out[:,:,:,0] = gp.as_numpy_array(x)
    out[:,:,:,1] = gp.as_numpy_array(y)
    return out    
Example #4
File: util.py Project: surban/ml
def plot_samples(samples, samples_force=None, twod=False, width=28, height=28):
    samples = gp.as_numpy_array(samples)
    samples = np.asarray(samples)
    if samples_force is not None:
        samples_force = gp.as_numpy_array(samples_force)
        samples_force = np.asarray(samples_force)
   
    if (not twod and samples.ndim == 1) or (twod and samples.ndim == 2):
        if twod:
            height = samples.shape[0]
            width = samples.shape[1]
        return _plot_one_sample(samples, samples_force, twod=twod,
                                width=width, height=height)
    else:
        n_samples = samples.shape[0]
        if twod:
            height = samples.shape[1]
            width = samples.shape[2]
        out = np.zeros((height, width*n_samples, 3))
        for s in range(n_samples):
            if samples_force is not None:
                o = _plot_one_sample(samples[s], samples_force[s], twod=twod,
                                     width=width, height=height)
            else:
                o =  _plot_one_sample(samples[s], None, twod=twod,
                                      width=width, height=height)
            out[:, s*width : (s+1)*width, :] = o
        return out
Example #5
    def output_errors(self, n_plot=100000, alpha=0.05,
                      plot_only_guilty_samples=True):

        # output statistics
        acc_low, acc_high, acc_mle = \
            self.generator_accuracy_interval(alpha=alpha)
        print "Error probability: %g [%g, %g]" % (1-acc_mle, 
                                                  1-acc_high, 1-acc_low)
        fe_low, fe_high = self.fe_interval(alpha=alpha)
        print "Free energy:       [%g, %g]" % (fe_low, fe_high)
 
        if self.tmpl_X is not None:
            # collect incorrectly classified samples
            tmpl_X = gp.as_numpy_array(self.tmpl_X)
            gen_X = gp.as_numpy_array(self.gen_X)
            if plot_only_guilty_samples:
                s = self.guilty_samples
            else:
                s = self.incorrect_samples

            err_tmpl_X = tmpl_X[s,:]
            err_gen_X = gen_X[s,:]
            err_tmpl_Z = self.tmpl_Z[s]
            err_gen_Z = np.asarray(self.gen_Z[s], dtype='uint8')
    
            # output
            print "Misclassified samples:"
            print "True labels:      ", err_tmpl_Z[0:n_plot]
            print "Generated labels: ", err_gen_Z[0:n_plot]
            if err_tmpl_X.shape[0] > 0:
                myplt = np.concatenate((common.util.plot_samples(err_tmpl_X[0:n_plot]), 
                                        ml.common.util.plot_samples(err_gen_X[0:n_plot])))
                plt.imshow(myplt, interpolation='none')
Example #6
File: rbm.py Project: surban/ml
 def free_energies_during_gibbs_sampling(self, x, kmax, beta=1):
     fes = []
     fes.append(gp.as_numpy_array(self.free_energy(x)))
     for k in range(kmax):
         x, _ = self.gibbs_sample(x, 1, beta=beta)
         fes.append(gp.as_numpy_array(self.free_energy(x)))
     fes = np.asarray(fes)
     return fes
Example #7
def dbn_save(ws_vh, ws_v, ws_h, path="./", file_prefix=""):
    """Temporary function for saving dbn weights from disk
    """
    layer_count = len(ws_vh)
    for i in range(layer_count):
        np.save(path + file_prefix + "L" + repr(i + 1) + "_w_vh.npy", gnp.as_numpy_array(ws_vh[i]))
        np.save(path + file_prefix + "L" + repr(i + 1) + "_w_v.npy", gnp.as_numpy_array(ws_v[i]))
        np.save(path + file_prefix + "L" + repr(i + 1) + "_w_h.npy", gnp.as_numpy_array(ws_h[i]))
Example #8
def generation_accuracy(label, ref_predict, myrbm,
                        tmpl_X, tmpl_Z, tmpl_ref_Z,
                        gen_X, gen_Z=None,
                        output_data_line=True, store_samples=True):

    tmpl_Z = gp.as_numpy_array(tmpl_Z)
    n_samples = gen_X.shape[0]

    if n_samples < tmpl_X.shape[0]:
        print "Warning: fewer generated samples than template samples were provided"
        tmpl_X = tmpl_X[0:n_samples, :]
        tmpl_Z = tmpl_Z[0:n_samples]
        tmpl_ref_Z = tmpl_ref_Z[0:n_samples]

    # calculate accuracy of reference classifier
    diff = tmpl_Z - tmpl_ref_Z
    errs = np.count_nonzero(diff)
    corr = n_samples - errs
    svc_acc = corr / float(n_samples)  # float() avoids Python 2 integer division

    # classify generated data
    if gen_Z is None:
        gen_Z = ml.common.util.map(gp.as_numpy_array(gen_X), 100, ref_predict,
                                caption="Classifying results with reference predictor")

    # count correctly classified samples
    diff = tmpl_Z - gen_Z
    errs = np.count_nonzero(diff)
    corr = n_samples - errs

    # find incorrect samples
    incorrect_samples = np.nonzero(diff)[0]
    guilty_samples = \
        incorrect_samples[tmpl_ref_Z[incorrect_samples] == 
                          tmpl_Z[incorrect_samples]]

    # calculate free energy
    fes = ml.common.util.map(gen_X, 1000, myrbm.free_energy,
                          caption="Calculating free energy")
    fes = gp.as_numpy_array(fes)
    fe_mean = np.mean(fes)
    fe_variance = ml.common.stats.unbiased_varince(fes)

    # create stability object
    s = Stability(label, n_samples, corr, svc_acc, 
                  fe_mean, fe_variance, incorrect_samples, guilty_samples)
    if store_samples:
        s.tmpl_X = tmpl_X
        s.tmpl_Z = tmpl_Z
        s.tmpl_ref_Z = tmpl_ref_Z
        s.gen_X = gen_X
        s.gen_Z = gen_Z

    # output performance data in table format
    if output_data_line:
        s.output_data_line()

    return s
Example #9
def or_rest_fast(z, x):
    z = gp.as_numpy_array(z)
    x = gp.as_numpy_array(x)
    
    y = np.zeros(z.shape)
    ym = np.ones(z.shape)
    ym[(z == 1) & (x == 1)] = 0
    y[(z == 1) & (x == 0)] = 1
    
    return gp.as_garray(y), gp.as_garray(ym)   
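The masking rules are easiest to see on a toy input; a minimal check, assuming gnumpy imports as gp and or_rest_fast from above is in scope:

import numpy as np
import gnumpy as gp

z = gp.as_garray(np.array([1., 1., 0., 0.]))
x = gp.as_garray(np.array([1., 0., 1., 0.]))
y, ym = or_rest_fast(z, x)
assert np.allclose(gp.as_numpy_array(y), [0., 1., 0., 0.])   # y on only where z=1, x=0
assert np.allclose(gp.as_numpy_array(ym), [0., 1., 1., 1.])  # mask off where z=1, x=1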
Example #10
def save_parameters(rbm, epoch_or_filename):
    if isinstance(epoch_or_filename, str):
        filename = epoch_or_filename
    else:
        filename = "weights-%02i.npz" % epoch_or_filename
    np.savez_compressed(
        filename,
        weights=gp.as_numpy_array(rbm.weights),
        bias_vis=gp.as_numpy_array(rbm.bias_vis),
        bias_hid=gp.as_numpy_array(rbm.bias_hid),
    )
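A sketch of the inverse operation, assuming the archive keys used above and that the arrays should go back onto the GPU (load_parameters is a hypothetical name):

import numpy as np
import gnumpy as gp

def load_parameters(rbm, epoch_or_filename):
    # Hypothetical counterpart to save_parameters.
    if isinstance(epoch_or_filename, str):
        filename = epoch_or_filename
    else:
        filename = "weights-%02i.npz" % epoch_or_filename
    data = np.load(filename)
    rbm.weights = gp.as_garray(data["weights"])
    rbm.bias_vis = gp.as_garray(data["bias_vis"])
    rbm.bias_hid = gp.as_garray(data["bias_hid"])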
Example #11
def nn_forward_pass(x, w, b, return_all=True):
    """
    Forward pass for multilayer feed-forward sigmoid neural network
    
    Hidden units have sigmoid non-linearity. 
    Output is soft-max.

    x: DxN matrix of input data
    w: Weights. List of weight matrices for each layer.
    b: Biases. List of bias vectors for each layer
    return_all: If True, returns hidden unit activations for each layer. If False,
        just returns the output layer activations.
    Returns a list h where each element is a matrix containing the activations
    for that layer. h[0] is input data x. 
    """
    # ---- TEMP HACK --------------
    # I should find a more seamless way of running in mixed (some operations
    # with numpy, some with gnumpy) mode.
    # I had to resort to this, because I needed the validation classification
    # step in nn_train to run on CPU with numpy. GPU ran out of memory.
    use_gpu = isinstance(x, gnp.garray)

    layer_count = len(w)
    if return_all:
        hs = [x]  # unit activations for each layer
    h = x

    # all layers except the output layer
    for l in range(layer_count - 1):
        if use_gpu:
            a = gnp.dot(w[l].T, h) + b[l]
            h = gnp.logistic(a)
        else:
            a = np.dot(gnp.as_numpy_array(w[l]).T, h) + gnp.as_numpy_array(b[l])
            h = 1.0 / (1 + np.exp(-a))
        if return_all:
            hs.append(h)

    # output layer
    if use_gpu:
        h = gnp.dot(w[-1].T, h) + b[-1]
        h = gnp.exp(h) / gnp.sum(gnp.exp(h), axis=0)  # soft-max
    else:
        h = np.dot(gnp.as_numpy_array(w[-1]).T, h) + gnp.as_numpy_array(b[-1])
        h = np.exp(h) / np.sum(np.exp(h), axis=0)  # soft-max

    if return_all:
        hs.append(h)
        return hs
    else:
        return h
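A minimal CPU-path usage sketch with made-up sizes (one hidden layer; all names and dimensions below are illustrative):

import numpy as np

D, H, K, N = 2, 3, 2, 5                               # input, hidden, output, batch
w = [np.random.randn(D, H), np.random.randn(H, K)]    # per-layer weight matrices
b = [np.random.randn(H, 1), np.random.randn(K, 1)]    # per-layer bias columns
x = np.random.randn(D, N)                             # plain ndarray -> CPU path

hs = nn_forward_pass(x, w, b)
assert hs[0] is x and hs[-1].shape == (K, N)
assert np.allclose(hs[-1].sum(axis=0), 1.0)           # soft-max columns sum to 1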
Example #12
def generate_or_dataset(X, Z, samples):
    X = gp.as_numpy_array(X)
    Z = gp.as_numpy_array(Z)

    si = np.random.randint(0, X.shape[0], size=(samples, 2))

    x = X[si[:, 0], :]
    y = X[si[:, 1], :]
    O = or_sample(x, y)

    OZ = np.zeros((samples, 2))
    OZ[:, 0] = Z[si[:, 0]]
    OZ[:, 1] = Z[si[:, 1]]

    return O, OZ
Example #13
def apply_nn_test(P, net, nCxt, outLayer, feat_dir, FeatList, outFeatDir, useDropout):
    "Sends the test features for feedforward, and applies the PCA calculated from training files"

    fdir = ''
    inFeatList = open(feat_dir + FeatList).readlines()

    for fname in inFeatList:
        if fname == '\n':
            continue
        elif fname.rstrip()[-1] == ':':
            fdir = fname.rstrip()[:-1]+'/'
            print fdir
            continue
        elif fname.rstrip()[-3:]=='txt':
            utt = np.loadtxt(feat_dir + fdir + fname[:-1])
            # if not useDropout:
            outputs = gpu.as_numpy_array(net.fprop_xf(utt, outLayer))
            # else:
            # outputs = gpu.as_numpy_array(net.fpropDropout(utt, outLayer))
                
            assert(outputs.shape[1] == 40)
            outputs = np.dot(outputs, P)
            # if i/1*1 == i:
                # gpu.free_reuse_cache()

        outfile=htkmfc.HTKFeat_write(feat_dir + outFeatDir + 'test_feat/' + fdir[-9:] + fname[:-5], outputs.shape[1], htkmfc.USER)
        outfile.writeall(outputs)
        del outfile
        del outputs
        gpu.free_reuse_cache()
Example #14
    def forward(self, X):
        self.X = X
        # Num of examples
        N = X.shape[0]
        # Timespan
        T = X.shape[1]
        # Windows size
        S = self.windowSize
        # Channels
        D = self.numChannels
        # Num filters
        F = self.numFilters
        Z = np.zeros((N, T - S + 1, S, D), X.dtype)
        for i in range(T - S + 1):
            Z[:, i, :, :] = X[:, i:i + S, :]
        Z = Z.reshape(N * (T - S + 1), S * D)
        if self.gpu:
            Z = gpu.as_garray(Z.astype('float32'))
            Y = gpu.dot(Z, self.W)
            Y = gpu.as_numpy_array(Y)
        else:
            Y = np.dot(Z, self.W)

        Y = Y.reshape(N, T - S + 1, F)
        self.Z = Z
        return Y
Example #15
    def extract_patches(self, X, data_shape):
        """
        Extract patches from input data according to its shape and the kernel
        configurations.

        Return patches matrix of size (H*W*N)x(C*ksize*ksize)
        """
        X = gnp.as_numpy_array(X).reshape(-1, data_shape.c, data_shape.h,
                                          data_shape.w)

        out_shape = self.compute_output_shape(data_shape)
        padded_h = (out_shape.h - 1) * self.stride + self.ksize
        padded_w = (out_shape.w - 1) * self.stride + self.ksize

        if padded_h > data_shape.h or padded_w > data_shape.w:
            new_X = np.zeros((X.shape[0], X.shape[1], padded_h, padded_w),
                             dtype=X.dtype)
            new_X[:, :, :data_shape.h, :data_shape.w] = X
            X = new_X

        assert data_shape.c == self.n_ic

        patches = []
        for i in xrange(0, X.shape[-2] - self.ksize + 1, self.stride):
            for j in xrange(0, X.shape[-1] - self.ksize + 1, self.stride):
                patches.append(X[:, :, i:i + self.ksize, j:j + self.ksize])

        return np.concatenate(patches, axis=0).reshape(
            -1, self.ksize * self.ksize * self.n_ic)
Example #16
 def recover_input(self, Y, out_shape, in_shape, **kwargs):
     """
     Return recovered input and input_shape
     """
     Y = gnp.as_numpy_array(Y).reshape(-1, out_shape.c, out_shape.h, out_shape.w).transpose((0,2,3,1)).reshape(-1, out_shape.c)
     P = self.recover_patches_from_responses(Y, **kwargs)
     return self.overlay_patches(P, out_shape, in_shape)
Example #17
    def extract_patches(self, X, data_shape):
        """
        Extract patches from input data according to its shape and the kernel
        configurations.

        Return patches matrix of size (H*W*N)x(C*ksize*ksize)
        """
        X = gnp.as_numpy_array(X).reshape(-1, data_shape.c, data_shape.h, data_shape.w)

        out_shape = self.compute_output_shape(data_shape)
        padded_h = (out_shape.h - 1) * self.stride + self.ksize
        padded_w = (out_shape.w - 1) * self.stride + self.ksize

        if padded_h > data_shape.h or padded_w > data_shape.w:
            new_X = np.zeros((X.shape[0], X.shape[1], padded_h, padded_w), dtype=X.dtype)
            new_X[:,:,:data_shape.h, :data_shape.w] = X
            X = new_X
        
        assert data_shape.c == self.n_ic

        patches = []
        for i in xrange(0, X.shape[-2] - self.ksize + 1, self.stride):
            for j in xrange(0, X.shape[-1] - self.ksize + 1, self.stride):
                patches.append(X[:,:,i:i+self.ksize, j:j+self.ksize])

        return np.concatenate(patches, axis=0).reshape(-1, self.ksize*self.ksize*self.n_ic)
Example #18
def get_random_patches(X, in_shape, ksize, n_patches_per_image, batch_size=100, pad_h=0, pad_w=0):
    """
    Extract random patches from images X.

    X: Nx(C*H*W) matrix, each row is an image
    in_shape: shape information for each input image
    ksize: size of the patches
    n_patches_per_image: number of patches per image
    batch_size: size of a batch.  In each batch the patch locations will be the
        same.

    Return (n_patches_per_image*N)x(C*ksize*ksize) matrix, each row is one
        patch.
    """
    X = gnp.as_numpy_array(X).reshape(-1, in_shape.c, in_shape.h, in_shape.w)
    if pad_h > 0 or pad_w > 0:
        new_X = np.zeros((X.shape[0], in_shape.c, in_shape.h + pad_h, in_shape.w + pad_w), dtype=X.dtype)
        new_X[:,:,:in_shape.h,:in_shape.w] = X
        X = new_X

    patches = []
    for n in xrange(n_patches_per_image):
        for im_idx in xrange(0, X.shape[0], batch_size):
            h_start = np.random.randint(X.shape[-2] - ksize + 1)
            w_start = np.random.randint(X.shape[-1] - ksize + 1)

            patches.append(X[im_idx:im_idx+batch_size,:,h_start:h_start+ksize,w_start:w_start+ksize])

    return np.concatenate(patches, axis=0).reshape(-1, in_shape.c*ksize*ksize)
Example #19
    def backward(self, dEdY):
        N = dEdY.shape[0]
        S = self.windowSize
        T = dEdY.shape[1] + S - 1
        F = dEdY.shape[2]
        D = self.X.shape[2]
        dEdY = dEdY.reshape(N * (T - S + 1), F)
        dEdX = np.zeros(self.X.shape, self.X.dtype)
        
        if self.gpu:
            gdEdY = gpu.as_garray(dEdY.astype('float32'))
            self.dEdW = gpu.dot(self.Z.transpose(), gdEdY)
        else:
            self.dEdW = np.dot(self.Z.transpose(), dEdY)

        if self.outputdEdX:
            if self.gpu:
                gdEdZ = gpu.dot(gdEdY, self.W.transpose())
                dEdZ = gpu.as_numpy_array(gdEdZ)
            else:
                dEdZ = np.dot(dEdY, self.W.transpose())

            dEdZ = dEdZ.reshape(N, T - S + 1, S, D)
            for t in range(0, T):
                if t <= S - 1:
                    dEdX[:, t, :] = np.sum(dEdZ[:, range(0, t + 1), range(t, -1, -1), :], axis=1)
                elif t >= T - S + 1:
                    dEdX[:, t, :] = np.sum(dEdZ[:, range(t - S + 1, T - S + 1), range(S - 1, S - (T - t) - 1, -1), :], axis=1)
                else:
                    dEdX[:, t, :] = np.sum(dEdZ[:, range(t - S + 1, t + 1), range(S - 1, -1, -1), :], axis=1)
        return dEdX
Example #20
    def forward(self, X):
        self.X = X
        # Num of examples
        N = X.shape[0]
        # Timespan
        T = X.shape[1]
        # Windows size
        S = self.windowSize
        # Channels
        D = self.numChannels
        # Num filters
        F = self.numFilters
        Z = np.zeros((N, T - S + 1, S, D), X.dtype)
        for i in range(T - S + 1):
            Z[:, i, :, :] = X[:, i : i + S, :]
        Z = Z.reshape(N * (T - S + 1), S * D)
        if self.gpu:
            Z = gpu.as_garray(Z.astype('float32'))
            Y = gpu.dot(Z, self.W)
            Y = gpu.as_numpy_array(Y)
        else:
            Y = np.dot(Z, self.W)

        Y = Y.reshape(N, T - S + 1, F)
        self.Z = Z
        return Y
Example #21
def or_rest(z, x):
    z = gp.as_numpy_array(z)
    x = gp.as_numpy_array(x)

    y = np.zeros(z.shape)
    ym = np.ones(z.shape)
    ym[(z == 1) & (x == 1)] = 0
    y[(z == 1) & (x == 0)] = 1

    # If pixels that are needed to explain the picture are forced on,
    # this results in pixels that cannot be turned off by the ml.rbm.
    ym[(z == 1) & (x == 0)] = 0

    # turn off whole force:
    # ym = ym * 0

    return gp.as_garray(y), gp.as_garray(ym)
Example #22
 def backprop(self, dLdY, return_on_gpu=False):
     """Perform backprop through this layer.
     """
     # Backprop is just multiplication by the mask from feedforward
     dLdX = gp.garray(dLdY) * self.dYdX
     if not return_on_gpu:
         dLdX = gp.as_numpy_array(dLdX).astype(np.float32)
     return dLdX
Example #23
 def constrainMaxNorm(self):
     if self.max_norm == -1:
         return
     for i in range(len(self.weights)):
         wf = gnp.as_numpy_array(self.weights[i]).flatten()
         if l2norm(wf) > self.max_norm:
             wf = (wf / l2norm(wf)) * self.max_norm
             self.weights[i] = gnp.garray(wf.reshape(self.weights[i].shape))
Example #24
def check_performance(svc):
    X, TX, y, Ty = ml.rbm.util.load_mnist(False)
    X = gp.as_numpy_array(X)
    y = gp.as_numpy_array(y)
    TX = gp.as_numpy_array(TX)
    Ty = gp.as_numpy_array(Ty)

    print "Checking performance..."
    nt = 10000
    Py = svc.predict(X[0:nt])
    training_err = ml.common.util.classification_error(Py, y[0:nt])
    PTy = svc.predict(TX)
    test_err = ml.common.util.classification_error(PTy, Ty)

    print "Prediction error on first %d training samples: %g" % (nt, training_err)
    print "Prediction error on test set:                  %g" % test_err

    return svc
Example #25
 def train(self, x):
     if self.prev:
         x = self.prev.process(x)
     x = gnp.as_garray(x)
     self.avg = x.mean(axis=0)
     cov = (x - self.avg).T.dot(x - self.avg) / x.shape[0]
     cov = gnp.as_numpy_array(cov)
     self.sqrcov = la.cholesky(cov + np.eye(cov.shape[0]) * 1e-5)
     self.m = gnp.as_garray(la.inv(self.sqrcov + np.eye(x.shape[1]) * 1e-5))
Example #26
def generate_or_dataset_with_shift(S, SZ, ref_SZ, x_shift, y_shift, n_samples, sample_indices=None):
    S = gp.as_numpy_array(S)
    SZ = gp.as_numpy_array(SZ)

    if sample_indices is not None:
        si = sample_indices
    else:
        si = generate_sample_indices_for_or_dataset(S, n_samples)

    X = S[si[:, 0]]
    XZ = SZ[si[:, 0]]
    ref_XZ = ref_SZ[si[:, 0]]
    Y = S[si[:, 1]]
    YZ = SZ[si[:, 1]]
    ref_YZ = ref_SZ[si[:, 1]]

    O = or_sample_with_shift(X, Y, x_shift, y_shift)

    return X, XZ, ref_XZ, Y, YZ, ref_YZ, O
Example #27
 def recover_input(self, Y, out_shape, in_shape, **kwargs):
     """
     Return recovered input and input_shape
     """
     Y = gnp.as_numpy_array(Y).reshape(-1, out_shape.c, out_shape.h,
                                       out_shape.w).transpose(
                                           (0, 2, 3,
                                            1)).reshape(-1, out_shape.c)
     P = self.recover_patches_from_responses(Y, **kwargs)
     return self.overlay_patches(P, out_shape, in_shape)
Example #28
def or_rest(z, x):
    z = gp.as_numpy_array(z)
    x = gp.as_numpy_array(x)
    
    y = np.zeros(z.shape)
    ym = np.ones(z.shape)
    ym[(z == 1) & (x == 1)] = 0
    y[(z == 1) & (x == 0)] = 1
    
    # "no on force":
    # If pixels that are needed to explain the picture are forced on
    # this results in pixels that cannot be turned off by the ml.rbm.
    ym[(z == 1) & (x == 0)] = 0

    # "no force":
    #ym = ym * 0

    # best is to have "no force" off and "no on force" on
    
    return gp.as_garray(y), gp.as_garray(ym)   
Example #29
def train():
    X, TX, y, Ty = ml.rbm.util.load_mnist(False)
    X = gp.as_numpy_array(X)
    y = gp.as_numpy_array(y)
    TX = gp.as_numpy_array(TX)
    Ty = gp.as_numpy_array(Ty)

    #X = X[0:3000, ...]
    #y = y[0:3000, ...]

    print "Fitting SVM..."
    svc = svm.SVC(kernel='rbf', verbose=True)
    svc.fit(X, y)

    filename = "mnist_svm.dat"
    print "Writing model to %s" % filename
    with gzip.open(filename, 'wb') as file:
        pickle.dump(svc, file, pickle.HIGHEST_PROTOCOL)

    return svc
Example #30
def array(x, dtype=None, **kwargs):
    if gnp.is_garray(x):
        if dtype is gpu_float32:
            return x
        else:
            return np.array(gnp.as_numpy_array(x), dtype=dtype, **kwargs)
    else:
        if dtype is gpu_float32:
            return gnp.as_garray(np.array(x, **kwargs))
        else:
            return np.array(x, dtype=dtype, **kwargs)
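The four dispatch cases, assuming gpu_float32 is the module-level sentinel dtype this function compares against:

import numpy as np
import gnumpy as gnp

a = np.arange(4, dtype='float64')
g = gnp.as_garray(a)

r1 = array(a)                     # ndarray in, CPU dtype -> stays a NumPy array
r2 = array(a, dtype=gpu_float32)  # ndarray in, GPU dtype -> converted to a garray
r3 = array(g, dtype=gpu_float32)  # garray in, GPU dtype  -> returned unchanged
r4 = array(g, dtype='float32')    # garray in, CPU dtype  -> copied back to NumPy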
Example #31
def or_performance(myrbm, svc, OX, OZ, iters, gibbs_steps, beta):
    OZ = gp.as_numpy_array(OZ)
    batch_size = 1000
    errs = 0

    for i in range(int(math.ceil(OX.shape[0] / float(batch_size)))):
        ox = OX[i*batch_size : (i+1)*batch_size, :]
        oz = OZ[i*batch_size : (i+1)*batch_size, :]

        x1, x2 = ml.rbm.orrbm.or_infer(myrbm, ox, iters, gibbs_steps, beta=beta)
        y1 = svc.predict(gp.as_numpy_array(x1))
        y2 = svc.predict(gp.as_numpy_array(x2))

        z1 = oz[:, 0]
        z2 = oz[:, 1]

        diff = (z1 - y1)**2 + (z2 - y2)**2
        errs += np.count_nonzero(diff)

    err_prob = errs / float(OX.shape[0])
    return err_prob
Example #32
def train_model():
    m = build_model()

    stop = climin.stops.any_([
        climin.stops.after_n_iterations(max_iter),
        ])
    pause = climin.stops.modulo_n_iterations(n_report)

    weight_decay = ((m.parameters.hidden_to_out ** 2).sum())
    #                + (m.parameters.hidden_to_hidden_0**2).sum()
    #                + (m.parameters.hidden_to_out**2).sum())
    weight_decay /= m.exprs['inpt'].shape[0]
    m.exprs['true_loss'] = m.exprs['loss']
    m.exprs['loss'] = m.exprs['loss'] + c_wd * weight_decay

    f_wd = m.function(['inpt'], c_wd * weight_decay)
    n_wrong = 1 - T.eq(T.argmax(m.exprs['output'], axis=1), T.argmax(m.exprs['target'], axis=1)).mean()
    f_n_wrong = m.function(['inpt', 'target'], n_wrong)
                
    losses = []
    v_losses = []
    print 'max iter', max_iter

    start = time.time()
    # Set up a nice printout.
    keys = '#', 'loss', 'val loss', 'seconds', 'wd', 'train emp', 'test emp'
    max_len = max(len(i) for i in keys)
    header = '\t'.join(i for i in keys)
    print header
    print '-' * len(header)

    f_loss = m.function(['inpt', 'target'], ['true_loss', 'loss'])

    for i, info in enumerate(m.powerfit((X, Z), (TX, TZ), stop, pause)):
        if info['n_iter'] % n_report != 0:
            continue
        passed = time.time() - start
        losses.append(info['loss'])
        v_losses.append(info['val_loss'])
    
        #img = tile_raster_images(fe.parameters['in_to_hidden'].T, image_dims, feature_dims, (1, 1))
        #save_and_display(img, 'filters-%i.png' % i)  
        info.update({
            'time': passed,
            'l2-loss': scalar(f_wd(X)),
            'train_emp': scalar(f_n_wrong(X, Z)),
            'test_emp': scalar(f_n_wrong(TX, TZ)),
        })
        row = '%(n_iter)i\t%(loss)g\t%(val_loss)g\t%(time)g\t%(l2-loss)g\t%(train_emp)g\t%(test_emp)g' % info
        print row


    np.savez_compressed(savepath, parameters=gp.as_numpy_array(m.parameters.data[...]))
Example #33
def mixing_quality(samples):
    samples = gp.as_numpy_array(samples)
    n_steps = samples.shape[0]

    avg_dists = []
    for step in range(n_steps-1):
        v_now = samples[step, :, :]
        v_next = samples[step+1, :, :]

        dists = np.sqrt(np.sum(np.power(v_next - v_now, 2), axis=1))
        avg_dists.append(np.mean(dists))

    return np.mean(avg_dists)
Example #34
    def gradDebug(self, inputBatch, targetBatch):
        inputBatch = inputBatch if isinstance(inputBatch, gnp.garray) else gnp.garray(inputBatch)
        targetBatch = targetBatch if isinstance(targetBatch, gnp.garray) else gnp.garray(targetBatch)
        

        mbsz = inputBatch.shape[0]
        outputActs = self.fprop(inputBatch)
        outputErrSignal = -self.outputActFunct.dErrordNetInput(targetBatch, self.state[-1], outputActs)
        errSignals = self.bprop(outputErrSignal)
        for i, (WGrad, biasGrad) in enumerate(self.gradients(self.state, errSignals)):
            self.WGrads[i] = WGrad
            self.biasGrads[i] = biasGrad
        allWeightGrads = itertools.chain(self.WGrads, self.biasGrads)
        return gnp.as_numpy_array(gnp.concatenate([dw.ravel() for dw in allWeightGrads])) 
Example #35
File: dbn.py Project: caomw/gdbn
    def gradDebug(self, inputBatch, targetBatch):
        inputBatch = inputBatch if isinstance(inputBatch, gnp.garray) else gnp.garray(inputBatch)
        targetBatch = targetBatch if isinstance(targetBatch, gnp.garray) else gnp.garray(targetBatch)

        mbsz = inputBatch.shape[0]
        outputActs = self.fprop(inputBatch)
        outputErrSignal = -self.outputActFunct.dErrordNetInput(targetBatch, self.state[-1], outputActs)
        # error = self.outputActFunct.error(targetBatch, self.state[-1], outputActs)
        errSignals = self.bprop(outputErrSignal)
        for i, (WGrad, biasGrad) in enumerate(self.gradients(self.state, errSignals)):
            # update the weight increments
            self.WGrads[i] = WGrad
            self.biasGrads[i] = biasGrad
        allWeightGrads = itertools.chain(self.WGrads, self.biasGrads)
        return gnp.as_numpy_array(gnp.concatenate([dw.ravel() for dw in allWeightGrads]))
Example #36
 def backward(self, dEdY):
     dEdZ = self.activeFn.backward(dEdY, self.Y, 0)
     if self.gpu:
         gdEdZ = gpu.as_garray(dEdZ.astype('float32'))
         self.dEdW = gpu.dot(self.X.transpose(), gdEdZ)
         if self.bias:
             dEdX = gpu.dot(gdEdZ, self.W[:-1, :].transpose())
         else:
             dEdX = gpu.dot(gdEdZ, self.W.transpose())
         dEdX = gpu.as_numpy_array(dEdX)
     else:
         self.dEdW = np.dot(self.X.transpose(), dEdZ)
         if self.bias:
             dEdX = np.dot(dEdZ, self.W[:-1, :].transpose())
         else:
             dEdX = np.dot(dEdZ, self.W.transpose())
     return dEdX if self.outputdEdX else None
Example #37
 def write(self, dat):
     """
     add dat to buffer
     """
     dat=gp.as_numpy_array(dat)
     end=self.index+dat.shape[0]
     if end<=self.maxrows:
         if self.data is None:
             self.data=np.empty((self.maxrows, dat.shape[1]))
         self.data[self.index:end]=dat
         self.index=end
     elif self.index==self.maxrows:
         self.flush()
         self.index=0
         self.write(dat)
     else:
         raise Exception("disk write buffer is not algined with batchsize")
Example #38
 def write(self, dat):
     """
     add dat to buffer
     """
     dat = gp.as_numpy_array(dat)
     end = self.index + dat.shape[0]
     if end <= self.maxrows:
         if self.data is None:
             self.data = np.empty((self.maxrows, dat.shape[1]))
         self.data[self.index:end] = dat
         self.index = end
     elif self.index == self.maxrows:
         self.flush()
         self.index = 0
         self.write(dat)
     else:
         raise Exception("disk write buffer is not algined with batchsize")
Example #39
    def backward(self, dEdY):
        N = dEdY.shape[0]
        S = self.windowSize
        T = dEdY.shape[1] + S - 1
        F = dEdY.shape[2]
        D = self.X.shape[2]
        dEdY = dEdY.reshape(N * (T - S + 1), F)
        dEdX = np.zeros(self.X.shape, self.X.dtype)

        if self.gpu:
            gdEdY = gpu.as_garray(dEdY.astype('float32'))
            self.dEdW = gpu.dot(self.Z.transpose(), gdEdY)
        else:
            self.dEdW = np.dot(self.Z.transpose(), dEdY)

        if self.outputdEdX:
            if self.gpu:
                gdEdZ = gpu.dot(gdEdY, self.W.transpose())
                dEdZ = gpu.as_numpy_array(gdEdZ)
            else:
                dEdZ = np.dot(dEdY, self.W.transpose())

            dEdZ = dEdZ.reshape(N, T - S + 1, S, D)
            for t in range(0, T):
                if t <= S - 1:
                    dEdX[:, t, :] = np.sum(dEdZ[:,
                                                range(0, t + 1),
                                                range(t, -1, -1), :],
                                           axis=1)
                elif t >= T - S + 1:
                    dEdX[:, t, :] = np.sum(dEdZ[:,
                                                range(t - S + 1, T - S + 1),
                                                range(S - 1, S - (T - t) -
                                                      1, -1), :],
                                           axis=1)
                else:
                    dEdX[:, t, :] = np.sum(dEdZ[:,
                                                range(t - S + 1, t + 1),
                                                range(S - 1, -1, -1), :],
                                           axis=1)
        return dEdX
Example #40
def apply_nn_train_prePCA(net, nCxt, outLayer, feat_dir, FeatList, outFeatDir, Nframes, useDropout):
    """Sends the training features for feedforward and collects the output in a matrix X for performing PCA"""

    fdir = ''
    dim = net.weights[-2].shape[1]
    X = np.zeros((Nframes, dim))

    inFeatList = open(feat_dir + FeatList).readlines()

    fro = 0
    to = 0

    for fname in inFeatList:
        if fname.rstrip()[-1] == ':':
            fdir = fname.rstrip()[:-1]+'/'
            continue
        elif fname.rstrip()[-3:]=='txt':
            utt = np.loadtxt(feat_dir + fdir + fname.rstrip())
            # if not useDropout:
            outputs = gpu.as_numpy_array(net.fprop_xf(utt, outLayer))
            # else:
            #     outputs = gpu.as_numpy_array(net.fpropDropout(utt, outLayer))
            assert(outputs.shape[1] == 40)
            fro = to
            to = fro + outputs.shape[0]
            # if X == None:
            # 	X = outputs
            # else:
            X[fro:to] = outputs
            # X = np.concatenate((X,outputs))
            # if i/1*1 == i:
            #   gpu.free_reuse_cache()
            # np.savetxt(feat_dir + outFeatDir + 'train_16k_prePCA/' + fname, gpu.as_numpy_array(outputs))
            np.save(feat_dir + outFeatDir + 'train_prePCA/' + fname[:-5], outputs)
        del outputs
        gpu.free_reuse_cache()

    #End of for
    return X
Example #41
def get_random_patches(X,
                       in_shape,
                       ksize,
                       n_patches_per_image,
                       batch_size=100,
                       pad_h=0,
                       pad_w=0):
    """
    Extract random patches from images X.

    X: Nx(C*H*W) matrix, each row is an image
    in_shape: shape information for each input image
    ksize: size of the patches
    n_patches_per_image: number of patches per image
    batch_size: size of a batch.  In each batch the patch locations will be the
        same.

    Return (n_patches_per_image*N)x(C*ksize*ksize) matrix, each row is one
        patch.
    """
    X = gnp.as_numpy_array(X).reshape(-1, in_shape.c, in_shape.h, in_shape.w)
    if pad_h > 0 or pad_w > 0:
        new_X = np.zeros(
            (X.shape[0], in_shape.c, in_shape.h + pad_h, in_shape.w + pad_w),
            dtype=X.dtype)
        new_X[:, :, :in_shape.h, :in_shape.w] = X
        X = new_X

    patches = []
    for n in xrange(n_patches_per_image):
        for im_idx in xrange(0, X.shape[0], batch_size):
            h_start = np.random.randint(X.shape[-2] - ksize + 1)
            w_start = np.random.randint(X.shape[-1] - ksize + 1)

            patches.append(X[im_idx:im_idx + batch_size, :,
                             h_start:h_start + ksize, w_start:w_start + ksize])

    return np.concatenate(patches, axis=0).reshape(-1,
                                                   in_shape.c * ksize * ksize)
Example #42
    def costAndGrad(self, data, labels):

        # forward prop
        self.hActs[0] = data
        i = 1
        for w, b in self.stack:
            self.hActs[i] = w.dot(self.hActs[i - 1]) + b
            if i <= len(self.layerSizes):
                self.hActs[i] = self.activation(self.hActs[i])
            i += 1

        probs = self.hActs[-1] - gp.max(self.hActs[-1], axis=0)
        probs = gp.exp(probs)
        probs = probs / gp.sum(probs, axis=0)

        labelMat = np.zeros(probs.shape)
        labelMat[labels, range(self.mbSize)] = 1
        labelMat = gp.garray(labelMat)
        cost = -(1. / self.mbSize) * np.nansum(
            gp.as_numpy_array(labelMat * gp.log(probs)))

        if not self.train:
            return cost, None

        # back prop
        self.deltas[-1] = probs - labelMat
        i = len(self.layerSizes) - 1
        for w, b in reversed(self.stack[1:]):
            grad = self.activation(self.hActs[i + 1], True)
            self.deltas[i] = w.T.dot(self.deltas[i + 1]) * grad
            i -= 1

        # compute gradients
        for i in range(len(self.grad)):
            self.grad[i][0] = (1. / self.mbSize) * self.deltas[i].dot(
                self.hActs[i].T)
            self.grad[i][1] = (1. / self.mbSize) * gp.sum(
                self.deltas[i], axis=1).reshape(-1, 1)
        return cost, self.grad
Example #43
 def getWeights(self):
     if self.gpu:
         return gpu.as_numpy_array(self.W)
     else:
         return self.W
Example #44
    def costAndGrad(self, data, labels, key=None):
        """
        Forward prop entire utterance
        Call CTC cost function
        Compute gradient

        data is a 2-D matrix where each column is a single time frame
        Number of input frames changes across iterations
        
        labels is a vector of symbol ids, length unknown and does not
        depend on the number of time frames
        """

        ## forward prop
        T = data.shape[1]
        sizes = [self.inputDim] + self.layerSizes + [self.outputDim]
        stackMax = len(self.stack) - 1
        if self.temporalLayer > 0:
            stackMax -= 1

        self.hActs = [gp.empty((s, T)) for s in sizes]
        self.hActs[0] = data
        #for t in range(T):
        i = 1
        for l in range(stackMax + 1):
            w, b = self.stack[l]

            self.hActs[i] = w.dot(self.hActs[i - 1]) + b
            # loop over time for recurrent layer
            if (self.temporalLayer - 1) == l:
                for t in range(T):
                    if t > 0:
                        self.hActs[i][:, t] += self.stack[-1][0].dot(
                            self.hActs[i][:, t - 1])
                    # nonlinearity
                    if i <= stackMax:
                        self.hActs[i][:, t] = self.activation(self.hActs[i][:,
                                                                            t])
            # hidden layer activation function for batch forward prop
            elif i <= stackMax:
                self.hActs[i] = self.activation(self.hActs[i])

            #    w_t,b_t = self.stack[-1][0]
            #    self.hActs[i][:,t] += self.stack[-1][0].dot(self.hActs[i][:,t-1])
            i += 1

        # convert final layer to probs after all time iteration complete
        probs = self.hActs[-1] - gp.max(self.hActs[-1], axis=0)
        probs = gp.as_numpy_array(probs)
        probs = np.exp(probs)
        probs = probs / np.sum(probs, axis=0)

        ## pass probs and label string to ctc loss
        # TODO how much does passing to different function cost us?
        cost, delta_output, skip = ctc.ctc_loss(probs,
                                                labels.squeeze(),
                                                blank=0)

        # Store probabilities and error signal for a given key
        if key is not None and key in self.hist:
            self.hist[key].append((probs, delta_output))

        if not self.train:
            return cost, None

        delta_output = gp.garray(delta_output)
        ## back prop through time
        # zero gradients
        self.grad = [[gp.zeros(w.shape), gp.zeros(b.shape)]
                     for w, b in self.stack]
        if self.temporalLayer > 0:
            delta_t = np.zeros(self.layerSizes[self.temporalLayer - 1])
        for t in reversed(range(T)):
            # get delta from loss function
            delta = delta_output[:, t].T

            # compute gradient for output layer
            #print self.hActs[-2].shape, delta.shape, self.stack[stackMax][0].shape
            #print delta.reshape(-1,1).shape, self.hActs[-2][:,t].reshape(-1,1).shape
            # TODO can we get rid of some of these annoying reshape -1 1?
            self.grad[stackMax][0] += delta.reshape(-1, 1).dot(
                self.hActs[-2][:, t].reshape(-1, 1).T)
            self.grad[stackMax][1] += delta.reshape(-1, 1)

            # push delta through output layer
            delta = self.stack[stackMax][0].T.dot(delta)

            # iterate over lower layers
            i = len(self.layerSizes) - 1
            while i >= 0:
                # add the temporal delta if this is the recurrent layer
                if (self.temporalLayer - 1) == i:
                    #print delta.shape, delta_t.shape
                    delta += delta_t
                # push delta through activation function for this layer
                #print i, stackMax, delta.shape, self.hActs[i+1][:,t].shape
                delta = delta * self.activation(self.hActs[i + 1][:, t], True)
                #embed()
                # compute the gradient
                #print i, delta.shape, self.hActs[i][:,t].T.reshape(1,-1).shape, self.grad[i][0].shape
                self.grad[i][0] += delta.reshape(-1, 1).dot(
                    self.hActs[i][:, t].T.reshape(1, -1))
                self.grad[i][1] += delta.reshape(-1, 1)

                # add the temporal delta if this is the recurrent layer
                if (self.temporalLayer - 1) == i and t > 0:
                    self.grad[-1][0] += delta.reshape(-1, 1).dot(
                        self.hActs[i + 1][:, t - 1].T.reshape(1, -1))
                    # push delta through temporal connections
                    delta_t = self.stack[-1][0].T.dot(delta)

                    # HACK no bias for temporal layer. Give it a gradient of 0
                    self.grad[-1][1] = np.zeros((2, 1))

                # push the delta downward
                w, b = self.stack[i]
                delta = w.T.dot(delta)
                i -= 1
        #print self.grad
        return cost, self.grad, skip
Example #45
 def decode(self, z, in_shape, **kwargs):
     r = gnp.as_numpy_array(self.ae.decoder.forward_prop(z))
     out_shape = self.convnet.compute_output_shape(in_shape)
     assert out_shape.size() == r.shape[1]
     return self.convnet.recover_input(r, in_shape, **kwargs)
Example #46
def gather(x):
    """Copys array from GPU if running on GPU"""
    if GPU:
        return gnumpy.as_numpy_array(x)
    else:
        return x
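A sketch of the intended call pattern, assuming GPU is the module-level flag checked above (net.fprop is a hypothetical forward pass):

import numpy as np

h = net.fprop(batch)           # garray when GPU is True, ndarray otherwise
h = gather(h)                  # after this, h is always a NumPy ndarray
np.save("activations.npy", h)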
Example #47
    def costAndGrad(self, data, labels=None, key=None):
        """
        Forward prop entire utterance
        Call CTC cost function
        Compute gradient

        data is a 2-D matrix where each column is a single time frame
        Number of input frames changes across iterations
        
        labels is a vector of symbol ids, length unknown and does not
        depend on the number of time frames
        """

        ## forward prop
        # this is the same as minibatch forward prop 
        # since we pre-compute context window features for each time
        self.hActs[0] = data
        i = 1
        for w, b in self.stack:
            self.hActs[i] = w.dot(self.hActs[i - 1]) + b
            if i <= len(self.layerSizes):
                self.hActs[i] = self.activation(self.hActs[i])
            i += 1

        probs = self.hActs[-1] - gp.max(self.hActs[-1], axis=0)
        probs = gp.as_numpy_array(probs)
        probs = np.exp(probs)
        probs = probs / np.sum(probs, axis=0)
        # probs[probs < 1e-12] = 1e-12  # TODO have to clamp?

        ## pass probs and label string to ctc loss
        # TODO how much does passing to different function cost us?
        if not self.train:
            return ctc.decode_best_path(probs, ref=labels, blank=0)
            # return ctc.decode_bp_bigrams(probs, blank=0, B=None)

        cost, self.deltas[-1], skip = ctc.ctc_loss(probs, labels, blank=0)

        # Bad utterance?
        if skip:
            return cost, self.grad, skip

        # Store probabilities and error signal for a given key
        # if key is not None and key in self.hist:
        #     self.hist[key].append((probs, self.deltas[-1]))

        self.deltas[-1] = gp.garray(self.deltas[-1])

        # back prop
        i = len(self.layerSizes) - 1
        for w, b in reversed(self.stack[1:]):
            grad = self.activation(self.hActs[i + 1], True)
            self.deltas[i] = w.T.dot(self.deltas[i + 1]) * grad
            i -= 1

        # compute gradients
        # NOTE we do not divide by utterance length.
        #    Will need to scale up weight norm penalty accordingly
        for i in range(len(self.grad)):
            self.grad[i][0] = self.deltas[i].dot(self.hActs[i].T)
            self.grad[i][1] = gp.sum(self.deltas[i], axis=1).reshape(-1, 1)

        return cost, self.grad, skip