Code example #1
File: deepnet.py Project: Ambier/translational-DBN
 def __init__(self, n_visible, n_hidden=None, vistype='sigmoid', 
         hidtype='sigmoid', W=None, hbias=None, vbias=None, batch_size=128):
     # initialize parameters
     self.SIZE_LIMIT = 80000000 # the size of the largest gpu array
     self.vistype = vistype
     self.hidtype = hidtype
     self.batch_size = batch_size
     self.n_visible = n_visible
     if n_hidden is None:
         n_hidden = self.n_visible
     self.n_hidden = n_hidden
     n = self.n_visible*self.n_hidden + self.n_hidden
     bound = 2.38 / np.sqrt(n)
     if W is None:
         W = np.zeros((self.n_visible, self.n_hidden))
         for i in range(self.n_visible):
             for j in range(self.n_hidden):
                 W[i,j] = np.random.uniform(-bound, bound)
     W = gp.garray(W)
     self.W = W
     if vbias is None:
         vbias = gp.zeros(self.n_visible)
     else:
         vbias = gp.garray(vbias)
     self.vbias = vbias
     if hbias is None:
         hbias = np.zeros((self.n_hidden,))
         for i in range(self.n_hidden):
             hbias[i] = np.random.uniform(-bound, bound)
     hbias = gp.garray(hbias)
     self.hbias = hbias
     #initialize updates
     self.wu_vh = gp.zeros((self.n_visible, self.n_hidden))
     self.wu_v = gp.zeros(self.n_visible)
     self.wu_h = gp.zeros(self.n_hidden)
Code example #2
 def computeNumericGradient(self, input, factor=1.0, eps=1e-4, sampleNum=500):
     """
     compute gradients through a numeric method for gradient check
     gradient of J w.r.t. x computed by (J(x+eps)-J(x-eps))/2eps
     only check param at sampleNum positions
     J=0.5*(a[0]-a[-1])**2+WeightCost
     """
     param=self.combineParam()
     plen=param.size
     if factor==0:
         plen=plen/2
     sample=np.random.randint(0,plen,sampleNum)
     grad=gp.zeros(sampleNum)
     for (i,idx) in enumerate(sample):
         if i%100==0:
             sys.stdout.write('.')
             sys.stdout.flush()
         q=gp.zeros(param.shape)
         q[idx]=eps
         p1=param+q
         p2=param-q
         c1,_=self.getCost(p1, input,factor)
         c2,_=self.getCost(p2, input,factor)
         grad[i]=(c1-c2)/(2.0*eps)
     print "end"
     return grad, sample
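The example above estimates each sampled partial derivative with the central difference (J(x+eps) - J(x-eps)) / (2*eps) and returns it for comparison against the analytic gradient. Below is a minimal NumPy sketch of that kind of check on a toy quadratic cost; the cost function, names, and tolerance are illustrative assumptions, not part of the project above.

import numpy as np

def numeric_grad(cost, x, eps=1e-4):
    """Central-difference estimate of d(cost)/dx at every position."""
    grad = np.zeros_like(x)
    for idx in range(x.size):
        q = np.zeros_like(x)
        q.flat[idx] = eps
        grad.flat[idx] = (cost(x + q) - cost(x - q)) / (2.0 * eps)
    return grad

# Toy cost J(x) = 0.5*||A.x - b||^2 with analytic gradient A^T (A.x - b)
rng = np.random.RandomState(0)
A, b = rng.randn(5, 3), rng.randn(5)
cost = lambda x: 0.5 * np.sum((A.dot(x) - b) ** 2)
x0 = rng.randn(3)

numeric = numeric_grad(cost, x0)
analytic = A.T.dot(A.dot(x0) - b)
# a relative error well below ~1e-6 indicates the analytic gradient is consistent
print(np.linalg.norm(numeric - analytic) / np.linalg.norm(numeric + analytic))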
Code example #3
File: ae.py Project: yysherlock/msae
    def __init__(self, config, name):
        super(AE, self).__init__(config, name) 

        #dimension of hidden layer
        self.hDim = int(self.readField(config, name, "hidden_dimension"))

        #dimension of visible layer
        self.vDim = int(self.readField(config, name, "visible_dimension"))

        #bias for hidden layer
        if self.hDim>0:
            self.b1 = gp.zeros(self.hDim)

        #bias for visible layer
        if self.vDim>0:
            self.b2 = gp.zeros(self.vDim)

        #init weight: uniform between +-sqrt(6)/sqrt(v+h+1)
        if self.hDim*self.vDim>0:
            gp.seed_rand()
            r=gp.sqrt(6)/gp.sqrt(self.hDim+self.vDim+1)
            self.W1 = gp.randn(self.vDim, self.hDim) * 2 * r - r
            self.W2 = gp.randn(self.hDim, self.vDim) * 2 * r - r
            self.initUpdate()
            self.initHyperParam(config, name)
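The comment above describes a uniform initialization in the range +-sqrt(6)/sqrt(v+h+1), while gp.randn draws standard-normal samples, so the expression randn * 2 * r - r is not strictly uniform. For reference, here is a minimal NumPy sketch of the uniform version the comment describes; the function name and dimensions are illustrative, not the project's API.

import numpy as np

def uniform_init(v_dim, h_dim, rng=np.random):
    # uniform in [-r, r] with r = sqrt(6)/sqrt(v + h + 1), as the comment describes
    r = np.sqrt(6.0) / np.sqrt(v_dim + h_dim + 1.0)
    return rng.uniform(-r, r, size=(v_dim, h_dim))

W1 = uniform_init(784, 256)  # visible -> hidden weights (example sizes)
W2 = uniform_init(256, 784)  # hidden -> visible weights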
Code example #4
File: GPULayers.py Project: Philip-Bachman/NN-Python
 def init_params(self, w_scale=0.01, b_scale=0.0):
     """Randomly initialize the weights in this layer."""
     self.params['W'] = w_scale * gp.randn((self.dim_input, self.dim_output))
     self.grads['W'] = gp.zeros((self.dim_input, self.dim_output))
     self.params['b'] = gp.zeros((1, self.dim_output))
     self.grads['b'] = gp.zeros((1, self.dim_output))
     return
Code example #5
File: test_fang.py Project: xy1234552/fang
def check_against_exact():
    with misc.gnumpy_conversion_check('allow'):
        rbm = test_tractable.random_rbm(NVIS, NHID)
        G, s = tractable.exact_fisher_information(rbm,
                                                  return_mean=True,
                                                  batch_units=2)
        rw = fisher.RegressionWeights.from_maximum_likelihood(G, NVIS, NHID)

        G, s = gnp.garray(G), gnp.garray(s)
        S = G + np.outer(s, s)
        m_unary = s[:NVIS + NHID]
        S_unary = S[:NVIS + NHID, :NVIS + NHID]

        m_pair = gnp.zeros((NVIS, NHID, 3))
        S_pair = gnp.zeros((NVIS, NHID, 3, 3))
        for i in range(NVIS):
            for j in range(NHID):
                vis_idx = i
                hid_idx = NVIS + j
                vishid_idx = NVIS + NHID + NHID * i + j
                idxs = np.array([vis_idx, hid_idx, vishid_idx])

                m_pair[i, j, :] = s[idxs]
                S_pair[i, j, :] = S[idxs[:, nax], idxs[nax, :]]

        stats = fang.Statistics(m_unary, S_unary, m_pair, S_pair)
        beta, sigma_sq = stats.compute_regression_weights()
        assert np.allclose(beta, rw.beta)
        assert np.allclose(sigma_sq, rw.sigma_sq)

        Sigma = stats.unary_covariance()
        assert np.max(np.abs(Sigma - G[:NVIS + NHID, :NVIS + NHID])) < 1e-6
Code example #6
def generation_on_a_line(net,
                         n_points=100,
                         imsz=[28, 28],
                         nrows=10,
                         h_seeds=None):
    if h_seeds is None:
        h = net.sample_hiddens(2)
        z = gnp.zeros((n_points, h.shape[1]))
        diff = h[1] - h[0]
        step = diff / (n_points - 1)
        for i in range(n_points):
            z[i] = h[0] + step * i
    else:
        n_seeds = h_seeds.shape[0]
        z = gnp.zeros((n_points * n_seeds, h_seeds.shape[1]))
        for i in range(n_seeds):
            h0 = h_seeds[i]
            h1 = h_seeds[(i + 1) % n_seeds]
            diff = h1 - h0
            step = diff / (n_points - 1)
            for j in range(n_points):
                z[i * n_points + j] = h0 + step * j

    x = net.generate_samples(z=z)
    vt.bwpatchview(x.asarray(), imsz, nrows, rowmajor=True, gridintensity=1)
Code example #7
File: dbn.py Project: evolu8/gdbn
 def __init__(self, initialWeights, initialBiases, initialGenBiases, outputActFunct, realValuedVis = False, useReLU = False, max_norm=-1, noises = [], dropout_adv = 0.0):
     self.realValuedVis = realValuedVis
     self.learnRates = [0.05 for i in range(len(initialWeights))]
     self.momentum = 0.9
     self.L2Costs = [0.0001 for i in range(len(initialWeights))]
     self.dropouts = [0 for i in range(len(initialWeights))]
     self.nesterov = False
     self.nestCompare = False
     self.rmsLims = [None for i in range(len(initialWeights))]
     
     if self.realValuedVis:
         self.learnRates[0] = 0.005
     
     self.weights = initialWeights
     self.biases = initialBiases
     self.genBiases = initialGenBiases
     
     if useReLU:
         self.RBMHidUnitType = RBMReLU()
         self.hidActFuncts = [ReLU() for i in range(len(self.weights) - 1)]
     else:
         self.RBMHidUnitType = RBMBinary()
         self.hidActFuncts = [Sigmoid() for i in range(len(self.weights) - 1)]
     self.outputActFunct = outputActFunct
     
     #state variables modified in bprop
     self.WGrads = [gnp.zeros(self.weights[i].shape) for i in range(len(self.weights))]
     self.biasGrads = [gnp.zeros(self.biases[i].shape) for i in range(len(self.biases))]
     self.max_norm = max_norm
     self.noises = noises
     self.dropout_adv = dropout_adv
Code example #8
File: ae.py Project: romaad/msae
    def __init__(self, config, name):
        super(AE, self).__init__(config, name)

        #dimension of hidden layer
        self.hDim = int(self.readField(config, name, "hidden_dimension"))

        #dimension of visible layer
        self.vDim = int(self.readField(config, name, "visible_dimension"))

        #bias for hidden layer
        if self.hDim > 0:
            self.b1 = gp.zeros(self.hDim)

        #bias for visible layer
        if self.vDim > 0:
            self.b2 = gp.zeros(self.vDim)

        #init weight: uniform between +-sqrt(6)/sqrt(v+h+1)
        if self.hDim * self.vDim > 0:
            gp.seed_rand()
            r = gp.sqrt(6) / gp.sqrt(self.hDim + self.vDim + 1)
            self.W1 = gp.randn(self.vDim, self.hDim) * 2 * r - r
            self.W2 = gp.randn(self.hDim, self.vDim) * 2 * r - r
            self.initUpdate()
            self.initHyperParam(config, name)
Code example #9
File: msae.py Project: yysherlock/msae
 def singlePathNumericGrad(self, saes, inputs, factor=1,sampleNum=500,eps=1e-4): 
     """
     get gradient for single path by numeric computing
     aes: (my_aes, other_aes), autoencoders for this path and the other path
     inputs:(my_input, other_input), input data for this path and the other_path
     Since the param of the other path is fixed, no need to compute its cost
     """
     mysae, osae=saes
     myinput, oinput=inputs
     myparam=mysae.combineParam(down=False)
     #aes[0] is None
     oas=osae.forward(oinput)
     plen=myparam.size
     sample=np.random.randint(0,plen,sampleNum)
     grad=gp.zeros(sampleNum)
     for (i,idx) in enumerate(sample):
         if i%100==0:
             sys.stdout.write('.')
             sys.stdout.flush()
         q=gp.zeros(myparam.shape)
         q[idx]=eps
         p1=myparam+q
         p2=myparam-q
         c1,a=mysae.getCost(p1,myinput,factor)
         c1+=self.getDiffLoss(a[self.depth-1],oas[self.depth-1])
         c2,a=mysae.getCost(p2,myinput,factor)
         c2+=self.getDiffLoss(a[self.depth-1],oas[self.depth-1])
         grad[i]=(c1-c2)/(2.0*eps)
     return grad, sample
Code example #10
File: dbn.py Project: evoup/cause-effect
 def __init__(self, initialWeights, initialBiases, initialGenBiases, outputActFunct, realValuedVis = False, useReLU = False):
     self.realValuedVis = realValuedVis
     self.learnRates = [0.05 for i in range(len(initialWeights))]
     self.momentum = 0.9
     self.L2Costs = [0.0001 for i in range(len(initialWeights))]
     self.dropouts = [0.0 for i in range(len(initialWeights))]
     self.nesterov = False
     self.nestCompare = False
     self.rmsLims = [None for i in range(len(initialWeights))]
     
     if self.realValuedVis:
         self.learnRates[0] = 0.005 # TODO - This should not be set here - should be an optional variable
     
     self.weights = initialWeights
     self.biases = initialBiases
     self.genBiases = initialGenBiases #### FIXME - generative biases - does this mean input biases or something like that?
     
     if useReLU:
         self.RBMHidUnitType = RBMReLU()
         self.hidActFuncts = [ReLU() for i in range(len(self.weights) - 1)]
     else:
         self.RBMHidUnitType = RBMBinary()
         self.hidActFuncts = [Sigmoid() for i in range(len(self.weights) - 1)]
     self.outputActFunct = outputActFunct
     
     # State variables modified in bprop
     self.WGrads = [gnp.zeros(self.weights[i].shape) for i in range(len(self.weights))]
     self.biasGrads = [gnp.zeros(self.biases[i].shape) for i in range(len(self.biases))]
Code example #11
File: tractable.py Project: xy1234552/fang
def get_scores(rbm, batch_units=10, show_progress=False):
    nhid = rbm.nhid
    assert nhid <= 30
    prefix_len = nhid - batch_units
    batch_size = 2 ** batch_units
    prefixes = combinations_array(prefix_len)
    num_batches = prefixes.shape[0]

    hid = gnp.zeros((batch_size, nhid))
    hid[:, prefix_len:] = combinations_array(batch_units)
    scores = gnp.zeros((num_batches, batch_size))

    if show_progress:
        pbar = misc.pbar(num_batches)

    for i, prefix in enumerate(prefixes):
        hid[:, :prefix_len] = prefix
        scores[i, :] = rbm.free_energy_hid(hid)

        if show_progress:
            pbar.update(i)

    if show_progress:
        pbar.finish()

    return scores
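get_scores enumerates every joint hidden configuration by splitting the nhid bits into a fixed prefix and a batch part, relying on a combinations_array helper that lists all binary vectors of a given length. A minimal NumPy sketch of what such a helper could look like follows; this is an assumption for illustration, the project's own implementation is not shown here.

import itertools
import numpy as np

def combinations_array(n):
    # hypothetical reimplementation: all 2**n binary configurations of n units,
    # one configuration per row
    return np.array(list(itertools.product([0, 1], repeat=n)), dtype=float)

print(combinations_array(2))
# rows: [0, 0], [0, 1], [1, 0], [1, 1]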
Code example #12
File: ada_dnn.py Project: ryaninhust/Apep
    def __init__(self, layer_sizes, scale=0.05, verbose=1, l2=0.0001,
                 momentum=0.9, epochs=20, batch_size=256,dropouts=0.0,
                 learning_rate=0.01, learning_rate_decays=0.9):

        self.layer_sizes = layer_sizes
        self.scale = scale
        self.verbose = 1
        self.l2 = l2
        self.momentum = momentum

        self.epochs = epochs
        self.batch_size = batch_size
        self.dropouts = [dropouts for l in range(len(layer_sizes)-1)]

        self.learning_rate = learning_rate
        self.learning_rate_decays = learning_rate_decays

        shapes = [(layer_sizes[i-1], layer_sizes[i])
                  for i in range(1, len(layer_sizes))]

        self.biases = init_biases_matrix(layer_sizes)
        self.weights = init_weights_matrix(shapes, scale)
        self.rms_limits = [None for i in range(len(self.weights))]

        self.hidden_functions = [self.hidden_function for i in range(len(self.weights) - 1)]

        self.weight_grads_l2_norm = [gnp.ones(weight.shape) for weight in self.weights]
        self.bias_gradis_l2_norm = [gnp.ones(bias.shape) for bias in self.biases]
        self.weight_grads = [gnp.zeros(weight.shape) for weight in self.weights]
        self.bias_grads = [gnp.zeros(bias.shape) for bias in self.biases]
Code example #13
File: dbn.py Project: wavelets/copper
    def __init__(self, initialWeights, initialBiases, initialGenBiases, outputActFunct, realValuedVis = False, useReLU = False):
        self.realValuedVis = realValuedVis
        self.learnRates = [0.05 for i in range(len(initialWeights))]
        self.momentum = 0.9
        self.L2Costs = [0.0001 for i in range(len(initialWeights))]
        self.dropouts = [0 for i in range(len(initialWeights))]
        self.nesterov = False
        self.nestCompare = False
        self.rmsLims = [None for i in range(len(initialWeights))]

        if self.realValuedVis:
            self.learnRates[0] = 0.005

        self.weights = initialWeights
        self.biases = initialBiases
        self.genBiases = initialGenBiases

        if useReLU:
            self.RBMHidUnitType = RBMReLU()
            self.hidActFuncts = [ReLU() for i in range(len(self.weights) - 1)]
        else:
            self.RBMHidUnitType = RBMBinary()
            self.hidActFuncts = [Sigmoid() for i in range(len(self.weights) - 1)]
        self.outputActFunct = outputActFunct

        #state variables modified in bprop
        self.WGrads = [gnp.zeros(self.weights[i].shape) for i in range(len(self.weights))]
        self.biasGrads = [gnp.zeros(self.biases[i].shape) for i in range(len(self.biases))]
Code example #14
File: nnet.py Project: trb116/pythonanalyzer
    def costAndGradSFO(self,stack,datums):
        """
        Wrapper function used for SFO optimizer.
        """
        N = len(datums)
        cost = 0.
        grad = [[gp.zeros(w.shape),gp.zeros(b.shape)]
                for w,b in self.stack]

        # Push stack to device
        self.stack = [[gp.garray(w),gp.garray(b)]
                      for w,b in stack]

        for datum in datums:
            data = gp.garray(self.data_dict[datum])
            labels = np.array(self.alis[datum], dtype=np.int32)
            costSingle,gradSingle,skip = self.costAndGrad(data,labels)
            if skip:
                print "LOGGING SKIP" #TODO what to do here?
                N -= 1
                continue
            grad = [[gs[0]+g[0],gs[1]+g[1]]
                    for gs,g in zip(gradSingle,grad)]
            cost += costSingle

            # Have to force GC the gpu... gnumpy lameness
            gp.free_reuse_cache()

        # Pull gradient from device
        grad = [[((1./N)*gw).as_numpy_array(), ((1./N)*gb).as_numpy_array()]
                for gw,gb in grad]
        cost *= 1./N

        return cost,grad
Code example #15
File: lda_gibbs_gpu.py Project: pvarin/LDA_GPU
    def _initialize(self, matrix):
        n_docs, vocab_size = matrix.shape

        print "initializing state matrices"
        # number of times document m and topic z co-occur
        self.nmz = gpu.zeros((n_docs, self.n_topics))
        # number of times topic z and word w co-occur
        self.nzw = gpu.zeros((self.n_topics, vocab_size))
        self.nm = gpu.zeros(n_docs)
        self.nz = gpu.zeros(self.n_topics)
        self.topics = {}
        print "populating state matrices"
        time = datetime.now() # begin timer
        for m in xrange(n_docs):
            # i is a number between 0 and doc_length-1
            # w is a number between 0 and vocab_size-1
            for i, w in enumerate(word_indices(matrix[m, :])):
                # choose an arbitrary topic as first topic for word i
                z = np.random.randint(self.n_topics)
                self.nmz[m,z] += 1
                self.nm[m] += 1
                self.nzw[z,w] += 1
                self.nz[z] += 1
                self.topics[(m,i)] = z
        print datetime.now() - time # end timer
Code example #16
File: sae.py Project: yysherlock/msae
 def computeNumericGradient(self, input, factor=1.0, eps=1e-4, sampleNum=500):
     """
     compute gradients through a numeric method for gradient check
     gradient of J w.r.t. x computed by (J(x+eps)-J(x-eps))/2eps
     only check param at sampleNum positions
     J=0.5*(a[0]-a[-1])**2+WeightCost
     """
     param=self.combineParam()
     plen=param.size
     if factor==0:
         plen=plen/2
     sample=np.random.randint(0,plen,sampleNum)
     grad=gp.zeros(sampleNum)
     for (i,idx) in enumerate(sample):
         if i%100==0:
             sys.stdout.write('.')
             sys.stdout.flush()
         q=gp.zeros(param.shape)
         q[idx]=eps
         p1=param+q
         p2=param-q
         c1,_=self.getCost(p1, input,factor)
         c2,_=self.getCost(p2, input,factor)
         grad[i]=(c1-c2)/(2.0*eps)
     print "end"
     return grad, sample
Code example #17
 def singlePathNumericGrad(self,
                           saes,
                           inputs,
                           factor=1,
                           sampleNum=500,
                           eps=1e-4):
     """
     get gradient for single path by numeric computing
     aes: (my_aes, other_aes), autoencoders for this path and the other path
     inputs:(my_input, other_input), input data for this path and the other_path
     Since the param of the other path is fixed, no need to compute its cost
     """
     mysae, osae = saes
     myinput, oinput = inputs
     myparam = mysae.combineParam(down=False)
     #aes[0] is None
     oas = osae.forward(oinput)
     plen = myparam.size
     sample = np.random.randint(0, plen, sampleNum)
     grad = gp.zeros(sampleNum)
     for (i, idx) in enumerate(sample):
         if i % 100 == 0:
             sys.stdout.write('.')
             sys.stdout.flush()
         q = gp.zeros(myparam.shape)
         q[idx] = eps
         p1 = myparam + q
         p2 = myparam - q
         c1, a = mysae.getCost(p1, myinput, factor)
         c1 += self.getDiffLoss(a[self.depth - 1], oas[self.depth - 1])
         c2, a = mysae.getCost(p2, myinput, factor)
         c2 += self.getDiffLoss(a[self.depth - 1], oas[self.depth - 1])
         grad[i] = (c1 - c2) / (2.0 * eps)
     return grad, sample
Code example #18
File: tractable.py Project: xy1234552/fang
def exact_fisher_information_biases(rbm, batch_units=10, show_progress=False):
    batch_size = 2 ** batch_units

    nvis, nhid = rbm.nvis, rbm.nhid
    num_params = nvis + nhid

    s = gnp.zeros(num_params)
    G = gnp.zeros((num_params, num_params))

    for hid, p in iter_configurations(rbm, batch_units=batch_units, show_progress=show_progress):
        g = gnp.zeros((batch_size, num_params))
        cond_vis = gnp.logistic(rbm.vis_inputs(hid))

        g[:, :nvis] = cond_vis
        g[:, nvis:] = hid

        s += gnp.dot(p, g)
        G += gnp.dot(g.T * p, g)

        diag_term = gnp.dot(p, g * (1. - g))
        G += np.diag(diag_term.as_numpy_array())

    G -= s[:, nax] * s[nax, :]

    return G
Code example #19
File: nn_utils.py Project: runngezhang/convnet-1
 def ConvDown(hidActs, filters, moduleStride, paddingStart):
     if filters.shape[3] == 1 and hidActs.shape[0] == 1:
         hidActs_16 = gp.zeros(
             (16, hidActs.shape[1], hidActs.shape[2], hidActs.shape[3]))
         hidActs_16[:1, :, :, :] = hidActs
         filters_16 = gp.zeros(
             (filters.shape[0], filters.shape[1], filters.shape[2], 16))
         filters_16[:, :, :, :1] = filters
         return ConvNet.convDown(hidActs_16,
                                 filters_16,
                                 moduleStride=moduleStride,
                                 paddingStart=paddingStart)
     elif filters.shape[3] == 3 and hidActs.shape[0] == 3:
         hidActs_16 = gp.zeros(
             (16, hidActs.shape[1], hidActs.shape[2], hidActs.shape[3]))
         hidActs_16[:3, :, :, :] = hidActs
         filters_16 = gp.zeros(
             (filters.shape[0], filters.shape[1], filters.shape[2], 16))
         filters_16[:, :, :, :3] = filters
         return ConvNet.convDown(hidActs_16,
                                 filters_16,
                                 moduleStride=moduleStride,
                                 paddingStart=paddingStart)
     elif filters.shape[3] % 16 == 0 and hidActs.shape[0] % 16 == 0:
         return ConvNet.convDown(hidActs, filters, moduleStride,
                                 paddingStart)
     else:
         raise Exception("Hidden or Filters Mode 16")
Code example #20
File: cudamat_conv_py.py Project: ANB2/deepnet
def localUp(images, filters, count_unused=False):
    #assert paddingStart <= 0

    numChannels, imSizeX, imSizeX, numImages = images.shape
    numModulesX, numModulesX, numFilterChannels, filterSizeX, filterSizeX, numFilters = filters.shape

    assert numModulesX <= imSizeX

    moduleStride = 1  

    paddingStart = -(numModulesX - imSizeX + filterSizeX - 1)

    #numModulesX = (abs(paddingStart) + imSizeX - filterSizeX + 1)
    numModules = numModulesX**2 
    numGroups = 1

    targets = g.zeros((numFilters, numModulesX, numModulesX, numImages))

    images2 = g.zeros((numChannels, 
                       imSizeX+2*abs(paddingStart), 
                       imSizeX+2*abs(paddingStart), 
                       numImages))
    if paddingStart != 0:
        images2[:, 
            abs(paddingStart):-abs(paddingStart),
            abs(paddingStart):-abs(paddingStart),
            :] = images
    else:
        images2 = images


    used=0

    for i in range(numImages):
        for f in range(numFilters):
            for c in range(numChannels):
                for y1 in range(numModulesX):
                    for y2 in range(numModulesX):
                        for u1 in range(filterSizeX):
                            for u2 in range(filterSizeX):
                                x1 = y1 + u1 
                                x2 = y2 + u2
                                targets[f, y1, y2, i] += \
                                    filters[y1, y2, c ,u1,u2,f] * \
                                    images2[c,x1,x2,i]
                                # if images2 is exactly zero, it means we're the victims of padding.
                                used += (images2[c,x1,x2,i]!=0)

    if count_unused:
        unused = numImages*filters.size - used
        assert unused % numImages == 0
        print 'localUp: num unused filters: %s' % (unused / numImages)

    return targets
Code example #21
def localDown(hidActs, filters, paddingStart=0):
    numGroups = 1
    moduleStride = 1

    assert paddingStart <= 0
    numFilters, numModulesX, numModulesX, numImages = hidActs.shape
    numModulesX, numModulesX, numFilterChannels, filterSizeX, filterSizeX, numFilters = filters.shape

    # what about the stride?  I don't support stride.  I don't like it.
    #paddingStart = -(numModulesX - imSizeX + filterSizeX + 1)

    #numModulesX = (abs(paddingStart) + imSizeX - filterSizeX + 1)
    imSizeX = numModulesX - abs(paddingStart) + filterSizeX - 1

    numChannels = numFilterChannels * numGroups

    numModules = numModulesX**2

    targets = g.zeros((numChannels, imSizeX, imSizeX, numImages))
    targets2 = g.zeros((numChannels, imSizeX + 2 * abs(paddingStart),
                        imSizeX + 2 * abs(paddingStart), numImages))

    numImgColors = numChannels

    #numFilters, numModulesX, numModulesX, numImages = hidActs.shape
    #numFilterChannels, filterSizeX, filterSizeX, numFilters = filters.shape

    moduleStride = 1

    numModulesX = (abs(paddingStart) + imSizeX - filterSizeX + 1)
    numModules = numModulesX**2
    numGroups = 1

    #targets = g.zeros((numFilters, numModulesX, numModulesX, numImages))

    for i in range(numImages):
        for f in range(numFilters):
            for c in range(numChannels):
                for y1 in range(numModulesX):
                    for y2 in range(numModulesX):
                        for u1 in range(filterSizeX):
                            for u2 in range(filterSizeX):
                                x1 = y1 + u1
                                x2 = y2 + u2
                                # targets[f, y1, y2, i] += \
                                #     filters[c ,u1,u2,f] * \
                                #     images2[c,x1,x2,i]
                                targets2[c,x1,x2,i] += \
                                    filters[y1, y2, c ,u1,u2,f] * \
                                    hidActs[f, y1, y2, i]

    if paddingStart != 0:
        targets[:] = targets2[:,
                              abs(paddingStart):-abs(paddingStart),
                              abs(paddingStart):-abs(paddingStart), :]
    else:
        targets = targets2

    return targets
Code example #22
File: layer.py Project: jakesnell/pynn
    def _load_from_stream(self, f):
        self._param_id, layer_dim = struct.unpack('ii', f.read(4*2))
        self.gamma = gnp.garray(np.fromstring(f.read(layer_dim * 4), dtype=np.float32))
        self.beta = gnp.garray(np.fromstring(f.read(layer_dim * 4), dtype=np.float32))
        self.param_size = self.gamma.size + self.beta.size

        self.gamma_grad = gnp.zeros(self.gamma.size)
        self.beta_grad = gnp.zeros(self.beta.size)
Code example #23
def grad_costfunc_gpu_ReLU(x, *args):
    num_input, num_hidden, num_output, inputs, lambda_val, sparsityParam, beta = args
    num_weights1 = (num_input + 1) * num_hidden
    num_weights2 = (num_hidden + 1) * num_output
    x = gpu.garray(x)
    inputs = gpu.garray(inputs)
    weights1 = x[0:num_weights1].reshape((num_hidden, num_input + 1))
    weights2 = x[num_weights1:shape(x)[0]].reshape(
        (num_output, num_hidden + 1))
    nData = shape(inputs)[1]
    data = gpu.concatenate((gpu.ones((1, nData)), inputs), axis=0)
    hidden_sum = gpu.dot(weights1, data)
    #hidden_activation = gpu.log(1+hidden_sum.exp())
    relu_mask_hidden1 = gpu.ones(shape(hidden_sum)) * (hidden_sum > 0)
    hidden_activation = hidden_sum * relu_mask_hidden1
    #hidden_derivative = hidden_sum.logistic()
    hidden_derivative = relu_mask_hidden1
    hidden_activation = gpu.concatenate((gpu.ones(
        (1, nData)), hidden_activation),
                                        axis=0)
    hidden_derivative = gpu.concatenate((gpu.ones(
        (1, nData)), hidden_derivative),
                                        axis=0)
    outputs = gpu.dot(weights2, hidden_activation)
    weights1_grad = gpu.zeros(shape(weights1))
    weights2_grad = gpu.zeros(shape(weights2))
    p = outputs - inputs
    weights2_grad += gpu.dot(
        p, gpu.garray(transpose(hidden_activation.as_numpy_array())))
    q_temp = gpu.dot(gpu.garray(transpose(weights2.as_numpy_array())), p)
    #q = multiply(multiply(q_temp,hidden_activation),(1-hidden_activation))
    q = q_temp * hidden_derivative
    delta2 = gpu.dot(q, gpu.garray(transpose(data.as_numpy_array())))
    weights1_grad += delta2[1:shape(delta2)[0], :]
    weights1_grad = weights1_grad / nData
    weights2_grad = weights2_grad / nData
    weights1_grad[:, 1:shape(weights1_grad)[1]] = weights1_grad[:, 1:shape(
        weights1_grad)[1]] + weights1[:, 1:shape(weights1)[1]] * lambda_val
    weights2_grad[:, 1:shape(weights2_grad)[1]] = weights2_grad[:, 1:shape(
        weights2_grad)[1]] + weights2[:, 1:shape(weights2)[1]] * lambda_val
    #weights1_grad = reshape(weights1_grad, num_weights1)
    weights1_grad = weights1_grad.reshape(num_weights1)
    #weights2_grad = reshape(weights2_grad, num_weights2)
    weights2_grad = weights2_grad.reshape(num_weights2)
    del x
    del inputs
    del data
    del p
    del q_temp
    del q
    del delta2
    del hidden_sum
    del hidden_activation
    del weights1
    del weights2
    gpu.free_reuse_cache()
    return hstack(
        (weights1_grad.as_numpy_array(), weights2_grad.as_numpy_array()))
Code example #24
    def initUpdate(self):
        #increment for hidden bias
        self.incb1 = gp.zeros(self.hDim)

        #increment for visible bias
        self.incb2 = gp.zeros(self.vDim)

        #increment for weight
        self.incW1 = gp.zeros((self.vDim, self.hDim))
        self.incW2 = gp.zeros((self.hDim, self.vDim))
Code example #25
File: ae.py Project: yysherlock/msae
    def initUpdate(self):
        #increment for hidden bias
        self.incb1 = gp.zeros(self.hDim)

        #increment for visible bias
        self.incb2 = gp.zeros(self.vDim)

        #increment for weight
        self.incW1 = gp.zeros((self.vDim, self.hDim))
        self.incW2 = gp.zeros((self.hDim, self.vDim))
Code example #26
File: LNFuncs.py Project: jianminsun/NN-Dropout
def dev_loss(A, dev_type=1, use_shepherd=0):
    """DEV regularizer, cool stuff."""
    b_reps = len(A)
    b_obs = A[0].shape[0]
    At = []
    for i in range(b_reps):
        if (dev_type == 1):
            At.append(norm_trans(A[i],'ff'))
        elif (dev_type == 2):
            At.append(tanh_trans(A[i],'ff'))
        elif (dev_type == 3):
            At.append(line_trans(A[i],'ff'))
        else:
            raise Exception('Unknown DEV types.')
    # Compute the mean activations for this ensemble sample
    N = float(A[0].shape[1])
    n = float(b_reps)
    m = float(b_obs * b_reps * N)
    Am = gp.zeros(At[0].shape)
    if (use_shepherd != 1):
        for i in range(b_reps):
            Am = Am + At[i]
        Am = Am / float(b_reps)
    else:
        Am = At[0]
    # Compute difference from mean of each set of droppy activations
    Ad = [(At[i] - Am) for i in range(b_reps)]
    L = sum([gp.sum(ad**2.0) for ad in Ad]) / m
    dLdA = []
    if (use_shepherd != 1):
        Add = gp.zeros(At[0].shape)
        for i in range(b_reps):
            Add = Add + Ad[i]
        for i in range(b_reps):
            dLdA.append(-(2.0/m) * ((((1.0/n) - 1.0) * Ad[i]) + \
                    ((1.0/n) * (Add - Ad[i]))))
    else:
        for i in range(b_reps):
            if (i == 0):
                dLdA.append(gp.zeros(Ad[0].shape))
            else:
                dLdA.append((2.0 / m) * Ad[i])
        for i in range(1,b_reps):
            dLdA[0] = dLdA[0] - dLdA[i]
    # Backpropagate gradient on variance through the desired transform
    for i in range(b_reps):
        BP = {'X': A[i], 'A': At[i], 'dLdA': dLdA[i]}
        if (dev_type == 1):
            dLdA[i] = norm_trans(BP, 'bp')
        elif (dev_type == 2):
            dLdA[i] = tanh_trans(BP, 'bp')
        elif (dev_type == 3):
            dLdA[i] = line_trans(BP, 'bp')
    return {'L': L, 'dLdA': dLdA}
Code example #27
File: LNFuncs.py Project: nagyistge/NN-Dropout
def dev_loss(A, dev_type=1, use_shepherd=0):
    """DEV regularizer, cool stuff."""
    b_reps = len(A)
    b_obs = A[0].shape[0]
    At = []
    for i in range(b_reps):
        if (dev_type == 1):
            At.append(norm_trans(A[i], 'ff'))
        elif (dev_type == 2):
            At.append(tanh_trans(A[i], 'ff'))
        elif (dev_type == 3):
            At.append(line_trans(A[i], 'ff'))
        else:
            raise Exception('Unknown DEV types.')
    # Compute the mean activations for this ensemble sample
    N = float(A[0].shape[1])
    n = float(b_reps)
    m = float(b_obs * b_reps * N)
    Am = gp.zeros(At[0].shape)
    if (use_shepherd != 1):
        for i in range(b_reps):
            Am = Am + At[i]
        Am = Am / float(b_reps)
    else:
        Am = At[0]
    # Compute difference from mean of each set of droppy activations
    Ad = [(At[i] - Am) for i in range(b_reps)]
    L = sum([gp.sum(ad**2.0) for ad in Ad]) / m
    dLdA = []
    if (use_shepherd != 1):
        Add = gp.zeros(At[0].shape)
        for i in range(b_reps):
            Add = Add + Ad[i]
        for i in range(b_reps):
            dLdA.append(-(2.0/m) * ((((1.0/n) - 1.0) * Ad[i]) + \
                    ((1.0/n) * (Add - Ad[i]))))
    else:
        for i in range(b_reps):
            if (i == 0):
                dLdA.append(gp.zeros(Ad[0].shape))
            else:
                dLdA.append((2.0 / m) * Ad[i])
        for i in range(1, b_reps):
            dLdA[0] = dLdA[0] - dLdA[i]
    # Backpropagate gradient on variance through the desired transform
    for i in range(b_reps):
        BP = {'X': A[i], 'A': At[i], 'dLdA': dLdA[i]}
        if (dev_type == 1):
            dLdA[i] = norm_trans(BP, 'bp')
        elif (dev_type == 2):
            dLdA[i] = tanh_trans(BP, 'bp')
        elif (dev_type == 3):
            dLdA[i] = line_trans(BP, 'bp')
    return {'L': L, 'dLdA': dLdA}
Code example #28
File: st_dnn_cm.py Project: ronanki/merlin
    def pre_wuw_wu(self, frame_number, static_dimension, var_base):

        wuw_mat = gnp.zeros((frame_number*static_dimension, frame_number*static_dimension))
        wu_mat  = gnp.zeros((frame_number*static_dimension, 3*frame_number*static_dimension))

        for i in xrange(static_dimension):
            temp_var_base = [var_base[i*3], var_base[i*3+1], var_base[i*3+2]]
            temp_wuw, temp_wu = self.pre_compute_wuw(frame_number, temp_var_base)
            wuw_mat[frame_number*i:frame_number*(i+1), frame_number*i:frame_number*(i+1)] = gnp.garray(temp_wuw[:])
            wu_mat[frame_number*i:frame_number*(i+1), frame_number*i:frame_number*(i+3)] = gnp.garray(temp_wu[:])
            
        return  wuw_mat, wu_mat
Code example #29
def dbn_supervised_predict_exact(ws_vh, ws_v, ws_h, x):
    """
    Predict the class label of input x from supervised DBN
    Uses the exact method mentioned in section 6.2 of Hinton, Osindero, Teh 2006
    The free energy formula is taken from http://deeplearning.net/tutorial/rbm.html
    
    x: Input data. (NxD matrix)
    """
    L = len(ws_vh)
    N = x.shape[0]

    # make a forward pass to get from input layer to visible layer of top level
    # RBM
    h_prev = x.T

    # forward (bottom-up) pass, (use deterministic (we pass the activations, not
    # the stochastically sampled steps) forward pass)
    for l in range(L - 1):
        ah = gnp.dot(ws_vh[l].T, h_prev) + ws_h[l]
        h_prev = gnp.logistic(ah)

    H = ws_vh[-1].shape[0]  # number of visible units top level RBM
    Hx = h_prev.shape[0]  # number of hidden units in the penultimate layer
    K = H - Hx
    # (H - Hx) is the number of supervised inputs to top level RBM

    # for every class, assume it is the correct label and calculate its free energy
    y = gnp.zeros((K, N))
    free_energy = gnp.zeros((N, K))  # we actually calculate -free_energy
    for k in range(K):
        # set the current assumed class label
        y[k, :] = 1.0

        # visible unit vector
        v = gnp.concatenate((y, h_prev))
        e_v = gnp.dot(ws_v[-1].T, v)  # bias energy term

        ah = gnp.dot(ws_vh[-1].T, v) + ws_h[-1]
        e_h = gnp.sum(gnp.log(gnp.exp(ah) + 1.0), axis=0)

        free_energy[:, k] = e_v + e_h

        # zero the class labels for next iteration
        y[:, :] = 0.0

    # since these numbers may get pretty small, use the sum-exp trick for converting
    # these to probabilities
    pred_y = (
        gnp.exp(free_energy - gnp.max(free_energy, axis=1)[:, gnp.newaxis])
        / gnp.sum(gnp.exp(free_energy - gnp.max(free_energy, axis=1)[:, gnp.newaxis]), axis=1)[:, gnp.newaxis]
    )

    return pred_y
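The final step above turns the computed (negative) free energies into class probabilities with the usual max-subtraction softmax trick so that exp never overflows. The same step in isolation, as a minimal NumPy sketch (a standalone illustration, not the DBN code's API):

import numpy as np

def stable_softmax(scores):
    # subtract the per-row max before exponentiating so exp() never overflows;
    # the shift cancels out in the normalization
    shifted = scores - scores.max(axis=1, keepdims=True)
    e = np.exp(shifted)
    return e / e.sum(axis=1, keepdims=True)

scores = np.array([[1000.0, 1001.0, 999.0]])   # naive exp() would overflow here
print(stable_softmax(scores))                  # ~[[0.245, 0.665, 0.090]]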
Code example #30
   def __init__(self):
      """
      Create a Layer which contains no input, output, delta or gradient
      """

      # Initialize inputs, outputs, deltas and gradients to be None by default.
      # Then, getters and setters can easily be inherited
      self.input = None
      self.output = None
      self.delta = None
      self.gradient = gpu.zeros((0,0))
      self.parameters = gpu.zeros((0,0))
Code example #31
    def __init__(self):
        """
        Create a Layer which contains no input, output, delta or gradient
        """

        # Initialize inputs, outputs, deltas and gradients to be None by default.
        # Then, getters and setters can easily be inherited
        self.input = None
        self.output = None
        self.delta = None
        self.gradient = gpu.zeros((0, 0))
        self.parameters = gpu.zeros((0, 0))
Code example #32
File: tractable.py Project: xy1234552/fang
def exact_moments(rbm, batch_units=10, show_progress=False):
    expect_vis = gnp.zeros(rbm.nvis)
    expect_hid = gnp.zeros(rbm.nhid)
    expect_prod = gnp.zeros((rbm.nvis, rbm.nhid))

    for hid, p in iter_configurations(rbm, batch_units=batch_units, show_progress=show_progress):
        cond_vis = gnp.logistic(rbm.vis_inputs(hid))
        expect_vis += gnp.dot(p, cond_vis)
        expect_hid += gnp.dot(p, hid)
        expect_prod += gnp.dot(cond_vis.T * p, hid)

    return binary_rbms.Moments(expect_vis, expect_hid, expect_prod)
Code example #33
    def __init__(self, layerSizes=None, outputActFunct=Linear(), useReLU = True, \
                 initialWeights=None, initialBiases=None, targMean=None, targStd=None):
        """
        Construct a Neural Network object with Basic Structure:
         - layerSizes: [input size, hidden layer size list, output size]
         - outputActFunct: activation function for output layer, such as Linear() and LinearMasked()
         - useReLU: True/False, use ReLU() or Sigmoid() as activation function
        """
        self.layerSizes = layerSizes
        self.outputActFunct = outputActFunct
        self.useReLU = useReLU

        if useReLU:
            self.hidActFuncts = [ReLU() for i in range(len(layerSizes) - 2)]
        else:
            self.hidActFuncts = [Sigmoid() for i in range(len(layerSizes) - 2)]

        # initialize weights and biases
        if initialWeights is None:
            # set wscale for each layer according to 0.5*n*Var(w) = 1
            scale_list = [num.sqrt(2.0 / n) for n in layerSizes[:-1]]
            shapes = [(layerSizes[i - 1], layerSizes[i])
                      for i in range(1, len(layerSizes))]
            self.weights = [
                gnp.garray(
                    initWeightMatrix(shapes[i], scale_list[i], None, False))
                for i in range(len(shapes))
            ]
        else:
            self.weights = initialWeights

        if initialBiases is None:
            self.biases = [
                gnp.garray(0 * num.random.rand(1, self.layerSizes[i]))
                for i in range(1, len(self.layerSizes))
            ]
        else:
            self.biases = initialBiases

        # initialize gradients of weights and biases
        self.WGrads = [
            gnp.zeros(self.weights[i].shape) for i in range(len(self.weights))
        ]
        self.biasGrads = [
            gnp.zeros(self.biases[i].shape) for i in range(len(self.biases))
        ]

        # specify targMean and targStd with model since they are important model parameters
        #assert(len(targMean) == layerSizes[-1])
        self.targMean = targMean
        #assert(len(targStd) == layerSizes[-1])
        self.targStd = targStd
Code example #34
def localUp(images, filters, count_unused=False):
    #assert paddingStart <= 0

    numChannels, imSizeX, imSizeX, numImages = images.shape
    numModulesX, numModulesX, numFilterChannels, filterSizeX, filterSizeX, numFilters = filters.shape

    assert numModulesX <= imSizeX

    moduleStride = 1

    paddingStart = -(numModulesX - imSizeX + filterSizeX - 1)

    #numModulesX = (abs(paddingStart) + imSizeX - filterSizeX + 1)
    numModules = numModulesX**2
    numGroups = 1

    targets = g.zeros((numFilters, numModulesX, numModulesX, numImages))

    images2 = g.zeros((numChannels, imSizeX + 2 * abs(paddingStart),
                       imSizeX + 2 * abs(paddingStart), numImages))
    if paddingStart != 0:
        images2[:,
                abs(paddingStart):-abs(paddingStart),
                abs(paddingStart):-abs(paddingStart), :] = images
    else:
        images2 = images

    used = 0

    for i in range(numImages):
        for f in range(numFilters):
            for c in range(numChannels):
                for y1 in range(numModulesX):
                    for y2 in range(numModulesX):
                        for u1 in range(filterSizeX):
                            for u2 in range(filterSizeX):
                                x1 = y1 + u1
                                x2 = y2 + u2
                                targets[f, y1, y2, i] += \
                                    filters[y1, y2, c ,u1,u2,f] * \
                                    images2[c,x1,x2,i]
                                # if images2 is exactly zero, it means we're the victims of padding.
                                used += (images2[c, x1, x2, i] != 0)

    if count_unused:
        unused = numImages * filters.size - used
        assert unused % numImages == 0
        print 'localUp: num unused filters: %s' % (unused / numImages)

    return targets
Code example #35
 def preTrainIth(self, i, minibatchStream, epochs, mbPerEpoch):
     self.dW = gnp.zeros(self.weights[i].shape)
     self.dvb = gnp.zeros(self.genBiases[i].shape)
     self.dhb = gnp.zeros(self.biases[i].shape)
     
     for ep in range(epochs):
         recErr = 0
         totalCases = 0
         for j in range(mbPerEpoch):
             inpMB = minibatchStream.next()
             curRecErr = self.CDStep(inpMB, i, self.learnRates[i], self.momentum, self.L2Costs[i])
             recErr += curRecErr
             totalCases += inpMB.shape[0]
         yield recErr/float(totalCases)
Code example #36
def grad_costfunc_gpu(x, *args):
    num_input,num_hidden,num_output,inputs,lambda_val,sparsityParam,beta = args
    num_weights1 = (num_input+1)*num_hidden
    num_weights2 = (num_hidden+1)*num_output
    x = gpu.garray(x)
    inputs = gpu.garray(inputs)
    weights1 = x[0:num_weights1].reshape((num_hidden,num_input+1))
    weights2 = x[num_weights1:shape(x)[0]].reshape((num_output,num_hidden+1))
    nData = shape(inputs)[1]
    data = gpu.concatenate((gpu.ones((1,nData)), inputs), axis = 0)
    hidden_sum = gpu.dot(weights1, data)
    hidden_activation = hidden_sum.logistic()
    p_avg = gpu.sum(hidden_activation,axis=1)/nData
    grad_sparse = -1*sparsityParam/p_avg.as_numpy_array() + (1-sparsityParam)/(1-p_avg.as_numpy_array())
    grad_sparse = append(0,grad_sparse)
    grad_sparse = tile(grad_sparse, (nData, 1))
    grad_sparse = gpu.garray(transpose(grad_sparse))
    hidden_activation = gpu.concatenate((gpu.ones((1,nData)), hidden_activation), axis = 0)
    outputs = gpu.dot(weights2, hidden_activation)
    weights1_grad = gpu.zeros(shape(weights1))
    weights2_grad = gpu.zeros(shape(weights2))
    p = outputs-inputs
    weights2_grad += gpu.dot(p, gpu.garray(transpose(hidden_activation.as_numpy_array())))
    q_temp = gpu.dot(gpu.garray(transpose(weights2.as_numpy_array())),p) + beta*grad_sparse
    #q = multiply(multiply(q_temp,hidden_activation),(1-hidden_activation))
    q = (q_temp*hidden_activation)*(1-hidden_activation)
    delta2 = gpu.dot(q, gpu.garray(transpose(data.as_numpy_array())))
    weights1_grad += delta2[1:shape(delta2)[0], :]
    weights1_grad = weights1_grad/nData
    weights2_grad = weights2_grad/nData
    weights1_grad[:,1:shape(weights1_grad)[1]] = weights1_grad[:,1:shape(weights1_grad)[1]] + weights1[:,1:shape(weights1)[1]] * lambda_val
    weights2_grad[:,1:shape(weights2_grad)[1]] = weights2_grad[:,1:shape(weights2_grad)[1]] + weights2[:,1:shape(weights2)[1]] * lambda_val
    #weights1_grad = reshape(weights1_grad, num_weights1)
    weights1_grad = weights1_grad.reshape(num_weights1)
    #weights2_grad = reshape(weights2_grad, num_weights2)
    weights2_grad = weights2_grad.reshape(num_weights2)
    del x
    del inputs
    del data
    del grad_sparse
    del p
    del q_temp
    del q
    del delta2
    del hidden_sum
    del hidden_activation
    del weights1
    del weights2
    gpu.free_reuse_cache()
    return hstack((weights1_grad.as_numpy_array(),weights2_grad.as_numpy_array()))
Code example #37
File: gnumpy_RBM.py Project: narayana1208/deepnet
    def train(self):
        self.time_interval = 0
        t1 = time.time()
        cd = 1
        for current_epochs, weight_size in zip(self.epochs,
                                               self.weights_to_do):
            self.initialize_weights(weight_size)
            for epoch in xrange(current_epochs):
                error = 0
                for start_idx in range(0, self.X.shape[0], self.batch_size):
                    self.w_updt = gpu.zeros((self.input, weight_size))
                    self.bias_h_updt = gpu.zeros((1, weight_size))
                    self.bias_v_updt = gpu.zeros((1, self.input))

                    self.allocate_batch(start_idx)
                    self.input_original = self.get_visible_vector(self.batch)
                    self.input_dropped = self.input_original
                    self.positive_phase()
                    self.gibbs_updates(weight_size)
                    for j in range(cd):
                        self.negative_phase()

                    self.w += self.alpha * self.w_updt / float(
                        self.current_batch_size)
                    self.bias_h += self.alpha * self.bias_h_updt / float(
                        self.current_batch_size)
                    self.bias_v += self.alpha * self.bias_v_updt / float(
                        self.current_batch_size)
                    t0 = time.time()
                    error += gpu.mean(
                        (self.input_dropped - self.input_original)**2)
                    self.time_interval += time.time() - t0

                s = 'EPOCH: ' + str(epoch + 1)
                self.log_message(s)
                s = 'Reconstruction error: ' + str(
                    error / (self.X.shape[0] / float(self.batch_size)))
                self.log_message(s)

            self.trained_weights.append(
                [self.w.as_numpy_array(),
                 self.bias_h.as_numpy_array()])
            self.input = self.w.shape[1]

        print 'Time interval: ' + str(self.time_interval)
        print 'Training time: ' + str(time.time() - t1)

        self.free_GPU_memory()

        return self.trained_weights
Code example #38
File: sgd.py Project: Fakhraddin/stanford-ctc
    def __init__(self,model,alpha=1e-2,minibatch=256,
                 optimizer='momentum',momentum=0.9):
        self.model = model

        assert self.model is not None, "Must define a function to optimize"
        self.it = 0
        self.momentum = momentum # momentum
        self.alpha = alpha # learning rate
        self.minibatch = minibatch # minibatch
        self.optimizer = optimizer
        if self.optimizer == 'momentum' or self.optimizer == 'nesterov':
            print "Using %s.."%self.optimizer
            self.velocity = [[gp.zeros(w.shape),gp.zeros(b.shape)]
                             for w,b in self.model.stack]
        elif self.optimizer == 'adagrad' or self.optimizer == 'adagrad3' or self.optimizer == 'adadelta':
            print "Using %s.."%self.optimizer
            self.gradt = [[gp.zeros(w.shape),gp.zeros(b.shape)]
                          for w,b in self.model.stack]
        elif self.optimizer == 'adaccel2':
            print "Using adaccel2.."
            self.gradt = [[gp.zeros(w.shape),gp.zeros(b.shape)]
                          for w,b in self.model.stack]
            self.velocity = [[gp.zeros(w.shape),gp.zeros(b.shape)]
                             for w,b in self.model.stack]
        elif self.optimizer == 'sgd':
            print "Using sgd.."
        else:
            raise ValueError("Invalid optimizer")

        self.costt = []
        self.expcost = []
Code example #39
File: layer.py Project: jakesnell/pynn
    def __init__(self, layer_dim=None):
        if layer_dim is None:
            return

        self.gamma = gnp.ones(layer_dim)
        self.beta = gnp.zeros(layer_dim)

        self.gamma_grad = gnp.zeros(layer_dim)
        self.beta_grad = gnp.zeros(layer_dim)

        self.param_size = self.gamma.size + self.beta.size
        
        self._param_id = LayerParams._param_count
        LayerParams._param_count += 1
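The gamma/beta pair and their gradients stored by this LayerParams class look like the per-dimension scale and shift parameters of a batch-normalization-style layer. A minimal NumPy sketch of how such parameters are typically applied in a forward pass follows; this is an assumption about the layer's role, not the pynn API.

import numpy as np

def affine_normalize(x, gamma, beta, eps=1e-5):
    # normalize each feature over the batch, then rescale and shift per dimension
    mean = x.mean(axis=0)
    var = x.var(axis=0)
    x_hat = (x - mean) / np.sqrt(var + eps)
    return gamma * x_hat + beta

x = np.random.randn(32, 100)          # batch of 32, layer_dim = 100
gamma, beta = np.ones(100), np.zeros(100)
y = affine_normalize(x, gamma, beta)  # with these defaults, an identity-like rescaling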
Code example #40
File: cudamat_conv_py.py Project: ANB2/deepnet
def convOutp(images, hidActs, paddingStart = 0):
    numGroups = 1
    moduleStride = 1  

    assert paddingStart <= 0
    numFilters, numModulesX, numModulesX, numImages = hidActs.shape
    numChannels, imSizeX, imSizeX, numImages = images.shape    
    numFilterChannels = numChannels / numGroups
    filterSizeX = imSizeX - numModulesX + abs(paddingStart) + 1

    targets = g.zeros((numFilterChannels, filterSizeX, filterSizeX, numFilters))

    numImgColors = numChannels

    images2 = g.zeros((numChannels, 
                       imSizeX+2*abs(paddingStart), 
                       imSizeX+2*abs(paddingStart), 
                       numImages))

    if paddingStart != 0:
        images2[:, 
            abs(paddingStart):-abs(paddingStart),
            abs(paddingStart):-abs(paddingStart),
            :] = images
    else:
        images2 = images


    for i in range(numImages):
        for f in range(numFilters):
            for c in range(numChannels):
                for y1 in range(numModulesX):
                    for y2 in range(numModulesX):
                        for u1 in range(filterSizeX):
                            for u2 in range(filterSizeX):
                                x1 = y1 + u1 
                                x2 = y2 + u2
                                # targets[f, y1, y2, i] += \
                                #     filters[c ,u1,u2,f] * \
                                #     images2[c,x1,x2,i]

                                targets[c ,u1,u2,f] += \
                                    hidActs[f, y1, y2, i] * \
                                    images2[c,x1,x2,i]


    return targets
Code example #41
   def __init__(self, to_port):
      """
      Create a new bias
      """

      # Properly initialize the Bias
      AbstractConnection.__init__(self, None, to_port)

      self.parameters = gpu.zeros((1, to_port.size))
      self.dimensions = (1, to_port.size)

      self.input = gpu.zeros((0,0))
      self.output = gpu.zeros((1,to_port.size))

      self.gradient = gpu.zeros(self.dimensions)
Code example #42
File: dbn.py Project: evolu8/gdbn
 def preTrainIth(self, i, minibatchStream, epochs, mbPerEpoch):
     #initialize CD gradient variables
     self.dW = gnp.zeros(self.weights[i].shape)
     self.dvb = gnp.zeros(self.genBiases[i].shape)
     self.dhb = gnp.zeros(self.biases[i].shape)
     
     for ep in range(epochs):
         recErr = 0
         totalCases = 0
         for j in range(mbPerEpoch):
             inpMB = minibatchStream.next()
             curRecErr = self.CDStep(inpMB, i, self.learnRates[i], self.momentum, self.L2Costs[i])
             recErr += curRecErr
             totalCases += inpMB.shape[0]
         yield recErr/float(totalCases)
Code example #43
def test_gnumpy(dat, num_epochs):
    import gnumpy as gpu
    import numpy
    import time
    # load data. <dat> is 2 dimensional: 60000 X 784
    #dat = gpu.garray(load('mnist_cudaTest').T/255.)
    # training parameters
    epsilon = 0.1
    momentum = 0.9
    batch_size = 128
    num_batches = dat.shape[0] / batch_size
    # model parameters
    num_vis = dat.shape[1]
    num_hid = 4096
    # initialize weights
    w_vh = 0.1 * gpu.randn(num_vis, num_hid)
    w_v = gpu.zeros(num_vis)
    w_h = -4. * gpu.ones(num_hid)
    # initialize weight updates
    wu_vh = gpu.zeros((num_vis, num_hid))
    wu_v = gpu.zeros(num_vis)
    wu_h = gpu.zeros(num_hid)
    for epoch in range(num_epochs):
        err = []
        tic = time.clock()
        for batch in range(num_batches):
            # positive phase
            v1 = dat[batch * batch_size:(batch + 1) * batch_size]
            h1 = (gpu.dot(v1, w_vh) + w_h).logistic()
            # sample hiddens
            hSampled = h1.rand() < h1
            # negative phase
            v2 = (gpu.dot(hSampled, w_vh.T) + w_v).logistic()
            h2 = (gpu.dot(v2, w_vh) + w_h).logistic()
            # update weights
            wu_vh = wu_vh * momentum + gpu.dot(v1.T, h1) - gpu.dot(v2.T, h2)
            wu_v = wu_v * momentum + v1.sum(0) - v2.sum(0)
            wu_h = wu_h * momentum + h1.sum(0) - h2.sum(0)

            w_vh += wu_vh * (epsilon / batch_size)
            w_v += wu_v * (epsilon / batch_size)
            w_h += wu_h * (epsilon / batch_size)
            # calculate reconstruction error
            err.append((v2 - v1).euclid_norm()**2 / (num_vis * batch_size))
        toc = time.clock()
        print "Mean squared error: %.4f, takes time: %d" % (numpy.mean(err),
                                                            toc - tic)
    return w_vh, w_v, w_h
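test_gnumpy above runs CD-1 contrastive-divergence training of a binary RBM over the rows of dat and reports per-epoch reconstruction error. A minimal sketch of how it might be invoked on synthetic binary data in place of the MNIST matrix mentioned in its comment (this assumes gnumpy is installed and test_gnumpy is defined as above; the data is purely illustrative):

import numpy as np
import gnumpy as gpu

# synthetic stand-in for the 60000 x 784 MNIST matrix mentioned in the comment
dat = gpu.garray((np.random.rand(4096, 784) > 0.5).astype(np.float64))
w_vh, w_v, w_h = test_gnumpy(dat, num_epochs=2)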
Code example #44
File: LNNet.py Project: nagyistge/NN-Dropout
    def dev_loss(self, X, Y, M, Ws=[]):
        """Compute DEV-regularized loss for inputs X with target outputs Y.

        This loss function computes a combination of standard output loss
        (e.g. for classification/regression) and Dropout Ensemble Variance
        regularization loss. X should be a list of 'dev_reps' input arrays,
        where 'dev_reps' is the number of times each input will be pushed
        through a droppy network when computing the DEV regularizer. M should
        be a list of lists of per-layer dropout masks, matched to size of the
        input arrays in X. Y should contain the target outputs for X[0], for
        which inputs will be pushed through a drop-free network.
        """
        if (len(Ws) == 0):
            Ws = self.layer_weights()
        dev_reps = len(X)
        # Compute activations for observations in X
        A = [self.feedforward(X[i], M[i], Ws) for i in range(dev_reps)]
        # Compute loss and gradient for output-layer activations, for the
        # (should be) drop free feedforward of X[0].
        O = self.out_loss(A[0][-1], Y)
        # Make list of activation gradients
        dLdA = [[gp.zeros(Aj.shape) for Aj in A[0]] \
                for i in range(dev_reps)]
        dLdA[0][-1] = O['dL']
        # Compute DEV regularizer loss and gradients
        Ld = 0.0
        for i in range(self.layer_count):
            dev_type = self.dev_types[i]
            dev_lam = self.dev_lams[i]
            if (dev_lam > 0.0000001):
                Ai = [A[j][i] for j in range(dev_reps)]
                Di = lnf.dev_loss(Ai, dev_type, 0)
                Ld = Ld + (dev_lam * Di['L'])
                for j in range(dev_reps):
                    dLdA[j][i] = dLdA[j][i] + (dev_lam * Di['dLdA'][j])
        # Backpropagate gradients for each DEV rep
        B = {'dLdWs': [gp.zeros(W.shape) for W in Ws]}
        for i in range(dev_reps):
            Bi = self.backprop(dLdA[i], A[i], X[i], M[i], Ws)
            for j in range(self.layer_count):
                B['dLdWs'][j] = B['dLdWs'][j] + Bi['dLdWs'][j]
        # Compute parameter regularization loss and gradients
        R = self.reg_loss(Ws)
        # Combine output loss, DEV loss, and regularization loss
        L = [O['L'], Ld, R['L']]
        # Combine output loss gradient and regularization gradient
        dLdWs = [(dWb + dWr) for (dWb, dWr) in zip(B['dLdWs'], R['dLdWs'])]
        return {'L': L, 'dLdWs': dLdWs}
Code example #45
File: LNNet.py Project: nagyistge/NN-Dropout
    def sde_loss(self, X, Y, M, Ws=[], do_print=0):
        """Compute dropout loss for inputs X with target outputs Y.

        This loss function computes the standard dropout loss for some inputs
        X with target outputs Y, when dropout is applied following the masks
        in M, given the layer weights in Ws (default self.layer_weights()).
        """
        if (len(Ws) == 0):
            Ws = self.layer_weights()
        # Compute droppy activations for observations in X
        A = self.feedforward(X, M, Ws)
        # Compute loss and gradient for output-layer activations
        O = self.out_loss(A[-1], Y)
        # Make list of activation gradients
        dLdA = [gp.zeros(Ai.shape) for Ai in A]
        dLdA[-1] = O['dL']
        # Backprop the output loss gradient through network
        B = self.backprop(dLdA, A, X, M, Ws)
        # Compute parameter regularization loss and gradients
        R = self.reg_loss(Ws)
        # Combine output loss, DEV loss, and regularization loss
        L = [O['L'], 0.0, R['L']]
        # Combine output loss gradient and regularization gradient
        dLdWs = [(dWb + dWr) for (dWb, dWr) in zip(B['dLdWs'], R['dLdWs'])]
        return {'L': L, 'dLdWs': dLdWs}
Code example #46
    def zeroHistoryDeltaBatch(self, batchSize):
        """
      Set the initial history delta to zeros for the provided batch size
      """

        zero_delta = gpu.zeros((batchSize, self.layerSize))
        self.setHistoryDelta(zero_delta)
コード例 #47
0
ファイル: util.py プロジェクト: k9triz/breze
    def __init__(self, **kwargs):
        dictlist.replace(kwargs, lambda x: (x,) if isinstance(x, int) else x)
        self.n_pars = n_pars_by_partition(kwargs)

        # Create two representations of the parameters of the object. The first
        # is the symbolic theano variable (whose type is GPU/CPU specific), the
        # second is either a gnumpy or numpy array (again depending on GPU/CPU).
        # Also set a default test value for the symbolic variable.
        if GPU:
            self.data = gnumpy.zeros(self.n_pars)
            self.flat = theano.sandbox.cuda.fvector('parameters')
        else:
            self.data = np.empty(self.n_pars).astype(theano.config.floatX)
            self.flat = T.vector('parameters')

        self.flat.tag.test_value = self.data

        # Go through parameters and assign space and variable.
        self.views = array_partition_views(self.data, kwargs)

        # Make sure the keys are legit -- that they do not overwrite
        # anything.
        for key in kwargs:
            if hasattr(self, key):
                raise ValueError("%s is an illegal name for a variable" % key)

        variables = array_partition_views(self.flat, kwargs)
        variables = dictlist.copy(variables, dct_maker=attrdict.AttrDict)
        self.__dict__.update(variables)
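The flat-storage-plus-views layout described in the comments above can be illustrated with plain numpy (a generic sketch; the actual n_pars_by_partition and array_partition_views helpers from breze are not reproduced here):

import numpy as np

# All parameters live in one flat buffer; each named parameter is a reshaped
# view into a slice of that buffer, so an optimizer can update everything
# through the flat array while the model reads the named views.
shapes = {'in_to_hidden': (10, 5), 'hidden_bias': (5,)}   # hypothetical partition
n_pars = sum(int(np.prod(s)) for s in shapes.values())
data = np.zeros(n_pars)

views, offset = {}, 0
for name, shape in shapes.items():
    size = int(np.prod(shape))
    views[name] = data[offset:offset + size].reshape(shape)  # shares memory with data
    offset += size

views['hidden_bias'][:] = 1.0    # the change is visible through the flat buffer too
assert data[-5:].sum() == 5.0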
コード例 #48
0
ファイル: cudamat_gnumpy.py プロジェクト: mdda/SparseNet
def MaxPool(images, subsX, startX, strideX, outputsX):

    # images are assumed square, so imSizeX is intentionally unpacked twice
    numChannels, imSizeX, imSizeX, numImages = images.shape
    numImgColors = numChannels

    targets = g.zeros((numChannels, outputsX, outputsX, numImages))

    imagesCu = images._base.p_mat
    targetsCu = targets._base.p_mat

    from pylab import prod
    imagesCu_orig = tuple(imagesCu.contents.size)
    imagesTotSize = images.size
    imagesCu.contents.size[0] = numImages
    imagesCu.contents.size[1] = numImgColors * imSizeX**2
    #assert imagesTotSize == prod(imagesCu.contents.size)

    targetsCu_orig = tuple(targetsCu.contents.size)
    targetsTotSize = targets.size
    targetsCu.contents.size[0] = numImages
    targetsCu.contents.size[1] = numImgColors * outputsX**2
    #assert targetsTotSize == prod(targetsCu.contents.size)

    numFilters = numImgColors

    _ConvNet.MaxPool(imagesCu, targetsCu, numFilters, subsX, startX, strideX,
                     outputsX)

    for i in range(2):
        targetsCu.contents.size[i] = targetsCu_orig[i]
        imagesCu.contents.size[i] = imagesCu_orig[i]

    return targets
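A hypothetical call to this wrapper (assuming a CUDA-enabled gnumpy build with the _ConvNet bindings available, and square 24x24 inputs) might look like:

import gnumpy as g

# Hypothetical shapes: 16 channels, 24x24 images, a batch of 128 images.
images = g.randn(16, 24, 24, 128)    # (numChannels, imSizeX, imSizeX, numImages)
# 3x3 pooling windows, starting at offset 0, stride 2, 12 outputs per dimension.
pooled = MaxPool(images, 3, 0, 2, 12)
print(pooled.shape)                  # (16, 12, 12, 128), matching the targets allocation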
コード例 #49
0
def show_chains(rbm, state, dataset, num_particles=20, num_samples=20, show_every=10, display=True,
                figname='Gibbs chains', figtitle='Gibbs chains'):
    samples = gnp.zeros((num_particles, num_samples, state.v.shape[1]))
    state = state[:num_particles, :, :]

    for i in range(num_samples):
        samples[:, i, :] = rbm.vis_expectations(state.h)
        
        for j in range(show_every):
            state = rbm.step(state)

    npix = dataset.num_rows * dataset.num_cols
    rows = [vm.hjoin([samples[i, j, :npix].reshape((dataset.num_rows, dataset.num_cols)).as_numpy_array()
                      for j in range(num_samples)],
                     normalize=False)
            for i in range(num_particles)]
    grid = vm.vjoin(rows, normalize=False)

    if display:
        pylab.figure(figname)
        pylab.matshow(grid, cmap='gray', fignum=False)
        pylab.title(figtitle)
        pylab.gcf().canvas.draw()

    return grid
コード例 #50
0
    def initParams(self):
        # crude way of random initialization (random seed) for parameters
        import time
        self.seed = int(time.time()) % 100000
        # for tt in range(self.seed): gp.rand()

        sizes = [self.inputDim] + self.layerSizes + [self.outputDim]
        scales = [
            gp.sqrt(6) / gp.sqrt(n + m) for n, m in zip(sizes[:-1], sizes[1:])
        ]
        self.stack = [[gp.rand(m,n)*2*s-s,gp.zeros((m,1))] \
                            for n,m,s in zip(sizes[:-1],sizes[1:],scales)]
        self.hActs = [gp.empty((s, self.mbSize)) for s in sizes]

        if self.train:
            self.deltas = [gp.empty((s, self.mbSize)) for s in sizes[1:]]
            self.grad = [[gp.empty(w.shape),
                          gp.empty(b.shape)] for w, b in self.stack]
            for tt in range(self.seed):
                gp.rand()

            self.stack = [[
                ws[0] + .01 * gp.randn(ws[0].shape),
                ws[1] + .01 * gp.randn(ws[1].shape)
            ] for ws in self.stack]
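The scale used above is the familiar Glorot/Xavier-style bound, sqrt(6)/sqrt(n + m) for a layer mapping n units to m units. A CPU-only sketch of the same initialization, with hypothetical layer sizes:

import numpy as np

sizes = [784, 1024, 512, 10]   # hypothetical inputDim, layerSizes, outputDim
stack = []
for n, m in zip(sizes[:-1], sizes[1:]):
    s = np.sqrt(6.0) / np.sqrt(n + m)
    W = np.random.rand(m, n) * 2 * s - s   # uniform in [-s, s], shape (m, n)
    b = np.zeros((m, 1))
    stack.append([W, b])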
コード例 #51
0
ファイル: loss.py プロジェクト: yujiali/pynn
 def compute_not_weighted_loss_and_grad(self, pred, compute_grad=False):
     pred = gnp.as_garray(pred)
     M = 1 - pred * self.target
     loss = (((M > 0) * M)**2).sum()
     grad = -2 * ((M > 0) * self.target * M) if compute_grad else gnp.zeros(
         pred.shape)
     return loss, grad
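This is a squared hinge loss: with targets in {-1, +1}, each element contributes max(0, 1 - pred*target)^2, with gradient -2*target*max(0, 1 - pred*target). A tiny CPU check with made-up numbers:

import numpy as np

pred = np.array([[0.3, -1.2], [2.0, 0.1]])
target = np.array([[1.0, -1.0], [1.0, -1.0]])
M = 1 - pred * target                      # margins: 0.7, -0.2, -1.0, 1.1
loss = (((M > 0) * M) ** 2).sum()          # 0.49 + 1.21 = 1.70
grad = -2 * ((M > 0) * target * M)         # nonzero only where the margin is violated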
コード例 #52
0
    def __init__(self, layer_shape, dropout_probability, n_epochs = 50, l2_max = 15.0, learning_rate = lambda x:1.0 * .998 ** x, doGradientCheck = False):
        assert(len(dropout_probability) == len(layer_shape))
        self.dropout_probability = dropout_probability
        self.activation_hidden = activation_relu
        self.gradient_hidden = gradient_relu
        self.activation_output = activation_softmax
        self.gradient_output = gradient_output_softmax
        self.n_epochs = n_epochs
        self.f_score = score_softmax
        self.learning_rate = learning_rate
        self.mini_batch_size = 100
        self.doGradientCheck = doGradientCheck
        self.l2_max = l2_max

        self.training_score = []
        self.training_validation_error = []
        
        self.weights = []
        self.activation = []
        self.gradient = []
        for i in range(1,len(layer_shape)):
            self.weights.append([g.randn(layer_shape[i-1],layer_shape[i])*0.01, g.zeros(layer_shape[i])])
            self.activation.append(self.activation_hidden)
            self.gradient.append(self.gradient_hidden)
        self.activation[-1] = self.activation_output
        self.gradient[-1] = self.gradient_output
コード例 #53
0
ファイル: nonlin.py プロジェクト: barapa/HF-RNN
 def sample(self, X):
     assert X.shape[1] == self.input_size
     batch_size = X.shape[0]
     ANS = g.zeros((batch_size ,self.output_size))
     for (f, (ir0,ir1), (or0,or1)) in zip(self.fns, self.input_ranges, self.output_ranges):
         ANS[:,or0:or1] =  f.sample(X[:, ir0:ir1])
     return ANS
コード例 #54
0
ファイル: lbl.py プロジェクト: SunnyWay/im2txtDemo
    def backward(self, Y, preds, acts, words, X):
        """
        Backward pass through the network
        """
        batchsize = preds.shape[0]

        # Compute part of df/dR
        Ix = gpu.garray(preds[:,:-1] - Y) / batchsize
        delta = gpu.dot(acts.T, Ix)
        dR = delta[:-1,:] + self.gamma_r * self.R
        db = delta[-1,:]
        dR = dR.as_numpy_array()

        # Compute df/dC and word inputs for df/dR
        Ix = gpu.dot(Ix, self.R.T)
        dC = gpu.zeros(np.shape(self.C))
        for i in range(self.context):
            delta = gpu.dot(words[:,:,i].T, Ix)
            dC[i,:,:] = delta + self.gamma_c * self.C[i,:,:]
            delta = gpu.dot(Ix, self.C[i,:,:].T)
            delta = delta.as_numpy_array()
            for j in range(X.shape[0]):
                dR[:,X[j,i]] = dR[:,X[j,i]] + delta.T[:,j]

        self.dR = gpu.garray(dR)
        self.db = db
        self.dC = dC
コード例 #55
0
ファイル: lbl.py プロジェクト: SunnyWay/im2txtDemo
    def forward(self, X, test=False):
        """
        Feed-forward pass through the model
        X: ('batchsize' x 'context') matrix of word indices
        """
        batchsize = X.shape[0]
        R = self.R
        C = self.C
        bw = self.bw

        # Obtain word features
        tmp = R.as_numpy_array()[:,X.flatten()].flatten(order='F')  # flatten() defaults to row-major order; order='F' uses Fortran (column-major) order
        tmp = tmp.reshape((batchsize, self.K * self.context))   # reshape() fills in row-major order
        words = np.zeros((batchsize, self.K, self.context))
        for i in range(batchsize):
            words[i,:,:] = tmp[i,:].reshape((self.K, self.context), order='F')
        words = gpu.garray(words)

        # Compute the hidden layer (predicted next word representation)
        acts = gpu.zeros((batchsize, self.K))
        for i in range(self.context):
            acts = acts + gpu.dot(words[:,:,i], C[i,:,:]) # dot() on 2-D arrays is equivalent to matrix multiplication
        acts = gpu.concatenate((acts, gpu.ones((batchsize, 1))), 1)

        # Compute softmax
        preds = gpu.dot(acts, gpu.concatenate((R, bw)))
        preds = gpu.exp(preds - preds.max(1).reshape(batchsize, 1))
        denom = preds.sum(1).reshape(batchsize, 1)
        preds = gpu.concatenate((preds / denom, gpu.ones((batchsize, 1))), 1)

        return (words, acts, preds.as_numpy_array())
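A CPU-only shape walkthrough of this forward pass, with hypothetical sizes and equivalent numpy indexing used in place of the flatten/reshape gymnastics above (the trailing column of ones appended to preds in the original is omitted here):

import numpy as np

V, K, context, batchsize = 1000, 50, 3, 2      # vocab, feature dim, context, batch
R = np.random.randn(K, V)                      # word feature matrix
C = np.random.randn(context, K, K)             # one mixing matrix per context position
bw = np.random.randn(1, V)                     # output bias row
X = np.random.randint(0, V, (batchsize, context))

# Gather features for each context word: (batchsize, K, context).
words = np.stack([R[:, X[i]] for i in range(batchsize)])

# Predicted next-word representation, plus a bias column: (batchsize, K + 1).
acts = np.zeros((batchsize, K))
for i in range(context):
    acts += words[:, :, i].dot(C[i])
acts = np.concatenate([acts, np.ones((batchsize, 1))], axis=1)

# Softmax over the vocabulary: (batchsize, V).
logits = acts.dot(np.concatenate([R, bw], axis=0))
preds = np.exp(logits - logits.max(1, keepdims=True))
preds /= preds.sum(1, keepdims=True)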