Example #1
0
 def _test_layer_stats(self, layer_output):
     """
     DESCRIPTION:
         Assemble the monitoring statistics of this layer: the max / mean / min
         of the norms of W taken over axis 0, of the layer output, of W itself
         and of b.
         Per the original notes, this is called once per batch of the test or
         valid set and the caller averages the values over an epoch.
     PARAM:
         layer_output: the output from the layer
     RETURN:
         A list of tuples of [('name_a', var_a), ('name_b', var_b)] whereby var is scalar
     """
     # Euclidean length of W along axis 0 (the columns, if W is 2-D).
     col_norms = T.sqrt((self.W ** 2).sum(axis=0))

     stats = [
         ('max_col_length', T.max(col_norms)),
         ('mean_col_length', T.mean(col_norms)),
         ('min_col_length', T.min(col_norms)),
     ]
     stats.extend([
         ('output_max', T.max(layer_output)),
         ('output_mean', T.mean(layer_output)),
         ('output_min', T.min(layer_output)),
     ])
     stats.extend([
         ('max_W', T.max(self.W)),
         ('mean_W', T.mean(self.W)),
         ('min_W', T.min(self.W)),
     ])
     stats.extend([
         ('max_b', T.max(self.b)),
         ('mean_b', T.mean(self.b)),
         ('min_b', T.min(self.b)),
     ])
     return stats
Example #2
0
    def define_network(self, layers_info=None):
        """
        Builds Theano graph of the network.

        Creates one LBNHiddenLayer per entry of self.n_hidden (the first one
        reads self.x, every later one reads the previous layer's output),
        followed by an OutputLayer. It then stores:

        * self.hidden_layers / self.output_layer: the layer objects,
        * self.params: a list with one entry per layer (each entry is that
          layer's own ``params`` attribute, appended as-is),
        * self.output: the output layer's symbolic output,
        * self.log_likelihood: symbolic scalar built from self.output and
          self.y,
        * self.predict: compiled function of (self.x, self.m) -> self.output.

        :param layers_info: optional nested mapping holding stored weights.
            When None, every layer receives None for det_W, det_b,
            stoch_mlp_info and V_values. Otherwise the keys read are
            ['hidden_layers'][i]['LBNlayer']['detLayer']['W' | 'b'],
            ['hidden_layers'][i]['LBNlayer']['stochLayer'] and
            ['output_layer']['W'].
        """
        self.hidden_layers = [None]*self.n_hidden.size

        self.params = []
        for i, h in enumerate(self.n_hidden):
            if i == 0:
                # First hidden layer: fed by the network input self.x (size self.n_in).
                # NOTE(review): this is the only layer that is passed m=self.m;
                # the layers built in the else-branch are created without it -- confirm.
                self.hidden_layers[i] = LBNHiddenLayer(self.rng, self.trng, self.x, self.n_in,
                                        h, self.det_activation[i],
                                        self.stoch_n_hidden, self.stoch_activation,
                                        det_activation_name=self.det_activation_names[i],
                                        stoch_activation_names=self.stoch_activation_names,
                                        m=self.m,
                                        det_W=None if layers_info is None else
                                        np.array(
                                        layers_info['hidden_layers'][i]['LBNlayer']['detLayer']\
                                                                                            ['W']),
                                        det_b=None if layers_info is None else
                                        np.array(layers_info['hidden_layers'][i]\
                                                                    ['LBNlayer']['detLayer']['b']),
                                        stoch_mlp_info=None if layers_info is None else
                                        layers_info['hidden_layers'][i]['LBNlayer']['stochLayer'])
            else:
                # Later hidden layers: fed by the previous layer's output
                # (input size self.n_hidden[i-1]).
                self.hidden_layers[i] = LBNHiddenLayer(self.rng, self.trng,
                                        self.hidden_layers[i-1].output,
                                        self.n_hidden[i-1], h, self.det_activation[i],
                                        self.stoch_n_hidden, self.stoch_activation,
                                        det_activation_name=self.det_activation_names[i],
                                        stoch_activation_names=self.stoch_activation_names, 
                                        det_W=None if layers_info is None else
                                        np.array(layers_info['hidden_layers'][i]['LBNlayer']\
                                                                                ['detLayer']['W']),
                                        det_b=None if layers_info is None else
                                        np.array(layers_info['hidden_layers'][i]['LBNlayer']\
                                                                                ['detLayer']['b']),
                                        stoch_mlp_info=None if layers_info is None else
                                        layers_info['hidden_layers'][i]['LBNlayer']['stochLayer'])

            # The layer's params object is appended whole, not flattened.
            self.params.append(self.hidden_layers[i].params)

        # Output layer reads the last hidden layer and uses the last
        # deterministic activation / activation name.
        self.output_layer = OutputLayer(self.rng, self.hidden_layers[-1].output, self.n_hidden[-1], 
                                                            self.n_out, self.det_activation[-1],
                                                            self.det_activation_names[-1],
                                                            V_values=None 
                                                            if layers_info is None else np.array(
                                                            layers_info['output_layer']['W']))

        self.params.append(self.output_layer.params)
        self.output = self.output_layer.output
        # -0.5 * squared error summed over axis 2; self.y gets a broadcastable
        # leading axis so it lines up with self.output.
        exp_value = -0.5*T.sum((self.output - self.y.dimshuffle('x',0,1))**2, axis=2)
        # Shift constant: max(exp_value) when max < -min, otherwise min(exp_value).
        max_exp_value = theano.ifelse.ifelse(T.lt(T.max(exp_value), -1*T.min(exp_value)),
                                                                T.max(exp_value), T.min(exp_value))
 
        # The shift is subtracted inside exp() and added back after the log;
        # the trailing term depends only on self.y's shape and self.m.
        self.log_likelihood = T.sum(T.log(T.sum(T.exp(exp_value - max_exp_value), axis=0)) +
                                                                                    max_exp_value)-\
                                self.y.shape[0]*(T.log(self.m)+self.y.shape[1]/2.*T.log(2*np.pi))

        self.predict = theano.function(inputs=[self.x, self.m], outputs=self.output)
Example #3
0
def norm(x,ord):
    """Return a symbolic vector or matrix norm of ``x``.

    ``x`` is first converted with ``as_tensor_variable``.

    Supported orders for 1-D input:
        None    -> sqrt(sum(x**2))
        'inf'   -> max(abs(x))
        '-inf'  -> min(abs(x))
        0       -> number of non-zero entries
        other   -> sum(abs(x**ord)) ** (1/ord)

    Supported orders for 2-D input:
        None / 'fro' -> sqrt(sum(abs(x**2)))
        'inf'  / '-inf' -> max / min over the sums of abs(x) along axis 1
        1      / -1     -> max / min over the sums of abs(x) along axis 0

    Note that infinity is selected with the strings 'inf' / '-inf'.

    Raises:
        ValueError: for 0-d input, or for an unsupported ``ord``.
        NotImplementedError: for input with more than two dimensions.
    """
    x = as_tensor_variable(x)
    ndim = x.ndim
    if ndim == 0:
        raise ValueError("'axis' entry is out of bounds.")
    elif ndim == 1:
        # Identity test: `== None` would dispatch to ord's own __eq__,
        # which is ambiguous for array-like orders.
        if ord is None:
            return tensor.sum(x**2)**0.5
        elif ord == 'inf':
            return tensor.max(abs(x))
        elif ord == '-inf':
            return tensor.min(abs(x))
        elif ord == 0:
            return x[x.nonzero()].shape[0]
        else:
            try:
                z = tensor.sum(abs(x**ord))**(1./ord)
            except TypeError:
                raise ValueError("Invalid norm order for vectors.")
            return z
    elif ndim == 2:
        if ord is None or ord == 'fro':
            return tensor.sum(abs(x**2))**(0.5)
        elif ord == 'inf':
            return tensor.max(tensor.sum(abs(x), 1))
        elif ord == '-inf':
            return tensor.min(tensor.sum(abs(x), 1))
        elif ord == 1:
            return tensor.max(tensor.sum(abs(x), 0))
        elif ord == -1:
            return tensor.min(tensor.sum(abs(x),0))
        else:
            # Previously `ValueError(0)`, which told the caller nothing.
            raise ValueError("Invalid norm order for matrices.")
    elif ndim > 2:
        raise NotImplementedError("We don't support norm witn ndim > 2")
    def _lowrnk_emp(N, D, K, mnp, masknp):
        """Build the symbolic pieces of a rank-truncated empirical covariance.

        From the data ``mnp`` and the mask ``masknp`` this builds a masked
        data tensor, a scaled empirical covariance, its SVD truncation to the
        singular values above 0.2, a diagonal shift, the Cholesky factor of
        the shifted matrix and three random draws.

        ``K`` is not used in the body.
        Presumably mnp and masknp are (D, N) arrays -- the mask shared
        variable is created with that shape; confirm against the caller.

        Returns the tuple
        (mask, m, y, zero_y, zero2, zero, st, scale, cov, w, eigval, wwT,
        z_y, z_k, z_eps).
        """
        # Factory for a named shared variable initialised with ones.
        Shared = lambda shape, name: theano.shared(value=np.ones(shape, dtype=theano.config.floatX),
                                                   name=name, borrow=True)
        srng = T.shared_randomstreams.RandomStreams(seed=120)
        mask = Shared((D, N), 'mask')
        mask.set_value(masknp)
        m = T.as_tensor_variable(mnp)
        # Masked data: entries where the mask is zero become zero.
        y = mask * m
        zero_y = T.as_tensor_variable(np.zeros((D, N)))
        zero2 = T.as_tensor_variable(np.zeros((D, D)))
        zero = T.as_tensor_variable(np.zeros(D))
        # Number of non-zero entries of y along axis 0.
        st = T.sum(T.neq(y, zero_y), axis=0)
        # NOTE(review): `s` is evaluated eagerly but never used afterwards.
        s = st.eval()

        # NOTE(review): dot(mask, mask.T) is (D, D) for a (D, N) mask, while the
        # ones tensor is (D, N); the addition only lines up when D == N -- confirm.
        scale = 1 / ((T.dot(mask, mask.T)) + T.ones((D, N)))
        emp_cov = scale * (T.dot(y, y.T))
        [U,S,V] = T.nlinalg.svd(emp_cov)
        # Keep only the leading rk components, rk = count of singular values > 0.2.
        rk = T.sum(S>0.2)
        cov = (U[:,0:rk].dot(T.nlinalg.diag(S[0:rk]))).dot(V[0:rk,:])
        # |min(smallest eigenvalue, 0)|: size of the most negative eigenvalue, if any.
        eigval = T.abs_(T.min([T.min(T.nlinalg.eig(cov)[0]), 0]))
        # Shift the diagonal by that amount plus 0.1 before the Cholesky factorisation.
        cov = cov + (eigval + 0.1) * T.eye(D)
        print('so far so good')
        w = theano.tensor.slinalg.cholesky(cov)
        print('w calculated')
        wwT = T.dot(w, w.T)

        # Define random variables for mVNscan component
        z_y = srng.normal([D])
        z_k = srng.normal([D])
        z_eps = srng.normal()

        return mask, m, y, zero_y, zero2, zero, st, scale, cov, w, eigval, wwT, z_y, z_k, z_eps
Example #5
0
    def plotUpdate(self, updates):
        '''
        Collect max / min / mean of the pending update of every parameter.

        For each of the first self.deep layers the difference
        updates[param] - param is summarised for the weight (slot 2*i) and
        the bias (slot 2*i + 1); the classifier weight goes into the last
        slot (2 * self.deep). The classifier bias is not recorded.

        type updates: dict
        para updates: update dictionary, keyed by the parameter variables
        return: [maxdict, mindict, meandict], three symbolic vectors of
                length 2 * self.deep + 1
        '''
        maxdict = T.zeros(shape=(self.deep * 2 + 1, ))
        mindict = T.zeros(shape=(self.deep * 2 + 1, ))
        meandict = T.zeros(shape=(self.deep * 2 + 1, ))

        # range instead of xrange: xrange does not exist on Python 3 and
        # range behaves the same for this loop on Python 2.
        for i in range(self.deep):
            updw = updates[self.layers[i].w] - self.layers[i].w
            maxdict = T.set_subtensor(maxdict[2 * i], T.max(updw))
            mindict = T.set_subtensor(mindict[2 * i], T.min(updw))
            meandict = T.set_subtensor(meandict[2 * i], T.mean(updw))
            updb = updates[self.layers[i].b] - self.layers[i].b
            maxdict = T.set_subtensor(maxdict[2 * i + 1], T.max(updb))
            mindict = T.set_subtensor(mindict[2 * i + 1], T.min(updb))
            meandict = T.set_subtensor(meandict[2 * i + 1], T.mean(updb))

        updw = updates[self.classifier.w] - self.classifier.w
        maxdict = T.set_subtensor(maxdict[self.deep * 2], T.max(updw))
        mindict = T.set_subtensor(mindict[self.deep * 2], T.min(updw))
        meandict = T.set_subtensor(meandict[self.deep * 2], T.mean(updw))
        return [maxdict, mindict, meandict]
Example #6
0
def chamfer_distance(xyz1, xyz2):
    """Return the symbolic Chamfer loss between two point sets.

    Both inputs are converted with ``T.as_tensor`` and must have the same
    rank, either 2 or 3 (2-D inputs get a leading axis of size one).
    The loss is the sum of the minima of the pairwise term
    ``|a|^2 + |b|^2 - 2 a.b`` taken along axis 1 plus the sum of the minima
    taken along axis 2.

    Raises:
        NotImplementedError: for any other combination of ranks.
    """
    print('Using Chamfer distance loss')
    xyz1 = T.as_tensor(xyz1)
    xyz2 = T.as_tensor(xyz2)

    def _batch_pairwise_dist(x, y):
        # Choose how to form the batched product u . v^T for the given ranks.
        if x.ndim == 3 and y.ndim == 3:
            def gram(u, v):
                return T.batched_dot(u, v.dimshuffle(0, 2, 1))
        elif x.ndim == 2 and y.ndim == 2:
            def gram(u, v):
                return T.shape_padleft(T.dot(u, v.T))
        else:
            raise NotImplementedError

        self_x = gram(x, x)
        self_y = gram(y, y)
        cross = gram(x, y)

        # Diagonals of the self products are the squared norms of each point.
        diag_x = T.arange(0, self_x.shape[1], dtype='int64')
        diag_y = T.arange(0, self_y.shape[1], dtype='int64')
        sq_x = self_x[:, diag_x, diag_x].dimshuffle(0, 1, 'x')
        sq_y = self_y[:, diag_y, diag_y].dimshuffle(0, 'x', 1)
        tiled_x = T.tile(sq_x, (1, 1, cross.shape[2]))
        tiled_y = T.tile(sq_y, (1, cross.shape[1], 1))
        return tiled_x + tiled_y - 2. * cross

    pairwise = _batch_pairwise_dist(xyz1, xyz2)
    loss_over_first = T.sum(T.min(pairwise, 1))
    loss_over_second = T.sum(T.min(pairwise, 2))
    return loss_over_first + loss_over_second
Example #7
0
    def _layer_stats(self, state_below, layer_output):
        """
        DESCRIPTION:
            Assemble the monitoring statistics of this layer: max / mean / min
            of W and b, of the layer output, of the norms of W taken over
            axis 0, and of the layer input. The two "mean" entries for the
            output and the input are means of absolute values.
            Per the original notes, this is called once per batch of the test
            or valid set and the caller averages the values over an epoch.
        PARAM:
            state_below: the input to the layer
            layer_output: the output from the layer
        RETURN:
            A list of tuples of [('name_a', var_a), ('name_b', var_b)] whereby var is scalar
        """
        # Euclidean length of W along axis 0 (the columns, if W is 2-D).
        col_norms = T.sqrt((self.W**2).sum(axis=0))

        stats = []
        stats.append(('max_W', T.max(self.W)))
        stats.append(('mean_W', T.mean(self.W)))
        stats.append(('min_W', T.min(self.W)))
        stats.append(('max_b', T.max(self.b)))
        stats.append(('mean_b', T.mean(self.b)))
        stats.append(('min_b', T.min(self.b)))
        stats.append(('max_layer_output', T.max(layer_output)))
        stats.append(('mean_layer_output', T.mean(T.abs_(layer_output))))
        stats.append(('min_layer_output', T.min(layer_output)))
        stats.append(('max_col_length', T.max(col_norms)))
        stats.append(('mean_col_length', T.mean(col_norms)))
        stats.append(('min_col_length', T.min(col_norms)))
        stats.append(('max_state_below', T.max(state_below)))
        stats.append(('mean_state_below', T.mean(T.abs_(state_below))))
        stats.append(('min_state_below', T.min(state_below)))
        return stats
    def test_optimization_min(self):
        """Check the compiled graph of min() and its negated variants per axis."""
        data = np.asarray(np.random.rand(2, 3), dtype=config.floatX)
        n = tensor.matrix()

        def compiled(expr):
            # Compile expr over n and return (callable, topologically sorted nodes).
            fn = function([n], expr, mode=self.mode)
            return fn, fn.maker.fgraph.toposort()

        for axis in (0, 1, -1):
            fn, nodes = compiled(tensor.min(n, axis))
            assert len(nodes) == 1
            assert isinstance(nodes[0].op, CAReduce)
            fn(data)

            # test variant with neg to make sure we optimize correctly
            fn, nodes = compiled(tensor.min(-n, axis))
            assert len(nodes) == 2
            reduce_node, neg_node = nodes
            assert isinstance(reduce_node.op, CAReduce)  # max
            assert isinstance(neg_node.op, Elemwise)
            assert isinstance(neg_node.op.scalar_op, scalar.Neg)
            fn(data)

            fn, nodes = compiled(-tensor.min(n, axis))
            assert len(nodes) == 2
            neg_node, reduce_node = nodes
            assert isinstance(neg_node.op, Elemwise)
            assert isinstance(neg_node.op.scalar_op, scalar.Neg)
            assert isinstance(reduce_node.op, CAReduce)  # max
            fn(data)

            fn, nodes = compiled(-tensor.min(-n, axis))
            assert len(nodes) == 1
            assert isinstance(nodes[0].op, CAReduce)  # max
            fn(data)
Example #9
0
    def plotUpdate(self,updates):
        '''
        Collect max / min / mean of the pending update of every parameter.

        For each of the first self.deep layers the difference
        updates[param] - param is summarised for the weight (slot 2*i) and
        the bias (slot 2*i+1); the classifier weight goes into the last
        slot (2*self.deep). The classifier bias is not recorded.

        type updates: dict
        para updates: update dictionary, keyed by the parameter variables
        return: [maxdict, mindict, meandict], three symbolic vectors of
                length 2*self.deep+1
        '''
        maxdict=T.zeros(shape=(self.deep*2+1,))
        mindict=T.zeros(shape=(self.deep*2+1,))
        meandict=T.zeros(shape=(self.deep*2+1,))

        # range instead of xrange: xrange does not exist on Python 3 and
        # range behaves the same for this loop on Python 2.
        for i in range(self.deep):
            updw=updates[self.layers[i].w]-self.layers[i].w
            maxdict=T.set_subtensor(maxdict[2*i],T.max(updw))
            mindict=T.set_subtensor(mindict[2*i],T.min(updw))
            meandict=T.set_subtensor(meandict[2*i],T.mean(updw))
            updb=updates[self.layers[i].b]-self.layers[i].b
            maxdict=T.set_subtensor(maxdict[2*i+1],T.max(updb))
            mindict=T.set_subtensor(mindict[2*i+1],T.min(updb))
            meandict=T.set_subtensor(meandict[2*i+1],T.mean(updb))

        updw=updates[self.classifier.w]-self.classifier.w
        maxdict=T.set_subtensor(maxdict[self.deep*2],T.max(updw))
        mindict=T.set_subtensor(mindict[self.deep*2],T.min(updw))
        meandict=T.set_subtensor(meandict[self.deep*2],T.mean(updw))
        return [maxdict,mindict,meandict]
Example #10
0
 def compute_S(idx, Sp1, zAA, zBB):
     """Return the S term for position ``idx`` given the next term ``Sp1``.

     Reads nT, iia and iib from the enclosing scope.
     When idx == nT-2 the result uses only the last blocks of zAA / zBB;
     otherwise it uses the block at idx+1 (capped at nT-2) and Sp1.
     """
     # Branch taken at idx == nT-2: no dependence on Sp1.
     boundary = T.dot(zBB[iib[-1]], Tla.matrix_inverse(zAA[iia[-1]]))

     # General branch; the capped index keeps the lookup in range.
     capped = T.min([idx+1, nT-2])
     correction = T.dot(Sp1, T.transpose(zBB[iib[capped]]))
     interior = T.dot(zBB[iib[idx]],
                      Tla.matrix_inverse(zAA[iia[capped]] - correction))

     return ifelse(T.eq(idx, nT-2), boundary, interior)
    def test_optimization_min(self):
        """Check the compiled graph of min() over axis 0 and its negated variants."""
        data = numpy.asarray(numpy.random.rand(2,3),dtype=config.floatX)
        n = tensor.matrix()

        def compiled(expr):
            # Compile expr over n and return (callable, topologically sorted nodes).
            fn = function([n], expr, mode=self.mode)
            return fn, fn.maker.env.toposort()

        fn, nodes = compiled(tensor.min(n, 0))
        assert len(nodes) == 1
        assert isinstance(nodes[0].op, CAReduce)
        fn(data)

        #test variant with neg to make sure we optimize correctly
        fn, nodes = compiled(tensor.min(-n, 0))
        assert len(nodes) == 2
        reduce_node, neg_node = nodes
        assert isinstance(reduce_node.op, CAReduce)  # max
        assert isinstance(neg_node.op, Elemwise)
        assert isinstance(neg_node.op.scalar_op, scalar.Neg)
        fn(data)

        fn, nodes = compiled(-tensor.min(n, 0))
        assert len(nodes) == 2
        neg_node, reduce_node = nodes
        assert isinstance(neg_node.op, Elemwise)
        assert isinstance(neg_node.op.scalar_op, scalar.Neg)
        assert isinstance(reduce_node.op, CAReduce)  # max
        fn(data)

        fn, nodes = compiled(-tensor.min(-n, 0))
        assert len(nodes) == 1
        assert isinstance(nodes[0].op, CAReduce)  # max
        fn(data)
 def value_single(self, x, y, f):
     """Return the value of this term for one example, cast to floatX.

     The value is the mean of two terms, each capped at 1, built from
     ``1 - y`` and ``f[2]``. It is only used when
     ``self.condition_single(x, f)`` equals 1; otherwise the result is 1.
     """
     not_y = 1 - y
     capped_a = T.min([1. - not_y + f[2], 1.])
     capped_b = T.min([1. - f[2] + not_y, 1.])
     averaged = T.cast(T.mean([capped_a, capped_b]),
                       dtype=theano.config.floatX)

     applies = T.eq(self.condition_single(x, f), 1.)
     return T.cast(ifelse(applies, averaged, 1.),
                   dtype=theano.config.floatX)
Example #13
0
 def calc_min_max(p_n, p_p):
     """Return ``(hmax, hmin)``: the largest and smallest value over both inputs.

     Note the order of the result: maximum first, minimum second.
     """
     low_n, low_p = T.min(p_n), T.min(p_p)
     high_n, high_p = T.max(p_n), T.max(p_p)

     # Pick the smaller of the two minima and the larger of the two maxima.
     overall_low = ifelse(T.lt(low_p, low_n), low_p, low_n)
     overall_high = ifelse(T.lt(high_p, high_n), high_n, high_p)
     return overall_high, overall_low
Example #14
0
 def compute_S(idx, Sp1, zAA, zBB):
     """Return the S term for position ``idx`` given the next term ``Sp1``.

     Reads nT, iia and iib from the enclosing scope.
     When idx == nT - 2 the result uses only the last blocks of zAA / zBB;
     otherwise it uses the block at idx + 1 (capped at nT - 2) and Sp1.
     """
     is_boundary = T.eq(idx, nT - 2)

     # Branch taken at idx == nT - 2: no dependence on Sp1.
     at_boundary = T.dot(zBB[iib[-1]], Tla.matrix_inverse(zAA[iia[-1]]))

     # General branch; the capped index keeps the lookup in range.
     nxt = T.min([idx + 1, nT - 2])
     reduced_block = zAA[iia[nxt]] - T.dot(Sp1, T.transpose(zBB[iib[nxt]]))
     elsewhere = T.dot(zBB[iib[idx]], Tla.matrix_inverse(reduced_block))

     return ifelse(is_boundary, at_boundary, elsewhere)
Example #15
0
def eig_pos_barrier( theta = Th.dvector('theta'), M    = Th.dmatrix('M') ,
                 STA   = Th.dvector('STA'), STC  = Th.dmatrix('STC'), 
                 U = Th.dmatrix('U') , V1 = Th.dvector('V1'), **other):
     '''
     Barrier on the eigenvalues of I - (M + M.T)/2 and on the entries of V1.

     The visible part of the returned expression is 1 minus the product of
     three conditions: the sum of the logs of the eigenvalues exceeds -250,
     the smallest eigenvalue is positive, and the smallest entry of V1 is
     positive. It is therefore 0 when all three hold and 1 otherwise, i.e.
     non-zero means the barrier is violated.

     NOTE(review): the return statement ends with a line continuation and the
     rest of the expression is not part of this excerpt. An earlier version of
     this docstring quoted a log-det threshold of exp(-6), while the code
     compares against -250 -- confirm which is intended.

     theta, STA, STC, U and any extra keyword arguments are not used in the
     visible part of the body.
     '''
     # Identity minus the symmetric part of M.
     ImM = Th.identity_like(M)-(M+M.T)/2
     w,v = eig( ImM )
     return 1-(Th.sum(Th.log(w))>-250)*(Th.min(w)>0)*(Th.min(V1.flatten())>0) \
Example #16
0
    def _get_hidden_layer_connectivity(self, layerIdx):
        """Sample a connectivity value for every unit of hidden layer ``layerIdx``.

        The sampling probabilities come from ``self._get_p`` applied to the
        minimum connectivity of the preceding layer: the input connectivity
        for layer 0, otherwise the updated connectivity of layer layerIdx-1.
        One multinomial draw is made per unit over the reversed probability
        vector; summing the cumulative sum of each draw turns it into a
        single number per unit.
        """
        layer_size = self._hidden_sizes[layerIdx]
        if layerIdx == 0:
            previous = self.layers_connectivity[layerIdx]
        else:
            previous = self.layers_connectivity_updates[layerIdx-1]
        p_vals = self._get_p(T.min(previous))

        # One row of (reversed) probabilities per unit of this layer.
        pvals_rows = T.tile(p_vals[::-1][None, :], (layer_size, 1))
        draws = self._mrng.multinomial(pvals=pvals_rows, dtype=theano.config.floatX)
        return T.sum(T.cumsum(draws, axis=1), axis=1)
Example #17
0
    def _get_hidden_layer_connectivity(self, layerIdx):
        """Sample a connectivity value for every unit of hidden layer ``layerIdx``.

        The sampling probabilities come from ``self._get_p`` applied to the
        minimum connectivity of the preceding layer: the input connectivity
        for layer 0, otherwise the updated connectivity of layer layerIdx-1.
        One multinomial draw is made per unit over the reversed probability
        vector; summing the cumulative sum of each draw turns it into a
        single number per unit.
        """
        layer_size = self._hidden_sizes[layerIdx]
        lc = (self.layers_connectivity[layerIdx] if layerIdx == 0
              else self.layers_connectivity_updates[layerIdx-1])
        p_vals = self._get_p(T.min(lc))

        # One row of (reversed) probabilities per unit of this layer.
        tiled_pvals = T.tile(p_vals[::-1][None, :], (layer_size, 1))
        samples = self._mrng.multinomial(pvals=tiled_pvals, dtype=floatX)
        running_total = T.cumsum(samples, axis=1)
        return T.sum(running_total, axis=1)
Example #18
0
    def get_monitoring_channels(self, V):
        """Return a dict of named symbolic statistics of the parameters.

        Covers min / max of the weights, the mean of ``theano_norms(weights)``
        and min / mean / max of the hidden and visible biases.
        ``V`` is accepted but not used.
        """
        vb, hb, weights = self.get_params()
        norms = theano_norms(weights)

        channels = {}
        channels['W_min'] = tensor.min(weights)
        channels['W_max'] = tensor.max(weights)
        channels['W_norm_mean'] = tensor.mean(norms)
        channels['bias_hid_min'] = tensor.min(hb)
        channels['bias_hid_mean'] = tensor.mean(hb)
        channels['bias_hid_max'] = tensor.max(hb)
        channels['bias_vis_min'] = tensor.min(vb)
        channels['bias_vis_mean'] = tensor.mean(vb)
        channels['bias_vis_max'] = tensor.max(vb)
        return channels
Example #19
0
    def get_monitoring_channels(self, V):
        """Return a dict of named symbolic statistics of the parameters.

        Covers min / max of the weights, the mean of ``theano_norms(weights)``
        and min / mean / max of the hidden and visible biases.
        ``V`` is accepted but not used.
        """
        vb, hb, weights = self.get_params()
        norms = theano_norms(weights)

        # Weight statistics first, then hidden biases, then visible biases.
        rval = {
            'W_min': tensor.min(weights),
            'W_max': tensor.max(weights),
            'W_norm_mean': tensor.mean(norms),
        }
        rval.update({
            'bias_hid_min': tensor.min(hb),
            'bias_hid_mean': tensor.mean(hb),
            'bias_hid_max': tensor.max(hb),
        })
        rval.update({
            'bias_vis_min': tensor.min(vb),
            'bias_vis_mean': tensor.mean(vb),
            'bias_vis_max': tensor.max(vb),
        })
        return rval
Example #20
0
 def attend(self, y_p):
   """Run the attention glimpses over the entries of self.base.

   For each of self.attrs['glimpse'] glimpses the entries of self.base are
   visited from last to first. The last entry is scored in full; each earlier
   entry is scored only on a window selected from the position found on the
   entry after it. The weights of entry 0 are used to form a weighted sum
   `proto`, which is appended to every list in self.glimpses.

   Returns (T.dot(proto, self.custom_vars['W_att_in_0']), updates) where
   updates comes from self.default_updates().

   NOTE(review): `idx` is only assigned in the windowed branch, so the code
   after the inner loop appears to require len(self.base) > 1 -- confirm.
   """
   updates = self.default_updates()
   for g in range(self.attrs['glimpse']):
     # Walk the bases from the last one down to index 0.
     for i in range(len(self.base)-1,-1,-1):
       factor = T.constant(self.base[i].attrs['factor'][0], 'int32') if i > 0 else 1
       B, C, I, H, W_att_in, b_att_in = self.get(y_p, i, g)
       if i == len(self.base) - 1:
         # Last base: score everything.
         z_i = self.distance(C, H)
       else:
         # `ext` and `pos` were set by the previous iteration (base i+1).
         length = T.cast(T.max(T.sum(I,axis=0))+1,'int32')
         ext = T.cast(T.minimum(ext/factor,T.min(length)),'int32')
         def pick(i_t, ext):
           # 0/1 vector with `ext` ones starting at `pad`; the start is moved
           # back so the run of ones ends no later than B.shape[0].
           pad = T.minimum(i_t+ext, B.shape[0]) - ext
           return T.concatenate([T.zeros((pad,), 'int8'), T.ones((ext,), 'int8'), T.zeros((B.shape[0]-pad-ext+1,), 'int8')], axis=0)
         idx, _ = theano.map(pick, sequences = [pos/factor], non_sequences = [ext])
         # Drop the extra trailing row and turn the mask into flat indices.
         idx = (idx.dimshuffle(1,0)[:-1].flatten() > 0).nonzero()
         C = C.reshape((C.shape[0]*C.shape[1],C.shape[2]))[idx].reshape((ext,C.shape[1],C.shape[2]))
         z_i = self.distance(C, H)
         I = I.reshape((I.shape[0]*I.shape[1],))[idx].reshape((ext,I.shape[1]))
       if i > 0:
         # Hand the best position and window size to the next iteration.
         pos = T.argmax(self.softmax(z_i,I),axis=0) * factor
         ext = factor
       else:
         w_i = self.softmax(z_i,I)
     # Select the same window from B and combine it with the weights w_i.
     B = B.reshape((B.shape[0]*B.shape[1],B.shape[2]))[idx].reshape((ext,B.shape[1],B.shape[2]))
     proto = T.sum(B * w_i.dimshuffle(0,1,'x').repeat(B.shape[2],axis=2),axis=0)
     for i in range(len(self.base)):
       self.glimpses[i].append(proto)
   return T.dot(proto, self.custom_vars['W_att_in_0']), updates
Example #21
0
    def get_stencil(self, t, r=None, texp=None):
        """Build a sorted vector of contact times covering the span of ``t``.

        When either ``r`` or ``texp`` is None, no stencil is built and
        ``tt.shape_padright(t)`` is returned on its own.

        Otherwise four contact-time offsets are computed (a closed form when
        self.ecc is None, self.contact_points_op otherwise), repeated once per
        period between the period containing min(t) and one period past
        max(t), concatenated and sorted.

        NOTE(review): the two paths return different things -- a single tensor
        in the early-return case, a ``(ts, flag)`` tuple otherwise. Callers
        must handle both; confirm this is intended.
        NOTE(review): ``texp`` is only checked against None, never used.

        Returns:
            ``tt.shape_padright(t)``, or ``(ts, flag)`` where ``flag`` is
            zeros shaped like self.a for the ecc-is-None case and the sum of
            the third outputs of the two contact_points_op calls otherwise.
        """
        if r is None or texp is None:
            return tt.shape_padright(t)

        # z is used to broadcast scalars up to the shape of self.a.
        z = tt.zeros_like(self.a)
        r = tt.as_tensor_variable(r)
        R = self.r_star + z
        hp = 0.5 * self.period

        if self.ecc is None:
            # Equation 14 from Winn (2010)
            k = r / self.r_star
            arg1 = tt.square(1 + k) - tt.square(self.b)
            arg2 = tt.square(1 - k) - tt.square(self.b)
            factor = R / (self.a * self.sin_incl)
            hdur1 = hp * tt.arcsin(factor * tt.sqrt(arg1)) / np.pi
            hdur2 = hp * tt.arcsin(factor * tt.sqrt(arg2)) / np.pi
            # Offsets in increasing order: -hdur1, -hdur2, +hdur2, +hdur1.
            ts = [-hdur1, -hdur2, hdur2, hdur1]
            flag = z

        else:
            # Contact points for the separations R + r and R - r.
            M_contact1 = self.contact_points_op(self.a, self.ecc,
                                                self.cos_omega, self.sin_omega,
                                                self.cos_incl + z,
                                                self.sin_incl + z, R + r)
            M_contact2 = self.contact_points_op(self.a, self.ecc,
                                                self.cos_omega, self.sin_omega,
                                                self.cos_incl + z,
                                                self.sin_incl + z, R - r)

            flag = M_contact1[2] + M_contact2[2]

            # Convert each contact value to a time and wrap it into [-hp, hp).
            ts = [
                tt.mod(
                    (M_contact1[0] - self.M0) / self.n + hp, self.period) - hp,
                tt.mod(
                    (M_contact2[0] - self.M0) / self.n + hp, self.period) - hp,
                tt.mod(
                    (M_contact2[1] - self.M0) / self.n + hp, self.period) - hp,
                tt.mod(
                    (M_contact1[1] - self.M0) / self.n + hp, self.period) - hp
            ]

        # Whole-period bounds around the data, measured from self.t0;
        # `end` includes one extra period.
        start = self.period * tt.floor((tt.min(t) - self.t0) / self.period)
        end = self.period * (tt.ceil((tt.max(t) - self.t0) / self.period) + 1)
        start += self.t0
        end += self.t0
        tout = []
        for i in range(4):
            if z.ndim < 1:
                # Scalar case: one arange of period starts per offset.
                tout.append(ts[i] + tt.arange(start, end, self.period))
            else:
                # Vector case: scan over the elements, one arange each.
                tout.append(
                    theano.scan(
                        fn=lambda t0, s0, e0, p0: t0 + tt.arange(s0, e0, p0),
                        sequences=[ts[i], start, end, self.period],
                    )[0].flatten())

        ts = tt.sort(tt.concatenate(tout))
        return ts, flag
Example #22
0
def _best_path_decode(activations):
    """Calculate the CTC best-path decoding for a given activation sequence.

       The label is taken as the argmax over axis 2 of ``activations``;
       per the inline comments the first two axes are (time, example).
       Repeated labels and blanks are removed, and shorter sequences are
       padded with the module-level constant PADDING (described as -1 in the
       original notes -- confirm against its definition).

       Returns a matrix with one column per example and as many rows as the
       longest decoded sequence.

       NOTE(review): the code treats 0 as the blank label when counting and
       removing blanks, so it appears to assume _BLANK == 0 -- confirm."""

    # For each timestep, get the highest output
    decoding = T.argmax(activations, axis=2)

    # prev_outputs[time][example] == decoding[time - 1][example]
    prev_outputs = T.concatenate([T.alloc(_BLANK, 1, decoding.shape[1]), decoding], axis=0)[:-1]

    # Filter all repetitions to zero (blanks are already zero)
    decoding = decoding * T.neq(decoding, prev_outputs)

    # Calculate how many blanks each sequence has relative to longest sequence
    blank_counts = T.eq(decoding, 0).sum(axis=0)
    min_blank_count = T.min(blank_counts, axis=0)
    max_seq_length = decoding.shape[0] - min_blank_count # used later
    padding_needed = blank_counts - min_blank_count

    # Generate the padding matrix by ... doing tricky things
    # Row r of the result holds PADDING for every example that still needs
    # more than r padding entries, and 0 elsewhere.
    max_padding_needed = T.max(padding_needed, axis=0)
    padding_needed = padding_needed.dimshuffle('x',0).repeat(max_padding_needed, axis=0)
    padding = T.arange(max_padding_needed).dimshuffle(0,'x').repeat(decoding.shape[1],axis=1)
    padding = PADDING * T.lt(padding, padding_needed)

    # Apply the padding
    decoding = T.concatenate([decoding, padding], axis=0)

    # Remove zero values
    # After padding every column has exactly max_seq_length non-zero entries,
    # which is what makes the reshape below valid.
    nonzero_vals = decoding.T.nonzero_values()
    decoding = T.reshape(nonzero_vals, (decoding.shape[1], max_seq_length)).T

    return decoding
Example #23
0
    def compute_D(idx, Dm1, zS, zAA, zBB):
        # Returns the D term for position idx given the previous term Dm1.
        # Reads nT, iia, iib, III and S from the enclosing scope.
        # Three cases: idx == nT-1 (last), idx == 0 (first), and everything
        # in between, where the block indices are capped at nT-2.
        # NOTE(review): the parameter zS is never used; the body reads S from
        # the enclosing scope instead -- confirm this is intended.
        D = ifelse(T.eq(idx, nT-1),
                   T.dot(Tla.matrix_inverse(zAA[iia[-1]]), 
		       III + T.dot(T.transpose(zBB[iib[idx-1]]),
			   T.dot(Dm1,S[0])))
                   , 
                   ifelse(T.eq(idx, 0), 
                          Tla.matrix_inverse(zAA[iia[0]]
			      - T.dot(zBB[iib[0]], T.transpose(S[-1]))),
                          T.dot(Tla.matrix_inverse(zAA[iia[idx]] 
                                - T.dot(zBB[iib[T.min([idx,nT-2])]],T.transpose(S[T.max([-idx-1,-nT+1])]))),
			        III + T.dot(T.transpose(zBB[iib[T.min([idx-1,nT-2])]]),
				  T.dot(Dm1,S[-idx])))
                      )
               )
        return D
Example #24
0
 def step(self, t, s_p, c_p, X):
     """Compute one recurrent step at position ``t``.

     The input slice is X[:, t] when self.input_shape has three entries and
     X[:, t:t+1] otherwise; it is divided by 1 + (its max - its min).
     A single affine map produces four pre-activations (index 0..3 on
     axis 1): three hard-sigmoid gates and one tanh candidate, which update
     the cell state c_p and the hidden state.

     Returns [o_t, s_t, c_t] where o_t is the same variable as s_t.
     """
     if len(self.input_shape) == 3:
         x_t = X[:, t]
     else:
         x_t = X[:, t:t+1]

     # Rescale by the value range of the current slice.
     value_range = T.max(x_t) - T.min(x_t)
     x_t = x_t / (1.0 + value_range)

     # Per the original notes: [index, channel, hidden_dim].
     pre_act = T.dot(x_t, self.U) + T.dot(s_p, self.W) + self.b

     in_gate = T.nnet.hard_sigmoid(pre_act[:, 0, :])
     forget_gate = T.nnet.hard_sigmoid(pre_act[:, 1, :])
     out_gate = T.nnet.hard_sigmoid(pre_act[:, 2, :])
     candidate = T.tanh(pre_act[:, 3, :])

     c_t = c_p * forget_gate + candidate * in_gate
     s_t = T.tanh(c_t) * out_gate

     # The step output is the hidden state itself.
     return [s_t, s_t, c_t]
def make_experiment(l_out, dataset, batch_size=1000, 
        N_train=50000, N_valid=10000, N_test=10000, 
        loss_function=lasagne.objectives.categorical_crossentropy,
        extra_loss=0.0, limit_alpha=False):
    """
    Build a loop for training a model, evaluating loss and accuracy on the
    train, valid and test splits.

    The train split is evaluated with deterministic=False, the other two with
    deterministic=True. When limit_alpha is true an extra train channel is
    added whose evaluation replaces every parameter named "alpha" by
    T.min([alpha, 1.0]).

    Returns the holonets.run.EpochLoop wrapping the Train object.
    """
    expressions = holonets.monitor.Expressions(
        l_out, dataset,
        batch_size=batch_size,
        update_rule=lasagne.updates.adam,
        loss_function=loss_function,
        loss_aggregate=T.mean,
        extra_loss=extra_loss,
        learning_rate=0.001,
        momentum=0.1)

    # only add channels for loss and accuracy
    split_modes = (("train", False), ("valid", True), ("test", True))
    for split, deterministic in split_modes:
        expressions.add_channel(**expressions.loss(split, deterministic))
        expressions.add_channel(**expressions.accuracy(split, deterministic))
    channels = expressions.build_channels()

    if limit_alpha:
        # then add channel to reset all alphas at 1.0
        alphas = [param for param in lasagne.layers.get_all_params(l_out)
                  if param.name == "alpha"]
        ceiling_updates = OrderedDict((a, T.min([a, 1.0])) for a in alphas)
        alpha_ceiling = theano.function([], alphas, updates=ceiling_updates)

        def apply_ceiling(x):
            # The channel interface passes one argument; it is not needed here.
            return alpha_ceiling()

        alpha_channel = {
            'dataset': 'train',
            'eval': apply_ceiling,
            'dimensions': ['Alpha'] * len(alphas),
            'names': ['alpha {0}'.format(i) for i in range(len(alphas))],
        }
        channels.append(alpha_channel)

    batches_per_split = {
        'train': N_train // batch_size,
        'valid': N_valid // batch_size,
        'test': N_test // batch_size,
    }
    train = holonets.train.Train(channels, n_batches=batches_per_split)
    return holonets.run.EpochLoop(train, dimensions=train.dimensions)
Example #26
0
    def test_max_pool_2d_3D(self):
        """Compare max_pool_2d on a 3-D input with the numpy reference.

        Also compiles and runs the sum of the output and its gradient with
        respect to the input; those results are not checked.
        """
        rng = numpy.random.RandomState(utt.fetch_seed())

        imval = rng.rand(2,3,4)
        images = tensor.dtensor3()

        cases = [(shp, border)
                 for shp in [(1,2)]
                 for border in [True,False]]

        for maxpoolshp, ignore_border in cases:
            expected = self.numpy_max_pool_2d(imval, maxpoolshp, ignore_border)

            output = max_pool_2d(images, maxpoolshp, ignore_border)
            actual = function([images], output)(imval)
            assert numpy.all(actual == expected)

            c = tensor.sum(output)
            c_val = function([images], c)(imval)

            g = tensor.grad(c, images)
            grad_summary = [g.shape,
                            tensor.min(g, axis=(0,1,2)),
                            tensor.max(g, axis=(0,1,2))]
            g_val = function([images], grad_summary)(imval)
    def _define_model(self):
        """Create self.model, a PyMC3 mixture-style model over self.data.

        Variables registered in the model: 'p' (Dirichlet), 'means' (Normal),
        'sd' (HalfCauchy), 'category' (Categorical, one per data point) and
        the observed 'obs' (Normal). Two potentials assign -inf when any
        entry of p is below 0.1 or when the first three means are not in
        increasing order.
        """
        self.model = pm.Model()
        with self.model:
            n_states = self.number_of_hidden_states

            p = pm.Dirichlet('p', a=np.array([1., 1., 1.]), shape=n_states)
            too_small = tt.min(p) < .1
            p_min_potential = pm.Potential('p_min_potential',
                                           tt.switch(too_small, -np.inf, 0))

            means = pm.Normal('means', mu=[0, 0, 0], sd=2.0, shape=n_states)

            # break symmetry
            first_pair = tt.switch(means[1] - means[0] < 0, -np.inf, 0)
            second_pair = tt.switch(means[2] - means[1] < 0, -np.inf, 0)
            order_means_potential = pm.Potential('order_means_potential',
                                                 first_pair + second_pair)

            sd = pm.HalfCauchy('sd', beta=2, shape=n_states)
            category = pm.Categorical('category', p=p,
                                      shape=self.number_of_data)

            # Each observation uses the mean and sd of its sampled category.
            points = pm.Normal('obs', mu=means[category], sd=sd[category],
                               observed=self.data)
Example #28
0
    def m_lh(self, x, z, v):
        """ Compute likelihood term m_lh

        The per-element loss depends on self.p.input_type:

        * 'binary': soft binary cross-entropy between x and z squeezed into
          [input_noise, 1 - input_noise];
        * 'continuous': 0.5 * log(input_noise**2 + v**2)
          + (z - x)**2 / (2 * (input_noise**2 + v**2)).

        The loss is then normalised along axis 0 and exp(-loss) is returned,
        so the result sums to one along axis 0.

        Raises:
            NotImplementedError: for any other self.p.input_type.
        """
        # Evaluate bottom-up mask
        if self.p.input_type == 'binary':
            # self.p.input_noise since the structure might change
            z_tilde = z * np.float32(
                (1 - 2 * self.p.input_noise)) + np.float32(self.p.input_noise)
            loss = nn.soft_binary_crossentropy(z_tilde, x, 1e-4)
        elif self.p.input_type == 'continuous':
            noise_factor = np.float32(self.p.input_noise**2) + v**2
            # Represents negative log-p
            loss = np.float32(0.5) * T.log(noise_factor) + T.sqr(z - x) / (
                np.float32(2) * noise_factor)
        else:
            # `NotImplemented` is a constant, not an exception class; raising
            # it produces a TypeError instead of the intended error.
            raise NotImplementedError(
                "unsupported input_type: %r" % (self.p.input_type,))

        # normalize
        # Subtracting the minimum first keeps exp(-loss) from underflowing.
        loss -= T.min(loss, axis=0, keepdims=True)
        normalizer = T.log(T.sum(T.exp(-loss), axis=0, keepdims=True))
        loss += normalizer

        assert loss.ndim in [3, 5]

        m_lh = T.exp(-loss)

        return m_lh
Example #29
0
def downsample(source, axis, factor, method="average"):
    """Downsample ``source`` along ``axis`` by an integer ``factor``.

    The axis is first truncated to a multiple of ``factor``; it is then split
    into ``(shape[axis] // factor, factor)`` and the new ``factor`` dimension
    is reduced or rearranged according to ``method``.

    :param source: symbolic tensor to downsample
    :param int axis: axis to downsample along
    :param factor: downsampling factor; must be integer-valued
    :param str method: one of
        ``"average"`` / ``"max"`` / ``"min"``: reduce over the factor dimension;
        ``"lstm"``: only truncate the axis (requires ``axis == 0``);
        ``"concat"`` / ``"mlp"``: fold the factor dimension into the last one;
        ``"batch"``: fold the factor dimension into dimension 1
        (requires ``axis == 0``).
    :return: the downsampled symbolic tensor
    :raises AssertionError: for a non-integer factor, an unknown method, or an
        unsupported axis for ``"lstm"`` / ``"batch"``
    """
    assert factor == int(factor), "factor is expected to be an int"
    factor = int(factor)
    # make shape[axis] a multiple of factor
    src = source
    # Floor division: "/" is true division on Python 3 and would produce a
    # float, which is not a valid slice bound or reshape dimension.
    source = source[slice_for_axis(axis=axis,
                                   s=slice(0, (source.shape[axis] // factor) *
                                           factor))]
    # Add a temporary dimension as the factor.
    added_dim_shape = [source.shape[i] for i in range(source.ndim)]
    added_dim_shape = added_dim_shape[:axis] + [
        source.shape[axis] // factor, factor
    ] + added_dim_shape[axis + 1:]
    if method == "lstm":
        assert axis == 0
        return source
    source = T.reshape(source, added_dim_shape)
    if method == "average":
        return T.mean(source, axis=axis + 1)
    elif method == "max":
        return T.max(source, axis=axis + 1)
    elif method == "min":
        return T.min(source, axis=axis + 1)
    elif method == "concat" or method == 'mlp':  # concatenates in last dimension
        # NOTE(review): the shape indices below (source.shape[3],
        # src.shape[1]) look like they assume a 3-d input with axis == 0 --
        # confirm with callers.
        return source.swapaxes(axis + 1, src.ndim -
                               1).reshape([source.shape[0], src.shape[1]] +
                                          [factor * source.shape[3]])
    elif method == "batch":
        assert axis == 0
        return source.dimshuffle(1, 0, 2, 3).reshape(
            (source.shape[1], source.shape[0] * source.shape[2],
             source.shape[3]))
    else:
        assert False, "unknown downsample method %r" % method
Example #30
0
    def get_monitoring_channels(self, V, Y=None):
        """Return a dict of monitoring expressions keyed by channel name.

        When ``self.monitor_params`` is truthy, every parameter contributes
        ``<name>_min`` / ``<name>_mean`` / ``<name>_max`` channels; parameters
        whose name contains ``'W'`` additionally contribute
        ``<name>_norms_min`` / ``_norms_mean`` / ``_norms_max`` computed from
        ``theano_norms``. Every key is prefixed with
        ``self.monitoring_channel_prefix``.

        ``V`` and ``Y`` are accepted but not used here. The model is switched
        to compile mode for the duration of the call and always switched back
        to deploy mode, even on error.
        """
        try:
            self.compile_mode()

            channels = {}

            # Channels from the inference procedure are intentionally not
            # collected (that code path was disabled in the original).

            if self.monitor_params:
                for p in self.get_params():
                    channels[p.name + '_min'] = full_min(p)
                    channels[p.name + '_mean'] = T.mean(p)
                    channels[p.name + '_max'] = full_max(p)

                    if 'W' not in p.name:
                        continue

                    col_norms = theano_norms(p)
                    channels[p.name + '_norms_min'] = T.min(col_norms)
                    channels[p.name + '_norms_mean'] = T.mean(col_norms)
                    channels[p.name + '_norms_max'] = T.max(col_norms)

            return dict((self.monitoring_channel_prefix + name, expr)
                        for name, expr in channels.items())
        finally:
            self.deploy_mode()
Example #31
0
File: render.py Project: zenna/ig
def mindist(translate, min_so_far, ro, rd):
    """Return the element-wise minimum of ``min_so_far`` and a new depth map.

    The depth comes from solving ``t**2 + b*t + c = 0`` per ray, with
    ``b = 2 * (rd . origin)`` and ``c = |origin|**2 - 0.001`` where
    ``origin = ro + translate``. Rays with a negative discriminant, or with
    no positive root, get the background distance ``10.0``.

    Shapes as stated by the original author's notes:
        ro: 3
        translate: nbatch * 3
        min_so_far: nbatch * width * height
        rd: width * height * 3
    """
    background_dist = 10.0
    eps = 1e-9

    origin = ro + translate
    # Linear and constant coefficients of the quadratic in the ray parameter.
    ray_dot_origin = T.tensordot(rd, origin, axes=[2,1])
    origin_sq = T.sum(origin**2,axis=1)
    b = 2*ray_dot_origin
    c = origin_sq - 0.001 #FIXME, remove this squaring

    discriminant = b **2 - 4 * c
    misses = discriminant < 0.0
    # Clamp before the square root so it is never taken of a negative number.
    sqrt_disc = T.sqrt(T.maximum(eps, discriminant))
    neg_b = -b
    near_root = (neg_b - sqrt_disc)/2.0
    far_root = (neg_b + sqrt_disc)/2.0

    # Prefer the nearer positive root, then the farther one, else background.
    far_or_background = T.switch(far_root > 0, far_root, background_dist)
    hit_depth = T.switch(near_root > 0, near_root, far_or_background)
    depth = T.switch(misses, background_dist, hit_depth)
    return T.min([min_so_far, depth], axis=0)
    def get_triplet_loss(self, data, deterministic=False):
        """Build the triplet loss expression for a batch of triplets.

        The ``fc7`` output is passed through ``tanh`` and reshaped to
        ``(3, -1, 256)``; slices 0/1/2 are used as anchor, positive and
        negative embeddings. The loss is the mean of the squared,
        ``alpha``-floored softmax weight of the positive distance against the
        smaller of the two negative distances.

        :param data: input passed to ``get_output`` for the ``fc7`` layer
        :param deterministic: forwarded to ``get_output``
        :return: scalar loss expression
        """
        fc7 = get_output(self.net['fc7'], data,
                         deterministic=deterministic)  # (3, nb, 256)
        triplets = T.reshape(T.tanh(fc7), newshape=(3, -1, 256))
        anchor, positive, negative = triplets[0], triplets[1], triplets[2]

        def _distance(delta):
            # Euclidean norm over axis 1, offset by eps inside the root.
            return T.pow(T.sum(T.pow(delta, 2.0), axis=1) + self.eps, 0.5)

        dist_pos = _distance(positive - anchor)  # (nb, )
        dist_neg_anchor = _distance(negative - anchor)  # (nb, )
        dist_neg_positive = _distance(negative - positive)  # (nb, )
        dist_neg = T.min([dist_neg_anchor, dist_neg_positive], axis=0)

        # Subtract the element-wise maximum before exponentiating.
        shift = T.max([dist_pos, dist_neg], axis=0)
        exp_pos = T.exp(dist_pos - shift)
        exp_neg = T.exp(dist_neg - shift)

        weight_pos = T.maximum(exp_pos / (exp_pos + exp_neg), self.alpha)
        return T.mean(weight_pos**2)
Example #33
0
def unet_crossentropy_loss_sampled(y_true, y_pred):
    """Class-balanced, randomly sampled binary cross-entropy.

    Predictions are clipped to ``[epsilon, 1 - epsilon]`` and flattened. The
    indices of non-zero and zero targets are shuffled independently, and the
    same number of each (the size of the smaller class) is kept. The loss is
    the sum of the mean negative log-likelihoods over the two samples.

    :param y_true: target tensor (flattened internally)
    :param y_pred: prediction tensor (clipped and flattened internally)
    :return: loss expression

    Relies on a module-level random stream ``srng``. The two print calls run
    when the expression is built, not when it is evaluated.
    """
    # Call form with a single pre-formatted string prints the same text on
    # Python 2 and Python 3; the statement form is a syntax error on Python 3.
    print('unet_crossentropy_loss_sampled')
    epsilon = 1.0e-4
    y_pred_clipped = T.flatten(T.clip(y_pred, epsilon, 1.0-epsilon))
    y_true = T.flatten(y_true)
    # filter the right indices
    indPos = T.nonzero(y_true)[0]  # nonzero returns one index array per dimension
    indNeg = T.nonzero(1-y_true)[0]
    # shuffle
    n = indPos.shape[0]
    indPos = indPos[srng.permutation(n=n)]
    n = indNeg.shape[0]
    indNeg = indNeg[srng.permutation(n=n)]
    # take equal number of samples depending on which class has less
    n_samples = T.cast(T.min([T.sum(y_true), T.sum(1-y_true)]), dtype='int64')

    indPos = indPos[:n_samples]
    indNeg = indNeg[:n_samples]
    loss_vector = -T.mean(T.log(y_pred_clipped[indPos])) - T.mean(T.log(1-y_pred_clipped[indNeg]))
    average_loss = T.mean(loss_vector)
    print('average_loss: %s' % (average_loss,))
    return average_loss
Example #34
0
def compile_gpu_func(nan_is_error, inf_is_error, big_is_error):
    """Compile the GPU helper functions used by contains_nan and contains_inf.

    Lazily fills the module globals ``f_gpumin``, ``f_gpumax`` and
    ``f_gpuabsmax`` with compiled Theano functions computing the min, max and
    max-absolute value of a CUDA float vector. Each function is compiled only
    when the corresponding flag asks for it and it is still ``None``.

    Does nothing when CUDA is unavailable. Once one compilation fails with
    ``RuntimeError``, the remaining compilations in this call are skipped and
    the affected globals stay ``None``.

    :param nan_is_error: if true, ``f_gpumin`` is needed
    :param inf_is_error: if true, ``f_gpumin`` and ``f_gpumax`` are needed
    :param big_is_error: if true, ``f_gpuabsmax`` is needed
    """
    global f_gpumin, f_gpumax, f_gpuabsmax
    if not cuda.cuda_available:
        return
    # Symbolic input shared by all three compiled functions.
    guard_input = cuda.fvector("nan_guard")
    cuda_compile_failed = False
    if (nan_is_error or inf_is_error) and f_gpumin is None:
        try:
            f_gpumin = theano.function([guard_input], T.min(guard_input), mode="FAST_RUN")
        except RuntimeError:
            # This can happen if cuda is available, but the
            # device is in exclusive mode and used by another
            # process.
            cuda_compile_failed = True
    if inf_is_error and not cuda_compile_failed and f_gpumax is None:
        try:
            f_gpumax = theano.function([guard_input], T.max(guard_input), mode="FAST_RUN")
        except RuntimeError:
            # This can happen if cuda is available, but the
            # device is in exclusive mode and used by another
            # process.
            cuda_compile_failed = True
    if big_is_error and not cuda_compile_failed and f_gpuabsmax is None:
        try:
            f_gpuabsmax = theano.function([guard_input], T.max(T.abs_(guard_input)), mode="FAST_RUN")
        except RuntimeError:
            # This can happen if cuda is available, but the
            # device is in exclusive mode and used by another
            # process.
            cuda_compile_failed = True
Example #35
0
 def get_output_for(self, input, **kwargs):
     """Rescale each row of ``input`` so that it sums to ``self.norm_sum``.

     When ``self.allow_negative`` is set, the row minimum is subtracted first
     so that the smallest entry of every row becomes zero. Rows are taken
     along axis 1.
     """
     shifted = input
     if self.allow_negative:
         row_min = T.min(input, axis=1).dimshuffle(0, 'x')
         shifted = input - row_min
     row_total = T.sum(shifted, axis=1).dimshuffle(0, 'x')
     return shifted / row_total * self.norm_sum
Example #36
0
    def _get_hidden_layer_connectivity(self, layerIdx):
        """Sample one connectivity value per unit of hidden layer ``layerIdx``.

        The sampling probabilities come from ``self._get_p`` applied to the
        minimum connectivity of the previous layer: the stored connectivity
        for layer 0, the updated connectivity of layer ``layerIdx - 1``
        otherwise. One multinomial draw is made per unit over the reversed
        probability vector, and the row-sum of its cumulative sum is returned.
        """
        n_units = self._hidden_sizes[layerIdx]

        if layerIdx == 0:
            previous = self.layers_connectivity[layerIdx]
        else:
            previous = self.layers_connectivity_updates[layerIdx - 1]
        p_vals = self._get_p(T.min(previous))

        # Same reversed probability row for every unit of the layer.
        pvals_per_unit = T.tile(p_vals[::-1][None, :], (n_units, 1))
        draws = self._mrng.multinomial(pvals=pvals_per_unit,
                                       dtype=theano.config.floatX)
        # Row-sum of the cumulative sum of each draw (the original notes call
        # this a GPU-friendly replacement for np.choose).
        return T.sum(T.cumsum(draws, axis=1), axis=1)
Example #37
0
 def attend(self, y_p):
   """Build the attention output for ``y_p`` over the levels in ``self.base``.

   For each glimpse, the base levels are visited from the last index down to
   0. The last level is scored in full with ``self.distance``; every other
   level is scored only on a window of ``ext`` positions selected around
   ``pos``, the argmax position found at the previously visited level scaled
   by that level's ``factor``. Level 0 yields the weights ``w_i`` used to
   form ``proto``, which is appended to every entry of ``self.glimpses``.

   Returns a tuple ``(T.dot(proto, W_att_in_0), updates)``.

   NOTE(review): ``idx``, ``ext`` and ``pos`` are only assigned on the
   non-last-level path, so this appears to require ``len(self.base) >= 2``
   and ``self.attrs['glimpse'] >= 1`` -- confirm with callers.
   NOTE(review): ``ext/factor`` and ``pos/factor`` use ``/``; whether that
   is integer division depends on the Python version and Theano config.
   """
   updates = self.default_updates()
   for g in range(self.attrs['glimpse']):
     # Walk the levels from the last one down to level 0.
     for i in range(len(self.base)-1,-1,-1):
       factor = T.constant(self.base[i].attrs['factor'][0], 'int32') if i > 0 else 1
       B, C, I, h_p, _ = self.get(y_p, i, g)
       if i == len(self.base) - 1:
         # Last level: score every position.
         z_i = self.distance(C, h_p)
       else:
         length = T.cast(T.max(T.sum(I,axis=0))+1,'int32')
         # Window size, capped by the length derived from I.
         ext = T.cast(T.minimum(ext/factor,T.min(length)),'int32')
         def pick(i_t, ext):
           # 0/1 vector with a run of `ext` ones starting at `pad`, clamped
           # so the run ends no later than B.shape[0].
           pad = T.minimum(i_t+ext, B.shape[0]) - ext
           return T.concatenate([T.zeros((pad,), 'int8'), T.ones((ext,), 'int8'), T.zeros((B.shape[0]-pad-ext+1,), 'int8')], axis=0)
         idx, _ = theano.map(pick, sequences = [pos/factor], non_sequences = [ext])
         # Flat indices of the selected window positions (last row dropped).
         idx = (idx.dimshuffle(1,0)[:-1].flatten() > 0).nonzero()
         # Restrict C and I to the selected window before scoring.
         C = C.reshape((C.shape[0]*C.shape[1],C.shape[2]))[idx].reshape((ext,C.shape[1],C.shape[2]))
         z_i = self.distance(C, h_p)
         I = I.reshape((I.shape[0]*I.shape[1],))[idx].reshape((ext,I.shape[1]))
       if i > 0:
         # Remember the best position and window size for the next level.
         pos = T.argmax(self.softmax(z_i,I),axis=0) * factor
         ext = factor
       else:
         w_i = self.softmax(z_i,I)
     # Weighted sum of the windowed B from level 0 using the weights w_i.
     B = B.reshape((B.shape[0]*B.shape[1],B.shape[2]))[idx].reshape((ext,B.shape[1],B.shape[2]))
     proto = T.sum(B * w_i.dimshuffle(0,1,'x').repeat(B.shape[2],axis=2),axis=0)
     for i in range(len(self.base)):
       self.glimpses[i].append(proto)
   return T.dot(proto, self.custom_vars['W_att_in_0']), updates
Example #38
0
def LQLEP_wBarrier( LQLEP    = Th.dscalar(), ldet = Th.dscalar(), v1 = Th.dvector(), 
                    N_spike  = Th.dscalar(), ImM  = Th.dmatrix(),  U = Th.dmatrix(),
                    V2       = Th.dvector(),    u = Th.dvector(),  C = Th.dmatrix(),
                    **other):
    '''
    The actual Linear-Quadratic-Exponential-Poisson log-likelihood, 
    as a function of theta and M, 
    with a barrier on the log-det term and a prior.

    Adds penalty terms to LQLEP (stored as LQLEP_wPrior): a term in
    1/(ldet+250)**2, a log term on 1-4*sq_nonlinearity, second-difference
    penalties on u (and on other['uc'] when present) and a small quadratic
    penalty on v1. The barrier expression is 0 when all three conditions on
    the eigenvalues of ImM and on sq_nonlinearity hold, and 1 otherwise.

    Returns named(**other) after ALL local variables of this function have
    been merged into `other`. Every local name is therefore part of the
    result: do not add, rename or remove locals without checking consumers.
    '''
    sq_nonlinearity = V2**2.*Th.sum( Th.dot(U,C)*U, axis=[1])  #Th.sum(U**2,axis=[1])
    # Not used below, but exported through locals().
    nonlinearity = V2 * Th.sqrt( Th.sum( Th.dot(U,C)*U, axis=[1])) #Th.sum(U**2,axis=[1]) )
    # `in` instead of dict.has_key(), which no longer exists on Python 3.
    if 'uc' in other:
        LQLEP_wPrior = LQLEP + 0.5 * N_spike * ( 1./(ldet+250.)**2. \
                     - 0.000001 * Th.sum(Th.log(1.-4*sq_nonlinearity))) \
                     + 10. * Th.sum( (u[2:]+u[:-2]-2*u[1:-1])**2. ) \
                     + 10. * Th.sum( (other['uc'][2:]+other['uc'][:-2]-2*other['uc'][1:-1])**2. ) \
                     + 0.000000001 * Th.sum( v1**2. )
#                     + 100. * Th.sum( v1 )
    #                 + 0.0001*Th.sum( V2**2 )
    else:
        LQLEP_wPrior = LQLEP + 0.5 * N_spike * ( 1./(ldet+250.)**2. \
                     - 0.000001 * Th.sum(Th.log(1.-4*sq_nonlinearity))) \
                     + 10. * Th.sum( (u[2:]+u[:-2]-2*u[1:-1])**2. ) \
                     + 0.000000001 * Th.sum( v1**2. )
#                     + 100. * Th.sum( v1 )
    #                 + 0.0001*Th.sum( V2**2 )
    # The second output of eig is discarded: `barrier` is overwritten below.
    eigsImM,barrier = eig( ImM )
    barrier   = 1-(Th.sum(Th.log(eigsImM))>-250) * \
                  (Th.min(eigsImM)>0) * (Th.max(4*sq_nonlinearity)<1)
    other.update(locals())
    return named( **other )
Example #39
0
 def _match(self, sample):
     """Return ``(err, bmu)``: the smallest entry of the distance table
     between ``self.codebook`` and ``sample``, and its flat index.

     The table is ``|c|^2 + |s|^2 - 2 * c . s`` built from row-wise squared
     norms and a dot product.
     NOTE(review): both squared-norm terms keep a trailing axis of size 1, so
     this presumably expects ``sample`` to hold a single row -- confirm.
     """
     codebook_sq = (T.sqr(self.codebook)).sum(axis=1, keepdims=True)
     sample_sq = (T.sqr(sample)).sum(axis=1, keepdims=True)
     cross = T.dot(self.codebook, sample.T)
     distances = codebook_sq + sample_sq - 2 * cross
     return T.min(distances), T.argmin(distances)
Example #40
0
def get_stats(input, stat=None):
    """
    Build Theano expressions for summary statistics of a tensor.

    Available statistics: mean, var, std, min, max, l1, l2.

    Parameters
    ----------
    input : tensor
        Theano tensor to compute statistics for.
    stat : optional
        Name(s) of the statistics to keep, normalized with ``raise_to_list``.
        When that yields ``None`` all statistics are returned. Entries that
        are not strings, or not a known statistic name, are ignored.

    Returns
    -------
    dict
        Mapping {statistic name: theano expression}.
    """
    # All expressions are built up front, whether or not they are requested.
    available = {
        'mean': T.mean(input),
        'var': T.var(input),
        'std': T.std(input),
        'min': T.min(input),
        'max': T.max(input),
        'l1': input.norm(L=1),
        'l2': input.norm(L=2),
    }
    requested = raise_to_list(stat)
    if requested is None:
        return available

    return {
        name: available[name]
        for name in requested
        if isinstance(name, string_types) and name in available
    }
Example #41
0
    def compute_probabilistic_matrix(self,X, y, num_cases, k=5):
        """Build a per-case, per-class score matrix from k nearest neighbours.

        ``X`` is projected with ``self.A``; squared values of the pairwise
        ``dist2hy`` table are shifted so that each case's nearest neighbour
        sits at 0. For every class, the ``k`` smallest shifted distances to
        members of that class are averaged and turned into
        ``exp(-mean)``; the scores are normalized across classes.

        :param X: input matrix, multiplied by ``self.A``
        :param y: label vector, compared against ``0 .. num_classes - 1``
        :param num_cases: number of rows, used for shapes and indexing
        :param k: number of within-class neighbours to average
        :return: ``(num_cases, self.num_classes)`` matrix whose rows are
            normalized by the sum of the per-class scores
        """

        z       = T.dot(X, self.A) #Transform x into z space 
        dists   = T.sqr(dist2hy(z,z))
        # Mask the diagonal with a large value so a point is never its own
        # nearest neighbour.
        dists   = T.extra_ops.fill_diagonal(dists, T.max(dists)+1)
        nv      = T.min(dists,axis=1) # value of nearest neighbour 
        dists   = (dists.T - nv).T
        d       = T.extra_ops.fill_diagonal(dists, 0)
   
        #Take only k nearest 
        num     = T.zeros((num_cases, self.num_classes))
        denom   = T.zeros((num_cases,))
        # range() instead of the Python-2-only xrange(); both loops are short.
        for c_i in range(self.num_classes):

            #Mask for class i: every row equals (y == c_i)
            mask_i = T.eq(T.outer(T.ones_like(y),y),c_i)

            #K nearest neighbour within a class i 
            dim_ci = T.sum(mask_i[0])  # number of cases labelled c_i
            d_c_i = T.reshape(d[mask_i.nonzero()],(num_cases,dim_ci))
            k_indice = T.argsort(d_c_i, axis=1)[:,0:k]
            
            kd = T.zeros((num_cases,k))
            for it in range(k):
                kd = T.set_subtensor(kd[:,it], d_c_i[T.arange(num_cases),k_indice[:,it]]) 

            #Numerator
            value   = T.exp(-T.mean(kd,axis=1))
            num     = T.set_subtensor(num[:,c_i], value) 
            denom   += value 
            

        p = num / denom.dimshuffle(0,'x')    # per-class scores normalized per case
        return p
Example #42
0
 def make_consensus(self, networks, axis=2):
   """Combine ``networks`` along ``axis`` using ``self.attrs['consensus']``.

   Supported methods: 'max', 'min', 'mean', 'sum', 'prod', 'var' (reductions
   over ``axis``), 'flat' (flatten to 3 dims when ``axis == 2``, else to 2
   dims; returned unchanged when ``self.depth == 1``), 'project' (tensordot
   with a newly added parameter) and 'random' (index ``axis`` with a random
   integer drawn from ``self.rng``; axes 0-3 only).

   Fails an assertion for an unknown method, or for 'random' with an axis
   greater than 3.
   """
   method = self.attrs['consensus']
   if method == 'max':
     return T.max(networks, axis=axis)
   if method == 'min':
     return T.min(networks, axis=axis)
   if method == 'mean':
     return T.mean(networks, axis=axis)
   if method == 'flat':
     if self.depth == 1:
       return networks
     return networks.flatten(ndim=3 if axis == 2 else 2)
   if method == 'sum':
     return T.sum(networks, axis=axis, acc_dtype=theano.config.floatX)
   if method == 'prod':
     return T.prod(networks, axis=axis)
   if method == 'var':
     return T.var(networks, axis=axis)
   if method == 'project':
     weights = self.create_random_uniform_weights(self.attrs['n_out'], 1, self.attrs['n_out'] + self.depth + 1)
     p = self.add_param(weights)
     return T.tensordot(p, networks, [[1], [axis]])
   if method == 'random':
     idx = self.rng.random_integers(size=(1,), low=0, high=self.depth)
     if axis == 0:
       return networks[idx]
     if axis == 1:
       return networks[:,idx]
     if axis == 2:
       return networks[:,:,idx]
     if axis == 3:
       return networks[:,:,:,idx]
     assert False, "axis too large"
   assert False, "consensus method unknown: " + method
Example #43
0
    def get_cost_updates(self, learning_rate, beta=0.9):
        """Compute the cost and the updates for one training step of the dA.

        ``self.x`` is min-max rescaled (and the rescaled expression is stored
        back on ``self.x``), encoded and reconstructed. The cost is the mean
        over examples of the summed squared reconstruction error.

        :param learning_rate: step size applied to the momentum buffers
        :param beta: decay of the running average kept in ``self.mom``
        :return: ``(cost, updates)`` where ``updates`` lists the momentum
            updates first, followed by the parameter updates
        """
        upper = T.max(self.x)
        lower = T.min(self.x)
        self.x = (self.x - lower) / (upper - lower)

        hidden = self.get_hidden_values(self.x)
        reconstruction = self.get_reconstructed_input(hidden)

        # Sum over the size of a datapoint: with minibatches this is a
        # vector holding one squared-error value per example.
        per_example = T.sum(T.square(reconstruction - self.x), axis=1)
        # Average over the minibatch to get a scalar cost.
        cost = T.mean(per_example)

        # Gradients of the cost with respect to the dA parameters.
        grads = T.grad(cost, self.params)

        momentum_updates = [(m, m * beta + (1 - beta) * g)
                            for m, g in zip(self.mom, grads)]
        # Parameters step along the momentum buffers, not the raw gradients.
        param_updates = [(p, p - learning_rate * m)
                         for p, m in zip(self.params, self.mom)]

        return (cost, momentum_updates + param_updates)
    def NRMSE(self, y):
        """Return the normalized root-mean-square error of ``self.y_pred``.

        The value is ``sqrt(mean((y - y_pred) ** 2)) / (max(y) - min(y))``,
        i.e. the RMSE divided by the range of the targets.

        :type y: theano.tensor.TensorType
        :param y: target values; must have the same number of dimensions as
                  ``self.y_pred`` and a floating-point dtype

        :raises TypeError: if ``y`` and ``self.y_pred`` differ in ``ndim``
        :raises NotImplementedError: if ``y`` is not a floating-point tensor
        """

        # check if y has same dimension of y_pred
        if y.ndim != self.y_pred.ndim:
            raise TypeError(
                'y should have the same shape as self.y_pred',
                ('y', y.type, 'y_pred', self.y_pred.type)
            )
        # check if y is of the correct datatype
        if y.dtype.startswith('flo'): # only floating-point targets are supported
            # RMSE between targets and predictions, normalized by the
            # range of the targets

            return T.sqrt(T.mean(T.sqr(y-self.y_pred))) / (T.max(y) - T.min(y)) #NRMSE

        else:
            raise NotImplementedError()
Example #45
0
def get_stats(input, stat=None):
    """
    Build Theano expressions for summary statistics of a tensor.

    Available statistics: mean, var, std, min, max, l1, l2.

    Parameters
    ----------
    input : tensor
        Theano tensor to compute statistics for.
    stat : optional
        Name(s) of the statistics to keep, normalized with ``raise_to_list``.
        When that yields ``None`` all statistics are returned. Entries that
        are not strings, or not a known statistic name, are ignored.

    Returns
    -------
    dict
        Mapping {statistic name: theano expression}.
    """
    # All expressions are built up front, whether or not they are requested.
    available = {
        'mean': T.mean(input),
        'var': T.var(input),
        'std': T.std(input),
        'min': T.min(input),
        'max': T.max(input),
        'l1': input.norm(L=1),
        'l2': input.norm(L=2),
    }
    requested = raise_to_list(stat)
    if requested is None:
        return available

    return {
        name: available[name]
        for name in requested
        if isinstance(name, six.string_types) and name in available
    }
Example #46
0
    def get_output_for(self, input, init=False, **kwargs):
        """Append cross-example similarity features to ``input``.

        ``input`` (flattened to 2 dims if needed) is projected with
        ``self.W``; L1 distances between the projections of every pair of
        examples are turned into ``exp(-distance)`` and summed over the
        other examples. A large constant is added where an example meets
        itself so it contributes practically nothing to its own feature.

        With ``init=True`` the distances are rescaled by half their mean
        per-kernel minimum, the features are centred, and the matching
        data-dependent updates for ``self.log_weight_scale`` and ``self.b``
        are stored in ``self.init_updates``. Otherwise ``self.b`` is added.

        :return: ``input`` concatenated with the new features along axis 1
        """
        if input.ndim > 2:
            # More than two dimensions: flatten into a batch of feature
            # vectors.
            input = input.flatten(2)

        activation = T.tensordot(input, self.W, [[1], [0]])
        pairwise_abs = abs(activation.dimshuffle(0,1,2,'x')
                           - activation.dimshuffle('x',1,2,0))
        self_penalty = 1e6 * T.eye(input.shape[0]).dimshuffle(0,'x',1)
        abs_dif = T.sum(pairwise_abs, axis=2) + self_penalty

        if init:
            mean_min_abs_dif = 0.5 * T.mean(T.min(abs_dif, axis=2), axis=0)
            abs_dif /= mean_min_abs_dif.dimshuffle('x',0,'x')
            scale_update = (self.log_weight_scale,
                            self.log_weight_scale
                            - T.log(mean_min_abs_dif).dimshuffle(0,'x'))
            self.init_updates = [scale_update]

        f = T.sum(T.exp(-abs_dif), axis=2)

        if init:
            feature_mean = T.mean(f, axis=0)
            f -= feature_mean.dimshuffle('x',0)
            self.init_updates.append((self.b, -feature_mean))
        else:
            f += self.b.dimshuffle('x',0)

        return T.concatenate([input, f], axis=1)
Example #47
0
def unet_crossentropy_loss_sampled(y_true, y_pred):
    """Weighted pixel-wise cross-entropy loss.

    Positive-target terms are weighted by ``alpha`` and zero-target terms by
    ``1 - alpha``; the sum is divided by the number of output voxels
    (``patchSize_out * patchSize_out * patchZ_out``) and finally by
    ``1 - alpha``. When the patch has no positive or no negative targets
    (``n_samples == 0``) only the zero-target term is used.

    Relies on the module-level ``srng``, ``patchSize_out`` and ``patchZ_out``.
    """
    alpha = 0.6
    epsilon = 1.0e-5
    flat_pred = T.flatten(T.clip(y_pred, epsilon, 1.0 - epsilon))
    flat_true = T.flatten(y_true)

    # Indices of non-zero and zero targets; nonzero returns one index
    # array per dimension, hence the [0].
    pos_idx = T.nonzero(flat_true)[0]
    neg_idx = T.nonzero(1 - flat_true)[0]

    # Shuffle both index sets (positives first, as in the original order of
    # draws from the random stream).
    pos_idx = pos_idx[srng.permutation(n=pos_idx.shape[0])]
    neg_idx = neg_idx[srng.permutation(n=neg_idx.shape[0])]

    # Size of the smaller class; only used to pick the branch below, the
    # index sets themselves are not truncated.
    n_samples = T.cast(T.min([T.sum(flat_true), T.sum(1 - flat_true)]),
                       dtype='int64')

    total = np.float64(patchSize_out * patchSize_out * patchZ_out)
    pos_log_sum = T.sum(T.log(flat_pred[pos_idx]))
    neg_log_sum = T.sum(T.log(1 - flat_pred[neg_idx]))

    with_positives = (-alpha * pos_log_sum -
                      (1 - alpha) * neg_log_sum) / total
    negatives_only = -(1 - alpha) * neg_log_sum / total
    loss_vector = ifelse(T.gt(n_samples, 0), with_positives, negatives_only)

    return T.mean(loss_vector) / (1 - alpha)
Example #48
0
 def make_consensus(self, networks, axis=2):
   """Combine ``networks`` along ``axis`` using ``self.attrs['consensus']``.

   Supported methods: 'max', 'min', 'mean', 'sum', 'prod', 'var' (reductions
   over ``axis``), 'flat' (flatten to 3 dims when ``axis == 2``, else to 2
   dims; returned unchanged when ``self.depth == 1``), 'project' (tensordot
   with a newly added parameter) and 'random' (index ``axis`` with a random
   integer drawn from ``self.rng``; axes 0-3 only).

   Fails an assertion for an unknown method, or for 'random' with an axis
   greater than 3.
   """
   method = self.attrs['consensus']
   if method == 'max':
     return T.max(networks, axis=axis)
   if method == 'min':
     return T.min(networks, axis=axis)
   if method == 'mean':
     return T.mean(networks, axis=axis)
   if method == 'flat':
     if self.depth == 1:
       return networks
     return networks.flatten(ndim=3 if axis == 2 else 2)
   if method == 'sum':
     return T.sum(networks, axis=axis, acc_dtype=theano.config.floatX)
   if method == 'prod':
     return T.prod(networks, axis=axis)
   if method == 'var':
     return T.var(networks, axis=axis)
   if method == 'project':
     weights = self.create_random_uniform_weights(self.attrs['n_out'], 1, self.attrs['n_out'] + self.depth + 1)
     p = self.add_param(weights)
     return T.tensordot(p, networks, [[1], [axis]])
   if method == 'random':
     idx = self.rng.random_integers(size=(1,), low=0, high=self.depth)
     if axis == 0:
       return networks[idx]
     if axis == 1:
       return networks[:,idx]
     if axis == 2:
       return networks[:,:,idx]
     if axis == 3:
       return networks[:,:,:,idx]
     assert False, "axis too large"
   assert False, "consensus method unknown: " + method
Example #49
0
    def test_max_pool_2d_3D(self):
        """Check ``max_pool_2d`` on a 3-d input against the numpy reference.

        For every combination of ``ignore_border`` and pooling ``mode``, the
        compiled Theano output must equal ``self.numpy_max_pool_2d`` exactly.
        The sum of the output and its gradient with respect to the input are
        also compiled and evaluated, but their values are not asserted on.
        """
        rng = numpy.random.RandomState(utt.fetch_seed())
        maxpoolshps = [(1, 2)]
        imval = rng.rand(2, 3, 4)
        images = tensor.dtensor3()

        for maxpoolshp, ignore_border, mode in product(maxpoolshps,
                                                       [True, False],
                                                       ['max', 'sum',
                                                        'average_inc_pad',
                                                        'average_exc_pad']):
                # print 'maxpoolshp =', maxpoolshp
                # print 'ignore_border =', ignore_border
                # Reference result computed with numpy.
                numpy_output_val = self.numpy_max_pool_2d(imval, maxpoolshp,
                                                          ignore_border,
                                                          mode)
                output = max_pool_2d(images, maxpoolshp, ignore_border,
                                     mode=mode)
                output_val = function([images], output)(imval)
                assert numpy.all(output_val == numpy_output_val), (
                    "output_val is %s, numpy_output_val is %s"
                    % (output_val, numpy_output_val))
                # Smoke-test the gradient: it only has to compile and run.
                c = tensor.sum(output)
                c_val = function([images], c)(imval)
                g = tensor.grad(c, images)
                g_val = function([images],
                                 [g.shape,
                                 tensor.min(g, axis=(0, 1, 2)),
                                 tensor.max(g, axis=(0, 1, 2))]
                                 )(imval)
Example #50
0
    def test_max_pool_2d_3D(self):
        """Check ``max_pool_2d`` on a 3-d input against the numpy reference.

        For every combination of ``ignore_border`` and pooling ``mode``, the
        compiled Theano output must equal ``self.numpy_max_pool_2d`` exactly.
        The sum of the output and its gradient with respect to the input are
        also compiled and evaluated, but their values are not asserted on.
        """
        rng = numpy.random.RandomState(utt.fetch_seed())
        maxpoolshps = [(1, 2)]
        imval = rng.rand(2, 3, 4)
        images = tensor.dtensor3()

        for maxpoolshp, ignore_border, mode in product(
                maxpoolshps, [True, False],
            ['max', 'average_inc_pad', 'average_exc_pad']):
            # print 'maxpoolshp =', maxpoolshp
            # print 'ignore_border =', ignore_border
            # Reference result computed with numpy.
            numpy_output_val = self.numpy_max_pool_2d(imval, maxpoolshp,
                                                      ignore_border, mode)
            output = max_pool_2d(images, maxpoolshp, ignore_border, mode=mode)
            output_val = function([images], output)(imval)
            assert numpy.all(output_val == numpy_output_val), (
                "output_val is %s, numpy_output_val is %s" %
                (output_val, numpy_output_val))
            # Smoke-test the gradient: it only has to compile and run.
            c = tensor.sum(output)
            c_val = function([images], c)(imval)
            g = tensor.grad(c, images)
            g_val = function([images], [
                g.shape,
                tensor.min(g, axis=(0, 1, 2)),
                tensor.max(g, axis=(0, 1, 2))
            ])(imval)
Example #51
0
def apply_moving_average(params, avg_params, updates, steps, decay):
    """Extend ``updates`` with moving-average updates for ``avg_params``.

    Each average ``a`` moves towards its parameter ``p`` as
    ``a - (1 - weight) * (a - p)`` with
    ``weight = min(decay, steps / (steps + 1))``, so early steps use a
    smaller weight than ``decay``.

    :param params: model parameters
    :param avg_params: averaged copies, in the same order as ``params``
        (the pairing is positional; extra entries on either side are ignored)
    :param updates: mapping of existing updates (anything with ``.items()``)
    :param steps: number of steps taken so far
    :param decay: upper bound on the averaging weight
    :return: list of ``(variable, new_value)`` pairs: the existing updates
        followed by the moving-average updates
    """
    weight = T.min([decay, steps / (steps + 1.)]).astype(theano.config.floatX)
    avg_updates = [(a, a - (1. - weight) * (a - p))
                   for p, a in zip(params, avg_params)]
    # dict.items() is a view on Python 3 and cannot be added to a list.
    return list(updates.items()) + avg_updates
Example #52
0
def downsample(source, axis, factor, method="average"):
  """Downsample ``source`` along ``axis`` by an integer ``factor``.

  The axis is first truncated to a multiple of ``factor``; it is then split
  into ``(shape[axis] // factor, factor)`` and the new ``factor`` dimension
  is reduced or rearranged according to ``method``.

  :param source: symbolic tensor to downsample
  :param int axis: axis to downsample along
  :param factor: downsampling factor; must be integer-valued
  :param str method: one of
    ``"average"`` / ``"max"`` / ``"min"``: reduce over the factor dimension;
    ``"lstm"``: only truncate the axis (requires ``axis == 0``);
    ``"concat"`` / ``"mlp"``: fold the factor dimension into the last one;
    ``"batch"``: fold the factor dimension into dimension 1
    (requires ``axis == 0``).
  :return: the downsampled symbolic tensor
  :raises AssertionError: for a non-integer factor, an unknown method, or an
    unsupported axis for ``"lstm"`` / ``"batch"``
  """
  assert factor == int(factor), "factor is expected to be an int"
  factor = int(factor)
  # make shape[axis] a multiple of factor
  src = source
  # Floor division: "/" is true division on Python 3 and would produce a
  # float, which is not a valid slice bound or reshape dimension.
  source = source[slice_for_axis(axis=axis, s=slice(0, (source.shape[axis] // factor) * factor))]
  # Add a temporary dimension as the factor.
  added_dim_shape = [source.shape[i] for i in range(source.ndim)]
  added_dim_shape = added_dim_shape[:axis] + [source.shape[axis] // factor, factor] + added_dim_shape[axis + 1:]
  if method == "lstm":
    assert axis == 0
    return source
  source = T.reshape(source, added_dim_shape)
  if method == "average":
    return T.mean(source, axis=axis + 1)
  elif method == "max":
    return T.max(source, axis=axis + 1)
  elif method == "min":
    return T.min(source, axis=axis + 1)
  elif method == "concat" or method == 'mlp': # concatenates in last dimension
    # NOTE(review): the shape indices below (source.shape[3], src.shape[1])
    # look like they assume a 3-d input with axis == 0 -- confirm with callers.
    return source.swapaxes(axis+1,src.ndim-1).reshape([source.shape[0],src.shape[1]] + [factor * source.shape[3]])
  elif method == "batch":
    assert axis == 0
    return source.dimshuffle(1,0,2,3).reshape((source.shape[1],source.shape[0]*source.shape[2],source.shape[3]))
  else:
    assert False, "unknown downsample method %r" % method
Example #53
0
def unet_crossentropy_loss_sampled(y_true, y_pred):
    """Class-balanced, randomly sampled binary cross-entropy.

    Predictions are clipped to ``[epsilon, 1 - epsilon]`` and flattened. The
    indices of non-zero and zero targets are shuffled independently, and the
    same number of each (the size of the smaller class) is kept. The loss is
    the sum of the mean negative log-likelihoods over the two samples.

    :param y_true: target tensor (flattened internally)
    :param y_pred: prediction tensor (clipped and flattened internally)
    :return: loss expression

    Relies on a module-level random stream ``srng``. The two print calls run
    when the expression is built, not when it is evaluated.
    """
    # Call form with a single pre-formatted string prints the same text on
    # Python 2 and Python 3; the statement form is a syntax error on Python 3.
    print('unet_crossentropy_loss_sampled')
    epsilon = 1.0e-4
    y_pred_clipped = T.flatten(T.clip(y_pred, epsilon, 1.0 - epsilon))
    y_true = T.flatten(y_true)
    # filter the right indices
    indPos = T.nonzero(y_true)[0]  # nonzero returns one index array per dimension
    indNeg = T.nonzero(1 - y_true)[0]
    # shuffle
    n = indPos.shape[0]
    indPos = indPos[srng.permutation(n=n)]
    n = indNeg.shape[0]
    indNeg = indNeg[srng.permutation(n=n)]
    # take equal number of samples depending on which class has less
    n_samples = T.cast(T.min([T.sum(y_true), T.sum(1 - y_true)]),
                       dtype='int64')

    indPos = indPos[:n_samples]
    indNeg = indNeg[:n_samples]
    loss_vector = -T.mean(T.log(y_pred_clipped[indPos])) - T.mean(
        T.log(1 - y_pred_clipped[indNeg]))
    average_loss = T.mean(loss_vector)
    print('average_loss: %s' % (average_loss,))
    return average_loss
Example #54
0
    def get_monitoring_channels(self, V):
        """Return a dict of monitoring expressions keyed by channel name.

        When ``self.monitor_params`` is truthy, every parameter contributes
        ``<name>_min`` / ``<name>_mean`` / ``<name>_max`` channels; parameters
        whose name contains ``'W'`` additionally contribute
        ``<name>_norms_min`` / ``_norms_mean`` / ``_norms_max`` computed from
        ``theano_norms``. Every key is prefixed with
        ``self.monitoring_channel_prefix``.

        ``V`` is accepted but not used here. The model is switched to compile
        mode for the duration of the call and always switched back to deploy
        mode, even on error.
        """
        try:
            self.compile_mode()

            channels = {}

            # Channels from the inference procedure are intentionally not
            # collected (that code path was disabled in the original).

            if self.monitor_params:
                for p in self.get_params():
                    channels[p.name + '_min'] = full_min(p)
                    channels[p.name + '_mean'] = T.mean(p)
                    channels[p.name + '_max'] = full_max(p)

                    if 'W' not in p.name:
                        continue

                    col_norms = theano_norms(p)
                    channels[p.name + '_norms_min'] = T.min(col_norms)
                    channels[p.name + '_norms_mean'] = T.mean(col_norms)
                    channels[p.name + '_norms_max'] = T.max(col_norms)

            return dict((self.monitoring_channel_prefix + name, expr)
                        for name, expr in channels.items())
        finally:
            self.deploy_mode()
Example #55
0
    def train_simple(self, X_train, y_train,
                           n_epochs, batch_size,
                           optimization_function,
                           cost_function):
        """Train on ``(X_train, y_train)`` with minibatch updates.

        The data is copied into shared variables (inputs as float32, targets
        as int32). For every batch the compiled step is run and its
        ``[cost, error]`` result is printed; the shared data is then
        re-permuted before the next batch.

        :param X_train: training inputs, indexed along axis 0
        :param y_train: training targets
        :param n_epochs: number of passes over the batches
        :param batch_size: number of examples per batch
        :param optimization_function: called as ``f(params, cost)``; returns
            the updates for the training step
        :param cost_function: called as ``f(self.y, self.out, self.params)``;
            returns the cost expression
        """
        cost = cost_function(self.y, self.out, self.params)
        error = self.error()

        shared_X = shared(X_train.astype('float32'))
        shared_y = shared(y_train.astype('int32'))

        N = X_train.shape[0]
        # Number of batches, counting a trailing partial batch.
        n_batches = N // batch_size + (N % batch_size != 0)

        index = T.iscalar()
        start = index * batch_size
        stop = T.min(((index+1) * batch_size, N))  # clamp the last batch

        step_updates = optimization_function(self.params, cost)

        train_step = function(
            [index], [cost, error],
            givens=[(self.X, shared_X[start:stop]),
                    (self.y, shared_y[start:stop])],
            updates=step_updates)

        order = T.ivector()
        # Applies the same permutation to inputs and targets.
        reshuffle = function(
            [order],
            updates=[(shared_X, shared_X[order]),
                     (shared_y, shared_y[order])],
            allow_input_downcast=True)

        for _epoch in range(n_epochs):
            for batch in range(n_batches):
                print(train_step(batch))
                reshuffle(np.random.permutation(N))
Example #56
0
    def get_output_for(self, input, init=False, **kwargs):
        """Append cross-example similarity features to ``input``.

        ``input`` (flattened to 2 dims if needed) is projected with
        ``self.W``; L1 distances between the projections of every pair of
        examples are turned into ``exp(-distance)`` and summed over the
        other examples. A large constant is added where an example meets
        itself so it contributes practically nothing to its own feature.

        With ``init=True`` the distances are rescaled by half their mean
        per-kernel minimum, the features are centred, and the matching
        data-dependent updates for ``self.log_weight_scale`` and ``self.b``
        are stored in ``self.init_updates``. Otherwise ``self.b`` is added.

        :return: ``input`` concatenated with the new features along axis 1
        """
        if input.ndim > 2:
            # More than two dimensions: flatten into a batch of feature
            # vectors.
            input = input.flatten(2)

        activation = T.tensordot(input, self.W, [[1], [0]])
        pairwise_abs = abs(activation.dimshuffle(0, 1, 2, 'x') -
                           activation.dimshuffle('x', 1, 2, 0))
        self_penalty = 1e6 * T.eye(input.shape[0]).dimshuffle(0, 'x', 1)
        abs_dif = T.sum(pairwise_abs, axis=2) + self_penalty

        if init:
            mean_min_abs_dif = 0.5 * T.mean(T.min(abs_dif, axis=2), axis=0)
            abs_dif /= mean_min_abs_dif.dimshuffle('x', 0, 'x')
            scale_update = (self.log_weight_scale,
                            self.log_weight_scale -
                            T.log(mean_min_abs_dif).dimshuffle(0, 'x'))
            self.init_updates = [scale_update]

        f = T.sum(T.exp(-abs_dif), axis=2)

        if init:
            feature_mean = T.mean(f, axis=0)
            f -= feature_mean.dimshuffle('x', 0)
            self.init_updates.append((self.b, -feature_mean))
        else:
            f += self.b.dimshuffle('x', 0)

        return T.concatenate([input, f], axis=1)
Example #57
0
def rank_objective(num_queries: int, num_w_samples: int,
                   samples: tt.TensorVariable, features: tt.TensorVariable,
                   beta_pref: float) -> tt.TensorVariable:
    """
    The ranking maximum volume removal objective function, using the Plackett-Luce model of human behavior.

    CANNOT BE USED WITH (INC_PREV_QUERY AND NO DEMPREF).

    :param num_queries: number of queries being ranked. Every permutation of
        the queries is enumerated, so the graph grows factorially with it.
    :param num_w_samples: not used by this function; the number of samples is
        read from ``samples.shape[0]``.
    :param samples: samples of w, used to approximate the objective.
    :param features: a list containing the feature values of each query.
    :param beta_pref: multiplier applied to the reward differences inside the
        exponentials.
    :return: the value of the objective function, evaluated on the given queries' features
        (a symbolic scalar: the minimum over rankings of ``1 - P(ranking)``).
    """
    # features: n_queries x feature_size
    # samples: n_samples x feature_size
    exp_rewards = tt.sum(
        tt.dot(features, samples.T),
        axis=1) / samples.shape[0]  # n_queries x 1 -- summed across samples
    volumes_removed = []
    # iterating over all possible rankings
    for rank in itertools.permutations(range(num_queries)):
        # Place each query's expected reward at the position this ranking
        # assigns to it.
        exp_rewards_sorted = [None] * len(rank)
        for query_idx, position in enumerate(rank):
            exp_rewards_sorted[position] = exp_rewards[query_idx]

        # Plackett-Luce probability of the ranking:
        #   prod_i 1 / sum_{j >= i} exp(beta * (r_j - r_i)).
        # The inner sum must cover every item from position i through the
        # last one; stopping one short drops the last-ranked item from every
        # denominator. The factor for the final position is always 1, so the
        # outer loop skips it.
        value = 1
        for i in range(len(rank) - 1):
            exp_i = [
                beta_pref * (exp_rewards_sorted[j] - exp_rewards_sorted[i])
                for j in range(i, len(rank))
            ]
            value *= (1. / tt.sum(tt.exp(exp_i)))
        volumes_removed.append(1 - value)
    return tt.min(volumes_removed)
Example #58
0
    def unet_crossentropy_loss_sampled(y_true, y_pred):
        """
        Class-balanced sampled loss for binary labels.

        Predictions are clipped to [epsilon, 1 - epsilon] and flattened.
        The indices of the positive (label 1) and negative (label 0) entries
        are shuffled and truncated to the size of the smaller class, so both
        classes contribute the same number of samples.

        PARAM:
            y_true: symbolic tensor of labels, compared against 1 and 0.
            y_pred: symbolic tensor of predictions, same number of elements.
        RETURN:
            Symbolic scalar: the sampled loss clipped to
            [epsilon, 1 - epsilon], or the mean clipped prediction over the
            sampled positives wherever that loss evaluates to NaN.
        """
        epsilon = 1.0e-4
        y_pred_clipped = T.flatten(T.clip(y_pred, epsilon, 1.0-epsilon))
        y_true = T.flatten(y_true)

        # filter the right indices
        classPos = 1
        classNeg = 0
        indPos = T.eq(y_true, classPos).nonzero()[0]
        indNeg = T.eq(y_true, classNeg).nonzero()[0]

        # shuffle
        indPos = indPos[UNET.srng.permutation(n=indPos.shape[0])]
        indNeg = indNeg[UNET.srng.permutation(n=indNeg.shape[0])]

        # take equal number of samples depending on which class has less
        n_samples = T.cast(T.min([indPos.shape[0], indNeg.shape[0]]), dtype='int64')
        indPos = indPos[:n_samples]
        indNeg = indNeg[:n_samples]

        # NOTE(review): the negative term uses log(p), not log(1 - p) as a
        # standard binary cross-entropy would; the original author switched
        # to this form deliberately, so it is kept -- confirm it is intended.
        loss_vector = -T.mean(T.log(y_pred_clipped[indPos])) - T.mean(T.log(y_pred_clipped[indNeg]))
        loss_vector = T.clip(loss_vector, epsilon, 1.0-epsilon)
        average_loss = T.mean(loss_vector)

        # The NaN fallback has to be part of the graph: a Python `if` on a
        # symbolic expression is decided once, while the graph is being
        # built, not from the value computed at run time, so it cannot select
        # between the two results per evaluation.
        fallback = T.mean(y_pred_clipped[indPos])
        return T.switch(T.isnan(average_loss), fallback, average_loss)
Example #59
0
def adjust_estimate(estimate_r_t, risk_pref_t):
    """
    Rescale estimates around 0.5 by a risk preference and bound them to [0, 1].

    Computes ``(estimate_r_t - 0.5) * risk_pref_t + 0.5`` and then takes the
    element-wise maximum with 0 followed by the element-wise minimum with 1.

    PARAM:
        estimate_r_t: 2-D symbolic tensor of estimates (T x N according to
            the original comments).
        risk_pref_t: multiplier broadcast against the estimates (1 x N
            according to the original comments).
    RETURN:
        The adjusted, bounded estimates with broadcastable axes squeezed out.
    """
    # broadcasting keeps the shape of the estimates
    centred = estimate_r_t - 0.5
    adjusted = centred * risk_pref_t + 0.5

    # both operands get a trailing axis of length 1 before being stacked
    column_shape = [adjusted.shape[0], adjusted.shape[1], 1]
    adjusted_col = T.reshape(adjusted, newshape=column_shape)
    zeros_col = T.reshape(T.zeros_like(adjusted), newshape=column_shape)

    # lower bound: stack against zeros on a new axis 2 and take the max
    floored = T.max(T.stack([adjusted_col, zeros_col], axis=2), axis=2)

    # upper bound: stack against ones on a new axis 2 and take the min
    ones_col = T.ones_like(floored)
    capped = T.min(T.stack([floored, ones_col], axis=2), axis=2)

    return T.squeeze(capped)