def negative_log_likelihood_classwise_masking(self, y, mask_class_labeled, mask_class_not_present):
        """
        todo: test.
        :y: true classes (as integer value): (batchsize, x, y)
        :mask_class_labeled: matrix: (batchsize, num_classes) allowed values: 0 or 1; setting everything to 1 leads to the ordinary nll; all zeroes is an invalid state.
                    a zero for one class indicates that this class may be present but is not labeled as such.
        :mask_class_not_present: (batchsize, num_classes): similar to mask_class_labeled, but now a 1 indicates that a class is CERTAINLY NOT PRESENT in the batch.
        
        values of -1 in y count as "absolutely not labeled / ignore predictions"; this has PRIORITY over anything else (including mask_class_not_present).
        """
        y                      = y.dimshuffle(0, 'x', 1, 2)                        #(batchsize, 1, x, y)
        mask_class_labeled     = mask_class_labeled.dimshuffle(0, 1, 'x', 'x')     #(batchsize, num_classes,1 ,1)
        mask_class_not_present = mask_class_not_present.dimshuffle(0, 1, 'x', 'x') #(batchsize, num_classes,1 ,1)
        global_loss_mask = (y != -1) #apply to overall loss after everything is calculated; marks positions 
        
        
        pred = self.class_probabilities_realshape # (batchsize, num_classes, x, y)
        mod_y = T.where(y<0,0,y)
        
        #dirty hack: compute "standard" nll when most predictive weight is put on classes which are in fact labeled
        votes_not_for_unlabeled = T.where( T.sum(pred*mask_class_labeled,axis=1)>=0.5, 1, 0 ).dimshuffle(0,'x',1,2)

        # could also add '* mask_class_labeled' inside, but this should not change anything , provided there is no logical conflict between y and mask_class_labeled !
        nll = -T.mean((T.log(pred) * votes_not_for_unlabeled * global_loss_mask)[:,mod_y]) #standard loss part -> increase p(correct_prediction); thus disabled if the "correct" class is not known
        
        # penalize predictions: sign is a plus! (yes: '+')
        # remove <global_loss_mask> if <mask_class_not_present> should override 'unlabeled' areas.
        nll += T.mean(T.log(pred) * mask_class_not_present * global_loss_mask) 
        
        return nll
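To make the two masks concrete, here is a hypothetical way to build them with NumPy for a toy batch of 2 images and 4 classes (the array names and values are purely illustrative, not part of the method above):

import numpy as np

# Image 0 is fully labeled; in image 1 only classes 0 and 1 are labeled.
mask_class_labeled = np.array([[1, 1, 1, 1],
                               [1, 1, 0, 0]], dtype='float32')
# In image 1 we additionally know that class 3 is certainly absent.
mask_class_not_present = np.array([[0, 0, 0, 0],
                                   [0, 0, 0, 1]], dtype='float32')
# Pixels with y == -1 are treated as completely unlabeled and ignored.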
Example 2
        def lda_logp(rt, gaze, values, error_ll, v_index, tau_index,
                     gamma_index, s_index, t0_index, is_multiplicative,
                     zerotol):

            # compute drifts
            ## Select the right drift function
            drift = ifelse(
                is_multiplicative,
                glam.components.tt_drift_multiplicative(
                    v[0, tt.cast(v_index, dtype='int32')][:, None],
                    tau[0, tt.cast(tau_index, dtype='int32')][:, None],
                    gamma[0, tt.cast(gamma_index, dtype='int32')][:, None],
                    values, gaze, zerotol),
                glam.components.tt_drift_additive(
                    v[0, tt.cast(v_index, dtype='int32')][:, None],
                    tau[0, tt.cast(tau_index, dtype='int32')][:, None],
                    gamma[0, tt.cast(gamma_index, dtype='int32')][:, None],
                    values, gaze, zerotol))
            # drift = driftfun(v[0, tt.cast(v_index, dtype='int32')][:, None],
            #                  tau[0, tt.cast(tau_index, dtype='int32')][:, None],
            #                  gamma[0, tt.cast(gamma_index, dtype='int32')][:, None],
            #                  values,
            #                  gaze,
            #                  zerotol)
            glam_ll = glam.components.tt_wienerrace_pdf(
                rt[:, None], drift,
                s[0, tt.cast(s_index, dtype='int32')][:, None], b,
                t0[0, tt.cast(t0_index, dtype='int32')][:, None], zerotol)

            # mix likelihoods
            mixed_ll = ((1 - p_error) * glam_ll + p_error * error_ll)

            mixed_ll = tt.where(tt.isnan(mixed_ll), 0., mixed_ll)
            mixed_ll = tt.where(tt.isinf(mixed_ll), 0., mixed_ll)
            return tt.log(mixed_ll + zerotol)
Example 3
def irprop_minus_trainer(x, y, w, parameters, loss, random_stream,
                         positive_step=1.2, negative_step=0.5, max_step=1., min_step=1e-6):
    """IRPROP- is batch trainer, for details see http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.21.3428 .
    This is default trainer, very stable for classification.

    :param positive_step: factor, by which the step is increased when continuing going in the direction
    :param negative_step: factor, by which the step is increased when changing direction to opposite
    :param min_step: minimal change of weight during iteration
    :param max_step: maximal change of weight during iteration
    """
    shareds = []
    updates = []
    loss_value = loss(x, y, w)
    for name, param in parameters.items():
        old_derivative = theano.shared(param.get_value() * 0.)
        delta = theano.shared(param.get_value() * 0. + 1e-3)
        shareds.extend([old_derivative, delta])
        new_derivative = T.grad(loss_value, param)

        new_delta = T.where(new_derivative * old_derivative > 0, delta * positive_step, delta * negative_step)
        new_delta = T.clip(new_delta, min_step, max_step)

        updates.append([param, param - new_delta * T.sgn(new_derivative)])
        updates.append([delta, new_delta])

        new_old_derivative = T.where(new_derivative * old_derivative < 0, 0, new_derivative)
        updates.append([old_derivative, new_old_derivative])
    return shareds, updates
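A minimal usage sketch for this trainer, assuming a single shared weight vector, a toy weighted squared-error loss, and made-up variable names (random_stream is accepted but unused here):

import numpy as np
import theano
import theano.tensor as T

x = T.matrix('x')
y = T.vector('y')
w = T.vector('w')   # per-event weights

w0 = theano.shared(np.zeros(3, dtype=theano.config.floatX), name='w0')

def toy_loss(x, y, w):
    # weighted mean squared error of a linear model
    return T.mean(w * (T.dot(x, w0) - y) ** 2)

shareds, updates = irprop_minus_trainer(x, y, w, {'w0': w0}, toy_loss,
                                        random_stream=None)
train_step = theano.function([x, y, w], toy_loss(x, y, w), updates=updates)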
Example 4
def irprop_plus_trainer(x, y, w, parameters, loss, random_stream,
                        positive_step=1.2, negative_step=0.5, max_step=1., min_step=1e-6):
    """IRPROP+ is batch trainer, for details see http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.21.3428

    :param positive_step: factor, by which the step is increased when continuing going in the direction
    :param negative_step: factor, by which the step is increased when changing direction to opposite
    :param min_step: minimal change of weight during iteration
    :param max_step: maximal change of weight during iteration
    """
    loss_value = loss(x, y, w)
    prev_loss_value = theano.shared(1e10)
    shareds = [prev_loss_value]
    updates = []
    for name, param in parameters.items():
        old_derivative = theano.shared(param.get_value() * 0.)
        delta = theano.shared(param.get_value() * 0. + 1e-3)
        new_derivative = T.grad(loss_value, param)

        shift_if_bad_step = T.where(new_derivative * old_derivative < 0, delta * T.sgn(old_derivative), 0)
        shift = ifelse(loss_value > prev_loss_value, shift_if_bad_step, 0. * param)
        # unfortunately we can't do it this way: param += shift

        new_delta = T.where(new_derivative * old_derivative > 0, delta * positive_step, delta * negative_step)
        new_delta = T.clip(new_delta, min_step, max_step)

        updates.append([param, param + shift - new_delta * T.sgn(new_derivative)])
        updates.append([delta, new_delta])

        new_old_derivative = T.where(new_derivative * old_derivative < 0, 0, new_derivative)
        updates.append([old_derivative, new_old_derivative])
        shareds.extend([old_derivative, delta])

    updates.append([prev_loss_value, loss_value])
    return shareds, updates
Example 5
 def __init__(self,
              input,
              filter_shape=None,
              image_shape=None,
              W=None,
              b=None,
              poolsize=(3, 1)):
     assert image_shape[1] == filter_shape[1]
     self.W = W
     self.b = b
     tmp = numpy.ones((filter_shape[0], ), dtype=theano.config.floatX)
     tmp = -tmp * 10000
     self.test = theano.shared(value=tmp, borrow=True)
     conv_out = conv.conv2d(input=input,
                            filters=self.W,
                            filter_shape=filter_shape,
                            image_shape=image_shape)
     conv_out2 = T.where(T.neq(conv_out, 0), conv_out,
                         self.test.dimshuffle('x', 0, 'x', 'x'))
     pooled_out = downsample.max_pool_2d(conv_out2,
                                         ds=poolsize,
                                         ignore_border=True)
     pooled_out2 = T.where(T.neq(pooled_out, -10000), pooled_out,
                           -self.b.dimshuffle('x', 0, 'x', 'x'))
     self.output = ReLU(pooled_out2 + self.b.dimshuffle('x', 0, 'x', 'x'))
     #self.output = T.nnet.sigmoid(pooled_out2 + self.b.dimshuffle('x', 0, 'x', 'x'))
     self.params = [self.W, self.b]
Example 6
def irprop_plus_trainer(x, y, w, parameters, loss, random_stream,
                        positive_step=1.2, negative_step=0.5, max_step=1., min_step=1e-6):
    """IRPROP+ trainer, see http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.17.1332"""
    loss_value = loss(x, y, w)
    prev_loss_value = theano.shared(1e10)
    shareds = []
    updates = []
    for name, param in parameters.items():
        old_derivative = theano.shared(param.get_value() * 0.)
        delta = theano.shared(param.get_value() * 0. + 1e-3)
        new_derivative = T.grad(loss_value, param)

        shift_if_bad_step = T.where(new_derivative * old_derivative < 0, delta * T.sgn(old_derivative), 0)
        # THIS doesn't work!
        shift = ifelse(loss_value > prev_loss_value, shift_if_bad_step, 0. * param)
        # unfortunately we can't do it this way: param += shift

        new_delta = T.where(new_derivative * old_derivative > 0, delta * positive_step, delta * negative_step)
        new_delta = T.clip(new_delta, min_step, max_step)

        updates.append([param, param + shift - new_delta * T.sgn(new_derivative)])
        updates.append([delta, new_delta])

        new_old_derivative = T.where(new_derivative * old_derivative < 0, 0, new_derivative)
        updates.append([old_derivative, new_old_derivative])
        shareds.extend([old_derivative, delta, prev_loss_value])

    updates.append([prev_loss_value, loss_value])
    return shareds, updates
Example 8
def irprop_star_trainer(x, y, w, parameters, loss, random_stream,
                        positive_step=1.2, negative_step=0.5, max_step=1., min_step=1e-6):
    """ IRPROP* trainer (own experimental modification, not recommended for usage) """
    shareds = []
    updates = []
    loss_value = loss(x, y, w)

    for name, param in parameters.items():
        param_shape = param.get_value().shape
        n = numpy.prod(param_shape).astype(int)
        new_derivative_ = T.grad(loss_value, param).flatten()
        lnewder, rnewder = new_derivative_.reshape([n, 1]), new_derivative_.reshape([1, n])
        new_derivative_plus = lnewder + rnewder
        new_derivative_minus = lnewder - rnewder
        new_param = param
        for new_derivative in [new_derivative_plus, new_derivative_minus]:
            delta = theano.shared(numpy.zeros([n, n], dtype=floatX) + 1e-3)
            old_derivative = theano.shared(numpy.zeros([n, n], dtype=floatX))

            new_delta = T.where(new_derivative * old_derivative > 0, delta * positive_step, delta * negative_step)
            new_delta = T.clip(new_delta, min_step, max_step)

            updates.append([delta, new_delta])
            new_old_derivative = T.where(new_derivative * old_derivative < 0, 0, new_derivative)
            updates.append([old_derivative, new_old_derivative])
            new_param = new_param - (new_delta * T.sgn(new_derivative)).sum(axis=1).reshape(param.shape)
            shareds.extend([old_derivative, delta])

        updates.append([param, new_param])

    return shareds, updates
Example 9
        def lda_logp(rt, gaze, values, error_lls, s_condition_index,
                     s_subject_index, v_condition_index, v_subject_index,
                     tau_condition_index, tau_subject_index,
                     gamma_condition_index, gamma_subject_index,
                     t0_condition_index, t0_subject_index, zerotol):

            # compute drifts
            drift = glam.components.expdrift(
                v[tt.cast(v_subject_index, dtype='int32'),
                  tt.cast(v_condition_index, dtype='int32')][:, None],
                tau[tt.cast(tau_subject_index, dtype='int32'),
                    tt.cast(tau_condition_index, dtype='int32')][:, None],
                gamma[tt.cast(gamma_subject_index, dtype='int32'),
                      tt.cast(gamma_condition_index, dtype='int32')][:, None],
                values, gaze, zerotol)
            glam_ll = glam.components.tt_wienerrace_pdf(
                rt[:, None], drift,
                s[tt.cast(s_subject_index, dtype='int32'),
                  tt.cast(s_condition_index, dtype='int32')][:, None], b,
                t0[tt.cast(t0_subject_index, dtype='int32'),
                   tt.cast(t0_condition_index, dtype='int32')][:,
                                                               None], zerotol)

            # mix likelihoods
            mixed_ll = ((1 - p_error) * glam_ll +
                        p_error * error_lls[subject_idx])

            mixed_ll = tt.where(tt.isnan(mixed_ll), 0., mixed_ll)
            mixed_ll = tt.where(tt.isinf(mixed_ll), 0., mixed_ll)
            return tt.sum(tt.log(mixed_ll + zerotol))
Example 11
    def call(self, inputs):
        real = get_realpart(inputs)
        imag = get_imagpart(inputs)

        cond = T.and_(real >= 0, imag >= 0)
        x = T.where(cond, real, self.zeros)
        y = T.where(cond, imag, self.zeros)
        return K.concatenate((x, y), axis=-1)
Example 12
def deviance_negbin(y, μ, α, saturated="NegativeBinomial"):
    if saturated=="NegativeBinomial":
        logp_sat = tt.where(y==0, np.zeros_like(y,dtype=np.float32), pm.NegativeBinomial.dist(mu=y, alpha=α).logp(y))
    elif saturated=="Poisson":
        logp_sat = tt.where(y==0, np.zeros_like(y,dtype=np.float32), pm.Poisson.dist(mu=y).logp(y))
    else:
        raise NotImplementedError()
    logp_mod = pm.NegativeBinomial.dist(mu=μ, alpha=α).logp(y)
    return (2*(logp_sat - logp_mod)).eval()
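A quick way to exercise this function, assuming PyMC3 is imported as pm and theano.tensor as tt (as the code above implies); the numbers are made up:

import numpy as np

y = np.array([0., 2., 5., 1.])        # observed counts
mu = np.array([0.5, 2.0, 4.0, 1.5])   # fitted means
dev = deviance_negbin(y, mu, 2.0)                            # NumPy array via .eval()
dev_pois = deviance_negbin(y, mu, 2.0, saturated="Poisson")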
Example 13
    def _create_iter_funcs(self, layers, objective, update, output_type):
        y_batch = output_type('y_batch')

        output_layer = layers[-1]
        objective_kw = self._get_params_for('objective')

        l3Layers = []
        for l3_name in self.l3_layers:
            l3Layers.append( layers[ l3_name ] )

        loss_train = objective(
            layers, target=y_batch, l3_layers=l3Layers, **objective_kw)
        loss_eval = objective(
            layers, target=y_batch, deterministic=True, **objective_kw)
        predict_proba = get_output(output_layer, None, deterministic=True)

        if not self.regression:
            predict = predict_proba.argmax(axis=1)
            accuracy = T.mean(T.eq(predict, y_batch))
        elif self.objective_loss_function is binary_crossentropy:
            predict = T.where( predict_proba >= 0.5, 1, 0 )
            accuracy = T.mean( T.eq(predict, y_batch) )
        else:
            predict = T.where( predict_proba > 0., 1, 0 )
            label   = T.where( y_batch > 0., 1, 0 )
            accuracy = T.mean( T.eq( predict, label ) )

        all_params = self.get_all_params(trainable=True)
        update_params = self._get_params_for('update')
        updates = update(loss_train, all_params, layer_weights=self.layer_weights, **update_params )

        input_layers = [layer for layer in layers.values()
                        if isinstance(layer, InputLayer)]

        X_inputs = [theano.Param(input_layer.input_var, name=input_layer.name)
                    for input_layer in input_layers]
        inputs = X_inputs + [theano.Param(y_batch, name="y")]

        train_iter = theano.function(
            inputs=inputs,
            outputs=[loss_train],
            updates=updates,
            allow_input_downcast=True,
            )
        eval_iter = theano.function(
            inputs=inputs,
            outputs=[loss_eval, accuracy],
            allow_input_downcast=True,
            )
        predict_iter = theano.function(
            inputs=X_inputs,
            outputs=predict_proba,
            allow_input_downcast=True,
            )

        return train_iter, eval_iter, predict_iter
Example 14
def _flaremodel(time, tpeak, fwhm, ampl):
    # reuses some code from AltaiPony and Apaloosa
    time = tt.as_tensor_variable(time)
    flare_lc = tt.zeros_like(time)
    flare_lc = tt.where((time <= tpeak) * ((time - tpeak) / fwhm > -1.),
                        _before_flare(time, tpeak, fwhm, ampl),
                        flare_lc
                        )
    flare_lc = tt.where((time > tpeak) * ((time - tpeak) / fwhm < 20.),
                        _after_flare(time, tpeak, fwhm, ampl),
                        flare_lc
                        )
    return flare_lc
Example 15
 def __init__(self, input, filter_shape=None, image_shape=None,W=None, b=None, poolsize=(3, 1)):
   assert image_shape[1] == filter_shape[1]
   self.W = W
   self.b = b 
   tmp = numpy.ones((filter_shape[0],), dtype=theano.config.floatX)
   tmp = -tmp*10000
   self.test = theano.shared(value=tmp, borrow=True)
   conv_out = conv.conv2d(input=input, filters=self.W, filter_shape=filter_shape, image_shape=image_shape)
   conv_out2 = T.where(T.neq(conv_out,0), conv_out, self.test.dimshuffle('x', 0, 'x', 'x'))
   pooled_out = downsample.max_pool_2d(conv_out2,ds=poolsize, ignore_border=True)
   pooled_out2 = T.where(T.neq(pooled_out, -10000) ,pooled_out, -self.b.dimshuffle('x', 0, 'x', 'x')) 
   self.output = ReLU(pooled_out2 + self.b.dimshuffle('x', 0, 'x', 'x'))
   #self.output = T.nnet.sigmoid(pooled_out2 + self.b.dimshuffle('x', 0, 'x', 'x'))
   self.params = [self.W, self.b]
Example 16
File: eca.py Project: afcentry/eca
def lerp(old, new, min_tau=0.0, en=None):
    """
    Return new interpolated value and a relative difference
    """
    diff = T.mean(T.sqr(new) - T.sqr(old), axis=1, keepdims=True)
    rel_diff = diff / (T.mean(T.sqr(old), axis=1, keepdims=True) + 1e-5)
    t = rel_diff * 20.
    t = T.where(t < 5, 5, t)
    t = T.where(t > 100, 100, t)
    t = t + min_tau
    if en is not None:
        lmbd = T.diagonal(en).dimshuffle(0, 'x') * (1. / t)
    else:
        lmbd = 1. / t
    return ((1 - lmbd) * old + lmbd * new, t, rel_diff)
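A small sketch of how lerp might be wired into an update loop; the shapes and names below are assumptions:

import numpy as np
import theano
import theano.tensor as T

# Shared state of shape (units, samples), pulled toward a new observation.
state = theano.shared(np.ones((4, 10), dtype=theano.config.floatX))
new = T.matrix('new')

new_state, t, rel_diff = lerp(state, new, min_tau=5.0)
update_state = theano.function([new], rel_diff, updates=[(state, new_state)])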
Example 17
File: eca.py Project: arasmus/eca
    def compile_prop_f(self, signals, has_input, min_tau=0.0):
        tau_in = T.scalar('min_tau', dtype=FLOATX)
        inputs = [tau_in]
        x = self.signal(signals)

        # Get estimate of the state from layer above
        estimate = self.estimate(signals)

        # Feedforward originates from previous layer's state or given input
        if not has_input:
            feedforward = self.feedforward(signals)
            has_nans = T.as_tensor_variable(0)
            nans = 0.0
        else:
            input_t = T.matrix('input', dtype=FLOATX)
            inputs += [input_t]
            nans = T.isnan(input_t)
            has_nans = T.any(nans)
            feedforward = T.where(nans, 0.0, input_t)

        self.info('Compiling propagation: [%6s] -> %4s <- [%6s]' %
                  (",".join([p.name for p in self.prev] if self.prev else 'u/y'),
                   self.name,
                   ",".join([p.name for p in self.next] if self.next else '')))

        # Apply nonlinearity to feedforward path only
        if self.nonlin:
            feedforward = self.nonlin(feedforward)

        if self.merge_op:
            assert not self.persistent, 'cannot combine with merge_op'
            new_value = self.merge_op(feedforward, estimate)
        elif self.persistent:
            new_value = feedforward
        else:
            new_value = feedforward - estimate

        # If predicting missing values, force them to zero in residual so
        # that they don't influence learning
        new_value = ifelse(has_nans, T.where(nans, 0.0, new_value), new_value)

        (new_X, t, d) = lerp(x.var, new_value, tau_in)
        d = T.max(d)
        updates = [(x.var, ifelse(self.enabled, new_X, x.var))]

        return theano.function(inputs=inputs,
                               outputs=d,
                               updates=updates)
Example 18
def bspline_bfs(x, knots, P):
    """ temporal basis function
            x: t-delta distance to last knot (horizon 5)
    """
    knots = knots.astype(np.float32)
    idx = ((x >= knots[0]) & (x < knots[-1]))  # .nonzero()
    xx = x[idx]

    N = {}
    for p in range(P + 1):
        for i in range(len(knots) - 1 - p):
            if p == 0:
                N[(i, p)] = tt.where((knots[i] <= xx) * (xx < knots[i + 1]),
                                     1.0, 0.0)
            else:
                N[(i, p)] = \
                    (xx - knots[i]) / (knots[i + p] - knots[i]) * N[(i, p - 1)] + \
                    (knots[i + p + 1] - xx) / (knots[i + p + 1] - knots[i + 1]) * \
                    N[(i + 1, p - 1)]

    highest_level = []
    for i in range(len(knots) - 1 - P):
        res = tt.zeros_like(x)
        highest_level.append(tt.set_subtensor(res[idx], N[(i, P)]))
    return highest_level
Example 19
File: eca.py Project: arasmus/eca
def lerp(old, new, min_tau=0.0, en=None):
    """
    Return new interpolated value and a relative difference
    """
    diff = T.mean(T.sqr(new) - T.sqr(old), axis=1, keepdims=True)
    rel_diff = diff / (T.mean(T.sqr(old), axis=1, keepdims=True) + 1e-5)
    t = rel_diff * 20.
    t = T.where(t < 5, 5, t)
    t = T.where(t > 100, 100, t)
    t = t + min_tau
    if en is not None:
        lmbd = T.diagonal(en).dimshuffle(0, 'x') * (1. / t)
    else:
        lmbd = 1. / t
    return ((1 - lmbd) * old + lmbd * new,
            t, rel_diff)
Example 20
  def get_output_for(self, inputs, **kwargs):
    '''
    Take the exp() of all inputs, and divide by the total.
    '''
    exps = T.where(T.eq(inputs[0],0), np.float32(0.0), np.float32(1.0)) * T.exp(inputs[1])

    return exps / (exps.sum(axis=1).dimshuffle((0, 'x')) + 1e-6)
Example 21
def window_batch_timewise(t, b, w, full_index):
    for i in range(w):
        full_index = T.set_subtensor(full_index[i], T.roll(full_index[i], i))
        if i > 0:
            full_index = T.inc_subtensor(
                full_index[i], T.where(full_index[i] > 0, i * t * b - i, 0))
    return full_index
Example 22
    def dropout(self, rate=0.5, seed=None):
        obj = self.copy()

        srng = RandomStreams(seed)
        obj.out = T.where(srng.uniform(size=obj.out.shape) > rate, obj.out, 0)

        return obj
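The same masking idea as a standalone expression, independent of the class above (a minimal sketch):

import theano
import theano.tensor as T
from theano.tensor.shared_randomstreams import RandomStreams

srng = RandomStreams(seed=42)
x = T.matrix('x')
rate = 0.5
# Keep each entry only where the uniform draw exceeds the dropout rate.
dropped = T.where(srng.uniform(size=x.shape) > rate, x, 0)
apply_dropout = theano.function([x], dropped)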
Example 23
File: eca.py Project: afcentry/eca
    def compile_adapt_f(self, signals):
        x = self.signal(signals)
        x_prev = [p.signal(signals) for p in self.prev]
        assert np.all([x.k == xp.k for xp in x_prev])
        assert self.m == [xp.n for xp in x_prev]
        assert x.n == self.n
        k = np.float32(x.k)
        # Modulate x
        if x.modulation is not None:
            x_ = x.var * T.as_tensor_variable(x.modulation)
        else:
            x_ = x.var

        updates = []
        upd = lambda en, old, new: [(old, ifelse(en, new, old))]

        E_XX_new, _, d = lerp(self.E_XX, T.dot(x_, x_.T) / k, self.min_tau)
        updates += upd(self.enabled, self.E_XX, E_XX_new)
        b = 1.
        d = T.diagonal(E_XX_new)
        stiff = T.scalar('stiffnes', dtype=FLOATX)
        Q_new = theano_diag(
            b / T.where(d < stiff * self.stiffx, stiff * self.stiffx, d))
        updates += upd(self.enabled, self.Q, Q_new)

        for i, x_p in enumerate(x_prev):
            E_XU_new, _, d_ = lerp(self.E_XU[i],
                                   T.dot(x_, x_p.var.T) / k, self.min_tau)
            updates += upd(self.enabled, self.E_XU[i], E_XU_new)
            d = T.maximum(d, d_)
            updates += upd(self.enabled, self.phi[i], T.dot(Q_new, E_XU_new).T)

        self.info('Compile layer update between: ' + self.name + ' and ' +
                  ', '.join([p.name for p in self.prev]))
        return theano.function(inputs=[stiff], outputs=d, updates=updates)
Example 24
    def init_layer_updates(self, layer):
        if not layer.parameters:
            return []

        prediction_func = self.variables.train_prediction_func
        network_output = self.variables.network_output
        network_input = self.variables.network_input
        step = self.variables.step

        normalized_input = network_input / network_input.norm(L=2)
        summated_output = network_input.dot(layer.weight) + layer.bias
        linear_error = prediction_func - network_output
        update = T.where(
            T.abs_(summated_output) >= self.dead_zone_radius,
            linear_error,
            network_output
        )

        weight_delta = normalized_input.T.dot(update)
        bias_delta = linear_error.sum(axis=0)

        return [
            (layer.weight, layer.weight - step * weight_delta),
            (layer.bias, layer.bias - step * bias_delta),
        ]
Example 25
def weighted_binary_cross_entropy_1(pred, target, mean_notes_activation):
    # Weights correspond to the mean number of positive occurrences of the class in the training dataset
    # From :
    # Weighted Multi-label Binary Cross-entropy Criterion
    # https://github.com/Nanne/WeightedMultiLabelBinaryCrossEntropyCriterion
    # https://arxiv.org/pdf/1511.02251.pdf
    # From theano
    #
    # RESULTS :
    # Accuracy = 26%
    # Listening : quite good, a bit too many notes, but harmonically consistent
    # Weights : static biases on output still bias toward negative values, but in a more structured way, i.e. some values around the most likely notes are high (even positives)
    # W is highly structured, but past influence is weaker and less contrasted than piano influence
    match = target * T.log(pred) / T.where(mean_notes_activation == 0, 1e-10, mean_notes_activation)
    not_match = (1.0 - target) * T.log(1.0 - pred) / T.where(mean_notes_activation == 1, 1e-10, (1-mean_notes_activation))
    return -(match + not_match)
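A usage sketch; the 88-note piano-roll shape and the activation statistics are assumptions:

import numpy as np
import theano
import theano.tensor as T

pred = T.matrix('pred')      # (batch, 88) predicted note probabilities
target = T.matrix('target')  # (batch, 88) binary piano-roll targets
# Mean activation of each note over the training set (illustrative constant).
mean_notes_activation = np.full(88, 0.05, dtype=theano.config.floatX)

loss = T.mean(weighted_binary_cross_entropy_1(pred, target,
                                              mean_notes_activation))
loss_fn = theano.function([pred, target], loss)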
Example 26
File: eca.py Project: afcentry/eca
    def compile_prop_f(self, signals, has_input, min_tau=0.0):
        tau_in = T.scalar('min_tau', dtype=FLOATX)
        inputs = [tau_in]
        x = self.signal(signals)

        # Get estimate of the state from layer above
        estimate = self.estimate(signals)

        # Feedforward originates from previous layer's state or given input
        if not has_input:
            feedforward = self.feedforward(signals)
            has_nans = T.as_tensor_variable(0)
            nans = 0.0
        else:
            input_t = T.matrix('input', dtype=FLOATX)
            inputs += [input_t]
            nans = T.isnan(input_t)
            has_nans = T.any(nans)
            feedforward = T.where(nans, 0.0, input_t)

        self.info(
            'Compiling propagation: [%6s] -> %4s <- [%6s]' %
            (",".join([p.name for p in self.prev] if self.prev else 'u/y'),
             self.name, ",".join([p.name
                                  for p in self.next] if self.next else '')))

        # Apply nonlinearity to feedforward path only
        if self.nonlin:
            feedforward = self.nonlin(feedforward)

        if self.merge_op:
            assert not self.persistent, 'cannot combine with merge_op'
            new_value = self.merge_op(feedforward, estimate)
        elif self.persistent:
            new_value = feedforward
        else:
            new_value = feedforward - estimate

        # If predicting missing values, force them to zero in residual so
        # that they don't influence learning
        new_value = ifelse(has_nans, T.where(nans, 0.0, new_value), new_value)

        (new_X, t, d) = lerp(x.var, new_value, tau_in)
        d = T.max(d)
        updates = [(x.var, ifelse(self.enabled, new_X, x.var))]

        return theano.function(inputs=inputs, outputs=d, updates=updates)
Example 27
  def get_output_for(self, inputs, **kwargs):
    '''
    First layer is a batch of matrices of embedding indices:
    Second layer are the corresponding embeddings:
    ''' 

    return \
        T.where(T.eq(inputs[0],0), np.float32(0.0), np.float32(1.0)).dimshuffle((0,1,2,'x')) * inputs[1]
Example 28
    def get_output_for(self, inputs, **kwargs):
        '''
    First layer is a batch of matrices of embedding indices:
    Second layer are the corresponding embeddings:
    '''

        return \
            T.where(T.eq(inputs[0],0), np.float32(0.0), np.float32(1.0)).dimshuffle((0,1,2,'x')) * inputs[1]
Example 29
def weighted_binary_cross_entropy_3(pred, target, mean_notes_activation):
    # Mix of 1 and 2
    # From theano
    #
    # RESULTS
    # Accuracy = 31%
    # Listening : not good, not harmonic, strange ranges...
    # Weights : static biases strongly biased toward negative values
    # W shows that past is neglected
    BATCH_SIZE = pred.shape[0]
    DIM = pred.shape[1]
    N_on_per_batch = T.transpose(T.tile(target.sum(axis=1), (DIM, 1))) + 1
    N_off_per_batch = T.transpose(T.tile((1-target).sum(axis=1), (DIM, 1))) + 1
    mean_notes_on = T.tile(T.where(mean_notes_activation==0, 1e-10, mean_notes_activation), (BATCH_SIZE, 1))
    mean_notes_off = T.tile(T.where(mean_notes_activation==1, 1e-10, (1-mean_notes_activation)), (BATCH_SIZE, 1))
    # +1 to avoid zero weighting
    return - (N_on_per_batch * target * T.log(pred) / mean_notes_on + N_off_per_batch * (1.0 - target) * T.log(1.0 - pred) / mean_notes_off)
Example 30
    def MMD_class_penalty(self, target, Xlabel):
        # list of length num_classes (10 here): the number of samples in each class, in order
        Num_c = T.sum(target, 0)
        D_num = Xlabel.shape[1]
        # C x Domain_num matrix: each entry counts how many samples of class c come from domain 1, 2, 3, ...
        Number_label = T.sum(target.T[:, None, :] * Xlabel.T[None, :, :], 2)

        K_base = self.kern.RBF(self.cal, self.cal)

        # 10*N*N: per-class Gram matrices, ignoring domain membership
        K_class, updates = theano.scan(fn=lambda a: ((K_base * a).T * a),
                                       sequences=[target.T])
        # sum over the entries of each Gram matrix
        K_allsum = T.sum(T.sum(K_class, -1), -1)
        # divide by the squared class count; the switch guards against division by zero when a class is empty
        K_sum_tot, updates = theano.scan(
            fn=lambda a, b: T.switch(T.gt(b, 0), a / b**2, 0),
            sequences=[K_allsum, Num_c])

        # 10*3*N*N: per-class, per-domain Gram matrices; the Xlabel filter is applied from both sides, and a nested scan iterates over the domains within each class
        K_class_domain_cross, updates = theano.scan(fn=lambda c: theano.scan(
            fn=lambda a: ((c * a).T * a), sequences=[Xlabel.T]),
                                                    sequences=[K_class])
        # now C*D: sum the entries of each per-class, per-domain Gram matrix
        K_allsum = T.sum(T.sum(K_class_domain_cross, -1), -1)
        # if no sample falls in a given (class, domain) cell, both the Gram sum and the matching denominator entry are 0; substitute 1 in the denominator (the numerator is 0 anyway, so the result is unchanged)
        Number_label2 = T.where(T.eq(Number_label, 0), 1, Number_label)
        K_class_sum = T.sum(K_allsum / (Number_label2**2))

        # cross term between one (class, domain) block and all domains of that class, so the filter is applied from one side only
        K_class_domain_center_cross, updates = theano.scan(
            fn=lambda c: theano.scan(fn=lambda a: (c * a),
                                     sequences=[Xlabel.T]),
            sequences=[K_class])
        # repeat the same per-domain treatment as above
        K_sum_cross = T.sum(T.sum(K_class_domain_center_cross, -1), -1)
        Number_label2 = T.where(T.eq(Number_label, 0), 1, Number_label)
        K_domain_cross_sum = T.sum(K_sum_cross /
                                   (Number_label2 * Num_c[:, None]))
        #z_switch = T.switch(T.lt(a, b), T.mean(x), T.mean(y))

        MMD_class = K_class_sum + T.sum(
            K_sum_tot) * D_num - 2 * K_domain_cross_sum

        return MMD_class
Example 31
        def lda_logp(rt, gaze, values, error_ll, v_index, tau_index,
                     gamma_index, s_index, t0_index, zerotol):

            # compute drifts
            R = make_R(v[0, tt.cast(v_index, dtype='int32')][:, None],
                       tau[0, tt.cast(tau_index, dtype='int32')][:, None],
                       gamma[0, tt.cast(gamma_index, dtype='int32')][:, None],
                       values, gaze, zerotol)
            glam_ll = tt_wienerrace_pdf(
                rt[:, None], R, s[0, tt.cast(s_index, dtype='int32')][:, None],
                b, t0[0, tt.cast(t0_index, dtype='int32')][:, None], zerotol)

            # mix likelihoods
            mixed_ll = ((1 - p_error) * glam_ll + p_error * error_ll)

            mixed_ll = tt.where(tt.isnan(mixed_ll), 0., mixed_ll)
            mixed_ll = tt.where(tt.isinf(mixed_ll), 0., mixed_ll)
            return tt.log(mixed_ll + zerotol)
Example 32
def robust_expit(x):
    def expit_p(z):
        return 1 / (1 + tt.exp(-z))

    def expit_n(z):
        exp_z = tt.exp(z)
        return exp_z / (1 + exp_z)

    # pick, elementwise, the formulation that avoids overflowing exp()
    return tt.where(x > 0, expit_p(x), expit_n(x))
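A quick numerical check of the corrected function (the values are arbitrary):

import numpy as np
import theano
import theano.tensor as tt

x = tt.dvector('x')
expit = theano.function([x], robust_expit(x))
print(expit(np.array([-30.0, 0.0, 30.0])))   # approximately [0, 0.5, 1]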
Example 33
	def focal_loss_fixed(y_true, y_pred):
		if(K.backend()=="tensorflow"):
			import tensorflow as tf
			pt = tf.where(tf.equal(y_true, 1), y_pred, 1 - y_pred)
			return -K.mean(alpha * K.pow(1. - pt, gamma) * K.log(pt))
		if(K.backend()=="theano"):
			import theano.tensor as T
			pt = T.where(T.eq(y_true, 1), y_pred, 1 - y_pred)
			return -K.mean(alpha * K.pow(1. - pt, gamma) * K.log(pt))
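For reference, a pure-Theano version of the same expression, with alpha and gamma (which the closure above captures) pinned to assumed values:

import theano
import theano.tensor as T

alpha, gamma = 0.25, 2.0
y_true = T.matrix('y_true')
y_pred = T.matrix('y_pred')
pt = T.where(T.eq(y_true, 1), y_pred, 1 - y_pred)
focal = -T.mean(alpha * T.pow(1. - pt, gamma) * T.log(pt))
focal_fn = theano.function([y_true, y_pred], focal)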
Example 34
 def MMD_class_penalty(self,target,Xlabel):
      # list of length num_classes (10 here): the number of samples in each class, in order
     Num_c=T.sum(target,0)
     D_num=Xlabel.shape[1]
      # C x Domain_num matrix: each entry counts how many samples of class c come from domain 1, 2, 3, ...
     Number_label=T.sum(target.T[:,None,:]*Xlabel.T[None,:,:],2)
     
     K_base=self.kern.RBF(self.cal,self.cal)
     
      # 10*N*N: per-class Gram matrices, ignoring domain membership
     K_class, updates = theano.scan(fn=lambda a: ((K_base*a).T*a),
                           sequences=[target.T])
      # sum over the entries of each Gram matrix
     K_allsum=T.sum(T.sum(K_class,-1),-1)
      # divide by the squared class count; the switch guards against division by zero when a class is empty
     K_sum_tot, updates = theano.scan(fn=lambda a,b: T.switch(T.gt(b,0), a/b**2, 0),
                           sequences=[K_allsum,Num_c])
     
      # 10*3*N*N: per-class, per-domain Gram matrices; the Xlabel filter is applied from both sides, and a nested scan iterates over the domains within each class
     K_class_domain_cross,updates = theano.scan(fn=lambda c:
                                     theano.scan(fn=lambda a: ((c*a).T*a),
                                                 sequences=[Xlabel.T])
                                     ,sequences=[K_class])
      # now C*D: sum the entries of each per-class, per-domain Gram matrix
     K_allsum=T.sum(T.sum(K_class_domain_cross,-1),-1)
      # if no sample falls in a given (class, domain) cell, both the Gram sum and the matching denominator entry are 0; substitute 1 in the denominator (the numerator is 0 anyway, so the result is unchanged)
     Number_label2=T.where(T.eq(Number_label,0),1,Number_label)
     K_class_sum=T.sum(K_allsum/(Number_label2**2))
     
     
      # cross term between one (class, domain) block and all domains of that class, so the filter is applied from one side only
     K_class_domain_center_cross,updates = theano.scan(fn=lambda c:
                                     theano.scan(fn=lambda a: (c*a),
                                                 sequences=[Xlabel.T])
                                     ,sequences=[K_class])
      # repeat the same per-domain treatment as above
     K_sum_cross=T.sum(T.sum(K_class_domain_center_cross,-1),-1)
     Number_label2=T.where(T.eq(Number_label,0),1,Number_label)
     K_domain_cross_sum=T.sum(K_sum_cross/(Number_label2*Num_c[:,None]))
     #z_switch = T.switch(T.lt(a, b), T.mean(x), T.mean(y))
     
     MMD_class=K_class_sum+T.sum(K_sum_tot)*D_num-2*K_domain_cross_sum
     
     return MMD_class
Example 35
def irprop_star_trainer(x,
                        y,
                        w,
                        parameters,
                        loss,
                        random_stream,
                        positive_step=1.2,
                        negative_step=0.5,
                        max_step=1.,
                        min_step=1e-6):
    """ IRPROP* trainer (own experimental modification of IRPROP-, not recommended for usage) """
    shareds = []
    updates = []
    loss_value = loss(x, y, w)

    for name, param in parameters.items():
        param_shape = param.get_value().shape
        n = int(numpy.prod(param_shape))
        new_derivative_ = T.grad(loss_value, param).flatten()
        lnewder, rnewder = new_derivative_.reshape(
            [n, 1]), new_derivative_.reshape([1, n])
        new_derivative_plus = lnewder + rnewder
        new_derivative_minus = lnewder - rnewder
        new_param = param
        for new_derivative in [new_derivative_plus, new_derivative_minus]:
            delta = theano.shared(numpy.zeros([n, n], dtype=floatX) + 1e-3)
            old_derivative = theano.shared(numpy.zeros([n, n], dtype=floatX))

            new_delta = T.where(new_derivative * old_derivative > 0,
                                delta * positive_step, delta * negative_step)
            new_delta = T.clip(new_delta, min_step, max_step)

            updates.append([delta, new_delta])
            new_old_derivative = T.where(new_derivative * old_derivative < 0,
                                         0, new_derivative)
            updates.append([old_derivative, new_old_derivative])
            new_param = new_param - (new_delta * T.sgn(new_derivative)).sum(
                axis=1).reshape(param.shape)
            shareds.extend([old_derivative, delta])

        updates.append([param, new_param])

    return shareds, updates
Example 36
    def censor_updates(self, updates):
        """
        Transition matrix should be non-negative
        """

        if self.W in updates:
            updated_W = updates[self.W]
            desired_W = tensor.where(updated_W < 0, self.W, updated_W)
            updates[self.W] = desired_W

        self.mlp.censor_updates(updates)
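The clamping rule is easy to see in isolation; a tiny standalone sketch (not the class above):

import numpy as np
import theano
import theano.tensor as tensor

# Wherever the proposed update would make W negative, keep the old entry of W.
W = theano.shared(np.array([0.2, 0.5]))
proposed = tensor.dvector('proposed')
clamped = tensor.where(proposed < 0, W, proposed)
clamp = theano.function([proposed], clamped)
print(clamp(np.array([-0.1, 0.7])))   # -> [0.2, 0.7]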
Example 38
def irprop_minus_trainer(x, y, w, parameters, loss, random_stream,
                         positive_step=1.2, negative_step=0.5, max_step=1., min_step=1e-6):
    """ IRPROP- trainer, see http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.21.3428 """
    shareds = []
    updates = []
    loss_value = loss(x, y, w)
    for name, param in parameters.items():
        old_derivative = theano.shared(param.get_value() * 0.)
        delta = theano.shared(param.get_value() * 0. + 1e-3)
        shareds.extend([old_derivative, delta])
        new_derivative = T.grad(loss_value, param)

        new_delta = T.where(new_derivative * old_derivative > 0, delta * positive_step, delta * negative_step)
        new_delta = T.clip(new_delta, min_step, max_step)

        updates.append([param, param - new_delta * T.sgn(new_derivative)])
        updates.append([delta, new_delta])

        new_old_derivative = T.where(new_derivative * old_derivative < 0, 0, new_derivative)
        updates.append([old_derivative, new_old_derivative])
    return shareds, updates
Example 39
def cartesianToSpherical_plus_mu(a):
	"""
	Convert Cartesian to spherical coordinates. The inputs must be Theano tensors.
	Note that the angle coordinates follow the astronomical convention of using elevation (declination,
	latitude) rather than its complement (pi/2-elevation), which is commonly used in the mathematical
	treatment of spherical coordinates.
	Parameters
	----------

	x - Cartesian vector component along the X-axis
	y - Cartesian vector component along the Y-axis
	z - Cartesian vector component along the Z-axis
	vx - Cartesian vector component of velocity along the Phi   axis
	vy - Cartesian vector component of velocity along the Theta axis

	Returns
	-------

	The spherical coordinates:
	longitude phi, 
	latitude theta,
	parallax,
	proper motion phi,
	proper motion theta.

	NOTE THAT THE LONGITUDE ANGLE IS BETWEEN 0 AND +2PI. FOR r=0 AN EXCEPTION IS RAISED.
	"""
	x  = a[:,0]
	y  = a[:,1]
	z  = a[:,2]
	vx = a[:,3]
	vy = a[:,4]

	rCylSq=x*x+y*y
	r=tt.sqrt(rCylSq+z*z)
	phi = tt.arctan2(y,x)
	phi = tt.where(phi<0.0, phi+2*np.pi, phi)
	theta = tt.arctan2(z,tt.sqrt(rCylSq))

	#------- Velocity ------------------------------------
	mu_phi   = 1000.0*vx/(4.74*r) # Proper motion in mas/yr
	mu_theta = 1000.0*vy/(4.74*r) # Proper motion in mas/yr

	#-------- Units----------
	phi   = tt.rad2deg(phi)   # Degrees
	theta = tt.rad2deg(theta) # Degrees
	plx   = _auMasParsec/r          # mas


	#------- Join ------
	res = tt.stack([phi, theta ,plx, mu_phi, mu_theta],axis=1)
	return res
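A hypothetical usage sketch; note that _auMasParsec (the AU/mas-parsec scaling) is a module-level constant in the original source and must be defined for this to run:

import numpy as np
import theano
import theano.tensor as tt

a = tt.dmatrix('a')   # rows of [x, y, z, vx, vy]
to_spherical = theano.function([a], cartesianToSpherical_plus_mu(a))

sample = np.array([[1.0, 1.0, 0.5, 10.0, -5.0]])
print(to_spherical(sample))   # phi, theta, plx, mu_phi, mu_theta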
Example 40
        def lda_logp(rt, gaze, values, error_ll, zerotol):

            # compute drifts
            drift = glam.components.expdrift(v, tau, gamma, values, gaze,
                                             zerotol)
            glam_ll = glam.components.tt_wienerrace_pdf(
                rt[:, None], drift, s, b, t0, zerotol)

            # mix likelihoods
            mixed_ll = ((1 - p_error) * glam_ll + p_error * error_ll)

            mixed_ll = tt.where(tt.isnan(mixed_ll), 0., mixed_ll)
            return tt.sum(tt.log(mixed_ll + zerotol))
Example 41
    def negative_log_likelihood_classwise_masking(self, y, mask_class_labeled,
                                                  mask_class_not_present):
        """
        todo: test.
        :y: true classes (as integer value): (batchsize, x, y)
        :mask_class_labeled: matrix: (batchsize, num_classes) allowed values: 0 or 1; setting everything to 1 leads to the ordinary nll; all zeroes is an invalid state.
                    a zero for one class indicates that this class may be present but is not labeled as such.
        :mask_class_not_present: (batchsize, num_classes): similar to mask_class_labeled, but now a 1 indicates that a class is CERTAINLY NOT PRESENT in the batch.
        
        values of -1 in y count as "absolutely not labeled / ignore predictions"; this has PRIORITY over anything else (including mask_class_not_present).
        """
        y = y.dimshuffle(0, 'x', 1, 2)  #(batchsize, 1, x, y)
        mask_class_labeled = mask_class_labeled.dimshuffle(
            0, 1, 'x', 'x')  #(batchsize, num_classes,1 ,1)
        mask_class_not_present = mask_class_not_present.dimshuffle(
            0, 1, 'x', 'x')  #(batchsize, num_classes,1 ,1)
        global_loss_mask = (
            y != -1
        )  #apply to overall loss after everything is calculated; marks positions

        pred = self.class_probabilities_realshape  # (batchsize, num_classes, x, y)
        mod_y = T.where(y < 0, 0, y)

        #dirty hack: compute "standard" nll when most predictive weight is put on classes which are in fact labeled
        votes_not_for_unlabeled = T.where(
            T.sum(pred * mask_class_labeled, axis=1) >= 0.5, 1,
            0).dimshuffle(0, 'x', 1, 2)

        # could also add '* mask_class_labeled' inside, but this should not change anything , provided there is no logical conflict between y and mask_class_labeled !
        nll = -T.mean(
            (T.log(pred) * votes_not_for_unlabeled * global_loss_mask)[:,
                                                                       mod_y]
        )  #standard loss part -> increase p(correct_prediction); thus disabled if the "correct" class is not known

        # penalize predictions: sign is a plus! (yes: '+')
        # remove <global_loss_mask> if <mask_class_not_present> should override 'unlabeled' areas.
        nll += T.mean(T.log(pred) * mask_class_not_present * global_loss_mask)

        return nll
Example 42
def tt_wienerpos_fpt_cdf(t, drift, noise, boundary, numerical_stability=100):
    """
    Cumulative distribution function of first passage times of
    Wiener process with positive drift towards constant boundary.
    Theano tensor implementation

    Cf https://en.wikipedia.org/wiki/Inverse_Gaussian_distribution#Relationship_with_Brownian_motion
    """
    mu = boundary / drift
    lam = (boundary / noise)**2
    bounded_ratio = tt.where(
        lam/mu >= numerical_stability, numerical_stability, lam/mu)
    return (tt_normal_cdf(tt.sqrt(lam / t) * (t / mu - 1)) +
            tt.exp(2*bounded_ratio) * tt_normal_cdf(-(tt.sqrt(lam / t) * (t / mu + 1))))
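The helper tt_normal_cdf is not shown in this snippet; a minimal stand-in via the error function (an assumption, not the project's own definition) would be:

import theano.tensor as tt

def tt_normal_cdf(x):
    # standard normal CDF: Phi(x) = 0.5 * (1 + erf(x / sqrt(2)))
    return 0.5 * (1.0 + tt.erf(x / tt.sqrt(2.0)))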
Example 43
        def fn(x_t, h_tm1, c_tm1, hid_ref, mask_ref, V):
            x_ct = TT.dot(x_t, self.find('xh')) + self.find(
                'b')  # batch_size * h_size

            xi, xf, xc, xo = split(x_ct + TT.dot(h_tm1, self.find('hh')))
            i_t = TT.nnet.sigmoid(xi + c_tm1 * self.find('ci'))
            f_t = TT.nnet.sigmoid(xf + c_tm1 * self.find('cf'))
            c_t = f_t * c_tm1 + i_t * TT.tanh(xc)
            o_t = TT.nnet.sigmoid(xo + c_t * self.find('co'))
            h_t = o_t * TT.tanh(c_t)

            #hid_p = TT.dot(h_t, V) # batch_size * size.
            hid_p = TT.dot(h_tm1, V)  # batch_size * size.
            hid_p_dim = hid_p.dimshuffle(('x', 0, 1))
            x_ts = TT.extra_ops.repeat(hid_p_dim, hid_ref.shape[0],
                                       axis=0)  # mask_len * batch_size * size

            emb = x_ts * hid_ref  # mask_len * batch_size * size.
            beta = TT.sum(emb, axis=-1)  # mask_len * batch_size.
            beta_b = TT.where(mask_ref > 0, beta, beta.min())
            beta_b = beta_b - beta_b.max(axis=0, keepdims=True)  # subtract the column max to stabilize the exponentials below
            #beat_b = beta_b.clip(-50, 0)
            z = TT.exp(beta_b * mask_ref) * mask_ref
            z_sum = TT.sum(z, axis=0, keepdims=True)
            #z = theano.printing.Print('this is a very important value')(z)
            #z_sum = theano.printing.Print('this is a very important value')(z_sum)
            #alpha = (z * mask_ref ) / ( z * mask_ref ).sum(axis=0, keepdims=True) # max_len *  batch_size.
            #alpha = z / TT.sum(z, axis = 0, keepdims = True)
            alpha = z / z_sum
            #if stage == 'train':
            #    alpha_sample = self.h_sampling_mask * self.rng.multinomial(pvals = alpha.T, dtype = 'float32') \
            #                   + (1. - self.h_sampling_mask) * alpha.T
            #    alpha_sample = alpha_sample.T
            #    logging.info('LSTMAtt: stage is %s, using the random.', stage)
            #elif stage == 'test': # argmax for prediction.
            #    alpha_sample = TT.cast(TT.eq(TT.arange(alpha.shape[0])[:,None], \
            #    #alpha_sample = TT.cast(TT.eq(TT.arange(alpha.shape[0])[None,:], \
            #        TT.argmax(alpha,axis=0,keepdims=True)), theano.config.floatX)
            #    logging.info('LSTMAtt: stage is %s, using the argmax.', stage)
            hid_ref_dim = hid_ref.dimshuffle(
                (2, 0, 1))  # emb_size * mask_len * batch_size
            #att = alpha * hid_ref_dim # now is size * max_len * batch_size
            att = hid_ref_dim * alpha  # now is size * max_len * batch_size
            att = att.sum(axis=1)  # size * batch_size

            return [h_t, c_t, alpha, att.T]
Example 44
def build_background_detector(BATCH_SIZE=None,
                              input_dim=(1000, 1000),
                              filter_size=11,
                              threshold=0.1):
    background_detector = OrderedDict()
    background_detector['input'] = InputLayer(
        (BATCH_SIZE, 1, input_dim[0], input_dim[1]))

    background_detector['background_detector'] = ConvLayer(
        background_detector['input'],
        num_filters=1,
        filter_size=filter_size,
        nonlinearity=lambda x: T.where(T.le(x, threshold), 0, 1),
        pad='same',
        W=1.0 / filter_size**2 * np.ones((1, 1, filter_size, filter_size)),
        b=None)

    return background_detector
Example 45
File: run.py Project: arasmus/eca
def main():
    repeats = 1  # per configuration

    relu = lambda x: T.where(x < 0., 0., x)
    runs = [
        # tanh requires higher tau
        {'config': {'layers': [70], 'tau': (20, 4, 0.95), 'iters': 200, 'nonlin': relu}},
        {'config': {'layers': [60], 'tau': (20, 4, 0.99), 'iters': 200, 'nonlin': T.tanh}},
        {'config': {'layers': [70], 'tau': (20, 4, 0.99), 'iters': 200, 'nonlin': T.tanh}},
    ]

    try:
        for run in runs:
            print 'Running configuration', run['config']
            results = []
            best = 0.0

            # Do several runs with different seeds
            for i in range(repeats):
                d = TrainData(35022, 0.6, i)
                pred, best_iter = eca_missing_value_prediction(d, run['config'])

                # Store results
                acc = d.accuracy(pred)
                results = [acc]
                best = max(best, best_iter)
                print '%5.2f %%' % (acc * 100.), str(run['config']), 'iteration', i

            run['results'] = (results, best)
            print '-----------------'
    except KeyboardInterrupt:
        pass

    print 'Summary'
    print '-------'
    for run in runs:
        if 'results' not in run:
            continue
        res, best = run['results']
        print "%5.2f +- %.2f (best: %5.2f): %s" % (100. * np.mean(res),
                                                   100. * np.sqrt(np.var(res)),
                                                   100. * best,
                                                   str(run['config']),)
Example 46
        def fn(x_t, h_tm1, c_tm1, hid_ref, mask_ref, V):
            x_ct = TT.dot(x_t, self.find('xh')) + self.find('b') # batch_size * h_size
             
            xi, xf, xc, xo = split(x_ct + TT.dot(h_tm1, self.find('hh')))
            i_t = TT.nnet.sigmoid(xi + c_tm1 * self.find('ci'))
            f_t = TT.nnet.sigmoid(xf + c_tm1 * self.find('cf'))
            c_t = f_t * c_tm1 + i_t * TT.tanh(xc)
            o_t = TT.nnet.sigmoid(xo + c_t * self.find('co'))
            h_t = o_t * TT.tanh(c_t)

            #hid_p = TT.dot(h_t, V) # batch_size * size.
            hid_p = TT.dot(h_tm1, V) # batch_size * size.
            hid_p_dim = hid_p.dimshuffle(('x', 0, 1))
            x_ts = TT.extra_ops.repeat(hid_p_dim, hid_ref.shape[0], axis = 0) # mask_len * batch_size * size

            emb = x_ts * hid_ref # mask_len * batch_size * size.
            beta = TT.sum(emb, axis=-1) # mask_len * batch_size.
            beta_b = TT.where( mask_ref > 0, beta, beta.min())
            beta_b = beta_b - beta_b.max(axis=0, keepdims=True)  # subtract the column max to stabilize the exponentials below
            #beat_b = beta_b.clip(-50, 0)
            z = TT.exp(beta_b * mask_ref) * mask_ref
            z_sum = TT.sum(z, axis = 0, keepdims = True)
            #z = theano.printing.Print('this is a very important value')(z)
            #z_sum = theano.printing.Print('this is a very important value')(z_sum)
            #alpha = (z * mask_ref ) / ( z * mask_ref ).sum(axis=0, keepdims=True) # max_len *  batch_size.
            #alpha = z / TT.sum(z, axis = 0, keepdims = True)
            alpha = z / z_sum
            #if stage == 'train':
            #    alpha_sample = self.h_sampling_mask * self.rng.multinomial(pvals = alpha.T, dtype = 'float32') \
            #                   + (1. - self.h_sampling_mask) * alpha.T
            #    alpha_sample = alpha_sample.T
            #    logging.info('LSTMAtt: stage is %s, using the random.', stage)
            #elif stage == 'test': # argmax for prediction.
            #    alpha_sample = TT.cast(TT.eq(TT.arange(alpha.shape[0])[:,None], \
            #    #alpha_sample = TT.cast(TT.eq(TT.arange(alpha.shape[0])[None,:], \
            #        TT.argmax(alpha,axis=0,keepdims=True)), theano.config.floatX)
            #    logging.info('LSTMAtt: stage is %s, using the argmax.', stage)
            hid_ref_dim = hid_ref.dimshuffle((2,0,1)) # emb_size * mask_len * batch_size
            #att = alpha * hid_ref_dim # now is size * max_len * batch_size
            att = hid_ref_dim * alpha# now is size * max_len * batch_size
            att = att.sum(axis = 1) # size * batch_size

            return [h_t, c_t, alpha, att.T]
Example 47
    def likelihood(self, z, y):
        η = z.flatten(min(2, z.ndim)) + self.bias
        Δ = self.binsize
        # 1st part of the likelihood
        L1 = tt.dot(y, η)
        if z.ndim > 1:
            ndim = z.ndim - 1
            shp_z = z.shape[-ndim:]
            L1 = L1.reshape(shp_z, ndim=ndim)
        # 2nd part of the likelihood
        λ = self.invlink(z + self.bias)
        L2 = Δ * tt.sum(λ, axis=0)
        # constant factors
        c1 = tt.sum(y) * tt.log(Δ)
        c2 = -tt.sum(tt.where(y > 1, tt.gammaln(y + 1), 0.0))
        const = c1 - c2

        L = L1 - L2 + const
        return as_tensor_variable(L, name='logL')
Example 48
    def init_layer_updates(self, layer):
        prediction_func = self.variables.train_prediction_func
        network_output = self.variables.network_output
        network_input = self.variables.network_input
        step = self.variables.step

        normalized_input = network_input / network_input.norm(L=2)
        summated_output = network_input.dot(layer.weight) + layer.bias
        linear_error = prediction_func - network_output
        update = T.where(
            T.abs_(summated_output) >= self.dead_zone_radius, linear_error,
            network_output)

        weight_delta = normalized_input.T.dot(update)
        bias_delta = linear_error.sum(axis=0)

        return [
            (layer.weight, layer.weight - step * weight_delta),
            (layer.bias, layer.bias - step * bias_delta),
        ]
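The update above only applies the usual least-mean-squares error term where the summed output lies outside the dead zone; inside it, the raw network output is used instead. A rough NumPy sketch of a single step, mirroring the code (parameter names are illustrative):

import numpy as np

def dead_zone_update(X, y_true, y_pred, W, b, step=0.1, dead_zone_radius=0.1):
    """One update mirroring init_layer_updates above (a sketch; X is (n, in),
    y_true / y_pred are (n, out), W is (in, out), b is (out,))."""
    X_norm = X / np.linalg.norm(X)          # whole-matrix L2 norm, as in .norm(L=2)
    summed = X.dot(W) + b
    linear_error = y_pred - y_true
    # outside the dead zone use the prediction error, inside it the raw target
    update = np.where(np.abs(summed) >= dead_zone_radius, linear_error, y_true)
    W_new = W - step * X_norm.T.dot(update)
    b_new = b - step * linear_error.sum(axis=0)
    return W_new, b_new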
Esempio n. 49
0
 def create_seg_wise_encoder_output(self, att, aligner=None):
   assert aligner,"please provide an inverted aligner!"
   t = self.base[0].output.shape[0]
   b = self.base[0].output.shape[1]
   att_with_first_index = T.concatenate([T.zeros((1,att.shape[1]))-numpy.float32(1),att],axis=0) #(N+1)B
   max_diff = T.cast(T.extra_ops.diff(att_with_first_index,axis=0).flatten().sort()[-1],'int32')
   reduced_index = aligner.reduced_index.repeat(max_diff).reshape((aligner.reduced_index.shape[0], aligner.reduced_index.shape[1],max_diff)) #NB(max_diff)
   att_wo_last_ind = att_with_first_index[:-1] #NB
   att_wo_last_ind +=numpy.int32(1)
   att_rep = att_wo_last_ind.repeat(max_diff).reshape((att_wo_last_ind.shape[0],att_wo_last_ind.shape[1],max_diff))#NB(max_diff)
   att_rep = T.switch(reduced_index>0, att_rep + T.arange(max_diff),T.zeros((1,),'float32')-numpy.float32(1))
   att_rep = att_rep.dimshuffle(0,2,1) #N(max_diff)B
   reduced_index = reduced_index.dimshuffle(0,2,1) #N(max_diff)B
   att_rep = T.switch(reduced_index > 0,att_rep + (T.arange(b) * t),T.zeros((1,),'float32')-numpy.float32(1))
   att_rep = att_rep.clip(0,(t*b-1))
   diff_arr = att_with_first_index[1:]-att_with_first_index[:-1]
   diff_arr = diff_arr.clip(0,max_diff) - numpy.float32(1)#NB
   mask = diff_arr.dimshuffle(0,'x',1).repeat(max_diff,axis=1) - T.arange(max_diff).dimshuffle('x',0,'x')
   ind = T.cast(T.where(T.lt(mask,numpy.float32(0)),T.zeros((1,),'float32'),numpy.float32(1)),'int8')
   self.rec_transform_enc = att_rep
   self.rec_transform_index = ind
Esempio n. 50
0
  def get_output_for(self, inputs, **kwargs):
    '''
    First layer is a batch of embedding indices:
    [[11,21,43,0,0],
     [234,543,0,0,0,],
     ...
    ]
    Second layer are the embeddings:
    [ [[.02, .01...],
       [.004, .005, ...],
       ...,
       .0 .0 .0 ... ,
       .0 .0 .0 ...],
      [[...],
       ....
      ]
    ]
    ''' 

    return \
        T.where(T.eq(inputs[0],0), np.float32(0.0), np.float32(1.0)).dimshuffle((0,1,'x')) * inputs[1]
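In other words, the layer zeroes out embedding vectors at padded positions (index 0). Equivalent NumPy logic on a toy batch (shapes are assumptions):

import numpy as np

indices = np.array([[11, 21, 43, 0, 0],
                    [234, 543, 0, 0, 0]])                  # (batch, max_len)
embeddings = np.random.randn(2, 5, 3).astype('float32')    # (batch, max_len, emb)

mask = (indices != 0).astype('float32')[:, :, None]        # (batch, max_len, 1)
masked_embeddings = mask * embeddings                      # padded rows become zero
print(masked_embeddings[0, 3])   # -> [0. 0. 0.]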
Esempio n. 51
0
File: eca.py Progetto: arasmus/eca
    def compile_adapt_f(self, signals):
        x = self.signal(signals)
        x_prev = [p.signal(signals) for p in self.prev]
        assert np.all([x.k == xp.k for xp in x_prev])
        assert self.m == [xp.n for xp in x_prev]
        assert x.n == self.n
        k = np.float32(x.k)
        # Modulate x
        if x.modulation is not None:
            x_ = x.var * T.as_tensor_variable(x.modulation)
        else:
            x_ = x.var

        updates = []
        upd = lambda en, old, new: [(old, ifelse(en, new, old))]

        E_XX_new, _, d = lerp(self.E_XX, T.dot(x_, x_.T) / k, self.min_tau)
        updates += upd(self.enabled, self.E_XX, E_XX_new)
        b = 1.
        d = T.diagonal(E_XX_new)
        stiff = T.scalar('stiffness', dtype=FLOATX)
        Q_new = theano_diag(b / T.where(d < stiff * self.stiffx,
                                        stiff * self.stiffx, d))
        updates += upd(self.enabled, self.Q, Q_new)

        for i, x_p in enumerate(x_prev):
            E_XU_new, _, d_ = lerp(self.E_XU[i], T.dot(x_, x_p.var.T) / k,
                                   self.min_tau)
            updates += upd(self.enabled, self.E_XU[i], E_XU_new)
            d = T.maximum(d, d_)
            updates += upd(self.enabled, self.phi[i], T.dot(Q_new, E_XU_new).T)

        self.info('Compile layer update between: ' + self.name + ' and '
                  + ', '.join([p.name for p in self.prev]))
        return theano.function(
            inputs=[stiff],
            outputs=d,
            updates=updates)
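The function above maintains leaky estimates of the second-moment matrices and floors their diagonal before inverting it. A simplified NumPy sketch of one adaptation step, assuming lerp is a plain exponential moving average and omitting the modulation and enable logic:

import numpy as np

def adapt_step(E_XX, E_XU, x, x_prev, tau=10.0, stiff=1.0, stiffx=0.05):
    """One adaptation step in the spirit of compile_adapt_f (a sketch;
    x is (n, k), x_prev is (m, k), with k samples stored as columns)."""
    k = float(x.shape[1])
    E_XX = E_XX + (x.dot(x.T) / k - E_XX) / tau        # leaky estimate of <x x^T>
    E_XU = E_XU + (x.dot(x_prev.T) / k - E_XU) / tau   # leaky estimate of <x u^T>
    d = np.diagonal(E_XX)
    Q = np.diag(1.0 / np.maximum(d, stiff * stiffx))   # floored inverse variances
    phi = Q.dot(E_XU).T                                # updated mapping, (m, n)
    return E_XX, E_XU, phi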
Esempio n. 52
0
    def forward_pass(self, orch_past, piano, batch_size):
        ################################################################
        ################################################################
        ################################################################
        # Normalization by the number of notes
        # orch_past_norm = self.number_note_normalization_fun(orch_past)
        # piano_norm = self.number_note_normalization_fun(piano)

        # TEST : batch norm on the input
        # orch_past_norm = batch_norm(orch_past, (self.temporal_order, self.n_o))
        # piano_norm = batch_norm(piano, (self.n_p,))
        #
        orch_past_norm = orch_past
        piano_norm = piano
        ################################################################
        ################################################################
        ################################################################

        # Time needs to be the first dimension
        orch_past_loop = orch_past_norm.dimshuffle((1, 0, 2))

        # Initialization
        input_layer = [None]*(self.n_layer+1)
        input_layer[0] = orch_past_loop
        n_lm1 = self.n_o

        # Loop
        for layer, n_h in enumerate(self.n_hs):
            s_0 = T.zeros((batch_size, n_h), dtype=theano.config.floatX)
            # Infer hidden states
            s_seq, updates = theano.scan(fn=self.iteration,
                                         sequences=[input_layer[layer]],
                                         outputs_info=[s_0],
                                         non_sequences=[self.W_z[layer], self.U_z[layer], self.b_z[layer],
                                                        self.W_r[layer], self.U_r[layer], self.b_r[layer],
                                                        self.W_h[layer], self.U_h[layer], self.b_h[layer],
                                                        n_lm1])

            # Inputs for the next layer are the hidden units of the current layer
            input_layer[layer+1] = s_seq
            # Update dimension
            n_lm1 = n_h

        # Last hidden units
        last_hidden = input_layer[self.n_layer]

        # Orchestra representation is the last state of the topmost rnn
        orchestra_repr = last_hidden[-1]

        ################################################################
        ################################################################
        ################################################################
        # Batch Normalization or no ??
        # orchestra_repr_norm = batch_norm(orchestra_repr, (n_lm1,))
        orchestra_repr_norm = orchestra_repr
        ################################################################
        ################################################################
        ################################################################
        
        ################################################################
        ################################################################
        # Piano through a mlp ?
        piano_repr = T.nnet.sigmoid(T.dot(piano_norm, self.W_piano) + self.b_piano)
        ################################################################
        ################################################################

        ################################################################
        ################################################################
        # Sum or concatenate
        # concat_input = T.concatenate([orchestra_repr_norm, piano_repr], axis=1)
        concat_input = orchestra_repr_norm + self.sum_coeff * piano_repr
        ################################################################
        ################################################################

        # Last layer
        orch_pred_mean = T.nnet.sigmoid(T.dot(concat_input, self.W) + self.b)

        ################################################################
        ################################################################
        ################################################################
        # Before sampling, we THRESHOLD
        orch_pred_mean_threshold = T.where(T.le(orch_pred_mean, self.threshold), 0, orch_pred_mean)
        ################################################################
        ################################################################
        ################################################################

        # Sampling
        orch_pred = self.rng.binomial(size=orch_pred_mean_threshold.shape, n=1, p=orch_pred_mean_threshold,
                                      dtype=theano.config.floatX)

        return orch_pred_mean, orch_pred_mean_threshold, orch_pred, updates
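The last two steps threshold the sigmoid means and then sample binary outputs from them; roughly, in NumPy:

import numpy as np

rng = np.random.RandomState(0)
orch_pred_mean = rng.rand(4, 8).astype('float32')   # sigmoid outputs in [0, 1)
threshold = 0.2

# probabilities at or below the threshold are zeroed before sampling
orch_pred_mean_threshold = np.where(orch_pred_mean <= threshold, 0.0, orch_pred_mean)
orch_pred = rng.binomial(n=1, p=orch_pred_mean_threshold).astype('float32')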
def binary_entropy(predictions, targets):
    predictions = T.where(T.lt(predictions, 0.01), 0.01, predictions)
    predictions = T.where(T.gt(predictions, 0.99), 0.99, predictions)
    loss  = - targets * (T.log(predictions) - T.log(targets))
    loss -= (1. - targets) * (T.log(1. - predictions) - T.log(1. - targets))
    return loss
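With the clipping in place this is the binary KL divergence between target and prediction probabilities, so it is zero when they coincide (targets of exactly 0 or 1 would need the same clipping to avoid log(0)). A quick NumPy check of the same formula:

import numpy as np

def binary_kl(p_pred, p_true):
    p_pred = np.clip(p_pred, 0.01, 0.99)
    return (-p_true * (np.log(p_pred) - np.log(p_true))
            - (1. - p_true) * (np.log(1. - p_pred) - np.log(1. - p_true)))

print(binary_kl(0.3, 0.3))   # -> 0.0
print(binary_kl(0.9, 0.3))   # -> positive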
Esempio n. 54
0
    def variational_expectations(self, Y, m, v, gh_points=None, Y_metadata=None):
        if not self.run_already:
            from theano import tensor as t
            import theano
            #Should really be a matrix for multiple outputs
            y = t.matrix(name='y')
            f = t.matrix(name='f')
            g = t.matrix(name='g')
            c = t.matrix(name='c')
            #ef = t.where(f > 18., f, t.log1p(t.exp(f)))
            #eg = t.where(g > 18., g, t.log1p(t.exp(g)))
            #ef = t.nnet.softplus(f)
            #eg = t.nnet.softplus(g)
            ef = t.exp(f)
            eg = t.exp(g)

            #In log(1+b) if b > 300, use log(b) as 1 isn't relevant anymore
            #inner_1 = (y/ef)**eg  # Naively
            #inner = t.exp(eg*(t.log(y) - t.log(ef)))  # do it in log space then exp, then do log1p
            #clip_log1p_inner_1 = t.where(inner_1 > 300, eg*(t.log(y) - t.log(ef)), t.log1p(inner_1))
            #clip_log1p_inner = t.log1p(inner)

            inner = eg*(t.log(y) - t.log(ef))  # We are going to do log(1+a) which is log(1+exp(log a)) which is softplus(log a) where log a is stable!
            clip_log1p_inner = t.nnet.softplus(inner)

            #Full log likelihood before expectations
            #logpy_t = (1-c)*(+t.log(eg) - eg*t.log(ef) + (eg - 1)*t.log(y) - 2*clip_log1p_inner_1) + c*(-clip_log1p_inner_1)
            #logpy_t_1 = t.where(c, -clip_log1p_inner_1, t.log(eg) - eg*t.log(ef) + (eg - 1)*t.log(y) - 2*clip_log1p_inner_1)
            logpy_t = t.where(c, -clip_log1p_inner, t.log(eg) - eg*t.log(ef) + (eg - 1)*t.log(y) - 2*clip_log1p_inner)
            logpy_sum_t = t.sum(logpy_t)

            dF_df_t = theano.grad(logpy_sum_t, f)
            d2F_df2_t = 0.5*theano.grad(t.sum(dF_df_t), f)  # This right?
            dF_dg_t = theano.grad(logpy_sum_t, g)
            d2F_dg2_t = 0.5*theano.grad(t.sum(dF_dg_t), g)  # This right?

            self.logpy_func = theano.function([f,g,y,c],logpy_t)
            self.dF_df_func = theano.function([f,g,y,c],dF_df_t)#, mode='DebugMode')
            self.d2F_df2_func = theano.function([f,g,y,c],d2F_df2_t)
            self.dF_dg_func = theano.function([f,g,y,c],dF_dg_t)
            self.d2F_dg2_func = theano.function([f,g,y,c],d2F_dg2_t)
            self.run_already = True

        #funcs = [self.logpy_func, self.dF_df_func, self.d2F_df2_func, self.dF_dg_func, self.d2F_dg2_func]
        funcs = [self.logpy_func, self.dF_df_func, self.d2F_df2_func, self.dF_dg_func, self.d2F_dg2_func]

        D = Y.shape[1]
        mf, mg = m[:, :D], m[:, D:]
        vf, vg = v[:, :D], v[:, D:]

        c = Y_metadata['censored']
        F = 0 # Could do analytical components here

        T = self.T
        #Need to get these now to duplicate the censored inputs for quadrature
        gh_x, gh_w = self._gh_points(T)
        Y_metadata_new= Y_metadata.copy()
        c = np.repeat(Y_metadata_new['censored'], gh_x.shape[0]**2, axis=0)

        ##Some little code to check the result numerically using quadrature
        #from scipy import integrate
        #i = 6  # datapoint index
        #def quad_func(fi, gi, yi, mgi, vgi, mfi, vfi,ci):
            ##x = safe_exp(-fi*safe_exp(gi))*yi**safe_exp(gi)
            #x = safe_exp(-fi*safe_exp(gi) + safe_exp(gi)*np.log(yi))
            #log1px = np.log1p(x)
            ##return ((*-gammaln(np.exp(fi)) - gammaln(np.exp(gi)) + gammaln(np.exp(fi) + np.exp(gi)))      #p(y|f,g)
            #return (((1-ci)*(-2*log1px) + ci*(-log1px))      #p(y|f,g)
                    #* np.exp(-0.5*np.log(2*np.pi*vgi) - 0.5*((gi - mgi)**2)/vgi) #q(g)
                    #* np.exp(-0.5*np.log(2*np.pi*vfi) - 0.5*((fi - mfi)**2)/vfi) #q(f)
                    #)
        #quad_func_l = partial(quad_func, yi=Y[i], mgi=mg[i], vgi=vg[i], mfi=mf[i], vfi=vf[i], ci=Y_metadata['censored'][i])
        #def integrl(gi):
            #return integrate.quad(quad_func_l, -30, 5, args=(gi))[0]
        #print "Numeric scipy F quad"
        #print integrate.quad(lambda fi: integrl(fi), -30, 5)

        #(F_quad, dF_dmf, dF_dvf, dF_dmg, dF_dvg) = self.quad2d(funcs=funcs, Y=Y, mf=mf, vf=vf, mg=mg, vg=vg,
                                                               #gh_points=gh_points, exp_f=False, exp_g=False, c=c)
        (F_quad, dF_dmf, dF_dvf, dF_dmg, dF_dvg) = self.quad2d(funcs=funcs, Y=Y, mf=mf, vf=vf, mg=mg, vg=vg,
                                                               gh_points=gh_points, exp_f=False, exp_g=False, c=c)

        #print "2d quad F quad"
        #print F_quad[i]
        F += F_quad
        #gprec = safe_exp(mg - 0.5*vg)
        dF_dmf += 0  #(1-c)*(-gprec)
        dF_dmg += 0  #(1-c)*(1 + gprec*(np.log(Y) - mf))
        dF_dvf += 0  # ?
        dF_dvg += 0  # ?

        dF_dm = np.hstack((dF_dmf, dF_dmg))
        dF_dv = np.hstack((dF_dvf, dF_dvg))

        if np.any(np.isnan(F_quad)):
            print("We have a nan in F_quad")
        if np.any(np.isnan(dF_dmf)):
            print("We have a nan in dF_dmf")
        if np.any(np.isnan(dF_dmg)):
            print("We have a nan in dF_dmg")

        return F, dF_dm, dF_dv, None
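The key numerical trick above is rewriting log(1 + (y / e^f)^(e^g)) as softplus(e^g * (log y - f)), which keeps the computation in log space and avoids overflow when the inner ratio is huge. A standalone NumPy illustration (values chosen arbitrarily):

import numpy as np

def softplus(a):
    # numerically stable log(1 + exp(a))
    return np.logaddexp(0.0, a)

y, f, g = 50.0, -5.0, 2.0
eg = np.exp(g)
inner = eg * (np.log(y) - f)                 # log of (y / e^f)^(e^g)
naive = np.log1p((y / np.exp(f)) ** eg)      # overflows for more extreme f, g
stable_val = softplus(inner)                 # stays finite in log space
print(naive, stable_val)                     # the two agree here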
Esempio n. 55
0
def create_lasagne_network(num_features):
    # Create data for testing network dimensions
    x_sym = T.fmatrix('x_sym')
    y_sym = T.fmatrix('y_sym')

    # create test data
    X = np.random.rand((128* num_features)).astype('float32').reshape((-1, num_features))
    y = np.random.rand((128)).astype('float32').reshape((-1, 1))


    """
    Create lasagne layers
    """
    l_in = lasagne.layers.InputLayer((None, num_features),name='input')
    print "l_in shape: %s" % str((lasagne.layers.get_output(l_in, inputs={l_in: x_sym}).eval({x_sym: X}).shape))


    l_hidden = lasagne.layers.DenseLayer(l_in, num_units=500, name='l_hidden',nonlinearity=None)
    print "l_output shape: %s" % str((lasagne.layers.get_output(l_hidden, inputs={l_in: x_sym}).eval({x_sym: X}).shape))

    l_out = lasagne.layers.DenseLayer(l_hidden, num_units=1, name='l_out',nonlinearity=None)
    print "l_output shape: %s" % str((lasagne.layers.get_output(l_out, inputs={l_in: x_sym}).eval({x_sym: X}).shape))

    output_train = lasagne.layers.get_output(l_out, inputs={l_in: x_sym},deterministic=False)
    print "output_train shape: %s" % str(output_train.eval({x_sym: X}).shape)


    out = output_train.flatten()

    total_cost = T.where(y_sym.flatten()>0, (out-y_sym.flatten())/(y_sym.flatten()),0)**2

    print "total_cost: %s" % str((total_cost.eval({x_sym: X, y_sym: y})).shape)

    all_trainable_parameters = lasagne.layers.get_all_params([l_out], trainable=True)

    mean_cost = T.sqrt(T.mean(total_cost))


    all_grads = T.grad(mean_cost, all_trainable_parameters)
    learning_rate = theano.shared(np.cast['float32'](0.01))
    updates = lasagne.updates.adam(all_grads, all_trainable_parameters, learning_rate=learning_rate)


    """
    Create theano functions to be used in the training loop and for making predictions
    """
    train_func = theano.function([x_sym, y_sym], [mean_cost], updates=updates)
    test_func = theano.function([x_sym, y_sym], [mean_cost])
    predict_func = theano.function([x_sym], [out])

    # when the input X is a dict, the following definitions will allow LasagneNet to call train_func without
    # knowing the order of the inputs, using the syntax train_function(**X)
    def train_function(X, y):
        return train_func(X, y.reshape((-1,1)))

    def test_function(X, y):
        return test_func(X, y.reshape((-1,1)))

    def predict_function(X):
        return predict_func(X)


    return l_out, train_function, test_function, predict_function, learning_rate
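A possible training loop for the returned functions (illustrative only; the feature count and data below are arbitrary, and the targets are kept strictly positive so the relative error is well defined):

import numpy as np

l_out, train_fn, test_fn, predict_fn, learning_rate = create_lasagne_network(num_features=20)

X = np.random.rand(512, 20).astype('float32')
y = (X.sum(axis=1) + 1.0).astype('float32')   # strictly positive targets

for epoch in range(10):
    cost, = train_fn(X, y)
    print("epoch %d, relative RMS error: %.4f" % (epoch, float(cost)))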
Esempio n. 56
0
def maxabs(t1, t2):
    pos = T.where(t1 > t2, t1, t2)
    neg = T.where(-t1 > -t2, t1, t2)
    ret = T.where(pos >= -neg, pos, neg)
    return ret
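maxabs picks, elementwise, whichever input has the larger absolute value (ties go to the larger value). A small usage check, assuming maxabs above and the usual theano.tensor import as T are in scope:

import theano
import theano.tensor as T

t1, t2 = T.dvectors('t1', 't2')
f = theano.function([t1, t2], maxabs(t1, t2))
print(f([1.0, -3.0, 0.5], [-2.0, 2.0, -0.5]))   # -> [-2. -3.  0.5]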
Esempio n. 57
0
 def grad_(index, scores):
     ifnull = [T.zeros_like(p) for p in self.params.values()]
     g = IFEL(T.eq(self.idxs[index], self.NULL), ifnull, theano.grad(scores[index], self.params.values()))
     return [T.where(T.isnan(g_), T.zeros_like(g_), g_) for g_ in g]
Esempio n. 58
0
 def stable(x, stabilize=True):
     if stabilize:
         x = T.where(T.isnan(x), 1000., x)
         x = T.where(T.isinf(x), 1000., x)
     return x
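stable replaces non-finite entries with a large constant so that downstream reductions stay finite, for example (assuming stable above and theano.tensor as T in scope):

import numpy as np
import theano
import theano.tensor as T

x = T.dvector('x')
f = theano.function([x], stable(x))
print(f(np.array([1.0, np.nan, np.inf])))   # -> [   1. 1000. 1000.]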
Esempio n. 59
0
def huber(x, eps):
    return T.where(abs(x) < eps, x**2 / (2 * eps), abs(x) - eps/2.)
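This is an eps-scaled Huber loss: quadratic for |x| < eps and linear beyond, with both branches meeting at eps/2 when |x| = eps. A NumPy mirror for a quick check:

import numpy as np

def huber_np(x, eps):
    x = np.asarray(x, dtype=float)
    return np.where(np.abs(x) < eps, x ** 2 / (2 * eps), np.abs(x) - eps / 2.)

print(huber_np([0.05, 0.1, 2.0], eps=0.1))   # -> [0.0125 0.05   1.95  ]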