def _get_elbo(self):
        """
        negative ELBO, an upper bound on the NLL
        """

        logdets = self.logdets
        self.logqw = -logdets
        """
        originally...
        logqw = - (0.5*(ep**2).sum(1)+0.5*T.log(2*np.pi)*num_params+logdets)
            --> constants are neglected in this wrapper
        """
        self.logpw = self.prior(self.weights, 0., -T.log(self.lbda)).sum(1)
        """
        using a normal prior centered at zero, with lbda being the inverse
        of the variance
        """
        self.kl = (self.logqw - self.logpw).mean()
        if self.output_type == 'categorical':
            self.logpyx = -cc(self.y, self.target_var).mean()
        elif self.output_type == 'real':
            self.logpyx = -se(self.y, self.target_var).mean()
        else:
            assert False
        self.loss = - (self.logpyx - \
                       self.weight * self.kl/T.cast(self.dataset_size,floatX))

        # DK - extra monitoring
        params = self.params
        ds = self.dataset_size
        self.logpyx_grad = flatten_list(
            T.grad(-self.logpyx, params, disconnected_inputs='warn')).norm(2)
        self.logpw_grad = flatten_list(
            T.grad(-self.logpw.mean() / ds, params,
                   disconnected_inputs='warn')).norm(2)
        self.logqw_grad = flatten_list(
            T.grad(self.logqw.mean() / ds, params,
                   disconnected_inputs='warn')).norm(2)
        self.monitored = [
            self.logpyx, self.logpw, self.logqw, self.logpyx_grad,
            self.logpw_grad, self.logqw_grad
        ]
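# A hedged NumPy sketch (assumed names, not this repo's code) of the objective
# above: the loss is the negative ELBO, -(E[log p(y|x,w)] - weight*KL(q||p)/N),
# with the KL term divided by the dataset size N so it matches a per-example
# average of the log-likelihood.
import numpy as np

def neg_elbo_sketch(logpyx, logqw, logpw, dataset_size, weight=1.0):
    """Negative ELBO estimate from Monte Carlo weight samples."""
    kl = np.mean(logqw - logpw)  # MC estimate of KL(q(w) || p(w))
    return -(np.mean(logpyx) - weight * kl / dataset_size)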
    def _get_elbo(self):
        """
        negative ELBO, an upper bound on the NLL
        """
        self.logpyx = - cc(self.y,self.target_var).mean()
        self.loss = - (self.logpyx - \
                       self.weight * self.kl/T.cast(self.dataset_size,floatX))

        # DK - extra monitoring
        params = self.params
        ds = self.dataset_size
        self.logpyx_grad = flatten_list(T.grad(-self.logpyx, params, disconnected_inputs='warn')).norm(2)
        self.logpw_grad = flatten_list(T.grad(-self.logpw.mean() / ds, params, disconnected_inputs='warn')).norm(2)
        self.logqw_grad = flatten_list(T.grad(self.logqw.mean() / ds, params, disconnected_inputs='warn')).norm(2)
        self.monitored = [self.logpyx, self.logpw, self.logqw,
                          self.logpyx_grad, self.logpw_grad, self.logqw_grad]
        
    def _get_elbo(self):
        # NTS: is KL waaay too big??
        self.kl = KL(self.prior_mean, self.prior_log_var, self.mean,
                     self.log_var).sum(-1).mean()

        if self.output_type == 'categorical':
            self.logpyx = -cc(self.y, self.target_var).mean()
        elif self.output_type == 'real':
            self.logpyx = -se(self.y, self.target_var).mean()
        else:
            assert False
        self.loss = - (self.logpyx - \
                       self.weight * self.kl/T.cast(self.dataset_size,floatX))

        # DK - extra monitoring
        params = self.params
        ds = self.dataset_size
        self.logpyx_grad = flatten_list(
            T.grad(-self.logpyx, params, disconnected_inputs='warn')).norm(2)
        self.monitored = [self.logpyx, self.logpyx_grad,
                          self.kl]  #, self.target_var]
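# A hedged sketch (assumed semantics, not the repo's KL helper) of the
# closed-form divergence used above: KL from the diagonal-Gaussian posterior
# q = N(mean, exp(log_var)) to the prior p = N(prior_mean, exp(prior_log_var)),
# computed per dimension before the .sum(-1).mean() above.
import numpy as np

def kl_diag_gaussians(mu_p, logvar_p, mu_q, logvar_q):
    """Elementwise KL(q || p) for diagonal Gaussians."""
    return 0.5 * (logvar_p - logvar_q
                  + (np.exp(logvar_q) + (mu_q - mu_p) ** 2) / np.exp(logvar_p)
                  - 1.0)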
Example n. 4
def main(data_raw_one, data_raw_two):
    
    #print("Data raw one: ", data_raw_one)
    #print("Data raw two: ", data_raw_two)
    
    global X_indices
    global mini_indices
    global maxi_indices
    
    global X_indices_real
    
    data_raw = generateInputData(data_raw_one, data_raw_two)
    data_split = [data_raw] # Data already split...
    
    extremeValue = False
    
    if 1018 in np.transpose(data_split)[0]:
        extremeValue = True
        
    if extremeValue:
        print("warning")
        
    else:
        data = data_split
        print("Data: ", data, len(data))
        
        # Get Extreme Points
        mini, maxi = hp.extremePointsCorrelationMain(data, 10, mini_indices, maxi_indices)
        
        #print("Mini zuzu, Maxi zuzuzu: ", mini, maxi, mini_indices, maxi_indices)
        
        # Get frequency bands
        cores_real_numbers = hp.getFrequencies(1,8, data)
        #print("COOOOOOOOORRRRREESSS 01010101", cores_real_numbers)
        #print("cores_real_numbers: ", cores_real_numbers)

        # Combine features
        X_whole_input = cores_real_numbers
        
        #print("X_whole_input: ", X_whole_input[0:20], len(X_whole_input))

        X_reduced = reduceFeatures(X_whole_input, X_indices)
        #X_reduced = X_reduced + mini + maxi
        #print("X_reduced: ", X_reduced)
        #print("Len X_reduced: ", len(X_reduced))
        #print("X_reduced: ", X_reduced[len(X_reduced) - 1])
        #print("Len X_reduced: ", X_reduced[len(X_reduced) - 1])
        
        X_predict = hp.flatten_list(X_reduced)
        
        for i in mini:
            for x in i:
                X_predict.append(x)
        for i in maxi:
            for x in i:
                X_predict.append(x)
                
                
        X_reduced_res_real = []
    
        c = 0
        for x in X_predict:
            if c in X_indices_real:
                X_reduced_res_real.append(x)
            c += 1
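        # (the loop above is plain index selection; an equivalent one-liner:
        #  X_reduced_res_real = [x for c, x in enumerate(X_predict)
        #                        if c in X_indices_real])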
        
        print("X PREDICT: ", X_reduced_res_real, len(X_reduced_res_real))
        #    print("UIUO", data)
        #   print("UIUO", mini, maxi)
        #   print("UIUO", cores_real_numbers)
        # print("UIUO", X_whole_input)
        #    print("--------------------")
        #   print("X WHOLE INPUT: ", len(X_whole_input))
        #  print("X INDICES: ", X_indices)
        ##print("REDUCE FEATURES: ", reduceFeatures(X_whole_input, X_indices))
        #print(len(X_reduced))
        
        
        
        prediction = clf.predict([X_reduced_res_real])
        pred_linreg = reg.predict([X_reduced_res_real])
        
        pred.append([prediction, pred_linreg])
        
        
        #if prediction == 1:
            #print("SIGNAL!!!!!!! ", prediction)
        #else:
            #print("No Signal. ", prediction)
            
        print("RANDOM FOREST PREDICTION: ", prediction)
        print("LIN REG PREDICTION: ", pred_linreg)
        layer = lasagne.layers.InputLayer([None, 784])
        inputs = {layer: input_var}
        for nn, ws in enumerate(weight_shapes):
            layer = lasagne.layers.DenseLayer(layer, ws[1])
            if nn < len(weight_shapes) - 1 and model in [
                    'dropout', 'dropout2'
            ]:
                layer = lasagne.layers.dropout(layer, .5)
            print(layer.output_shape)
        layer.nonlinearity = nonlinearities.softmax
        y = get_output(layer, inputs)
        #y = T.clip(y, 0.001, 0.999) # stability
        loss = cc(y, target_var).mean()
        params = lasagne.layers.get_all_params([h_layer, layer])
        loss = loss + lasagne.regularization.l2(
            flatten_list(params)) * np.float32(1.e-5)

    # TRAIN FUNCTION
    grads = T.grad(loss, params)
    mgrads = lasagne.updates.total_norm_constraint(grads, max_norm=max_norm)
    cgrads = [T.clip(g, -clip_grad, clip_grad) for g in mgrads]
    updates = lasagne.updates.adam(cgrads, params, learning_rate=lr)

    train = theano.function([input_var, target_var, dataset_size, lr],
                            loss,
                            updates=updates,
                            on_unused_input='warn')
    predict = theano.function([input_var], y.argmax(1))

    ##################
    # TRAIN
        # filler
        h_layer = lasagne.layers.InputLayer([None, 784])
        # JUST primary net
        layer = lasagne.layers.InputLayer([None,784])
        inputs = {layer:input_var}
        for nn, ws in enumerate(weight_shapes):
            layer = lasagne.layers.DenseLayer(layer, ws[1], nonlinearity=nonlinearity)
            if nn < len(weight_shapes)-1 and model == 'dropout':
                layer = lasagne.layers.dropout(layer, .5)
            print(layer.output_shape)
        layer.nonlinearity = nonlinearities.softmax
        y = get_output(layer,inputs)
        y = T.clip(y, 0.001, 0.999) # stability 
        loss = cc(y,target_var).mean()
        params = lasagne.layers.get_all_params([h_layer,layer])
        loss = loss + lasagne.regularization.l2(flatten_list(params)) * np.float32(1.e-5)

    
    # TRAIN FUNCTION
    grads = T.grad(loss, params)
    mgrads = lasagne.updates.total_norm_constraint(grads,
                                                   max_norm=max_norm)
    cgrads = [T.clip(g, -clip_grad, clip_grad) for g in mgrads]
    updates = lasagne.updates.adam(cgrads, params, 
                                   learning_rate=lr)
                                        
    train = theano.function([input_var,target_var,dataset_size,lr],
                            loss,updates=updates,
                            on_unused_input='warn')
    predict = theano.function([input_var],y.argmax(1))
    predict_probs = theano.function([input_var],y)
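    # Hedged usage sketch (X_train/X_test/n_epochs/iterate_minibatches are
    # assumptions, not defined in this fragment): the compiled train function
    # takes the inputs declared above, in order.
    #   for epoch in range(n_epochs):
    #       for X_b, y_b in iterate_minibatches(X_train, y_train, 128):
    #           batch_loss = train(X_b, y_b, X_train.shape[0], .001)
    #   preds = predict(X_test)        # class labels
    #   probs = predict_probs(X_test)  # class probabilities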
    def __init__(
            self,
            srng=RandomStreams(seed=427),
            prior_mean=0,
            prior_log_var=0,
            n_hiddens=2,
            n_units=800,
            n_inputs=784,
            n_classes=10,
            output_type='categorical',
            random_biases=1,
            #dataset_size=None,
            opt='adam',
            #weight=1.,# the weight of the KL term
            **kargs):

        self.__dict__.update(locals())

        # TODO
        self.dataset_size = T.scalar('dataset_size')
        self.weight = T.scalar('weight')
        self.learning_rate = T.scalar('learning_rate')

        self.weight_shapes = []
        if n_hiddens > 0:
            self.weight_shapes.append((n_inputs, n_units))
            #self.params.append((theano.shared()))
            for i in range(1, n_hiddens):
                self.weight_shapes.append((n_units, n_units))
            self.weight_shapes.append((n_units, n_classes))
        else:
            self.weight_shapes = [(n_inputs, n_classes)]

        if self.random_biases:
            self.num_params = sum(
                (ws[0] + 1) * ws[1] for ws in self.weight_shapes)
        else:
            self.num_params = sum((ws[0]) * ws[1] for ws in self.weight_shapes)

        self.wd1 = 1
        self.X = T.matrix()
        self.y = T.matrix()
        self.mean = ts(self.num_params)
        self.log_var = ts(self.num_params, scale=1e-6, bias=-1e8)
        self.params = [self.mean, self.log_var]
        self.ep = self.srng.normal(size=(self.num_params, ), dtype=floatX)
        self.weights = self.mean + (T.exp(self.log_var) +
                                    np.float32(.000001)) * self.ep
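        # Reparameterization trick: w = mu + sigma * eps, eps ~ N(0, I), so
        # gradients flow to mean/log_var through the sampled weights.
        # (Hedged note: the code scales by exp(log_var) rather than
        # exp(0.5 * log_var); if log_var is a log-variance, the usual
        # std-dev form would be mean + T.exp(0.5 * self.log_var) * ep.)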

        t = 0
        acts = self.X
        for nn, ws in enumerate(self.weight_shapes):
            if self.random_biases:
                # slice this layer's weights and biases out of the flat
                # sampled parameter vector
                num_param = (ws[0] + 1) * ws[1]
                weight_and_bias = self.weights[t:t + num_param]
                weight = weight_and_bias[:ws[0] * ws[1]].reshape(
                    (ws[0], ws[1]))
                bias = weight_and_bias[ws[0] * ws[1]:].reshape((ws[1], ))
                acts = T.dot(acts, weight) + bias
            else:
                assert False  # TODO
            if nn < len(self.weight_shapes) - 1:
                acts = (acts > 0.) * (acts)  # ReLU
            else:
                acts = T.nnet.softmax(acts)

            t += num_param

        y_hat = acts
        #y_hat = T.clip(y_hat, 0.001, 0.999) # stability
        self.y_hat = y_hat

        self.kl = KL(self.prior_mean, self.prior_log_var, self.mean,
                     self.log_var).sum(-1).mean()
        self.logpyx = -cc(self.y_hat, self.y).mean()
        self.logpyx = -se(self.y_hat,
                          self.y).mean()  # overwrites the categorical logpyx above
        self.loss = -(self.logpyx - self.weight * self.kl /
                      T.cast(self.dataset_size, floatX))
        self.loss = se(self.y_hat,
                       self.y).mean()  # overwrites the ELBO loss with plain squared error
        self.logpyx_grad = flatten_list(
            T.grad(-self.logpyx, self.params,
                   disconnected_inputs='warn')).norm(2)
        self.monitored = [self.logpyx, self.logpyx_grad, self.kl]

        #def _get_useful_funcs(self):
        self.predict_proba = theano.function([self.X], self.y_hat)
        self.predict = theano.function([self.X], self.y_hat.argmax(1))
        self.predict_fixed_mask = theano.function([self.X, self.weights],
                                                  self.y_hat)
        self.sample_weights = theano.function([], self.weights)
        self.monitor_fn = theano.function(
            [self.X, self.y], self.monitored)  #, (self.predict(x) == y).sum()

        #def _get_grads(self):
        grads = T.grad(self.loss, self.params)
        #mgrads = lasagne.updates.total_norm_constraint(grads, max_norm=self.max_norm)
        #cgrads = [T.clip(g, -self.clip_grad, self.clip_grad) for g in mgrads]
        cgrads = grads
        if self.opt == 'adam':
            self.updates = lasagne.updates.adam(
                cgrads, self.params, learning_rate=self.learning_rate)
        elif self.opt == 'momentum':
            self.updates = lasagne.updates.nesterov_momentum(
                cgrads, self.params, learning_rate=self.learning_rate)
        elif self.opt == 'sgd':
            self.updates = lasagne.updates.sgd(
                cgrads, self.params, learning_rate=self.learning_rate)

        #def _get_train_func(self):
        inputs = [
            self.X, self.y, self.dataset_size, self.learning_rate, self.weight
        ]
        train = theano.function(inputs,
                                self.loss,
                                updates=self.updates,
                                on_unused_input='warn')
        self.train_func_ = train
        # DK - putting this here, because it doesn't get overwritten by subclasses
        self.monitor_func = theano.function(
            [self.X, self.y, self.dataset_size, self.learning_rate],
            self.monitored,
            on_unused_input='warn')
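        # Hedged usage sketch (X_train/n_epochs/iterate_minibatches are
        # assumptions, not part of this class): train_func_ takes
        # (X, y, dataset_size, learning_rate, weight), where `weight` scales
        # the KL term and can be annealed from 0 toward 1 as a warm-up.
        #   N = X_train.shape[0]
        #   for epoch in range(n_epochs):
        #       w_kl = min(1., epoch / 10.)  # simple linear KL warm-up
        #       for X_b, y_b in iterate_minibatches(X_train, y_train, 128):
        #           loss = self.train_func_(X_b, y_b, N, .001, w_kl)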
Example n. 8
    def __init__(self,
                 arch=None,
                 lbda=1,
                 perdatapoint=False,
                 srng=RandomStreams(seed=427),
                 prior=log_normal,
                 opt='adam',
                 coupling=4,
                 coupling_dim=200,
                 pad='same',
                 stride=2,
                 pool=None,
                 uncoupled_init=0,
                 convex_combination=0):

        if arch == 'Riashat':
            kernel_width = 3
            self.kernel_width = kernel_width
            stride = 1
            self.stride = stride
            pad = 'valid'
            self.pad = pad
            self.weight_shapes = [
                (32, 1, kernel_width, kernel_width),  # -> (None, 16, 14, 14)
                (32, 32, kernel_width, kernel_width)
            ]  # -> (None, 16,  7,  7)
            self.args = [[32, kernel_width, stride, pad, rectify, 'none'],
                         [32, kernel_width, stride, pad, rectify, 'max']]
            self.pool_size = 5
        else:
            self.pool_size = 2
            # NOTE: weight_shapes and args are only defined for the 'Riashat'
            # arch in this fragment; other arch values would fail below

        self.n_kernels = np.array(self.weight_shapes)[:, 1].sum()
        self.kernel_shape = self.weight_shapes[0][:1] + self.weight_shapes[0][
            2:]
        print "kernel_shape", self.kernel_shape
        self.kernel_size = np.prod(self.weight_shapes[0])

        self.num_classes = 10
        if arch == 'Riashat':
            self.num_hids = 256
        else:
            self.num_hids = 128
        self.num_mlp_layers = 1
        self.num_mlp_params = self.num_classes + \
                              self.num_hids * self.num_mlp_layers
        self.num_cnn_params = np.sum(np.array(self.weight_shapes)[:, 0])
        self.num_params = self.num_mlp_params + self.num_cnn_params
        self.coupling = coupling
        self.extra_l2 = 0
        self.convex_combination = convex_combination

        #def __init__(self,

        self.lbda = lbda
        self.perdatapoint = perdatapoint
        self.srng = srng
        self.prior = prior
        self.__dict__.update(locals())

    #def _get_theano_variables(self):
        self.input_var = T.tensor4('input_var')  # <-- 4D input for the CNN
        self.target_var = T.matrix('target_var')
        self.dataset_size = T.scalar('dataset_size')
        self.learning_rate = T.scalar('learning_rate')

        # input_var must exist before the per-datapoint branch uses its shape
        if perdatapoint:
            self.wd1 = self.input_var.shape[0]
        else:
            self.wd1 = 1

        #def _get_hyper_net(self):
        # initial random noise
        print(self.num_params)
        ep = self.srng.normal(size=(self.wd1, self.num_params), dtype=floatX)
        logdets_layers = []
        h_net = lasagne.layers.InputLayer([None, self.num_params])

        # mean and variation of the initial noise
        layer_temp = LinearFlowLayer(h_net)
        h_net = IndexLayer(layer_temp, 0)
        logdets_layers.append(IndexLayer(layer_temp, 1))

        if self.coupling:
            layer_temp = CoupledWNDenseLayer(h_net,
                                             coupling_dim,
                                             uncoupled_init=uncoupled_init)
            h_net = IndexLayer(layer_temp, 0)
            logdets_layers.append(IndexLayer(layer_temp, 1))

            for c in range(self.coupling - 1):
                h_net = PermuteLayer(h_net, self.num_params)

                layer_temp = CoupledWNDenseLayer(h_net,
                                                 coupling_dim,
                                                 uncoupled_init=uncoupled_init)
                h_net = IndexLayer(layer_temp, 0)
                logdets_layers.append(IndexLayer(layer_temp, 1))

        if self.convex_combination:
            layer_temp = ConvexBiasLayer(
                h_net, upweight_primary=self.convex_combination)
            h_net = IndexLayer(layer_temp, 0)
            logdets_layers.append(IndexLayer(layer_temp, 1))

        self.h_net = h_net
        self.weights = lasagne.layers.get_output(h_net, ep)
        self.logdets = sum([get_output(ld, ep) for ld in logdets_layers])

        #def _get_primary_net(self):

        t = np.cast['int32'](0)
        if 1:  #self.dataset == 'mnist':
            p_net = lasagne.layers.InputLayer([None, 1, 28, 28])
        print(p_net.output_shape)
        inputs = {p_net: self.input_var}

        #logpw = np.float32(0.)

        for ws, args in zip(self.weight_shapes, self.args):

            num_filters = ws[0]

            # TODO: generalize to multiple samples?
            weight = self.weights[0, t:t + num_filters].dimshuffle(
                0, 'x', 'x', 'x')

            num_filters = args[0]
            filter_size = args[1]
            stride = args[2]
            pad = args[3]
            nonl = args[4]
            p_net = lasagne.layers.Conv2DLayer(p_net,
                                               num_filters,
                                               filter_size,
                                               stride,
                                               pad,
                                               nonlinearity=nonl)
            p_net = stochastic_weight_norm(p_net, weight)

            if args[5] == 'max':
                p_net = lasagne.layers.MaxPool2DLayer(p_net, self.pool_size)
            #print p_net.output_shape
            t += num_filters

        for layer in range(self.num_mlp_layers):
            weight = self.weights[:, t:t + self.num_hids].reshape(
                (self.wd1, self.num_hids))
            p_net = lasagne.layers.DenseLayer(p_net,
                                              self.num_hids,
                                              nonlinearity=rectify)
            p_net = stochastic_weight_norm(p_net, weight)
            if self.extra_l2:
                self.l2_penalty = lasagne.regularization.regularize_layer_params_weighted(
                    {p_net: 3.5 / 128}, lasagne.regularization.l2)
            t += self.num_hids

        weight = self.weights[:, t:t + self.num_classes].reshape(
            (self.wd1, self.num_classes))

        p_net = lasagne.layers.DenseLayer(p_net,
                                          self.num_classes,
                                          nonlinearity=nonlinearities.softmax)
        p_net = stochastic_weight_norm(p_net, weight)

        y = T.clip(get_output(p_net, inputs), 0.001, 0.999)  # stability

        self.p_net = p_net
        self.y = y

        #def _get_params(self):

        params = lasagne.layers.get_all_params([self.h_net, self.p_net])
        self.params = list()
        for param in params:
            if type(param) is not RSSV:
                self.params.append(param)

        params0 = lasagne.layers.get_all_param_values([self.h_net, self.p_net])
        params = lasagne.layers.get_all_params([self.h_net, self.p_net])
        updates = {p: p0 for p, p0 in zip(params, params0)}
        self.reset = theano.function([], None, updates=updates)
        self.add_reset('init')

        #def _get_elbo(self):

        logdets = self.logdets
        self.logqw = -logdets
        self.logpw = self.prior(self.weights, 0., -T.log(self.lbda)).sum(1)
        self.kl = (self.logqw - self.logpw).mean()
        self.kl_term = self.kl / T.cast(self.dataset_size, floatX)
        self.logpyx = -cc(self.y, self.target_var).mean()
        self.loss = -self.logpyx + self.kl_term

        # DK - extra monitoring (TODO)
        params = self.params
        ds = self.dataset_size
        self.logpyx_grad = flatten_list(
            T.grad(-self.logpyx, params, disconnected_inputs='warn')).norm(2)
        self.logpw_grad = flatten_list(
            T.grad(-self.logpw.mean() / ds, params,
                   disconnected_inputs='warn')).norm(2)
        self.logqw_grad = flatten_list(
            T.grad(self.logqw.mean() / ds, params,
                   disconnected_inputs='warn')).norm(2)
        self.monitored = [
            self.logpyx, self.logpw, self.logqw, self.logpyx_grad,
            self.logpw_grad, self.logqw_grad
        ]

        #def _get_grads(self):
        grads = T.grad(self.loss, self.params)
        mgrads = lasagne.updates.total_norm_constraint(grads,
                                                       max_norm=self.max_norm)
        cgrads = [T.clip(g, -self.clip_grad, self.clip_grad) for g in mgrads]
        if self.opt == 'adam':
            self.updates = lasagne.updates.adam(
                cgrads, self.params, learning_rate=self.learning_rate)
        elif self.opt == 'momentum':
            self.updates = lasagne.updates.nesterov_momentum(
                cgrads, self.params, learning_rate=self.learning_rate)
        elif self.opt == 'sgd':
            self.updates = lasagne.updates.sgd(
                cgrads, self.params, learning_rate=self.learning_rate)

    #def _get_train_func(self):
        train = theano.function([
            self.input_var, self.target_var, self.dataset_size,
            self.learning_rate
        ],
                                self.loss,
                                updates=self.updates)
        self.train_func = train
        # DK - putting this here, because it doesn't get overwritten by subclasses
        self.monitor_func = theano.function([
            self.input_var, self.target_var, self.dataset_size,
            self.learning_rate
        ],
                                            self.monitored,
                                            on_unused_input='warn')

        #def _get_useful_funcs(self):
        self.predict_proba = theano.function([self.input_var], self.y)
        self.predict = theano.function([self.input_var], self.y.argmax(1))
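        # Hedged usage sketch (X_test is an assumption): fresh noise `ep` is
        # drawn on every call, so predict_proba is stochastic; averaging
        # repeated calls gives a Monte Carlo estimate of the predictive
        # distribution.
        #   probs = np.mean([self.predict_proba(X_test) for _ in range(20)],
        #                   axis=0)
        #   y_pred = probs.argmax(1)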