def _get_elbo(self): """ negative elbo, an upper bound on NLL """ logdets = self.logdets self.logqw = -logdets """ originally... logqw = - (0.5*(ep**2).sum(1)+0.5*T.log(2*np.pi)*num_params+logdets) --> constants are neglected in this wrapperfrom utils import log_laplace """ self.logpw = self.prior(self.weights, 0., -T.log(self.lbda)).sum(1) """ using normal prior centered at zero, with lbda being the inverse of the variance """ self.kl = (self.logqw - self.logpw).mean() if self.output_type == 'categorical': self.logpyx = -cc(self.y, self.target_var).mean() elif self.output_type == 'real': self.logpyx = -se(self.y, self.target_var).mean() else: assert False self.loss = - (self.logpyx - \ self.weight * self.kl/T.cast(self.dataset_size,floatX)) # DK - extra monitoring params = self.params ds = self.dataset_size self.logpyx_grad = flatten_list( T.grad(-self.logpyx, params, disconnected_inputs='warn')).norm(2) self.logpw_grad = flatten_list( T.grad(-self.logpw.mean() / ds, params, disconnected_inputs='warn')).norm(2) self.logqw_grad = flatten_list( T.grad(self.logqw.mean() / ds, params, disconnected_inputs='warn')).norm(2) self.monitored = [ self.logpyx, self.logpw, self.logqw, self.logpyx_grad, self.logpw_grad, self.logqw_grad ]
def _get_elbo(self): """ negative elbo, an upper bound on NLL """ self.logpyx = - cc(self.y,self.target_var).mean() self.loss = - (self.logpyx - \ self.weight * self.kl/T.cast(self.dataset_size,floatX)) # DK - extra monitoring params = self.params ds = self.dataset_size self.logpyx_grad = flatten_list(T.grad(-self.logpyx, params, disconnected_inputs='warn')).norm(2) self.logpw_grad = flatten_list(T.grad(-self.logpw.mean() / ds, params, disconnected_inputs='warn')).norm(2) self.logqw_grad = flatten_list(T.grad(self.logqw.mean() / ds, params, disconnected_inputs='warn')).norm(2) self.monitored = [self.logpyx, self.logpw, self.logqw, self.logpyx_grad, self.logpw_grad, self.logqw_grad] self.logpyx = - cc(self.y,self.target_var).mean() self.loss = - (self.logpyx - \ self.weight * self.kl/T.cast(self.dataset_size,floatX))
def _get_elbo(self):
    # NTS: is KL waaay too big??
    self.kl = KL(self.prior_mean, self.prior_log_var,
                 self.mean, self.log_var).sum(-1).mean()
    if self.output_type == 'categorical':
        self.logpyx = -cc(self.y, self.target_var).mean()
    elif self.output_type == 'real':
        self.logpyx = -se(self.y, self.target_var).mean()
    else:
        assert False
    self.loss = - (self.logpyx -
                   self.weight * self.kl / T.cast(self.dataset_size, floatX))

    # DK - extra monitoring
    params = self.params
    ds = self.dataset_size
    self.logpyx_grad = flatten_list(
        T.grad(-self.logpyx, params, disconnected_inputs='warn')).norm(2)
    self.monitored = [self.logpyx, self.logpyx_grad, self.kl]  # , self.target_var]
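# --- Illustration (not part of the original code): a NumPy sketch of the
# closed-form KL between diagonal Gaussians that the KL(...) helper above is
# assumed to compute, i.e. KL( N(mean, exp(log_var)) || N(prior_mean, exp(prior_log_var)) ),
# elementwise; the caller does the reduction, as in .sum(-1).mean():
import numpy as np

def kl_diag_gaussians(prior_mean, prior_log_var, mean, log_var):
    return 0.5 * (prior_log_var - log_var
                  + (np.exp(log_var) + (mean - prior_mean) ** 2) / np.exp(prior_log_var)
                  - 1.0)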
def main(data_raw_one, data_raw_two):
    #print("Data raw one: ", data_raw_one)
    #print("Data raw two: ", data_raw_two)
    global X_indices
    global mini_indices
    global maxi_indices
    global X_indices_real

    data_raw = generateInputData(data_raw_one, data_raw_two)
    data_split = [data_raw]  # data already split

    extremeValue = False
    if 1018 in np.transpose(data_split)[0]:
        extremeValue = True
    if extremeValue:
        print("warning")
    data = data_split
    print("Data: ", data, len(data))

    # Get extreme points
    mini, maxi = hp.extremePointsCorrelationMain(data, 10, mini_indices, maxi_indices)
    #print("Mini, Maxi: ", mini, maxi, mini_indices, maxi_indices)

    # Get frequency bands
    cores_real_numbers = hp.getFrequencies(1, 8, data)
    #print("cores_real_numbers: ", cores_real_numbers)

    # Combine features
    X_whole_input = cores_real_numbers
    #print("X_whole_input: ", X_whole_input[0:20], len(X_whole_input))
    X_reduced = reduceFeatures(X_whole_input, X_indices)
    #X_reduced = X_reduced + mini + maxi
    #print("X_reduced: ", X_reduced, len(X_reduced))

    X_predict = hp.flatten_list(X_reduced)
    for i in mini:
        for x in i:
            X_predict.append(x)
    for i in maxi:
        for x in i:
            X_predict.append(x)

    # Keep only the features selected by X_indices_real
    X_reduced_res_real = []
    c = 0
    for x in X_predict:
        if c in X_indices_real:
            X_reduced_res_real.append(x)
        c += 1
    print("X PREDICT: ", X_reduced_res_real, len(X_reduced_res_real))

    prediction = clf.predict([X_reduced_res_real])
    pred_linreg = reg.predict([X_reduced_res_real])
    pred.append([prediction, pred_linreg])

    #if prediction == 1:
    #    print("SIGNAL!!!!!!! ", prediction)
    #else:
    #    print("No Signal. ", prediction)
    print("RANDOM FOREST PREDICTION: ", prediction)
    print("LIN REG PREDICTION: ", pred_linreg)
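# --- Illustration (not part of the original code): main() relies on module-level
# globals — generateInputData, reduceFeatures, a helper module hp, fitted models
# clf (a random forest, per the "RANDOM FOREST PREDICTION" print) and reg (a
# linear regression), plus pred, X_indices, mini_indices, maxi_indices, and
# X_indices_real. A hedged sketch of the assumed wiring; the training arrays and
# window variables below are placeholders, not from this file:
#
#     from sklearn.ensemble import RandomForestClassifier
#     from sklearn.linear_model import LinearRegression
#     clf = RandomForestClassifier().fit(X_train, y_train_labels)
#     reg = LinearRegression().fit(X_train, y_train_values)
#     pred = []
#     main(window_channel_one, window_channel_two)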
layer = lasagne.layers.InputLayer([None, 784])
inputs = {layer: input_var}
for nn, ws in enumerate(weight_shapes):
    layer = lasagne.layers.DenseLayer(layer, ws[1])
    if nn < len(weight_shapes) - 1 and model in ['dropout', 'dropout2']:
        layer = lasagne.layers.dropout(layer, .5)
    print layer.output_shape
layer.nonlinearity = nonlinearities.softmax
y = get_output(layer, inputs)
#y = T.clip(y, 0.001, 0.999)  # stability
loss = cc(y, target_var).mean()
params = lasagne.layers.get_all_params([h_layer, layer])
loss = loss + lasagne.regularization.l2(flatten_list(params)) * np.float32(1.e-5)

# TRAIN FUNCTION
grads = T.grad(loss, params)
mgrads = lasagne.updates.total_norm_constraint(grads, max_norm=max_norm)
cgrads = [T.clip(g, -clip_grad, clip_grad) for g in mgrads]
updates = lasagne.updates.adam(cgrads, params, learning_rate=lr)
train = theano.function([input_var, target_var, dataset_size, lr],
                        loss,
                        updates=updates,
                        on_unused_input='warn')
predict = theano.function([input_var], y.argmax(1))

##################
# TRAIN
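# --- Illustration (not part of the original script): a minimal minibatch loop one
# could run with the train function above; X_train, y_train, n_epochs, batch_size,
# and lr0 are placeholder names, not from this file:
#
#     for epoch in range(n_epochs):
#         for i in range(0, len(X_train), batch_size):
#             xb, yb = X_train[i:i + batch_size], y_train[i:i + batch_size]
#             train(xb, yb, np.float32(len(X_train)), np.float32(lr0))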
# filler
h_layer = lasagne.layers.InputLayer([None, 784])

# JUST primary net
layer = lasagne.layers.InputLayer([None, 784])
inputs = {layer: input_var}
for nn, ws in enumerate(weight_shapes):
    layer = lasagne.layers.DenseLayer(layer, ws[1], nonlinearity=nonlinearity)
    if nn < len(weight_shapes) - 1 and model == 'dropout':
        layer = lasagne.layers.dropout(layer, .5)
    print layer.output_shape
layer.nonlinearity = nonlinearities.softmax
y = get_output(layer, inputs)
y = T.clip(y, 0.001, 0.999)  # stability
loss = cc(y, target_var).mean()
params = lasagne.layers.get_all_params([h_layer, layer])
loss = loss + lasagne.regularization.l2(flatten_list(params)) * np.float32(1.e-5)

# TRAIN FUNCTION
grads = T.grad(loss, params)
mgrads = lasagne.updates.total_norm_constraint(grads, max_norm=max_norm)
cgrads = [T.clip(g, -clip_grad, clip_grad) for g in mgrads]
updates = lasagne.updates.adam(cgrads, params, learning_rate=lr)
train = theano.function([input_var, target_var, dataset_size, lr],
                        loss,
                        updates=updates,
                        on_unused_input='warn')
predict = theano.function([input_var], y.argmax(1))
predict_probs = theano.function([input_var], y)
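# --- Illustration (not part of the original script): since get_output above is
# built without deterministic=True, the dropout layers stay stochastic, so each
# call to predict_probs draws a fresh dropout mask. A small sketch of Monte Carlo
# averaging over masks (n_samples is a placeholder):
import numpy as np

def mc_dropout_probs(x, n_samples=20):
    """Average class probabilities over n_samples stochastic forward passes."""
    return np.mean([predict_probs(x) for _ in range(n_samples)], axis=0)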
def __init__(self,
             srng=RandomStreams(seed=427),
             prior_mean=0,
             prior_log_var=0,
             n_hiddens=2,
             n_units=800,
             n_inputs=784,
             n_classes=10,
             output_type='categorical',
             random_biases=1,
             #dataset_size=None,
             opt='adam',
             #weight=1.,  # the weight of the KL term
             **kargs):
    self.__dict__.update(locals())

    # TODO
    self.dataset_size = T.scalar('dataset_size')
    self.weight = T.scalar('weight')
    self.learning_rate = T.scalar('learning_rate')

    self.weight_shapes = []
    if n_hiddens > 0:
        self.weight_shapes.append((n_inputs, n_units))
        #self.params.append((theano.shared()))
        for i in range(1, n_hiddens):
            self.weight_shapes.append((n_units, n_units))
        self.weight_shapes.append((n_units, n_classes))
    else:
        self.weight_shapes = [(n_inputs, n_classes)]

    if self.random_biases:
        self.num_params = sum((ws[0] + 1) * ws[1] for ws in self.weight_shapes)
    else:
        self.num_params = sum(ws[0] * ws[1] for ws in self.weight_shapes)

    self.wd1 = 1

    self.X = T.matrix()
    self.y = T.matrix()
    self.mean = ts(self.num_params)
    self.log_var = ts(self.num_params, scale=1e-6, bias=-1e8)
    self.params = [self.mean, self.log_var]
    self.ep = self.srng.normal(size=(self.num_params,), dtype=floatX)
    self.weights = self.mean + (T.exp(self.log_var) + np.float32(.000001)) * self.ep

    # forward pass through the primary net, slicing weights/biases per layer
    t = 0
    acts = self.X
    for nn, ws in enumerate(self.weight_shapes):
        if self.random_biases:
            num_param = (ws[0] + 1) * ws[1]
            weight_and_bias = self.weights[t:t + num_param]
            weight = weight_and_bias[:ws[0] * ws[1]].reshape((ws[0], ws[1]))
            bias = weight_and_bias[ws[0] * ws[1]:].reshape((ws[1],))
            acts = T.dot(acts, weight) + bias
        else:
            assert False  # TODO
        if nn < len(self.weight_shapes) - 1:
            acts = (acts > 0.) * acts  # ReLU
        else:
            acts = T.nnet.softmax(acts)
        t += num_param

    y_hat = acts
    #y_hat = T.clip(y_hat, 0.001, 0.999)  # stability
    self.y_hat = y_hat

    self.kl = KL(self.prior_mean, self.prior_log_var,
                 self.mean, self.log_var).sum(-1).mean()
    self.logpyx = -cc(self.y_hat, self.y).mean()
    self.logpyx = -se(self.y_hat, self.y).mean()  # overrides the categorical term above
    self.loss = -(self.logpyx -
                  self.weight * self.kl / T.cast(self.dataset_size, floatX))
    self.loss = se(self.y_hat, self.y).mean()  # overrides the ELBO loss above
    self.logpyx_grad = flatten_list(
        T.grad(-self.logpyx, self.params, disconnected_inputs='warn')).norm(2)
    self.monitored = [self.logpyx, self.logpyx_grad, self.kl]

    #def _get_useful_funcs(self):
    self.predict_proba = theano.function([self.X], self.y_hat)
    self.predict = theano.function([self.X], self.y_hat.argmax(1))
    self.predict_fixed_mask = theano.function([self.X, self.weights], self.y_hat)
    self.sample_weights = theano.function([], self.weights)
    self.monitor_fn = theano.function([self.X, self.y], self.monitored)
    #, (self.predict(x) == y).sum()

    #def _get_grads(self):
    grads = T.grad(self.loss, self.params)
    #mgrads = lasagne.updates.total_norm_constraint(grads, max_norm=self.max_norm)
    #cgrads = [T.clip(g, -self.clip_grad, self.clip_grad) for g in mgrads]
    cgrads = grads
    if self.opt == 'adam':
        self.updates = lasagne.updates.adam(cgrads, self.params,
                                            learning_rate=self.learning_rate)
    elif self.opt == 'momentum':
        self.updates = lasagne.updates.nesterov_momentum(
            cgrads, self.params, learning_rate=self.learning_rate)
    elif self.opt == 'sgd':
        self.updates = lasagne.updates.sgd(cgrads, self.params,
                                           learning_rate=self.learning_rate)

    #def _get_train_func(self):
    inputs = [self.X, self.y, self.dataset_size, self.learning_rate, self.weight]
    train = theano.function(inputs, self.loss,
                            updates=self.updates,
                            on_unused_input='warn')
    self.train_func_ = train
    # DK - putting this here, because it doesn't get overwritten by subclasses
    self.monitor_func = theano.function(
        [self.X, self.y, self.dataset_size, self.learning_rate],
        self.monitored,
        on_unused_input='warn')
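# --- Illustration (not part of the original code): the weights above are drawn via
# the reparameterization trick, w = mean + scale * ep with ep ~ N(0, I). Note the
# class uses T.exp(self.log_var) directly as the scale; the textbook Gaussian
# reparameterization uses the standard deviation exp(0.5 * log_var). A NumPy sketch
# of that standard form, for comparison only:
import numpy as np

def sample_weights_np(mean, log_var, rng=np.random):
    ep = rng.normal(size=mean.shape).astype('float32')
    return mean + np.exp(0.5 * log_var) * ep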
def __init__(self,
             arch=None,
             lbda=1,
             perdatapoint=False,
             srng=RandomStreams(seed=427),
             prior=log_normal,
             opt='adam',
             coupling=4,
             coupling_dim=200,
             pad='same',
             stride=2,
             pool=None,
             uncoupled_init=0,
             convex_combination=0):

    if arch == 'Riashat':
        kernel_width = 3
        self.kernel_width = kernel_width
        stride = 1
        self.stride = stride
        pad = 'valid'
        self.pad = pad
        self.weight_shapes = [
            (32, 1, kernel_width, kernel_width),    # -> (None, 16, 14, 14)
            (32, 32, kernel_width, kernel_width)]   # -> (None, 16, 7, 7)
        self.args = [[32, kernel_width, stride, pad, rectify, 'none'],
                     [32, kernel_width, stride, pad, rectify, 'max']]
        self.pool_size = 5
    else:
        # weight_shapes / args are only defined for the 'Riashat' arch here
        self.pool_size = 2

    self.n_kernels = np.array(self.weight_shapes)[:, 1].sum()
    self.kernel_shape = self.weight_shapes[0][:1] + self.weight_shapes[0][2:]
    print "kernel_shape", self.kernel_shape
    self.kernel_size = np.prod(self.weight_shapes[0])

    self.num_classes = 10
    if arch == 'Riashat':
        self.num_hids = 256
    else:
        self.num_hids = 128
    self.num_mlp_layers = 1
    self.num_mlp_params = self.num_classes + \
                          self.num_hids * self.num_mlp_layers
    self.num_cnn_params = np.sum(np.array(self.weight_shapes)[:, 0])
    self.num_params = self.num_mlp_params + self.num_cnn_params
    self.coupling = coupling
    self.extra_l2 = 0
    self.convex_combination = convex_combination

    #def __init__(self,
    self.lbda = lbda
    self.perdatapoint = perdatapoint
    self.srng = srng
    self.prior = prior
    self.__dict__.update(locals())

    #def _get_theano_variables(self):
    self.input_var = T.matrix('input_var')
    self.input_var = T.tensor4('input_var')  # <-- for CNN
    self.target_var = T.matrix('target_var')
    self.dataset_size = T.scalar('dataset_size')
    self.learning_rate = T.scalar('learning_rate')

    # per-datapoint weight samples need input_var, so this comes after it is defined
    if perdatapoint:
        self.wd1 = self.input_var.shape[0]
    else:
        self.wd1 = 1

    #def _get_hyper_net(self):
    # initial random noise
    print self.num_params
    ep = self.srng.normal(size=(self.wd1, self.num_params), dtype=floatX)
    logdets_layers = []
    h_net = lasagne.layers.InputLayer([None, self.num_params])

    # mean and variance of the initial noise
    layer_temp = LinearFlowLayer(h_net)
    h_net = IndexLayer(layer_temp, 0)
    logdets_layers.append(IndexLayer(layer_temp, 1))

    if self.coupling:
        layer_temp = CoupledWNDenseLayer(h_net, coupling_dim,
                                         uncoupled_init=uncoupled_init)
        h_net = IndexLayer(layer_temp, 0)
        logdets_layers.append(IndexLayer(layer_temp, 1))

        for c in range(self.coupling - 1):
            h_net = PermuteLayer(h_net, self.num_params)
            layer_temp = CoupledWNDenseLayer(h_net, coupling_dim,
                                             uncoupled_init=uncoupled_init)
            h_net = IndexLayer(layer_temp, 0)
            logdets_layers.append(IndexLayer(layer_temp, 1))

    if self.convex_combination:
        layer_temp = ConvexBiasLayer(h_net,
                                     upweight_primary=self.convex_combination)
        h_net = IndexLayer(layer_temp, 0)
        logdets_layers.append(IndexLayer(layer_temp, 1))

    self.h_net = h_net
    self.weights = lasagne.layers.get_output(h_net, ep)
    self.logdets = sum([get_output(ld, ep) for ld in logdets_layers])

    #def _get_primary_net(self):
    t = np.cast['int32'](0)
    if 1:  # self.dataset == 'mnist':
        p_net = lasagne.layers.InputLayer([None, 1, 28, 28])
        print p_net.output_shape
    inputs = {p_net: self.input_var}
    #logpw = np.float32(0.)

    for ws, args in zip(self.weight_shapes, self.args):
        num_filters = ws[0]
        # TO-DO: generalize to have multiple samples?
        weight = self.weights[0, t:t + num_filters].dimshuffle(0, 'x', 'x', 'x')
        num_filters = args[0]
        filter_size = args[1]
        stride = args[2]
        pad = args[3]
        nonl = args[4]
        p_net = lasagne.layers.Conv2DLayer(p_net, num_filters, filter_size,
                                           stride, pad, nonlinearity=nonl)
        p_net = stochastic_weight_norm(p_net, weight)
        if args[5] == 'max':
            p_net = lasagne.layers.MaxPool2DLayer(p_net, self.pool_size)
        #print p_net.output_shape
        t += num_filters

    for layer in range(self.num_mlp_layers):
        weight = self.weights[:, t:t + self.num_hids].reshape(
            (self.wd1, self.num_hids))
        p_net = lasagne.layers.DenseLayer(p_net, self.num_hids,
                                          nonlinearity=rectify)
        p_net = stochastic_weight_norm(p_net, weight)
        if self.extra_l2:
            self.l2_penalty = lasagne.regularization.regularize_layer_params_weighted(
                {p_net: 3.5 / 128}, lasagne.regularization.l2)
        t += self.num_hids

    weight = self.weights[:, t:t + self.num_classes].reshape(
        (self.wd1, self.num_classes))
    p_net = lasagne.layers.DenseLayer(p_net, self.num_classes,
                                      nonlinearity=nonlinearities.softmax)
    p_net = stochastic_weight_norm(p_net, weight)

    y = T.clip(get_output(p_net, inputs), 0.001, 0.999)  # stability
    self.p_net = p_net
    self.y = y

    #def _get_params(self):
    params = lasagne.layers.get_all_params([self.h_net, self.p_net])
    self.params = list()
    for param in params:
        if type(param) is not RSSV:
            self.params.append(param)

    params0 = lasagne.layers.get_all_param_values([self.h_net, self.p_net])
    params = lasagne.layers.get_all_params([self.h_net, self.p_net])
    updates = {p: p0 for p, p0 in zip(params, params0)}
    self.reset = theano.function([], None, updates=updates)
    self.add_reset('init')

    #def _get_elbo(self):
    logdets = self.logdets
    self.logqw = -logdets
    self.logpw = self.prior(self.weights, 0., -T.log(self.lbda)).sum(1)
    self.kl = (self.logqw - self.logpw).mean()
    self.kl_term = self.kl / T.cast(self.dataset_size, floatX)
    self.logpyx = -cc(self.y, self.target_var).mean()
    self.loss = -self.logpyx + self.kl_term

    # DK - extra monitoring (TODO)
    params = self.params
    ds = self.dataset_size
    self.logpyx_grad = flatten_list(
        T.grad(-self.logpyx, params, disconnected_inputs='warn')).norm(2)
    self.logpw_grad = flatten_list(
        T.grad(-self.logpw.mean() / ds, params,
               disconnected_inputs='warn')).norm(2)
    self.logqw_grad = flatten_list(
        T.grad(self.logqw.mean() / ds, params,
               disconnected_inputs='warn')).norm(2)
    self.monitored = [
        self.logpyx, self.logpw, self.logqw,
        self.logpyx_grad, self.logpw_grad, self.logqw_grad
    ]

    #def _get_grads(self):
    grads = T.grad(self.loss, self.params)
    mgrads = lasagne.updates.total_norm_constraint(grads, max_norm=self.max_norm)
    cgrads = [T.clip(g, -self.clip_grad, self.clip_grad) for g in mgrads]
    if self.opt == 'adam':
        self.updates = lasagne.updates.adam(cgrads, self.params,
                                            learning_rate=self.learning_rate)
    elif self.opt == 'momentum':
        self.updates = lasagne.updates.nesterov_momentum(
            cgrads, self.params, learning_rate=self.learning_rate)
    elif self.opt == 'sgd':
        self.updates = lasagne.updates.sgd(cgrads, self.params,
                                           learning_rate=self.learning_rate)

    #def _get_train_func(self):
    train = theano.function(
        [self.input_var, self.target_var, self.dataset_size, self.learning_rate],
        self.loss,
        updates=self.updates)
    self.train_func = train
    # DK - putting this here, because it doesn't get overwritten by subclasses
    self.monitor_func = theano.function(
        [self.input_var, self.target_var, self.dataset_size, self.learning_rate],
        self.monitored,
        on_unused_input='warn')

    #def _get_useful_funcs(self):
    self.predict_proba = theano.function([self.input_var], self.y)
    self.predict = theano.function([self.input_var], self.y.argmax(1))
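# --- Illustration (not part of the original code): because ep = srng.normal(...)
# is part of the compiled graph, every call to predict_proba re-samples the
# hypernet noise and hence a fresh set of primary-net weights. A hedged usage
# sketch; the class name HyperCNN, iterate_minibatches, and the data arrays are
# placeholders, not from this file:
#
#     model = HyperCNN(arch='Riashat', coupling=4)
#     for epoch in range(n_epochs):
#         for xb, yb in iterate_minibatches(X_train, y_train, batch_size):
#             model.train_func(xb, yb, np.float32(len(X_train)), np.float32(lr0))
#     # Monte Carlo estimate of predictive probabilities over weight samples
#     probs = np.mean([model.predict_proba(X_test) for _ in range(20)], axis=0)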