def load_account_weights(self, k, BEST_LOSS=False):
    '''Load account-specific weights for the layers named in
    self.account_weight_layers, falling back to a fresh Uniform()
    initialisation if no saved parameters are found for account k.'''
    if BEST_LOSS:
        fpath = self.HOME + 'trainedParams/' + self.fp_accW + '_' + str(k) + '_loss_prms.pkl'
        if not os.path.isfile(fpath):
            fpath = self.HOME + 'trainedParams/' + self.fp_accW + '_' + str(k) + '.pkl'
    else:
        fpath = self.HOME + 'trainedParams/' + self.fp_accW + '_' + str(k) + '.pkl'

    for name in self.account_weight_layers:
        try:
            # pickles must be opened in binary mode
            with open(fpath, 'rb') as f:
                paramDic = pickle.load(f)
            Wval = paramDic[str(k) + name].astype(np.float32)
            if self.layers_[name].b is not None:
                bval = paramDic[str(k) + name + '_b'].astype(np.float32)
                if len(np.shape(bval)) == 0:
                    bval = np.reshape(bval, (1,))
        except Exception:
            # loading failed: initialise this account's weights from scratch
            print 'init weights: ', k
            uniformInit = Uniform()
            Wval = uniformInit.sample(np.shape(self.layers_[name].W.get_value()))
            if self.layers_[name].b is not None:
                bval = uniformInit.sample(np.shape(self.layers_[name].b.get_value()))
        self.layers_[name].W.set_value(Wval)
        if self.layers_[name].b is not None:
            self.layers_[name].b.set_value(bval)
def test_uniform_range_as_range():
    from lasagne.init import Uniform
    sample = Uniform((0.0, 1.0)).sample((300, 400))
    assert sample.shape == (300, 400)
    assert -0.1 < sample.min() < 0.1
    assert 0.9 < sample.max() < 1.1
def test_uniform_range_as_number():
    from lasagne.init import Uniform
    sample = Uniform(1.0).sample((300, 400))
    assert sample.shape == (300, 400)
    assert -1.1 < sample.min() < -0.9
    assert 0.9 < sample.max() < 1.1
def exp_b(name):
    # tanh and softplus output
    # sane inits for other layers
    # output one appliance
    # 0% skip prob for first appliance
    # 90% skip prob for other appliances
    # 200 units
    # standardise input
    source_dict_copy = deepcopy(source_dict)
    source_dict_copy['standardise_input'] = True
    source = RealApplianceSource(**source_dict_copy)
    net_dict_copy = deepcopy(net_dict)
    net_dict_copy.update(dict(
        experiment_name=name,
        source=source
    ))
    net_dict_copy['layers_config'] = [
        {'type': DenseLayer, 'num_units': 200, 'nonlinearity': tanh,
         'W': Uniform(25), 'b': Uniform(25)},
        {'type': DenseLayer, 'num_units': 50, 'nonlinearity': tanh,
         'W': Normal(std=1/sqrt(200)), 'b': Normal(std=1/sqrt(200))},
        {'type': BidirectionalRecurrentLayer, 'num_units': 50,
         'W_in_to_hid': Normal(std=1/sqrt(50)), 'gradient_steps': GRADIENT_STEPS,
         'nonlinearity': tanh, 'learn_init': False, 'precompute_input': False},
        {'type': DimshuffleLayer, 'pattern': (0, 2, 1)},
        {'type': Conv1DLayer, 'num_filters': 50, 'filter_length': 4, 'stride': 4,
         'nonlinearity': tanh, 'W': Normal(std=1/sqrt(50))},
        {'type': DimshuffleLayer, 'pattern': (0, 2, 1)},
        {'type': BidirectionalRecurrentLayer, 'num_units': 80,
         'W_in_to_hid': Normal(std=1/sqrt(50)), 'gradient_steps': GRADIENT_STEPS,
         'nonlinearity': tanh, 'learn_init': False, 'precompute_input': False},
        {'type': DenseLayer, 'num_units': source.n_outputs,
         'nonlinearity': T.nnet.softplus, 'W': Normal(std=1/sqrt(80))}
    ]
    net = Net(**net_dict_copy)
    return net
def exp_a(name):
    # 151d but training for much longer and skip prob = 0.7
    source = RealApplianceSource(
        filename='/data/dk3810/ukdale.h5',
        appliances=[
            ['fridge freezer', 'fridge', 'freezer'],
            'hair straighteners',
            'television'
            # 'dish washer'
            # ['washer dryer', 'washing machine']
        ],
        max_appliance_powers=None,  # [200, 100, 200, 2500, 2400],
        on_power_thresholds=[5, 5, 5, 5, 5],
        max_input_power=5900,
        min_on_durations=[60, 60, 60, 1800, 1800],
        min_off_durations=[12, 12, 12, 1800, 600],
        window=("2013-06-01", "2014-07-01"),
        seq_length=1500,
        output_one_appliance=False,
        boolean_targets=False,
        train_buildings=[1],
        validation_buildings=[1],
        skip_probability=0.0,
        n_seq_per_batch=25,
        include_diff=True
    )
    net = Net(
        experiment_name=name,
        source=source,
        save_plot_interval=250,
        loss_function=mse,
        updates=partial(nesterov_momentum, learning_rate=.1, clip_range=(-1, 1)),
        layers_config=[
            {'type': LSTMLayer, 'num_units': 50, 'W_in_to_cell': Uniform(25),
             'gradient_steps': GRADIENT_STEPS, 'peepholes': False},
            {'type': LSTMLayer, 'num_units': 50, 'W_in_to_cell': Uniform(1),
             'gradient_steps': GRADIENT_STEPS, 'peepholes': False},
            {'type': DenseLayer, 'num_units': 50, 'nonlinearity': rectify},
            {'type': DenseLayer, 'num_units': source.n_outputs, 'nonlinearity': None}
        ]
    )
    return net
    min_off_duration=60,
    subsample_target=5,
    train_buildings=[1],
    validation_buildings=[1])

net = Net(
    experiment_name="e88",
    source=source,
    save_plot_interval=50,
    loss_function=crossentropy,
    updates=partial(adagrad, learning_rate=0.001),
    layers_config=[
        {'type': LSTMLayer,  # TODO change to BLSTM
         'num_units': 60, 'W_in_to_cell': Uniform(5)},
        {'type': DimshuffleLayer, 'pattern': (0, 2, 1)},
        {'type': Conv1DLayer, 'num_filters': 80, 'filter_length': 5, 'stride': 5,
         'nonlinearity': sigmoid},
        {'type': DimshuffleLayer, 'pattern': (0, 2, 1
def exp_a(name):
    # global source
    # source = RealApplianceSource(
    #     filename='/data/dk3810/ukdale.h5',
    #     appliances=[
    #         ['fridge freezer', 'fridge', 'freezer'],
    #         'hair straighteners',
    #         'television',
    #         'dish washer',
    #         ['washer dryer', 'washing machine']
    #     ],
    #     max_appliance_powers=None,#[500] * 5,
    #     on_power_thresholds=[5] * 5,
    #     max_input_power=2500,
    #     min_on_durations=[60, 60, 60, 1800, 1800],
    #     min_off_durations=[12, 12, 12, 1800, 600],
    #     window=("2013-06-01", "2014-07-01"),
    #     seq_length=1500,
    #     output_one_appliance=False,
    #     boolean_targets=False,
    #     train_buildings=[1],
    #     validation_buildings=[1],
    #     skip_probability=0.7,
    #     n_seq_per_batch=25,
    #     # subsample_target=4,
    #     # input_padding=0,
    #     include_diff=False,
    #     clip_appliance_power=False,
    #     lag=0
    # )
    net = Net(
        experiment_name=name,
        source=source,
        save_plot_interval=1000,
        loss_function=scaled_cost,
        updates=partial(nesterov_momentum, learning_rate=0.0001),
        layers_config=[
            {'type': DenseLayer, 'num_units': 200, 'nonlinearity': sigmoid,
             'W': Uniform(0.1), 'b': Uniform(0.1)},
            {'type': DenseLayer, 'num_units': 50, 'nonlinearity': sigmoid,
             'W': Uniform(0.1), 'b': Uniform(0.1)},
            {'type': DenseLayer, 'num_units': source.n_outputs, 'nonlinearity': None,
             'W': Uniform(25)}
        ]
    )
    return net
    net = {}
    net['input'] = InputLayer((batch_size, seq_len, feature_size))
    net['rnn'] = rnnmodel(net['input'], hidden_units,
                          forgetgate=lasagne.layers.Gate(b=lasagne.init.Constant(1.)),
                          peepholes=False, only_return_final=True,
                          grad_clipping=args.gradclipvalue)
    net['out'] = DenseLayer(net['rnn'], outputclass, nonlinearity=softmax)
    return net


def build_rnn_network(rnnmodel):
    net = {}
    net['input'] = InputLayer((batch_size, seq_len, feature_size))
    net['rnn'] = rnnmodel(net['input'], hidden_units, nonlinearity=act,
                          W_in_to_hid=Normal(args.ini),
                          W_hid_to_hid=lambda shape: np.identity(hidden_units, dtype=np.float32),
                          only_return_final=True, grad_clipping=args.gradclipvalue)
    net['out'] = DenseLayer(net['rnn'], outputclass, nonlinearity=softmax)
    return net


ini_W = HeNormal(gain=np.sqrt(2) / 2.0)
if args.use_bn_afterrnn:
    ini_W = Uniform(args.ini)


def build_res_rnn_network(rnnmodel):
    net = {}
    net['input'] = InputLayer((batch_size, seq_len, feature_size))
    net['rnn0'] = DimshuffleLayer(net['input'], (1, 0, 2))
    for l in range(1, num_layers + 1):
        hidini = 0
        if l == num_layers:
            hidini = U_lowbound
        net['rnn%d' % (l - 1)] = ReshapeLayer(net['rnn%d' % (l - 1)],
                                              (batch_size * seq_len, -1))
        net['rnn%d' % (l - 1)] = DenseLayer(net['rnn%d' % (l - 1)], hidden_units,
                                            W=ini_W,
                                            b=Uniform(range=(0, args.ini_b)),
                                            nonlinearity=None)  # W=Uniform(ini_rernn_in_to_hid),
        net['rnn%d' % (l - 1)] = ReshapeLayer(net['rnn%d' % (l - 1)],
                                              (seq_len, batch_size, -1))
def exp_a(name):
    source = RealApplianceSource(
        filename='/data/dk3810/ukdale.h5',
        appliances=[
            ['fridge freezer', 'fridge', 'freezer'],
            'hair straighteners',
            'television'
            # 'dish washer',
            # ['washer dryer', 'washing machine']
        ],
        max_appliance_powers=[300, 500, 200, 2500, 2400],
        on_power_thresholds=[5, 5, 5, 5, 5],
        max_input_power=1000,
        min_on_durations=[60, 60, 60, 1800, 1800],
        min_off_durations=[12, 12, 12, 1800, 600],
        window=("2013-06-01", "2014-07-01"),
        seq_length=1000,
        output_one_appliance=False,
        boolean_targets=False,
        train_buildings=[1],
        validation_buildings=[1],
        skip_probability=0,
        n_seq_per_batch=50,
        subsample_target=5,
        X_processing_func=fdiff)
    net = Net(
        experiment_name=name,
        source=source,
        save_plot_interval=250,
        loss_function=crossentropy,
        updates=partial(nesterov_momentum, learning_rate=1.0),
        layers_config=[
            {'type': LSTMLayer, 'num_units': 30, 'W_in_to_cell': Uniform(5),
             'gradient_steps': GRADIENT_STEPS, 'peepholes': False},
            {'type': DimshuffleLayer, 'pattern': (0, 2, 1)},
            {'type': Conv1DLayer, 'num_filters': 60, 'filter_length': 5, 'stride': 5,
             'nonlinearity': sigmoid, 'W': Uniform(1)},
            {'type': DimshuffleLayer, 'pattern': (0, 2, 1)},
            {'type': LSTMLayer, 'num_units': 60, 'W_in_to_cell': Uniform(1),
             'gradient_steps': GRADIENT_STEPS, 'peepholes': False},
            {'type': DenseLayer, 'num_units': source.n_outputs, 'nonlinearity': sigmoid}
        ])
    return net
def test_uniform_glorot():
    from lasagne.init import Uniform
    sample = Uniform().sample((150, 450))
    assert -0.11 < sample.min() < -0.09
    assert 0.09 < sample.max() < 0.11
def execute(dataset, learning_rate=0.00001, learning_rate_annealing=1.0, lmd=0.,
            noise=0.0, encoder_units=[1024, 512, 256], num_epochs=500,
            which_fold=1, save_path=None, save_copy=None, dataset_path=None,
            num_fully_connected=0, exp_name='', init_args=None):
    # Reading dataset
    print("Loading data")
    if dataset == "1000_genomes" and which_fold == 1 and False:
        x_unsup = mlh.load_data(dataset, dataset_path, None,
                                which_fold=which_fold, keep_labels=1.0,
                                missing_labels_val=-1.0,
                                embedding_input='raw', transpose=False)
        import pdb
        pdb.set_trace()
        x_train = np.zeros((x_unsup[0].shape[0], x_unsup[0].shape[1] * 2), dtype="int8")
        x_train[:, ::2] = (x_unsup[0] == 2)
        x_train[:, 1::2] = (x_unsup[0] >= 1)
        x_valid = np.zeros((x_unsup[2].shape[0], x_unsup[2].shape[1] * 2), dtype="int8")
        x_valid[:, ::2] = (x_unsup[2] == 2)
        x_valid[:, 1::2] = (x_unsup[2] >= 1)
    else:
        x_unsup = mlh.load_data(dataset, dataset_path, None,
                                which_fold=which_fold, keep_labels=1.0,
                                missing_labels_val=-1.0,
                                embedding_input='bin', transpose=True)
        x_train = x_unsup[0][0]
        x_valid = x_unsup[1][0]
    print(x_train.shape, x_valid.shape)

    n_features = x_train.shape[1]

    exp_name += "learn_snp2vec_dae_h"
    for e in encoder_units:
        exp_name += ('-' + str(e))
    # exp_name += '_g-' + str(gamma)
    exp_name += '_l-' + str(lmd)
    exp_name += '_lr-' + str(learning_rate)
    exp_name += '_fold-' + str(which_fold)

    save_path = os.path.join(save_path, exp_name)
    save_copy = os.path.join(save_copy, exp_name)
    if not os.path.exists(save_path):
        os.makedirs(save_path)
    if not os.path.exists(save_copy):
        os.makedirs(save_copy)

    # Prepare Theano variables for inputs and targets
    input_var = T.matrix('input')
    target_reconst = T.matrix('target')
    lr = theano.shared(np.float32(learning_rate), 'learning_rate')
    batch_size = 128

    # building network
    encoder = InputLayer((batch_size, n_features), input_var)

    # building the encoder and decoder
    # import pdb; pdb.set_trace()
    for i in range(len(encoder_units)):
        encoder = DenseLayer(encoder, num_units=encoder_units[i],
                             W=Uniform(0.00001),
                             nonlinearity=leaky_rectify)  # if i < len(encoder_units)-1 else linear)

    embedding = lasagne.layers.get_output(encoder)
    get_embedding_fn = theano.function([input_var], embedding)

    params = lasagne.layers.get_all_params(encoder, trainable=True)
    monitor_labels = ["embedding min", "embedding mean", "embedding max"]
    val_outputs = [embedding.min(), embedding.mean(), embedding.max()]
    nets = [encoder]

    decoder_units = encoder_units[::-1][1:]
    print(decoder_units)
    decoder = encoder
    for i in range(len(decoder_units)):
        decoder = DenseLayer(decoder, num_units=decoder_units[i],
                             W=Uniform(0.0001),
                             nonlinearity=leaky_rectify)
    decoder = DenseLayer(decoder, num_units=n_features,
                         W=convert_initialization(init_args["decoder_init"],
                                                  nonlinearity="sigmoid"),
                         nonlinearity=sigmoid)
    prediction_reconst = lasagne.layers.get_output(decoder)

    # Reconstruction error
    loss_reconst = lasagne.objectives.binary_crossentropy(
        prediction_reconst, target_reconst).mean()
    # loss_reconst = mh.define_sampled_mean_bincrossentropy(
    #     prediction_reconst, target_reconst, gamma=gamma)
    # loss_reconst = mh.dice_coef_loss(
    #     target_reconst, prediction_reconst).mean()

    accuracy = T.eq(T.gt(prediction_reconst, 0.5), target_reconst).mean()

    params += lasagne.layers.get_all_params(decoder, trainable=True)
    monitor_labels += ["reconst. loss", "reconst. accuracy"]
    val_outputs += [loss_reconst, accuracy]
    nets += [decoder]

    # sparsity_reconst = gamma * l1(prediction_reconst)
    # roh = input_var.mean(0)
    # sparsity_reconst = ((roh * T.log(roh / (prediction_reconst.mean(0)+1e-8))) +
    #     ((1 - roh) * T.log((1 - roh) / (1 - prediction_reconst + 1e-8)))).sum()

    # Combine losses
    loss = loss_reconst  # + sparsity_reconst

    # applying weight decay
    l2_penalty = apply_penalty(params, l2)
    loss = loss + lmd * l2_penalty
    val_outputs += [loss]
    monitor_labels += ['loss']

    # Some variables
    max_patience = 100
    patience = 0
    train_monitored = []
    valid_monitored = []
    train_loss = []

    updates = lasagne.updates.adam(loss, params, learning_rate=lr)
    for k in updates.keys():
        if updates[k].ndim == 2:
            updates[k] = lasagne.updates.norm_constraint(updates[k], 1.0)

    inputs = [input_var, target_reconst]

    # Compile training function
    print "Compiling training function"
    train_fn = theano.function(inputs, loss, updates=updates,
                               on_unused_input='ignore')
    val_fn = theano.function(inputs, [val_outputs[0]] + val_outputs,
                             on_unused_input='ignore')
    start_training = time.time()

    print "Starting training"
    for epoch in range(num_epochs):
        start_time = time.time()
        print("Epoch {} of {}".format(epoch + 1, num_epochs))
        nb_minibatches = 0
        loss_epoch = 0
        for x, target_reconst_val in data_generator(x_train, batch_size,
                                                    shuffle=True, noise=noise):
            loss_epoch += train_fn(x, target_reconst_val)
            nb_minibatches += 1
        loss_epoch /= nb_minibatches
        train_loss += [loss_epoch]

        # Monitoring on the training set
        train_minibatches = data_generator(x_train, batch_size, noise=noise)
        train_err = mlh.monitoring(train_minibatches, "train", val_fn,
                                   monitor_labels, 0)
        train_monitored += [train_err]

        # Monitoring on the validation set
        valid_minibatches = data_generator(x_valid, batch_size, noise=noise)
        valid_err = mlh.monitoring(valid_minibatches, "valid", val_fn,
                                   monitor_labels, 0)
        valid_monitored += [valid_err]

        early_stop_criterion = 'loss'
        early_stop_val = valid_err[monitor_labels.index(early_stop_criterion)]

        # Early stopping
        if epoch == 0:
            best_valid = early_stop_val
        elif early_stop_val < best_valid and early_stop_criterion == 'loss':
            best_valid = early_stop_val
            patience = 0
            # Save stuff
            np.savez(save_path + '/model_snp2vec_best.npz',
                     *lasagne.layers.get_all_param_values(nets))
            np.savez(save_path + "/errors_snp2vec_best.npz",
                     zip(*train_monitored), zip(*valid_monitored))
        else:
            patience += 1
            np.savez(os.path.join(save_path, 'model_snp2vec_last.npz'),
                     *lasagne.layers.get_all_param_values(nets))
            np.savez(save_path + "/errors_snp2vec_last.npz",
                     zip(*train_monitored), zip(*valid_monitored))

        # End training
        if (patience == max_patience) or (epoch == num_epochs - 1):
            print("Ending training")
            # Load best model
            if not os.path.exists(save_path + '/model_snp2vec_best.npz'):
                print("No saved model to be tested and/or generate"
                      " the embedding !")
            else:
                with np.load(save_path + '/model_snp2vec_best.npz') as f:
                    param_values = [f['arr_%d' % i]
                                    for i in range(len(f.files))]
                    lasagne.layers.set_all_param_values(nets, param_values)

            # Use the saved model to generate the feature embedding.
            # Here the feature embedding is the difference in the hidden
            # representation between having that feature on and having it off.
            print("Generating embedding")
            embedding_size = encoder_units[-1]
            null_input = np.zeros((1, n_features), dtype="float32")
            null_embedding = get_embedding_fn(null_input)[0]
            all_embeddings = np.zeros((n_features, embedding_size), dtype="float32")

            """
            single_feat_input = null_input.copy()
            for i in range(n_features):
                if i % 10000 == 0:
                    print(i, n_features)
                single_feat_input[:, i] = 1
                all_embeddings[i] = (get_embedding_fn(single_feat_input)[0] -
                                     null_embedding)
                single_feat_input[:, i] = 0
            result1 = all_embeddings[:1000].copy()
            """

            block_size = 10
            single_feat_batch = np.zeros((block_size, n_features), dtype="float32")
            for i in range(0, n_features, block_size):
                if i % 10000 == 0:
                    print(i, n_features)
                for j in range(block_size):
                    single_feat_batch[j, i + j] = 1
                all_embeddings[i:i + 10] = (get_embedding_fn(single_feat_batch) -
                                            null_embedding)
                for j in range(block_size):
                    single_feat_batch[j, i + j] = 0

            # filename encodes the noise level (%f) and the fold (%i)
            np.save("/Tmp/carriepl/DietNetworks/all_embeddings_noise%f_fold%i.npy" %
                    (noise, which_fold), all_embeddings)

            # Training set results
            train_minibatches = data_generator(x_train, batch_size, noise=noise)
            train_err = mlh.monitoring(train_minibatches, "train", val_fn,
                                       monitor_labels, 0)

            # Validation set results
            valid_minibatches = data_generator(x_valid, batch_size, noise=noise)
            valid_err = mlh.monitoring(valid_minibatches, "valid", val_fn,
                                       monitor_labels, 0)

            # Stop
            print(" epoch time:\t\t\t{:.3f}s \n".format(time.time() - start_time))
            break

        print(" epoch time:\t\t\t{:.3f}s \n".format(time.time() - start_time))

        # Anneal the learning rate
        lr.set_value(float(lr.get_value() * learning_rate_annealing))

    # Copy files to loadpath
    if save_path != save_copy:
        print('Copying model and other training files to {}'.format(save_copy))
        copy_tree(save_path, save_copy)
def exp_a(name):
    # global source
    # source = RealApplianceSource(
    #     filename='/data/dk3810/ukdale.h5',
    #     appliances=[
    #         ['fridge freezer', 'fridge', 'freezer'],
    #         'hair straighteners',
    #         'television',
    #         'dish washer',
    #         ['washer dryer', 'washing machine']
    #     ],
    #     max_appliance_powers=None,#[500] * 5,
    #     on_power_thresholds=[5] * 5,
    #     max_input_power=2500,
    #     min_on_durations=[60, 60, 60, 1800, 1800],
    #     min_off_durations=[12, 12, 12, 1800, 600],
    #     window=("2013-06-01", "2014-07-01"),
    #     seq_length=1500,
    #     output_one_appliance=False,
    #     boolean_targets=False,
    #     train_buildings=[1],
    #     validation_buildings=[1],
    #     skip_probability=0.7,
    #     n_seq_per_batch=25,
    #     subsample_target=4,
    #     input_padding=3,
    #     include_diff=False,
    #     clip_appliance_power=False,
    #     lag=32
    # )
    net = Net(
        experiment_name=name,
        source=source,
        save_plot_interval=250,
        loss_function=scaled_cost,
        updates=partial(nesterov_momentum, learning_rate=0.0001),
        layers_config=[
            {'type': LSTMLayer, 'num_units': 50, 'W_in_to_cell': Uniform(25),
             'gradient_steps': GRADIENT_STEPS, 'peepholes': False},
            {'type': DimshuffleLayer, 'pattern': (0, 2, 1)},  # (batch, features, time)
            {'type': Conv1DLayer,  # convolve over the time axis
             'num_filters': 50, 'filter_length': 2, 'stride': 1,
             'nonlinearity': sigmoid, 'W': Uniform(1)},
            {'type': DimshuffleLayer, 'pattern': (0, 2, 1)},  # back to (batch, time, features)
            {'type': FeaturePoolLayer,
             'ds': 2,     # number of feature maps to be pooled together
             'axis': 1},  # pool over the time axis
            {'type': LSTMLayer, 'num_units': 50, 'W_in_to_cell': Uniform(5),
             'gradient_steps': GRADIENT_STEPS, 'peepholes': False},
            {'type': DimshuffleLayer, 'pattern': (0, 2, 1)},  # (batch, features, time)
            {'type': Conv1DLayer,  # convolve over the time axis
             'num_filters': 50, 'filter_length': 2, 'stride': 1,
             'nonlinearity': sigmoid, 'W': Uniform(1)},
            {'type': DimshuffleLayer, 'pattern': (0, 2, 1)},  # back to (batch, time, features)
            {'type': FeaturePoolLayer,
             'ds': 2,     # number of feature maps to be pooled together
             'axis': 1},  # pool over the time axis
            {'type': LSTMLayer, 'num_units': 50, 'W_in_to_cell': Uniform(1),
             'gradient_steps': GRADIENT_STEPS, 'peepholes': False},
            {'type': DenseLayer, 'num_units': source.n_outputs, 'nonlinearity': None,
             'W': Uniform(25)}
        ])
    return net
def exp_b(name):
    # like A but with inputs in the range [-1, 1]
    source = RealApplianceSource(
        filename='/data/dk3810/ukdale.h5',
        appliances=[
            ['fridge freezer', 'fridge', 'freezer'],
            'hair straighteners',
            'television',
            'dish washer',
            ['washer dryer', 'washing machine']
        ],
        max_appliance_powers=[300, 500, 200, 2500, 2400],
        on_power_thresholds=[5, 5, 5, 5, 5],
        max_input_power=5900,
        min_on_durations=[60, 60, 60, 1800, 1800],
        min_off_durations=[12, 12, 12, 1800, 600],
        window=("2013-06-01", "2014-07-01"),
        seq_length=1500,
        output_one_appliance=False,
        boolean_targets=False,
        train_buildings=[1],
        validation_buildings=[1],
        skip_probability=0.7,
        n_seq_per_batch=10,
        X_processing_func=lambda X: (X * 2) - 1
    )
    net = Net(
        experiment_name=name,
        source=source,
        save_plot_interval=5000,
        loss_function=crossentropy,
        updates=partial(nesterov_momentum, learning_rate=1.0),
        layers_config=[
            {'type': DenseLayer, 'num_units': 50, 'nonlinearity': sigmoid,
             'W': Uniform(5), 'b': Uniform(1)},
            {'type': DenseLayer, 'num_units': source.n_outputs, 'nonlinearity': sigmoid}
        ],
        layer_changes={
            50001: {
                'remove_from': -2,
                'new_layers': [
                    {'type': DenseLayer, 'num_units': 50, 'nonlinearity': sigmoid},
                    {'type': DenseLayer, 'num_units': source.n_outputs,
                     'nonlinearity': sigmoid}
                ]
            },
            100001: {
                'remove_from': -2,
                'callback': set_save_plot_interval,
                'new_layers': [
                    {'type': BLSTMLayer, 'num_units': 40,
                     'gradient_steps': GRADIENT_STEPS, 'peepholes': False},
                    {'type': DenseLayer, 'num_units': source.n_outputs,
                     'nonlinearity': sigmoid}
                ]
            },
            100501: {
                'remove_from': -3,
                'new_layers': [
                    {'type': BLSTMLayer, 'num_units': 80,
                     'gradient_steps': GRADIENT_STEPS, 'peepholes': False},
                    {'type': DenseLayer, 'num_units': source.n_outputs,
                     'nonlinearity': sigmoid}
                ]
            }
        }
    )
    return net
def exp_d(name):
    # 'C' but with pre-training
    source = RealApplianceSource(
        filename='/data/dk3810/ukdale.h5',
        appliances=[['fridge freezer', 'fridge', 'freezer'],
                    'hair straighteners',
                    'television',
                    'dish washer',
                    ['washer dryer', 'washing machine']],
        max_appliance_powers=[300, 500, 200, 2500, 2400],
        on_power_thresholds=[5, 5, 5, 5, 5],
        max_input_power=5900,
        min_on_durations=[60, 60, 60, 1800, 1800],
        min_off_durations=[12, 12, 12, 1800, 600],
        window=("2013-06-01", "2014-07-01"),
        seq_length=1500,
        output_one_appliance=False,
        boolean_targets=False,
        train_buildings=[1],
        validation_buildings=[1],
        skip_probability=0,
        n_seq_per_batch=25,
        include_diff=True)
    net = Net(
        experiment_name=name,
        source=source,
        save_plot_interval=250,
        loss_function=crossentropy,
        updates=partial(nesterov_momentum, learning_rate=.1, clip_range=(-1, 1)),
        layers_config=[
            {'type': LSTMLayer, 'num_units': 60, 'W_in_to_cell': Uniform(25),
             'gradient_steps': GRADIENT_STEPS, 'peepholes': False},
            {'type': DenseLayer, 'num_units': source.n_outputs, 'nonlinearity': sigmoid}
        ],
        layer_changes={
            1000: {
                'remove_from': -3,
                'callback': set_subsample_target,
                'new_layers': [
                    {'type': DimshuffleLayer, 'pattern': (0, 2, 1)},
                    {'type': Conv1DLayer, 'num_filters': 80, 'filter_length': 5,
                     'stride': 5, 'nonlinearity': sigmoid, 'W': Uniform(1)},
                    {'type': DimshuffleLayer, 'pattern': (0, 2, 1)},
                    {'type': DenseLayer, 'num_units': source.n_outputs,
                     'nonlinearity': sigmoid}
                ]
            },
            2000: {
                'remove_from': -3,
                'new_layers': [
                    {'type': LSTMLayer, 'num_units': 80, 'W_in_to_cell': Uniform(1),
                     'gradient_steps': GRADIENT_STEPS, 'peepholes': False},
                    {'type': DenseLayer, 'num_units': source.n_outputs,
                     'nonlinearity': sigmoid}
                ]
            }
        })
    return net
def exp_i(name):
    # like a but with max power = 1000W and 5 appliances
    # tanh and softplus output
    # sane inits for other layers
    source_dict_copy = deepcopy(source_dict)
    source_dict_copy.update(
        dict(standardise_targets=True,
             unit_variance_targets=True,
             max_input_power=1000,
             skip_probability=0.9))
    source_dict_copy['appliances'] = [
        ['fridge freezer', 'fridge', 'freezer'],
        'hair straighteners',
        'television',
        'dish washer',
        ['washer dryer', 'washing machine']]
    source = RealApplianceSource(**source_dict_copy)
    net_dict_copy = deepcopy(net_dict)
    net_dict_copy.update(
        dict(experiment_name=name,
             source=source,
             loss_function=lambda x, t: mse(x, t).mean(),
             learning_rate=1e-3,
             learning_rate_changes_by_iteration={
                 1000: 1e-4,
                 2000: 1e-5
             }))
    net_dict_copy['layers_config'] = [
        {'type': DenseLayer, 'num_units': 50, 'nonlinearity': tanh,
         'W': Uniform(25), 'b': Uniform(25)},
        {'type': DenseLayer, 'num_units': 50, 'nonlinearity': tanh,
         'W': Normal(std=1 / sqrt(50)), 'b': Normal(std=1 / sqrt(50))},
        {'type': BidirectionalRecurrentLayer, 'num_units': 40,
         'W_in_to_hid': Normal(std=1 / sqrt(50)), 'gradient_steps': GRADIENT_STEPS,
         'nonlinearity': tanh, 'learn_init': False, 'precompute_input': False},
        {'type': DimshuffleLayer, 'pattern': (0, 2, 1)},
        {'type': Conv1DLayer, 'num_filters': 20, 'filter_length': 4, 'stride': 4,
         'nonlinearity': tanh, 'W': Normal(std=1 / sqrt(50))},
        {'type': DimshuffleLayer, 'pattern': (0, 2, 1)},
        {'type': BidirectionalRecurrentLayer, 'num_units': 80,
         'W_in_to_hid': Normal(std=1 / sqrt(50)), 'gradient_steps': GRADIENT_STEPS,
         'nonlinearity': tanh, 'learn_init': False, 'precompute_input': False},
        {'type': DenseLayer, 'num_units': source.n_outputs,
         'nonlinearity': T.nnet.softplus}
    ]
    net = Net(**net_dict_copy)
    return net
def exp_d(name):
    # tanh and softplus output
    # sane inits for other layers
    # batch norm
    source_dict_copy = deepcopy(source_dict)
    source_dict_copy.update(
        dict(standardise_targets=True,
             unit_variance_targets=True))
    source = RealApplianceSource(**source_dict_copy)
    net_dict_copy = deepcopy(net_dict)
    net_dict_copy.update(
        dict(experiment_name=name,
             source=source,
             loss_function=lambda x, t: mse(x, t).mean(),
             learning_rate=1e-3,
             learning_rate_changes_by_iteration={
                 1000: 1e-4,
                 2000: 1e-5
             }))
    net_dict_copy['layers_config'] = [
        {'type': DenseLayer, 'num_units': 50, 'nonlinearity': identity,
         'W': Uniform(25), 'b': Uniform(25)},
        {'type': BatchNormLayer, 'axes': (0, 1), 'nonlinearity': tanh},
        {'type': DenseLayer, 'num_units': 50, 'nonlinearity': identity,
         'W': Normal(std=1 / sqrt(50)), 'b': Normal(std=1 / sqrt(50))},
        {'type': BatchNormLayer, 'axes': (0, 1), 'nonlinearity': tanh},
        {'type': BidirectionalRecurrentLayer, 'num_units': 40,
         'W_in_to_hid': Normal(std=1 / sqrt(50)), 'gradient_steps': GRADIENT_STEPS,
         'nonlinearity': tanh,  # need nonlinearity for hid_to_hid
         'learn_init': False, 'precompute_input': False},
        {'type': BatchNormLayer, 'axes': (0, 1), 'nonlinearity': identity},
        {'type': DimshuffleLayer, 'pattern': (0, 2, 1)},
        {'type': Conv1DLayer, 'num_filters': 20, 'filter_length': 4, 'stride': 4,
         'nonlinearity': identity, 'W': Normal(std=1 / sqrt(50))},
        {'type': BatchNormLayer, 'nonlinearity': tanh},
        {'type': DimshuffleLayer, 'pattern': (0, 2, 1)},
        {'type': BidirectionalRecurrentLayer, 'num_units': 80,
         'W_in_to_hid': Normal(std=1 / sqrt(50)), 'gradient_steps': GRADIENT_STEPS,
         'nonlinearity': tanh, 'learn_init': False, 'precompute_input': False},
        {'type': BatchNormLayer, 'nonlinearity': tanh, 'axes': (0, 1)},
        {'type': DenseLayer, 'num_units': source.n_outputs,
         'nonlinearity': T.nnet.softplus}
    ]
    net = Net(**net_dict_copy)
    return net
    source=source,
    learning_rate=1e-1,
    save_plot_interval=50,
    loss_function=crossentropy,
    layers_config=[
        {'type': DimshuffleLayer, 'pattern': (0, 2, 1)},
        {'type': Conv1DLayer, 'num_filters': 50, 'filter_length': 3, 'stride': 1,
         'nonlinearity': sigmoid, 'W': Uniform(25), 'b': Uniform(25)},
        {'type': Conv1DLayer, 'num_filters': 50, 'filter_length': 3, 'stride': 1,
         'nonlinearity': sigmoid
         # 'W': Uniform(10),
         # 'b': Uniform(10)
         },
        {'type': Conv1DLayer, 'num_filters': 50, 'filter_length': 5,
train = np.asarray(train, dtype=np.float64)
labels = np.asarray(labels, dtype=np.int32).reshape(-1, 1)

net = NeuralNet(
    layers=[
        ('input', InputLayer),
        ('dropout0', DropoutLayer),
        ('hidden1', DenseLayer),
        ('dropout1', DropoutLayer),
        ('hidden2', DenseLayer),
        ('output', DenseLayer),
    ],
    input_shape=(None, len(train[1])),
    dropout0_p=0.1,
    hidden1_num_units=50,
    hidden1_W=Uniform(),
    dropout1_p=0.2,
    hidden2_num_units=40,
    # hidden2_W=Uniform(),
    output_nonlinearity=sigmoid,
    output_num_units=1,
    update=nesterov_momentum,
    update_learning_rate=theano.shared(np.float32(0.0001)),
    update_momentum=theano.shared(np.float32(0.9)),
    # Decay the learning rate
    on_epoch_finished=[
        AdjustVariable('update_learning_rate', start=0.0001, stop=0.00001),
        AdjustVariable('update_momentum', start=0.9, stop=0.99),
    ],
    regression=True,
    y_tensor_type=T.imatrix,
def exp_e(name):
    # Same as A but without gradient steps
    source = RealApplianceSource(
        filename='/data/dk3810/ukdale.h5',
        appliances=[
            ['fridge freezer', 'fridge', 'freezer'],
            'hair straighteners',
            'television'
            # 'dish washer',
            # ['washer dryer', 'washing machine']
        ],
        max_appliance_powers=[300, 500, 200],  # , 2500, 2400],
        on_power_thresholds=[20, 20, 20],  # , 20, 20],
        max_input_power=1000,
        min_on_durations=[60, 60, 60],  # , 1800, 1800],
        window=("2013-06-01", "2014-07-01"),
        seq_length=1000,
        output_one_appliance=False,
        boolean_targets=False,
        min_off_duration=60,
        subsample_target=5,
        train_buildings=[1],
        validation_buildings=[1],
        skip_probability=0
    )
    net = Net(
        experiment_name=name,
        source=source,
        save_plot_interval=SAVE_PLOT_INTERVAL,
        loss_function=crossentropy,
        updates=partial(nesterov_momentum, learning_rate=0.01),
        layers_config=[
            {'type': DenseLayer, 'num_units': 50, 'nonlinearity': sigmoid,
             'W': Uniform(25), 'b': Uniform(25)},
            {'type': DenseLayer, 'num_units': 50, 'nonlinearity': sigmoid,
             'W': Uniform(10), 'b': Uniform(10)},
            {'type': BLSTMLayer, 'num_units': 40, 'W_in_to_cell': Uniform(5)},
            {'type': DimshuffleLayer, 'pattern': (0, 2, 1)},
            {'type': Conv1DLayer, 'num_filters': 20, 'filter_length': 5, 'stride': 5,
             'nonlinearity': sigmoid},
            {'type': DimshuffleLayer, 'pattern': (0, 2, 1)},
            {'type': BLSTMLayer, 'num_units': 80, 'W_in_to_cell': Uniform(5)},
            {'type': DenseLayer, 'num_units': source.n_outputs, 'nonlinearity': sigmoid}
        ]
    )
    return net
def exp_a(name):
    source = RealApplianceSource(
        filename='/data/dk3810/ukdale.h5',
        appliances=[
            ['fridge freezer', 'fridge', 'freezer'],
            'hair straighteners',
            'television',
            'dish washer',
            ['washer dryer', 'washing machine']
        ],
        max_appliance_powers=[1, 0.5, 2, 10, 10],
        on_power_thresholds=[5, 5, 5, 5, 5],
        max_input_power=5900,
        min_on_durations=[60, 60, 60, 1800, 1800],
        min_off_durations=[12, 12, 12, 1800, 600],
        window=("2013-06-01", "2014-07-01"),
        seq_length=1520,
        output_one_appliance=False,
        boolean_targets=False,
        train_buildings=[1],
        validation_buildings=[1],
        skip_probability=0.7,
        n_seq_per_batch=25,
        input_padding=1,
        include_diff=False,
        clip_appliance_power=False
    )
    net = Net(
        experiment_name=name,
        source=source,
        save_plot_interval=1000,
        loss_function=mse,
        updates=partial(nesterov_momentum, learning_rate=.00001, clip_range=(-1, 1)),
        layers_config=[
            {'type': DimshuffleLayer, 'pattern': (0, 2, 1)},  # (batch, features, time)
            {'type': Conv1DLayer,  # convolve over the time axis
             'num_filters': 10, 'filter_length': 2, 'stride': 1, 'nonlinearity': sigmoid},
            {'type': DimshuffleLayer, 'pattern': (0, 2, 1)},  # back to (batch, time, features)
            {'type': DenseLayer, 'num_units': 50, 'nonlinearity': rectify},
            {'type': DenseLayer, 'num_units': source.n_outputs, 'nonlinearity': None,
             'W': Uniform(25)}
        ]
    )
    return net
def test_uniform_glorot_receptive_field():
    from lasagne.init import Uniform
    sample = Uniform().sample((150, 150, 2))
    assert -0.11 < sample.min() < -0.09
    assert 0.09 < sample.max() < 0.11
    #subsample_target=5,
    #input_padding=4,
    train_buildings=[1],
    validation_buildings=[1])

net = Net(
    experiment_name="e84",
    source=source,
    save_plot_interval=50,
    loss_function=crossentropy,
    #updates=nesterov_momentum,
    layers_config=[
        {'type': LSTMLayer, 'num_units': 20, 'W_in_to_cell': Uniform(1)},
        # {
        #     'type': DimshuffleLayer,
        #     'pattern': (0, 2, 1)
        # },
        # {
        #     'type': Conv1DLayer,
        #     'num_filters': 40,
        #     'filter_length': 5,
        #     'stride': 5,
        #     'nonlinearity': sigmoid
        # },
        # {
        #     'type': DimshuffleLayer,
        #     'pattern': (0, 2, 1)
def build_rnn_network(rnnmodel, X_sym, hid_init_sym):
    net = {}
    net['input0'] = InputLayer((batch_size, seq_len), X_sym)
    net['input'] = lasagne.layers.EmbeddingLayer(
        net['input0'], outputclass, units[0])  # ,W=lasagne.init.Uniform(inial_scale)
    net['rnn0'] = DimshuffleLayer(
        net['input'], (1, 0, 2))  # change to (time, batch_size, hidden_units)

    for l in range(1, num_layers + 1):
        net['hiddeninput%d' % l] = InputLayer(
            (batch_size, units[l - 1]),
            hid_init_sym[:, acc_units[l - 1]:acc_units[l]])
        net['rnn%d' % (l - 1)] = ReshapeLayer(net['rnn%d' % (l - 1)],
                                              (batch_size * seq_len, -1))
        net['rnn%d' % (l - 1)] = DenseLayer(
            net['rnn%d' % (l - 1)], units[l - 1], W=ini_W,
            b=lasagne.init.Constant(args.ini_b),
            nonlinearity=None)  # W=Uniform(ini_rernn_in_to_hid),
        net['rnn%d' % (l - 1)] = ReshapeLayer(net['rnn%d' % (l - 1)],
                                              (seq_len, batch_size, -1))

        if args.use_residual and l > args.residual_layers and \
                (l - 1) % args.residual_layers == 0:  # and l!=num_layers
            if units[l - 1] != units[l - 1 - args.residual_layers]:
                net['leftbranch%d' % (l - 1)] = ReshapeLayer(
                    net['sum%d' % (l - args.residual_layers)],
                    (batch_size * seq_len, -1))
                net['leftbranch%d' % (l - 1)] = DenseLayer(
                    net['leftbranch%d' % (l - 1)], units[l - 1],
                    W=ini_W, nonlinearity=None)
                net['leftbranch%d' % (l - 1)] = ReshapeLayer(
                    net['leftbranch%d' % (l - 1)], (seq_len, batch_size, -1))
                net['leftbranch%d' % (l - 1)] = BatchNorm_step_timefirst_Layer(
                    net['leftbranch%d' % (l - 1)], axes=(0, 1))
                print('left branch')
            else:
                net['leftbranch%d' % (l - 1)] = net['sum%d' % (l - args.residual_layers)]
            net['sum%d' % l] = ElemwiseSumLayer(
                (net['rnn%d' % (l - 1)], net['leftbranch%d' % (l - 1)]))
        else:
            net['sum%d' % l] = net['rnn%d' % (l - 1)]

        net['rnn%d' % l] = net['sum%d' % l]
        if not args.use_bn_afterrnn:
            net['rnn%d' % l] = BatchNorm_step_timefirst_Layer(
                net['rnn%d' % l], axes=(0, 1),
                beta=lasagne.init.Constant(args.ini_b))

        ini_hid_start = 0
        if act == tanh:
            ini_hid_start = -1 * U_bound
        net['rnn%d' % l] = rnnmodel(net['rnn%d' % l], units[l - 1],
                                    hid_init=net['hiddeninput%d' % l],
                                    W_hid_to_hid=Uniform(range=(ini_hid_start, U_bound)),
                                    nonlinearity=act,
                                    only_return_final=False,
                                    grad_clipping=args.gradclipvalue)

        net['last_state%d' % l] = SliceLayer(net['rnn%d' % l], -1, axis=0)
        if l == 1:
            net['hid_out'] = net['last_state%d' % l]
        else:
            net['hid_out'] = ConcatLayer([net['hid_out'], net['last_state%d' % l]],
                                         axis=1)

        if use_dropout and l % droplayers == 0:
            net['rnn%d' % l] = lasagne.layers.DropoutLayer(
                net['rnn%d' % l], p=droprate, shared_axes=taxdrop)

        if args.use_bn_afterrnn:
            net['rnn%d' % l] = BatchNorm_step_timefirst_Layer(
                net['rnn%d' % l], axes=(0, 1))

    net['rnn%d' % num_layers] = DimshuffleLayer(net['rnn%d' % num_layers], (1, 0, 2))
    net['reshape_rnn'] = ReshapeLayer(net['rnn%d' % num_layers],
                                      (-1, units[num_layers - 1]))
    net['out'] = DenseLayer(
        net['reshape_rnn'], outputclass,
        nonlinearity=softmax)  # lasagne.init.HeNormal(gain='relu'))#,W=Uniform(inial_scale)
    return net
train = np.asarray(train, dtype=np.float32)
labels = np.asarray(labels, dtype=np.int32).reshape(-1, 1)

net = NeuralNet(
    layers=[
        ('input', InputLayer),
        ('hidden1', DenseLayer),
        ('hidden2', DenseLayer),
        ('hidden3', DenseLayer),
        ('output', DenseLayer),
    ],
    input_shape=(None, len(train[1])),
    hidden1_num_units=100,
    hidden1_W=Uniform(),
    hidden2_num_units=50,
    hidden2_W=Uniform(),
    hidden3_num_units=25,
    hidden3_W=Uniform(),
    output_nonlinearity=sigmoid,
    output_num_units=1,
    update=nesterov_momentum,
    update_learning_rate=theano.shared(np.float32(0.001)),
    update_momentum=theano.shared(np.float32(0.9)),
    # Decay the learning rate
    on_epoch_finished=[
        AdjustVariable('update_learning_rate', start=0.001, stop=0.0001),
        AdjustVariable('update_momentum', start=0.9, stop=0.99),
    ],
def exp_a(name):
    global source
    source = RealApplianceSource(
        filename='/data/dk3810/ukdale.h5',
        appliances=[['fridge freezer', 'fridge', 'freezer'],
                    'hair straighteners',
                    'television',
                    'dish washer',
                    ['washer dryer', 'washing machine']],
        max_appliance_powers=[300, 500, 200, 2500, 2400],
        on_power_thresholds=[5] * 5,
        max_input_power=5900,
        min_on_durations=[60, 60, 60, 1800, 1800],
        min_off_durations=[12, 12, 12, 1800, 600],
        window=("2013-06-01", "2014-07-01"),
        seq_length=1500,
        output_one_appliance=False,
        boolean_targets=False,
        train_buildings=[1],
        validation_buildings=[1],
        skip_probability=0.7,
        n_seq_per_batch=10,
        subsample_target=5,
        # input_padding=4,
        include_diff=False,
        clip_appliance_power=False,
        lag=0)
    net = Net(
        experiment_name=name,
        source=source,
        save_plot_interval=SAVE_PLOT_INTERVAL,
        loss_function=crossentropy,
        updates=partial(nesterov_momentum, learning_rate=1.0),
        layers_config=[
            {'type': DenseLayer, 'num_units': 50, 'nonlinearity': sigmoid,
             'W': Uniform(25), 'b': Uniform(25)},
            {'type': DenseLayer, 'num_units': 50, 'nonlinearity': sigmoid,
             'W': Uniform(10), 'b': Uniform(10)},
            {'type': LSTMLayer, 'num_units': 40, 'W_in_to_cell': Uniform(5),
             'gradient_steps': GRADIENT_STEPS, 'peepholes': False},
            {'type': DimshuffleLayer, 'pattern': (0, 2, 1)},
            {'type': Conv1DLayer, 'num_filters': 20, 'filter_length': 5, 'stride': 5,
             'nonlinearity': sigmoid
             # 'W': Uniform(1)
             },
            {'type': DimshuffleLayer, 'pattern': (0, 2, 1)},
            # {
            #     'type': FeaturePoolLayer,
            #     'ds': 5,  # number of feature maps to be pooled together
            #     'axis': 1  # pool over the time axis
            # },
            {'type': LSTMLayer, 'num_units': 80, 'W_in_to_cell': Uniform(5),
             'gradient_steps': GRADIENT_STEPS, 'peepholes': False},
            {'type': DenseLayer, 'num_units': source.n_outputs, 'nonlinearity': sigmoid
             # 'W': Uniform(1)
             }
        ])
    return net
def exp_e(name):
    # D but with downsampling 5x
    source = RealApplianceSource(
        filename='/data/dk3810/ukdale.h5',
        appliances=[
            ['fridge freezer', 'fridge', 'freezer'],
            'hair straighteners',
            'television',
            'dish washer',
            ['washer dryer', 'washing machine']
        ],
        max_appliance_powers=None,  # [200, 100, 200, 2500, 2400],
        on_power_thresholds=[5, 5, 5, 5, 5],
        max_input_power=5900,
        min_on_durations=[60, 60, 60, 1800, 1800],
        min_off_durations=[12, 12, 12, 1800, 600],
        window=("2013-06-01", "2014-07-01"),
        seq_length=1500,
        output_one_appliance=False,
        boolean_targets=False,
        train_buildings=[1],
        validation_buildings=[1],
        skip_probability=0,
        n_seq_per_batch=25,
        include_diff=True,
        subsample_target=5,
        input_padding=4
    )
    net = Net(
        experiment_name=name,
        source=source,
        save_plot_interval=1000,
        loss_function=scaled_cost,
        updates=partial(nesterov_momentum, learning_rate=.0000001, clip_range=(-1, 1)),
        layers_config=[
            {'type': LSTMLayer, 'num_units': 50, 'W_in_to_cell': Uniform(25),
             'gradient_steps': GRADIENT_STEPS, 'peepholes': False},
            {'type': DimshuffleLayer, 'pattern': (0, 2, 1)},  # (batch, features, time)
            {'type': Conv1DLayer,  # convolve over the time axis
             'num_filters': 50, 'filter_length': 5, 'stride': 1,
             'nonlinearity': sigmoid, 'W': Uniform(1)},
            {'type': DimshuffleLayer, 'pattern': (0, 2, 1)},  # back to (batch, time, features)
            {'type': FeaturePoolLayer,
             'ds': 5,     # number of feature maps to be pooled together
             'axis': 1},  # pool over the time axis
            {'type': LSTMLayer, 'num_units': 50, 'W_in_to_cell': Uniform(1),
             'gradient_steps': GRADIENT_STEPS, 'peepholes': False},
            {'type': DenseLayer, 'num_units': 50, 'nonlinearity': rectify},
            {'type': DenseLayer, 'num_units': source.n_outputs, 'nonlinearity': None,
             'W': Uniform(25)}
        ]
    )
    return net
net = Net(experiment_name="e83", source=source, learning_rate=1e-1, save_plot_interval=250, loss_function=crossentropy, layers_config=[{ 'type': DimshuffleLayer, 'pattern': (0, 2, 1) }, { 'type': Conv1DLayer, 'num_filters': 50, 'filter_length': 3, 'stride': 1, 'nonlinearity': sigmoid, 'W': Uniform(25), 'b': Uniform(25) }, { 'type': Conv1DLayer, 'num_filters': 50, 'filter_length': 3, 'stride': 1, 'nonlinearity': sigmoid, 'W': Uniform(10), 'b': Uniform(10) }, { 'type': DimshuffleLayer, 'pattern': (0, 2, 1) }, { 'type': BLSTMLayer, 'num_units': 50,
def exp_b(name):
    source = RealApplianceSource(
        filename='/data/dk3810/ukdale.h5',
        appliances=[['fridge freezer', 'fridge', 'freezer'],
                    'hair straighteners',
                    'television',
                    'dish washer',
                    ['washer dryer', 'washing machine']],
        max_appliance_powers=[2500] * 5,
        on_power_thresholds=[5] * 5,
        max_input_power=2500,
        min_on_durations=[60, 60, 60, 1800, 1800],
        min_off_durations=[12, 12, 12, 1800, 600],
        window=("2013-06-01", "2014-07-01"),
        seq_length=1520,
        output_one_appliance=False,
        boolean_targets=False,
        train_buildings=[1],
        validation_buildings=[1],
        skip_probability=0.7,
        n_seq_per_batch=25,
        input_padding=4,
        include_diff=False,
        clip_appliance_power=False)
    net = Net(
        experiment_name=name,
        source=source,
        save_plot_interval=1000,
        loss_function=scaled_cost,
        updates=partial(nesterov_momentum, learning_rate=0.1, clip_range=(-1, 1)),
        layers_config=[
            {'type': DimshuffleLayer, 'pattern': (0, 2, 1)},  # (batch, features, time)
            {'type': Conv1DLayer,  # convolve over the time axis
             'num_filters': 50, 'filter_length': 5, 'stride': 1,
             'nonlinearity': sigmoid, 'W': Uniform(10)},
            {'type': DimshuffleLayer, 'pattern': (0, 2, 1)},  # back to (batch, time, features)
            {'type': DenseLayer, 'num_units': source.n_outputs, 'nonlinearity': None}
        ],
        layer_changes={
            10000: {
                'remove_from': -3,
                'new_layers': [
                    {'type': LSTMLayer, 'num_units': 50, 'W_in_to_cell': Uniform(1),
                     'gradient_steps': GRADIENT_STEPS, 'peepholes': False},
                    {'type': DenseLayer, 'num_units': source.n_outputs,
                     'nonlinearity': None}
                ]
            }
        })
    return net
def exp_a(name):
    # global source
    # source = RealApplianceSource(
    #     filename='/data/dk3810/ukdale.h5',
    #     appliances=[
    #         ['fridge freezer', 'fridge', 'freezer'],
    #         'hair straighteners',
    #         'television'
    #         # 'dish washer',
    #         # ['washer dryer', 'washing machine']
    #     ],
    #     max_appliance_powers=[2500] * 5,
    #     on_power_thresholds=[5] * 5,
    #     max_input_power=2500,
    #     min_on_durations=[60, 60, 60, 1800, 1800],
    #     min_off_durations=[12, 12, 12, 1800, 600],
    #     window=("2013-06-01", "2014-07-01"),
    #     seq_length=1520,
    #     output_one_appliance=False,
    #     boolean_targets=False,
    #     train_buildings=[1],
    #     validation_buildings=[1],
    #     skip_probability=0.7,
    #     n_seq_per_batch=25,
    #     input_padding=1,
    #     include_diff=False,
    #     clip_appliance_power=False
    # )
    net = Net(
        experiment_name=name,
        source=source,
        save_plot_interval=1000,
        loss_function=scaled_cost,
        updates=partial(nesterov_momentum, learning_rate=0.1, clip_range=(-1, 1)),
        layers_config=[
            {'type': DimshuffleLayer, 'pattern': (0, 2, 1)},  # (batch, features, time)
            {'type': Conv1DLayer,  # convolve over the time axis
             'num_filters': 10, 'filter_length': 2, 'stride': 1,
             'nonlinearity': sigmoid, 'W': Uniform(5)},
            {'type': DimshuffleLayer, 'pattern': (0, 2, 1)},  # back to (batch, time, features)
            {'type': DenseLayer, 'num_units': 50, 'nonlinearity': rectify},
            {'type': DenseLayer, 'num_units': source.n_outputs, 'nonlinearity': None
             # 'W': Uniform()
             }
        ])
    return net
def test_uniform_mean_std():
    from lasagne.init import Uniform
    sample = Uniform(std=1.0, mean=5.0).sample((300, 400))
    assert 4.9 < sample.mean() < 5.1
    assert 0.9 < sample.std() < 1.1
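# A minimal usage sketch (not part of the original snippets) summarising the
# lasagne.init.Uniform parameterisations exercised by the tests in this
# collection: an explicit (low, high) range, a single number a meaning the
# symmetric range [-a, a], and std/mean. The no-argument case, whose range is
# derived from the weight shape, is covered by test_uniform_glorot above.
from lasagne.init import Uniform

w_range = Uniform((0.0, 1.0)).sample((300, 400))       # uniform on [0, 1]
w_symmetric = Uniform(1.0).sample((300, 400))          # uniform on [-1, 1]
w_std = Uniform(std=1.0, mean=5.0).sample((300, 400))  # centred on 5.0 with std 1.0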
net = Net(experiment_name="e77", source=source, learning_rate=1e-1, save_plot_interval=50, loss_function=crossentropy, layers_config=[{ 'type': DimshuffleLayer, 'pattern': (0, 2, 1) }, { 'type': Conv1DLayer, 'num_filters': 50, 'filter_length': 3, 'stride': 1, 'nonlinearity': sigmoid, 'W': Uniform(1), 'b': Uniform(1) }, { 'type': Conv1DLayer, 'num_filters': 50, 'filter_length': 3, 'stride': 1, 'nonlinearity': sigmoid, 'W': Uniform(1), 'b': Uniform(1) }, { 'type': DimshuffleLayer, 'pattern': (0, 2, 1) }, { 'type': LSTMLayer, 'num_units': 80,
def exp_a(name): """Results: Learning rate of 0.1 still NaNs.""" """e91d but learning rate 0.01 and smaller inits (to try to capture smaller changes) and larger first layer And e96 centres input data. And I've fixed a problem where only the last instance of an appliance if multiple appliances occured within a batch would be shown in the targets. e98: Output just the fridge and use bool targets e99 seq_length = 1000 learning rate = 0.01 (tried 0.1 and 0.05 but both NaN'd) max_input_power = 500 don't bother centering X only 50 units in first layer back to just 3 appliances skip prob = 0 e100 boolean_targets = False output_one_appliance=False e101 max_input_power = 1000 init back to Uniform(25) conv layer back to 20 filters """ source = RealApplianceSource( filename='/data/dk3810/ukdale.h5', appliances=[ ['fridge freezer', 'fridge', 'freezer'], 'hair straighteners', 'television' # 'dish washer', # ['washer dryer', 'washing machine'] ], max_appliance_powers=[300, 500, 200], #, 2500, 2400], on_power_thresholds=[20, 20, 20], #, 20, 20], max_input_power=1000, min_on_durations=[60, 60, 60], #, 1800, 1800], window=("2013-06-01", "2014-07-01"), seq_length=1000, output_one_appliance=False, boolean_targets=False, min_off_duration=60, subsample_target=5, train_buildings=[1], validation_buildings=[1], skip_probability=0 ) net = Net( experiment_name=name, source=source, save_plot_interval=SAVE_PLOT_INTERVAL, loss_function=crossentropy, updates=partial(nesterov_momentum, learning_rate=0.01), layers_config=[ { 'type': DenseLayer, 'num_units': 50, 'nonlinearity': sigmoid, 'W': Uniform(25), 'b': Uniform(25) }, { 'type': DenseLayer, 'num_units': 50, 'nonlinearity': sigmoid, 'W': Uniform(10), 'b': Uniform(10) }, { 'type': BLSTMLayer, 'num_units': 40, 'W_in_to_cell': Uniform(5), 'gradient_steps': GRADIENT_STEPS }, { 'type': DimshuffleLayer, 'pattern': (0, 2, 1) }, { 'type': Conv1DLayer, 'num_filters': 20, 'filter_length': 5, 'stride': 5, 'nonlinearity': sigmoid }, { 'type': DimshuffleLayer, 'pattern': (0, 2, 1) }, { 'type': BLSTMLayer, 'num_units': 80, 'W_in_to_cell': Uniform(5), 'gradient_steps': GRADIENT_STEPS }, { 'type': DenseLayer, 'num_units': source.n_outputs, 'nonlinearity': sigmoid } ] ) return net
def exp_b(name):
    # e59 but ReLU
    source_dict_copy = deepcopy(source_dict)
    source = RealApplianceSource(**source_dict_copy)
    net_dict_copy = deepcopy(net_dict)
    net_dict_copy.update(dict(
        experiment_name=name,
        source=source,
        learning_rate=1e-3,
        learning_rate_changes_by_iteration={
            1000: 1e-4,
            2000: 1e-5
        }
    ))
    net_dict_copy['layers_config'] = [
        {'type': DenseLayer, 'num_units': 50, 'nonlinearity': rectify,
         'W': Uniform(25), 'b': Uniform(25)},
        {'type': DenseLayer, 'num_units': 50, 'nonlinearity': rectify,
         'W': Uniform(10), 'b': Uniform(10)},
        {'type': BidirectionalRecurrentLayer, 'num_units': 40,
         'W_in_to_hid': Uniform(5), 'gradient_steps': GRADIENT_STEPS,
         'nonlinearity': rectify, 'learn_init': False, 'precompute_input': False,
         'W_hid_to_hid': Identity(0.5)},
        {'type': DimshuffleLayer, 'pattern': (0, 2, 1)},
        {'type': Conv1DLayer, 'num_filters': 20, 'filter_length': 4, 'stride': 4,
         'nonlinearity': rectify},
        {'type': DimshuffleLayer, 'pattern': (0, 2, 1)},
        {'type': BidirectionalRecurrentLayer, 'num_units': 80,
         'W_in_to_hid': Uniform(5), 'W_hid_to_hid': Identity(0.5),
         'gradient_steps': GRADIENT_STEPS, 'nonlinearity': rectify,
         'learn_init': False, 'precompute_input': False},
        {'type': DenseLayer, 'num_units': source.n_outputs, 'nonlinearity': sigmoid}
    ]
    net = Net(**net_dict_copy)
    return net