def exp_a(name):
    global source
    # source_dict_copy = deepcopy(source_dict)
    # source = RealApplianceSource(**source_dict_copy)
    net_dict_copy = deepcopy(net_dict)
    net_dict_copy.update(dict(experiment_name=name, source=source))
    net_dict_copy['layers_config'].extend([
        {
            'type': DenseLayer,
            'num_units': source.n_outputs,
            'nonlinearity': T.nnet.softplus,
            'W': Normal(std=1 / sqrt(40))
        }
    ])
    net = Net(**net_dict_copy)
    return net

def q_network(state):
    input_state = InputLayer(input_var=state, shape=(None, n_state))

    dense_1 = DenseLayer(input_state, num_units=n_state, nonlinearity=tanh,
                         W=Normal(0.1, 0.0), b=Constant(0.0))

    dense_2 = DenseLayer(dense_1, num_units=n_state, nonlinearity=tanh,
                         W=Normal(0.1, 0.0), b=Constant(0.0))

    q_values = DenseLayer(dense_2, num_units=n_action, nonlinearity=None,
                          W=Normal(0.1, 0.0), b=Constant(0.0))

    return q_values

def exp_b(name):
    global source
    source_dict_copy = deepcopy(source_dict)
    source = RealApplianceSource(**source_dict_copy)
    net_dict_copy = deepcopy(net_dict)
    net_dict_copy.update(dict(experiment_name=name, source=source))
    net_dict_copy['layers_config'].append({
        'type': DenseLayer,
        'num_units': source.n_outputs,
        'nonlinearity': None,
        'W': Normal(std=(1 / sqrt(25)))
    })
    net = Net(**net_dict_copy)
    return net

def exp_f(name):
    source_dict['appliances'].append('dish washer')
    source_dict['appliances'].append(['washer dryer', 'washing machine'])
    source_dict['skip_probability'] = 0.7
    source_dict_copy = deepcopy(source_dict)
    source = RealApplianceSource(**source_dict_copy)
    net_dict_copy = deepcopy(net_dict)
    net_dict_copy.update(dict(experiment_name=name, source=source))
    net_dict_copy['layers_config'] = [
        {
            'type': BLSTMLayer,
            'num_units': 50,
            'gradient_steps': GRADIENT_STEPS,
            'peepholes': False,
            'W_in_to_cell': Normal(std=1.)
        },
        {
            'type': DenseLayer,
            'num_units': source.n_outputs,
            'nonlinearity': None,
            'W': Normal(std=(1 / sqrt(50)))
        }
    ]
    net = Net(**net_dict_copy)
    return net

def exp_b(name):
    # as above but without gradient_steps
    source = RealApplianceSource(
        filename='/data/dk3810/ukdale.h5',
        appliances=[
            ['fridge freezer', 'fridge', 'freezer'],
            'hair straighteners',
            'television'
            # 'dish washer',
            # ['washer dryer', 'washing machine']
        ],
        max_appliance_powers=[300, 500, 200],  # , 2500, 2400],
        on_power_thresholds=[20, 20, 20],  # , 20, 20],
        max_input_power=1000,
        min_on_durations=[60, 60, 60],  # , 1800, 1800],
        window=("2013-06-01", "2014-07-01"),
        seq_length=1000,
        output_one_appliance=False,
        boolean_targets=False,
        min_off_duration=60,
        train_buildings=[1],
        validation_buildings=[1],
        skip_probability=0,
        n_seq_per_batch=5)
    net = Net(
        experiment_name=name,
        source=source,
        save_plot_interval=SAVE_PLOT_INTERVAL,
        loss_function=crossentropy,
        updates=partial(nesterov_momentum, learning_rate=0.1),
        layers_config=[
            {
                'type': DenseLayer,
                'num_units': 50,
                'nonlinearity': sigmoid,
                'b': Uniform(25),
                'W': Uniform(25)
            },
            {
                'type': DenseLayer,
                'num_units': 50,
                'nonlinearity': sigmoid,
                'b': Uniform(10),
                'W': Uniform(10)
            },
            {
                'type': LSTMLayer,
                'num_units': 50,
                'W_in_to_cell': Normal(1)
            },
            {
                'type': DenseLayer,
                'num_units': source.n_outputs,
                'nonlinearity': sigmoid
            }
        ])
    return net

def exp_g(name):
    global source
    try:
        a = source
    except NameError:
        source = RealApplianceSource(**source_dict)
    source.lag = 5
    net_dict_copy = deepcopy(net_dict)
    net_dict_copy.update(dict(experiment_name=name, source=source))
    net_dict_copy['layers_config'] = [
        {
            'type': LSTMLayer,
            'num_units': 200,
            'gradient_steps': GRADIENT_STEPS,
            'peepholes': False,
            'W_in_to_cell': Normal(std=1.)
        },
        {
            'type': DenseLayer,
            'num_units': source.n_outputs,
            'nonlinearity': None,
            'W': Normal(std=(1 / sqrt(200)))
        }
    ]
    net = Net(**net_dict_copy)
    return net

def generator(input_var):
    network = lasagne.layers.InputLayer(shape=(None, NLAT, 1, 1), input_var=input_var)
    network = ll.DenseLayer(network, num_units=4*4*64, W=Normal(0.05),
                            nonlinearity=nn.relu)
    # print(input_var.shape[0])
    network = ll.ReshapeLayer(network, (batch_size, 64, 4, 4))
    network = nn.Deconv2DLayer(network, (batch_size, 32, 7, 7), (4, 4),
                               stride=(1, 1), pad='valid',
                               W=Normal(0.05), nonlinearity=nn.relu)
    network = nn.Deconv2DLayer(network, (batch_size, 32, 11, 11), (5, 5),
                               stride=(1, 1), pad='valid',
                               W=Normal(0.05), nonlinearity=nn.relu)
    network = nn.Deconv2DLayer(network, (batch_size, 32, 25, 25), (5, 5),
                               stride=(2, 2), pad='valid',
                               W=Normal(0.05), nonlinearity=nn.relu)
    network = nn.Deconv2DLayer(network, (batch_size, 1, 28, 28), (4, 4),
                               stride=(1, 1), pad='valid',
                               W=Normal(0.05), nonlinearity=sigmoid)
    # network = lasagne.layers.Conv2DLayer(network, num_filters=1, filter_size=1,
    #                                      stride=1, nonlinearity=sigmoid)
    return network

def exp_a(name):
    global source
    # source_dict_copy = deepcopy(source_dict)
    # source = RealApplianceSource(**source_dict_copy)
    net_dict_copy = deepcopy(net_dict)
    net_dict_copy.update(dict(experiment_name=name, source=source))
    net_dict_copy['layers_config'] = [
        {
            'type': RecurrentLayer,
            'num_units': 50,
            'gradient_steps': GRADIENT_STEPS,
            'W_in_to_hid': Normal(std=1.)
        },
        {
            'type': DenseLayer,
            'num_units': source.n_outputs,
            'nonlinearity': None,
            'W': Normal(std=(1 / sqrt(50)))
        }
    ]
    net = Net(**net_dict_copy)
    return net

def exp_a(name):
    global source
    # source_dict_copy = deepcopy(source_dict)
    # source = RealApplianceSource(**source_dict_copy)
    source.subsample_target = 5
    net_dict_copy = deepcopy(net_dict)
    net_dict_copy.update(dict(experiment_name=name, source=source))
    net_dict_copy['layers_config'] = [
        {
            'type': BidirectionalRecurrentLayer,
            'num_units': 25,
            'gradient_steps': GRADIENT_STEPS,
            'W_in_to_hid': Normal(std=1.),
            'nonlinearity': tanh
        },
        {
            'type': FeaturePoolLayer,
            'ds': 5,  # number of feature maps to be pooled together
            'axis': 1,  # pool over the time axis
            'pool_function': T.mean
        },
        {
            'type': BidirectionalRecurrentLayer,
            'num_units': 25,
            'gradient_steps': GRADIENT_STEPS,
            'W_in_to_hid': Normal(std=1 / sqrt(25)),
            'nonlinearity': tanh
        },
        {
            'type': DenseLayer,
            'num_units': source.n_outputs,
            'nonlinearity': None,
            'W': Normal(std=(1 / sqrt(25)))
        }
    ]
    net = Net(**net_dict_copy)
    return net

def exp_a(name):
    # 3 appliances
    global source
    source_dict_copy = deepcopy(source_dict)
    source = RealApplianceSource(**source_dict_copy)
    net_dict_copy = deepcopy(net_dict)
    net_dict_copy.update(dict(experiment_name=name, source=source))
    N = 50
    net_dict_copy['layers_config'] = [
        {
            'type': BidirectionalRecurrentLayer,
            'num_units': N,
            'gradient_steps': GRADIENT_STEPS,
            'W_in_to_hid': Normal(std=1.),
            'nonlinearity': tanh
        },
        {
            'type': FeaturePoolLayer,
            'ds': 4,  # number of feature maps to be pooled together
            'axis': 1,  # pool over the time axis
            'pool_function': T.max
        },
        {
            'type': BidirectionalRecurrentLayer,
            'num_units': N,
            'gradient_steps': GRADIENT_STEPS,
            'W_in_to_hid': Normal(std=1 / sqrt(N)),
            'nonlinearity': tanh
        },
        {
            'type': MixtureDensityLayer,
            'num_units': source.n_outputs,
            'num_components': 2
        }
    ]
    net = Net(**net_dict_copy)
    return net

def exp_h(name):
    # replace tanh with sigmoid
    source_dict_copy = deepcopy(source_dict)
    source = RealApplianceSource(**source_dict_copy)
    net_dict_copy = deepcopy(net_dict)
    net_dict_copy.update(dict(experiment_name=name, source=source))
    net_dict_copy['layers_config'] = [
        {
            'type': DenseLayer,
            'num_units': 40,
            'nonlinearity': sigmoid,
            'W': Normal(std=1)
        },
        {
            'type': DenseLayer,
            'num_units': 40,
            'nonlinearity': sigmoid
        },
        {
            'type': BidirectionalRecurrentLayer,
            'num_units': 40,
            'gradient_steps': GRADIENT_STEPS,
            'nonlinearity': sigmoid,
            'learn_init': False,
            'precompute_input': False
        },
        {
            'type': DimshuffleLayer,
            'pattern': (0, 2, 1)
        },
        {
            'type': Conv1DLayer,
            'num_filters': 40,
            'filter_length': 4,
            'stride': 4,
            'nonlinearity': sigmoid
        },
        {
            'type': DimshuffleLayer,
            'pattern': (0, 2, 1)
        },
        {
            'type': BidirectionalRecurrentLayer,
            'num_units': 40,
            'gradient_steps': GRADIENT_STEPS,
            'nonlinearity': sigmoid,
            'learn_init': False,
            'precompute_input': False
        },
        {
            'type': DenseLayer,
            'num_units': source.n_outputs,
            'nonlinearity': T.nnet.softplus
        }
    ]
    net = Net(**net_dict_copy)
    return net

def exp_a(name):
    # ReLU hidden layers
    # linear output
    # output one appliance
    # 0% skip prob for first appliance
    # 100% skip prob for other appliances
    # input is diff
    global source
    source_dict_copy = deepcopy(source_dict)
    source = RealApplianceSource(**source_dict_copy)
    net_dict_copy = deepcopy(net_dict)
    net_dict_copy.update(dict(experiment_name=name, source=source))
    net_dict_copy['layers_config'] = [
        {
            'type': RecurrentLayer,
            'num_units': 256,
            'W_in_to_hid': Normal(std=1),
            'W_hid_to_hid': Identity(scale=0.9),
            'nonlinearity': rectify,
            'learn_init': False,
            'precompute_input': True
        },
        {
            'type': RecurrentLayer,
            'num_units': 256,
            'W_in_to_hid': Normal(std=1 / sqrt(256)),
            'W_hid_to_hid': Identity(scale=0.9),
            'nonlinearity': rectify,
            'learn_init': False,
            'precompute_input': True
        },
        {
            'type': DenseLayer,
            'num_units': source.n_outputs,
            'nonlinearity': None,
            'W': Normal(std=1 / sqrt(256))
        }
    ]
    net = Net(**net_dict_copy)
    return net

def exp_a(name):
    global source
    # source_dict_copy = deepcopy(source_dict)
    # source = RealApplianceSource(**source_dict_copy)
    net_dict_copy = deepcopy(net_dict)
    net_dict_copy.update(dict(experiment_name=name, source=source))
    N = 512
    output_shape = source.output_shape_after_processing()
    net_dict_copy['layers_config'] = [
        {
            'type': DenseLayer,
            'num_units': N,
            'W': Normal(std=1 / sqrt(N)),
            'nonlinearity': rectify
        },
        {
            'type': DenseLayer,
            'num_units': N // 2,
            'W': Normal(std=1 / sqrt(N)),
            'nonlinearity': rectify
        },
        {
            'type': DenseLayer,
            'num_units': N // 4,
            'W': Normal(std=1 / sqrt(N // 2)),
            'nonlinearity': rectify
        },
        {
            'type': DenseLayer,
            'num_units': output_shape[1] * output_shape[2],
            'W': Normal(std=1 / sqrt(N // 4)),
            'nonlinearity': T.nnet.softplus
        }
    ]
    net = Net(**net_dict_copy)
    net.load_params(25000)
    return net

def exp_b(name):
    global source
    try:
        a = source
    except NameError:
        source = RealApplianceSource(**source_dict)
    net_dict_copy = deepcopy(net_dict)
    net_dict_copy.update(
        dict(experiment_name=name, source=source, loss_function=scaled_cost))
    net_dict_copy['layers_config'].append({
        'type': DenseLayer,
        'num_units': source.n_outputs,
        'nonlinearity': None,
        'W': Normal(std=(1 / sqrt(50)))
    })
    net = Net(**net_dict_copy)
    return net

def conv_layer(input_, filter_size, num_filters, stride, pad,
               nonlinearity=relu, W=Normal(0.02), **kwargs):
    return dnn.Conv2DDNNLayer(input_,
                              num_filters=num_filters,
                              stride=parse_tuple(stride),
                              filter_size=parse_tuple(filter_size),
                              pad=pad,
                              W=W,
                              nonlinearity=nonlinearity,
                              **kwargs)

def style_conv_block(conv_in, num_styles, num_filters, filter_size, stride,
                     nonlinearity=rectify, normalization=instance_norm):
    sc_network = ReflectLayer(conv_in, filter_size // 2)
    sc_network = normalization(
        ConvLayer(sc_network, num_filters, filter_size, stride,
                  nonlinearity=nonlinearity, W=Normal()),
        num_styles=num_styles)
    return sc_network

def __init__(self,
             index_to_token,
             index_to_condition,
             skip_token=SPECIAL_TOKENS.PAD_TOKEN,
             learning_rate=ADADELTA_LEARNING_RATE,
             grad_clip=GRAD_CLIP,
             hidden_layer_dim=HIDDEN_LAYER_DIMENSION,
             encoder_depth=ENCODER_DEPTH,
             decoder_depth=DECODER_DEPTH,
             init_embedding=None,
             word_embedding_dim=WORD_EMBEDDING_DIMENSION,
             train_word_embedding=TRAIN_WORD_EMBEDDINGS_LAYER,
             dense_dropout_ratio=DENSE_DROPOUT_RATIO,
             condition_embedding_dim=CONDITION_EMBEDDING_DIMENSION):
    """
    :param index_to_token: Dict with tokens and indices for neural network
    :param skip_token: Token to skip with masking. Id of this token is inferred from
        the index_to_token dictionary.
    :param learning_rate: Starting learning rate for the optimization algorithm
    :param grad_clip: Clipping parameter to prevent gradient explosion.
    :param init_embedding: Matrix to initialize the word-embedding layer.
        Default value is random standard-gaussian initialization.
    """
    self._index_to_token = index_to_token
    self._token_to_index = {v: k for k, v in index_to_token.items()}
    self._vocab_size = len(self._index_to_token)

    self._index_to_condition = index_to_condition
    self._condition_to_index = {v: k for k, v in index_to_condition.items()}
    self._condition_ids_num = len(self._condition_to_index)
    self._condition_embedding_dim = condition_embedding_dim

    self._learning_rate = learning_rate
    self._grad_clip = grad_clip
    self._W_init_embedding = Normal() if init_embedding is None else init_embedding
    self._word_embedding_dim = word_embedding_dim
    self._train_word_embedding = train_word_embedding
    self._skip_token_id = self._token_to_index[skip_token]
    self._hidden_layer_dim = hidden_layer_dim
    self._encoder_depth = encoder_depth
    self._decoder_depth = decoder_depth
    self._dense_dropout_ratio = dense_dropout_ratio

    self._train_fn = None  # Training functions are compiled as needed

    self._build_model_computational_graph()
    self._compile_theano_functions_for_prediction()

def exp_x(name):
    global source
    try:
        source.lag = 1
        source.target_is_diff = False
    except NameError:
        source = RealApplianceSource(**source_dict)
    net_dict_copy = deepcopy(net_dict)
    net_dict_copy.update(dict(experiment_name=name, source=source))
    net_dict_copy['layers_config'].append({
        'type': DenseLayer,
        'num_units': source.n_outputs,
        'nonlinearity': sigmoid,
        'W': Normal(std=(1 / sqrt(50)))
    })
    net = Net(**net_dict_copy)
    return net

def convert_initialization(component, nonlinearity="sigmoid"):
    # component = init_dic[component_key]
    assert len(component) == 2
    if component[0] == "uniform":
        return Uniform(component[1])
    elif component[0] == "glorotnormal":
        if nonlinearity in ["linear", "sigmoid", "tanh"]:
            return GlorotNormal(1.)
        else:
            return GlorotNormal("relu")
    elif component[0] == "glorotuniform":
        if nonlinearity in ["linear", "sigmoid", "tanh"]:
            return GlorotUniform(1.)
        else:
            return GlorotUniform("relu")
    elif component[0] == "normal":
        return Normal(*component[1])
    else:
        raise NotImplementedError()
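A small usage sketch of the mapping above. The config tuples here are illustrative assumptions about the expected format (they are not taken from the original config files), and they rely only on the standard Lasagne initializer signatures.

    # Illustrative calls; assumes `convert_initialization` from above is in scope.
    w_init = convert_initialization(("normal", (0.05, 0.0)))  # -> Normal(std=0.05, mean=0.0)
    u_init = convert_initialization(("uniform", 0.1))          # -> Uniform(0.1), i.e. U(-0.1, 0.1)
    g_init = convert_initialization(("glorotuniform", None),
                                    nonlinearity="relu")       # -> GlorotUniform("relu")
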
def exp_x(name, learning_rate):
    global source
    try:
        a = source
    except NameError:
        source = RealApplianceSource(**source_dict)
    net_dict_copy = deepcopy(net_dict)
    net_dict_copy.update(
        dict(experiment_name=name,
             source=source,
             updates=partial(nesterov_momentum, learning_rate=learning_rate)))
    net_dict_copy['layers_config'].append({
        'type': DenseLayer,
        'num_units': source.n_outputs,
        'nonlinearity': sigmoid,
        'W': Normal(std=(1 / sqrt(50)))
    })
    net = Net(**net_dict_copy)
    return net

def exp_d(name):
    global source
    try:
        a = source
    except NameError:
        source = RealApplianceSource(**source_dict)
    net_dict_copy = deepcopy(net_dict)
    net_dict_copy.update(dict(experiment_name=name, source=source))
    net_dict_copy['layers_config'] = [
        {
            'type': DenseLayer,
            'num_units': 50,
            'nonlinearity': sigmoid
        },
        {
            'type': DenseLayer,
            'num_units': source.n_outputs,
            'nonlinearity': sigmoid,
            'W': Normal(std=1 / sqrt(50))
        }
    ]
    net = Net(**net_dict_copy)
    return net

def __init__(self, incomings, hid_state_size, voc_size,
             resetgate=GRU_Gate(), updategate=GRU_Gate(),
             hid_update=GRU_Gate(nonlinearity=nonlin.tanh),
             W=Normal(), max_answer_word=1, **kwargs):
    super(AnswerModule, self).__init__(incomings, **kwargs)

    self.hid_state_size = hid_state_size

    # FOR GRU
    input_shape = self.input_shapes[0]
    num_inputs = np.prod(input_shape[1]) + voc_size  # concatenation of previous prediction

    def add_gate(gate, gate_name):
        return (self.add_param(gate.W_in, (num_inputs, hid_state_size),
                               name="W_in_to_{}".format(gate_name)),
                self.add_param(gate.W_hid, (hid_state_size, hid_state_size),
                               name="W_hid_to_{}".format(gate_name)),
                self.add_param(gate.b, (hid_state_size,),
                               name="b_{}".format(gate_name), regularizable=False),
                gate.nonlinearity)

    # Add in all parameters from gates
    (self.W_in_to_updategate, self.W_hid_to_updategate, self.b_updategate,
     self.nonlinearity_updategate) = add_gate(updategate, 'updategate')
    (self.W_in_to_resetgate, self.W_hid_to_resetgate, self.b_resetgate,
     self.nonlinearity_resetgate) = add_gate(resetgate, 'resetgate')
    (self.W_in_to_hid_update, self.W_hid_to_hid_update, self.b_hid_update,
     self.nonlinearity_hid) = add_gate(hid_update, 'hid_update')

    self.W = self.add_param(W, (hid_state_size, voc_size), name="W")
    self.max_answer_word = max_answer_word

    self.rand_stream = RandomStreams(np.random.randint(1, 2147462579))

def build_generator_64(noise=None, ngf=128):
    # noise input
    InputNoise = InputLayer(shape=(None, 100), input_var=noise)
    # FC Layer
    gnet0 = DenseLayer(InputNoise, ngf * 8 * 4 * 4, W=Normal(0.02), nonlinearity=relu)
    print("Gen fc1:", gnet0.output_shape)
    # Reshape Layer
    gnet1 = ReshapeLayer(gnet0, ([0], ngf * 8, 4, 4))
    print("Gen rs1:", gnet1.output_shape)
    # DeConv Layer
    gnet2 = Deconv2DLayer(gnet1, ngf * 8, (4, 4), (2, 2), crop=1,
                          W=Normal(0.02), nonlinearity=relu)
    print("Gen deconv2:", gnet2.output_shape)
    # DeConv Layer
    gnet3 = Deconv2DLayer(gnet2, ngf * 4, (4, 4), (2, 2), crop=1,
                          W=Normal(0.02), nonlinearity=relu)
    print("Gen deconv3:", gnet3.output_shape)
    # DeConv Layer
    gnet4 = Deconv2DLayer(gnet3, ngf * 4, (4, 4), (2, 2), crop=1,
                          W=Normal(0.02), nonlinearity=relu)
    print("Gen deconv4:", gnet4.output_shape)
    # DeConv Layer
    gnet5 = Deconv2DLayer(gnet4, ngf * 2, (4, 4), (2, 2), crop=1,
                          W=Normal(0.02), nonlinearity=relu)
    print("Gen deconv5:", gnet5.output_shape)
    # DeConv Layer
    gnet6 = Deconv2DLayer(gnet5, 3, (3, 3), (1, 1), crop='same',
                          W=Normal(0.02), nonlinearity=tanh)
    print("Gen output:", gnet6.output_shape)
    return gnet6

def build_discriminator_128(image=None, ndf=128):
    lrelu = LeakyRectify(0.2)
    # input: images
    InputImg = InputLayer(shape=(None, 3, 128, 128), input_var=image)
    print("Dis Img_input:", InputImg.output_shape)
    # Conv Layer
    dis1 = Conv2DLayer(InputImg, ndf, (4, 4), (2, 2), pad=1,
                       W=Normal(0.02), nonlinearity=lrelu)
    print("Dis conv1:", dis1.output_shape)
    # Conv Layer
    dis2 = batch_norm(Conv2DLayer(dis1, ndf * 2, (4, 4), (2, 2), pad=1,
                                  W=Normal(0.02), nonlinearity=lrelu))
    print("Dis conv2:", dis2.output_shape)
    # Conv Layer
    dis3 = batch_norm(Conv2DLayer(dis2, ndf * 4, (4, 4), (2, 2), pad=1,
                                  W=Normal(0.02), nonlinearity=lrelu))
    print("Dis conv3:", dis3.output_shape)
    # Conv Layer
    dis4 = batch_norm(Conv2DLayer(dis3, ndf * 8, (4, 4), (2, 2), pad=1,
                                  W=Normal(0.02), nonlinearity=lrelu))
    print("Dis conv4:", dis4.output_shape)
    # Conv Layer
    dis5 = batch_norm(Conv2DLayer(dis4, ndf * 16, (4, 4), (2, 2), pad=1,
                                  W=Normal(0.02), nonlinearity=lrelu))
    print("Dis conv5:", dis5.output_shape)
    # Dense output Layer
    dis6 = DenseLayer(dis5, 1, W=Normal(0.02), nonlinearity=sigmoid)
    print("Dis output:", dis6.output_shape)
    return dis6

def build_discriminator_toy(image=None, nd=512, GP_norm=None):
    Input = InputLayer(shape=(None, 2), input_var=image)
    print("Dis input:", Input.output_shape)
    dis0 = DenseLayer(Input, nd, W=Normal(0.02), nonlinearity=relu)
    print("Dis fc0:", dis0.output_shape)
    if GP_norm is True:
        dis1 = DenseLayer(dis0, nd, W=Normal(0.02), nonlinearity=relu)
    else:
        dis1 = batch_norm(DenseLayer(dis0, nd, W=Normal(0.02), nonlinearity=relu))
    print("Dis fc1:", dis1.output_shape)
    if GP_norm is True:
        dis2 = batch_norm(DenseLayer(dis1, nd, W=Normal(0.02), nonlinearity=relu))
    else:
        dis2 = DenseLayer(dis1, nd, W=Normal(0.02), nonlinearity=relu)
    print("Dis fc2:", dis2.output_shape)
    disout = DenseLayer(dis2, 1, W=Normal(0.02), nonlinearity=sigmoid)
    print("Dis output:", disout.output_shape)
    return disout

def __init__(self, dim, mode, l2, l1, batch_norm, dropout, batch_size,
             input_dim=76, **kwargs):
    print "==> not used params in network class:", kwargs.keys()
    self.dim = dim
    self.mode = mode
    self.l2 = l2
    self.l1 = l1
    self.batch_norm = batch_norm
    self.dropout = dropout
    self.batch_size = batch_size

    self.input_var = T.tensor3('X')
    self.input_lens = T.ivector('L')
    self.target_var = T.ivector('y')
    self.weight = T.vector('w')

    print "==> Building neural network"
    network = layers.InputLayer((None, None, input_dim),
                                input_var=self.input_var)
    network = layers.LSTMLayer(incoming=network, num_units=dim,
                               only_return_final=False,
                               grad_clipping=10,
                               ingate=lasagne.layers.Gate(
                                   W_in=Orthogonal(), W_hid=Orthogonal(),
                                   W_cell=Normal(0.1)),
                               forgetgate=lasagne.layers.Gate(
                                   W_in=Orthogonal(), W_hid=Orthogonal(),
                                   W_cell=Normal(0.1)),
                               cell=lasagne.layers.Gate(
                                   W_cell=None,
                                   nonlinearity=lasagne.nonlinearities.tanh,
                                   W_in=Orthogonal(), W_hid=Orthogonal()),
                               outgate=lasagne.layers.Gate(
                                   W_in=Orthogonal(), W_hid=Orthogonal(),
                                   W_cell=Normal(0.1)))
    lstm_output = layers.get_output(network)
    self.params = layers.get_all_params(network, trainable=True)
    self.reg_params = layers.get_all_params(network, regularizable=True)

    # for each example in minibatch take the last output
    last_outputs = []
    for index in range(self.batch_size):
        last_outputs.append(lstm_output[index, self.input_lens[index] - 1, :])
    last_outputs = T.stack(last_outputs)

    network = layers.InputLayer(shape=(self.batch_size, self.dim),
                                input_var=last_outputs)
    network = layers.DenseLayer(incoming=network, num_units=2,
                                nonlinearity=softmax)

    self.prediction = layers.get_output(network)
    self.params += layers.get_all_params(network, trainable=True)
    self.reg_params += layers.get_all_params(network, regularizable=True)

    self.loss_ce = (self.weight * categorical_crossentropy(
        self.prediction, self.target_var)).mean()
    if self.l2 > 0:
        self.loss_l2 = self.l2 * nn_utils.l2_reg(self.reg_params)
    else:
        self.loss_l2 = 0
    if self.l1 > 0:
        self.loss_l1 = self.l1 * nn_utils.l1_reg(self.reg_params)
    else:
        self.loss_l1 = 0
    self.loss = self.loss_ce + self.loss_l2 + self.loss_l1

    # updates = lasagne.updates.adadelta(self.loss, self.params,
    #                                    learning_rate=0.001)
    # updates = lasagne.updates.momentum(self.loss, self.params,
    #                                    learning_rate=0.00003)
    # updates = lasagne.updates.adam(self.loss, self.params)
    updates = lasagne.updates.adam(self.loss, self.params, beta1=0.5,
                                   learning_rate=0.0001)  # from DCGAN paper
    # updates = lasagne.updates.nesterov_momentum(loss, params, momentum=0.9,
    #                                             learning_rate=0.001)

    ## compiling theano functions
    if self.mode == 'train':
        print "==> compiling train_fn"
        self.train_fn = theano.function(
            inputs=[self.input_var, self.input_lens, self.target_var,
                    self.weight],
            outputs=[self.prediction, self.loss],
            updates=updates)

    print "==> compiling test_fn"
    self.test_fn = theano.function(
        inputs=[self.input_var, self.input_lens, self.target_var, self.weight],
        outputs=[self.prediction, self.loss])

from lasagne.objectives import crossentropy, mse
from lasagne.init import Uniform, Normal
from lasagne.layers import LSTMLayer, DenseLayer, Conv1DLayer, ReshapeLayer
from lasagne.updates import adagrad, nesterov_momentum
from functools import partial
import os
from neuralnilm.source import standardise, discretize, fdiff, power_and_fdiff
from neuralnilm.experiment import run_experiment
from neuralnilm.net import TrainingError
import __main__

NAME = os.path.splitext(os.path.split(__main__.__file__)[1])[0]
PATH = "/homes/dk3810/workspace/python/neuralnilm/figures"
SAVE_PLOT_INTERVAL = 250
GRADIENT_STEPS = 100

"""
e103
Discovered that bottom layer is hardly changing.
So will try just a single lstm layer

e104
standard init
lower learning rate

e106
lower learning rate to 0.001

e108
is e107 but with batch size of 5

e109
'''
models
'''

# symbols
sym_y_g = T.ivector()
sym_z_input = T.matrix()
sym_z_rand = theano_rng.uniform(size=(batch_size_g, n_z))
sym_z_shared = T.tile(theano_rng.uniform((batch_size_g/num_classes, n_z)),
                      (num_classes, 1))

# generator y2x: p_g(x, y) = p(y) p_g(x | y) where x = G(z, y), z follows p_g(z)
gen_in_z = ll.InputLayer(shape=(None, n_z))
gen_in_y = ll.InputLayer(shape=(None,))
gen_layers = [gen_in_z]
if args.dataset == 'svhn' or args.dataset == 'cifar10':
    gen_layers.append(MLPConcatLayer([gen_layers[-1], gen_in_y], num_classes,
                                     name='gen-00'))
    gen_layers.append(nn.batch_norm(
        ll.DenseLayer(gen_layers[-1], num_units=4*4*512, W=Normal(0.05),
                      nonlinearity=nn.relu, name='gen-01'),
        g=None, name='gen-02'))
    gen_layers.append(ll.ReshapeLayer(gen_layers[-1], (-1, 512, 4, 4), name='gen-03'))
    gen_layers.append(ConvConcatLayer([gen_layers[-1], gen_in_y], num_classes,
                                      name='gen-10'))
    gen_layers.append(nn.batch_norm(
        nn.Deconv2DLayer(gen_layers[-1], (None, 256, 8, 8), (5, 5), W=Normal(0.05),
                         nonlinearity=nn.relu, name='gen-11'),
        g=None, name='gen-12'))  # 4 -> 8
    gen_layers.append(ConvConcatLayer([gen_layers[-1], gen_in_y], num_classes,
                                      name='gen-20'))
    gen_layers.append(nn.batch_norm(
        nn.Deconv2DLayer(gen_layers[-1], (None, 128, 16, 16), (5, 5), W=Normal(0.05),
                         nonlinearity=nn.relu, name='gen-21'),
        g=None, name='gen-22'))  # 8 -> 16
    gen_layers.append(ConvConcatLayer([gen_layers[-1], gen_in_y], num_classes,
                                      name='gen-30'))
    gen_layers.append(nn.weight_norm(
        nn.Deconv2DLayer(gen_layers[-1], (None, 3, 32, 32), (5, 5), W=Normal(0.05),
                         nonlinearity=gen_final_non, name='gen-31'),
        train_g=True, init_stdv=0.1, name='gen-32'))  # 16 -> 32
elif args.dataset == 'mnist':
    gen_layers.append(MLPConcatLayer([gen_layers[-1], gen_in_y], num_classes,
                                     name='gen-1'))
    gen_layers.append(ll.batch_norm(
        ll.DenseLayer(gen_layers[-1], num_units=500, nonlinearity=ln.softplus,
                      name='gen-2'),
        name='gen-3'))
    gen_layers.append(MLPConcatLayer([gen_layers[-1], gen_in_y], num_classes,
                                     name='gen-4'))
    gen_layers.append(ll.batch_norm(
        ll.DenseLayer(gen_layers[-1], num_units=500, nonlinearity=ln.softplus,
                      name='gen-5'),
        name='gen-6'))
    gen_layers.append(MLPConcatLayer([gen_layers[-1], gen_in_y], num_classes,
                                     name='gen-7'))
    gen_layers.append(nn.l2normalize(
        ll.DenseLayer(gen_layers[-1], num_units=28**2,
                      nonlinearity=gen_final_non, name='gen-8')))

def test_normal():
    from lasagne.init import Normal
    sample = Normal().sample((100, 200))
    assert -0.001 < sample.mean() < 0.001
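A companion check in the same style, not part of the original test above; it assumes the standard Lasagne signature `Normal(std=0.01, mean=0.0)` and simply verifies the spread of a non-default std.

    def test_normal_std():
        from lasagne.init import Normal
        # With 20,000 samples the empirical std should sit close to the requested 0.5.
        sample = Normal(std=0.5).sample((100, 200))
        assert 0.45 < sample.std() < 0.55
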
sym_b_c = T.scalar('adam_beta1')
sym_w_g = T.scalar('w_g')

shared_unlabel = theano.shared(x_unlabelled, borrow=True)
slice_x_u_d = T.ivector()
slice_x_u_c = T.ivector()
slice_x_u_i = T.ivector()

classifier = build_network()

# generator y2x: p_g(x, y) = p(y) p_g(x | y) where x = G(z, y), z follows p_g(z)
gen_in_z = ll.InputLayer(shape=(None, n_z))
gen_in_y = ll.InputLayer(shape=(None,))
gen_layers = [gen_in_z]
gen_layers.append(MLPConcatLayer([gen_layers[-1], gen_in_y], num_classes,
                                 name='gen-00'))
gen_layers.append(nn.batch_norm(
    ll.DenseLayer(gen_layers[-1], num_units=4*4*512, W=Normal(0.05),
                  nonlinearity=nn.relu, name='gen-01'),
    g=None, name='gen-02'))
gen_layers.append(ll.ReshapeLayer(gen_layers[-1], (-1, 512, 4, 4), name='gen-03'))
gen_layers.append(ConvConcatLayer([gen_layers[-1], gen_in_y], num_classes,
                                  name='gen-10'))
gen_layers.append(nn.batch_norm(
    nn.Deconv2DLayer(gen_layers[-1], (None, 256, 8, 8), (5, 5), W=Normal(0.05),
                     nonlinearity=nn.relu, name='gen-11'),
    g=None, name='gen-12'))  # 4 -> 8
gen_layers.append(ConvConcatLayer([gen_layers[-1], gen_in_y], num_classes,
                                  name='gen-20'))
gen_layers.append(nn.batch_norm(
    nn.Deconv2DLayer(gen_layers[-1], (None, 128, 16, 16), (5, 5), W=Normal(0.05),
                     nonlinearity=nn.relu, name='gen-21'),
    g=None, name='gen-22'))  # 8 -> 16
gen_layers.append(ConvConcatLayer([gen_layers[-1], gen_in_y], num_classes,
                                  name='gen-30'))
gen_layers.append(nn.weight_norm(
    nn.Deconv2DLayer(gen_layers[-1], (None, 3, 32, 32), (5, 5), W=Normal(0.05),
                     nonlinearity=gen_final_non, name='gen-31'),
    train_g=True, init_stdv=0.1, name='gen-32'))  # 16 -> 32

# discriminator xy2p: test a pair of input comes from p(x, y) instead of p_c or p_g
dis_in_x = ll.InputLayer(shape=(None, in_channels) + dim_input)
dis_in_y = ll.InputLayer(shape=(None,))
dis_layers = [dis_in_x]
dis_layers.append(ll.DropoutLayer(dis_layers[-1], p=0.2, name='dis-00'))
dis_layers.append(ConvConcatLayer([dis_layers[-1], dis_in_y], num_classes,
                                  name='dis-01'))
dis_layers.append(nn.weight_norm(
    dnn.Conv2DDNNLayer(dis_layers[-1], 32, (3, 3), pad=1, W=Normal(0.05),
                       nonlinearity=nn.lrelu, name='dis-02'),
    name='dis-03'))

def __init__(self,
             index_to_token,
             index_to_condition,
             model_init_path=None,
             nn_models_dir=NN_MODELS_DIR,
             model_prefix=NN_MODEL_PREFIX,
             corpus_name=BASE_CORPUS_NAME,
             skip_token=SPECIAL_TOKENS.PAD_TOKEN,
             learning_rate=LEARNING_RATE,
             grad_clip=GRAD_CLIP,
             hidden_layer_dim=HIDDEN_LAYER_DIMENSION,
             encoder_depth=ENCODER_DEPTH,
             decoder_depth=DECODER_DEPTH,
             init_embedding=None,
             word_embedding_dim=WORD_EMBEDDING_DIMENSION,
             train_word_embedding=TRAIN_WORD_EMBEDDINGS_LAYER,
             dense_dropout_ratio=DENSE_DROPOUT_RATIO,
             condition_embedding_dim=CONDITION_EMBEDDING_DIMENSION,
             is_reverse_model=False):
    """
    :param index_to_token: Dict with tokens and indices for neural network
    :param model_init_path: Path to weights file to be used for model's initialization
    :param skip_token: Token to skip with masking. Id of this token is inferred from
        the index_to_token dictionary
    :param learning_rate: Learning rate factor for the optimization algorithm
    :param grad_clip: Clipping parameter to prevent gradient explosion
    :param init_embedding: Matrix to initialize the word-embedding layer.
        Default value is random standard-gaussian initialization
    """
    self._index_to_token = index_to_token
    self._token_to_index = {v: k for k, v in index_to_token.items()}
    self._vocab_size = len(self._index_to_token)

    self._index_to_condition = index_to_condition
    self._condition_to_index = {v: k for k, v in index_to_condition.items()}
    self._condition_ids_num = len(self._condition_to_index)
    self._condition_embedding_dim = condition_embedding_dim

    self._learning_rate = learning_rate
    self._grad_clip = grad_clip
    self._W_init_embedding = Normal() if init_embedding is None else init_embedding
    self._word_embedding_dim = word_embedding_dim
    self._train_word_embedding = train_word_embedding
    self._skip_token_id = self._token_to_index[skip_token]
    self._hidden_layer_dim = hidden_layer_dim
    self._encoder_depth = encoder_depth
    self._decoder_depth = decoder_depth
    self._dense_dropout_ratio = dense_dropout_ratio

    self._nn_models_dir = nn_models_dir
    self._model_prefix = model_prefix
    self._corpus_name = corpus_name
    self._is_reverse_model = is_reverse_model
    self._model_load_path = model_init_path or self.model_save_path

    self._train_fn = None  # Training functions are compiled as needed

    self._build_model_computational_graph()
    self._compile_theano_functions_for_prediction()