def __init__(self, numpy_rng, theano_rng=None,
             cfg=None,  # the network configuration
             dnn_shared=None, shared_layers=[], input=None):

    self.cfg = cfg
    self.params = []
    self.delta_params = []

    self.n_ins = cfg.n_ins
    self.n_outs = cfg.n_outs
    self.l1_reg = cfg.l1_reg
    self.l2_reg = cfg.l2_reg
    self.do_maxout = cfg.do_maxout
    self.pool_size = cfg.pool_size
    self.max_col_norm = cfg.max_col_norm

    self.layers = []
    self.lstm_layers = []
    self.fc_layers = []

    # 1. lstm
    self.lstm_layers_sizes = cfg.lstm_layers_sizes
    self.lstm_layers_number = len(self.lstm_layers_sizes)
    # 2. dnn
    self.hidden_layers_sizes = cfg.hidden_layers_sizes
    self.hidden_layers_number = len(self.hidden_layers_sizes)
    self.activation = cfg.activation

    if not theano_rng:
        theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))

    # allocate symbolic variables for the data
    if input is None:
        self.x = T.matrix('x')
    else:
        self.x = input
    self.y = T.matrix('y')

    #######################
    # build lstm layers   #
    #######################
    print '1. start to build attend-lstm layer: ' + str(self.lstm_layers_number) + ', n_attendout: ' + str(cfg.batch_size)
    for i in xrange(self.lstm_layers_number):
        if i == 0:
            input_size = self.n_ins
            input = self.x
        else:
            input_size = self.lstm_layers_sizes[i - 1]
            input = self.layers[-1].output
        lstm_layer = AttendRnnLayer(rng=numpy_rng, input=input,
                                    n_in=input_size,
                                    n_out=self.lstm_layers_sizes[i],
                                    n_attendout=cfg.batch_size)
        print '\tbuild attend-lstm layer: ' + str(input_size) + ' x ' + str(lstm_layer.n_out)
        self.layers.append(lstm_layer)
        self.lstm_layers.append(lstm_layer)
        self.params.extend(lstm_layer.params)
        self.delta_params.extend(lstm_layer.delta_params)
    print '1. finish attend-lstm layer: ' + str(self.layers[-1].n_out)

    #######################
    # build dnnv layers   #
    #######################
    print '2. start to build dnnv layer: ' + str(self.hidden_layers_number)
    for i in xrange(self.hidden_layers_number):
        if i == 0:
            input_size = self.layers[-1].n_out
        else:
            input_size = self.hidden_layers_sizes[i - 1]
        input = self.layers[-1].output
        fc_layer = HiddenLayer(rng=numpy_rng, input=input,
                               n_in=input_size,
                               n_out=self.hidden_layers_sizes[i])
        print '\tbuild dnnv layer: ' + str(input_size) + ' x ' + str(fc_layer.n_out)
        self.layers.append(fc_layer)
        self.fc_layers.append(fc_layer)
        self.params.extend(fc_layer.params)
        self.delta_params.extend(fc_layer.delta_params)
    print '2. finish dnnv layer: ' + str(self.layers[-1].n_out)

    #######################
    # build log layers    #
    #######################
    print '3. start to build log layer: 1'
    input_size = self.layers[-1].n_out
    input = self.layers[-1].output
    logLayer = OutputLayer(input=input, n_in=input_size, n_out=self.n_outs)
    print '\tbuild final layer: ' + str(input_size) + ' x ' + str(logLayer.n_out)
    self.layers.append(logLayer)
    self.params.extend(logLayer.params)
    self.delta_params.extend(logLayer.delta_params)
    print '3. finish log layer: ' + str(self.layers[-1].n_out)

    print 'Total layers: ' + str(len(self.layers))
    sys.stdout.flush()

    self.finetune_cost = self.layers[-1].l2(self.y)
    self.errors = self.layers[-1].errors(self.y)

    if self.l2_reg is not None:
        # regularize the fully-connected hidden layers only
        for i in xrange(self.hidden_layers_number):
            W = self.fc_layers[i].W
            self.finetune_cost += self.l2_reg * T.sqr(W).sum()
def __init__(self, numpy_rng, theano_rng=None,
             cfg=None,  # the network configuration
             dnn_shared=None, shared_layers=[], input=None, extra_input=None):

    self.cfg = cfg
    self.params = []
    self.delta_params = []

    self.n_ins = cfg.n_ins
    self.n_outs = cfg.n_outs
    self.l1_reg = cfg.l1_reg
    self.l2_reg = cfg.l2_reg
    self.do_maxout = cfg.do_maxout
    self.pool_size = cfg.pool_size
    self.max_col_norm = cfg.max_col_norm
    print self.max_col_norm

    self.layers = []
    self.extra_layers = []
    self.lstm_layers = []
    self.fc_layers = []

    # 1. lstm
    self.lstm_layers_sizes = cfg.lstm_layers_sizes
    self.lstm_layers_number = len(self.lstm_layers_sizes)
    # 1.5 attention
    self.extra_dim = cfg.extra_dim
    print 'Extra dim: ' + str(cfg.extra_dim)
    self.extra_layers_sizes = cfg.extra_layers_sizes
    # 2. dnn
    self.hidden_layers_sizes = cfg.hidden_layers_sizes
    self.hidden_layers_number = len(self.hidden_layers_sizes)
    self.activation = cfg.activation

    if not theano_rng:
        theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))

    # allocate symbolic variables for the data
    if input is None:
        self.x = T.matrix('x')
        self.extra_x = T.matrix('extra_x')
    else:
        self.x = input
        self.extra_x = extra_input
    self.y = T.matrix('y')

    #######################################
    # build phase-based attention layer   #
    #######################################
    # 0. phase-based attention
    #self.extra_layers_sizes.extend([self.conv_output_dim])
    #print '0. start to build attend layer: ' + str(self.extra_layers_sizes)
    #for i in xrange(len(self.extra_layers_sizes)):
    #    if i == 0:
    #        #input_size = 6400*5
    #        input_size = cfg.extra_dim
    #        layer_input = self.extra_x
    #    else:
    #        input_size = self.extra_layers_sizes[i - 1]
    #        layer_input = self.extra_layers[-1].output
    #
    #    W = None; b = None
    #    attend_layer = HiddenLayer(rng=numpy_rng,
    #                               input=layer_input,
    #                               n_in=input_size,
    #                               n_out=self.extra_layers_sizes[i],
    #                               W=W, b=b,
    #                               activation=self.activation)
    #    print '\tbuild attend layer: ' + str(input_size) + ' x ' + str(attend_layer.n_out)
    #    self.extra_layers.append(attend_layer)
    #    self.params.extend(attend_layer.params)
    #    self.delta_params.extend(attend_layer.delta_params)
    #self.extra_layers[-1].att_e_tl = self.extra_layers[-1].output
    #self.extra_layers[-1].att_a_tl = T.nnet.softmax(self.extra_layers[-1].att_e_tl)
    ##self.extra_layers[-1].att_a_tl = T.exp(self.extra_layers[-1].att_e_tl)/(T.exp(self.extra_layers[-1].att_e_tl)).sum(0, keepdims=True)
    #print '0. finish attend layer: ' + str(self.extra_layers[-1].n_out)

    #######################
    # build lstm layers   #
    #######################
    #print '1. start to build PhaseAttendLSTMLayer : ' + str(self.lstm_layers_number) + ', n_attendout: ' + str(cfg.batch_size)
    print '1. start to build PhaseAttendLSTMLayer : ' + str(self.lstm_layers_number) + ', n_attendout: ' + str(self.n_ins)
    for i in xrange(self.lstm_layers_number):
        if i == 0:
            input_size = self.n_ins
            input = self.x
        else:
            input_size = self.lstm_layers_sizes[i - 1]
            input = self.layers[-1].output
        # use self.extra_x so the layer also works when extra_input is not given
        lstm_layer = PhaseAttendLSTMLayer(rng=numpy_rng, input=input,
                                          n_in=input_size,
                                          extra_input=self.extra_x,
                                          n_out=self.lstm_layers_sizes[i])
        print '\tbuild PhaseAttendLSTMLayer: ' + str(input_size) + ' x ' + str(lstm_layer.n_out)
        self.layers.append(lstm_layer)
        self.lstm_layers.append(lstm_layer)
        self.params.extend(lstm_layer.params)
        self.delta_params.extend(lstm_layer.delta_params)
    print '1. finish PhaseAttendLSTMLayer: ' + str(self.layers[-1].n_out)

    #######################
    # build dnnv layers   #
    #######################
    print '2. start to build dnnv layer: ' + str(self.hidden_layers_number)
    for i in xrange(self.hidden_layers_number):
        if i == 0:
            input_size = self.layers[-1].n_out
        else:
            input_size = self.hidden_layers_sizes[i - 1]
        input = self.layers[-1].output
        fc_layer = HiddenLayer(rng=numpy_rng, input=input,
                               n_in=input_size,
                               n_out=self.hidden_layers_sizes[i],
                               activation=self.activation)
        print '\tbuild dnnv layer: ' + str(input_size) + ' x ' + str(fc_layer.n_out)
        self.layers.append(fc_layer)
        self.fc_layers.append(fc_layer)
        self.params.extend(fc_layer.params)
        self.delta_params.extend(fc_layer.delta_params)
    print '2. finish dnnv layer: ' + str(self.layers[-1].n_out)

    #######################
    # build log layers    #
    #######################
    print '3. start to build log layer: 1'
    input_size = self.layers[-1].n_out
    input = self.layers[-1].output
    logLayer = OutputLayer(input=input, n_in=input_size, n_out=self.n_outs)
    print '\tbuild final layer: ' + str(input_size) + ' x ' + str(logLayer.n_out)
    self.layers.append(logLayer)
    self.params.extend(logLayer.params)
    self.delta_params.extend(logLayer.delta_params)
    print '3. finish log layer: ' + str(self.layers[-1].n_out)

    print 'Total layers: ' + str(len(self.layers))
    sys.stdout.flush()

    self.finetune_cost = self.layers[-1].l2(self.y)
    self.errors = self.layers[-1].errors(self.y)

    if self.l2_reg is not None:
        #for i in xrange(self.lstm_layers_number):
        #    W = self.lstm_layers[i].W_xi
        #    self.finetune_cost += self.l2_reg * T.sqr(W).sum()
        #    W = self.lstm_layers[i].W_hi
        #    self.finetune_cost += self.l2_reg * T.sqr(W).sum()
        #    W = self.lstm_layers[i].W_xf
        #    self.finetune_cost += self.l2_reg * T.sqr(W).sum()
        #    W = self.lstm_layers[i].W_hf
        #    self.finetune_cost += self.l2_reg * T.sqr(W).sum()
        #    W = self.lstm_layers[i].W_xc
        #    self.finetune_cost += self.l2_reg * T.sqr(W).sum()
        #    W = self.lstm_layers[i].W_hc
        #    self.finetune_cost += self.l2_reg * T.sqr(W).sum()
        #    W = self.lstm_layers[i].W_xo
        #    self.finetune_cost += self.l2_reg * T.sqr(W).sum()
        #    W = self.lstm_layers[i].W_ho
        #    self.finetune_cost += self.l2_reg * T.sqr(W).sum()
        for i in xrange(self.hidden_layers_number):
            W = self.fc_layers[i].W
            self.finetune_cost += self.l2_reg * T.sqr(W).sum()
def __init__(self, numpy_rng=None, theano_rng=None, cfg=[],
             non_maximum_erasing=False, use_fast=False):

    self.conv_layers = []
    self.n_outs = cfg.n_outs
    self.layers = []
    self.extra_layers = []

    self.conv_layer_num = len(cfg.conv_layer_configs)
    self.dnn_layer_num = len(cfg.hidden_layers_sizes)
    self.extra_layers_sizes = cfg.extra_layers_sizes

    self.x = T.tensor4('x')
    self.extra_x = T.matrix('extra_x')

    for i in xrange(self.conv_layer_num):
        if i == 0:
            input = self.x
        else:
            input = self.conv_layers[-1].output
        config = cfg.conv_layer_configs[i]
        print config['filter_shape']
        conv_layer = ConvLayerForward(numpy_rng=numpy_rng, input=input,
                                      filter_shape=config['filter_shape'],
                                      poolsize=config['poolsize'],
                                      activation=config['activation'],
                                      flatten=config['flatten'],
                                      use_fast=use_fast)
        self.layers.append(conv_layer)
        self.conv_layers.append(conv_layer)

    self.conv_output_dim = config['output_shape'][1] * config['output_shape'][2] * config['output_shape'][3]
    cfg.n_ins = config['output_shape'][1] * config['output_shape'][2] * config['output_shape'][3]
    print self.conv_output_dim
    print cfg.n_ins

    print 'Extra input dimension: ' + str(cfg.extra_dim)
    for i in xrange(len(self.extra_layers_sizes)):
        if i == 0:
            input_size = cfg.extra_dim
            layer_input = self.extra_x
        else:
            input_size = self.extra_layers_sizes[i - 1]
            layer_input = self.extra_layers[-1].output
        W = None
        b = None
        attend_layer = HiddenLayer(rng=numpy_rng,
                                   input=layer_input,
                                   n_in=input_size,
                                   n_out=self.extra_layers_sizes[i],
                                   W=W, b=b)
        self.extra_layers.append(attend_layer)

    self.extra_output = self.extra_layers[-1].output
    self.extra_output = T.nnet.softmax(self.extra_layers[-1].output)

    print 'layer num: ' + str(len(self.layers) - 1)
    for i in xrange(self.dnn_layer_num):
        if i == 0:
            # 1. Join two features (magnitude + phase)
            input_size = self.conv_output_dim + self.extra_layers_sizes[-1]
            layer_input = T.join(1, self.layers[-1].output, self.extra_output)
            # 2. Weighted Sum (magnitude * phase)
            #input_size = self.conv_output_dim
            #layer_input = self.layers[-1].output * self.extra_output
        else:
            input_size = cfg.hidden_layers_sizes[i - 1]
            layer_input = self.layers[-1].output
        W = None
        b = None
        hidden_layer = HiddenLayer(rng=numpy_rng,
                                   input=layer_input,
                                   n_in=input_size,
                                   n_out=cfg.hidden_layers_sizes[i],
                                   W=W, b=b)
        self.layers.append(hidden_layer)
        print 'layer num: ' + str(len(self.layers) - 1)

    logLayer = OutputLayer(input=self.layers[-1].output,
                           n_in=cfg.hidden_layers_sizes[-1],
                           n_out=self.n_outs)
    self.layers.append(logLayer)
    print 'layer num: ' + str(len(self.layers) - 1)
class DNN_MTL(object):

    def __init__(self, task_id, numpy_rng, theano_rng=None,
                 cfg=None,  # the network configuration
                 dnn_shared=None, shared_layers=[], input=None):

        self.layers = []
        self.params = []
        self.delta_params = []

        self.cfg = cfg
        self.n_ins = cfg.n_ins
        self.n_outs = cfg.n_outs
        self.hidden_layers_sizes = cfg.hidden_layers_sizes
        self.hidden_layers_number = len(self.hidden_layers_sizes)
        self.activation = cfg.activation
        self.do_maxout = cfg.do_maxout
        self.pool_size = cfg.pool_size
        self.max_col_norm = cfg.max_col_norm
        self.l1_reg = cfg.l1_reg
        self.l2_reg = cfg.l2_reg
        self.non_updated_layers = cfg.non_updated_layers

        if not theano_rng:
            theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))

        # allocate symbolic variables for the data
        if input is None:
            self.x = T.matrix('x')
        else:
            self.x = input
        # task 0 is classification (integer labels); other tasks are regression
        if task_id == 0:
            self.y = T.ivector('y')
        else:
            self.y = T.matrix('y')

        #######################
        # build dnnv layers   #
        #######################
        print "=============="
        print "Task ID: %d" % (task_id)
        print "=============="
        print '1. start to build dnn layer: ' + str(self.hidden_layers_number)
        for i in xrange(self.hidden_layers_number):
            if i == 0:
                input_size = self.n_ins
                input = self.x
            else:
                input_size = self.hidden_layers_sizes[i - 1]
                input = self.layers[-1].output
            W = None
            b = None
            if (i in shared_layers):
                print "shared layer = %d" % (i)
                W = dnn_shared.layers[i].W
                b = dnn_shared.layers[i].b
            hidden_layer = HiddenLayer(rng=numpy_rng,
                                       input=input,
                                       n_in=input_size,
                                       n_out=self.hidden_layers_sizes[i],
                                       W=W, b=b,
                                       activation=self.activation)
            print '\tbuild dnn layer: ' + str(input_size) + ' x ' + str(hidden_layer.n_out)
            self.layers.append(hidden_layer)
            self.params.extend(hidden_layer.params)
            self.delta_params.extend(hidden_layer.delta_params)
        print '1. finish dnnv layer: ' + str(self.layers[-1].n_out)

        #######################
        # build log layers    #
        #######################
        print '2. start to build final layer: 1'
        input_size = self.layers[-1].n_out
        input = self.layers[-1].output
        if task_id == 0:
            self.logLayer = LogisticRegression(input=self.layers[-1].output,
                                               n_in=self.hidden_layers_sizes[-1],
                                               n_out=self.n_outs)
            print '\tbuild final layer (classification): ' + str(input_size) + ' x ' + str(self.logLayer.n_out)
            self.finetune_cost = self.logLayer.negative_log_likelihood(self.y)
            self.errors = self.logLayer.errors(self.y)
        else:
            self.logLayer = OutputLayer(input=input,
                                        n_in=input_size,
                                        n_out=self.n_outs)
            print '\tbuild final layer (regression): ' + str(input_size) + ' x ' + str(self.logLayer.n_out)
            self.finetune_cost = self.logLayer.l2(self.y)
            self.errors = self.logLayer.errors(self.y)
        self.layers.append(self.logLayer)
        self.params.extend(self.logLayer.params)
        self.delta_params.extend(self.logLayer.delta_params)
        print '2. finish log layer: ' + str(self.layers[-1].n_out)

        print 'Total layers: ' + str(len(self.layers))
        sys.stdout.flush()

        if self.l2_reg is not None:
            for i in xrange(self.hidden_layers_number):
                W = self.layers[i].W
                self.finetune_cost += self.l2_reg * T.sqr(W).sum()

    def build_finetune_functions(self, train_shared_xy, valid_shared_xy, batch_size):

        (train_set_x, train_set_y) = train_shared_xy
        (valid_set_x, valid_set_y) = valid_shared_xy

        index = T.lscalar('index')  # index to a [mini]batch
        learning_rate = T.fscalar('learning_rate')
        momentum = T.fscalar('momentum')

        # compute the gradients with respect to the model parameters
        gparams = T.grad(self.finetune_cost, self.params)

        # compute list of fine-tuning updates
        updates = collections.OrderedDict()
        for dparam, gparam in zip(self.delta_params, gparams):
            updates[dparam] = momentum * dparam - gparam * learning_rate
        for dparam, param in zip(self.delta_params, self.params):
            updates[param] = param + updates[dparam]

        if self.max_col_norm is not None:
            for i in xrange(self.hidden_layers_number):
                W = self.layers[i].W
                if W in updates:
                    updated_W = updates[W]
                    col_norms = T.sqrt(T.sum(T.sqr(updated_W), axis=0))
                    desired_norms = T.clip(col_norms, 0, self.max_col_norm)
                    updates[W] = updated_W * (desired_norms / (1e-7 + col_norms))

        train_fn = theano.function(inputs=[index,
                                           theano.Param(learning_rate, default=0.0001),
                                           theano.Param(momentum, default=0.5)],
                                   outputs=self.errors,
                                   updates=updates,
                                   givens={
                                       self.x: train_set_x[index * batch_size: (index + 1) * batch_size],
                                       self.y: train_set_y[index * batch_size: (index + 1) * batch_size]})

        valid_fn = theano.function(inputs=[index],
                                   outputs=self.errors,
                                   givens={
                                       self.x: valid_set_x[index * batch_size: (index + 1) * batch_size],
                                       self.y: valid_set_y[index * batch_size: (index + 1) * batch_size]})

        return train_fn, valid_fn

    def build_extract_feat_function(self, output_layer):

        print 'build_extract_feat_function'
        print output_layer
        feat = T.matrix('feat')
        out_da = theano.function([feat], self.layers[output_layer].output,
                                 updates=None,
                                 givens={self.x: feat},
                                 on_unused_input='warn')
        return out_da

    def build_finetune_functions_kaldi(self, train_shared_xy, valid_shared_xy):

        (train_set_x, train_set_y) = train_shared_xy
        (valid_set_x, valid_set_y) = valid_shared_xy

        index = T.lscalar('index')  # index to a [mini]batch
        learning_rate = T.fscalar('learning_rate')
        momentum = T.fscalar('momentum')

        # compute the gradients with respect to the model parameters
        gparams = T.grad(self.finetune_cost, self.params)

        # compute list of fine-tuning updates
        updates = collections.OrderedDict()
        for dparam, gparam in zip(self.delta_params, gparams):
            updates[dparam] = momentum * dparam - gparam * learning_rate
        for dparam, param in zip(self.delta_params, self.params):
            updates[param] = param + updates[dparam]

        if self.max_col_norm is not None:
            for i in xrange(self.hidden_layers_number):
                W = self.layers[i].W
                if W in updates:
                    updated_W = updates[W]
                    col_norms = T.sqrt(T.sum(T.sqr(updated_W), axis=0))
                    desired_norms = T.clip(col_norms, 0, self.max_col_norm)
                    updates[W] = updated_W * (desired_norms / (1e-7 + col_norms))

        train_fn = theano.function(inputs=[theano.Param(learning_rate, default=0.0001),
                                           theano.Param(momentum, default=0.5)],
                                   outputs=self.errors,
                                   updates=updates,
                                   givens={self.x: train_set_x, self.y: train_set_y})

        valid_fn = theano.function(inputs=[],
                                   outputs=self.errors,
                                   givens={self.x: valid_set_x, self.y: valid_set_y})

        return train_fn, valid_fn

    def write_model_to_raw(self, file_path):
        # output the model to tmp_path; this format is readable by PDNN
        _nnet2file(self.layers, filename=file_path)

    def write_model_to_kaldi(self, file_path, with_softmax=True):

        # determine whether it's BNF based on layer sizes
        output_layer_number = -1
        #for layer_index in range(1, self.hidden_layers_number - 1):
        #    cur_layer_size = self.hidden_layers_sizes[layer_index]
        #    prev_layer_size = self.hidden_layers_sizes[layer_index-1]
        #    next_layer_size = self.hidden_layers_sizes[layer_index+1]
        #    if cur_layer_size < prev_layer_size and cur_layer_size < next_layer_size:
        #        output_layer_number = layer_index + 1; break

        layer_number = len(self.layers)
        if output_layer_number == -1:
            output_layer_number = layer_number

        fout = open(file_path, 'wb')
        for i in xrange(output_layer_number):
            activation_text = '<' + self.cfg.activation_text + '>'
            if i == (layer_number - 1) and with_softmax:
                # we assume that the last layer is a softmax layer
                activation_text = '<softmax>'
            W_mat = self.layers[i].W.get_value()
            b_vec = self.layers[i].b.get_value()
            input_size, output_size = W_mat.shape
            W_layer = []
            b_layer = ''
            for rowX in xrange(output_size):
                W_layer.append('')

            for x in xrange(input_size):
                for t in xrange(output_size):
                    W_layer[t] = W_layer[t] + str(W_mat[x][t]) + ' '

            for x in xrange(output_size):
                b_layer = b_layer + str(b_vec[x]) + ' '

            fout.write('<affinetransform> ' + str(output_size) + ' ' + str(input_size) + '\n')
            fout.write('[' + '\n')
            for x in xrange(output_size):
                fout.write(W_layer[x].strip() + '\n')
            fout.write(']' + '\n')
            fout.write('[ ' + b_layer.strip() + ' ]' + '\n')
            if activation_text == '<maxout>':
                fout.write(activation_text + ' ' + str(output_size / self.pool_size) + ' ' + str(output_size) + '\n')
            else:
                fout.write(activation_text + ' ' + str(output_size) + ' ' + str(output_size) + '\n')
        fout.close()
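# ---------------------------------------------------------------------------
# Hypothetical usage sketch (illustration only, not part of the original
# model code). It assumes a PDNN-style `cfg` object that already carries the
# attributes read in __init__ (n_ins, n_outs, hidden_layers_sizes, activation,
# l1_reg, l2_reg, max_col_norm, non_updated_layers, ...) and Theano shared
# variables holding the training/validation data:
#
#   numpy_rng = numpy.random.RandomState(89677)
#   dnn = DNN_MTL(task_id=1, numpy_rng=numpy_rng, cfg=cfg)
#   train_fn, valid_fn = dnn.build_finetune_functions(
#       (train_set_x, train_set_y), (valid_set_x, valid_set_y), batch_size=256)
#   for batch_index in xrange(n_train_batches):
#       err = train_fn(batch_index, learning_rate=0.0001, momentum=0.5)
#   dnn.write_model_to_kaldi('final.nnet')
# ---------------------------------------------------------------------------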
def __init__(self, numpy_rng=None, theano_rng=None, cfg=None,
             non_maximum_erasing=False, use_fast=False):

    self.n_outs = cfg.n_outs
    self.layers = []
    self.extra_layers = []

    self.conv_layer_num = len(cfg.conv_layer_configs)
    self.dnn_layer_num = len(cfg.hidden_layers_sizes)
    self.extra_layers_sizes = cfg.extra_layers_sizes

    self.x = T.tensor4('x')
    self.extra_x = T.matrix('extra_x')

    for i in xrange(self.conv_layer_num):
        if i == 0:
            input = self.x
        else:
            input = self.layers[-1].output
        config = cfg.conv_layer_configs[i]
        conv_layer = ConvLayerForward(numpy_rng=numpy_rng, input=input,
                                      filter_shape=config['filter_shape'],
                                      poolsize=config['poolsize'],
                                      activation=config['activation'],
                                      flatten=config['flatten'],
                                      use_fast=use_fast)
        self.layers.append(conv_layer)
        self.conv_output_dim = config['output_shape'][1] * config['output_shape'][2] * config['output_shape'][3]
        cfg.n_ins = config['output_shape'][1] * config['output_shape'][2] * config['output_shape'][3]

    for i in xrange(len(self.extra_layers_sizes)):
        if i == 0:
            input_size = 6400 * 5
            input_size = cfg.extra_dim
            layer_input = self.extra_x
        else:
            input_size = self.extra_layers_sizes[i - 1]
            layer_input = self.extra_layers[-1].output
        W = None
        b = None
        attend_layer = HiddenLayer(rng=numpy_rng,
                                   input=layer_input,
                                   n_in=input_size,
                                   n_out=self.extra_layers_sizes[i],
                                   W=W, b=b)
        self.extra_layers.append(attend_layer)

    self.extra_layers[-1].att_e_tl = self.extra_layers[-1].output
    self.extra_layers[-1].att_a_tl = T.nnet.softmax(self.extra_layers[-1].att_e_tl)
    #self.extra_layers[-1].att_a_tl = T.exp(self.extra_layers[-1].att_e_tl)/(T.exp(self.extra_layers[-1].att_e_tl)).sum(0, keepdims=True)

    for i in xrange(self.dnn_layer_num):
        if i == 0:
            #input_size = self.conv_output_dim
            #layer_input = (self.extra_layers[-1].att_a_tl * self.layers[-1].output)
            input_size = self.conv_output_dim + self.extra_layers_sizes[-1]
            layer_input = T.join(1, self.extra_layers[-1].att_a_tl, self.layers[-1].output)
        else:
            input_size = cfg.hidden_layers_sizes[i - 1]
            layer_input = self.layers[-1].output
        W = None
        b = None
        hidden_layer = HiddenLayer(rng=numpy_rng,
                                   input=layer_input,
                                   n_in=input_size,
                                   n_out=cfg.hidden_layers_sizes[i],
                                   W=W, b=b)
        self.layers.append(hidden_layer)

    logLayer = OutputLayer(input=self.layers[-1].output,
                           n_in=cfg.hidden_layers_sizes[-1],
                           n_out=self.n_outs)
    self.layers.append(logLayer)
def __init__(self, numpy_rng, theano_rng=None, cfg=None,
             testing=False, input=None):

    self.cfg = cfg
    self.params = []
    self.delta_params = []

    self.n_ins = cfg.n_ins
    self.n_outs = cfg.n_outs
    self.l1_reg = cfg.l1_reg
    self.l2_reg = cfg.l2_reg
    self.do_maxout = cfg.do_maxout
    self.pool_size = cfg.pool_size
    self.max_col_norm = cfg.max_col_norm

    self.layers = []
    self.conv_layers = []
    self.lstm_layers = []
    self.fc_layers = []

    # 1. conv
    self.conv_layer_configs = cfg.conv_layer_configs
    self.conv_activation = cfg.conv_activation
    self.conv_layers_number = len(self.conv_layer_configs)
    self.use_fast = cfg.use_fast
    # 2. lstm
    self.lstm_layers_sizes = cfg.lstm_layers_sizes
    self.lstm_layers_number = len(self.lstm_layers_sizes)
    # 3. dnn
    self.hidden_layers_sizes = cfg.hidden_layers_sizes
    self.hidden_layers_number = len(self.hidden_layers_sizes)
    self.activation = cfg.activation

    if not theano_rng:
        theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))

    if input is None:
        self.x = T.matrix('x')
    else:
        self.x = input
    self.y = T.matrix('y')

    #######################
    # build conv layers   #
    #######################
    print '1. start to build conv layer: ' + str(self.conv_layers_number)
    for i in xrange(self.conv_layers_number):
        if i == 0:
            input = self.x
        else:
            input = self.conv_layers[-1].output
        config = self.conv_layer_configs[i]
        conv_layer = ConvLayer(numpy_rng=numpy_rng, input=input,
                               input_shape=config['input_shape'],
                               filter_shape=config['filter_shape'],
                               poolsize=config['poolsize'],
                               activation=self.conv_activation,
                               flatten=config['flatten'],
                               use_fast=self.use_fast,
                               testing=testing)
        print '\tbuild conv layer: ' + str(config['input_shape'])
        self.layers.append(conv_layer)
        self.conv_layers.append(conv_layer)
        self.params.extend(conv_layer.params)
        self.delta_params.extend(conv_layer.delta_params)
        self.conv_output_dim = config['output_shape'][1] * config['output_shape'][2] * config['output_shape'][3]
        print '\t cnn out: ' + str(self.conv_output_dim)
        cfg.n_ins = config['output_shape'][1] * config['output_shape'][2] * config['output_shape'][3]
    print '1. finish conv layer: ' + str(self.layers[-1].n_out)

    #######################
    # build lstm layers   #
    #######################
    print '2. start to build lstm layer: ' + str(self.lstm_layers_number)
    for i in xrange(self.lstm_layers_number):
        if i == 0:
            input_size = self.conv_output_dim
            input = self.layers[-1].output
        else:
            input_size = self.lstm_layers_sizes[i - 1]
            input = self.layers[-1].output
        print 'build lstm layer: ' + str(input_size)
        lstm_layer = LSTMLayer(rng=numpy_rng, input=input,
                               n_in=input_size,
                               n_out=self.lstm_layers_sizes[i])
        print '\tbuild lstm layer: ' + str(input_size) + ' x ' + str(lstm_layer.n_out)
        self.layers.append(lstm_layer)
        self.lstm_layers.append(lstm_layer)
        self.params.extend(lstm_layer.params)
        self.delta_params.extend(lstm_layer.delta_params)
    print '2. finish lstm layer: ' + str(self.layers[-1].n_out)

    #######################
    # build dnnv layers   #
    #######################
    print '3. start to build dnnv layer: ' + str(self.hidden_layers_number)
    for i in xrange(self.hidden_layers_number):
        if i == 0:
            input_size = self.layers[-1].n_out
        else:
            input_size = self.hidden_layers_sizes[i - 1]
        input = self.layers[-1].output
        fc_layer = HiddenLayer(rng=numpy_rng, input=input,
                               n_in=input_size,
                               n_out=self.hidden_layers_sizes[i])
        print '\tbuild dnnv layer: ' + str(input_size) + ' x ' + str(fc_layer.n_out)
        self.layers.append(fc_layer)
        self.fc_layers.append(fc_layer)
        self.params.extend(fc_layer.params)
        self.delta_params.extend(fc_layer.delta_params)
    print '3. finish dnnv layer: ' + str(self.layers[-1].n_out)

    #######################
    # build log layers    #
    #######################
    print '4. start to build log layer: 1'
    input_size = self.layers[-1].n_out
    input = self.layers[-1].output
    logLayer = OutputLayer(input=input, n_in=input_size, n_out=self.n_outs)
    print '\tbuild final layer: ' + str(input_size) + ' x ' + str(logLayer.n_out)
    self.layers.append(logLayer)
    self.params.extend(logLayer.params)
    self.delta_params.extend(logLayer.delta_params)
    print '4. finish log layer: ' + str(self.layers[-1].n_out)

    print 'Total layers: ' + str(len(self.layers))
    sys.stdout.flush()

    self.finetune_cost = self.layers[-1].l2(self.y)
    self.errors = self.layers[-1].errors(self.y)

    if self.l2_reg is not None:
        # regularize the fully-connected hidden layers only
        for i in xrange(self.hidden_layers_number):
            W = self.fc_layers[i].W
            self.finetune_cost += self.l2_reg * T.sqr(W).sum()
class CNNV(object):

    def __init__(self, numpy_rng, theano_rng=None, cfg=None,
                 testing=False, input=None):

        self.layers = []
        self.extra_layers = []
        self.params = []
        self.delta_params = []

        self.n_ins = cfg.n_ins
        self.n_outs = cfg.n_outs

        self.conv_layers = []

        self.cfg = cfg
        self.conv_layer_configs = cfg.conv_layer_configs
        self.conv_activation = cfg.conv_activation
        self.use_fast = cfg.use_fast

        self.extra_x = T.matrix('extra_x')
        # 1.5 attention
        self.extra_dim = cfg.extra_dim
        print 'Extra input dimension: ' + str(cfg.extra_dim)
        self.extra_layers_sizes = cfg.extra_layers_sizes
        # 2. dnn
        self.hidden_layers_sizes = cfg.hidden_layers_sizes
        self.hidden_layers_number = len(self.hidden_layers_sizes)
        self.activation = cfg.activation

        if not theano_rng:
            theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))

        if input is None:
            self.x = T.matrix('x')
        else:
            self.x = input
        self.y = T.matrix('y')

        #######################
        # build cnn layers    #
        #######################
        print '1. start to build cnn mag layer: ' + str(self.conv_layer_configs)
        self.conv_layer_num = len(self.conv_layer_configs)
        for i in xrange(self.conv_layer_num):
            if i == 0:
                input = self.x
            else:
                input = self.layers[-1].output
            config = self.conv_layer_configs[i]
            conv_layer = ConvLayer(numpy_rng=numpy_rng, input=input,
                                   input_shape=config['input_shape'],
                                   filter_shape=config['filter_shape'],
                                   poolsize=config['poolsize'],
                                   activation=self.conv_activation,
                                   flatten=config['flatten'],
                                   use_fast=self.use_fast,
                                   testing=testing)
            self.layers.append(conv_layer)
            self.conv_layers.append(conv_layer)
            self.params.extend(conv_layer.params)
            self.delta_params.extend(conv_layer.delta_params)
            self.conv_output_dim = config['output_shape'][1] * config['output_shape'][2] * config['output_shape'][3]
            cfg.n_ins = config['output_shape'][1] * config['output_shape'][2] * config['output_shape'][3]

        #######################################
        # build phase-based attention layer   #
        #######################################
        # 0. phase-based attention
        print '2. start to build attend layer: ' + str(self.extra_layers_sizes)
        for i in xrange(len(self.extra_layers_sizes)):
            if i == 0:
                input_size = cfg.extra_dim
                layer_input = self.extra_x
            else:
                input_size = self.extra_layers_sizes[i - 1]
                layer_input = self.extra_layers[-1].output
            W = None
            b = None
            attend_layer = HiddenLayer(rng=numpy_rng,
                                       input=layer_input,
                                       n_in=input_size,
                                       n_out=self.extra_layers_sizes[i],
                                       W=W, b=b,
                                       activation=self.activation)
            print '\tbuild attend layer: ' + str(input_size) + ' x ' + str(attend_layer.n_out)
            self.extra_layers.append(attend_layer)
            self.params.extend(attend_layer.params)
            self.delta_params.extend(attend_layer.delta_params)

        self.extra_output = self.extra_layers[-1].output
        self.extra_output = T.nnet.softmax(self.extra_layers[-1].output)
        #self.extra_output_rand = numpy.asarray(numpy_rng.uniform(
        #    low=-0.1,
        #    high=1.0,
        #    size=(32, 20)), dtype=theano.config.floatX)
        #self.extra_output = theano.shared(value=self.extra_output_rand, name='rand', borrow=True)
        print '2. finish attend layer softmax(0): ' + str(self.extra_layers[-1].n_out)

        #######################################
        # build dnnv                          #
        #######################################
        print '3. start to build dnnv layer: ' + str(self.hidden_layers_number)
        for i in xrange(self.hidden_layers_number):
            # construct the hidden layer
            if i == 0:
                # 1. Join two features (magnitude + phase)
                input_size = self.conv_output_dim + self.extra_layers_sizes[-1]
                layer_input = T.join(1, self.layers[-1].output, self.extra_output)
                # 2. Weighted Sum (magnitude * phase)
                #input_size = self.conv_output_dim
                #layer_input = self.layers[-1].output * self.extra_output
            else:
                input_size = self.hidden_layers_sizes[i - 1]
                layer_input = self.layers[-1].output
            W = None
            b = None
            hidden_layer = HiddenLayer(rng=numpy_rng,
                                       input=layer_input,
                                       n_in=input_size,
                                       n_out=self.hidden_layers_sizes[i],
                                       W=W, b=b,
                                       activation=self.activation)
            print '\tbuild dnnv layer: ' + str(input_size) + ' x ' + str(hidden_layer.n_out)
            # add the layer to our list of layers
            self.layers.append(hidden_layer)
            self.params.extend(hidden_layer.params)
            self.delta_params.extend(hidden_layer.delta_params)
        print '3. finish dnnv layer: ' + str(self.layers[-1].n_out)

        #######################################
        # build logistic regression layer     #
        #######################################
        print '4. start to build log layer: 1'
        # We now need to add a logistic layer on top of the MLP
        self.logLayer = OutputLayer(input=self.layers[-1].output,
                                    n_in=self.hidden_layers_sizes[-1],
                                    n_out=self.n_outs)
        print '\tbuild final layer: ' + str(self.layers[-1].n_out) + ' x ' + str(self.n_outs)
        self.layers.append(self.logLayer)
        self.params.extend(self.logLayer.params)
        self.delta_params.extend(self.logLayer.delta_params)
        print '4. finish log layer: ' + str(self.layers[-1].n_out)

        print 'Total layers: ' + str(len(self.layers))

        self.finetune_cost = self.logLayer.l2(self.y)
        self.errors = self.logLayer.errors(self.y)
        sys.stdout.flush()

    def kl_divergence(self, p, p_hat):
        return p * T.log(p / p_hat) + (1 - p) * T.log((1 - p) / (1 - p_hat))

    # output conv config to files
    def write_conv_config(self, file_path_prefix):
        for i in xrange(len(self.conv_layer_configs)):
            self.conv_layer_configs[i]['activation'] = self.cfg.conv_activation_text
            with open(file_path_prefix + '.' + str(i), 'wb') as fp:
                json.dump(self.conv_layer_configs[i], fp, indent=2, sort_keys=True)
                fp.flush()

    def build_finetune_functions(self, train_shared_xy, valid_shared_xy,
                                 extra_train_shared_x, extra_valid_shared_x, batch_size):

        (train_set_x, train_set_y) = train_shared_xy
        (valid_set_x, valid_set_y) = valid_shared_xy
        extra_train_set_x = extra_train_shared_x
        extra_valid_set_x = extra_valid_shared_x

        index = T.lscalar('index')  # index to a [mini]batch
        learning_rate = T.fscalar('learning_rate')
        momentum = T.fscalar('momentum')

        # compute the gradients with respect to the model parameters
        gparams = T.grad(self.finetune_cost, self.params)

        # compute list of fine-tuning updates
        updates = collections.OrderedDict()
        for dparam, gparam in zip(self.delta_params, gparams):
            updates[dparam] = momentum * dparam - gparam * learning_rate
        for dparam, param in zip(self.delta_params, self.params):
            updates[param] = param + updates[dparam]

        train_fn = theano.function(
            inputs=[index,
                    theano.Param(learning_rate, default=0.0001),
                    theano.Param(momentum, default=0.5)],
            outputs=self.errors,
            updates=updates,
            givens={
                self.x: train_set_x[index * batch_size:(index + 1) * batch_size],
                self.y: train_set_y[index * batch_size:(index + 1) * batch_size],
                self.extra_x: extra_train_set_x[index * batch_size:(index + 1) * batch_size]},
            on_unused_input='ignore')

        valid_fn = theano.function(
            inputs=[index],
            outputs=self.errors,
            givens={
                self.x: valid_set_x[index * batch_size:(index + 1) * batch_size],
                self.y: valid_set_y[index * batch_size:(index + 1) * batch_size],
                self.extra_x: extra_valid_set_x[index * batch_size:(index + 1) * batch_size]},
            on_unused_input='ignore')

        return train_fn, valid_fn

    def build_extract_feat_function(self, output_layer):

        feat = T.matrix('feat')
        out_da = theano.function([feat], self.layers[output_layer].output,
                                 updates=None,
                                 givens={self.x: feat},
                                 on_unused_input='warn')
        return out_da
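# ---------------------------------------------------------------------------
# Hypothetical usage sketch (illustration only). CNNV consumes two inputs per
# minibatch: magnitude features through self.x and phase features through
# self.extra_x, so build_finetune_functions also takes the extra shared
# variables. The `cfg` object, shared datasets and batch_size below are
# assumptions, not values defined in this file:
#
#   cnn = CNNV(numpy_rng=numpy.random.RandomState(89677), cfg=cfg)
#   train_fn, valid_fn = cnn.build_finetune_functions(
#       (train_set_x, train_set_y), (valid_set_x, valid_set_y),
#       extra_train_set_x, extra_valid_set_x, batch_size=32)
#   err = train_fn(0, learning_rate=0.0001, momentum=0.5)
# ---------------------------------------------------------------------------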
class RNNV(object):

    def __init__(self, numpy_rng, theano_rng=None,
                 cfg=None,  # the network configuration
                 dnn_shared=None, shared_layers=[], input=None):

        self.layers = []
        self.params = []
        self.delta_params = []

        self.rnn_layerX = 2
        print "Use DRN 2"

        self.cfg = cfg
        self.n_ins = cfg.n_ins
        self.n_outs = cfg.n_outs
        self.hidden_layers_sizes = cfg.hidden_layers_sizes
        self.hidden_layers_number = len(self.hidden_layers_sizes)
        self.activation = cfg.activation
        self.do_maxout = cfg.do_maxout
        self.pool_size = cfg.pool_size
        self.max_col_norm = cfg.max_col_norm
        self.l1_reg = cfg.l1_reg
        self.l2_reg = cfg.l2_reg
        self.non_updated_layers = cfg.non_updated_layers

        if not theano_rng:
            theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))

        # allocate symbolic variables for the data
        if input is None:
            self.x = T.matrix('x')
        else:
            self.x = input
        self.y = T.matrix('y')

        for i in xrange(self.hidden_layers_number):
            # construct the hidden layer
            if i == 0:
                input_size = self.n_ins
                layer_input = self.x
            else:
                input_size = self.hidden_layers_sizes[i - 1]
                layer_input = self.layers[-1].output

            W = None
            b = None
            if (i in shared_layers):
                W = dnn_shared.layers[i].W
                b = dnn_shared.layers[i].b

            if i == self.rnn_layerX:
                hidden_layer = RnnLayer(rng=numpy_rng,
                                        input=layer_input,
                                        n_in=input_size,
                                        n_out=self.hidden_layers_sizes[i],
                                        W=W, b=b,
                                        activation=self.activation)
            else:
                if self.do_maxout == True:
                    hidden_layer = HiddenLayer(rng=numpy_rng,
                                               input=layer_input,
                                               n_in=input_size,
                                               n_out=self.hidden_layers_sizes[i] * self.pool_size,
                                               W=W, b=b,
                                               activation=(lambda x: 1.0 * x),
                                               do_maxout=True,
                                               pool_size=self.pool_size)
                else:
                    hidden_layer = HiddenLayer(rng=numpy_rng,
                                               input=layer_input,
                                               n_in=input_size,
                                               n_out=self.hidden_layers_sizes[i],
                                               W=W, b=b,
                                               activation=self.activation)

            # add the layer to our list of layers
            self.layers.append(hidden_layer)
            # if the layer index is included in self.non_updated_layers,
            # parameters of this layer will not be updated
            if (i not in self.non_updated_layers):
                self.params.extend(hidden_layer.params)
                self.delta_params.extend(hidden_layer.delta_params)

        # We now need to add a logistic layer on top of the MLP
        self.logLayer = OutputLayer(input=self.layers[-1].output,
                                    n_in=self.hidden_layers_sizes[-1],
                                    n_out=self.n_outs)

        if self.n_outs > 0:
            self.layers.append(self.logLayer)
            self.params.extend(self.logLayer.params)
            self.delta_params.extend(self.logLayer.delta_params)

        # compute the cost for second phase of training,
        # defined as the negative log likelihood
        self.finetune_cost = self.logLayer.l2(self.y)
        self.errors = self.logLayer.errors(self.y)

        if self.l1_reg is not None:
            for i in xrange(self.hidden_layers_number):
                W = self.layers[i].W
                self.finetune_cost += self.l1_reg * (abs(W).sum())

        if self.l2_reg is not None:
            for i in xrange(self.hidden_layers_number):
                W = self.layers[i].W
                self.finetune_cost += self.l2_reg * T.sqr(W).sum()

    def build_finetune_functions(self, train_shared_xy, valid_shared_xy, batch_size):

        (train_set_x, train_set_y) = train_shared_xy
        (valid_set_x, valid_set_y) = valid_shared_xy

        index = T.lscalar('index')  # index to a [mini]batch
        learning_rate = T.fscalar('learning_rate')
        momentum = T.fscalar('momentum')

        # compute the gradients with respect to the model parameters
        gparams = T.grad(self.finetune_cost, self.params)

        # compute list of fine-tuning updates
        updates = collections.OrderedDict()
        for dparam, gparam in zip(self.delta_params, gparams):
            updates[dparam] = momentum * dparam - gparam * learning_rate
        for dparam, param in zip(self.delta_params, self.params):
            updates[param] = param + updates[dparam]

        if self.max_col_norm is not None:
            for i in xrange(self.hidden_layers_number):
                W = self.layers[i].W
                if W in updates:
                    updated_W = updates[W]
                    col_norms = T.sqrt(T.sum(T.sqr(updated_W), axis=0))
                    desired_norms = T.clip(col_norms, 0, self.max_col_norm)
                    updates[W] = updated_W * (desired_norms / (1e-7 + col_norms))

        train_fn = theano.function(inputs=[index,
                                           theano.Param(learning_rate, default=0.0001),
                                           theano.Param(momentum, default=0.5)],
                                   outputs=self.errors,
                                   updates=updates,
                                   givens={
                                       self.x: train_set_x[index * batch_size: (index + 1) * batch_size],
                                       self.y: train_set_y[index * batch_size: (index + 1) * batch_size]})

        valid_fn = theano.function(inputs=[index],
                                   outputs=self.errors,
                                   givens={
                                       self.x: valid_set_x[index * batch_size: (index + 1) * batch_size],
                                       self.y: valid_set_y[index * batch_size: (index + 1) * batch_size]})

        return train_fn, valid_fn

    def build_extract_feat_function(self, output_layer):

        feat = T.matrix('feat')
        out_da = theano.function([feat], self.layers[output_layer].output,
                                 updates=None,
                                 givens={self.x: feat},
                                 on_unused_input='warn')
        return out_da

    def build_finetune_functions_kaldi(self, train_shared_xy, valid_shared_xy):

        (train_set_x, train_set_y) = train_shared_xy
        (valid_set_x, valid_set_y) = valid_shared_xy

        index = T.lscalar('index')  # index to a [mini]batch
        learning_rate = T.fscalar('learning_rate')
        momentum = T.fscalar('momentum')

        # compute the gradients with respect to the model parameters
        gparams = T.grad(self.finetune_cost, self.params)

        # compute list of fine-tuning updates
        updates = collections.OrderedDict()
        for dparam, gparam in zip(self.delta_params, gparams):
            updates[dparam] = momentum * dparam - gparam * learning_rate
        for dparam, param in zip(self.delta_params, self.params):
            updates[param] = param + updates[dparam]

        if self.max_col_norm is not None:
            for i in xrange(self.hidden_layers_number):
                W = self.layers[i].W
                if W in updates:
                    updated_W = updates[W]
                    col_norms = T.sqrt(T.sum(T.sqr(updated_W), axis=0))
                    desired_norms = T.clip(col_norms, 0, self.max_col_norm)
                    updates[W] = updated_W * (desired_norms / (1e-7 + col_norms))

        train_fn = theano.function(inputs=[theano.Param(learning_rate, default=0.0001),
                                           theano.Param(momentum, default=0.5)],
                                   outputs=self.errors,
                                   updates=updates,
                                   givens={self.x: train_set_x, self.y: train_set_y})

        valid_fn = theano.function(inputs=[],
                                   outputs=self.errors,
                                   givens={self.x: valid_set_x, self.y: valid_set_y})

        return train_fn, valid_fn

    def write_model_to_raw(self, file_path):
        # output the model to tmp_path; this format is readable by PDNN
        _nnet2file(self.layers, filename=file_path)

    def write_model_to_kaldi(self, file_path, with_softmax=True):

        # determine whether it's BNF based on layer sizes
        output_layer_number = -1
        for layer_index in range(1, self.hidden_layers_number - 1):
            cur_layer_size = self.hidden_layers_sizes[layer_index]
            prev_layer_size = self.hidden_layers_sizes[layer_index - 1]
            next_layer_size = self.hidden_layers_sizes[layer_index + 1]
            if cur_layer_size < prev_layer_size and cur_layer_size < next_layer_size:
                output_layer_number = layer_index + 1
                break

        layer_number = len(self.layers)
        if output_layer_number == -1:
            output_layer_number = layer_number

        fout = open(file_path, 'wb')
        for i in xrange(output_layer_number):
            activation_text = '<' + self.cfg.activation_text + '>'
            if i == (layer_number - 1) and with_softmax:
                # we assume that the last layer is a softmax layer
                activation_text = '<softmax>'
            W_mat = self.layers[i].W.get_value()
            b_vec = self.layers[i].b.get_value()
            input_size, output_size = W_mat.shape
            W_layer = []
            b_layer = ''
            for rowX in xrange(output_size):
                W_layer.append('')

            for x in xrange(input_size):
                for t in xrange(output_size):
                    W_layer[t] = W_layer[t] + str(W_mat[x][t]) + ' '

            for x in xrange(output_size):
                b_layer = b_layer + str(b_vec[x]) + ' '

            fout.write('<affinetransform> ' + str(output_size) + ' ' + str(input_size) + '\n')
            fout.write('[' + '\n')
            for x in xrange(output_size):
                fout.write(W_layer[x].strip() + '\n')
            fout.write(']' + '\n')
            fout.write('[ ' + b_layer.strip() + ' ]' + '\n')
            if activation_text == '<maxout>':
                fout.write(activation_text + ' ' + str(output_size / self.pool_size) + ' ' + str(output_size) + '\n')
            else:
                fout.write(activation_text + ' ' + str(output_size) + ' ' + str(output_size) + '\n')
        fout.close()
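# ---------------------------------------------------------------------------
# Hypothetical usage sketch (illustration only). The layer index passed to
# build_extract_feat_function selects which layer's activations are returned,
# and write_model_to_kaldi dumps every layer up to the detected bottleneck (or
# all layers) as Kaldi <affinetransform> blocks. The `cfg` and `feats` names
# are assumptions for the example:
#
#   rnn = RNNV(numpy_rng=numpy.random.RandomState(89677), cfg=cfg)
#   extract_fn = rnn.build_extract_feat_function(output_layer=2)
#   hidden_feats = extract_fn(feats)          # feats: 2-D numpy float matrix
#   rnn.write_model_to_kaldi('final.nnet', with_softmax=False)
# ---------------------------------------------------------------------------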
def __init__( self, task_id, numpy_rng, theano_rng=None, cfg=None, # the network configuration dnn_shared=None, shared_layers=[], input=None): self.layers = [] self.params = [] self.delta_params = [] self.cfg = cfg self.n_ins = cfg.n_ins self.n_outs = cfg.n_outs self.hidden_layers_sizes = cfg.hidden_layers_sizes self.hidden_layers_number = len(self.hidden_layers_sizes) self.activation = cfg.activation self.do_maxout = cfg.do_maxout self.pool_size = cfg.pool_size self.max_col_norm = cfg.max_col_norm self.l1_reg = cfg.l1_reg self.l2_reg = cfg.l2_reg self.non_updated_layers = cfg.non_updated_layers if not theano_rng: theano_rng = RandomStreams(numpy_rng.randint(2**30)) # allocate symbolic variables for the data if input == None: self.x = T.matrix('x') else: self.x = input if task_id == 0: self.y = T.ivector('y') else: self.y = T.matrix('y') ####################### # build dnnv layers # ####################### print "==============" print "Task ID: %d" % (task_id) print "==============" print '1. start to build dnn layer: ' + str(self.hidden_layers_number) for i in xrange(self.hidden_layers_number): if i == 0: input_size = self.n_ins input = self.x else: input_size = self.hidden_layers_sizes[i - 1] input = self.layers[-1].output W = None b = None if (i in shared_layers): print "shared layer = %d" % (i) W = dnn_shared.layers[i].W b = dnn_shared.layers[i].b hidden_layer = HiddenLayer(rng=numpy_rng, input=input, n_in=input_size, n_out=self.hidden_layers_sizes[i], W=W, b=b, activation=self.activation) print '\tbuild lstm layer: ' + str(input_size) + ' x ' + str( hidden_layer.n_out) self.layers.append(hidden_layer) self.params.extend(hidden_layer.params) self.delta_params.extend(hidden_layer.delta_params) print '1. finish dnnv layer: ' + str(self.layers[-1].n_out) ####################### # build log layers # ####################### print '2. start to build final layer: 1' input_size = self.layers[-1].n_out input = self.layers[-1].output if task_id == 0: self.logLayer = LogisticRegression( input=self.layers[-1].output, n_in=self.hidden_layers_sizes[-1], n_out=self.n_outs) print '\tbuild final layer (classification): ' + str( input_size) + ' x ' + str(self.logLayer.n_out) self.finetune_cost = self.logLayer.negative_log_likelihood(self.y) self.errors = self.logLayer.errors(self.y) else: self.logLayer = OutputLayer(input=input, n_in=input_size, n_out=self.n_outs) print '\tbuild final layer (regression): ' + str( input_size) + ' x ' + str(self.logLayer.n_out) self.finetune_cost = self.logLayer.l2(self.y) self.errors = self.logLayer.errors(self.y) self.layers.append(self.logLayer) self.params.extend(self.logLayer.params) self.delta_params.extend(self.logLayer.delta_params) print '2. finish log layer: ' + str(self.layers[-1].n_out) print 'Total layers: ' + str(len(self.layers)) sys.stdout.flush() if self.l2_reg is not None: for i in xrange(self.hidden_layers_number): W = self.layers[i].W self.finetune_cost += self.l2_reg * T.sqr(W).sum()
class DNN_MTL(object):

    def __init__(self, task_id, numpy_rng, theano_rng=None,
                 cfg=None,              # the network configuration
                 dnn_shared=None, shared_layers=[], input=None):

        self.layers = []
        self.params = []
        self.delta_params = []

        self.cfg = cfg
        self.n_ins = cfg.n_ins
        self.n_outs = cfg.n_outs
        self.hidden_layers_sizes = cfg.hidden_layers_sizes
        self.hidden_layers_number = len(self.hidden_layers_sizes)
        self.activation = cfg.activation
        self.do_maxout = cfg.do_maxout
        self.pool_size = cfg.pool_size
        self.max_col_norm = cfg.max_col_norm
        self.l1_reg = cfg.l1_reg
        self.l2_reg = cfg.l2_reg
        self.non_updated_layers = cfg.non_updated_layers

        if not theano_rng:
            theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))

        # allocate symbolic variables for the data
        if input is None:
            self.x = T.matrix('x')
        else:
            self.x = input
        # task 0 is classification (integer labels); the other tasks are regression
        if task_id == 0:
            self.y = T.ivector('y')
        else:
            self.y = T.matrix('y')

        #######################
        # build dnnv layers   #
        #######################
        print "=============="
        print "Task ID: %d" % (task_id)
        print "=============="
        print '1. start to build dnn layer: ' + str(self.hidden_layers_number)
        for i in xrange(self.hidden_layers_number):
            if i == 0:
                input_size = self.n_ins
                input = self.x
            else:
                input_size = self.hidden_layers_sizes[i - 1]
                input = self.layers[-1].output
            W = None
            b = None
            if i in shared_layers:
                print "shared layer = %d" % (i)
                W = dnn_shared.layers[i].W
                b = dnn_shared.layers[i].b
            hidden_layer = HiddenLayer(rng=numpy_rng, input=input,
                                       n_in=input_size,
                                       n_out=self.hidden_layers_sizes[i],
                                       W=W, b=b,
                                       activation=self.activation)
            print '\tbuild dnn layer: ' + str(input_size) + ' x ' + str(hidden_layer.n_out)
            self.layers.append(hidden_layer)
            self.params.extend(hidden_layer.params)
            self.delta_params.extend(hidden_layer.delta_params)
        print '1. finish dnnv layer: ' + str(self.layers[-1].n_out)

        #######################
        # build log layers    #
        #######################
        print '2. start to build final layer: 1'
        input_size = self.layers[-1].n_out
        input = self.layers[-1].output
        if task_id == 0:
            self.logLayer = LogisticRegression(input=self.layers[-1].output,
                                               n_in=self.hidden_layers_sizes[-1],
                                               n_out=self.n_outs)
            print '\tbuild final layer (classification): ' + str(input_size) + ' x ' + str(self.logLayer.n_out)
            self.finetune_cost = self.logLayer.negative_log_likelihood(self.y)
            self.errors = self.logLayer.errors(self.y)
        else:
            self.logLayer = OutputLayer(input=input,
                                        n_in=input_size,
                                        n_out=self.n_outs)
            print '\tbuild final layer (regression): ' + str(input_size) + ' x ' + str(self.logLayer.n_out)
            self.finetune_cost = self.logLayer.l2(self.y)
            self.errors = self.logLayer.errors(self.y)
        self.layers.append(self.logLayer)
        self.params.extend(self.logLayer.params)
        self.delta_params.extend(self.logLayer.delta_params)
        print '2. finish log layer: ' + str(self.layers[-1].n_out)

        print 'Total layers: ' + str(len(self.layers))
        sys.stdout.flush()

        if self.l2_reg is not None:
            for i in xrange(self.hidden_layers_number):
                W = self.layers[i].W
                self.finetune_cost += self.l2_reg * T.sqr(W).sum()

    def build_finetune_functions(self, train_shared_xy, valid_shared_xy, batch_size):

        (train_set_x, train_set_y) = train_shared_xy
        (valid_set_x, valid_set_y) = valid_shared_xy

        index = T.lscalar('index')  # index to a [mini]batch
        learning_rate = T.fscalar('learning_rate')
        momentum = T.fscalar('momentum')

        # compute the gradients with respect to the model parameters
        gparams = T.grad(self.finetune_cost, self.params)

        # compute list of fine-tuning updates
        updates = collections.OrderedDict()
        for dparam, gparam in zip(self.delta_params, gparams):
            updates[dparam] = momentum * dparam - gparam * learning_rate
        for dparam, param in zip(self.delta_params, self.params):
            updates[param] = param + updates[dparam]

        if self.max_col_norm is not None:
            for i in xrange(self.hidden_layers_number):
                W = self.layers[i].W
                if W in updates:
                    updated_W = updates[W]
                    col_norms = T.sqrt(T.sum(T.sqr(updated_W), axis=0))
                    desired_norms = T.clip(col_norms, 0, self.max_col_norm)
                    updates[W] = updated_W * (desired_norms / (1e-7 + col_norms))

        train_fn = theano.function(
            inputs=[index,
                    theano.Param(learning_rate, default=0.0001),
                    theano.Param(momentum, default=0.5)],
            outputs=self.errors,
            updates=updates,
            givens={
                self.x: train_set_x[index * batch_size:(index + 1) * batch_size],
                self.y: train_set_y[index * batch_size:(index + 1) * batch_size]})

        valid_fn = theano.function(
            inputs=[index],
            outputs=self.errors,
            givens={
                self.x: valid_set_x[index * batch_size:(index + 1) * batch_size],
                self.y: valid_set_y[index * batch_size:(index + 1) * batch_size]})

        return train_fn, valid_fn

    def build_extract_feat_function(self, output_layer):

        print 'build_extract_feat_function'
        print output_layer
        feat = T.matrix('feat')
        out_da = theano.function([feat], self.layers[output_layer].output,
                                 updates=None,
                                 givens={self.x: feat},
                                 on_unused_input='warn')
        return out_da

    def build_finetune_functions_kaldi(self, train_shared_xy, valid_shared_xy):

        (train_set_x, train_set_y) = train_shared_xy
        (valid_set_x, valid_set_y) = valid_shared_xy

        index = T.lscalar('index')  # index to a [mini]batch
        learning_rate = T.fscalar('learning_rate')
        momentum = T.fscalar('momentum')

        # compute the gradients with respect to the model parameters
        gparams = T.grad(self.finetune_cost, self.params)

        # compute list of fine-tuning updates
        updates = collections.OrderedDict()
        for dparam, gparam in zip(self.delta_params, gparams):
            updates[dparam] = momentum * dparam - gparam * learning_rate
        for dparam, param in zip(self.delta_params, self.params):
            updates[param] = param + updates[dparam]

        if self.max_col_norm is not None:
            for i in xrange(self.hidden_layers_number):
                W = self.layers[i].W
                if W in updates:
                    updated_W = updates[W]
                    col_norms = T.sqrt(T.sum(T.sqr(updated_W), axis=0))
                    desired_norms = T.clip(col_norms, 0, self.max_col_norm)
                    updates[W] = updated_W * (desired_norms / (1e-7 + col_norms))

        train_fn = theano.function(
            inputs=[theano.Param(learning_rate, default=0.0001),
                    theano.Param(momentum, default=0.5)],
            outputs=self.errors,
            updates=updates,
            givens={self.x: train_set_x, self.y: train_set_y})

        valid_fn = theano.function(
            inputs=[],
            outputs=self.errors,
            givens={self.x: valid_set_x, self.y: valid_set_y})

        return train_fn, valid_fn

    def write_model_to_raw(self, file_path):
        # output the model to tmp_path; this format is readable by PDNN
        _nnet2file(self.layers, filename=file_path)

    def write_model_to_kaldi(self, file_path, with_softmax=True):

        # determine whether it's BNF based on layer sizes
        output_layer_number = -1
        #for layer_index in range(1, self.hidden_layers_number - 1):
        #    cur_layer_size = self.hidden_layers_sizes[layer_index]
        #    prev_layer_size = self.hidden_layers_sizes[layer_index-1]
        #    next_layer_size = self.hidden_layers_sizes[layer_index+1]
        #    if cur_layer_size < prev_layer_size and cur_layer_size < next_layer_size:
        #        output_layer_number = layer_index+1; break

        layer_number = len(self.layers)
        if output_layer_number == -1:
            output_layer_number = layer_number

        fout = open(file_path, 'wb')
        for i in xrange(output_layer_number):
            activation_text = '<' + self.cfg.activation_text + '>'
            if i == (layer_number - 1) and with_softmax:
                # we assume that the last layer is a softmax layer
                activation_text = '<softmax>'
            W_mat = self.layers[i].W.get_value()
            b_vec = self.layers[i].b.get_value()
            input_size, output_size = W_mat.shape
            W_layer = []
            b_layer = ''
            for rowX in xrange(output_size):
                W_layer.append('')
            for x in xrange(input_size):
                for t in xrange(output_size):
                    W_layer[t] = W_layer[t] + str(W_mat[x][t]) + ' '
            for x in xrange(output_size):
                b_layer = b_layer + str(b_vec[x]) + ' '
            fout.write('<affinetransform> ' + str(output_size) + ' ' + str(input_size) + '\n')
            fout.write('[' + '\n')
            for x in xrange(output_size):
                fout.write(W_layer[x].strip() + '\n')
            fout.write(']' + '\n')
            fout.write('[ ' + b_layer.strip() + ' ]' + '\n')
            if activation_text == '<maxout>':
                fout.write(activation_text + ' ' + str(output_size / self.pool_size) + ' ' + str(output_size) + '\n')
            else:
                fout.write(activation_text + ' ' + str(output_size) + ' ' + str(output_size) + '\n')
        fout.close()
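# ---------------------------------------------------------------------------
# Usage sketch (not part of the original source): one possible way to wire the
# multi-task class above together. Task 0 owns the hidden layers; a second task
# reuses them through dnn_shared / shared_layers. The _SketchConfig object, the
# shared (x, y) tuples and all numeric sizes are hypothetical stand-ins; only
# DNN_MTL and build_finetune_functions come from the code above.
# ---------------------------------------------------------------------------
import numpy

class _SketchConfig(object):
    # hypothetical minimal config carrying the attributes DNN_MTL reads
    n_ins = 440
    n_outs = 1024
    hidden_layers_sizes = [1024, 1024, 1024]
    activation = T.nnet.sigmoid
    do_maxout = False
    pool_size = 1
    max_col_norm = None
    l1_reg = None
    l2_reg = None
    non_updated_layers = []

def _mtl_sketch(train_shared_xy, valid_shared_xy, batch_size=256):
    cfg = _SketchConfig()
    numpy_rng = numpy.random.RandomState(89677)
    # task 0: classification head; task 1: regression head sharing layers 0-2
    dnn_task0 = DNN_MTL(task_id=0, numpy_rng=numpy_rng, cfg=cfg)
    dnn_task1 = DNN_MTL(task_id=1, numpy_rng=numpy_rng, cfg=cfg,
                        dnn_shared=dnn_task0, shared_layers=[0, 1, 2])
    # each task compiles its own fine-tuning functions over its own data
    train_fn, valid_fn = dnn_task0.build_finetune_functions(
        train_shared_xy, valid_shared_xy, batch_size)
    return dnn_task0, dnn_task1, train_fn, valid_fn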
def __init__(self, numpy_rng, theano_rng=None,
             cfg=None,              # the network configuration
             dnn_shared=None, shared_layers=[], input=None):

    self.layers = []
    self.params = []
    self.delta_params = []

    self.cfg = cfg
    self.n_ins = cfg.n_ins
    self.n_outs = cfg.n_outs
    self.hidden_layers_sizes = cfg.hidden_layers_sizes
    self.hidden_layers_number = len(self.hidden_layers_sizes)
    self.activation = cfg.activation
    self.do_maxout = cfg.do_maxout
    self.pool_size = cfg.pool_size
    self.max_col_norm = cfg.max_col_norm
    self.l1_reg = cfg.l1_reg
    self.l2_reg = cfg.l2_reg
    self.non_updated_layers = cfg.non_updated_layers

    if not theano_rng:
        theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))

    # allocate symbolic variables for the data
    if input is None:
        self.x = T.matrix('x')
    else:
        self.x = input
    self.y = T.matrix('y')

    #######################################
    # build dnnv + attend layer           #
    #######################################
    print '3. start to build dnnv layer: ' + str(self.hidden_layers_number)
    for i in xrange(self.hidden_layers_number):
        # construct the hidden layer
        if i == 0:
            input_size = self.n_ins
            layer_input = self.x
        else:
            input_size = self.hidden_layers_sizes[i - 1]
            layer_input = self.layers[-1].output
        W = None
        b = None
        if i in shared_layers:
            W = dnn_shared.layers[i].W
            b = dnn_shared.layers[i].b
        if self.do_maxout:
            hidden_layer = HiddenLayer(rng=numpy_rng, input=layer_input,
                                       n_in=input_size,
                                       n_out=self.hidden_layers_sizes[i] * self.pool_size,
                                       W=W, b=b,
                                       activation=(lambda x: 1.0 * x),
                                       do_maxout=True,
                                       pool_size=self.pool_size)
        else:
            hidden_layer = HiddenLayer(rng=numpy_rng, input=layer_input,
                                       n_in=input_size,
                                       n_out=self.hidden_layers_sizes[i],
                                       W=W, b=b,
                                       activation=self.activation)
        print '\tbuild dnnv layer: ' + str(input_size) + ' x ' + str(hidden_layer.n_out)
        # add the layer to our list of layers
        self.layers.append(hidden_layer)
        # if the layer index is included in self.non_updated_layers,
        # parameters of this layer will not be updated
        if i not in self.non_updated_layers:
            self.params.extend(hidden_layer.params)
            self.delta_params.extend(hidden_layer.delta_params)
    print '3. finish dnnv layer: ' + str(self.layers[-1].n_out)

    #######################################
    # build logistic regression layer     #
    #######################################
    print '4. start to build log layer: 1'
    # We now need to add an output layer on top of the MLP
    self.logLayer = OutputLayer(input=self.layers[-1].output,
                                n_in=self.hidden_layers_sizes[-1],
                                n_out=self.n_outs)
    print '\tbuild final layer: ' + str(self.layers[-1].n_out) + ' x ' + str(self.n_outs)
    if self.n_outs > 0:
        self.layers.append(self.logLayer)
        self.params.extend(self.logLayer.params)
        self.delta_params.extend(self.logLayer.delta_params)
    print '4. finish log layer: ' + str(self.layers[-1].n_out)
    print 'Total layers: ' + str(len(self.layers))
    sys.stdout.flush()

    # compute the cost for the second phase of training,
    # here the L2 regression cost of the output layer
    self.finetune_cost = self.logLayer.l2(self.y)
    self.errors = self.logLayer.errors(self.y)

    if self.l1_reg is not None:
        for i in xrange(self.hidden_layers_number):
            W = self.layers[i].W
            self.finetune_cost += self.l1_reg * (abs(W).sum())
    if self.l2_reg is not None:
        for i in xrange(self.hidden_layers_number):
            W = self.layers[i].W
            self.finetune_cost += self.l2_reg * T.sqr(W).sum()
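# ---------------------------------------------------------------------------
# Side note (not part of the original source): when do_maxout is enabled above,
# each hidden layer produces hidden_layers_sizes[i] * pool_size linear units
# and keeps only the maximum within every group of pool_size units. A minimal
# numpy sketch of that pooling step, assuming the linear activations are laid
# out group-by-group along the last axis (the actual HiddenLayer may use a
# different unit ordering):
# ---------------------------------------------------------------------------
import numpy

def _maxout_pool_sketch(lin_out, pool_size):
    """lin_out: (batch, n_units * pool_size) -> (batch, n_units)."""
    batch, width = lin_out.shape
    assert width % pool_size == 0
    grouped = lin_out.reshape(batch, width // pool_size, pool_size)
    return grouped.max(axis=2)

# e.g. a (2, 6) activation matrix with pool_size=3 pools down to (2, 2)
_maxout_example = _maxout_pool_sketch(
    numpy.arange(12, dtype='float32').reshape(2, 6), 3)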
class CNNV(object):

    def __init__(self, numpy_rng, theano_rng=None, cfg=None,
                 testing=False, input=None):

        self.layers = []
        self.extra_layers = []
        self.params = []
        self.delta_params = []

        self.n_ins = cfg.n_ins
        self.n_outs = cfg.n_outs
        self.conv_layers = []

        self.cfg = cfg
        self.conv_layer_configs = cfg.conv_layer_configs
        self.conv_activation = cfg.conv_activation
        self.use_fast = cfg.use_fast

        self.extra_x = T.matrix('extra_x')

        # 1.5 attention
        self.extra_dim = cfg.extra_dim
        print 'Extra input dimension: ' + str(cfg.extra_dim)
        self.extra_layers_sizes = cfg.extra_layers_sizes

        # 2. dnn
        self.hidden_layers_sizes = cfg.hidden_layers_sizes
        self.hidden_layers_number = len(self.hidden_layers_sizes)
        self.activation = cfg.activation

        if not theano_rng:
            theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))

        if input is None:
            self.x = T.matrix('x')
        else:
            self.x = input
        self.y = T.matrix('y')

        #######################
        # build cnn layers    #
        #######################
        print '1. start to build cnn mag layer: ' + str(self.conv_layer_configs)
        self.conv_layer_num = len(self.conv_layer_configs)
        for i in xrange(self.conv_layer_num):
            if i == 0:
                input = self.x
            else:
                input = self.layers[-1].output
            config = self.conv_layer_configs[i]
            conv_layer = ConvLayer(numpy_rng=numpy_rng, input=input,
                                   input_shape=config['input_shape'],
                                   filter_shape=config['filter_shape'],
                                   poolsize=config['poolsize'],
                                   activation=self.conv_activation,
                                   flatten=config['flatten'],
                                   use_fast=self.use_fast,
                                   testing=testing)
            self.layers.append(conv_layer)
            self.conv_layers.append(conv_layer)
            self.params.extend(conv_layer.params)
            self.delta_params.extend(conv_layer.delta_params)
        self.conv_output_dim = (config['output_shape'][1] *
                                config['output_shape'][2] *
                                config['output_shape'][3])
        cfg.n_ins = (config['output_shape'][1] *
                     config['output_shape'][2] *
                     config['output_shape'][3])

        #######################################
        # build phase-based attention layer   #
        #######################################
        # 0. phase-based attention
        print '2. start to build attend layer: ' + str(self.extra_layers_sizes)
        for i in xrange(len(self.extra_layers_sizes)):
            if i == 0:
                input_size = cfg.extra_dim
                layer_input = self.extra_x
            else:
                input_size = self.extra_layers_sizes[i - 1]
                layer_input = self.extra_layers[-1].output
            W = None
            b = None
            attend_layer = HiddenLayer(rng=numpy_rng, input=layer_input,
                                       n_in=input_size,
                                       n_out=self.extra_layers_sizes[i],
                                       W=W, b=b,
                                       activation=self.activation)
            print '\tbuild attend layer: ' + str(input_size) + ' x ' + str(attend_layer.n_out)
            self.extra_layers.append(attend_layer)
            self.params.extend(attend_layer.params)
            self.delta_params.extend(attend_layer.delta_params)
        self.extra_output = self.extra_layers[-1].output
        self.extra_output = T.nnet.softmax(self.extra_layers[-1].output)
        #self.extra_output_rand = numpy.asarray(numpy_rng.uniform(
        #    low=-0.1, high=1.0, size=(32, 20)), dtype=theano.config.floatX)
        #self.extra_output = theano.shared(value=self.extra_output_rand, name='rand', borrow=True)
        print '2. finish attend layer softmax(0): ' + str(self.extra_layers[-1].n_out)

        #######################################
        # build dnnv                          #
        #######################################
        print '3. start to build dnnv layer: ' + str(self.hidden_layers_number)
        for i in xrange(self.hidden_layers_number):
            # construct the hidden layer
            if i == 0:
                # 1. Join two features (magnitude + phase)
                input_size = self.conv_output_dim + self.extra_layers_sizes[-1]
                layer_input = T.join(1, self.layers[-1].output, self.extra_output)
                # 2. Weighted Sum (magnitude * phase)
                #input_size = self.conv_output_dim
                #layer_input = self.layers[-1].output * self.extra_output
            else:
                input_size = self.hidden_layers_sizes[i - 1]
                layer_input = self.layers[-1].output
            W = None
            b = None
            hidden_layer = HiddenLayer(rng=numpy_rng, input=layer_input,
                                       n_in=input_size,
                                       n_out=self.hidden_layers_sizes[i],
                                       W=W, b=b,
                                       activation=self.activation)
            print '\tbuild dnnv layer: ' + str(input_size) + ' x ' + str(hidden_layer.n_out)
            # add the layer to our list of layers
            self.layers.append(hidden_layer)
            self.params.extend(hidden_layer.params)
            self.delta_params.extend(hidden_layer.delta_params)
        print '3. finish dnnv layer: ' + str(self.layers[-1].n_out)

        #######################################
        # build logistic regression layer     #
        #######################################
        print '4. start to build log layer: 1'
        # We now need to add a logistic layer on top of the MLP
        self.logLayer = OutputLayer(input=self.layers[-1].output,
                                    n_in=self.hidden_layers_sizes[-1],
                                    n_out=self.n_outs)
        print '\tbuild final layer: ' + str(self.layers[-1].n_out) + ' x ' + str(self.n_outs)
        self.layers.append(self.logLayer)
        self.params.extend(self.logLayer.params)
        self.delta_params.extend(self.logLayer.delta_params)
        print '4. finish log layer: ' + str(self.layers[-1].n_out)
        print 'Total layers: ' + str(len(self.layers))

        self.finetune_cost = self.logLayer.l2(self.y)
        self.errors = self.logLayer.errors(self.y)
        sys.stdout.flush()

    def kl_divergence(self, p, p_hat):
        return p * T.log(p / p_hat) + (1 - p) * T.log((1 - p) / (1 - p_hat))

    # output conv config to files
    def write_conv_config(self, file_path_prefix):
        for i in xrange(len(self.conv_layer_configs)):
            self.conv_layer_configs[i]['activation'] = self.cfg.conv_activation_text
            with open(file_path_prefix + '.' + str(i), 'wb') as fp:
                json.dump(self.conv_layer_configs[i], fp, indent=2, sort_keys=True)
                fp.flush()

    def build_finetune_functions(self, train_shared_xy, valid_shared_xy,
                                 extra_train_shared_x, extra_valid_shared_x,
                                 batch_size):

        (train_set_x, train_set_y) = train_shared_xy
        (valid_set_x, valid_set_y) = valid_shared_xy
        (extra_train_set_x) = extra_train_shared_x
        (extra_valid_set_x) = extra_valid_shared_x

        index = T.lscalar('index')  # index to a [mini]batch
        learning_rate = T.fscalar('learning_rate')
        momentum = T.fscalar('momentum')

        # compute the gradients with respect to the model parameters
        gparams = T.grad(self.finetune_cost, self.params)

        # compute list of fine-tuning updates
        updates = collections.OrderedDict()
        for dparam, gparam in zip(self.delta_params, gparams):
            updates[dparam] = momentum * dparam - gparam * learning_rate
        for dparam, param in zip(self.delta_params, self.params):
            updates[param] = param + updates[dparam]

        train_fn = theano.function(
            inputs=[index,
                    theano.Param(learning_rate, default=0.0001),
                    theano.Param(momentum, default=0.5)],
            outputs=self.errors,
            updates=updates,
            givens={
                self.x: train_set_x[index * batch_size:(index + 1) * batch_size],
                self.y: train_set_y[index * batch_size:(index + 1) * batch_size],
                self.extra_x: extra_train_set_x[index * batch_size:(index + 1) * batch_size]},
            on_unused_input='ignore')

        valid_fn = theano.function(
            inputs=[index],
            outputs=self.errors,
            givens={
                self.x: valid_set_x[index * batch_size:(index + 1) * batch_size],
                self.y: valid_set_y[index * batch_size:(index + 1) * batch_size],
                self.extra_x: extra_valid_set_x[index * batch_size:(index + 1) * batch_size]},
            on_unused_input='ignore')

        return train_fn, valid_fn

    def build_extract_feat_function(self, output_layer):

        feat = T.matrix('feat')
        out_da = theano.function([feat], self.layers[output_layer].output,
                                 updates=None,
                                 givens={self.x: feat},
                                 on_unused_input='warn')
        return out_da
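# ---------------------------------------------------------------------------
# Usage sketch (not part of the original source): extracting intermediate
# features from a trained CNNV. The feats array and the choice of layer are
# hypothetical; only CNNV.build_extract_feat_function comes from the code
# above. Note that build_extract_feat_function only feeds self.x, so the
# sketch picks a convolutional layer below the attention join; layers past
# the join would also require cnn.extra_x.
# ---------------------------------------------------------------------------
def _extract_conv_features_sketch(cnn, feats):
    """feats: 2-D numpy float32 array of flattened input frames (batch x n_ins)."""
    # conv layers come first in cnn.layers, so this indexes the last conv layer
    layer_index = len(cnn.conv_layers) - 1
    extract_fn = cnn.build_extract_feat_function(layer_index)
    return extract_fn(feats.astype('float32'))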