def build_finetune_functions(self, train_shared_xy, valid_shared_xy, use_lhuc=False, layer_index=0):
    """ Build the finetune functions and wire up the gradient updates for the selected optimizer

    :param train_shared_xy: theano shared variables for the training input and output data
    :type train_shared_xy: tuple of shared variables
    :param valid_shared_xy: theano shared variables for the development input and output data
    :type valid_shared_xy: tuple of shared variables
    :param use_lhuc: if True, only the LHUC scaling parameters (named 'c') are updated
    :param layer_index: number of leading recurrent layers whose parameters are frozen
    :returns: finetune functions for training and development
    """
    logger = logging.getLogger("DNN initialization")

    (train_set_x, train_set_y) = train_shared_xy
    (valid_set_x, valid_set_y) = valid_shared_xy

    lr = T.scalar('lr', dtype=theano.config.floatX)
    mom = T.scalar('mom', dtype=theano.config.floatX)  # momentum

    cost = self.finetune_cost  # + self.L2_reg * self.L2_sqr

    ## added for LHUC
    if use_lhuc:
        # in LHUC only the scaling parameters, which have the name 'c', are trained
        self.lhuc_params = []
        for p in self.params:
            if p.name == 'c':
                self.lhuc_params.append(p)
        params = self.lhuc_params
    else:
        params = self.params
    gparams = T.grad(cost, params)

    freeze_params = 0
    for layer in range(layer_index):
        freeze_params += len(self.rnn_layers[layer].params)

    # use optimizer
    if self.optimizer == 'sgd':
        # zip pairs each parameter with its gradient
        updates = OrderedDict()
        for i, (param, gparam) in enumerate(zip(params, gparams)):
            weight_update = self.updates[param]
            upd = mom * weight_update - lr * gparam
            updates[weight_update] = upd

            # freeze the first layer_index layers and update the remaining weights
            if i >= freeze_params:
                updates[param] = param + upd

    elif self.optimizer == 'adam':
        updates = compile_ADAM_train_function(self, gparams, learning_rate=lr)
    elif self.optimizer == 'rprop':
        updates = compile_RPROP_train_function(self, gparams)
    else:
        logger.critical("This optimizer: %s is not supported right now!\nPlease use one of the following: sgd, adam, rprop\n" % (self.optimizer))
        sys.exit(1)

    train_model = theano.function(
        inputs=[lr, mom],  # index, batch_size
        outputs=self.errors,
        updates=updates,
        givens={
            self.x: train_set_x,  # [index*batch_size:(index + 1)*batch_size]
            self.y: train_set_y,
            self.is_train: np.cast['int32'](1)
        },
        on_unused_input='ignore')

    valid_model = theano.function(
        inputs=[],
        outputs=self.errors,
        givens={
            self.x: valid_set_x,
            self.y: valid_set_y,
            self.is_train: np.cast['int32'](0)
        },
        on_unused_input='ignore')

    return train_model, valid_model
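# NOTE: the helper below is an illustrative usage sketch only, not part of the original class.
# It assumes `model` exposes build_finetune_functions() exactly as defined above and that the
# shared variables already hold the training / development data; names such as `train_xy`,
# `valid_xy` and `num_epochs` are placeholders introduced for this example.
def _example_lhuc_finetune_loop(model, train_xy, valid_xy, num_epochs=10,
                                learning_rate=0.002, momentum=0.9):
    # adapt only the LHUC scaling parameters and freeze the first two recurrent layers
    train_fn, valid_fn = model.build_finetune_functions(train_xy, valid_xy,
                                                        use_lhuc=True, layer_index=2)
    for epoch in range(num_epochs):
        train_error = train_fn(learning_rate, momentum)  # one update pass over the shared data
        valid_error = valid_fn()                         # error on the development set
    return train_error, valid_error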
def build_finetune_functions(self, train_shared_xy, valid_shared_xy):
    """ Build the finetune functions and wire up the gradient updates for the selected optimizer

    :param train_shared_xy: theano shared variables for the training input and output data
    :type train_shared_xy: tuple of shared variables
    :param valid_shared_xy: theano shared variables for the development input and output data
    :type valid_shared_xy: tuple of shared variables
    :returns: finetune functions for training and development
    """
    logger = logging.getLogger("DNN initialization")

    (train_set_x, train_set_y) = train_shared_xy
    (valid_set_x, valid_set_y) = valid_shared_xy

    lr = T.scalar('lr', dtype=theano.config.floatX)
    mom = T.scalar('mom', dtype=theano.config.floatX)  # momentum

    cost = self.finetune_cost  # + self.L2_reg * self.L2_sqr

    gparams = T.grad(cost, self.params)

    # use optimizer
    if self.optimizer == 'sgd':
        # zip pairs each parameter with its gradient
        updates = OrderedDict()
        for param, gparam in zip(self.params, gparams):
            weight_update = self.updates[param]
            upd = mom * weight_update - lr * gparam
            updates[weight_update] = upd
            updates[param] = param + upd

    elif self.optimizer == 'adam':
        updates = compile_ADAM_train_function(self, gparams, learning_rate=lr)
    elif self.optimizer == 'rprop':
        updates = compile_RPROP_train_function(self, gparams)
    else:
        logger.critical("This optimizer: %s is not supported right now!\nPlease use one of the following: sgd, adam, rprop\n" % (self.optimizer))
        sys.exit(1)

    train_model = theano.function(
        inputs=[lr, mom],  # index, batch_size
        outputs=self.errors,
        updates=updates,
        givens={
            self.x: train_set_x,  # [index*batch_size:(index + 1)*batch_size]
            self.y: train_set_y,
            self.is_train: np.cast['int32'](1)
        },
        on_unused_input='ignore')

    valid_model = theano.function(
        inputs=[],
        outputs=self.errors,
        givens={
            self.x: valid_set_x,
            self.y: valid_set_y,
            self.is_train: np.cast['int32'](0)
        },
        on_unused_input='ignore')

    return train_model, valid_model
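# NOTE: a minimal standalone sketch (not used by the class) of the classical-momentum rule that
# the 'sgd' branch above encodes through the `self.updates[param]` velocity variables:
#     v <- mom * v - lr * grad ;  param <- param + v
# It works on plain floats or numpy arrays and exists only to make the update rule explicit.
def _sgd_momentum_step(param, grad, velocity, lr=0.002, mom=0.9):
    velocity = mom * velocity - lr * grad   # corresponds to updates[weight_update] = upd
    param = param + velocity                # corresponds to updates[param] = param + upd
    return param, velocity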
def build_finetune_functions_S2SPF(self, train_shared_xydf, valid_shared_xydf, layer_index=6):
    """ Build the finetune functions and wire up the gradient updates for the
    sequence-to-sequence model

    :param train_shared_xydf: theano shared variables for the training data (x, y, d, f)
    :type train_shared_xydf: tuple of shared variables
    :param valid_shared_xydf: theano shared variables for the development data (x, y, d, f)
    :type valid_shared_xydf: tuple of shared variables
    :param layer_index: number of leading recurrent (encoder) layers that are trained with
                        twice the supplied learning rate
    :returns: finetune functions for training and development
    """
    logger = logging.getLogger("DNN initialization")

    (train_set_x, train_set_y, train_set_d, train_set_f) = train_shared_xydf
    (valid_set_x, valid_set_y, valid_set_d, valid_set_f) = valid_shared_xydf

    lr = T.scalar('lr', dtype=theano.config.floatX)
    mom = T.scalar('mom', dtype=theano.config.floatX)  # momentum

    cost = self.finetune_cost  # + self.L2_reg * self.L2_sqr

    params = self.params
    gparams = T.grad(cost, params)

    encoder_params = 0
    for layer in range(layer_index):
        encoder_params += len(self.rnn_layers[layer].params)

    # use optimizer
    if self.optimizer == 'sgd':
        # zip pairs each parameter with its gradient
        updates = OrderedDict()
        for i, (param, gparam) in enumerate(zip(params, gparams)):
            weight_update = self.updates[param]
            if i >= encoder_params:
                upd = mom * weight_update - lr * gparam
            else:
                # encoder-layer parameters are updated with twice the learning rate
                upd = mom * weight_update - (lr * 2) * gparam
            updates[weight_update] = upd
            updates[param] = param + upd

    elif self.optimizer == 'adam':
        updates = compile_ADAM_train_function(self, gparams, learning_rate=lr)
    elif self.optimizer == 'rprop':
        updates = compile_RPROP_train_function(self, gparams)
    else:
        logger.critical("This optimizer: %s is not supported right now!\nPlease use one of the following: sgd, adam, rprop\n" % (self.optimizer))
        sys.exit(1)

    train_model = theano.function(
        inputs=[lr, mom],
        outputs=self.errors,
        updates=updates,
        givens={
            self.x: train_set_x,
            self.y: train_set_y,
            self.d: train_set_d,
            self.f: train_set_f,
            self.is_train: np.cast['int32'](1)
        },
        on_unused_input='ignore')

    valid_model = theano.function(
        inputs=[],
        outputs=self.errors,
        givens={
            self.x: valid_set_x,
            self.y: valid_set_y,
            self.d: valid_set_d,
            self.f: valid_set_f,
            self.is_train: np.cast['int32'](0)
        },
        on_unused_input='ignore')

    return train_model, valid_model
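# NOTE: illustrative usage sketch only, mirroring the call pattern the S2SPF variant above
# expects: the shared data comes as four-element tuples (x, y, d, f), and the first
# `layer_index` (encoder) layers are trained with twice the supplied learning rate.
# `model`, `train_xydf`, `valid_xydf` and `num_epochs` are placeholder names for this example.
def _example_s2s_finetune_loop(model, train_xydf, valid_xydf, num_epochs=10,
                               learning_rate=0.001, momentum=0.9):
    train_fn, valid_fn = model.build_finetune_functions_S2SPF(train_xydf, valid_xydf,
                                                              layer_index=6)
    for epoch in range(num_epochs):
        train_error = train_fn(learning_rate, momentum)  # one update pass over the shared data
        valid_error = valid_fn()                         # error on the development set
    return train_error, valid_error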