def get_optimization_func(self, target_dim_list, loss_func, optimizer, clip):
    """Compile and return the optimization function.

    Args:
      target_dim_list: list of integers. Dimensions of the targets,
        e.g. target_dim_list=[2]
      loss_func: string | function.
      optimizer: object.
      clip: None | real value.

    Returns:
      Optimization function.
    """
    # Set ground truth nodes
    self.set_gt_nodes(target_dim_list)

    # Default loss
    if type(loss_func) is str:
        assert len(self.out_nodes_) == 1, \
            "If the number of out_layers > 1, you need to define your own loss_func!"
        loss_node = obj.get(loss_func)(self.out_nodes_[0], self.gt_nodes_[0])
    # User defined loss
    else:
        loss_node = loss_func(self)

    # Compute gradients
    gparams = K.grad(loss_node + self.reg_value_, self.params_)

    # Clip gradients
    if clip is not None:
        gparams = [K.clip(gparam, -clip, clip) for gparam in gparams]

    # Gradient based optimization
    param_updates = optimizer.get_updates(self.params_, gparams)

    # Collect all updates
    updates = param_updates + self.inner_updates_

    # Compile the optimization function
    inputs = self.in_nodes_ + self.gt_nodes_ + [K.common_tr_phase_node]
    outputs = [loss_node]
    f = K.function_no_given(inputs, outputs, updates)
    return f
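# A minimal usage sketch (assumptions: `md` is an already-built model exposing
# this method with a single input and a single target, `batch_x` / `batch_y`
# are matching numpy arrays, and SGD is the optimizer used as the default in
# fit() below; the trailing 1. feeds K.common_tr_phase_node to select the
# training phase):
#
#   f_opt = md.get_optimization_func(target_dim_list=[2],
#                                    loss_func='categorical_crossentropy',
#                                    optimizer=SGD(lr=0.01, rho=0.9),
#                                    clip=None)
#   loss = f_opt(batch_x, batch_y, 1.)[0]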
def beta_divergence(y_pred, y_gt, beta):
    y_pred = K.clip(y_pred, _EPSILON, np.inf)
    y_gt = K.clip(y_gt, _EPSILON, np.inf)
    beta_mat = 1. / (beta * (beta - 1)) * (K.power(y_gt, beta)
                                           + (beta - 1) * K.power(y_pred, beta)
                                           - beta * y_gt * K.power(y_pred, (beta - 1)))
    return K.mean(K.sum(beta_mat, axis=-1))
def is_divergence(y_pred, y_gt):
    y_pred = K.clip(y_pred, _EPSILON, np.inf)
    y_gt = K.clip(y_gt, _EPSILON, np.inf)
    is_mat = y_gt / y_pred - K.log(y_gt / y_pred) - 1
    return K.mean(K.sum(is_mat, axis=-1))
def kl_divergence(y_pred, y_gt):
    y_pred = K.clip(y_pred, _EPSILON, np.inf)
    y_gt = K.clip(y_gt, _EPSILON, np.inf)
    kl_mat = y_gt * K.log(y_gt / y_pred) - y_gt + y_pred
    return K.mean(K.sum(kl_mat, axis=-1))
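# The beta-divergence above reduces to the (generalized) KL divergence as
# beta -> 1 and to the IS divergence as beta -> 0. Below is a self-contained
# NumPy reference for the KL form, useful as a sanity check against
# kl_divergence; the helper name and the eps value are assumptions, not part
# of the library API:
def _kl_divergence_np(y_pred, y_gt, eps=1e-8):
    y_pred = np.clip(y_pred, eps, np.inf)
    y_gt = np.clip(y_gt, eps, np.inf)
    # Elementwise generalized KL, summed over the last axis, averaged over samples
    kl_mat = y_gt * np.log(y_gt / y_pred) - y_gt + y_pred
    return np.mean(np.sum(kl_mat, axis=-1))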
def binary_crossentropy(p_y_pred, y_gt):
    p_y_pred = K.clip(p_y_pred, _EPSILON, 1. - _EPSILON)
    return K.mean(K.mean(K.binary_crossentropy(p_y_pred, y_gt), axis=-1))
def sparse_categorical_crossentropy(p_y_pred, y_gt):
    p_y_pred = K.clip(p_y_pred, _EPSILON, 1. - _EPSILON)
    # Convert integer targets to one-hot vectors before applying the categorical loss
    y_gt = K.to_one_hot(y_gt, )
    return K.mean(K.categorical_crossentropy(p_y_pred, y_gt))
def categorical_crossentropy(p_y_pred, y_gt):
    p_y_pred = K.clip(p_y_pred, _EPSILON, 1. - _EPSILON)
    return K.mean(K.categorical_crossentropy(p_y_pred, y_gt))
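# A NumPy reference for the categorical cross entropy above, following the
# standard definition (batch average of -sum(y_gt * log(p)) over the last
# axis). The helper name and eps value are assumptions for illustration only:
def _categorical_crossentropy_np(p_y_pred, y_gt, eps=1e-6):
    p_y_pred = np.clip(p_y_pred, eps, 1. - eps)
    return np.mean(-np.sum(y_gt * np.log(p_y_pred), axis=-1))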
def fit(self, x, y, batch_size=100, n_epochs=10,
        loss_func='categorical_crossentropy',
        optimizer=SGD(lr=0.01, rho=0.9), clip=None,
        callbacks=[], shuffle=True, verbose=1):
    x = to_list(x)
    y = to_list(y)

    # Format data
    x = [K.format_data(e) for e in x]
    y = [K.format_data(e) for e in y]

    # Shuffle data
    if shuffle:
        x, y = supports.shuffle(x, y)

    # Check data
    self._check_data(y, loss_func)

    # Init ground truth nodes
    self._gt_nodes_ = [K.placeholder(e.ndim) for e in y]

    # Memory usage
    print "Train",
    self._show_memory_usage(self._layer_list_, batch_size)

    # Default objective
    if type(loss_func) is str:
        assert len(self._out_nodes_) == len(self._gt_nodes_), \
            "If you are using a default objective, the number of out_nodes " \
            + "must match the number of ground truth nodes!"
        loss_node = sum([obj.get(loss_func)(pred_node, gt_node)
                         for pred_node, gt_node
                         in zip(self._out_nodes_, self._gt_nodes_)])
    # User defined objective
    else:
        loss_node = loss_func(self._out_nodes_, self._any_nodes_, self._gt_nodes_)
        # loss_node = loss_func(self)

    # Gradients
    gparams = K.grad(loss_node + self._reg_value_, self._params_)

    # Clip gradients
    if clip is not None:
        gparams = [K.clip(gparam, -clip, clip) for gparam in gparams]

    # Gradient based optimization
    param_updates = optimizer.get_updates(self._params_, gparams)

    # Collect all updates
    updates = param_updates + self._inner_updates_

    # Compile callbacks
    if callbacks is not None:
        callbacks = to_list(callbacks)
        for callback in callbacks:
            callback.compile(self)

    # Compile model
    input_nodes = self._in_nodes_ + self._gt_nodes_
    output_nodes = [loss_node]
    f = K.function_no_given(input_nodes, self._tr_phase_node_, output_nodes, updates)

    # Train
    N = len(x[0])
    batch_num = int(np.ceil(float(N) / batch_size))
    n_abs_epoch = n_epochs + self._epoch_

    # Callbacks before training
    print '\n0th epoch:'
    for callback in callbacks:
        if (self._epoch_ % callback.call_freq == 0):
            callback.call()

    while self._epoch_ < n_abs_epoch:
        self._epoch_ += 1

        # Train one epoch
        t1 = time.time()
        loss_list = []
        for i2 in xrange(batch_num):
            batch_x = [e[i2 * batch_size : min((i2 + 1) * batch_size, N)] for e in x]
            batch_y = [e[i2 * batch_size : min((i2 + 1) * batch_size, N)] for e in y]
            in_list = batch_x + batch_y + [1.]      # 1. selects the training phase
            loss = f(*in_list)[0]
            loss_list.append(loss)
            if verbose == 1:
                self._print_progress(self._epoch_, batch_num, i2)
            if verbose == 2:
                self._print_progress_loss(self._epoch_, batch_num, i2, loss)

        t2 = time.time()
        self._tr_time_ += (t2 - t1)
        if verbose != 0:
            print '\n', ' tr_time: ', "%.2f" % (t2 - t1), 's'    # print an empty line

        # Callbacks after each epoch
        for callback in callbacks:
            if (self._epoch_ % callback.call_freq == 0):
                callback.call()
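# A typical call (a sketch; `md`, `tr_x` and `tr_y` are assumptions standing in
# for a built model and its training data, everything else mirrors the
# signature above):
#
#   md.fit(x=tr_x, y=tr_y, batch_size=100, n_epochs=10,
#          loss_func='categorical_crossentropy',
#          optimizer=SGD(lr=0.01, rho=0.9), clip=None,
#          callbacks=[], shuffle=True, verbose=1)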