def categorical_cross_entropy_sample(self, predicted, target):
    # Build the cross-entropy expression -sum_i t_i * log(s_i) from auto_diff nodes.
    target_flatten = flatten(target)
    predicted_flatten = flatten(predicted)
    target_var = [
        auto_diff.Var(name=f't_{i}', value=float(target_flatten[i]))
        for i in range(len(target_flatten))
    ]
    # Clamp predictions away from zero so Log stays finite.
    predicted_var = [
        auto_diff.Var(name=f's_{i}',
                      value=np.maximum(1e-15, float(predicted_flatten[i])))
        for i in range(len(predicted_flatten))
    ]
    log_predicted_var = [auto_diff.Log(pred_var) for pred_var in predicted_var]
    prod_target_predicted = [
        auto_diff.Mul(ti, log_pred_i)
        for ti, log_pred_i in zip(target_var, log_predicted_var)
    ]
    sum_ce = reduce(lambda x, y: auto_diff.Add(x, y), prod_target_predicted)
    sum_ce = auto_diff.Mul(auto_diff.Constant(-1), sum_ce)
    # Simplified derivative for softmax + cross-entropy; the pure auto_diff
    # version is kept below for reference.
    cost = predicted - target
    # cost = np.array(
    #     [sum_ce.gradient(pred_var).eval() for pred_var in predicted_var]
    # ).reshape(predicted.shape)
    return {'error': sum_ce.eval(), 'derivative_error': cost}
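# Standalone sanity-check sketch (not part of the class). It assumes the
# project's auto_diff nodes (Var, Log, Mul, Add, Constant) and their
# .eval()/.gradient() methods behave as they are used above. Note that the
# gradient of -sum_i t_i*log(s_i) with respect to s_i is -t_i/s_i; the
# `predicted - target` shortcut returned above is the gradient with respect
# to the pre-softmax logits when this loss is paired with a softmax output.
def _cross_entropy_sketch():
    from functools import reduce
    t = [0.0, 1.0, 0.0]
    s = [0.2, 0.7, 0.1]
    t_vars = [auto_diff.Var(name=f't_{i}', value=v) for i, v in enumerate(t)]
    s_vars = [auto_diff.Var(name=f's_{i}', value=v) for i, v in enumerate(s)]
    terms = [auto_diff.Mul(tv, auto_diff.Log(sv)) for tv, sv in zip(t_vars, s_vars)]
    ce = auto_diff.Mul(auto_diff.Constant(-1),
                       reduce(lambda a, b: auto_diff.Add(a, b), terms))
    print(ce.eval())                      # expected: -log(0.7) ~ 0.3567
    print(ce.gradient(s_vars[1]).eval())  # expected: -t_1/s_1 = -1/0.7 ~ -1.4286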
def sigmoid(self, value=0):
    # Returns a format-string template for sigmoid(x) = 1 / (1 + exp(-x));
    # callers fill {0} with an auto_diff.Var and eval() the resulting string.
    x = auto_diff.Var(name='x')
    sigmoid_func = 'auto_diff.Div(auto_diff.Constant(1),' \
                   'auto_diff.Add(auto_diff.Constant(1), ' \
                   'auto_diff.Exponential(auto_diff.Mul(auto_diff.Constant(-1), {0}))))'
    # Matching derivative template: the same expression differentiated w.r.t. {0}.
    self.gradient = f'{sigmoid_func}.gradient({{0}})'
    return sigmoid_func
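# Usage sketch (assumptions flagged): `act` stands for an instance of this
# activation class, whose name is not shown in this excerpt, and the sketch
# relies on str(auto_diff.Var) rendering in an eval()-able form, which the
# forward and backward passes in this file already depend on.
def _sigmoid_template_sketch(act):
    x = auto_diff.Var(name='x', value=0.5)
    template = act.sigmoid()                      # '... {0} ...' format string
    print(eval(template.format(x)).eval())        # sigmoid(0.5) ~ 0.6225
    print(eval(act.gradient.format(x)).eval())    # sigmoid'(0.5) ~ 0.2350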
def compute_backward_pass(self, learning_rate=0.01):
    # Evaluate the activation derivative at z_out; softmax already stored its
    # derivative during the forward pass.
    if self.activation_type == 'softmax':
        derivative_output = self.derivative
    else:
        z_vars = [auto_diff.Var(name="x", value=x) for x in flatten(self.z_out)]
        derivative_output = np.array(
            [eval(self.derivative.format(x)).eval() for x in z_vars],
            ndmin=2).reshape(self.out.shape)

    if self.next_layer is None:
        # Output layer: delta = dE/da * a'(z).
        # error_gradient_out = np.dot(derivative_output, self.compute_error_gradient().T).T
        error_gradient_out = self.compute_error_gradient() * derivative_output
        if self.activation_type == 'softmax':
            # Softmax + cross-entropy collapse to a single simplified gradient.
            error_gradient_out = self.cross_entropy_with_softmax()
        # error_gradient_out = np.array(np.sum(error_gradient_out, axis=1), ndmin=2)
        self.crt_err_gradient = np.dot(error_gradient_out.T, self.prev_layer.out)
        self.chain_gradient = np.dot(error_gradient_out, self.data)
        if error_gradient_out.shape[0] > 1:
            # Average the gradients over the batch.
            self.crt_err_gradient /= error_gradient_out.shape[0]
            self.chain_gradient /= error_gradient_out.shape[0]
        self.update_weights(learning_rate)
        self.prev_layer.compute_backward_pass(learning_rate)
    elif self.prev_layer is not None and self.prev_layer.prev_layer is not None:
        # Hidden layer: propagate the next layer's chained gradient and recurse.
        error_gradient_hidden = self.next_layer.chain_gradient * derivative_output
        self.bias_gradient = self.compute_bias_gradient(error_gradient_hidden)
        self.crt_err_gradient = np.dot(error_gradient_hidden.T, self.prev_layer.out)
        self.chain_gradient = np.dot(error_gradient_hidden, self.data)
        self.update_weights(learning_rate)
        self.prev_layer.compute_backward_pass(learning_rate)
    elif self.prev_layer.prev_layer is None:
        # First hidden layer: compute its gradients, but stop the recursion here.
        error_gradient_hidden = self.next_layer.chain_gradient * derivative_output
        self.bias_gradient = self.compute_bias_gradient(error_gradient_hidden)
        self.crt_err_gradient = np.dot(error_gradient_hidden.T, self.prev_layer.out)
        self.update_weights(learning_rate)
        self.chain_gradient = error_gradient_hidden
    else:
        return self.data
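# Numpy-only sketch of the chain rule the method above implements, assuming
# (as the code suggests) that `self.data` holds the layer's weight matrix and
# `prev_layer.out` the previous layer's activations; all names here are
# illustrative, not part of the class.
def _backprop_sketch():
    import numpy as np
    rng = np.random.default_rng(0)
    a_prev = rng.standard_normal((4, 3))        # previous layer output, batch of 4
    W = rng.standard_normal((2, 3))             # this layer's weights
    z = a_prev @ W.T                            # pre-activation
    a = 1.0 / (1.0 + np.exp(-z))                # sigmoid output
    y = rng.random((4, 2))                      # dummy targets

    # Output layer: delta = dE/da * a'(z), as in the `next_layer is None` branch.
    delta = (a - y) * a * (1 - a)
    weight_grad = delta.T @ a_prev / delta.shape[0]   # analogue of crt_err_gradient
    chain_grad = delta @ W / delta.shape[0]           # analogue of chain_gradient
    # Hidden-layer branches reuse chain_grad in place of (a - y) and recurse
    # toward the input, stopping at the first hidden layer.
    print(weight_grad.shape, chain_grad.shape)        # (2, 3) (4, 3)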
def __init__(self, x_arr=None, name=CHOICES[0], layer=None):
    # Resolve the activation by name; softmax is evaluated batch-wise, every
    # other activation element-wise through its auto_diff template string.
    self.function = getattr(self, name, None)
    if name == 'softmax':
        self.softmax_batch(x_arr, layer)
    else:
        self.derivative = None
        if x_arr is not None:
            x_vars = [auto_diff.Var(name="x", value=x) for x in flatten(x_arr)]
            self.output = np.array(
                [eval(self.function().format(x)).eval() for x in x_vars],
                ndmin=2).reshape(x_arr.shape)
            # Hand the derivative template to the layer for its backward pass.
            layer.derivative = self.gradient
def softmax_sample(self, x_arr):
    # softmax_i(x) = exp(x_i) / sum_j exp(x_j), built from auto_diff nodes.
    x_flatten = flatten(x_arr)
    # Numerical stabilization before exponentiating.
    stable_softmax = self.softmax_stabilizer(x_flatten)
    x_arr_var = [
        auto_diff.Var(name=f'x_{i}', value=float(stable_softmax[i]))
        for i in range(len(stable_softmax))
    ]
    exp_arr = [auto_diff.Exponential(var) for var in x_arr_var]
    sum_denominator = reduce(lambda x, y: auto_diff.Add(x, y), exp_arr)
    softmax_results = [
        self.function(x, sum_denominator, x_arr_var) for x in exp_arr
    ]
    softmax_forward = [result[0] for result in softmax_results]
    # The backward expressions are built as well, but only the forward values
    # are returned here.
    softmax_back = [result[1] for result in softmax_results]
    return softmax_forward
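# Forward-value sketch (not part of the class). The per-element helper called
# above (self.function) is not shown in this excerpt, so building each
# probability directly with auto_diff.Div is an assumption about what it
# roughly does; Div, Exponential, Add, and Var are used elsewhere in this file.
def _softmax_forward_sketch():
    from functools import reduce
    logits = [2.0, 1.0, 0.1]
    m = max(logits)                               # stabilization step
    xs = [auto_diff.Var(name=f'x_{i}', value=v - m) for i, v in enumerate(logits)]
    exps = [auto_diff.Exponential(v) for v in xs]
    denom = reduce(lambda a, b: auto_diff.Add(a, b), exps)
    probs = [auto_diff.Div(e, denom).eval() for e in exps]
    print(probs, sum(probs))                      # probabilities sum to ~ 1.0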