def eval(self, executor, eval_program=None):
    if eval_program is None:
        eval_program = Program()
    block = eval_program.current_block()
    with program_guard(main_program=eval_program):
        # Clone the accumulated state into the temporary eval program.
        total_distance = _clone_var_(block, self.total_distance)
        seq_num = _clone_var_(block, self.seq_num)
        instance_error = _clone_var_(block, self.instance_error)
        seq_num = layers.cast(x=seq_num, dtype='float32')
        instance_error = layers.cast(x=instance_error, dtype='float32')
        # Average edit distance and instance-error rate over all sequences.
        avg_distance = layers.elementwise_div(x=total_distance, y=seq_num)
        avg_instance_error = layers.elementwise_div(
            x=instance_error, y=seq_num)
        result = executor.run(
            eval_program, fetch_list=[avg_distance, avg_instance_error])
    return np.array(result[0]), np.array(result[1])
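# Illustrative only, not part of the framework: the eval program above just
# performs two element-wise divisions over the accumulated counters. The same
# arithmetic in plain NumPy, with hypothetical counter values:
import numpy as np

total_distance = np.array([42.0], dtype='float32')  # summed edit distance
seq_num = np.array([8], dtype='int64')               # sequences evaluated
instance_error = np.array([3], dtype='int64')        # sequences with any error

seq_num_f = seq_num.astype('float32')
avg_distance = total_distance / seq_num_f                          # [5.25]
avg_instance_error = instance_error.astype('float32') / seq_num_f  # [0.375]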
def eval(self, executor, eval_program=None):
    if eval_program is None:
        eval_program = Program()
    block = eval_program.current_block()
    kwargs = {'main_program': eval_program}
    total = _clone_var_(block, self.total)
    correct = _clone_var_(block, self.correct)
    total = layers.cast(total, dtype='float32', **kwargs)
    correct = layers.cast(correct, dtype='float32', **kwargs)
    # accuracy = correct / total over everything accumulated so far
    out = layers.elementwise_div(x=correct, y=total, **kwargs)
    return np.array(executor.run(eval_program, fetch_list=[out])[0])
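# Illustrative only: the accuracy computed above is simply correct / total over
# everything accumulated so far. A plain-NumPy sketch with hypothetical counters:
import numpy as np

correct = np.array([930], dtype='int64')
total = np.array([1000], dtype='int64')
accuracy = correct.astype('float32') / total.astype('float32')  # [0.93]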
def _add_average_apply_op(self, block, param_grad):
    param = block.clone_variable(param_grad[0])
    grad = block.clone_variable(param_grad[1])
    sum_1 = block.clone_variable(self._get_accumulator('sum_1', param))
    sum_2 = block.clone_variable(self._get_accumulator('sum_2', param))
    sum_3 = block.clone_variable(self._get_accumulator('sum_3', param))
    num_accumulates = block.clone_variable(
        self._get_accumulator('num_accumulates', param))
    old_num_accumulates = block.clone_variable(
        self._get_accumulator('old_num_accumulates', param))
    num_updates = block.clone_variable(
        self._get_accumulator('num_updates', param))
    # backup param value to grad
    layers.assign(input=param, output=grad)
    # param = (sum_1 + sum_2 + sum_3) / (num_accumulates + old_num_accumulates)
    tmp = layers.sum(x=[num_accumulates, old_num_accumulates])
    sum = layers.sum(x=[sum_1, sum_2, sum_3])
    tmp = layers.cast(x=tmp, dtype='float32')
    sum = layers.cast(x=sum, dtype='float32')
    layers.elementwise_div(x=sum, y=tmp, out=param)
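# Illustrative only: the averaging rule applied above, written out in NumPy with
# made-up accumulator values. The averaged parameter is the sum of the three
# windowed accumulators divided by the total number of accumulation steps.
import numpy as np

sum_1 = np.array([1.0, 2.0], dtype='float32')
sum_2 = np.array([3.0, 4.0], dtype='float32')
sum_3 = np.array([5.0, 6.0], dtype='float32')
num_accumulates = np.array([4], dtype='int64')
old_num_accumulates = np.array([2], dtype='int64')

steps = (num_accumulates + old_num_accumulates).astype('float32')
averaged_param = (sum_1 + sum_2 + sum_3) / steps  # [1.5, 2.0]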
def create_operators(self, param, grad):
    group_scale_name = self.group_name + "_scale"
    if group_scale_name not in self.context:
        group_norm_var = layers.sums(input=self.context[self.group_name])
        layers.sqrt(x=group_norm_var, out=group_norm_var)
        clip_var = self.context[self.group_name + "_clip"]
        # scale = clip_norm / max(clip_norm, global_norm), shared by the group
        group_scale_var = layers.elementwise_div(
            x=clip_var,
            y=layers.elementwise_max(
                x=clip_var, y=group_norm_var))
        assert group_scale_var.shape == (1, )
        self.context[group_scale_name] = group_scale_var

    new_grad = layers.elementwise_mul(
        x=grad, y=self.context[group_scale_name])
    return param, new_grad
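# Illustrative only: clipping by global norm rescales every gradient in the
# group by clip_norm / max(clip_norm, global_norm), so the joint norm never
# exceeds clip_norm while already-small gradients pass through unchanged.
# Hypothetical gradients, in plain NumPy:
import numpy as np

grads = [np.array([3.0, 4.0]), np.array([0.0, 12.0])]
clip_norm = 5.0

global_norm = np.sqrt(sum(np.sum(g * g) for g in grads))  # sqrt(9 + 16 + 144) = 13
scale = clip_norm / max(clip_norm, global_norm)           # 5 / 13
clipped = [g * scale for g in grads]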