def train(self):
    """
    1. Build the basic TensorFlow ops and graph. Note that this step only *defines* and
       *constructs* the computation; no actual training or evaluation happens here.
    """
    # Build a dedicated dataflow graph
    temp_graph = tf.Graph()
    with temp_graph.as_default():
        # Placeholders for the training data could be defined as below, but running train()
        # that way always failed with (not yet resolved):
        # "You must feed a value for placeholder tensor 'x' with dtype float and shape [?,?]"
        # x = tf.placeholder(dtype=tf.float32, shape=[None, None], name='x')
        # y = tf.placeholder(dtype=tf.float32, shape=[None], name='y')

        # Trainable parameters w and b: weight is initialised uniformly in [-1, 1],
        # bias is a variable initialised to 0
        weight = tf.Variable(tf.random_uniform([1, self.__x_train.shape[1]], -1.0, 1.0))
        bias = tf.Variable(tf.zeros([1]))

        # Binary-classification sigmoid model: y = 1 / (1 + exp(-(w*x + b)))
        # y_pre = tf.div(1.0,
        #                tf.add(1.0,
        #                       tf.exp(tf.negative(tf.reduce_sum(tf.multiply(weight, self.__x_train),
        #                                                        1
        #                                                        ) + bias)
        #                              )
        #                       )
        #                )
        # Or simply use TensorFlow's built-in sigmoid:
        y_pre = tf.sigmoid(tf.reduce_sum(tf.multiply(weight, self.__x_train), 1) + bias)

        # Loss: negative log-likelihood (-y*log(y_pre) - (1-y)*log(1-y_pre)) / num_samples.
        # Why this form? It comes down to the difference between least squares in linear
        # regression and maximum likelihood in logistic regression: least squares makes the
        # "error" between predictions and targets as small as possible, while maximum
        # likelihood makes the probability of the observed training samples as large as
        # possible. The log-likelihood above follows from that derivation (see the
        # literature); for gradient descent we simply negate it so it can be minimised.
        loss0 = self.__y_train * tf.log(y_pre)
        loss1 = (1 - self.__y_train) * tf.log(1 - y_pre)
        loss = tf.reduce_sum(- loss0 - loss1) / self.__x_train.shape[0]

        # Optimisation algorithm (gradient descent) whose objective is to minimise the loss
        optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.1)
        train = optimizer.minimize(loss)

        # Variable initialiser
        init = tf.global_variables_initializer()

    """
    2. Actual training
    """
    # Open a session; real computation only starts here
    with tf.Session(graph=temp_graph) as sess:
        sess.run(init)
        print('initial parameters: weight=', sess.run(weight), ', bias=', sess.run(bias))
        # Fit the model by running 1000 gradient-descent steps to obtain the best w and b
        for step in range(1000):
            sess.run(train)
            if step % 100 == 0:
                print("step %u: weight: %s, bias: %f, loss: %f"
                      % (step, weight.eval(), bias.eval(), loss.eval()))
        self.__weight = weight.eval()
        self.__bias = bias.eval()
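The same negative log-likelihood can be computed more stably with tf.nn.sigmoid_cross_entropy_with_logits, which never evaluates log(0) when a prediction saturates. A minimal sketch under TF 1.x; x_train and y_train are hypothetical stand-ins for the private members used above:

import numpy as np
import tensorflow as tf

x_train = np.random.rand(100, 3).astype(np.float32)            # hypothetical features
y_train = np.random.randint(0, 2, 100).astype(np.float32)      # hypothetical 0/1 labels

weight = tf.Variable(tf.random_uniform([1, x_train.shape[1]], -1.0, 1.0))
bias = tf.Variable(tf.zeros([1]))
logits = tf.reduce_sum(tf.multiply(weight, x_train), 1) + bias  # w*x + b, before the sigmoid
# Equivalent to -y*log(sigmoid(logits)) - (1-y)*log(1-sigmoid(logits)), computed stably
loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(labels=y_train, logits=logits))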
def loss_fn(y_true: tf.Tensor, y_pred: tf.Tensor):
    """ split the label """
    grid_pred_xy = y_pred[..., 0:2]
    grid_pred_wh = y_pred[..., 2:4]
    pred_confidence = y_pred[..., 4:5]
    pred_cls = y_pred[..., 5:]

    all_true_xy = y_true[..., 0:2]
    all_true_wh = y_true[..., 2:4]
    true_confidence = y_true[..., 4:5]
    true_cls = y_true[..., 5:]

    obj_mask = true_confidence  # true_confidence[..., 0] > obj_thresh
    obj_mask_bool = y_true[..., 4] > obj_thresh

    """ calc the ignore mask """
    ignore_mask = calc_ignore_mask(all_true_xy, all_true_wh, grid_pred_xy,
                                   grid_pred_wh, obj_mask_bool, iou_thresh, layer, h)

    grid_true_xy, grid_true_wh = tf_xywh_to_grid(all_true_xy, all_true_wh, layer, h)
    # NOTE When wh=0 , tf.log(0) = -inf, so use K.switch to avoid it
    grid_true_wh = K.switch(obj_mask_bool, grid_true_wh, tf.zeros_like(grid_true_wh))

    """ define loss """
    coord_weight = 2 - all_true_wh[..., 0:1] * all_true_wh[..., 1:2]

    xy_loss = tf.reduce_sum(
        obj_mask * coord_weight * tf.nn.sigmoid_cross_entropy_with_logits(
            labels=grid_true_xy, logits=grid_pred_xy)) / h.batch_size

    wh_loss = tf.reduce_sum(
        obj_mask * coord_weight * wh_weight * tf.square(
            tf.subtract(x=grid_true_wh, y=grid_pred_wh))) / h.batch_size

    obj_loss = obj_weight * tf.reduce_sum(
        obj_mask * tf.nn.sigmoid_cross_entropy_with_logits(
            labels=true_confidence, logits=pred_confidence)) / h.batch_size

    noobj_loss = noobj_weight * tf.reduce_sum(
        (1 - obj_mask) * ignore_mask * tf.nn.sigmoid_cross_entropy_with_logits(
            labels=true_confidence, logits=pred_confidence)) / h.batch_size

    cls_loss = tf.reduce_sum(
        obj_mask * tf.nn.sigmoid_cross_entropy_with_logits(
            labels=true_cls, logits=pred_cls)) / h.batch_size

    total_loss = obj_loss + noobj_loss + cls_loss + xy_loss + wh_loss

    return total_loss
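The helper calc_ignore_mask is not shown above; the usual idea is that predicted boxes whose best IoU against any ground-truth box exceeds iou_thresh are excluded from the no-object loss. A minimal, generic sketch of that technique (not the project's own calc_ignore_mask; box layout [x, y, w, h] in center format is assumed):

import tensorflow as tf

def box_iou(boxes_a, boxes_b):
    """Pairwise IoU between two box sets of shape [Na, 4] and [Nb, 4] in center [x, y, w, h] format."""
    a_mins = boxes_a[..., :2] - boxes_a[..., 2:] / 2.
    a_maxs = boxes_a[..., :2] + boxes_a[..., 2:] / 2.
    b_mins = boxes_b[..., :2] - boxes_b[..., 2:] / 2.
    b_maxs = boxes_b[..., :2] + boxes_b[..., 2:] / 2.
    inter_mins = tf.maximum(a_mins[:, None, :], b_mins[None, :, :])
    inter_maxs = tf.minimum(a_maxs[:, None, :], b_maxs[None, :, :])
    inter_wh = tf.maximum(inter_maxs - inter_mins, 0.)
    inter_area = inter_wh[..., 0] * inter_wh[..., 1]
    area_a = boxes_a[..., 2] * boxes_a[..., 3]
    area_b = boxes_b[..., 2] * boxes_b[..., 3]
    return inter_area / (area_a[:, None] + area_b[None, :] - inter_area + 1e-9)

def simple_ignore_mask(pred_boxes, true_boxes, iou_thresh=0.5):
    """Zero out predictions whose best IoU with any ground-truth box exceeds the threshold."""
    iou = box_iou(pred_boxes, true_boxes)     # [num_pred, num_true]
    best_iou = tf.reduce_max(iou, axis=-1)    # [num_pred]
    return tf.cast(best_iou < iou_thresh, tf.float32)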
def model_fn(features, labels, mode, params):
    scores = predict_scores(features)
    if mode == ModeKeys.INFER:
        return EstimatorSpec(mode, predictions=scores)

    positive_scores = lookup_positives(scores, labels['click_position'])
    logits = create_diffs(positive_scores, scores)
    lbls = create_label(labels['click_position'])
    ele_loss = elementwise_loss(lbls, logits, labels['normal_mask']) * lbls
    loss = reduce_sum(ele_loss)
    true_lbl = true_label(features, labels)

    if mode == ModeKeys.EVAL:
        return EstimatorSpec(mode, loss=loss, eval_metric_ops={
            'acc': mean(
                accuracy(
                    argmax(noise_label(labels), axis=1),
                    argmax(to_one_hot(scores), axis=1)))
        })
    else:
        optimizer = AdamOptimizer(learning_rate=params['learning_rate'])
        train_op = optimizer.minimize(loss, global_step=get_global_step())
        return EstimatorSpec(mode, loss=loss, train_op=train_op)
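The function follows the tf.estimator model_fn contract (features, labels, mode, params), so wiring it up is just a matter of constructing an Estimator. A sketch with illustrative values; the learning rate, model_dir and input_fn names are hypothetical:

import tensorflow as tf

estimator = tf.estimator.Estimator(
    model_fn=model_fn,                      # the function defined above
    params={'learning_rate': 1e-3},         # hypothetical hyper-parameter value
    model_dir='/tmp/ranking_model')         # hypothetical checkpoint directory

# estimator.train(input_fn=train_input_fn)  # train_input_fn must yield (features, labels)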
def _setup_actor_critic_loss(self, actor, critic, num_actions):
    # One-hot encoding of the actions actually taken
    actions_one_hot = tf.placeholder(tf.float32, [None, num_actions])

    # Probability the policy assigned to the taken action; clamp before log to avoid log(0)
    action_probability = tf.reduce_sum(actor * actions_one_hot, axis=1)
    log_prob = tf.log(tf.maximum(action_probability, self._log_noise))

    # Advantage uses the critic as a baseline; stop_gradient keeps the policy term
    # from updating the critic
    advantage = self._R - tf.stop_gradient(critic)

    # Policy entropy, used as a regulariser to encourage exploration
    entropy = tf.reduce_sum(tf.log(tf.maximum(actor, self._log_noise)) * actor, axis=1)

    actor_loss = -(tf.reduce_sum((log_prob * advantage), axis=0) +
                   tf.reduce_sum((-1 * self._entropy_beta * entropy), axis=0))
    critic_loss = tf.reduce_sum(tf.square(self._R - critic), axis=0)
    loss = 0.5 * critic_loss + actor_loss
    return loss, actions_one_hot
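self._R holds the discounted returns that are fed in at training time. A minimal sketch of how they are typically computed from a rollout, bootstrapping from the critic's value of the final state; the function name and gamma value are assumptions, not part of the original code:

import numpy as np

def discounted_returns(rewards, bootstrap_value, gamma=0.99):
    """Compute R_t = r_t + gamma * R_{t+1}, seeded with the critic's value of the last state."""
    returns = np.zeros(len(rewards), dtype=np.float32)
    running = bootstrap_value
    for t in reversed(range(len(rewards))):
        running = rewards[t] + gamma * running
        returns[t] = running
    return returns

# e.g. discounted_returns([1., 0., 0., 1.], bootstrap_value=0.5) -> values fed into self._R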
def calc_loss(logits: tf.Tensor, caps_out: tf.Tensor, x: tf.Tensor,
              y: tf.Tensor, decoded: tf.Tensor):
    with tf.variable_scope('calc_loss'):
        # Weight balancing the upper- and lower-margin terms of the margin loss
        lambda_val = 0.5
        # Upper and lower margin values
        m_plus = 0.95
        m_minus = 0.05

        max_l = tf.square(tf.maximum(0., m_plus - logits))
        max_r = tf.square(tf.maximum(0., logits - m_minus))
        margin_loss = tf.reduce_mean(
            tf.reduce_sum(y * max_l + lambda_val * (1. - y) * max_r, axis=-1))

        origin = tf.reshape(x, (x.shape[0], -1))
        reconstruct_loss = 0.0005 * tf.reduce_mean(tf.square(origin - decoded))

        total_loss = margin_loss + reconstruct_loss
    return total_loss
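A quick sanity check of the margin-loss terms on toy values (TF 2 eager shown for brevity; under TF 1 the result would be evaluated in a session). The constants follow the snippet above rather than the 0.9/0.1 values of the original CapsNet paper:

import tensorflow as tf

logits = tf.constant([[0.97, 0.03]])   # capsule lengths: class 0 confident, class 1 suppressed
y      = tf.constant([[1.0, 0.0]])     # one-hot label

m_plus, m_minus, lambda_val = 0.95, 0.05, 0.5
max_l = tf.square(tf.maximum(0., m_plus - logits))   # penalises a short capsule for the true class
max_r = tf.square(tf.maximum(0., logits - m_minus))  # penalises a long capsule for wrong classes
margin_loss = tf.reduce_mean(tf.reduce_sum(y * max_l + lambda_val * (1. - y) * max_r, axis=-1))
# margin_loss is 0 here because both capsules already satisfy their margins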
def capsnet(inputs: tf.Tensor):
    layer1 = keras.layers.Conv2D(256, 9, strides=1, padding='valid')
    layer2 = keras.layers.Conv2D(32 * 8, 9, strides=2)
    reshape1 = keras.layers.Reshape((-1, 8))
    active1 = keras.layers.Activation(squash)
    layer3 = CapsDense(units=10, vec_len=16, activation=squash,
                       use_routing=True, use_bias=True)
    final = keras.layers.Lambda(
        lambda x: tf.reduce_sum(tf.abs(x), axis=-1) + 1.e-9, name="final")

    with tf.variable_scope('CapsNet'):
        x = layer1(inputs)
        x = layer2(x)  # tf.Tensor
        x = reshape1(x)
        x = active1(x)
        caps_out = layer3(x)
        logits = final(caps_out)
    return logits, caps_out
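A hypothetical wiring sketch showing how capsnet and calc_loss above could fit together on MNIST-sized inputs; the decoder producing `decoded` is assumed to exist elsewhere in the repo, and CapsDense/squash come from the surrounding project:

import tensorflow as tf

x = tf.placeholder(tf.float32, [128, 28, 28, 1], name='images')   # fixed batch, since calc_loss uses x.shape[0]
y = tf.placeholder(tf.float32, [128, 10], name='one_hot_labels')

logits, caps_out = capsnet(x)
# decoded = decoder(caps_out, y)                       # reconstruction branch, assumed defined elsewhere
# total_loss = calc_loss(logits, caps_out, x, y, decoded)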
def _pixel_selector_grad(op, grad):
    """The gradients for 'pixel_selector'.

    Args:
        op: The 'pixel_selector' operation we want to differentiate.
        grad: Gradient with respect to the output of the 'pixel_selector' op.

    Returns:
        Gradients with respect to the coordinates of points of interest for 'pixel_selector'.
    """
    input = op.inputs[0]
    coord = op.inputs[1]
    strides = op.inputs[2]
    coord_grad = ops.zeros((NUM_POINTS, 3), tf.float32)
    back_grad = ops.reshape(grad, [-1])
    coord_grad_tmp = np.zeros((NUM_POINTS, 3), np.float32)
    # Approximate d(output)/d(coord[i, j]) with a central finite difference:
    # perturb the coordinate by +1 and -1, re-run the op, and take half the difference.
    for i in range(0, NUM_POINTS):
        for j in range(0, 3):
            coord_tmp = np.zeros((NUM_POINTS, 3), np.float32)
            coord_tmp[i, j] = 1.0
            coord_tmp = coord + coord_tmp
            tmp_1 = ops.reshape(
                select_module.pixel_selector(input, coord_tmp, strides), [-1])
            coord_tmp = np.zeros((NUM_POINTS, 3), np.float32)
            coord_tmp[i, j] = -1.0
            coord_tmp = coord + coord_tmp
            tmp_2 = ops.reshape(
                select_module.pixel_selector(input, coord_tmp, strides), [-1])
            tmp = ops.subtract(tmp_1, tmp_2)
            tmp = ops.divide(tmp, 2)
            # Chain rule: weight the finite difference by the incoming gradient
            tmp = ops.multiply(tmp, back_grad)
            tmp_3 = np.zeros((NUM_POINTS, 3), np.float32)
            tmp_3[i, j] = 1.0
            coord_grad_tmp = coord_grad_tmp + tmp_3 * ops.reduce_sum(tmp)
    coord_grad = coord_grad_tmp
    # No gradients for the input image or the strides, only for the coordinates
    return [None, coord_grad, None]
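For TensorFlow's autodiff to pick up this gradient, it has to be registered against the custom op's registered name. A minimal sketch; the op name "PixelSelector" is an assumption, since it depends on how the op was defined in the compiled kernel:

import tensorflow as tf

@tf.RegisterGradient("PixelSelector")            # hypothetical registered op name
def _pixel_selector_grad_registered(op, grad):
    return _pixel_selector_grad(op, grad)        # delegate to the function defined above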
def losses(labels: list, preds: list):
    l = 0
    for i in range(len(labels)):
        # Each label could be given its own loss treatment here; this version simply
        # weights the squared error of the i-th output by (i + 1)
        l += tf.reduce_sum(((labels[i] - preds[i])**2) * (i + 1))
    return l
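A tiny usage sketch with two hypothetical output heads, showing how later outputs are weighted more heavily by the (i + 1) factor:

import tensorflow as tf

labels = [tf.constant([1.0, 2.0]), tf.constant([3.0, 4.0])]
preds  = [tf.constant([1.5, 2.0]), tf.constant([3.0, 3.0])]
total = losses(labels, preds)   # = 1 * (0.25 + 0) + 2 * (0 + 1.0) = 2.25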
# -*- coding:utf-8 -*-
'''
@Author: zzx
@E-mail: [email protected]
@File: 对多元函数求导.py
@CreateTime: 2020/7/21 15:13
'''
import tensorflow as tf

# 2. Differentiating a multivariate function
X = tf.constant([[1., 2.], [3., 4.]])
y = tf.constant([[1.], [2.]])

# Model parameters; the initial values are arbitrary
w = tf.Variable(initial_value=[[1.], [2.]])
b = tf.Variable(initial_value=1.)

# Automatic differentiation is performed inside the tape context
with tf.GradientTape() as tape:
    L = 0.5 * tf.reduce_sum(tf.square(tf.matmul(X, w) + b - y))

w_grad, b_grad = tape.gradient(L, [w, b])
print("L: {}, w_grad: {}, b_grad: {}".format(L.numpy(), w_grad.numpy(), b_grad.numpy()))
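The gradients returned by the tape can be checked against the closed-form derivatives of L = 0.5 * ||Xw + b - y||^2, namely dL/dw = X^T (Xw + b - y) and dL/db = sum(Xw + b - y). A small NumPy verification sketch using the same values:

import numpy as np

X_np = np.array([[1., 2.], [3., 4.]], dtype=np.float32)
y_np = np.array([[1.], [2.]], dtype=np.float32)
w_np = np.array([[1.], [2.]], dtype=np.float32)
b_np = 1.

residual = X_np @ w_np + b_np - y_np          # shape (2, 1)
w_grad_expected = X_np.T @ residual           # should match tape.gradient(L, w)
b_grad_expected = residual.sum()              # should match tape.gradient(L, b)
print(w_grad_expected, b_grad_expected)       # [[35.], [50.]] and 15.0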
def he_squash(s):
    # Squash non-linearity: keeps each capsule vector's direction while
    # scaling its length into (0, 1)
    s_square_norm = tf.reduce_sum(tf.square(s), -1, keepdims=True)
    scalar_factor = s_square_norm / (1 + s_square_norm) / (
        tf.sqrt(s_square_norm) + 1.e-9)
    v = scalar_factor * s  # element-wise
    return v
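A quick usage sketch (TF 2 eager for brevity): after squashing, every capsule's norm should lie in (0, 1). The [batch, num_caps, vec_len] shape below is a hypothetical example:

import tensorflow as tf

caps = tf.random.normal([2, 1152, 8])        # hypothetical [batch, num_caps, vec_len]
squashed = he_squash(caps)
norms = tf.norm(squashed, axis=-1)           # every entry is in (0, 1)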