def calc_loss(logits: tf.Tensor, caps_out: tf.Tensor, x: tf.Tensor, y: tf.Tensor, decoded: tf.Tensor):
    """Return the margin loss plus a down-weighted reconstruction loss.

    Parameters
    ----------
    logits: tf.Tensor
        Per-class scores compared against the two margins.
        Presumably capsule lengths of shape [batch, classes] -- TODO confirm.
    caps_out: tf.Tensor
        Not used by this function; kept for interface compatibility.
    x: tf.Tensor
        Original input; flattened to [x.shape[0], -1] before comparison.
    y: tf.Tensor
        Class indicator multiplied element-wise with the margin terms
        (presumably one-hot -- TODO confirm).
    decoded: tf.Tensor
        Reconstruction; must be subtractable from the flattened input.

    Returns
    -------
    tf.Tensor
        Scalar ``margin_loss + reconstruct_loss``.
    """
    with tf.variable_scope('calc_loss'):
        # Weight balancing the "class absent" term against the "class present" term.
        absent_weight = 0.5
        # Upper and lower margins.
        upper_margin = 0.95
        lower_margin = 0.05

        present_penalty = tf.square(tf.maximum(0., upper_margin - logits))
        absent_penalty = tf.square(tf.maximum(0., logits - lower_margin))
        per_sample = tf.reduce_sum(
            y * present_penalty + absent_weight * (1. - y) * absent_penalty,
            axis=-1)
        margin_loss = tf.reduce_mean(per_sample)

        # Flatten the input so it can be compared with the reconstruction.
        flat_input = tf.reshape(x, (x.shape[0], -1))
        squared_error = tf.square(flat_input - decoded)
        reconstruct_loss = 0.0005 * tf.reduce_mean(squared_error)

        combined = margin_loss + reconstruct_loss
    return combined
def variable_summaries(var, name):
    """Attach mean/stddev/max/min scalar summaries and a histogram to ``var``.

    Every summary tag is built as ``<statistic>_<name>``; the histogram uses
    the tag ``histogram_<name>``. Nothing is returned -- the summary ops are
    only registered in the current graph.
    """
    with tf.name_scope('summaries'):
        average = tf.reduce_mean(var)
        tf.scalar_summary('mean_' + name, average)

        # Standard deviation of the parameter values.
        with tf.name_scope('stddev'):
            deviation = tf.sqrt(tf.reduce_mean(tf.square(var - average)))

        remaining_stats = (
            ('stddev_', deviation),
            ('max_', tf.reduce_max(var)),
            ('min_', tf.reduce_min(var)),
        )
        for prefix, value in remaining_stats:
            tf.scalar_summary(prefix + name, value)

        # Record the distribution of the parameter as a histogram.
        tf.histogram_summary('histogram_' + name, var)
def loss_fn(y_true: tf.Tensor, y_pred: tf.Tensor):
    """Compute the total detection loss for one output layer.

    Both tensors are sliced along their last axis as
    ``[0:2]`` xy, ``[2:4]`` wh, ``[4:5]`` confidence and ``[5:]`` class scores.

    The function relies on names that are not defined here and must be
    provided by the enclosing scope: ``obj_thresh``, ``iou_thresh``, ``layer``,
    ``h`` (read for ``h.batch_size``), ``wh_weight``, ``obj_weight``,
    ``noobj_weight``, ``calc_ignore_mask``, ``tf_xywh_to_grid`` and ``K``.

    Returns
    -------
    tf.Tensor
        ``obj_loss + noobj_loss + cls_loss + xy_loss + wh_loss``, each term
        being a sum over all elements divided by ``h.batch_size``.
    """
    bce = tf.nn.sigmoid_cross_entropy_with_logits
    batch_size = h.batch_size

    # --- split the label and the prediction ---
    grid_pred_xy, grid_pred_wh = y_pred[..., 0:2], y_pred[..., 2:4]
    pred_confidence, pred_cls = y_pred[..., 4:5], y_pred[..., 5:]

    all_true_xy, all_true_wh = y_true[..., 0:2], y_true[..., 2:4]
    true_confidence, true_cls = y_true[..., 4:5], y_true[..., 5:]

    # The float confidence doubles as the object mask; the boolean variant
    # drops the trailing axis and thresholds against obj_thresh.
    obj_mask = true_confidence
    obj_mask_bool = y_true[..., 4] > obj_thresh

    # --- calc the ignore mask ---
    ignore_mask = calc_ignore_mask(all_true_xy, all_true_wh, grid_pred_xy,
                                   grid_pred_wh, obj_mask_bool,
                                   iou_thresh, layer, h)

    grid_true_xy, grid_true_wh = tf_xywh_to_grid(all_true_xy, all_true_wh, layer, h)
    # NOTE: when wh == 0, tf.log(0) = -inf, so K.switch replaces the entries
    # without an object by zeros.
    grid_true_wh = K.switch(obj_mask_bool, grid_true_wh,
                            tf.zeros_like(grid_true_wh))

    # --- define loss ---
    # Weight is 2 - w * h of the ground-truth box.
    coord_weight = 2 - all_true_wh[..., 0:1] * all_true_wh[..., 1:2]

    xy_term = bce(labels=grid_true_xy, logits=grid_pred_xy)
    xy_loss = tf.reduce_sum(obj_mask * coord_weight * xy_term) / batch_size

    wh_term = tf.square(tf.subtract(x=grid_true_wh, y=grid_pred_wh))
    wh_loss = tf.reduce_sum(
        obj_mask * coord_weight * wh_weight * wh_term) / batch_size

    obj_term = bce(labels=true_confidence, logits=pred_confidence)
    obj_loss = obj_weight * tf.reduce_sum(obj_mask * obj_term) / batch_size

    noobj_term = bce(labels=true_confidence, logits=pred_confidence)
    noobj_loss = noobj_weight * tf.reduce_sum(
        (1 - obj_mask) * ignore_mask * noobj_term) / batch_size

    cls_term = bce(labels=true_cls, logits=pred_cls)
    cls_loss = tf.reduce_sum(obj_mask * cls_term) / batch_size

    return obj_loss + noobj_loss + cls_loss + xy_loss + wh_loss
def _setup_actor_critic_loss(self, actor, critic, num_actions):
    """Build the combined actor-critic loss and its action placeholder.

    Args:
        actor: Tensor multiplied element-wise with the one-hot actions and
            summed over axis 1 (presumably action probabilities of shape
            [batch, num_actions] -- TODO confirm).
        critic: Tensor subtracted from ``self._R`` (presumably the value
            estimate -- TODO confirm).
        num_actions: Width of the one-hot action placeholder.

    Returns:
        A ``(loss, actions_one_hot)`` tuple where ``loss`` is
        ``0.5 * critic_loss + actor_loss`` and ``actions_one_hot`` is the
        float32 placeholder of shape ``[None, num_actions]`` to be fed by the
        caller.
    """
    # Lower bound applied before every log to keep it finite.
    floor = self._log_noise

    actions_one_hot = tf.placeholder(tf.float32, [None, num_actions])

    # Log-probability of the action that was actually taken.
    chosen_prob = tf.reduce_sum(actor * actions_one_hot, axis=1)
    chosen_log_prob = tf.log(tf.maximum(chosen_prob, floor))

    # The critic is treated as a constant here so that the actor term does
    # not push gradients into it.
    advantage = self._R - tf.stop_gradient(critic)

    # sum(p * log p): the negative of the distribution's entropy.
    neg_entropy = tf.reduce_sum(tf.log(tf.maximum(actor, floor)) * actor, axis=1)

    policy_term = tf.reduce_sum((chosen_log_prob * advantage), axis=0)
    entropy_term = tf.reduce_sum((-1 * self._entropy_beta * neg_entropy), axis=0)
    actor_loss = -(policy_term + entropy_term)

    critic_loss = tf.reduce_sum(tf.square(self._R - critic), axis=0)

    return 0.5 * critic_loss + actor_loss, actions_one_hot
def train(self):
    """Fit ``y = w * x + b`` to the stored samples and plot the result.

    The method runs in three phases:

    1. Build the graph (nothing is computed yet): a one-element weight drawn
       uniformly from [-1, 1), a zero bias, a mean-squared-error loss and a
       gradient-descent train op with learning rate 0.05.
    2. Run 101 gradient-descent steps in a session, printing ``w`` and ``b``
       before training and after every 10th step.
    3. Scatter-plot the samples and draw the fitted line between the smallest
       and largest x value.

    Reads ``self.__x_data`` and ``self.__y_data``; returns ``None``.
    """
    # --- 1. define the model, the loss and the optimiser ---
    w = tf.Variable(tf.random_uniform([1], -1.0, 1.0))
    b = tf.Variable(tf.zeros([1]))
    # tf.mul was removed in TensorFlow 1.0; tf.multiply is its replacement.
    y = tf.multiply(w, self.__x_data) + b

    loss = tf.reduce_mean(tf.square(y - self.__y_data))
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.05)
    train = optimizer.minimize(loss)

    init = tf.global_variables_initializer()

    # --- 2. train ---
    # The context manager closes the session even if a step raises.
    with tf.Session() as sess:
        sess.run(init)
        print('初始化参数:w=', sess.run(w), ', b=', sess.run(b))

        for step in range(101):
            sess.run(train)
            if step % 10 == 0:
                print(step, sess.run(w), sess.run(b))

        # Fetch the fitted parameters once; the plot needs no session.
        w_fit, b_fit = sess.run([w, b])

    # --- 3. plot ---
    x_min = numpy.min(self.__x_data)
    x_max = numpy.max(self.__x_data)
    plt.scatter(self.__x_data, self.__y_data, marker='.', color='red', s=40, label='First')
    plt.plot([x_min, x_max],
             [w_fit * x_min + b_fit, w_fit * x_max + b_fit],
             'b')
    plt.show()
def squash(s: tf.Tensor) -> tf.Tensor:
    r"""Squash activation.

    Implements

    $$v_j = \frac{||s_j||^2}{1 + ||s_j||^2} \frac{s_j}{||s_j||}$$

    in the algebraically equivalent form
    ``||s_j|| * s_j / (1 + ||s_j||^2)``, which contains no division by the
    norm itself. The norm is taken over the last axis.

    Parameters
    ----------
    s: tf.Tensor
        s shape [batch,caps,len]

    Returns
    -------
    tf.Tensor
        v vector, same shape as s
    """
    with tf.variable_scope('squash'):
        length = tf.norm_v2(s, axis=-1, keepdims=True)
        numerator = length * s
        denominator = 1 + tf.square(length)
        return numerator / denominator
def train(self):
    """Fit ``y = w @ x + b`` by gradient descent and store the parameters.

    A private graph is built first (nothing is computed at that point): a
    ``[1, 2]`` weight drawn uniformly from [-1, 1), a one-element zero bias,
    a mean-squared-error loss and a gradient-descent train op with learning
    rate 0.05. The graph is then trained for 101 steps, logging weight, bias
    and loss every 10th step, and the final values are saved to
    ``self.__w_data`` and ``self.__b_data``.

    Author's note on learning rates (translated): the larger the magnitude of
    the training inputs, the smaller the learning rate should be -- e.g. 0.5
    for inputs in 0-1 but 0.05 for inputs in 0-5 -- otherwise each gradient
    step overshoots, the coefficients grow until they overflow and training
    never converges.

    Reads ``self.__x_data`` and ``self.__y_data``; returns ``None``.
    """
    # --- 1. build the data-flow graph ---
    temp_graph = tf.Graph()
    with temp_graph.as_default():
        weight = tf.Variable(tf.random_uniform([1, 2], -1.0, 1.0))
        bias = tf.Variable(tf.zeros([1]))
        prediction = tf.matmul(weight, self.__x_data) + bias

        # Mean squared error, minimised by plain gradient descent.
        loss = tf.reduce_mean(tf.square(prediction - self.__y_data))
        train_op = tf.train.GradientDescentOptimizer(learning_rate=0.05).minimize(loss)

        init = tf.global_variables_initializer()

    # --- 2. train ---
    with tf.Session(graph=temp_graph) as sess:
        # Only now is anything actually computed.
        sess.run(init)
        print('初始化参数:w=', sess.run(weight), ', b=', sess.run(bias))

        for step in numpy.arange(0, 101):
            sess.run(train_op)
            if step % 10 != 0:
                continue
            # No train op runs here, so one fetch sees the same values as
            # three separate evaluations would.
            w_now, b_now, loss_now = sess.run([weight, bias, loss])
            print("第%u步:权重:%s,偏置:%f,损失:%f" % (step, w_now, b_now, loss_now))

        # Keep the trained parameters on the instance.
        self.__w_data, self.__b_data = sess.run([weight, bias])
# Training configuration and model definition for the BreadBasket regression.
train_times = 50000
base_path = "/Users/coorchice/Desktop/ML/model/ml/BreadBasket/"
save_path = base_path + str(train_times) + "/"
BBDATA = read_datas('data/')

# Inputs: 135 features per sample, one scalar target per sample.
x_data = tf.placeholder(tf.float32, [None, 135])
y_data = tf.placeholder(tf.float32, [None])

# Single linear layer followed by ReLU; y has shape [batch, 1].
W = tf.Variable(tf.truncated_normal([135, 1], stddev=0.1))
b = tf.Variable(tf.constant(0.1, shape=[1]))
y = tf.nn.relu(tf.matmul(x_data, W) + b)

# Loss. The tensor keeps its historical name `cross_entropy`, but what is
# computed is the mean squared error between prediction and target.
with tf.name_scope('loss'):
    # cross_entropy = -tf.reduce_sum(y_data * tf.log(y))
    # y is [batch, 1] while y_data is [batch]; subtracting them directly would
    # broadcast to [batch, batch] and average over every prediction/label
    # pair. Reshape the labels to a column so the difference is per-sample.
    cross_entropy = tf.reduce_mean(tf.square(y - tf.reshape(y_data, [-1, 1])))
    # NOTE(review): tf.scalar_summary is the pre-1.0 summary API
    # (tf.summary.scalar in later releases) -- confirm the targeted version.
    tf.scalar_summary('loss', cross_entropy)

# init_lr = 0.00001
lr = tf.Variable(0.00005, trainable=False)
# global_step = tf.Variable(0., trainable=False)
# lr = tf.train.exponential_decay(init_lr, global_step=global_step, decay_steps=10000, decay_rate=0.5, staircase=True)

# Minimise the loss with plain gradient descent, using the non-trainable
# learning-rate variable defined above.
train_step = tf.train.GradientDescentOptimizer(lr).minimize(cross_entropy)
# train_step = tf.train.GradientDescentOptimizer(0.00001).minimize(cross_entropy)
# correct_prediction = tf.equal(y, y_data)
# correct_prediction = tf.less_equal(tf.abs(y - y_data), 150)
# dv = tf.reduce_mean(tf.reduce_sum(tf.abs(y - y_data)))
# -*- coding:utf-8 -*-
'''
@Author: zzx
@E-mail: [email protected]
@File: 对多元函数求导.py
@CreateTime: 2020/7/21 15:13
'''
import tensorflow.python as tf

# 2. Differentiating a function of several variables.
X = tf.constant([[1., 2.], [3., 4.]])
y = tf.constant([[1.], [2.]])

# Function parameters; the initial values are arbitrary.
w = tf.Variable(initial_value=[[1.], [2.]])
b = tf.Variable(initial_value=1.)

# Operations executed inside the tape are recorded for differentiation.
with tf.GradientTape() as tape:
    L = 0.5 * tf.reduce_sum(tf.square(tf.matmul(X, w) + b - y))

# Gradients of L with respect to w and b.
w_grad, b_grad = tape.gradient(L, [w, b])

# The previous format string was empty, so nothing was ever displayed;
# give every value its own placeholder.
print("L = {}, w_grad = {}, b_grad = {}".format(L.numpy(), w_grad.numpy(), b_grad.numpy()))
def my_squash(s):
    """Squash ``s`` along its last axis.

    Computes ``(||s||^2 / (1 + ||s||^2)) * (s / ||s||)`` where the norm is
    taken over the last axis with ``keepdims=True``, so the result has the
    same shape as ``s``.

    A small epsilon is added to the norm in the denominator: without it an
    all-zero vector evaluates 0 / 0 and yields NaN instead of a zero vector.
    """
    s_norm = tf.norm_v2(s, axis=-1, keepdims=True)
    s_square_norm = tf.square(s_norm)
    # Guard against division by zero for zero-length vectors.
    v = (s_square_norm * s) / ((1 + s_square_norm) * (s_norm + 1.e-9))
    return v
def he_squash(s):
    """Squash ``s`` along its last axis using an epsilon-guarded norm.

    The squared norm is the sum of squares over the last axis (kept as a
    size-1 axis). Each vector is scaled by
    ``sq / (1 + sq) / (sqrt(sq) + 1e-9)``; the epsilon keeps the division
    finite when the vector is all zeros. The result has the same shape as
    ``s``.
    """
    squared_length = tf.reduce_sum(tf.square(s), -1, keepdims=True)
    guarded_length = tf.sqrt(squared_length) + 1.e-9
    shrink = squared_length / (1 + squared_length) / guarded_length
    # Broadcast the per-vector factor over the vector components.
    return shrink * s
# -*- coding:utf-8 -*-
'''
@Author: zzx
@E-mail: [email protected]
@File: tf对x²求导.py
@CreateTime: 2020/7/21 15:11
'''
# Demonstrates automatic differentiation of y = x^2.
import tensorflow.python as tf

x = tf.Variable(3.)

# Operations executed inside the tape are recorded for differentiation.
with tf.GradientTape() as tape:
    y = tf.square(x)

# Ask the tape for dy/dx.
y_grad = tape.gradient(target=y, sources=x)

print(y, y_grad)