def train(self): """ 1、构造tensorflow的基本算子、算法。注意这一步都是在“定义”和“构造”,不是真正的模型训练和计算 """ # 先构造一个数据流图 temp_graph = tf.Graph() with temp_graph.as_default(): # 定义占位符,表示待训练的数据集,用这种方式最后运行train的时候总是报错,暂无法解决: # You must feed a value for placeholder tensor 'x' with dtype float and shape [?,?] # x = tf.placeholder(dtype=tf.float32, shape=[None, None], name='x') # y = tf.placeholder(dtype=tf.float32, shape=[None], name='y') # 定义待训练的参数w和b,weight被赋予随机值,介于-1和1之间,bias分配一个变量并赋值为0 weight = tf.Variable(tf.random_uniform([1, self.__x_train.shape[1]], -1.0, 1.0)) bias = tf.Variable(tf.zeros([1])) # 定义二分类的sigmoid模型 y = 1/(1+exp-(w*x + b)) # y_pre = tf.div(1.0, # tf.add(1.0, # tf.exp(tf.neg(tf.reduce_sum(tf.multiply(weight, self.__x_train), # 1 # ) + bias) # ) # ) # ) # 也可以直接利用tf的sigmoid函数 y_pre = tf.sigmoid(tf.reduce_sum(tf.multiply(weight, self.__x_train), 1) + bias) # 定义损失函数为对数似然函数(-y*log(y_pre) - (1-y)*log(1-y_pre))/样本数 # 为什么这样定义呢?这里要扯到线性回归的最小二乘法和逻辑回归中的最大似然函数法的区别了。 # 最小二乘法的核心思想是,让预测值和真实值的“误差”尽可能小; # 而最大似然函数法的核心思想是,让已知训练样本发生的概率尽可能大。 # 上述的对数似然函数就是这么来的,推导过程可参考相关文献,在梯度下降的运用中,就是加个负号,让其最小 loss0 = self.__y_train * tf.log(y_pre) loss1 = (1 - self.__y_train) * tf.log(1 - y_pre) loss = tf.reduce_sum(- loss0 - loss1) / self.__x_train.shape[0] # 定义优化算法(梯度下降),目标就是最小化损失函数 optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.1) train = optimizer.minimize(loss) # 初始化变量 init = tf.global_variables_initializer() """ 2.正式训练 """ # 建立会话 with tf.Session(graph=temp_graph) as sess: # 这个时候才开始真正地计算 sess.run(init) print('初始化参数:weight=', sess.run(weight), ', bias=', sess.run(bias)) # 拟合平面,过程就是执行1000遍梯度下降算法,得到最佳的w和b for step in range(1000): sess.run(train) if step % 100 == 0: print("第%u步:权重:%s,偏置:%f,损失:%f" % (step, weight.eval(), bias.eval(), loss.eval())) self.__weight = weight.eval() self.__bias = bias.eval()
def _tensorflow_initialization(self):
    """Open a session on this model's graph, initialize all variables, and reset the saver."""
    # Let TF grab GPU memory incrementally instead of reserving it all up front.
    gpu_opts = tf.GPUOptions(allow_growth=True)
    session_config = tf.ConfigProto(
        allow_soft_placement=True,
        log_device_placement=False,
        gpu_options=gpu_opts,
    )
    self._session = tf.Session(graph=self._graph, config=session_config)
    self._session.run(tf.global_variables_initializer())
    self._saver = None
def test_predict_scores(self):
    """Smoke-test predict_scores on a tiny two-example batch and print the scores."""
    anchor_label = constant([[0., 1], [1., 0.]])
    label = constant([[[0., 1.], [0., 1.], [0., 1.]],
                      [[1., 0.], [1., 0.], [1., 0.]]])
    features = {'anchor_label': anchor_label, 'label': label}
    scores = predict_scores(features)
    with self.test_session() as sess:
        sess.run(global_variables_initializer())
        print(scores.eval())
def train(self): """ 1、构造tensorflow的基本算子、算法。注意这一步都是在“定义”和“构造”,不是真正的模型训练和计算 """ # 先构造一个线性模型 y = w*x + b,这里w被赋予随机值,介于-1和1之间,b分配一个变量并赋值为0 w = tf.Variable(tf.random_uniform([1], -1.0, 1.0)) b = tf.Variable(tf.zeros([1])) y = tf.mul(w, self.__x_data) + b # 定义损失函数(方差)和优化算法(梯度下降),目标就是最小化损失函数 loss = tf.reduce_mean(tf.square(y - self.__y_data)) optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.05) train = optimizer.minimize(loss) # 初始化变量 init = tf.global_variables_initializer() """ 2、正式训练 """ # 建立会话 sess = tf.Session() # 这个时候才开始真正地计算 sess.run(init) print('初始化参数:w=', sess.run(w), ', b=', sess.run(b)) # 拟合平面,过程就是执行100遍梯度下降算法,得到最佳的w和b for step in numpy.arange(0, 101): sess.run(train) if step % 10 == 0: print(step, sess.run(w), sess.run(b)) """ 3、画图 """ plt.scatter(self.__x_data, self.__y_data, marker='.', color='red', s=40, label='First') plt.plot([numpy.min(self.__x_data), numpy.max(self.__x_data)], [sess.run(w)*numpy.min(self.__x_data)+sess.run(b), sess.run(w)*numpy.max(self.__x_data)+sess.run(b)], 'b') plt.show() """ 4、任务完成, 关闭会话. """ sess.close()
def train(self): """ 1、构造tensorflow的基本算子、算法。注意这一步都是在“定义”和“构造”,不是真正的模型训练和计算 这里需特别注意一下训练数据和学习率的关系: 本案例中,训练数据X都在0-1之间,学习率取0.5是比较恰当的。 但是,当训练数据越大的时候,学习率越要变小,例如X在0-5之间的话,学习率取0.05较合适。 个人感觉:训练数据取值越大,如果学习率不降的话,在每一步梯度计算时,容易“步子太大扯着蛋”, 即所谓的“梯度爆炸”,最终无法收敛导致系数越来越大直到溢出,算不出来了。 """ # 先构造一个数据流图,定义线性模型 y = w*x + b,这里w被赋予随机值,介于-1和1之间,b分配一个变量并赋值为0 temp_graph = tf.Graph() with temp_graph.as_default(): tf_v_w = tf.Variable(tf.random_uniform([1, 2], -1.0, 1.0)) tf_v_b = tf.Variable(tf.zeros([1])) tf_v_y = tf.matmul(tf_v_w, self.__x_data) + tf_v_b # 定义损失函数(方差)和优化算法(梯度下降),目标就是最小化损失函数 loss = tf.reduce_mean(tf.square(tf_v_y - self.__y_data)) optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.05) train = optimizer.minimize(loss) # 初始化变量 init = tf.global_variables_initializer() """ 2.正式训练 """ # 建立会话 with tf.Session(graph=temp_graph) as sess: # 这个时候才开始真正地计算 sess.run(init) print('初始化参数:w=', sess.run(tf_v_w), ', b=', sess.run(tf_v_b)) # 拟合平面,过程就是执行100遍梯度下降算法,得到最佳的w和b for step in numpy.arange(0, 101): sess.run(train) if step % 10 == 0: print("第%u步:权重:%s,偏置:%f,损失:%f" % (step, tf_v_w.eval(), tf_v_b.eval(), loss.eval())) # 将训练完毕的参数保存 self.__w_data = tf_v_w.eval() self.__b_data = tf_v_b.eval()
n_features=n_features, n_classes=n_classes, max_leafs=None) tree.build_tree() # optimizer optimizer = AdamOptimizer(learning_rate=0.001, beta1=0.9, beta2=0.999, epsilon=1e-08).minimize(tree.loss) # Saving the model # saver = tf.train.Saver() # Initialize the variables (i.e. assign their default value) init = global_variables_initializer() EPOCHS = 1000 TOTAL_BATCH = 16 display_step = 100 with tf.compat.v1.Session() as sess: sess.run(init) t0 = time.time() for epoch in range(EPOCHS): avg_cost = 0. # Loop over all batches acc = 0.0 val_acc = 0.0 index_in_epoch = 0 for i in range(TOTAL_BATCH):
# Pull batches from the dataset via a one-shot iterator; wrapping the batch
# tensors in placeholder_with_default allows overriding them at feed time.
iterator = ds.make_one_shot_iterator()
next_x, next_y = iterator.get_next()
# NOTE(review): batch size 100 and 28x28x1 inputs with 10 classes (MNIST-like)
# are hard-coded here and in the reshape/loop bounds below — confirm against
# the dataset pipeline defined elsewhere in this file.
batch_x = tf.placeholder_with_default(next_x, shape=[100, 28, 28, 1])
batch_y = tf.placeholder_with_default(next_y, shape=[100, 10])
# Forward pass: the capsule network yields class logits and capsule outputs;
# the decoder reconstructs an image from the capsules (conditioned on labels).
logits, caps_out = capsnet(batch_x)
decoded = decoder(caps_out, batch_y)
""" define loss """
loss = calc_loss(logits, caps_out, batch_x, batch_y, decoded)
""" define summary """
# Streaming accuracy over argmax predictions, plus scalar/image summaries.
# NOTE(review): tf.metrics.accuracy returns (value_tensor, update_op); the
# names here bind 'acc' to the update op, so sess.run(acc) below both updates
# and returns the running metric — confirm this ordering is intentional.
acc_op, acc = tf.metrics.accuracy(tf.argmax(batch_y, -1), tf.argmax(logits, -1))
tf.summary.scalar('loss', loss)
tf.summary.scalar('acc', acc)
tf.summary.image('reconstruction_img', tf.reshape(decoded, (100, 28, 28, 1)))
summ = tf.summary.merge_all()
""" define train op """
# Adam with default hyperparameters; the global step is incremented per batch.
steps = tf.train.get_or_create_global_step(g)
train_op = tf.train.AdamOptimizer().minimize(loss, global_step=steps)
# Grow GPU memory on demand instead of reserving it all up front.
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
with tf.Session(config=config) as sess:
    writer = tf.summary.FileWriter('log', g)
    # Local variables hold tf.metrics.accuracy's internal counters, so both
    # global and local initializers must run.
    sess.run([tf.global_variables_initializer(), tf.local_variables_initializer()])
    # 10 epochs of 60000/100 = 600 batches each, with a tqdm progress bar.
    for i in range(10):
        with tqdm(total=60000//100, bar_format='{n_fmt}/{total_fmt} |{bar}| {rate_fmt}{postfix}',
                  unit=' batch', dynamic_ncols=True) as t:
            for j in range(60000//100):
                _, summ_, steps_, loss_, acc_ = sess.run([train_op, summ, steps, loss, acc])
                t.set_postfix(loss='{:<5.3f}'.format(loss_), acc='{:<4.2f}%'.format(acc_*100))
                writer.add_summary(summ_, steps_)
                t.update()