def train_model(self, sess, max_iters):
    """Train the network, visiting the training images in shuffled order.

    Builds the graph with ``self.construct_graph``, runs the global variable
    initializer, loads weights according to ``cfg.TRAIN_MODULE_CONTINUE`` and
    ``cfg.TRAIN_INIT_WEIGHT``, finalizes the graph and then runs one training
    step per loop iteration up to and including step ``max_iters``.

    Args:
        sess: TensorFlow session used for initialization and training.
            NOTE(review): ``lr.eval()`` is called without a session argument,
            so ``sess`` is presumably the default session -- confirm.
        max_iters: Last step index to run (inclusive).

    Raises:
        ValueError: If ``cfg.TRAIN_MODULE_CONTINUE`` is neither 1 nor 2, in
            which case no starting step can be determined.
    """
    lr, train_op = self.construct_graph(sess)
    sess.run(tf.global_variables_initializer())

    # Select the source of the initial weights.
    if cfg.TRAIN_MODULE_CONTINUE == 1:
        self.from_previous_ckpt(sess)
    elif cfg.TRAIN_INIT_WEIGHT == 1:
        self.from_snapshot(sess)
    elif cfg.TRAIN_INIT_WEIGHT == 2:
        self.from_previous_ckpt(sess)
    elif cfg.TRAIN_INIT_WEIGHT == 3:
        self.from_best_trained_model(sess)
    sess.graph.finalize()

    timer = Timer()
    data_length = len(self.Trainval_GT)
    # Materialize both as lists: on Python 3 a dict view cannot be indexed
    # and a range object cannot be shuffled in place.
    keys = list(self.Trainval_GT.keys())
    order = list(range(data_length))

    if cfg.TRAIN_MODULE_CONTINUE == 2:
        step = 0
    elif cfg.TRAIN_MODULE_CONTINUE == 1:
        # The step is the last '_'-separated field of the text preceding
        # '.ckpt' in the checkpoint path; resume one step after it.
        ckpt_stem = self.pretrained_model.split('.ckpt')[0]
        step = int(ckpt_stem.split('_')[-1]) + 1
    else:
        raise ValueError(
            'cfg.TRAIN_MODULE_CONTINUE must be 1 or 2, got %r'
            % (cfg.TRAIN_MODULE_CONTINUE,))

    while step < max_iters + 1:
        timer.tic()

        # Draw a new visiting order each time the step counter reaches a
        # multiple of the dataset size.
        if step % data_length == 0:
            np.random.shuffle(order)
        image_id = keys[order[step % data_length]]

        blobs = Get_Next_Instance_HO_Neg_HICO_3D(
            self.Trainval_GT, self.Trainval_N, image_id,
            self.Pos_augment, self.Neg_select)

        if (step % cfg.TRAIN.SUMMARY_INTERVAL == 0) or (step < 20):
            # Training step that also produces a summary for the writer.
            total_loss, summary = self.net.train_step_with_summary(
                sess, blobs, lr.eval(), train_op)
            self.writer.add_summary(summary, float(step))
        else:
            total_loss = self.net.train_step(sess, blobs, lr.eval(), train_op)
        del blobs
        timer.toc()

        if step % (cfg.TRAIN.DISPLAY) == 0:
            print('iter: %d / %d, im_id: %6u, total loss: %.6f, lr: %f, speed: %.3f s/iter' %
                  (step, max_iters, image_id, total_loss, lr.eval(), timer.average_time))

        if (step % cfg.TRAIN.SNAPSHOT_ITERS == 0 and step != 0) or (step == 10):
            self.snapshot(sess, step)

        step += 1

    self.writer.close()
def train_model(self, sess, max_iters):
    """Train from step 0, sampling positives/negatives per ``iCAN_Early_flag``.

    Builds the graph with ``self.construct_graph``, calls
    ``self.from_snapshot``, finalizes the graph and runs one training step
    per loop iteration for steps ``0 .. max_iters`` (inclusive).

    Args:
        sess: TensorFlow session used for training.
        max_iters: Last step index to run (inclusive).

    Raises:
        ValueError: If ``self.iCAN_Early_flag`` is neither 0 nor 1.
    """
    lr, train_op = self.construct_graph(sess)
    self.from_snapshot(sess)
    sess.graph.finalize()

    # The flag does not change inside the loop, so pick the sampler once and
    # fail with a clear message instead of an unbound ``blobs`` later on.
    if self.iCAN_Early_flag == 1:
        next_instance = Get_Next_Instance_HO_Neg
    elif self.iCAN_Early_flag == 0:
        # Pos + spNeg (factorized model only)
        next_instance = Get_Next_Instance_HO_spNeg
    else:
        raise ValueError(
            'iCAN_Early_flag must be 0 or 1, got %r' % (self.iCAN_Early_flag,))

    timer = Timer()
    data_length = len(self.Trainval_GT)

    step = 0
    while step < max_iters + 1:
        timer.tic()

        blobs = next_instance(
            self.Trainval_GT, self.Trainval_N, step,
            self.Pos_augment, self.Neg_select, data_length)

        if (step % cfg.TRAIN.SUMMARY_INTERVAL == 0) or (step < 20):
            # Compute the graph with summary
            loss_cls_H, loss_cls_HO, total_loss, summary = self.net.train_step_with_summary(
                sess, blobs, lr.eval(), train_op)
            self.writer.add_summary(summary, float(step))
        else:
            # Compute the graph without summary
            loss_cls_H, loss_cls_HO, total_loss = self.net.train_step(
                sess, blobs, lr.eval(), train_op)
        timer.toc()

        # Display training information
        if step % (cfg.TRAIN.DISPLAY) == 0:
            print('iter: %d / %d, im_id: %u, total loss: %.6f, loss_cls_H: %.6f, loss_cls_HO: %.6f, lr: %f, speed: %.3f s/iter' %
                  (step, max_iters, self.Trainval_GT[step % data_length][0],
                   total_loss, loss_cls_H, loss_cls_HO, lr.eval(), timer.average_time))

        # Snapshotting
        if (step % cfg.TRAIN.SNAPSHOT_ITERS == 0 and step != 0) or (step == 10):
            self.snapshot(sess, step)

        step += 1

    self.writer.close()
def train_model_tf(self, sess, max_iters):
    """Run the training loop using the network's ``train_step_tfr*`` methods.

    Starts at ``self.get_init_step()`` and runs up to and including step
    ``max_iters``. An empty ``blobs`` dict is passed to every training step.

    An ``InvalidArgumentError`` raised by a step is tolerated only when
    ``self.net.model_name`` contains ``'lamb'``: the step is then reported
    with ``image_id = -1`` and ``total_loss = 0`` and training continues.
    For every other model the error propagates.

    Args:
        sess: TensorFlow session used for training.
        max_iters: Last step index to run (inclusive).

    Raises:
        InvalidArgumentError: Re-raised from a training step when the model
            name does not contain ``'lamb'``.
    """
    # Imported once per call instead of on every loop iteration.
    from tensorflow.python.framework.errors_impl import InvalidArgumentError

    lr, train_op = self.construct_graph(sess)
    self.from_snapshot(sess)
    sess.graph.finalize()

    timer = Timer()
    step = self.get_init_step()
    while step < max_iters + 1:
        timer.tic()
        blobs = {}
        try:
            if (step % cfg.TRAIN.SUMMARY_INTERVAL == 0) or (step < 20):
                # Compute the graph with summary
                total_loss, image_id, summary = self.net.train_step_tfr_with_summary(
                    sess, blobs, lr.eval(), train_op)
                self.writer.add_summary(summary, float(step))
            else:
                # Compute the graph without summary
                total_loss, image_id = self.net.train_step_tfr(
                    sess, blobs, lr.eval(), train_op)
        except InvalidArgumentError:
            print('InvalidArgumentError')
            if 'lamb' not in self.net.model_name:
                # Bare raise keeps the original traceback intact.
                raise
            # Placeholder values so the progress line below can still print.
            image_id = -1
            total_loss = 0
            print('InvalidArgumentError', image_id)
        timer.toc()

        # Display training information
        if step % (cfg.TRAIN.DISPLAY) == 0:
            if isinstance(image_id, tuple):
                image_id = image_id[0]
            out_str = 'iter: %d / %d, im_id: %u, total loss: %.6f, lr: %f, speed: %.3f s/iter' % \
                      (step, max_iters, image_id, total_loss, lr.eval(), timer.average_time)
            # '\r' rewrites the same console line on each update.
            print(out_str, end='\r', flush=True)

        # Snapshotting
        if (step % cfg.TRAIN.SNAPSHOT_ITERS == 0 and step != 0) or (step == 10):
            self.snapshot(sess, step)

        step += 1

    self.writer.close()
def train_model(self, sess, max_iters, log_dir='/home/zhou9878'):
    """Run the training loop with ``sess.run`` and log progress to a file.

    The graph is built by ``self.construct_graph`` (which here returns the
    learning rate, the train op and the loss tensor), ``self.from_snapshot``
    is called, and the graph is finalized. Training starts at
    ``self.get_init_step()`` and runs up to and including step ``max_iters``.
    ``self.snapshot`` is called on every step.

    Args:
        sess: TensorFlow session used for training.
        max_iters: Last step index to run (inclusive).
        log_dir: Directory that receives ``<model_name>.log``. The default
            keeps the previously hard-coded location. Pass ``None`` to
            configure logging without a file (``logging.basicConfig`` then
            uses its default stream handler).
    """
    import logging
    import os

    lr, train_op, t_loss = self.construct_graph(sess)
    self.from_snapshot(sess)
    sess.graph.finalize()
    timer = Timer()

    log_format = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
    if log_dir is None:
        logging.basicConfig(level=logging.INFO, format=log_format)
    else:
        logging.basicConfig(
            filename=os.path.join(log_dir, '{}.log'.format(self.net.model_name)),
            level=logging.INFO,
            format=log_format)
    logger = logging.getLogger(__name__)
    logger.setLevel(logging.DEBUG)

    step = self.get_init_step()
    while step < max_iters + 1:
        timer.tic()
        if (step % cfg.TRAIN.SUMMARY_INTERVAL == 0) or (step < 20):
            # Compute the graph with summary
            total_loss, summary, image_id, _ = sess.run(
                [t_loss, self.net.summary_op, self.net.image_id, train_op])
            self.writer.add_summary(summary, float(step))
        else:
            # Compute the graph without summary
            total_loss, image_id, _ = sess.run(
                [t_loss, self.net.image_id, train_op])
        timer.toc()

        # Display training information
        if step % (cfg.TRAIN.DISPLAY) == 0:
            # Reduce any sequence/array to its first entry so '{:d}' can
            # format it. np.ndim() is 0 for every scalar type, so this never
            # calls len() on a scalar and also unwraps one-element arrays.
            if isinstance(image_id, tuple) or np.ndim(image_id) > 0:
                image_id = image_id[0]
            logger.info(
                'iter: {:d} / {:d}, im_id: {:d}, total loss: {:.6f}, lr: {:f}, speed: {:.3f} s/iter'
                .format(step, max_iters, image_id, total_loss, lr.eval(),
                        timer.average_time))

        # Snapshotting: called every step.
        # NOTE(review): presumably self.snapshot decides when to save -- confirm.
        self.snapshot(sess, step)

        step += 1

    self.writer.close()
def train_model(self, sess, max_iters):
    """Run the training loop with ``sess.run`` and log progress via ``logging``.

    The graph is built by ``self.construct_graph2``, ``self.from_snapshot``
    is called, and the graph is finalized. Training starts at
    ``self.get_init_step()`` and runs up to and including step ``max_iters``.
    ``self.snapshot`` is called on every step.

    Args:
        sess: TensorFlow session used for training.
        max_iters: Last step index to run (inclusive).
    """
    import logging

    # The graph is built the same way regardless of how many devices
    # CUDA_VISIBLE_DEVICES lists, so no device check is needed here.
    lr, train_op, t_loss = self.construct_graph2(sess)
    self.from_snapshot(sess)
    sess.graph.finalize()
    timer = Timer()

    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    logger = logging.getLogger(__name__)
    logger.setLevel(logging.DEBUG)

    step = self.get_init_step()
    while step < max_iters + 1:
        timer.tic()
        if (step % cfg.TRAIN.SUMMARY_INTERVAL == 0) or (step < 20):
            # Compute the graph with summary
            total_loss, summary, image_id, _ = sess.run(
                [t_loss, self.net.summary_op, self.net.image_id, train_op])
            self.writer.add_summary(summary, float(step))
        else:
            # Compute the graph without summary
            total_loss, image_id, _ = sess.run(
                [t_loss, self.net.image_id, train_op])
        timer.toc()

        # Display training information
        if step % (cfg.TRAIN.DISPLAY) == 0:
            if isinstance(image_id, tuple):
                image_id = image_id[0]
            logger.info(
                'iter: {:d} / {:d}, im_id: {:d}, total loss: {:.6f}, lr: {:f}, speed: {:.3f} s/iter'
                .format(step, max_iters, image_id, total_loss, lr.eval(),
                        timer.average_time))

        # Snapshotting: called every step.
        # NOTE(review): presumably self.snapshot decides when to save -- confirm.
        self.snapshot(sess, step)

        step += 1

    self.writer.close()
def train_model(self, sess, max_iters):
    """Train from step 0 with the ``train_step_tfr*`` methods, logging progress.

    Calls ``self.construct_graph``, then ``self.from_snapshot``, finalizes the
    graph and runs steps ``0 .. max_iters`` (inclusive). Each step passes a
    fresh empty dict and the ``lr`` object returned by ``construct_graph`` to
    the network. ``self.snapshot`` is called after every step.

    Args:
        sess: TensorFlow session used for training.
        max_iters: Last step index to run (inclusive).
    """
    import logging

    lr, train_op = self.construct_graph(sess)
    self.from_snapshot(sess)
    sess.graph.finalize()
    clock = Timer()

    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    log = logging.getLogger(__name__)
    log.setLevel(logging.DEBUG)

    progress_fmt = ('iter: {:d} / {:d}, im_id: {:d}, total loss: {:.6f}, '
                    'lr: {:f}, speed: {:.3f} s/iter')

    step = 0
    while step < max_iters + 1:
        clock.tic()
        feed = {}
        wants_summary = (step % cfg.TRAIN.SUMMARY_INTERVAL == 0) or (step < 20)
        if not wants_summary:
            # Plain training step.
            total_loss, image_id = self.net.train_step_tfr(
                sess, feed, lr, train_op)
        else:
            # Training step that also yields a summary for the writer.
            total_loss, image_id, summary = self.net.train_step_tfr_with_summary(
                sess, feed, lr, train_op)
            self.writer.add_summary(summary, float(step))
        clock.toc()

        # Periodic progress report.
        if step % (cfg.TRAIN.DISPLAY) == 0:
            if type(image_id) is tuple:
                image_id = image_id[0]
            log.info(progress_fmt.format(
                step, max_iters, image_id, total_loss, lr.eval(),
                clock.average_time))

        # Hand the current step to the snapshot routine on every iteration.
        self.snapshot(sess, step)
        step += 1

    self.writer.close()
def train_model(self, sess, max_iters):
    """Run the training loop with ``sess.run``, printing progress in place.

    Calls ``self.construct_graph2``, then ``self.from_snapshot``, finalizes
    the graph and runs from ``self.get_init_step()`` up to and including step
    ``max_iters``. ``self.snapshot`` is called after every step.

    Args:
        sess: TensorFlow session used for training.
        max_iters: Last step index to run (inclusive).
    """
    lr, train_op, t_loss = self.construct_graph2(sess)
    self.from_snapshot(sess)
    sess.graph.finalize()
    clock = Timer()

    status_fmt = ('iter: {:d} / {:d}, im_id: {:d}, total loss: {:.6f}, '
                  'lr: {:f}, speed: {:.3f} s/iter')

    step = self.get_init_step()
    while step < max_iters + 1:
        clock.tic()
        wants_summary = (step % cfg.TRAIN.SUMMARY_INTERVAL == 0) or (step < 20)
        if not wants_summary:
            # Plain training step.
            total_loss, image_id, _ = sess.run(
                [t_loss, self.net.image_id, train_op])
        else:
            # Training step that also fetches the summary op.
            total_loss, summary, image_id, _ = sess.run(
                [t_loss, self.net.summary_op, self.net.image_id, train_op])
            self.writer.add_summary(summary, float(step))
        clock.toc()

        # Periodic progress report; '\r' rewrites the same console line.
        if step % (cfg.TRAIN.DISPLAY) == 0:
            if type(image_id) is tuple:
                image_id = image_id[0]
            status = status_fmt.format(
                step, max_iters, image_id, total_loss, lr.eval(),
                clock.average_time)
            print(status, end='\r', flush=True)

        # Hand the current step to the snapshot routine on every iteration.
        self.snapshot(sess, step)
        step += 1

    self.writer.close()
def train_model(self, sess, max_iters):
    """Train the network, cycling through the keys of ``self.Trainval_GT``.

    Builds the graph with ``self.construct_graph``, loads weights according
    to ``cfg.TRAIN_MODULE_CONTINUE`` and ``cfg.TRAIN_INIT_WEIGHT``, finalizes
    the graph and runs one training step per loop iteration up to and
    including step ``max_iters``.

    Args:
        sess: TensorFlow session used for training.
        max_iters: Last step index to run (inclusive).

    Raises:
        NotImplementedError: If training is not being continued and
            ``cfg.TRAIN_INIT_WEIGHT`` is not 1, 2 or 3.
        ValueError: If ``cfg.TRAIN_MODULE_CONTINUE`` is neither 1 nor 2, in
            which case no starting step can be determined.
    """
    lr, train_op = self.construct_graph(sess)

    if cfg.TRAIN_MODULE_CONTINUE == 1:
        self.from_previous_ckpt(sess)
    elif cfg.TRAIN_INIT_WEIGHT == 2:
        # load all params
        self.from_best_trained_model(sess)
    elif cfg.TRAIN_INIT_WEIGHT == 1:
        # load from snapshot
        self.from_snapshot(sess)
    elif cfg.TRAIN_INIT_WEIGHT == 3:
        # load all params, initial from our best, including pvp
        self.from_previous_ckpt(sess)
    else:
        # NotImplemented is a comparison sentinel, not an exception; raising
        # it produces a TypeError. NotImplementedError is the exception.
        raise NotImplementedError(
            'unsupported cfg.TRAIN_INIT_WEIGHT: %r' % (cfg.TRAIN_INIT_WEIGHT,))
    sess.graph.finalize()

    timer = Timer()
    data_length = len(self.Trainval_GT)
    # A list is needed because Python 3 dict views cannot be indexed.
    keys = list(self.Trainval_GT.keys())

    if cfg.TRAIN_MODULE_CONTINUE == 2:
        step = 0
    elif cfg.TRAIN_MODULE_CONTINUE == 1:
        # The step is the last '_'-separated field of the text preceding
        # '.ckpt' in the checkpoint path; resume one step after it. The path
        # is only parsed when actually resuming.
        ckpt_stem = self.pretrained_model.split('.ckpt')[0]
        step = int(ckpt_stem.split('_')[-1]) + 1
    else:
        raise ValueError(
            'cfg.TRAIN_MODULE_CONTINUE must be 1 or 2, got %r'
            % (cfg.TRAIN_MODULE_CONTINUE,))

    while step < max_iters + 1:
        timer.tic()

        image_id = keys[step % data_length]
        blobs = Get_Next_Instance_Verb_AVA_transfer(
            self.Trainval_GT, image_id, self.Pos_augment)

        if (step % cfg.TRAIN.SUMMARY_INTERVAL == 0) or (step < 20):
            # Compute the graph with summary
            total_loss, summary = self.net.train_step_with_summary(
                sess, blobs, lr.eval(), train_op)
            self.writer.add_summary(summary, float(step))
        else:
            # Compute the graph without summary
            total_loss = self.net.train_step(sess, blobs, lr.eval(), train_op)
        del blobs
        timer.toc()

        # Display training information
        if step % (cfg.TRAIN.DISPLAY) == 0:
            print('iter: %d / %d, im_id: %s, total loss: %.6f, lr: %f, speed: %.3f s/iter' %
                  (step, max_iters, self.Trainval_GT[image_id][0][0],
                   total_loss, lr.eval(), timer.average_time))

        # Snapshotting
        if (step % cfg.TRAIN.SNAPSHOT_ITERS == 0 and step != 0) or (step == 10):
            self.snapshot(sess, step)

        step += 1

    self.writer.close()
def train_model(self, sess, max_iters):
    """Train the network, snapshotting periodically and on each new best loss.

    Builds the graph with ``self.construct_graph``, loads weights according
    to ``cfg.TRAIN_MODULE_CONTINUE`` and ``cfg.TRAIN_INIT_WEIGHT``, finalizes
    the graph and runs one training step per loop iteration up to and
    including step ``max_iters``.

    A snapshot is taken every ``cfg.TRAIN.SNAPSHOT_ITERS * 5`` steps (except
    step 0), at step 10, and whenever, after step 1000, the total loss drops
    more than 0.0001 below the best loss seen so far (initially 10).

    Args:
        sess: TensorFlow session used for training.
        max_iters: Last step index to run (inclusive).

    Raises:
        ValueError: If ``cfg.TRAIN_MODULE_CONTINUE`` is neither 1 nor 2, in
            which case no starting step can be determined.
    """
    timer = Timer()
    data_length = len(self.Trainval_GT)
    lr, train_op = self.construct_graph(sess)

    # Load the initial model parameters.
    if cfg.TRAIN_MODULE_CONTINUE == 1:
        # continue training
        self.from_previous_ckpt(sess)
    # Otherwise start from iter 0 (noted as the default in the original
    # comments).
    # Initializing weight: 1--from faster RCNN 2--from previous best
    # 3--from our model with d
    elif cfg.TRAIN_INIT_WEIGHT == 2:
        self.from_best_trained_model(sess)
    elif cfg.TRAIN_INIT_WEIGHT == 1:
        self.from_snapshot(sess)
    elif cfg.TRAIN_INIT_WEIGHT == 3:
        # load all paras including D, initial from our best
        self.from_previous_ckpt(sess)

    # Make the graph read-only so that no new operations can be added.
    sess.graph.finalize()

    # Determine the step to start from.
    if cfg.TRAIN_MODULE_CONTINUE == 2:
        # from iter 0
        step = 0
    elif cfg.TRAIN_MODULE_CONTINUE == 1:
        # From the checkpoint: the step is the last '_'-separated field of
        # the text preceding '.ckpt' in the checkpoint path.
        ckpt_stem = self.pretrained_model.split('.ckpt')[0]
        step = int(ckpt_stem.split('_')[-1])
    else:
        raise ValueError(
            'cfg.TRAIN_MODULE_CONTINUE must be 1 or 2, got %r'
            % (cfg.TRAIN_MODULE_CONTINUE,))

    cur_min = 10  # best (lowest) total loss that has triggered a snapshot

    # Run gradient steps up to and including step max_iters.
    while step < max_iters + 1:
        timer.tic()

        # Fetch the (augmented) training data for one image.
        blobs = Get_Next_Instance_HO_Neg_HICO_pose_pattern_version2(
            self.Trainval_GT, self.Trainval_N, step,
            self.Pos_augment, self.Neg_select, data_length)

        # Run one gradient step. Per the original comment, lr is passed so
        # that it can be recorded in the summary.
        if (step % cfg.TRAIN.SUMMARY_INTERVAL == 0) or (step < 20):
            # Compute the graph with summary
            total_loss, base_loss, binary_loss, part_loss, summary = \
                self.net.train_step_with_summary(sess, blobs, lr.eval(), train_op)
            self.writer.add_summary(summary, float(step))
        else:
            # Compute the graph without summary
            total_loss, base_loss, binary_loss, part_loss = self.net.train_step(
                sess, blobs, lr.eval(), train_op)
        timer.toc()

        # Print training information.
        if step % (cfg.TRAIN.DISPLAY) == 0:
            print('iter: %d / %d, im_id: %u, lr: %f, speed: %.3f s/iter\n'
                  'total loss: %.6f\nbase loss: %.6f\nbinary loss: %.6f\npart loss: %.6f' %
                  (step, max_iters, self.Trainval_GT[step % data_length][0][0],
                   lr.eval(), timer.average_time,
                   total_loss, base_loss, binary_loss, part_loss))

        # Save the model.
        # The interval is parenthesized on purpose: '%' and '*' share the
        # same precedence, so the unparenthesized form computed
        # (step % SNAPSHOT_ITERS) * 5 and the factor 5 had no effect.
        periodic = step % (cfg.TRAIN.SNAPSHOT_ITERS * 5) == 0 and step != 0
        new_best = step > 1000 and total_loss < cur_min - 0.0001
        if periodic or step == 10 or new_best:
            if new_best:
                cur_min = total_loss
            self.snapshot(sess, step, total_loss, base_loss, binary_loss, part_loss)

        # Advance the step counter.
        step += 1

    self.writer.close()
def train_model(self, sess, max_iters):
    """Train the network, sampling positives/negatives per ``Early_flag``.

    Builds the graph with ``self.construct_graph``, loads weights according
    to ``cfg.TRAIN_MODULE_CONTINUE`` and ``cfg.TRAIN_INIT_WEIGHT``, finalizes
    the graph and runs one training step per loop iteration up to and
    including step ``max_iters``.

    Args:
        sess: TensorFlow session used for training.
        max_iters: Last step index to run (inclusive).

    Raises:
        ValueError: If ``cfg.TRAIN_MODULE_CONTINUE`` is neither 1 nor 2, or
            if ``self.Early_flag`` is neither 0 nor 1.
    """
    lr, train_op = self.construct_graph(sess)

    if cfg.TRAIN_MODULE_CONTINUE == 1:
        self.from_previous_ckpt(sess)
    elif cfg.TRAIN_INIT_WEIGHT == 2:
        self.from_best_trained_model(sess)
    elif cfg.TRAIN_INIT_WEIGHT == 1:
        self.from_snapshot(sess)
    sess.graph.finalize()

    timer = Timer()
    data_length = len(self.Trainval_GT)

    if cfg.TRAIN_MODULE_CONTINUE == 2:
        step = 0
    elif cfg.TRAIN_MODULE_CONTINUE == 1:
        # The step is the last '_'-separated field of the text preceding
        # '.ckpt' in the checkpoint path. The path is only parsed when
        # actually resuming.
        ckpt_stem = self.pretrained_model.split('.ckpt')[0]
        step = int(ckpt_stem.split('_')[-1])
    else:
        raise ValueError(
            'cfg.TRAIN_MODULE_CONTINUE must be 1 or 2, got %r'
            % (cfg.TRAIN_MODULE_CONTINUE,))

    # The flag does not change inside the loop, so pick the sampler once and
    # fail with a clear message instead of an unbound ``blobs`` later on.
    if self.Early_flag == 1:
        next_instance = Get_Next_Instance_HO_Neg_pose_pattern_version2
    elif self.Early_flag == 0:
        # Pos + spNeg (factorized model only)
        next_instance = Get_Next_Instance_HO_spNeg_pose_pattern_version2
    else:
        raise ValueError(
            'Early_flag must be 0 or 1, got %r' % (self.Early_flag,))

    while step < max_iters + 1:
        timer.tic()

        blobs = next_instance(
            self.Trainval_GT, self.Trainval_N, step,
            self.Pos_augment, self.Neg_select, data_length)

        if (step % cfg.TRAIN.SUMMARY_INTERVAL == 0) or (step < 20):
            # Training step that also produces a summary for the writer.
            loss_cls_H, loss_cls_HO, total_loss, summary = self.net.train_step_with_summary(
                sess, blobs, lr.eval(), train_op)
            self.writer.add_summary(summary, float(step))
        else:
            loss_cls_H, loss_cls_HO, total_loss = self.net.train_step(
                sess, blobs, lr.eval(), train_op)
        timer.toc()

        # Display training information
        if step % (cfg.TRAIN.DISPLAY) == 0:
            print('iter: %d / %d, im_id: %u, total loss: %.6f, loss_cls_H: %.6f, loss_cls_HO: %.6f, lr: %f, speed: %.3f s/iter' %
                  (step, max_iters, self.Trainval_GT[step % data_length][0][0],
                   total_loss, loss_cls_H, loss_cls_HO, lr.eval(), timer.average_time))

        # Snapshotting
        if (step % cfg.TRAIN.SNAPSHOT_ITERS == 0 and step != 0) or (step == 10):
            self.snapshot(sess, step)

        step += 1

    self.writer.close()
def train_model(self, sess, max_iters):
    """Train the network, visiting training indices in shuffled order.

    Builds the graph with ``self.construct_graph``, loads weights according
    to ``cfg.TRAIN_MODULE_CONTINUE`` and ``cfg.TRAIN_INIT_WEIGHT``, finalizes
    the graph and runs one training step per loop iteration up to and
    including step ``max_iters``. A snapshot is taken every
    ``cfg.TRAIN.SNAPSHOT_ITERS * 5`` steps (except step 0) and at step 10.

    Args:
        sess: TensorFlow session used for training.
        max_iters: Last step index to run (inclusive).

    Raises:
        ValueError: If ``cfg.TRAIN_MODULE_CONTINUE`` is neither 1 nor 2, in
            which case no starting step can be determined.
    """
    lr, train_op = self.construct_graph(sess)

    if cfg.TRAIN_MODULE_CONTINUE == 1:
        self.from_previous_ckpt(sess)
    elif cfg.TRAIN_INIT_WEIGHT == 2:
        self.from_best_trained_model(sess)
    elif cfg.TRAIN_INIT_WEIGHT == 1:
        self.from_snapshot(sess)
    elif cfg.TRAIN_INIT_WEIGHT == 3:
        # load all paras including D, initial from our best
        self.from_best_trained_model(sess)
    sess.graph.finalize()

    timer = Timer()

    if cfg.TRAIN_MODULE_CONTINUE == 2:
        step = 0
    elif cfg.TRAIN_MODULE_CONTINUE == 1:
        # The step is the last '_'-separated field of the text preceding
        # '.ckpt' in the checkpoint path. The path is only parsed when
        # actually resuming.
        ckpt_stem = self.pretrained_model.split('.ckpt')[0]
        step = int(ckpt_stem.split('_')[-1])
    else:
        raise ValueError(
            'cfg.TRAIN_MODULE_CONTINUE must be 1 or 2, got %r'
            % (cfg.TRAIN_MODULE_CONTINUE,))

    data_length = len(self.Trainval_GT)
    # A real list is required: np.random.shuffle works in place and a
    # Python 3 range object does not support item assignment.
    order = list(range(data_length))
    np.random.shuffle(order)

    while step < max_iters + 1:
        timer.tic()

        # Draw a new visiting order each time the step counter reaches a
        # multiple of the dataset size.
        if step % data_length == 0:
            np.random.shuffle(order)
        image_id = order[step % data_length]

        blobs = Get_Next_Instance_HO_Neg_HICO_pose_pattern_version2(
            self.Trainval_GT, self.Trainval_N, image_id,
            self.Pos_augment, self.Neg_select, data_length)

        if (step % cfg.TRAIN.SUMMARY_INTERVAL == 0) or (step < 20):
            # Compute the graph with summary
            total_loss, summary = self.net.train_step_with_summary(
                sess, blobs, lr.eval(), train_op)
            self.writer.add_summary(summary, float(step))
        else:
            # Compute the graph without summary
            total_loss = self.net.train_step(sess, blobs, lr.eval(), train_op)
        timer.toc()

        # Display training information
        if step % (cfg.TRAIN.DISPLAY) == 0:
            print('iter: %d / %d, im_id: %u, total loss: %.6f, lr: %f, speed: %.3f s/iter' %
                  (step, max_iters, image_id, total_loss, lr.eval(), timer.average_time))

        # Snapshotting
        # The interval is parenthesized on purpose: '%' and '*' share the
        # same precedence, so the unparenthesized form computed
        # (step % SNAPSHOT_ITERS) * 5 and the factor 5 had no effect.
        if (step % (cfg.TRAIN.SNAPSHOT_ITERS * 5) == 0 and step != 0) or (step == 10):
            self.snapshot(sess, step)

        step += 1

    self.writer.close()