def evaluate_one_image(): image_array=get_one_image_file("/home/zhang-rong/Yes/CnnID/test_yes/8.jpg") with tf.Graph().as_default(): BATCH_SIZE = 1 # 获取一张图片 N_CLASSES = 10 #10分类 image = tf.cast(image_array, tf.float32) image = tf.image.per_image_standardization(image) image = tf.reshape(image, [1, 28, 28, 3]) #inference输入数据需要是4维数据,需要对image进行resize logit = MainModel.inference(image, BATCH_SIZE, N_CLASSES) logit = tf.nn.softmax(logit) #inference的softmax层没有激活函数,这里增加激活函数 #因为只有一副图,数据量小,所以用placeholder x = tf.placeholder(tf.float32, shape=[28, 28, 3]) # # 训练模型路径 logs_train_dir = '/home/zhang-rong/Yes/CnnID/model/' saver=tf.train.Saver() with tf.Session() as sess: saver.restore(sess,str(logs_train_dir+"model.ckpt")) prediction = sess.run(logit, feed_dict={x: image_array}) # 得到概率最大的索引 max_index = np.argmax(prediction) print "识别出来的身份证数字为:",max_index
def evaluate_one_image():
    """Classify one image as original vs. tampered and print the verdict.

    Builds the inference graph on top of a feed placeholder, restores the
    latest checkpoint from the model directory and prints the predicted
    class together with its softmax probability.
    """
    # dataset path (commented-out variant picks a random training image)
    # test_dir = 'C:/Graduation-project/Tamper_Detection_1/train'
    # test, test_label = input_data.get_files(test_dir)
    # image_array = get_one_image(test)
    image_array = get_one_image_file(
        "C:/Graduation-project/Tamper_Detection_1/test/Tp_S_CNN_S_N_txt00039_txt00039_11326.jpg"
    )
    with tf.Graph().as_default():
        BATCH_SIZE = 1  # evaluate exactly one image
        N_CLASSES = 2   # binary: original vs. modified

        # BUG FIX: the graph must be built from the placeholder `x`.
        # Previously the pipeline was built directly from `image_array`,
        # so `feed_dict={x: image_array}` below had no effect on `logit`.
        x = tf.placeholder(tf.float32, shape=[208, 208, 3])
        image = tf.image.per_image_standardization(x)
        # inference expects a 4-D batch tensor
        image = tf.reshape(image, [1, 208, 208, 3])
        logit = MainModel.inference(image, BATCH_SIZE, N_CLASSES)
        # inference's output layer has no activation; apply softmax here
        logit = tf.nn.softmax(logit)

        # trained model path
        logs_train_dir = 'C:/Graduation-project/Tamper_Detection_1/model'
        saver = tf.train.Saver()
        with tf.Session() as sess:
            # restore weights from the latest checkpoint
            print("Reading checkpoints...")
            ckpt = tf.train.get_checkpoint_state(logs_train_dir)
            if ckpt and ckpt.model_checkpoint_path:
                global_step = ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1]
                saver.restore(sess, ckpt.model_checkpoint_path)
                print('Loading success, global_step is %s' % global_step)
            else:
                print('No checkpoint file found')
                # BUG FIX: without weights the forward pass below would fail
                # on uninitialized variables; bail out instead.
                return
            prediction = sess.run(logit, feed_dict={x: image_array})
            # index with the largest probability decides the class
            max_index = np.argmax(prediction)
            # BUG FIX: prediction[:, k] is a length-1 array; '%f' formatting of
            # an array is deprecated/raises in modern NumPy — index the scalar.
            if max_index == 0:
                print('This is Original picture with possibility %.6f' %
                      prediction[0, 0])
            else:
                print('This is a Modified picture with possibility %.6f' %
                      prediction[0, 1])
def evaluate_one_image():
    """Classify one image as cat vs. dog and print the verdict.

    Builds the inference graph on top of a feed placeholder, restores the
    latest checkpoint from the model directory and prints the predicted
    class together with its softmax probability.
    """
    image_array = get_one_image_file(
        "/Users/gao han/Downloads/images/test1/52.jpg")
    with tf.Graph().as_default():
        BATCH_SIZE = 1  # evaluate exactly one image
        N_CLASSES = 2   # binary: cat vs. dog

        # BUG FIX: the graph must be built from the placeholder `x`.
        # Previously the pipeline was built directly from `image_array`,
        # so `feed_dict={x: image_array}` below had no effect on `logit`.
        x = tf.placeholder(tf.float32, shape=[208, 208, 3])
        image = tf.image.per_image_standardization(x)
        image = tf.reshape(image, [1, 208, 208, 3])  # inference needs a 4-D batch
        logit = MainModel.inference(image, BATCH_SIZE, N_CLASSES)
        logit = tf.nn.softmax(logit)  # output layer has no activation; add it

        # model save path
        logs_train_dir = '/Users/gao han/Downloads/images/model/'
        saver = tf.train.Saver()
        with tf.Session() as sess:
            # restore the trained parameters
            print("Reading checkpoints...")
            ckpt = tf.train.get_checkpoint_state(logs_train_dir)
            if ckpt and ckpt.model_checkpoint_path:
                global_step = ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1]
                saver.restore(sess, ckpt.model_checkpoint_path)
                print('Loading success, global_step is %s' % global_step)
            else:
                print('No checkpoint file found')
                # BUG FIX: without weights the forward pass below would fail
                # on uninitialized variables; bail out instead.
                return
            prediction = sess.run(logit, feed_dict={x: image_array})
            # index with the largest probability decides the class
            max_index = np.argmax(prediction)
            # BUG FIX: prediction[:, k] is a length-1 array; '%f' formatting of
            # an array is deprecated/raises in modern NumPy — index the scalar.
            if max_index == 0:
                print('This is a cat with possibility %.6f' % prediction[0, 0])
            else:
                print('This is a dog with possibility %.6f' % prediction[0, 1])
def train(lr, n_epochs, save_dir, clip_grads=None, load=None, model_files=None):
    """Train the saliency model with Adam, checkpointing once per epoch.

    Args:
        lr: learning rate for the Adam optimizer.
        n_epochs: epoch loop runs for i in range(1, n_epochs), as before.
        save_dir: directory into which 'model.ckpt-<epoch>' files are written.
        clip_grads: if truthy, clip gradients to global norm 1.
        load: if truthy, restore the latest checkpoint from `model_files`.
        model_files: checkpoint directory used when `load` is set.
    """
    opt = tf.train.AdamOptimizer(lr)
    with tf.variable_scope(tf.get_variable_scope()):
        model = MM.Model()
        model.build_model()
    tvars = tf.trainable_variables()
    grads = tf.gradients(model.Loss_Mean, tvars)
    if clip_grads:
        max_grad_norm = 1
        # BUG FIX: the clipped gradients were previously assigned to
        # `clip_grads` and discarded, so clipping never took effect.
        grads, _ = tf.clip_by_global_norm(grads, max_grad_norm)
    train_op = opt.apply_gradients(zip(grads, tvars))

    sess = tf.Session()
    sess.run(tf.global_variables_initializer())
    saver = tf.train.Saver()
    if load:
        # resume from the most recent checkpoint
        ckpt = tf.train.get_checkpoint_state(model_files)
        saver.restore(sess, ckpt.model_checkpoint_path)

    train_list, label_list = load_training_list()
    img_size = MM.img_size
    label_size = MM.label_size

    # BUG FIX: create the checkpoint directory once, up front. Previously
    # '/model.ckpt' was appended to save_dir inside the epoch loop (growing
    # the path every epoch) and os.mkdir was invoked on the ckpt path itself.
    if not os.path.exists(save_dir):
        os.mkdir(save_dir)
    ckpt_path = os.path.join(save_dir, 'model.ckpt')

    for i in range(1, n_epochs):
        whole_loss = 0.0
        whole_acc = 0.0
        count = 0
        for f_img, f_label in zip(train_list, label_list):
            # BGR image resized and VGG-mean-centred, as a 1-image batch
            img = cv2.imread(f_img).astype(np.float32)
            img = cv2.resize(img, (img_size, img_size)) - vgg16.VGG_MEAN
            img = img.reshape((1, img_size, img_size, 3))
            # the input GT has been preprocessed to [0, 1]
            label = cv2.imread(f_label)[:, :, 0].astype(np.float32)
            label = cv2.resize(label, (label_size, label_size))
            label = label.astype(np.float32)
            # two channels: (foreground prob, background prob)
            label = np.stack((label, 1 - label), axis=2)
            label = np.reshape(label, [-1, 2])

            _, loss, acc = sess.run(
                [train_op, model.Loss_Mean, model.accuracy],
                feed_dict={
                    model.input_holder: img,
                    model.label_holder: label
                })
            whole_loss += loss
            whole_acc += acc
            count = count + 1
            # progress report every 200 images
            if count % 200 == 0:
                print("Loss of %d images: %f, Accuracy: %f" %
                      (count, (whole_loss / count), (whole_acc / count)))

        print("Epoch %d: %f" % (i, (whole_loss / len(train_list))))
        saver.save(sess, ckpt_path, global_step=i)
# generateur = Generateur_Voeux("parcours8PC.csv", "edt.csv") generateur = Generateur_Voeux("parcours8PC_1.csv", "edt.csv") Liste_charge = list() Liste_ProportionSatis = list() Liste_chargeE = list() Liste_ProportionSatisE = list() nbExecutions = 5 # I = set() # for i in range(nbExecutions): p = 0 n = 0 # while n < 3 : for i in range(nbExecutions): dossierVoeux, ListeParcours = generateur.generer() m = MainModel( dossierVoeux, "edt.csv", ) charge, p = m.resoudre() # if p >= 100.: # print dossierVoeux # n += 1 Liste_charge.append(charge) Liste_ProportionSatis.append(p) f = open(dossierVoeux + "_detail_affectation.txt", "w") f.write(str(m)) f.close() m.remise_a_zero() # a m = MainModel(dossierVoeux, "edt.csv") charge, p = m.resoudre(False)
def run_training():
    """Train the tamper-detection CNN for MAX_STEP steps.

    Builds the input pipeline, inference, loss and optimizer ops, then runs
    the training loop under queue-runner threads, logging summaries every
    50 steps and checkpointing every 2000 steps.
    """
    """ ##1. Data preparation """
    # training image directory
    train_dir = 'C:/Graduation-project/Tamper_Detection_1/train/'
    # where TensorBoard logs are written
    logs_train_dir = 'C:/Graduation-project/Tamper_Detection_1/log/'
    # where model checkpoints are saved
    train_model_dir = 'C:/Graduation-project/Tamper_Detection_1/model/'
    # training image paths and their labels
    train, train_label = input_data.get_files(train_dir)
    # batched TensorFlow tensors for the input pipeline
    train_batch, train_label_batch = input_data.get_batch(
        train, train_label, IMG_W, IMG_H, BATCH_SIZE, CAPACITY)
    """ ##2. Network inference """
    # forward pass: per-class logits for the batch
    train_logits = MainModel.inference(train_batch, BATCH_SIZE, N_CLASSES)
    """ ##3. Cross-entropy loss and gradient-descent optimizer """
    # loss value
    train_loss = MainModel.losses(train_logits, train_label_batch)
    # optimization op minimizing the loss
    train_op = MainModel.trainning(train_loss, learning_rate)
    """ ##4. Variables used by the loop below """
    # classification accuracy derived from the logits
    train__acc = MainModel.evaluation(train_logits, train_label_batch)
    # merge graph/summary ops for TensorBoard
    summary_op = tf.summary.merge_all()
    # new session
    sess = tf.Session()
    # write the training log under logs_train_dir
    train_writer = tf.summary.FileWriter(logs_train_dir, sess.graph)
    saver = tf.train.Saver()  # checkpoint saver
    # initialize all variables
    sess.run(tf.global_variables_initializer())
    # coordinator managing all threads started in this Session
    coord = tf.train.Coordinator()
    # start the enqueue threads (thread count is defined by tf.train.batch)
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    """
    Training loop:
    coord.should_stop() reports whether all threads should terminate; once
    every file in the queue has been dequeued an OutOfRangeError is raised,
    at which point all threads in the Session must be stopped.
    """
    try:
        for step in np.arange(MAX_STEP):  # loop from 0 to MAX_STEP
            if coord.should_stop():
                break
            _, tra_loss, tra_acc = sess.run([train_op, train_loss, train__acc])
            # report loss and accuracy every 50 steps
            if step % 50 == 0:
                print('Step %d, train loss = %.2f, train accuracy = %.2f%%' %
                      (step, tra_loss, tra_acc * 100.0))
                summary_str = sess.run(summary_op)
                train_writer.add_summary(summary_str, step)
            # checkpoint every 2000 steps and at the final step
            if step % 2000 == 0 or (step + 1) == MAX_STEP:
                checkpoint_path = os.path.join(train_model_dir, 'model.ckpt')
                saver.save(sess, checkpoint_path, global_step=step)
    # raised when the file queue reaches its end
    except tf.errors.OutOfRangeError:
        print('Done training -- epoch limit reached')
    finally:
        coord.request_stop()  # ask all threads to terminate
    coord.join(threads)  # wait for the threads to finish
    sess.close()  # close the session
    # tail of load_img_list: remaining dataset-name -> image-directory mappings
    # (the function's `def` line is above this excerpt)
    elif dataset == 'SED2':
        path = '/home/zhanglu/Documents/dataset/SED2/SED2-Image'
    elif dataset == 'SOC':
        path = '/home/zhanglu/Downloads/SOC6K_Release/ValSet/img_select'
    elif dataset == 'zy':
        path = '/home/zhanglu/Documents/zengyi_1981_1024'
    # list every image file in the selected directory
    imgs = os.listdir(path)
    return path, imgs


if __name__ == "__main__":
    # build the model graph and restore the latest checkpoint from ./model
    model = MM.Model()
    model.build_model()
    sess = tf.Session()
    sess.run(tf.global_variables_initializer())
    img_size = MM.img_size
    label_size = MM.label_size
    ckpt = tf.train.get_checkpoint_state('model')
    saver = tf.train.Saver()
    saver.restore(sess, ckpt.model_checkpoint_path)
    datasets = ['zy']  # datasets to evaluate
    if not os.path.exists('Result'):
        os.mkdir('Result')
    for dataset in datasets:
        # NOTE(review): loop body continues beyond this excerpt
        path, imgs = load_img_list(dataset)
def run_training():
    """Train the cat/dog classifier for MAX_STEP steps.

    Builds the input pipeline, inference, loss and optimizer ops, then runs
    the training loop under queue-runner threads, logging summaries every
    50 steps and checkpointing every 2000 steps.
    """
    """ #1. img processing """
    # the path of the training images
    train_dir = '/Users/gao han/Downloads/images/train/'
    # path to save the log
    logs_train_dir = '/Users/gao han/Downloads/images/log/'
    # path to save parameters of the model
    train_model_dir = '/Users/gao han/Downloads/images/model/'
    # get the img paths and the labels
    train, train_label = input_data.get_files(train_dir)
    # get batched TensorFlow tensors
    train_batch, train_label_batch = input_data.get_batch(
        train, train_label, IMG_W, IMG_H, BATCH_SIZE, CAPACITY)
    """ ##2. CNN """
    # get the output logits
    train_logits = MainModel.inference(train_batch, BATCH_SIZE, N_CLASSES)
    """ ##3. crossover entropy and gradient descent optimizer """
    # cross-entropy loss
    train_loss = MainModel.losses(train_logits, train_label_batch)
    # optimization op minimizing the loss
    train_op = MainModel.trainning(train_loss, learning_rate)
    """ ##4. Define the variables """
    # Calculate the classification accuracy
    train__acc = MainModel.evaluation(train_logits, train_label_batch)
    # merge summaries for TensorBoard
    summary_op = tf.summary.merge_all()
    sess = tf.Session()
    # save the log to logs_train_dir
    train_writer = tf.summary.FileWriter(logs_train_dir, sess.graph)
    saver = tf.train.Saver()  # checkpoint saver
    # Initializing Variables
    sess.run(tf.global_variables_initializer())
    # coordinator managing the enqueue threads started below
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    try:
        for step in np.arange(MAX_STEP):
            if coord.should_stop():
                break
            _, tra_loss, tra_acc = sess.run([train_op, train_loss, train__acc])
            # print the loss and accuracy each 50 steps
            if step % 50 == 0:
                print('Step %d, train loss = %.2f, train accuracy = %.2f%%' %
                      (step, tra_loss, tra_acc * 100.0))
                summary_str = sess.run(summary_op)
                train_writer.add_summary(summary_str, step)
            # The model is saved every 2,000 steps and at the final step
            if step % 2000 == 0 or (step + 1) == MAX_STEP:
                checkpoint_path = os.path.join(train_model_dir, 'model.ckpt')
                saver.save(sess, checkpoint_path, global_step=step)
    # This exception is thrown if read to the end of the file queue
    except tf.errors.OutOfRangeError:
        print('Done training -- epoch limit reached')
    finally:
        coord.request_stop()  # issue a command to terminate all threads
    coord.join(threads)  # wait for the threads to finish
    sess.close()
def train():
    """Joint YOLO (detection) + MiDaS (depth) training loop.

    Reads all configuration from the module-level `opt`/`hyp` globals, loads
    initial weights (combined .pt, or separate YOLO/MiDaS checkpoints),
    optionally freezes the encoder and one branch, then trains with a cosine
    LR schedule, optional AMP/DDP, EMA weights, burn-in and multi-scale.
    Returns the last test `results` tuple.
    """
    #cfg = opt.cfg
    #model = opt.model
    data = opt.data
    epochs = opt.epochs  # 500200 batches at bs 64, 117263 images = 273 epochs
    batch_size = opt.batch_size
    accumulate = opt.accumulate  # effective bs = batch_size * accumulate = 16 * 4 = 64
    weights = opt.weights  # initial training weights
    weights_yolo = opt.weights_yolo  # initial training weights
    weights_midas = opt.weights_midas  # initial training weights
    imgsz_min, imgsz_max, imgsz_test = opt.img_size  # img sizes (min, max, test)
    branch = opt.branch
    # loss mixing weights: lambda_y scales the YOLO loss, lambda_m the
    # MiDaS SSIM loss; either is zeroed below when that branch is frozen
    lambda_y = 1
    lambda_m = 1
    #lambda_p = 1

    # Image Sizes
    gs = 64  # (pixels) grid size
    assert math.fmod(
        imgsz_min,
        gs) == 0, '--img-size %g must be a %g-multiple' % (imgsz_min, gs)
    opt.multi_scale |= imgsz_min != imgsz_max  # multi if different (min, max)
    if opt.multi_scale:
        if imgsz_min == imgsz_max:
            # NOTE(review): //= with a float divisor yields a float here
            imgsz_min //= 1.5
            imgsz_max //= 0.667
        grid_min, grid_max = imgsz_min // gs, imgsz_max // gs
        imgsz_min, imgsz_max = grid_min * gs, grid_max * gs
    img_size = imgsz_max  # initialize with max size

    # Configure run
    init_seeds()
    #data_dict = parse_data_cfg(data)
    train_path = "./data/customdata/custom_train.txt"  #data_dict['train']
    test_path = "./data/customdata/custom_test.txt"  #data_dict['valid']
    nc = 4  #1 if opt.single_cls else int(data_dict['classes'])  # number of classes
    hyp['cls'] *= nc / 80  # update coco-tuned hyp['cls'] to current dataset

    # Remove previous results
    for f in glob.glob('*_batch*.png') + glob.glob(results_file):
        os.remove(f)

    # Initialize model
    model = MainModel.MainModel().to(device)
    #print(model)

    # Optimizer: split parameters into three groups so that only
    # Conv2d weights get weight decay and biases get their own group
    pg0, pg1, pg2 = [], [], []  # optimizer parameter groups
    for k, v in dict(model.named_parameters()).items():
        if '.bias' in k:
            pg2 += [v]  # biases
        elif 'Conv2d.weight' in k:
            pg1 += [v]  # apply weight_decay
        else:
            pg0 += [v]  # all else

    if opt.adam:
        #print("adam")
        # hyp['lr0'] *= 0.1  # reduce lr (i.e. SGD=5E-3, Adam=5E-4)
        optimizer = optim.Adam(pg0, lr=hyp['lr0'])
        # optimizer = AdaBound(pg0, lr=hyp['lr0'], final_lr=0.1)
    else:
        #print("sgd")
        optimizer = optim.SGD(pg0,
                              lr=hyp['lr0'],
                              momentum=hyp['momentum'],
                              nesterov=True)
    #print(optimizer.param_groups)
    optimizer.add_param_group({
        'params': pg1,
        'weight_decay': hyp['weight_decay']
    })  # add pg1 with weight_decay
    optimizer.add_param_group({'params': pg2})  # add pg2 (biases)
    del pg0, pg1, pg2

    start_epoch = 0
    best_fitness = 0.0
    #attempt_download(weights)
    if weights.endswith('.pt'):
        # resume from a combined checkpoint containing the full model
        chkpt = torch.load(weights, map_location=device)
        try:
            # keep only entries whose shapes match the current model
            chkpt = {
                k: v
                for k, v in chkpt.items()
                if model.state_dict()[k].numel() == v.numel()
            }
            model.load_state_dict(chkpt, strict=False)
            # freezing the encoder weights
            # NOTE(review): state_dict() (keep_vars=False) returns detached
            # tensors, so setting requires_grad on them likely does NOT
            # freeze the parameters — verify via named_parameters() instead.
            for k, v in dict(
                    model.pretrained.layer1.named_parameters()).items():
                if ('.weight' in k):
                    model.state_dict()['pretrained.layer1.' +
                                       k].requires_grad = False
            for k, v in dict(
                    model.pretrained.layer2.named_parameters()).items():
                if ('.weight' in k):
                    model.state_dict()['pretrained.layer2.' +
                                       k].requires_grad = False
            for k, v in dict(
                    model.pretrained.layer3.named_parameters()).items():
                if ('.weight' in k):
                    model.state_dict()['pretrained.layer3.' +
                                       k].requires_grad = False
            for k, v in dict(
                    model.pretrained.layer4.named_parameters()).items():
                if ('.weight' in k):
                    model.state_dict()['pretrained.layer4.' +
                                       k].requires_grad = False
            # NOTE(review): `branch in 'yolo'` is a substring test (e.g.
            # branch='yo' also matches); `branch == 'yolo'` may be intended.
            if (branch in 'yolo'):
                #freeze midas
                lambda_m = 0
                for k, v in dict(model.scratch.named_parameters()).items():
                    if ('.weight' in k):
                        model.state_dict()['scratch.' +
                                           k].requires_grad = False
            elif (branch in 'midas'):
                #freeze yolo
                lambda_y = 0
                for k, v in dict(model.named_parameters()).items():
                    if ('.weight' in k and 'yolo' in k):
                        model.state_dict()[k].requires_grad = False
        except KeyError as e:
            s = "%s is not compatible with %s. Specify --weights '' or specify a --cfg compatible with %s. " \
                "See https://github.com/ultralytics/yolov3/issues/657" % (opt.weights, opt.cfg, opt.weights)
            raise KeyError(s) from e
    else:
        #initial load: separate YOLO and MiDaS checkpoints
        if weights_yolo.endswith('.pt'):
            # possible weights are '*.pt', 'yolov3-spp.pt', 'yolov3-tiny.pt' etc.
            Yolo_state_dict = torch.load(weights_yolo,
                                         map_location=device)['model']
            # mapping from this model's layer names to the original
            # ultralytics yolov3 module_list entries
            layers_mapping = {
                'yolo4_hack.0.weight': 'module_list.88.Conv2d.weight',
                'yolo4_class': 'module_list.89.Conv2d.weight',
                'yolo3_2.0.weight': 'module_list.90.Conv2d.weight',
                'yolo3_1.1.weight': 'module_list.91.Conv2d.weight',
                'yolo3_1.0.weight': 'module_list.92.Conv2d.weight',
                #: module_list.93.weight',
                'yolo3_3.0.weight': 'module_list.94.Conv2d.weight',
                'yolo3_3.3.weight': 'module_list.95.Conv2d.weight',
                'yolo3_3.6.weight': 'module_list.96.Conv2d.weight',
                'yolo3_3.9.weight': 'module_list.97.Conv2d.weight',
                'yolo3_3.12.weight': 'module_list.98.Conv2d.weight',
                'yolo3_3.15.weight': 'module_list.99.Conv2d.weight',
                'yolo3_hack.0.weight': 'module_list.100.Conv2d.weight',
                'yolo3_class': 'module_list.101.Conv2d.weight',
                'yolo2_1.0.weight': 'module_list.102.Conv2d.weight',
                'yolo2_2.1.weight': 'module_list.103.Conv2d.weight',
                'yolo2_2.0.weight': 'module_list.104.Conv2d.weight',
                #: module_list.105,
                'yolo2_3.0.weight': 'module_list.106.Conv2d.weight',
                'yolo2_3.3.weight': 'module_list.107.Conv2d.weight',
                'yolo2_3.6.weight': 'module_list.108.Conv2d.weight',
                'yolo2_3.9.weight': 'module_list.109.Conv2d.weight',
                'yolo2_3.12.weight': 'module_list.110.Conv2d.weight',
                'yolo2_3.15.weight': 'module_list.111.Conv2d.weight',
                'yolo2_hack.0.weight': 'module_list.112.Conv2d.weight',
                'yolo2_class': 'module_list.113.Conv2d.weight'
            }
            yolo_weight_dict = {
                k: v
                for k, v in Yolo_state_dict.items()
                if k in layers_mapping.values()
            }
            # load model
            try:
                #have to make changes
                model_dict = model.state_dict()
                yolo_dict = {}
                # copy each mapped YOLO tensor whose size matches
                for k, v in layers_mapping.items():
                    for k2, v2 in yolo_weight_dict.items():
                        if (v == k2 and model_dict[k].numel() == v2.numel()):
                            yolo_dict[k] = v2
                model_dict.update(yolo_dict)
                model.load_state_dict(model_dict, strict=False)
            except KeyError as e:
                s = "%s is not compatible with %s. Specify --weights '' or specify a --cfg compatible with %s. " \
                    "See https://github.com/ultralytics/yolov3/issues/657" % (opt.weights_yolo, opt.cfg, opt.weights_yolo)
                raise KeyError(s) from e
            del yolo_weight_dict
        if weights_midas.endswith('.pt'):  # pytorch format
            # possible weights are '*.pt', 'yolov3-spp.pt', 'yolov3-tiny.pt' etc.
            Midas_state_dict = torch.load(weights_midas, map_location=device)
            # load model
            try:
                #have to make changes
                model_dict = model.state_dict()
                midas_weight_dict = {
                    k: v
                    for k, v in Midas_state_dict.items() if k in model_dict
                }
                model_dict.update(midas_weight_dict)
                model.load_state_dict(model_dict, strict=False)
                #chkpt['model'] = {k: v for k, v in chkpt['model'].items() if model.state_dict()[k].numel() == v.numel()}
                #model.load_state_dict(chkpt['model'], strict=False)
                # freezing the encoder weights
                # NOTE(review): same caveat as above — state_dict() tensors
                # are detached; this likely does not freeze anything.
                for k, v in dict(
                        model.pretrained.layer1.named_parameters()).items():
                    if ('.weight' in k):
                        model.state_dict()['pretrained.layer1.' +
                                           k].requires_grad = False
                for k, v in dict(
                        model.pretrained.layer2.named_parameters()).items():
                    if ('.weight' in k):
                        model.state_dict()['pretrained.layer2.' +
                                           k].requires_grad = False
                for k, v in dict(
                        model.pretrained.layer3.named_parameters()).items():
                    if ('.weight' in k):
                        model.state_dict()['pretrained.layer3.' +
                                           k].requires_grad = False
                for k, v in dict(
                        model.pretrained.layer4.named_parameters()).items():
                    if ('.weight' in k):
                        model.state_dict()['pretrained.layer4.' +
                                           k].requires_grad = False
                if (branch in 'yolo'):
                    #freeze midas
                    lambda_m = 0
                    for k, v in dict(model.scratch.named_parameters()).items():
                        if ('.weight' in k):
                            model.state_dict()['scratch.' +
                                               k].requires_grad = False
                elif (branch in 'midas'):
                    #freeze yolo
                    lambda_y = 0
                    for k, v in dict(model.named_parameters()).items():
                        if ('.weight' in k and 'yolo' in k):
                            model.state_dict()[k].requires_grad = False
                #print("done")
            except KeyError as e:
                s = "%s is not compatible with %s. Specify --weights '' or specify a --cfg compatible with %s. " \
                    "See https://github.com/ultralytics/yolov3/issues/657" % (opt.weights_midas, opt.cfg, opt.weights_midas)
                raise KeyError(s) from e

            # load optimizer
            #if chkpt['optimizer'] is not None:
            #optimizer.load_state_dict(chkpt['optimizer'])
            #best_fitness = chkpt['best_fitness']

            # load results
            #if chkpt.get('training_results') is not None:
            #with open(results_file, 'w') as file:
            #file.write(chkpt['training_results'])  # write results.txt

            start_epoch = 1
            del Midas_state_dict  #chkpt

    #elif len(weights) > 0:  # darknet format
    # possible weights are '*.weights', 'yolov3-tiny.conv.15', 'darknet53.conv.74' etc.
    #load_darknet_weights(model, weights)

    # Mixed precision training https://github.com/NVIDIA/apex
    if mixed_precision:
        model, optimizer = amp.initialize(model,
                                          optimizer,
                                          opt_level='O1',
                                          verbosity=0)

    # Scheduler https://github.com/ultralytics/yolov3/issues/238
    lf = lambda x: (
        ((1 + math.cos(x * math.pi / epochs)) / 2
         )**1.0) * 0.95 + 0.05  # cosine https://arxiv.org/pdf/1812.01187.pdf
    scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf, last_epoch=-1)
    # scheduler = lr_scheduler.MultiStepLR(optimizer, [round(epochs * x) for x in [0.8, 0.9]], 0.1, start_epoch - 1)

    # Plot lr schedule
    # y = []
    # for _ in range(epochs):
    #     scheduler.step()
    #     y.append(optimizer.param_groups[0]['lr'])
    # plt.plot(y, '.-', label='LambdaLR')
    # plt.xlabel('epoch')
    # plt.ylabel('LR')
    # plt.tight_layout()
    # plt.savefig('LR.png', dpi=300)

    # Initialize distributed training
    if device.type != 'cpu' and torch.cuda.device_count(
    ) > 1 and torch.distributed.is_available():
        dist.init_process_group(
            backend='nccl',  # 'distributed backend'
            init_method='tcp://127.0.0.1:9999',  # distributed training init method
            world_size=1,  # number of nodes for distributed training
            rank=0)  # distributed training node rank
        model = torch.nn.parallel.DistributedDataParallel(
            model, find_unused_parameters=True)
        model.yolo_layers = model.module.yolo_layers  # move yolo layer indices to top level

    # Dataset
    dataset = LoadImagesAndLabels(
        train_path,
        img_size,
        batch_size,
        augment=True,
        hyp=hyp,  # augmentation hyperparameters
        rect=opt.rect,  # rectangular training
        cache_images=opt.cache_images,
        single_cls=opt.single_cls)

    # Dataloader
    batch_size = min(batch_size, len(dataset))
    nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0,
              8])  # number of workers
    dataloader = torch.utils.data.DataLoader(
        dataset,
        batch_size=batch_size,
        num_workers=nw,
        shuffle=not opt.rect,  # Shuffle=True unless rectangular training is used
        pin_memory=True,
        collate_fn=dataset.collate_fn)

    # Testloader
    testloader = torch.utils.data.DataLoader(LoadImagesAndLabels(
        test_path,
        imgsz_test,
        batch_size,
        hyp=hyp,
        rect=True,
        cache_images=opt.cache_images,
        single_cls=opt.single_cls),
                                             batch_size=batch_size,
                                             num_workers=nw,
                                             pin_memory=True,
                                             collate_fn=dataset.collate_fn)

    # Model parameters
    model.nc = nc  # attach number of classes to model
    model.hyp = hyp  # attach hyperparameters to model
    model.gr = 1.0  # giou loss ratio (obj_loss = 1.0 or giou)
    model.class_weights = labels_to_class_weights(dataset.labels, nc).to(
        device)  # attach class weights

    # Model EMA
    ema = ModelEMA(model)

    # Start training
    nb = len(dataloader)  # number of batches
    n_burn = max(3 * nb, 500)  # burn-in iterations, max(3 epochs, 500 iterations)
    maps = np.zeros(nc)  # mAP per class
    # torch.autograd.set_detect_anomaly(True)
    results = (
        0, 0, 0, 0, 0, 0, 0, 0
    )  # 'P', 'R', 'mAP', 'F1', 'val GIoU', 'val Objectness', 'val Classification'
    yolo_loss = 0
    ssim_loss = 0
    t0 = time.time()
    print('Image sizes %g - %g train, %g test' %
          (imgsz_min, imgsz_max, imgsz_test))
    print('Using %g dataloader workers' % nw)
    print('Starting training for %g epochs...' % epochs)
    for epoch in range(
            start_epoch, epochs
    ):  # epoch ------------------------------------------------------------------
        model.train()

        # Update image weights (optional)
        if dataset.image_weights:
            w = model.class_weights.cpu().numpy() * (1 - maps)**2  # class weights
            image_weights = labels_to_image_weights(dataset.labels,
                                                    nc=nc,
                                                    class_weights=w)
            dataset.indices = random.choices(range(dataset.n),
                                             weights=image_weights,
                                             k=dataset.n)  # rand weighted idx

        mloss = torch.zeros(4).to(device)  # mean losses
        #ssim_mloss = torch.zeros(1).to(device)  # mean losses
        print(('\n' + '%10s' * 8) %
              ('Epoch', 'gpu_mem', 'GIoU', 'obj', 'cls', 'total', 'targets',
               'img_size'))
        pbar = tqdm(enumerate(dataloader), total=nb)  # progress bar
        for i, (
                imgs, targets, paths, _, midas
        ) in pbar:  # batch -------------------------------------------------------------
            ni = i + nb * epoch  # number integrated batches (since train start)
            imgs = imgs.to(device).float(
            ) / 255.0  # uint8 to float32, 0 - 255 to 0.0 - 1.0
            targets = targets.to(device)
            midas = midas.to(device).float() / 255.0

            # Burn-in
            if ni <= n_burn * 2:
                model.gr = np.interp(
                    ni, [0, n_burn * 2],
                    [0.0, 1.0])  # giou loss ratio (obj_loss = 1.0 or giou)
                if ni == n_burn:  # burnin complete
                    print_model_biases(model)
                for j, x in enumerate(optimizer.param_groups):
                    # bias lr falls from 0.1 to lr0, all other lrs rise from 0.0 to lr0
                    x['lr'] = np.interp(
                        ni, [0, n_burn],
                        [0.1 if j == 2 else 0.0, x['initial_lr'] * lf(epoch)])
                    if 'momentum' in x:
                        x['momentum'] = np.interp(ni, [0, n_burn],
                                                  [0.9, hyp['momentum']])

            # Multi-Scale training
            if opt.multi_scale:
                if ni / accumulate % 1 == 0:  # adjust img_size (67% - 150%) every 1 batch
                    img_size = random.randrange(grid_min, grid_max + 1) * gs
                sf = img_size / max(imgs.shape[2:])  # scale factor
                if sf != 1:
                    ns = [math.ceil(x * sf / gs) * gs for x in imgs.shape[2:]
                          ]  # new shape (stretched to 32-multiple)
                    imgs = F.interpolate(imgs,
                                         size=ns,
                                         mode='bilinear',
                                         align_corners=False)

            # Run model
            pred = model(imgs)
            #print('pred', pred[0])
            #print('midas', midas)
            #print(len(pred[1]))

            # Compute loss: YOLO detection loss + SSIM depth loss
            yolo_loss, loss_items = compute_loss(pred[1], targets, model)
            ssim_obj = SSIM()
            midas = midas.unsqueeze(1)
            ssim_loss = 1 - ssim_obj(pred[0], midas)
            #print('ssim_loss', ssim_loss.data)
            #ssim_loss_items = ssim_loss
            loss = lambda_y * yolo_loss + lambda_m * ssim_loss
            if not torch.isfinite(loss):
                print('WARNING: non-finite loss, ending training ', loss_items)
                return results

            # Scale loss by nominal batch_size of 64
            loss *= batch_size / 64

            # Compute gradient
            if mixed_precision:
                with amp.scale_loss(loss, optimizer) as scaled_loss:
                    scaled_loss.backward()
            else:
                loss.backward()

            # Optimize accumulated gradient
            if ni % accumulate == 0:
                optimizer.step()
                optimizer.zero_grad()
                ema.update(model)

            # Print batch results
            mloss = (mloss * i + loss_items) / (i + 1)  # update mean losses
            mem = '%.3gG' % (torch.cuda.memory_cached() / 1E9
                             if torch.cuda.is_available() else 0)  # (GB)
            s = ('%10s' * 2 + '%10.3g' * 6) % ('%g/%g' %
                                               (epoch, epochs - 1), mem,
                                               *mloss, len(targets), img_size)
            pbar.set_description(s)

            # Plot images with bounding boxes
            if ni < 1:
                f = 'train_batch%g.png' % i  # filename
                plot_images(imgs=imgs, targets=targets, paths=paths, fname=f)
                if tb_writer:
                    tb_writer.add_image(f,
                                        cv2.imread(f)[:, :, ::-1],
                                        dataformats='HWC')
                    # tb_writer.add_graph(model, imgs)  # add model to tensorboard

            # end batch ------------------------------------------------------------------------------------------------

        # Update scheduler
        scheduler.step()

        # Process epoch results
        ema.update_attr(model)
        final_epoch = epoch + 1 == epochs
        if not opt.notest or final_epoch:  # Calculate mAP
            is_coco = any([
                x in data
                for x in ['coco.data', 'coco2014.data', 'coco2017.data']
            ]) and model.nc == 80
            results, maps = test.test(data,
                                      lambda_y,
                                      lambda_m,
                                      batch_size=batch_size,
                                      img_size=imgsz_test,
                                      model=ema.ema,
                                      save_json=final_epoch and is_coco,
                                      single_cls=opt.single_cls,
                                      dataloader=testloader)

        # Write epoch results
        with open(results_file, 'a') as f:
            f.write(s + '%10.3g' * 8 % results +
                    '\n')  # P, R, mAP, F1, test_losses=(GIoU, obj, cls)
        if len(opt.name) and opt.bucket:
            os.system('gsutil cp results.txt gs://%s/results/results%s.txt' %
                      (opt.bucket, opt.name))

        # Write Tensorboard results
        if tb_writer:
            tags = [
                'train/giou_loss', 'train/obj_loss', 'train/cls_loss',
                'metrics/precision', 'metrics/recall', 'metrics/mAP_0.5',
                'metrics/F1', 'val/giou_loss', 'val/obj_loss', 'val/cls_loss'
            ]
            for x, tag in zip(list(mloss[:-1]) + list(results), tags):
                tb_writer.add_scalar(tag, x, epoch)

        # Update best mAP
        fi = fitness(np.array(results).reshape(
            1, -1))  # fitness_i = weighted combination of [P, R, mAP, F1]
        if fi > best_fitness:
            best_fitness = fi

        # Save training results
        save = (not opt.nosave) or (final_epoch and not opt.evolve)
        if save:
            with open(results_file, 'r') as f:
                # Create checkpoint (raw EMA state dict; richer dict variant
                # kept below for reference)
                chkpt = ema.ema.state_dict()
                #{'epoch': epoch,
                #'best_fitness': best_fitness,
                #'training_results': f.read(),
                #'model': ema.ema.module.state_dict() if hasattr(model, 'module') else ema.ema.state_dict(),
                #'optimizer': None if final_epoch else optimizer.state_dict()}

            # Save last checkpoint
            torch.save(chkpt, last)

            # Save best checkpoint
            if (best_fitness == fi) and not final_epoch:
                torch.save(chkpt, best)

            # Save backup every 10 epochs (optional)
            # if epoch > 0 and epoch % 10 == 0:
            #     torch.save(chkpt, wdir + 'backup%g.pt' % epoch)

            # Delete checkpoint
            del chkpt

        # end epoch ----------------------------------------------------------------------------------------------------

    # end training
    n = opt.name
    if len(n):
        n = '_' + n if not n.isnumeric() else n
        fresults, flast, fbest = 'results%s.txt' % n, wdir + 'last%s.pt' % n, wdir + 'best%s.pt' % n
        for f1, f2 in zip([wdir + 'last.pt', wdir + 'best.pt', 'results.txt'],
                          [flast, fbest, fresults]):
            if os.path.exists(f1):
                os.rename(f1, f2)  # rename
                ispt = f2.endswith('.pt')  # is *.pt
                strip_optimizer(f2) if ispt else None  # strip optimizer
                os.system('gsutil cp %s gs://%s/weights' % (
                    f2,
                    opt.bucket)) if opt.bucket and ispt else None  # upload

    if not opt.evolve:
        plot_results()  # save as results.png
    print('%g epochs completed in %.3f hours.\n' %
          (epoch - start_epoch + 1, (time.time() - t0) / 3600))
    dist.destroy_process_group() if torch.cuda.device_count() > 1 else None
    torch.cuda.empty_cache()
    return results