def train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq=50, warmup=False):
    """Train a detection model for one epoch.

    Args:
        model: detection model; ``model(images, targets)`` must return a dict of losses.
        optimizer: optimizer updating the model's parameters.
        data_loader: yields ``(images, targets)`` batches (lists of per-image tensors/dicts).
        device: torch.device to train on; AMP autocast is enabled only for CUDA devices.
        epoch: current epoch index (0-based); controls warmup activation.
        print_freq: log metrics every N iterations.
        warmup: when True and ``epoch == 0``, applies a per-iteration warmup LR schedule.

    Returns:
        (mloss, now_lr): running mean of the reduced total loss (1-element tensor on
        ``device``) and the learning rate of the first param group after the last step.
    """
    model.train()
    metric_logger = utils.MetricLogger(delimiter=" ")
    metric_logger.add_meter(
        'lr', utils.SmoothedValue(window_size=1, fmt='{value:.6f}'))
    header = 'Epoch: [{}]'.format(epoch)

    lr_scheduler = None
    if epoch == 0 and warmup is True:
        # For the first epoch (epoch == 0), ramp the LR up from a small factor
        # over the initial iterations ("warmup") to stabilise early training.
        warmup_factor = 1.0 / 1000
        warmup_iters = min(1000, len(data_loader) - 1)
        lr_scheduler = utils.warmup_lr_scheduler(optimizer, warmup_iters, warmup_factor)

    mloss = torch.zeros(1).to(device)  # mean losses
    # AMP only applies on CUDA; on CPU autocast would be a no-op anyway.
    enable_amp = True if "cuda" in device.type else False
    for i, [images, targets] in enumerate(
            metric_logger.log_every(data_loader, print_freq, header)):
        images = list(image.to(device) for image in images)
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        # Mixed-precision context manager; has no effect when running on CPU.
        with torch.cuda.amp.autocast(enabled=enable_amp):
            loss_dict = model(images, targets)
            losses = sum(loss for loss in loss_dict.values())

        # reduce losses over all GPUs for logging purpose
        loss_dict_reduced = utils.reduce_dict(loss_dict)
        losses_reduced = sum(loss for loss in loss_dict_reduced.values())
        loss_value = losses_reduced.item()
        # Record the training loss as a running mean over iterations.
        mloss = (mloss * i + loss_value) / (i + 1)  # update mean losses

        if not math.isfinite(loss_value):  # stop training when the loss becomes non-finite
            print("Loss is {}, stopping training".format(loss_value))
            print(loss_dict_reduced)
            sys.exit(1)

        optimizer.zero_grad()
        # NOTE(review): autocast is enabled on CUDA but no GradScaler is used for
        # backward/step — fp16 gradients may underflow; confirm this is intended.
        losses.backward()
        optimizer.step()

        if lr_scheduler is not None:  # first epoch uses the warmup schedule
            lr_scheduler.step()

        metric_logger.update(loss=losses_reduced, **loss_dict_reduced)
        now_lr = optimizer.param_groups[0]["lr"]
        metric_logger.update(lr=now_lr)

    return mloss, now_lr
def evaluate(model, data_loader, device, mAP_list=None): n_threads = torch.get_num_threads() # FIXME remove this and make paste_masks_in_image run on the GPU torch.set_num_threads(1) cpu_device = torch.device("cpu") model.eval() metric_logger = utils.MetricLogger(delimiter=" ") header = "Test: " coco = get_coco_api_from_dataset(data_loader.dataset) iou_types = _get_iou_types(model) coco_evaluator = CocoEvaluator(coco, iou_types) for image, targets in metric_logger.log_every(data_loader, 100, header): image = list(img.to(device) for img in image) targets = [{k: v.to(device) for k, v in t.items()} for t in targets] # 当使用CPU时,跳过GPU相关指令 if device != torch.device("cpu"): torch.cuda.synchronize(device) model_time = time.time() outputs = model(image) outputs = [{k: v.to(cpu_device) for k, v in t.items()} for t in outputs] model_time = time.time() - model_time res = { target["image_id"].item(): output for target, output in zip(targets, outputs) } evaluator_time = time.time() coco_evaluator.update(res) evaluator_time = time.time() - evaluator_time metric_logger.update(model_time=model_time, evaluator_time=evaluator_time) # gather the stats from all processes metric_logger.synchronize_between_processes() print("Averaged stats:", metric_logger) coco_evaluator.synchronize_between_processes() # accumulate predictions from all images coco_evaluator.accumulate() coco_evaluator.summarize() torch.set_num_threads(n_threads) print_txt = coco_evaluator.coco_eval[iou_types[0]].stats coco_mAP = print_txt[0] voc_mAP = print_txt[1] if isinstance(mAP_list, list): mAP_list.append(voc_mAP) return coco_evaluator
def evaluate(model, data_loader, device):
    """Run COCO evaluation for a detection model.

    Returns:
        The COCOeval stats of the first IoU type as a plain Python list.
    """
    cpu_device = torch.device("cpu")
    model.eval()
    metric_logger = utils.MetricLogger(delimiter=" ")
    header = "Test: "

    coco = get_coco_api_from_dataset(data_loader.dataset)
    iou_types = _get_iou_types(model)
    coco_evaluator = CocoEvaluator(coco, iou_types)

    for image, targets in metric_logger.log_every(data_loader, 100, header):
        image = [img.to(device) for img in image]

        # GPU-only synchronisation; nothing to do on CPU.
        if device != torch.device("cpu"):
            torch.cuda.synchronize(device)

        start = time.time()
        outputs = model(image)
        outputs = [{k: v.to(cpu_device) for k, v in out.items()} for out in outputs]
        model_time = time.time() - start

        # Key each prediction dict by its image id.
        res = {}
        for target, output in zip(targets, outputs):
            res[target["image_id"].item()] = output

        start = time.time()
        coco_evaluator.update(res)
        evaluator_time = time.time() - start
        metric_logger.update(model_time=model_time, evaluator_time=evaluator_time)

    # gather the stats from all processes
    metric_logger.synchronize_between_processes()
    print("Averaged stats:", metric_logger)
    coco_evaluator.synchronize_between_processes()

    # accumulate predictions from all images
    coco_evaluator.accumulate()
    coco_evaluator.summarize()

    coco_info = coco_evaluator.coco_eval[iou_types[0]].stats.tolist()  # numpy to list
    return coco_info
def evaluate(model, data_loader, device, num_classes):
    """Compute a confusion matrix for a segmentation model over `data_loader`.

    Returns:
        The (process-reduced) utils.ConfusionMatrix instance.
    """
    model.eval()
    confmat = utils.ConfusionMatrix(num_classes)
    metric_logger = utils.MetricLogger(delimiter=" ")
    header = 'Test:'
    with torch.no_grad():
        for batch_images, batch_labels in metric_logger.log_every(data_loader, 100, header):
            batch_images = batch_images.to(device)
            batch_labels = batch_labels.to(device)
            # Model returns a dict; 'out' holds the dense per-pixel logits.
            prediction = model(batch_images)['out']
            confmat.update(batch_labels.flatten(), prediction.argmax(1).flatten())

        confmat.reduce_from_all_processes()

    return confmat
def train_one_epoch(model, optimizer, data_loader, device, epoch, num_classes,
                    lr_scheduler, print_freq=10, scaler=None):
    """Train a segmentation model for one epoch, optionally with AMP.

    Returns:
        (avg_loss, lr): global-average loss over the epoch and the last LR used.
    """
    model.train()
    metric_logger = utils.MetricLogger(delimiter=" ")
    metric_logger.add_meter(
        'lr', utils.SmoothedValue(window_size=1, fmt='{value:.6f}'))
    header = 'Epoch: [{}]'.format(epoch)

    # Cross-entropy class weights for background/foreground in the binary case
    # (tune for your own dataset); disabled for multi-class setups.
    loss_weight = torch.as_tensor([1.0, 2.0], device=device) if num_classes == 2 else None

    for image, target in metric_logger.log_every(data_loader, print_freq, header):
        image = image.to(device)
        target = target.to(device)
        # autocast is a no-op unless a GradScaler was supplied.
        with torch.cuda.amp.autocast(enabled=scaler is not None):
            output = model(image)
            loss = criterion(output, target, loss_weight, num_classes=num_classes, ignore_index=255)

        optimizer.zero_grad()
        if scaler is None:
            loss.backward()
            optimizer.step()
        else:
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()

        # Per-iteration LR schedule.
        lr_scheduler.step()

        lr = optimizer.param_groups[0]["lr"]
        metric_logger.update(loss=loss.item(), lr=lr)

    return metric_logger.meters["loss"].global_avg, lr
def train_one_epoch(model, optimizer, data_loader, device, epoch, warmup=True, print_freq=10):
    """Train a classification model for one epoch, with optional first-epoch warmup.

    Logs loss and LR to a MetricLogger; returns nothing.
    """
    model.train()
    metric_logger = utils.MetricLogger(delimiter=" ")
    metric_logger.add_meter('lr', utils.SmoothedValue(window_size=1, fmt='{value}'))
    header = 'Epoch: [{}]'.format(epoch)

    lr_scheduler = None
    if warmup is True and epoch == 0:
        # Ramp the LR up from a small factor during the very first epoch.
        warmup_factor = 1.0 / 1000
        warmup_iters = min(1000, len(data_loader) - 1)
        lr_scheduler = utils.warmup_lr_scheduler(optimizer, warmup_iters, warmup_factor)

    for image, target in metric_logger.log_every(data_loader, print_freq, header):
        image = image.to(device)
        target = target.to(device)

        loss = criterion(model(image), target)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if lr_scheduler is not None:
            lr_scheduler.step()

        lr = optimizer.param_groups[0]["lr"]
        metric_logger.update(loss=loss.item(), lr=round(lr, 5))
def train_one_epoch(model, optimizer, data_loader, device, epoch, lr_scheduler, print_freq=10, scaler=None):
    """One training epoch with per-iteration LR stepping; AMP when `scaler` is given.

    Returns:
        (avg_loss, lr): global-average loss over the epoch and the last LR used.
    """
    model.train()
    metric_logger = utils.MetricLogger(delimiter=" ")
    metric_logger.add_meter(
        'lr', utils.SmoothedValue(window_size=1, fmt='{value:.6f}'))
    header = 'Epoch: [{}]'.format(epoch)

    for image, target in metric_logger.log_every(data_loader, print_freq, header):
        image = image.to(device)
        target = target.to(device)
        # autocast is a no-op unless a GradScaler was supplied.
        with torch.cuda.amp.autocast(enabled=scaler is not None):
            loss = criterion(model(image), target)

        optimizer.zero_grad()
        if scaler is None:
            loss.backward()
            optimizer.step()
        else:
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()

        # Per-iteration LR schedule.
        lr_scheduler.step()

        lr = optimizer.param_groups[0]["lr"]
        metric_logger.update(loss=loss.item(), lr=lr)

    return metric_logger.meters["loss"].global_avg, lr
def evaluate(model, data_loader, device):
    """Evaluate an 80-class COCO detector against the 91-class ground truth.

    Predictions are converted to COCO json format (boxes as [x, y, w, h],
    labels mapped back to the original 91-category ids), gathered across all
    processes, dumped to 'predict_tmp.json' and scored with COCOeval on the
    main process only.

    Returns:
        list of COCOeval stats on the main process; None on other processes.
    """
    cpu_device = torch.device("cpu")
    model.eval()
    metric_logger = utils.MetricLogger(delimiter=" ")
    header = "Test: "

    # Mapping tables between the raw COCO-91 category ids and the 80 used ones.
    coco91to80 = data_loader.dataset.coco91to80
    coco80to91 = dict([(str(v), k) for k, v in coco91to80.items()])

    results = []
    for image, targets in metric_logger.log_every(data_loader, 100, header):
        image = list(img.to(device) for img in image)

        # Skip GPU-only instructions when running on CPU.
        if device != torch.device("cpu"):
            torch.cuda.synchronize(device)

        model_time = time.time()
        outputs = model(image)

        outputs = [{k: v.to(cpu_device) for k, v in t.items()} for t in outputs]
        model_time = time.time() - model_time

        # Walk over every image's predictions.
        for target, output in zip(targets, outputs):
            if len(output) == 0:
                continue

            img_id = int(target["image_id"])
            per_image_boxes = output["boxes"]
            # coco_eval expects each box as [x_min, y_min, w, h] while the model
            # predicts [x_min, y_min, x_max, y_max], so convert in place.
            per_image_boxes[:, 2:] -= per_image_boxes[:, :2]
            per_image_classes = output["labels"]
            per_image_scores = output["scores"]

            # Walk over each detected object in this image.
            for object_score, object_class, object_box in zip(
                    per_image_scores, per_image_classes, per_image_boxes):
                object_score = float(object_score)
                # Map the predicted label back into the COCO-91 id space.
                coco80_class = int(object_class)
                coco91_class = int(coco80to91[str(coco80_class)])
                # We recommend rounding coordinates to the nearest tenth of a pixel
                # to reduce resulting JSON file size.
                object_box = [round(b, 2) for b in object_box.tolist()]

                res = {"image_id": img_id,
                       "category_id": coco91_class,
                       "bbox": object_box,
                       "score": round(object_score, 3)}
                results.append(res)

        metric_logger.update(model_time=model_time)

    # gather the stats from all processes
    metric_logger.synchronize_between_processes()
    print("Averaged stats:", metric_logger)

    # Gather the per-process prediction lists from all processes.
    all_results = utils.all_gather(results)

    if utils.is_main_process():
        # Merge the gathered per-process lists into a single flat list.
        results = []
        for res in all_results:
            results.extend(res)

        # write predict results into json file
        json_str = json.dumps(results, indent=4)
        with open('predict_tmp.json', 'w') as json_file:
            json_file.write(json_str)

        # accumulate predictions from all images
        coco_true = data_loader.dataset.coco
        coco_pre = coco_true.loadRes('predict_tmp.json')

        coco_evaluator = COCOeval(cocoGt=coco_true, cocoDt=coco_pre, iouType="bbox")
        coco_evaluator.evaluate()
        coco_evaluator.accumulate()
        coco_evaluator.summarize()

        coco_info = coco_evaluator.stats.tolist()  # numpy to list
    else:
        coco_info = None

    return coco_info
def evaluate(model, data_loader, device, data_set=None, mAP_list=None):
    """Evaluate an SSD-style detector (relative-coordinate outputs) with COCO metrics.

    The model returns ``(bboxes, labels, scores)`` triplets with boxes in
    relative 0-1 coordinates; they are rescaled to absolute pixel coordinates
    via each target's "height_width" entry before being fed to the evaluator.

    Args:
        data_set: pre-built COCO api object; built from the dataset when None.
        mAP_list: optional list; ``stats[1]`` (mAP@0.5) is appended to it.

    Note: this variant returns nothing; results are reported via printing and
    the ``mAP_list`` side channel.
    """
    n_threads = torch.get_num_threads()
    # FIXME remove this and make paste_masks_in_image run on the GPU
    torch.set_num_threads(1)
    cpu_device = torch.device("cpu")
    model.eval()
    metric_logger = utils.MetricLogger(delimiter=" ")
    header = "Test: "

    if data_set is None:
        data_set = get_coco_api_from_dataset(data_loader.dataset)
    iou_types = _get_iou_types(model)
    coco_evaluator = CocoEvaluator(data_set, iou_types)

    for images, targets in metric_logger.log_every(data_loader, 100, header):
        images = torch.stack(images, dim=0)
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
        images = images.to(device)
        # targets = {k: v.to(device) for k, v in targets.items()}

        if device != torch.device("cpu"):
            torch.cuda.synchronize(device)

        model_time = time.time()
        # list((bboxes_out, labels_out, scores_out), ...)
        results = model(images, targets)

        outputs = []
        for index, (bboxes_out, labels_out, scores_out) in enumerate(results):
            # Convert the relative box coordinates (0-1) into absolute
            # (xmin, ymin, xmax, ymax) pixel coordinates.
            height_width = targets[index]["height_width"]
            # height_width = [300, 300]
            bboxes_out[:, [0, 2]] = bboxes_out[:, [0, 2]] * height_width[1]
            bboxes_out[:, [1, 3]] = bboxes_out[:, [1, 3]] * height_width[0]

            info = {"boxes": bboxes_out.to(cpu_device),
                    "labels": labels_out.to(cpu_device),
                    "scores": scores_out.to(cpu_device)}
            outputs.append(info)
        # outputs = [{k: v.to(cpu_device) for k, v in t.items()} for t in outputs]
        model_time = time.time() - model_time

        # Key outputs by image id for the COCO evaluator.
        res = dict()
        for index in range(len(outputs)):
            info = {targets[index]["image_id"].item(): outputs[index]}
            res.update(info)
        # res = {target["image_id"].item(): output for target, output in zip(targets, outputs)}

        evaluator_time = time.time()
        coco_evaluator.update(res)
        evaluator_time = time.time() - evaluator_time
        metric_logger.update(model_time=model_time, evaluator_time=evaluator_time)

    # gather the stats from all processes
    metric_logger.synchronize_between_processes()
    print("Averaged stats:", metric_logger)
    coco_evaluator.synchronize_between_processes()

    # accumulate predictions from all images
    coco_evaluator.accumulate()
    coco_evaluator.summarize()
    torch.set_num_threads(n_threads)

    print_txt = coco_evaluator.coco_eval[iou_types[0]].stats
    coco_mAP = print_txt[0]  # mAP@[0.5:0.95]
    voc_mAP = print_txt[1]   # mAP@0.5
    if isinstance(mAP_list, list):
        mAP_list.append(voc_mAP)
def train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq,
                    train_loss=None, train_lr=None, warmup=False):
    """One training epoch for an SSD-style model that consumes batched targets.

    Per-image target dicts are re-packed into batched tensors ("boxes",
    "labels", "image_id") before the forward pass; the model is expected to
    return a loss dict containing "total_losses".

    Args:
        train_loss: optional list; the reduced total loss of every iteration
            is appended to it.
        train_lr: optional list; the LR of every iteration is appended to it.
        warmup: when True and ``epoch == 0``, applies a per-iteration warmup schedule.

    Note: returns nothing; progress is reported via the MetricLogger and the
    ``train_loss`` / ``train_lr`` side channels.
    """
    model.train()
    metric_logger = utils.MetricLogger(delimiter=" ")
    metric_logger.add_meter('lr', utils.SmoothedValue(window_size=1, fmt='{value:.6f}'))
    header = 'Epoch: [{}]'.format(epoch)

    lr_scheduler = None
    if epoch == 0 and warmup is True:
        # First epoch: ramp the LR up from a small factor ("warmup" training).
        warmup_factor = 5.0 / 10000
        warmup_iters = min(1000, len(data_loader) - 1)
        lr_scheduler = utils.warmup_lr_scheduler(optimizer, warmup_iters, warmup_factor)

    for images, targets in metric_logger.log_every(data_loader, print_freq, header):
        # batch inputs information
        images = torch.stack(images, dim=0)

        # Re-pack the list of per-image target dicts into batched tensors.
        boxes = []
        labels = []
        img_id = []
        for t in targets:
            boxes.append(t['boxes'])
            labels.append(t['labels'])
            img_id.append(t["image_id"])
        targets = {"boxes": torch.stack(boxes, dim=0),
                   "labels": torch.stack(labels, dim=0),
                   "image_id": torch.as_tensor(img_id)}

        images = images.to(device)
        targets = {k: v.to(device) for k, v in targets.items()}

        losses_dict = model(images, targets)
        losses = losses_dict["total_losses"]

        # reduce losses over all GPUs for logging purpose
        losses_dict_reduced = utils.reduce_dict(losses_dict)
        losses_reduce = losses_dict_reduced["total_losses"]
        loss_value = losses_reduce.item()

        if isinstance(train_loss, list):
            # Record the per-iteration training loss.
            train_loss.append(loss_value)

        if not math.isfinite(loss_value):  # stop when the loss becomes non-finite
            print("Loss is {}, stopping training".format(loss_value))
            print(losses_dict_reduced)
            sys.exit(1)

        optimizer.zero_grad()
        losses.backward()
        optimizer.step()

        if lr_scheduler is not None:  # first epoch uses the warmup schedule
            lr_scheduler.step()

        # metric_logger.update(loss=losses, **loss_dict_reduced)
        metric_logger.update(**losses_dict_reduced)
        now_lr = optimizer.param_groups[0]["lr"]
        metric_logger.update(lr=now_lr)
        if isinstance(train_lr, list):
            train_lr.append(now_lr)
def _train_one_epoch(self, train_loader, batch_size=0, epoch=0, print_freq=1,
                     multi_scale=False, img_size=(512, 512), grid_min=None,
                     grid_max=None, grid_size=32, random_size=64,
                     device=torch.device('cuda'), warmup=False):
    """Train the wrapped YOLO-style model for one epoch with AMP and
    gradient accumulation (one optimizer step every `random_size` batches).

    Args:
        multi_scale: when True, re-randomises the input resolution every
            `random_size` batches via ``self.random_size``.
        random_size: accumulation/rescale interval in batches; forced to 1
            during warmup so the optimizer steps every batch.

    Returns:
        (loss_mean, lr_now): 4-element tensor of mean [box, obj, class, total]
        losses and the last learning rate of the first param group.
    """
    self.model.train()
    metric_logger = utils.MetricLogger(delimiter=" ")
    metric_logger.add_meter(
        'lr', utils.SmoothedValue(window_size=1, fmt='{value:.6f}'))
    header = 'Epoch: [{}]'.format(epoch)

    lr_scheduler = None
    if epoch == 0 and warmup:
        # First epoch: ramp the LR up from a small factor ("warmup" training).
        warmup_factor = 1.0 / 1000
        warmup_iters = min(1000, len(train_loader) - 1)
        lr_scheduler = utils.warmup_lr_scheduler(self.optimizer, warmup_iters, warmup_factor)
        # No gradient accumulation during warmup: step on every batch.
        random_size = 1

    enable_amp = 'cuda' in device.type
    scale = amp.GradScaler(enabled=enable_amp)

    lr_now = 0.
    loss_mean = torch.zeros(4).to(device)  # mean losses
    # NOTE(review): the `batch_size` parameter is overwritten here with the
    # number of batches — confirm the parameter is intentionally unused.
    batch_size = len(train_loader)  # number of batches
    for i, (images, targets, paths, _, _) in enumerate(
            metric_logger.log_every(train_loader, print_freq, header)):
        # count_batch counts all batches since epoch 0 (since train start).
        count_batch = i + batch_size * epoch  # number integrated batches (since train start)

        images = images.to(device).float() / 255.0  # uint8 to float32, 0 - 255 to 0.0 - 1.0
        targets = targets.to(device)

        # Multi-Scale
        # Labels are already in relative coordinates, so rescaling the images
        # does not affect the label values.
        # Re-randomise the input image size every `random_size` trained batches.
        if multi_scale:
            images, img_size = self.random_size(
                images, img_size, count_batch % random_size == 0,
                grid_min, grid_max, grid_size)

        # Mixed-precision context manager; has no effect when running on CPU.
        with amp.autocast(enabled=enable_amp):
            # loss: compute_loss
            loss_dict = self.loss(self.model(images), targets)
            losses = sum(loss for loss in loss_dict.values())

        # reduce losses over all GPUs for logging purpose
        loss_dict_reduced = utils.reduce_dict(loss_dict)
        losses_reduced = sum(loss for loss in loss_dict_reduced.values())
        loss_items = torch.cat((loss_dict_reduced["box_loss"],
                                loss_dict_reduced["obj_loss"],
                                loss_dict_reduced["class_loss"],
                                losses_reduced)).detach()
        loss_mean = (loss_mean * i + loss_items) / (i + 1)  # update mean losses

        if not torch.isfinite(losses_reduced):
            print('WARNING: non-finite loss, ending training ', loss_dict_reduced)
            print("training image path: {}".format(",".join(paths)))
            sys.exit(1)

        # Average the loss over the accumulation window.
        losses *= 1. / random_size  # scale loss

        # backward
        scale.scale(losses).backward()
        # optimize
        # Update the weights once every `random_size` accumulated batches.
        if count_batch % random_size == 0:
            scale.step(self.optimizer)
            scale.update()
            self.optimizer.zero_grad()

        metric_logger.update(loss=losses_reduced, **loss_dict_reduced)
        lr_now = self.optimizer.param_groups[0]["lr"]
        metric_logger.update(lr=lr_now)

        if count_batch % random_size == 0 and lr_scheduler is not None:
            # First epoch uses the warmup schedule.
            # NOTE(review): optimizer.step() is called here in addition to
            # scale.step() above — confirm the extra step is intended.
            self.optimizer.step()
            lr_scheduler.step()

    return loss_mean, lr_now
def evaluate(self, data_loader, coco=None, device=None): n_threads = torch.get_num_threads() # FIXME remove this and make paste_masks_in_image run on the GPU torch.set_num_threads(1) if not device: device = torch.device("cpu") self.model.eval() metric_logger = utils.MetricLogger(delimiter=" ") header = "Test: " if coco is None: coco = get_coco_api_from_dataset(data_loader.dataset) iou_types = _get_iou_types(self.model) coco_evaluator = CocoEvaluator(coco, iou_types) log_every = metric_logger.log_every(data_loader, 100, header) for images, targets, paths, shapes, img_index in log_every: images = images.to(device).float( ) / 255.0 # uint8 to float32, 0 - 255 to 0.0 - 1.0 # 当使用CPU时,跳过GPU相关指令 if device != torch.device("cpu"): torch.cuda.synchronize(device) model_time = time.time() pred = self.model(images)[0] # only get inference result pred = non_max_suppression(pred, conf_thres=0.001, iou_thres=0.6, multi_label=False) outputs = [] for index, pred_i in enumerate(pred): if pred_i is None: pred_i = torch.empty((0, 6), device=device) boxes = torch.empty((0, 4), device=device) else: boxes = pred_i[:, :4] # l, t, r, b # shapes: (h0, w0), ((h / h0, w / w0), pad) # 将boxes信息还原回原图尺度,这样计算的mAP才是准确的 boxes = scale_coordinates(boxes, images[index].shape[1:], shapes[index]).round() image = images[index] self.img_show(image, boxes) # 注意这里传入的boxes格式必须是 l_abs, t_abs, r_abs, b_abs,且为绝对坐标 info = { "boxes": boxes.to(device), "labels": pred_i[:, 5].to(device=device, dtype=torch.int64), "scores": pred_i[:, 4].to(device) } outputs.append(info) model_time = time.time() - model_time res = { img_id: output for img_id, output in zip(img_index, outputs) } evaluator_time = time.time() coco_evaluator.update(res) evaluator_time = time.time() - evaluator_time metric_logger.update(model_time=model_time, evaluator_time=evaluator_time) # gather the stats from all processes metric_logger.synchronize_between_processes() print("Averaged stats:", metric_logger) coco_evaluator.synchronize_between_processes() 
# accumulate predictions from all images coco_evaluator.accumulate() coco_evaluator.summarize() torch.set_num_threads(n_threads) result_info = coco_evaluator.coco_eval[ iou_types[0]].stats.tolist() # numpy to list return result_info
def evaluate(model, data_loader, device, data_set=None):
    """COCO-evaluate an SSD-style detector whose outputs use relative boxes.

    Boxes come back in 0-1 coordinates and are rescaled to absolute pixel
    coordinates using each target's "height_width" entry before scoring.

    Returns:
        COCOeval stats of the first IoU type as a plain Python list.
    """
    cpu_device = torch.device("cpu")
    model.eval()
    metric_logger = utils.MetricLogger(delimiter=" ")
    header = "Test: "

    if data_set is None:
        data_set = get_coco_api_from_dataset(data_loader.dataset)
    iou_types = _get_iou_types(model)
    coco_evaluator = CocoEvaluator(data_set, iou_types)

    for images, targets in metric_logger.log_every(data_loader, 100, header):
        batch = torch.stack(images, dim=0).to(device)

        if device != torch.device("cpu"):
            torch.cuda.synchronize(device)

        start = time.time()
        # list((bboxes_out, labels_out, scores_out), ...)
        results = model(batch, targets=None)
        model_time = time.time() - start

        outputs = []
        for idx, (bboxes, labels, scores) in enumerate(results):
            # Scale relative (0-1) boxes back to absolute (xmin, ymin, xmax, ymax).
            height_width = targets[idx]["height_width"]
            bboxes[:, [0, 2]] = bboxes[:, [0, 2]] * height_width[1]
            bboxes[:, [1, 3]] = bboxes[:, [1, 3]] * height_width[0]
            outputs.append({"boxes": bboxes.to(cpu_device),
                            "labels": labels.to(cpu_device),
                            "scores": scores.to(cpu_device)})

        # Key each prediction dict by its image id.
        res = {}
        for target, output in zip(targets, outputs):
            res[target["image_id"].item()] = output

        start = time.time()
        coco_evaluator.update(res)
        evaluator_time = time.time() - start
        metric_logger.update(model_time=model_time, evaluator_time=evaluator_time)

    # gather the stats from all processes
    metric_logger.synchronize_between_processes()
    print("Averaged stats:", metric_logger)
    coco_evaluator.synchronize_between_processes()

    # accumulate predictions from all images
    coco_evaluator.accumulate()
    coco_evaluator.summarize()

    coco_info = coco_evaluator.coco_eval[iou_types[0]].stats.tolist()  # numpy to list
    return coco_info