def main():
    """Entry point for SSD training: parse CLI arguments, initialize
    distributed training when launched with multiple processes, load the
    config, train, and optionally evaluate the final model."""
    parser = argparse.ArgumentParser(
        description='Single Shot MultiBox Detector Training With PyTorch')
    parser.add_argument("--config-file", default="", metavar="FILE",
                        help="path to config file", type=str)
    parser.add_argument("--local_rank", type=int, default=0)
    parser.add_argument('--vgg',
                        help='Pre-trained vgg model path, download from https://s3.amazonaws.com/amdegroot-models/vgg16_reducedfc.pth')
    parser.add_argument('--resume', default=None, type=str,
                        help='Checkpoint state_dict file to resume training from')
    parser.add_argument('--log_step', default=50, type=int,
                        help='Print logs every log_step')
    parser.add_argument('--save_step', default=5000, type=int,
                        help='Save checkpoint every save_step')
    parser.add_argument('--use_tensorboard', default=True, type=str2bool)
    parser.add_argument("--skip-test", dest="skip_test",
                        help="Do not test the final model", action="store_true")
    parser.add_argument("opts", help="Modify config options using the command-line",
                        default=None, nargs=argparse.REMAINDER)
    args = parser.parse_args()

    # The torch.distributed launcher exports WORLD_SIZE; without it, run on 1 GPU.
    gpu_count = int(os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    args.distributed = gpu_count > 1
    args.num_gpus = gpu_count
    if args.distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl", init_method="env://")

    logger = setup_logger("SSD", distributed_util.get_rank())
    logger.info(f"Using {gpu_count} GPUs")
    logger.info(args)

    # Layer the YAML file and trailing CLI overrides onto the default config.
    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()
    logger.info(f"Loaded configuration file {args.config_file}")
    with open(args.config_file, "r") as fh:
        logger.info("\n" + fh.read())
    logger.info(f"Running with config:\n{cfg}")

    model = train(cfg, args)

    if not args.skip_test:
        logger.info('Start evaluating...')
        torch.cuda.empty_cache()  # speed up evaluating after training finished
        do_evaluation(cfg, model, cfg.OUTPUT_DIR, distributed=args.distributed)
def main():
    """CLI entry point for evaluating a trained SSD checkpoint on the
    datasets configured in the given config file."""
    parser = argparse.ArgumentParser(description='SSD Evaluation on VOC and COCO dataset.')
    parser.add_argument("--config-file", default="", metavar="FILE",
                        help="path to config file", type=str)
    parser.add_argument("--local_rank", type=int, default=0)
    parser.add_argument("--weights", type=str, help="Trained weights.")
    parser.add_argument("--output_dir", default="eval_results", type=str,
                        help="The directory to store evaluation results.")
    parser.add_argument("opts", help="Modify config options using the command-line",
                        default=None, nargs=argparse.REMAINDER)
    args = parser.parse_args()

    # WORLD_SIZE is exported by the distributed launcher; default to one process.
    world_size = int(os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    distributed = world_size > 1
    if distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl", init_method="env://")

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    logger = setup_logger("SSD", distributed_util.get_rank())
    logger.info(f"Using {world_size} GPUs")
    logger.info(args)
    logger.info(f"Loaded configuration file {args.config_file}")
    with open(args.config_file, "r") as fh:
        logger.info("\n" + fh.read())
    logger.info(f"Running with config:\n{cfg}")

    evaluation(cfg, weights_file=args.weights, output_dir=args.output_dir, distributed=distributed)
def main():
    """CLI entry point for SSD evaluation with a selectable eval split
    (test or validation), optionally distributed across processes."""
    parser = argparse.ArgumentParser(description='SSD Evaluation on VOC and COCO dataset.')
    parser.add_argument("--config-file", default="", metavar="FILE",
                        help="path to config file", type=str)
    parser.add_argument("--local_rank", type=int, default=0)
    parser.add_argument("--weights", type=str, help="Trained weights.")
    parser.add_argument("--output_dir", default="eval_results", type=str,
                        help="The directory to store evaluation results.")
    parser.add_argument("--eval_mode", default="test", type=str,
                        help='Use defined test datasets for final evaluation or use a validation split. Default: "test", alternative "val"')
    parser.add_argument("opts", help="Modify config options using the command-line",
                        default=None, nargs=argparse.REMAINDER)
    args = parser.parse_args()

    world_size = int(os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    distributed = world_size > 1

    if torch.cuda.is_available():
        # This flag allows you to enable the inbuilt cudnn auto-tuner to
        # find the best algorithm to use for your hardware.
        torch.backends.cudnn.benchmark = True
    if distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl", init_method="env://")

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    logger = setup_logger("SSD", distributed_util.get_rank())
    logger.info(f"Using {world_size} GPUs")
    logger.info(args)
    logger.info(f"Loaded configuration file {args.config_file}")
    with open(args.config_file, "r") as fh:
        logger.info("\n" + fh.read())
    logger.info(f"Running with config:\n{cfg}")

    evaluation(cfg, args, weights_file=args.weights, output_dir=args.output_dir, distributed=distributed)
def do_evaluation(cfg, model, output_dir, distributed):
    """Evaluate `model` on every dataset listed in cfg.DATASETS.TEST.

    Predictions are sharded round-robin across ranks when `distributed`,
    gathered back, then saved and scored on the main process only.

    Args:
        cfg: frozen config node (datasets, thresholds, device).
        model: SSD model, possibly wrapped in DistributedDataParallel.
        output_dir: root directory; one subdirectory per dataset is created.
        distributed: whether evaluation runs across multiple processes.
    """
    if isinstance(model, torch.nn.parallel.DistributedDataParallel):
        model = model.module
    assert isinstance(model, SSD), 'Wrong module.'
    test_datasets = build_dataset(dataset_list=cfg.DATASETS.TEST, is_test=True)
    device = torch.device(cfg.MODEL.DEVICE)
    model.eval()
    # Switch the head to inference mode (the `if not model.is_test` guard was
    # redundant — unconditional assignment is equivalent).
    model.is_test = True
    predictor = Predictor(cfg=cfg,
                          model=model,
                          iou_threshold=cfg.TEST.NMS_THRESHOLD,
                          score_threshold=cfg.TEST.CONFIDENCE_THRESHOLD,
                          device=device)
    cpu_device = torch.device("cpu")
    logger = logging.getLogger("SSD.inference")
    for dataset_name, test_dataset in zip(cfg.DATASETS.TEST, test_datasets):
        logger.info("Test dataset {} size: {}".format(dataset_name, len(test_dataset)))
        indices = list(range(len(test_dataset)))
        if distributed:
            # Shard image indices round-robin across ranks.
            indices = indices[distributed_util.get_rank()::distributed_util.get_world_size()]

        # show progress bar only on main process.
        progress_bar = tqdm if distributed_util.is_main_process() else iter
        logger.info('Progress on {} 0:'.format(cfg.MODEL.DEVICE.upper()))
        predictions = {}
        for i in progress_bar(indices):
            image = test_dataset.get_image(i)
            output = predictor.predict(image)
            boxes, labels, scores = [o.to(cpu_device).numpy() for o in output]
            predictions[i] = (boxes, labels, scores)
        distributed_util.synchronize()
        predictions = _accumulate_predictions_from_multiple_gpus(predictions)
        if not distributed_util.is_main_process():
            # BUGFIX: this was `return`, which made non-main ranks exit after
            # the FIRST dataset; the main rank then blocked forever in
            # synchronize()/gather for the next dataset. Non-main ranks must
            # keep participating in the loop — only skip the save/score step.
            continue
        final_output_dir = os.path.join(output_dir, dataset_name)
        os.makedirs(final_output_dir, exist_ok=True)  # race-free vs. exists()+makedirs()
        torch.save(predictions, os.path.join(final_output_dir, 'predictions.pth'))
        evaluate(dataset=test_dataset, predictions=predictions, output_dir=final_output_dir)
def _evaluation(cfg, dataset_name, test_dataset, predictor, distributed, output_dir):
    """Perform evaluating on one dataset.

    Args:
        cfg: global config node (only MODEL.DEVICE is read here, for logging).
        dataset_name: dataset's name, also used as the output subdirectory.
        test_dataset: Dataset object.
        predictor: Predictor object, used to run prediction.
        distributed: whether distributed evaluating or not.
        output_dir: path to save prediction results.

    Returns:
        evaluate() result on the main process; None on all other ranks.
    """
    cpu_device = torch.device("cpu")
    logger = logging.getLogger("SSD.inference")
    logger.info("Evaluating {} dataset({} images):".format(dataset_name, len(test_dataset)))
    indices = list(range(len(test_dataset)))
    if distributed:
        # Shard the image indices round-robin across ranks.
        indices = indices[distributed_util.get_rank()::distributed_util.get_world_size()]

    # show progress bar only on main process.
    progress_bar = tqdm if distributed_util.is_main_process() else iter
    logger.info('Progress on {} 0:'.format(cfg.MODEL.DEVICE.upper()))
    predictions = {}
    for i in progress_bar(indices):
        image = test_dataset.get_image(i)
        output = predictor.predict(image)
        # BUGFIX: removed a leftover debug print('output') that fired once per
        # image, flooding stdout; also removed commented-out transform code.
        boxes, labels, scores = [o.to(cpu_device).numpy() for o in output]
        predictions[i] = (boxes, labels, scores)
    distributed_util.synchronize()
    predictions = _accumulate_predictions_from_multiple_gpus(predictions)
    if not distributed_util.is_main_process():
        # This helper handles a single dataset, so non-main ranks are done.
        return
    final_output_dir = os.path.join(output_dir, dataset_name)
    os.makedirs(final_output_dir, exist_ok=True)
    torch.save(predictions, os.path.join(final_output_dir, 'predictions.pth'))
    return evaluate(dataset=test_dataset, predictions=predictions, output_dir=final_output_dir)
def do_train(cfg, model, data_loader, optimizer, scheduler, device, args):
    """Main SSD training loop (loss-dict model variant).

    Iterates once over `data_loader`; the model itself returns a dict of
    losses when given targets. Logs every `args.log_step` iterations,
    checkpoints every `args.save_step`, and optionally runs evaluation
    every `args.eval_step`. Returns the trained model.

    NOTE(review): scheduler.step() runs before optimizer.step() each
    iteration — the pre-PyTorch-1.1 ordering. Presumably intentional for
    the scheduler in use here; confirm against the build_lr_scheduler code.
    """
    logger = logging.getLogger("SSD.trainer")
    logger.info("Start training")
    model.train()
    # Only rank 0 writes checkpoints and tensorboard summaries.
    save_to_disk = distributed_util.get_rank() == 0
    if args.use_tensorboard and save_to_disk:
        import tensorboardX
        summary_writer = tensorboardX.SummaryWriter(log_dir=cfg.OUTPUT_DIR)
    else:
        summary_writer = None
    max_iter = len(data_loader)
    start_training_time = time.time()
    trained_time = 0
    tic = time.time()
    end = time.time()
    for iteration, (images, boxes, labels) in enumerate(data_loader):
        iteration = iteration + 1  # switch to a 1-based iteration counter
        scheduler.step()
        images = images.to(device)
        boxes = boxes.to(device)
        labels = labels.to(device)
        optimizer.zero_grad()
        # In training mode the model returns {loss_name: loss_tensor}.
        loss_dict = model(images, targets=(boxes, labels))
        # reduce losses over all GPUs for logging purposes
        loss_dict_reduced = reduce_loss_dict(loss_dict)
        losses_reduced = sum(loss for loss in loss_dict_reduced.values())
        # Backprop uses the local (un-reduced) losses.
        loss = sum(loss for loss in loss_dict.values())
        loss.backward()
        optimizer.step()
        trained_time += time.time() - end
        end = time.time()
        if iteration % args.log_step == 0:
            # ETA extrapolated from average per-iteration wall time so far.
            eta_seconds = int((trained_time / iteration) * (max_iter - iteration))
            log_str = [
                "Iter: {:06d}, Lr: {:.5f}, Cost: {:.2f}s, Eta: {}".format(iteration, optimizer.param_groups[0]['lr'], time.time() - tic, str(datetime.timedelta(seconds=eta_seconds))),
                "total_loss: {:.3f}".format(losses_reduced.item())
            ]
            for loss_name, loss_item in loss_dict_reduced.items():
                log_str.append("{}: {:.3f}".format(loss_name, loss_item.item()))
            log_str = ', '.join(log_str)
            logger.info(log_str)
            if summary_writer:
                global_step = iteration
                summary_writer.add_scalar('losses/total_loss', losses_reduced, global_step=global_step)
                for loss_name, loss_item in loss_dict_reduced.items():
                    summary_writer.add_scalar('losses/{}'.format(loss_name), loss_item, global_step=global_step)
                summary_writer.add_scalar('lr', optimizer.param_groups[0]['lr'], global_step=global_step)
            tic = time.time()
        if save_to_disk and iteration % args.save_step == 0:
            model_path = os.path.join(cfg.OUTPUT_DIR, "ssd{}_vgg_iteration_{:06d}.pth".format(cfg.INPUT.IMAGE_SIZE, iteration))
            _save_model(logger, model, model_path)
        # Do eval when training, to trace the mAP changes and see performance improved whether or nor
        if args.eval_step > 0 and iteration % args.eval_step == 0 and not iteration == max_iter:
            do_evaluation(cfg, model, cfg.OUTPUT_DIR, distributed=args.distributed)
            model.train()  # evaluation flips the model to eval mode; restore

    if save_to_disk:
        model_path = os.path.join(cfg.OUTPUT_DIR, "ssd{}_vgg_final.pth".format(cfg.INPUT.IMAGE_SIZE))
        _save_model(logger, model, model_path)
    # compute training time
    total_training_time = int(time.time() - start_training_time)
    total_time_str = str(datetime.timedelta(seconds=total_training_time))
    logger.info("Total training time: {} ({:.4f} s / it)".format(total_time_str, total_training_time / max_iter))
    return model
def do_train(cfg, model, data_loader, optimizer, scheduler, criterion, device, args):
    """SSD training loop for the raw-head model variant.

    Unlike the loss-dict variant, here the model returns
    (confidence, locations) and an external `criterion` computes the
    regression and classification losses. Returns the trained model.

    NOTE(review): scheduler.step() runs before optimizer.step() each
    iteration (pre-PyTorch-1.1 ordering) — confirm intended.
    """
    logger = logging.getLogger("SSD.trainer")
    logger.info("Start training")
    # Only rank 0 writes checkpoints and tensorboard summaries.
    save_to_disk = distributed_util.get_rank() == 0
    if args.use_tensorboard and save_to_disk:
        import tensorboardX
        summary_writer = tensorboardX.SummaryWriter(log_dir=cfg.OUTPUT_DIR, comment="myvgg")
    else:
        summary_writer = None
    model.train()
    max_iter = len(data_loader)
    start_training_time = time.time()
    trained_time = 0
    tic = time.time()
    end = time.time()
    for iteration, (images, boxes, labels) in enumerate(data_loader):
        iteration = iteration + 1  # 1-based iteration counter
        scheduler.step()
        images = images.to(device)
        boxes = boxes.to(device)
        labels = labels.to(device)
        optimizer.zero_grad()
        # Raw head outputs; the loss is computed outside the model.
        confidence, locations = model(images)
        regression_loss, classification_loss = criterion(confidence, locations, labels, boxes)
        # reduce losses over all GPUs for logging purposes
        loss_dict_reduced = reduce_loss_dict({'regression_loss': regression_loss, 'classification_loss': classification_loss})
        losses_reduced = sum(loss for loss in loss_dict_reduced.values())
        # Backprop uses the local (un-reduced) losses.
        loss = regression_loss + classification_loss
        loss.backward()
        optimizer.step()
        trained_time += time.time() - end
        end = time.time()
        if iteration % args.log_step == 0:
            # ETA extrapolated from average per-iteration wall time so far.
            eta_seconds = int((trained_time / iteration) * (max_iter - iteration))
            logger.info(
                "Iter: {:06d}, Lr: {:.5f}, Cost: {:.2f}s, Eta: {}, ".format(iteration, optimizer.param_groups[0]['lr'], time.time() - tic, str(datetime.timedelta(seconds=eta_seconds))) +
                "Loss: {:.3f}, ".format(losses_reduced.item()) +
                "Regression Loss {:.3f}, ".format(loss_dict_reduced['regression_loss'].item()) +
                "Classification Loss: {:.3f}".format(loss_dict_reduced['classification_loss'].item()))
            if summary_writer:
                global_step = iteration
                summary_writer.add_scalar('losses/total_loss', losses_reduced.item(), global_step=global_step)
                summary_writer.add_scalar('losses/location_loss', loss_dict_reduced['regression_loss'].item(), global_step=global_step)
                summary_writer.add_scalar('losses/class_loss', loss_dict_reduced['classification_loss'].item(), global_step=global_step)
                summary_writer.add_scalar('lr', optimizer.param_groups[0]['lr'], global_step=global_step)
            tic = time.time()
        if save_to_disk and iteration % args.save_step == 0:
            model_path = os.path.join(cfg.OUTPUT_DIR, "ssd{}_vgg_iteration_{:06d}.pth".format(cfg.INPUT.IMAGE_SIZE, iteration))
            _save_model(logger, model, model_path)

    if save_to_disk:
        model_path = os.path.join(cfg.OUTPUT_DIR, "ssd{}_vgg_final.pth".format(cfg.INPUT.IMAGE_SIZE))
        _save_model(logger, model, model_path)
    # compute training time
    total_training_time = int(time.time() - start_training_time)
    total_time_str = str(datetime.timedelta(seconds=total_training_time))
    logger.info("Total training time: {} ({:.4f} s / it)".format(total_time_str, total_training_time / max_iter))
    return model
def do_train(cfg, model, data_loader, optimizer, scheduler, device, args, val_sets_dict=None):
    """SSD training loop with domain-generalization (DG) batching, an
    optional self-supervised auxiliary task, apex-amp mixed precision, and
    optional best-on-validation model selection (`args.return_best`).

    Args:
        cfg: frozen config; cfg.DATASETS.DG and cfg.MODEL.SELF_SUPERVISED
            select the batching/auxiliary-task code paths.
        data_loader: a single loader, or a list of loaders (one per source
            domain) when cfg.DATASETS.DG is set.
        val_sets_dict: optional validation datasets forwarded to
            do_evaluation for periodic eval.

    Returns:
        The trained model (reloaded from the best checkpoint when
        args.return_best is set).

    NOTE(review): `tf_writer` relies on a module-level TensorFlow import
    (`tf`) and is never used after creation — confirm it is still needed.
    NOTE(review): resuming uses scheduler.last_epoch as the starting
    iteration, so the scheduler state encodes training progress.
    """
    logger = logging.getLogger("SSD.trainer")
    logger.info("Start training")
    model.train()
    # Only rank 0 writes checkpoints and tensorboard summaries.
    save_to_disk = distributed_util.get_rank() == 0
    if args.use_tensorboard and save_to_disk:
        import tensorboardX
        summary_writer = tensorboardX.SummaryWriter(log_dir=cfg.OUTPUT_DIR)
        tf_writer = tf.compat.v1.summary.FileWriter(cfg.OUTPUT_DIR)
    else:
        summary_writer = None
    if cfg.DATASETS.DG:
        # One loader per source domain; epoch length follows the first one.
        dataloaders = data_loader
        max_iter = len(data_loader[0])
        dataiters = [iter(dataloader) for dataloader in dataloaders]
    else:
        max_iter = len(data_loader)
        data_loader = iter(data_loader)
    start_training_time = time.time()
    trained_time = 0
    tic = time.time()
    end = time.time()
    if args.return_best:
        best_map = 0
    for iteration in range(scheduler.last_epoch, max_iter):
        if cfg.DATASETS.DG:
            # domain generalization settings
            # we need to read images from different sources
            images = torch.ones(cfg.SOLVER.BATCH_SIZE * len(dataloaders), 3, cfg.INPUT.IMAGE_SIZE, cfg.INPUT.IMAGE_SIZE)
            for j in range(len(dataloaders)):
                if cfg.MODEL.SELF_SUPERVISED:
                    d_images, d_boxes, d_labels, d_j_images, d_j_index, d_orig_boxes, d_orig_labels = next(dataiters[j])
                else:
                    d_images, d_boxes, d_labels, d_orig_boxes, d_orig_labels = next(dataiters[j])
                # Slot domain j's batch into its contiguous slice of `images`.
                start_bs = cfg.SOLVER.BATCH_SIZE * j
                end_bs = start_bs + cfg.SOLVER.BATCH_SIZE
                images[start_bs:end_bs, :, :, :] = d_images
                if j == 0:
                    boxes = d_boxes
                    labels = d_labels
                    orig_boxes = d_orig_boxes
                    orig_labels = d_orig_labels
                    if cfg.MODEL.SELF_SUPERVISED:
                        j_images = d_j_images
                        j_index = d_j_index
                else:
                    boxes = torch.cat((boxes, d_boxes))
                    labels = torch.cat((labels, d_labels))
                    orig_boxes = torch.cat((orig_boxes, d_orig_boxes))
                    orig_labels = torch.cat((orig_labels, d_orig_labels))
                    if cfg.MODEL.SELF_SUPERVISED:
                        j_images = torch.cat((j_images, d_j_images))
                        j_index = torch.cat((j_index, d_j_index))
        else:
            if cfg.MODEL.SELF_SUPERVISED:
                images, boxes, labels, j_images, j_index, orig_boxes, orig_labels = next(data_loader)
            else:
                images, boxes, labels, orig_boxes, orig_labels = next(data_loader)
        # it is not a problem if we increment iteration because it will be reset in the loop
        iteration = iteration + 1
        images = images.to(device)
        boxes = boxes.to(device)
        labels = labels.to(device)
        optimizer.zero_grad()
        loss_dict = model(images, targets=(boxes, labels))
        # reduce losses over all GPUs for logging purposes
        loss_dict_reduced = reduce_loss_dict(loss_dict)
        losses_reduced = sum(loss for loss in loss_dict_reduced.values())
        loss = sum(loss for loss in loss_dict.values())
        # loss.backward() becomes (apex amp loss scaling):
        with amp.scale_loss(loss, optimizer) as scaled_loss:
            scaled_loss.backward()
        if cfg.MODEL.SELF_SUPERVISED:
            # Second forward/backward pass for the auxiliary task; gradients
            # accumulate onto the detection-loss gradients before step().
            j_images = j_images.to(device)
            j_index = j_index.to(device)
            loss_dict_j = model(j_images, targets=j_index, auxiliary_task=True)
            loss_dict_reduced_j = reduce_loss_dict(loss_dict_j)
            losses_reduced_j = sum(loss for loss in loss_dict_reduced_j.values())
            loss_j = sum(loss for loss in loss_dict_j.values())
            # apply reduction factor for auxiliary loss
            loss_j = loss_j * cfg.MODEL.SELF_SUPERVISOR.WEIGHT
            # loss.backward() becomes:
            with amp.scale_loss(loss_j, optimizer) as scaled_loss:
                scaled_loss.backward()
            # append this loss to the dictionary of losses
            loss_dict.update(loss_dict_j)
            losses_reduced += losses_reduced_j
        optimizer.step()
        scheduler.step()
        trained_time += time.time() - end
        end = time.time()
        if iteration % args.log_step == 0:
            # ETA extrapolated from average per-iteration wall time so far.
            eta_seconds = int((trained_time / iteration) * (max_iter - iteration))
            log_str = [
                "Iter: {:06d}, Lr: {:.5f}, Cost: {:.2f}s, Eta: {}".format(iteration, optimizer.param_groups[0]['lr'], time.time() - tic, str(datetime.timedelta(seconds=eta_seconds))),
                "total_loss: {:.3f}".format(losses_reduced.item())
            ]
            for loss_name, loss_item in loss_dict_reduced.items():
                log_str.append("{}: {:.3f}".format(loss_name, loss_item.item()))
            log_str = ', '.join(log_str)
            logger.info(log_str)
            if summary_writer:
                global_step = iteration
                summary_writer.add_scalar('losses/total_loss', losses_reduced, global_step=global_step)
                for loss_name, loss_item in loss_dict_reduced.items():
                    summary_writer.add_scalar('losses/{}'.format(loss_name), loss_item, global_step=global_step)
                summary_writer.add_scalar('lr', optimizer.param_groups[0]['lr'], global_step=global_step)
                if cfg.MODEL.SELF_SUPERVISED:
                    _log_images_tensorboard(cfg, global_step, images, orig_boxes, orig_labels, summary_writer, j_images=j_images)
                else:
                    _log_images_tensorboard(cfg, global_step, images, orig_boxes, orig_labels, summary_writer)
            tic = time.time()
        if save_to_disk and iteration % args.save_step == 0:
            model_path = os.path.join(cfg.OUTPUT_DIR, "ssd{}_vgg_iteration_{:06d}.pth".format(cfg.INPUT.IMAGE_SIZE, iteration))
            save_training_checkpoint(logger, model, scheduler, optimizer, model_path)
        # Do eval when training, to trace the mAP changes and see whether or not performance improved
        # if args.return_best = True the model returned should be the one that gave best performances on the val set
        if args.eval_step > 0 and iteration % args.eval_step == 0 and (not iteration == max_iter or args.return_best):
            dataset_metrics = do_evaluation(cfg, model, cfg.OUTPUT_DIR, distributed=args.distributed, datasets_dict=val_sets_dict)
            model.train()  # evaluation flips the model to eval mode; restore
            if args.distributed and not distributed_util.is_main_process():
                continue
            avg_map = _compute_avg_map(dataset_metrics)
            if args.return_best:
                if avg_map > best_map:
                    best_map = avg_map
                    logger.info("With iteration {} passed the best! New best avg map: {:4f}".format(iteration, best_map))
                    model_path = os.path.join(cfg.OUTPUT_DIR, "ssd{}_vgg_best.pth".format(cfg.INPUT.IMAGE_SIZE))
                    _save_model(logger, model, model_path)
                else:
                    logger.info("With iteration {} the best has not been reached. Best avg map: {:4f}, Current avg mAP: {:4f}".format(iteration, best_map, avg_map))
            # logging
            if summary_writer:
                global_step = iteration
                summary_writer.add_scalar("val_avg_map", avg_map, global_step=global_step)
                for dataset_name, metrics in dataset_metrics.items():
                    for metric_name, metric_value in metrics.get_printable_metrics().items():
                        summary_writer.add_scalar('/'.join(['val', dataset_name, metric_name]), metric_value, global_step=global_step)

    if save_to_disk:
        model_path = os.path.join(cfg.OUTPUT_DIR, "ssd{}_vgg_final.pth".format(cfg.INPUT.IMAGE_SIZE))
        _save_model(logger, model, model_path)
    # compute training time
    total_training_time = int(time.time() - start_training_time)
    total_time_str = str(datetime.timedelta(seconds=total_training_time))
    logger.info("Total training time: {} ({:.4f} s / it)".format(total_time_str, total_training_time / max_iter))
    if args.return_best:
        # Replace the final weights with the best-on-validation checkpoint.
        model.load(os.path.join(cfg.OUTPUT_DIR, "ssd{}_vgg_best.pth".format(cfg.INPUT.IMAGE_SIZE)))
    return model
def do_train(cfg, model, data_loader, optimizer, scheduler, device, args, resume_iteration=0):
    """Training loop for the SSD+FCN text-detection variant.

    Batches carry quadrilateral boxes (`quads`) and an optional pixel-level
    `score_map` for the FCN branch. Batches with no quads or no positive
    labels are skipped and counted; the counts are written to
    'useful_iteration.txt' at the end. Returns the trained model.

    Args:
        resume_iteration: iteration offset applied when args.resume is set,
            so checkpoint numbering continues from the loaded checkpoint.

    NOTE(review): checkpoints are saved at the TOP of the loop, i.e. before
    this iteration's optimizer step — presumably intentional so a resumed
    run re-executes the saved iteration; confirm.
    """
    logger = logging.getLogger("SSD.trainer")
    logger.info("Start training")
    model.train()
    # Only rank 0 writes checkpoints and tensorboard summaries.
    save_to_disk = distributed_util.get_rank() == 0
    if args.use_tensorboard and save_to_disk:
        import tensorboardX
        summary_writer = tensorboardX.SummaryWriter(log_dir=cfg.OUTPUT_DIR)
    else:
        summary_writer = None
    max_iter = len(data_loader)
    start_training_time = time.time()
    trained_time = 0
    tic = time.time()
    end = time.time()
    import numpy as np  # NOTE(review): unused below; kept only for debug snippets
    count_useful_iteration = 0
    count_not_useful_iteration = 0
    for iteration, (images, quads, labels, score_map) in enumerate(data_loader):
        # Typical shapes: quads [B, N, 8], labels [B, N], images [B, 3, H, W]
        # — TODO confirm against the data loader.
        if args.resume:
            iteration = resume_iteration + iteration
        else:
            iteration = iteration + 1
        if save_to_disk and iteration % args.save_step == 0:
            model_path = os.path.join(
                cfg.OUTPUT_DIR,
                "ssd{}_vgg_iteration_{:06d}.pth".format(
                    cfg.INPUT.IMAGE_SIZE, iteration))
            _save_model(logger, model, model_path, iteration)
        scheduler.step()
        if len(quads) == 0:
            # Batch contains no quadrilateral annotations; nothing to learn from.
            print('quads is None')
            continue
        images = images.to(device)
        quads = quads.to(device)
        labels = labels.to(device)
        num_pos = torch.sum(labels)
        if num_pos == 0:
            # No positive anchors matched in this batch; skip it.
            count_not_useful_iteration += 1
            print('num_pos==0 and no pos sample found')
            continue
        else:
            count_useful_iteration += 1
        optimizer.zero_grad()
        if score_map is None:
            # Detection-only path (no FCN supervision available).
            loss_dict = model(images, targets=(quads, labels))
        else:
            score_map = score_map.to(device)
            loss_dict = model(images, (quads, labels), score_map)
        # reduce losses over all GPUs for logging purposes
        loss_dict_reduced = reduce_loss_dict(loss_dict)
        losses_reduced = sum(loss for loss in loss_dict_reduced.values())
        loss = sum(loss for loss in loss_dict.values())
        loss.backward()
        optimizer.step()
        trained_time += time.time() - end
        end = time.time()
        if iteration % args.log_step == 0:
            # ETA extrapolated from average per-iteration wall time so far.
            eta_seconds = int(
                (trained_time / iteration) * (max_iter - iteration))
            log_str = [
                "Iter: {:06d}, Lr: {:.7f}, Cost: {:.2f}s, Eta: {}".format(
                    iteration, optimizer.param_groups[0]['lr'],
                    time.time() - tic,
                    str(datetime.timedelta(seconds=eta_seconds))),
                "total_loss: {:.3f}".format(losses_reduced.item())
            ]
            log_str.append("{}: {:.3f}".format(
                'regression_loss', loss_dict_reduced['regression_loss']))
            log_str.append("{}: {:.6f}".format(
                'classification_loss',
                loss_dict_reduced['classification_loss']))
            log_str.append("{}: {:.5f}".format('fcn_loss',
                                               loss_dict_reduced['fcn_loss']))
            log_str = ', '.join(log_str)
            logger.info(log_str)
            if summary_writer:
                global_step = iteration
                summary_writer.add_scalar('losses/total_loss',
                                          losses_reduced,
                                          global_step=global_step)
                for loss_name, loss_item in loss_dict_reduced.items():
                    summary_writer.add_scalar('losses/{}'.format(loss_name),
                                              loss_item,
                                              global_step=global_step)
                summary_writer.add_scalar('lr',
                                          optimizer.param_groups[0]['lr'],
                                          global_step=global_step)
            tic = time.time()
        # NOTE(review): periodic evaluation and the final-model save are
        # currently disabled (commented out in the original source).

    # compute training time
    total_training_time = int(time.time() - start_training_time)
    total_time_str = str(datetime.timedelta(seconds=total_training_time))
    logger.info("Total training time: {} ({:.4f} s / it)".format(
        total_time_str, total_training_time / max_iter))
    # Persist the useful/skipped batch counters for offline inspection.
    with open('useful_iteration.txt', 'w') as f:
        f.write('count_useful_iteration:' + str(count_useful_iteration) + '\n')
        f.write('count_not_useful_iteration:' + str(count_not_useful_iteration))
    return model
def main():
    """Entry point for training the SSD+FCN multitask text detector
    (single-GPU script: no distributed launcher support)."""
    parser = argparse.ArgumentParser(
        description='ssd_fcn_multitask_text_detectior training with pytorch')
    parser.add_argument(
        "--config_file",
        default="./configs/icdar2015_incidental_scene_text_512.yaml",
        metavar="FILE",
        help="path to config file",
        type=str)
    parser.add_argument(
        '--vgg',
        default='./pretrained_on_imgnet/vgg16_reducedfc.pth',
        help='Pre-trained vgg model path, download from https://s3.amazonaws.com/amdegroot-models/vgg16_reducedfc.pth')
    # BUGFIX: the default used to be a machine-specific absolute checkpoint
    # path (/home/binchengxiong/.../ssd512_vgg_iteration_043000.pth), which
    # breaks on any other machine and silently forces resume-mode. Resuming
    # must now be requested explicitly.
    parser.add_argument(
        '--resume',
        default=None,
        type=str,
        help='Checkpoint state_dict file to resume training from')
    parser.add_argument('--log_step', default=50, type=int,
                        help='Print logs every log_step')
    parser.add_argument('--save_step', default=1000, type=int,
                        help='Save checkpoint every save_step')
    parser.add_argument(
        '--eval_step',
        default=5000,
        type=int,
        help='Evaluate dataset every eval_step, disabled when eval_step < 0')
    parser.add_argument('--use_tensorboard', default=True, type=str2bool)
    # NOTE(review): default=True combined with action="store_true" means this
    # flag can never be switched off from the CLI. Kept as-is because this
    # main() never reads skip_test; change default to False if it gets used.
    parser.add_argument("--skip-test", default=True, dest="skip_test",
                        help="Do not test the final model", action="store_true")
    parser.add_argument("opts", help="Modify config options using the command-line",
                        default=None, nargs=argparse.REMAINDER)
    args = parser.parse_args()

    num_gpus = 1  # single-GPU script; no WORLD_SIZE handling here
    args.num_gpus = num_gpus
    if torch.cuda.is_available():
        # This flag allows you to enable the inbuilt cudnn auto-tuner to
        # find the best algorithm to use for your hardware.
        torch.backends.cudnn.benchmark = True
    logger = setup_logger("SSD", distributed_util.get_rank())
    logger.info("Using {} GPUs".format(num_gpus))
    logger.info(args)
    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    # freeze() prevents any further modification of the config parameters
    # (ref: https://github.com/rbgirshick/yacs)
    cfg.freeze()
    logger.info("Loaded configuration file {}".format(args.config_file))
    with open(args.config_file, "r") as cf:
        config_str = "\n" + cf.read()
    logger.info(config_str)
    logger.info("Running with config:\n{}".format(cfg))
    model = train(cfg, args)
def main():
    """Entry point for SSD training with periodic evaluation, a selectable
    eval split, and optional best-on-validation model selection."""
    parser = argparse.ArgumentParser(
        description='Single Shot MultiBox Detector Training With PyTorch')
    parser.add_argument("--config-file", default="", metavar="FILE",
                        help="path to config file", type=str)
    parser.add_argument("--local_rank", type=int, default=0)
    parser.add_argument('--vgg',
                        help='Pre-trained vgg model path, download from https://s3.amazonaws.com/amdegroot-models/vgg16_reducedfc.pth')
    parser.add_argument('--resume', default=None, type=str,
                        help='Checkpoint state_dict file to resume training from')
    parser.add_argument('--log_step', default=50, type=int,
                        help='Print logs every log_step')
    parser.add_argument('--save_step', default=5000, type=int,
                        help='Save checkpoint every save_step')
    parser.add_argument('--eval_step', default=0, type=int,
                        help='Evaluate dataset every eval_step, disabled when eval_step <= 0. Default: disabled')
    parser.add_argument('--use_tensorboard', default=True, type=str2bool)
    parser.add_argument("--num_workers", default=4, type=int,
                        help="Number of workers to use for data loaders")
    parser.add_argument("--eval_mode", default="test", type=str,
                        help='Use defined test datasets for periodic evaluation or use a validation split. Default: "test", alternative "val"')
    parser.add_argument("--return_best", default=False, type=str2bool,
                        help="If false (default) tests on the target the last model. If true tests on the target the model with the best performance on the validation set")
    parser.add_argument("--skip-test", dest="skip_test",
                        help="Do not test the final model", action="store_true")
    parser.add_argument("opts", help="Modify config options using the command-line",
                        default=None, nargs=argparse.REMAINDER)
    args = parser.parse_args()

    # The torch.distributed launcher exports WORLD_SIZE; without it, run on 1 GPU.
    gpu_count = int(os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    args.distributed = gpu_count > 1
    args.num_gpus = gpu_count

    if torch.cuda.is_available():
        # This flag allows you to enable the inbuilt cudnn auto-tuner to
        # find the best algorithm to use for your hardware.
        torch.backends.cudnn.benchmark = True
    if args.distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl", init_method="env://")

    logger = setup_logger("SSD", distributed_util.get_rank())
    logger.info(f"Using {gpu_count} GPUs")
    logger.info(args)

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()
    logger.info(f"Loaded configuration file {args.config_file}")
    with open(args.config_file, "r") as fh:
        logger.info("\n" + fh.read())
    logger.info(f"Running with config:\n{cfg}")

    # Only one process creates the output directory.
    if not os.path.exists(cfg.OUTPUT_DIR):
        if not args.distributed or distributed_util.is_main_process():
            os.makedirs(cfg.OUTPUT_DIR)

    model = train(cfg, args)

    if not args.skip_test:
        logger.info('Start evaluating...')
        torch.cuda.empty_cache()  # speed up evaluating after training finished
        do_evaluation(cfg, model, cfg.OUTPUT_DIR, distributed=args.distributed)