def __init__(self, info):
    """Build the SSD network, its loss and its prior matcher from ``info``.

    Args:
        info: mapping read with the keys ``'infer'``, ``'metadatas'``,
            ``'params'`` and ``'device'``.  ``info['metadatas'][1]`` may
            carry a ``'Categories'`` list; ``info['params']`` may carry
            ``'mode'``, ``'mb2_width_mult'``, ``'iou_threshold'`` and
            ``'confidence_threshold'``.

    Raises:
        ValueError: if ``mode`` is not one of the network names handled
            below.
    """
    super(SSD, self).__init__()
    self.infer = info['infer']

    detection_metadata = info['metadatas'][1]
    if detection_metadata and 'Categories' in detection_metadata:
        self.categories = detection_metadata['Categories']
    else:
        self.categories = ['object']
    # One extra class on top of the listed categories.
    self.num_classes = len(self.categories) + 1
    lib.eprint('ssd: set num_classes={}'.format(self.num_classes))

    self.mode = info['params'].get('mode', 'mb2-ssd-lite')
    mb2_width_mult = info['params'].get('mb2_width_mult', 1.0)

    # adapt from train_ssd.py
    # Every factory must accept ``(num_classes, is_test=...)`` because that
    # is how ``create_net`` is invoked further down.
    if self.mode == 'vgg16-ssd':
        create_net = create_vgg_ssd
        config = vgg_ssd_config
    elif self.mode == 'mb1-ssd':
        create_net = create_mobilenetv1_ssd
        config = mobilenetv1_ssd_config
    elif self.mode == 'mb1-ssd-lite':
        create_net = create_mobilenetv1_ssd_lite
        config = mobilenetv1_ssd_config
    elif self.mode == 'sq-ssd-lite':
        create_net = create_squeezenet_ssd_lite
        config = squeezenet_ssd_config
    elif self.mode == 'mb2-ssd-lite':
        create_net = lambda num, is_test: create_mobilenetv2_ssd_lite(
            num, width_mult=mb2_width_mult, is_test=is_test)
        config = mobilenetv1_ssd_config
    elif self.mode == 'mb3-large-ssd-lite':
        # The lambda has to take ``is_test`` itself; previously it accepted
        # only ``num`` and referenced an undefined ``is_test``.
        create_net = lambda num, is_test: create_mobilenetv3_large_ssd_lite(
            num, is_test=is_test)
        config = mobilenetv1_ssd_config
    elif self.mode == 'mb3-small-ssd-lite':
        create_net = lambda num, is_test: create_mobilenetv3_small_ssd_lite(
            num, is_test=is_test)
        config = mobilenetv1_ssd_config
    else:
        raise ValueError('unsupported ssd mode: {}'.format(self.mode))

    # NOTE: this assigns onto the config object selected above, so the
    # override is visible to every other user of that same object.
    config.iou_threshold = info['params'].get(
        'iou_threshold', config.iou_threshold)
    self.prob_threshold = info['params'].get('confidence_threshold', 0.01)
    self.config = config

    self.model = create_net(self.num_classes, is_test=self.infer)
    self.criterion = MultiboxLoss(config.priors,
                                  iou_threshold=0.5,
                                  neg_pos_ratio=3,
                                  center_variance=0.1,
                                  size_variance=0.2,
                                  device=info['device'])
    self.match_prior = MatchPrior(config.priors, config.center_variance,
                                  config.size_variance, 0.5)
    # Reshaped to (1, 3, 1, 1) and moved to the configured device.
    self.image_mean = torch.tensor(
        self.config.image_mean,
        dtype=torch.float32).reshape(1, 3, 1, 1).to(info['device'])
def try_the_datasets():
    """Step through EuroCity batches in the debugger.

    Builds the training augmentation and the prior-matching target
    transform from ``mobilenetv1_ssd_config``, wraps ``EuroCity_Dataset`` in
    a sequential ``DataLoader`` (batch size 3, no workers) and stops in
    ``pdb`` on every batch.

    ``img_path`` and ``label_path`` are not assigned inside this function
    (all candidate assignments below are commented out), so they have to be
    resolvable from the surrounding scope when this runs.
    """
    ssd_config = mobilenetv1_ssd_config

    # Transform
    augmentation = TrainAugmentation(ssd_config.image_size,
                                     ssd_config.image_mean,
                                     ssd_config.image_std)
    prior_matcher = MatchPrior(ssd_config.priors,
                               ssd_config.center_variance,
                               ssd_config.size_variance, 0.5)

    # train test
    # img_path = "/media/wy_disk/wy_file/Detection/dataset/datasets/ECP/ECP/day/img/train"
    # label_path = "/media/wy_disk/wy_file/Detection/dataset/datasets/ECP/ECP/day/labels/train"
    # val test
    # img_path = "/media/wy_disk/wy_file/Detection/dataset/datasets/ECP/ECP/day/img/val"
    # label_path = "/media/wy_disk/wy_file/Detection/dataset/datasets/ECP/ECP/day/labels/val"
    # night train test
    # img_path = "/media/wy_disk/wy_file/Detection/dataset/datasets/ECP/ECP/night/img/train"
    # label_path = "/media/wy_disk/wy_file/Detection/dataset/datasets/ECP/ECP/night/labels/train"
    # night val test
    # img_path = "/media/wy_disk/wy_file/Detection/dataset/datasets/ECP/ECP/night/img/val"
    # label_path = "/media/wy_disk/wy_file/Detection/dataset/datasets/ECP/ECP/night/labels/val"

    ecp_dataset = EuroCity_Dataset(img_path,
                                   label_path,
                                   transform=augmentation,
                                   target_transform=prior_matcher)
    loader = DataLoader(ecp_dataset,
                        batch_size=3,
                        shuffle=False,
                        num_workers=0)

    # ``index`` and ``D`` keep their names on purpose: they are what gets
    # inspected from the debugger prompt.
    # for index, (img, bbox, labels) in enumerate(DL):
    for index, D in enumerate(loader):
        import pdb
        pdb.set_trace()
def main_active_mode(args):
    """Run ten rounds of random-sampling active learning on VIRAT sequences.

    Each round draws ``len(unlabeled) // 10`` indices at random from the
    currently unlabeled pool, marks them as selected on both the training
    dataset and a separate ``VIRAT_table_comm`` view, switches the training
    dataset to training mode and calls ``train(args, train_loader)``.

    Args:
        args: nested configuration mapping; ``args['Datasets']['virat_seq']``
            supplies the image/annotation paths.
    """
    rounds = 10

    # Network factory; it is defined here but not called in this function.
    create_net = lambda num: create_mobilenetv2_ssd_lite(
        num, width_mult=args['detection_model']["width_mult"])
    config = mobilenetv1_ssd_config

    train_loader, val_loader, num_classes = dataset_loading(args, config)

    prior_matcher = MatchPrior(config.priors, config.center_variance,
                               config.size_variance, 0.5)
    eval_transform = TestTransform(config.image_size, config.image_mean,
                                   config.image_std)
    active_dataset = VIRAT_table_comm(
        args["Datasets"]["virat_seq"]["train_image_path"],
        args["Datasets"]["virat_seq"]["train_anno_path"],
        transform=eval_transform,
        target_transform=prior_matcher,
        downpurning_ratio=0.2)

    # The per-round budget is fixed once from the initial unlabeled pool.
    labeled, unlabeled = train_loader.dataset.dataset_information()
    per_round = len(unlabeled) // rounds

    active_dataset.Active_mode()
    for _round in range(rounds):
        # (a former ``if q_iter != 0`` guard around this call is disabled)
        active_dataset.Active_mode()
        labeled, unlabeled = active_dataset.dataset_information()

        chosen = np.random.choice(unlabeled, per_round, replace=False)
        for target in (train_loader.dataset, active_dataset):
            target.setting_be_selected_sample(chosen)

        train_loader.dataset.training_mode()
        train(args, train_loader)
def dataset_loading(args, config):
    """Create the VIRAT training and validation loaders.

    Args:
        args: nested configuration mapping; reads the ``virat_seq`` paths
            under ``args['Datasets']`` and ``batch_size`` / ``num_workers``
            under ``args['flow_control']``.
        config: SSD config providing ``image_size``, ``image_mean``,
            ``image_std``, ``priors``, ``center_variance`` and
            ``size_variance``.

    Returns:
        ``(train_loader, val_loader, num_classes)`` with ``num_classes``
        taken from ``len(dataset.class_names)`` of the training dataset.
    """
    augment = TrainAugmentation(config.image_size, config.image_mean,
                                config.image_std)
    match_priors = MatchPrior(config.priors, config.center_variance,
                              config.size_variance, 0.5)
    # Constructed as in the original, although nothing below consumes it.
    test_transform = TestTransform(config.image_size, config.image_mean,
                                   config.image_std)

    image_root = args["Datasets"]["virat_seq"]["train_image_path"]
    anno_root = args["Datasets"]["virat_seq"]["train_anno_path"]

    # Earlier variants used VIRAT_Dataset here (downpurning_ratio 0.2 / 0.05).
    train_dataset = VIRAT_table_comm(image_root,
                                     anno_root,
                                     transform=augment,
                                     target_transform=match_priors,
                                     downpurning_ratio=0.2)

    # With an empty path the existence check fails, so no labels are written.
    label_file = ""
    if os.path.exists(label_file):
        store_labels(label_file, train_dataset.class_names)
    logging.info(train_dataset)
    num_classes = len(train_dataset.class_names)

    batch_size = args["flow_control"]["batch_size"]
    train_loader = DataLoader(train_dataset,
                              batch_size,
                              num_workers=args["flow_control"]["num_workers"],
                              shuffle=True)

    # NOTE(review): validation reuses the training paths and the training
    # augmentation, only with a smaller ratio — confirm this is intended.
    val_dataset = VIRAT_Dataset(
        args["Datasets"]["virat_seq"]["train_image_path"],
        args["Datasets"]["virat_seq"]["train_anno_path"],
        transform=augment,
        target_transform=match_priors,
        downpurning_ratio=0.2 * 3. / 9.)
    val_loader = DataLoader(val_dataset,
                            args["flow_control"]["batch_size"],
                            num_workers=args["flow_control"]["num_workers"],
                            shuffle=False)

    logging.info("Build network.")
    return train_loader, val_loader, num_classes
def res_test(dataset, net, device):
    """Evaluate ``net`` on an ``SKUDataset`` and return mean losses.

    Args:
        dataset: forwarded as the first argument of ``SKUDataset``.
        net: model called as ``net(images)`` returning
            ``(confidence, locations)``; it is switched to eval mode.
        device: device the batches are moved to and the loss is built for.

    Returns:
        ``(loss, regression_loss, classification_loss)``, each averaged over
        the number of batches.  Batch size and worker count come from the
        ``args`` object of the surrounding scope.
    """
    config = mobilenetv1_ssd_config
    criterion = MultiboxLoss(config.priors,
                             iou_threshold=0.5,
                             neg_pos_ratio=3,
                             center_variance=0.1,
                             size_variance=0.2,
                             device=device)
    prior_matcher = MatchPrior(config.priors, config.center_variance,
                               config.size_variance, 0.5)
    eval_transform = TestTransform(config.image_size, config.image_mean,
                                   config.image_std)
    eval_set = SKUDataset(dataset,
                          transform=eval_transform,
                          target_transform=prior_matcher,
                          mode='1')
    loader = DataLoader(eval_set,
                        args.batch_size,
                        num_workers=args.num_workers,
                        shuffle=False)

    net.eval()
    total = 0.0
    total_regression = 0.0
    total_classification = 0.0
    batches = 0
    for i, (images, boxes, labels) in enumerate(loader):
        images, boxes, labels = (images.to(device), boxes.to(device),
                                 labels.to(device))
        batches += 1

        # Forward pass and loss only; no gradients are tracked.
        with torch.no_grad():
            confidence, locations = net(images)
            regression_loss, classification_loss = criterion(
                confidence, locations, labels, boxes)
            loss = regression_loss + classification_loss

        total += loss.item()
        total_regression += regression_loss.item()
        total_classification += classification_loss.item()
        if i % 50 == 0:
            logger.info(f"Step: {i} in Test - loss : {loss}. ")

    return (total / batches, total_regression / batches,
            total_classification / batches)
def get_voc_dataset(data_root_07="",
                    data_root_12="",
                    net='vgg16-ssd',
                    batch_size=128,
                    num_torch_workers=4):
    """Build the VOC training loader and the VOC07 test dataset.

    Args:
        data_root_07: root of the VOC2007 data; ``""`` skips it for training.
        data_root_12: root of the VOC2012 data; ``""`` skips it for training.
        net: network name; only ``'vgg16-ssd'`` is supported.
        batch_size: batch size of the training loader.
        num_torch_workers: worker count of the training loader.

    Returns:
        ``{'train': DataLoader, 'test': VOCDataset}``.  The test entry is the
        dataset itself (not a loader), built from ``data_root_07`` with
        ``is_test=True``.

    Raises:
        ValueError: for an unsupported ``net`` or when both roots are empty.
    """
    # Validate cheap arguments first so the caller gets a clear message
    # instead of a failure deep inside dataset concatenation.
    if net != 'vgg16-ssd':
        raise ValueError(
            "Unsupported net {!r}: only 'vgg16-ssd' is available.".format(net))
    if data_root_07 == "" and data_root_12 == "":
        raise ValueError(
            "At least one of data_root_07 / data_root_12 must be provided.")

    config = vgg_ssd_config
    train_transform = TrainAugmentation(config.image_size, config.image_mean,
                                        config.image_std)
    target_transform = MatchPrior(config.priors, config.center_variance,
                                  config.size_variance, 0.5)

    # VOC07 first, then VOC12, skipping any root left empty.
    datasets = [
        VOCDataset(root=root,
                   is_test=False,
                   transform=train_transform,
                   target_transform=target_transform)
        for root in (data_root_07, data_root_12) if root != ""
    ]
    train_dataset = ConcatDataset(datasets)

    # NOTE(review): the test split always reads data_root_07, even when only
    # data_root_12 was supplied — confirm VOCDataset tolerates an empty root.
    test_dataset = VOCDataset(root=data_root_07, is_test=True)

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=batch_size,
                                               shuffle=True,
                                               pin_memory=True,
                                               num_workers=num_torch_workers)
    return {'train': train_loader, 'test': test_dataset}
def active_dataset_setting(args, config):
    """Create the dataset view used to pick samples during active learning.

    Args:
        args: nested configuration mapping;
            ``args['flow_control']['dataset_type']`` selects the dataset and
            ``args['Datasets'][...]`` supplies the image/annotation paths.
        config: SSD config providing ``image_size``, ``image_mean``,
            ``image_std``, ``priors``, ``center_variance`` and
            ``size_variance``.

    Returns:
        An ``ECP_table_comm`` for ``'ecp'`` or a ``VIRAT_table_comm`` for
        ``'virat'`` / ``'virat_seq'``, built with the test-time transform.

    Raises:
        ValueError: if the dataset type is none of the above.
    """
    # Previously this was assigned to a misspelled name, so every call
    # failed with NameError on ``dataset_name``.
    dataset_name = args['flow_control']['dataset_type']
    if dataset_name not in ('ecp', 'virat', 'virat_seq'):
        # Reject unknown names before any transform is constructed.
        raise ValueError("Doesn't exists dataset : {}".format(dataset_name))

    target_transform = MatchPrior(config.priors, config.center_variance,
                                  config.size_variance, 0.5)
    test_transform = TestTransform(config.image_size, config.image_mean,
                                   config.image_std)

    if dataset_name == 'ecp':
        return ECP_table_comm(args["Datasets"]["ecp"]["train_image_path"],
                              args["Datasets"]["ecp"]["train_anno_path"],
                              transform=test_transform,
                              target_transform=target_transform)

    # Both 'virat' and 'virat_seq' read the 'virat_seq' paths.
    return VIRAT_table_comm(
        args["Datasets"]["virat_seq"]["train_image_path"],
        args["Datasets"]["virat_seq"]["train_anno_path"],
        transform=test_transform,
        target_transform=target_transform,
        downpurning_ratio=0.2)
running_loss += loss.item() running_regression_loss += regression_loss.item() running_classification_loss += classification_loss.item() return running_loss / num, running_regression_loss / num, running_classification_loss / num if __name__ == '__main__': timer = Timer() logging.info(args) create_net = create_fpnnet_ssd config = fpnnet_ssd_config train_transform = TrainAugmentation(config.image_size, config.image_mean, config.image_std) target_transform = MatchPrior(config.priors, config.center_variance, config.size_variance, 0.5) test_transform = TestTransform(config.image_size, config.image_mean, config.image_std) logging.info("Prepare training datasets.") datasets = [] for dataset_path in args.datasets: dataset = _VOCDataset(dataset_path, transform=train_transform, target_transform=target_transform) label_file = os.path.join(args.checkpoint_folder, "voc-model-labels.txt") store_labels(label_file, dataset.class_names) num_classes = len(dataset.class_names) datasets.append(dataset) logging.info(f"Stored labels into file {label_file}.") train_dataset = ConcatDataset(datasets) logging.info("Train dataset size: {}".format(len(train_dataset)))
logging.info(args) if args.net == 'slim': create_net = create_mb_tiny_fd config = fd_config elif args.net == 'RFB': create_net = create_Mb_Tiny_RFB_fd config = fd_config else: logging.fatal("The net type is wrong.") parser.print_help(sys.stderr) sys.exit(1) train_transform = TrainAugmentation(config.image_size, config.image_mean, config.image_std) target_transform = MatchPrior(config.priors, config.center_variance, config.size_variance, args.overlap_threshold) test_transform = TestTransform(config.image_size, config.image_mean_test, config.image_std) if not os.path.exists(args.checkpoint_folder): os.makedirs(args.checkpoint_folder) logging.info("Prepare training datasets.") datasets = [] for dataset_path in args.datasets: if args.dataset_type == 'voc': dataset = VOCDataset(dataset_path, transform=train_transform, target_transform=target_transform) label_file = os.path.join(args.checkpoint_folder, "voc-model-labels.txt")
def dataset_loading(args, config):
    """Build the training and validation loaders for the configured dataset.

    Args:
        args: nested configuration; this function reads
            ``args['flow_control']`` (``dataset_type``, ``checkpoint_folder``,
            ``batch_size``, ``num_workers`` and, for the sub-sampled ECP
            variants, ``dataset_ratio``) and the paths under
            ``args['Datasets']``.
        config: SSD config providing ``image_size``, ``image_mean``,
            ``image_std``, ``priors``, ``center_variance`` and
            ``size_variance``.

    Returns:
        ``(train_loader, val_loader, num_classes)`` where ``num_classes`` is
        ``len(dataset.class_names)`` of the training dataset.

    Raises:
        ValueError: for an unsupported ``dataset_type`` or an empty ECP
            training set.
        NotImplementedError: for ``'voc'``, whose validation split is not
            available here.
    """
    train_transform = TrainAugmentation(config.image_size, config.image_mean,
                                        config.image_std)
    target_transform = MatchPrior(config.priors, config.center_variance,
                                  config.size_variance, 0.5)
    test_transform = TestTransform(config.image_size, config.image_mean,
                                   config.image_std)

    logging.info("Prepare training datasets.")
    dataset_name = args['flow_control']['dataset_type']
    if dataset_name == 'voc':
        dataset = VOCDataset("",
                             transform=train_transform,
                             target_transform=target_transform)
        label_txt_name = "voc-model-labels.txt"
    elif dataset_name == 'open_images':
        # NOTE(review): ``dataset_path`` is not defined in this function and
        # ``args.balance_data`` uses attribute access while everything else
        # uses item access — presumably both are provided elsewhere; confirm.
        dataset = OpenImagesDataset(dataset_path,
                                    transform=train_transform,
                                    target_transform=target_transform,
                                    dataset_type="train",
                                    balance_data=args.balance_data)
        label_txt_name = "open-images-model-labels.txt"
    elif dataset_name == 'coco':
        dataset = CocoDetection(args["Datasets"]["coco"]["train_image_path"],
                                args["Datasets"]["coco"]["train_anno_path"],
                                transform=train_transform,
                                target_transform=target_transform)
        label_txt_name = "open-images-model-labels.txt"
    elif dataset_name == 'ecp':
        # (an earlier variant used EuroCity_Dataset with the same arguments)
        dataset = ECP_table_comm(args["Datasets"]["ecp"]["train_image_path"],
                                 args["Datasets"]["ecp"]["train_anno_path"],
                                 transform=train_transform,
                                 target_transform=target_transform)
        dataset.Active_mode()
        if len(dataset) == 0:
            raise ValueError("Doesn't exist any file")
        label_txt_name = "open-images-model-labels.txt"
    elif dataset_name in ("ecp-random", "ecp-centroid"):
        # "ecp-random" -> "random", "ecp-centroid" -> "centroid"
        sampling_mode = dataset_name[len("ecp-"):]
        dataset = ECP_subsample_dataset(
            args["Datasets"]["ecp"]["train_image_path"],
            args["Datasets"]["ecp"]["train_anno_path"],
            transform=train_transform,
            target_transform=target_transform,
            _sampling_mode=sampling_mode,
            ratio=args['flow_control']['dataset_ratio'])
        label_txt_name = "open-images-model-labels.txt"
    elif dataset_name in ["virat", "VIRAT"]:
        # (earlier variants used VIRAT_Loader / VIRAT_table_comm here)
        dataset = VIRAT_Dataset(
            args["Datasets"]["virat_seq"]["train_image_path"],
            args["Datasets"]["virat_seq"]["train_anno_path"],
            transform=train_transform,
            target_transform=target_transform,
            downpurning_ratio=0.2)
        label_txt_name = "virat_labels.txt"
    else:
        raise ValueError(
            "Dataset type {} is not supported.".format(dataset_name))

    label_file = os.path.join(args["flow_control"]["checkpoint_folder"],
                              label_txt_name)
    # NOTE(review): labels are only (re)written when the file already
    # exists — confirm this is not an inverted condition.
    if os.path.exists(label_file):
        store_labels(label_file, dataset.class_names)
        # Only report the write when it actually happened.
        logging.info("Stored labels into file {}.".format(label_file))
    logging.info(dataset)
    num_classes = len(dataset.class_names)

    train_dataset = dataset
    logging.info("Train dataset size: {}".format(len(train_dataset)))
    logging.debug("================= train_loader ===================")
    # The value needs a placeholder; passing it as a bare extra argument
    # makes the logging module report a formatting error instead.
    logging.debug("DataLoader batchsize : %s",
                  args["flow_control"]["batch_size"])
    train_loader = DataLoader(train_dataset,
                              args["flow_control"]["batch_size"],
                              num_workers=args["flow_control"]["num_workers"],
                              shuffle=True)

    logging.info("Prepare Validation datasets.")
    if dataset_name == "voc":
        raise NotImplementedError("Doesn't modify")
    elif dataset_name == 'open_images':
        val_dataset = OpenImagesDataset(dataset_path,
                                        transform=test_transform,
                                        target_transform=target_transform,
                                        dataset_type="test")
        logging.info(val_dataset)
    elif dataset_name == "coco":
        val_dataset = CocoDetection(args["Datasets"]["coco"]["val_image_path"],
                                    args["Datasets"]["coco"]["val_anno_path"],
                                    transform=test_transform,
                                    target_transform=target_transform)
        logging.info(val_dataset)
    elif dataset_name in ["ecp", "ecp-random", "ecp-centroid"]:
        # All ECP variants validate on the full (non-subsampled) val split.
        val_dataset = EuroCity_Dataset(
            args["Datasets"]["ecp"]["val_image_path"],
            args["Datasets"]["ecp"]["val_anno_path"],
            transform=test_transform,
            target_transform=target_transform)
    elif dataset_name in ["virat", "VIRAT"]:
        # NOTE(review): validation reuses the training paths and the
        # training augmentation with a smaller ratio — confirm intended.
        val_dataset = VIRAT_Dataset(
            args["Datasets"]["virat_seq"]["train_image_path"],
            args["Datasets"]["virat_seq"]["train_anno_path"],
            transform=train_transform,
            target_transform=target_transform,
            downpurning_ratio=0.2 * 3. / 9.)

    logging.info("validation dataset size: {}".format(len(val_dataset)))
    val_loader = DataLoader(val_dataset,
                            args["flow_control"]["batch_size"],
                            num_workers=args["flow_control"]["num_workers"],
                            shuffle=False)

    logging.info("Build network.")
    return train_loader, val_loader, num_classes
def _predict_confidences(net, imgs_list, device, batch_size=50):
    """Run ``net`` over ``imgs_list`` in chunks and return all confidences on CPU."""
    confidences = []
    with torch.no_grad():
        # Stepping by ``batch_size`` never yields an empty chunk, unlike
        # ``len // batch_size + 1`` iterations when the length is a multiple.
        for start in range(0, len(imgs_list), batch_size):
            sub_batch = torch.stack(
                imgs_list[start:start + batch_size]).to(device)
            _confidence, _locations = net(sub_batch)
            confidences.append(_confidence.data.cpu())
    return torch.cat(confidences, 0)


def _prediction_entropy(confidences):
    """Return the softmax entropy of ``confidences`` summed over dimension 2."""
    # log_softmax keeps the logarithm finite where softmax underflows to 0,
    # so ``p * log(p)`` cannot turn into NaN.
    log_probability = torch.log_softmax(confidences, 2)
    probability = log_probability.exp()
    return torch.sum(probability * log_probability * -1, 2)


def main_acitve_mode(args):
    """Train an SSD detector with an iterative active-learning query loop.

    The unlabeled pool is consumed in ten query rounds.  In every round a
    batch of samples is selected according to
    ``args['flow_control']['sample_method']``, marked as selected on both
    the training dataset and a test-transform view of the same data, and the
    network is then trained for ``num_epochs`` epochs with periodic
    validation and checkpointing.

    Args:
        args: nested configuration mapping (``flow_control``, ``Datasets``,
            ``detection_model``).

    Raises:
        NotImplementedError: for a dataset type other than ``'ecp'`` /
            ``'virat'`` / ``'VIRAT'`` or an unknown sample method.
    """
    flow = args["flow_control"]

    # Device setting
    DEVICE = torch.device(
        "cuda:0" if torch.cuda.is_available() and flow["use_cuda"] else "cpu")
    if flow["use_cuda"] and torch.cuda.is_available():
        torch.backends.cudnn.benchmark = True
        logging.info("Use Cuda.")
    timer = Timer()

    # Model setting
    if flow["net"] == 'vgg16-ssd':
        create_net = create_vgg_ssd
        config = vgg_ssd_config
    elif flow["net"] == 'mb1-ssd':
        create_net = create_mobilenetv1_ssd
        config = mobilenetv1_ssd_config
    elif flow["net"] == 'mb1-ssd-lite':
        create_net = create_mobilenetv1_ssd_lite
        config = mobilenetv1_ssd_config
    elif flow["net"] == 'sq-ssd-lite':
        create_net = create_squeezenet_ssd_lite
        config = squeezenet_ssd_config
    elif flow["net"] == 'mb2-ssd-lite':
        create_net = lambda num: create_mobilenetv2_ssd_lite(
            num, width_mult=args['detection_model']["width_mult"])
        config = mobilenetv1_ssd_config
    else:
        logging.fatal("The net type is wrong.")
        parser.print_help(sys.stderr)
        sys.exit(1)

    # Dataset
    train_loader, val_loader, num_classes = dataset_loading(args, config)
    net = create_net(num_classes)
    net, criterion, optimizer, scheduler = optim_and_model_initial(
        args, net, timer, config, DEVICE)

    Query_iteration = 10
    train_loader.dataset.Active_mode()
    labeled, unlabeled = train_loader.dataset.dataset_information()
    # The per-round budget is fixed once from the initial unlabeled pool.
    query_item = len(unlabeled) // Query_iteration
    logging.info("Query iteration: {}, ".format(Query_iteration) +
                 "per query each item : {} ".format(query_item))

    target_transform = MatchPrior(config.priors, config.center_variance,
                                  config.size_variance, 0.5)
    test_transform = TestTransform(config.image_size, config.image_mean,
                                   config.image_std)
    dataset_type = flow['dataset_type']
    if dataset_type == "ecp":
        active_dataset = ECP_table_comm(
            args["Datasets"]["ecp"]["train_image_path"],
            args["Datasets"]["ecp"]["train_anno_path"],
            transform=test_transform,
            target_transform=target_transform)
    elif dataset_type in ["virat", "VIRAT"]:
        # NOTE(review): the VIRAT paths are wrapped in ECP_table_comm —
        # possibly a VIRAT-specific dataset class was intended; confirm.
        active_dataset = ECP_table_comm(
            args["Datasets"]["virat_seq"]["train_image_path"],
            args["Datasets"]["virat_seq"]["train_anno_path"],
            transform=test_transform,
            target_transform=target_transform)
    else:
        raise NotImplementedError("Doen't implmented")

    active_dataset.Active_mode()
    for q_iter in range(Query_iteration):
        if q_iter != 0:
            # Raise the scheduler's base learning rate by 10% per round.
            scheduler.base_lrs[0] = scheduler.base_lrs[0] * 1.1
        active_dataset.Active_mode()
        labeled, unlabeled = active_dataset.dataset_information()
        logging.info(
            "Query iteration: {}/{}, ".format(q_iter, Query_iteration) +
            "per query each item : {} ".format(query_item))

        logging.info("Fetch data...")
        # imgs_list, bboxes_list, labels_list = train_loader.dataset.data_fetch()
        logging.info("Fetch data finish...")

        _setting_sampler = flow["sample_method"]
        query_index = None
        if _setting_sampler == "random":
            net.train(False)
            query_index = np.random.choice(unlabeled,
                                           query_item,
                                           replace=False)
        elif _setting_sampler == "seqencial":
            net.train(False)
            query_index = unlabeled[:query_item]
        elif _setting_sampler in ("uncertainty_modify", "uncertainty"):
            net.train(False)
            # Inputs follow DEVICE instead of an unconditional .cuda(), so a
            # CPU-only run does not fail here.
            confidences = _predict_confidences(net,
                                               active_dataset.data_fetch(),
                                               DEVICE)
            entropy = _prediction_entropy(confidences)
            if _setting_sampler == "uncertainty_modify":
                mean = torch.mean(entropy, 1)
                stddev = torch.std(entropy, 1)
                criteria = mean * stddev / (mean + stddev)
            else:
                criteria = torch.max(entropy, 1)[0]
            # NOTE(review): these are positions within the fetched image
            # list, whereas the other strategies pass values taken from
            # ``unlabeled`` — confirm both index spaces agree.
            query_index = torch.argsort(-1 * criteria)[:query_item].tolist()
        elif _setting_sampler in ("diversity", "balance_feature"):
            # Placeholders: nothing is selected for these strategies.
            pass
        else:
            raise NotImplementedError(
                "_setting_sampler : {} doesn't implement".format(
                    _setting_sampler))

        if query_index is not None:
            train_loader.dataset.setting_be_selected_sample(query_index)
            active_dataset.setting_be_selected_sample(query_index)

        train_loader.dataset.training_mode()

        # Training process
        num_epochs = flow['num_epochs']
        for epoch in range(0, num_epochs):
            scheduler.step()
            train(args,
                  train_loader,
                  net,
                  criterion,
                  optimizer,
                  device=DEVICE,
                  debug_steps=flow['debug_steps'],
                  epoch=epoch)

            if epoch % flow['validation_epochs'] == 0 or epoch == num_epochs - 1:
                val_loss, val_regression_loss, val_classification_loss = test(
                    args, val_loader, net, criterion, DEVICE)
                logging.info("Epoch: {}, ".format(epoch) +
                             "Validation Loss: {:.4f}, ".format(val_loss) +
                             "Validation Regression Loss {:.4f}, ".format(
                                 val_regression_loss) +
                             "Validation Classification Loss: {:.4f}".format(
                                 val_classification_loss))

                # The sub-sampled ECP variants carry their ratio in the
                # folder name.  (The former ``!= a or != b`` test was always
                # true, so that branch could never be taken.)
                if dataset_type in ("ecp-random", "ecp-centroid"):
                    _postfix_infos = [
                        dataset_type,
                        str(flow['dataset_ratio']), flow["net"],
                        START_TRAINING_TIME
                    ]
                else:
                    _postfix_infos = [
                        dataset_type, flow["net"], START_TRAINING_TIME
                    ]
                postfix = "_".join(_postfix_infos)

                folder_name = os.path.join(
                    flow["checkpoint_folder"] + "_" + postfix,
                    "query_iter_{}".format(
                        str(float(q_iter + 1) / float(Query_iteration))))
                if not os.path.exists(folder_name):
                    os.makedirs(folder_name)
                model_path = os.path.join(
                    folder_name,
                    "{}-Epoch-{}-Loss-{}.pth".format(flow['net'], epoch,
                                                     val_loss))
                # ``net.module`` presumably exists because the model is
                # wrapped during initialisation — confirm.
                net.module.save(model_path)
                logging.info("Saved model {}".format(model_path))
def main(args):
    """Train an SSD detector end to end from parsed command-line ``args``.

    Selects the network factory and config from ``args.net``, builds the
    training and validation datasets for ``args.dataset_type``, sets up the
    parameter groups, optimizer and LR scheduler, then trains for
    ``args.num_epochs`` epochs, validating and saving a checkpoint every
    ``args.validation_epochs`` epochs and on the last epoch.

    Raises:
        ValueError: for an unsupported dataset type or when
            ``args.datasets`` is empty.
    """
    DEVICE = torch.device(
        "cuda:0" if torch.cuda.is_available() and args.use_cuda else "cpu")
    if args.use_cuda and torch.cuda.is_available():
        torch.backends.cudnn.benchmark = True
        logging.info("Use Cuda.")

    timer = Timer()
    logging.info(args)

    if args.net == 'vgg16-ssd':
        create_net = create_vgg_ssd
        config = vgg_ssd_config
    elif args.net == 'mb1-ssd':
        create_net = create_mobilenetv1_ssd
        config = mobilenetv1_ssd_config
    elif args.net == 'mb1-ssd-lite':
        create_net = create_mobilenetv1_ssd_lite
        config = mobilenetv1_ssd_config
    elif args.net == 'sq-ssd-lite':
        create_net = create_squeezenet_ssd_lite
        config = squeezenet_ssd_config
    elif args.net == 'mb2-ssd-lite':
        create_net = lambda num: create_mobilenetv2_ssd_lite(
            num, width_mult=args.mb2_width_mult)
        config = mobilenetv1_ssd_config
    else:
        logging.fatal("The net type is wrong.")
        parser.print_help(sys.stderr)
        sys.exit(1)

    train_transform = TrainAugmentation(config.image_size, config.image_mean,
                                        config.image_std)
    target_transform = MatchPrior(config.priors, config.center_variance,
                                  config.size_variance, 0.5)
    test_transform = TestTransform(config.image_size, config.image_mean,
                                   config.image_std)

    logging.info("Prepare training datasets.")
    datasets = []
    for dataset_path in args.datasets:
        if args.dataset_type == 'voc':
            dataset = VOCDataset(dataset_path,
                                 transform=train_transform,
                                 target_transform=target_transform)
            label_file = os.path.join(args.checkpoint_folder,
                                      "voc-model-labels.txt")
            store_labels(label_file, dataset.class_names)
            num_classes = len(dataset.class_names)
        elif args.dataset_type == 'open_images':
            dataset = OpenImagesDataset(dataset_path,
                                        transform=train_transform,
                                        target_transform=target_transform,
                                        dataset_type="train",
                                        balance_data=args.balance_data)
            label_file = os.path.join(args.checkpoint_folder,
                                      "open-images-model-labels.txt")
            store_labels(label_file, dataset.class_names)
            logging.info(dataset)
            num_classes = len(dataset.class_names)
        elif args.dataset_type == 'coco':
            # The COCO locations are hard-coded; ``dataset_path`` is not
            # used for this dataset type.
            dataset = CocoDetection(
                "/home/wenyen4desh/datasets/coco/train2017",
                "/home/wenyen4desh/datasets/coco/annotations/instances_train2017.json",
                transform=train_transform,
                target_transform=target_transform)
            label_file = os.path.join(args.checkpoint_folder,
                                      "open-images-model-labels.txt")
            store_labels(label_file, dataset.class_names)
            logging.info(dataset)
            num_classes = len(dataset.class_names)
        else:
            raise ValueError("Dataset type {} is not supported.".format(
                args.dataset_type))
        datasets.append(dataset)

    if not datasets:
        # Without this guard the code below would fail on the unbound
        # ``label_file`` / ``num_classes`` names.
        raise ValueError("args.datasets is empty: no training dataset given.")
    logging.info("Stored labels into file {}.".format(label_file))

    train_dataset = ConcatDataset(datasets)
    logging.info("Train dataset size: {}".format(len(train_dataset)))
    train_loader = DataLoader(train_dataset,
                              args.batch_size,
                              num_workers=args.num_workers,
                              shuffle=True)

    logging.info("Prepare Validation datasets.")
    if args.dataset_type == "voc":
        val_dataset = VOCDataset(args.validation_dataset,
                                 transform=test_transform,
                                 target_transform=target_transform,
                                 is_test=True)
    elif args.dataset_type == 'open_images':
        # Uses the last path iterated in the training loop above.
        val_dataset = OpenImagesDataset(dataset_path,
                                        transform=test_transform,
                                        target_transform=target_transform,
                                        dataset_type="test")
        logging.info(val_dataset)
    elif args.dataset_type == "coco":
        val_dataset = CocoDetection(
            "/home/wenyen4desh/datasets/coco/val2017",
            "/home/wenyen4desh/datasets/coco/annotations/instances_val2017.json",
            transform=test_transform,
            target_transform=target_transform)
        logging.info(val_dataset)
    logging.info("validation dataset size: {}".format(len(val_dataset)))
    val_loader = DataLoader(val_dataset,
                            args.batch_size,
                            num_workers=args.num_workers,
                            shuffle=False)

    logging.info("Build network.")
    net = create_net(num_classes)
    last_epoch = -1

    # Group-specific learning rates fall back to the global one.
    base_net_lr = args.base_net_lr if args.base_net_lr is not None else args.lr
    extra_layers_lr = (args.extra_layers_lr
                       if args.extra_layers_lr is not None else args.lr)

    if args.freeze_base_net:
        logging.info("Freeze base net.")
        freeze_net_layers(net.base_net)
        params = [{
            'params':
            itertools.chain(net.source_layer_add_ons.parameters(),
                            net.extras.parameters()),
            'lr':
            extra_layers_lr
        }, {
            'params':
            itertools.chain(net.regression_headers.parameters(),
                            net.classification_headers.parameters())
        }]
    elif args.freeze_net:
        freeze_net_layers(net.base_net)
        freeze_net_layers(net.source_layer_add_ons)
        freeze_net_layers(net.extras)
        params = itertools.chain(net.regression_headers.parameters(),
                                 net.classification_headers.parameters())
        logging.info("Freeze all the layers except prediction heads.")
    else:
        params = [{
            'params': net.base_net.parameters(),
            'lr': base_net_lr
        }, {
            'params':
            itertools.chain(net.source_layer_add_ons.parameters(),
                            net.extras.parameters()),
            'lr':
            extra_layers_lr
        }, {
            'params':
            itertools.chain(net.regression_headers.parameters(),
                            net.classification_headers.parameters())
        }]

    timer.start("Load Model")
    if args.resume:
        logging.info("Resume from the model {}".format(args.resume))
        net.load(args.resume)
    elif args.base_net:
        logging.info("Init from base net {}".format(args.base_net))
        net.init_from_base_net(args.base_net)
    elif args.pretrained_ssd:
        logging.info("Init from pretrained ssd {}".format(args.pretrained_ssd))
        net.init_from_pretrained_ssd(args.pretrained_ssd)
    logging.info('Took {:.2f} seconds to load the model.'.format(
        timer.end("Load Model")))

    net.to(DEVICE)

    criterion = MultiboxLoss(config.priors,
                             iou_threshold=0.5,
                             neg_pos_ratio=3,
                             center_variance=0.1,
                             size_variance=0.2,
                             device=DEVICE)
    optimizer = torch.optim.SGD(params,
                                lr=args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)
    logging.info("Learning rate: {}, Base net learning rate: {}, ".format(
        args.lr, base_net_lr) +
                 "Extra Layers learning rate: {}.".format(extra_layers_lr))

    if args.scheduler == 'multi-step':
        logging.info("Uses MultiStepLR scheduler.")
        milestones = [int(v.strip()) for v in args.milestones.split(",")]
        scheduler = MultiStepLR(optimizer,
                                milestones=milestones,
                                gamma=0.1,
                                last_epoch=last_epoch)
    elif args.scheduler == 'cosine':
        logging.info("Uses CosineAnnealingLR scheduler.")
        scheduler = CosineAnnealingLR(optimizer,
                                      args.t_max,
                                      last_epoch=last_epoch)
    else:
        logging.fatal("Unsupported Scheduler: {}.".format(args.scheduler))
        parser.print_help(sys.stderr)
        sys.exit(1)

    logging.info("Start training from epoch {}.".format(last_epoch + 1))
    for epoch in range(last_epoch + 1, args.num_epochs):
        scheduler.step()
        train(train_loader,
              net,
              criterion,
              optimizer,
              device=DEVICE,
              debug_steps=args.debug_steps,
              epoch=epoch)

        # Validate and checkpoint periodically and always on the last epoch.
        if epoch % args.validation_epochs == 0 or epoch == args.num_epochs - 1:
            val_loss, val_regression_loss, val_classification_loss = test(
                val_loader, net, criterion, DEVICE)
            logging.info("Epoch: {}, ".format(epoch) +
                         "Validation Loss: {:.4f}, ".format(val_loss) +
                         "Validation Regression Loss {:.4f}, ".format(
                             val_regression_loss) +
                         "Validation Classification Loss: {:.4f}".format(
                             val_classification_loss))
            model_path = os.path.join(
                args.checkpoint_folder,
                "{}-Epoch-{}-Loss-{}.pth".format(args.net, epoch, val_loss))
            net.save(model_path)
            logging.info("Saved model {}".format(model_path))
def setup_and_start_training(self):
    """Build the SSD network, data loaders and optimizer, then run training.

    All settings come from ``self.system_dict``: hyper-parameters from
    ``self.system_dict["params"]`` and dataset locations from
    ``self.system_dict["dataset"]["val"]``.

    Checkpoints are written to ``params["checkpoint_folder"]``:

    * validation enabled (``dataset["val"]["status"]`` truthy): one checkpoint
      per validated epoch, with the validation loss in the file name;
    * validation disabled: one checkpoint per epoch.

    Side effects: configures the root logger, fills in a default
    ``params["milestones"]`` when none is set, and calls ``sys.exit(1)`` when
    the network type or the scheduler name is not supported.
    """
    logging.basicConfig(
        stream=sys.stdout,
        level=logging.INFO,
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')

    cfg = self.system_dict["params"]

    # Fail fast: without this check an unknown scheduler name would only
    # surface as a NameError on `scheduler.step()` after the datasets and the
    # network had already been built.
    scheduler_name = cfg["scheduler"]
    if scheduler_name not in ('multi-step', 'cosine'):
        logging.fatal("Unsupported Scheduler: {}.".format(scheduler_name))
        sys.exit(1)

    val_cfg = self.system_dict["dataset"]["val"]

    use_gpu = bool(cfg["use_cuda"]) and torch.cuda.is_available()
    DEVICE = torch.device("cuda:0" if use_gpu else "cpu")
    if use_gpu:
        torch.backends.cudnn.benchmark = True
        logging.info("Using gpu.")
    else:
        logging.info("Using cpu.")

    logging.info(self.system_dict)

    # Resolved once up front: both checkpoint-naming paths below need it.
    net_name = cfg["net"]
    if net_name == 'vgg16-ssd':
        create_net = create_vgg_ssd
        config = vgg_ssd_config
    elif net_name == 'mb1-ssd':
        create_net = create_mobilenetv1_ssd
        config = mobilenetv1_ssd_config
    elif net_name == 'mb1-ssd-lite':
        create_net = create_mobilenetv1_ssd_lite
        config = mobilenetv1_ssd_config
    elif net_name == 'sq-ssd-lite':
        create_net = create_squeezenet_ssd_lite
        config = squeezenet_ssd_config
    elif net_name == 'mb2-ssd-lite':
        create_net = lambda num: create_mobilenetv2_ssd_lite(
            num, width_mult=cfg["mb2_width_mult"])
        config = mobilenetv1_ssd_config
    else:
        logging.fatal("The net type is wrong.")
        sys.exit(1)

    train_transform = TrainAugmentation(config.image_size, config.image_mean,
                                        config.image_std)
    target_transform = MatchPrior(config.priors, config.center_variance,
                                  config.size_variance, 0.5)
    test_transform = TestTransform(config.image_size, config.image_mean,
                                   config.image_std)

    logging.info("Prepare training datasets.")
    label_file = cfg["label_file"]
    # NOTE(review): the training set is built from the "val" image/label
    # directories, i.e. the same ones used for validation below. This looks
    # like it may have been meant to read a "train" entry -- confirm against
    # the code that populates system_dict["dataset"].
    dataset = VOCDataset(val_cfg["img_dir"],
                         val_cfg["label_dir"],
                         transform=train_transform,
                         target_transform=target_transform,
                         label_file=label_file)
    num_classes = len(dataset.class_names)
    logging.info(f"Stored labels into file {label_file}.")
    train_dataset = ConcatDataset([dataset])
    logging.info("Train dataset size: {}".format(len(train_dataset)))
    train_loader = DataLoader(train_dataset,
                              cfg["batch_size"],
                              num_workers=cfg["num_workers"],
                              shuffle=True)

    has_val = bool(val_cfg["status"])
    if has_val:
        val_dataset = VOCDataset(val_cfg["img_dir"],
                                 val_cfg["label_dir"],
                                 transform=test_transform,
                                 target_transform=target_transform,
                                 is_test=True,
                                 label_file=label_file)
        logging.info("validation dataset size: {}".format(len(val_dataset)))
        val_loader = DataLoader(val_dataset,
                                cfg["batch_size"],
                                num_workers=cfg["num_workers"],
                                shuffle=False)

    logging.info("Build network.")
    net = create_net(num_classes)
    last_epoch = -1

    # Per-group learning rates fall back to the global one when unset.
    lr = cfg["lr"]
    base_net_lr = cfg["base_net_lr"] if cfg["base_net_lr"] is not None else lr
    extra_layers_lr = (cfg["extra_layers_lr"]
                       if cfg["extra_layers_lr"] is not None else lr)

    # Parameter groups without an explicit 'lr' use the optimizer-wide lr.
    if cfg["freeze_base_net"]:
        logging.info("Freeze base net.")
        freeze_net_layers(net.base_net)
        params = [{
            'params': itertools.chain(net.source_layer_add_ons.parameters(),
                                      net.extras.parameters()),
            'lr': extra_layers_lr
        }, {
            'params': itertools.chain(net.regression_headers.parameters(),
                                      net.classification_headers.parameters())
        }]
    elif cfg["freeze_net"]:
        freeze_net_layers(net.base_net)
        freeze_net_layers(net.source_layer_add_ons)
        freeze_net_layers(net.extras)
        params = itertools.chain(net.regression_headers.parameters(),
                                 net.classification_headers.parameters())
        logging.info("Freeze all the layers except prediction heads.")
    else:
        params = [{
            'params': net.base_net.parameters(),
            'lr': base_net_lr
        }, {
            'params': itertools.chain(net.source_layer_add_ons.parameters(),
                                      net.extras.parameters()),
            'lr': extra_layers_lr
        }, {
            'params': itertools.chain(net.regression_headers.parameters(),
                                      net.classification_headers.parameters())
        }]

    # Weight initialisation: the first configured source wins.
    timer = Timer()
    timer.start("Load Model")
    resume = cfg["resume"]
    base_net = cfg["base_net"]
    pretrained_ssd = cfg["pretrained_ssd"]
    if resume:
        logging.info(f"Resume from the model {resume}")
        net.load(resume)
    elif base_net:
        logging.info(f"Init from base net {base_net}")
        net.init_from_base_net(base_net)
    elif pretrained_ssd:
        logging.info(f"Init from pretrained ssd {pretrained_ssd}")
        net.init_from_pretrained_ssd(pretrained_ssd)
    logging.info(
        f'Took {timer.end("Load Model"):.2f} seconds to load the model.')

    net.to(DEVICE)

    criterion = MultiboxLoss(config.priors,
                             iou_threshold=0.5,
                             neg_pos_ratio=3,
                             center_variance=0.1,
                             size_variance=0.2,
                             device=DEVICE)
    optimizer = torch.optim.SGD(params,
                                lr=lr,
                                momentum=cfg["momentum"],
                                weight_decay=cfg["weight_decay"])
    logging.info(
        f"Learning rate: {lr}, Base net learning rate: {base_net_lr}, " +
        f"Extra Layers learning rate: {extra_layers_lr}.")

    num_epochs = cfg["num_epochs"]
    if not cfg["milestones"]:
        # Default: decay at one third and two thirds of the run. Written back
        # into system_dict, as the original code did.
        cfg["milestones"] = "{},{}".format(int(num_epochs / 3),
                                           int(2 * num_epochs / 3))

    if scheduler_name == 'multi-step':
        logging.info("Uses MultiStepLR scheduler.")
        milestones = [int(v.strip()) for v in cfg["milestones"].split(",")]
        scheduler = MultiStepLR(optimizer,
                                milestones=milestones,
                                gamma=0.1,
                                last_epoch=last_epoch)
    else:  # 'cosine' -- the only other name accepted by the check above
        logging.info("Uses CosineAnnealingLR scheduler.")
        scheduler = CosineAnnealingLR(optimizer,
                                      cfg["t_max"],
                                      last_epoch=last_epoch)

    logging.info(f"Start training from epoch {last_epoch + 1}.")
    for epoch in range(last_epoch + 1, num_epochs):
        # NOTE(review): the scheduler is stepped *before* the epoch's
        # optimizer steps. PyTorch >= 1.1 documents the opposite order; kept
        # as-is to preserve the existing learning-rate schedule -- confirm
        # whether this is intended.
        scheduler.step()
        self.base_train(train_loader,
                        net,
                        criterion,
                        optimizer,
                        device=DEVICE,
                        debug_steps=cfg["debug_steps"],
                        epoch=epoch)

        if has_val:
            is_last_epoch = epoch == num_epochs - 1
            if epoch % cfg["validation_epochs"] == 0 or is_last_epoch:
                (val_loss, val_regression_loss,
                 val_classification_loss) = self.base_test(
                     val_loader, net, criterion, DEVICE)
                logging.info(
                    f"Epoch: {epoch}, " +
                    f"Validation Loss: {val_loss:.4f}, " +
                    f"Validation Regression Loss {val_regression_loss:.4f}, " +
                    f"Validation Classification Loss: {val_classification_loss:.4f}"
                )
                model_path = os.path.join(
                    cfg["checkpoint_folder"],
                    f"{net_name}-Epoch-{epoch}-Loss-{val_loss}.pth")
                net.save(model_path)
                logging.info(f"Saved model {model_path}")
        else:
            # No validation data: still checkpoint, just without a loss tag.
            model_path = os.path.join(cfg["checkpoint_folder"],
                                      f"{net_name}-Epoch-{epoch}.pth")
            net.save(model_path)
            logging.info(f"Saved model {model_path}")
# config = mobilenetv1_ssd_config # elif args.net == 'sq-ssd-lite': # create_net = create_squeezenet_ssd_lite # config = squeezenet_ssd_config # elif args.net == 'mb2-ssd-lite': # create_net = lambda num: create_mobilenetv2_ssd_lite(num, width_mult=args.mb2_width_mult) # config = mobilenetv1_ssd_config else: logging.fatal("The net type is wrong.") parser.print_help(sys.stderr) sys.exit(1) train_transform = TrainAugmentation(config.image_size, config.image_mean, config.image_std) # augmentation target_transform = MatchPrior( config.priors, config.center_variance, config.size_variance, iou_threshold=args.matching_IoU, sigmoid_thresh=sigmoid_thresh) # config.priors : tensor[8732, 4] / test_transform = TestTransform(config.image_size, config.image_mean, config.image_std) logging.info("Prepare training datasets.") datasets = [] for dataset_path in args.datasets: if args.dataset_type == 'voc': dataset = VOCDataset(dataset_path, transform=train_transform, target_transform=target_transform) label_file = "./models/voc-model-labels.txt" store_labels(label_file, dataset.class_names)
def dataset_loading(args, config):
    """Create the training and validation data loaders for the configured dataset.

    The dataset family is selected by ``args['flow_control']['dataset_type']``
    (``voc``, ``open_images``, ``coco``, ``ecp``, ``ecp-random`` or
    ``ecp-centroid``).  Batch size, worker count and checkpoint folder are
    also read from ``args['flow_control']``.

    Args:
        args: configuration object indexed like a nested dict; the ``voc`` and
            ``open_images`` branches additionally read attributes from it.
        config: SSD prior/image configuration providing ``image_size``,
            ``image_mean``, ``image_std``, ``priors``, ``center_variance``
            and ``size_variance``.

    Returns:
        Tuple ``(train_loader, val_loader, num_classes)``.

    Raises:
        ValueError: if the dataset type is not one of the supported names.
    """
    augment = TrainAugmentation(config.image_size, config.image_mean,
                                config.image_std)
    match_priors = MatchPrior(config.priors, config.center_variance,
                              config.size_variance, 0.5)
    eval_transform = TestTransform(config.image_size, config.image_mean,
                                   config.image_std)
    # Same resize as eval_transform but with mean 0 / std 1.
    raw_transform = TestTransform(config.image_size, 0, 1)

    logging.info("Prepare training datasets.")
    flow = args['flow_control']
    dataset_name = flow['dataset_type']
    # The two sub-sampled ECP variants differ only in the sampling mode.
    subsample_modes = {"ecp-random": "random", "ecp-centroid": "centroid"}

    # ---- training split -------------------------------------------------
    if dataset_name == 'voc':
        # NOTE(review): `dataset_path` is not defined in this function --
        # presumably a module-level global; confirm.
        dataset = VOCDataset(dataset_path,
                             transform=augment,
                             target_transform=match_priors)
        label_txt_name = "voc-model-labels.txt"
    elif dataset_name == 'open_images':
        # NOTE(review): attribute access (`args.balance_data`) on an object
        # that is otherwise indexed like a dict -- verify it supports both.
        dataset = OpenImagesDataset(dataset_path,
                                    transform=augment,
                                    target_transform=match_priors,
                                    dataset_type="train",
                                    balance_data=args.balance_data)
        label_txt_name = "open-images-model-labels.txt"
    elif dataset_name == 'coco':
        coco_cfg = args["Datasets"]["coco"]
        dataset = CocoDetection(coco_cfg["train_image_path"],
                                coco_cfg["train_anno_path"],
                                transform=raw_transform,
                                target_transform=match_priors)
        label_txt_name = "open-images-model-labels.txt"
    elif dataset_name == "ecp":
        ecp_cfg = args["Datasets"]["ecp"]
        dataset = EuroCity_Dataset(ecp_cfg["train_image_path"],
                                   ecp_cfg["train_anno_path"],
                                   transform=raw_transform,
                                   target_transform=match_priors)
        label_txt_name = "open-images-model-labels.txt"
    elif dataset_name in subsample_modes:
        ecp_cfg = args["Datasets"]["ecp"]
        dataset = ECP_subsample_dataset(
            ecp_cfg["train_image_path"],
            ecp_cfg["train_anno_path"],
            transform=raw_transform,
            target_transform=match_priors,
            _sampling_mode=subsample_modes[dataset_name],
            ratio=0.1)
        label_txt_name = "open-images-model-labels.txt"
    else:
        raise ValueError(
            "Dataset type {} is not supported.".format(dataset_name))

    label_file = os.path.join(flow["checkpoint_folder"], label_txt_name)
    # NOTE(review): labels are only (re)written when the file already exists,
    # yet the log line below is emitted unconditionally -- confirm whether the
    # condition was meant to be inverted.
    if os.path.exists(label_file):
        store_labels(label_file, dataset.class_names)
    logging.info(dataset)
    num_classes = len(dataset.class_names)
    train_dataset = dataset
    logging.info("Stored labels into file {}.".format(label_file))
    logging.info("Train dataset size: {}".format(len(train_dataset)))
    train_loader = DataLoader(train_dataset,
                              flow["batch_size"],
                              num_workers=flow["num_workers"],
                              shuffle=True)

    # ---- validation split -----------------------------------------------
    logging.info("Prepare Validation datasets.")
    if dataset_name == "voc":
        val_dataset = VOCDataset(args.validation_dataset,
                                 transform=eval_transform,
                                 target_transform=match_priors,
                                 is_test=True)
    elif dataset_name == 'open_images':
        val_dataset = OpenImagesDataset(dataset_path,
                                        transform=eval_transform,
                                        target_transform=match_priors,
                                        dataset_type="test")
        logging.info(val_dataset)
    elif dataset_name == "coco":
        coco_cfg = args["Datasets"]["coco"]
        val_dataset = CocoDetection(coco_cfg["val_image_path"],
                                    coco_cfg["val_anno_path"],
                                    transform=eval_transform,
                                    target_transform=match_priors)
        logging.info(val_dataset)
    elif dataset_name == "ecp":
        ecp_cfg = args["Datasets"]["ecp"]
        val_dataset = EuroCity_Dataset(ecp_cfg["val_image_path"],
                                       ecp_cfg["val_anno_path"],
                                       transform=eval_transform,
                                       target_transform=match_priors)
        logging.info(val_dataset)
    elif dataset_name in subsample_modes:
        ecp_cfg = args["Datasets"]["ecp"]
        val_dataset = ECP_subsample_dataset(
            ecp_cfg["val_image_path"],
            ecp_cfg["val_anno_path"],
            transform=eval_transform,
            target_transform=match_priors,
            _sampling_mode=subsample_modes[dataset_name],
            ratio=0.1)

    logging.info("validation dataset size: {}".format(len(val_dataset)))
    val_loader = DataLoader(val_dataset,
                            flow["batch_size"],
                            num_workers=flow["num_workers"],
                            shuffle=False)
    logging.info("Build network.")
    return train_loader, val_loader, num_classes
def train_network(dataset_path, model_path, net_type):
    """Train a face-detection network on a VOC-style dataset.

    The module-level ``args`` namespace is updated in place with the given
    locations and network type, and the remaining hyper-parameters are read
    from it.

    Args:
        dataset_path: root of the VOC-style dataset; used for both the
            training and the validation split.
        model_path: folder that receives the label file and checkpoints
            (created if missing).
        net_type: ``'slim'`` or ``'RFB'``.

    Side effects: writes ``voc-model-labels.txt`` and one checkpoint per
    validated epoch into ``model_path``; calls ``sys.exit(1)`` on an
    unsupported network type, optimizer or scheduler.
    """
    args.datasets = dataset_path
    args.validation_dataset = dataset_path
    args.checkpoint_folder = model_path
    args.log_dir = os.path.join(args.checkpoint_folder, 'log')
    args.net = net_type

    timer = Timer()
    logging.info(args)

    if args.net == 'slim':
        create_net = create_mb_tiny_fd
        config = fd_config
    elif args.net == 'RFB':
        create_net = create_Mb_Tiny_RFB_fd
        config = fd_config
    else:
        logging.fatal("The net type is wrong.")
        parser.print_help(sys.stderr)
        sys.exit(1)

    train_transform = TrainAugmentation(config.image_size, config.image_mean,
                                        config.image_std)
    target_transform = MatchPrior(config.priors, config.center_variance,
                                  config.size_variance,
                                  args.overlap_threshold)
    test_transform = TestTransform(config.image_size, config.image_mean_test,
                                   config.image_std)

    os.makedirs(args.checkpoint_folder, exist_ok=True)

    logging.info("Prepare training datasets.")
    dataset = VOCDataset(dataset_path,
                         transform=train_transform,
                         target_transform=target_transform)
    label_file = os.path.join(args.checkpoint_folder, "voc-model-labels.txt")
    store_labels(label_file, dataset.class_names)
    num_classes = len(dataset.class_names)
    print('num_classes: ', num_classes)
    logging.info(f"Stored labels into file {label_file}.")
    train_dataset = dataset
    logging.info("Train dataset size: {}".format(len(train_dataset)))
    train_loader = DataLoader(train_dataset,
                              args.batch_size,
                              num_workers=args.num_workers,
                              shuffle=True)

    logging.info("Prepare Validation datasets.")
    val_dataset = VOCDataset(args.validation_dataset,
                             transform=test_transform,
                             target_transform=target_transform,
                             is_test=True)
    logging.info("validation dataset size: {}".format(len(val_dataset)))
    val_loader = DataLoader(val_dataset,
                            args.batch_size,
                            num_workers=args.num_workers,
                            shuffle=False)

    logging.info("Build network.")
    net = create_net(num_classes)

    timer.start("Load Model")
    if args.resume:
        logging.info(f"Resume from the model {args.resume}")
        net.load(args.resume)
    logging.info(
        f'Took {timer.end("Load Model"):.2f} seconds to load the model.')

    # Multi-GPU training: wrap the model only when CUDA devices are present.
    if torch.cuda.device_count() >= 1:
        cuda_index_list = [int(v.strip()) for v in args.cuda_index.split(",")]
        net = nn.DataParallel(net, device_ids=cuda_index_list)
        logging.info("use gpu :{}".format(cuda_index_list))

    # DataParallel hides the real model behind `.module`. On a host without
    # CUDA devices the net is not wrapped, so resolve the underlying model
    # once instead of dereferencing `.module` unconditionally.
    core_net = net.module if isinstance(net, nn.DataParallel) else net

    last_epoch = -1

    # Per-group learning rates fall back to the global one when unset; the
    # prediction-head group has no explicit 'lr' and uses the optimizer-wide
    # value.
    base_net_lr = args.base_net_lr if args.base_net_lr is not None else args.lr
    extra_layers_lr = (args.extra_layers_lr
                       if args.extra_layers_lr is not None else args.lr)
    params = [{
        'params': core_net.base_net.parameters(),
        'lr': base_net_lr
    }, {
        'params': itertools.chain(core_net.source_layer_add_ons.parameters(),
                                  core_net.extras.parameters()),
        'lr': extra_layers_lr
    }, {
        'params': itertools.chain(
            core_net.regression_headers.parameters(),
            core_net.classification_headers.parameters())
    }]

    net.to(DEVICE)

    criterion = MultiboxLoss(config.priors,
                             iou_threshold=args.iou_threshold,
                             neg_pos_ratio=5,
                             center_variance=0.1,
                             size_variance=0.2,
                             device=DEVICE,
                             num_classes=num_classes,
                             loss_type=args.loss_type)

    if args.optimizer_type == "SGD":
        optimizer = torch.optim.SGD(params,
                                    lr=args.lr,
                                    momentum=args.momentum,
                                    weight_decay=args.weight_decay)
    elif args.optimizer_type == "Adam":
        optimizer = torch.optim.Adam(params, lr=args.lr)
        logging.info("use Adam optimizer")
    else:
        # Report the value that was actually rejected (the original message
        # printed the scheduler name here).
        logging.fatal(f"Unsupported optimizer: {args.optimizer_type}.")
        parser.print_help(sys.stderr)
        sys.exit(1)
    logging.info(
        f"Learning rate: {args.lr}, Base net learning rate: {base_net_lr}, " +
        f"Extra Layers learning rate: {extra_layers_lr}.")

    # Only 'multi-step' creates a scheduler object. 'poly' is applied by
    # adjust_learning_rate() in the loop, and Adam runs without one.
    scheduler = None
    if args.optimizer_type != "Adam":
        if args.scheduler == 'multi-step':
            logging.info("Uses MultiStepLR scheduler.")
            milestones = [int(v.strip()) for v in args.milestones.split(",")]
            scheduler = MultiStepLR(optimizer,
                                    milestones=milestones,
                                    gamma=0.1,
                                    last_epoch=last_epoch)
        elif args.scheduler == 'poly':
            logging.info("Uses PolyLR scheduler.")
        else:
            logging.fatal(f"Unsupported Scheduler: {args.scheduler}.")
            parser.print_help(sys.stderr)
            sys.exit(1)

    logging.info(f"Start training from epoch {last_epoch + 1}.")
    for epoch in range(last_epoch + 1, args.num_epochs):
        # Step at the start of every epoch except the first, so epoch 0
        # trains with the initial learning rate.
        if scheduler is not None and epoch != 0:
            scheduler.step()

        train(train_loader,
              net,
              criterion,
              optimizer,
              device=DEVICE,
              debug_steps=args.debug_steps,
              epoch=epoch)

        if args.scheduler == "poly":
            adjust_learning_rate(optimizer, epoch)
        logging.info("epoch: {} lr rate :{}".format(
            epoch, optimizer.param_groups[0]['lr']))

        if epoch % args.validation_epochs == 0 or epoch == args.num_epochs - 1:
            logging.info("validation epoch: {} lr rate :{}".format(
                epoch, optimizer.param_groups[0]['lr']))
            val_loss, val_regression_loss, val_classification_loss = test(
                val_loader, net, criterion, DEVICE)
            logging.info(
                f"Epoch: {epoch}, " + f"Validation Loss: {val_loss:.4f}, " +
                f"Validation Regression Loss {val_regression_loss:.4f}, " +
                f"Validation Classification Loss: {val_classification_loss:.4f}"
            )
            checkpoint_path = os.path.join(
                args.checkpoint_folder,
                f"{args.net}-Epoch-{epoch}-Loss-{val_loss:.4f}.pth")
            # Save the underlying model, not the DataParallel wrapper.
            core_net.save(checkpoint_path)
            logging.info(f"Saved model {checkpoint_path}")