def main():
    """Entry point: parse CLI arguments, prepare logging and the output
    directory, then launch one distributed training worker per local GPU
    via ``torch.multiprocessing.spawn``.
    """
    args = parser.parse_args()
    logger = get_logger(args.logging_file)
    logger.info(args)

    # Resolve the save directory relative to the CWD and ensure it exists.
    args.save_dir = os.path.join(os.getcwd(), args.save_dir)
    check_dir(args.save_dir)

    # Validate with an explicit raise: `assert` is stripped under `python -O`.
    if args.world_size < 1:
        raise ValueError(
            'world_size must be >= 1, got {}'.format(args.world_size))

    # ImageNet-1k constants hard-coded for this training script.
    args.classes = 1000
    args.num_training_samples = 1281167

    # NOTE(review): presumably the node index used by main_worker to derive
    # the global rank — confirm against main_worker's rank computation.
    args.world = args.rank

    # Total world size = number of nodes (CLI value) * GPUs per node.
    ngpus_per_node = torch.cuda.device_count()
    args.world_size = ngpus_per_node * args.world_size

    # Mixed-precision training is enabled iff float16 was requested.
    args.mix_precision_training = args.dtype == 'float16'

    # Spawn one worker process per GPU on this node.
    mp.spawn(main_worker, nprocs=ngpus_per_node, args=(ngpus_per_node, args))
parser.add_argument('--input-size', type=int, default=256, help='size of the input image size. default is 224') parser.add_argument('-n', '--noise-type', help='noise type', choices=['gaussian', 'poisson', 'text', 'mc'], default='gaussian', type=str) parser.add_argument('-p', '--noise-param', help='noise parameter (e.g. std for gaussian)', default=50, type=float) parser.add_argument('--clean-targets', help='use clean targets for training', action='store_true') parser.add_argument('--save-dir', type=str, default='params', help='directory of saved models') parser.add_argument('--log-interval', type=int, default=50, help='Number of batches to wait before logging.') parser.add_argument('--logging-file', type=str, default='train_imagenet.log', help='name of training log file') parser.add_argument("--local_rank", default=0, type=int) args = parser.parse_args() check_dir(args.save_dir) device = torch.device("cuda:0") device_ids = args.devices.strip().split(',') device_ids = [int(device) for device in device_ids] lr = args.lr train_loss = args.loss epochs = args.epochs num_workers = args.num_workers batch_size = args.batch_size * len(device_ids) adam_param = tuple(map(float, args.adam_param.split(','))) pre_transform = RandomCrop(args.input_size, pad_if_needed=True) source_transform = transform.Compose([ # RandomGaussianNoise(p=0.95, mean=0, std=25, fixed_distribution=False),