def angle_validate(model, archloader, args):
    # For every candidate architecture, compute the angle between a freshly
    # initialised base model and the supernet passed in as `model`.
    arch_dict = archloader.get_arch_dict()

    base_model = dynamic_resnet20().cuda(args.gpu)
    # base_2 = dynamic_resnet20().cuda(args.gpu)

    angle_result_dict = {}
    with torch.no_grad():
        for key, value in arch_dict.items():
            angle = generate_angle(base_model, model, value["arch"])
            tmp_dict = {}
            tmp_dict['arch'] = value['arch']
            tmp_dict['acc'] = angle.item()  # the angle is stored in the 'acc' field
            print("angle: ", angle.item())
            angle_result_dict[key] = tmp_dict

    print('\n', "=" * 10, "RESULTS", "=" * 10)
    for key, value in angle_result_dict.items():
        print(key, "\t", value)
    print("=" * 10, "E N D", "=" * 10)

    with open("angle_result.json", "w") as f:
        json.dump(angle_result_dict, f)
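# Illustration (not part of the original file): angle_result.json written above
# maps each architecture key to its candidate encoding and its angle (kept in
# the 'acc' field). With a hypothetical key "arch_1", an entry would look like:
#
#   {"arch_1": {"arch": <candidate encoding>, "acc": 0.7321}}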
def main():
    args = get_args()

    num_gpus = torch.cuda.device_count()
    np.random.seed(args.seed)
    args.gpu = args.local_rank % num_gpus
    torch.cuda.set_device(args.gpu)

    # cudnn.benchmark = True
    # cudnn.deterministic = True
    # torch.manual_seed(args.seed)
    # cudnn.enabled = True
    # torch.cuda.manual_seed(args.seed)

    if num_gpus > 1:
        torch.distributed.init_process_group(backend='nccl', init_method='env://')
        args.world_size = torch.distributed.get_world_size()
        args.batch_size = args.batch_size // args.world_size

    # Log to stdout and to a timestamped file under ./log
    log_format = '[%(asctime)s] %(message)s'
    logging.basicConfig(stream=sys.stdout, level=logging.INFO,
                        format=log_format, datefmt='%d %I:%M:%S')
    t = time.time()
    local_time = time.localtime(t)
    if not os.path.exists('./log'):
        os.mkdir('./log')
    fh = logging.FileHandler(
        os.path.join('log/train-{}{:02}{}'.format(local_time.tm_year % 2000,
                                                  local_time.tm_mon, t)))
    fh.setFormatter(logging.Formatter(log_format))
    logging.getLogger().addHandler(fh)

    # archLoader
    arch_loader = ArchLoader(args.path)
    arch_dataset = ArchDataSet(args.path)

    arch_sampler = None
    if num_gpus > 1:
        arch_sampler = DistributedSampler(arch_dataset)

    arch_dataloader = torch.utils.data.DataLoader(
        arch_dataset, batch_size=1, shuffle=False, num_workers=3,
        pin_memory=True, sampler=arch_sampler)

    val_dataset = get_val_dataset()
    val_loader = torch.utils.data.DataLoader(
        val_dataset, batch_size=args.batch_size, shuffle=False,
        num_workers=args.workers, pin_memory=True)

    train_loader = get_train_loader(
        batch_size=args.batch_size, local_rank=0, num_workers=args.workers)

    print('load data successfully')

    model = dynamic_resnet20()
    print("load model successfully")

    print('load from latest checkpoint')
    latest_model = args.weights
    if latest_model is not None:
        checkpoint = torch.load(latest_model, map_location=None)
        model.load_state_dict(checkpoint['state_dict'])

    model = model.cuda(args.gpu)

    if num_gpus > 1:
        model = torch.nn.parallel.DistributedDataParallel(
            model, device_ids=[args.local_rank], output_device=args.local_rank,
            find_unused_parameters=False)

    # argument setup
    args.val_dataloader = val_loader

    print("start to validate model...")
    validate(model, train_loader, args, arch_loader=arch_dataloader)
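# Illustration (not part of the original file): the checkpoint loaded above is
# expected to be a dict with a 'state_dict' entry, matching the save_checkpoint
# call in the training entry point below, e.g. (hypothetical path):
#
#   torch.save({'state_dict': model.state_dict()}, 'checkpoint.pth.tar')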
def main():
    if not torch.cuda.is_available():
        print('no gpu device available')
        sys.exit(1)

    args = get_args()
    writer = None

    num_gpus = torch.cuda.device_count()
    np.random.seed(args.seed)
    args.gpu = args.local_rank % num_gpus
    args.nprocs = num_gpus
    torch.cuda.set_device(args.gpu)

    cudnn.benchmark = True
    cudnn.deterministic = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)

    if args.local_rank == 0:
        args.exp = datetime.datetime.now().strftime("%YY_%mM_%dD_%HH") + "_" + \
            "{:04d}".format(random.randint(0, 1000))
        print('gpu device = %d' % args.gpu)
        print("args = %s" % args)

    if args.model_type == "dynamic":
        model = dynamic_resnet20()
    elif args.model_type == "independent":
        model = Independent_resnet20()
    elif args.model_type == "slimmable":
        model = mutableResNet20()
    elif args.model_type == "original":
        model = resnet20()
    else:
        print("Not implemented")

    # model = resnet20()
    model = model.cuda(args.gpu)

    if num_gpus > 1:
        torch.distributed.init_process_group(backend='nccl', init_method='env://')
        model = torch.nn.parallel.DistributedDataParallel(
            model, device_ids=[args.local_rank], output_device=args.local_rank,
            find_unused_parameters=True)
        args.world_size = torch.distributed.get_world_size()
        args.batch_size = args.batch_size // args.world_size

    # criterion_smooth = CrossEntropyLabelSmooth(args.classes, args.label_smooth)
    # criterion_smooth = criterion_smooth.cuda()

    criterion = torch.nn.CrossEntropyLoss().cuda(args.gpu)
    soft_criterion = CrossEntropyLossSoft()

    optimizer = torch.optim.SGD(model.parameters(),
                                lr=args.learning_rate,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    # scheduler = torch.optim.lr_scheduler.LambdaLR(
    #     optimizer, lambda step: (1.0 - step / args.total_iters), last_epoch=-1)
    # a_scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(
    #     optimizer, T_0=5)
    # scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=10)
    # a_scheduler = torch.optim.lr_scheduler.LambdaLR(
    #     optimizer, lambda epoch: 1 - (epoch / args.epochs))

    a_scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer, milestones=[60, 120, 160], last_epoch=-1)

    # 5-epoch warmup before handing over to the MultiStepLR schedule
    scheduler = GradualWarmupScheduler(optimizer, 1, total_epoch=5,
                                       after_scheduler=a_scheduler)

    if args.local_rank == 0:
        writer = SummaryWriter("./runs/%s-%05d" %
                               (time.strftime("%m-%d", time.localtime()),
                                random.randint(0, 100)))

    # Prepare data
    train_loader = get_train_loader(args.batch_size, args.local_rank, args.num_workers)
    # originally the same as the train batch size; now reduced
    val_loader = get_val_loader(args.batch_size, args.num_workers)

    archloader = ArchLoader("data/Track1_final_archs.json")

    for epoch in range(args.epochs):
        train(train_loader, val_loader, optimizer, scheduler, model, archloader,
              criterion, soft_criterion, args, args.seed, epoch, writer)

        scheduler.step()

        if (epoch + 1) % args.report_freq == 0:
            top1_val, top5_val, objs_val = infer(
                train_loader, val_loader, model, criterion, archloader, args, epoch)

            if args.local_rank == 0:
                # model
                if writer is not None:
                    writer.add_scalar("Val/loss", objs_val, epoch)
                    writer.add_scalar("Val/acc1", top1_val, epoch)
                    writer.add_scalar("Val/acc5", top5_val, epoch)

                save_checkpoint({'state_dict': model.state_dict()}, epoch, args.exp)
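# Illustration (not part of the original file): a possible multi-GPU launch.
# The script name and flag names are assumptions; init_method='env://' above
# expects the environment variables set by the PyTorch launcher, e.g.:
#
#   python -m torch.distributed.launch --nproc_per_node=2 train.py \
#       --model_type dynamic --batch_size 256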
            weight[:cand[idx + 1], :cand[idx], :, :].data, (-1, ))
        conv2 = torch.reshape(
            layer_i.conv2.conv.conv.weight[:cand[idx + 2], :cand[idx + 1], :, :].data,
            (-1, ))
        downs = torch.reshape(
            layer_i.downsample.conv.conv.weight[:cand[idx + 2], :cand[idx], :, :].data,
            (-1, ))
        arch_vector += [torch.cat([conv1, conv2, downs], dim=0)]
        cnt += 1
        idx = cnt * 2

    return torch.cat(arch_vector, dim=0)


def generate_angle(b_model, t_model, candidate):
    vec1 = generate_arch_vector(b_model, candidate)
    vec2 = generate_arch_vector(t_model, candidate)
    cos = nn.CosineSimilarity(dim=0)
    angle = torch.acos(cos(vec1, vec2))
    return angle


if __name__ == "__main__":
    m1 = dynamic_resnet20()
    m2 = dynamic_resnet20()
    print(generate_angle(m1, m2, arc_representation))
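# Illustration (not part of the original file): generate_angle returns the
# arc-cosine of the cosine similarity between the two flattened weight vectors,
# so identical weights give an angle near 0 and orthogonal weights give ~pi/2.
# A minimal sketch with toy vectors standing in for the flattened weights:
#
#   import torch
#   cos = torch.nn.CosineSimilarity(dim=0)
#   v1 = torch.tensor([1.0, 0.0])
#   v2 = torch.tensor([0.0, 1.0])
#   print(torch.acos(cos(v1, v1)))  # ~0.0 (clamp to [-1, 1] if rounding gives nan)
#   print(torch.acos(cos(v1, v2)))  # ~1.5708, i.e. pi / 2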