def main(): if not torch.cuda.is_available(): logging.info('no gpu device available') sys.exit(1) if args.random_seed: args.seed = np.random.randint(0, 1000, 1) np.random.seed(args.seed) torch.cuda.set_device(args.gpu) cudnn.benchmark = True torch.manual_seed(args.seed) cudnn.enabled = True torch.cuda.manual_seed(args.seed) logging.info('gpu device = %d' % args.gpu) logging.info("args = %s", args) # dataset modelnet pre_transform, transform = T.NormalizeScale(), T.SamplePoints( args.num_points) train_dataset = GeoData.ModelNet(os.path.join(args.data, 'modelnet10'), '10', True, transform, pre_transform) train_queue = DenseDataLoader(train_dataset, batch_size=args.batch_size, shuffle=True, num_workers=args.batch_size // 2) test_dataset = GeoData.ModelNet(os.path.join(args.data, 'modelnet10'), '10', False, transform, pre_transform) valid_queue = DenseDataLoader(test_dataset, batch_size=args.batch_size, shuffle=True, num_workers=args.batch_size // 2) n_classes = train_queue.dataset.num_classes criterion = torch.nn.CrossEntropyLoss().cuda() model = Network(args.init_channels, n_classes, args.num_cells, criterion, args.n_steps, in_channels=args.in_channels, emb_dims=args.emb_dims, dropout=args.dropout, k=args.k).cuda() logging.info("param size = %fMB", utils.count_parameters_in_MB(model)) num_edges = model._steps * 2 post_train = 5 # import pdb;pdb.set_trace() args.epochs = args.warmup_dec_epoch + args.decision_freq * ( num_edges - 1) + post_train + 1 logging.info("total epochs: %d", args.epochs) optimizer = torch.optim.SGD(model.parameters(), args.learning_rate, momentum=args.momentum, weight_decay=args.weight_decay) scheduler = torch.optim.lr_scheduler.CosineAnnealingLR( optimizer, float(args.epochs), eta_min=args.learning_rate_min) architect = Architect(model, args) normal_selected_idxs = torch.tensor(len(model.alphas_normal) * [-1], requires_grad=False, dtype=torch.int).cuda() normal_candidate_flags = torch.tensor(len(model.alphas_normal) * [True], requires_grad=False, dtype=torch.bool).cuda() logging.info('normal_selected_idxs: {}'.format(normal_selected_idxs)) logging.info('normal_candidate_flags: {}'.format(normal_candidate_flags)) model.normal_selected_idxs = normal_selected_idxs model.normal_candidate_flags = normal_candidate_flags print(F.softmax(torch.stack(model.alphas_normal, dim=0), dim=-1).detach()) count = 0 normal_probs_history = [] train_losses, valid_losses = utils.AverageMeter(), utils.AverageMeter() for epoch in range(args.epochs): lr = scheduler.get_lr()[0] logging.info('epoch %d lr %e', epoch, lr) # training # import pdb;pdb.set_trace() att = model.show_att() beta = model.show_beta() train_acc, train_losses = train(train_queue, valid_queue, model, architect, criterion, optimizer, lr, train_losses) valid_overall_acc, valid_class_acc, valid_losses = infer( valid_queue, model, criterion, valid_losses) logging.info( 'train_acc %f\tvalid_overall_acc %f \t valid_class_acc %f', train_acc, valid_overall_acc, valid_class_acc) logging.info('beta %s', beta.cpu().detach().numpy()) logging.info('att %s', att.cpu().detach().numpy()) # make edge decisions saved_memory_normal, model.normal_selected_idxs, \ model.normal_candidate_flags = edge_decision('normal', model.alphas_normal, model.normal_selected_idxs, model.normal_candidate_flags, normal_probs_history, epoch, model, args) if saved_memory_normal: del train_queue, valid_queue torch.cuda.empty_cache() count += 1 new_batch_size = args.batch_size + args.batch_increase * count logging.info("new_batch_size = {}".format(new_batch_size)) train_queue = DenseDataLoader(train_dataset, batch_size=new_batch_size, shuffle=True, num_workers=args.batch_size // 2) valid_queue = DenseDataLoader(test_dataset, batch_size=new_batch_size, shuffle=False, num_workers=args.batch_size // 2) # post validation if args.post_val: post_valid_overall_acc, post_valid_class_acc, valid_losses = infer( valid_queue, model, criterion, valid_losses) logging.info('post_valid_overall_acc %f', post_valid_overall_acc) writer.add_scalar('stats/train_acc', train_acc, epoch) writer.add_scalar('stats/valid_overall_acc', valid_overall_acc, epoch) writer.add_scalar('stats/valid_class_acc', valid_class_acc, epoch) utils.save(model, os.path.join(args.save, 'weights.pt')) scheduler.step() logging.info("#" * 30 + " Done " + "#" * 30) logging.info('genotype = %s', model.get_genotype())
def main(): if not torch.cuda.is_available(): logging.info('no gpu device available') sys.exit(1) np.random.seed(args.seed) torch.cuda.set_device(args.gpu) cudnn.benchmark = True torch.manual_seed(args.seed) cudnn.enabled = True torch.cuda.manual_seed(args.seed) logging.info('gpu device = %d' % args.gpu) logging.info("args = %s", args) criterion = nn.CrossEntropyLoss() criterion = criterion.cuda() model = Network(args.init_channels, CIFAR_CLASSES, args.layers, criterion) model = model.cuda() logging.info("param size = %fMB", utils.count_parameters_in_MB(model)) optimizer = torch.optim.SGD( model.parameters(), args.learning_rate, momentum=args.momentum, weight_decay=args.weight_decay) train_transform, valid_transform = utils._data_transforms_cifar10(args) train_data = dset.CIFAR10(root=args.data, train=True, download=True, transform=train_transform) num_train = len(train_data) indices = list(range(num_train)) split = int(np.floor(args.train_portion * num_train)) train_queue = torch.utils.data.DataLoader( train_data, batch_size=args.batch_size, sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]), pin_memory=True) valid_queue = torch.utils.data.DataLoader( train_data, batch_size=args.batch_size, sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[split:num_train]), pin_memory=True) num_edges = model._steps * 2 post_train = 5 epochs = args.warmup_dec_epoch + args.decision_freq * (num_edges - 1) + post_train + 1 logging.info("total epochs: %d", epochs) scheduler = torch.optim.lr_scheduler.CosineAnnealingLR( optimizer, float(epochs), eta_min=args.learning_rate_min) architect = Architect(model, args) normal_selected_idxs = torch.tensor(len(model.alphas_normal) * [-1], requires_grad=False, dtype=torch.int).cuda() reduce_selected_idxs = torch.tensor(len(model.alphas_reduce) * [-1], requires_grad=False, dtype=torch.int).cuda() normal_candidate_flags = torch.tensor(len(model.alphas_normal) * [True], requires_grad=False, dtype=torch.bool).cuda() reduce_candidate_flags = torch.tensor(len(model.alphas_reduce) * [True], requires_grad=False, dtype=torch.bool).cuda() logging.info('normal_selected_idxs: {}'.format(normal_selected_idxs)) logging.info('reduce_selected_idxs: {}'.format(reduce_selected_idxs)) logging.info('normal_candidate_flags: {}'.format(normal_candidate_flags)) logging.info('reduce_candidate_flags: {}'.format(reduce_candidate_flags)) model.normal_selected_idxs = normal_selected_idxs model.reduce_selected_idxs = reduce_selected_idxs model.normal_candidate_flags = normal_candidate_flags model.reduce_candidate_flags = reduce_candidate_flags print(F.softmax(torch.stack(model.alphas_normal, dim=0), dim=-1).detach()) print(F.softmax(torch.stack(model.alphas_reduce, dim=0), dim=-1).detach()) count = 0 normal_probs_history = [] reduce_probs_history = [] for epoch in range(epochs): scheduler.step() lr = scheduler.get_lr()[0] logging.info('epoch %d lr %e', epoch, lr) # training train_acc, train_obj = train(train_queue, valid_queue, model, architect, criterion, optimizer, lr) logging.info('train_acc %f', train_acc) # validation with torch.no_grad(): valid_acc, valid_obj = infer(valid_queue, model, criterion) logging.info('valid_acc %f', valid_acc) saved_memory_normal, model.normal_selected_idxs, \ model.normal_candidate_flags = edge_decision('normal', model.alphas_normal, model.normal_selected_idxs, model.normal_candidate_flags, normal_probs_history, epoch, model, args) saved_memory_reduce, model.reduce_selected_idxs, \ model.reduce_candidate_flags = edge_decision('reduce', model.alphas_reduce, model.reduce_selected_idxs, model.reduce_candidate_flags, reduce_probs_history, epoch, model, args) if saved_memory_normal or saved_memory_reduce: del train_queue, valid_queue torch.cuda.empty_cache() count += 1 new_batch_size = args.batch_size + args.batch_increase * count logging.info("new_batch_size = {}".format(new_batch_size)) train_queue = torch.utils.data.DataLoader( train_data, batch_size=new_batch_size, sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]), pin_memory=True, num_workers=2) valid_queue = torch.utils.data.DataLoader( train_data, batch_size=new_batch_size, sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[split:num_train]), pin_memory=True, num_workers=2) # post validation if args.post_val: with torch.no_grad(): post_valid_acc, valid_obj = infer(valid_queue, model, criterion) logging.info('post_valid_acc %f', post_valid_acc) logging.info('genotype = %s', model.get_genotype(force=True)) writer.add_scalar('stats/train_acc', train_acc, epoch) writer.add_scalar('stats/valid_acc', valid_acc, epoch) utils.save(model, os.path.join(args.save, 'weights.pt')) logging.info("#" * 30 + " Done " + "#" * 30) logging.info('genotype = %s', model.get_genotype())
def main(): if not torch.cuda.is_available(): logging.info('no gpu device available') sys.exit(1) if args.random_seed: args.seed = np.random.randint(0, 1000, 1) np.random.seed(args.seed) torch.cuda.set_device(args.gpu) cudnn.benchmark = True torch.manual_seed(args.seed) cudnn.enabled = True torch.cuda.manual_seed(args.seed) logging.info('gpu device = %d' % args.gpu) logging.info("args = %s", args) # dataset ppi train_dataset = GeoData.PPI(os.path.join(args.data, 'ppi'), split='train') train_queue = DataLoader(train_dataset, batch_size=args.batch_size, shuffle=True) valid_dataset = GeoData.PPI(os.path.join(args.data, 'ppi'), split='val') valid_queue = DataLoader(valid_dataset, batch_size=args.batch_size, shuffle=False) n_classes = train_queue.dataset.num_classes criterion = torch.nn.BCEWithLogitsLoss().cuda() model = Network(args.init_channels, n_classes, args.num_cells, criterion, args.n_steps, in_channels=args.in_channels).cuda() logging.info("param size = %fMB", utils.count_parameters_in_MB(model)) num_edges = model._steps * 2 post_train = 5 args.epochs = args.warmup_dec_epoch + args.decision_freq * (num_edges - 1) + post_train + 1 logging.info("total epochs: %d", args.epochs) optimizer = torch.optim.SGD( model.parameters(), args.learning_rate, momentum=args.momentum, weight_decay=args.weight_decay) scheduler = torch.optim.lr_scheduler.CosineAnnealingLR( optimizer, float(args.epochs), eta_min=args.learning_rate_min) architect = Architect(model, args) normal_selected_idxs = torch.tensor(len(model.alphas_normal) * [-1], requires_grad=False, dtype=torch.int).cuda() normal_candidate_flags = torch.tensor(len(model.alphas_normal) * [True], requires_grad=False, dtype=torch.bool).cuda() logging.info('normal_selected_idxs: {}'.format(normal_selected_idxs)) logging.info('normal_candidate_flags: {}'.format(normal_candidate_flags)) model.normal_selected_idxs = normal_selected_idxs model.normal_candidate_flags = normal_candidate_flags print(F.softmax(torch.stack(model.alphas_normal, dim=0), dim=-1).detach()) count = 0 normal_probs_history = [] train_losses, valid_losses = utils.AverageMeter(), utils.AverageMeter() for epoch in range(args.epochs): lr = scheduler.get_lr()[0] logging.info('epoch %d lr %e', epoch, lr) # training train_acc, train_losses = train(train_queue, valid_queue, model, architect, criterion, optimizer, lr, train_losses) valid_acc, valid_losses = infer(valid_queue, model, criterion, valid_losses) logging.info('train_acc %f\tvalid_acc %f', train_acc, valid_acc) # make edge decisions saved_memory_normal, model.normal_selected_idxs, \ model.normal_candidate_flags = edge_decision('normal', model.alphas_normal, model.normal_selected_idxs, model.normal_candidate_flags, normal_probs_history, epoch, model, args) if saved_memory_normal: del train_queue, valid_queue torch.cuda.empty_cache() count += 1 new_batch_size = args.batch_size + args.batch_increase * count logging.info("new_batch_size = {}".format(new_batch_size)) train_queue = DataLoader(train_dataset, batch_size=new_batch_size, shuffle=True) valid_queue = DataLoader(valid_dataset, batch_size=new_batch_size, shuffle=False) if args.post_val: valid_acc, valid_obj = infer(valid_queue, model, criterion) logging.info('post valid_acc %f', valid_acc) writer.add_scalar('stats/train_acc', train_acc, epoch) writer.add_scalar('stats/valid_acc', valid_acc, epoch) utils.save(model, os.path.join(args.save, 'weights.pt')) scheduler.step() logging.info("#" * 30 + " Done " + "#" * 30) logging.info('genotype = %s', model.get_genotype())
def main(): if not torch.cuda.is_available(): print('no gpu device available') sys.exit(1) if args.random_seed: args.seed = np.random.randint(0, 1000, 1) # reproducible ,再次运行代码时,初始化值不变。 #you should ensure that all other libraries your code relies on and which use random numbers also use a fixed seed. np.random.seed(args.seed) torch.cuda.set_device(args.gpu) cudnn.benchmark = True torch.manual_seed(args.seed) cudnn.enabled = True torch.cuda.manual_seed(args.seed) logging.info('gpu device = %d' % args.gpu) logging.info("args = %s", args) criterion = torch.nn.BCEWithLogitsLoss().cuda() ## in_channels是特征维度 !! model = Network(args.init_channels, args.classes, args.num_cells, criterion, args.n_steps, in_channels=args.in_channels).cuda() logging.info("param size = %fMB", utils.count_parameters_in_MB(model)) num_edges = model._steps * 2 post_train = 5 args.epochs = args.warmup_dec_epoch + args.decision_freq * ( num_edges - 1) + post_train + 1 logging.info("total epochs: %d", args.epochs) optimizer = torch.optim.SGD(model.parameters(), args.learning_rate, momentum=args.momentum, weight_decay=args.weight_decay) scheduler = torch.optim.lr_scheduler.CosineAnnealingLR( optimizer, float(args.epochs), eta_min=args.learning_rate_min) architect = Architect(model, args) normal_selected_idxs = torch.tensor(len(model.alphas_normal) * [-1], requires_grad=False, dtype=torch.int).cuda() normal_candidate_flags = torch.tensor(len(model.alphas_normal) * [True], requires_grad=False, dtype=torch.bool).cuda() logging.info('normal_selected_idxs: {}'.format(normal_selected_idxs)) logging.info('normal_candidate_flags: {}'.format(normal_candidate_flags)) model.normal_selected_idxs = normal_selected_idxs model.normal_candidate_flags = normal_candidate_flags print(F.softmax(torch.stack(model.alphas_normal, dim=0), dim=-1).detach()) normal_probs_history = [] train_losses, valid_losses = utils.AverageMeter(), utils.AverageMeter() for epoch in range(args.epochs): lr = scheduler.get_lr()[0] logging.info('epoch %d lr %e', epoch, lr) # training train_acc, train_loss = train(model, architect, criterion, optimizer, lr) print("!!!!!!!!!!!!!!!!train_loss:", train_loss) valid_acc, valid_losses = infer(model, criterion, valid_losses) logging.info('train_acc %f\tvalid_acc %f', train_acc, valid_acc) # make edge decisions saved_memory_normal, model.normal_selected_idxs, \ model.normal_candidate_flags = edge_decision('normal', model.alphas_normal, model.normal_selected_idxs, model.normal_candidate_flags, normal_probs_history, epoch, model, args) writer.add_scalar('stats/train_acc', train_acc, epoch) writer.add_scalar('stats/valid_acc', valid_acc, epoch) utils.save(model, os.path.join(args.save, 'search_weights.pt')) scheduler.step() logging.info("#" * 30 + " Done " + "#" * 30) logging.info('genotype = %s', model.get_genotype())