def train():
    """Train for ``args.epochs`` epochs, tracking the best test accuracy.

    Works on module-level state: ``args``, ``model``, ``criterion``,
    ``optimizer``, ``scheduler``, ``train_queue`` and ``test_queue``.

    Every epoch:
      * sets ``model.drop_path_prob`` to
        ``args.drop_path_prob * epoch / args.epochs``;
      * runs ``train_step`` on the training queue and ``infer`` on the test
        queue;
      * saves ``best_weights.pt`` under ``args.save`` when the overall test
        accuracy strictly improves on the best seen so far;
      * always saves ``weights.pt`` under ``args.save`` and steps the
        scheduler.

    Returns:
        None. Results are reported through ``logging`` and checkpoint files.
    """
    best_test_acc = 0.
    class_acc_best = 0.

    # Calling ``get_lr()`` outside of ``scheduler.step()`` warns on
    # PyTorch >= 1.4 and may report a value that is not the one currently in
    # use; ``get_last_lr()`` is the documented accessor. Fall back to
    # ``get_lr()`` on older releases where ``get_last_lr()`` does not exist.
    current_lr = getattr(scheduler, 'get_last_lr', scheduler.get_lr)

    for epoch in range(args.epochs):
        logging.info('epoch %d lr %e', epoch, current_lr()[0])
        model.drop_path_prob = args.drop_path_prob * epoch / args.epochs

        # The third return value of both helpers is not used here.
        train_overall_acc, train_class_acc, _ = train_step(
            train_queue, model, criterion, optimizer, args)
        test_overall_acc, test_class_acc, _ = infer(
            test_queue, model, criterion)

        if test_overall_acc > best_test_acc:
            best_test_acc = test_overall_acc
            class_acc_best = test_class_acc
            utils.save(model, os.path.join(args.save, 'best_weights.pt'))

        logging.info(
            'train_overall_acc %f\t train_class_acc %f\t '
            'test_overall_acc %f\t test_class_acc %f\t '
            'best_test_overall_acc %f\t test_class_acc_when_best %f',
            train_overall_acc, train_class_acc,
            test_overall_acc, test_class_acc,
            best_test_acc, class_acc_best)

        utils.save(model, os.path.join(args.save, 'weights.pt'))
        scheduler.step()

    logging.info(
        'Finish! best_test_overall_acc %f\t test_class_acc_when_best %f',
        best_test_acc, class_acc_best)
def main():
    """Run the architecture search on ModelNet10.

    Steps, in order:
      1. Exit with status 1 if CUDA is unavailable; otherwise seed NumPy and
         PyTorch and select the GPU given by ``args.gpu``.
      2. Build the ModelNet10 train/test datasets and ``DenseDataLoader``
         queues (the test split is used as the validation queue).
      3. Build the ``Network``, the SGD optimizer, a cosine-annealing
         scheduler and the ``Architect``.
      4. Overwrite ``args.epochs`` with
         ``warmup_dec_epoch + decision_freq * (num_edges - 1) + post_train + 1``
         where ``num_edges = model._steps * 2`` and ``post_train = 5``.
      5. For every epoch: train, validate, call ``edge_decision`` and, when it
         reports that memory was saved, rebuild both loaders with a batch size
         grown by ``args.batch_increase`` per decision so far.
      6. Save ``weights.pt`` under ``args.save`` each epoch and log the final
         genotype.

    Relies on the module-level ``args`` and ``writer`` objects.

    Returns:
        None.
    """
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    if args.random_seed:
        # Draw a plain Python int. Passing ``size=1`` to randint returns a
        # 1-element array, which then relies on deprecated array-to-scalar
        # conversion when used as a seed and is logged as ``array([...])``.
        args.seed = int(np.random.randint(0, 1000))

    np.random.seed(args.seed)
    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)

    # dataset modelnet
    pre_transform = T.NormalizeScale()
    transform = T.SamplePoints(args.num_points)
    data_root = os.path.join(args.data, 'modelnet10')
    train_dataset = GeoData.ModelNet(data_root, '10', True,
                                     transform, pre_transform)
    train_queue = DenseDataLoader(train_dataset,
                                  batch_size=args.batch_size,
                                  shuffle=True,
                                  num_workers=args.batch_size // 2)
    test_dataset = GeoData.ModelNet(data_root, '10', False,
                                    transform, pre_transform)
    # NOTE(review): this initial validation loader shuffles, while the one
    # rebuilt after an edge decision (below) does not -- confirm which
    # behaviour is intended. Left unchanged here.
    valid_queue = DenseDataLoader(test_dataset,
                                  batch_size=args.batch_size,
                                  shuffle=True,
                                  num_workers=args.batch_size // 2)
    n_classes = train_queue.dataset.num_classes

    criterion = torch.nn.CrossEntropyLoss().cuda()
    model = Network(args.init_channels, n_classes, args.num_cells, criterion,
                    args.n_steps, in_channels=args.in_channels,
                    emb_dims=args.emb_dims, dropout=args.dropout,
                    k=args.k).cuda()
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    # The epoch budget is derived from the number of edges, not taken from
    # the command line.
    num_edges = model._steps * 2
    post_train = 5
    args.epochs = (args.warmup_dec_epoch
                   + args.decision_freq * (num_edges - 1)
                   + post_train + 1)
    logging.info("total epochs: %d", args.epochs)

    optimizer = torch.optim.SGD(model.parameters(),
                                args.learning_rate,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(args.epochs), eta_min=args.learning_rate_min)
    # Calling ``get_lr()`` outside of ``scheduler.step()`` warns on
    # PyTorch >= 1.4 and may report a value that is not the one currently in
    # use; prefer ``get_last_lr()`` and fall back on older releases.
    current_lr = getattr(scheduler, 'get_last_lr', scheduler.get_lr)

    architect = Architect(model, args)

    # One entry per element of ``model.alphas_normal``: selected index starts
    # at -1 and every candidate flag starts as True.
    n_alphas = len(model.alphas_normal)
    normal_selected_idxs = torch.tensor(n_alphas * [-1],
                                        requires_grad=False,
                                        dtype=torch.int).cuda()
    normal_candidate_flags = torch.tensor(n_alphas * [True],
                                          requires_grad=False,
                                          dtype=torch.bool).cuda()
    logging.info('normal_selected_idxs: {}'.format(normal_selected_idxs))
    logging.info('normal_candidate_flags: {}'.format(normal_candidate_flags))
    model.normal_selected_idxs = normal_selected_idxs
    model.normal_candidate_flags = normal_candidate_flags

    print(F.softmax(torch.stack(model.alphas_normal, dim=0), dim=-1).detach())

    count = 0  # number of times the loaders have been rebuilt
    normal_probs_history = []
    train_losses, valid_losses = utils.AverageMeter(), utils.AverageMeter()
    for epoch in range(args.epochs):
        lr = current_lr()[0]
        logging.info('epoch %d lr %e', epoch, lr)

        # training
        # ``att`` and ``beta`` are read before this epoch's training step and
        # logged after it, as in the original code.
        att = model.show_att()
        beta = model.show_beta()
        train_acc, train_losses = train(train_queue, valid_queue, model,
                                        architect, criterion, optimizer, lr,
                                        train_losses)
        valid_overall_acc, valid_class_acc, valid_losses = infer(
            valid_queue, model, criterion, valid_losses)
        logging.info(
            'train_acc %f\tvalid_overall_acc %f \t valid_class_acc %f',
            train_acc, valid_overall_acc, valid_class_acc)
        logging.info('beta %s', beta.cpu().detach().numpy())
        logging.info('att %s', att.cpu().detach().numpy())

        # make edge decisions
        (saved_memory_normal,
         model.normal_selected_idxs,
         model.normal_candidate_flags) = edge_decision(
            'normal',
            model.alphas_normal,
            model.normal_selected_idxs,
            model.normal_candidate_flags,
            normal_probs_history,
            epoch,
            model,
            args)

        if saved_memory_normal:
            # Drop the old loaders before building larger-batch replacements.
            del train_queue, valid_queue
            torch.cuda.empty_cache()

            count += 1
            new_batch_size = args.batch_size + args.batch_increase * count
            logging.info("new_batch_size = {}".format(new_batch_size))

            train_queue = DenseDataLoader(train_dataset,
                                          batch_size=new_batch_size,
                                          shuffle=True,
                                          num_workers=args.batch_size // 2)
            valid_queue = DenseDataLoader(test_dataset,
                                          batch_size=new_batch_size,
                                          shuffle=False,
                                          num_workers=args.batch_size // 2)

            # post validation
            # NOTE(review): placed inside the edge-decision branch; the
            # whitespace-collapsed source does not make this unambiguous.
            if args.post_val:
                post_valid_overall_acc, _, valid_losses = infer(
                    valid_queue, model, criterion, valid_losses)
                logging.info('post_valid_overall_acc %f',
                             post_valid_overall_acc)

        writer.add_scalar('stats/train_acc', train_acc, epoch)
        writer.add_scalar('stats/valid_overall_acc', valid_overall_acc, epoch)
        writer.add_scalar('stats/valid_class_acc', valid_class_acc, epoch)
        utils.save(model, os.path.join(args.save, 'weights.pt'))
        scheduler.step()

    logging.info("#" * 30 + " Done " + "#" * 30)
    logging.info('genotype = %s', model.get_genotype())
def main():
    """Run the architecture search on the PPI dataset.

    Steps, in order:
      1. Exit with status 1 if CUDA is unavailable; otherwise seed NumPy and
         PyTorch and select the GPU given by ``args.gpu``.
      2. Build the PPI ``train`` and ``val`` splits and their ``DataLoader``
         queues.
      3. Build the ``Network`` (with ``BCEWithLogitsLoss``), the SGD
         optimizer, a cosine-annealing scheduler and the ``Architect``.
      4. Overwrite ``args.epochs`` with
         ``warmup_dec_epoch + decision_freq * (num_edges - 1) + post_train + 1``
         where ``num_edges = model._steps * 2`` and ``post_train = 5``.
      5. For every epoch: train, validate, call ``edge_decision`` and, when it
         reports that memory was saved, rebuild both loaders with a batch size
         grown by ``args.batch_increase`` per decision so far.
      6. Save ``weights.pt`` under ``args.save`` each epoch and log the final
         genotype.

    Relies on the module-level ``args`` and ``writer`` objects.

    Returns:
        None.
    """
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    if args.random_seed:
        # Draw a plain Python int. Passing ``size=1`` to randint returns a
        # 1-element array, which then relies on deprecated array-to-scalar
        # conversion when used as a seed and is logged as ``array([...])``.
        args.seed = int(np.random.randint(0, 1000))

    np.random.seed(args.seed)
    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)

    # dataset ppi
    data_root = os.path.join(args.data, 'ppi')
    train_dataset = GeoData.PPI(data_root, split='train')
    train_queue = DataLoader(train_dataset,
                             batch_size=args.batch_size,
                             shuffle=True)
    valid_dataset = GeoData.PPI(data_root, split='val')
    valid_queue = DataLoader(valid_dataset,
                             batch_size=args.batch_size,
                             shuffle=False)
    n_classes = train_queue.dataset.num_classes

    criterion = torch.nn.BCEWithLogitsLoss().cuda()
    model = Network(args.init_channels, n_classes, args.num_cells, criterion,
                    args.n_steps, in_channels=args.in_channels).cuda()
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    # The epoch budget is derived from the number of edges, not taken from
    # the command line.
    num_edges = model._steps * 2
    post_train = 5
    args.epochs = (args.warmup_dec_epoch
                   + args.decision_freq * (num_edges - 1)
                   + post_train + 1)
    logging.info("total epochs: %d", args.epochs)

    optimizer = torch.optim.SGD(model.parameters(),
                                args.learning_rate,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(args.epochs), eta_min=args.learning_rate_min)
    # Calling ``get_lr()`` outside of ``scheduler.step()`` warns on
    # PyTorch >= 1.4 and may report a value that is not the one currently in
    # use; prefer ``get_last_lr()`` and fall back on older releases.
    current_lr = getattr(scheduler, 'get_last_lr', scheduler.get_lr)

    architect = Architect(model, args)

    # One entry per element of ``model.alphas_normal``: selected index starts
    # at -1 and every candidate flag starts as True.
    n_alphas = len(model.alphas_normal)
    normal_selected_idxs = torch.tensor(n_alphas * [-1],
                                        requires_grad=False,
                                        dtype=torch.int).cuda()
    normal_candidate_flags = torch.tensor(n_alphas * [True],
                                          requires_grad=False,
                                          dtype=torch.bool).cuda()
    logging.info('normal_selected_idxs: {}'.format(normal_selected_idxs))
    logging.info('normal_candidate_flags: {}'.format(normal_candidate_flags))
    model.normal_selected_idxs = normal_selected_idxs
    model.normal_candidate_flags = normal_candidate_flags

    print(F.softmax(torch.stack(model.alphas_normal, dim=0), dim=-1).detach())

    count = 0  # number of times the loaders have been rebuilt
    normal_probs_history = []
    train_losses, valid_losses = utils.AverageMeter(), utils.AverageMeter()
    for epoch in range(args.epochs):
        lr = current_lr()[0]
        logging.info('epoch %d lr %e', epoch, lr)

        # training
        train_acc, train_losses = train(train_queue, valid_queue, model,
                                        architect, criterion, optimizer, lr,
                                        train_losses)
        valid_acc, valid_losses = infer(valid_queue, model, criterion,
                                        valid_losses)
        logging.info('train_acc %f\tvalid_acc %f', train_acc, valid_acc)

        # make edge decisions
        (saved_memory_normal,
         model.normal_selected_idxs,
         model.normal_candidate_flags) = edge_decision(
            'normal',
            model.alphas_normal,
            model.normal_selected_idxs,
            model.normal_candidate_flags,
            normal_probs_history,
            epoch,
            model,
            args)

        if saved_memory_normal:
            # Drop the old loaders before building larger-batch replacements.
            del train_queue, valid_queue
            torch.cuda.empty_cache()

            count += 1
            new_batch_size = args.batch_size + args.batch_increase * count
            logging.info("new_batch_size = {}".format(new_batch_size))

            train_queue = DataLoader(train_dataset,
                                     batch_size=new_batch_size,
                                     shuffle=True)
            valid_queue = DataLoader(valid_dataset,
                                     batch_size=new_batch_size,
                                     shuffle=False)

            # NOTE(review): placed inside the edge-decision branch; the
            # whitespace-collapsed source does not make this unambiguous.
            if args.post_val:
                # Pass ``valid_losses`` exactly as the main-loop call above
                # does; the original call here omitted it. Note that
                # ``valid_acc`` is overwritten, so the post-decision value is
                # what gets written to the summary writer below.
                valid_acc, valid_losses = infer(valid_queue, model, criterion,
                                                valid_losses)
                logging.info('post valid_acc %f', valid_acc)

        writer.add_scalar('stats/train_acc', train_acc, epoch)
        writer.add_scalar('stats/valid_acc', valid_acc, epoch)
        utils.save(model, os.path.join(args.save, 'weights.pt'))
        scheduler.step()

    logging.info("#" * 30 + " Done " + "#" * 30)
    logging.info('genotype = %s', model.get_genotype())