criterion = nn.CrossEntropyLoss()  # multi-class classification task
model = model.to(device)
model.train()

# DONOTCHANGE: They are reserved for nsml
bind_model(model)
# nsml load below
nsml.load(checkpoint='15', session='team_62/airush1/40')
nsml.save('stillgoing')
if args.pause:
    nsml.paused(scope=locals())

if args.mode == "train":
    # Warning: Do not load data before this line
    dataloader = train_dataloader(args.input_size, args.batch_size, args.num_workers)
    for epoch_idx in range(1, args.epochs + 1):
        total_loss = 0
        total_correct = 0
        for batch_idx, (image, tags) in enumerate(dataloader):
            optimizer.zero_grad()
            image = image.to(device)  # torch.Size([64, 3, 224, 224])
            tags = tags.to(device)  # torch.Size([64])
            output = model(image).double()  # torch.Size([64, 350])
            loss = criterion(output, tags)  # TODO: double-check the criterion
            loss.backward()
            optimizer.step()
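            # A minimal sketch of how this loop usually finishes an epoch:
            # accumulate running metrics, then report and checkpoint. The
            # nsml.report keyword names are an assumption, not taken from
            # this snippet.
            total_loss += loss.item()
            total_correct += (output.argmax(dim=1) == tags).sum().item()
        nsml.report(summary=True, step=epoch_idx,  # assumed reporting kwargs
                    loss=total_loss / len(dataloader),
                    accuracy=total_correct / len(dataloader.dataset))
        nsml.save(str(epoch_idx))  # numeric checkpoint name, matching nsml.load above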
if re_train_info is not None and args.mode == "train":
    print(re_train_info)
    nsml.load(checkpoint=re_train_info['checkpoint'], session=re_train_info['session'])
    nsml.save('dontgiveup')

if args.pause:
    nsml.paused(scope=locals())

if args.mode == "train":
    # Warning: Do not load data before this line
    dataloader, valid_dataloader = train_dataloader(
        args.input_size, args.batch_size, args.num_workers,
        test_bs=test_bs,
        br_multi_oh=use_train_time_multi_calss_info_add,
        print_nor_info=False,
        use_last_fine_tune=use_last_fine_tune)

    def validation(val_step_num):
        total_valid_correct = 0
        model.eval()
        for batch_idx, (image, tags) in enumerate(valid_dataloader):
            image = image.to(device)
            tags = tags.to(device)
            output = model(image).double()
            output_prob = F.softmax(output, dim=1)
            predict_vector = np.argmax(to_np(output_prob), axis=1)
            label_vector = to_np(tags)
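            # Hedged completion of this helper: the per-batch accuracy
            # computation mirrors the ensemble variant of this snippet;
            # the print format and return value are assumptions.
            total_valid_correct += (predict_vector == label_vector).sum()
        valid_accuracy = total_valid_correct / len(valid_dataloader.dataset)
        print("val_step", val_step_num, "valid_accuracy", valid_accuracy)
        model.train()  # restore train mode before resuming the training loop
        return valid_accuracy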
model.eval()
transform = None
batch_size = 256 if m_name == "Resnet18" else 32
if args.transform == "5crop":
    transform = transforms.Compose([
        transforms.Resize((args.input_size, args.input_size)),
        transforms.FiveCrop((args.input_size, args.input_size)),
        transforms.Lambda(
            lambda crops: torch.stack([transforms.ToTensor()(crop) for crop in crops]))])
    batch_size //= 5
elif args.transform == "10crop":
    transform = transforms.Compose([
        transforms.TenCrop((args.input_size, args.input_size)),
        transforms.Lambda(
            lambda crops: torch.stack([transforms.ToTensor()(crop) for crop in crops]))])
    batch_size //= 10
dataloader, val_dataloader = train_dataloader(
    args.input_size, batch_size, args.num_workers,
    infer_batch_size=batch_size,
    transform=transform, infer_transform=transform)
for nsml_cp in nsml_checkpoints:
    # Warning: Do not load data before this line
    nsml.load(checkpoint=nsml_cp, session="team_13/airush1/" + nsml_ss)
    total_loss = 0.
    total_correct = 0.
    total_ranking_ap_score = 0.
    total_ranking_loss = 0.
    print(model.__class__.__name__)
    print(criterion.__class__.__name__)
    print("team_13/airush1/" + nsml_ss, nsml_cp)
    # eval!
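    # With FiveCrop/TenCrop, each batch arrives as [bs, ncrops, C, H, W];
    # a minimal sketch of the standard crop-averaging step that the
    # "# eval!" marker above implies (the rest of the loop body is omitted):
    for batch_idx, (image, tags) in enumerate(val_dataloader):
        bs, ncrops, c, h, w = image.size()
        image = image.view(-1, c, h, w).to(device)    # fold crops into the batch dim
        output = model(image)                         # [bs * ncrops, num_classes]
        output = output.view(bs, ncrops, -1).mean(1)  # average logits over crops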
model = MyEnsembleTTA(modelA, modelB, modelC)
model = fuse_bn_recursively(model)
model = model.to(device)  # use gpu
# summary(model, (3, args.input_size, args.input_size))

# DONOTCHANGE: They are reserved for nsml
bind_model(model)
# nsml.load(checkpoint='3', session='team_27/airush1/392 ')
# nsml.save('dontgiveup')
if args.pause:
    nsml.paused(scope=locals())

if args.mode == "train":
    # Warning: Do not load data before this line
    dataloader, valid_dataloader = train_dataloader(
        args.input_size, args.batch_size * 10, args.num_workers,
        test_bs=False,
        br_multi_oh=True,
        print_nor_info=False,
        use_last_fine_tune=True)

    def validation(val_step_num):
        total_valid_correct = 0
        model.eval()
        for batch_idx, (image, tags) in enumerate(valid_dataloader):
            image = image.to(device)
            tags = tags.to(device)
            output = model(image).double()
            output_prob = F.softmax(output, dim=1)
            predict_vector = np.argmax(to_np(output_prob), axis=1)
            label_vector = to_np(tags)
            bool_vector = predict_vector == label_vector
            accuracy = bool_vector.sum() / len(bool_vector)
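# MyEnsembleTTA is not defined in this snippet. A hypothetical minimal
# wrapper consistent with how it is used above (three sub-models, one
# averaged output) could look like this; it is illustrative only:
import torch
import torch.nn as nn

class MyEnsembleTTA(nn.Module):
    def __init__(self, modelA, modelB, modelC):
        super().__init__()
        self.models = nn.ModuleList([modelA, modelB, modelC])

    def forward(self, x):
        # average raw logits across ensemble members; the validation code
        # above then applies softmax once to the averaged output
        outputs = [m(x) for m in self.models]
        return torch.stack(outputs, dim=0).mean(dim=0)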
if args.load_nsml_cp and args.nsml_checkpoint is not None and args.nsml_session is not None:
    nsml.load(checkpoint=args.nsml_checkpoint, session=args.nsml_session)
    print("load", args.nsml_session, args.nsml_checkpoint)
    if str.isnumeric(args.nsml_checkpoint):
        epoch_start += int(args.nsml_checkpoint)
        args.epochs += int(args.nsml_checkpoint)

if args.only_save:
    nsml.save(args.nsml_session + "," + args.nsml_checkpoint)
else:
    dataloader, val_dataloader = train_dataloader(
        args.input_size, batch_size, args.num_workers,
        infer_batch_size=infer_batch_size,
        transform=transforms.Compose(transform_list),
        infer_transform=transforms.Compose(infer_transform_list),
        val_ratio=args.val_ratio,
        use_random_label=args.use_random_label,
        seed=args.seed)
    if args.sava_step_ratio > 0:
        save_step_interval = int(len(dataloader) * args.sava_step_ratio)
        print("save_step_interval", save_step_interval, "total steps", len(dataloader))
    else:
        save_step_interval = None
    for epoch_idx in range(epoch_start, args.epochs + 1):
        if args.use_train:
            total_loss = 0.
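            # A sketch of how save_step_interval is typically consumed in
            # the batch loop that follows; the checkpoint naming scheme is
            # an assumption, not taken from this snippet.
            for step, (image, tags) in enumerate(dataloader):
                # ... forward/backward/optimizer step as in the snippets above ...
                if save_step_interval is not None and (step + 1) % save_step_interval == 0:
                    nsml.save("{}_{}".format(epoch_idx, step + 1))  # mid-epoch checkpoint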