Exemple #1
0
    # Loss function used by the training loop that follows.
    criterion = nn.CrossEntropyLoss()  # multi-class classification task

    # Move the model to the configured device and switch it to training mode.
    model = model.to(device)
    model.train()

    # DONOTCHANGE: They are reserved for nsml
    bind_model(model)
    # After binding, restore checkpoint '15' of an earlier session and
    # immediately save the restored state again under the name 'stillgoing'.
    nsml.load(checkpoint='15', session='team_62/airush1/40')
    nsml.save('stillgoing')

    # NOTE(review): presumably hands control to nsml when the script is started
    # in pause mode — confirm against the nsml documentation.
    if args.pause:
        nsml.paused(scope=locals())
    if args.mode == "train":
        # Warning: Do not load data before this line
        dataloader = train_dataloader(args.input_size, args.batch_size,
                                      args.num_workers)
        # Epochs are numbered from 1 to args.epochs inclusive.
        for epoch_idx in range(1, args.epochs + 1):
            # Per-epoch accumulators, reset at the start of every epoch.
            total_loss = 0
            total_correct = 0
            for batch_idx, (image, tags) in enumerate(dataloader):
                # Clear gradients left over from the previous step.
                optimizer.zero_grad()
                image = image.to(device)  # e.g. torch.Size([64, 3, 224, 224])

                tags = tags.to(device)  # e.g. torch.Size([64])
                # The model output is cast to float64 before the loss is computed.
                output = model(image).double()  # e.g. torch.Size([64, 350])

                loss = criterion(output,
                                 tags)  # TODO: double-check the choice of criterion
                # Backpropagate and apply one optimizer update.
                loss.backward()
                optimizer.step()
Exemple #2
0
    # When resume information is supplied in train mode, restore that
    # checkpoint and immediately re-save it under the name 'dontgiveup'.
    if re_train_info is not None and args.mode == "train":
        print(re_train_info)
        nsml.load(checkpoint=re_train_info['checkpoint'],
                  session=re_train_info['session'])
        nsml.save('dontgiveup')

    # NOTE(review): presumably hands control to nsml when the script is started
    # in pause mode — confirm against the nsml documentation.
    if args.pause:
        nsml.paused(scope=locals())
    if args.mode == "train":
        # Warning: Do not load data before this line
        # train_dataloader returns two loaders here; the second one is
        # consumed by validation() below.
        dataloader, valid_dataloader = train_dataloader(
            args.input_size,
            args.batch_size,
            args.num_workers,
            test_bs=test_bs,
            br_multi_oh=use_train_time_multi_calss_info_add  #)
            ,
            print_nor_info=False,
            use_last_fine_tune=use_last_fine_tune)

        # Run the model over valid_dataloader and derive predicted class indices.
        # NOTE(review): val_step_num is not used in the lines visible here and
        # the body appears to be cut off — confirm against the full file.
        def validation(val_step_num):
            total_valid_correct = 0
            # Switch to evaluation mode for the validation pass.
            model.eval()
            for batch_idx, (image, tags) in enumerate(valid_dataloader):
                image = image.to(device)
                tags = tags.to(device)
                output = model(image).double()
                # Softmax over dim 1, then argmax along axis 1 gives one
                # predicted index per row.
                # NOTE(review): to_np presumably converts a tensor to a NumPy
                # array — confirm against its definition.
                output_prob = F.softmax(output, dim=1)
                predict_vector = np.argmax(to_np(output_prob), axis=1)
                label_vector = to_np(tags)
        # Switch the model to evaluation mode.
        model.eval()
        transform = None
        # Base batch size depends on the model name: 256 for "Resnet18", else 32.
        batch_size = (256 if m_name == "Resnet18" else 32)
        if args.transform == "5crop":
            # Resize to a square of input_size, take five crops, and stack the
            # crops of one image into a single tensor.
            transform = transforms.Compose([transforms.Resize((args.input_size, args.input_size)),
                                            transforms.FiveCrop((args.input_size, args.input_size)), transforms.Lambda(
                    lambda crops: torch.stack([transforms.ToTensor()(crop) for crop in crops]))])
            # Each image now yields 5 crops, so the batch size is divided by 5.
            batch_size //= 5
        elif args.transform == "10crop":
            # NOTE(review): unlike the 5crop branch, no Resize precedes TenCrop
            # here — confirm this is intended.
            transform = transforms.Compose([transforms.TenCrop((args.input_size, args.input_size)), transforms.Lambda(
                lambda crops: torch.stack([transforms.ToTensor()(crop) for crop in crops]))])
            # Each image now yields 10 crops, so the batch size is divided by 10.
            batch_size //= 10

        # The same batch size and transform are passed for both the regular and
        # the infer_* arguments.
        dataloader, val_dataloader = train_dataloader(args.input_size, batch_size,
                                                      args.num_workers,
                                                      infer_batch_size=batch_size,
                                                      transform=transform,
                                                      infer_transform=transform)

        # Load each listed checkpoint in turn.
        for nsml_cp in nsml_checkpoints:
            # Warning: Do not load data before this line
            nsml.load(checkpoint=nsml_cp, session="team_13/airush1/" + nsml_ss)

            # Accumulators are reset for every checkpoint.
            total_loss = 0.
            total_correct = 0.
            total_ranking_ap_score = 0.
            total_ranking_loss = 0.
            # Log which model class, criterion class, session and checkpoint
            # are in use.
            print(model.__class__.__name__)
            print(criterion.__class__.__name__)
            print("team_13/airush1/" + nsml_ss, nsml_cp)
            # eval!
Exemple #4
0
    # Combine the three models into a single ensemble model.
    model = MyEnsembleTTA(modelA,modelB,modelC)
    # NOTE(review): presumably folds batch-norm layers into their preceding
    # layers throughout the model — confirm against fuse_bn_recursively.
    model = fuse_bn_recursively(model)
    model = model.to(device)  # move to the configured device (original note: use gpu)
    #summary(model, (3,args.input_size,args.input_size))
    # DONOTCHANGE: They are reserved for nsml
    bind_model(model)
    # Checkpoint restore / re-save is currently disabled:
    #nsml.load(checkpoint='3', session='team_27/airush1/392 ')
    #nsml.save('dontgiveup')


    # NOTE(review): presumably hands control to nsml when the script is started
    # in pause mode — confirm against the nsml documentation.
    if args.pause:
        nsml.paused(scope=locals())
    if args.mode == "train":
        # Warning: Do not load data before this line
        # The loaders are built with ten times the configured batch size.
        dataloader, valid_dataloader = train_dataloader(args.input_size, args.batch_size*10, args.num_workers, test_bs =  False
                                                        , br_multi_oh=True#)
                                                        ,print_nor_info = False,use_last_fine_tune=True)

        # Run the model over valid_dataloader and compute per-batch accuracy.
        # NOTE(review): val_step_num is not used in the lines visible here and
        # the body appears to be cut off — confirm against the full file.
        def validation(val_step_num):
            total_valid_correct = 0
            # Switch to evaluation mode for the validation pass.
            model.eval()
            for batch_idx, (image, tags) in enumerate(valid_dataloader):
                image = image.to(device)

                tags = tags.to(device)
                output = model(image).double()
                # Softmax over dim 1, then argmax along axis 1 gives one
                # predicted index per row.
                # NOTE(review): to_np presumably converts a tensor to a NumPy
                # array — confirm against its definition.
                output_prob = F.softmax(output, dim=1)
                predict_vector = np.argmax(to_np(output_prob), axis=1)
                label_vector = to_np(tags)
                # Element-wise match between predictions and labels; accuracy
                # is the fraction of matches in this batch.
                bool_vector = predict_vector == label_vector
                accuracy = bool_vector.sum() / len(bool_vector)
        # Optionally resume from the nsml checkpoint/session given in args.
        if args.load_nsml_cp and args.nsml_checkpoint is not None and args.nsml_session is not None:
            nsml.load(checkpoint=args.nsml_checkpoint,
                      session=args.nsml_session)
            print("load", args.nsml_session, args.nsml_checkpoint)
            # A numeric checkpoint name is treated as a number of epochs: both
            # the first epoch index and the final epoch bound are shifted by it.
            if str.isnumeric(args.nsml_checkpoint):
                epoch_start += int(args.nsml_checkpoint)
                args.epochs += int(args.nsml_checkpoint)

        if args.only_save:
            # Only save the model under a "<session>,<checkpoint>" name; the
            # training branch below is skipped.
            # NOTE(review): this concatenation raises TypeError if either value
            # is None; the None checks above do not guard this branch — confirm
            # callers always set both when only_save is used.
            nsml.save(args.nsml_session + "," + args.nsml_checkpoint)
        else:
            dataloader, val_dataloader = train_dataloader(
                args.input_size,
                batch_size,
                args.num_workers,
                infer_batch_size=infer_batch_size,
                transform=transforms.Compose(transform_list),
                infer_transform=transforms.Compose(infer_transform_list),
                val_ratio=args.val_ratio,
                use_random_label=args.use_random_label,
                seed=args.seed)
            # NOTE(review): 'sava_step_ratio' looks like a misspelling of
            # 'save_step_ratio', but it must match the argument definition
            # elsewhere — confirm before renaming.
            if args.sava_step_ratio > 0:
                # Interval, in steps, expressed as a fraction of the number of
                # batches in one pass over dataloader.
                save_step_interval = int(
                    len(dataloader) * args.sava_step_ratio)
                print("save_step_interval", save_step_interval, "total steps",
                      len(dataloader))
            else:
                # A non-positive ratio leaves the interval unset.
                save_step_interval = None

            # Epoch indices run from epoch_start to args.epochs inclusive.
            for epoch_idx in range(epoch_start, args.epochs + 1):
                if args.use_train:
                    total_loss = 0.