Example #1
def add_transform_to_xfo(usd_xform, obj, opt_anim):
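    # Author the object's transform on the USD xform prim: a single static
    # transform op when the object is not animated, otherwise one time sample
    # per frame of the inclusive (start, end) range given by opt_anim.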
    if opt_anim is None or not utils.is_transform_animated(obj, opt_anim):
        usd_xform.AddTransformOp().Set(utils.build_transform(obj))
    else:
        usd_tfm = usd_xform.AddTransformOp()
        for frame in range(opt_anim[0], opt_anim[1] + 1):
            usd_tfm.Set(utils.build_transform(obj, frame), Usd.TimeCode(frame))
Example #2
def main():
    N_FOLDS = 5
    BATCH_SIZE = args.batch
    IMAGE_SIZE = args.size
    model_name = args.model
    image_size = args.size
    num_workers = 64

    assert args.task in ['class', 'reg']
    if args.task == 'class':
        n_class = utils.N_CLASS
    elif args.task == 'reg':
        n_class = utils.N_CLASS_REG

    print(f'found {torch.cuda.device_count()} gpus !!')
    if args.multi:
        print('use multi gpu !!')

    device = torch.device("cuda:0")
    train_df = pd.read_csv(utils.TRAIN_CSV_PATH)
    if args.debug:
        train_df = train_df[:1000]

    skf = StratifiedKFold(n_splits=N_FOLDS, random_state=41, shuffle=True)
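    # Stratify the folds on the diagnosis label so each split keeps the class
    # distribution; the fixed random_state keeps fold assignment reproducible.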
    indices = list(skf.split(train_df, train_df['diagnosis']))
    if not args.cv:
        print('do not use cross validation')
        #indices = [indices[0]]
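        # NOTE: despite its .json extension, folds_index.json holds pickled
        # (train_index, valid_index) pairs, presumably saved at training time.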
        with open('folds_index.json', 'rb') as f:
            indices = pickle.load(f)

    # cross validation
    oof_preds = np.zeros((len(train_df), n_class))
    print(f'tta: {args.tta}')
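    # With TTA enabled, switch to the test-time augmentation transforms and pass
    # args.tta to utils.predict so each sample is scored over several augmented
    # passes; otherwise do a single pass with the plain validation transforms.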
    if args.tta:
        tfms_mode = 'test'
        num_tta = args.tta
    else:
        tfms_mode = 'val'
        num_tta = 1

    val_tfms = utils.build_transform(size=image_size, mode=tfms_mode)
    for i_fold, (train_index, valid_index) in tqdm(enumerate(indices)):
        valid = train_df.iloc[valid_index]
        val_dataset = RetinopathyDataset(df=valid,
                                         mode='train',
                                         transform=val_tfms)
        val_loader = torch.utils.data.DataLoader(val_dataset,
                                                 batch_size=BATCH_SIZE,
                                                 shuffle=False,
                                                 pin_memory=True,
                                                 num_workers=num_workers)
        model_path = os.path.join(args.weight, f'model_fold{i_fold}')
        model = utils.load_pytorch_model(model_name, model_path, n_class)
        if args.multi:
            model = nn.DataParallel(model)
        y_pred = utils.predict(model, val_loader, n_class, device, tta=num_tta)
        if args.cv:
            oof_preds[valid_index] = y_pred
    if args.cv:
        valid_preds = oof_preds
        valid_true = train_df['diagnosis']
#         val_kappa = cohen_kappa_score(np.argmax(oof_preds, axis=1), train_df['diagnosis'],
#                                       weights='quadratic')
    else:
        valid_preds = y_pred
        valid_true = valid['diagnosis']


#         val_kappa = cohen_kappa_score(np.argmax(y_pred, axis=1), valid['diagnosis'],
#                                       weights='quadratic')
    if args.task == 'class':
        round_valid_preds = np.argmax(valid_preds, axis=1)
    elif args.task == 'reg':
        print('optimizing threshold ...')
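        # For the regression task, fit class-boundary thresholds on the
        # validation predictions and round the continuous outputs with them.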
        optR = utils.OptimizedRounder()
        optR.fit(valid_preds, valid_true)
        coef = optR.coefficients()
        print(f'best coef: {coef}')
        round_valid_preds = optR.predict(valid_preds, coef)
    val_kappa = cohen_kappa_score(round_valid_preds,
                                  valid_true,
                                  weights='quadratic')

    print(f'best val kappa: {val_kappa}')

    #     test_csv = pd.read_csv(utils.TEST_CSV_PATH)
    #     test_tfms = utils.build_transform(size=IMAGE_SIZE, mode='test')
    #     test_dataset = RetinopathyDataset(df=test_csv, mode='test', transform=test_tfms,
    #                                       auto_crop=True, add_blur=True)
    #     test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=BATCH_SIZE,
    #                                               shuffle=False, pin_memory=True,
    #                                               num_workers=num_workers)

    #     test_preds = np.zeros((len(test_csv), utils.N_CLASS))
    #     for i in range(len(indices)):
    #         model = utils.load_pytorch_model(model_name, os.path.join(result_dir, f'model_fold{i}'))
    #         test_preds += utils.predict(model, test_loader, n_class=5, device=device)

    #     submission_csv = pd.read_csv(utils.SAMPLE_SUBMISSION_PATH)
    #     submission_csv['diagnosis'] = np.argmax(test_preds, axis=1)
    #     submission_csv.to_csv(os.path.join(result_dir, 'submission.csv'),
    #                           index=False)

    print('finish!!!')
Example #3
def main():
    N_FOLDS = 5
    BATCH_SIZE = args.batch
    num_workers = 64

    print(f'found {torch.cuda.device_count()} gpus !!')
    if args.multi:
        print('use multi gpu !!')

    device = torch.device("cuda:0")
    train_df = pd.read_csv(utils.TRAIN_CSV_PATH)
    if args.debug:
        train_df = train_df[:1000]

    print('do not use cross validation')
    with open('folds_index.json', 'rb') as f:
        indices = pickle.load(f)
    indices = indices[0]
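    # Keep only the first fold's (train_index, valid_index) pair; every model
    # below is evaluated on this same validation split.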

    print(f'tta: {args.tta}')
    if args.tta:
        tfms_mode = 'test'
        num_tta = args.tta
    else:
        tfms_mode = 'val'
        num_tta = 1

    models = [
        {
            'path': 'results/20190725135839/model_fold0',
            'model_name': 'efficientnet-b3',
            'image_size': 256,
            'task': 'class'
        },
        {
            'path': 'results/20190804235524/model_fold0',
            'model_name': 'efficientnet-b3',
            'image_size': 320,
            'task': 'class'
        },
        {
            'path': 'results/20190805111653/model_fold0',
            'model_name': 'efficientnet-b3',
            'image_size': 512,
            'task': 'class'
        },
        {
            'path': 'results/20190723033442/model_fold0',
            'model_name': 'se_resnext50_32x4d',
            'image_size': 256,
            'task': 'class'
        },
        {
            'path': 'results/20190723134833/model_fold0',
            'model_name': 'se_resnext50_32x4d',
            'image_size': 320,
            'task': 'class'
        },
    ]

    valid_preds = 0
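    # Soft ensemble: sum each model's class scores; the argmax of the sum is
    # the same as the argmax of the average.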
    for model_conf in tqdm(models):
        if model_conf['task'] == 'class':
            n_class = utils.N_CLASS
        elif model_conf['task'] == 'reg':
            n_class = utils.N_CLASS_REG
        val_tfms = utils.build_transform(size=model_conf['image_size'],
                                         mode=tfms_mode)
        train_index, valid_index = indices
        valid = train_df.iloc[valid_index]
        val_dataset = RetinopathyDataset(df=valid,
                                         mode='train',
                                         transform=val_tfms)
        val_loader = torch.utils.data.DataLoader(val_dataset,
                                                 batch_size=BATCH_SIZE,
                                                 shuffle=False,
                                                 pin_memory=True,
                                                 num_workers=num_workers)
        model_path = model_conf['path']
        model = utils.load_pytorch_model(model_conf['model_name'], model_path,
                                         n_class)
        if args.multi:
            model = nn.DataParallel(model)
        y_pred = utils.predict(model, val_loader, n_class, device, tta=num_tta)
        #y_pred = softmax(y_pred, axis=1)
        valid_preds += y_pred
        valid_true = valid['diagnosis']
    round_valid_preds = np.argmax(valid_preds, axis=1)
    val_kappa = cohen_kappa_score(round_valid_preds,
                                  valid_true,
                                  weights='quadratic')

    print(f'best val kappa: {val_kappa}')

    print('finish!!!')
Example #4
def main():
    config = utils.load_yaml(args.config)
    task = config['task']
    EPOCHS = config['epoch']
    N_FOLDS = 5
    BATCH_SIZE = config['batchsize']
    IMAGE_SIZE = config['image_size']
    model_name = config['model']
    optimizer_name = config['optimizer']
    loss = config['loss']
    lr = float(config['lr'])
    n_class = config['n_class']
    lr_scheduler = config.get('lr_scheduler')
    azure_run = None
    tb_writer = None
    num_workers = 64
    experiment_name = datetime.strftime(datetime.now(), '%Y%m%d%H%M%S')

    print(f'found {torch.cuda.device_count()} gpus !!')
    try:

        if args.debug:
            print('running in debug mode')
            EPOCHS = 1
            N_FOLDS = 2
        if args.debug:
            result_dir = Path(utils.RESULT_DIR) / ('debug-' + experiment_name)
        else:
            result_dir = Path(utils.RESULT_DIR) / experiment_name
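            # Outside debug mode, start an Azure ML run for this experiment
            # and log the hyperparameters.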
            ws = Workspace.from_config('.aml_config/config.json')
            exp = Experiment(workspace=ws, name='kaggle-aptos2019')
            azure_run = exp.start_logging()
            azure_run.log('experiment name', experiment_name)
            azure_run.log('epoch', EPOCHS)
            azure_run.log('batch size', BATCH_SIZE)
            azure_run.log('image size', IMAGE_SIZE)
            azure_run.log('model', model_name)
            azure_run.log('optimizer', optimizer_name)
            azure_run.log('loss_name', loss['name'])
            azure_run.log('lr', lr)
            azure_run.log('lr_scheduler', lr_scheduler)
            azure_run.log('task', task)
            if args.cv:
                azure_run.log('cv', N_FOLDS)
            else:
                azure_run.log('cv', 0)

        if args.multi:
            print('use multi gpu !!')

        os.mkdir(result_dir)
        print(f'created: {result_dir}')
        utils.save_yaml(result_dir / Path(args.config).name, config)

        #         if not args.debug:
        #             tb_writer = SummaryWriter(log_dir=result_dir)

        device = torch.device("cuda:0")
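        # Collect everything utils.run_model needs as keyword arguments; the
        # fold-specific keys (indices, model path, azure run) are set per fold below.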
        config = {
            'epochs': EPOCHS,
            'multi': args.multi,
            'batch_size': BATCH_SIZE,
            'image_size': IMAGE_SIZE,
            'model_name': model_name,
            'n_class': n_class,
            'optimizer_name': optimizer_name,
            'loss': loss,
            'lr': lr,
            'lr_scheduler': lr_scheduler,
            'task': task,
            'device': device,
            'num_workers': num_workers,
        }

        print(config)

        if not args.debug:
            slack.notify_start(experiment_name, config)
        train_df = pd.read_csv(utils.TRAIN_CSV_PATH)
        if args.debug:
            train_df = train_df[:1000]
        config['df'] = train_df

        skf = StratifiedKFold(n_splits=N_FOLDS, random_state=41, shuffle=True)
        indices = list(skf.split(train_df, train_df['diagnosis']))
        if not args.cv:
            print('do not use cross validation')
            indices = [indices[0]]

        # cross validation
        oof_preds = np.zeros((len(train_df), n_class))
        for i_fold, (train_index, valid_index) in tqdm(enumerate(indices)):
            model_path = result_dir / f'model_fold{i_fold}'
            config['train_index'] = train_index
            config['valid_index'] = valid_index
            config['model_path'] = str(model_path)
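            # Fold 0 logs to the parent Azure run; later folds log to child runs.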
            if azure_run:
                if i_fold == 0:
                    config['azure_run'] = azure_run
                    y_pred, y_true = utils.run_model(**config)
                else:
                    with azure_run.child_run() as child:
                        config['azure_run'] = child
                        y_pred, y_true = utils.run_model(**config)
            else:
                y_pred, y_true = utils.run_model(**config)
            if args.cv:
                oof_preds[valid_index] = y_pred
        if args.cv:
            valid_preds = oof_preds
            valid_true = train_df['diagnosis']
        else:
            valid_preds = y_pred
            valid_true = y_true
        if task == 'class':
            round_valid_preds = np.argmax(valid_preds, axis=1)
        elif task == 'reg':
            print('optimizing threshold ...')
            optR = utils.OptimizedRounder()
            optR.fit(valid_preds, valid_true)
            coef = optR.coefficients()
            print(f'best coef: {coef}')
            if azure_run:
                azure_run.log('coef', coef)
            round_valid_preds = optR.predict(valid_preds, coef)
        val_kappa = cohen_kappa_score(round_valid_preds,
                                      valid_true,
                                      weights='quadratic')

        print(f'best val kappa: {val_kappa}')
        if azure_run:
            azure_run.log('best val kappa', val_kappa)

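        # Score the test set with every fold model and average the raw predictions.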
        test_csv = pd.read_csv(utils.TEST_CSV_PATH)
        #test_tfms = utils.build_transform(size=IMAGE_SIZE, mode='test')
        test_tfms = utils.build_transform(size=IMAGE_SIZE, mode='val')
        test_dataset = RetinopathyDataset(df=test_csv,
                                          mode='test',
                                          transform=test_tfms,
                                          auto_crop=True,
                                          add_blur=True)
        test_loader = torch.utils.data.DataLoader(test_dataset,
                                                  batch_size=BATCH_SIZE,
                                                  shuffle=False,
                                                  pin_memory=True,
                                                  num_workers=num_workers)

        test_preds = np.zeros((len(test_csv), n_class))
        for i in range(len(indices)):
            model = utils.load_pytorch_model(model_name,
                                             result_dir / f'model_fold{i}',
                                             n_class)
            test_preds += utils.predict(model,
                                        test_loader,
                                        n_class=n_class,
                                        device=device,
                                        tta=1)
        test_preds /= len(indices)
        if task == 'class':
            round_test_preds = np.argmax(test_preds, axis=1)
        elif task == 'reg':
            round_test_preds = optR.predict(test_preds, coef)
        submission_csv = pd.read_csv(utils.SAMPLE_SUBMISSION_PATH)
        submission_csv['diagnosis'] = round_test_preds
        submission_csv.to_csv(result_dir / 'submission.csv', index=False)

        print('finish!!!')
        if not args.debug:
            slack.notify_finish(experiment_name, config, val_kappa)

    except KeyboardInterrupt as e:
        if not args.debug:
            slack.notify_fail(experiment_name, config, e.__class__.__name__,
                              str(e))
    except Exception as e:
        if azure_run:
            azure_run.fail(e)
        if not args.debug:
            slack.notify_fail(experiment_name, config, e.__class__.__name__,
                              str(e))
        raise
    finally:
        if azure_run:
            azure_run.complete()
            print('close azure_run')
        if tb_writer:
            tb_writer.export_scalars_to_json(
                os.path.join(result_dir, 'all_scalars.json'))
            tb_writer.close()
            print('close tb_writer')
Example #5
def main():
    N_FOLDS = 5
    BATCH_SIZE = args.batch
    num_workers = 64
    
    
    print(f'found {torch.cuda.device_count()} gpus !!')
    if args.multi:
        print('use multi gpu !!')
        
    device = torch.device("cuda:0")
    train_df = pd.read_csv(utils.TRAIN_CSV_PATH)
    if args.debug:
        train_df = train_df[:1000]

    print('do not use cross validation')
    with open('folds_index.json', 'rb') as f:
        indices = pickle.load(f)
    indices = indices[0]
        

    print(f'tta: {args.tta}')
    if args.tta:
        tfms_mode = 'test'
        num_tta = args.tta
    else:
        tfms_mode = 'val'
        num_tta = 1
        
    models = [
        {
            'path': 'results/20190725135839/model_fold0',
            'model_name': 'efficientnet-b3',
            'image_size': 256,
            'task': 'class'
        },
        {
            'path': 'results/20190804235524/model_fold0',
            'model_name': 'efficientnet-b3',
            'image_size': 320,
            'task': 'class'
        },
        {
            'path': 'results/20190805111653/model_fold0',
            'model_name': 'efficientnet-b3',
            'image_size': 512,
            'task': 'class'
        },
        {
            'path': 'results/20190723134833/model_fold0',
            'model_name': 'se_resnext50_32x4d',
            'image_size': 320,
            'task': 'class',
        },
        {
            'path': 'results/20190808003823/model_fold0',
            'model_name': 'efficientnet-b3',
            'image_size': 320,
            'task': 'reg',
            'coef': [0.51308747, 1.51896171, 2.23885099, 3.40257123]
        },
    ]
        
    predstable = pd.DataFrame()
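    # Hard-voting ensemble: each model contributes one rounded class prediction
    # per sample; the final label is the majority vote across models.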
    for i, model_conf in tqdm(enumerate(models)):
        if model_conf['task'] == 'class':
            n_class = utils.N_CLASS
        elif model_conf['task'] == 'reg':
            n_class = utils.N_CLASS_REG
        val_tfms = utils.build_transform(size=model_conf['image_size'],
                                         mode=tfms_mode)
        train_index, valid_index = indices
        valid = train_df.iloc[valid_index]
        val_dataset = RetinopathyDataset(df=valid, mode='train', 
                                         transform=val_tfms)
        val_loader = torch.utils.data.DataLoader(val_dataset,
                                                 batch_size=BATCH_SIZE, 
                                                 shuffle=False, 
                                                 pin_memory=True,
                                                 num_workers=num_workers)
        model_path = model_conf['path']
        model = utils.load_pytorch_model(model_conf['model_name'], 
                                         model_path, n_class)
        if args.multi:
            model = nn.DataParallel(model)
        y_pred = utils.predict(model, val_loader, n_class, device, tta=num_tta)
        if model_conf['task'] == 'class':
            round_valid_preds = np.argmax(y_pred, axis=1)
        elif model_conf['task'] == 'reg':
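            # Regression model: round its continuous outputs to class labels
            # with the pre-fitted thresholds from its 'coef' entry.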
            optR = utils.OptimizedRounder()
            coef = model_conf['coef']
            round_valid_preds = optR.predict(y_pred, coef).astype('int')
        predstable[f'preds{i}'] = round_valid_preds
    # voting
    voted_preds = []
    for i in range(len(predstable)):
        c = Counter(predstable.iloc[i,:])
        voted_preds.append(c.most_common()[0][0])
    valid_true = valid['diagnosis']
    val_kappa = cohen_kappa_score(voted_preds, valid_true,
                                  weights='quadratic')

        
    print(f'best val kappa: {val_kappa}')


    print('finish!!!')