Example #1
0
def main():
    args = parse_args()
    args.uncropped = True

    with open('models/detection/%s/config.yml' % args.name, 'r') as f:
        config = yaml.load(f, Loader=yaml.FullLoader)

    # config["tvec"] = False
    print('-'*20)
    for key in config.keys():
        print('%s: %s' % (key, str(config[key])))
    print('-'*20)

    cudnn.benchmark = False

    df = pd.read_csv('inputs/sample_submission.csv')
    img_ids = df['ImageId'].values
    img_paths = np.array('inputs/test_images/' + df['ImageId'].values + '.jpg')
    mask_paths = np.array('inputs/test_masks/' + df['ImageId'].values + '.jpg')
    labels = np.array([convert_str_to_labels(s, names=['yaw', 'pitch', 'roll',
                       'x', 'y', 'z', 'score']) for s in df['PredictionString']])

    if not args.uncropped:
        cropped_img_ids = pd.read_csv('inputs/testset_cropped_imageids.csv')['ImageId'].values
        for i, img_id in enumerate(img_ids):
            if img_id in cropped_img_ids:
                img_paths[i] = 'inputs/test_images_uncropped/' + img_id + '.jpg'
                mask_paths[i] = 'inputs/test_masks_uncropped/' + img_id + '.jpg'

    test_set = Dataset(
        img_paths,
        mask_paths,
        labels,
        input_w=config['input_w'],
        input_h=config['input_h'],
        transform=None,
        test=True,
        lhalf=config['lhalf'])
    test_loader = torch.utils.data.DataLoader(
        test_set,
        batch_size=16,
        shuffle=False,
        num_workers=0,
        # num_workers=config['num_workers'],
        # pin_memory=True,
    )

    heads = OrderedDict([
        ('hm', 1),
        ('reg', 2),
        ('depth', 1),
    ])

    if config['rot'] == 'eular':
        heads['eular'] = 3
    elif config['rot'] == 'trig':
        heads['trig'] = 6
    elif config['rot'] == 'quat':
        heads['quat'] = 4
    else:
        raise NotImplementedError

    if config['wh']:
        heads['wh'] = 2
    
    if config['tvec']:
        heads['tvec'] = 3

    name = args.name
    if args.uncropped:
        name += '_uncropped'
    if args.hflip:
        name += '_hf'

    if os.path.exists('outputs/raw/test/%s.pth' %name):
        merged_outputs = torch.load('outputs/raw/test/%s.pth' %name)

    else:
        merged_outputs = {}
        for i in tqdm(range(len(df))):
            img_id = df.loc[i, 'ImageId']

            output = {
                'hm': 0,
                'reg': 0,
                'depth': 0,
                'eular': 0 if config['rot'] == 'eular' else None,
                'trig': 0 if config['rot'] == 'trig' else None,
                'quat': 0 if config['rot'] == 'quat' else None,
                'wh': 0 if config['wh'] else None,
                'tvec': 0 if config['tvec'] else None,
            }

            merged_outputs[img_id] = output

        preds = []
        for fold in range(config['n_splits']):
            print('Fold [%d/%d]' %(fold + 1, config['n_splits']))

            model = get_model(config['arch'], heads=heads,
                              head_conv=config['head_conv'],
                              num_filters=config['num_filters'],
                              dcn=config['dcn'],
                              gn=config['gn'], ws=config['ws'],
                              freeze_bn=config['freeze_bn'])
            model = model.cuda()

            model_path = 'models/detection/%s/model_%d.pth' % (config['name'], fold+1)
            if not os.path.exists(model_path):
                print('%s is not exists.' %model_path)
                continue
            model.load_state_dict(torch.load(model_path))

            model.eval()

            preds_fold = []
            outputs_fold = {}
            with torch.no_grad():
                pbar = tqdm(total=len(test_loader))
                for i, batch in enumerate(test_loader):
                    input = batch['input'].cuda()
                    mask = batch['mask'].cuda()

                    output = model(input)
                    # print(output)

                    if args.hflip:
                        output_hf = model(torch.flip(input, (-1,)))
                        output_hf['hm'] = torch.flip(output_hf['hm'], (-1,))
                        output_hf['reg'] = torch.flip(output_hf['reg'], (-1,))
                        output_hf['reg'][:, 0] = 1 - output_hf['reg'][:, 0]
                        output_hf['depth'] = torch.flip(output_hf['depth'], (-1,))
                        if config['rot'] == 'trig':
                            output_hf['trig'] = torch.flip(output_hf['trig'], (-1,))
                            yaw = torch.atan2(output_hf['trig'][:, 1], output_hf['trig'][:, 0])
                            yaw *= -1.0
                            output_hf['trig'][:, 0] = torch.cos(yaw)
                            output_hf['trig'][:, 1] = torch.sin(yaw)
                            roll = torch.atan2(output_hf['trig'][:, 5], output_hf['trig'][:, 4])
                            roll = rotate(roll, -np.pi)
                            roll *= -1.0
                            roll = rotate(roll, np.pi)
                            output_hf['trig'][:, 4] = torch.cos(roll)
                            output_hf['trig'][:, 5] = torch.sin(roll)

                        if config['wh']:
                            output_hf['wh'] = torch.flip(output_hf['wh'], (-1,))
                        
                        if config['tvec']:
                            output_hf['tvec'] = torch.flip(output_hf['tvec'], (-1,))
                            output_hf['tvec'][:, 0] *= -1.0

                        output['hm'] = (output['hm'] + output_hf['hm']) / 2
                        output['reg'] = (output['reg'] + output_hf['reg']) / 2
                        output['depth'] = (output['depth'] + output_hf['depth']) / 2
                        if config['rot'] == 'trig':
                            output['trig'] = (output['trig'] + output_hf['trig']) / 2
                        if config['wh']:
                            output['wh'] = (output['wh'] + output_hf['wh']) / 2
                        if config['tvec']:
                            output['tvec'] = (output['tvec'] + output_hf['tvec']) / 2

                    for b in range(len(batch['img_path'])):
                        img_id = os.path.splitext(os.path.basename(batch['img_path'][b]))[0]

                        outputs_fold[img_id] = {
                            'hm': output['hm'][b:b+1].cpu(),
                            'reg': output['reg'][b:b+1].cpu(),
                            'depth': output['depth'][b:b+1].cpu(),
                            'eular': output['eular'][b:b+1].cpu() if config['rot'] == 'eular' else None,
                            'trig': output['trig'][b:b+1].cpu() if config['rot'] == 'trig' else None,
                            'quat': output['quat'][b:b+1].cpu() if config['rot'] == 'quat' else None,
                            'wh': output['wh'][b:b+1].cpu() if config['wh'] else None,
                            'tvec': output['tvec'][b:b+1].cpu() if config['tvec'] else None,
                            'mask': mask[b:b+1].cpu(),
                        }

                        merged_outputs[img_id]['hm'] += outputs_fold[img_id]['hm'] / config['n_splits']
                        merged_outputs[img_id]['reg'] += outputs_fold[img_id]['reg'] / config['n_splits']
                        merged_outputs[img_id]['depth'] += outputs_fold[img_id]['depth'] / config['n_splits']
                        if config['rot'] == 'eular':
                            merged_outputs[img_id]['eular'] += outputs_fold[img_id]['eular'] / config['n_splits']
                        if config['rot'] == 'trig':
                            merged_outputs[img_id]['trig'] += outputs_fold[img_id]['trig'] / config['n_splits']
                        if config['rot'] == 'quat':
                            merged_outputs[img_id]['quat'] += outputs_fold[img_id]['quat'] / config['n_splits']
                        if config['wh']:
                            merged_outputs[img_id]['wh'] += outputs_fold[img_id]['wh'] / config['n_splits']
                        if config['tvec']:
                            merged_outputs[img_id]['tvec'] += outputs_fold[img_id]['tvec'] / config['n_splits']
                        merged_outputs[img_id]['mask'] = outputs_fold[img_id]['mask']

                    batch_det = decode(
                        config,
                        output['hm'],
                        output['reg'],
                        output['depth'],
                        eular=output['eular'] if config['rot'] == 'eular' else None,
                        trig=output['trig'] if config['rot'] == 'trig' else None,
                        quat=output['quat'] if config['rot'] == 'quat' else None,
                        wh=output['wh'] if config['wh'] else None,
                        tvec=output['tvec'] if config['tvec'] else None,
                        mask=mask,
                    )
                    batch_det = batch_det.cpu().numpy()

                    for k, det in enumerate(batch_det):
                        if args.nms:
                            det = nms(det, dist_th=args.nms_th)
                        preds_fold.append(convert_labels_to_str(det[det[:, 6] > args.score_th, :7]))

                        if args.show and not config['cv']:
                            img = cv2.imread(batch['img_path'][k])
                            img_pred = visualize(img, det[det[:, 6] > args.score_th])
                            plt.imshow(img_pred[..., ::-1])
                            plt.show()

                    pbar.update(1)
                pbar.close()

            if not config['cv']:
                df['PredictionString'] = preds_fold
                name = '%s_1_%.2f' %(args.name, args.score_th)
                if args.uncropped:
                    name += '_uncropped'
                if args.nms:
                    name += '_nms%.2f' %args.nms_th
                df.to_csv('outputs/submissions/test/%s.csv' %name, index=False)
                return

        if not args.uncropped:
            # ensemble duplicate images
            dup_df = pd.read_csv('processed/test_image_hash.csv')
            dups = dup_df.hash.value_counts()
            dups = dups.loc[dups>1]

            for i in range(len(dups)):
                img_ids = dup_df[dup_df.hash == dups.index[i]].ImageId

                output = {
                    'hm': 0,
                    'reg': 0,
                    'depth': 0,
                    'eular': 0 if config['rot'] == 'eular' else None,
                    'trig': 0 if config['rot'] == 'trig' else None,
                    'quat': 0 if config['rot'] == 'quat' else None,
                    'wh': 0 if config['wh'] else None,
                    'tvec': 0 if config['tvec'] else None,
                    'mask': 0,
                }
                for img_id in img_ids:
                    if img_id in cropped_img_ids:
                        print('fooo')
                    output['hm'] += merged_outputs[img_id]['hm'] / len(img_ids)
                    output['reg'] += merged_outputs[img_id]['reg'] / len(img_ids)
                    output['depth'] += merged_outputs[img_id]['depth'] / len(img_ids)
                    if config['rot'] == 'eular':
                        output['eular'] += merged_outputs[img_id]['eular'] / len(img_ids)
                    if config['rot'] == 'trig':
                        output['trig'] += merged_outputs[img_id]['trig'] / len(img_ids)
                    if config['rot'] == 'quat':
                        output['quat'] += merged_outputs[img_id]['quat'] / len(img_ids)
                    if config['wh']:
                        output['wh'] += merged_outputs[img_id]['wh'] / len(img_ids)
                    if config['tvec']:
                        output['tvec'] += merged_outputs[img_id]['tvec'] / len(img_ids)
                    output['mask'] += merged_outputs[img_id]['mask'] / len(img_ids)

                for img_id in img_ids:
                    merged_outputs[img_id] = output

        torch.save(merged_outputs, 'outputs/raw/test/%s.pth' %name)

    # decode
    dets = {}
    for i in tqdm(range(len(df))):
        img_id = df.loc[i, 'ImageId']

        output = merged_outputs[img_id]

        det = decode(
            config,
            output['hm'],
            output['reg'],
            output['depth'],
            eular=output['eular'] if config['rot'] == 'eular' else None,
            trig=output['trig'] if config['rot'] == 'trig' else None,
            quat=output['quat'] if config['rot'] == 'quat' else None,
            wh=output['wh'] if config['wh'] else None,
            tvec=output['tvec'] if config['tvec'] else None,
            mask=output['mask'],
        )
        det = det.numpy()[0]

        dets[img_id] = det.tolist()

        if args.nms:
            det = nms(det, dist_th=args.nms_th)

        if np.sum(det[:, 6] > args.score_th) >= args.min_samples:
            det = det[det[:, 6] > args.score_th]
        else:
            det = det[:args.min_samples]

        if args.show:
            img = cv2.imread('inputs/test_images/%s.jpg' %img_id)
            img_pred = visualize(img, det)
            plt.imshow(img_pred[..., ::-1])
            plt.show()

        df.loc[i, 'PredictionString'] = convert_labels_to_str(det[:, :7])

    with open('outputs/decoded/test/%s.json' %name, 'w') as f:
        json.dump(dets, f)

    name = '%s_%.2f' %(args.name, args.score_th)
    if args.uncropped:
        name += '_uncropped'
    if args.nms:
        name += '_nms%.2f' %args.nms_th
    if args.hflip:
        name += '_hf'
    if args.min_samples > 0:
        name += '_min%d' %args.min_samples
    df.to_csv('outputs/submissions/test/%s.csv' %name, index=False)
def main():
    config = vars(parse_args())

    if config['name'] is None:
        config['name'] = 'ensemble_%s' % datetime.now().strftime('%m%d%H')

    if os.path.exists('models/detection/%s/config.yml' % config['name']):
        with open('models/detection/%s/config.yml' % config['name'], 'r') as f:
            config = yaml.load(f, Loader=yaml.FullLoader)
    else:
        config['models'] = config['models'].split(',')

    if not os.path.exists('models/detection/%s' % config['name']):
        os.makedirs('models/detection/%s' % config['name'])

    with open('models/detection/%s/config.yml' % config['name'], 'w') as f:
        yaml.dump(config, f)

    print('-'*20)
    for key in config.keys():
        print('%s: %s' % (key, str(config[key])))
    print('-'*20)

    with open('models/detection/%s/config.yml' % config['models'][0], 'r') as f:
        model_config = yaml.load(f, Loader=yaml.FullLoader)

    df = pd.read_csv('inputs/train.csv')
    img_paths = np.array('inputs/train_images/' + df['ImageId'].values + '.jpg')
    img_ids = df['ImageId'].values
    mask_paths = np.array('inputs/train_masks/' + df['ImageId'].values + '.jpg')
    labels = np.array([convert_str_to_labels(s, names=['yaw', 'pitch', 'roll',
                       'x', 'y', 'z', 'score']) for s in df['PredictionString']])

    dets = {}
    kf = KFold(n_splits=model_config['n_splits'], shuffle=True, random_state=41)
    for fold, (train_idx, val_idx) in enumerate(kf.split(img_paths)):
        val_img_ids = img_ids[val_idx]

        if os.path.exists('outputs/raw/val/%s.pth' %config['name']):
            merged_outputs = torch.load('outputs/raw/val/%s.pth' %config['name'])

        else:
            merged_outputs = {}
            for img_id in tqdm(val_img_ids, total=len(val_img_ids)):
                output = {
                    'hm': 0,
                    'reg': 0,
                    'depth': 0,
                    'eular': 0 if model_config['rot'] == 'eular' else None,
                    'trig': 0 if model_config['rot'] == 'trig' else None,
                    'quat': 0 if model_config['rot'] == 'quat' else None,
                    'wh': 0 if model_config['wh'] else None,
                    'mask': 0,
                }

                merged_outputs[img_id] = output

            for model_name in config['models']:
                outputs = torch.load('outputs/raw/val/%s_%d.pth' %(model_name, fold + 1))

                for img_id in tqdm(val_img_ids, total=len(val_img_ids)):
                    output = outputs[img_id]

                    merged_outputs[img_id]['hm'] += output['hm'] / len(config['models'])
                    merged_outputs[img_id]['reg'] += output['reg'] / len(config['models'])
                    merged_outputs[img_id]['depth'] += output['depth'] / len(config['models'])
                    merged_outputs[img_id]['trig'] += output['trig'] / len(config['models'])
                    merged_outputs[img_id]['wh'] += output['wh'] / len(config['models'])
                    merged_outputs[img_id]['mask'] += output['mask'] / len(config['models'])

            torch.save(merged_outputs, 'outputs/raw/val/%s_%d.pth' %(config['name'], fold + 1))

        # decode
        for img_id in tqdm(val_img_ids, total=len(val_img_ids)):
            output = merged_outputs[img_id]

            det = decode(
                model_config,
                output['hm'],
                output['reg'],
                output['depth'],
                eular=output['eular'] if model_config['rot'] == 'eular' else None,
                trig=output['trig'] if model_config['rot'] == 'trig' else None,
                quat=output['quat'] if model_config['rot'] == 'quat' else None,
                wh=output['wh'] if model_config['wh'] else None,
                mask=output['mask'],
            )
            det = det.numpy()[0]

            dets[img_id] = det.tolist()

            if config['nms']:
                det = nms(det, dist_th=config['nms_th'])

            if np.sum(det[:, 6] > config['score_th']) >= config['min_samples']:
                det = det[det[:, 6] > config['score_th']]
            else:
                det = det[:config['min_samples']]

            if config['show']:
                img = cv2.imread('inputs/train_images/%s.jpg' %img_id)
                img_pred = visualize(img, det)
                plt.imshow(img_pred[..., ::-1])
                plt.show()

            df.loc[df.ImageId == img_id, 'PredictionString'] = convert_labels_to_str(det[:, :7])

    with open('outputs/decoded/val/%s.json' %config['name'], 'w') as f:
        json.dump(dets, f)

    df.to_csv('outputs/submissions/val/%s.csv' %config['name'], index=False)
Example #3
0
def main():
    args = parse_args()

    with open('models/detection/%s/config.yml' % args.name, 'r') as f:
        config = yaml.load(f, Loader=yaml.FullLoader)

    print('-' * 20)
    for key in config.keys():
        print('%s: %s' % (key, str(config[key])))
    print('-' * 20)

    cudnn.benchmark = True

    df = pd.read_csv('inputs/train.csv')
    img_paths = np.array('inputs/train_images/' + df['ImageId'].values +
                         '.jpg')
    mask_paths = np.array('inputs/train_masks/' + df['ImageId'].values +
                          '.jpg')
    labels = np.array(
        [convert_str_to_labels(s) for s in df['PredictionString']])

    heads = OrderedDict([
        ('hm', 1),
        ('reg', 2),
        ('depth', 1),
    ])

    if config['rot'] == 'eular':
        heads['eular'] = 3
    elif config['rot'] == 'trig':
        heads['trig'] = 6
    elif config['rot'] == 'quat':
        heads['quat'] = 4
    else:
        raise NotImplementedError

    if config['wh']:
        heads['wh'] = 2

    pred_df = df.copy()
    pred_df['PredictionString'] = np.nan

    dets = {}
    kf = KFold(n_splits=config['n_splits'], shuffle=True, random_state=41)
    for fold, (train_idx, val_idx) in enumerate(kf.split(img_paths)):
        print('Fold [%d/%d]' % (fold + 1, config['n_splits']))

        train_img_paths, val_img_paths = img_paths[train_idx], img_paths[
            val_idx]
        train_mask_paths, val_mask_paths = mask_paths[train_idx], mask_paths[
            val_idx]
        train_labels, val_labels = labels[train_idx], labels[val_idx]

        val_set = Dataset(val_img_paths,
                          val_mask_paths,
                          val_labels,
                          input_w=config['input_w'],
                          input_h=config['input_h'],
                          transform=None,
                          lhalf=config['lhalf'])
        val_loader = torch.utils.data.DataLoader(
            val_set,
            batch_size=config['batch_size'],
            shuffle=False,
            num_workers=config['num_workers'],
            # pin_memory=True,
        )

        model = get_model(config['arch'],
                          heads=heads,
                          head_conv=config['head_conv'],
                          num_filters=config['num_filters'],
                          dcn=config['dcn'],
                          gn=config['gn'],
                          ws=config['ws'],
                          freeze_bn=config['freeze_bn'])
        model = model.cuda()

        model_path = 'models/detection/%s/model_%d.pth' % (config['name'],
                                                           fold + 1)
        if not os.path.exists(model_path):
            print('%s is not exists.' % model_path)
            continue
        model.load_state_dict(torch.load(model_path))

        model.eval()

        outputs = {}

        with torch.no_grad():
            pbar = tqdm(total=len(val_loader))
            for i, batch in enumerate(val_loader):
                input = batch['input'].cuda()
                mask = batch['mask'].cuda()
                hm = batch['hm'].cuda()
                reg_mask = batch['reg_mask'].cuda()

                output = model(input)

                if args.hflip:
                    output_hf = model(torch.flip(input, (-1, )))
                    output_hf['hm'] = torch.flip(output_hf['hm'], (-1, ))
                    output_hf['reg'] = torch.flip(output_hf['reg'], (-1, ))
                    output_hf['reg'][:, 0] = 1 - output_hf['reg'][:, 0]
                    output_hf['depth'] = torch.flip(output_hf['depth'], (-1, ))
                    if config['rot'] == 'trig':
                        output_hf['trig'] = torch.flip(output_hf['trig'],
                                                       (-1, ))
                        yaw = torch.atan2(output_hf['trig'][:, 1],
                                          output_hf['trig'][:, 0])
                        yaw *= -1.0
                        output_hf['trig'][:, 0] = torch.cos(yaw)
                        output_hf['trig'][:, 1] = torch.sin(yaw)
                        roll = torch.atan2(output_hf['trig'][:, 5],
                                           output_hf['trig'][:, 4])
                        roll = rotate(roll, -np.pi)
                        roll *= -1.0
                        roll = rotate(roll, np.pi)
                        output_hf['trig'][:, 4] = torch.cos(roll)
                        output_hf['trig'][:, 5] = torch.sin(roll)

                    if config['wh']:
                        output_hf['wh'] = torch.flip(output_hf['wh'], (-1, ))

                    output['hm'] = (output['hm'] + output_hf['hm']) / 2
                    output['reg'] = (output['reg'] + output_hf['reg']) / 2
                    output['depth'] = (output['depth'] +
                                       output_hf['depth']) / 2
                    if config['rot'] == 'trig':
                        output['trig'] = (output['trig'] +
                                          output_hf['trig']) / 2
                    if config['wh']:
                        output['wh'] = (output['wh'] + output_hf['wh']) / 2

                batch_det = decode(
                    config,
                    output['hm'],
                    output['reg'],
                    output['depth'],
                    eular=output['eular']
                    if config['rot'] == 'eular' else None,
                    trig=output['trig'] if config['rot'] == 'trig' else None,
                    quat=output['quat'] if config['rot'] == 'quat' else None,
                    wh=output['wh'] if config['wh'] else None,
                    mask=mask,
                )
                batch_det = batch_det.cpu().numpy()

                for k, det in enumerate(batch_det):
                    img_id = os.path.splitext(
                        os.path.basename(batch['img_path'][k]))[0]

                    outputs[img_id] = {
                        'hm':
                        output['hm'][k:k + 1].cpu(),
                        'reg':
                        output['reg'][k:k + 1].cpu(),
                        'depth':
                        output['depth'][k:k + 1].cpu(),
                        'eular':
                        output['eular'][k:k + 1].cpu()
                        if config['rot'] == 'eular' else None,
                        'trig':
                        output['trig'][k:k + 1].cpu()
                        if config['rot'] == 'trig' else None,
                        'quat':
                        output['quat'][k:k + 1].cpu()
                        if config['rot'] == 'quat' else None,
                        'wh':
                        output['wh'][k:k + 1].cpu() if config['wh'] else None,
                        'mask':
                        mask[k:k + 1].cpu(),
                    }

                    dets[img_id] = det.tolist()
                    if args.nms:
                        det = nms(det, dist_th=args.nms_th)
                    pred_df.loc[pred_df.ImageId == img_id,
                                'PredictionString'] = convert_labels_to_str(
                                    det[det[:, 6] > args.score_th, :7])

                    if args.show:
                        gt = batch['gt'].numpy()[k]

                        img = cv2.imread(batch['img_path'][k])
                        img_gt = visualize(img, gt[gt[:, -1] > 0])
                        img_pred = visualize(img,
                                             det[det[:, 6] > args.score_th])

                        plt.subplot(121)
                        plt.imshow(img_gt[..., ::-1])
                        plt.subplot(122)
                        plt.imshow(img_pred[..., ::-1])
                        plt.show()

                pbar.update(1)
            pbar.close()

        torch.save(outputs,
                   'outputs/raw/val/%s_%d.pth' % (args.name, fold + 1))

        torch.cuda.empty_cache()

        if not config['cv']:
            break

    with open('outputs/decoded/val/%s.json' % args.name, 'w') as f:
        json.dump(dets, f)

    name = '%s_%.2f' % (args.name, args.score_th)
    if args.nms:
        name += '_nms%.2f' % args.nms_th
    if args.hflip:
        name += '_hf'
    pred_df.to_csv('outputs/submissions/val/%s.csv' % name, index=False)
    print(pred_df.head())