def cluster(in_root, out_root, s, mode, r):
    out_file = os.path.join(out_root, '{}_{}_{}.pickle'.format(s, mode, r))

    meta_file = os.path.join(in_root, '{}_{}_000.csv'.format(s, mode))
    meta = load_csv(meta_file)

    if not os.path.exists(out_file):

        # Look up the module-level list '<s>_ref_date' of dates whose frames should be kept for this fold
        date = getattr(sys.modules[__name__], '{}_ref_date'.format(s))

        temp_meta = dict()
        for key in meta.keys():
            temp_meta[key] = [
                e for e, d in zip(meta[key], meta['date']) if d in date
            ]

        t_idx = np.argsort(temp_meta['t'])
        date_meta = dict()
        for key in meta.keys():
            date_meta[key] = [temp_meta[key][i] for i in t_idx]

        print(len(date_meta['t']))
        xy = get_xy(date_meta)

        ref_xy = [xy[0, :]]
        ref_idx = [0]
        for i in tqdm(range(len(date_meta['t']))):
            if sum((xy[i, :] - ref_xy[-1])**2) > r**2:
                ref_xy.append(xy[i, :])
                ref_idx.append(i)

        ref_xy = np.array(ref_xy)
        save_pickle([ref_xy, date_meta, ref_idx], out_file)
    else:
        ref_xy, date_meta, ref_idx = load_pickle(out_file)

    print('{}: {}'.format(s, len(ref_idx)))

    out_img = os.path.join(out_root, '{}_{}_{}.png'.format(s, mode, r))
    plt.clf()
    f, (ax1) = plt.subplots(1, 1, sharey=False)
    f.set_figheight(50)
    f.set_figwidth(50)
    ax1.scatter(ref_xy[:, 0], ref_xy[:, 1], c=np.arange(len(ref_xy)))
    plt.savefig(out_img)

    out_meta = dict()
    for key in meta.keys():
        out_meta[key] = [date_meta[key][i] for i in ref_idx]

    out_file = os.path.join(out_root, '{}_{}_{}.csv'.format(s, mode, r))
    save_csv(out_meta, out_file)
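# The snippets in this file rely on small I/O helpers (load_csv, save_csv,
# load_pickle, save_pickle, mkdir) that are defined elsewhere in the project.
# Below is a minimal, illustrative sketch of what they could look like,
# assuming each CSV stores one column per dictionary key; the real project
# helpers may differ.
import csv
import os
import pickle


def mkdir(path):
    os.makedirs(path, exist_ok=True)


def save_pickle(obj, path):
    with open(path, 'wb') as f:
        pickle.dump(obj, f)


def load_pickle(path):
    with open(path, 'rb') as f:
        return pickle.load(f)


def save_csv(data, path):
    # data: dict mapping column name -> list of values
    keys = list(data.keys())
    with open(path, 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(keys)
        for row in zip(*[data[k] for k in keys]):
            writer.writerow(row)


def load_csv(path):
    # Returns a dict mapping column name -> list of string values
    with open(path, 'r', newline='') as f:
        reader = csv.reader(f)
        keys = next(reader)
        data = {k: [] for k in keys}
        for row in reader:
            for k, v in zip(keys, row):
                data[k].append(v)
    return data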
def clean_parametrization(in_root, folds, cols_to_keep, out_root):
    full_data = dict()
    full_ref_data = dict()
    full_query_data = dict()

    for key in cols_to_keep:
        full_data[key] = []
        full_ref_data[key] = []
        full_query_data[key] = []

    meta = dict()
    for s in folds:
        ref_data = load_csv(os.path.join(in_root, '{}_ref.csv'.format(s)))
        query_data = load_csv(os.path.join(in_root, '{}_query.csv'.format(s)))  # Not used to detect ref outliers

        for key in ['l', 'northing', 'easting']:
            ref_data[key] = np.array(ref_data[key], dtype=float)
            query_data[key] = np.array(query_data[key], dtype=float)

        l_max = max(ref_data['l'])
        num_bins = math.ceil(l_max)

        ref_member_path = os.path.join(out_root, '{}_ref_bin_raw_members.pickle'.format(s))
        if not os.path.exists(ref_member_path):
            bin_members = [[i for i in range(len(ref_data['t'])) if math.floor(ref_data['l'][i]) == j] for j in
                           tqdm(range(num_bins))]
            save_pickle(bin_members, ref_member_path)
        else:
            bin_members = load_pickle(ref_member_path)

        ref_bin_xy_path = os.path.join(out_root, '{}_ref_bin_raw_xy.pickle'.format(s))
        if not os.path.exists(ref_bin_xy_path):
            ref_bin_xy = [
                np.median(np.array([[ref_data['easting'][i], ref_data['northing'][i]] for i in bin_members[j]]),
                          axis=0) if len(
                    bin_members[j]) else np.array([-1, -1]) for j
                in tqdm(range(num_bins))]
            save_pickle(ref_bin_xy, ref_bin_xy_path)
        else:
            ref_bin_xy = load_pickle(ref_bin_xy_path)

        meta['{}_ref'.format(s)], clean_ref_data = find_and_remove_errors('ref', out_root, ref_bin_xy, ref_data, s)

        # Cleaning query files to allow for more efficient testing, should not influence performance
        # (other than possibly excluding faulty gps/ins 'ground truth', which we don't want anyways)
        meta['{}_query'.format(s)], clean_query_data = find_and_remove_errors('query', out_root, ref_bin_xy, query_data,
                                                                              s)

        fold_clean_data = dict()
        for key in clean_ref_data.keys():
            fold_clean_data[key] = []

            fold_clean_data[key].extend(clean_ref_data[key])
            fold_clean_data[key].extend(clean_query_data[key])

            full_data[key].extend(clean_ref_data[key])
            full_data[key].extend(clean_query_data[key])

            # Reference data goes to the combined reference set, query data to the combined query set
            full_ref_data[key].extend(clean_ref_data[key])
            full_query_data[key].extend(clean_query_data[key])

        save_csv(fold_clean_data, os.path.join(out_root, '{}.csv'.format(s)))

    save_csv(full_data, os.path.join(out_root, 'full.csv'))
    save_csv(full_ref_data, os.path.join(out_root, 'full_ref.csv'))
    save_csv(full_query_data, os.path.join(out_root, 'full_query.csv'))

    save_csv(meta, os.path.join(out_root, 'meta.csv'))
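# Illustrative driver (sketch, not from the original project): how
# clean_parametrization() and cluster() might be chained. Fold names, column
# names, and paths below are hypothetical; cluster() additionally expects a
# module-level '<fold>_ref_date' list of reference dates to exist.
def _example_clean_and_cluster():
    in_root = '/path/to/parametrized_csvs'   # hypothetical
    out_root = '/path/to/clean_output'       # hypothetical
    folds = ['oxford', 'freiburg']           # hypothetical fold names
    cols_to_keep = ['t', 'l', 'date', 'path', 'northing', 'easting']  # assumed columns

    clean_parametrization(in_root, folds, cols_to_keep, out_root)

    # Keep only reference images at least 5 m apart
    for s in folds:
        cluster(out_root, out_root, s, 'ref', r=5)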
def compile(l, d, code):
    mkdir(OUT_ROOT)
    top_n_root = os.path.join(srv_root(), 'neurips/top_n')

    queries = [
        'oxford_night',
        'oxford_overcast',
        'oxford_snow',
        'oxford_sunny',
        'pittsburgh_query',
    ]

    min_ys = [0, 40, 50, 50, 10]
    major_s = [10, 10, 10, 10, 10]
    minor_s = [2.5 if m == 10 else 1.0 for m in major_s]

    titles = [
        'Oxford RobotCar, night',
        'Oxford RobotCar, overcast',
        'Oxford RobotCar, snow',
        'Oxford RobotCar, sunny',
        'Pittsburgh',
    ]

    checkpoints = [
        '/scratch_net/tellur_third/user/efs/data/checkpoints/offtheshelf/offtheshelf/offtheshelf',
        '/scratch_net/tellur_third/user/efs/data/checkpoints/pittsburgh30/pittsnetvlad/vd16_pitts30k_conv5_3_vlad_preL2_intra_white',
        '/scratch_net/tellur_third/user/efs/cvpr_aws_logs/learnlarge/triplet_xy_000/epoch-checkpoint-2',
        '/scratch_net/tellur_third/user/efs/cvpr_aws_logs/learnlarge/quadruplet_xy_000/epoch-checkpoint-2',
        '/srv/beegfs02/scratch/toploc/data/mai_2020_logs/ha0_lolazy_triplet_muTrue_renone_vl64_pca_neurips_002/epoch-checkpoint-2',
        '/srv/beegfs02/scratch/toploc/data/mai_2020_logs/ha0_lolazy_quadruplet_muTrue_renone_vl64_pca_neurips_002/epoch-checkpoint-2',
        '/scratch_net/tellur_third/user/efs/home_logs/learnlarge_ral/huber_distance_triplet_xy_000/epoch-checkpoint-2',
        '/srv/beegfs02/scratch/toploc/data/mai_2020_logs/ha0_lologratio_ma15_mi15_muTrue_renone_tu1_vl64_pca_neurips_002/epoch-checkpoint-1',
        '/srv/beegfs02/scratch/toploc/data/mai_2020_logs/ha0_loms_loss_msTrue_muTrue_renone_tu1_vl64_pca_neurips_001/epoch-checkpoint-0',
        '/srv/beegfs02/scratch/toploc/data/mai_2020_logs/al0.8_be15_ha0_lowms_ma15_mi15_msTrue_muTrue_renone_tu1_vl64_pca_neurips_000/epoch-checkpoint-0',
    ]

    fill_styles = [
        'none',
        'none',
        'none',
        'none',
        'none',
        'none',
        'none',
        'none',
        'none',
        'full',
    ]

    markers = [
        '',
        "^",
        "^",
        "s",
        "^",
        "s",
        "^",
        'v',
        "o",
        "d",
    ]

    losses = [
        'Off-the-shelf \\cite{arandjelovic2016netvlad}',
        'Triplet trained on Pittsburgh \\cite{arandjelovic2016netvlad}',
        'Triplet \\cite{arandjelovic2016netvlad}',
        'Quadruplet \\cite{chen2017beyond}',
        'Lazy triplet \\cite{angelina2018pointnetvlad}',
        'Lazy quadruplet \\cite{angelina2018pointnetvlad}',
        'Trip.~+ Huber dist. \\cite{thoma2020geometrically}',
        'Log-ratio \\cite{kim2019deep}',
        'Multi-similarity \\cite{wang2019multi}',
        'Ours',
    ]

    lines = [
        ':',
        ':',
        '--',
        '--',
        '-.',
        '-.',
        '--',
        '-.',
        '--',
        '-',
    ]

    colors = [
        '#000000',
        '#ff6b1c',
        '#f03577',
        '#5f396b',
        '#1934e6',
        '#0e6606',
        '#B0C4DE',
        '#990000',
        '#663300',
        '#11d194',
    ]

    setting = 'l{}_dim{}'.format(l, d)
    print(setting)

    rows = 2
    cols = 3

    f, axs = plt.subplots(rows, cols, constrained_layout=False)
    if rows == 1:
        axs = np.expand_dims(axs, 0)
    if cols == 1:
        axs = np.expand_dims(axs, 1)
    f.tight_layout()
    f.set_figheight(8)  # 8.875in textheight
    f.set_figwidth(10)  # 6.875in textwidth

    for i, query in enumerate(queries):
        print(query)

        print_gt = True

        t = 25.0
        l = 0.0  # Overrides the function argument: every panel uses the densest (l=0) reference set
        out_setting = 'l{}_dim{}'.format(l, d)
        setting = out_setting

        min_y = 1000
        max_y = 0

        for j, (checkpoint, loss, m, line, color) in enumerate(
                zip(checkpoints, losses, cycle(markers), cycle(lines),
                    cycle(colors))):

            cp_name = checkpoint.split('/')[-2]
            cp_name = ''.join(
                os.path.basename(cp_name).split('.'))  # Removing '.'
            cp_name += '_e{}'.format(checkpoint[-1])

            t_n_file = os.path.join(top_n_root, setting,
                                    '{}_{}.pickle'.format(query, cp_name))
            if not os.path.exists(t_n_file):
                print('Missing: {}'.format(t_n_file))
                continue
            print(t_n_file)

            [top_i, top_g_dists, top_f_dists, gt_i, gt_g_dist,
             ref_idx] = load_pickle(t_n_file)
            top_g_dists = np.array(top_g_dists)

            if print_gt:
                print_gt = False
                X = np.linspace(0, t, num=50)
                Y = [
                    float(sum(gt_g_dist < x)) / float(len(gt_g_dist)) * 100
                    for x in X
                ]
                ax = axs[i % rows, i // rows]
                # ax = axs
                width = 0.75

                ax.plot(X,
                        Y,
                        label='Upper bound',
                        linewidth=width,
                        c='#000000')
                ax.title.set_text(titles[i])
                ax.set_xlim([0, t])
                ax.grid(True)

                x_min = X[bisect.bisect(Y, min_ys[i])]

            t_1_d = np.array([td[0] for td in top_g_dists])
            X = np.linspace(0, t, num=50)

            Y = [float(sum(t_1_d < x)) / float(len(t_1_d)) * 100 for x in X]

            min_y = min(np.min(np.array(Y)), min_y)
            max_y = max(np.max(np.array(Y)), max_y)

            ax = axs[i % rows, i // rows]
            width = 0.75
            ax.plot(X,
                    Y,
                    label=loss,
                    linestyle=line,
                    marker=m,
                    linewidth=width,
                    markevery=j % rows + cols,
                    c=color,
                    markersize=3,
                    fillstyle=fill_styles[j % len(fill_styles)])

            #ax.plot(X, Y, label=cp_name)

        ax = axs[i % rows, i // rows]
        ax.set_xlim([x_min, t])
        ax.set_ylim([min_ys[i], min(max_y + 5, 100)])

        # Major ticks every 20, minor ticks every 5
        major_ticks_x = np.arange(x_min // (t / 5) * (t / 5), t, t / 5)[1:]
        minor_ticks_x = np.arange(x_min // (t / 5 / 4) * (t / 5 / 4), t,
                                  t / 5 / 4)[1:]

        y_step = 10

        major_ticks_y = np.arange(min_ys[i], min(max_y + 5, 100), major_s[i])
        minor_ticks_y = np.arange(min_ys[i], min(max_y + 5, 100), minor_s[i])

        ax.set_xticks(major_ticks_x)
        ax.set_xticks(minor_ticks_x, minor=True)
        ax.set_yticks(major_ticks_y)
        ax.set_yticks(minor_ticks_y, minor=True)

        # And a corresponding grid
        ax.grid(which='both')

        # Or if you want different settings for the grids:
        ax.grid(which='minor', alpha=0.2)
        ax.grid(which='major', alpha=0.5)

    out_setting = out_setting.replace('.', '')
    out_name = os.path.join(OUT_ROOT, '{}_neurips_roc.pdf'.format(out_setting))

    axs[-1, -1].axis('off')

    for i in range(cols):
        axs[-1, i].set_xlabel('Distance threshold $d$ [m]')

    for i in range(rows):
        axs[i, 0].set_ylabel('Correctly localized [\%]')

    handles, labels = axs[0, 0].get_legend_handles_labels()

    left = 0.0  # the left side of the subplots of the figure
    right = 1.0  # the right side of the subplots of the figure
    bottom = 0.23  # the bottom of the subplots of the figure
    top = 1.0  # the top of the subplots of the figure
    wspace = 0.2  # the amount of width reserved for space between subplots,
    # expressed as a fraction of the average axis width
    hspace = 0.2  # the amount of height reserved for space between subplots,
    # expressed as a fraction of the average axis height

    # space = 0.2
    plt.subplots_adjust(wspace=wspace,
                        hspace=hspace,
                        left=left,
                        right=right,
                        bottom=bottom,
                        top=top)

    axs[-1, -1].legend(handles,
                       labels,
                       bbox_to_anchor=(0.0, 0.5),
                       loc='center left',
                       ncol=1,
                       borderaxespad=0.,
                       frameon=True,
                       fontsize='medium')  # mode="expand",

    plt.savefig(out_name, bbox_inches='tight', pad_inches=0)

    plt.savefig(out_name.replace('.pdf', '.pgf'),
                bbox_inches='tight',
                pad_inches=0)

    # Test
    plt.show()
def plot_roc(l, d):
    mkdir(OUT_ROOT)
    top_n_root = os.path.join(fs_root(), 'top_n')

    checkpoints = \
        [
            # I: Trained on Cold Freiburg
            'triplet_5e-6_all_conditions_angle_1-4_cu_LRD0.9-5_noPCA_lam0.5_me0',

            # II: Trained on small Oxford
            'triplet_5e-6_full-10-25_cu_LRD0.9-5_noPCA_lam0.5_me0',

            # III: Trained on large Oxford
            'ha0_lotriplet_vl64',

            # IV: Trained on Pittsburgh
            'pittsnetvlad',

            # V: ImageNet (off-the-shelf)
            'offtheshelf',

            # III with hard positives
            'ha6_loevil_triplet_muTrue_vl64',
        ]

    queries = [
        'oxford_night',
        'freiburg_cloudy',
        'oxford_overcast',
        'freiburg_sunny',
        'oxford_snow',
        'pittsburgh_query',
        'oxford_sunny',

    ]

    titles = [
        'Oxford RobotCar, night',
        'Cold Freiburg, cloudy',
        'Oxford RobotCar, overcast',
        'Cold Freiburg, sunny',
        'Oxford RobotCar, snow',
        'Pittsburgh',
        'Oxford RobotCar, sunny',
    ]

    losses = [

        'I Cold Freiburg',
        'II Oxford (small)',
        'III Oxford (large)',
        'IV Pittsburgh',
        'V ImageNet (off-the-shelf)',

        '\\textit{III Oxford (large) + HP}',
    ]

    fill_styles = [
        'none',
        'none',
        'none',
        'full',
        'none',

        'full',

    ]

    markers = [
        '|',
        '.',
        'o',
        '*',
        '',

        'o',
    ]

    lines = [
        '--',

        '-',
        '-',
        '-.',
        ':',

        '-'
    ]

    colors = [
        '#1cad62',

        '#00BFFF',
        '#1E90FF',  # Triplet
        '#8c0054',
        '#000000',

        '#1934e6',  # Triplet HP
    ]

    rows = 2
    cols = 4

    f, axs = plt.subplots(rows, cols, constrained_layout=False)
    if rows == 1:
        axs = np.expand_dims(axs, 0)
    if cols == 1:
        axs = np.expand_dims(axs, 1)
    f.tight_layout()
    f.set_figheight(4.5)  # 8.875in textheight
    f.set_figwidth(8.5)  # 6.875in textwidth

    for i, query in enumerate(queries):
        print(query)

        print_gt = True

        if query.startswith('freiburg'):
            t = 1.5
        else:
            t = 15.0

        setting = 'l{}_dim{}'.format(l, d)

        min_y = 1000
        max_y = 0

        for j, (cp_name, m, line, color) in enumerate(
                zip(checkpoints, cycle(markers), cycle(lines), cycle(colors))):

            t_n_file = os.path.join(top_n_root, setting, '{}_{}.pickle'.format(query, cp_name))
            if not os.path.exists(t_n_file):
                print('Missing: {}'.format(t_n_file))
                continue
            print(t_n_file)

            [top_i, top_g_dists, top_f_dists, gt_i, gt_g_dist, ref_idx] = load_pickle(t_n_file)
            top_g_dists = np.array(top_g_dists)

            if print_gt:
                print_gt = False
                X = np.linspace(0, t, num=50)
                Y = [float(sum(gt_g_dist < x)) / float(len(gt_g_dist)) * 100 for x in X]
                ax = axs[i % rows, i // rows]
                width = 0.75

                ax.plot(X, Y, label='Upper bound', linewidth=width, c='#000000')
                ax.plot([0], [0], linewidth=0, label=' ')
                ax.plot([0], [0], linewidth=0, label='\\textbf{Training datasets:}')
                ax.title.set_text(titles[i])
                ax.set_xlim([0, t])
                ax.grid(True)

            if 'ha6_loevil_triplet_muTrue_vl64' in cp_name:
                ax = axs[i % rows, i // rows]
                ax.plot([0], [0], linewidth=0, label=' ')
                ax.plot([0], [0], linewidth=0, label='\\textbf{With our mining:}')

            t_1_d = np.array([td[0] for td in top_g_dists])
            X = np.linspace(0, t, num=50)

            Y = [float(sum(t_1_d < x)) / float(len(t_1_d)) * 100 for x in X]

            min_y = min(np.min(np.array(Y)), min_y)
            max_y = max(np.max(np.array(Y)), max_y)

            ax = axs[i % rows, i // rows]
            width = 0.75
            ax.plot(X, Y, label=losses[j], linestyle=line, marker=m, linewidth=width, markevery=j % rows + cols,
                    c=color, markersize=3, fillstyle=fill_styles[j])

        ax = axs[i % rows, i // rows]
        ax.set_xlim([0, t])
        ax.set_ylim([min_y, min(max_y + 5, 99)])

        # Major ticks every 20, minor ticks every 5
        major_ticks_x = np.arange(0, t, t / 3)
        minor_ticks_x = np.arange(0, t, t / 3 / 4)

        y_step = 20
        if 'night' in query:
            y_step /= 2

        major_ticks_y = np.arange(min_y, min(max_y + 5, 99), y_step)
        minor_ticks_y = np.arange(min_y, min(max_y + 5, 99), 5)

        ax.set_xticks(major_ticks_x)
        ax.set_xticks(minor_ticks_x, minor=True)
        ax.set_yticks(major_ticks_y)
        ax.set_yticks(minor_ticks_y, minor=True)

        # And a corresponding grid
        ax.grid(which='both')

        # Or if you want different settings for the grids:
        ax.grid(which='minor', alpha=0.2)
        ax.grid(which='major', alpha=0.5)

    out_name = os.path.join(OUT_ROOT, '{}_training_region_roc.pdf'.format(setting))

    axs[-1, -1].axis('off')

    for i in range(cols):
        axs[-1, i].set_xlabel('Distance threshold $d$ [m]')

    for i in range(rows):
        axs[i, 0].set_ylabel('Correctly localized [\%]')

    left = 0.0  # the left side of the subplots of the figure
    right = 1.0  # the right side of the subplots of the figure
    bottom = 0.0  # the bottom of the subplots of the figure
    top = 1.0  # the top of the subplots of the figure
    wspace = 0.2  # the amount of width reserved for space between subplots,
    # expressed as a fraction of the average axis width
    hspace = 0.25  # the amount of height reserved for space between subplots,
    # expressed as a fraction of the average axis height

    # space = 0.2
    plt.subplots_adjust(wspace=wspace, hspace=hspace, left=left, right=right, bottom=bottom, top=top)

    handles, labels = axs[0, 0].get_legend_handles_labels()

    axs[-1, -1].legend(handles, labels, loc='lower left', bbox_to_anchor=(-0.075, 0.0), ncol=1, frameon=True,
                       borderaxespad=0.0)

    plt.savefig(out_name, bbox_inches='tight', pad_inches=0)
    plt.savefig(out_name.replace('.pdf', '.png'), bbox_inches='tight', pad_inches=0)
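# Illustrative sketch (not from the original project): plot_roc(l, d) reads the
# 'l{l}_dim{d}/<query>_<checkpoint>.pickle' files under fs_root()/top_n, which
# are presumably produced by get_top_n() below, so a plausible driver simply
# sweeps the same (l, d) grid. The grid values here are assumptions.
def _example_plot_all_rocs():
    for l in [0.0, 0.3, 1.0, 5.0]:   # assumed reference-spacing grid [m]
        for d in [256]:              # assumed PCA dimensionality
            plot_roc(l, d)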
def get_grad_cam():
    with tf.Graph().as_default() as graph:
        print("In Graph")

        ops, tuple_shape = build_inference_model()
        sess = restore_weights()

        print('\n'.join([n.name for n in tf.all_variables()]))

        # For better gpu utilization, loading processes and gpu inference are done in separate threads.
        # Start CPU threads
        num_loader_threads = 3
        for i in range(num_loader_threads):
            worker = Thread(target=cpu_thread)
            worker.setDaemon(True)
            worker.start()

            worker = Thread(target=save_thread)
            worker.setDaemon(True)
            worker.start()

        # Start GPU threads
        worker = Thread(target=gpu_thread, args=(sess, ops))
        worker.setDaemon(True)
        worker.start()

        ref_meta = load_csv(REF_CSV)
        query_meta = load_csv(QUERY_CSV)
        ref_xy = get_xy(ref_meta)
        query_xy = get_xy(query_meta)

        [top_i, top_g_dists, top_f_dists, gt_i, gt_g_dist,
         ref_idx] = load_pickle(TOP_N_PICKLE)
        top_n = np.array(top_i)

        num = len(query_meta['path'])
        # Fewer queries for speed
        last_xy = query_xy[0, :]
        selected = [0]
        if QUERY_CSV.startswith('pittsburgh'):
            selected = np.linspace(0, num, 500, dtype=int)
        else:
            if 'freiburg' in QUERY_CSV:
                r = 0.5
            else:
                r = 2
            for i in range(num):
                if sum((query_xy[i, :] - last_xy)**2) > r**2:
                    last_xy = query_xy[i, :]
                    selected.append(i)

            selected = np.array(selected, dtype=int)

        xy_dists = pairwise_distances(query_xy, ref_xy, metric='euclidean')

        # Clean list
        image_info = [(query_meta['path'][i], ref_meta['path'][top_n[i, 0]])
                      for i in selected]
        image_dist = [(np.linalg.norm(query_xy[i] - ref_xy[top_n[i, 0]]))
                      for i in selected]

        batched_indices = np.reshape(selected, (-1, TUPLES_PER_BATCH))
        batched_image_info = np.reshape(image_info, (-1, TUPLES_PER_BATCH * 2))
        batched_distances = np.reshape(image_dist, (-1, TUPLES_PER_BATCH))

        for batch_indices, batch_image_info, batched_distance in zip(
                batched_indices, batched_image_info, batched_distances):
            CPU_IN_QUEUE.put(
                (batch_indices, batch_image_info, batched_distance))

        # Wait for completion & order output
        CPU_IN_QUEUE.join()
        GPU_IN_QUEUE.join()
        GPU_OUT_QUEUE.join()
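# The CPU/GPU pipeline above relies on module-level queues (CPU_IN_QUEUE,
# GPU_IN_QUEUE, GPU_OUT_QUEUE) and worker functions (cpu_thread, gpu_thread,
# save_thread) that are not shown in this excerpt. A minimal, self-contained
# sketch of the same producer/consumer pattern with queue.Queue and daemon
# threads (all names here are illustrative, not the project's):
import queue
from threading import Thread


def _example_queue_pipeline():
    in_queue = queue.Queue()
    out_queue = queue.Queue()

    def worker():
        while True:
            item = in_queue.get()
            out_queue.put(item * 2)  # stand-in for preprocessing / inference
            in_queue.task_done()

    # Daemon workers die with the main thread, like the loader threads above
    for _ in range(3):
        Thread(target=worker, daemon=True).start()

    for i in range(10):
        in_queue.put(i)

    in_queue.join()  # blocks until every queued item has been processed
    return [out_queue.get() for _ in range(10)]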
def get_top_n():
    # check if complete:
    ld_checkpoints = get_checkpoints('obm')

    ld_cp_names = []
    for cp in ld_checkpoints:
        cp_name = cp.split('/')[-2]
        cp_name = ''.join(os.path.basename(cp_name).split('.'))  # Removing '.'
        cp_name += '_e{}'.format(cp[-1])
        ld_cp_names.append(cp_name)

    if any([x in QUERY_LV_PICKLE for x in ld_cp_names]):
        L = [0.0, 0.3, 1.0, 5.0]
        D = [64, 128, 256, 512, 1024, 2048, 4096]
    else:
        L = [0.0]
        D = [256]

    complete = True
    for l in L:
        for d in D:

            out_folder = os.path.join(OUT_ROOT, 'l{}_dim{}'.format(l, d))
            name = ''.join(os.path.basename(QUERY_LV_PICKLE).split('.')[:-1])
            out_pickle = os.path.join(out_folder, '{}.pickle'.format(name))

            if not os.path.exists(out_pickle):
                complete = False
                break
        if not complete:
            break

    if complete:
        print('Skipping complete {}'.format(QUERY_LV_PICKLE))
        return

    ref_meta = load_csv(REF_CSV)
    query_meta = load_csv(QUERY_CSV)
    full_ref_xy = get_xy(ref_meta)
    full_query_xy = get_xy(query_meta)
    num_q = full_query_xy.shape[0]

    pca_f = np.array(load_pickle(PCA_LV_PICKLE))
    full_ref_f = np.array(load_pickle(REF_LV_PICKLE))
    full_query_f = np.array(load_pickle(QUERY_LV_PICKLE))

    full_xy_dists = pairwise_distances(full_query_xy,
                                       full_ref_xy,
                                       metric='euclidean')

    for d in D:

        print(d)
        pca = PCA(whiten=True, n_components=d)
        pca = pca.fit(pca_f)
        pca_ref_f = pca.transform(full_ref_f)
        pca_query_f = pca.transform(full_query_f)

        for l in L:
            print(l)

            out_folder = os.path.join(OUT_ROOT, 'l{}_dim{}'.format(l, d))
            mkdir(out_folder)
            name = ''.join(os.path.basename(QUERY_LV_PICKLE).split('.')[:-1])
            out_pickle = os.path.join(out_folder, '{}.pickle'.format(name))

            if os.path.exists(out_pickle):
                print('{} already exists. Skipping.'.format(out_pickle))
                continue

            ref_idx = [0]
            for i in range(len(full_ref_xy)):
                if sum((full_ref_xy[i, :] - full_ref_xy[ref_idx[-1], :])**
                       2) >= l**2:
                    ref_idx.append(i)

            if len(ref_idx) < N:
                continue

            ref_f = np.array([pca_ref_f[i, :] for i in ref_idx])
            xy_dists = np.array([full_xy_dists[:, i]
                                 for i in ref_idx]).transpose()

            print('Building tree')
            ref_tree = KDTree(ref_f)

            print('Retrieving')
            top_f_dists, top_i = np.array(
                ref_tree.query(pca_query_f,
                               k=N,
                               return_distance=True,
                               sort_results=True))
            top_f_dists = np.array(top_f_dists)
            top_i = np.array(top_i, dtype=int)

            top_g_dists = [[xy_dists[q, r] for r in top_i[q, :]]
                           for q in range(num_q)]

            gt_i = np.argmin(xy_dists, axis=1)
            gt_g_dist = np.min(xy_dists, axis=1)

            # Translate to original indices
            top_i = [[ref_idx[r] for r in top_i[q, :]] for q in range(num_q)]
            gt_i = [ref_idx[r] for r in gt_i]

            save_pickle(
                [top_i, top_g_dists, top_f_dists, gt_i, gt_g_dist, ref_idx],
                out_pickle)
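# The core of get_top_n() is (1) greedy sub-sampling of the reference set so
# that consecutively kept references are at least l metres apart, and (2) a
# KD-tree lookup in (PCA-reduced) feature space. A self-contained sketch of
# just these two steps on synthetic data (array shapes and values are
# illustrative only):
import numpy as np
from sklearn.neighbors import KDTree


def _example_subsample_and_retrieve(l=2.0, n_top=5):
    rng = np.random.default_rng(0)
    ref_xy = np.cumsum(rng.normal(size=(1000, 2)), axis=0)  # fake reference positions
    ref_f = rng.normal(size=(1000, 64))                     # fake reference features
    query_f = rng.normal(size=(50, 64))                     # fake query features

    # Greedy spatial sub-sampling, same rule as in get_top_n()
    ref_idx = [0]
    for i in range(len(ref_xy)):
        if np.sum((ref_xy[i, :] - ref_xy[ref_idx[-1], :]) ** 2) >= l ** 2:
            ref_idx.append(i)

    # Nearest neighbours in feature space among the kept references
    tree = KDTree(ref_f[ref_idx])
    top_f_dists, top_i = tree.query(query_f, k=n_top, return_distance=True)

    # Map tree indices back to indices in the full reference set
    top_i = np.asarray(ref_idx)[top_i]
    return top_i, top_f_dists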
def get_top_n():
    ref_meta = load_csv(REF_CSV)
    query_meta = load_csv(QUERY_CSV)
    full_ref_xy = get_xy(ref_meta)
    full_query_xy = get_xy(query_meta)
    num_q = full_query_xy.shape[0]

    pca_f = np.array(load_pickle(PCA_LV_PICKLE))
    full_ref_f = np.array(load_pickle(REF_LV_PICKLE))
    full_query_f = np.array(load_pickle(QUERY_LV_PICKLE))

    full_xy_dists = pairwise_distances(full_query_xy,
                                       full_ref_xy,
                                       metric='euclidean')

    for d in DIMS:

        print(d)
        pca = PCA(whiten=True, n_components=d)
        pca = pca.fit(pca_f)
        pca_ref_f = pca.transform(full_ref_f)
        pca_query_f = pca.transform(full_query_f)

        for l in L:
            print(l)

            out_folder = os.path.join(OUT_ROOT, 'l{}_dim{}'.format(l, d))
            mkdir(out_folder)
            name = ''.join(os.path.basename(QUERY_LV_PICKLE).split('.')[:-1])
            out_pickle = os.path.join(out_folder, '{}.pickle'.format(name))

            if os.path.exists(out_pickle):
                print('{} already exists. Skipping.'.format(out_pickle))
                continue

            ref_idx = [0]
            for i in range(len(full_ref_xy)):
                if sum((full_ref_xy[i, :] - full_ref_xy[ref_idx[-1], :])**
                       2) >= l**2:
                    ref_idx.append(i)

            if len(ref_idx) < N:
                continue

            ref_f = np.array([pca_ref_f[i, :] for i in ref_idx])
            xy_dists = np.array([full_xy_dists[:, i]
                                 for i in ref_idx]).transpose()

            print('Building tree')
            ref_tree = KDTree(ref_f)

            print('Retrieving')
            top_f_dists, top_i = np.array(
                ref_tree.query(pca_query_f,
                               k=N,
                               return_distance=True,
                               sort_results=True))
            top_f_dists = np.array(top_f_dists)
            top_i = np.array(top_i, dtype=int)

            top_g_dists = [[xy_dists[q, r] for r in top_i[q, :]]
                           for q in range(num_q)]

            gt_i = np.argmin(xy_dists, axis=1)
            gt_g_dist = np.min(xy_dists, axis=1)

            # Translate to original indices
            top_i = [[ref_idx[r] for r in top_i[q, :]] for q in range(num_q)]
            gt_i = [ref_idx[r] for r in gt_i]

            save_pickle(
                [top_i, top_g_dists, top_f_dists, gt_i, gt_g_dist, ref_idx],
                out_pickle)
def get_top_n():
    name = os.path.basename(QUERY_LV_PICKLE).split('.')[0]
    print(name)
    sampling = 1

    out_png_1 = os.path.join(
        OUT_ROOT,
        '{}_top{}_t{}_path_{}_s{}.pdf'.format(name, N, T, PERPLEXITY,
                                              sampling))
    out_png_1c = os.path.join(
        OUT_ROOT, '{}_top{}_t{}_ct_{}_s{}.pdf'.format(name, N, T, PERPLEXITY,
                                                      sampling))

    out_pickle = os.path.join(
        OUT_ROOT, '{}_top{}_t{}_{}_s{}.pickle'.format(name, N, T, PERPLEXITY,
                                                      sampling))
    if os.path.exists(out_pickle):
        print('{} already exists. Skipping.'.format(out_pickle))
        return

    pca_f = np.array(load_pickle(PCA_LV_PICKLE))

    pca = PCA(whiten=True, n_components=256)
    pca = pca.fit(pca_f)

    query_meta = load_csv(QUERY_CSV)

    query_xy = get_xy(query_meta)[::sampling]

    l_query_f = np.array(load_pickle(QUERY_LV_PICKLE))
    l_query_f = l_query_f[::sampling, :]

    query_f = pca.transform(l_query_f)

    Y = TSNE(n_components=2, perplexity=PERPLEXITY).fit_transform(query_f)

    Y[:, 0] = (Y[:, 0] - min(Y[:, 0])) / (max(Y[:, 0]) - min(Y[:, 0]))
    Y[:, 1] = (Y[:, 1] - min(Y[:, 1])) / (max(Y[:, 1]) - min(Y[:, 1]))

    plt.clf()
    plt.figure(figsize=(3, 3))
    x = [p[0] for p in query_xy]
    y = [p[1] for p in query_xy]

    x_max = np.max(x)
    x_min = np.min(x)
    y_max = np.max(y)
    y_min = np.min(y)
    x_span = float(x_max - x_min)
    y_span = float(y_max - y_min)

    query_color = [(0, float(p[1] - y_min) / y_span,
                    float(p[0] - x_min) / x_span) for p in query_xy]

    s1 = plt.scatter(x, y, c=query_color, s=2)
    s1.set_rasterized(True)
    plt.savefig(out_png_1, bbox_inches='tight', pad_inches=0)

    plt.clf()
    plt.figure(figsize=(3, 3))
    s2 = plt.scatter(Y[:, 0], Y[:, 1], c=query_color, s=2)
    s2.set_rasterized(True)
    plt.savefig(out_png_1c, bbox_inches='tight', pad_inches=0)
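# Fragment of a module-level plotting script (Cold Freiburg / Oxford dataset
# overview figure); query_meta, name, place, list_out_root, axs, and fs_root
# are defined earlier in the original file and are not shown in this excerpt.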
    query_xy = get_xy(query_meta)
    ax.plot(query_xy[:, 0],
            query_xy[:, 1],
            label='{} {} query images'.format(len(query_xy), name),
            linewidth=0.8)
    save_csv(query_meta,
             os.path.join(list_out_root, '{}_{}.csv'.format(place, name)))

ax.set_xlabel('x [m]')
ax.set_ylabel('y [m]')
ax.title.set_text('Cold Freiburg test images')
ax.legend(markerscale=5)

# ------------------------------------- Oxford Training -------------------------------------

data = load_pickle(
    os.path.join(fs_root(), 'data/beyond/queries/full-10-25/train_ref.pickle'))
small_x = [data[k]['x'] for k in data.keys()]
small_y = [data[k]['y'] for k in data.keys()]

ax = axs[0, 3]
full_meta = load_csv(
    os.path.join(fs_root(),
                 'data/learnlarge/clean_merged_parametrized/train_ref.csv'))
ref_xy = get_xy(full_meta)

s1 = ax.scatter(ref_xy[::1, 0],
                ref_xy[::1, 1],
                s=1,
                label='Large: {} training images'.format(len(ref_xy)))
s2 = ax.scatter(small_x[::1],
                small_y[::1],
                s=1,
                label='Small: {} training images'.format(len(small_x)))
def compile_table(l, d):
    mkdir(OUT_ROOT)
    top_n_root = os.path.join(fs_root(), 'top_n')

    queries = [
        'oxford_night',
        'oxford_overcast',
        'oxford_snow',
        'oxford_sunny',
        'freiburg_cloudy',
        'freiburg_sunny',
        'pittsburgh_query'
    ]

    checkpoints = [
        # Trained on cold
        'triplet_5e-6_all_conditions_angle_1-4_cu_LRD0.9-5_noPCA_lam0.5_me0',
        'quadruplet_5e-6_all_conditions_angle_1-4_cu_LRD0.9-5_noPCA_lam0.5_me0',

        'lazy_triplet_5e-6_all_conditions_angle_1-4_cu_LRD0.9-5_noPCA_lam0.5_me0',
        'lazy_quadruplet_5e-6_all_conditions_angle_1-4_cu_LRD0.9-5_noPCA_lam0.5_me0',

        'sum_5e-6_all_conditions_angle_1-4_cu_LRD0.9-5_noPCA_lam0.5_me0',
        'h_sum_5e-6_all_conditions_angle_1-4_cu_LRD0.9-5_noPCA_lam0.5_me0',

        # Trained on small oxford
        'triplet_5e-6_full-10-25_cu_LRD0.9-5_noPCA_lam0.5_me0',
        'quadruplet_5e-6_full-10-25_cu_LRD0.9-5_noPCA_lam0.5_me0',

        'lazy_triplet_5e-6_full-10-25_cu_LRD0.9-5_noPCA_lam0.5_me0',
        'lazy_quadruplet_5e-6_full-10-25_cu_LRD0.9-5_noPCA_lam0.5_me0',

        'h_sum_5e-6_full-10-25_cu_LRD0.9-5_noPCA_lam0.5_me0',

        # Trained on large oxford
        'ha0_lotriplet_vl64',
        'ha0_loquadruplet_vl64',
        'ha0_lolazy_triplet_vl64',
        'ha0_lolazy_quadruplet_vl64',
        'ha0_lodistance_triplet_vl64',
        'ha0_lohuber_distance_triplet_vl64',
        'ha6_loevil_triplet_muTrue_vl64',
        'ha6_loevil_quadruplet_muTrue_vl64',
        'ha6_loresidual_det_muTrue_vl64',
        'ha0_lotriplet_vl0',
        'ha0_loquadruplet_vl0',
        'ha6_loevil_quadruplet_muTrue_vl0',
        'ha6_loresidual_det_muTrue_vl0',
        'ha0_lotriplet_muTrue_vl64',
        'ha0_lotriplet_muFalse_vl64',
        'ha6_lotriplet_muTrue_vl64',
        'ha6_lotriplet_muFalse_vl64',

        # Trained on Pittsburgh
        'pittsnetvlad',

        # Image-net
        'offtheshelf'
    ]

    losses = [
        'GT',

        'Triplet \cite{arandjelovic2016netvlad}',
        'Quadruplet \cite{chen2017beyond}',

        'Lazy triplet \cite{angelina2018pointnetvlad}',
        'Lazy quadruplet \cite{angelina2018pointnetvlad}',

        'Triplet + distance \cite{thoma2020geometrically}',
        'Triplet + Huber dist.~\cite{thoma2020geometrically}',

        'Triplet \cite{arandjelovic2016netvlad}',
        'Quadruplet \cite{chen2017beyond}',

        'Lazy triplet \cite{angelina2018pointnetvlad}',
        'Lazy quadruplet \cite{angelina2018pointnetvlad}',

        'Triplet + Huber dist.~\cite{thoma2020geometrically}',

        'Triplet \cite{arandjelovic2016netvlad}',
        'Quadruplet \cite{chen2017beyond}',

        'Lazy triplet \cite{angelina2018pointnetvlad}',
        'Lazy quadruplet \cite{angelina2018pointnetvlad}',

        'Triplet + distance \cite{thoma2020geometrically}',
        'Triplet + Huber dist.~\cite{thoma2020geometrically}',

        '\\textit{Triplet + HP}',
        '\\textit{Quadruplet + HP}',

        '\\textit{Volume}',
        '$\\mathit{Volume}^*$',

        'Triplet \cite{arandjelovic2016netvlad}',

        'Off-the-shelf \cite{deng2009imagenet}'
    ]

    setting = 'l{}_dim{}'.format(l, d)
    print(setting)

    table = defaultdict(list)

    table['Loss'] = losses

    for_mean = defaultdict(list)
    for i, query in enumerate(queries):
        print(query)

        print_gt = True

        if query.startswith('freiburg'):
            T = [0.5, 1.0, 1.5]
        else:
            T = [5.0, 10.0, 15.0]

        for j, checkpoint in enumerate(checkpoints):

            cp_name = checkpoint

            t_n_file = os.path.join(top_n_root, setting, '{}_{}.pickle'.format(query, cp_name))
            if not os.path.exists(t_n_file):
                print('Missing: {}'.format(t_n_file))
                table[query].append('-')
                for_mean[query].append([-1, -1, -1])
                continue
            print(t_n_file)

            [top_i, top_g_dists, top_f_dists, gt_i, gt_g_dist, ref_idx] = load_pickle(t_n_file)
            top_g_dists = np.array(top_g_dists)

            if print_gt:
                print_gt = False
                Y = [float(sum(gt_g_dist < x)) / float(len(gt_g_dist)) * 100 for x in T]
                table[query].append(['{:.1f}'.format(y) for y in Y])
                for_mean[query].append(Y)

            t_1_d = np.array([td[0] for td in top_g_dists])

            Y = [float(sum(t_1_d < x)) / float(len(t_1_d)) * 100 for x in T]
            table[query].append(['{:.1f}'.format(y) for y in Y])
            for_mean[query].append(Y)

        # Highlight best values:
        b = np.argmax(np.array(for_mean[query])[1:], axis=0)
        b = b + 1
        for ii, ib in enumerate(b):
            table[query][ib][ii] = '\\textbf{' + table[query][ib][ii] + '}'

        for ii in range(len(losses)):
            table[query][ii] = '/'.join(table[query][ii])

    for i in range(len(losses)):
        all = np.array([for_mean[query][i] for query in queries if for_mean[query][i][0] > -1])
        Y = np.mean(all, axis=0)
        table['mean'].append(['{:.1f}'.format(y) for y in Y])
        for_mean['mean'].append(Y)

    # Highlight best values:
    b = np.argmax(np.array(for_mean['mean'])[1:], axis=0)
    b = b + 1
    for ii, ib in enumerate(b):
        table['mean'][ib][ii] = '\\textbf{' + table['mean'][ib][ii] + '}'

    for ii in range(len(losses)):
        table['mean'][ii] = '/'.join(table['mean'][ii])

    out_name = os.path.join(OUT_ROOT, 'accuracy_table.csv')
    save_csv(table, out_name)
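# Fragment of a module-level script that appears to compare squared feature
# distances against squared metric distances for positive pairs ('scale_factor');
# part_idx, lv_file, and tuple_file are defined earlier in the original file
# and are not shown in this excerpt.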
out_file = os.path.join(
    fs_root(),
    'data/learnlarge/scale_factor/offtheshelf_train_ref_10000_{}_10_25.pickle'.
    format(part_idx))
out_file_meta = os.path.join(
    fs_root(),
    'data/learnlarge/scale_factor/offtheshelf_train_ref_10000_{}_10_25.csv'.
    format(part_idx))
out_file_hist = os.path.join(
    fs_root(),
    'data/learnlarge/scale_factor/offtheshelf_train_ref_10000_{}_10_25.png'.
    format(part_idx))

if not os.path.exists(out_file) or True:  # 'or True' forces recomputation even when the pickle already exists

    image_info, features, xy = load_pickle(lv_file)
    tuple_info = load_pickle(tuple_file)
    xy = np.array(xy)

    f_dists = []
    e_dists = []
    for i in tqdm(range(len(xy))):
        for j in tuple_info['positives'][i]:
            if j < i:
                f_dist = np.sum((features[i] - features[j])**2)
                f_dists.append(f_dist)
                e_dist = np.sum((xy[i, :] - xy[j, :])**2)
                e_dists.append(e_dist)

    save_pickle([e_dists, f_dists], out_file)
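# out_file_hist is defined above but unused in this excerpt. A plausible
# continuation (sketch, not taken from the original code) would visualise the
# relationship between squared metric and feature distances and save it there:
def _example_plot_scale_factor(e_dists, f_dists, out_file_hist):
    import matplotlib.pyplot as plt
    plt.clf()
    plt.scatter(e_dists, f_dists, s=1)
    plt.xlabel('Squared metric distance [m$^2$]')
    plt.ylabel('Squared feature distance')
    plt.savefig(out_file_hist, bbox_inches='tight')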