Example #1
def merge_dates(in_root, ins_root, out_root):
    # Find all dates with INS data (not all images have ins, but all ins should have images)
    all_dates = sorted(
        os.listdir(ins_root))  # Sort to make sure we always get the same order

    first = True
    all_info = dict()
    for date in all_dates:

        split_file = os.path.join(in_root, '{}.csv'.format(date))
        if not os.path.exists(split_file):
            print('Missing {}.'.format(split_file))
            continue

        date_info = load_csv(split_file)

    # Add a date column
        num_entries = len(date_info['easting'])
        rep_date = [date] * num_entries

        date_info['date'] = rep_date

        if first:
            all_info = date_info
            first = False
        else:
            for key in all_info.keys():
                all_info[key] = all_info[key] + date_info[key]

    out_file = os.path.join(out_root, 'merged.csv')
    save_csv(all_info, out_file)
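# The functions in these examples assume load_csv/save_csv helpers that treat a CSV
# file as a dict mapping each column name to a list of values. A minimal sketch of
# such helpers (an assumption for illustration, not the project's actual code):
import csv


def load_csv(path, has_header=True, delimiter=',', keys=None):
    # Read a CSV into a dict of column name -> list of string values.
    with open(path, newline='') as f:
        rows = list(csv.reader(f, delimiter=delimiter))
    if has_header:
        keys, rows = rows[0], rows[1:]
    data = {key: [] for key in keys}
    for row in rows:
        for key, value in zip(keys, row):
            data[key].append(value)
    return data


def save_csv(data, path):
    # Write a dict of column name -> values; scalar values become one-row columns.
    keys = list(data.keys())
    columns = []
    for k in keys:
        v = data[k]
        if isinstance(v, str) or not hasattr(v, '__len__'):
            v = [v]
        columns.append(list(v))
    with open(path, 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(keys)
        writer.writerows(zip(*columns))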
def sample_anchors(shuffled_root, cluster_root, out_root, s, mode, r, epoch):
    train_meta = load_csv(os.path.join(shuffled_root, '{}_{}_{:03d}.csv'.format(s, mode, epoch)))
    train_xy = get_xy(train_meta)

    out_file = os.path.join(out_root, '{}_{}_{}_{:03d}.csv'.format(s, mode, r, epoch))
    if not os.path.exists(out_file):

        ref_meta = load_csv(os.path.join(cluster_root, '{}_{}_{}.csv'.format(s, mode, r)))
        ref_xy = get_xy(ref_meta)

        # Sample reference images (random image within r/2 of reference location)
        ref_tree = KDTree(train_xy)
        ref_neighbors = ref_tree.query_radius(ref_xy, r=1, return_distance=False)
        anchors = [np.random.choice(potential_anchors) for potential_anchors in ref_neighbors]

        np.random.shuffle(anchors)
        anchor_indices = {'idx': anchors}
        save_csv(anchor_indices, out_file)

    else:
        anchor_indices = load_csv(out_file)

    anchor_xy = np.array([train_xy[int(i), :] for i in anchor_indices['idx']])

    out_img = os.path.join(out_root, '{}_{}_{}_{}.png'.format(s, mode, r, epoch))
    plt.clf()
    f, (ax1) = plt.subplots(1, 1, sharey=False)
    f.set_figheight(50)
    f.set_figwidth(50)
    ax1.scatter(anchor_xy[:, 0], anchor_xy[:, 1], c=np.arange(len(anchor_xy)))
    plt.savefig(out_img)
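# sample_anchors and cluster (Example #8) both rely on a get_xy helper that turns such
# a metadata dict into an (N, 2) array of easting/northing coordinates for KDTree
# queries; a plausible sketch (an assumption, not the original definition):
import numpy as np


def get_xy(meta):
    # Stack the easting/northing columns into an (N, 2) float array.
    return np.array([[float(e), float(n)]
                     for e, n in zip(meta['easting'], meta['northing'])])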
def find_and_remove_errors(mode, out_root, ref_bin_xy, ref_data, s):
    true_ref_xy = np.array([[e, n] for e, n in zip(ref_data['easting'], ref_data['northing'])])
    binned_ref_xy = np.array([ref_bin_xy[math.floor(l)] for l in ref_data['l']])
    ref_errors = np.linalg.norm(true_ref_xy - binned_ref_xy, axis=1)
    ref_hist_path = os.path.join(out_root, '{}_{}_bin_errors.png'.format(s, mode))
    if not os.path.exists(ref_hist_path):
        plt.clf()
        plt.hist(ref_errors, bins=1000, histtype='step')
        plt.savefig(ref_hist_path)
    for key in ref_data.keys():
        ref_data[key] = [el for el, er in zip(ref_data[key], ref_errors) if er < 5.0]
    save_csv(ref_data, os.path.join(out_root, '{}_{}.csv'.format(s, mode)))

    stats = dict()
    stats['raw_mean_error'] = np.mean(ref_errors)
    stats['raw_median_error'] = np.median(ref_errors)
    stats['raw_max_error'] = np.max(ref_errors)
    stats['raw_min_error'] = np.min(ref_errors)
    stats['raw_error_std'] = np.std(ref_errors)
    clean_errors = [er for er in ref_errors if er < 5.0]
    stats['clean_mean_error'] = np.mean(clean_errors)
    stats['clean_median_error'] = np.median(clean_errors)
    stats['clean_max_error'] = np.max(clean_errors)
    stats['clean_min_error'] = np.min(clean_errors)
    stats['clean_error_std'] = np.std(clean_errors)
    save_csv(stats, os.path.join(out_root, '{}_{}_errors.csv'.format(s, mode)))
    return len(ref_data['t']), ref_data
Example #4
def create_reference(s):
    date = getattr(sys.modules[__name__], '{}_ref_date'.format(s))
    out_file = os.path.join(out_root, '{}_{}_geodesic.csv'.format(s, date))
    if not os.path.exists(out_file):

        data = load_csv(os.path.join(in_root, 'clean_{}.csv'.format(s)))

        ref_data = dict()
        for key in data.keys():
            ref_data[key] = [
                e for e, d in zip(data[key], data['date']) if d == date
            ]

        ref_xy = [(float(x), float(y))
                  for x, y in zip(ref_data['easting'], ref_data['northing'])]
        ref_d = [0] + [
            math.sqrt((p[0] - q[0])**2 + (p[1] - q[1])**2)
            for p, q in zip(ref_xy[1:], ref_xy[:-1])
        ]
        ref_l = [sum(ref_d[:i]) for i in range(1, len(ref_data['date']) + 1)]

        vmin = min(ref_l)
        vmax = max(ref_l)

        ref_data['l'] = ref_l
        ref_yaw = np.array(ref_data['yaw'], dtype=float)
        plot_results(ref_xy, ref_yaw, ref_l, date, ref_data, s, vmin, vmax)
        save_csv(ref_data, out_file)
Example #5
def get_l_based_fixed_localization_reference(in_root, out_root, s, r):
    out_txt = os.path.join(out_root, '{}_ref_l_{}.txt'.format(s, int(r)))
    out_csv = os.path.join(out_root, '{}_ref_l_{}.csv'.format(s, int(r)))

    if not os.path.exists(out_csv):
        meta = load_csv(os.path.join(in_root, '{}_ref.csv'.format(s)))  # Not using query locations for this

        l = np.array(meta['l']).reshape(-1, 1)
        ll = np.arange(math.floor(l[-1]), step=r).reshape(-1, 1)

        l_tree = KDTree(l)
        i_l = l_tree.query(ll, return_distance=False, k=1)
        i_l = np.squeeze(i_l)

        save_txt('\n'.join(['{}'.format(i) for i in i_l]), out_txt)

        selected_meta = dict()
        for key in meta.keys():
            selected_meta[key] = [meta[key][i] for i in i_l]

        save_csv(selected_meta, out_csv)

    else:
        selected_meta = load_csv(out_csv)

    out_folder = os.path.join(out_root, '{}_ref_l_{}'.format(s, int(r)))
    if not os.path.exists(out_folder):
        os.makedirs(out_folder)

    for i, (d, f, t) in tqdm(enumerate(zip(selected_meta['date'], selected_meta['folder'], selected_meta['t']))):
        f = int(f)
        img = load_img(img_path((d, f, t)))
        save_img(img, os.path.join(out_folder, '{:04d}_{}_{:02d}_{}.png'.format(i, d, f, t)))
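# load_img and save_img are assumed here to be thin PIL wrappers; a hypothetical sketch:
from PIL import Image


def load_img(path):
    # Read an image from disk as a PIL image.
    return Image.open(path)


def save_img(img, path):
    # Write a PIL image (or anything Image.fromarray accepts) to disk.
    if not isinstance(img, Image.Image):
        img = Image.fromarray(img)
    img.save(path)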
Example #6
def downsize_images(task_id, max_side, img_root, ins_root, tar_root, out_img_root, out_root, cams):
    # Find all dates with INS data (not all images have ins, but all ins should have images)
    all_dates = sorted(os.listdir(ins_root))  # Sort to make sure we always get the same order

    date = all_dates[int(task_id) - 1]
    print(date)

    out_file = os.path.join(out_root, 'img_info_{}'.format(max_side), '{}.csv'.format(date))
    if os.path.exists(out_file):
        print('Output already exists.')
        return

    imgs = load_csv(os.path.join(img_root, date, 'stereo.timestamps'), has_header=False, delimiter=' ',
                    keys=['t', 'folder'])
    cam = oxford_camera.CameraModel(cams,
                                    '/stereo/centre/')
    exposures = [0] * len(imgs['t'])
    max_folder = max(np.array(imgs['folder'], dtype=int))

    if date == '2015-09-02-10-37-32':
        max_folder = 4  # Folders 5 and 6 are missing from the website
        imgs['t'] = [t for f, t in zip(imgs['folder'], imgs['t']) if int(f) <= max_folder]
        imgs['folder'] = [f for f in imgs['folder'] if int(f) <= max_folder]

    for folder in range(1, max_folder + 1):
        filename = os.path.join(tar_root, '{}_stereo_centre_{:02d}.tar'.format(date, folder))
        print(filename)
        if not os.path.exists(filename):
            print("MISSING!!")
            save_txt(txt=filename, mode='a', out_file=os.path.join(out_root, 'missing.txt'))
            continue  # Skip missing archives instead of crashing in tarfile.open

        with tarfile.open(filename) as archive:
            print(archive)
            for entry in archive.getmembers():
                img_name = os.path.basename(entry.name)
                if '.png' not in img_name:
                    continue
                ts = img_name.split('.')[0]
                img_path = entry.name
                with archive.extractfile(archive.getmember(img_path)) as file:
                    timer = time.time()
                    index = imgs['t'].index(ts)  # Assuming that timestamps are not ordered
                    try:
                        img = oxford_image.load_image(file, cam)  # One file has unloadable image...
                        img = resize_img(img, max_side)
                        exposures[index] = sum(np.array(img).flatten())
                        out_img_folder = os.path.join(out_img_root, '{}_stereo_centre_{:02d}'.format(date, folder))
                        if not os.path.exists(out_img_folder):
                            os.makedirs(out_img_folder)
                        out_img_path = os.path.join(out_img_folder, img_name)
                        save_img(img, out_img_path)
                        print('Processed {} in {}s.'.format(ts, time.time() - timer))
                    except Exception:
                        del exposures[index]
                        del imgs['t'][index]
                        del imgs['folder'][index]

    imgs['exposure'] = exposures
    save_csv(imgs, out_file)
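# downsize_images assumes a resize_img helper that shrinks an image so its longer side
# equals max_side; a minimal PIL-based sketch (the project may use a different backend):
from PIL import Image


def resize_img(img, max_side):
    # Downscale so the longer side equals max_side, preserving the aspect ratio.
    if not isinstance(img, Image.Image):
        img = Image.fromarray(img)
    w, h = img.size
    scale = float(max_side) / max(w, h)
    if scale >= 1.0:
        return img  # Never upscale
    return img.resize((int(round(w * scale)), int(round(h * scale))), Image.BILINEAR)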
Example #7
def get_splits(task_id, grids, in_root, ins_root, out_root):
    # Find all dates with INS data (not all images have ins, but all ins should have images)
    all_dates = sorted(os.listdir(ins_root))  # Sort to make sure we always get the same order

    date = all_dates[int(task_id) - 1]
    print(date)

    out_file = os.path.join(out_root, '{}.csv'.format(date))
    if os.path.exists(out_file):
        print('Already calculated {}.'.format(out_file))
        return

    xy_file = os.path.join(in_root, '{}.csv'.format(date))
    if not os.path.exists(xy_file):
        print('Missing {}.'.format(xy_file))
        return

    xy = load_csv(xy_file)

    X = [0 if math.isnan(float(e)) else int(float(e) - 619500.0) for e in xy['easting']]
    Y = [0 if math.isnan(float(n)) else int(5736480.0 - float(n)) for n in xy['northing']]

    out_img_grid = os.path.join(out_root, '{}_grid.png'.format(date))
    draw_grid(X, Y, out_img_grid)

    out_img_scatter = os.path.join(out_root, '{}_scatter.png'.format(date))
    plt.clf()
    plt.scatter(np.array(xy['easting'], dtype=float), np.array(xy['northing'], dtype=float),
                c=np.array(xy['yaw'], dtype=float))
    plt.savefig(out_img_scatter)

    for grid_name in grids.keys():

        grid = cv2.imread(grids[grid_name])
        grid = np.asarray(grid, dtype=np.uint8)  # Fix for failing img loading

        in_fold = list()

        for x, y in zip(X, Y):
            if x < 0 or y < 0 or x >= grid.shape[1] or y >= grid.shape[0]:
                in_fold.append(0)
            elif grid[y, x, 0] > 0:  # All color channels are the same
                in_fold.append(1)
            else:
                in_fold.append(0)

        xy[grid_name] = in_fold

    max_assigned = [a1 + a2 + a3 for a1, a2, a3 in zip(xy['train'], xy['test'], xy['val'])]
    assert max(max_assigned) <= 1, 'Please increase in_fold grid threshold.'

    for grid_name in grids.keys():
        X_g = [x for x, in_fold in zip(X, xy[grid_name]) if in_fold == 1]
        Y_g = [y for y, in_fold in zip(Y, xy[grid_name]) if in_fold == 1]
        print('Found {} imgs in {} for {}.'.format(len(X_g), grid_name, date))
        out_img_file = os.path.join(out_root, '{}_{}.png'.format(date, grid_name))
        draw_grid(X_g, Y_g, out_img_file)
    save_csv(xy, out_file)
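# get_splits expects a draw_grid helper that rasterizes the integer (X, Y) pixel
# coordinates into an image on disk, in the same coordinate frame as the grid masks
# read back with cv2.imread; one possible sketch (an assumption for illustration):
import cv2
import numpy as np


def draw_grid(X, Y, out_file, pad=10):
    # Rasterize integer pixel coordinates into a white-on-black image and save it.
    if len(X) == 0:
        return
    h, w = max(Y) + pad, max(X) + pad
    grid = np.zeros((h, w, 3), dtype=np.uint8)
    for x, y in zip(X, Y):
        if 0 <= x < w and 0 <= y < h:
            grid[y, x, :] = 255
    cv2.imwrite(out_file, grid)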
Example #8
def cluster(in_root, out_root, s, mode, r):
    out_file = os.path.join(out_root, '{}_{}_{}.pickle'.format(s, mode, r))

    meta_file = os.path.join(in_root, '{}_{}_000.csv'.format(s, mode))
    meta = load_csv(meta_file)

    if not os.path.exists(out_file):

        date = getattr(sys.modules[__name__], '{}_ref_date'.format(s))

        temp_meta = dict()
        for key in meta.keys():
            temp_meta[key] = [
                e for e, d in zip(meta[key], meta['date']) if d in date
            ]

        t_idx = np.argsort(temp_meta['t'])
        date_meta = dict()
        for key in meta.keys():
            date_meta[key] = [temp_meta[key][i] for i in t_idx]

        print(len(date_meta['t']))
        xy = get_xy(date_meta)

        ref_xy = [xy[0, :]]
        ref_idx = [0]
        for i in tqdm(range(len(date_meta['t']))):
            if sum((xy[i, :] - ref_xy[-1])**2) > r**2:
                ref_xy.append(xy[i, :])
                ref_idx.append(i)

        ref_xy = np.array(ref_xy)
        save_pickle([ref_xy, date_meta, ref_idx], out_file)
    else:
        ref_xy, date_meta, ref_idx = load_pickle(out_file)

    print('{}: {}'.format(s, len(ref_idx)))

    out_img = os.path.join(out_root, '{}_{}_{}.png'.format(s, mode, r))
    plt.clf()
    f, (ax1) = plt.subplots(1, 1, sharey=False)
    f.set_figheight(50)
    f.set_figwidth(50)
    ax1.scatter(ref_xy[:, 0], ref_xy[:, 1], c=np.arange(len(ref_xy)))
    plt.savefig(out_img)

    out_meta = dict()
    for key in meta.keys():
        out_meta[key] = [date_meta[key][i] for i in ref_idx]

    out_file = os.path.join(out_root, '{}_{}_{}.csv'.format(s, mode, r))
    save_csv(out_meta, out_file)
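# cluster caches its intermediate results with save_pickle/load_pickle; these are
# presumably plain pickle wrappers, sketched here as an assumption:
import pickle


def save_pickle(obj, path):
    # Serialize an arbitrary Python object to disk.
    with open(path, 'wb') as f:
        pickle.dump(obj, f)


def load_pickle(path):
    # Load an object previously written with save_pickle.
    with open(path, 'rb') as f:
        return pickle.load(f)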
Example #9
def shuffle(in_root, out_root, s, mode, num_epochs):
    meta = load_csv(os.path.join(in_root, '{}_{}.csv'.format(
        s, mode)))  # Not using query locations for this
    for e in range(num_epochs):
        out_file = os.path.join(out_root,
                                '{}_{}_{:03d}.csv'.format(s, mode, e))
        if os.path.exists(out_file):
            print('{} exists. Not recalculating.'.format(out_file))
        else:
            print('Shuffling {}.'.format(out_file))
            shuffled_indices = np.random.permutation(len(meta['t']))

            shuffled_meta = dict()
            for key in meta.keys():
                shuffled_meta[key] = [meta[key][i] for i in shuffled_indices]
            save_csv(shuffled_meta, out_file)
Example #10
def merge_parametrized(in_root, folds, cols_to_keep, out_root):
    files = os.listdir(in_root)

    meta_info = dict()

    full_data = dict()
    for c in cols_to_keep:
        full_data[c] = []

    for fold in folds:
        data = dict()
        date_count = dict()
        for c in cols_to_keep:
            data[c] = []

        fold_files = [f for f in files if f.split('_')[0] == fold]
        for file in fold_files:
            if '.csv' in file:
                date_data = load_csv(os.path.join(in_root, file))
                # Files with very few entries indicate bad l alignment or bad ins estimates
                if len(date_data['t']) < 100:
                    continue

                for c in cols_to_keep:
                    data[c].extend(date_data[c])
                    full_data[c].extend(date_data[c])
                date_count[file.split('_')[1]] = len(date_data['t'])
        out_file = os.path.join(out_root, '{}.csv'.format(fold))
        save_csv(data, out_file)
        meta_info[fold] = len(data['t'])
        save_csv(date_count,
                 os.path.join(out_root, '{}_date_count.csv'.format(fold)))
    out_file = os.path.join(out_root, 'full.csv')
    save_csv(full_data, out_file)
    meta_info['full'] = len(full_data['t'])
    save_csv(meta_info, os.path.join(out_root, 'meta.csv'))
def set_aside_queries(in_root, folds, query_dates):
    num_per_fold = dict()

    for fold in folds:
        clean_file = os.path.join(in_root, '{}.csv'.format(fold))
        data = load_csv(clean_file)

        query_out = clean_file.replace(fold, '{}_query'.format(fold))
        ref_out = clean_file.replace(fold, '{}_ref'.format(fold))

        query_data = dict()
        ref_data = dict()

        for key in data.keys():
            query_data[key] = [el for el, date in zip(data[key], data['date']) if date in query_dates]
            ref_data[key] = [el for el, date in zip(data[key], data['date']) if date not in query_dates]

        num_per_fold['{}_query'.format(fold)] = len(query_data['t'])
        num_per_fold['{}_ref'.format(fold)] = len(ref_data['t'])
        save_csv(query_data, query_out)
        save_csv(ref_data, ref_out)
    save_csv(num_per_fold, os.path.join(in_root, 'num_per_fold.csv'))
Example #12
def plot_statistics(in_root, out_root, folds, tag_root):
    date_tags, all_tags = get_tags(tag_root)

    for fold in folds:
        print('Plotting {} statistics.'.format(fold))
        clean_file = os.path.join(in_root, '{}.csv'.format(fold))
        data = load_csv(clean_file)

        # Images per date
        images_per_date = Counter(data['date'])
        save_csv(images_per_date, os.path.join(out_root, 'images_per_date_{}.csv'.format(fold)))
        dict_to_bar(images_per_date, os.path.join(out_root, 'images_per_date_{}.pdf'.format(fold)))

        # Images/dates per tag, month and hour
        images_per_tag = dict.fromkeys(all_tags, 0)
        images_per_month = dict.fromkeys(range(1, 13), 0)
        images_per_hour = dict.fromkeys(range(0, 24), 0)

        dates_per_tag = dict.fromkeys(all_tags, 0)
        dates_per_month = dict.fromkeys(range(1, 13), 0)
        dates_per_hour = dict.fromkeys(range(0, 24), 0)

        for date in images_per_date.keys():
            month = int(date[5:7])
            hour = int(date[11:13])
            images_per_month[month] = images_per_date[date] + images_per_month[month]
            images_per_hour[hour] = images_per_date[date] + images_per_hour[hour]

            dates_per_month[month] = 1 + dates_per_month[month]
            dates_per_hour[hour] = 1 + dates_per_hour[hour]
            for tag in date_tags[date]:
                images_per_tag[tag] = images_per_date[date] + images_per_tag[tag]
                dates_per_tag[tag] = 1 + dates_per_tag[tag]

        save_csv(images_per_tag, os.path.join(out_root, 'images_per_tag_{}.csv'.format(fold)))
        dict_to_bar(images_per_tag, os.path.join(out_root, 'images_per_tag_{}.pdf'.format(fold)))

        save_csv(images_per_month, os.path.join(out_root, 'images_per_month_{}.csv'.format(fold)))
        dict_to_bar(images_per_month, os.path.join(out_root, 'images_per_month_{}.pdf'.format(fold)))

        new_months = OrderedDict()
        months = ['January', 'February', 'March', 'April', 'May', 'June', 'July', 'August', 'September', 'October',
                  'November', 'December']
        for i in range(12):
            new_months[months[i]] = images_per_month[i + 1]

        save_csv(new_months, os.path.join(out_root, 'images_per_month_pretty_{}.csv'.format(fold)))
        dict_to_bar(new_months, os.path.join(out_root, 'images_per_month_pretty_{}.pdf'.format(fold)))

        save_csv(images_per_hour, os.path.join(out_root, 'images_per_hour_{}.csv'.format(fold)))
        dict_to_bar(images_per_hour, os.path.join(out_root, 'images_per_hour_{}.pdf'.format(fold)))

        new_hours = OrderedDict()
        for i in range(6, 22):
            new_hours['{:02d}:00'.format(i)] = images_per_hour[i]
        save_csv(new_hours, os.path.join(out_root, 'images_per_pretty_hour_{}.csv'.format(fold)))
        dict_to_bar(new_hours, os.path.join(out_root, 'images_per_pretty_hour_{}.pdf'.format(fold)))

        save_csv(dates_per_tag, os.path.join(out_root, 'dates_per_tag_{}.csv'.format(fold)))
        dict_to_bar(dates_per_tag, os.path.join(out_root, 'dates_per_tag_{}.pdf'.format(fold)))

        save_csv(dates_per_month, os.path.join(out_root, 'dates_per_month_{}.csv'.format(fold)))
        dict_to_bar(dates_per_month, os.path.join(out_root, 'dates_per_month_{}.pdf'.format(fold)))

        save_csv(dates_per_hour, os.path.join(out_root, 'dates_per_hour_{}.csv'.format(fold)))
        dict_to_bar(dates_per_hour, os.path.join(out_root, 'dates_per_hour_{}.pdf'.format(fold)))
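# plot_statistics (and clean in Example #19) assumes a dict_to_bar helper that renders a
# {label: count} dict as a bar chart; a minimal matplotlib sketch (an assumed helper,
# not the actual definition):
import matplotlib.pyplot as plt


def dict_to_bar(data, out_file):
    # Plot a {label: count} dict as a bar chart and save it to out_file.
    labels = list(data.keys())
    values = [data[k] for k in labels]
    plt.clf()
    plt.bar(range(len(labels)), values)
    plt.xticks(range(len(labels)), [str(l) for l in labels], rotation=90)
    plt.tight_layout()
    plt.savefig(out_file)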
Example #13
        for j in tuple_info['positives'][i]:
            if j < i:
                f_dist = np.sum((features[i] - features[j])**2)
                f_dists.append(f_dist)
                e_dist = np.sum((xy[i, :] - xy[j, :])**2)
                e_dists.append(e_dist)

    save_pickle([e_dists, f_dists], out_file)

else:
    e_dists, f_dists = load_pickle(out_file)

full_info = dict()
full_info['f_mean'] = np.mean(f_dists)
full_info['e_mean'] = np.mean(e_dists)
full_info['f_med'] = np.median(f_dists)
full_info['e_med'] = np.median(e_dists)
full_info['f_max'] = np.max(f_dists)
full_info['e_max'] = np.max(e_dists)
save_csv(full_info, out_file_meta)

plt.clf()
f, (ax1, ax2) = plt.subplots(1, 2, sharey=True)
f.set_figheight(10)
f.set_figwidth(20)
ax1.hist(f_dists, bins=10000, histtype='step')
ax1.title.set_text('F dists')
ax2.hist(e_dists, bins=10000, histtype='step')
ax2.title.set_text('E dists')
plt.savefig(out_file_hist)
def clean_parametrization(in_root, folds, cols_to_keep, out_root):
    full_data = dict()
    full_ref_data = dict()
    full_query_data = dict()

    for key in cols_to_keep:
        full_data[key] = []
        full_ref_data[key] = []
        full_query_data[key] = []

    meta = dict()
    for s in folds:
        ref_data = load_csv(os.path.join(in_root, '{}_ref.csv'.format(s)))
        query_data = load_csv(os.path.join(in_root, '{}_query.csv'.format(s)))  # Not used to detect ref outliers

        for key in ['l', 'northing', 'easting']:
            ref_data[key] = np.array(ref_data[key], dtype=float)
            query_data[key] = np.array(query_data[key], dtype=float)

        l_max = max(ref_data['l'])
        num_bins = math.ceil(l_max)

        ref_member_path = os.path.join(out_root, '{}_ref_bin_raw_members.pickle'.format(s))
        if not os.path.exists(ref_member_path):
            bin_members = [[i for i in range(len(ref_data['t'])) if math.floor(ref_data['l'][i]) == j] for j in
                           tqdm(range(num_bins))]
            save_pickle(bin_members, ref_member_path)
        else:
            bin_members = load_pickle(ref_member_path)

        ref_bin_xy_path = os.path.join(out_root, '{}_ref_bin_raw_xy.pickle'.format(s))
        if not os.path.exists(ref_bin_xy_path):
            ref_bin_xy = [
                np.median(np.array([[ref_data['easting'][i], ref_data['northing'][i]] for i in bin_members[j]]),
                          axis=0) if len(
                    bin_members[j]) else np.array([-1, -1]) for j
                in tqdm(range(num_bins))]
            save_pickle(ref_bin_xy, ref_bin_xy_path)
        else:
            ref_bin_xy = load_pickle(ref_bin_xy_path)

        meta['{}_ref'.format(s)], clean_ref_data = find_and_remove_errors('ref', out_root, ref_bin_xy, ref_data, s)

        # Cleaning query files to allow for more efficient testing, should not influence performance
        # (other than possibly excluding faulty gps/ins 'ground truth', which we don't want anyways)
        meta['{}_query'.format(s)], clean_query_data = find_and_remove_errors('query', out_root, ref_bin_xy, query_data,
                                                                              s)

        fold_clean_data = dict()
        for key in clean_ref_data.keys():
            fold_clean_data[key] = []

            fold_clean_data[key].extend(clean_ref_data[key])
            fold_clean_data[key].extend(clean_query_data[key])

            full_data[key].extend(clean_ref_data[key])
            full_data[key].extend(clean_query_data[key])

            full_ref_data[key].extend(clean_ref_data[key])
            full_query_data[key].extend(clean_query_data[key])

        save_csv(fold_clean_data, os.path.join(out_root, '{}.csv'.format(s)))

    save_csv(full_data, os.path.join(out_root, 'full.csv'))
    save_csv(full_ref_data, os.path.join(out_root, 'full_ref.csv'))
    save_csv(full_query_data, os.path.join(out_root, 'full_query.csv'))

    save_csv(meta, os.path.join(out_root, 'meta.csv'))
def compile_table(l, d):
    mkdir(OUT_ROOT)
    top_n_root = os.path.join(fs_root(), 'top_n')

    queries = [
        'oxford_night',
        'oxford_overcast',
        'oxford_snow',
        'oxford_sunny',
        'freiburg_cloudy',
        'freiburg_sunny',
        'pittsburgh_query'
    ]

    checkpoints = [
        # Trained on cold
        'triplet_5e-6_all_conditions_angle_1-4_cu_LRD0.9-5_noPCA_lam0.5_me0',
        'quadruplet_5e-6_all_conditions_angle_1-4_cu_LRD0.9-5_noPCA_lam0.5_me0',

        'lazy_triplet_5e-6_all_conditions_angle_1-4_cu_LRD0.9-5_noPCA_lam0.5_me0',
        'lazy_quadruplet_5e-6_all_conditions_angle_1-4_cu_LRD0.9-5_noPCA_lam0.5_me0',

        'sum_5e-6_all_conditions_angle_1-4_cu_LRD0.9-5_noPCA_lam0.5_me0',
        'h_sum_5e-6_all_conditions_angle_1-4_cu_LRD0.9-5_noPCA_lam0.5_me0',

        # Trained on small oxford
        'triplet_5e-6_full-10-25_cu_LRD0.9-5_noPCA_lam0.5_me0',
        'quadruplet_5e-6_full-10-25_cu_LRD0.9-5_noPCA_lam0.5_me0',

        'lazy_triplet_5e-6_full-10-25_cu_LRD0.9-5_noPCA_lam0.5_me0',
        'lazy_quadruplet_5e-6_full-10-25_cu_LRD0.9-5_noPCA_lam0.5_me0',

        'h_sum_5e-6_full-10-25_cu_LRD0.9-5_noPCA_lam0.5_me0',

        # Trained on large oxford
        'ha0_lotriplet_vl64',
        'ha0_loquadruplet_vl64',
        'ha0_lolazy_triplet_vl64',
        'ha0_lolazy_quadruplet_vl64',
        'ha0_lodistance_triplet_vl64',
        'ha0_lohuber_distance_triplet_vl64',
        'ha6_loevil_triplet_muTrue_vl64',
        'ha6_loevil_quadruplet_muTrue_vl64',
        'ha6_loresidual_det_muTrue_vl64',
        'ha0_lotriplet_vl0',
        'ha0_loquadruplet_vl0',
        'ha6_loevil_quadruplet_muTrue_vl0',
        'ha6_loresidual_det_muTrue_vl0',
        'ha0_lotriplet_muTrue_vl64',
        'ha0_lotriplet_muFalse_vl64',
        'ha6_lotriplet_muTrue_vl64',
        'ha6_lotriplet_muFalse_vl64',

        # Trained on Pittsburgh
        'pittsnetvlad',

        # Image-net
        'offtheshelf'
    ]

    losses = [
        'GT',

        'Triplet \\cite{arandjelovic2016netvlad}',
        'Quadruplet \\cite{chen2017beyond}',

        'Lazy triplet \\cite{angelina2018pointnetvlad}',
        'Lazy quadruplet \\cite{angelina2018pointnetvlad}',

        'Triplet + distance \\cite{thoma2020geometrically}',
        'Triplet + Huber dist.~\\cite{thoma2020geometrically}',

        'Triplet \\cite{arandjelovic2016netvlad}',
        'Quadruplet \\cite{chen2017beyond}',

        'Lazy triplet \\cite{angelina2018pointnetvlad}',
        'Lazy quadruplet \\cite{angelina2018pointnetvlad}',

        'Triplet + Huber dist.~\\cite{thoma2020geometrically}',

        'Triplet \\cite{arandjelovic2016netvlad}',
        'Quadruplet \\cite{chen2017beyond}',

        'Lazy triplet \\cite{angelina2018pointnetvlad}',
        'Lazy quadruplet \\cite{angelina2018pointnetvlad}',

        'Triplet + distance \\cite{thoma2020geometrically}',
        'Triplet + Huber dist.~\\cite{thoma2020geometrically}',

        '\\textit{Triplet + HP}',
        '\\textit{Quadruplet + HP}',

        '\\textit{Volume}',
        '$\\mathit{Volume}^*$',

        'Triplet \\cite{arandjelovic2016netvlad}',

        'Off-the-shelf \\cite{deng2009imagenet}'
    ]

    setting = 'l{}_dim{}'.format(l, d)
    print(setting)

    table = defaultdict(list)

    table['Loss'] = losses

    for_mean = defaultdict(list)
    for i, query in enumerate(queries):
        print(query)

        print_gt = True

        if query.startswith('freiburg'):
            T = [0.5, 1.0, 1.5]
        else:
            T = [5.0, 10.0, 15.0]

        for j, checkpoint in enumerate(checkpoints):

            cp_name = checkpoint

            t_n_file = os.path.join(top_n_root, setting, '{}_{}.pickle'.format(query, cp_name))
            if not os.path.exists(t_n_file):
                print('Missing: {}'.format(t_n_file))
                table[query].append('-')
                for_mean[query].append([-1, -1, -1])
                continue
            print(t_n_file)

            [top_i, top_g_dists, top_f_dists, gt_i, gt_g_dist, ref_idx] = load_pickle(t_n_file)
            top_g_dists = np.array(top_g_dists)

            if print_gt:
                print_gt = False
                Y = [float(sum(gt_g_dist < x)) / float(len(gt_g_dist)) * 100 for x in T]
                table[query].append(['{:.1f}'.format(y) for y in Y])
                for_mean[query].append(Y)

            t_1_d = np.array([td[0] for td in top_g_dists])

            Y = [float(sum(t_1_d < x)) / float(len(t_1_d)) * 100 for x in T]
            table[query].append(['{:.1f}'.format(y) for y in Y])
            for_mean[query].append(Y)

        # Highlight best values:
        b = np.argmax(np.array(for_mean[query])[1:], axis=0)
        b = b + 1
        for ii, ib in enumerate(b):
            table[query][ib][ii] = '\\textbf{' + table[query][ib][ii] + '}'

        for ii in range(len(losses)):
            table[query][ii] = '/'.join(table[query][ii])

    for i in range(len(losses)):
        valid = np.array([for_mean[query][i] for query in queries if for_mean[query][i][0] > -1])
        Y = np.mean(valid, axis=0)
        table['mean'].append(['{:.1f}'.format(y) for y in Y])
        for_mean['mean'].append(Y)

    # Highlight best values:
    b = np.argmax(np.array(for_mean['mean'])[1:], axis=0)
    b = b + 1
    for ii, ib in enumerate(b):
        table['mean'][ib][ii] = '\\textbf{' + table['mean'][ib][ii] + '}'

    for ii in range(len(losses)):
        table['mean'][ii] = '/'.join(table['mean'][ii])

    out_name = os.path.join(OUT_ROOT, 'accuracy_table.csv')
    save_csv(table, out_name)
Example #16
def img_path(info):
    date = info[0]
    folder = info[1]
    t = info[2]
    return os.path.join('datasets/oxford_512', '{}_stereo_centre_{:02d}'.format(date, int(folder)), '{}.png'.format(t))


# Preselected reference
preselected_ref = os.path.join(fs_root(), 'data/learnlarge/shuffled/train_ref_000.csv')
p_meta = load_csv(preselected_ref)
p_meta['path'] = [img_path((d, f, t)) for d, f, t in
                  zip(p_meta['date'], p_meta['folder'], p_meta['t'])]
idxs_to_keep = np.linspace(0, len(p_meta['path']), num=N_SAMPLES, endpoint=False, dtype=int)
for key in p_meta.keys():
    p_meta[key] = [p_meta[key][i] for i in idxs_to_keep]
save_csv(p_meta, os.path.join(out_root, '{}_pca.csv'.format(place)))

# Cold
place = 'cold'


def parse_cold_folder(path, pattern):
    all_files = get_recursive_file_list(path, pattern)
    all_files, TXYA = parse_file_list(all_files)

    if len(all_files) > N_SAMPLES:
        idxs_to_keep = np.linspace(0, len(all_files), num=N_SAMPLES, endpoint=False, dtype=int)
    else:
        idxs_to_keep = np.arange(len(all_files))

    meta = dict()
Example #17
ax.legend([
    '{} reference images'.format(len(remaining_ref_i)),
    '{} query images'.format(len(remaining_query_i))
],
          markerscale=5)
ax.set_xlabel('Easting [m]')
ax.set_ylabel('Northing [m]')

ref_meta = dict()
ref_meta['path'] = [
    os.path.join('datasets/pittsburgh_used/ref', ref_paths[i])
    for i in remaining_ref_i
]
ref_meta['easting'] = [ref_xy[i, 0] for i in remaining_ref_i]
ref_meta['northing'] = [ref_xy[i, 1] for i in remaining_ref_i]
save_csv(ref_meta, os.path.join(list_out_root, '{}_ref.csv'.format(place)))

query_meta = dict()
query_meta['path'] = [
    os.path.join('datasets/pittsburgh_used/query', query_paths[i])
    for i in remaining_query_i
]
query_meta['easting'] = [query_xy[i, 0] for i in remaining_query_i]
query_meta['northing'] = [query_xy[i, 1] for i in remaining_query_i]
save_csv(query_meta, os.path.join(list_out_root, '{}_query.csv'.format(place)))

# ------------------------------------- Oxford -------------------------------------
place = 'oxford'


def img_path(info):
Example #18
def interpolate_xy(task_id, in_root, ins_root, out_root):
    # Find all dates with INS data (not all images have ins, but all ins should have images)
    all_dates = sorted(
        os.listdir(ins_root))  # Sort to make sure we always get the same order

    date = all_dates[int(task_id) - 1]

    out_file = os.path.join(out_root, '{}.csv'.format(date))
    if os.path.exists(out_file):
        # print('Already calculated {}.'.format(out_file))
        return

    imgs_file = os.path.join(in_root, '{}.csv'.format(date))
    if not os.path.exists(imgs_file):
        print('Missing {}: {}.'.format(task_id, imgs_file))
        return

    imgs = load_csv(imgs_file)
    ins = load_csv(os.path.join(ins_root, date, 'gps', 'ins.csv'))

    ins_ts = np.array(ins['timestamp'], dtype=int).reshape(
        (-1, 1))  # num_samples x num_features
    img_ts = np.array(imgs['t'], dtype=int).reshape((-1, 1))
    northing = np.array(ins['northing'], dtype=float)
    easting = np.array(ins['easting'], dtype=float)
    yaw = np.array(ins['yaw'], dtype=float)  # Yaw range: 0-2pi
    status = ins['ins_status']

    # Ins measures are roughly 3 times more frequent than images
    mean_td_img = np.mean(
        [img_ts[i, 0] - img_ts[i - 1, 0] for i in range(1, img_ts.shape[0])])
    mean_td_ins = np.mean(
        [ins_ts[i, 0] - ins_ts[i - 1, 0] for i in range(1, ins_ts.shape[0])])
    print('Found {} times more ins measures than images.'.format(mean_td_img /
                                                                 mean_td_ins))
    print('The mean time between ins measures is {}.'.format(mean_td_ins))
    print('The mean time between img measures is {}.'.format(mean_td_img))

    ins_ts_tree = KDTree(ins_ts)
    d_closest, i_closest = ins_ts_tree.query(img_ts, 2)

    img_northing = [
        lin_ip(northing[i_c[0]], northing[i_c[1]], d_c[0], d_c[1])
        for d_c, i_c in zip(d_closest, i_closest)
    ]
    img_easting = [
        lin_ip(easting[i_c[0]], easting[i_c[1]], d_c[0], d_c[1])
        for d_c, i_c in zip(d_closest, i_closest)
    ]

    img_yaw = [
        lin_ip(yaw[i_c[0]], yaw[i_c[1]], d_c[0], d_c[1]) % (2 * pi)
        for d_c, i_c in zip(d_closest, i_closest)
    ]  # Yaw range: 0-2pi

    # Remove interpolations of unclean ins states
    ins_good = [0] * len(img_easting)
    for j, i_c in enumerate(i_closest):
        if status[i_c[0]] == 'INS_SOLUTION_GOOD' and status[
                i_c[1]] == 'INS_SOLUTION_GOOD':
            ins_good[j] = 1

    imgs['northing'] = img_northing
    imgs['easting'] = img_easting
    imgs['ins_good'] = ins_good
    imgs['yaw'] = img_yaw

    ic1 = [i_c[0] for i_c in i_closest]
    ic2 = [i_c[1] for i_c in i_closest]
    tn1 = [ins_ts[i, 0] for i in ic1]
    tn2 = [ins_ts[i, 0] for i in ic2]

    imgs['ic1'] = ic1  # Index of closest ins point
    imgs['ic2'] = ic2
    imgs['tn1'] = tn1  # Timestamp of closest ins point
    imgs['tn2'] = tn2

    save_csv(imgs, out_file)
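# interpolate_xy (and parametrize in Example #20) relies on a lin_ip helper; given how
# it is called with two neighbouring values and their distances, it is presumably a
# distance-weighted linear interpolation. A sketch under that assumption:
def lin_ip(v1, v2, d1, d2):
    # Interpolate between v1 and v2, weighting each value by the distance to the
    # *other* sample so that the closer sample dominates.
    if d1 + d2 == 0:
        return v1
    return (v1 * d2 + v2 * d1) / (d1 + d2)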
Example #19
def clean(in_root, out_root, folds, cols_to_keep):
    merged_file = os.path.join(in_root, 'merged.csv')
    meta_file = os.path.join(out_root, 'meta.csv')
    meta_info = dict()

    merged = load_csv(merged_file)

    # Original number of imgs
    meta_info['total_imgs'] = len(merged['exposure'])

    # Valid ins
    valid_ins = np.array(merged['ins_good'], dtype=int)
    meta_info['valid_ins'] = sum(valid_ins)

    # Valid location on grid
    valid_grid = np.array(merged['full'], dtype=int)
    meta_info['valid_grid'] = sum(valid_grid)

    # Analyse and clean exposure
    # Visual inspection shows that images below 50'000'000 are very dark and above 110'000'000 very light
    exposures = np.array(merged['exposure'], dtype=float)
    low_exposure = np.percentile(exposures, 1)
    high_exposure = np.percentile(exposures, 99)
    print('Lo: {} \nHi: {}'.format(low_exposure, high_exposure))

    plt.clf()
    plt.hist(exposures, bins=10000, histtype='step')
    plt.xticks(rotation=90)
    plt.savefig(os.path.join(out_root, 'exposures.pdf'))

    valid_exposure = [
        1 if low_exposure < e < high_exposure else 0 for e in exposures
    ]
    meta_info['valid_exposures'] = sum(valid_exposure)

    # Manual cleaning
    valid_date = [1 if d not in bad_dates else 0 for d in merged['date']]
    meta_info['valid_date'] = sum(valid_date)

    # Get fully valid
    fully_valid = np.array(valid_exposure) * np.array(valid_grid) * np.array(
        valid_ins) * np.array(valid_date)
    meta_info['fully_valid'] = sum(fully_valid)

    # Save for different folds
    for fold in folds:
        fold_valid = np.array(fully_valid) * np.array(merged[fold], dtype=int)
        meta_info['valid_{}'.format(fold)] = sum(fold_valid)

        out_data = dict()
        for col in cols_to_keep:
            out_col = [e for e, v in zip(merged[col], fold_valid) if v == 1]
            out_data[col] = out_col
        clean_file = os.path.join(out_root, 'clean_{}.csv'.format(fold))
        save_csv(out_data, clean_file)

        # Plot fold exposure:
        fold_exposure = [e for e, v in zip(exposures, fold_valid) if v == 1]
        plt.clf()
        plt.hist(fold_exposure, bins=10000, histtype='step')
        plt.xticks(rotation=90)
        plt.savefig(os.path.join(out_root, 'exposures_{}.pdf'.format(fold)))

    save_csv(meta_info, meta_file)
    dict_to_bar(meta_info, os.path.join(out_root, 'meta_info.pdf'))
Example #20
def parametrize(s, date):
    ref_date = getattr(sys.modules[__name__], '{}_ref_date'.format(s))
    ref_file = os.path.join(out_root, '{}_{}_geodesic.csv'.format(s, ref_date))

    data = load_csv(os.path.join(in_root, 'clean_{}.csv'.format(s)))

    ref_data = load_csv(ref_file)
    ref_xy = [(float(x), float(y))
              for x, y in zip(ref_data['easting'], ref_data['northing'])]

    ref_l = np.array(ref_data['l'], dtype=float)
    ref_yaw = np.array(ref_data['yaw'], dtype=float)

    ref_tree = KDTree(np.array(ref_xy))

    vmin = min(ref_l)
    vmax = max(ref_l)

    date_data = dict()
    for key in data.keys():
        date_data[key] = [
            e for e, d in zip(data[key], data['date']) if d == date
        ]
    date_xy = [(float(x), float(y))
               for x, y in zip(date_data['easting'], date_data['northing'])]
    date_d = [0] + [
        math.sqrt((p[0] - q[0])**2 + (p[1] - q[1])**2)
        for p, q in zip(date_xy[1:], date_xy[:-1])
    ]
    date_l = [sum(date_d[:i]) for i in range(1, len(date_d) + 1)]
    date_yaw = np.array(date_data['yaw'], dtype=float)

    matched_l = np.zeros(len(date_yaw))
    matchable = []
    r = 20
    if s == 'val':
        r = 100

    date_ni, date_nd = ref_tree.query_radius(np.array(date_xy),
                                             r=r,
                                             return_distance=True,
                                             sort_results=True)

    current_l = 0
    latest_valid = 0

    for j, (yaw, ni, nd) in enumerate(zip(date_yaw, date_ni, date_nd)):

        if len(ni) < 2:
            continue

        angle_neighbors = [
            i for i in range(len(ni))
            if abs(yaw - ref_yaw[ni[i]]) % (2 * math.pi) < math.pi / 3
        ]

        ni = [ni[i] for i in angle_neighbors]
        nd = [nd[i] for i in angle_neighbors]

        if len(ni) < 2:
            continue

        potential_l = np.array([ref_l[i] for i in ni])

        if j == 0:
            threshold = 40
            if s == 'val':
                threshold = 5

            km = KMeans(n_clusters=2,
                        random_state=0).fit(potential_l.reshape(-1, 1))
            if abs(km.cluster_centers_[0] -
                   km.cluster_centers_[1]) > threshold:
                closest_center = km.predict(
                    np.array(current_l).reshape(-1, 1))[0]
                assignments = km.labels_
                l_neighbors = [
                    i for i, a in zip(range(len(ni)), assignments)
                    if a == closest_center
                ]
            else:
                l_neighbors = range(len(ni))
        else:
            l_neighbors = [
                i for i, l in enumerate(potential_l)
                if abs(current_l - date_l[latest_valid] + date_l[j] - l) < 500
            ]
        ni = [ni[i] for i in l_neighbors]
        nd = [nd[i] for i in l_neighbors]

        if len(ni) < 2:
            continue

        interp_l = lin_ip(ref_l[ni[0]], ref_l[ni[1]], nd[0], nd[1])
        current_l = interp_l
        latest_valid = j
        matched_l[j] = interp_l
        print(interp_l)
        matchable.append(j)

    if len(matchable) > 0:
        date_data['l'] = matched_l
        for key in ref_data.keys():
            date_data[key] = [date_data[key][i] for i in matchable]
        plot_results(date_xy, date_yaw, date_l, date, date_data, s, vmin, vmax)
        out_file = os.path.join(out_root, '{}_{}_geodesic.csv'.format(s, date))
        save_csv(date_data, out_file)