Beispiel #1
0
 def __call__(self, interactions, max_prefetch=20):
     """Store `interactions` and start generating their frames in the background.

     The frame generator returned by ``generate_frames_for_interactions`` is
     wrapped in a ``prefetch_generator.BackgroundGenerator`` (created with the
     given `max_prefetch`) and kept in ``self.generator``. Afterwards
     ``self.view_next_interaction()`` is called.
     """
     from prefetch_generator import BackgroundGenerator

     self.interactions = interactions
     frame_source = generate_frames_for_interactions(interactions)
     self.generator = BackgroundGenerator(frame_source,
                                          max_prefetch=max_prefetch)
     self.view_next_interaction()
Beispiel #2
0
 def __call__(self,
              interactions,
              max_prefetch=20,
              frame_width=None,
              n_frames=None):
     """Store `interactions` and start generating their frames in the background.

     `frame_width` and `n_frames` are forwarded unchanged to
     ``generate_frames_for_interactions``. The resulting generator is wrapped
     in a ``prefetch_generator.BackgroundGenerator`` (created with the given
     `max_prefetch`) and kept in ``self.generator``. Afterwards
     ``self.view_next_interaction()`` is called.
     """
     from prefetch_generator import BackgroundGenerator

     self.interactions = interactions
     frame_options = dict(frame_width=frame_width, n_frames=n_frames)
     frame_source = generate_frames_for_interactions(interactions,
                                                     **frame_options)
     self.generator = BackgroundGenerator(frame_source,
                                          max_prefetch=max_prefetch)
     self.view_next_interaction()
Beispiel #3
0
def generate_image_sequence_data(dataframe,
                                 output_file,
                                 video_root,
                                 video_cache_path,
                                 n_sequences=None,
                                 append=True,
                                 verbose=False,
                                 dry=False,
                                 skip_first_n_frames=0):
    """Generates and stores image sequences for all rows in a pandas.DataFrame.

    The rows are processed frame by frame. For every frame, the crops returned
    by get_all_crops_for_frame are written to the output archive as a pair of
    entries named "<event_id>_<frame_id>_<bee_id0>_<bee_id1>.json" (metadata)
    and "....npy" (image data).

    Arguments:
        dataframe: pandas.DataFrame
            DataFrame containing at least the columns bee_id0, bee_id1, label, event_id.
            The columns cam_id, timestamp, frame_id and fc_id are read by this function as well.
            Note: the DataFrame is sorted in place by (cam_id, timestamp).
        output_file: string
            Path to .zip file where the output is stored.
        video_root: string
            Path to the beesbook video location.
        video_cache_path: string
            Path where frame images can be cached (recommendation: use a ramdisk).
        n_sequences: int
            Optional. Whether to stop after having generated at least n_sequence image sequences.
            The count is checked after each frame, so more sequences may be generated.
        append: bool
            Whether to append to the output file instead of clearing it first.
        verbose: bool
            Whether to print additional output (also plots every generated sequence).
        dry: bool
            If set, no data is actually stored to the file. The output file is not even opened.
        skip_first_n_frames: int
            Skips the first skip_first_n_frames frame IDs (after sorting).

    Raises:
        ValueError: If the central entry of the neighbour frames returned by the
            image source does not match the frame ID that is being processed.
    """
    import contextlib

    video_manager = BeesbookVideoManager(video_root=video_root,
                                         cache_path=video_cache_path)

    dataframe.sort_values(["cam_id", "timestamp"], inplace=True)
    # Get a list containing both the unique frame IDs and the respective frame containers to make caching faster.
    unique_frames_fcs = list(dataframe[["frame_id",
                                        "fc_id"]].drop_duplicates().itertuples(
                                            index=False, name=None))
    remaining_frames_fcs = unique_frames_fcs[skip_first_n_frames:]
    image_source = get_whole_frame_image_sequences(video_manager,
                                                   remaining_frames_fcs,
                                                   verbose=verbose)
    image_source = prefetch_generator.BackgroundGenerator(image_source,
                                                          max_prefetch=1)

    # Skip the same number of frame groups so that groups and images stay aligned.
    frame_groups = itertools.islice(dataframe.groupby("frame_id", sort=False),
                                    skip_first_n_frames, None)
    iterable = zip(frame_groups, image_source)
    # Skipped frames are never iterated, so they must not count towards the total.
    iterable = tqdm_notebook(iterable, total=len(remaining_frames_fcs))

    generated_sequence_count = 0
    with contextlib.ExitStack() as stack:
        cursor = stack.enter_context(
            DatabaseCursorContext("Troph. image retrieval"))

        # In a dry run the archive must not be touched at all: opening it with
        # mode "w" would truncate it and mode "a" would create it.
        zf = None
        if not dry:
            zf = stack.enter_context(
                zipfile.ZipFile(output_file,
                                mode="a" if append else "w",
                                compression=zipfile.ZIP_DEFLATED))

        for (frame_id, df), (images, neighbour_frames) in iterable:
            if images is None:
                continue
            # Sanity check: the central neighbour has to be the current frame.
            if frame_id != neighbour_frames[len(neighbour_frames) // 2][1]:
                print("Error extracting around frame_id {}".format(frame_id))
                print("Neighbours: {}".format(
                    str([int(n[1]) for n in neighbour_frames])))
                raise ValueError("Wrong neighbour frames returned.")
            results = get_all_crops_for_frame(frame_id,
                                              df,
                                              images,
                                              neighbour_frames,
                                              cursor=cursor,
                                              verbose=verbose)

            # Distinct names here avoid shadowing the outer frame_id/images.
            for (metadata, crops) in results:
                filename = "{}_{}_{}_{}".format(metadata["event_id"],
                                                metadata["frame_id"],
                                                metadata["bee_id0"],
                                                metadata["bee_id1"])
                if not dry:
                    zf.writestr(filename + ".json", json.dumps(metadata))

                    with zf.open(filename + ".npy", mode="w") as image_file:
                        np.save(image_file, crops)

                if verbose:
                    import matplotlib.pyplot as plt
                    # squeeze=False always yields a 2D array of axes, so a
                    # sequence consisting of a single image can be indexed too.
                    fig, axes = plt.subplots(1,
                                             len(crops),
                                             figsize=(20, 5),
                                             squeeze=False)
                    print(filename)
                    for ax, xy0, _, im in zip(axes[0],
                                              metadata["local_traj0"],
                                              metadata["local_traj1"],
                                              crops):
                        ax.imshow(im, cmap="gray")
                        ax.set_axis_off()
                        label = metadata["label"]
                        # Title: label and the third trajectory component
                        # converted from radians to degrees.
                        ax.set_title("{} {:2.2f}".format(
                            int(label), xy0[2] / np.pi * 180.0))
                    plt.show()

                generated_sequence_count += 1

            if n_sequences and generated_sequence_count >= n_sequences:
                break
Beispiel #4
0
def process_preprocessed_data(
        progress="tqdm",
        use_cuda=True,
        n_loader_processes=16,
        n_prediction_threads=3,
        model_path="/mnt/storage/david/cache/beesbook/trophallaxis/1dcnn.cache"
):
    """Classifies all preprocessed files that do not have an output file yet.

    The available files are taken from prefilter.get_available_processed_days().
    Every file whose output (see to_output_filename) does not exist yet is
    loaded, split into chunks, turned into features via load_features and
    scored with the model's predict_proba. The scores are left-joined onto the
    loaded rows and the columns (frame_id, bee_id0, bee_id1, score) are stored
    msgpack-encoded inside a zip archive at the output filename.

    Arguments:
        progress: string
            "tqdm" or "tqdm_notebook" to show progress bars. Any other value
            (including None) disables progress reporting.
        use_cuda: bool
            If False, the model is loaded onto the CPU and its use_cuda
            attribute is set to False.
        n_loader_processes: int
            Passed as max_workers to utils.prefetch_map for feature loading.
        n_prediction_threads: int
            Thread count given to utils.ParallelPipeline for pipeline stage 1
            (presumably the prediction step - confirm against ParallelPipeline).
        model_path: string
            Path of the serialized model that is read with torch.load.
    """
    class ThreadCtx():
        def __enter__(self):
            return dict()

        def __exit__(self, *args):
            pass

    # Only the two known progress backends enable progress bars. Everything
    # else runs silently instead of failing on missing progress bar methods.
    progress_bar_fun = None
    if progress == "tqdm":
        import tqdm
        progress_bar_fun = tqdm.tqdm
    elif progress == "tqdm_notebook":
        import tqdm
        progress_bar_fun = tqdm.tqdm_notebook
    show_progress = progress_bar_fun is not None

    available_files_df = prefilter.get_available_processed_days()
    # An explicit bool dtype keeps the inversion valid when no files are
    # available (an empty list would otherwise become a float array).
    already_processed = np.array([
        os.path.isfile(to_output_filename(filename))
        for filename in available_files_df.filename.values
    ],
                                 dtype=bool)
    available_files_df = available_files_df.iloc[~already_processed, :]

    def iter_and_load_data():
        for cam_id, dt_begin, dt_end, filepath in available_files_df[[
                "cam_id", "begin", "end", "filename"
        ]].itertuples(index=False):
            # Checked again because the output may have appeared in the meantime.
            if os.path.isfile(to_output_filename(filepath)):
                print("Skipping {}".format(filepath))
                continue
            data = None
            try:
                data = prefilter.load_processed_data(filepath,
                                                     warnings_as_errors=True)
            except Exception as e:
                # Best effort: a file that cannot be loaded is reported and skipped.
                print("Error! Skipping file {}. [{}]".format(filepath, str(e)))
                continue
            if data is None:
                continue
            data.sort_values("frame_id", inplace=True)
            yield filepath, data

    def predict(X, samples, valid_sample_indices, thread_context, **kwargs):
        # The model is loaded lazily, once per thread context.
        if "model" not in thread_context:
            torch_kwargs = dict()
            if not use_cuda:
                torch_kwargs["map_location"] = torch.device("cpu")
            # NOTE(review): torch.load unpickles the file, so model_path must
            # point to a trusted file.
            thread_context["model"] = torch.load(model_path, **torch_kwargs)
            if not use_cuda:
                thread_context["model"].use_cuda = False
        model = thread_context["model"]
        Y = model.predict_proba(X)[:, 1]
        results = []
        for idx in range(Y.shape[0]):
            # Map the prediction back to the row of the sample it belongs to.
            sample_idx = valid_sample_indices[idx]
            results.append(
                dict(frame_id=samples.frame_id.iloc[sample_idx],
                     bee_id0=samples.bee_id0.iloc[sample_idx],
                     bee_id1=samples.bee_id1.iloc[sample_idx],
                     score=Y[idx]))
        return pd.DataFrame(results)

    generator = prefetch_generator.BackgroundGenerator(iter_and_load_data(),
                                                       max_prefetch=1)
    if show_progress:
        trange = progress_bar_fun(generator,
                                  desc="Input",
                                  total=available_files_df.shape[0])
    else:
        trange = generator

    batchsize = 2000
    total_output = 0
    for filepath, filepath_data in trange:
        if show_progress:
            trange.set_postfix(classified_samples=total_output)

        n_chunks = (filepath_data.shape[0] // batchsize) + 1

        chunk_results = []
        chunk_range = None

        def save_chunk_results(results, **kwargs):
            nonlocal chunk_range
            if show_progress:
                # The bar is created on the first chunk and advanced on every
                # following one, hence the total of n_chunks - 1.
                if chunk_range is None:
                    chunk_range = progress_bar_fun(total=n_chunks - 1,
                                                   desc="Chunks")
                else:
                    chunk_range.update()
            chunk_results.append(results)

        def generate_chunks():
            yield from utils.iterate_minibatches(filepath_data,
                                                 targets=None,
                                                 batchsize=batchsize)

        def generate_chunked_features():
            yield from utils.prefetch_map(load_features,
                                          generate_chunks(),
                                          max_workers=n_loader_processes)

        pipeline = utils.ParallelPipeline(
            [generate_chunked_features, predict, save_chunk_results],
            n_thread_map={1: n_prediction_threads},
            thread_context_factory=lambda: ThreadCtx())
        pipeline()
        if chunk_range is not None:
            chunk_range.close()
        if not chunk_results:
            print("No results for {}".format(filepath))
            continue
        results_df = pd.concat(chunk_results, axis=0)

        # Left join: rows without a prediction are kept and get a NaN score.
        df = filepath_data.merge(results_df,
                                 how="left",
                                 on=["frame_id", "bee_id0", "bee_id1"])
        df = df[["frame_id", "bee_id0", "bee_id1", "score"]]
        total_output += results_df.shape[0]

        # Compact dtypes; astype returns a new frame, so nothing is assigned
        # onto a slice of the merged data.
        df = df.astype({
            "frame_id": np.uint64,
            "bee_id0": np.uint16,
            "bee_id1": np.uint16,
            "score": np.float32,
        })
        raw_df = list(df.itertuples(index=False))

        output_filename = to_output_filename(filepath)
        with zipfile.ZipFile(output_filename, "w", zipfile.ZIP_DEFLATED) as zf:
            # NOTE(review): replace() rewrites every "zip" in the base name,
            # not only the extension; kept as-is because readers may depend
            # on the resulting member name.
            with zf.open(
                    output_filename.split("/")[-1].replace("zip", "msgpack"),
                    "w") as file:
                msgpack.dump(raw_df, file, use_bin_type=True)