Example #1
def merge_windowsannotationdataONlinuxsystem(cfg):
    """ If a project was created on Windows (and labeled there,) but ran on unix then the data folders
    corresponding in the keys in cfg['video_sets'] are not found. This function gets them directly by
    looping over all folders in labeled-data """

    AnnotationData = []
    data_path = Path(cfg["project_path"], "labeled-data")
    use_cropped = cfg.get("croppedtraining", False)
    annotationfolders = []
    for elem in auxiliaryfunctions.grab_files_in_folder(data_path,
                                                        relative=False):
        if os.path.isdir(elem) and (
            (use_cropped and elem.endswith("_cropped"))
                or not (use_cropped or "_cropped" in elem)):
            annotationfolders.append(elem)
    print("The following folders were found:", annotationfolders)
    for folder in annotationfolders:
        filename = os.path.join(folder,
                                "CollectedData_" + cfg["scorer"] + ".h5")
        try:
            data = pd.read_hdf(filename, "df_with_missing")
            AnnotationData.append(data)
        except FileNotFoundError:
            print(filename, " not found (perhaps not annotated)")

    return AnnotationData
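These examples are shown without their imports. A minimal usage sketch for the function above (the config path is a placeholder; read_config and grab_files_in_folder are assumed to come from deeplabcut.utils.auxiliaryfunctions):

import os
from pathlib import Path

import pandas as pd
from deeplabcut.utils import auxiliaryfunctions

# Placeholder project path; point this at your own config.yaml
cfg = auxiliaryfunctions.read_config("/analysis/project/reaching-task/config.yaml")
annotation_frames = merge_windowsannotationdataONlinuxsystem(cfg)
print(f"Loaded {len(annotation_frames)} annotation tables from labeled-data.")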
Example #2
def analyze_videos_converth5_to_nwb(
    config,
    video_folder,
    videotype=".mp4",
    listofvideos=False,
):
    """
    Convert all h5 output data files in `video_folder` to NWB format.

    Parameters
    ----------
    config : string
        Absolute path to the project YAML config file.

    video_folder : string
        Absolute path of a folder containing videos and the corresponding h5 data files.

    videotype: string, optional (default=.mp4)
        Only videos with this extension are screened.

    listofvideos: bool, optional (default=False)
        If True, `video_folder` is interpreted as a list of video paths (as passed from the GUI) rather than a folder.

    Examples
    --------

    Convert all pose-output files belonging to mp4 videos in the folder '/media/alex/experimentaldata/cheetahvideos' to NWB files:
    deeplabcut.analyze_videos_converth5_to_nwb(config, '/media/alex/experimentaldata/cheetahvideos', '.mp4')

    """
    if listofvideos:  # can also be called with a list of videos (from GUI)
        videos = video_folder  # GUI gives a list of videos
        if len(videos) > 0:
            h5_files = list(
                auxiliaryfunctions.grab_files_in_folder(Path(videos[0]).parent,
                                                        "h5",
                                                        relative=False))
        else:
            h5_files = []
    else:
        h5_files = list(
            auxiliaryfunctions.grab_files_in_folder(video_folder,
                                                    "h5",
                                                    relative=False))
        videos = auxiliaryfunctions.grab_files_in_folder(video_folder,
                                                         videotype,
                                                         relative=False)

    _convert_h5_files_to("nwb", config, h5_files, videos)
Example #3
def analyze_videos_converth5_to_csv(video_folder,
                                    videotype=".mp4",
                                    listofvideos=False):
    """
    By default the output poses (from analyze_videos) are stored as a MultiIndex pandas array, which contains the network name, body part name, (x, y) label position
    in pixels, and the likelihood for each frame per body part. These arrays are stored in an efficient Hierarchical Data Format (HDF)
    in the same directory where the video is stored. This function converts the HDF (h5) files to the comma-separated values format (.csv),
    which in turn can be imported into many programs, such as MATLAB, R, Prism, etc.

    Parameters
    ----------

    video_folder : string
        Absolute path of a folder containing videos and the corresponding h5 data files.

    videotype: string, optional (default=.mp4)
        Only videos with this extension are screened.

    listofvideos: bool, optional (default=False)
        If True, `video_folder` is interpreted as a list of video paths (as passed from the GUI) rather than a folder.

    Examples
    --------

    Converts all pose-output files belonging to mp4 videos in the folder '/media/alex/experimentaldata/cheetahvideos' to csv files.
    deeplabcut.analyze_videos_converth5_to_csv('/media/alex/experimentaldata/cheetahvideos','.mp4')

    """

    if listofvideos:  # can also be called with a list of videos (from GUI)
        videos = video_folder  # GUI gives a list of videos
        if len(videos) > 0:
            h5_files = list(
                auxiliaryfunctions.grab_files_in_folder(Path(videos[0]).parent,
                                                        "h5",
                                                        relative=False))
        else:
            h5_files = []
    else:
        h5_files = list(
            auxiliaryfunctions.grab_files_in_folder(video_folder,
                                                    "h5",
                                                    relative=False))
        videos = auxiliaryfunctions.grab_files_in_folder(video_folder,
                                                         videotype,
                                                         relative=False)

    _convert_h5_files_to("csv", None, h5_files, videos)
Example #4
def analyze_videos_converth5_to_csv(video_folder, videotype=".mp4"):
    """
    By default the output poses (from analyze_videos) are stored as a MultiIndex pandas array, which contains the network name, body part name, (x, y) label position
    in pixels, and the likelihood for each frame per body part. These arrays are stored in an efficient Hierarchical Data Format (HDF)
    in the same directory where the video is stored. If the flag save_as_csv is set to True, the data is also exported as a comma-separated value file. However,
    if the flag was *not* set, this function converts all h5 files to csv files (without having to analyze the videos again)!

    This function converts HDF (h5) files to the comma-separated values format (.csv), which in turn can be imported into many programs, such as MATLAB, R, Prism, etc.

    Parameters
    ----------

    video_folder : string
        Absolute path of a folder containing videos and the corresponding h5 data files.

    videotype: string, optional (default=.mp4)
        Only videos with this extension are screened.

    Examples
    --------

    Converts all pose-output files belonging to mp4 videos in the folder '/media/alex/experimentaldata/cheetahvideos' to csv files.
    deeplabcut.analyze_videos_converth5_to_csv('/media/alex/experimentaldata/cheetahvideos','.mp4')

    """
    h5_files = list(
        auxiliaryfunctions.grab_files_in_folder(video_folder,
                                                "h5",
                                                relative=False))
    videos = auxiliaryfunctions.grab_files_in_folder(video_folder,
                                                     videotype,
                                                     relative=False)
    for video in videos:
        if "_labeled" in video:
            continue
        vname = Path(video).stem
        for file in h5_files:
            if vname in file:
                scorer = file.split(vname)[1].split(".h5")[0]
                if "DLC" in scorer or "DeepCut" in scorer:
                    print("Found output file for scorer:", scorer)
                    print(f"Converting {file}...")
                    df = pd.read_hdf(file)
                    df.to_csv(file.replace(".h5", ".csv"))
    print("All pose files were converted.")
Example #5
def cropimagesandlabels(
    config,
    numcrops=10,
    size=(400, 400),
    userfeedback=True,
    cropdata=True,
    excludealreadycropped=True,
    updatevideoentries=True,
):
    """
    Crop each labeled image into multiple random crops (numcrops) of the given size. If cropdata=True, the
    annotation data is loaded and the labels for the cropped images are inherited.
    If False, crops can also be made for unlabeled folders.

    This can be helpful for large frames with multiple animals, as it creates a smaller set of equally sized images.

    Parameters
    ----------
    config : string
        String containing the full path of the config file in the project.

    numcrops: int, optional (default=10)
        Number of random crops per image (around a random bodypart).

    size: tuple, optional (default=(400, 400))
        Crop height x width in pixels.

    userfeedback: bool, optional
        If True, you are asked for confirmation before each folder is cropped.
        If False, all folders are processed without asking.

    cropdata: bool, optional (default=True)
        If True, corresponding annotation data is created (from the ground-truth labels).

    excludealreadycropped: bool, optional (default=True)
        If True, folders that already contain '_cropped' in their name are skipped.

    updatevideoentries: bool, optional (default=True)
        If True, the video_sets entries in the config are updated to refer to the cropped frames instead.
        This makes sense for subsequent processing.

    Example
    --------
    To crop images (and inherit their labels) for a project:
    >>> deeplabcut.cropimagesandlabels('/analysis/project/reaching-task/config.yaml')

    --------
    """
    from tqdm import trange

    indexlength = int(np.ceil(np.log10(numcrops)))
    project_path = os.path.dirname(config)
    cfg = auxiliaryfunctions.read_config(config)
    videos = cfg["video_sets"].keys()
    video_names = []
    for video in videos:
        parent, filename, ext = _robust_path_split(video)
        if excludealreadycropped and "_cropped" in filename:
            continue
        video_names.append([parent, filename, ext])

    if ("video_sets_original" not in cfg.keys() and updatevideoentries
        ):  # this dict is kept for storing links to original full-sized videos
        cfg["video_sets_original"] = {}

    for vidpath, vidname, videotype in video_names:
        folder = os.path.join(project_path, "labeled-data", vidname)
        if userfeedback:
            print("Do you want to crop frames for folder: ", folder, "?")
            askuser = input("(yes/no):")
        else:
            askuser = "******"
        if askuser == "y" or askuser == "yes" or askuser == "Y" or askuser == "Yes":
            new_vidname = vidname + "_cropped"
            new_folder = os.path.join(project_path, "labeled-data",
                                      new_vidname)
            auxiliaryfunctions.attempttomakefolder(new_folder)

            AnnotationData = []
            pd_index = []

            fn = os.path.join(folder, f"CollectedData_{cfg['scorer']}.h5")
            df = pd.read_hdf(fn, "df_with_missing")
            data = df.values.reshape((df.shape[0], -1, 2))
            sep = "/" if "/" in df.index[0] else "\\"
            if sep != os.path.sep:
                df.index = df.index.str.replace(sep, os.path.sep)
            images = project_path + os.path.sep + df.index
            # Avoid cropping already cropped images
            cropped_images = auxiliaryfunctions.grab_files_in_folder(
                new_folder, "png")
            cropped_names = set(map(lambda x: x.split("c")[0], cropped_images))
            imnames = [
                im for im in images.to_list()
                if Path(im).stem not in cropped_names
            ]
            ic = io.imread_collection(imnames)
            for i in trange(len(ic)):
                frame = ic[i]
                h, w = np.shape(frame)[:2]
                if size[0] >= h or size[1] >= w:
                    shutil.rmtree(new_folder, ignore_errors=True)
                    raise ValueError(
                        "Crop dimensions are larger than image size")

                imagename = os.path.relpath(ic.files[i], project_path)
                ind = np.flatnonzero(df.index == imagename)[0]
                cropindex = 0
                attempts = -1
                while cropindex < numcrops:
                    dd = np.array(data[ind].copy(), dtype=float)
                    y0, x0 = (
                        np.random.randint(h - size[0]),
                        np.random.randint(w - size[1]),
                    )
                    y1 = y0 + size[0]
                    x1 = x0 + size[1]
                    with np.errstate(invalid="ignore"):
                        within = np.all((dd >= [x0, y0]) & (dd < [x1, y1]),
                                        axis=1)
                    if cropdata:
                        dd[within] -= [x0, y0]
                        dd[~within] = np.nan
                    attempts += 1
                    if within.any() or attempts > 10:
                        newimname = str(
                            Path(imagename).stem + "c" +
                            str(cropindex).zfill(indexlength) + ".png")
                        cropppedimgname = os.path.join(new_folder, newimname)
                        io.imsave(cropppedimgname, frame[y0:y1, x0:x1])
                        cropindex += 1
                        pd_index.append(
                            os.path.join("labeled-data", new_vidname,
                                         newimname))
                        AnnotationData.append(dd.flatten())

            if cropdata:
                df = pd.DataFrame(AnnotationData,
                                  index=pd_index,
                                  columns=df.columns)
                fn_new = fn.replace(folder, new_folder)
                df.to_hdf(fn_new, key="df_with_missing", mode="w")
                df.to_csv(fn_new.replace(".h5", ".csv"))

            if updatevideoentries and cropdata:
                # moving old entry to _original, dropping it from video_set and update crop parameters
                video_orig = sep.join((vidpath, vidname + videotype))
                cfg["video_sets_original"][video_orig] = cfg["video_sets"][
                    video_orig]
                cfg["video_sets"].pop(video_orig)
                cfg["video_sets"][sep.join(
                    (vidpath, new_vidname + videotype))] = {
                        "crop": ", ".join(map(str, [0, size[1], 0, size[0]]))
                    }

    cfg["croppedtraining"] = True
    auxiliaryfunctions.write_config(config, cfg)
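The label bookkeeping inside the crop loop above reduces to one mask and one offset; a worked example with made-up coordinates and a (400, 400) crop:

import numpy as np

dd = np.array([[120.0, 300.0], [700.0, 50.0]])  # (x, y) of two labeled bodyparts
x0, y0 = 100, 250                                # made-up top-left corner of the crop
x1, y1 = x0 + 400, y0 + 400                      # crop window of size (400, 400)

within = np.all((dd >= [x0, y0]) & (dd < [x1, y1]), axis=1)
dd[within] -= [x0, y0]   # labels inside the window move to crop coordinates
dd[~within] = np.nan     # labels outside the window are dropped
print(dd)                # [[ 20.  50.] [ nan  nan]]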

    # --- Separate fragment: artificial multi-animal test-data creation. ---
    # The setup (n_animals, animals, bodyparts_multi, bodyparts_single, SCORER,
    # rel_folder, image_folder, config_path) is not included in this excerpt;
    # the head of the statement below is an assumed reconstruction based on how
    # animals_id is used afterwards.
    animals_id = [i for i in range(n_animals) for _ in bodyparts_multi] + [
        n_animals
    ] * len(bodyparts_single)
    map_ = dict(zip(range(len(animals)), animals))
    individuals = [map_[ind] for ind in animals_id for _ in range(2)]
    scorer = [SCORER] * len(individuals)
    coords = ["x", "y"] * len(animals_id)
    bodyparts = [
        bp for _ in range(n_animals) for bp in bodyparts_multi for _ in range(2)
    ]
    bodyparts += [bp for bp in bodyparts_single for _ in range(2)]
    columns = pd.MultiIndex.from_arrays(
        [scorer, individuals, bodyparts, coords],
        names=["scorer", "individuals", "bodyparts", "coords"],
    )
    index = [
        os.path.join(rel_folder, image)
        for image in auxiliaryfunctions.grab_files_in_folder(image_folder, "png")
    ]
    fake_data = np.tile(
        np.repeat(50 * np.arange(len(animals_id)) + 50, 2), (len(index), 1)
    )
    df = pd.DataFrame(fake_data, index=index, columns=columns)
    output_path = os.path.join(image_folder, f"CollectedData_{SCORER}.csv")
    df.to_csv(output_path)
    df.to_hdf(
        output_path.replace("csv", "h5"), "df_with_missing", format="table", mode="w"
    )
    print("Artificial data created.")

    print("Checking labels...")
    deeplabcut.check_labels(config_path, draw_skeleton=False)
    print("Labels checked.")