예제 #1
0
def convertannotationdata_fromwindows2unixstyle(config,
                                                userfeedback=True,
                                                win2linux=True):
    """
    Converts paths in annotation file (CollectedData_*user*.h5) in labeled-data/videofolder1, etc.

    from windows to linux format. This is important when one is e.g. labeling on Windows, but
    wants to re-label/check_labels/ on a Linux computer (and vice versa).

    Note for training data annotated on Windows in Linux this is not necessary, as the data
    gets converted during training set creation.

    Parameters
    ----------
    config : string
        Full path of the config.yaml file as a string.

    userfeedback: bool, optional
        If true the user will be asked specifically for each folder in labeled-data if the
        annotation file it contains shall be converted. If false, every folder is converted
        without asking.

    win2linux: bool, optional.
        By default converts from windows to linux. If false, converts from unix to windows.

    Notes
    -----
    Folders whose annotation file does not exist are skipped with a warning.
    """
    cfg = auxiliaryfunctions.read_config(config)
    # One labeled-data folder per entry of cfg["video_sets"], named after the
    # second element returned by _robust_path_split for that video.
    folders = [
        Path(config).parent / "labeled-data" /
        trainingsetmanipulation._robust_path_split(vid)[1]
        for vid in cfg["video_sets"]
    ]

    for folder in folders:
        if userfeedback:
            print("Do you want to convert the annotationdata in folder:",
                  folder, "?")
            askuser = input("yes/no")
        else:
            # No prompt requested: treat every folder as confirmed. Any value
            # outside the accepted answers below would turn the call into a no-op.
            askuser = "yes"

        if askuser not in ("y", "yes", "Ja", "ha"):
            continue

        fn = os.path.join(str(folder), "CollectedData_" + cfg["scorer"])
        h5_path = fn + ".h5"
        if not os.path.exists(h5_path):
            warnings.warn(f"Could not find '{h5_path}'. skipping")
            continue

        Data = pd.read_hdf(h5_path)
        # The converters receive the loaded table and the extension-less file
        # stem; presumably they write the converted annotations back to disk.
        if win2linux:
            convertpaths_to_unixstyle(Data, fn)
        else:
            convertpaths_to_windowsstyle(Data, fn)
예제 #2
0
def convert_cropped_to_standard_dataset(
    config_path,
    recreate_datasets=True,
    delete_crops=True,
    back_up=True,
):
    """
    Revert a project trained on cropped labeled data to its original videos.

    The entries "video_sets_original" and "croppedtraining" are removed from the
    configuration, "video_sets" is replaced by the original video sets and the
    configuration is written back to ``config_path``. Optionally the project is
    backed up first, the cropped labeled-data folders are deleted, and the
    training datasets are recreated with the train/test splits found in the
    existing "Docu*" pickle files.

    Parameters
    ----------
    config_path : string
        Full path of the config.yaml file as a string.

    recreate_datasets: bool, optional
        If true (default), the pickle files in the training-set folder are removed
        and the datasets are recreated via ``create_multianimaltraining_dataset``.

    delete_crops: bool, optional
        If true (default), the labeled-data folder of every video whose name
        contains "_cropped" is deleted.

    back_up: bool, optional
        If true (default), the project folder is copied to "<project_path>_bak"
        before anything is modified.

    Raises
    ------
    FileNotFoundError
        If ``recreate_datasets`` is true and no pose_cfg.yaml file can be found
        under "<project_path>/dlc-models".
    """
    import pandas as pd
    import pickle
    import shutil
    from deeplabcut.generate_training_dataset import trainingsetmanipulation
    from deeplabcut.utils import read_plainconfig, write_config

    cfg = auxiliaryfunctions.read_config(config_path)
    # Default to None so that a config without these keys is reported as
    # "not cropped" below instead of failing with a KeyError.
    videos_orig = cfg.pop("video_sets_original", None)
    is_cropped = cfg.pop("croppedtraining", None)
    if videos_orig is None or not is_cropped:
        print("Labeled data do not appear to be cropped. "
              "Project will remain unchanged...")
        return

    project_path = cfg["project_path"]

    if back_up:
        print("Backing up project...")
        # shutil.copytree raises if the "_bak" folder already exists, so an
        # earlier backup is never overwritten silently.
        shutil.copytree(project_path, project_path + "_bak", symlinks=True)

    if delete_crops:
        print("Deleting crops...")
        data_path = os.path.join(project_path, "labeled-data")
        for video in cfg["video_sets"]:
            _, filename, _ = trainingsetmanipulation._robust_path_split(video)
            # Test the folder name itself rather than the whole video path:
            # a parent directory containing "_cropped" must not cause the
            # labeled data of an uncropped video to be deleted.
            if "_cropped" in filename:
                shutil.rmtree(os.path.join(data_path, filename),
                              ignore_errors=True)

    cfg["video_sets"] = videos_orig
    write_config(config_path, cfg)

    if not recreate_datasets:
        return

    datasets_folder = os.path.join(
        project_path,
        auxiliaryfunctions.GetTrainingSetFolder(cfg),
    )
    df_old = pd.read_hdf(
        os.path.join(datasets_folder,
                     "CollectedData_" + cfg["scorer"] + ".h5"), )

    # Assumes crops are named "<original stem>c<number>" -- TODO confirm
    # against the function that creates the crops.
    crop_suffix = re.compile(r"c\d+$")

    def strip_cropped_image_name(path):
        """Map the path of a cropped image onto the path of its source image."""
        head, filename = os.path.split(path)
        head = head.replace("_cropped", "")
        # splitext copes with file names containing more than one dot.
        stem, ext = os.path.splitext(filename)
        if crop_suffix.search(stem):
            # Strip only the trailing crop marker so that a "c" elsewhere in
            # the image name is left intact.
            stem = crop_suffix.sub("", stem)
        else:
            # Unknown naming scheme: keep the previous behaviour.
            stem = stem.split("c")[0]
        return os.path.join(head, stem + ext)

    img_names_old = np.asarray(
        [strip_cropped_image_name(img) for img in df_old.index.to_list()])
    df = merge_annotateddatasets(cfg, datasets_folder)
    img_names = df.index.to_numpy()
    train_idx = []
    test_idx = []
    pickle_files = []
    for filename in os.listdir(datasets_folder):
        if not filename.endswith("pickle"):
            continue
        pickle_file = os.path.join(datasets_folder, filename)
        pickle_files.append(pickle_file)
        if not filename.startswith("Docu"):
            continue
        # NOTE: unpickling executes arbitrary code; these files are expected
        # to be the ones written by this project, never untrusted input.
        with open(pickle_file, "rb") as f:
            _, train_inds, test_inds, train_frac = pickle.load(f)
        # Translate the old split (positions in the cropped dataset) into
        # positions of the corresponding source images in the merged dataset.
        train_inds_temp = np.flatnonzero(
            np.isin(img_names, img_names_old[train_inds]))
        test_inds_temp = np.flatnonzero(
            np.isin(img_names, img_names_old[test_inds]))
        train_inds, test_inds = pad_train_test_indices(
            train_inds_temp, test_inds_temp, train_frac)
        train_idx.append(train_inds)
        test_idx.append(test_inds)

    # Search a pose_config.yaml file to parse missing information
    # NOTE: the `break` only leaves the inner loop, so the walk continues and
    # the match found in the last visited directory is the one that is used.
    pose_config_path = ""
    for dirpath, _, filenames in os.walk(
            os.path.join(project_path, "dlc-models")):
        for file in filenames:
            if file.endswith("pose_cfg.yaml"):
                pose_config_path = os.path.join(dirpath, file)
                break
    if not pose_config_path:
        raise FileNotFoundError(
            "No pose_cfg.yaml file was found under "
            f"'{os.path.join(project_path, 'dlc-models')}'; "
            "the training datasets cannot be recreated.")
    pose_cfg = read_plainconfig(pose_config_path)
    net_type = pose_cfg["net_type"]
    if net_type == "resnet_50" and pose_cfg.get("multi_stage", False):
        net_type = "dlcrnet_ms5"

    # Clean the training-datasets folder prior to recreating the data pickles
    shuffle_inds = set()
    for file in pickle_files:
        # Parse the file name only: a directory called e.g. "shuffle3"
        # somewhere in the project path must not be mistaken for the index.
        match = re.search(r"shuffle(\d+)", os.path.basename(file))
        os.remove(file)
        if match:
            shuffle_inds.add(int(match.group(1)))
    # NOTE(review): train_idx/test_idx follow os.listdir order whereas the
    # shuffles are passed sorted numerically -- confirm that
    # create_multianimaltraining_dataset pairs them as intended.
    create_multianimaltraining_dataset(
        config_path,
        trainIndices=train_idx,
        testIndices=test_idx,
        Shuffles=sorted(shuffle_inds),
        net_type=net_type,
        paf_graph=pose_cfg["partaffinityfield_graph"],
        crop_size=pose_cfg.get("crop_size", [400, 400]),
        crop_sampling=pose_cfg.get("crop_sampling", "hybrid"),
    )
예제 #3
0
def _single_to_multi_bodypart_frame(Data, scorer, individual, bpt, imindex):
    """Return the (x, y) columns of one body part in the multi-animal column layout."""
    columns = pd.MultiIndex.from_arrays(
        np.array([2 * [scorer], 2 * [individual], 2 * [bpt], ["x", "y"]]),
        names=["scorer", "individuals", "bodyparts", "coords"],
    )
    if bpt in Data[scorer].keys():
        values = Data[scorer][bpt].values
    else:
        # Body part listed in the config but never labeled: fill with NaN.
        values = np.full((len(imindex), 2), np.nan)
    return pd.DataFrame(values, columns=columns, index=imindex)


def convert2_maDLC(config, userfeedback=True, forceindividual=None):
    """
    Converts single animal annotation file into a multianimal annotation file,
    by introducing an individuals column with either the first individual
    in individuals list in config.yaml or whatever is passed via "forceindividual".

    The original annotations are kept next to the converted ones as
    CollectedData_<scorer>singleanimal.h5 / .csv.

    Parameters
    ----------
    config : string
        Full path of the config.yaml file as a string.

    userfeedback: bool, optional
            If true (default) the user is asked for each folder in labeled-data whether
            the annotation file it contains should be converted. If false, all folders
            are converted without asking.

    forceindividual: None default
            If a string is given that is used in the individuals column.

    Notes
    -----
    Folders without an annotation file are skipped with a warning, and annotation
    files that already contain an "individuals" column level are left untouched.

    Examples
    --------
    Converts multianimalbodyparts under the 'first individual' in individuals list in config.yaml
    and uniquebodyparts under 'single'
    >>> deeplabcut.convert2_maDLC('/socialrearing-task/config.yaml')

    --------
    Converts multianimalbodyparts under the individual label mus17 and uniquebodyparts under 'single'
    >>> deeplabcut.convert2_maDLC('/socialrearing-task/config.yaml', forceindividual='mus17')
    """
    import warnings

    cfg = auxiliaryfunctions.read_config(config)
    scorer = cfg["scorer"]
    video_names = [
        trainingsetmanipulation._robust_path_split(i)[1]
        for i in cfg["video_sets"]
    ]
    folders = [
        Path(config).parent / "labeled-data" / Path(i) for i in video_names
    ]

    individuals, uniquebodyparts, multianimalbodyparts = extractindividualsandbodyparts(
        cfg)

    if forceindividual is None:
        if len(individuals) == 0:
            print("At least one individual should exist...")
            folders = []
            forceindividual = ""
        else:
            forceindividual = individuals[
                0]  # note that single is added at the end!

        if forceindividual == "single":  # no specific individual ()
            if len(multianimalbodyparts
                   ) > 0:  # there should be an individual name...
                print(
                    "At least one individual should exist beyond 'single', as there are multianimalbodyparts..."
                )
                folders = []

    for folder in folders:
        if userfeedback:
            print("Do you want to convert the annotation file in folder:",
                  folder, "?")
            askuser = input("yes/no")
        else:
            # No prompt requested: treat every folder as confirmed. Any value
            # outside the accepted answers below would turn the call into a no-op.
            askuser = "yes"

        if askuser not in ("y", "yes", "Ja", "ha"):  # multilanguage support :)
            continue

        fn = os.path.join(str(folder), "CollectedData_" + scorer)
        if not os.path.exists(fn + ".h5"):
            # Videos without annotations yet should not abort the conversion
            # of the remaining folders.
            warnings.warn(f"Could not find '{fn + '.h5'}'. skipping")
            continue

        Data = pd.read_hdf(fn + ".h5")
        if "individuals" in Data.columns.names:
            # Converting twice would look body parts up at the individuals
            # level, replace every label with NaN and overwrite the
            # single-animal backup.
            print("The annotation file already has an individuals level, skipping",
                  folder)
            continue
        imindex = Data.index

        print("This is a single animal data set, converting to multi...",
              folder)

        # -> adding (single,bpt) for uniquebodyparts
        frames = [
            _single_to_multi_bodypart_frame(Data, scorer, "single", bpt,
                                            imindex)
            for bpt in uniquebodyparts
        ]
        # -> adding (individual,bpt) for multianimalbodyparts
        frames.extend(
            _single_to_multi_bodypart_frame(Data, scorer,
                                            str(forceindividual), bpt,
                                            imindex)
            for bpt in multianimalbodyparts)

        if not frames:
            print("No bodyparts are listed in the config, nothing to convert in",
                  folder)
            continue

        # A single concatenation gives the same column order as appending the
        # frames one by one, without re-copying the growing table each time.
        dataFrame = pd.concat(frames, axis=1)

        # Keep the untouched single-animal annotations as a backup.
        Data.to_hdf(fn + "singleanimal.h5",
                    key="df_with_missing",
                    format="table",
                    mode="w")
        Data.to_csv(fn + "singleanimal.csv")

        dataFrame.to_hdf(fn + ".h5",
                         key="df_with_missing",
                         format="table",
                         mode="w")
        dataFrame.to_csv(fn + ".csv")