Example #1
def merge_annotateddatasets(cfg, trainingsetfolder_full):
    """
    Merges all the h5 files for all labeled-datasets (from individual videos).

    This is a bit of a mess because of cross platform compatibility.

    Within platform comp. is straightforward. But if someone labels on windows and wants to train on a unix cluster or colab...
    """
    AnnotationData = []
    data_path = Path(os.path.join(cfg["project_path"], "labeled-data"))
    videos = cfg["video_sets"].keys()
    for video in videos:
        _, filename, _ = _robust_path_split(video)
        file_path = os.path.join(data_path / filename,
                                 f'CollectedData_{cfg["scorer"]}.h5')
        try:
            data = pd.read_hdf(file_path)
            conversioncode.guarantee_multiindex_rows(data)
            AnnotationData.append(data)
        except FileNotFoundError:
            print(file_path, " not found (perhaps not annotated).")

    if not len(AnnotationData):
        print(
            "Annotation data was not found by splitting video paths (from config['video_sets']). An alternative route is taken..."
        )
        AnnotationData = conversioncode.merge_windowsannotationdataONlinuxsystem(
            cfg)
        if not len(AnnotationData):
            print("No data was found!")
            return

    AnnotationData = pd.concat(AnnotationData).sort_index()
    # When concatenating DataFrames with misaligned column labels,
    # all sorts of reordering may happen (mainly depending on 'sort' and 'join')
    # Ensure the 'bodyparts' level agrees with the order in the config file.
    if cfg.get("multianimalproject", False):
        (
            _,
            uniquebodyparts,
            multianimalbodyparts,
        ) = auxfun_multianimal.extractindividualsandbodyparts(cfg)
        bodyparts = multianimalbodyparts + uniquebodyparts
    else:
        bodyparts = cfg["bodyparts"]
    AnnotationData = AnnotationData.reindex(
        bodyparts,
        axis=1,
        level=AnnotationData.columns.names.index("bodyparts"))
    filename = os.path.join(trainingsetfolder_full,
                            f'CollectedData_{cfg["scorer"]}')
    AnnotationData.to_hdf(filename + ".h5", key="df_with_missing", mode="w")
    AnnotationData.to_csv(filename + ".csv")  # human readable.
    return AnnotationData
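A minimal usage sketch (the project path is hypothetical; it assumes cfg is the dict returned by auxiliaryfunctions.read_config and that the training-set folder already exists):

import os
from deeplabcut.utils import auxiliaryfunctions

cfg = auxiliaryfunctions.read_config("/path/to/project/config.yaml")  # hypothetical path
trainingsetfolder = auxiliaryfunctions.GetTrainingSetFolder(cfg)
trainingsetfolder_full = os.path.join(cfg["project_path"], str(trainingsetfolder))
df = merge_annotateddatasets(cfg, trainingsetfolder_full)  # returns None if no annotations are found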
Example #2
def test_guarantee_multiindex_rows():
    df_unix = pd.read_hdf(os.path.join(TEST_DATA_DIR, "trimouse_calib.h5"))
    df_windows = df_unix.copy()
    df_windows.index = df_windows.index.str.replace("/", "\\")
    nrows = len(df_unix)
    for df in (df_unix, df_windows):
        conversioncode.guarantee_multiindex_rows(df)
        assert isinstance(df.index, pd.MultiIndex)
        assert len(df) == nrows
        assert df.index.nlevels == 3
        assert all(df.index.get_level_values(0) == "labeled-data")
        assert all(img.endswith(".png") for img in df.index.get_level_values(2))
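guarantee_multiindex_rows normalizes the image paths in the row index so that data labeled on either platform can be read anywhere. A minimal sketch of the behavior the test pins down (not the library's actual implementation):

import pandas as pd

def _split_image_path(path):
    # "labeled-data/vid/img000.png" and "labeled-data\\vid\\img000.png"
    # both become ("labeled-data", "vid", "img000.png")
    return tuple(path.replace("\\", "/").split("/"))

def guarantee_multiindex_rows_sketch(df):
    if not isinstance(df.index, pd.MultiIndex):
        df.index = pd.MultiIndex.from_tuples(
            [_split_image_path(p) for p in df.index]
        )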
Example #3
    def chooseFrame(self):
        frame = img_as_ubyte(self.vid.read_frame(crop=self.cropping))
        fname = Path(self.filename)
        output_path = self.config_path.parents[0] / "labeled-data" / fname.stem
        img_name = (str(output_path) + "/img" + str(self.currFrame).zfill(
            int(np.ceil(np.log10(self.numberFrames)))) + ".png")
        if os.path.exists(img_name):
            warnings.warn(
                "The selected frame has already been extracted; please select another one."
            )
            return

        self.machinefile = os.path.join(
            str(output_path),
            "machinelabels-iter" + str(self.iterationindex) + ".h5")
        name = str(fname.stem)
        DF = self.Dataframe.iloc[[self.currFrame]]
        DF.index = pd.MultiIndex.from_tuples([
            ("labeled-data", name,
             "img" + str(index).zfill(self.strwidth) + ".png")
            for index in DF.index
        ])
        labeled_img_name = (str(output_path) + "/img" + str(
            self.currFrame).zfill(int(np.ceil(np.log10(self.numberFrames)))) +
                            "labeled.png")

        # Check whether the output path and a machine-label file both exist
        if output_path.exists() and Path(self.machinefile).is_file():
            io.imsave(img_name, frame)
            if self.savelabeled:
                self.figure.savefig(labeled_img_name, bbox_inches="tight")
            Data = pd.read_hdf(self.machinefile)
            conversioncode.guarantee_multiindex_rows(Data)
            DataCombined = pd.concat([Data, DF])
            DataCombined = DataCombined[~DataCombined.index.duplicated(
                keep="first")]
            DataCombined.to_hdf(self.machinefile,
                                key="df_with_missing",
                                mode="w")
            DataCombined.to_csv(
                os.path.join(str(output_path), "machinelabels.csv"))
        # If the machine-label file does not exist, create one
        elif output_path.exists() and not (Path(self.machinefile).is_file()):
            if self.savelabeled:
                self.figure.savefig(labeled_img_name, bbox_inches="tight")
            io.imsave(img_name, frame)
            DF.to_hdf(self.machinefile, key="df_with_missing", mode="w")
            DF.to_csv(os.path.join(str(output_path), "machinelabels.csv"))
        else:
            print(
                "%s path not found. Please make sure that the video was added to the config file using the function 'deeplabcut.add_new_videos'.Quitting for now!"
                % output_path)
            self.Destroy()
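The zero-padding width for frame filenames is derived from the total frame count, so the names sort correctly. For example:

import numpy as np

numberFrames = 2330
strwidth = int(np.ceil(np.log10(numberFrames)))  # ceil(log10(2330)) = 4
print("img" + str(17).zfill(strwidth) + ".png")  # -> img0017.png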
Example #4
def test_format_multianimal_training_data(monkeypatch):
    fake_shape = 3, 480, 640
    monkeypatch.setattr(
        multiple_individuals_trainingsetmanipulation,
        "read_image_shape_fast",
        lambda _: fake_shape,
    )
    df = pd.read_hdf(os.path.join(TEST_DATA_DIR, "trimouse_calib.h5"))
    guarantee_multiindex_rows(df)
    train_inds = list(range(10))
    n_decimals = 1
    data = format_multianimal_training_data(df, train_inds, "", n_decimals)
    assert len(data) == len(train_inds)
    assert all(isinstance(d, dict) for d in data)
    assert all(len(d["image"]) == 3 for d in data)
    assert all(np.all(d["size"] == np.array(fake_shape)) for d in data)
    assert all(
        (xy.shape[1] == 3 and np.isfinite(xy).all())
        for d in data
        for xy in d["joints"].values()
    )
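The asserts above pin down the multi-animal record layout; a hypothetical record consistent with them (the key semantics are assumptions, not taken from the library docs):

import numpy as np

record = {
    "image": ("labeled-data", "vid", "img000.png"),  # 3-tuple image path
    "size": np.array([3, 480, 640]),                 # channels, height, width
    "joints": {0: np.array([[0, 12.3, 45.6]])},      # per individual: rows of (joint_id, x, y)
}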
Example #5
def test_format_training_data(monkeypatch):
    fake_shape = 3, 480, 640
    monkeypatch.setattr(
        trainingsetmanipulation, "read_image_shape_fast", lambda _: fake_shape,
    )
    df = pd.read_hdf(os.path.join(TEST_DATA_DIR, "trimouse_calib.h5")).xs(
        "mus1", level="individuals", axis=1
    )
    guarantee_multiindex_rows(df)
    train_inds = list(range(10))
    _, data = format_training_data(df, train_inds, 12, "")
    assert len(data) == len(train_inds)
    # Check data comprise path, shape, and xy coordinates
    assert all(len(d) == 3 for d in data)
    assert all(
        (d[0].size == 3 and d[0].dtype.char == "U" and d[0][0, -1].endswith(".png"))
        for d in data
    )
    assert all(np.all(d[1] == np.array(fake_shape)[None]) for d in data)
    assert all(
        (d[2][0, 0].shape[1] == 3 and d[2][0, 0].dtype == np.int64) for d in data
    )
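Likewise for the single-animal format: each record is a (path, shape, joints) triple. A hypothetical record consistent with the asserts above:

import numpy as np

path = np.array([("labeled-data", "vid", "img000.png")])  # dtype "U", shape (1, 3)
size = np.array([[3, 480, 640]])                          # image shape with a leading axis
joints = np.zeros((1, 1), dtype=object)
joints[0, 0] = np.array([[0, 12, 34]], dtype=np.int64)    # rows of (joint_id, x, y)
record = (path, size, joints)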
Example #6
    def browseDir(self, event):
        """
        Show the DirDialog and ask the user to choose the directory where the extracted frames are stored
        """
        if self.jump_unlabeled:
            self.dir = str(
                auxiliaryfunctions.find_next_unlabeled_folder(
                    self.config_file))
        else:
            self.statusbar.SetStatusText(
                "Looking for a folder to start labeling...")
            cwd = os.path.join(os.getcwd(), "labeled-data")
            dlg = wx.DirDialog(
                self,
                "Choose the directory where your extracted frames are saved:",
                cwd,
                style=wx.DD_DEFAULT_STYLE,
            )
            if dlg.ShowModal() != wx.ID_OK:
                dlg.Destroy()
                self.Close(True)
                return
            self.dir = dlg.GetPath()
            dlg.Destroy()

        self.load.Enable(False)
        self.next.Enable(True)
        self.save.Enable(True)

        # Enabling the zoom, pan and home buttons
        self.zoom.Enable(True)
        self.home.Enable(True)
        self.pan.Enable(True)
        self.lock.Enable(True)

        # Reading config file and its variables
        self.cfg = auxiliaryfunctions.read_config(self.config_file)
        self.scorer = self.cfg["scorer"]
        self.bodyparts = self.cfg["bodyparts"]
        self.videos = self.cfg["video_sets"].keys()
        self.markerSize = self.cfg["dotsize"]
        self.alpha = self.cfg["alphavalue"]
        self.colormap = plt.get_cmap(self.cfg["colormap"])
        self.colormap = self.colormap.reversed()
        self.project_path = self.cfg["project_path"]

        imlist = []
        for imtype in self.imtypes:
            imlist.extend([
                fn for fn in glob.glob(os.path.join(self.dir, imtype))
                if ("labeled.png" not in fn)
            ])

        if len(imlist) == 0:
            print("No images found!!")

        self.index = np.sort(imlist)
        self.statusbar.SetStatusText("Working on folder: {}".format(
            os.path.split(str(self.dir))[-1]))
        relativeimagenames = [
            "labeled" + n.split("labeled")[1] for n in self.index
        ]  # [n.split(self.project_path+'/')[1] for n in self.index]
        self.relativeimagenames = [
            tuple(name.split(os.path.sep)) for name in relativeimagenames
        ]
        # Reading the existing dataset, if already present
        try:
            self.dataFrame = pd.read_hdf(
                os.path.join(self.dir, "CollectedData_" + self.scorer + ".h5"))
            conversioncode.guarantee_multiindex_rows(self.dataFrame)
            self.dataFrame.sort_index(inplace=True)
            self.prev.Enable(True)

            # Finds the first empty row in the dataframe and sets the iteration to that index
            for idx, j in enumerate(self.dataFrame.index):
                values = self.dataFrame.loc(axis=0)[j].values
                if np.prod(np.isnan(values)) == 1:  # all coordinates are NaN, i.e. frame not yet labeled
                    self.iter = idx
                    break
                else:
                    self.iter = 0

        except:  # no dataset exists yet; create an empty dataframe below
            a = np.empty((len(self.index), 2))
            a[:] = np.nan
            for bodypart in self.bodyparts:
                cols = pd.MultiIndex.from_product(
                    [[self.scorer], [bodypart], ["x", "y"]],
                    names=["scorer", "bodyparts", "coords"],
                )
                index = pd.MultiIndex.from_tuples(self.relativeimagenames)
                frame = pd.DataFrame(a, columns=cols, index=index)
                self.dataFrame = pd.concat([self.dataFrame, frame], axis=1)
            self.iter = 0

        # Reading the image name
        self.img = os.path.join(*self.dataFrame.index[self.iter])
        img_name = Path(self.img).name
        self.norm, self.colorIndex = getColorIndices(self.img, self.bodyparts)

        # Checking for new frames and adding them to the existing dataframe
        old_imgs = sorted(self.dataFrame.index)
        self.newimages = list(set(self.relativeimagenames) - set(old_imgs))
        if self.newimages:
            print("Found new frames..")
            # Create an empty dataframe with all the new images and then merge this to the existing dataframe.
            self.df = None
            a = np.empty((len(self.newimages), 2))
            a[:] = np.nan
            for bodypart in self.bodyparts:
                cols = pd.MultiIndex.from_product(
                    [[self.scorer], [bodypart], ["x", "y"]],
                    names=["scorer", "bodyparts", "coords"],
                )
                index = pd.MultiIndex.from_tuples(self.newimages)
                frame = pd.DataFrame(a, columns=cols, index=index)
                self.df = pd.concat([self.df, frame], axis=1)
            self.dataFrame = pd.concat([self.dataFrame, self.df], axis=0)
            # Sort it by the index values
            self.dataFrame.sort_index(inplace=True)

        # checks for unique bodyparts
        if len(self.bodyparts) != len(set(self.bodyparts)):
            print(
                "Error - bodyparts must have unique labels! Please choose unique bodyparts in config.yaml file and try again. Quitting for now!"
            )
            self.Close(True)

        # Extracting the list of new labels
        oldBodyParts = self.dataFrame.columns.get_level_values(1)
        _, idx = np.unique(oldBodyParts, return_index=True)
        oldbodyparts2plot = list(oldBodyParts[np.sort(idx)])
        self.new_bodyparts = [
            x for x in self.bodyparts if x not in oldbodyparts2plot
        ]
        # Checking if user added a new label
        if not self.new_bodyparts:  # i.e. no new label
            (
                self.figure,
                self.axes,
                self.canvas,
                self.toolbar,
            ) = self.image_panel.drawplot(self.img, img_name, self.iter,
                                          self.index, self.bodyparts,
                                          self.colormap)
            self.axes.callbacks.connect("xlim_changed", self.onZoom)
            self.axes.callbacks.connect("ylim_changed", self.onZoom)

            (
                self.choiceBox,
                self.rdb,
                self.slider,
                self.checkBox,
            ) = self.choice_panel.addRadioButtons(self.bodyparts, self.file,
                                                  self.markerSize)
            self.buttonCounter = MainFrame.plot(self, self.img)
            self.cidClick = self.canvas.mpl_connect("button_press_event",
                                                    self.onClick)
            self.canvas.mpl_connect("button_release_event",
                                    self.onButtonRelease)
        else:
            dlg = wx.MessageDialog(
                None,
                "New label found in the config file. Do you want to see all the other labels?",
                "New label found",
                wx.YES_NO | wx.ICON_WARNING,
            )
            result = dlg.ShowModal()
            if result == wx.ID_NO:
                self.bodyparts = self.new_bodyparts
                self.norm, self.colorIndex = getColorIndices(
                    self.img, self.bodyparts)
            a = np.empty((len(self.index), 2))
            a[:] = np.nan
            for bodypart in self.new_bodyparts:
                cols = pd.MultiIndex.from_product(
                    [[self.scorer], [bodypart], ["x", "y"]],
                    names=["scorer", "bodyparts", "coords"],
                )
                index = pd.MultiIndex.from_tuples(self.relativeimagenames)
                frame = pd.DataFrame(a, columns=cols, index=index)
                self.dataFrame = pd.concat([self.dataFrame, frame], axis=1)

            (
                self.figure,
                self.axes,
                self.canvas,
                self.toolbar,
            ) = self.image_panel.drawplot(self.img, img_name, self.iter,
                                          self.index, self.bodyparts,
                                          self.colormap)
            self.axes.callbacks.connect("xlim_changed", self.onZoom)
            self.axes.callbacks.connect("ylim_changed", self.onZoom)

            (
                self.choiceBox,
                self.rdb,
                self.slider,
                self.checkBox,
            ) = self.choice_panel.addRadioButtons(self.bodyparts, self.file,
                                                  self.markerSize)
            self.cidClick = self.canvas.mpl_connect("button_press_event",
                                                    self.onClick)
            self.canvas.mpl_connect("button_release_event",
                                    self.onButtonRelease)
            self.buttonCounter = MainFrame.plot(self, self.img)

        self.checkBox.Bind(wx.EVT_CHECKBOX, self.activateSlider)
        self.slider.Bind(wx.EVT_SLIDER, self.OnSliderScroll)
Example #7
def evaluate_multianimal_full(
    config,
    Shuffles=[1],
    trainingsetindex=0,
    plotting=False,
    show_errors=True,
    comparisonbodyparts="all",
    gputouse=None,
    modelprefix="",
):
    from deeplabcut.pose_estimation_tensorflow.core import (
        predict,
        predict_multianimal as predictma,
    )
    from deeplabcut.utils import (
        auxiliaryfunctions,
        auxfun_multianimal,
        auxfun_videos,
        conversioncode,
    )

    import tensorflow as tf

    if "TF_CUDNN_USE_AUTOTUNE" in os.environ:
        del os.environ["TF_CUDNN_USE_AUTOTUNE"]  # was potentially set during training

    tf.compat.v1.reset_default_graph()
    os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"  #
    if gputouse is not None:  # GPU selection
        os.environ["CUDA_VISIBLE_DEVICES"] = str(gputouse)

    start_path = os.getcwd()

    if plotting is True:
        plotting = "bodypart"

    ##################################################
    # Load data...
    ##################################################
    cfg = auxiliaryfunctions.read_config(config)
    if trainingsetindex == "all":
        TrainingFractions = cfg["TrainingFraction"]
    else:
        TrainingFractions = [cfg["TrainingFraction"][trainingsetindex]]

    # Loading human-annotated data
    trainingsetfolder = auxiliaryfunctions.GetTrainingSetFolder(cfg)
    Data = pd.read_hdf(
        os.path.join(
            cfg["project_path"],
            str(trainingsetfolder),
            "CollectedData_" + cfg["scorer"] + ".h5",
        )
    )
    conversioncode.guarantee_multiindex_rows(Data)

    # Get list of body parts to evaluate network for
    comparisonbodyparts = auxiliaryfunctions.IntersectionofBodyPartsandOnesGivenbyUser(
        cfg, comparisonbodyparts
    )
    all_bpts = np.asarray(
        len(cfg["individuals"]) * cfg["multianimalbodyparts"] + cfg["uniquebodyparts"]
    )
    colors = visualization.get_cmap(len(comparisonbodyparts), name=cfg["colormap"])
    # Make folder for evaluation
    auxiliaryfunctions.attempttomakefolder(
        str(cfg["project_path"] + "/evaluation-results/")
    )
    for shuffle in Shuffles:
        for trainFraction in TrainingFractions:
            ##################################################
            # Load and setup CNN part detector
            ##################################################
            datafn, metadatafn = auxiliaryfunctions.GetDataandMetaDataFilenames(
                trainingsetfolder, trainFraction, shuffle, cfg
            )
            modelfolder = os.path.join(
                cfg["project_path"],
                str(
                    auxiliaryfunctions.GetModelFolder(
                        trainFraction, shuffle, cfg, modelprefix=modelprefix
                    )
                ),
            )
            path_test_config = Path(modelfolder) / "test" / "pose_cfg.yaml"

            # Load meta data
            (
                data,
                trainIndices,
                testIndices,
                trainFraction,
            ) = auxiliaryfunctions.LoadMetadata(
                os.path.join(cfg["project_path"], metadatafn)
            )

            try:
                dlc_cfg = load_config(str(path_test_config))
            except FileNotFoundError:
                raise FileNotFoundError(
                    "It seems the model for shuffle %s and trainFraction %s does not exist."
                    % (shuffle, trainFraction)
                )

            pipeline = iaa.Sequential(random_order=False)
            pre_resize = dlc_cfg.get("pre_resize")
            if pre_resize:
                width, height = pre_resize
                pipeline.add(iaa.Resize({"height": height, "width": width}))

            # TODO: IMPLEMENT for different batch sizes?
            dlc_cfg["batch_size"] = 1  # due to differently sized images!!!

            stride = dlc_cfg["stride"]
            # Ignore best edges possibly defined during a prior evaluation
            _ = dlc_cfg.pop("paf_best", None)
            joints = dlc_cfg["all_joints_names"]

            # Create folder structure to store results.
            evaluationfolder = os.path.join(
                cfg["project_path"],
                str(
                    auxiliaryfunctions.GetEvaluationFolder(
                        trainFraction, shuffle, cfg, modelprefix=modelprefix
                    )
                ),
            )
            auxiliaryfunctions.attempttomakefolder(evaluationfolder, recursive=True)
            # path_train_config = modelfolder / 'train' / 'pose_cfg.yaml'

            # Check which snapshots are available and sort them by # iterations
            Snapshots = np.array(
                [
                    fn.split(".")[0]
                    for fn in os.listdir(os.path.join(str(modelfolder), "train"))
                    if "index" in fn
                ]
            )
            if len(Snapshots) == 0:
                print(
                    "Snapshots not found! It seems the dataset for shuffle %s and trainFraction %s is not trained.\nPlease train it before evaluating.\nUse the function 'train_network' to do so."
                    % (shuffle, trainFraction)
                )
            else:
                increasing_indices = np.argsort(
                    [int(m.split("-")[1]) for m in Snapshots]
                )
                Snapshots = Snapshots[increasing_indices]

                if cfg["snapshotindex"] == -1:
                    snapindices = [-1]
                elif cfg["snapshotindex"] == "all":
                    snapindices = range(len(Snapshots))
                elif cfg["snapshotindex"] < len(Snapshots):
                    snapindices = [cfg["snapshotindex"]]
                else:
                    print(
                        "Invalid choice, only -1 (last), any integer up to last, or all (as string)!"
                    )

                final_result = []
                ##################################################
                # Compute predictions over images
                ##################################################
                for snapindex in snapindices:
                    dlc_cfg["init_weights"] = os.path.join(
                        str(modelfolder), "train", Snapshots[snapindex]
                    )  # setting weights to corresponding snapshot.
                    trainingsiterations = (
                        dlc_cfg["init_weights"].split(os.sep)[-1]
                    ).split("-")[
                        -1
                    ]  # read how many training iterations that corresponds to.

                    # name for deeplabcut net (based on its parameters)
                    DLCscorer, DLCscorerlegacy = auxiliaryfunctions.GetScorerName(
                        cfg,
                        shuffle,
                        trainFraction,
                        trainingsiterations,
                        modelprefix=modelprefix,
                    )
                    print(
                        "Running ",
                        DLCscorer,
                        " with # of trainingiterations:",
                        trainingsiterations,
                    )
                    (
                        notanalyzed,
                        resultsfilename,
                        DLCscorer,
                    ) = auxiliaryfunctions.CheckifNotEvaluated(
                        str(evaluationfolder),
                        DLCscorer,
                        DLCscorerlegacy,
                        Snapshots[snapindex],
                    )

                    data_path = resultsfilename.split(".h5")[0] + "_full.pickle"

                    if plotting:
                        foldername = os.path.join(
                            str(evaluationfolder),
                            "LabeledImages_" + DLCscorer + "_" + Snapshots[snapindex],
                        )
                        auxiliaryfunctions.attempttomakefolder(foldername)
                        if plotting == "bodypart":
                            fig, ax = visualization.create_minimal_figure()

                    if os.path.isfile(data_path):
                        print("Model already evaluated.", resultsfilename)
                    else:
                        sess, inputs, outputs = predict.setup_pose_prediction(dlc_cfg)

                        PredicteData = {}
                        dist = np.full((len(Data), len(all_bpts)), np.nan)
                        conf = np.full_like(dist, np.nan)
                        print("Network Evaluation underway...")
                        for imageindex, imagename in tqdm(enumerate(Data.index)):
                            image_path = os.path.join(cfg["project_path"], *imagename)
                            frame = auxfun_videos.imread(image_path, mode="skimage")

                            GT = Data.iloc[imageindex]
                            if not GT.any():
                                continue

                            # Pass the image and the keypoints through the resizer;
                            # this has no effect if no augmenters were added to it.
                            keypoints = [GT.to_numpy().reshape((-1, 2)).astype(float)]
                            frame_, keypoints = pipeline(
                                images=[frame], keypoints=keypoints
                            )
                            frame = frame_[0]
                            GT[:] = keypoints[0].flatten()

                            df = GT.unstack("coords").reindex(joints, level="bodyparts")

                            # FIXME Is having an empty array vs nan really that necessary?!
                            groundtruthidentity = list(
                                df.index.get_level_values("individuals")
                                .to_numpy()
                                .reshape((-1, 1))
                            )
                            groundtruthcoordinates = list(df.values[:, np.newaxis])
                            for i, coords in enumerate(groundtruthcoordinates):
                                if np.isnan(coords).any():
                                    groundtruthcoordinates[i] = np.empty(
                                        (0, 2), dtype=float
                                    )
                                    groundtruthidentity[i] = np.array([], dtype=str)

                            # Form 2D array of shape (n_rows, 4) where the last dimension
                            # is (sample_index, peak_y, peak_x, bpt_index) to slice the PAFs.
                            temp = df.reset_index(level="bodyparts").dropna()
                            temp["bodyparts"].replace(
                                dict(zip(joints, range(len(joints)))), inplace=True,
                            )
                            temp["sample"] = 0
                            peaks_gt = temp.loc[
                                :, ["sample", "y", "x", "bodyparts"]
                            ].to_numpy()
                            peaks_gt[:, 1:3] = (peaks_gt[:, 1:3] - stride // 2) / stride

                            pred = predictma.predict_batched_peaks_and_costs(
                                dlc_cfg,
                                np.expand_dims(frame, axis=0),
                                sess,
                                inputs,
                                outputs,
                                peaks_gt.astype(int),
                            )

                            if not pred:
                                continue
                            else:
                                pred = pred[0]

                            PredicteData[imagename] = {}
                            PredicteData[imagename]["index"] = imageindex
                            PredicteData[imagename]["prediction"] = pred
                            PredicteData[imagename]["groundtruth"] = [
                                groundtruthidentity,
                                groundtruthcoordinates,
                                GT,
                            ]

                            coords_pred = pred["coordinates"][0]
                            probs_pred = pred["confidence"]
                            for bpt, xy_gt in df.groupby(level="bodyparts"):
                                inds_gt = np.flatnonzero(
                                    np.all(~np.isnan(xy_gt), axis=1)
                                )
                                n_joint = joints.index(bpt)
                                xy = coords_pred[n_joint]
                                if inds_gt.size and xy.size:
                                    # Pick the predictions closest to ground truth,
                                    # rather than the ones the model is most confident in
                                    xy_gt_values = xy_gt.iloc[inds_gt].values
                                    neighbors = _find_closest_neighbors(
                                        xy_gt_values, xy, k=3
                                    )
                                    found = neighbors != -1
                                    min_dists = np.linalg.norm(
                                        xy_gt_values[found] - xy[neighbors[found]],
                                        axis=1,
                                    )
                                    inds = np.flatnonzero(all_bpts == bpt)
                                    sl = imageindex, inds[inds_gt[found]]
                                    dist[sl] = min_dists
                                    conf[sl] = probs_pred[n_joint][
                                        neighbors[found]
                                    ].squeeze()

                            if plotting == "bodypart":
                                temp_xy = GT.unstack("bodyparts")[joints].values
                                gt = temp_xy.reshape(
                                    (-1, 2, temp_xy.shape[1])
                                ).T.swapaxes(1, 2)
                                h, w, _ = np.shape(frame)
                                fig.set_size_inches(w / 100, h / 100)
                                ax.set_xlim(0, w)
                                ax.set_ylim(0, h)
                                ax.invert_yaxis()
                                ax = visualization.make_multianimal_labeled_image(
                                    frame,
                                    gt,
                                    coords_pred,
                                    probs_pred,
                                    colors,
                                    cfg["dotsize"],
                                    cfg["alphavalue"],
                                    cfg["pcutoff"],
                                    ax=ax,
                                )
                                visualization.save_labeled_frame(
                                    fig,
                                    image_path,
                                    foldername,
                                    imageindex in trainIndices,
                                )
                                visualization.erase_artists(ax)

                        sess.close()  # closes the current tf session

                        # Compute all distance statistics
                        df_dist = pd.DataFrame(dist, columns=df.index)
                        df_conf = pd.DataFrame(conf, columns=df.index)
                        df_joint = pd.concat(
                            [df_dist, df_conf],
                            keys=["rmse", "conf"],
                            names=["metrics"],
                            axis=1,
                        )
                        df_joint = df_joint.reorder_levels(
                            list(np.roll(df_joint.columns.names, -1)), axis=1
                        )
                        df_joint.sort_index(
                            axis=1,
                            level=["individuals", "bodyparts"],
                            ascending=[True, True],
                            inplace=True,
                        )
                        write_path = os.path.join(
                            evaluationfolder, f"dist_{trainingsiterations}.csv"
                        )
                        df_joint.to_csv(write_path)

                        # Calculate overall prediction error
                        error = df_joint.xs("rmse", level="metrics", axis=1)
                        mask = (
                            df_joint.xs("conf", level="metrics", axis=1)
                            >= cfg["pcutoff"]
                        )
                        error_masked = error[mask]
                        error_train = np.nanmean(error.iloc[trainIndices])
                        error_train_cut = np.nanmean(error_masked.iloc[trainIndices])
                        error_test = np.nanmean(error.iloc[testIndices])
                        error_test_cut = np.nanmean(error_masked.iloc[testIndices])
                        results = [
                            trainingsiterations,
                            int(100 * trainFraction),
                            shuffle,
                            np.round(error_train, 2),
                            np.round(error_test, 2),
                            cfg["pcutoff"],
                            np.round(error_train_cut, 2),
                            np.round(error_test_cut, 2),
                        ]
                        final_result.append(results)

                        if show_errors:
                            string = (
                                "Results for {} training iterations, training fraction of {}, and shuffle {}:\n"
                                "Train error: {} pixels. Test error: {} pixels.\n"
                                "With pcutoff of {}:\n"
                                "Train error: {} pixels. Test error: {} pixels."
                            )
                            print(string.format(*results))

                            print("##########################################")
                            print(
                                "Average Euclidean distance to GT per individual (in pixels; test-only)"
                            )
                            print(
                                error_masked.iloc[testIndices]
                                .groupby("individuals", axis=1)
                                .mean()
                                .mean()
                                .to_string()
                            )
                            print(
                                "Average Euclidean distance to GT per bodypart (in pixels; test-only)"
                            )
                            print(
                                error_masked.iloc[testIndices]
                                .groupby("bodyparts", axis=1)
                                .mean()
                                .mean()
                                .to_string()
                            )

                        PredicteData["metadata"] = {
                            "nms radius": dlc_cfg["nmsradius"],
                            "minimal confidence": dlc_cfg["minconfidence"],
                            "sigma": dlc_cfg.get("sigma", 1),
                            "PAFgraph": dlc_cfg["partaffinityfield_graph"],
                            "PAFinds": np.arange(
                                len(dlc_cfg["partaffinityfield_graph"])
                            ),
                            "all_joints": [
                                [i] for i in range(len(dlc_cfg["all_joints"]))
                            ],
                            "all_joints_names": [
                                dlc_cfg["all_joints_names"][i]
                                for i in range(len(dlc_cfg["all_joints"]))
                            ],
                            "stride": dlc_cfg.get("stride", 8),
                        }
                        print(
                            "Done and results stored for snapshot: ",
                            Snapshots[snapindex],
                        )

                        dictionary = {
                            "Scorer": DLCscorer,
                            "DLC-model-config file": dlc_cfg,
                            "trainIndices": trainIndices,
                            "testIndices": testIndices,
                            "trainFraction": trainFraction,
                        }
                        metadata = {"data": dictionary}
                        _ = auxfun_multianimal.SaveFullMultiAnimalData(
                            PredicteData, metadata, resultsfilename
                        )

                        tf.compat.v1.reset_default_graph()

                    n_multibpts = len(cfg["multianimalbodyparts"])
                    if n_multibpts == 1:
                        continue

                    # Skip data-driven skeleton selection unless
                    # the model was trained on the full graph.
                    max_n_edges = n_multibpts * (n_multibpts - 1) // 2
                    n_edges = len(dlc_cfg["partaffinityfield_graph"])
                    if n_edges == max_n_edges:
                        print("Selecting best skeleton...")
                        n_graphs = 10
                        paf_inds = None
                    else:
                        n_graphs = 1
                        paf_inds = [list(range(n_edges))]
                    (
                        results,
                        paf_scores,
                        best_assemblies,
                    ) = crossvalutils.cross_validate_paf_graphs(
                        config,
                        str(path_test_config).replace("pose_", "inference_"),
                        data_path,
                        data_path.replace("_full.", "_meta."),
                        n_graphs=n_graphs,
                        paf_inds=paf_inds,
                        oks_sigma=dlc_cfg.get("oks_sigma", 0.1),
                        margin=dlc_cfg.get("bbox_margin", 0),
                        symmetric_kpts=dlc_cfg.get("symmetric_kpts"),
                    )
                    if plotting == "individual":
                        assemblies, assemblies_unique, image_paths = best_assemblies
                        fig, ax = visualization.create_minimal_figure()
                        n_animals = len(cfg["individuals"])
                        if cfg["uniquebodyparts"]:
                            n_animals += 1
                        colors = visualization.get_cmap(n_animals, name=cfg["colormap"])
                        for k, v in tqdm(assemblies.items()):
                            imname = image_paths[k]
                            image_path = os.path.join(cfg["project_path"], *imname)
                            frame = auxfun_videos.imread(image_path, mode="skimage")

                            h, w, _ = np.shape(frame)
                            fig.set_size_inches(w / 100, h / 100)
                            ax.set_xlim(0, w)
                            ax.set_ylim(0, h)
                            ax.invert_yaxis()

                            gt = [
                                s.to_numpy().reshape((-1, 2))
                                for _, s in Data.loc[imname].groupby("individuals")
                            ]
                            coords_pred = []
                            coords_pred += [ass.xy for ass in v]
                            probs_pred = []
                            probs_pred += [ass.data[:, 2:3] for ass in v]
                            if assemblies_unique is not None:
                                unique = assemblies_unique.get(k, None)
                                if unique is not None:
                                    coords_pred.append(unique[:, :2])
                                    probs_pred.append(unique[:, 2:3])
                            while len(coords_pred) < len(gt):
                                coords_pred.append(np.full((1, 2), np.nan))
                                probs_pred.append(np.full((1, 2), np.nan))
                            ax = visualization.make_multianimal_labeled_image(
                                frame,
                                gt,
                                coords_pred,
                                probs_pred,
                                colors,
                                cfg["dotsize"],
                                cfg["alphavalue"],
                                cfg["pcutoff"],
                                ax=ax,
                            )
                            visualization.save_labeled_frame(
                                fig, image_path, foldername, k in trainIndices,
                            )
                            visualization.erase_artists(ax)

                    df = results[1].copy()
                    df.loc(axis=0)[("mAP_train", "mean")] = [
                        d[0]["mAP"] for d in results[2]
                    ]
                    df.loc(axis=0)[("mAR_train", "mean")] = [
                        d[0]["mAR"] for d in results[2]
                    ]
                    df.loc(axis=0)[("mAP_test", "mean")] = [
                        d[1]["mAP"] for d in results[2]
                    ]
                    df.loc(axis=0)[("mAR_test", "mean")] = [
                        d[1]["mAR"] for d in results[2]
                    ]
                    with open(data_path.replace("_full.", "_map."), "wb") as file:
                        pickle.dump((df, paf_scores), file)

                if len(final_result) > 0:  # Only append if results were calculated
                    make_results_file(final_result, evaluationfolder, DLCscorer)

    os.chdir(str(start_path))
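The evaluation above matches each ground-truth keypoint to nearby predictions via _find_closest_neighbors. A minimal sketch of such a matcher (assumed semantics: greedily pair every ground-truth point with its nearest still-unclaimed prediction among the k nearest, returning -1 where no match remains):

import numpy as np
from scipy.spatial import cKDTree

def find_closest_neighbors_sketch(xy_true, xy_pred, k=3):
    n_pred = len(xy_pred)
    kk = min(k, n_pred)
    tree = cKDTree(xy_pred)
    _, candidates = tree.query(xy_true, k=kk)
    if kk == 1:
        candidates = candidates[:, None]  # keep a 2D (n_true, k) layout
    neighbors = np.full(len(xy_true), -1, dtype=int)
    taken = set()
    for i, row in enumerate(candidates):
        for j in row:
            if j not in taken:  # each prediction may be claimed only once
                neighbors[i] = j
                taken.add(j)
                break
    return neighbors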
Example #8
def evaluate_network(
    config,
    Shuffles=[1],
    trainingsetindex=0,
    plotting=False,
    show_errors=True,
    comparisonbodyparts="all",
    gputouse=None,
    rescale=False,
    modelprefix="",
):
    """

    Evaluates the network based on the saved models at different stages of training.
    The evaluation results are stored in .h5 and .csv files under the subdirectory 'evaluation-results'.
    Change the snapshotindex parameter in the config file to 'all' in order to evaluate all the saved models.

    Parameters
    ----------
    config : string
        Full path of the config.yaml file as a string.

    Shuffles: list, optional
        List of integers specifying the shuffle indices of the training dataset. The default is [1]

    trainingsetindex: int, optional
        Integer specifying which TrainingsetFraction to use. By default the first (note that TrainingFraction is a list in config.yaml). This
        variable can also be set to "all".

    plotting: bool or str, optional
        Plots the predictions on the train and test images.
        The default is ``False``; if provided it must be either ``True``, ``False``, "bodypart", or "individual".
        Setting to ``True`` defaults as "bodypart" for multi-animal projects.

    show_errors: bool, optional
        Display train and test errors. The default is ``True``.

    comparisonbodyparts: list of bodyparts, Default is "all".
        The average error will be computed for those body parts only (Has to be a subset of the body parts).

    gputouse: int, optional. Natural number indicating the index of your GPU (see number in nvidia-smi). If you do not have a GPU, put None.
        See: https://nvidia.custhelp.com/app/answers/detail/a_id/3751/~/useful-nvidia-smi-queries

    rescale: bool, default False
        Evaluate the model at the 'global_scale' variable (as set in the test/pose_config.yaml file for a particular project). I.e. every
        image will be resized according to that scale, and the predictions will be compared to the resized ground truth. The error is reported
        in pixels rescaled to the *original* size. I.e. for a [200,200] pixel image evaluated at global_scale=.5, the predictions are calculated
        on [100,100] pixel images, compared to 1/2 * the ground truth, and this error is then multiplied by 2. The evaluation images are also shown at the
        original size.

    Examples
    --------
    If you do not want to plot, just evaluate shuffle 1.
    >>> deeplabcut.evaluate_network('/analysis/project/reaching-task/config.yaml', Shuffles=[1])
    --------
    If you want to plot and evaluate shuffle 0 and 1.
    >>> deeplabcut.evaluate_network('/analysis/project/reaching-task/config.yaml',Shuffles=[0, 1],plotting = True)

    --------
    If you want to plot assemblies for a maDLC project:
    >>> deeplabcut.evaluate_network('/analysis/project/reaching-task/config.yaml',Shuffles=[1],plotting = "individual")

    Note: this defaults to standard plotting for single-animal projects.

    """
    if plotting not in (True, False, "bodypart", "individual"):
        raise ValueError(f"Unknown value for `plotting`={plotting}")

    import os

    start_path = os.getcwd()
    from deeplabcut.utils import auxiliaryfunctions

    cfg = auxiliaryfunctions.read_config(config)

    if cfg.get("multianimalproject", False):
        from .evaluate_multianimal import evaluate_multianimal_full

        # TODO: Make this code not so redundant!
        evaluate_multianimal_full(
            config=config,
            Shuffles=Shuffles,
            trainingsetindex=trainingsetindex,
            plotting=plotting,
            comparisonbodyparts=comparisonbodyparts,
            gputouse=gputouse,
            modelprefix=modelprefix,
        )
    else:
        from deeplabcut.utils.auxfun_videos import imread, imresize
        from deeplabcut.pose_estimation_tensorflow.core import predict
        from deeplabcut.pose_estimation_tensorflow.config import load_config
        from deeplabcut.pose_estimation_tensorflow.datasets.utils import data_to_input
        from deeplabcut.utils import auxiliaryfunctions, conversioncode
        import tensorflow as tf

        # If a string was passed in, auto-convert to True for backward compatibility
        plotting = bool(plotting)

        if "TF_CUDNN_USE_AUTOTUNE" in os.environ:
            del os.environ[
                "TF_CUDNN_USE_AUTOTUNE"]  # was potentially set during training

        tf.compat.v1.reset_default_graph()
        os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"  #
        #    tf.logging.set_verbosity(tf.logging.WARN)

        start_path = os.getcwd()
        # Read the config file; the relevant paths are passed on below.
        cfg = auxiliaryfunctions.read_config(config)
        if gputouse is not None:  # GPU selection
            os.environ["CUDA_VISIBLE_DEVICES"] = str(gputouse)

        if trainingsetindex == "all":
            TrainingFractions = cfg["TrainingFraction"]
        else:
            if (trainingsetindex < len(cfg["TrainingFraction"])
                    and trainingsetindex >= 0):
                TrainingFractions = [
                    cfg["TrainingFraction"][int(trainingsetindex)]
                ]
            else:
                raise Exception(
                    "Please check the trainingsetindex! ",
                    trainingsetindex,
                    " should be an integer from 0 .. ",
                    int(len(cfg["TrainingFraction"]) - 1),
                )

        # Loading human-annotated data
        trainingsetfolder = auxiliaryfunctions.GetTrainingSetFolder(cfg)
        Data = pd.read_hdf(
            os.path.join(
                cfg["project_path"],
                str(trainingsetfolder),
                "CollectedData_" + cfg["scorer"] + ".h5",
            ))

        # Get list of body parts to evaluate network for
        comparisonbodyparts = (
            auxiliaryfunctions.IntersectionofBodyPartsandOnesGivenbyUser(
                cfg, comparisonbodyparts))
        # Make folder for evaluation
        auxiliaryfunctions.attempttomakefolder(
            str(cfg["project_path"] + "/evaluation-results/"))
        for shuffle in Shuffles:
            for trainFraction in TrainingFractions:
                ##################################################
                # Load and setup CNN part detector
                ##################################################
                datafn, metadatafn = auxiliaryfunctions.GetDataandMetaDataFilenames(
                    trainingsetfolder, trainFraction, shuffle, cfg)
                modelfolder = os.path.join(
                    cfg["project_path"],
                    str(
                        auxiliaryfunctions.GetModelFolder(
                            trainFraction,
                            shuffle,
                            cfg,
                            modelprefix=modelprefix)),
                )

                path_test_config = Path(modelfolder) / "test" / "pose_cfg.yaml"
                # Load meta data
                (
                    data,
                    trainIndices,
                    testIndices,
                    trainFraction,
                ) = auxiliaryfunctions.LoadMetadata(
                    os.path.join(cfg["project_path"], metadatafn))

                try:
                    dlc_cfg = load_config(str(path_test_config))
                except FileNotFoundError:
                    raise FileNotFoundError(
                        "It seems the model for shuffle %s and trainFraction %s does not exist."
                        % (shuffle, trainFraction))

                # Reset batch size, in case it was edited during analysis.
                dlc_cfg["batch_size"] = 1

                # Create folder structure to store results.
                evaluationfolder = os.path.join(
                    cfg["project_path"],
                    str(
                        auxiliaryfunctions.GetEvaluationFolder(
                            trainFraction,
                            shuffle,
                            cfg,
                            modelprefix=modelprefix)),
                )
                auxiliaryfunctions.attempttomakefolder(evaluationfolder,
                                                       recursive=True)
                # path_train_config = modelfolder / 'train' / 'pose_cfg.yaml'

                # Check which snapshots are available and sort them by # iterations
                Snapshots = np.array([
                    fn.split(".")[0] for fn in os.listdir(
                        os.path.join(str(modelfolder), "train"))
                    if "index" in fn
                ])
                try:  # check whether any were found
                    Snapshots[0]
                except IndexError:
                    raise FileNotFoundError(
                        "Snapshots not found! It seems the dataset for shuffle %s and trainFraction %s is not trained.\nPlease train it before evaluating.\nUse the function 'train_network' to do so."
                        % (shuffle, trainFraction))

                increasing_indices = np.argsort(
                    [int(m.split("-")[1]) for m in Snapshots])
                Snapshots = Snapshots[increasing_indices]

                if cfg["snapshotindex"] == -1:
                    snapindices = [-1]
                elif cfg["snapshotindex"] == "all":
                    snapindices = range(len(Snapshots))
                elif cfg["snapshotindex"] < len(Snapshots):
                    snapindices = [cfg["snapshotindex"]]
                else:
                    raise ValueError(
                        "Invalid choice, only -1 (last), any integer up to last, or all (as string)!"
                    )

                final_result = []

                ########################### RESCALING (to global scale)
                if rescale:
                    scale = dlc_cfg["global_scale"]
                    Data = (pd.read_hdf(
                        os.path.join(
                            cfg["project_path"],
                            str(trainingsetfolder),
                            "CollectedData_" + cfg["scorer"] + ".h5",
                        )) * scale)
                else:
                    scale = 1

                conversioncode.guarantee_multiindex_rows(Data)
                ##################################################
                # Compute predictions over images
                ##################################################
                for snapindex in snapindices:
                    dlc_cfg["init_weights"] = os.path.join(
                        str(modelfolder), "train", Snapshots[snapindex]
                    )  # setting weights to corresponding snapshot.
                    trainingsiterations = (
                        dlc_cfg["init_weights"].split(os.sep)[-1]
                    ).split(
                        "-"
                    )[-1]  # read how many training iterations that corresponds to.

                    # Name for deeplabcut net (based on its parameters)
                    DLCscorer, DLCscorerlegacy = auxiliaryfunctions.GetScorerName(
                        cfg,
                        shuffle,
                        trainFraction,
                        trainingsiterations,
                        modelprefix=modelprefix,
                    )
                    print(
                        "Running ",
                        DLCscorer,
                        " with # of training iterations:",
                        trainingsiterations,
                    )
                    (
                        notanalyzed,
                        resultsfilename,
                        DLCscorer,
                    ) = auxiliaryfunctions.CheckifNotEvaluated(
                        str(evaluationfolder),
                        DLCscorer,
                        DLCscorerlegacy,
                        Snapshots[snapindex],
                    )
                    if notanalyzed:
                        # Specifying state of model (snapshot / training state)
                        sess, inputs, outputs = predict.setup_pose_prediction(
                            dlc_cfg)
                        Numimages = len(Data.index)
                        PredicteData = np.zeros(
                            (Numimages, 3 * len(dlc_cfg["all_joints_names"])))
                        print("Running evaluation ...")
                        for imageindex, imagename in tqdm(enumerate(
                                Data.index)):
                            image = imread(
                                os.path.join(cfg["project_path"], *imagename),
                                mode="skimage",
                            )
                            if scale != 1:
                                image = imresize(image, scale)

                            image_batch = data_to_input(image)
                            # Compute prediction with the CNN
                            outputs_np = sess.run(
                                outputs, feed_dict={inputs: image_batch})
                            scmap, locref = predict.extract_cnn_output(
                                outputs_np, dlc_cfg)

                            # Extract maximum scoring location from the heatmap, assume 1 person
                            pose = predict.argmax_pose_predict(
                                scmap, locref, dlc_cfg["stride"])
                            PredicteData[imageindex, :] = (
                                pose.flatten()
                            )  # NOTE: cfg_test['all_joints_names'] must be in the same order as bodyparts!

                        sess.close()  # closes the current tf session

                        index = pd.MultiIndex.from_product(
                            [
                                [DLCscorer],
                                dlc_cfg["all_joints_names"],
                                ["x", "y", "likelihood"],
                            ],
                            names=["scorer", "bodyparts", "coords"],
                        )

                        # Saving results
                        DataMachine = pd.DataFrame(PredicteData,
                                                   columns=index,
                                                   index=Data.index)
                        DataMachine.to_hdf(resultsfilename, key="df_with_missing")

                        print(
                            "Analysis is done and the results are stored (see evaluation-results) for snapshot: ",
                            Snapshots[snapindex],
                        )
                        DataCombined = pd.concat([Data.T, DataMachine.T],
                                                 axis=0,
                                                 sort=False).T

                        RMSE, RMSEpcutoff = pairwisedistances(
                            DataCombined,
                            cfg["scorer"],
                            DLCscorer,
                            cfg["pcutoff"],
                            comparisonbodyparts,
                        )
                        testerror = np.nanmean(
                            RMSE.iloc[testIndices].values.flatten())
                        trainerror = np.nanmean(
                            RMSE.iloc[trainIndices].values.flatten())
                        testerrorpcutoff = np.nanmean(
                            RMSEpcutoff.iloc[testIndices].values.flatten())
                        trainerrorpcutoff = np.nanmean(
                            RMSEpcutoff.iloc[trainIndices].values.flatten())
                        results = [
                            trainingsiterations,
                            int(100 * trainFraction),
                            shuffle,
                            np.round(trainerror, 2),
                            np.round(testerror, 2),
                            cfg["pcutoff"],
                            np.round(trainerrorpcutoff, 2),
                            np.round(testerrorpcutoff, 2),
                        ]
                        final_result.append(results)

                        if show_errors:
                            print(
                                "Results for",
                                trainingsiterations,
                                " training iterations:",
                                int(100 * trainFraction),
                                shuffle,
                                "train error:",
                                np.round(trainerror, 2),
                                "pixels. Test error:",
                                np.round(testerror, 2),
                                " pixels.",
                            )
                            print(
                                "With pcutoff of",
                                cfg["pcutoff"],
                                " train error:",
                                np.round(trainerrorpcutoff, 2),
                                "pixels. Test error:",
                                np.round(testerrorpcutoff, 2),
                                "pixels",
                            )
                            if scale != 1:
                                print(
                                    "The predictions have been calculated for rescaled images (and rescaled ground truth). Scale:",
                                    scale,
                                )
                            print(
                                "The errors are given as the average distances between the labels placed by DLC and by the human scorer."
                            )

                        if plotting:
                            print("Plotting...")
                            foldername = os.path.join(
                                str(evaluationfolder),
                                "LabeledImages_" + DLCscorer + "_" +
                                Snapshots[snapindex],
                            )
                            auxiliaryfunctions.attempttomakefolder(foldername)
                            Plotting(
                                cfg,
                                comparisonbodyparts,
                                DLCscorer,
                                trainIndices,
                                DataCombined * 1.0 / scale,
                                foldername,
                            )  # Rescaling coordinates to have figure in original size!

                        tf.compat.v1.reset_default_graph()
                        # print(final_result)
                    else:
                        DataMachine = pd.read_hdf(resultsfilename)
                        conversioncode.guarantee_multiindex_rows(DataMachine)
                        if plotting:
                            DataCombined = pd.concat([Data.T, DataMachine.T],
                                                     axis=0,
                                                     sort=False).T
                            print(
                                "Plotting... (note: the scale might be inconsistent with the one used when the data was analyzed, i.e. if you used rescaling)"
                            )
                            foldername = os.path.join(
                                str(evaluationfolder),
                                "LabeledImages_" + DLCscorer + "_" +
                                Snapshots[snapindex],
                            )
                            auxiliaryfunctions.attempttomakefolder(foldername)
                            Plotting(
                                cfg,
                                comparisonbodyparts,
                                DLCscorer,
                                trainIndices,
                                DataCombined * 1.0 / scale,
                                foldername,
                            )

                if len(final_result) > 0:  # Only append if results were calculated
                    make_results_file(final_result, evaluationfolder,
                                      DLCscorer)
                    print(
                        "The network is evaluated and the results are stored in the subdirectory 'evaluation_results'."
                    )
                    print(
                        "Please check the results, then choose the best model (snapshot) for prediction. You can update the config.yaml file with the appropriate index for the 'snapshotindex'.\nUse the function 'analyze_video' to make predictions on new videos."
                    )
                    print(
                        "Otherwise, consider adding more labeled data and retraining the network (see the DeepLabCut workflow, Fig. 2, Nath et al. 2019)."
                    )

    # returning to initial folder
    os.chdir(str(start_path))
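
As a usage sketch (assuming this snippet comes from DeepLabCut's evaluate_network and a standard project layout; the config path below is hypothetical):

import deeplabcut

config_path = "/analysis/project/reaching-task/config.yaml"  # hypothetical path
# Evaluates the snapshots selected via 'snapshotindex' in config.yaml, appends
# train/test pixel errors to a CSV under 'evaluation-results', and optionally
# plots the labeled train/test images.
deeplabcut.evaluate_network(config_path, Shuffles=[1], plotting=True)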
Example #9
    def browseDir(self, event):
        """
        Show the DirDialog and ask the user to change the directory where machine labels are stored
        """

        fname = "machinelabels-iter" + str(self.iterationindex) + ".h5"
        self.statusbar.SetStatusText(
            "Looking for a folder to start refining...")
        if self.jump_unlabeled:
            cwd = str(
                auxiliaryfunctions.find_next_unlabeled_folder(
                    self.config_file))
        else:
            cwd = os.path.join(os.getcwd(), "labeled-data")
        if platform.system() == "Darwin":
            dlg = wx.FileDialog(
                self,
                "Select the machinelabels-iterX.h5 file.",
                cwd,
                fname,
                wildcard="(*.h5)|*.h5",
                style=wx.FD_OPEN | wx.FD_FILE_MUST_EXIST,
            )
        else:
            dlg = wx.FileDialog(
                self,
                "Select the machinelabels-iterX.h5 file.",
                cwd,
                "",
                wildcard=fname,
                style=wx.FD_OPEN | wx.FD_FILE_MUST_EXIST,
            )

        if dlg.ShowModal() == wx.ID_OK:
            self.data_file = dlg.GetPath()
            self.dir = str(Path(self.data_file).parents[0])
            self.fileName = str(Path(self.data_file).stem)
            self.load.Enable(False)
            self.next.Enable(True)
            self.save.Enable(True)
            self.zoom.Enable(True)
            self.pan.Enable(True)
            self.home.Enable(True)
            self.quit.Enable(True)
            self.lock.Enable(True)
        else:
            dlg.Destroy()
            self.Destroy()
            return
        dlg.Destroy()

        try:
            self.dataname = str(self.data_file)
        except Exception:
            print("No machinelabels file found!")
            self.Destroy()
        self.statusbar.SetStatusText("Working on folder: {}".format(
            os.path.split(str(self.dir))[-1]))
        self.preview = True
        self.iter = 0

        if os.path.isfile(self.dataname):
            self.Dataframe = pd.read_hdf(self.dataname)
            conversioncode.guarantee_multiindex_rows(self.Dataframe)
            self.Dataframe.sort_index(inplace=True)
            self.scorer = self.Dataframe.columns.get_level_values(0)[0]

            # bodyParts = self.Dataframe.columns.get_level_values(1)
            # _, idx = np.unique(bodyParts, return_index=True)
            # self.num_joints = len(self.bodyparts)
            # self.bodyparts =  bodyParts[np.sort(idx)]
            self.index = list(self.Dataframe.iloc[:, 0].index)
            # Reading images

            self.img = os.path.join(self.project_path, *self.index[self.iter])
            img_name = Path(self.img).name
            self.norm, self.colorIndex = self.image_panel.getColorIndices(
                self.img, self.bodyparts)
            # Adding Slider and Checkbox

            (
                self.choiceBox,
                self.slider,
                self.checkBox,
            ) = self.choice_panel.addCheckBoxSlider(self.bodyparts, self.file,
                                                    self.markerSize)
            self.slider.Bind(wx.EVT_SLIDER, self.OnSliderScroll)
            self.checkBox.Bind(wx.EVT_CHECKBOX, self.activateSlider)
            self.slider.Enable(False)
            # Show image
            # Setting axis title: don't want to show the threshold as it is not selected yet.
            (
                self.figure,
                self.axes,
                self.canvas,
                self.toolbar,
            ) = self.image_panel.drawplot(
                self.img,
                img_name,
                self.iter,
                self.index,
                self.threshold,
                self.bodyparts,
                self.colormap,
                self.preview,
            )
            self.axes.callbacks.connect("xlim_changed", self.onZoom)
            self.axes.callbacks.connect("ylim_changed", self.onZoom)

            instruction = wx.MessageBox(
                "1. Enter the likelihood threshold. \n\n2. Each prediction will be shown with a unique color. \n All the data points above the threshold will be marked as circle filled with a unique color. All the data points below the threshold will be marked with a hollow circle. \n\n3. Enable the checkbox to adjust the marker size. \n\n4.  Hover your mouse over data points to see the labels and their likelihood. \n\n5. Left click and drag to move the data points. \n\n6. Middle click on any data point to remove it. Be careful, you cannot undo this step. \n Click once on the zoom button to zoom-in the image.The cursor will become cross, click and drag over a point to zoom in. \n Click on the zoom button again to disable the zooming function and recover the cursor. \n Use pan button to pan across the image while zoomed in. Use home button to go back to the full;default view. \n\n7. When finished click 'Save' to save all the changes. \n\n8. Click OK to continue",
                "User instructions",
                wx.OK | wx.ICON_INFORMATION,
            )

            if instruction == wx.OK:
                """
                If ok is selected then the image is updated with the thresholded value of the likelihood
                """
                textBox = wx.TextEntryDialog(
                    self,
                    "Select the likelihood threshold",
                    caption="Enter the threshold",
                    value="0.4",
                )
                textBox.ShowModal()
                self.threshold = float(textBox.GetValue())
                textBox.Destroy()
                self.img = os.path.join(self.project_path,
                                        *self.index[self.iter])
                img_name = Path(self.img).name
                self.axes.clear()
                self.preview = False
                self.figure.delaxes(
                    self.figure.axes[1]
                )  # Removes the axes corresponding to the colorbar
                (
                    self.figure,
                    self.axes,
                    self.canvas,
                    self.toolbar,
                ) = self.image_panel.drawplot(
                    self.img,
                    img_name,
                    self.iter,
                    self.index,
                    self.threshold,
                    self.bodyparts,
                    self.colormap,
                    self.preview,
                )
                self.axes.callbacks.connect("xlim_changed", self.onZoom)
                self.axes.callbacks.connect("ylim_changed", self.onZoom)
                MainFrame.plot(self, self.img)
                MainFrame.saveEachImage(self)
            else:
                self.figure.delaxes(
                    self.figure.axes[1]
                )  # Removes the axes corresponding to the colorbar
                (
                    self.figure,
                    self.axes,
                    self.canvas,
                    self.toolbar,
                ) = self.image_panel.drawplot(
                    self.img,
                    img_name,
                    self.iter,
                    self.index,
                    self.threshold,
                    self.bodyparts,
                    self.colormap,
                    self.preview,
                )
                self.axes.callbacks.connect("xlim_changed", self.onZoom)
                self.axes.callbacks.connect("ylim_changed", self.onZoom)
                MainFrame.plot(self, self.img)
                MainFrame.saveEachImage(self)

        else:
            msg = wx.MessageBox(
                "No Machinelabels file found! Want to retry?",
                "Error!",
                wx.YES_NO | wx.ICON_WARNING,
            )
            if msg == wx.YES:
                self.load.Enable(True)
                self.next.Enable(False)
                self.save.Enable(False)
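
Since conversioncode.guarantee_multiindex_rows is called after nearly every pd.read_hdf above, a minimal sketch of the behavior these call sites rely on may help (the function name and details here are assumptions, not the actual implementation):

import pandas as pd

def guarantee_multiindex_rows_sketch(df):
    # If the rows are plain path strings (possibly with Windows separators),
    # convert them in place to a ('labeled-data', video, image) MultiIndex.
    if not isinstance(df.index, pd.MultiIndex):
        tuples = [tuple(str(p).replace("\\", "/").split("/")) for p in df.index]
        df.index = pd.MultiIndex.from_tuples(tuples)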
Example #10
def ExtractFramesbasedonPreselection(
    Index,
    extractionalgorithm,
    data,
    video,
    cfg,
    config,
    opencv=True,
    cluster_resizewidth=30,
    cluster_color=False,
    savelabeled=True,
    with_annotations=True,
):
    from deeplabcut.create_project import add

    start = cfg["start"]
    stop = cfg["stop"]
    numframes2extract = cfg["numframes2pick"]
    bodyparts = auxiliaryfunctions.IntersectionofBodyPartsandOnesGivenbyUser(cfg, "all")

    videofolder = str(Path(video).parents[0])
    vname = str(Path(video).stem)
    tmpfolder = os.path.join(cfg["project_path"], "labeled-data", vname)
    if os.path.isdir(tmpfolder):
        print("Frames from video", vname, " already extracted (more will be added)!")
    else:
        auxiliaryfunctions.attempttomakefolder(tmpfolder, recursive=True)

    nframes = len(data)
    print("Loading video...")
    if opencv:
        vid = VideoWriter(video)
        fps = vid.fps
        duration = vid.calc_duration()
    else:
        from moviepy.editor import VideoFileClip

        clip = VideoFileClip(video)
        fps = clip.fps
        duration = clip.duration

    if cfg["cropping"]:  # one might want to adjust
        coords = (cfg["x1"], cfg["x2"], cfg["y1"], cfg["y2"])
    else:
        coords = None

    print("Duration of video [s]: ", duration, ", recorded @ ", fps, "fps!")
    print("Overall # of frames: ", nframes, "with (cropped) frame dimensions: ")
    if extractionalgorithm == "uniform":
        if opencv:
            frames2pick = frameselectiontools.UniformFramescv2(
                vid, numframes2extract, start, stop, Index
            )
        else:
            frames2pick = frameselectiontools.UniformFrames(
                clip, numframes2extract, start, stop, Index
            )
    elif extractionalgorithm == "kmeans":
        if opencv:
            frames2pick = frameselectiontools.KmeansbasedFrameselectioncv2(
                vid,
                numframes2extract,
                start,
                stop,
                cfg["cropping"],
                coords,
                Index,
                resizewidth=cluster_resizewidth,
                color=cluster_color,
            )
        else:
            if cfg["cropping"]:
                clip = clip.crop(y1=cfg["y1"], y2=cfg["y2"], x1=cfg["x1"], x2=cfg["x2"])
            frames2pick = frameselectiontools.KmeansbasedFrameselection(
                clip,
                numframes2extract,
                start,
                stop,
                Index,
                resizewidth=cluster_resizewidth,
                color=cluster_color,
            )

    else:
        print(
            "Please implement this method yourself! Currently the options are 'kmeans', 'jump', 'uniform'."
        )
        frames2pick = []

    # Extract frames + frames with plotted labels and store them in a folder (with name derived from the video name) under labeled-data
    print("Let's select frame indices:", frames2pick)
    colors = visualization.get_cmap(len(bodyparts), cfg["colormap"])
    strwidth = int(np.ceil(np.log10(nframes)))  # width for strings
    for index in frames2pick:
        if opencv:
            PlottingSingleFramecv2(
                vid,
                cfg["cropping"],
                coords,
                data,
                bodyparts,
                tmpfolder,
                index,
                cfg["dotsize"],
                cfg["pcutoff"],
                cfg["alphavalue"],
                colors,
                strwidth,
                savelabeled,
            )
        else:
            PlottingSingleFrame(
                clip,
                data,
                bodyparts,
                tmpfolder,
                index,
                cfg["dotsize"],
                cfg["pcutoff"],
                cfg["alphavalue"],
                colors,
                strwidth,
                savelabeled,
            )
        plt.close("all")

    # close videos
    if opencv:
        vid.close()
    else:
        clip.close()
        del clip

    # Extract annotations based on DeepLabCut and store in the folder (with name derived from video name) under labeled-data
    if len(frames2pick) > 0:
        try:
            if cfg["cropping"]:
                add.add_new_videos(
                    config, [video], coords=[coords]
                )  # make sure you pass coords as a list
            else:
                add.add_new_videos(config, [video], coords=None)
        except Exception:  # ideally, indices already present in CollectedData should be dropped from DataCombined here; currently this is pretty unlikely
            print(
                "AUTOMATIC ADDING OF VIDEO TO CONFIG FILE FAILED! You need to do this manually for including it in the config.yaml file!"
            )
            print("Videopath:", video, "Coordinates for cropping:", coords)
            pass

        if with_annotations:
            machinefile = os.path.join(
                tmpfolder, "machinelabels-iter" + str(cfg["iteration"]) + ".h5"
            )
            if isinstance(data, pd.DataFrame):
                df = data.loc[frames2pick]
                df.index = pd.MultiIndex.from_tuples([
                    ("labeled-data", vname, "img" + str(index).zfill(strwidth) + ".png")
                    for index in df.index
                ])  # exchange index number by file names.
            elif isinstance(data, dict):
                idx = pd.MultiIndex.from_tuples([
                    ("labeled-data", vname, "img" + str(index).zfill(strwidth) + ".png")
                    for index in frames2pick
                ])
                filename = os.path.join(
                    str(tmpfolder), f"CollectedData_{cfg['scorer']}.h5"
                )
                try:
                    df_temp = pd.read_hdf(filename, "df_with_missing")
                    columns = df_temp.columns
                except FileNotFoundError:
                    columns = pd.MultiIndex.from_product(
                        [
                            [cfg["scorer"]],
                            cfg["individuals"],
                            cfg["multianimalbodyparts"],
                            ["x", "y"],
                        ],
                        names=["scorer", "individuals", "bodyparts", "coords"],
                    )
                    if cfg["uniquebodyparts"]:
                        columns2 = pd.MultiIndex.from_product(
                            [
                                [cfg["scorer"]],
                                ["single"],
                                cfg["uniquebodyparts"],
                                ["x", "y"],
                            ],
                            names=["scorer", "individuals", "bodyparts", "coords"],
                        )
                        df_temp = pd.concat(
                            (
                                pd.DataFrame(columns=columns),
                                pd.DataFrame(columns=columns2),
                            )
                        )
                        columns = df_temp.columns
                array = np.full((len(frames2pick), len(columns)), np.nan)
                for i, index in enumerate(frames2pick):
                    data_temp = data.get(index)
                    if data_temp is not None:
                        vals = np.concatenate(data_temp)[:, :2].flatten()
                        array[i, : len(vals)] = vals
                df = pd.DataFrame(array, index=idx, columns=columns)
            else:
                return
            if Path(machinefile).is_file():
                Data = pd.read_hdf(machinefile, "df_with_missing")
                conversioncode.guarantee_multiindex_rows(Data)
                DataCombined = pd.concat([Data, df])
                # drop duplicate labels:
                DataCombined = DataCombined[
                    ~DataCombined.index.duplicated(keep="first")
                ]

                DataCombined.to_hdf(machinefile, key="df_with_missing", mode="w")
                DataCombined.to_csv(
                    os.path.join(tmpfolder, "machinelabels.csv")
                )  # this is always the most current one (as reading is from h5)
            else:
                df.to_hdf(machinefile, key="df_with_missing", mode="w")
                df.to_csv(os.path.join(tmpfolder, "machinelabels.csv"))

        print(
            "The outlier frames are extracted. They are stored in the subdirectory labeled-data/%s."
            % vname
        )
        print(
            "Once you extracted frames for all videos, use 'refine_labels' to manually correct the labels."
        )
    else:
        print("No frames were extracted.")
Example #11
    def __init__(self, parent, config, video, shuffle, Dataframe, savelabeled,
                 multianimal):
        super(MainFrame,
              self).__init__("DeepLabCut2.0 - Manual Outlier Frame Extraction",
                             parent)

        ###################################################################################################################################################
        # Splitting the frame into top and bottom panels. The bottom panel contains the widgets; the top panel is for showing images and plotting!

        topSplitter = wx.SplitterWindow(self)
        vSplitter = wx.SplitterWindow(topSplitter)

        self.image_panel = ImagePanel(vSplitter, config, self.gui_size)
        self.choice_panel = ScrollPanel(vSplitter)

        vSplitter.SplitVertically(self.image_panel,
                                  self.choice_panel,
                                  sashPosition=self.gui_size[0] * 0.8)
        vSplitter.SetSashGravity(1)
        self.widget_panel = WidgetPanel(topSplitter)
        topSplitter.SplitHorizontally(vSplitter,
                                      self.widget_panel,
                                      sashPosition=self.gui_size[1] *
                                      0.83)  # 0.9
        topSplitter.SetSashGravity(1)
        sizer = wx.BoxSizer(wx.VERTICAL)
        sizer.Add(topSplitter, 1, wx.EXPAND)
        self.SetSizer(sizer)

        ###################################################################################################################################################
        # Add Buttons to the WidgetPanel and bind them to their respective functions.

        widgetsizer = wx.WrapSizer(orient=wx.HORIZONTAL)

        self.load_button_sizer = wx.BoxSizer(wx.VERTICAL)
        self.help_button_sizer = wx.BoxSizer(wx.VERTICAL)

        self.help = wx.Button(self.widget_panel, id=wx.ID_ANY, label="Help")
        self.help_button_sizer.Add(self.help, 1, wx.ALL, 15)
        #        widgetsizer.Add(self.help , 1, wx.ALL, 15)
        self.help.Bind(wx.EVT_BUTTON, self.helpButton)

        widgetsizer.Add(self.help_button_sizer, 1, wx.ALL, 0)

        self.grab = wx.Button(self.widget_panel,
                              id=wx.ID_ANY,
                              label="Grab Frames")
        widgetsizer.Add(self.grab, 1, wx.ALL, 15)
        self.grab.Bind(wx.EVT_BUTTON, self.grabFrame)
        self.grab.Enable(True)

        widgetsizer.AddStretchSpacer(5)
        self.slider = wx.Slider(
            self.widget_panel,
            id=wx.ID_ANY,
            value=0,
            minValue=0,
            maxValue=1,
            size=(200, -1),
            style=wx.SL_HORIZONTAL | wx.SL_AUTOTICKS | wx.SL_LABELS,
        )
        widgetsizer.Add(self.slider, 1, wx.ALL, 5)
        self.slider.Bind(wx.EVT_SLIDER, self.OnSliderScroll)

        widgetsizer.AddStretchSpacer(5)
        self.start_frames_sizer = wx.BoxSizer(wx.VERTICAL)
        self.end_frames_sizer = wx.BoxSizer(wx.VERTICAL)

        self.start_frames_sizer.AddSpacer(15)
        #        self.startFrame = wx.SpinCtrl(self.widget_panel, value='0', size=(100, -1), min=0, max=120)
        self.startFrame = wx.SpinCtrl(self.widget_panel,
                                      value="0",
                                      size=(100, -1))  # ,style=wx.SP_VERTICAL)
        self.startFrame.Enable(False)
        self.start_frames_sizer.Add(self.startFrame, 1,
                                    wx.EXPAND | wx.ALIGN_LEFT, 15)
        start_text = wx.StaticText(self.widget_panel,
                                   label="Start Frame Index")
        self.start_frames_sizer.Add(start_text, 1, wx.EXPAND | wx.ALIGN_LEFT,
                                    15)
        self.checkBox = wx.CheckBox(self.widget_panel,
                                    id=wx.ID_ANY,
                                    label="Range of frames")
        self.checkBox.Bind(wx.EVT_CHECKBOX, self.activate_frame_range)
        self.start_frames_sizer.Add(self.checkBox, 1,
                                    wx.EXPAND | wx.ALIGN_LEFT, 15)
        #
        self.end_frames_sizer.AddSpacer(15)
        self.endFrame = wx.SpinCtrl(self.widget_panel,
                                    value="1",
                                    size=(160, -1))  # , min=1, max=120)
        self.endFrame.Enable(False)
        self.end_frames_sizer.Add(self.endFrame, 1, wx.EXPAND | wx.ALIGN_LEFT,
                                  15)
        end_text = wx.StaticText(self.widget_panel, label="Number of Frames")
        self.end_frames_sizer.Add(end_text, 1, wx.EXPAND | wx.ALIGN_LEFT, 15)
        self.updateFrame = wx.Button(self.widget_panel,
                                     id=wx.ID_ANY,
                                     label="Update")
        self.end_frames_sizer.Add(self.updateFrame, 1,
                                  wx.EXPAND | wx.ALIGN_LEFT, 15)
        self.updateFrame.Bind(wx.EVT_BUTTON, self.updateSlider)
        self.updateFrame.Enable(False)

        widgetsizer.Add(self.start_frames_sizer, 1, wx.ALL, 0)
        widgetsizer.AddStretchSpacer(5)
        widgetsizer.Add(self.end_frames_sizer, 1, wx.ALL, 0)
        widgetsizer.AddStretchSpacer(15)

        self.quit = wx.Button(self.widget_panel, id=wx.ID_ANY, label="Quit")
        widgetsizer.Add(self.quit, 1, wx.ALL, 15)
        self.quit.Bind(wx.EVT_BUTTON, self.quitButton)
        self.quit.Enable(True)

        self.widget_panel.SetSizer(widgetsizer)
        self.widget_panel.SetSizerAndFit(widgetsizer)

        # Variables initialization
        self.numberFrames = 0
        self.currFrame = 0
        self.figure = Figure()
        self.axes = self.figure.add_subplot(111)
        self.drs = []
        self.extract_range_frame = False
        self.firstFrame = 0
        self.Colorscheme = []

        # Read config file
        self.cfg = auxiliaryfunctions.read_config(config)
        self.Task = self.cfg["Task"]
        self.start = self.cfg["start"]
        self.stop = self.cfg["stop"]
        self.date = self.cfg["date"]
        self.trainFraction = self.cfg["TrainingFraction"]
        self.trainFraction = self.trainFraction[0]
        self.videos = self.cfg["video_sets"].keys()
        self.bodyparts = self.cfg["bodyparts"]
        self.colormap = plt.get_cmap(self.cfg["colormap"])
        self.colormap = self.colormap.reversed()
        self.markerSize = self.cfg["dotsize"]
        self.alpha = self.cfg["alphavalue"]
        self.iterationindex = self.cfg["iteration"]
        self.cropping = self.cfg["cropping"]
        self.video_names = [Path(i).stem for i in self.videos]
        self.config_path = Path(config)
        self.video_source = Path(video).resolve()
        self.shuffle = shuffle
        self.Dataframe = Dataframe
        conversioncode.guarantee_multiindex_rows(self.Dataframe)
        self.savelabeled = savelabeled
        self.multianimal = multianimal
        if self.multianimal:
            from deeplabcut.utils import auxfun_multianimal

            (
                self.individual_names,
                self.uniquebodyparts,
                self.multianimalbodyparts,
            ) = auxfun_multianimal.extractindividualsandbodyparts(self.cfg)
            self.choiceBox, self.visualization_rdb = self.choice_panel.addRadioButtons()
            self.Colorscheme = visualization.get_cmap(
                len(self.individual_names), self.cfg["colormap"])
            self.visualization_rdb.Bind(wx.EVT_RADIOBOX, self.clear_plot)
        # Read the video file
        self.vid = VideoWriter(str(self.video_source))
        if self.cropping:
            self.vid.set_bbox(self.cfg["x1"], self.cfg["x2"], self.cfg["y1"],
                              self.cfg["y2"])
        self.filename = Path(self.video_source).name
        self.numberFrames = len(self.vid)
        self.strwidth = int(np.ceil(np.log10(self.numberFrames)))
        # Set the values of slider and range of frames
        self.startFrame.SetMax(self.numberFrames - 1)
        self.slider.SetMax(self.numberFrames - 1)
        self.endFrame.SetMax(self.numberFrames - 1)
        self.startFrame.Bind(wx.EVT_SPINCTRL, self.updateSlider)  # wx.EVT_SPIN
        # Set the status bar
        self.statusbar.SetStatusText("Working on video: {}".format(
            self.filename))
        # Adding the video file to the config file.
        if self.vid.name not in self.video_names:
            add.add_new_videos(self.config_path, [self.video_source])

        self.update()
        self.plot_labels()
        self.widget_panel.Layout()
Example #12
    def saveDataSet(self, event):

        MainFrame.saveEachImage(self)

        # Checks if zoom/pan button is ON
        MainFrame.updateZoomPan(self)
        self.statusbar.SetStatusText("File saved")

        self.Dataframe = MainFrame.check_labels(self)
        # Overwrite machine label file
        self.Dataframe.to_hdf(self.dataname, key="df_with_missing", mode="w")

        # Rename the scorer level (level 0) from the machine scorer to the human scorer.
        self.Dataframe.columns = self.Dataframe.columns.set_levels(
            [self.humanscorer], level=0)
        self.Dataframe = self.Dataframe.drop("likelihood", axis=1, level=3)

        if Path(self.dir,
                "CollectedData_" + self.humanscorer + ".h5").is_file():
            print(
                "A training dataset file is already found for this video. The refined machine labels are merged to this data!"
            )
            DataU1 = pd.read_hdf(
                os.path.join(self.dir,
                             "CollectedData_" + self.humanscorer + ".h5"))
            conversioncode.guarantee_multiindex_rows(DataU1)
            # combine datasets: original CollectedData + corrected machine labels
            DataCombined = pd.concat([self.Dataframe, DataU1])
            # Now drop redundant ones keeping the first one [this will make sure that the refined machine file gets preference]
            DataCombined = DataCombined[~DataCombined.index.duplicated(
                keep="first")]
            """
            if len(self.droppedframes)>0: #i.e. frames were dropped/corrupt. also remove them from original file (if they exist!)
                for fn in self.droppedframes:
                    try:
                        DataCombined.drop(fn,inplace=True)
                    except KeyError:
                        pass
            """
            DataCombined.sort_index(inplace=True)
            DataCombined.to_hdf(
                os.path.join(self.dir,
                             "CollectedData_" + self.humanscorer + ".h5"),
                key="df_with_missing",
                mode="w",
            )
            DataCombined.to_csv(
                os.path.join(self.dir,
                             "CollectedData_" + self.humanscorer + ".csv"))
        else:
            self.Dataframe.sort_index(inplace=True)
            self.Dataframe.to_hdf(
                os.path.join(self.dir,
                             "CollectedData_" + self.humanscorer + ".h5"),
                key="df_with_missing",
                mode="w",
            )
            self.Dataframe.to_csv(
                os.path.join(self.dir,
                             "CollectedData_" + self.humanscorer + ".csv"))
            self.next.Enable(False)
            self.prev.Enable(False)
            self.slider.Enable(False)
            self.checkBox.Enable(False)

        nextFilemsg = wx.MessageBox(
            "File saved. Do you want to refine another file?",
            "Repeat?",
            wx.YES_NO | wx.ICON_INFORMATION,
        )
        if nextFilemsg == wx.YES:
            self.file = 1
            self.axes.clear()
            self.figure.delaxes(self.figure.axes[1])
            self.choiceBox.Clear(True)
            MainFrame.updateZoomPan(self)
            self.load.Enable(True)
            MainFrame.browseDir(self, event)
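
The merge-and-deduplicate pattern used above (refined machine labels take precedence over previously collected ones) can be seen in isolation; a minimal sketch with hypothetical frame names:

import pandas as pd

refined = pd.DataFrame({"x": [10.0]}, index=["img000.png"])
collected = pd.DataFrame({"x": [99.0, 20.0]}, index=["img000.png", "img001.png"])
combined = pd.concat([refined, collected])
# keep='first' retains the refined coordinates for img000.png, while the
# previously collected img001.png survives untouched.
combined = combined[~combined.index.duplicated(keep="first")]
combined.sort_index(inplace=True)
print(combined)  # img000.png -> 10.0, img001.png -> 20.0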
Example #13
def convert_single2multiplelegacyAM(config, userfeedback=True, target=None):
    """ Convert multi animal to single animal code and vice versa. Note that by providing target='single'/'multi' this will be target! """
    cfg = auxiliaryfunctions.read_config(config)
    videos = cfg["video_sets"].keys()
    video_names = [Path(i).stem for i in videos]
    folders = [Path(config).parent / "labeled-data" / Path(i) for i in video_names]

    prefixes, uniquebodyparts, multianimalbodyparts = extractindividualsandbodyparts(
        cfg
    )
    for folder in folders:
        if userfeedback:
            print("Do you want to convert the annotation file in folder:", folder, "?")
            askuser = input("yes/no")
        else:
            askuser = "yes"

        if askuser in ("y", "yes", "Ja", "ha"):  # multilanguage support :)
            fn = os.path.join(str(folder), "CollectedData_" + cfg["scorer"])
            Data = pd.read_hdf(fn + ".h5")
            conversioncode.guarantee_multiindex_rows(Data)
            imindex = Data.index

            if "individuals" in Data.columns.names and (
                target == None or target == "single"
            ):
                print("This is a multianimal data set, converting to single...", folder)
                for prfxindex, prefix in enumerate(prefixes):
                    if prefix == "single":
                        for j, bpt in enumerate(uniquebodyparts):
                            index = pd.MultiIndex.from_product(
                                [[cfg["scorer"]], [bpt], ["x", "y"]],
                                names=["scorer", "bodyparts", "coords"],
                            )
                            frame = pd.DataFrame(
                                Data[cfg["scorer"]][prefix][bpt].values,
                                columns=index,
                                index=imindex,
                            )
                            if j == 0:
                                dataFrame = frame
                            else:
                                dataFrame = pd.concat([dataFrame, frame], axis=1)
                    else:
                        for j, bpt in enumerate(multianimalbodyparts):
                            index = pd.MultiIndex.from_product(
                                [[cfg["scorer"]], [prefix + bpt], ["x", "y"]],
                                names=["scorer", "bodyparts", "coords"],
                            )
                            frame = pd.DataFrame(
                                Data[cfg["scorer"]][prefix][bpt].values,
                                columns=index,
                                index=imindex,
                            )
                            if j == 0:
                                dataFrame = frame
                            else:
                                dataFrame = pd.concat([dataFrame, frame], axis=1)
                    if prfxindex == 0:
                        DataFrame = dataFrame
                    else:
                        DataFrame = pd.concat([DataFrame, dataFrame], axis=1)

                Data.to_hdf(fn + "multianimal.h5", key="df_with_missing")
                Data.to_csv(fn + "multianimal.csv")

                DataFrame.to_hdf(fn + ".h5", key="df_with_missing")
                DataFrame.to_csv(fn + ".csv")
            elif target is None or target == "multi":
                print(
                    "This is a single animal data set, converting to multi...", folder
                )
                for prfxindex, prefix in enumerate(prefixes):
                    if prefix == "single":
                        if cfg["uniquebodyparts"] != [None]:
                            for j, bpt in enumerate(uniquebodyparts):
                                index = pd.MultiIndex.from_arrays(
                                    np.array(
                                        [
                                            2 * [cfg["scorer"]],
                                            2 * [prefix],
                                            2 * [bpt],
                                            ["x", "y"],
                                        ]
                                    ),
                                    names=[
                                        "scorer",
                                        "individuals",
                                        "bodyparts",
                                        "coords",
                                    ],
                                )
                                if bpt in Data[cfg["scorer"]].keys():
                                    frame = pd.DataFrame(
                                        Data[cfg["scorer"]][bpt].values,
                                        columns=index,
                                        index=imindex,
                                    )
                                else:  # fill with nans...
                                    frame = pd.DataFrame(
                                        np.ones((len(imindex), 2)) * np.nan,
                                        columns=index,
                                        index=imindex,
                                    )

                                if j == 0:
                                    dataFrame = frame
                                else:
                                    dataFrame = pd.concat([dataFrame, frame], axis=1)
                        else:
                            dataFrame = None
                    else:
                        for j, bpt in enumerate(multianimalbodyparts):
                            index = pd.MultiIndex.from_arrays(
                                np.array(
                                    [
                                        2 * [cfg["scorer"]],
                                        2 * [prefix],
                                        2 * [bpt],
                                        ["x", "y"],
                                    ]
                                ),
                                names=["scorer", "individuals", "bodyparts", "coords"],
                            )
                            if prefix + "_" + bpt in Data[cfg["scorer"]].keys():
                                frame = pd.DataFrame(
                                    Data[cfg["scorer"]][prefix + "_" + bpt].values,
                                    columns=index,
                                    index=imindex,
                                )
                            else:
                                frame = pd.DataFrame(
                                    np.ones((len(imindex), 2)) * np.nan,
                                    columns=index,
                                    index=imindex,
                                )

                            if j == 0:
                                dataFrame = frame
                            else:
                                dataFrame = pd.concat([dataFrame, frame], axis=1)
                    if prfxindex == 0:
                        DataFrame = dataFrame
                    else:
                        DataFrame = pd.concat([DataFrame, dataFrame], axis=1)

                Data.to_hdf(fn + "singleanimal.h5", key="df_with_missing")
                Data.to_csv(fn + "singleanimal.csv")

                DataFrame.to_hdf(fn + ".h5", key="df_with_missing")
                DataFrame.to_csv(fn + ".csv")
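
Both conversion directions above hinge on building a four-level column MultiIndex per bodypart; a standalone sketch with hypothetical scorer/individual/bodypart names:

import numpy as np
import pandas as pd

index = pd.MultiIndex.from_arrays(
    np.array([2 * ["Alex"], 2 * ["mouse1"], 2 * ["nose"], ["x", "y"]]),
    names=["scorer", "individuals", "bodyparts", "coords"],
)
frame = pd.DataFrame(np.full((3, 2), np.nan), columns=index)
# Per-bodypart frames like this one are concatenated along axis=1 to build the
# full multi-animal annotation table.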
Example #14
def convert2_maDLC(config, userfeedback=True, forceindividual=None):
    """
    Converts single animal annotation file into a multianimal annotation file,
    by introducing an individuals column with either the first individual
    in individuals list in config.yaml or whatever is passed via "forceindividual".

    Parameters
    ----------
    config : string
        Full path of the config.yaml file as a string.

    userfeedback: bool, optional
            If True, the user is asked for each folder whether its annotation file should be converted.
            If False, all annotation files are converted automatically.

    forceindividual: string, optional (default None)
            If a string is given, it is used as the name in the individuals column.

    Examples
    --------
    Converts multianimalbodyparts under the 'first individual' in the individuals list in config.yaml
    and uniquebodyparts under 'single'
    >>> deeplabcut.convert2_maDLC('/socialrearing-task/config.yaml')

    --------
    Converts multianimalbodyparts under the individual label mus17 and uniquebodyparts under 'single'
    >>> deeplabcut.convert2_maDLC('/socialrearing-task/config.yaml', forceindividual='mus17')
    """

    cfg = auxiliaryfunctions.read_config(config)
    videos = cfg["video_sets"].keys()
    video_names = [trainingsetmanipulation._robust_path_split(i)[1] for i in videos]
    folders = [Path(config).parent / "labeled-data" / Path(i) for i in video_names]

    individuals, uniquebodyparts, multianimalbodyparts = extractindividualsandbodyparts(
        cfg
    )

    if forceindividual is None:
        if len(individuals) == 0:
            print("At least one individual should exist...")
            folders = []
            forceindividual = ""
        else:
            forceindividual = individuals[0]  # note that 'single' is added at the end!

        if forceindividual == "single":  # no specific individual ()
            if len(multianimalbodyparts) > 0:  # there should be an individual name...
                print(
                    "At least one individual should exist beyond 'single', as there are multianimalbodyparts..."
                )
                folders = []

    for folder in folders:
        if userfeedback:
            print("Do you want to convert the annotation file in folder:", folder, "?")
            askuser = input("yes/no")
        else:
            askuser = "yes"

        if askuser in ("y", "yes", "Ja", "ha"):  # multilanguage support :)

            fn = os.path.join(str(folder), "CollectedData_" + cfg["scorer"])
            Data = pd.read_hdf(fn + ".h5")
            conversioncode.guarantee_multiindex_rows(Data)
            imindex = Data.index

            print("This is a single animal data set, converting to multi...", folder)

            # -> adding (single,bpt) for uniquebodyparts
            for j, bpt in enumerate(uniquebodyparts):
                index = pd.MultiIndex.from_arrays(
                    np.array(
                        [2 * [cfg["scorer"]], 2 * ["single"], 2 * [bpt], ["x", "y"]]
                    ),
                    names=["scorer", "individuals", "bodyparts", "coords"],
                )

                if bpt in Data[cfg["scorer"]].keys():
                    frame = pd.DataFrame(
                        Data[cfg["scorer"]][bpt].values, columns=index, index=imindex
                    )
                else:
                    frame = pd.DataFrame(
                        np.ones((len(imindex), 2)) * np.nan,
                        columns=index,
                        index=imindex,
                    )

                if j == 0:
                    dataFrame = frame
                else:
                    dataFrame = pd.concat([dataFrame, frame], axis=1)

            if len(uniquebodyparts) == 0:
                dataFrame = None

            # -> adding (individual,bpt) for multianimalbodyparts
            for j, bpt in enumerate(multianimalbodyparts):
                index = pd.MultiIndex.from_arrays(
                    np.array(
                        [
                            2 * [cfg["scorer"]],
                            2 * [str(forceindividual)],
                            2 * [bpt],
                            ["x", "y"],
                        ]
                    ),
                    names=["scorer", "individuals", "bodyparts", "coords"],
                )

                if bpt in Data[cfg["scorer"]].keys():
                    frame = pd.DataFrame(
                        Data[cfg["scorer"]][bpt].values, columns=index, index=imindex
                    )
                else:
                    frame = pd.DataFrame(
                        np.ones((len(imindex), 2)) * np.nan,
                        columns=index,
                        index=imindex,
                    )

                if j == 0 and dataFrame is None:
                    dataFrame = frame
                else:
                    dataFrame = pd.concat([dataFrame, frame], axis=1)

            Data.to_hdf(fn + "singleanimal.h5", key="df_with_missing")
            Data.to_csv(fn + "singleanimal.csv")

            dataFrame.to_hdf(fn + ".h5", key="df_with_missing")
            dataFrame.to_csv(fn + ".csv")
Example #15
def mergeandsplit(config, trainindex=0, uniform=True):
    """
    This function allows additional control over "create_training_dataset".

    Merge annotated data sets (from different folders) and split the data in a specific way; returns the split variables (train/test indices).
    Importantly, this allows one to freeze a split.

    One can either create a uniform split (uniform=True, thereby indexing TrainingFraction in the config file) or a leave-one-folder-out split
    by passing the index of the corresponding video from the config.yaml file as the variable trainindex.

    Parameters
    ----------
    config : string
        Full path of the config.yaml file as a string.

    trainindex: int, optional
        Either (in case uniform = True) indexes which element of TrainingFraction in the config file should be used (note it is a list!).
        Alternatively (uniform = False) indexes which folder is dropped, i.e. the first if trainindex=0, the second if trainindex =1, etc.

    uniform: bool, optional
        Perform uniform split (disregarding folder structure in labeled data), or (if False) leave one folder out.

    Examples
    --------
    To create a leave-one-folder-out model:
    >>> trainIndices, testIndices=deeplabcut.mergeandsplit(config,trainindex=0,uniform=False)
    returns the indices for the first video folder (as defined in config file) as testIndices and all others as trainIndices.
    You can then create the training set by calling (e.g. defining it as Shuffle 3):
    >>> deeplabcut.create_training_dataset(config,Shuffles=[3],trainIndices=trainIndices,testIndices=testIndices)

    To freeze a (uniform) split (i.e. iid sampled from all the data):
    >>> trainIndices, testIndices=deeplabcut.mergeandsplit(config,trainindex=0,uniform=True)

    You can then create two model instances that have the identical trainingset. Thereby you can assess the role of various parameters on the performance of DLC.
    >>> deeplabcut.create_training_dataset(config,Shuffles=[0,1],trainIndices=[trainIndices, trainIndices],testIndices=[testIndices, testIndices])
    --------

    """
    # Loading metadata from config file:
    cfg = auxiliaryfunctions.read_config(config)
    scorer = cfg["scorer"]
    project_path = cfg["project_path"]
    # Create path for training sets & store data there
    trainingsetfolder = auxiliaryfunctions.GetTrainingSetFolder(
        cfg)  # Path concatenation OS platform independent
    auxiliaryfunctions.attempttomakefolder(Path(
        os.path.join(project_path, str(trainingsetfolder))),
                                           recursive=True)
    fn = os.path.join(project_path, trainingsetfolder,
                      "CollectedData_" + cfg["scorer"])

    try:
        Data = pd.read_hdf(fn + ".h5")
    except FileNotFoundError:
        Data = merge_annotateddatasets(
            cfg,
            Path(os.path.join(project_path, trainingsetfolder)),
        )
        if Data is None:
            return [], []

    conversioncode.guarantee_multiindex_rows(Data)
    Data = Data[scorer]  # extract labeled data

    if uniform:
        TrainingFraction = cfg["TrainingFraction"]
        trainFraction = TrainingFraction[trainindex]
        trainIndices, testIndices = SplitTrials(
            range(len(Data.index)),
            trainFraction,
            True,
        )
    else:  # leave one folder out split
        videos = cfg["video_sets"].keys()
        test_video_name = [Path(i).stem for i in videos][trainindex]
        print("Excluding the following folder (from training):",
              test_video_name)
        trainIndices, testIndices = [], []
        for index, name in enumerate(Data.index):
            if test_video_name == name[1]:  # this is the video name
                # print(name,test_video_name)
                testIndices.append(index)
            else:
                trainIndices.append(index)

    return trainIndices, testIndices
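
The leave-one-folder-out branch above simply partitions row indices by the video-name level of the row MultiIndex; a minimal standalone sketch with hypothetical names:

import pandas as pd

index = pd.MultiIndex.from_tuples([
    ("labeled-data", "video1", "img000.png"),
    ("labeled-data", "video1", "img001.png"),
    ("labeled-data", "video2", "img000.png"),
])
test_video_name = "video1"
testIndices = [i for i, name in enumerate(index) if name[1] == test_video_name]
trainIndices = [i for i, name in enumerate(index) if name[1] != test_video_name]
# testIndices == [0, 1]; trainIndices == [2]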
Example #16
def check_labels(
    config,
    Labels=["+", ".", "x"],
    scale=1,
    dpi=100,
    draw_skeleton=True,
    visualizeindividuals=True,
):
    """
    Double check if the labels are at the correct locations and stored in the proper file format.
    This creates a new subdirectory for each video under 'labeled-data', and all the frames are plotted with the labels.
    Make sure that these labels are fine.

    Parameters
    ----------
    config : string
        Full path of the config.yaml file as a string.

    Labels: List of at least 3 matplotlib markers. The first one will be used to indicate the human ground truth location (Default: +)

    scale : float, default =1
        Change the relative size of the output images.

    dpi : int, optional
        Output resolution. 100 dpi by default.

    draw_skeleton: bool, default True.
        Plot skeleton overlaid over body parts.

    visualizeindividuals: bool, default True:
        For a multianimal project the different individuals have different colors (and all bodyparts the same).
        If False, the colors change over bodyparts rather than individuals.

    Example
    --------
    To check the labeled frames:
    >>> deeplabcut.check_labels('/analysis/project/reaching-task/config.yaml')
    --------
    """

    from deeplabcut.utils import visualization

    cfg = auxiliaryfunctions.read_config(config)
    videos = cfg["video_sets"].keys()
    video_names = [_robust_path_split(video)[1] for video in videos]

    folders = [
        os.path.join(cfg["project_path"], "labeled-data", str(Path(i)))
        for i in video_names
    ]
    print("Creating images with labels by %s." % cfg["scorer"])
    for folder in folders:
        try:
            DataCombined = pd.read_hdf(
                os.path.join(str(folder),
                             "CollectedData_" + cfg["scorer"] + ".h5"))
            conversioncode.guarantee_multiindex_rows(DataCombined)
            if cfg.get("multianimalproject", False):
                color_by = "individual" if visualizeindividuals else "bodypart"
            else:  # for single animal projects
                color_by = "bodypart"

            visualization.make_labeled_images_from_dataframe(
                DataCombined,
                cfg,
                folder,
                scale,
                dpi=dpi,
                keypoint=Labels[0],
                draw_skeleton=draw_skeleton,
                color_by=color_by,
            )
        except FileNotFoundError:
            print("Attention:", folder,
                  "does not appear to have labeled data!")

    print(
        "If all the labels are ok, then use the function 'create_training_dataset' to create the training dataset!"
    )