Example #1
def merge_annotateddatasets(cfg, trainingsetfolder_full):
    """
    Merges all the h5 files for all labeled-datasets (from individual videos).

    This is a bit of a mess because of cross platform compatibility.

    Within platform comp. is straightforward. But if someone labels on windows and wants to train on a unix cluster or colab...
    """
    AnnotationData = []
    data_path = Path(os.path.join(cfg["project_path"], "labeled-data"))
    videos = cfg["video_sets"].keys()
    for video in videos:
        _, filename, _ = _robust_path_split(video)
        file_path = os.path.join(data_path / filename,
                                 f'CollectedData_{cfg["scorer"]}.h5')
        try:
            data = pd.read_hdf(file_path)
            conversioncode.guarantee_multiindex_rows(data)
            AnnotationData.append(data)
        except FileNotFoundError:
            print(file_path, " not found (perhaps not annotated).")

    if not len(AnnotationData):
        print(
            "Annotation data was not found by splitting video paths (from config['video_sets']). An alternative route is taken..."
        )
        AnnotationData = conversioncode.merge_windowsannotationdataONlinuxsystem(
            cfg)
        if not len(AnnotationData):
            print("No data was found!")
            return

    AnnotationData = pd.concat(AnnotationData).sort_index()
    # When concatenating DataFrames with misaligned column labels,
    # all sorts of reordering may happen (mainly depending on 'sort' and 'join')
    # Ensure the 'bodyparts' level agrees with the order in the config file.
    if cfg.get("multianimalproject", False):
        (
            _,
            uniquebodyparts,
            multianimalbodyparts,
        ) = auxfun_multianimal.extractindividualsandbodyparts(cfg)
        bodyparts = multianimalbodyparts + uniquebodyparts
    else:
        bodyparts = cfg["bodyparts"]
    AnnotationData = AnnotationData.reindex(
        bodyparts,
        axis=1,
        level=AnnotationData.columns.names.index("bodyparts"))
    filename = os.path.join(trainingsetfolder_full,
                            f'CollectedData_{cfg["scorer"]}')
    AnnotationData.to_hdf(filename + ".h5", key="df_with_missing", mode="w")
    AnnotationData.to_csv(filename + ".csv")  # human readable.
    return AnnotationData
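For orientation, here is a minimal sketch of how this first variant might be invoked. The keys shown in cfg are the ones the function actually reads; the project path, scorer name, video path, body part names, and the training-set folder are made up for illustration and only assumed to match a typical project layout.

import os

# Hypothetical config dict with the keys merge_annotateddatasets() reads (values are illustrative).
cfg = {
    "project_path": "/home/user/reaching-task-me-2021-01-01",
    "scorer": "me",
    "video_sets": {"/home/user/videos/trial1.mp4": {"crop": "0, 640, 0, 480"}},
    "multianimalproject": False,
    "bodyparts": ["snout", "leftear", "rightear", "tailbase"],
}

# Assumed output folder; the real path comes from the project's training-set structure.
trainingsetfolder_full = os.path.join(cfg["project_path"], "training-datasets")
merged = merge_annotateddatasets(cfg, trainingsetfolder_full)
if merged is not None:
    print(merged.shape)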
Example #2
def merge_annotateddatasets(cfg, project_path, trainingsetfolder_full, windows2linux, video_set=None):
    """
    Merges all the h5 files for all labeled-datasets (from individual videos).
    This is a bit of a mess because of cross platform compatibility.
    
    Within platform comp. is straightforward. But if someone labels on windows and wants to train on a unix cluster or colab...
    """
    AnnotationData = None
    data_path = Path(os.path.join(project_path, 'labeled-data'))
    if video_set is None:
        videos = cfg['video_sets'].keys()
    else:
        videos = video_set
    video_names = [Path(i).stem for i in videos]
    for i in video_names:
        file_path = os.path.join(str(data_path / i), 'CollectedData_' + cfg['scorer'] + '.h5')
        try:
            data = pd.read_hdf(file_path, 'df_with_missing')
            if AnnotationData is None:
                AnnotationData = data
            else:
                AnnotationData = pd.concat([AnnotationData, data])
        except FileNotFoundError:
            print(file_path, " not found (perhaps not annotated)")

    if AnnotationData is None:
        print("Annotation data was not found by splitting video paths (from config['video_sets']). An alternative route is taken...")
        AnnotationData = conversioncode.merge_windowsannotationdataONlinuxsystem(cfg)
    if AnnotationData is None:
        print("No data was found!")
        return None

    # True if the first index entry is in Windows path format (contains '\\').
    windowspath = len(AnnotationData.index[0].split('\\')) > 1
    
    # Let's check if the code is *not* run on Windows (see https://stackoverflow.com/questions/1325581/how-do-i-check-if-im-running-on-windows-in-python)
    # but the paths are in Windows format...
    if os.name != 'nt' and windowspath and not windows2linux:
        print("It appears that the images were labeled on a Windows system, but you are currently trying to create a training set on a Unix system. \n In this case the paths should be converted. Do you want to proceed with the conversion?")
        askuser = input("yes/no")
    else:
        askuser = 'no'
        
    filename = os.path.join(str(trainingsetfolder_full), 'CollectedData_' + cfg['scorer'])
    if windows2linux or askuser == 'yes' or askuser == 'y' or askuser == 'Ja':  # convert Windows '\\' separators in the index to Unix '/'
        AnnotationData = conversioncode.convertpaths_to_unixstyle(AnnotationData, filename, cfg)
        print("Annotation data converted to unix format...")
    else:  # store as is
        AnnotationData.to_hdf(filename + '.h5', key='df_with_missing', mode='w')
        AnnotationData.to_csv(filename + '.csv')  # human readable.
        
    return AnnotationData 
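The conversioncode.convertpaths_to_unixstyle call used above is internal to DeepLabCut; conceptually it just rewrites the backslash-separated image paths stored in the DataFrame index. A rough stand-alone sketch of that idea (not the library's actual implementation, and assuming a flat string index as in these older variants):

import pandas as pd

def to_unix_paths_sketch(df):
    # e.g. 'labeled-data\\trial1\\img001.png' -> 'labeled-data/trial1/img001.png'
    out = df.copy()
    out.index = [str(ix).replace("\\", "/") for ix in out.index]
    return out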
Example #3
def merge_annotateddatasets(cfg, trainingsetfolder_full, windows2linux):
    """
    Merges all the h5 files for all labeled-datasets (from individual videos).

    This is a bit of a mess because of cross platform compatibility.

    Within platform comp. is straightforward. But if someone labels on windows and wants to train on a unix cluster or colab...
    """
    AnnotationData = []
    data_path = Path(os.path.join(cfg["project_path"], "labeled-data"))
    videos = cfg["video_sets"].keys()
    for video in videos:
        _, filename, _ = _robust_path_split(video)
        file_path = os.path.join(data_path / filename,
                                 f'CollectedData_{cfg["scorer"]}.h5')
        try:
            data = pd.read_hdf(file_path, "df_with_missing")
            AnnotationData.append(data)
        except FileNotFoundError:
            print(
                file_path,
                " not found (perhaps not annotated). If training on cropped data, "
                "make sure to call `cropimagesandlabels` prior to creating the dataset.",
            )

    if not len(AnnotationData):
        print(
            "Annotation data was not found by splitting video paths (from config['video_sets']). An alternative route is taken..."
        )
        AnnotationData = conversioncode.merge_windowsannotationdataONlinuxsystem(
            cfg)
        if not len(AnnotationData):
            print("No data was found!")
            return

    AnnotationData = pd.concat(AnnotationData).sort_index()
    # When concatenating DataFrames with misaligned column labels,
    # all sorts of reordering may happen (mainly depending on 'sort' and 'join')
    # Ensure the 'bodyparts' level agrees with the order in the config file.
    if cfg.get("multianimalproject", False):
        (
            _,
            uniquebodyparts,
            multianimalbodyparts,
        ) = auxfun_multianimal.extractindividualsandbodyparts(cfg)
        bodyparts = multianimalbodyparts + uniquebodyparts
    else:
        bodyparts = cfg["bodyparts"]
    AnnotationData = AnnotationData.reindex(
        bodyparts,
        axis=1,
        level=AnnotationData.columns.names.index("bodyparts"))

    # Let's check if the code is *not* run on Windows (see https://stackoverflow.com/questions/1325581/how-do-i-check-if-im-running-on-windows-in-python)
    # but the paths are in Windows format...
    windowspath = "\\" in AnnotationData.index[0]
    if os.name != "nt" and windowspath and not windows2linux:
        print(
            "It appears that the images were labeled on a Windows system, but you are currently trying to create a training set on a Unix system. \n In this case the paths should be converted. Do you want to proceed with the conversion?"
        )
        askuser = input("yes/no")
    else:
        askuser = "no"

    filename = os.path.join(trainingsetfolder_full,
                            f'CollectedData_{cfg["scorer"]}')
    if (windows2linux or askuser == "yes" or askuser == "y" or askuser
            == "Ja"):  # convert windows path in pandas array \\ to unix / !
        AnnotationData = conversioncode.convertpaths_to_unixstyle(
            AnnotationData, filename)
        print("Annotation data converted to unix format...")
    else:  # store as is
        AnnotationData.to_hdf(filename + ".h5",
                              key="df_with_missing",
                              mode="w")
        AnnotationData.to_csv(filename + ".csv")  # human readable.

    return AnnotationData
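The reindex step in examples #1 and #3 exists because pd.concat can reorder columns when the per-video files disagree, so the 'bodyparts' column level is forced back into the order given in the config. A small self-contained illustration of that pandas behavior (the scorer and body part names are invented):

import numpy as np
import pandas as pd

cols = pd.MultiIndex.from_product(
    [["me"], ["tailbase", "snout"], ["x", "y"]],
    names=["scorer", "bodyparts", "coords"],
)
df = pd.DataFrame(np.zeros((2, 4)), columns=cols)

# Force the 'bodyparts' level back into the order listed in the config.
bodyparts = ["snout", "tailbase"]
df = df.reindex(bodyparts, axis=1, level=df.columns.names.index("bodyparts"))
print(df.columns.get_level_values("bodyparts"))  # ['snout', 'snout', 'tailbase', 'tailbase']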
Example #4
def merge_annotateddatasets(cfg, project_path, trainingsetfolder_full,
                            windows2linux):
    """
    Merges all the h5 files for all labeled-datasets (from individual videos).
    This is a bit of a mess because of cross platform compatibility.

    Within platform comp. is straightforward. But if someone labels on windows and wants to train on a unix cluster or colab...
    """
    AnnotationData = []
    data_path = Path(os.path.join(project_path, 'labeled-data'))
    videos = cfg['video_sets'].keys()
    video_names = [Path(i).stem for i in videos]
    for i in video_names:
        filename = os.path.join(data_path / i,
                                f'CollectedData_{cfg["scorer"]}.h5')
        try:
            data = pd.read_hdf(filename, 'df_with_missing')
            AnnotationData.append(data)
        except FileNotFoundError:
            print(filename, " not found (perhaps not annotated)")

    if not len(AnnotationData):
        print(
            "Annotation data was not found by splitting video paths (from config['video_sets']). An alternative route is taken..."
        )
        AnnotationData = conversioncode.merge_windowsannotationdataONlinuxsystem(
            cfg)
        if not len(AnnotationData):
            print("No data was found!")
            return

    AnnotationData = pd.concat(AnnotationData).sort_index()
    # When concatenating DataFrames with misaligned column labels,
    # all sorts of reordering may happen (mainly depending on 'sort' and 'join')
    # Ensure the 'bodyparts' level agrees with the order in the config file.
    bodyparts = cfg['bodyparts']
    AnnotationData = AnnotationData.reindex(
        bodyparts,
        axis=1,
        level=AnnotationData.columns.names.index('bodyparts'))

    # Let's check if the code is *not* run on Windows (see https://stackoverflow.com/questions/1325581/how-do-i-check-if-im-running-on-windows-in-python)
    # but the paths are in Windows format...
    windowspath = '\\' in AnnotationData.index[0]
    if os.name != 'nt' and windowspath and not windows2linux:
        print(
            "It appears that the images were labeled on a Windows system, but you are currently trying "
            "to create a training set on a Unix system. \n "
            "In this case the paths should be converted. Do you want to proceed with the conversion?"
        )
        askuser = input("yes/no")
    else:
        askuser = 'no'

    filename = os.path.join(str(trainingsetfolder_full), 'CollectedData_' + cfg['scorer'])
    if windows2linux or askuser == 'yes' or askuser == 'y' or askuser == 'Ja':  # convert Windows '\\' separators in the index to Unix '/'
        AnnotationData = conversioncode.convertpaths_to_unixstyle(
            AnnotationData, filename, cfg)
        print("Annotation data converted to unix format...")
    else:  #store as is
        AnnotationData.to_hdf(filename + '.h5',
                              key='df_with_missing',
                              mode='w')
        AnnotationData.to_csv(filename + '.csv')  #human readable.

    return AnnotationData
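All four variants end by writing the merged annotations both as HDF5 (key 'df_with_missing') and as a human-readable CSV next to it. A quick sketch of reading the HDF5 result back; the folder path and scorer name below are illustrative, not taken from a real project:

import os
import pandas as pd

trainingsetfolder_full = "training-datasets"   # illustrative; use your project's folder
scorer = "me"                                  # illustrative scorer name
merged = pd.read_hdf(
    os.path.join(trainingsetfolder_full, f"CollectedData_{scorer}.h5"),
    key="df_with_missing",
)
print(merged.head())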