Beispiel #1
0
        # Get additional information for newLabels from main unlabeled index
        # TODO: Don't do this again when merging auto and manual annotated indexes
        originalUnlabeledIndex = pd.read_csv(originalUnlabeledIndexPath)
        originalUnlabeledIndex = dutils.remove_duplicates(originalUnlabeledIndex, "FrameHash")

        newLabels = dutils.fill_index_information(originalUnlabeledIndex, newLabels,
                                                 "FrameHash", [ 'rede1', 'rede2', 'rede3'])
        oldLabels = dutils.fill_index_information(originalUnlabeledIndex, oldLabels,
                                                 "FrameHash", [ 'rede1', 'rede2', 'rede3'])

        mergedIndex = pd.concat([newLabels, oldLabels], axis=0, sort=False)
        mergedIndex.to_csv(manualIndexPath, index=False)

    ## Split train and val sets
    print("\nSTEP: Split train and val sets.")
    splitPercentages = [0.8, 0.2]
    
    dutils.copy_dataset_to_folder(manualIndexPath, sampledImageFolder, path_column="FramePath")
    
    imageIndex = dutils.move_to_class_folders(manualIndexPath, sampledImageFolder,
                                        target_net=commons.net_target_column[rede], target_class=target_class)
    input("\nDelete unwanted class folders and press Enter to continue.")

    # Split dataset in train and validation sets, sorting them in val and train folders
    splitIndex = dutils.data_folder_split(sampledImageFolder,
                                        splitPercentages, index=imageIndex.copy(), seed=seed)
    splitIndex.to_csv(splitIndexPath, index=False)

    # Save sample seed
    dutils.save_seed_log(seedLogPath, seed, "split")
# Drop, in place, every row of either index that has no value in "HashMD5".
for _frame in (trainIndex, valIndex):
    _frame.dropna(axis=0, subset=["HashMD5"], inplace=True)

# Report how many of the remaining rows have no 'rede3' value.
trainMissing = trainIndex['rede3'].isna().sum()
valMissing = valIndex['rede3'].isna().sum()
print("\nNaNs:")
print(f"Train: {trainMissing}\nVal:\t{valMissing}")

# Persist the cleaned indexes; the steps below read them back from these paths.
for _frame, _csvPath in ((trainIndex, trainPath), (valIndex, valPath)):
    dutils.df_to_csv(_frame, _csvPath)

# Pause so the operator can abort before files are touched on disk.
input("\nMoving datasets to train folder.\nPress enter to continue.\n")

# For each subset (train first, then val): copy the images listed in its
# index into the subset folder, then sort that folder into class folders
# according to the 'rede3' column.
for _csvPath, _subsetName in ((trainPath, "train"), (valPath, "val")):
    _subsetFolder = semiautoDatasetPath / _subsetName
    dutils.copy_dataset_to_folder(_csvPath, _subsetFolder, path_column="FramePath")
    dutils.move_to_class_folders(
        _csvPath,
        _subsetFolder,
        target_net="rede3",
        target_class=None,
        move=True,
    )
import libs.commons as commons
import libs.utils as utils
import libs.dataset_utils as dutils

# Paths for iteration 2 of the full dataset.
iterPath = Path(dirs.iter_folder) / "full_dataset/iteration_2/"
indexPath = iterPath / "manual_annotated_images_iteration_2.csv"
imageFolderPath = iterPath / "sampled_images"
# Kept for reference; no longer passed to any call in this script.
datasetFolder = Path(dirs.dataset) / "all_datasets_1s"
# Split index is written next to the source index, with a suffix on its stem.
savePath = indexPath.with_name(indexPath.stem + "_train_val_split.csv")
seed = 42

# Fractions passed to the split helper (train, val).
splitPercentages = [0.8, 0.2]

# Copy the images listed in the index into the sampled_images folder.
# The destination must be imageFolderPath, because the class-sorting and
# splitting steps below operate on that folder; the previous code passed
# datasetFolder here instead.
dutils.copy_dataset_to_folder(indexPath,
                              imageFolderPath,
                              path_column="FramePath")

# Sort images in sampled_images folder to separate class folders,
# using the 'rede1' column as the target.
imageIndex = dutils.move_to_class_folders(indexPath,
                                          imageFolderPath,
                                          target_net="rede1")
# input("\nDelete unwanted class folders and press Enter to continue.")

# Split dataset in train and validation sets, sorting them in val and train
# folders. A copy of the index is passed so imageIndex itself is not handed
# to the helper.
otherIndex = dutils.data_folder_split(imageFolderPath,
                                      splitPercentages,
                                      index=imageIndex.copy(),
                                      seed=seed)
print(otherIndex.head())
otherIndex.to_csv(savePath, index=False)