Ejemplo n.º 1
0
    mergedIndex.to_csv(mergedIndexPath, index=False)

    ## Create unlabeled set for next iteration
    # TODO: Encapsulate this section in a function
    print("\nCreate new unlabeled set.")
    mergedPathList = [get_iter_folder(x) / \
        "final_annotated_images_iteration_{}.csv".format(x) for x in range(1, iteration+1)]
    mergedIndexList = [pd.read_csv(x) for x in mergedPathList]
    originalUnlabeledIndex  = pd.read_csv(originalUnlabeledIndexPath)

    # print("Shape final_annotations_iter_{}: {}".format(iteration, mergedIndex.shape))
    # print("Shape final_annotations_iter_{}: {}".format(iteration-1, previousMergedIndex.shape))

    allAnnotations = pd.concat(mergedIndexList, axis=0, sort=False)

    allAnnotations = dutils.remove_duplicates(allAnnotations, "FrameHash")
    print("Duplicated elements in final_annotated_images.")
    print(allAnnotations.index.duplicated().sum())

    newIndex = dutils.index_complement(originalUnlabeledIndex, allAnnotations, "FrameHash")

    dirs.create_folder(newUnlabeledIndexPath.parent)
    newIndex.to_csv(newUnlabeledIndexPath, index=False)

    # TODO: Include train info in the report
    dutils.make_report(reportPath, sampledIndexPath, manualIndexPath, autoLabelIndexPath,
                       unlabeledIndexPath, None, rede=rede, target_class=target_class)

    # Save sample seed
    dutils.save_seed_log(seedLogPath, seed, "inference")
    plot_model_history(
        [trainLoss, valLoss],
        data_labels=["Train Loss", "Val Loss"],
        xlabel="Epochs",
        ylabel="Loss",
        title="Training loss history",
        save_path=lossPath,  #min_line=[False, True],
        show=False)

    plot_model_history([trainAcc, valAcc],
                       data_labels=["Train Acc", "Val Acc"],
                       xlabel="Epochs",
                       ylabel="Acc",
                       title="Training accuracy history",
                       save_path=accPath,
                       show=False)

    plot_model_history([trainF1, valF1],
                       data_labels=["Train F1", "Val F1"],
                       xlabel="Epochs",
                       ylabel="F1",
                       title="Training F1 history",
                       save_path=f1Path,
                       show=False)

    print("\nSaved results to folder ", historyFolder)

    # Save sample seed
    dutils.save_seed_log(seedLogPath, seed, "train")
Ejemplo n.º 3
0
        # Get additional information for newLabels from main unlabeled index
        # TODO: Don't do this again when merging auto and manual annotated indexes
        originalUnlabeledIndex = pd.read_csv(originalUnlabeledIndexPath)
        originalUnlabeledIndex = dutils.remove_duplicates(originalUnlabeledIndex, "FrameHash")

        newLabels = dutils.fill_index_information(originalUnlabeledIndex, newLabels,
                                                 "FrameHash", [ 'rede1', 'rede2', 'rede3'])
        oldLabels = dutils.fill_index_information(originalUnlabeledIndex, oldLabels,
                                                 "FrameHash", [ 'rede1', 'rede2', 'rede3'])

        mergedIndex = pd.concat([newLabels, oldLabels], axis=0, sort=False)
        mergedIndex.to_csv(manualIndexPath, index=False)

    ## Split train and val sets
    print("\nSTEP: Split train and val sets.")
    splitPercentages = [0.8, 0.2]
    
    dutils.copy_dataset_to_folder(manualIndexPath, sampledImageFolder, path_column="FramePath")
    
    imageIndex = dutils.move_to_class_folders(manualIndexPath, sampledImageFolder,
                                        target_net=commons.net_target_column[rede], target_class=target_class)
    input("\nDelete unwanted class folders and press Enter to continue.")

    # Split dataset in train and validation sets, sorting them in val and train folders
    splitIndex = dutils.data_folder_split(sampledImageFolder,
                                        splitPercentages, index=imageIndex.copy(), seed=seed)
    splitIndex.to_csv(splitIndexPath, index=False)

    # Save sample seed
    dutils.save_seed_log(seedLogPath, seed, "split")
Ejemplo n.º 4
0
    def get_iter_folder(iteration):
        """Return the folder path used for the given iteration number.

        The result is ``Path(dirs.iter_folder)`` joined with
        ``"<datasetName>/iteration_<iteration>/"``. Both ``dirs`` and
        ``datasetName`` are resolved from outside this function.
        """
        base_folder = Path(dirs.iter_folder)
        relative_folder = "{}/iteration_{}/".format(datasetName, iteration)
        return base_folder / relative_folder

    previousIterFolder = get_iter_folder(iteration - 1)
    iterFolder = get_iter_folder(iteration)
    unlabeledIndexPath = previousIterFolder / "unlabeled_images_iteration_{}.csv".format(
        iteration - 1)
    sampledImageFolder = iterFolder / "sampled_images"
    seedLogPath = iterFolder / "seeds.txt"

    dirs.create_folder(iterFolder)
    dirs.create_folder(sampledImageFolder)

    ## Next Iteration
    print("\nSTEP: Sample images for manual annotation.")

    # Sample images for manual annotation
    sampler = SampleImages(unlabeledIndexPath, iterFolder, seed=seed)
    sampler.sample(percentage=0.01, sample_min=100)
    print(sampler.imageSourcePaths.shape)

    # Sampled images index will be created during the manual annotation
    # Tell the user that sampling is done and what the next manual step is.
    # The message is built from adjacent string literals: a backslash
    # continuation inside a single literal would embed the source indentation
    # of the following line into the printed text.
    print(
        "Image sampling finished.\nYou may now annotate sampled_images folder with the "
        "labeling interface and run next step.")

    # Save sample seed
    dutils.save_seed_log(seedLogPath, seed, "sample")