def write_index(self, dest_path='auto', make_backup=True, prompt=True):
        '''
            Write the current index DataFrame to a csv file.

            dest_path:
                Destination csv path. With the default 'auto', the file is saved
                next to self.path, with the time string of self.date appended to
                the file stem.

            make_backup:
                If True, self.make_backup() is called before writing.

            prompt:
                If True, wait for user confirmation on stdin before doing anything.
        '''
        if prompt:
            print(
                "\n\nReally write index to file?\nPress any key to continue, Ctrl-C to cancel.\n"
            )
            input()

        if make_backup:
            self.make_backup()

        if dest_path != 'auto':
            self.indexPath = Path(dest_path)
        else:
            # Timestamped file name, placed alongside the current index path
            stampedName = str(self.path.stem) + "_" + utils.get_time_string(
                self.date) + str(self.path.suffix)
            self.indexPath = self.path.with_name(stampedName)

        # The destination folder must exist before the csv is written
        dirs.create_folder(self.indexPath.parent)

        self.index.to_csv(self.indexPath, index=False)
        self.report_changes()
Exemple #2
0
    def new_iteration(self):
        '''
            Start a new iteration of the loop.

            Raises ValueError when the current iteration is not the initial one
            (iteration != 0) and is not marked as completed. Otherwise, increments
            the iteration counter, marks the new iteration as not completed and
            creates the folder "iteration_<n>" inside self.loopFolder.

            Planned steps of a full iteration, as listed by the original author:
                create new iteration folder
                sample new images
                update iter_info
                label images
                merge new labels (manual) to annotated dataset
                train model
                set boundaries
                automatic annotation
                merge new labels (automatic) to annotated dataset
                update iter_info, iteration complete
        '''
        info = self.iterInfo

        previousUnfinished = (info.completed_iter == False)
        if previousUnfinished and info.iteration != 0:
            raise ValueError(
                "Current iteration has not finished. Resolve it and try again."
            )

        info.iteration += 1
        info.completed_iter = False
        print("Starting iteration {}.".format(info.iteration))

        iterFolderName = "iteration_{}".format(info.iteration)
        info.currentIterFolder = self.loopFolder / iterFolderName

        dirs.create_folder(info.currentIterFolder)
        print(
            "Iteration setup finished.\nCall sample_images method for next step: sample and label images."
        )
    def setup_sample_from_folder(self):
        '''
            Define the folders used by the sample_from_folder test and create the
            destination folder. If the destination already exists, the teardown
            method is called first.
        '''
        self.sourceFolder = Path(dirs.test_assets) / "dataset_test"

        sampleImagesFolder = Path(dirs.test) / "test_sample_images"
        self.sampleImagesFolder = sampleImagesFolder
        self.destFolderSFF = sampleImagesFolder / "test_sample_from_folder"

        # A leftover destination from a previous run is torn down first, so the
        # folder is created for this test only
        leftoverExists = self.destFolderSFF.is_dir()
        if leftoverExists:
            self.teardown_sample_from_folder()
        dirs.create_folder(self.destFolderSFF)
Exemple #4
0
def plot_outputs_histogram(normalized_outputs,
                           labels=None,
                           lower_thresh=None,
                           upper_thresh=None,
                           title="Outputs Histogram",
                           show=True,
                           log=False,
                           save_path=None,
                           save_formats=(".png", ".pdf")):
    '''
        Plot a 100-bin histogram of normalized scores and return the figure.

        normalized_outputs:
            Scores to plot. The x axis is limited to [0, 1]. When labels is given,
            this must support boolean-mask indexing (e.g. a numpy array).

        labels:
            Optional labels aligned with normalized_outputs. Entries equal to 0 are
            plotted as "Positive Examples" and entries equal to 1 as
            "Negative Examples".

        lower_thresh, upper_thresh:
            Vertical threshold lines. They are drawn only when BOTH are given.

        title, log:
            Plot title and whether the count axis uses a log scale.

        show:
            Call plt.show(), unless the active matplotlib backend is "agg".

        save_path:
            If not None, the figure is saved to this path and, additionally, once
            for every extension in save_formats.

        save_formats:
            Iterable of extra extensions (each starting with '.'). Entries that do
            not start with a dot, including empty strings, are ignored.
    '''
    fig = plt.figure(figsize=commons.MPL_FIG_SIZE_SMALL)

    if labels is None:
        plt.hist(normalized_outputs, bins=100, label="Examples", log=log)
    else:
        posOutputs = normalized_outputs[labels == 0]
        negOutputs = normalized_outputs[labels == 1]

        plt.hist(posOutputs, bins=100, label="Positive Examples", log=log)
        plt.hist(negOutputs, bins=100, label="Negative Examples", log=log)

    if lower_thresh is not None and upper_thresh is not None:
        axes = plt.gca()
        axes.axvline(lower_thresh, 0., 1., color='b', label="Lower Thresh")
        axes.axvline(upper_thresh, 0., 1., color='r', label="Upper Thresh")

    plt.tight_layout(pad=2.)
    plt.xlim(0., 1.)
    plt.title(title)
    plt.legend()

    plt.xlabel("Normalized Score")
    yLabel = "Number of Examples"
    if log:
        yLabel += " (log)"
    plt.ylabel(yLabel)

    if save_path is not None:
        save_path = Path(save_path)
        dirs.create_folder(save_path.parent)

        # Save with desired format, and additional formats specified in save_formats
        plt.savefig(save_path)
        for ext in save_formats or ():
            # startswith() tolerates empty strings, unlike indexing ext[0]
            if ext.startswith('.'):
                plt.savefig(save_path.with_suffix(ext))

    if show and mpl.get_backend() != "agg":
        plt.show()
    return fig
def train_network(dataset_path,
                  data_transforms,
                  epochs=25,
                  batch_size=64,
                  model_path="./model_weights.pt",
                  history_path="./train_history.pickle",
                  weighted_loss=True,
                  seed=None,
                  device_id=None):
    '''
        Train a ResNet18-based classifier on an image-folder dataset.

        dataset_path:
            Folder (str or Path) containing 'train' and 'val' subfolders.

        data_transforms:
            Mapping with the transforms for the 'train' and 'val' phases.

        epochs, batch_size:
            Number of training epochs and examples per batch.

        model_path:
            Where the trained state dict is saved. Falsy value skips saving.

        history_path:
            Where the trainer saves its history. Falsy value skips saving, in
            which case the returned history is None.

        weighted_loss:
            If True, the cross entropy loss is weighted with get_loss_weights.

        seed:
            Random seed. Any value other than None (including 0) is applied.

        device_id:
            Forwarded to TrainModel.

        Returns:
            (history, state_dict) tuple of the trained model.
    '''
    # seed=0 is a valid seed, so compare against None instead of truthiness
    if seed is not None:
        set_torch_random_seeds(seed)

    # Accept plain strings as well as Path objects
    dataset_path = Path(dataset_path)

    # Load Dataset objects for train and val sets from folder
    imageDataset = {}
    for phase in ('train', 'val'):
        imageDataset[phase] = datasets.ImageFolder(
            str(dataset_path / phase),
            transform=data_transforms[phase],
            is_valid_file=utils.check_empty_file)

    # Instantiate trainer object
    trainer = TrainModel(seed=seed, verbose=True, device_id=device_id)

    # Load data and define model
    trainer.load_data(imageDataset, num_examples_per_batch=batch_size)
    modelFineTune = trainer.define_model_resnet18(finetune=False)

    # Set optimizer and Loss criterion
    optimizer = optim.Adam(modelFineTune.parameters())
    if weighted_loss:
        weights = torch.Tensor(get_loss_weights(imageDataset)).to(
            trainer.device)
        loss = nn.CrossEntropyLoss(weight=weights)
    else:
        loss = nn.CrossEntropyLoss()

    # Train model
    modelFineTune = trainer.train(modelFineTune,
                                  loss,
                                  optimizer,
                                  scheduler=None,
                                  num_epochs=epochs)

    # Save trained model weights
    if model_path:
        dirs.create_folder(Path(model_path).parent)
        torch.save(modelFineTune.state_dict(), model_path)

    # Save train history. It stays None when no history path is given, instead of
    # leaving the name unbound for the return statement below.
    history = None
    if history_path:
        dirs.create_folder(Path(history_path).parent)
        history = trainer.save_history(history_path)

    return history, modelFineTune.state_dict()
Exemple #6
0
    def load_info(self):
        '''
            Load the iteration info from self.iterInfoPath when that file exists.
            Otherwise create a new IterInfo, create the loop folder and save the
            new info to self.iterInfoPath.

            Returns self.iterInfo.
        '''
        if not self.iterInfoPath.is_file():
            # First run: build a fresh info object and persist it
            self.iterInfo = IterInfo(self.unlabeledFolder,
                                     self.unlabeledIndexPath, self.loopFolder)
            dirs.create_folder(self.loopFolder)
            utils.save_pickle(self.iterInfo, self.iterInfoPath)
            return self.iterInfo

        self.iterInfo = utils.load_pickle(self.iterInfoPath)
        return self.iterInfo
Exemple #7
0
def convert_video(video_input, video_output):
    '''
        Convert a video file with ffmpeg ("ffmpeg -i <input> <output>").

        video_input:
            Path (str or Path) of the source video.

        video_output:
            Path (str or Path) of the converted video. Its parent folder is
            created if needed.

        The call blocks until ffmpeg exits, so the final message reflects the
        actual outcome of the conversion.

        Returns:
            ffmpeg's exit code (0 on success).
    '''
    from pathlib import Path

    print("\nProcessing video: ", video_input)
    print("Saving to : ", video_output)

    # Path handles any separator, Path inputs and bare file names alike
    dirs.create_folder(Path(video_output).parent)

    cmds = ['ffmpeg', '-i', str(video_input), str(video_output)]
    # run() waits for ffmpeg to finish; a bare Popen would return immediately
    # and leave the child process unattended
    result = subprocess.run(cmds)

    if result.returncode == 0:
        print("Video saved to : ", video_output)
    else:
        print("ffmpeg exited with code {} for video: ".format(
            result.returncode), video_input)
    return result.returncode
Exemple #8
0
def copy_files(source, destination):
    '''
        copy_files(source, destination)

        Copy the file at source to the destination path, creating the
        destination's parent folder first.

        Returns True when source is an existing file and the copy was made,
        False when source is not a file (nothing is created in that case).
    '''
    if not os.path.isfile(source):
        return False

    dirs.create_folder(Path(destination).parent)
    shutil.copy2(source, destination)
    return True
Exemple #9
0
    def __init__(self, source, destFolder, seed=None, verbose=True):
        '''
            Store the sampling configuration, seed numpy's global random generator
            and create the destination folders.

            source:
                Sampling source; stored as a Path.

            destFolder:
                Destination folder. Images go to its "sampled_images" subfolder.

            seed:
                Value passed to np.random.seed.

            verbose:
                Stored verbosity flag.
        '''
        # Plain configuration and state
        self.seed = seed
        self.verbose = verbose
        self.percentage = None
        self.index = None
        self.date = datetime.now()

        # Paths
        self.source = Path(source)
        self.destFolder = Path(destFolder)
        self.imageFolder = self.destFolder / "sampled_images"

        np.random.seed(self.seed)
        for folder in (self.destFolder, self.imageFolder):
            dirs.create_folder(folder)
    def copy_files(self, imagesDestFolder='auto', write=False, mode='copy'):
        '''
            Copy all files listed in the index to a new folder and update the
            index paths accordingly.

            imagesDestFolder:
                Destination folder. With the default 'auto', the folder is
                dirs.dataset + "compiled_dataset_<date and time of self.date>".
                NOTE(review): any other value is used as-is and later joined with
                the `/` operator, so it presumably must be a Path - confirm.

            write:
                If True, the updated index is written with
                self.write_index(prompt=False).

            mode:
                Only 'copy' is implemented; any other value raises
                NotImplementedError.

            Returns the list of results of utils.copy_files, one per index row
            (also stored in self.moveResults).
        '''
        assert self.indexExists, "Index does not exist. Cannot move files."

        self.imagesDestFolder = imagesDestFolder

        if self.imagesDestFolder == 'auto':
            # Build a destination folder name from the date stored in self.date
            self.imagesDestFolder = Path(
                dirs.dataset + "compiled_dataset_{}-{}-{}_{}-{}-{}".format(
                    self.date.year, self.date.month, self.date.day,
                    self.date.hour, self.date.minute, self.date.second))

        dirs.create_folder(self.imagesDestFolder, verbose=True)

        print("Copying {} files.".format(self.index.shape[0]))

        # Destination path of a frame: destination folder joined with its name
        def _add_folder_path(x):
            return self.imagesDestFolder / x

        self.frameDestPaths = self.index.loc[:, 'FrameName'].apply(
            _add_folder_path)

        # Select copy or move mode
        if mode == 'copy':
            # Files are copied from 'OriginalFramePath' to the new destination paths
            self.moveResults = list(
                map(utils.copy_files, self.index.loc[:, 'OriginalFramePath'],
                    self.frameDestPaths))
        else:
            raise NotImplementedError

        # Update index paths: the previous 'FramePath' becomes 'OriginalFramePath'
        # and 'FramePath' points to the new location.
        # NOTE(review): the copy above reads from 'OriginalFramePath', which is
        # overwritten here - confirm this is the intended source column.
        # NOTE(review): label-based access with i assumes a 0..n-1 index - confirm.
        for i in range(self.get_index_len()):
            self.index.loc[i, "OriginalFramePath"] = copy(
                self.index.loc[i, "FramePath"])
            self.index.loc[i, "FramePath"] = self.frameDestPaths[i]

        if write:
            self.write_index(prompt=False)

        # Report results
        print("Found {} files.\n\
            Moved {} files to folder\n\
            {}\
            \n{} files were not found.".format(
            len(self.moveResults), sum(self.moveResults),
            self.imagesDestFolder,
            len(self.moveResults) - sum(self.moveResults)))
        return self.moveResults
Exemple #11
0
    def test_setup_merge_annotations(self):
        '''
            Copy the test assets into the active test folder, keeping their
            relative layout, and check that the expected index files are in place.
        '''
        relativeIterFolder = "test_loop/iteration_1/"
        self.assetsFolder = Path(dirs.test_assets) / relativeIterFolder
        self.testFolder = Path(dirs.test) / relativeIterFolder

        self.indexPath = self.testFolder / "sampled_images.csv"
        self.newLabelsPath = self.testFolder / "sampled_images_labels.csv"

        assetFiles = get_file_list(str(self.assetsFolder))
        for assetFile in assetFiles:
            # Mirror the asset's position relative to the assets folder
            relativeLocation = Path(assetFile).relative_to(self.assetsFolder)
            destination = self.testFolder / relativeLocation
            dirs.create_folder(destination.parent)
            copy_files(str(assetFile), str(destination))

        assert self.indexPath.is_file()
        assert self.newLabelsPath.is_file()
Exemple #12
0
    def __init__(self, destPath, videoFolder=dirs.base_videos, verbose=True, errorLog=True):
        '''
            Store the configuration, reset error tracking state and create the
            destination folder.

            destPath:
                Destination folder; stored as a Path and created here.

            videoFolder:
                Base folder of the source videos.

            verbose:
                If True, print the OpenCV version in use.

            errorLog:
                If True, per-kind error counters and an error list are created.
        '''
        # Configuration
        self.destPath = Path(destPath)
        self.videoFolder = videoFolder
        self.verbose = verbose
        self.errorLog = errorLog

        # Processing state
        self.estimatedFPS = False
        self.criticalReadError = False
        self.frameCount = 0
        self.datasetName = commons.unlabeledDatasetName

        # Error tracking, one entry per kind of video operation
        errorKinds = ('read', 'set', 'write')
        self.videoError = {kind: False for kind in errorKinds}
        if self.errorLog:
            self.errorCounter = {kind: 0 for kind in errorKinds}
            self.errorList = []

        if self.verbose:
            print("\nUsing opencv version: ", cv2.__version__)

        # Create destination folder
        dirs.create_folder(self.destPath)
    def make_backup(self):
        '''
            Move every csv file whose name contains "index", found in the folder of
            self.path, to the backup folder (self.bkpFolderName inside that folder).

            Files already present in the backup folder are not overwritten: the
            moved file gets "_2", "_3", ... appended to its stem until the name is
            free.
        '''
        sourceFolder = self.path.parent
        backupFolder = sourceFolder / self.bkpFolderName

        # Create backup folder
        dirs.create_folder(backupFolder)

        for indexFile in sourceFolder.glob("*index*.csv"):
            indexFile = Path(indexFile)
            target = backupFolder / indexFile.name

            # Look for a free name, numbering the candidates from 2 onwards
            counter = 2
            while target.is_file():
                numberedName = indexFile.stem + "_" + str(counter) + indexFile.suffix
                target = backupFolder / numberedName
                counter += 1

            os.rename(indexFile, target)
Exemple #14
0
    def get_video_data(self):
        '''
            Get assorted details about the target video (self.videoPath).

            Sets the attributes videoHash, videoName, videoReport, dvd, video,
            frameRate, totalFrames and videoFolderPath, and sets estimatedFPS to
            True when the frame rate could not be read and a default is used.
            The destination folder (self.destPath) is created.

            Returns the opened cv2.VideoCapture object.

            Raises whatever cv2.VideoCapture raises, after printing a message.
        '''
        # Get video hash
        self.videoHash = file_hash(self.videoPath)

        # Get video name with extension from full video path
        self.videoName = Path(self.videoPath.name)

        # Get Report field: first path component relative to the base video folder
        self.videoReport = self.videoPath.relative_to(self.videoFolder).parts[0]

        # Get DVD field: the single character right after "DVD-" in the path.
        # Slicing (instead of indexing) yields None when the path ends with "DVD-".
        videoPathStr = str(self.videoPath)
        dvdIndex = videoPathStr.find("DVD-")
        if dvdIndex == -1:
            self.dvd = None
        else:
            self.dvd = videoPathStr[dvdIndex + 4:dvdIndex + 5] or None

        try:
            self.video = cv2.VideoCapture(videoPathStr)
        except Exception:
            # Re-raise instead of repeating the call and discarding its result,
            # which could leave self.video unset or pointing to a previous video
            print("\nError opening video:\n")
            raise

        self.frameRate = self.video.get(cv2.CAP_PROP_FPS)
        if self.frameRate == 0:
            self.frameRate = 25  # Default frame rate is 30 or 25 fps
            self.estimatedFPS = True

        self.totalFrames = self.video.get(cv2.CAP_PROP_FRAME_COUNT)

        # Frames are saved directly in the destination folder, with no
        # per-report or per-DVD subfolders
        self.videoFolderPath = self.destPath
        dirs.create_folder(self.videoFolderPath)
        return self.video
Exemple #15
0
        datasetName = "full_dataset_rede_{}".format(rede)

    seed = np.random.randint(0, 100)

    def get_iter_folder(iteration):
        '''
            Return the folder "<datasetName>/iteration_<iteration>/" inside
            dirs.iter_folder.
        '''
        iterationsRoot = Path(dirs.iter_folder)
        relativeFolder = "{}/iteration_{}/".format(datasetName, iteration)
        return iterationsRoot / relativeFolder

    previousIterFolder = get_iter_folder(iteration - 1)
    iterFolder = get_iter_folder(iteration)
    unlabeledIndexPath = previousIterFolder / "unlabeled_images_iteration_{}.csv".format(
        iteration - 1)
    sampledImageFolder = iterFolder / "sampled_images"
    seedLogPath = iterFolder / "seeds.txt"

    dirs.create_folder(iterFolder)
    dirs.create_folder(sampledImageFolder)

    ## Next Iteration
    print("\nSTEP: Sample images for manual annotation.")

    # Sample images for manual annotation
    sampler = SampleImages(unlabeledIndexPath, iterFolder, seed=seed)
    sampler.sample(percentage=0.01, sample_min=100)
    print(sampler.imageSourcePaths.shape)

    # Sampled images index will be created during the manual annotation
    print(
        "Image sampling finished.\nYou may now annotate sampled_images folder with the\
         labeling interface and run next step.")
Exemple #16
0
    mergedIndex.to_csv(mergedIndexPath, index=False)

    ## Create unlabeled set for next iteration
    # TODO: Encapsulate this section in function
    print("\nCreate new unlabeled set.")
    mergedPathList = [get_iter_folder(x) / \
        "final_annotated_images_iteration_{}.csv".format(x) for x in range(1, iteration+1)]
    mergedIndexList = [pd.read_csv(x) for x in mergedPathList]
    originalUnlabeledIndex  = pd.read_csv(originalUnlabeledIndexPath)

    # print("Shape final_annotations_iter_{}: {}".format(iteration, mergedIndex.shape))
    # print("Shape final_annotations_iter_{}: {}".format(iteration-1, previousMergedIndex.shape))

    allAnnotations = pd.concat(mergedIndexList, axis=0, sort=False)

    allAnnotations = dutils.remove_duplicates(allAnnotations, "FrameHash")
    print("Duplicated elements in final_annotated_images.")
    print(allAnnotations.index.duplicated().sum())

    newIndex = dutils.index_complement(originalUnlabeledIndex, allAnnotations, "FrameHash")

    dirs.create_folder(newUnlabeledIndexPath.parent)
    newIndex.to_csv(newUnlabeledIndexPath, index=False)

    # TODO: Include train info in the report
    dutils.make_report(reportPath, sampledIndexPath, manualIndexPath, autoLabelIndexPath,
                       unlabeledIndexPath, None, rede=rede, target_class=target_class)

    # Save sample seed
    dutils.save_seed_log(seedLogPath, seed, "inference")
Exemple #17
0
# Path of the pickled train history for this dataset / epochs / rede / iteration
historyPath = savedModelsFolder \
    / "history_{}_no_finetune_{}_epochs_rede_{}_iteration_{}.pickle".format(datasetName, epochs, rede, iteration)

# Plots go to a results subfolder named after the history file (without extension)
resultsFolder        = Path(dirs.results) / historyPath.stem
nameEnd  = "history_{}_epochs_rede_{}_iteration_{}.pdf".format(epochs, rede, iteration)
lossName = "loss_"     + nameEnd
accName  = "accuracy_" + nameEnd
f1Name   = "f1_"       + nameEnd

# Nothing can be plotted without the history file: report and stop
if not(historyPath.is_file()):
    print("History file does not exist.\nFile:\n", historyPath)
    print("\nExiting program.")
    exit()

dirs.create_folder(resultsFolder)

history = utils.load_pickle(historyPath)

# Show which metrics are available in the loaded history
print(history.keys())
valLoss     = history['loss-val']
trainLoss   = history['loss-train']
trainAcc    = history['acc-train']
valAcc      = history['acc-val']
# Keep only column 0 of the F1 entries
# NOTE(review): presumably one column per class - confirm which class column 0 is
trainF1     = np.array((history['f1-train']))[:, 0]
valF1       = np.array((history['f1-val']))[:, 0]

# Train vs. validation loss curves, saved to file without opening a window
plot_model_history([trainLoss, valLoss], data_labels=["Train Loss", "Val Loss"], xlabel="Epochs",
                     ylabel="Loss", title="Training loss history", save_path=resultsFolder / lossName,
                     show=False)
    if image_class == "confusion":
        image_class = "not_duct"
    tailPath = [image_set, image_class, image_name]

    return refDatasetPath / "/".join(tailPath)


# Load the reference dataset index
referenceIndex = pd.read_csv(referenceIndexPath, low_memory=False)

# Move images to new dataset location and discard middle folders
# dataset should look like this "...dataset/set/class/img.jpg"
if refDatasetPath.is_dir():
    # An existing destination is deleted without asking for confirmation.
    # It is not recreated here.
    # NOTE(review): presumably utils.copy_files creates the parent folders
    # again - confirm.
    # input("\nDataset dest path already exists. Delete and overwrite?\n")
    sh.rmtree(refDatasetPath)
else:
    dirs.create_folder(refDatasetPath)

# Find every jpg under the remote dataset, at any depth, and compute its
# destination path
globString = str(remoteDatasetPath) + "/**/*jpg"
sourceList = glob(globString, recursive=True)
destList = list(map(_discard_middle_folders, sourceList))

# Copy reference dataset and merge class confusion to not-duct
success = sum(list(map(utils.copy_files, sourceList, destList)))
print("\nMoved {}/{} files.\n".format(success, len(sourceList)))

# Count the images of each set in the remote (source) dataset
globStringVal = str(remoteDatasetPath) + "/val/**/*jpg"
globStringTrain = str(remoteDatasetPath) + "/train/**/*jpg"

imageListTrain = glob(globStringTrain, recursive=True)
imageListVal = glob(globStringVal, recursive=True)
print("\nTrain set: {} images.".format(len(imageListTrain)))
    modelPath            = savedModelsFolder / \
        "{}_{}_epochs_iteration_{}.pt".format(datasetName, epochs, iteration)
    historyPath          = savedModelsFolder / \
        "history_{}_{}_epochs_iteration_{}.pickle".format(datasetName, epochs, iteration)

    historyFolder = Path(dirs.results) / "{}/iteration_{}".format(
        datasetName, iteration)
    lossPath = historyFolder / "loss_history_{}_epochs_iteration{}.pdf".format(
        epochs, iteration)
    accPath = historyFolder / "accuracy_history_{}_epochs_iteration{}.pdf".format(
        epochs, iteration)
    f1Path = historyFolder / "f1_history_{}_epochs_iteration{}.pdf".format(
        epochs, iteration)
    seedLogPath = iterFolder / "seeds.txt"
    dirs.create_folder(historyFolder)

    ## Train model
    print("\nSTEP: Train model.")
    # ImageNet statistics
    mean = commons.IMAGENET_MEAN
    std = commons.IMAGENET_STD

    # Set transforms
    dataTransforms = mutils.resnet_transforms(mean, std)
    history, modelFineTune = mutils.train_network(sampledImageFolder,
                                                  dataTransforms,
                                                  epochs=epochs,
                                                  batch_size=trainBatchSize,
                                                  model_path=modelPath,
                                                  history_path=historyPath,
Exemple #20
0
def plot_confusion_matrix(conf_mat,
                          labels=None,
                          title=None,
                          normalize=True,
                          show=True,
                          save_path="./confusion_matrix.jpg"):
    '''
        Plot a confusion matrix as an annotated heatmap.

        conf_mat: array of floats or ints
        Square array that configures a confusion matrix. The true labels are assumed to be on the lines axis
        and the predicted labels, on the columns axis.

        labels: list
        List of class labels. Label list must be of length equal to the number of classes of the confusion
        matrix. Element i of list is the label of class in line i of the confusion matrix. When the length
        does not match (or labels is None), no tick labels are drawn.

        title: str
        Plot title. Defaults to "Confusion Matrix".

        normalize: bool
        If True, each line is divided by its sum. Lines that sum to zero (classes without examples) are
        left as zeros instead of producing NaN values.

        show: bool
        If True, call plt.show().

        save_path: str or Path
        If not None, the figure is saved to this path; the parent folder is created if needed.
    '''
    set_mpl_fig_options(commons.MPL_FIG_SIZE_SMALL)

    if labels is None:
        labels = []

    conf_mat = np.array(conf_mat, dtype=np.float32)
    numClasses = conf_mat.shape[0]

    if normalize:
        # Normalize confusion matrix line-wise. `where` skips empty lines, which
        # keep the zeros given through `out`, avoiding division by zero.
        lineSums = conf_mat.sum(axis=1, keepdims=True)
        conf_mat = np.divide(conf_mat,
                             lineSums,
                             out=np.zeros_like(conf_mat),
                             where=lineSums != 0)

    # If labels list match number of classes, use it as class labels
    if len(labels) == numClasses:
        xLabels = labels
        yLabels = labels
    else:
        xLabels = False
        yLabels = False

    sns.heatmap(conf_mat,
                annot=True,
                cbar=True,
                square=True,
                vmin=0.,
                vmax=1.,
                fmt='.2f',
                xticklabels=xLabels,
                yticklabels=yLabels,
                cmap='cividis')

    ax = plt.gca()
    plt.setp(ax.get_yticklabels(), va="center")

    plt.xlabel("Predicted Label")
    plt.ylabel("True Label")

    plt.title(title if title is not None else "Confusion Matrix")

    if save_path is not None:
        # Save figure to given path
        save_path = Path(save_path)
        dirs.create_folder(save_path.parent)

        plt.savefig(save_path, bbox_inches='tight')

    if show:
        plt.show()
Exemple #21
0
def image_grid(folder_path,
               save_path="image_grid.jpg",
               prediction_index=None,
               upperCrop=0,
               lowerCrop=0,
               size_limit=None,
               shuffle=False,
               show=False,
               save=True):
    '''
        Creates a square grid of images taken from the jpg files found, at any
        depth, under a folder.


        folder_path:
            Target images folder path (str or Path, with or without a trailing
            separator);


        save_path:
            Path where resulting grid will be saved;


        prediction_index:
            Optional DataFrame with a "Prediction" column. Images whose entry is 1
            get a red color filter. Rows are looked up by the image's position in
            the list of files used for the grid;


        upperCrop and lowerCrop:
            Number of pixels to be cropped from each composing image. The crops executed
        are horizontal crops and are measured from top to center and bottom to center,
        respectively;


        size_limit:
            Optional upper bound for the number of images considered;


        shuffle:
            If True, images are randomly sampled (without replacement) instead of
            taking the first ones found;


        show, save:
            Whether to display the grid and whether to save it to save_path. Only
            the exact value True enables each of them.


        Files that cannot be opened leave their grid cell empty.

        Returns 0. Raises AssertionError when no jpg file is found.
    '''
    save_path = Path(save_path)

    # Joining with Path gives the same pattern with or without a trailing
    # separator in folder_path
    globString = str(Path(folder_path) / '**' / '*.jpg')
    files = glob(globString, recursive=True)
    numImages = len(files)

    assert numImages > 0, "No jpg files found on destination."
    if size_limit is not None:
        numImages = min(numImages, size_limit)
    squareNumImages = get_perfect_square(numImages)

    if shuffle:
        files = np.random.choice(files, size=squareNumImages, replace=False)
    else:
        files = files[:squareNumImages]

    # Square Grid
    # Side of a square image grid. It will contain side^2 images.
    side = int(math.sqrt(numImages))

    # Image resizing dimension
    imageDim = (300, 300)  # (width, height)

    cellHeight = imageDim[1] - lowerCrop - upperCrop
    destDim = (side * imageDim[0], side * cellHeight)

    im_grid = Image.new('RGB', destDim)
    index = 0
    for j in tqdm(range(0, destDim[1], cellHeight)):
        for i in range(0, destDim[0], imageDim[0]):
            # Each cell consumes one file, even when that file cannot be opened.
            # Otherwise a single bad file would be retried for every remaining
            # cell and the rest of the grid would stay empty.
            fileIndex = index
            index += 1
            try:
                im = Image.open(files[fileIndex])
            except Exception:
                # Best effort: leave this cell empty and go on
                continue

            im = im.resize(imageDim)
            im = im.crop((0, upperCrop, imageDim[0], imageDim[1] - lowerCrop))

            # TODO: Test this properly
            if prediction_index is not None:
                # Apply color filter if image has wrong prediction
                if prediction_index.loc[fileIndex, "Prediction"] == 1:
                    im = color_filter(im, filter='r', filter_strenght=3.5)

            im.thumbnail(imageDim)
            im_grid.paste(im, (i, j))

    if save is True:
        dirs.create_folder(save_path.parent)

        im_grid.save(save_path)
        print("\nYour image grid is ready. It was saved at {}\n".format(
            save_path))
    if show is True:
        im_grid.show()
    return 0