def write_index(self, dest_path='auto', make_backup=True, prompt=True):
    '''
        Write the current index DataFrame to a csv file.

        dest_path: destination csv path. With the default 'auto', the file is
            saved next to self.path, with a time string built from self.date
            appended to the file name.
        make_backup: if True, self.make_backup() is called before writing.
        prompt: if True, wait for a line on stdin before doing anything.
    '''
    if prompt:
        print(
            "\n\nReally write index to file?\nPress any key to continue, Ctrl-C to cancel.\n"
        )
        input()

    if make_backup:
        self.make_backup()

    if dest_path != 'auto':
        self.indexPath = Path(dest_path)
    else:
        stampedStem = str(self.path.stem) + "_" + utils.get_time_string(
            self.date)
        self.indexPath = self.path.with_name(stampedStem +
                                             str(self.path.suffix))

    # The destination folder has to exist before the csv can be written
    dirs.create_folder(self.indexPath.parent)
    self.index.to_csv(self.indexPath, index=False)

    self.report_changes()
def new_iteration(self):
    '''
        Start a new iteration: bump the iteration counter in self.iterInfo,
        mark it as unfinished and create its folder inside self.loopFolder.

        Steps of a complete iteration, as listed by the original author
        (the ones marked "v" were flagged as done):
            create new iteration folder                       v
            sample new images                                 v
            update iter_info                                  v
            label images
            merge new labels (manual) to annotated dataset
            train model
            set boundaries
            automatic annotation
            merge new labels (automatic) to annotated dataset
            update iter_info, iteration complete

        Raises ValueError when the current iteration is not iteration 0 and
        is still flagged as not completed.
    '''
    info = self.iterInfo

    # Iteration 0 is the initial state and never blocks a new iteration
    if info.iteration != 0 and info.completed_iter == False:
        raise ValueError(
            "Current iteration has not finished. Resolve it and try again."
        )

    info.iteration += 1
    info.completed_iter = False
    print("Starting iteration {}.".format(info.iteration))

    folderName = "iteration_{}".format(info.iteration)
    info.currentIterFolder = self.loopFolder / folderName
    dirs.create_folder(info.currentIterFolder)

    print(
        "Iteration setup finished.\nCall sample_images method for next step: sample and label images."
    )
def setup_sample_from_folder(self):
    '''
        Set the source and destination paths used by the sample_from_folder
        test and (re)create the destination folder.
    '''
    assetsRoot = Path(dirs.test_assets)
    testRoot = Path(dirs.test)

    self.sourceFolder = assetsRoot / "dataset_test"
    self.sampleImagesFolder = testRoot / "test_sample_images"
    self.destFolderSFF = self.sampleImagesFolder / "test_sample_from_folder"

    # A folder left over from a previous run goes through the teardown
    # first, so the destination is created for this test only
    leftoverExists = self.destFolderSFF.is_dir()
    if leftoverExists:
        self.teardown_sample_from_folder()

    dirs.create_folder(self.destFolderSFF)
def plot_outputs_histogram(normalized_outputs,
                           labels=None,
                           lower_thresh=None,
                           upper_thresh=None,
                           title="Outputs Histogram",
                           show=True,
                           log=False,
                           save_path=None,
                           save_formats=(".png", ".pdf")):
    '''
        Plot a 100-bin histogram of normalized output scores.

        normalized_outputs: scores to plot; the x axis is limited to [0, 1].
            It is indexed with a boolean mask when labels is given, so it is
            presumably a numpy array (confirm against callers).
        labels: optional array aligned with normalized_outputs. Entries equal
            to 0 are drawn as "Positive Examples" and entries equal to 1 as
            "Negative Examples".
        lower_thresh, upper_thresh: optional thresholds. Vertical lines are
            drawn only when both are given.
        title: figure title.
        show: if True, display the figure unless the active matplotlib
            backend is agg.
        log: if True, use a logarithmic count axis.
        save_path: optional path where the figure is saved. Its parent folder
            is created first.
        save_formats: extensions (each starting with a dot) of additional
            copies to save next to save_path. Entries that do not start with
            a dot are ignored.

        Returns the matplotlib figure.
    '''
    fig = plt.figure(figsize=commons.MPL_FIG_SIZE_SMALL)

    if labels is not None:
        posOutputs = normalized_outputs[labels == 0]
        negOutputs = normalized_outputs[labels == 1]

        plt.hist(posOutputs, bins=100, label="Positive Examples", log=log)
        plt.hist(negOutputs, bins=100, label="Negative Examples", log=log)
    else:
        plt.hist(normalized_outputs, bins=100, label="Examples", log=log)

    if lower_thresh is not None and upper_thresh is not None:
        axes = plt.gca()
        axes.axvline(lower_thresh, 0., 1., color='b', label="Lower Thresh")
        axes.axvline(upper_thresh, 0., 1., color='r', label="Upper Thresh")

    plt.tight_layout(pad=2.)
    plt.xlim(0., 1.)
    plt.title(title)
    plt.legend()
    plt.xlabel("Normalized Score")

    yLabel = "Number of Examples"
    if log:
        yLabel += " (log)"
    plt.ylabel(yLabel)

    if save_path is not None:
        save_path = Path(save_path)
        dirs.create_folder(save_path.parent)

        # Save with desired format, and additional formats specified in save_formats
        plt.savefig(save_path)
        for ext in save_formats:
            # startswith() is safe for an empty string, unlike ext[0]
            if ext.startswith('.'):
                plt.savefig(save_path.with_suffix(ext))

    # Backend names are not guaranteed to be lowercase across matplotlib
    # versions, so compare case-insensitively
    if show and mpl.get_backend().lower() != "agg":
        plt.show()
    return fig
def train_network(dataset_path,
                  data_transforms,
                  epochs=25,
                  batch_size=64,
                  model_path="./model_weights.pt",
                  history_path="./train_history.pickle",
                  weighted_loss=True,
                  seed=None,
                  device_id=None):
    '''
        Train a model built by TrainModel.define_model_resnet18 on an image
        folder dataset and optionally save its weights and train history.

        dataset_path: folder (str or Path) containing 'train' and 'val'
            subfolders readable by datasets.ImageFolder.
        data_transforms: mapping with 'train' and 'val' keys holding the
            transform applied to each set.
        epochs: number of training epochs.
        batch_size: number of examples per batch.
        model_path: where the trained state dict is saved. A falsy value
            skips saving.
        history_path: where the train history is saved. A falsy value skips
            saving.
        weighted_loss: if True, weight the cross entropy loss with the values
            returned by get_loss_weights.
        seed: random seed. None leaves the torch seeds untouched.
        device_id: forwarded to TrainModel.

        Returns (history, state_dict). history is the value returned by
        trainer.save_history, or None when history_path is falsy.
    '''
    # 0 is a valid seed, so test against None rather than truthiness
    if seed is not None:
        set_torch_random_seeds(seed)

    # Accept str as well as Path inputs
    dataset_path = Path(dataset_path)

    # Load Dataset objects for train and val sets from folder
    imageDataset = {}
    for phase in ('train', 'val'):
        imageDataset[phase] = datasets.ImageFolder(
            str(dataset_path / phase),
            transform=data_transforms[phase],
            is_valid_file=utils.check_empty_file)

    # Instantiate trainer object
    trainer = TrainModel(seed=seed, verbose=True, device_id=device_id)

    # Load data and define model
    trainer.load_data(imageDataset, num_examples_per_batch=batch_size)
    modelFineTune = trainer.define_model_resnet18(finetune=False)

    # Set optimizer and Loss criterion
    optimizer = optim.Adam(modelFineTune.parameters())
    if weighted_loss:
        weights = torch.Tensor(get_loss_weights(imageDataset)).to(
            trainer.device)
        loss = nn.CrossEntropyLoss(weight=weights)
    else:
        loss = nn.CrossEntropyLoss()

    # Train model
    modelFineTune = trainer.train(modelFineTune,
                                  loss,
                                  optimizer,
                                  scheduler=None,
                                  num_epochs=epochs)

    # Save train history and trained model weights
    if model_path:
        dirs.create_folder(Path(model_path).parent)
        torch.save(modelFineTune.state_dict(), model_path)

    # history must be bound even when nothing is saved, otherwise the
    # return statement below raises UnboundLocalError
    history = None
    if history_path:
        dirs.create_folder(Path(history_path).parent)
        history = trainer.save_history(history_path)

    return history, modelFineTune.state_dict()
def load_info(self):
    '''
        Load the iteration info stored at self.iterInfoPath. When that file
        does not exist, build a new IterInfo, create self.loopFolder and
        save the new object to self.iterInfoPath.

        Returns self.iterInfo.
    '''
    if not self.iterInfoPath.is_file():
        # First run: nothing saved yet
        self.iterInfo = IterInfo(self.unlabeledFolder,
                                 self.unlabeledIndexPath, self.loopFolder)
        dirs.create_folder(self.loopFolder)
        utils.save_pickle(self.iterInfo, self.iterInfoPath)
        return self.iterInfo

    self.iterInfo = utils.load_pickle(self.iterInfoPath)
    return self.iterInfo
def convert_video(video_input, video_output):
    '''
        Convert a video with ffmpeg, creating the destination folder first.

        video_input: path of the source video.
        video_output: path of the converted video.

        Blocks until ffmpeg exits.
        Returns the ffmpeg exit status (0 on success).
    '''
    # Local import so the block does not depend on the file's import list
    from pathlib import Path

    print("\nProcessing video: ", video_input)
    print("Saving to : ", video_output)

    # Path.parent also handles a bare file name (parent is "."), where
    # splitting on "/" produced an empty folder name
    destFolder = str(Path(video_output).parent)
    dirs.create_folder(destFolder)

    cmds = ['ffmpeg', '-i', str(video_input), str(video_output)]

    # run() waits for ffmpeg to finish. Popen returned immediately, so the
    # success message was printed while the conversion was still running
    # and failures went unnoticed.
    result = subprocess.run(cmds)

    if result.returncode == 0:
        print("Video saved to : ", video_output)
    else:
        print("ffmpeg exited with status {} while converting: ".format(
            result.returncode), video_input)
    return result.returncode
def copy_files(source, destination):
    '''
        copy_files(source, destination)

        Copy the file at source to the destination path, creating the
        destination's parent folder first.

        Returns True when source is an existing file and was copied,
        False otherwise (nothing is created in that case).
    '''
    if not os.path.isfile(source):
        return False

    dirs.create_folder(Path(destination).parent)
    shutil.copy2(source, destination)
    return True
def __init__(self, source, destFolder, seed=None, verbose=True):
    '''
        Store the sampling configuration, seed numpy's global random
        generator and create the destination folders.

        source: path of the sampling source, stored as a Path.
        destFolder: destination folder. Images go to its "sampled_images"
            subfolder.
        seed: value passed to np.random.seed.
        verbose: verbosity flag, stored as given.
    '''
    self.date = datetime.now()
    self.seed = seed
    self.verbose = verbose

    self.source = Path(source)
    self.destFolder = Path(destFolder)
    self.imageFolder = self.destFolder / "sampled_images"

    # Not set at construction time
    self.percentage = None
    self.index = None

    np.random.seed(self.seed)

    for folder in (self.destFolder, self.imageFolder):
        dirs.create_folder(folder)
def copy_files(self, imagesDestFolder='auto', write=False, mode='copy'):
    '''
        Copy every file listed in the index to a new folder and update the
        index paths to point at the copies.

        imagesDestFolder: destination folder. With the default 'auto', a
            "compiled_dataset_<date>_<time>" folder name is appended to
            dirs.dataset. An explicit value is joined to file names with
            "/", so it presumably has to be a Path (confirm with callers).
        write: if True, write the updated index with self.write_index,
            without prompting.
        mode: only 'copy' is implemented. Any other value raises
            NotImplementedError.

        Returns self.moveResults, the list of results of utils.copy_files
        (one entry per index row).
    '''
    # Requires an index to exist
    assert self.indexExists, "Index does not exist. Cannot move files."

    self.imagesDestFolder = imagesDestFolder
    if self.imagesDestFolder == 'auto':
        # Plain string concatenation: relies on dirs.dataset already ending
        # with a path separator
        self.imagesDestFolder = Path(
            dirs.dataset + "compiled_dataset_{}-{}-{}_{}-{}-{}".format(
                self.date.year, self.date.month, self.date.day,
                self.date.hour, self.date.minute, self.date.second))

    dirs.create_folder(self.imagesDestFolder, verbose=True)

    print("Copying {} files.".format(self.index.shape[0]))

    def _add_folder_path(x):
        # Destination path of one frame: destination folder / frame name
        return self.imagesDestFolder / x

    self.frameDestPaths = self.index.loc[:, 'FrameName'].apply(
        _add_folder_path)

    # Select copy or move mode
    if mode == 'copy':
        # Files are read from the 'OriginalFramePath' column
        self.moveResults = list(
            map(utils.copy_files, self.index.loc[:, 'OriginalFramePath'],
                self.frameDestPaths))
    else:
        raise NotImplementedError

    # After copying, the previous 'FramePath' becomes 'OriginalFramePath'
    # and 'FramePath' points at the new copy.
    # NOTE(review): rows are addressed by label 0..len-1, which assumes a
    # default integer index - confirm.
    for i in range(self.get_index_len()):
        self.index.loc[i, "OriginalFramePath"] = copy(
            self.index.loc[i, "FramePath"])
        self.index.loc[i, "FramePath"] = self.frameDestPaths[i]

    if write:
        self.write_index(prompt=False)

    # Report results
    # NOTE(review): the whitespace inside this backslash-continued literal
    # could not be recovered in review - check it against the repository copy.
    print("Found {} files.\n\
Moved {} files to folder\n\
{}\
\n{} files were not found.".format(
        len(self.moveResults), sum(self.moveResults),
        self.imagesDestFolder,
        len(self.moveResults) - sum(self.moveResults)))
    return self.moveResults
def test_setup_merge_annotations(self):
    '''
        Copy the merge-annotations test assets into the active test folder,
        keeping their relative layout, and check that the two csv files the
        test needs are in place.
    '''
    iterationSubfolder = "test_loop/iteration_1/"
    self.assetsFolder = Path(dirs.test_assets) / iterationSubfolder
    self.testFolder = Path(dirs.test) / iterationSubfolder

    self.indexPath = self.testFolder / "sampled_images.csv"
    self.newLabelsPath = self.testFolder / "sampled_images_labels.csv"

    for asset in get_file_list(str(self.assetsFolder)):
        # Rebuild the asset's path relative to the assets folder inside the
        # test folder
        relativePath = Path(asset).relative_to(self.assetsFolder)
        target = self.testFolder / relativePath

        dirs.create_folder(target.parent)
        copy_files(str(asset), str(target))

    assert self.indexPath.is_file()
    assert self.newLabelsPath.is_file()
def __init__(self,
             destPath,
             videoFolder=dirs.base_videos,
             verbose=True,
             errorLog=True):
    '''
        Store the configuration, reset the error state and frame counter and
        create the destination folder.

        destPath: destination folder, stored as a Path and created here.
        videoFolder: base folder of the source videos.
        verbose: if True, print the opencv version in use.
        errorLog: if True, also set up per-operation error counters and an
            error list.
    '''
    self.destPath = Path(destPath)
    self.videoFolder = videoFolder
    self.verbose = verbose
    self.errorLog = errorLog

    # Error state, keyed by operation
    self.estimatedFPS = False
    self.criticalReadError = False
    self.videoError = dict(read=False, set=False, write=False)
    if self.errorLog:
        self.errorCounter = dict.fromkeys(('read', 'set', 'write'), 0)
        self.errorList = []

    self.frameCount = 0
    self.datasetName = commons.unlabeledDatasetName

    if self.verbose:
        print("\nUsing opencv version: ", cv2.__version__)

    # Create destination folder
    dirs.create_folder(self.destPath)
def make_backup(self):
    '''
        Move every "*index*.csv" file found in the folder of self.path to
        the backup folder (self.bkpFolderName, inside that same folder).
        A file whose name is already taken in the backup folder gets a
        numeric suffix.
    '''
    sourceFolder = self.path.parent
    backupFolder = sourceFolder / self.bkpFolderName
    dirs.create_folder(backupFolder)

    for indexFile in sourceFolder.glob("*index*.csv"):
        indexFile = Path(indexFile)
        target = backupFolder / indexFile.name

        # On a name clash, try "<stem>_2", "<stem>_3", ... until the name
        # is free
        counter = 2
        while target.is_file():
            numberedName = indexFile.stem + "_" + str(
                counter) + indexFile.suffix
            target = backupFolder / numberedName
            counter += 1

        os.rename(indexFile, target)
def get_video_data(self):
    '''
        Get assorted details about the target video and open it.

        Reads self.videoPath, self.videoFolder and self.destPath. Sets
        videoHash, videoName, videoReport, dvd, video, frameRate,
        totalFrames and videoFolderPath, plus estimatedFPS when the frame
        rate had to be assumed. Creates the videoFolderPath folder.

        Returns the cv2.VideoCapture object for the video.
        Any exception raised while opening the video is re-raised after a
        message is printed.
    '''
    # Get video file hash
    self.videoHash = file_hash(self.videoPath)

    # Get video name with extension from full video path
    self.videoName = Path(self.videoPath.name)

    # Get Report field: first path component below the videos base folder
    self.videoReport = self.videoPath.relative_to(self.videoFolder).parts[0]

    # Get DVD field: the single character that follows "DVD-" in the path
    pathString = str(self.videoPath)
    dvdIndex = pathString.find("DVD-")
    if dvdIndex == -1:
        self.dvd = None
    else:
        # Slicing yields an empty string (turned into None) when the path
        # ends right after "DVD-", where plain indexing raised IndexError
        self.dvd = pathString[dvdIndex + 4:dvdIndex + 5] or None

    try:
        self.video = cv2.VideoCapture(pathString)
    except Exception:
        # Re-raise the original error instead of calling VideoCapture a
        # second time and discarding its result
        print("\nError opening video:\n")
        raise

    self.frameRate = self.video.get(cv2.CAP_PROP_FPS)
    if self.frameRate == 0:
        # No frame rate reported: assume 25 fps and flag it as an estimate
        self.frameRate = 25
        self.estimatedFPS = True

    self.totalFrames = self.video.get(cv2.CAP_PROP_FRAME_COUNT)

    # Frames go straight into destPath, with no per-report / per-DVD
    # subfolders
    self.videoFolderPath = self.destPath
    dirs.create_folder(self.videoFolderPath)

    return self.video
# Sampling step of one iteration: set up the iteration folders and sample
# images for manual annotation. Relies on `rede` and `iteration` being
# defined before this point.
datasetName = "full_dataset_rede_{}".format(rede)

# Random sampling seed, drawn from [0, 100)
seed = np.random.randint(0, 100)


def get_iter_folder(iteration):
    # Folder of a given iteration: <dirs.iter_folder>/<datasetName>/iteration_<n>/
    return Path(dirs.iter_folder) / "{}/iteration_{}/".format(
        datasetName, iteration)


previousIterFolder = get_iter_folder(iteration - 1)
iterFolder = get_iter_folder(iteration)
# The unlabeled index is read from the previous iteration's folder
unlabeledIndexPath = previousIterFolder / "unlabeled_images_iteration_{}.csv".format(
    iteration - 1)
sampledImageFolder = iterFolder / "sampled_images"
seedLogPath = iterFolder / "seeds.txt"

dirs.create_folder(iterFolder)
dirs.create_folder(sampledImageFolder)

## Next Iteration
print("\nSTEP: Sample images for manual annotation.")

# Sample images for manual annotation
sampler = SampleImages(unlabeledIndexPath, iterFolder, seed=seed)
sampler.sample(percentage=0.01, sample_min=100)
print(sampler.imageSourcePaths.shape)

# Sampled images index will be created during the manual annotation
# NOTE(review): the whitespace inside this backslash-continued literal could
# not be recovered in review - check it against the repository copy.
print(
    "Image sampling finished.\nYou may now annotate sampled_images folder with the\
 labeling interface and run next step.")
mergedIndex.to_csv(mergedIndexPath, index=False) ## Create unlabeled set for next iteration # TODO: Encapsulate this section in function print("\nCreate new unlabeled set.") mergedPathList = [get_iter_folder(x) / \ "final_annotated_images_iteration_{}.csv".format(x) for x in range(1, iteration+1)] mergedIndexList = [pd.read_csv(x) for x in mergedPathList] originalUnlabeledIndex = pd.read_csv(originalUnlabeledIndexPath) # print("Shape final_annotations_iter_{}: {}".format(iteration, mergedIndex.shape)) # print("Shape final_annotations_iter_{}: {}".format(iteration-1, previousMergedIndex.shape)) allAnnotations = pd.concat(mergedIndexList, axis=0, sort=False) allAnnotations = dutils.remove_duplicates(allAnnotations, "FrameHash") print("Duplicated elements in final_annotated_images.") print(allAnnotations.index.duplicated().sum()) newIndex = dutils.index_complement(originalUnlabeledIndex, allAnnotations, "FrameHash") dirs.create_folder(newUnlabeledIndexPath.parent) newIndex.to_csv(newUnlabeledIndexPath, index=False) # TODO: Include train info in the report dutils.make_report(reportPath, sampledIndexPath, manualIndexPath, autoLabelIndexPath, unlabeledIndexPath, None, rede=rede, target_class=target_class) # Save sample seed dutils.save_seed_log(seedLogPath, seed, "inference")
# Load a saved train history and plot its loss curves.
# Relies on savedModelsFolder, datasetName, epochs, rede and iteration being
# defined before this point.
historyPath = savedModelsFolder \
    / "history_{}_no_finetune_{}_epochs_rede_{}_iteration_{}.pickle".format(datasetName, epochs, rede, iteration)

# Plots are saved in a results folder named after the history file
resultsFolder = Path(dirs.results) / historyPath.stem
nameEnd = "history_{}_epochs_rede_{}_iteration_{}.pdf".format(epochs, rede, iteration)
lossName = "loss_" + nameEnd
accName = "accuracy_" + nameEnd
f1Name = "f1_" + nameEnd

# Without a history file there is nothing to plot: report it and stop the
# program
if not(historyPath.is_file()):
    print("History file does not exist.\nFile:\n", historyPath)
    print("\nExiting program.")
    exit()

dirs.create_folder(resultsFolder)

history = utils.load_pickle(historyPath)
print(history.keys())

valLoss = history['loss-val']
trainLoss = history['loss-train']
trainAcc = history['acc-train']
valAcc = history['acc-val']

# Only the first column of each f1 entry is kept
# NOTE(review): presumably one column per class - confirm
trainF1 = np.array((history['f1-train']))[:, 0]
valF1 = np.array((history['f1-val']))[:, 0]

plot_model_history([trainLoss, valLoss], data_labels=["Train Loss", "Val Loss"], xlabel="Epochs",
                   ylabel="Loss", title="Training loss history", save_path=resultsFolder / lossName,
                   show=False)
if image_class == "confusion": image_class = "not_duct" tailPath = [image_set, image_class, image_name] return refDatasetPath / "/".join(tailPath) referenceIndex = pd.read_csv(referenceIndexPath, low_memory=False) # Move images to new dataset location and discard middle folders # dataset should look like this "...dataset/set/class/img.jpg" if refDatasetPath.is_dir(): # input("\nDataset dest path already exists. Delete and overwrite?\n") sh.rmtree(refDatasetPath) else: dirs.create_folder(refDatasetPath) globString = str(remoteDatasetPath) + "/**/*jpg" sourceList = glob(globString, recursive=True) destList = list(map(_discard_middle_folders, sourceList)) # Copy reference dataset and merge class confusion to not-duct success = sum(list(map(utils.copy_files, sourceList, destList))) print("\nMoved {}/{} files.\n".format(success, len(sourceList))) globStringVal = str(remoteDatasetPath) + "/val/**/*jpg" globStringTrain = str(remoteDatasetPath) + "/train/**/*jpg" imageListTrain = glob(globStringTrain, recursive=True) imageListVal = glob(globStringVal, recursive=True) print("\nTrain set: {} images.".format(len(imageListTrain)))
modelPath = savedModelsFolder / \ "{}_{}_epochs_iteration_{}.pt".format(datasetName, epochs, iteration) historyPath = savedModelsFolder / \ "history_{}_{}_epochs_iteration_{}.pickle".format(datasetName, epochs, iteration) historyFolder = Path(dirs.results) / "{}/iteration_{}".format( datasetName, iteration) lossPath = historyFolder / "loss_history_{}_epochs_iteration{}.pdf".format( epochs, iteration) accPath = historyFolder / "accuracy_history_{}_epochs_iteration{}.pdf".format( epochs, iteration) f1Path = historyFolder / "f1_history_{}_epochs_iteration{}.pdf".format( epochs, iteration) seedLogPath = iterFolder / "seeds.txt" dirs.create_folder(historyFolder) ## Train model print("\nSTEP: Train model.") # ImageNet statistics mean = commons.IMAGENET_MEAN std = commons.IMAGENET_STD # Set transforms dataTransforms = mutils.resnet_transforms(mean, std) history, modelFineTune = mutils.train_network(sampledImageFolder, dataTransforms, epochs=epochs, batch_size=trainBatchSize, model_path=modelPath, history_path=historyPath,
def plot_confusion_matrix(conf_mat,
                          labels=(),
                          title=None,
                          normalize=True,
                          show=True,
                          save_path="./confusion_matrix.jpg"):
    '''
        Plot a confusion matrix as an annotated heatmap.

        conf_mat: array of floats or ints
            Square array that configures a confusion matrix. The true labels
            are assumed to be on the lines axis and the predicted labels, on
            the columns axis.

        labels: list
            List of class labels. It is used for the tick labels only when
            its length equals the number of classes of the confusion matrix;
            otherwise no tick labels are drawn. Element i of the list is the
            label of the class in line i of the confusion matrix.

        title: figure title. None uses "Confusion Matrix".

        normalize: if True, divide every line by its sum. Lines that sum to
            zero are left as zeros.

        show: if True, display the figure.

        save_path: where the figure is saved (parent folder is created).
            None skips saving.
    '''
    set_mpl_fig_options(commons.MPL_FIG_SIZE_SMALL)

    conf_mat = np.array(conf_mat, dtype=np.float32)
    numClasses = np.shape(conf_mat)[0]

    if normalize:
        # Normalize confusion matrix line-wise. `where` skips lines with no
        # examples, which would otherwise divide by zero and fill the line
        # with NaN.
        lineSums = conf_mat.sum(axis=1, keepdims=True)
        conf_mat = np.divide(conf_mat,
                             lineSums,
                             out=np.zeros_like(conf_mat),
                             where=lineSums != 0)

    # If labels list match number of classes, use it as class labels
    if labels is None:
        labels = ()
    tickLabels = labels if len(labels) == numClasses else False

    sns.heatmap(conf_mat,
                annot=True,
                cbar=True,
                square=True,
                vmin=0.,
                vmax=1.,
                fmt='.2f',
                xticklabels=tickLabels,
                yticklabels=tickLabels,
                cmap='cividis')

    ax = plt.gca()
    plt.setp(ax.get_yticklabels(), va="center")

    plt.xlabel("Predicted Label")
    plt.ylabel("True Label")
    plt.title(title if title is not None else "Confusion Matrix")

    if save_path is not None:
        # Save figure to given path
        save_path = Path(save_path)
        dirs.create_folder(save_path.parent)
        plt.savefig(save_path, bbox_inches='tight')

    if show:
        plt.show()
def image_grid(folder_path,
               save_path="image_grid.jpg",
               prediction_index=None,
               upperCrop=0,
               lowerCrop=0,
               size_limit=None,
               shuffle=False,
               show=False,
               save=True):
    '''
        Creates a square grid of images sampled from the jpg files found
        (recursively) in a folder.

        folder_path:
            Target images folder path;

        save_path:
            Path where resulting grid will be saved;

        prediction_index:
            Optional DataFrame with a "Prediction" column, addressed by the
            position of each image in the grid. Images whose prediction is 1
            get a red color filter;

        upperCrop and lowerCrop:
            Number of pixels to be cropped from each composing image. The
            crops executed are horizontal crops and are measured from top to
            center and bottom to center, respectively;

        size_limit:
            Optional upper bound for the number of images used;

        shuffle:
            If True, images are picked at random (without replacement)
            instead of in listing order;

        show:
            If True, display the resulting grid;

        save:
            If True, save the grid to save_path (parent folder is created).

        Images that cannot be opened leave their grid cell blank.
        Returns 0.
    '''
    save_path = Path(save_path)

    # Joining with Path puts a separator between the folder and "**" whether
    # or not folder_path ends with one. Plain string concatenation produced
    # "<folder>**/*.jpg" for a folder given without a trailing separator.
    globString = str(Path(folder_path) / '**' / '*.jpg')
    files = glob(globString, recursive=True)

    numImages = len(files)
    assert numImages > 0, "No jpg files found on destination."

    if size_limit is not None:
        numImages = np.clip(numImages, None, size_limit)

    squareNumImages = get_perfect_square(numImages)

    if shuffle:
        files = np.random.choice(files, size=squareNumImages, replace=False)
    else:
        files = files[:squareNumImages]

    # Square Grid
    # Side of a square image grid. It will contain side^2 images.
    side = int(math.sqrt(numImages))

    # Image resizing dimension
    imageDim = (300, 300)  # (width, height)
    cellHeight = imageDim[1] - lowerCrop - upperCrop

    destDim = (side * imageDim[0], side * cellHeight)

    im_grid = Image.new('RGB', destDim)
    index = 0
    for j in tqdm(range(0, destDim[1], cellHeight)):
        for i in range(0, destDim[0], imageDim[0]):
            # Advance the counter before anything can fail: skipping a bad
            # file without advancing made every following cell retry the
            # same file, leaving the rest of the grid blank
            fileIndex = index
            index += 1

            try:
                im = Image.open(files[fileIndex])
            except (OSError, IndexError):
                # Unreadable or missing image: leave this cell blank
                continue

            im = im.resize(imageDim)
            im = im.crop((0, upperCrop, imageDim[0], imageDim[1] - lowerCrop))

            # TODO: Test this properly
            if prediction_index is not None:
                # Apply color filter if image has wrong prediction
                if prediction_index.loc[fileIndex, "Prediction"] == 1:
                    im = color_filter(im, filter='r', filter_strenght=3.5)

            im.thumbnail(imageDim)
            im_grid.paste(im, (i, j))

    if save is True:
        dirs.create_folder(save_path.parent)
        im_grid.save(save_path)
        print("\nYour image grid is ready. It was saved at {}\n".format(
            save_path))

    if show is True:
        im_grid.show()
    return 0