def create_descriptors(self):
    """ Creates the initial descriptors needed for the BoW by loading the images
    in the trainSVM segment.

    Returns: list of descriptors
    """
    # For testing purposes only load 15 classes and 20 images per class.
    descriptors = []
    totalNumberOfDescriptors = float(self.testData.numberOfClasses * self.testData.segmentSizeMean["trainSVM"])
    if Settings.G_DETAILED_CONSOLE_OUTPUT:
        print "\nCreating {0} {1} descriptors.".format(totalNumberOfDescriptors, str(self))

    for img, _ in self.testData.load_data("trainSVM", outputActions=False, resolutionSize=self.imageSize, transformation=self.transform):
        _, descriptor = self.compute_descriptor(img)

        # Add the descriptor to the other descriptors we have.
        if descriptor is None or len(descriptor) == 0:
            print "\n** Could not find descriptors and/or keypoints for image. **\n"
            # Save the image for failure analysis.
            vPath = self.modelSaver.get_save_path_for_visualizations() + "/noKeyPoints/"
            utils.create_dir_if_necessary(vPath)
            fileName = utils.get_uuid() + ".jpg"
            cv.imwrite(vPath + fileName, img)
            continue
        descriptors.append(descriptor)

        # Clear the image reference to keep RAM usage down.
        img = []

        # Print progress.
        utils.show_progress(Settings.G_DETAILED_CONSOLE_OUTPUT, len(descriptors), totalNumberOfDescriptors, "Descriptor creation progress:")
    return descriptors
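# A minimal sketch (an assumption, not part of the original pipeline) of how the
# descriptors returned above could feed the BoW vocabulary, using OpenCV's
# BOWKMeansTrainer; the function name and vocabularySize parameter are hypothetical.
import numpy as np

def build_bow_vocabulary(descriptors, vocabularySize=100):
    bowTrainer = cv.BOWKMeansTrainer(vocabularySize)
    for descriptor in descriptors:
        # BOWKMeansTrainer expects float32 descriptor matrices.
        bowTrainer.add(np.float32(descriptor))
    # k-means clustering over all descriptors yields the visual-word vocabulary.
    return bowTrainer.cluster()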
def _save(self, name, samples):
    path = os.path.join(os.getcwd(), 'data', 'data', 'cache')
    create_dir_if_necessary(path)
    samples_path = os.path.join(path, name + ".pkl")
    aspects_path = os.path.join(path, name + "_aspects.pkl")

    # print(f'Trying to save loaded dataset to {samples_path}.')
    with open(samples_path, 'wb') as f:
        pickle.dump(samples, f)
        # print(f'Model {name} successfully saved.')

    with open(aspects_path, "wb") as f:
        pickle.dump(self.aspects, f)
def get_save_path(self):
    """ Returns the current model root path."""
    fileName = self.modelUuid
    path = utils.get_data_path() + fileName + "/"
    try:
        utils.create_dir_if_necessary(path)
    except Exception:
        logging.exception("Could not create dir to save classifier in. Saving in {0} instead.".format(utils.get_data_path()))
        path = utils.get_data_path()
    return path
def __call__(self, model, validLoss):
    """ Saves the model."""
    path = self.get_save_path()

    # Save the SVMs separately because OpenCV provides its own method to save an SVM.
    # Each SVM gets its own file; saving them all to the bare directory path would
    # overwrite one another.
    svmPath = path + "svms/"
    utils.create_dir_if_necessary(svmPath)
    for svmName in model.svms:
        model.svms[svmName].save(svmPath + svmName)

    # Pickle the model itself.
    with open(path + "model", "wb") as f:
        pickle.dump(model, f)
    self.add_model_to_dict(path, validLoss, str(model))
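# A minimal sketch (not part of the original code) of the matching load step,
# assuming the old OpenCV 2.4 cv2.SVM API that pairs with the save() calls above;
# the helper name load_model_from is hypothetical.
def load_model_from(path):
    with open(path + "model", "rb") as f:
        model = pickle.load(f)
    # Restore each SVM from its own file next to the pickled model.
    for svmName in model.svms:
        svm = cv.SVM()
        svm.load(path + "svms/" + svmName)
        model.svms[svmName] = svm
    return model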
def _try_load(self, name, fields):
    path = os.path.join(os.getcwd(), 'data', 'data', 'cache')
    create_dir_if_necessary(path)

    # Use the same file names as _save so cached datasets are actually found.
    samples_path = os.path.join(path, name + ".pkl")
    aspects_path = os.path.join(path, name + "_aspects.pkl")

    if not check_if_file_exists(samples_path) or not check_if_file_exists(aspects_path):
        return [], None

    with open(samples_path, 'rb') as f:
        examples = pickle.load(f)

    with open(aspects_path, 'rb') as f:
        self.aspects = pickle.load(f)

    # get all fields
    fields = self._construct_fields(fields)
    return examples, fields
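# A minimal usage sketch of the cache round-trip formed by _try_load and _save
# (an assumption: the dataset name and the _load_and_preprocess step are
# hypothetical placeholders for the expensive loading work being cached).
examples, fields = self._try_load('germeval_train', raw_fields)
if not examples:
    examples = self._load_and_preprocess()  # hypothetical expensive step
    self._save('germeval_train', examples)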
def normalize_test_data(self, size, newName="", forceOverwrite=False):
    normTestDataRootPath = utils.get_parent_dir(self.get_root_path()) + "/"
    if newName == "":
        if not forceOverwrite:
            overwrite = utils.radio_question("[?]", "Do you really wish to overwrite existing images?", None, ["Yes", "No"], [True, False])
        else:
            overwrite = True
        if not overwrite:
            normTestDataRootPath += utils.value_question("", "Provide new foldername:", "s")
        else:
            normTestDataRootPath = self.get_root_path()
    else:
        normTestDataRootPath += newName

    utils.create_dir_if_necessary(normTestDataRootPath)
    print "Saving equalized test data set in path", normTestDataRootPath

    # Segment the data with only one segment.
    self.segment_test_data({"all": 1})
    self.new_segmentation()

    numberOfImages = self.testDataSize
    numberOfImagesDone = 0
    currentClass = ""
    print "Starting equalization.\n"
    for img, class_, fileName in self.load_data("all", grayscale=False, outputActions=False, yieldFilename=True):
        path = normTestDataRootPath + "/" + class_ + "/"

        # Create the class directory when a new class begins.
        if not currentClass == class_:
            currentClass = class_
            utils.create_dir_if_necessary(path)

        resizedImg = utils.equalize_image_size(img, size)
        path += fileName
        cv.imwrite(path, resizedImg)
        numberOfImagesDone += 1
        utils.show_progress(True, numberOfImagesDone, numberOfImages, "Processing class {0}.\tTotal progress:", currentClass)
    print "\nEqualization finished."
    print "\n"
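# A possible implementation sketch of utils.equalize_image_size (an assumption:
# `size` is a target pixel count and the aspect ratio is preserved; the real
# helper may differ).
import math

def equalize_image_size(image, size):
    # Scale so that width * height is approximately `size`, keeping the aspect ratio.
    h, w = image.shape[:2]
    scale = math.sqrt(float(size) / (w * h))
    return cv.resize(image, (int(w * scale), int(h * scale)))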
def __init__(self, name, netId, plotLayerWeights, modelSaver):
    self.name = name
    self.netId = netId
    self.plotLayers = plotLayerWeights

    path = modelSaver.get_save_path_for_visualizations() + "train_history/"
    self.lossPath = path + "loss/"
    self.accPath = path + "accuracy/"
    utils.create_dir_if_necessary(path)
    utils.create_dir_if_necessary(self.lossPath)
    utils.create_dir_if_necessary(self.accPath)

    if plotLayerWeights:
        # Create a directory for each layer.
        self.cWeightsPath = path + "conv_weights/"
        utils.create_dir_if_necessary(self.cWeightsPath)
        for layer in plotLayerWeights:
            utils.create_dir_if_necessary(self.cWeightsPath + str(layer) + "/")
def crop_test_data_to_square(self, manuallyDecideFolderName):
    self.segment_test_data({"all": 1})
    self.new_segmentation()
    numberOfImages = self.testDataSize
    numberOfImagesDone = 0
    rejectedImages = 0
    manDir = utils.get_parent_dir(self.get_root_path()) + "/" + manuallyDecideFolderName
    print "Starting cropping to square aspect ratio. Files that can't be processed automatically will be saved in path {0}.\n".format(manDir)
    currentClass = ""
    for img, class_, fileName in self.load_data("all", grayscale=False, outputActions=False, yieldFilename=True):
        currentFilePath = self.get_root_path() + "/" + class_ + "/" + fileName
        if not currentClass == class_:
            currentClass = class_
            manDir = utils.get_parent_dir(self.get_root_path()) + "/" + manuallyDecideFolderName + "/" + class_ + "/"
            utils.create_dir_if_necessary(manDir)

        croppedImg = utils.crop_to_square(img)
        if croppedImg is None:
            # Could not crop the image to a square because the aspect ratio was too big / small.
            # Save it to the manual-decision path and remove the original image.
            cv.imwrite(manDir + fileName, img)
            remove(currentFilePath)
            rejectedImages += 1
        else:
            cv.imwrite(currentFilePath, croppedImg)

        numberOfImagesDone += 1
        utils.show_progress(True, numberOfImagesDone, numberOfImages, "Processing class \t{0}.\tRejected images:{1}\tTotal progress:", class_, rejectedImages)

    print "\n\nCropping finished. Rejected images:{0}".format(rejectedImages)
    print "\n"
    if Settings.G_MAIL_REPORTS:
        MailServer.send_mail("Rejected images:{0}".format(rejectedImages), "cropping finished")
    raw_input("Press any key to continue.")
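# A possible sketch of utils.crop_to_square (an assumption: it center-crops and
# returns None when the aspect ratio is too extreme to crop sensibly; the
# maxRatio threshold of 2.0 is made up for illustration).
def crop_to_square(image, maxRatio=2.0):
    h, w = image.shape[:2]
    if max(w, h) > maxRatio * min(w, h):
        return None  # too wide / too tall, decide manually
    side = min(w, h)
    x = (w - side) // 2
    y = (h - side) // 2
    return image[y:y + side, x:x + side]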
def _init_folders(self):
    create_dir_if_necessary(self.confusion_matrix_path)
    create_dir_if_necessary(self.loss_path_general)
    create_dir_if_necessary(os.path.join(self.image_path, 'f1'))
    create_dir_if_necessary(self.f1_path_heads)
    create_dir_if_necessary(self.f1_curves)
    create_dir_if_necessary(self.f1_path_heads_sentiment)
    create_dir_if_necessary(os.path.join(self.image_path, 'recall'))
    create_dir_if_necessary(self.recall_path_heads_sentiment)
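# A minimal sketch of the create_dir_if_necessary helper used throughout this
# codebase (an assumption; the real utils implementation may differ).
import os

def create_dir_if_necessary(path):
    # Create the directory (and any missing parents) only if it does not exist yet.
    if not os.path.exists(path):
        os.makedirs(path)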
def __augment_test_data_iteration(self, normTestDataRootPath, iteration, iterationOps, numberOfOps, uniqueOps=True, saveOriginalImage=False):
    """
    normTestDataRootPath: root path for the dataset
    iteration: number of the current iteration (only cosmetic)
    iterationOps: list of iteration operation tuples [(function, [possibleParams])]
    numberOfOps: number of ops to perform - given as a range tuple (min, max)
    uniqueOps: should operations be unique (only one op of each type per image)
    saveOriginalImage: should we save the original image
    """
    print "\n\nIteration {0}:\n".format(iteration)

    # Segment the test data into a single segment.
    self.segment_test_data({"all": 1})
    self.new_segmentation()

    numberOfImages = self.testDataSize
    numberOfImagesDone = 0
    numberOfnewImagesDone = 0
    currentClass = ""
    for img, class_, fileName in self.load_data("all", grayscale=False, outputActions=False, yieldFilename=True):
        fileId = osPath.splitext(fileName)[0]
        path = normTestDataRootPath + "/" + class_ + "/"

        # Create the class directory when a new class begins.
        if not currentClass == class_:
            currentClass = class_
            utils.create_dir_if_necessary(path)

        path += str(numberOfImagesDone) + "_it" + str(iteration)

        # Calculate the actual number of ops for this image using the numberOfOps range tuple.
        numberOfOpsForImage = randint(numberOfOps[0], numberOfOps[1])

        # Get a list of unique operation indices if uniqueOps requires it.
        ops = []
        if uniqueOps:
            ops = sample(range(len(iterationOps)), numberOfOpsForImage)
        else:
            ops = [randint(0, len(iterationOps) - 1) for _ in xrange(numberOfOpsForImage)]

        for op in ops:
            changedImg = None
            # Check if the op needs a parameter.
            if iterationOps[op][1]:
                parameterIndex = randint(0, len(iterationOps[op][1]) - 1)
                changedImg = iterationOps[op][0](img, iterationOps[op][1][parameterIndex])
            else:
                changedImg = iterationOps[op][0](img)
            changedImgPath = path + "_" + str(numberOfnewImagesDone) + "_OP" + str(op) + ".jpg"
            cv.imwrite(changedImgPath, changedImg)
            numberOfnewImagesDone += 1

        # Save the original image in the new dataset if saveOriginalImage requires it.
        if saveOriginalImage:
            cv.imwrite(path + ".jpg", img)

        numberOfImagesDone += 1
        utils.show_progress(True, numberOfImagesDone, numberOfImages, "Iteration {0} - New images: {1}\tProgress:", iteration, numberOfnewImagesDone)

    numberOfTotalImages = numberOfImagesDone + numberOfnewImagesDone
    print "\nIteration {0} done.\nCurrent number of images in data set: {1}\nReloading dataset.".format(iteration, numberOfTotalImages)
    self.reset_data_set()
    loadedSize = self.set_root_path(normTestDataRootPath)
    if not loadedSize >= numberOfTotalImages:
        print "Reloading was not successful! Number of actually reloaded images: {0} - Expected number of images: {1}.".format(loadedSize, numberOfTotalImages)
        raw_input("Press any key to continue.")
        return None
    print "Reloading successful."
    return numberOfnewImagesDone
def augment_test_data(self, newName, cherryPickIteration, equalizeAfter2ndIterationSize):
    # Functions that get applied during the first iteration.
    firstIterationOps = [
        (utils.flip_image_horizontal, []),
        (utils.flip_image_vertical, []),
        (utils.rotate_image, [-90, -180, 90]),
        (utils.equalize_BGR_image, [])]

    # Second iteration.
    possibleAngleValues = range(5, 355)
    secondIterationOps = [(utils.rotate_image, possibleAngleValues)]

    # Third iteration.
    possibleLightValues = range(-70, -20, 5)
    possibleLightValues.extend(range(25, 75, 5))
    thirdIterationOps = [
        (utils.change_brightness, possibleLightValues),
        (utils.change_saturation, possibleLightValues)]

    normTestDataRootPath = utils.get_parent_dir(self.get_root_path()) + "/" + newName
    # When cherry-picking, use the same path as the original.
    if newName == "":
        normTestDataRootPath = self.get_root_path()
    utils.create_dir_if_necessary(normTestDataRootPath)
    print "Saving new test data set in path", normTestDataRootPath

    firstIterationData = [1, "-", "-", "-", "-"]
    secondIterationData = [2, "-", "-", "-", "-"]
    thirdIterationData = [3, "-", "-", "-", "-"]

    if cherryPickIteration == 1 or cherryPickIteration == -1:
        # First iteration
        startTime = time.clock()
        imagesBefore = self.testDataSize
        numberOfnewImagesDone = self.__augment_test_data_iteration(normTestDataRootPath, 1, firstIterationOps, (2, 2), True, True)
        firstIterationData = [1, imagesBefore, numberOfnewImagesDone, self.testDataSize, time.clock() - startTime]

    # Second iteration
    if cherryPickIteration == 2 or cherryPickIteration == -1:
        startTime = time.clock()
        imagesBefore = self.testDataSize
        numberOfnewImagesDone = self.__augment_test_data_iteration(normTestDataRootPath, 2, secondIterationOps, (0, 3), False, False)
        secondIterationData = [2, imagesBefore, numberOfnewImagesDone, self.testDataSize, time.clock() - startTime]

    # The third iteration contains light changes. For big images those take a really
    # long time, so equalizing first lets us reduce the image size beforehand.
    if equalizeAfter2ndIterationSize > 0:
        self.normalize_test_data(equalizeAfter2ndIterationSize, "", True)
        self.reset_data_set()
        self.set_root_path(normTestDataRootPath)

    if cherryPickIteration == 3 or cherryPickIteration == -1:
        # Third iteration
        startTime = time.clock()
        imagesBefore = self.testDataSize
        numberOfnewImagesDone = self.__augment_test_data_iteration(normTestDataRootPath, 3, thirdIterationOps, (0, 0), False, False)
        thirdIterationData = [3, imagesBefore, numberOfnewImagesDone, self.testDataSize, time.clock() - startTime]

    results = utils.get_table(["It", "IMGs Before", "New IMGs", "IMGs After", "Elapsed Time"], 1, firstIterationData, secondIterationData, thirdIterationData)
    print "All operations finished.\n\n"
    print results
    print "\n"
    if Settings.G_MAIL_REPORTS:
        MailServer.send_mail(results.get_html_string(), "Increasing finished")
    raw_input("Press any key to continue.")
test_params = {
    **baseline,
    **{
        'task': 'coarse',
        'log_every_xth_iteration': -1,
        'seed': None
    }
}
rc = get_default_params(use_cuda=True, overwrite={}, from_default=test_params)

logger = logging.getLogger(__name__)
dataset_logger = logging.getLogger('data_loader')
logger.debug('Load dataset')

path = os.path.join(os.getcwd(), 'evaluation')
utils.create_dir_if_necessary(path)

f1_scores_test = []
f1_scores_val = []
for i in range(8):
    print('New Iteration')
    dataset = load_dataset(rc, dataset_logger, rc.task)
    logger.debug('dataset loaded')
    logger.debug('Load model')
    trainer = load_model(dataset, rc, experiment_name)
    logger.debug('model loaded')

    trainer.train(perform_evaluation=False, use_cuda=use_cuda)
    result = trainer.perform_final_evaluation(use_test_set=True, verbose=False)
def load_data(self, loader, custom_preprocessing: data.Pipeline = DEFAULT_DATA_PIPELINE, verbose=True):
    self.verbose = verbose
    if self.verbose:
        # Create an image folder for dataset statistics.
        self.img_stats_folder = os.path.join(self.data_path, 'stats')
        create_dir_if_necessary(self.img_stats_folder)

    self.logger.info(f'Getting {self.pretrained_word_embeddings} with dimension {self.pretrained_word_embeddings_dim}')
    word_vectors: vocab
    word_vectors = None
    if self.pretrained_word_embeddings == 'glove':
        word_vectors = vocab.GloVe(name=self.pretrained_word_embeddings_name, dim=self.pretrained_word_embeddings_dim)
    elif self.pretrained_word_embeddings == 'fasttext':
        word_vectors = vocab.FastText(language=self.language)
    self.logger.info('Word vectors successfully loaded.')

    self.logger.debug('Start loading dataset')
    self.dataset = loader(self.name, word_vectors, self.configuration, self.batch_size, self.data_path, self.train_file, self.valid_file, self.test_file, self.use_cuda, self.verbose)

    self.vocabs = self.dataset['vocabs']
    self.task = self.dataset['task']
    self.ds_stats = self.dataset['stats']
    self.split_length = self.dataset['split_length']
    self.train_iter, self.valid_iter, self.test_iter = self.dataset['iters']
    self.fields = self.dataset['fields']
    self.target = self.dataset['target']
    self.target_names = [n for n, _ in self.target]
    self.examples = self.dataset['examples']
    self.embedding = self.dataset['embeddings']
    self.dummy_input = self.dataset['dummy_input']
    self.source_field_name = self.dataset['source_field_name']
    self.target_field_name = self.dataset['target_field_name']
    self.padding_field_name = self.dataset['padding_field_name']
    self.baselines = self.dataset['baselines']

    self.target_size = len(self.vocabs[self.target_vocab_index])
    self.source_embedding = self.embedding[self.source_index]
    self.class_labels = list(self.vocabs[self.target_vocab_index].itos)

    self.source_reverser = self.dataset['source_field']
    self.target_reverser = self.target[0]
    self.log_parameters()

    if verbose:
        # sns.set(style="whitegrid")
        sns.set_style("white")
        sns.despine()
        sns.set_color_codes()
        # sns.set_context("paper")
        sns.set(rc={"font.size": 18, "axes.labelsize": 22})
        # sns.set(font_scale=1.7)
        self.show_stats()
    else:
        self._calculate_dataset_stats()

    self.logger.info('Dataset loaded. Ready for training')
def get_save_path_for_visualizations(self):
    """ Returns the path for visualizations for the current model."""
    path = self.get_save_path() + "visualizations/"
    utils.create_dir_if_necessary(path)
    return path