Example #1
    def create_descriptors(self):
        """
        Creates initial descriptors needed for the BoW by loading images in the trainSVM segment. 
        Returns: list of descriptors
        """
        # For testing purposes, only load 15 classes and 20 images per class.
        descriptors = []
        totalNumberOfDescriptors = float(self.testData.numberOfClasses * self.testData.segmentSizeMean["trainSVM"])

        if Settings.G_DETAILED_CONSOLE_OUTPUT:
            print "\nCreating {0} {1} descriptors.".format(totalNumberOfDescriptors, str(self))
        for img, _ in self.testData.load_data("trainSVM", outputActions=False, resolutionSize=self.imageSize, transformation=self.transform):
            
            _, descriptor = self.compute_descriptor(img)
            # Add the descriptor to the other descriptors we have.
            if descriptor is None or len(descriptor) == 0:
                print "\n** Could not find descriptors and/or keypoints for Image. **\n"

                # save for failure analysis
                vPath = self.modelSaver.get_save_path_for_visualizations() + "/noKeyPoints/"
                utils.create_dir_if_necessary(vPath)
                fileName = utils.get_uuid() + ".jpg"
                cv.imwrite(vPath + fileName, img)
                continue
  
            descriptors.append(descriptor)   
            # clear the reference to keep RAM usage down
            img = []

            # Print progress
            utils.show_progress(Settings.G_DETAILED_CONSOLE_OUTPUT, len(descriptors), totalNumberOfDescriptors, "Descriptor creation Progress:")

        return descriptors
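A minimal sketch (not part of the original code) of the step that typically follows: clustering the returned descriptors into a visual vocabulary with OpenCV's BOWKMeansTrainer. The `descriptors` name and the vocabulary size of 100 are assumptions.

import numpy as np
import cv2

def build_vocabulary(descriptors, vocabulary_size=100):
    """Cluster local descriptors into a BoW vocabulary of visual words."""
    trainer = cv2.BOWKMeansTrainer(vocabulary_size)
    for descriptor in descriptors:
        trainer.add(np.float32(descriptor))  # the trainer expects float32 input
    return trainer.cluster()  # array of shape (vocabulary_size, descriptor_dim)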
Example #2
    def _save(self, name, samples):
        path = os.path.join(os.getcwd(), 'data', 'data', 'cache')
        create_dir_if_necessary(path)
        samples_path = os.path.join(path, name + ".pkl")
        aspects_path = os.path.join(path, name + "_aspects.pkl")

        # print(f'Trying to save loaded dataset to {samples_path}.')
        with open(samples_path, 'wb') as f:
            pickle.dump(samples, f)
            # print(f'Model {name} successfully saved.')

        with open(aspects_path, "wb") as f:
            pickle.dump(self.aspects, f)
Example #3
    def get_save_path(self):
        """ Returns the current model root path."""
        fileName = self.modelUuid
        path = utils.get_data_path() + fileName + "/"

        try:
            utils.create_dir_if_necessary(path)
        except Exception:
            logging.exception(
                "Could not create dir to save classifier in. Saving in {0} instead."
                .format(utils.get_data_path()))
            path = utils.get_data_path()
        return path
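The `create_dir_if_necessary` helper used throughout these examples is not shown on this page; a minimal sketch of what such a helper usually looks like (an assumption, not the project's actual implementation):

import os

def create_dir_if_necessary(path):
    """Create `path` (including parents) if it does not exist yet."""
    if not os.path.exists(path):
        os.makedirs(path)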
Example #4
    def __call__(self, model, validLoss):
        """ Saves the model."""

        path = self.get_save_path()

        # save SVMs separately because OpenCV provides its own method for saving an SVM
        svmPath = path + "svms/"
        utils.create_dir_if_necessary(svmPath)
        for svmName in model.svms:
            model.svms[svmName].save(svmPath)

        # pickle self
        with open(path + "model", "wb") as f:
            pickle.dump(model, f)
        self.add_model_to_dict(path, validLoss, str(model))
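The comment above refers to OpenCV's built-in SVM serialization. A minimal sketch of that round trip using the OpenCV 3+ `ml` module (the filename is a placeholder; the original code likely targets an older OpenCV API):

import cv2

svm = cv2.ml.SVM_create()
# ... train the SVM on samples and labels ...
svm.save("svms/svm.xml")  # OpenCV writes its own XML/YAML representation
restored = cv2.ml.SVM_load("svms/svm.xml")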
Example #5
    def _try_load(self, name, fields):
        path = os.path.join(os.getcwd(), 'data', 'data', 'cache')
        create_dir_if_necessary(path)
        samples_path = os.path.join(path, name + "2.pkl")
        aspects_path = os.path.join(path, name + "_2aspects.pkl")

        if not check_if_file_exists(samples_path) or not check_if_file_exists(
                aspects_path):
            return [], None

        with open(samples_path, 'rb') as f:
            examples = pickle.load(f)

        with open(aspects_path, 'rb') as f:
            self.aspects = pickle.load(f)

        # get all fields
        fields = self._construct_fields(fields)
        return examples, fields
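A hypothetical usage of this pickle cache; `loader`, the dataset name, and `build_samples` are illustrative assumptions. Note that as excerpted, `_try_load` looks for a "2.pkl" suffix while `_save` in Example #2 writes plain ".pkl", so the suffixes would have to agree for a round trip to hit the cache.

examples, fields = loader._try_load('germeval', field_definitions)
if not examples:
    examples = build_samples()          # stands in for expensive preprocessing
    loader._save('germeval', examples)  # cache the result for the next run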
Example #6
    def normalize_test_data(self, size, newName="", forceOverwrite=False):
        normTestDataRootPath = utils.get_parent_dir(self.get_root_path()) + "/"
        if newName == "":
            if not forceOverwrite:
                overwrite = utils.radio_question("[?]", "Do you really wish to overwrite existing images?", None, ["Yes", "No"], [True, False])
            else:
                overwrite = True
            if not overwrite:
                normTestDataRootPath += utils.value_question("", "Provide new foldername:", "s")
            else:
                normTestDataRootPath = self.get_root_path()
        else:
            normTestDataRootPath += newName

        utils.create_dir_if_necessary(normTestDataRootPath)
        print "Saving equalized test data set in path",normTestDataRootPath

        # segment data with only one segment
        self.segment_test_data({"all": 1})
        self.new_segmentation()

        numberOfImages = self.testDataSize
        numberOfImagesDone = 0
        
        currentClass = ""

        print "Starting equalization.\n"
        for img, class_, fileName in self.load_data("all", grayscale=False, outputActions=False, yieldFilename=True):

            path = normTestDataRootPath + "/" + class_ + "/" 
            # reset counter if new class
            if currentClass != class_:
                currentClass = class_                
                utils.create_dir_if_necessary(path)
                                
            resizedImg = utils.equalize_image_size(img, size)
            path += fileName
            cv.imwrite(path, resizedImg)
            numberOfImagesDone += 1
            utils.show_progress(True, numberOfImagesDone, numberOfImages, "Processing class {0}.\tTotal progress:", currentClass)
        print "\nEqualization finished."
        print "\n"
Example #7
    def __init__(self, name, netId, plotLayerWeigths, modelSaver):
        self.name = name
        self.netId = netId
        self.plotLayers = plotLayerWeigths

        path = modelSaver.get_save_path_for_visualizations() + "train_history/"
        self.lossPath = path + "loss/"
        self.accPath = path + "accuracy/"

        utils.create_dir_if_necessary(path)
        utils.create_dir_if_necessary(self.lossPath)
        utils.create_dir_if_necessary(self.accPath)

        if plotLayerWeigths:
            # create dir for each layer
            self.cWeightsPath = path + "conv_weights/"
            utils.create_dir_if_necessary(self.cWeightsPath)
            for layer in plotLayerWeigths:
                utils.create_dir_if_necessary(self.cWeightsPath + str(layer) +
                                              "/")
Example #8
    def crop_test_data_to_square(self, manuallyDecideFolderName):
        self.segment_test_data({"all": 1})
        self.new_segmentation()

        numberOfImages = self.testDataSize
        numberOfImagesDone = 0        
        rejectedImages = 0

        manDir = utils.get_parent_dir(self.get_root_path()) + "/" + manuallyDecideFolderName

        print "Starting cropping to square aspect ratio. Files that can't be processed automatically will be saved in path {0}.\n".format(manDir)

        currentClass = ""
        for img, class_, fileName in self.load_data("all", grayscale=False, outputActions=False, yieldFilename=True):
            currentFilePath = self.get_root_path() + "/" + class_ + "/" + fileName   

            if currentClass != class_:
                currentClass = class_
                manDir = utils.get_parent_dir(self.get_root_path()) + "/" + manuallyDecideFolderName + "/" + class_ + "/" 
                utils.create_dir_if_necessary(manDir)

            croppedImg = utils.crop_to_square(img)
            if croppedImg is None:
                # could not crop image to square because the aspect ratio was too big / small;
                # save to the path where we have to decide manually and remove the original image
                cv.imwrite(manDir + fileName, img)
                remove(currentFilePath)
                rejectedImages += 1
            else:
                cv.imwrite(currentFilePath, croppedImg)
            numberOfImagesDone += 1
            utils.show_progress(True, numberOfImagesDone, numberOfImages, "Processing class \t{0}.\tRejected images:{1}\tTotal progress:", class_, rejectedImages)
        print "\n\nCropping finished. Rejected images:{0}".format(rejectedImages)
        print "\n"

        if Settings.G_MAIL_REPORTS:
            MailServer.send_mail("Rejected images:{0}".format(rejectedImages), "cropping finished")

        raw_input("Press any key to continue.")
Example #9
	def _init_folders(self):
		create_dir_if_necessary(self.confusion_matrix_path)
		create_dir_if_necessary(self.loss_path_general)
		create_dir_if_necessary(os.path.join(self.image_path, 'f1'))
		create_dir_if_necessary(self.f1_path_heads)
		create_dir_if_necessary(self.f1_curves)
		create_dir_if_necessary(self.f1_path_heads_sentiment)
		create_dir_if_necessary(os.path.join(self.image_path, 'recall'))
		create_dir_if_necessary(self.recall_path_heads_sentiment)
Example #10
    def __augment_test_data_iteration(self, normTestDataRootPath, iteration, iterationOps, numberOfOps, uniqueOps=True, saveOriginalImage=False):
        """
        normTestDataRootPath: root path for the dataset
        iteration: number of current iteration (only cosmetic)
        iterationOps: list of iteration operation tuples [(function, [possibleParams])]
        numberOfOps: number of ops to perform - format tuple range (min, max)
        uniqueOps: should operations be unique (only one op of this type per image)
        saveOriginalImage: should we save the original image
        """
        print "\n\nIteration {0}:\n".format(iteration)

        # segment test data
        self.segment_test_data({"all": 1})
        self.new_segmentation()

        numberOfImages = self.testDataSize
        numberOfImagesDone = 0
        numberOfnewImagesDone = 0
        
        currentClass = ""
        for img, class_, fileName in self.load_data("all", grayscale=False, outputActions=False, yieldFilename=True):
            fileId = osPath.splitext(fileName)[0]
            path = normTestDataRootPath + "/" + class_ + "/" 
            # reset counter if new class
            if currentClass != class_:
                currentClass = class_                
                utils.create_dir_if_necessary(path)
            path += str(numberOfImagesDone) + "_it" + str(iteration)
            
            # calculate the actual number of ops for this image using the numberOfOps range tuple
            numberOfOpsForImage = randint(numberOfOps[0], numberOfOps[1])

            # get a list of unique indices for operations to perform on images if uniqueOps requires it
            ops = []
            if uniqueOps:
                ops = sample(range(len(iterationOps)), numberOfOpsForImage)
            else:
                ops = [randint(0,len(iterationOps)-1) for _ in xrange(numberOfOpsForImage)]
            for op in ops:
                changedImg = None
                # check if op needs a parameter
                if iterationOps[op][1]:
                    parameterIndex = randint(0, len(iterationOps[op][1])-1)
                    changedImg = iterationOps[op][0](img, iterationOps[op][1][parameterIndex])
                else:
                    changedImg = iterationOps[op][0](img)
                changedImgPath = path + "_" + str(numberOfnewImagesDone) + "_OP" + str(op) + ".jpg"
                cv.imwrite(changedImgPath, changedImg)
                numberOfnewImagesDone += 1
            # save original image in new dataset if saveOriginalImage requires it
            if saveOriginalImage:
                cv.imwrite(path + ".jpg", img)
            numberOfImagesDone += 1
            utils.show_progress(True, numberOfImagesDone, numberOfImages, "Iteration {0} - New images: {1}\tProgress:", iteration, numberOfnewImagesDone)
        numberOfTotalImages = numberOfImagesDone + numberOfnewImagesDone
        print "\nIteration {0} done.\nCurrent number of images in data set: {1}\nReloading dataset.".format(iteration, numberOfTotalImages)

        self.reset_data_set()
        loadedSize = self.set_root_path(normTestDataRootPath)
        if loadedSize < numberOfTotalImages:
            print "Reloading was not successful! Number of actually reloaded images: {0} - Expected number of images: {1}.".format(loadedSize, numberOfTotalImages)
            raw_input("Press any key to continue.")
            return None
        print "Reloading successful."
        return numberOfnewImagesDone
Example #11
    def augment_test_data(self, newName, cherryPickIteration, equalizeAfter2ndIterationSize):

        # functions that get applied during the first iteration
        firstIterationOps = [(utils.flip_image_horizontal, []), (utils.flip_image_vertical, []), (utils.rotate_image, [-90, -180, 90]), (utils.equalize_BGR_image, [])]

        # second iteration        
        possibleAngleValues = range(5, 355)
        secondIterationOps = [(utils.rotate_image, possibleAngleValues)]

        # third iteration
        possibleLightValues = range(-70, -20, 5)
        possibleLightValues.extend(range(25, 75, 5))
        thirdIterationOps = [(utils.change_brightness, possibleLightValues), (utils.change_saturation, possibleLightValues)]

        normTestDataRootPath = utils.get_parent_dir(self.get_root_path()) + "/" + newName
        # when cherry-picking, use the same path as the original
        if newName == "":
            normTestDataRootPath = self.get_root_path()
        
        
        utils.create_dir_if_necessary(normTestDataRootPath)
        print "Saving new test data set in path",normTestDataRootPath

        firstIterationData = [1, "-", "-", "-", "-"]
        secondIterationData = [2, "-", "-", "-", "-"]
        thirdIterationData = [3, "-", "-", "-", "-"]

        if cherryPickIteration == 1 or cherryPickIteration == -1:
            # First iteration
            startTime = time.clock()
            imagesBefore = self.testDataSize
            numberOfnewImagesDone = self.__augment_test_data_iteration(normTestDataRootPath, 1, firstIterationOps, (2,2), True, True)
            firstIterationData = [1, imagesBefore, numberOfnewImagesDone, self.testDataSize, time.clock() - startTime]

        # Second iteration
        if cherryPickIteration == 2 or cherryPickIteration == -1:
            startTime = time.clock()
            imagesBefore = self.testDataSize
            numberOfnewImagesDone = self.__augment_test_data_iteration(normTestDataRootPath, 2, secondIterationOps, (0,3), False, False)
            secondIterationData = [2, imagesBefore, numberOfnewImagesDone, self.testDataSize, time.clock() - startTime]

        # The third iteration contains lighting changes, which take a very long time
        # for big images. Equalizing first lets us reduce the image size.
        if equalizeAfter2ndIterationSize > 0:
            self.normalize_test_data(equalizeAfter2ndIterationSize, "", True)
            self.reset_data_set()
            self.set_root_path(normTestDataRootPath)

        if cherryPickIteration == 3 or cherryPickIteration == -1:
            # Third iteration
            startTime = time.clock()
            imagesBefore = self.testDataSize
            numberOfnewImagesDone = self.__augment_test_data_iteration(normTestDataRootPath, 3, thirdIterationOps, (0,0), False, False)
            thirdIterationData = [3, imagesBefore, numberOfnewImagesDone, self.testDataSize, time.clock() - startTime]
        results = utils.get_table(["It", "IMGs Before", "New IMGs", "IMGs After", "Elapsed Time"], 1, firstIterationData, secondIterationData, thirdIterationData)
        print "All operations finished.\n\n"
        print results
        print "\n"

        if Settings.G_MAIL_REPORTS:
            MailServer.send_mail(results.get_html_string(), "Increasing finished")
        raw_input("Press any key to continue.")
Example #12
test_params = {
    **baseline,
    **{
        'task': 'coarse',
        'log_every_xth_iteration': -1,
        'seed': None
    }
}

rc = get_default_params(use_cuda=True, overwrite={}, from_default=test_params)
logger = logging.getLogger(__name__)

dataset_logger = logging.getLogger('data_loader')
logger.debug('Load dataset')

path = os.path.join(os.getcwd(), 'evaluation')
utils.create_dir_if_necessary(path)

f1_scores_test = []
f1_scores_val = []

for i in range(8):
    print('New Iteration')
    dataset = load_dataset(rc, dataset_logger, rc.task)

    logger.debug('dataset loaded')
    logger.debug('Load model')
    trainer = load_model(dataset, rc, experiment_name)
    logger.debug('model loaded')

    trainer.train(perform_evaluation=False, use_cuda=use_cuda)
    result = trainer.perform_final_evaluation(use_test_set=True, verbose=False)
Example #13
    def load_data(self,
                  loader,
                  custom_preprocessing: data.Pipeline = DEFAULT_DATA_PIPELINE,
                  verbose=True):

        self.verbose = verbose

        if self.verbose:
            # create an image folder
            self.img_stats_folder = os.path.join(self.data_path, 'stats')
            create_dir_if_necessary(self.img_stats_folder)

        self.logger.info(
            f'Getting {self.pretrained_word_embeddings} with dimension {self.pretrained_word_embeddings_dim}'
        )
        word_vectors: vocab = None
        if self.pretrained_word_embeddings == 'glove':
            word_vectors = vocab.GloVe(
                name=self.pretrained_word_embeddings_name,
                dim=self.pretrained_word_embeddings_dim)
        elif self.pretrained_word_embeddings == 'fasttext':
            word_vectors = vocab.FastText(language=self.language)
        self.logger.info('Word vectors successfully loaded.')

        self.logger.debug('Start loading dataset')
        self.dataset = loader(self.name, word_vectors, self.configuration,
                              self.batch_size, self.data_path, self.train_file,
                              self.valid_file, self.test_file, self.use_cuda,
                              self.verbose)

        self.vocabs = self.dataset['vocabs']
        self.task = self.dataset['task']
        self.ds_stats = self.dataset['stats']
        self.split_length = self.dataset['split_length']
        self.train_iter, self.valid_iter, self.test_iter = self.dataset[
            'iters']
        self.fields = self.dataset['fields']
        self.target = self.dataset['target']
        self.target_names = [n for n, _ in self.target]
        self.examples = self.dataset['examples']
        self.embedding = self.dataset['embeddings']
        self.dummy_input = self.dataset['dummy_input']
        self.source_field_name = self.dataset['source_field_name']
        self.target_field_name = self.dataset['target_field_name']
        self.padding_field_name = self.dataset['padding_field_name']
        self.baselines = self.dataset['baselines']

        self.target_size = len(self.vocabs[self.target_vocab_index])
        self.source_embedding = self.embedding[self.source_index]
        self.class_labels = list(self.vocabs[self.target_vocab_index].itos)

        self.source_reverser = self.dataset['source_field']
        self.target_reverser = self.target[0]
        self.log_parameters()

        if verbose:
            # sns.set(style="whitegrid")
            sns.set_style("white")
            sns.despine()

            sns.set_color_codes()
            # sns.set_context("paper")
            sns.set(rc={"font.size": 18, "axes.labelsize": 22})
            # sns.set(font_scale=1.7)
            self.show_stats()
        else:
            self._calculate_dataset_stats()

        self.logger.info('Dataset loaded. Ready for training')
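A minimal sketch of the torchtext pretrained-embedding lookup used above; the "6B"/300 configuration is a common GloVe choice, not necessarily this project's setting:

from torchtext import vocab

word_vectors = vocab.GloVe(name='6B', dim=300)  # downloads vectors on first use
print(word_vectors.vectors.shape)  # e.g. torch.Size([400000, 300])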
Example #14
    def get_save_path_for_visualizations(self):
        """ Returns the path for visualizations for the current model."""

        path = self.get_save_path() + "visualizations/"
        utils.create_dir_if_necessary(path)
        return path