Example #1
    def _get_info(self, ident, request, base_uri, src_fp=None, src_format=None):
        if self.enable_caching:
            in_cache = request in self.info_cache
        else:
            in_cache = False

        if in_cache:
            return self.info_cache[request]
        else:
            if not all((src_fp, src_format)):
                # get_img can pass in src_fp, src_format because it needs them
                # elsewhere; get_info does not.
                src_fp, src_format = self.resolver.resolve(ident)

            formats = self.transformers[src_format].target_formats

            self.logger.debug('Format: %s' % (src_format,))
            self.logger.debug('File Path: %s' % (src_fp,))
            self.logger.debug('Identifier: %s' % (ident,))
            self.logger.debug('Base URI: %s' % (base_uri,))

            # get the info
            info = ImageInfo.from_image_file(base_uri, src_fp, src_format, formats)

            # store
            if self.enable_caching:
                self.logger.debug('ident used to store %s: %s' % (ident, ident))
                self.info_cache[request] = info
                # pick up the timestamp... :()
                info,last_mod = self.info_cache[request]
            else:
                last_mod = None

            return (info,last_mod)
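Both variants of _get_info store a bare ImageInfo object in the info cache and then immediately read the same key back to unpack an (info, last_mod) pair, so the cache itself is expected to attach the timestamp. A minimal sketch of a dict-like cache with that contract follows; the class name and the use of datetime for the timestamp are illustrative assumptions, not the application's actual cache implementation.

from datetime import datetime


class TimestampingCache(object):
    """Illustrative stand-in: writes store only the info object, reads
    return an (info, last_mod) pair, matching how _get_info re-reads the
    key it just stored to pick up the timestamp."""

    def __init__(self):
        self._store = {}

    def __contains__(self, key):
        return key in self._store

    def __setitem__(self, key, info):
        # Attach the modification time when the info object is stored.
        self._store[key] = (info, datetime.utcnow())

    def __getitem__(self, key):
        # Yields the (info, last_mod) tuple the example unpacks.
        return self._store[key]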
Example #2
def train_model(args, params):
    """Trains a model on the training data.

  The test data is used to report validation accuracy after each training epoch.
  The model can be trained from scratch, or existing weights can be updated.
  
  Args:
    args: the arguments from argparse that contains all user-specified options.
    params: a ModelParams object containing the appropriate data file paths,
        data parameters, and training hyperparameters.
  """
    # Set the data parameters and image source paths.
    img_info = ImageInfo(params)
    # Load the model and (possibly) its weights.
    model = get_model(args, img_info)
    # Save the model if that option was specified.
    if args.save_model:
        with open(args.save_model, 'w') as f:
            f.write(model.to_json())
        print('Saved model architecture to {}.'.format(args.save_model))
    # Compile the model.
    print('Compiling model...')
    timer = ElapsedTimer()
    compile_model(model, params)
    print('Done in {}.'.format(timer.get_elapsed_time()))
    # Load the images into memory and preprocess appropriately.
    timer.reset()
    img_loader = ImageLoader(img_info)
    img_loader.load_all_images()
    print('Data successfully loaded in {}.'.format(timer.get_elapsed_time()))
    # Train the model.
    timer.reset()
    # TODO: implement data augmentation option.
    model.fit(img_loader.train_data,
              img_loader.train_labels,
              validation_data=(img_loader.test_data, img_loader.test_labels),
              batch_size=params['batch_size'],
              nb_epoch=params['num_epochs'],
              shuffle=True,
              show_accuracy=True,
              verbose=1)
    print('Finished training in {}.'.format(timer.get_elapsed_time()))
    # Save the weights if that option was specified.
    if args.save_weights:
        model.save_weights(args.save_weights)
        print('Saved trained model weights to {}.'.format(args.save_weights))
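A hypothetical way to drive train_model, reconstructed only from what the function reads: args must expose save_model and save_weights (plus whatever options get_model expects, such as a weights path to load), and params must support lookups for 'batch_size' and 'num_epochs'. The flag names and the dict standing in for a ModelParams object below are assumptions for illustration, not the project's actual CLI.

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--load-weights', dest='load_weights', default=None)
parser.add_argument('--save-model', dest='save_model', default=None)
parser.add_argument('--save-weights', dest='save_weights', default=None)
args = parser.parse_args(['--save-weights', 'trained_weights.h5'])

# Stand-in for a ModelParams object; only the keys the function reads are set.
params = {'batch_size': 32, 'num_epochs': 10}

train_model(args, params)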
Example #3
    def _get_info(self,
                  ident,
                  request,
                  base_uri,
                  src_fp=None,
                  src_format=None):
        if self.enable_caching:
            in_cache = ident in self.info_cache
        else:
            in_cache = False

        if in_cache:
            return self.info_cache[ident]
        else:
            if not all((src_fp, src_format)):
                # get_img can pass in src_fp, src_format because it needs them
                # elsewhere; get_info does not.
                src_fp, src_format = self.resolver.resolve(ident)

            formats = self.transformers[src_format].target_formats

            logger.debug('Format: %s' % (src_format, ))
            logger.debug('File Path: %s' % (src_fp, ))
            logger.debug('Identifier: %s' % (ident, ))
            logger.debug('Base URI: %s' % (base_uri, ))

            # get the info
            info = ImageInfo.from_image_file(base_uri, src_fp, src_format,
                                             formats)

            # store
            if self.enable_caching:
                # Elusive bug. For some reason, every once in a while, ident
                # is the path on the file system rather than the URI.
                # One thing that's confusing about it is that here 'ident' is
                # used to mean the identifier slice of the request, and in the
                # info cache it's used this way, but ImageInfo.ident is the URI
                # that goes in @id.
                logger.debug('ident used to store %s: %s' % (ident, ident))
                self.info_cache[ident] = info
                # pick up the timestamp... :()
                info, last_mod = self.info_cache[ident]
            else:
                last_mod = None

            return (info, last_mod)
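The only contract _get_info relies on from the resolver is that resolve(ident) returns a (src_fp, src_format) pair, where src_format is a key into self.transformers. A minimal file-system resolver satisfying that contract might look like the sketch below; the class name and the extension-based format guess are assumptions for illustration, not the application's actual resolver.

import os


class SimpleFileResolver(object):
    """Illustrative resolver: maps an identifier to a path under a source
    directory and derives the format key from the file extension."""

    def __init__(self, src_root):
        self.src_root = src_root

    def resolve(self, ident):
        src_fp = os.path.join(self.src_root, ident)
        # e.g. 'jp2', 'tif', or 'jpg'; this key selects the transformer.
        src_format = os.path.splitext(src_fp)[1].lstrip('.').lower()
        return (src_fp, src_format)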
Example #4
def test_model(args, params):
    """Tests a model on the test data set.

  Prints out the final accuracy of the predictions on the test data. Also prints
  a normalized confusion matrix if that argument is specified by the user.
  
  Args:
    args: the arguments from argparse that contains all user-specified options.
        The model weights that are to be tested must be provided.
    params: a ModelParams object containing the appropriate data file paths and
        data parameters.
  """
    if not args.load_weights:
        print('Cannot test model: no weights provided.')
        return
    img_info = ImageInfo(params, test_only=True)
    # Load the model and its weights and compile it.
    model = get_model(args, img_info)
    print('Compiling model...')
    timer = ElapsedTimer()
    compile_model(model, params)
    print('Done in {}.'.format(timer.get_elapsed_time()))
    # Load the test images into memory and preprocess appropriately.
    timer.reset()
    img_loader = ImageLoader(img_info)
    img_loader.load_test_images()
    print('Test data successfully loaded in {}.'.format(
        timer.get_elapsed_time()))
    # Run the evaluation on the test data.
    timer.reset()
    predictions = model.predict_classes(img_loader.test_data,
                                        batch_size=params['batch_size'])
    print('Finished testing in {}.'.format(timer.get_elapsed_time()))
    # Compute the percentage of correct classifications.
    num_predicted = len(predictions)
    num_correct = 0
    num_classes = params['number_of_classes']
    confusion_matrix = np.zeros((num_classes, num_classes))
    misclassified = []
    # Convert the test image dictionary to a flat list.
    test_img_files = []
    for i in range(num_classes):
        for img_file in img_info.test_img_files[i]:
            test_img_files.append(img_file)
    # Compute confusion matrix and find incorrect classifications.
    for i in range(num_predicted):
        predicted_class = predictions[i]
        correct = np.nonzero(img_loader.test_labels[i])
        correct = correct[0][0]
        confusion_matrix[correct][predicted_class] += 1
        if predicted_class == correct:
            num_correct += 1
        else:
            # Save the image file name, its correct class, and its predicted class.
            misclassified.append((test_img_files[i], correct, predicted_class))
    accuracy = round(float(num_correct) / float(num_predicted), 4)
    print('Predicted classes for {} images with accuracy = {}'.format(
        num_predicted, accuracy))
    if args.confusion_matrix:
        # Normalize and print the matrix.
        per_row_totals = confusion_matrix.sum(axis=1)
        confusion_matrix = confusion_matrix.transpose() / per_row_totals
        confusion_matrix = confusion_matrix.transpose()
        output = ''
        for row in confusion_matrix:
            row_list = list(row)
            output += ' '.join(map(str, row_list)) + '\n'
        with open(args.confusion_matrix, 'w') as f:
            f.write(output)
        print('Saved confusion matrix to {}.'.format(args.confusion_matrix))
    if args.report_misclassified:
        with open(args.report_misclassified, 'w') as f:
            for example in misclassified:
                img_path, img_class, predicted_class = example
                f.write('{} {} {}\n'.format(img_path, img_class,
                                            predicted_class))
        print('Saved misclassified images report to {}.'.format(
            args.report_misclassified))
    if args.report_scores:
        print('Computing instance scores...')
        scores = model.predict_proba(img_loader.test_data,
                                     batch_size=params['batch_size'],
                                     verbose=0)
        score_template = ' '.join(['{}'] * num_classes)
        with open(args.report_scores, 'w') as f:
            for i in range(len(scores)):
                score_list = [round(score, 5) for score in scores[i]]
                score_string = score_template.format(*score_list)
                f.write('{} {}\n'.format(test_img_files[i], score_string))
        print('Saved scores report to {}.'.format(args.report_scores))
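The transpose/divide/transpose idiom used to normalize the confusion matrix is equivalent to dividing each row by its sum via broadcasting. A small self-contained check with made-up counts, assuming every class appears at least once in the test set so no row sum is zero:

import numpy as np

cm = np.array([[8., 1., 1.],
               [2., 6., 2.],
               [0., 3., 7.]])

row_totals = cm.sum(axis=1)
normalized_a = (cm.T / row_totals).T                  # the example's idiom
normalized_b = cm / cm.sum(axis=1, keepdims=True)     # equivalent broadcast form

assert np.allclose(normalized_a, normalized_b)
print(normalized_a)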