Example #1
    def pick_reps(self, dbs, dir_path=None, l=2):
        """Pick entries from multiple db json files that match
           the images found in a specified directory

        @param dbs: list of input db json files

        Keyword arguments:
        dir_path: parent directory of the images
        l: level of the path suffix used for matching (default: 2)

        """
        im = image.IMAGE()
        # collect images under dir_path and reduce each path to its
        # last `l` components for suffix matching
        img_sufs = im.find_images(dir_path=dir_path)
        img_sufs = [im.path_suffix(x, level=l) for x in img_sufs]
        # load all db json files into a single DataFrame and keep only
        # the rows whose path suffix matches one of the found images
        df = io.read_jsons_to_df(dbs, orient='index')
        df['path_suf'] = df['path'].apply(lambda x: im.path_suffix(x, level=l))
        df = df[df['path_suf'].isin(img_sufs)]
        io.write_df_json(df, fname='./picked_rep.json')
        return df
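A minimal usage sketch (the enclosing class is not shown in this example, so the `Picker` wrapper and the file names below are assumptions):

# Picker is a hypothetical class exposing pick_reps
picker = Picker()
df = picker.pick_reps(['db_a.json', 'db_b.json'],
                      dir_path='/data/images', l=2)
print(df.head())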
Example #2
# This is an example of creating an HTML page for displaying images
# $ python html_creator.py IP PARENT TEMPLATE
# IP: host IP used to build image URLs (default: local paths under $PWD)
# PARENT: parent folder of the images (default: $PWD)
# TEMPLATE: jinja template file
#           (default: simdat/examples/html_plots.template)
import os
import sys
from simdat.core import image

imtl = image.IMAGE()

args = sys.argv[1:]
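# first argument: host IP used to serve the images over http
# (port 8088 below); otherwise fall back to local paths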
if len(args) > 0:
    ip = ''.join(["http://", args[0], ":8088/"])
else:
    ip = os.getcwd()

if len(args) > 1:
    parent = args[1]
else:
    parent = os.getcwd()
print("Looking for images in %s" % parent)

temp = '/home/tammy/SOURCES/simdat/tools/html_plots.template'
if len(args) > 2:
    temp = args[2]

outf = os.path.join(parent, 'images.html')
title = 'Images'
imgs = imtl.find_images(dir_path=parent)
imgs.sort()
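The snippet ends after collecting and sorting the image list; a sketch of how it might be rendered through the Jinja template follows (the template variable names title, imgs and ip are assumptions):

from jinja2 import Environment, FileSystemLoader

# load the template and write the rendered page next to the images;
# the template variables (title, imgs, ip) are assumed names
env = Environment(loader=FileSystemLoader(os.path.dirname(temp)))
template = env.get_template(os.path.basename(temp))
with open(outf, 'w') as f:
    f.write(template.render(title=title, imgs=imgs, ip=ip))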
Example #3
import os
import cv2
import time
import numpy as np
from keras.optimizers import SGD
from simdat.core import dp_models
from simdat.core import ml
from simdat.core import plot
from simdat.core import image

im = image.IMAGE()
pl = plot.PLOT()
mlr = ml.MLRun()

t0 = time.time()
mdls = dp_models.DPModel()
imnet = dp_models.ImageNet()

weight_path = '/home/tammy/SOURCES/keras/examples/vgg16_weights.h5'
t0 = pl.print_time(t0, 'initiate')

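# build the VGG-16 network and load the pretrained weights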
model = mdls.VGG_16(weight_path)
t0 = pl.print_time(t0, 'load weights')
sgd = SGD(lr=0.1, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(optimizer=sgd, loss='categorical_crossentropy')
t0 = pl.print_time(t0, 'compile')

imgs = im.find_images()
X = []
Y = []
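The example stops with empty feature lists; a sketch of how X might be filled for this channels-first VGG-16 follows (the BGR channel means are the standard values from the original VGG release; running plain prediction on every found image is an assumption):

# resize each image to the VGG-16 input size, subtract the
# per-channel BGR means and reorder to channels-first
for fimg in imgs:
    img = cv2.resize(cv2.imread(fimg), (224, 224)).astype(np.float32)
    img[:, :, 0] -= 103.939
    img[:, :, 1] -= 116.779
    img[:, :, 2] -= 123.68
    X.append(img.transpose((2, 0, 1)))
X = np.array(X)
probs = model.predict(X)
t0 = pl.print_time(t0, 'predict')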
Example #4
    def __init__(self):
        self.im = image.IMAGE()
        self.mlr = ml.MLRun()
        # dp_init is defined by the enclosing class (not shown here)
        self.dp_init()
Example #5
import argparse
import os
import time
from random import shuffle

import numpy as np
from keras.layers.core import Dense
from keras.models import model_from_json
from keras.optimizers import SGD
from keras.preprocessing.image import ImageDataGenerator
from simdat.core import dp_models
from simdat.core import image
from simdat.core import tools

# add_prediction_args, add_training_args and print_precision_recall
# are helper functions defined elsewhere in the original script


def main():
    parser = argparse.ArgumentParser(
        description="Use Simple model to train a classifier.")
    subparsers = parser.add_subparsers(help='commands', dest='sbp_name')
    parser.add_argument("-p",
                        "--path",
                        type=str,
                        default='.',
                        help="Path where the images are. Default: $PWD.")
    parser.add_argument("--img-width",
                        type=int,
                        default=224,
                        dest='width',
                        help="Rows of the images, default: 224.")
    parser.add_argument("--img-height",
                        type=int,
                        default=224,
                        dest='height',
                        help="Columns of the images, default: 224.")
    parser.add_argument("--seed",
                        type=int,
                        default=1337,
                        help="Random seed, default: 1337.")

    predict_parser = subparsers.add_parser("predict",
                                           help='Predict the images.')
    add_prediction_args(predict_parser)

    batch_train_parser = subparsers.add_parser(
        "batch-train", help='Command to train with batches.')
    add_training_args(batch_train_parser)
    batch_train_parser.add_argument(
        "--size",
        type=int,
        default=5000,
        help="Size of the image batch (default: 5,000)")

    finetune_parser = subparsers.add_parser(
        "train", help='Command to fine-tune the model.')
    add_training_args(finetune_parser)

    crop_parser = subparsers.add_parser("augmentation",
                                        help='Generate cropped images.')

    t0 = time.time()
    tl = tools.DATA()
    simdat_im = image.IMAGE()
    mdls = dp_models.DPModel()

    args = parser.parse_args()
    np.random.seed(args.seed)

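    # locations of the model architecture, weights and class map;
    # written by the train commands and read back by predict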
    if args.sbp_name in ['train', 'predict', 'batch-train']:
        tl.check_dir(args.ofolder)
        path_model = os.path.join(args.ofolder, 'model.json')
        path_weights = os.path.join(args.ofolder, 'weights.h5')
        path_cls = os.path.join(args.ofolder, 'classes.json')

    if args.sbp_name == 'batch-train':
        imgs = simdat_im.find_images(dir_path=args.path)
        classes = simdat_im.find_folders(dir_path=args.path)

        model = mdls.VGG_16(args.weights, lastFC=False)
        sgd = SGD(lr=args.lr,
                  decay=args.lrdecay,
                  momentum=args.momentum,
                  nesterov=True)
        print('[finetune_vgg] lr = %f, decay = %f, momentum = %f' %
              (args.lr, args.lrdecay, args.momentum))

        print('[finetune_vgg] Adding Dense(nclasses, activation=softmax).')
        model.add(Dense(len(classes), activation='softmax'))
        model.compile(optimizer=sgd, loss='categorical_crossentropy')
        t0 = tl.print_time(t0, 'compile the model to be fine tuned.')

        shuffle(imgs)
        for e in range(args.epochs):
            print("[finetune_vgg] Epoch %d/%d" % (e + 1, args.epochs))
            # walk through the image list in chunks of args.size
            for i in range(len(imgs) // args.size + 1):
                start = i * args.size
                end = min((i + 1) * args.size, len(imgs))
                files = imgs[start:end]
                if not files:
                    continue
                shuffle(files)
                X_train, X_test, Y_train, Y_test, _c = mdls.prepare_data_train(
                    files,
                    args.width,
                    args.height,
                    classes=classes,
                    rc=args.rc)
                model.fit(X_train,
                          Y_train,
                          batch_size=args.batchsize,
                          nb_epoch=1,
                          show_accuracy=True,
                          verbose=1,
                          validation_data=(X_test, Y_test))

        t0 = tl.print_time(t0, 'fit')

        tl.write_json(classes, fname=path_cls)
        json_string = model.to_json()
        with open(path_model, 'w') as f:
            f.write(json_string)
        model.save_weights(path_weights, overwrite=True)

    elif args.sbp_name == 'train':

        # when augmentation is enabled, the data generator handles the
        # normalization, so skip scaling during data preparation
        scale = not args.augmentation
        X_train, X_test, Y_train, Y_test, classes = mdls.prepare_data_train(
            args.path, args.width, args.height, rc=args.rc, scale=scale)
        tl.write_json(classes, fname=path_cls)
        nclasses = len(classes)
        t0 = tl.print_time(t0, 'prepare data')

        model = mdls.VGG_16(args.weights, lastFC=False)
        sgd = SGD(lr=args.lr,
                  decay=args.lrdecay,
                  momentum=args.momentum,
                  nesterov=True)
        print('[finetune_vgg] lr = %f, decay = %f, momentum = %f' %
              (args.lr, args.lrdecay, args.momentum))

        print('[finetune_vgg] Adding Dense(nclasses, activation=softmax).')
        model.add(Dense(nclasses, activation='softmax'))
        model.compile(optimizer=sgd, loss='categorical_crossentropy')
        t0 = tl.print_time(t0, 'compile the model to be fine tuned.')

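        # freeze the convolutional stacks so only the newly added
        # softmax layer is updated during fine-tuning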
        for stack in ['conv1', 'conv2', 'conv3', 'conv4', 'conv5']:
            for l in mdls.layers[stack]:
                l.trainable = False

        if args.augmentation:
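            # real-time augmentation: featurewise normalization plus
            # random rotations, shifts and horizontal flips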
            datagen = ImageDataGenerator(featurewise_center=True,
                                         samplewise_center=False,
                                         featurewise_std_normalization=True,
                                         samplewise_std_normalization=False,
                                         zca_whitening=False,
                                         rotation_range=20,
                                         width_shift_range=0.2,
                                         height_shift_range=0.2,
                                         horizontal_flip=True,
                                         vertical_flip=False)

            datagen.fit(X_train)
            model.fit_generator(datagen.flow(X_train,
                                             Y_train,
                                             batch_size=args.batchsize),
                                samples_per_epoch=X_train.shape[0],
                                nb_epoch=args.epochs,
                                show_accuracy=True,
                                validation_data=(X_test, Y_test),
                                nb_worker=1)

        else:
            model.fit(X_train,
                      Y_train,
                      batch_size=args.batchsize,
                      nb_epoch=args.epochs,
                      show_accuracy=True,
                      verbose=1,
                      validation_data=(X_test, Y_test))
        t0 = tl.print_time(t0, 'fit')
        score = model.evaluate(X_test, Y_test, show_accuracy=True, verbose=0)
        print('[finetune_vgg] Test score: %.4f' % score[0])
        print('[finetune_vgg] Test accuracy: %.4f' % score[1])
        t0 = tl.print_time(t0, 'evaluate')

        json_string = model.to_json()
        with open(path_model, 'w') as f:
            f.write(json_string)
        model.save_weights(path_weights, overwrite=True)

    elif args.sbp_name == 'predict':
        cls_map = tl.parse_json(path_cls)
        with open(path_model) as f:
            model = model_from_json(f.read())
        t0 = tl.print_time(t0, 'load model from json')

        model.load_weights(path_weights)
        t0 = tl.print_time(t0, 'load model weights')

        if args.cm:
            from simdat.core import plot
            from sklearn.metrics import confusion_matrix
            pl = plot.PLOT()

            X_test, Y_test, classes, F = mdls.prepare_data_test(
                args.path,
                args.width,
                args.height,
                convert_Y=False,
                y_as_str=False,
                classes=cls_map)
            t0 = tl.print_time(t0, 'prepare data')
            results = model.predict_classes(X_test,
                                            batch_size=args.batchsize,
                                            verbose=1)
            cm = confusion_matrix(Y_test, results)
            pl.plot_confusion_matrix(cm,
                                     xticks=cls_map,
                                     yticks=cls_map,
                                     xrotation=90)

        else:
            X_test, Y_test, classes, F = mdls.prepare_data_test(
                args.path, args.width, args.height)
            t0 = tl.print_time(t0, 'prepare data')

            results = model.predict_proba(X_test,
                                          batch_size=args.batchsize,
                                          verbose=1)
            outputs = []
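            # per-class counters consumed by print_precision_recall
            # (defined elsewhere): correct confident predictions,
            # predictions made, and ground-truth instances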
            precision = dict((el, 0) for el in cls_map)
            recall = dict((el, 0) for el in cls_map)
            total = dict((el, 0) for el in classes)
            for i in range(0, len(F)):
                _cls = results[i].argmax()
                max_prob = results[i][_cls]
                outputs.append({'input': F[i], 'max_probability': max_prob})
                cls = cls_map[_cls]
                recall[cls] += 1
                total[Y_test[i]] += 1
                if max_prob >= args.threshold:
                    outputs[-1]['class'] = cls
                    if Y_test[i] == cls:
                        precision[cls] += 1
                    else:
                        print('[finetune_vgg] %s: %s (%.2f)' %
                              (F[i], cls, max_prob))
                else:
                    print('[finetune_vgg] %s: low probability (%.2f),'
                          ' cannot find a match' % (F[i], max_prob))
                    outputs[-1]['class'] = None
            tl.write_json(outputs, fname=args.output_loc)
            print_precision_recall(precision, recall, total)

    elif args.sbp_name == 'augmentation':
        fimgs = simdat_im.find_images(dir_path=args.path)
        for fimg in fimgs:
            imgs = simdat_im.read_and_random_crop(fimg, save=True)

    else:
        print('Unknown command.')
        parser.print_help()