def pick_reps(self, dbs, dir_path=None, l=2):
    """Pick entries which match the images existing in a specified
    directory from multiple db json files

    @param dbs: list of the input db json files

    Keyword arguments:
    dir_path: parent directory of the images
    l: level of path suffix (default: 2)

    """
    import pandas as pd
    im = image.IMAGE()
    img_sufs = im.find_images(dir_path=dir_path)
    img_sufs = [im.path_suffix(x, level=l) for x in img_sufs]
    df = io.read_jsons_to_df(dbs, orient='index')
    df['path_suf'] = df['path'].apply(lambda x: im.path_suffix(x, level=l))
    df = df[df['path_suf'].isin(img_sufs)]
    io.write_df_json(df, fname='./picked_rep.json')
    return df
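# -- A minimal, self-contained illustration of the suffix-matching filter
#    pick_reps performs above, assuming two-level path suffixes; plain
#    pandas/os.path stand in for the simdat image/io helpers, and the
#    sample paths are hypothetical.
import os
import pandas as pd


def path_suffix(path, level=2):
    """Return the last `level` components of a path, e.g. 'cats/01.jpg'."""
    return '/'.join(path.split(os.sep)[-level:])


db = pd.DataFrame({'path': ['/data/db/cats/01.jpg', '/data/db/dogs/02.jpg']})
found = ['/mnt/images/cats/01.jpg']  # what find_images() might return
img_sufs = [path_suffix(p) for p in found]
db['path_suf'] = db['path'].apply(path_suffix)
picked = db[db['path_suf'].isin(img_sufs)]
print(picked)  # keeps only the cats/01.jpg entry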
# This is an example to create a html for displaying images
#   $python html_creator.py IP PARENT TEMPLATE
#     PARENT: parent folder of the images (default: $PWD)
#     TEMPLATE: jinja template file
#               (default: simdat/examples/html_plots.template)

import os
import sys
from simdat.core import image

imtl = image.IMAGE()

args = sys.argv[1:]
if len(args) > 0:
    ip = ''.join(["http://", args[0], ":8088/"])
else:
    ip = os.getcwd()
if len(args) > 1:
    parent = args[1]
else:
    parent = os.getcwd()
print("Looking for images in %s" % parent)

temp = '/home/tammy/SOURCES/simdat/tools/html_plots.template'
if len(args) > 2:
    temp = args[2]
outf = parent + '/images.html'
title = 'Images'
imgs = imtl.find_images(dir_path=parent)
imgs.sort()
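# -- The example above stops before anything is written out. A minimal
#    sketch of the missing rendering step, assuming a jinja2 template that
#    receives `title` and an `imgs` list; those variable names are
#    assumptions about the template, not taken from the original script.
from jinja2 import Template

with open(temp) as f:
    template = Template(f.read())
# Turn each found image into a link served from the IP chosen above.
links = [ip + os.path.relpath(img, parent) for img in imgs]
with open(outf, 'w') as f:
    f.write(template.render(title=title, imgs=links))
print("HTML written to %s" % outf)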
import os
import cv2
import time
import numpy as np
from keras.optimizers import SGD
from simdat.core import dp_models
from simdat.core import ml
from simdat.core import plot
from simdat.core import image

im = image.IMAGE()
pl = plot.PLOT()
mlr = ml.MLRun()
t0 = time.time()
mdls = dp_models.DPModel()
imnet = dp_models.ImageNet()

weight_path = '/home/tammy/SOURCES/keras/examples/vgg16_weights.h5'
t0 = pl.print_time(t0, 'initiate')

model = mdls.VGG_16(weight_path)
t0 = pl.print_time(t0, 'load weights')
sgd = SGD(lr=0.1, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(optimizer=sgd, loss='categorical_crossentropy')
t0 = pl.print_time(t0, 'compile')

imgs = im.find_images()
X = []
Y = []
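# -- A hedged sketch of how X might be filled and fed to the model above,
#    assuming the old channels-first (3, 224, 224) Keras VGG16 layout and
#    standard ImageNet BGR mean subtraction; neither preprocessing detail
#    is confirmed by the original snippet.
for fimg in imgs:
    img = cv2.imread(fimg).astype(np.float32)  # OpenCV loads as BGR
    img = cv2.resize(img, (224, 224))
    img[:, :, 0] -= 103.939  # subtract per-channel ImageNet means
    img[:, :, 1] -= 116.779
    img[:, :, 2] -= 123.68
    X.append(img.transpose((2, 0, 1)))  # HWC -> CHW for Theano ordering
X = np.array(X)
probs = model.predict(X, batch_size=32, verbose=1)
t0 = pl.print_time(t0, 'predict')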
def __init__(self):
    self.im = image.IMAGE()
    self.mlr = ml.MLRun()
    self.dp_init()
# Imports this fragment relies on; add_prediction_args, add_traiining_args
# and print_precision_recall are helper functions defined elsewhere in the
# original file. The simdat.core module paths follow the other examples.
import os
import time
import argparse
import numpy as np
from random import shuffle
from keras.optimizers import SGD
from keras.layers.core import Dense
from keras.models import model_from_json
from keras.preprocessing.image import ImageDataGenerator
from simdat.core import dp_models
from simdat.core import tools
from simdat.core import image


def main():
    parser = argparse.ArgumentParser(
        description="Use Simple model to train a classifier.")
    subparsers = parser.add_subparsers(help='commands', dest='sbp_name')
    parser.add_argument(
        "-p", "--path", type=str, default='.',
        help="Path where the images are. Default: $PWD.")
    parser.add_argument(
        "--img-width", type=int, default=224, dest='width',
        help="Rows of the images, default: 224.")
    parser.add_argument(
        "--img-height", type=int, default=224, dest='height',
        help="Columns of the images, default: 224.")
    parser.add_argument(
        "--seed", type=int, default=1337,
        help="Random seed, default: 1337.")

    predict_parser = subparsers.add_parser(
        "predict", help='Predict the images.')
    add_prediction_args(predict_parser)

    batch_train_parser = subparsers.add_parser(
        "batch-train", help='Command to train with batches.')
    add_traiining_args(batch_train_parser)
    batch_train_parser.add_argument(
        "--size", type=int, default=5000,
        help="Size of the image batch (default: 5,000)")

    finetune_parser = subparsers.add_parser(
        "train", help='Command to finetune the images.')
    add_traiining_args(finetune_parser)

    crop_parser = subparsers.add_parser(
        "augmentation", help='Generate cropped images.')

    t0 = time.time()
    tl = tools.DATA()
    simdat_im = image.IMAGE()
    mdls = dp_models.DPModel()

    args = parser.parse_args()
    np.random.seed(args.seed)

    if args.sbp_name in ['train', 'predict', 'batch-train']:
        tl.check_dir(args.ofolder)
        path_model = os.path.join(args.ofolder, 'model.json')
        path_weights = os.path.join(args.ofolder, 'weights.h5')
        path_cls = os.path.join(args.ofolder, 'classes.json')

    if args.sbp_name == 'batch-train':
        imgs = simdat_im.find_images(dir_path=args.path)
        classes = simdat_im.find_folders(dir_path=args.path)

        model = mdls.VGG_16(args.weights, lastFC=False)
        sgd = SGD(lr=args.lr, decay=args.lrdecay,
                  momentum=args.momentum, nesterov=True)
        print('[finetune_vgg] lr = %f, decay = %f, momentum = %f'
              % (args.lr, args.lrdecay, args.momentum))

        print('[finetune_vgg] Adding Dense(nclasses, activation=softmax).')
        model.add(Dense(len(classes), activation='softmax'))
        model.compile(optimizer=sgd, loss='categorical_crossentropy')
        t0 = tl.print_time(t0, 'compile the model to be fine tuned.')

        shuffle(imgs)
        for e in range(args.epochs):
            print("[finetune_vgg] Epoch %d/%d" % (e + 1, args.epochs))
            # Integer division so range() receives an int under Python 3.
            for i in range(len(imgs) // args.size + 1):
                start = i * args.size
                end = (i + 1) * args.size
                files = imgs[start:end]
                shuffle(files)
                if (i + 1) * args.size > len(imgs):
                    end = len(imgs)
                X_train, X_test, Y_train, Y_test, _c = \
                    mdls.prepare_data_train(files, args.width, args.height,
                                            classes=classes, rc=args.rc)
                model.fit(X_train, Y_train, batch_size=args.batchsize,
                          nb_epoch=1, show_accuracy=True, verbose=1,
                          validation_data=(X_test, Y_test))
                t0 = tl.print_time(t0, 'fit')

        tl.write_json(classes, fname=path_cls)
        json_string = model.to_json()
        open(path_model, 'w').write(json_string)
        model.save_weights(path_weights, overwrite=True)

    elif args.sbp_name == 'train':
        scale = True
        if args.augmentation:
            scale = False
        X_train, X_test, Y_train, Y_test, classes = mdls.prepare_data_train(
            args.path, args.width, args.height, rc=args.rc, scale=scale)
        tl.write_json(classes, fname=path_cls)
        nclasses = len(classes)
        t0 = tl.print_time(t0, 'prepare data')

        model = mdls.VGG_16(args.weights, lastFC=False)
        sgd = SGD(lr=args.lr, decay=args.lrdecay,
                  momentum=args.momentum, nesterov=True)
        print('[finetune_vgg] lr = %f, decay = %f, momentum = %f'
              % (args.lr, args.lrdecay, args.momentum))

        print('[finetune_vgg] Adding Dense(nclasses, activation=softmax).')
        model.add(Dense(nclasses,
                        activation='softmax'))
        model.compile(optimizer=sgd, loss='categorical_crossentropy')
        t0 = tl.print_time(t0, 'compile the model to be fine tuned.')

        for stack in ['conv1', 'conv2', 'conv3', 'conv4', 'conv5']:
            for l in mdls.layers[stack]:
                l.trainable = False

        if args.augmentation:
            datagen = ImageDataGenerator(
                featurewise_center=True,
                samplewise_center=False,
                featurewise_std_normalization=True,
                samplewise_std_normalization=False,
                zca_whitening=False,
                rotation_range=20,
                width_shift_range=0.2,
                height_shift_range=0.2,
                horizontal_flip=True,
                vertical_flip=False)
            datagen.fit(X_train)
            model.fit_generator(
                datagen.flow(X_train, Y_train, batch_size=args.batchsize),
                samples_per_epoch=X_train.shape[0],
                nb_epoch=args.epochs,
                show_accuracy=True,
                validation_data=(X_test, Y_test),
                nb_worker=1)
        else:
            model.fit(X_train, Y_train, batch_size=args.batchsize,
                      nb_epoch=args.epochs, show_accuracy=True,
                      verbose=1, validation_data=(X_test, Y_test))
        t0 = tl.print_time(t0, 'fit')

        score = model.evaluate(X_test, Y_test,
                               show_accuracy=True, verbose=0)
        print('[finetune_vgg] Test score:', score[0])
        print('[finetune_vgg] Test accuracy:', score[1])
        t0 = tl.print_time(t0, 'evaluate')

        json_string = model.to_json()
        open(path_model, 'w').write(json_string)
        model.save_weights(path_weights, overwrite=True)

    elif args.sbp_name == 'predict':
        cls_map = tl.parse_json(path_cls)
        model = model_from_json(open(path_model).read())
        t0 = tl.print_time(t0, 'load model from json')
        model.load_weights(path_weights)
        t0 = tl.print_time(t0, 'load model weights')

        if args.cm:
            from simdat.core import plot
            from sklearn.metrics import confusion_matrix
            pl = plot.PLOT()
            X_test, Y_test, classes, F = mdls.prepare_data_test(
                args.path, args.width, args.height,
                convert_Y=False, y_as_str=False, classes=cls_map)
            t0 = tl.print_time(t0, 'prepare data')
            results = model.predict_classes(X_test,
                                            batch_size=args.batchsize,
                                            verbose=1)
            cm = confusion_matrix(Y_test, results)
            pl.plot_confusion_matrix(cm, xticks=cls_map,
                                     yticks=cls_map, xrotation=90)
        else:
            X_test, Y_test, classes, F = mdls.prepare_data_test(
                args.path, args.width, args.height)
            t0 = tl.print_time(t0, 'prepare data')
            results = model.predict_proba(X_test,
                                          batch_size=args.batchsize,
                                          verbose=1)
            outputs = []
            precision = dict((el, 0) for el in cls_map)
            recall = dict((el, 0) for el in cls_map)
            total = dict((el, 0) for el in classes)
            for i in range(0, len(F)):
                _cls = results[i].argmax()
                max_prob = results[i][_cls]
                outputs.append({'input': F[i], 'max_probability': max_prob})
                cls = cls_map[_cls]
                recall[cls] += 1
                total[Y_test[i]] += 1
                if max_prob >= args.threshold:
                    outputs[-1]['class'] = cls
                    if Y_test[i] == cls:
                        precision[cls] += 1
                    else:
                        print('[finetune_vgg] %s: %s (%.2f)'
                              % (F[i], cls, max_prob))
                else:
                    print('[finetune_vgg] %s: low probability (%.2f),'
                          ' cannot find a match' % (F[i], max_prob))
                    outputs[-1]['class'] = None
            tl.write_json(outputs, fname=args.output_loc)
            print_precision_recall(precision, recall, total)

    elif args.sbp_name == 'augmentation':
        fimgs = simdat_im.find_images(dir_path=args.path)
        for fimg in fimgs:
            imgs = simdat_im.read_and_random_crop(fimg, save=True)

    else:
        print('Wrong command.')
        parser.print_help()
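# -- Example invocations (a sketch: the option names defined by
#    add_traiining_args/add_prediction_args, e.g. for weights or the output
#    folder, are not shown above, so the exact flags and the script name
#    finetune_vgg.py are assumptions):
#
#    $ python finetune_vgg.py -p ./dataset train
#    $ python finetune_vgg.py -p ./dataset batch-train --size 2000
#    $ python finetune_vgg.py -p ./test_images predict
if __name__ == '__main__':
    main()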