Example No. 1
def start():
    model = keras.applications.vgg19.VGG19(include_top=False, weights='imagenet', pooling='avg')

    root_dir = "/media/rishabh/dump_bin/Animals_with_Attributes2/JPEGImages/"
    for root, subdirs, files in os.walk(root_dir):
        list_file_path = os.path.join(root, 'list_of_files.txt')
        with open(list_file_path, 'wb') as list_file:

            for filename in files:
                if filename.endswith("jpg"):
                    file_path = os.path.join(root, filename)
                    img = image.load_img(file_path, target_size=(224,224))
                    x = image.img_to_array(img)
                    x = np.expand_dims(x, axis=0)
                    x = preprocess_input(x)

                    features = model.predict(x)

                    np_name = filename[0:-4]
                    np_name = np_name+".npy"
                    np.save(os.path.join(root,np_name), features)

        #            npy = open(os.path.join(root,np_name),"w+")
                    print('file %s (full path: %s)' % (filename, file_path))
                    list_file.write(('%s\n' % filename).encode('utf-8'))
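A brief hedged sketch of loading the saved .npy features back for later use (the class-folder name is hypothetical; the layout follows what the loop above writes):

import os
import numpy as np

class_dir = "/media/rishabh/dump_bin/Animals_with_Attributes2/JPEGImages/antelope"  # hypothetical class folder
features = {
    fname[:-4]: np.load(os.path.join(class_dir, fname))
    for fname in os.listdir(class_dir)
    if fname.endswith(".npy")
}
print(len(features), "feature vectors loaded")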
Example No. 2
def preprocess_xray_flipped(xray_path):
    # load_img has no vertical_flip argument and expects a (height, width) target_size,
    # so load in grayscale and flip the array after converting it
    xray = image.load_img(xray_path, color_mode="grayscale",
                          target_size=(img_dims[0], img_dims[1]))
    xray = image.img_to_array(xray)
    xray = np.flipud(xray)                # vertical flip
    xray = np.dstack([xray, xray, xray])  # replicate the single channel to 3 channels
    xray = preprocess_input(xray)
    return xray
def classifyImage(fname):
    img = image.load_img(fname, target_size=(224, 224))
    x = image.img_to_array(img)
    x = np.expand_dims(x, axis=0)
    x = preprocess_input(x)
    
    pred = decode_predictions(model.predict(x), top=1)[0][0]
    
    return (pred[1], pred[2])
Example No. 4
def predict(model, img, target_size, top_n=3):
  """Run model prediction on image
  """
  if img.size != target_size:
    img = img.resize(target_size)
  x = image.img_to_array(img)
  x = np.expand_dims(x, axis=0)
  x = preprocess_input(x)
  preds = model.predict(x)
  return decode_predictions(preds, top=top_n)[0]
Example No. 5
def predict(img):
  print('{}:'.format(img))
  # Load the image file, resizing it to 224x224 pixels (required by this model)
  img_df = image.load_img(img, target_size=(224, 224))
  x = image.img_to_array(img_df)  # [row[columns[r, g, b]]]
  x = np.expand_dims(x, axis=0)  # array of images

  # Scale and predict
  predictions = model.predict(resnet50.preprocess_input(x))
  predicted_classes = resnet50.decode_predictions(predictions, top=5)
  for imagenet_id, name, likelihood in predicted_classes[0]:
    print(' - {}: {:.2f} likelihood'.format(name, likelihood))
  print('\n')
Example No. 6
def __read_all_images(src):
    files = listdir(src)
    images = {}
    i = 0
    total = len(files)
    for f in files:
        if not (f.endswith(".jpg")):
            continue
        im = Image.open(src + f)
        im = img_to_array(im)
        im = preprocess_input(im)
        images[f[:-4]] = im
        if i % 100 == 0:
            print(str(i) + " / " + str(total))
        i += 1
    return images
def read_image(i):

    rPath = image_filenames[i]
    dPath = depth_filenames[i]
    print(i)

    image = misc.imread(rPath)/1.
    image_resized = resize(image, output_shape=(224, 224))
    depth = misc.imread(dPath).astype(np.uint16)/1000.0
    depth_resized = resize(depth, output_shape=(224, 224))  # (480,640) -> Model Output (224, 224)

    print(image_resized.shape)
    print(np.expand_dims(depth_resized,-1).shape)
    input("aki")

    return preprocess_input(image_resized), np.expand_dims(depth_resized,-1)
    def _f():
        start = 0
        end = start + batch_size
        n = data.shape[0]

        while True:
            X_batch = session.run(resize_op, {img_placeholder: data[start:end]})
            X_batch = preprocess_input(X_batch)
            y_batch = labels[start:end]
            start += batch_size
            end += batch_size
            if start >= n:
                start = 0
                end = batch_size

            print(start, end)
            yield (X_batch, y_batch)
def extract_feature(dir_path, net):
  features = []
  infos = []
  num = 0
  for image_name in os.listdir(dir_path):
    arr = image_name.split('_')
    person = int(arr[0])
    camera = int(arr[1][1])
    image_path = os.path.join(dir_path, image_name) 
    img = image.load_img(image_path, target_size=(224, 224))
    x = image.img_to_array(img)
    x = np.expand_dims(x, axis=0)
    x = preprocess_input(x)
    feature = net.predict(x)
    features.append(np.squeeze(feature))
    infos.append((person, camera))

  return features, infos
Example No. 10
def predict(model, img, target_size, top_n=3):
  """Run model prediction on image
  Args:
    model: keras model
    img: PIL format image
    target_size: (w,h) tuple
    top_n: # of top predictions to return
  Returns:
    list of predicted labels and their probabilities
  """
  if img.size != target_size:
    img = img.resize(target_size)

  x = image.img_to_array(img)
  x = np.expand_dims(x, axis=0)
  x = preprocess_input(x)
  preds = model.predict(x)
  return decode_predictions(preds, top=top_n)[0]
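A hedged usage sketch for the predict() helper above; the imports mirror what the snippet assumes, and the image path is hypothetical.

import numpy as np
from keras.preprocessing import image
from keras.applications.resnet50 import ResNet50, preprocess_input, decode_predictions

model = ResNet50(weights='imagenet')
img = image.load_img('cat.png')                      # any PIL image; path is hypothetical
for _, name, prob in predict(model, img, target_size=(224, 224), top_n=3):
    print('{}: {:.3f}'.format(name, prob))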
Example No. 11
def pix2depth(path, model):
    model_name = 'p2d'
    originalImage = cv2.imread(path)
    loaded_model =  model_list['pix2depth'][model]
    file_name = model+'_'+path.split('/')[-1]
    output_file = os.path.join(output_path,file_name)
    if model =='CNN':
        originalImage = cv2.resize(originalImage,(img_dim,img_dim))
        x = preprocess_input(originalImage/1.)
    elif model == 'CycleGAN':
        test(path)
        os.system('cp gautam/inf_results/imgs/fakeA_0_0.jpg %s' % output_file)
    else:
        originalImage = cv2.resize(originalImage,(256,256))
        x = originalImage/255.
    if not model == 'CycleGAN':
        p1 = get_depth_map(x, loaded_model)
        cv2.imwrite(output_file,p1)
    return output_file
Example No. 12
def load_image(img_path):
    data = image.load_img(img_path, target_size=(224, 224))
    x = image.img_to_array(data)
    x = np.expand_dims(x, axis=0)
    x = preprocess_input(x)
    return x
Example No. 13
######################################################################
# Load a test image
# ------------------
# A single cat dominates the examples!
from PIL import Image
from matplotlib import pyplot as plt
from keras.applications.resnet50 import preprocess_input
img_url = 'https://github.com/dmlc/mxnet.js/blob/master/data/cat.png?raw=true'
download(img_url, 'cat.png')
img = Image.open('cat.png').resize((224, 224))
plt.imshow(img)
plt.show()
# input preprocess
data = np.array(img)[np.newaxis, :].astype('float32')
data = preprocess_input(data).transpose([0, 3, 1, 2])
print('input_1', data.shape)

######################################################################
# Compile the model on NNVM
# --------------------------
# We should be familiar with the process now.

# convert the keras model(NHWC layout) to NNVM format(NCHW layout).
sym, params = nnvm.frontend.from_keras(keras_resnet50)
# compile the model
target = 'cuda'
shape_dict = {'input_1': data.shape}
with nnvm.compiler.build_config(opt_level=3):
    graph, lib, params = nnvm.compiler.build(sym, target, shape_dict, params=params)
# wrap the Keras model for foolbox, with the pixel bounds and preprocessing it expects
fmodel = foolbox.models.KerasModel(kmodel,
                                   bounds=(0, 255),
                                   preprocessing=preprocessing)

img_folderpath = "./sample_images/"
img_path = img_folderpath + 'sample_image_2.jpg'  # An image of a yellow cab/taxi
x = image.load_img(img_path,
                   color_mode='rgb',
                   target_size=(img_rows, img_cols))
img = image.img_to_array(x)
img = np.expand_dims(img, axis=0)
img = img.reshape(img_shape)
label = 468  # For the class of taxi and cab

# Note that preprocess_input is an in-place operation.
prediction = kmodel.predict(
    preprocess_input(np.copy(img)).reshape(
        (1, img_rows, img_cols, nb_channels)))
# decode the results into a list of tuples (class, description, probability)
# (one such list for each sample in the batch)
print('Prediction on the original example:',
      decode_predictions(prediction, top=3)[0])
# The original image is correctly classified as a cab with the confidence of 0.999.

# apply attack on source image
# ::-1 reverses the color channels, because Keras ResNet50 expects BGR instead of RGB
attack = foolbox.attacks.FGSM(fmodel)
adversarial = attack(img[:, :, ::-1], label)[:, :, ::-1]
# if the attack fails, adversarial will be None and a warning will be printed

adversarial_prediction = kmodel.predict(
    adversarial.reshape((1, img_rows, img_cols, nb_channels)))
print('Prediction on the adversarial example:',
      decode_predictions(adversarial_prediction, top=3)[0])
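Because the attack returns None on failure (as the comment above notes), a small hedged guard using the same variable names could be added before the adversarial prediction:

adversarial = attack(img[:, :, ::-1], label)
if adversarial is None:
    raise RuntimeError('FGSM attack failed to find an adversarial example')
adversarial = adversarial[:, :, ::-1]  # restore RGB channel order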
Example No. 15
def preprocess_image(image_path):
    img = load_img(image_path, target_size=(img_nrows, img_ncols))
    img = img_to_array(img)
    img = np.expand_dims(img, axis=0)
    img = resnet50.preprocess_input(img)
    return img
Example No. 16
def extract_InceptionV3(tensor):
    from keras.applications.inception_v3 import InceptionV3, preprocess_input
    return InceptionV3(weights='imagenet',
                       include_top=False).predict(preprocess_input(tensor))
#%%
# prepare the image for the VGG model
processed_image = vgg16.preprocess_input(image_batch.copy())

# get the predicted probabilities for each class
predictions = vgg_model.predict(processed_image)
#print(predictions)

# convert the probabilities to class labels
# We will get top 5 predictions which is the default
label = decode_predictions(predictions)
print(label)

#%%
# prepare the image for the Resnet model
processed_image = resnet50.preprocess_input(image_batch.copy())

# get the predicted probabilities for each class
predictions = resnet_model.predict(processed_image)
#print(predictions)

# convert the probabilities to class labels
# We will get top 5 predictions which is the default
label = decode_predictions(predictions)
print(label)

#%%
# prepare the image for the MobileNet model
processed_image = mobilenet.preprocess_input(image_batch.copy())

# get the predicted probabilities for each class
Example No. 18
import numpy as np
from keras.preprocessing import image
from keras.applications import resnet50

model = resnet50.ResNet50()

img = image.load_img("image.jpg", target_size = (224,224))

x = image.img_to_array(img)

x = np.expand_dims(x, axis=0)

x = resnet50.preprocess_input(x)

predictions = model.predict(x)

predicted_classes = resnet50.decode_predictions(predictions, top=9)

print("This is an image of : ")

for imagenet_id, name, likelihood in predicted_classes[0]:
    print(" - {}: {:2f} likelihood".format(name, likelihood))

Example No. 19
from keras.preprocessing.image import load_img
from keras.preprocessing.image import img_to_array
from keras.applications import resnet50
from keras.applications.imagenet_utils import decode_predictions
import keras as keras
import numpy as np

model = keras.applications.resnet50.ResNet50(weights="imagenet")
path = "../input/starfish/asd.jpg"
# load an image in PIL format
original_image = load_img(path, target_size=(224, 224))
numpy_image = img_to_array(original_image)

# Convert the image into 4D Tensor (samples, height, width, channels) by adding an extra dimension to the axis 0.
input_image = np.expand_dims(numpy_image, axis=0)

# preprocess for resnet50
processed_image_resnet50 = resnet50.preprocess_input(input_image.copy())

# resnet50
predictions_resnet50 = model.predict(processed_image_resnet50)
label_resnet50 = decode_predictions(predictions_resnet50)
print(label_resnet50)
Example No. 20
def predict_dog_label():
    """Predicts the dog breed and recommends similar dogs based on its characterictics"""
    url = input("Enter img url :")
    image_to_tensor = np.zeros(((1), 224, 224, 3), dtype='float32')

    # converts image to centralized format for ResNet model
    image_to_tensor[0] = preprocess_input(
        np.expand_dims(read_img(url).copy(), axis=0))

    # predicts bottleneck features using ResNet model
    get_image_features = model_RN50.predict(image_to_tensor, verbose=1)

    # predicts image label based on features from ResNet model
    pred = ((model_2cr.predict(get_image_features) +
             model_2dr.predict(get_image_features) +
             model_2gr.predict(get_image_features) +
             model_2jr.predict(get_image_features) +
             model_2lr.predict(get_image_features)) / 5)

    # returns top 5 predicted labels
    top_5 = [
        i[0] for i in sorted(list(zip(list(y_table.columns),
                                      pred.tolist()[0])),
                             key=lambda x: x[1],
                             reverse=True)[:6]
    ]
    label = list(y_table.columns)[int(pred.argmax(axis=1))]
    show_image_pred(url, pred.argmax(axis=1))

    # finds most similar picture in local database based on cosine similarity of image features
    similar_pic = sorted(list(
        zip(list(train_breeds_names[0]),
            (cosine_similarity(get_image_features,
                               pd.DataFrame(train_breeds_features))).reshape(
                                   -1, 1))),
                         key=lambda x: x[1],
                         reverse=True)[1][0]
    time.sleep(0.5)
    sim = input('\nDo you want the most similar pic in database: (y/n) ')
    if sim == 'y':
        print('\n Most similar pic in database: \n')
        show_image(similar_pic)
    time.sleep(0.5)
    print('\nTop 5 visually most similar dogs:', top_5[1:])
    show = input('\nDo you want to see those dogs?: (y/n) ')
    if show == 'y':
        show_rand_image(top_5[1:])
    feat = input("\nDo you want to see the features of your dog? (y,n) ")
    if feat == 'y':
        if dog_features.loc[dog_features['label_name'] ==
                            label].values[0][1:-1][0] == 0:
            print('\nThis dog is not a pet or no information available\n')
            feat_vis = input(
                '\nDo you want to see the features of visually similar dogs? (y/n) '
            )
            if feat_vis == 'y':
                find_features_table(top_5[1:])
                return label
        find_features_table([label])
    feat_sim = input(
        '\nDo you want to find similar dogs based on its features? (y/n) ')
    if feat_sim == 'y':
        print('\n')
        # finds most similar dog based on cosine similarity of dog characteristcs
        top_5_feat = [
            x[0] for x in sorted(list(
                zip(list(dog_features.label_name), (
                    cosine_similarity(dog_features[dog_features.columns[1:-1]])
                    [int(pred.argmax(axis=1))]))),
                                 key=lambda x: x[1],
                                 reverse=True)[1:6]
        ]
        find_features_table(top_5_feat)
        feat_sim2 = input("\nDo you want to see them? (y,n) ")
        if feat_sim2 == 'y':
            show_rand_image(top_5_feat)
    return label
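A small stand-alone illustration of the cosine-similarity lookup used in the function above (the array shapes are assumptions; train_breeds_features plays the role of the database there):

import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

query = np.random.rand(1, 2048)        # one image's bottleneck feature vector (shape assumed)
database = np.random.rand(100, 2048)   # stored feature vectors for the local database
scores = cosine_similarity(query, database)[0]
best = int(np.argmax(scores))
print('most similar index:', best, 'score:', round(float(scores[best]), 4))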
Example No. 21
def load_image(img_path):
    img = image.load_img(img_path, target_size=(299, 299))
    x = image.img_to_array(img)
    x = np.expand_dims(x, axis=0)
    x = preprocess_input(x)
    return x
Example No. 22
resnet.summary()

# make a model to get output before flatten
activation_layer = resnet.get_layer('activation_49')

# create a model object
model = Model(inputs=resnet.input, outputs=activation_layer.output)

# get the feature map weights
final_dense = resnet.get_layer('fc1000')
W = final_dense.get_weights()[0]


while True:
  img = image.load_img(np.random.choice(image_files), target_size=(224, 224))
  x = preprocess_input(np.expand_dims(img, 0))
  fmaps = model.predict(x)[0] # 7 x 7 x 2048

  # get predicted class
  probs = resnet.predict(x)
  classnames = decode_predictions(probs)[0]
  print(classnames)
  classname = classnames[0][1]
  pred = np.argmax(probs[0])

  # get the 2048 weights for the relevant class
  w = W[:, pred]

  # "dot" w with fmaps
  cam = fmaps.dot(w)
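The snippet stops at the raw 7x7 class activation map; a hedged continuation (scipy.ndimage and matplotlib are assumptions, not part of the original) would upsample the map and overlay it on the image:

import matplotlib.pyplot as plt
import scipy.ndimage as ndi

cam_resized = ndi.zoom(cam, (224 / cam.shape[0], 224 / cam.shape[1]))  # 7x7 -> 224x224
plt.imshow(img)                                   # the PIL image loaded above
plt.imshow(cam_resized, cmap='jet', alpha=0.5)    # heat-map overlay
plt.title(classname)
plt.show()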
Example No. 23
import random
from sklearn.model_selection import train_test_split
from keras.preprocessing import image
from keras.applications.resnet50 import preprocess_input
from sklearn.preprocessing import OneHotEncoder

random.shuffle(data_pair)
X = [item[0] for item in data_pair]
Y = [item[1] for item in data_pair]

image_data_list = []

for img_path in X:
    img = image.load_img(img_path, target_size=(224, 224))
    img_data = image.img_to_array(img)
    img_data = preprocess_input(img_data)
    image_data_np = np.array(img_data)
    image_data_list.append(image_data_np)

image_data_list = np.array(image_data_list)
#print(len(image_data_list))
Y = np.array(Y)

onehot_encoder = OneHotEncoder(sparse=False)
Y = Y.reshape(len(Y), 1)
onehot_encoded = onehot_encoder.fit_transform(Y)

X_train, X_valid, Y_train, Y_valid = train_test_split(image_data_list,
                                                      onehot_encoded,
                                                      test_size=0.30,
                                                      random_state=42)
encoder = preprocessing.LabelEncoder()
encoder.fit(Classes['Bird Class'].values)
print(encoder.classes_)
y_train = onehot(encoder.transform(y_train1))
y_valid = onehot(encoder.transform(y_valid1))
y_test = onehot(encoder.transform(y_test1))

print(np.sum(np.argmax(y_train, axis=1) == encoder.transform(y_train1)))
print(np.sum(np.argmax(y_valid, axis=1) == encoder.transform(y_valid1)))
print(np.sum(np.argmax(y_test, axis=1) == encoder.transform(y_test1)))

x_train, y_train = shuffle(x_train, y_train)
x_valid, y_valid = shuffle(x_valid, y_valid)
x_test, y_test = shuffle(x_test, y_test)

x_train = preprocess_input(x_train)
x_valid = preprocess_input(x_valid)
x_test = preprocess_input(x_test)

# In[113]:

# trainning process
nb_epoch = int(sys.argv[1])
batch_size = int(sys.argv[2])
print("Epochs: {}".format(nb_epoch), "Batch Size: {}".format(batch_size))
checkpointer = ModelCheckpoint(filepath=sys.argv[3],
                               verbose=1,
                               monitor='val_categorical_accuracy',
                               save_best_only=True)
model.fit_generator(datagen.flow(x_train, y_train, batch_size=batch_size),
                    steps_per_epoch=(x_train.shape[0] / batch_size) + 1,
     # print(imgs.shape)
     ag_imgs = []
     for k in range(imgs.shape[0]):
         n = 0
         for ag in datagen.flow(imgs[k], batch_size=1):
             ag_imgs.append(ag)
             new_labels.append(label[k])
             n += 1
             if n > 5:  # keep 5 augmented copies per image
                 break
     # for ag in datagen.flow(,batch_size=1):
     ag_imgs = np.asarray(ag_imgs)
     ag_imgs = ag_imgs.reshape(ag_imgs.shape[0], ag_imgs.shape[2],
                               ag_imgs.shape[3], ag_imgs.shape[4])
     if i == 0:
         train_feature = model.predict(preprocess_input(ag_imgs))
     else:
         print(train_feature.shape,
               model.predict(preprocess_input(ag_imgs)).shape)
         train_feature = np.vstack(
             (train_feature, model.predict(preprocess_input(ag_imgs))))
 print(train_feature.shape, len(new_labels))
 new_labels = np.asarray(new_labels)
 list_2 = [test_data_path + list for list in os.listdir(test_data_path)]
 for i in range(int(len(list_2) / BATCHSIZE) + 1):
     imgs = []
     for j in range(BATCHSIZE):
         if i * BATCHSIZE + j < len(list_2):
             imgs.append(
                 img_to_array(
                     load_img(list_2[i * BATCHSIZE + j],
Example No. 26
######################################################################
# Load a test image
# ------------------
# A single cat dominates the examples!
from PIL import Image
from matplotlib import pyplot as plt
from keras.applications.resnet50 import preprocess_input
img_url = 'https://github.com/dmlc/mxnet.js/blob/master/data/cat.png?raw=true'
img_path = download_testdata(img_url, 'cat.png', module='data')
img = Image.open(img_path).resize((224, 224))
plt.imshow(img)
plt.show()
# input preprocess
data = np.array(img)[np.newaxis, :].astype('float32')
data = preprocess_input(data).transpose([0, 3, 1, 2])

######################################################################
# Compile the model with Relay
# ----------------------------
# convert the keras model(NHWC layout) to Relay format(NCHW layout).
shape_dict = {'input_1': data.shape}
mod, params = relay.frontend.from_keras(keras_resnet50, shape_dict)
# compile the model

target = "llvm -mcpu=core-avx2"
with relay.build_config(opt_level=3):
    graph, lib, params = relay.build(mod, target, params=params)

from tvm.contrib import graph_runtime as runtime
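A minimal execution sketch for the module compiled above, following the usual graph_runtime API of that TVM era; graph, lib, params and data come from the preceding lines, while the tvm/numpy imports are assumed here:

import numpy as np
import tvm

ctx = tvm.cpu(0)                                   # matches the "llvm -mcpu=core-avx2" target
m = runtime.create(graph, lib, ctx)
m.set_input('input_1', tvm.nd.array(data.astype('float32')))
m.set_input(**params)
m.run()
tvm_out = m.get_output(0)
top1 = np.argmax(tvm_out.asnumpy()[0])
print('TVM top-1 class id:', top1)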
def main():
    parser = argparse.ArgumentParser(
        description=
        'Find latent representation of reference images using perceptual loss')
    parser.add_argument('src_dir', help='Directory with images for encoding')
    parser.add_argument('generated_images_dir',
                        help='Directory for storing generated images')
    parser.add_argument('dlatent_dir',
                        help='Directory for storing dlatent representations')
    parser.add_argument('--data_dir',
                        default='data',
                        help='Directory for storing optional models')
    parser.add_argument('--mask_dir',
                        default='masks',
                        help='Directory for storing optional masks')
    parser.add_argument('--load_last',
                        default='',
                        help='Start with embeddings from directory')
    parser.add_argument(
        '--dlatent_avg',
        default='',
        help=
        'Use dlatent from file specified here for truncation instead of dlatent_avg from Gs'
    )
    parser.add_argument(
        '--model_url',
        default='gdrive:networks/stylegan2-ffhq-config-f.pkl',
        help='Fetch a StyleGAN model to train on from this URL')
    parser.add_argument('--model_res',
                        default=1024,
                        help='The dimension of images in the StyleGAN model',
                        type=int)
    parser.add_argument('--batch_size',
                        default=1,
                        help='Batch size for generator and perceptual model',
                        type=int)
    parser.add_argument(
        '--optimizer',
        default='ggt',
        help='Optimization algorithm used for optimizing dlatents')

    # Perceptual model params
    parser.add_argument(
        '--vgg_url',
        default=
        'https://drive.google.com/uc?id=1N2-m9qszOeVC9Tq77WxsLnuWwOedQiD2',
        help='Fetch VGG model on from this URL')
    parser.add_argument('--image_size',
                        default=256,
                        help='Size of images for perceptual model',
                        type=int)
    parser.add_argument('--resnet_image_size',
                        default=256,
                        help='Size of images for the Resnet model',
                        type=int)
    parser.add_argument('--lr',
                        default=0.25,
                        help='Learning rate for perceptual model',
                        type=float)
    parser.add_argument('--decay_rate',
                        default=0.9,
                        help='Decay rate for learning rate',
                        type=float)
    parser.add_argument('--iterations',
                        default=100,
                        help='Number of optimization steps for each batch',
                        type=int)
    parser.add_argument(
        '--decay_steps',
        default=4,
        help='Decay steps for learning rate decay (as a percent of iterations)',
        type=float)
    parser.add_argument('--early_stopping',
                        default=True,
                        help='Stop early once training stabilizes',
                        type=str2bool,
                        nargs='?',
                        const=True)
    parser.add_argument('--early_stopping_threshold',
                        default=0.5,
                        help='Stop after this threshold has been reached',
                        type=float)
    parser.add_argument('--early_stopping_patience',
                        default=10,
                        help='Number of iterations to wait below threshold',
                        type=int)
    parser.add_argument(
        '--load_effnet',
        default='data/finetuned_effnet.h5',
        help='Model to load for EfficientNet approximation of dlatents')
    parser.add_argument(
        '--load_resnet',
        default='data/finetuned_resnet.h5',
        help='Model to load for ResNet approximation of dlatents')
    parser.add_argument(
        '--use_preprocess_input',
        default=True,
        help='Call process_input() first before using feed forward net',
        type=str2bool,
        nargs='?',
        const=True)
    parser.add_argument(
        '--use_best_loss',
        default=True,
        help='Output the lowest loss value found as the solution',
        type=str2bool,
        nargs='?',
        const=True)
    parser.add_argument(
        '--average_best_loss',
        default=0.25,
        help=
        'Do a running weighted average with the previous best dlatents found',
        type=float)
    parser.add_argument('--sharpen_input',
                        default=True,
                        help='Sharpen the input images',
                        type=str2bool,
                        nargs='?',
                        const=True)

    # Loss function options
    parser.add_argument(
        '--use_vgg_loss',
        default=0.4,
        help='Use VGG perceptual loss; 0 to disable, > 0 to scale.',
        type=float)
    parser.add_argument('--use_vgg_layer',
                        default=9,
                        help='Pick which VGG layer to use.',
                        type=int)
    parser.add_argument(
        '--use_pixel_loss',
        default=1.5,
        help='Use logcosh image pixel loss; 0 to disable, > 0 to scale.',
        type=float)
    parser.add_argument(
        '--use_mssim_loss',
        default=200,
        help='Use MS-SIM perceptual loss; 0 to disable, > 0 to scale.',
        type=float)
    parser.add_argument(
        '--use_lpips_loss',
        default=100,
        help='Use LPIPS perceptual loss; 0 to disable, > 0 to scale.',
        type=float)
    parser.add_argument(
        '--use_l1_penalty',
        default=0.5,
        help='Use L1 penalty on latents; 0 to disable, > 0 to scale.',
        type=float)
    parser.add_argument('--use_discriminator_loss',
                        default=0.5,
                        help='Use trained discriminator to evaluate realism.',
                        type=float)
    parser.add_argument(
        '--use_adaptive_loss',
        default=False,
        help=
        'Use the adaptive robust loss function from Google Research for pixel and VGG feature loss.',
        type=str2bool,
        nargs='?',
        const=True)

    # Generator params
    parser.add_argument('--randomize_noise',
                        default=False,
                        help='Add noise to dlatents during optimization',
                        type=str2bool,
                        nargs='?',
                        const=True)
    parser.add_argument(
        '--tile_dlatents',
        default=False,
        help='Tile dlatents to use a single vector at each scale',
        type=str2bool,
        nargs='?',
        const=True)
    parser.add_argument(
        '--clipping_threshold',
        default=2.0,
        help='Stochastic clipping of gradient values outside of this threshold',
        type=float)

    # Masking params
    parser.add_argument('--load_mask',
                        default=False,
                        help='Load segmentation masks',
                        type=str2bool,
                        nargs='?',
                        const=True)
    parser.add_argument(
        '--face_mask',
        default=True,
        help='Generate a mask for predicting only the face area',
        type=str2bool,
        nargs='?',
        const=True)
    parser.add_argument(
        '--use_grabcut',
        default=True,
        help=
        'Use grabcut algorithm on the face mask to better segment the foreground',
        type=str2bool,
        nargs='?',
        const=True)
    parser.add_argument(
        '--scale_mask',
        default=1.4,
        help='Look over a wider section of foreground for grabcut',
        type=float)
    parser.add_argument(
        '--composite_mask',
        default=True,
        help='Merge the unmasked area back into the generated image',
        type=str2bool,
        nargs='?',
        const=True)
    parser.add_argument(
        '--composite_blur',
        default=8,
        help='Size of blur filter to smoothly composite the images',
        type=int)

    # Video params
    parser.add_argument('--video_dir',
                        default='videos',
                        help='Directory for storing training videos')
    parser.add_argument('--output_video',
                        default=False,
                        help='Generate videos of the optimization process',
                        type=bool)
    parser.add_argument('--video_codec',
                        default='MJPG',
                        help='FOURCC-supported video codec name')
    parser.add_argument('--video_frame_rate',
                        default=24,
                        help='Video frames per second',
                        type=int)
    parser.add_argument('--video_size',
                        default=512,
                        help='Video size in pixels',
                        type=int)
    parser.add_argument(
        '--video_skip',
        default=1,
        help='Only write every n frames (1 = write every frame)',
        type=int)

    args, other_args = parser.parse_known_args()

    args.decay_steps *= 0.01 * args.iterations  # Calculate steps as a percent of total iterations

    if args.output_video:
        import cv2
        synthesis_kwargs = dict(output_transform=dict(
            func=tflib.convert_images_to_uint8, nchw_to_nhwc=False),
                                minibatch_size=args.batch_size)

    ref_images = [
        os.path.join(args.src_dir, x) for x in os.listdir(args.src_dir)
        if x[0] not in '._'
    ]
    ref_images = list(filter(os.path.isfile, ref_images))

    if len(ref_images) == 0:
        raise Exception('%s is empty' % args.src_dir)

    os.makedirs(args.data_dir, exist_ok=True)
    os.makedirs(args.mask_dir, exist_ok=True)
    os.makedirs(args.generated_images_dir, exist_ok=True)
    os.makedirs(args.dlatent_dir, exist_ok=True)
    os.makedirs(args.video_dir, exist_ok=True)

    # Initialize generator and perceptual model
    tflib.init_tf()
    generator_network, discriminator_network, Gs_network = pretrained_networks.load_networks(
        args.model_url)

    generator = Generator(Gs_network,
                          args.batch_size,
                          randomize_noise=args.randomize_noise)
    if (args.dlatent_avg != ''):
        generator.set_dlatent_avg(np.load(args.dlatent_avg))

    perc_model = None
    if (args.use_lpips_loss > 0.00000001):
        with dnnlib.util.open_url(args.vgg_url,
                                  cache_dir='.stylegan2-cache') as f:
            perc_model = pickle.load(f)

    perceptual_model = PerceptualModel(args,
                                       perc_model=perc_model,
                                       batch_size=args.batch_size)
    perceptual_model.build_perceptual_model(generator, discriminator_network)

    ff_model = None

    # Optimize (only) dlatents by minimizing perceptual loss between reference and generated images in feature space
    for images_batch in tqdm(split_to_batches(ref_images, args.batch_size),
                             total=len(ref_images) // args.batch_size):
        names = [
            os.path.splitext(os.path.basename(x))[0] for x in images_batch
        ]
        if args.output_video:
            video_out = {}
            for name in names:
                video_out[name] = cv2.VideoWriter(
                    os.path.join(args.video_dir, f'{name}.avi'),
                    cv2.VideoWriter_fourcc(*args.video_codec),
                    args.video_frame_rate, (args.video_size, args.video_size))

        perceptual_model.set_reference_images(images_batch)
        dlatents = None
        if (args.load_last != ''):  # load previous dlatents for initialization
            for name in names:
                dl = np.expand_dims(np.load(
                    os.path.join(args.load_last, f'{name}.npy')),
                                    axis=0)
                if (dlatents is None):
                    dlatents = dl
                else:
                    dlatents = np.vstack((dlatents, dl))
        else:
            if (ff_model is None):
                if os.path.exists(args.load_resnet):
                    from keras.applications.resnet50 import preprocess_input
                    print("Loading ResNet Model:")
                    ff_model = load_model(args.load_resnet)
            if (ff_model is None):
                if os.path.exists(args.load_effnet):
                    import efficientnet
                    from efficientnet import preprocess_input
                    print("Loading EfficientNet Model:")
                    ff_model = load_model(args.load_effnet)
            if (ff_model
                    is not None):  # predict initial dlatents with ResNet model
                if (args.use_preprocess_input):
                    dlatents = ff_model.predict(
                        preprocess_input(
                            load_images(images_batch,
                                        image_size=args.resnet_image_size)))
                else:
                    dlatents = ff_model.predict(
                        load_images(images_batch,
                                    image_size=args.resnet_image_size))
        if dlatents is not None:
            generator.set_dlatents(dlatents)
        op = perceptual_model.optimize(generator.dlatent_variable,
                                       iterations=args.iterations,
                                       use_optimizer=args.optimizer)
        pbar = tqdm(op, leave=False, total=args.iterations)
        vid_count = 0
        best_loss = None
        best_dlatent = None
        avg_loss_count = 0
        if args.early_stopping:
            avg_loss = prev_loss = None
        for loss_dict in pbar:
            if args.early_stopping:  # early stopping feature
                if prev_loss is not None:
                    if avg_loss is not None:
                        avg_loss = 0.5 * avg_loss + (prev_loss -
                                                     loss_dict["loss"])
                        if avg_loss < args.early_stopping_threshold:  # count while under threshold; else reset
                            avg_loss_count += 1
                        else:
                            avg_loss_count = 0
                        if avg_loss_count > args.early_stopping_patience:  # stop once threshold is reached
                            print("")
                            break
                    else:
                        avg_loss = prev_loss - loss_dict["loss"]
            pbar.set_description(" ".join(names) + ": " + "; ".join(
                ["{} {:.4f}".format(k, v) for k, v in loss_dict.items()]))
            if best_loss is None or loss_dict["loss"] < best_loss:
                if best_dlatent is None or args.average_best_loss <= 0.00000001:
                    best_dlatent = generator.get_dlatents()
                else:
                    best_dlatent = 0.25 * best_dlatent + 0.75 * generator.get_dlatents(
                    )
                if args.use_best_loss:
                    generator.set_dlatents(best_dlatent)
                best_loss = loss_dict["loss"]
            if args.output_video and (vid_count % args.video_skip == 0):
                batch_frames = generator.generate_images()
                for i, name in enumerate(names):
                    video_frame = PIL.Image.fromarray(
                        batch_frames[i], 'RGB').resize(
                            (args.video_size, args.video_size),
                            PIL.Image.LANCZOS)
                    video_out[name].write(
                        cv2.cvtColor(
                            np.array(video_frame).astype('uint8'),
                            cv2.COLOR_RGB2BGR))
            generator.stochastic_clip_dlatents()
            prev_loss = loss_dict["loss"]
        if not args.use_best_loss:
            best_loss = prev_loss
        print(" ".join(names), " Loss {:.4f}".format(best_loss))

        if args.output_video:
            for name in names:
                video_out[name].release()

        # Generate images from found dlatents and save them
        if args.use_best_loss:
            generator.set_dlatents(best_dlatent)
        generated_images = generator.generate_images()
        generated_dlatents = generator.get_dlatents()
        for img_array, dlatent, img_path, img_name in zip(
                generated_images, generated_dlatents, images_batch, names):
            mask_img = None
            if args.composite_mask and (args.load_mask or args.face_mask):
                _, im_name = os.path.split(img_path)
                mask_img = os.path.join(args.mask_dir, f'{im_name}')
            if args.composite_mask and mask_img is not None and os.path.isfile(
                    mask_img):
                orig_img = PIL.Image.open(img_path).convert('RGB')
                width, height = orig_img.size
                imask = PIL.Image.open(mask_img).convert('L').resize(
                    (width, height))
                imask = imask.filter(
                    ImageFilter.GaussianBlur(args.composite_blur))
                mask = np.array(imask) / 255
                mask = np.expand_dims(mask, axis=-1)
                img_array = mask * np.array(img_array) + (
                    1.0 - mask) * np.array(orig_img)
                img_array = img_array.astype(np.uint8)
                #img_array = np.where(mask, np.array(img_array), orig_img)
            img = PIL.Image.fromarray(img_array, 'RGB')
            img.save(
                os.path.join(args.generated_images_dir, f'{img_name}.png'),
                'PNG')
            np.save(os.path.join(args.dlatent_dir, f'{img_name}.npy'), dlatent)

        generator.reset_dlatents()
#-*- coding: utf-8 -*-

from keras.applications.resnet50 import preprocess_input
import numpy as np
import cv2

from src.model_builder import CamModelBuilder
from src.utils import plot_img, list_files


if __name__ == "__main__":
    detector = CamModelBuilder().get_cam_model()
    detector.load_weights("weights.h5", by_name=True)
    detector.summary()
    imgs = list_files("dataset//train//text")
     
    for i, img_path in enumerate(imgs):
        original_img = cv2.cvtColor(cv2.imread(img_path), cv2.COLOR_BGR2RGB)
         
        img = cv2.resize(original_img, (224, 224))
        img = np.expand_dims(img, 0).astype(np.float64)
     
        cam_map = detector.predict(preprocess_input(img))
        cam_map = cam_map[0, :, :, 1]
        cam_map = cv2.resize(cam_map, (original_img.shape[1], original_img.shape[0]))
         
        plot_img(original_img, cam_map, show=False, save_filename="{}.png".format(i+1))
Example No. 29
    print('the class', j)
    path = 'D:\\sns_images\\class\\' + str(j)
    for root, dir, files in os.walk(path):
        imgbatch = zeros((batchsize, 224, 224, 3))
        wordindex = 0
        num = 0
        classdata = []
        for file in files:
            try:
                img = image.load_img(path + '\\' + file, target_size=(224, 224))
                if wordindex == batchsize:
                    imgbatch = zeros((batchsize, 224, 224, 3))
                    wordindex = 0
                img = image.img_to_array(img)
                img = expand_dims(img, axis=0)
                img = preprocess_input(img)
                imgbatch[wordindex] = img
                wordindex += 1
                if wordindex == batchsize:
                    num += 1
                    print(num)
                    batchresult = model.predict(imgbatch)
                    batchmean = list(mean(batchresult, axis=0))
                    classdata.append(batchmean)
            except Exception as exc:
                print('skipping file', file, ':', exc)
        classdata = mat(classdata)
        classmean = mean(classdata, axis=0)
        allclassmean[j] = classmean[0, :]

save('D:\\tencent\\miniproject\\allclassmean', allclassmean)
Example No. 30
    test_feature = np.array([
        0,
    ])
    BATCHSIZE = 1024
    target_size = (224, 224)
    list_2 = [
        './image2/image/' + list for list in os.listdir("./image2/image")
    ]
    for i in range(int(len(list_2) / BATCHSIZE) + 1):
        imgs = []
        for j in range(BATCHSIZE):
            imgs.append(
                img_to_array(
                    load_img(list_2[i * BATCHSIZE + j],
                             target_size=target_size)))
            if i * BATCHSIZE + j == len(list_2) - 1:
                break
        imgs = np.asarray(imgs)
        if i == 0:
            test_feature = model.predict(preprocess_input(imgs))
        else:
            test_feature = np.vstack(
                [test_feature,
                 model.predict(preprocess_input(imgs))])
    print(test_feature.shape)
    # y = np.asarray(y, dtype=np.uint8)
    with h5py.File("./dense161_final_feature.h5") as h:
        # h.create_dataset("train", data=train_feature)
        h.create_dataset("test", data=test_feature)
        # h.create_dataset("label", data=y)
Example No. 31
def main():
    parser = argparse.ArgumentParser(
        description='Find latent representation of reference images using perceptual losses',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('src_dir', help='Directory with images for encoding')
    parser.add_argument('generated_images_dir', help='Directory for storing generated images')
    parser.add_argument('dlatent_dir', help='Directory for storing dlatent representations')
    parser.add_argument('--data_dir', default='data', help='Directory for storing optional models')
    parser.add_argument('--mask_dir', default='masks', help='Directory for storing optional masks')
    parser.add_argument('--load_last', default='', help='Start with embeddings from directory')
    parser.add_argument('--dlatent_avg', default='',
                        help='Use dlatent from file specified here for truncation instead of dlatent_avg from Gs')
    parser.add_argument('--model_url', default=config.Model,
                        help='Fetch a StyleGAN model to train on from this URL')  # karras2019stylegan-ffhq-1024x1024.pkl
    parser.add_argument('--model_res', default=1024, help='The dimension of images in the StyleGAN model', type=int)
    parser.add_argument('--batch_size', default=1, help='Batch size for generator and perceptual model', type=int)

    # Perceptual model params
    parser.add_argument('--image_size', default=256, help='Size of images for perceptual model', type=int)
    parser.add_argument('--resnet_image_size', default=224, help='Size of images for the Resnet model', type=int)
    parser.add_argument('--lr', default=0.02, help='Learning rate for perceptual model', type=float)
    parser.add_argument('--decay_rate', default=0.9, help='Decay rate for learning rate', type=float)
    parser.add_argument('--iterations', default=100, help='Number of optimization steps for each batch', type=int)
    parser.add_argument('--decay_steps', default=10,
                        help='Decay steps for learning rate decay (as a percent of iterations)', type=float)
    parser.add_argument('--load_effnet', default='data/finetuned_effnet.h5',
                        help='Model to load for EfficientNet approximation of dlatents')
    parser.add_argument('--load_resnet', default='data/resnet_18.h5',
                        help='Model to load for ResNet approximation of dlatents')

    # Loss function options
    parser.add_argument('--use_vgg_loss', default=0.4, help='Use VGG perceptual loss; 0 to disable, > 0 to scale.',
                        type=float)
    parser.add_argument('--use_vgg_layer', default=9, help='Pick which VGG layer to use.', type=int)
    parser.add_argument('--use_pixel_loss', default=1.5,
                        help='Use logcosh image pixel loss; 0 to disable, > 0 to scale.', type=float)
    parser.add_argument('--use_mssim_loss', default=100, help='Use MS-SIM perceptual loss; 0 to disable, > 0 to scale.',
                        type=float)
    parser.add_argument('--use_lpips_loss', default=100, help='Use LPIPS perceptual loss; 0 to disable, > 0 to scale.',
                        type=float)
    parser.add_argument('--use_l1_penalty', default=1, help='Use L1 penalty on latents; 0 to disable, > 0 to scale.',
                        type=float)

    # Generator params
    parser.add_argument('--randomize_noise', default=False, help='Add noise to dlatents during optimization', type=bool)
    parser.add_argument('--tile_dlatents', default=False, help='Tile dlatents to use a single vector at each scale',
                        type=bool)
    parser.add_argument('--clipping_threshold', default=2.0,
                        help='Stochastic clipping of gradient values outside of this threshold', type=float)

    # Masking params
    parser.add_argument('--load_mask', default=False, help='Load segmentation masks', type=bool)
    parser.add_argument('--face_mask', default=False, help='Generate a mask for predicting only the face area',
                        type=bool)
    parser.add_argument('--use_grabcut', default=True,
                        help='Use grabcut algorithm on the face mask to better segment the foreground', type=bool)
    parser.add_argument('--scale_mask', default=1.5, help='Look over a wider section of foreground for grabcut',
                        type=float)

    # Video params
    parser.add_argument('--video_dir', default='videos', help='Directory for storing training videos')
    parser.add_argument('--output_video', default=False, help='Generate videos of the optimization process', type=bool)
    parser.add_argument('--video_codec', default='MJPG', help='FOURCC-supported video codec name')
    parser.add_argument('--video_frame_rate', default=24, help='Video frames per second', type=int)
    parser.add_argument('--video_size', default=512, help='Video size in pixels', type=int)
    parser.add_argument('--video_skip', default=1, help='Only write every n frames (1 = write every frame)', type=int)

    # When parsing the basic settings, options passed on the command line that are only picked up later do not raise an error; the extra arguments are kept and used afterwards
    args, other_args = parser.parse_known_args()

    # Steps over which the learning rate decays
    args.decay_steps *= 0.01 * args.iterations  # Calculate steps as a percent of total iterations

    if args.output_video:
        import cv2
        synthesis_kwargs = dict(output_transform=dict(func=tflib.convert_images_to_uint8, nchw_to_nhwc=False),
                                minibatch_size=args.batch_size)

    # Find all image files under src_dir and add them to ref_images (the list of source images; a single image is also fine)
    ref_images = [os.path.join(args.src_dir, x) for x in os.listdir(args.src_dir)]
    ref_images = list(filter(os.path.isfile, ref_images))

    if len(ref_images) == 0:
        raise Exception('%s is empty' % args.src_dir)

    # Create the working directories
    os.makedirs(args.data_dir, exist_ok=True)
    os.makedirs(args.mask_dir, exist_ok=True)
    os.makedirs(args.generated_images_dir, exist_ok=True)
    os.makedirs(args.dlatent_dir, exist_ok=True)
    os.makedirs(args.video_dir, exist_ok=True)

    # Initialize generator and perceptual model
    tflib.init_tf()
    # Load the StyleGAN model
    model_file = glob.glob(args.model_url)
    if len(model_file) == 1:
        model_file = open(model_file[0], "rb")
    else:
        raise Exception('Failed to find the model')
    generator_network, discriminator_network, Gs_network = pickle.load(model_file)

    # Load the Generator class; it takes part in building the VGG16 perceptual model and is used to produce generated_image
    # generated_image is converted by perceptual_model into generated_img_features, which enter the loss computation
    generator = Generator(Gs_network, args.batch_size, clipping_threshold=args.clipping_threshold,
                          tiled_dlatent=args.tile_dlatents, model_res=args.model_res,
                          randomize_noise=args.randomize_noise)
    if (args.dlatent_avg != ''):
        generator.set_dlatent_avg(np.load(args.dlatent_avg))

    perc_model = None
    if (args.use_lpips_loss > 0.00000001):  # '--use_lpips_loss', default = 100
        # Load the VGG16 perceptual model
        model_file = glob.glob('./models/vgg16_zhang_perceptual.pkl')
        if len(model_file) == 1:
            model_file = open(model_file[0], "rb")
        else:
            raise Exception('Failed to find the model')
        perc_model = pickle.load(model_file)

    # Build the VGG16 perceptual model
    perceptual_model = PerceptualModel(args, perc_model=perc_model, batch_size=args.batch_size)
    perceptual_model.build_perceptual_model(generator)

    ff_model = None
    # Optimize (only) dlatents by minimizing perceptual loss between reference and generated images in feature space
    # tqdm is a fast, extensible Python progress bar that adds progress information to long-running loops
    # Split ref_images into batches of size args.batch_size and, batch by batch, use perceptual_model.optimize() to find the optimal dlatents for each source image
    # For each source image, optimization starts from an initial dlatents and, sampling normally distributed values within a region and using the Adam optimizer, searches step by step for the dlatents that minimize the loss (the stochastic clipping method)
    for images_batch in tqdm(split_to_batches(ref_images, args.batch_size), total=len(ref_images) // args.batch_size):
        print('Timer started')
        starttime = time.time()
        # Read the file names in this batch
        names = [os.path.splitext(os.path.basename(x))[0] for x in images_batch]
        if args.output_video:
            video_out = {}
            for name in names:
                video_out[name] = cv2.VideoWriter(os.path.join(args.video_dir, f'{name}.avi'),
                                                  cv2.VideoWriter_fourcc(*args.video_codec), args.video_frame_rate,
                                                  (args.video_size, args.video_size))

        # Set the source images and the VGG16 features generated from them (these are the reference for the loss)
        perceptual_model.set_reference_images(images_batch)
        dlatents = None
        if (args.load_last != ''):  # load previous dlatents for initialization
            for name in names:
                dl = np.expand_dims(np.load(os.path.join(args.load_last, f'{name}.npy')), axis=0)
                if (dlatents is None):
                    dlatents = dl
                else:
                    dlatents = np.vstack((dlatents, dl))
        else:
            if (ff_model is None):
                if os.path.exists(args.load_resnet):
                    print("Loading ResNet Model:")
                    ff_model = load_model(args.load_resnet)
                    from keras.applications.resnet50 import preprocess_input
            if (ff_model is None):
                if os.path.exists(args.load_effnet):
                    import efficientnet
                    print("Loading EfficientNet Model:")
                    ff_model = load_model(args.load_effnet)
                    from efficientnet import preprocess_input
            if (ff_model is not None):  # predict initial dlatents with ResNet model
                dlatents = ff_model.predict(
                    preprocess_input(load_images(images_batch, image_size=args.resnet_image_size)))
        # Set the initial dlatents for the perceptual_model optimization; they are predicted from the source images with resnet50 or efficientnet
        if dlatents is not None:
            generator.set_dlatents(dlatents)
        # For each source image, build a tqdm progress bar showing the optimization iterations
        op = perceptual_model.optimize(generator.dlatent_variable, iterations=args.iterations)
        pbar = tqdm(op, leave=False, total=args.iterations)
        vid_count = 0
        best_loss = None
        best_dlatent = None
        # Optimize with the VGG16 perceptual_model using stochastic clipping, for iterations=args.iterations steps
        endtime = time.time()
        print('Time until iterations start:', round(endtime - starttime, 2), 'secs')
        for loss_dict in pbar:
            pbar.set_description(" ".join(names) + ": " + "; ".join(["{} {:.4f}".format(k, v)
                                                                     for k, v in loss_dict.items()]))
            if best_loss is None or loss_dict["loss"] < best_loss:
                best_loss = loss_dict["loss"]
                best_dlatent = generator.get_dlatents()
            if args.output_video and (vid_count % args.video_skip == 0):
                batch_frames = generator.generate_images()
                for i, name in enumerate(names):
                    video_frame = PIL.Image.fromarray(batch_frames[i], 'RGB').resize((args.video_size, args.video_size),
                                                                                     PIL.Image.LANCZOS)
                    video_out[name].write(cv2.cvtColor(np.array(video_frame).astype('uint8'), cv2.COLOR_RGB2BGR))
            # Update dlatent_variable with stochastic clipping
            generator.stochastic_clip_dlatents()
        print(" ".join(names), " Loss {:.4f}".format(best_loss))

        if args.output_video:
            for name in names:
                video_out[name].release()

        # Generate images from found dlatents and save them
        generator.set_dlatents(best_dlatent)
        generated_images = generator.generate_images()
        generated_dlatents = generator.get_dlatents()
        endtime = time.time()
        print('Computation finished in:', round(endtime - starttime, 2), 'secs')
        for img_array, dlatent, img_name in zip(generated_images, generated_dlatents, names):
            img = PIL.Image.fromarray(img_array, 'RGB')
            img.save(os.path.join(args.generated_images_dir, f'{img_name}.png'), 'PNG')
            np.save(os.path.join(args.dlatent_dir, f'{img_name}.npy'), dlatent)

        generator.reset_dlatents()
        endtime = time.time()
        print('Writing finished in:', round(endtime - starttime, 2), 'secs')
Example No. 32
def preprocess_image(img):
    img = image.load_img(img, target_size=(224,224))
    img = image.img_to_array(img)
    img = np.expand_dims(img, axis=0)
    img = preprocess_input(img)
    return img
Example No. 33
def extract_Resnet50(tensor):
    return ResNet50(weights='imagenet',
                    include_top=False).predict(preprocess_input(tensor))
def ResNet50_predict_labels(img_path):
    # Return the prediction vector for the image at img_path
    img = preprocess_input(path_to_tensor(img_path))
    return np.argmax(ResNet50_model.predict(img))
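path_to_tensor is not defined in this snippet; a typical definition (an assumption here, matching how the helper is commonly written for this pipeline) is:

import numpy as np
from keras.preprocessing import image

def path_to_tensor(img_path):
    # load the image as 224x224 RGB and add a batch dimension: (1, 224, 224, 3)
    img = image.load_img(img_path, target_size=(224, 224))
    x = image.img_to_array(img)
    return np.expand_dims(x, axis=0)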
Example No. 35
if __name__ == '__main__':
    #Output dim for your dataset
    output_dim = 257  #For Caltech256
    images_list = []
    images_names = []
    val_images_list = []
    val_images_names = []
    examples_per_class = 3
    validation_per_class = 1
    images_list, images_names, val_images_list, val_images_names = load_all_images(
        'C:/Users/Chetan/Documents/CSE253/PA3/256_ObjectCategories',
        '256_ObjectCategories', images_list, images_names, val_images_list,
        val_images_names, examples_per_class, validation_per_class)
    # Normalization
    images_list = preprocess_input(np.array(images_list))
    val_images_list = preprocess_input(np.array(val_images_list))
    images_list = images_list / 255.0
    val_images_list = val_images_list / 255.0

    # Get one hot representation
    image_category = get_one_hot(images_names)
    val_image_category = get_one_hot(val_images_names)
    # Shuffle
    X_train, y_train = shuffle(images_list, image_category)
    tl_model = getModel(output_dim)
    tl_model.summary()
    #Train the model
    tl_model.compile(loss='categorical_crossentropy',
                     optimizer=RMSprop(lr=0.001,
                                       decay=1e-2,
Example No. 36
        # More image pre-processing.
        im_224 = image.img_to_array(im_224)
        im_224 = np.expand_dims(
            im_224, axis=0
        )  # This gives the image 4 dimensions and is necessary for future steps.

        im_299 = image.img_to_array(im_299)
        im_299 = np.expand_dims(im_299, axis=0)

        im_331 = image.img_to_array(im_331)
        im_331 = np.expand_dims(im_331, axis=0)

        # Generate predictions for each model.
        resnet_pred = imagenet_utils.decode_predictions(
            resnet.predict(resnet50.preprocess_input(im_224)), 5)
        xc_pred = imagenet_utils.decode_predictions(
            xc.predict(xception.preprocess_input(im_299)), 5)
        v19_pred = imagenet_utils.decode_predictions(
            v19.predict(vgg19.preprocess_input(im_224)), 5)
        ic3_pred = imagenet_utils.decode_predictions(
            ic3.predict(inception_v3.preprocess_input(im_299)), 5)
        ic_resnet_pred = imagenet_utils.decode_predictions(
            ic_resnet.predict(inception_resnet_v2.preprocess_input(im_299)), 5)
        mobile_pred = imagenet_utils.decode_predictions(
            mobile.predict(mobilenet.preprocess_input(im_224)), 5)
        nn_large_pred = imagenet_utils.decode_predictions(
            nn_large.predict(nasnet.preprocess_input(im_331)), 5)

        # Update result dictionaries based on tags.
        resnet_results = update_dict(preds=resnet_pred,
Ejemplo n.º 37
0
    def do_POST(s):
        length = int(s.headers['Content-Length'])
        body = s.rfile.read(length).decode('utf-8')
        if s.headers['Content-type'] == 'application/json':
            post_data = json.loads(body)
        else:
            post_data = urllib.parse.parse_qs(body)

        modelid = post_data['model']
        try:
            model = model_impls[modelid]['class'](**model_impls[modelid]['params'])
        except Exception as e:
            # Exception objects have no .message attribute in Python 3; use str(e)
            logger.error("Unable to load model: {reason}".format(reason=str(e)))
            s.send_response(300)
            s.send_header("Content-type", "application/json")
            s.end_headers()
            # s.wfile is a binary stream, so serialize to bytes (matching the success path below)
            s.wfile.write(json.dumps({
                "status": 300,
                "message": str(e),
            }).encode())
            return
        
        target_size = (dict([(m["id"],m['image_size']) for m in models]))[modelid]
        concepts = []
        for annotation in post_data['annotations']:
            aid = annotation['annotationid']
            begin = annotation['begin']
            end = annotation['end']
            
            batch_x = np.zeros((len(annotation['frames']),target_size,target_size,3), dtype=np.float32)
            for i,frame in enumerate(annotation['frames']):
                # Load image to PIL format
                img = Image.open(BytesIO(base64.b64decode(frame['screenshot'])))
                # cache frame - FIXME: currently there is no means to identify the video - the same timestamp will overwrite an old frame (hash?)
                img.save(os.path.join(CACHE_DIR,'{0}.png'.format(frame['timecode'])))
                if img.mode != 'RGB':
                    img = img.convert('RGB')
                hw_tuple = (target_size, target_size)
                if img.size != hw_tuple:
                    logger.warn("Scaling image to model size - this should be done in advene!")
                    img = img.resize(hw_tuple)
                x = image.img_to_array(img)
                x = np.expand_dims(x, axis=0)
                x = preprocess_input(x)
                batch_x[i] = x[0,:,:,:]
            preds = model.predict_on_batch(np.asarray(batch_x))

            # decode the results into a list of tuples (class, description, probability)
            # (one such list for each sample in the batch)
            decoded = decode_predictions(preds, top=top_n_preds)
            confidences = dict()
            for t in itertools.chain.from_iterable(decoded):
                if t[1] in confidences:
                    confidences[t[1]].append(float(t[2]))
                else:
                    confidences[t[1]] = [float(t[2])]
            logger.debug(confidences)
            
            concepts.extend([
            {
                'annotationid': aid,
                'confidence': max(confidences[l]),
                #FIXME: set correct timecode - set timecode of frame with max confidence?
                'timecode': annotation['begin'], #timestamp_in_ms,
                'label': l,
                'uri': 'http://concept.org/%s' % l
            } for l in confidences]
            )

        logger.debug(concepts)
        s.send_response(200)
        s.send_header("Content-type", "application/json")
        s.end_headers()
        response=json.dumps({
            "status": 200,
            "message": "OK",
            "data": {
                'media_filename': post_data["media_filename"],
                'media_uri': post_data["media_uri"],
                'concepts': concepts
            }
        })
        s.wfile.write(response.encode())
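For context, a request to this handler bundles base64-encoded frames per annotation; a hypothetical client call could look like the sketch below (the URL, model id, and file names are placeholders, the field names are taken from the handler above):

import base64
import requests

with open('frame.png', 'rb') as f:
    screenshot = base64.b64encode(f.read()).decode('ascii')

payload = {
    'model': 'resnet50',                        # must match a key of model_impls
    'media_filename': 'movie.mp4',
    'media_uri': 'http://example.org/movie.mp4',
    'annotations': [{
        'annotationid': 1,
        'begin': 0,
        'end': 1000,
        'frames': [{'timecode': 0, 'screenshot': screenshot}],
    }],
}
resp = requests.post('http://localhost:8000', json=payload)
print(resp.json()['data']['concepts'])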
resnet.summary()

# make a model to get output before flatten
activation_layer = resnet.get_layer('activation_49')

# create a model object
model = Model(inputs=resnet.input, outputs=activation_layer.output)

# get the feature map weights
final_dense = resnet.get_layer('fc1000')
W = final_dense.get_weights()[0]


while True:
  img = image.load_img(np.random.choice(image_files), target_size=(224, 224))
  x = preprocess_input(np.expand_dims(img, 0))
  fmaps = model.predict(x)[0] # 7 x 7 x 2048

  # get predicted class
  probs = resnet.predict(x)
  classnames = decode_predictions(probs)[0]
  print(classnames)
  classname = classnames[0][1]
  pred = np.argmax(probs[0])

  # get the 2048 weights for the relevant class
  w = W[:, pred]

  # "dot" w with fmaps
  cam = fmaps.dot(w)
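The excerpt stops at the raw 7x7 class activation map; a typical continuation (not part of the original code) upsamples it to the 224x224 input resolution and overlays it on the image:

import matplotlib.pyplot as plt
import scipy.ndimage

cam_up = scipy.ndimage.zoom(cam, (32, 32), order=1)  # 7x7 -> 224x224
plt.subplot(1, 2, 1)
plt.imshow(img, alpha=0.8)
plt.imshow(cam_up, cmap='jet', alpha=0.5)
plt.subplot(1, 2, 2)
plt.imshow(img)
plt.title(classname)
plt.show()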
def ResNet50_predict_labels(img_path):
    img = preprocess_input(to_tensor(img_path))
    return np.argmax(ResNet50_model.predict(img))
Ejemplo n.º 40
0
def ResNet50_predict_labels(img_path):
    # returns prediction vector for image located at img_path
    img = preprocess_input(path_to_tensor(img_path))
    return np.argmax(ResNet50_model.predict(img))
Ejemplo n.º 41
0
from keras.applications.resnet50 import ResNet50
from keras.preprocessing import image
from keras.applications.resnet50 import preprocess_input, decode_predictions
import numpy as np
from docopt import docopt  # used below to parse the IMAGE_PATH arguments

height = 224
width = 224
channels = 3
top = 5


def load_image(path):
    img = image.load_img(path, target_size=(224, 224))
    x = image.img_to_array(img)
    return x


if __name__ == '__main__':
    args = docopt(__doc__)
    paths = args['IMAGE_PATH']
    model = ResNet50(weights='imagenet')

    x = np.zeros((len(paths), height, width, channels))
    for i, path in enumerate(paths):
        x[i] = load_image(path)
    x = preprocess_input(x)

    predictions = decode_predictions(model.predict(x), top=top)
    for prediction in predictions:
        print(prediction)
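docopt(__doc__) reads the module docstring, which this excerpt does not include; a minimal sketch of a compatible usage string that would sit at the top of the file (the script name is a placeholder):

"""Classify images with an ImageNet-pretrained ResNet50.

Usage:
    classify.py IMAGE_PATH...
"""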
Ejemplo n.º 42
0
    def _preprocess(self, image_tensor):
        """Preprocess image data by modifying it directly"""
        resnet50.preprocess_input(image_tensor)
Ejemplo n.º 43
0
def main():
    parser = argparse.ArgumentParser(
        description=
        'Find latent representation of reference images using perceptual losses',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('src_dir', help='Directory with images for encoding')
    parser.add_argument('generated_images_dir',
                        help='Directory for storing generated images')
    parser.add_argument('dlatent_dir',
                        help='Directory for storing dlatent representations')
    parser.add_argument('--data_dir',
                        default='data',
                        help='Directory for storing optional models')
    parser.add_argument('--mask_dir',
                        default='masks',
                        help='Directory for storing optional masks')
    parser.add_argument('--load_last',
                        default='',
                        help='Start with embeddings from directory')
    parser.add_argument(
        '--dlatent_avg',
        default='',
        help=
        'Use dlatent from file specified here for truncation instead of dlatent_avg from Gs'
    )
    # parser.add_argument('--model_url', default='https://drive.google.com/uc?id=1MEGjdvVpUsu1jB4zrXZN7Y4kBBOzizDQ', help='Fetch a StyleGAN model to train on from this URL') # karras2019stylegan-ffhq-1024x1024.pkl
    parser.add_argument(
        '--model_url',
        default='./cache/karras2019stylegan-ffhq-1024x1024.pkl',
        help='load local model -- Jack12'
    )  # karras2019stylegan-ffhq-1024x1024.pkl
    parser.add_argument('--model_res',
                        default=1024,
                        help='The dimension of images in the StyleGAN model',
                        type=int)
    parser.add_argument('--batch_size',
                        default=1,
                        help='Batch size for generator and perceptual model',
                        type=int)

    # Perceptual model params
    parser.add_argument('--image_size',
                        default=256,
                        help='Size of images for perceptual model',
                        type=int)
    parser.add_argument('--resnet_image_size',
                        default=256,
                        help='Size of images for the Resnet model',
                        type=int)
    parser.add_argument('--lr',
                        default=0.02,
                        help='Learning rate for perceptual model',
                        type=float)
    parser.add_argument('--decay_rate',
                        default=0.9,
                        help='Decay rate for learning rate',
                        type=float)
    parser.add_argument('--iterations',
                        default=200,
                        help='Number of optimization steps for each batch',
                        type=int)
    parser.add_argument(
        '--decay_steps',
        default=10,
        help='Decay steps for learning rate decay (as a percent of iterations)',
        type=float)
    parser.add_argument(
        '--load_effnet',
        default='data/finetuned_effnet.h5',
        help='Model to load for EfficientNet approximation of dlatents')
    parser.add_argument(
        '--load_resnet',
        default='data/finetuned_resnet.h5',
        help='Model to load for ResNet approximation of dlatents')

    # Loss function options
    parser.add_argument(
        '--use_vgg_loss',
        default=0.4,
        help='Use VGG perceptual loss; 0 to disable, > 0 to scale.',
        type=float)
    parser.add_argument('--use_vgg_layer',
                        default=9,
                        help='Pick which VGG layer to use.',
                        type=int)
    parser.add_argument(
        '--use_pixel_loss',
        default=1.5,
        help='Use logcosh image pixel loss; 0 to disable, > 0 to scale.',
        type=float)
    parser.add_argument(
        '--use_mssim_loss',
        default=100,
        help='Use MS-SIM perceptual loss; 0 to disable, > 0 to scale.',
        type=float)
    parser.add_argument(
        '--use_lpips_loss',
        default=100,
        help='Use LPIPS perceptual loss; 0 to disable, > 0 to scale.',
        type=float)
    parser.add_argument(
        '--use_l1_penalty',
        default=1,
        help='Use L1 penalty on latents; 0 to disable, > 0 to scale.',
        type=float)
    parser.add_argument('--use_tex_loss',
                        default=3.0,
                        help='Use texture space loss.',
                        type=float)

    # Generator params
    parser.add_argument('--randomize_noise',
                        default=False,
                        help='Add noise to dlatents during optimization',
                        type=bool)
    parser.add_argument(
        '--tile_dlatents',
        default=False,
        help='Tile dlatents to use a single vector at each scale',
        type=bool)
    parser.add_argument(
        '--clipping_threshold',
        default=2.0,
        help='Stochastic clipping of gradient values outside of this threshold',
        type=float)

    # Face Alignment Model
    parser.add_argument('--crop_face',
                        default=False,
                        help='Crop face image use face alignment model',
                        type=bool)

    # Masking params
    parser.add_argument('--load_mask',
                        default=False,
                        help='Load segmentation masks',
                        type=bool)
    parser.add_argument(
        '--face_mask',
        default=False,
        help='Generate a mask for predicting only the face area',
        type=bool)
    parser.add_argument(
        '--use_grabcut',
        default=True,
        help=
        'Use grabcut algorithm on the face mask to better segment the foreground',
        type=bool)
    parser.add_argument(
        '--scale_mask',
        default=1.5,
        help='Look over a wider section of foreground for grabcut',
        type=float)

    # Video params
    parser.add_argument('--video_dir',
                        default='videos',
                        help='Directory for storing training videos')
    parser.add_argument('--output_video',
                        default=False,
                        help='Generate videos of the optimization process',
                        type=bool)
    parser.add_argument('--video_codec',
                        default='MJPG',
                        help='FOURCC-supported video codec name')
    parser.add_argument('--video_frame_rate',
                        default=24,
                        help='Video frames per second',
                        type=int)
    parser.add_argument('--video_size',
                        default=512,
                        help='Video size in pixels',
                        type=int)
    parser.add_argument(
        '--video_skip',
        default=1,
        help='Only write every n frames (1 = write every frame)',
        type=int)

    args, other_args = parser.parse_known_args()

    args.decay_steps *= 0.01 * args.iterations  # Calculate steps as a percent of total iterations

    if args.output_video:
        import cv2
        synthesis_kwargs = dict(output_transform=dict(
            func=tflib.convert_images_to_uint8, nchw_to_nhwc=False),
                                minibatch_size=args.batch_size)

    ref_images = [
        os.path.join(args.src_dir, x) for x in os.listdir(args.src_dir)
    ]
    ref_images = list(filter(os.path.isfile, ref_images))

    if len(ref_images) == 0:
        raise Exception('%s is empty' % args.src_dir)

    os.makedirs(args.data_dir, exist_ok=True)
    os.makedirs(args.mask_dir, exist_ok=True)
    os.makedirs(args.generated_images_dir, exist_ok=True)
    os.makedirs(args.dlatent_dir, exist_ok=True)
    os.makedirs(args.video_dir, exist_ok=True)

    # Initialize generator and perceptual model
    tflib.init_tf()
    with dnnlib.util.open_url(args.model_url, cache_dir=config.cache_dir) as f:
        print('Load model from %s' % (args.model_url))
        generator_network, discriminator_network, Gs_network = pickle.load(f)

    generator = Generator(Gs_network,
                          args.batch_size,
                          clipping_threshold=args.clipping_threshold,
                          tiled_dlatent=args.tile_dlatents,
                          model_res=args.model_res,
                          randomize_noise=args.randomize_noise)
    if (args.dlatent_avg != ''):
        generator.set_dlatent_avg(np.load(args.dlatent_avg))

    perc_model = None
    if (args.use_lpips_loss > 0.00000001):
        # with dnnlib.util.open_url('https://drive.google.com/uc?id=1N2-m9qszOeVC9Tq77WxsLnuWwOedQiD2', cache_dir=config.cache_dir) as f:
        #     perc_model =  pickle.load(f)
        with dnnlib.util.open_url('./cache/vgg16_zhang_perceptual.pkl',
                                  cache_dir=config.cache_dir) as f:
            perc_model = pickle.load(f)
    #         Jack12 load local model
    perceptual_model = PerceptualModel(args,
                                       perc_model=perc_model,
                                       batch_size=args.batch_size)
    perceptual_model.build_perceptual_model(generator)

    ff_model = None

    cur_batch_id = 0

    # Optimize (only) dlatents by minimizing perceptual loss between reference and generated images in feature space
    for images_batch in tqdm(split_to_batches(ref_images, args.batch_size),
                             total=len(ref_images) // args.batch_size):
        cur_batch_id += 1
        names = [
            os.path.splitext(os.path.basename(x))[0] for x in images_batch
        ]
        if args.output_video:
            video_out = {}
            for name in names:
                video_out[name] = cv2.VideoWriter(
                    os.path.join(args.video_dir, f'{name}.avi'),
                    cv2.VideoWriter_fourcc(*args.video_codec),
                    args.video_frame_rate, (args.video_size, args.video_size))

        perceptual_model.set_reference_images(images_batch, cur_batch_id)
        dlatents = None
        if (args.load_last != ''):  # load previous dlatents for initialization
            for name in names:
                dl = np.expand_dims(np.load(
                    os.path.join(args.load_last, f'{name}.npy')),
                                    axis=0)
                if (dlatents is None):
                    dlatents = dl
                else:
                    dlatents = np.vstack((dlatents, dl))
        else:
            if (ff_model is None):
                if os.path.exists(args.load_resnet):
                    print("Loading ResNet Model:")
                    ff_model = load_model(args.load_resnet)
                    from keras.applications.resnet50 import preprocess_input
            if (ff_model is None):
                if os.path.exists(args.load_effnet):
                    import efficientnet
                    print("Loading EfficientNet Model:")
                    ff_model = load_model(args.load_effnet)
                    from efficientnet import preprocess_input
            if (ff_model
                    is not None):  # predict initial dlatents with ResNet model
                dlatents = ff_model.predict(
                    preprocess_input(
                        load_images(images_batch,
                                    image_size=args.resnet_image_size)))
        if dlatents is not None:
            generator.set_dlatents(dlatents)
        op = perceptual_model.optimize(generator.dlatent_variable,
                                       iterations=args.iterations)
        pbar = tqdm(op, leave=False, total=args.iterations)
        vid_count = 0
        best_loss = None
        best_dlatent = None
        for loss_dict in pbar:
            pbar.set_description(" ".join(names) + ": " + "; ".join(
                ["{} {:.4f}".format(k, v) for k, v in loss_dict.items()]))
            if best_loss is None or loss_dict["loss"] < best_loss:
                best_loss = loss_dict["loss"]
                best_dlatent = generator.get_dlatents()
            if args.output_video and (vid_count % args.video_skip == 0):
                batch_frames = generator.generate_images()
                for i, name in enumerate(names):
                    video_frame = PIL.Image.fromarray(
                        batch_frames[i], 'RGB').resize(
                            (args.video_size, args.video_size),
                            PIL.Image.LANCZOS)
                    video_out[name].write(
                        cv2.cvtColor(
                            np.array(video_frame).astype('uint8'),
                            cv2.COLOR_RGB2BGR))
            generator.stochastic_clip_dlatents()
        print(" ".join(names), " Loss {:.4f}".format(best_loss))

        if args.output_video:
            for name in names:
                video_out[name].release()

        # Generate images from found dlatents and save them
        generator.set_dlatents(best_dlatent)
        generated_images = generator.generate_images()
        generated_dlatents = generator.get_dlatents()
        for img_array, dlatent, img_name in zip(generated_images,
                                                generated_dlatents, names):
            img = PIL.Image.fromarray(img_array, 'RGB')
            img.save(
                os.path.join(args.generated_images_dir, f'{img_name}.png'),
                'PNG')
            np.save(os.path.join(args.dlatent_dir, f'{img_name}.npy'), dlatent)

        generator.reset_dlatents()
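The original script presumably closes with the standard entry-point guard, which this excerpt omits:

if __name__ == "__main__":
    main()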
x = Dense(702, activation='softmax', name='fc8', kernel_initializer=RandomNormal(mean=0.0, stddev=0.001))(x)
net = Model(inputs=base_model.input, outputs=x)

for layer in net.layers:
    layer.trainable = True

# load data
images, labels = [], []
with open(LIST, 'r') as f:
  for line in f:
    line = line.strip()
    img, lbl = line.split()
    img = image.load_img(os.path.join(TRAIN, img), target_size=[224, 224])
    img = image.img_to_array(img)
    img = np.expand_dims(img, axis=0)
    img = preprocess_input(img) 

    images.append(img[0])
    labels.append(int(lbl))

images = np.array(images)
labels = to_categorical(labels)

# train
batch_size = 16
datagen = ImageDataGenerator(featurewise_center=False,
    samplewise_center=False,
    featurewise_std_normalization=False,
    samplewise_std_normalization=False,
    zca_whitening=False,
    rotation_range=20, # 0. 
Ejemplo n.º 45
0
  for img in images:
    x = Image.open( img_path + '/' + img)
    x = x.resize( ( 224 , 224 ))
    x = np.array(x , dtype = np.float32)

    x_.append(x)
    y_.append(dict_labels[img_folder])
    
X = np.array(x_)
Y = np.array(y_)
print(X.shape)
print(Y.shape)

print(type(X[3]))

X = preprocess_input(X)

print(type(X[3]))

Y = np_utils.to_categorical(Y)
print(Y.shape)

X, Y = shuffle(X, Y)



X_train, X_test, Y_train, Y_test = train_test_split( X, Y, test_size = 0.2 )

print(X_train.shape,  X_test.shape, Y_train.shape, Y_test.shape)

resnet_model = ResNet50( input_shape = (224,224,3) , include_top = False, weights = 'imagenet' )
model = ResNet50(
    weights="../weight/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5",
    include_top=False,
    pooling='avg')

for flower in flower_type:
    data = []
    dir = "../train/" + flower
    print("Feature Extraction (" + flower + ") begin")
    for img in os.listdir(dir):
        path = os.path.join(dir, img)
        img = image.load_img(path, target_size=(224, 224))
        data.append(np.array(img))
    X = np.array(data)
    X = preprocess_input(X)
    feature = model.predict(X)
    np.save("../feature/" + flower + "_resnet50", feature)
    print("Feature Extraction (" + flower + ") done")

dir = "../test"
data = []
print("Feature Extraction (test) begin")

for i in range(424):
    path = "../test/" + str(i) + ".jpg"
    img = image.load_img(path, target_size=(224, 224))
    data.append(np.array(img))

test = np.array(data)
test = preprocess_input(test)
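The excerpt ends after preprocessing the test batch; presumably the continuation mirrors the per-class loop above, along these lines (the output path is a guess):

feature = model.predict(test)
np.save("../feature/test_resnet50", feature)
print("Feature Extraction (test) done")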
def generator(batch_size, image_list, image_shape, coco_instance, id_to_index,
              is_training):
    """Generator

    Generate the images for models to train

    Args:
        - batch_size: batch_size
        - image_list: the list of file name of images
        - image_shape: the target image shape
        - coco_instance: the ground truth of COCO dataset
        - id_to_index: dictionary project id to index
        - is_training: open or close data augmentation

    Returns:
        - all_img: shape: (batch_size, image_shape[0], image_shape[1], 3)
        - label: (batch_size, image_shape[0], image_shape[1], classes)
    """

    aug, mask_hook = utilities.img_aug()

    def f(id):
        if id != 0:
            return id_to_index[id]
        else:
            return id_to_index[183]

    # Lambda function to convert id to index
    vfunc = np.vectorize(f)

    while True:

        # Random Shuffling
        random.shuffle(image_list)

        index = 0
        while index + batch_size < len(image_list):
            all_img = np.zeros((batch_size, image_shape[0], image_shape[1], 3),
                               dtype=np.float32)
            label = np.zeros(
                (batch_size, image_shape[0], image_shape[1], len(id_to_index)))
            i = 0
            while i < batch_size:
                image = image_list[index]
                im = cv2.imread(image)
                # cv2.imread returns BGR; the image is converted back to RGB below.
                # Skip the image if its height or width is smaller than the
                # target crop size (the grayscale check is left commented out).
                #if (im.shape[0] < image_shape[0] or im.shape[1] < image_shape[1]) or im.ndim == 2:
                #    i -= 1
                #    continue
                if (im.shape[0] < image_shape[0]
                        or im.shape[1] < image_shape[1]):
                    index += 1
                    continue

                im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)

                # Get the id containing 12 numbers (000000XXXXXX)
                lbl_id = int(image.replace(".jpg", '')[-12:])
                lbl = cocoSegmentationToSegmentationMap(coco_instance, lbl_id)
                lbl = vfunc(lbl.astype(np.uint8))

                # Resize
                #im = transform.rescale(im, 0.5)

                # Random Crop
                rnd_x = random.randint(0, im.shape[0] - image_shape[0])
                rnd_y = random.randint(0, im.shape[1] - image_shape[1])

                crop_im = im[rnd_x:rnd_x + image_shape[0],
                             rnd_y:rnd_y + image_shape[1], :]
                crop_lbl = lbl[rnd_x:rnd_x + image_shape[0],
                               rnd_y:rnd_y + image_shape[1]]

                # Convert to one hot
                crop_lbl = keras.utils.to_categorical(
                    crop_lbl, num_classes=len(id_to_index))

                # Save data
                all_img[i] = crop_im
                label[i] = crop_lbl

                index += 1
                i += 1

            if is_training:
                # Apply the deterministic copy so images and masks receive identical transforms
                aug_det = aug.to_deterministic()
                all_img = aug_det.augment_images(all_img)
                label = aug_det.augment_images(label, hooks=mask_hook)

            #all_img = preprocess_input(all_img, mode = "torch")
            all_img = preprocess_input(all_img)
            yield all_img, label
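Since the generator yields (images, labels) batches indefinitely, it plugs straight into Keras' fit_generator; a usage sketch, where model, train_list, coco_train, and id_to_index are hypothetical objects prepared elsewhere:

batch_size = 8
image_shape = (320, 320)

model.fit_generator(
    generator(batch_size, train_list, image_shape, coco_train, id_to_index, True),
    steps_per_epoch=len(train_list) // batch_size,
    epochs=10)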
Ejemplo n.º 48
0
def ResNet50_predict_labels(img_path):
    # returns prediction vector for image located at img_path
    img = preprocess_input(path_to_tensor(img_path))
    return np.argmax(ResNet50_model.predict(img))
Ejemplo n.º 49
0
img_path = sys.argv[1]

try:
    # load YAML and create model
    yaml_file = open('model.yaml', 'r')
    loaded_model_yaml = yaml_file.read()
    yaml_file.close()
    model = model_from_yaml(loaded_model_yaml)
    # load weights into new model
    model.load_weights("model.h5")
    print("Loaded model from disk")
except Exception:  # fall back to the ImageNet weights if no cached model exists
    model = ResNet50(weights='imagenet')
    # serialize model to YAML
    model_yaml = model.to_yaml()
    with open("model.yaml", "w") as yaml_file:
        yaml_file.write(model_yaml)
    # serialize weights to HDF5
    model.save_weights("model.h5")
    print("Saved model to disk")

img = image.load_img(img_path, target_size=(224, 224))
x = image.img_to_array(img)
x = np.expand_dims(x, axis=0)
x = preprocess_input(x)

preds = model.predict(x)
# decode the results into a list of tuples (class, description, probability)
# (one such list for each sample in the batch)
print('Predicted:', decode_predictions(preds, top=3)[0])
Ejemplo n.º 50
0
def get_features(img, model):
    img_data = img_to_array(img.resize((224, 224)))
    img_data = expand_dims(img_data, axis=0)
    img_data = preprocess_input(img_data)
    return model.predict(img_data)
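A usage sketch for this helper with a pooled ResNet50 feature extractor (the image path is a placeholder; with pooling='avg' the result is a (1, 2048) feature vector):

from PIL import Image
from keras.applications.resnet50 import ResNet50

extractor = ResNet50(weights='imagenet', include_top=False, pooling='avg')
features = get_features(Image.open('query.jpg'), extractor)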
Ejemplo n.º 51
0
def ResNet50_predict_labels(img_path):
    # returns prediction vector for image located at img_path
    img = preprocess_input(path_to_tensor(img_path))
    return np.argmax(ResNet50_model.predict(img))