def start():
    model = keras.applications.vgg19.VGG19(include_top=False, weights='imagenet', pooling='avg')
    root_dir = "/media/rishabh/dump_bin/Animals_with_Attributes2/JPEGImages/"
    for root, subdirs, files in os.walk(root_dir):
        list_file_path = os.path.join(root, 'list_of_files.txt')
        with open(list_file_path, 'wb') as list_file:
            for filename in files:
                if filename.endswith("jpg"):
                    file_path = os.path.join(root, filename)
                    img = image.load_img(file_path, target_size=(224, 224))
                    x = image.img_to_array(img)
                    x = np.expand_dims(x, axis=0)
                    x = preprocess_input(x)
                    features = model.predict(x)
                    np_name = filename[0:-4]
                    np_name = np_name + ".npy"
                    np.save(os.path.join(root, np_name), features)
                    # npy = open(os.path.join(root, np_name), "w+")
                    print('file %s (full path: %s)' % (filename, file_path))
                    list_file.write(('%s\n' % filename).encode('utf-8'))
def preprocess_xray_flipped(xray_path):
    # Note: keras' load_img has no vertical_flip argument and target_size is
    # (height, width); flip explicitly after loading instead.
    xray = image.load_img(xray_path, color_mode="grayscale",
                          target_size=(img_dims[0], img_dims[1]))
    xray = image.img_to_array(xray)
    xray = np.flipud(xray)  # vertical flip
    xray = np.dstack([xray, xray, xray])  # replicate grayscale into 3 channels
    xray = preprocess_input(xray)
    return xray
def classifyImage(fname):
    img = image.load_img(fname, target_size=(224, 224))
    x = image.img_to_array(img)
    x = np.expand_dims(x, axis=0)
    x = preprocess_input(x)
    pred = decode_predictions(model.predict(x), top=1)[0][0]
    return (pred[1], pred[2])
def predict(model, img, target_size, top_n=3):
    """Run model prediction on image"""
    if img.size != target_size:
        img = img.resize(target_size)
    x = image.img_to_array(img)
    x = np.expand_dims(x, axis=0)
    x = preprocess_input(x)
    preds = model.predict(x)
    return decode_predictions(preds, top=top_n)[0]
def predict(img):
    print('{}:'.format(img))
    # Load the image file, resizing it to 224x224 pixels (required by this model)
    img_df = image.load_img(img, target_size=(224, 224))
    x = image.img_to_array(img_df)  # [row[column[r, g, b]]]
    x = np.expand_dims(x, axis=0)   # array of images
    # Scale and predict
    predictions = model.predict(resnet50.preprocess_input(x))
    predicted_classes = resnet50.decode_predictions(predictions, top=5)
    for imagenet_id, name, likelihood in predicted_classes[0]:
        print(' - {}: {:.2f} likelihood'.format(name, likelihood))
    print('\n')
def __read_all_images(src):
    files = listdir(src)
    images = {}
    i = 0
    total = len(files)
    for f in files:
        if not f.endswith(".jpg"):
            continue
        im = Image.open(src + f)
        im = img_to_array(im)
        im = preprocess_input(im)
        images[f[:-4]] = im
        if i % 100 == 0:
            print(str(i) + " / " + str(total))
        i += 1
    return images
def read_image(i):
    rPath = image_filenames[i]
    dPath = depth_filenames[i]
    print(i)
    image = misc.imread(rPath) / 1.
    image_resized = resize(image, output_shape=(224, 224))
    depth = misc.imread(dPath).astype(np.uint16) / 1000.0
    depth_resized = resize(depth, output_shape=(224, 224))  # (480, 640) -> model output (224, 224)
    print(image_resized.shape)
    print(np.expand_dims(depth_resized, -1).shape)
    return preprocess_input(image_resized), np.expand_dims(depth_resized, -1)
def _f():
    start = 0
    end = start + batch_size
    n = data.shape[0]
    while True:
        X_batch = session.run(resize_op, {img_placeholder: data[start:end]})
        X_batch = preprocess_input(X_batch)
        y_batch = labels[start:end]
        start += batch_size
        end += batch_size
        if start >= n:
            start = 0
            end = batch_size
            print(start, end)
        yield (X_batch, y_batch)
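# A hedged usage sketch for the infinite generator above; `model`, `data`,
# `labels`, `batch_size`, `session`, and `resize_op` are assumed to be defined
# elsewhere in the original script.
train_gen = _f()
model.fit_generator(train_gen,
                    steps_per_epoch=int(np.ceil(data.shape[0] / batch_size)),
                    epochs=10)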
def extract_feature(dir_path, net):
    features = []
    infos = []
    for image_name in os.listdir(dir_path):
        arr = image_name.split('_')
        person = int(arr[0])
        camera = int(arr[1][1])
        image_path = os.path.join(dir_path, image_name)
        img = image.load_img(image_path, target_size=(224, 224))
        x = image.img_to_array(img)
        x = np.expand_dims(x, axis=0)
        x = preprocess_input(x)
        feature = net.predict(x)
        features.append(np.squeeze(feature))
        infos.append((person, camera))
    return features, infos
def predict(model, img, target_size, top_n=3):
    """Run model prediction on image

    Args:
        model: keras model
        img: PIL format image
        target_size: (w,h) tuple
        top_n: # of top predictions to return

    Returns:
        list of predicted labels and their probabilities
    """
    if img.size != target_size:
        img = img.resize(target_size)
    x = image.img_to_array(img)
    x = np.expand_dims(x, axis=0)
    x = preprocess_input(x)
    preds = model.predict(x)
    return decode_predictions(preds, top=top_n)[0]
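# A minimal usage sketch for predict() above, assuming a stock ImageNet
# ResNet50; the sample file 'elephant.jpg' is a hypothetical path.
from keras.applications.resnet50 import ResNet50
from PIL import Image

model = ResNet50(weights='imagenet')
img = Image.open('elephant.jpg')
for _, name, prob in predict(model, img, target_size=(224, 224)):
    print('{}: {:.4f}'.format(name, prob))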
def pix2depth(path, model):
    model_name = 'p2d'
    originalImage = cv2.imread(path)
    loaded_model = model_list['pix2depth'][model]
    file_name = model + '_' + path.split('/')[-1]
    output_file = os.path.join(output_path, file_name)
    if model == 'CNN':
        originalImage = cv2.resize(originalImage, (img_dim, img_dim))
        x = preprocess_input(originalImage / 1.)
    elif model == 'CycleGAN':
        test(path)
        os.system('cp gautam/inf_results/imgs/fakeA_0_0.jpg %s' % output_file)
    else:
        originalImage = cv2.resize(originalImage, (256, 256))
        x = originalImage / 255.
    if not model == 'CycleGAN':
        p1 = get_depth_map(x, loaded_model)
        cv2.imwrite(output_file, p1)
    return output_file
def load_image(img_path):
    data = image.load_img(img_path, target_size=(224, 224))
    x = image.img_to_array(data)
    x = np.expand_dims(x, axis=0)
    x = preprocess_input(x)
    return x
######################################################################
# Load a test image
# ------------------
# A single cat dominates the examples!
from PIL import Image
from matplotlib import pyplot as plt
from keras.applications.resnet50 import preprocess_input

img_url = 'https://github.com/dmlc/mxnet.js/blob/master/data/cat.png?raw=true'
download(img_url, 'cat.png')
img = Image.open('cat.png').resize((224, 224))
plt.imshow(img)
plt.show()
# input preprocess
data = np.array(img)[np.newaxis, :].astype('float32')
data = preprocess_input(data).transpose([0, 3, 1, 2])
print('input_1', data.shape)

######################################################################
# Compile the model on NNVM
# --------------------------
# We should be familiar with the process now.
# Convert the Keras model (NHWC layout) to NNVM format (NCHW layout).
sym, params = nnvm.frontend.from_keras(keras_resnet50)
# compile the model
target = 'cuda'
shape_dict = {'input_1': data.shape}
with nnvm.compiler.build_config(opt_level=3):
    graph, lib, params = nnvm.compiler.build(sym, target, shape_dict, params=params)
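######################################################################
# Execute on TVM
# --------------
# A hedged sketch of the usual NNVM deploy flow for the artifacts built
# above; assumes `tvm` is importable and a CUDA device is available.
import tvm
from tvm.contrib import graph_runtime

ctx = tvm.gpu(0)
m = graph_runtime.create(graph, lib, ctx)
m.set_input('input_1', tvm.nd.array(data.astype('float32')))
m.set_input(**params)
m.run()
tvm_out = m.get_output(0)
print('TVM top-1 class id:', np.argmax(tvm_out.asnumpy()[0]))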
bounds=(0, 255), preprocessing=preprocessing)

img_folderpath = "./sample_images/"
img_path = img_folderpath + 'sample_image_2.jpg'  # An image of a yellow cab/taxi
x = image.load_img(img_path, color_mode='rgb', target_size=(img_rows, img_cols))
img = image.img_to_array(x)
img = np.expand_dims(img, axis=0)
img = img.reshape(img_shape)
label = 468  # For the class of taxi and cab

# Note that preprocess_input is an in-place operation.
prediction = kmodel.predict(
    preprocess_input(np.copy(img)).reshape(
        (1, img_rows, img_cols, nb_channels)))
# decode the results into a list of tuples (class, description, probability)
# (one such list for each sample in the batch)
print('Prediction on the original example:',
      decode_predictions(prediction, top=3)[0])
# The original image is correctly classified as a cab with a confidence of 0.999.

# apply attack on source image
# ::-1 reverses the color channels, because Keras ResNet50 expects BGR instead of RGB
attack = foolbox.attacks.FGSM(fmodel)
adversarial = attack(img[:, :, ::-1], label)[:, :, ::-1]
# if the attack fails, adversarial will be None and a warning will be printed
adversarial_prediction = kmodel.predict(
    adversarial.reshape((1, img_rows, img_cols, nb_channels)))
print('Prediction on the adversarial example:',
def preprocess_image(image_path):
    img = load_img(image_path, target_size=(img_nrows, img_ncols))
    img = img_to_array(img)
    img = np.expand_dims(img, axis=0)
    img = resnet50.preprocess_input(img)
    return img
def extract_InceptionV3(tensor):
    from keras.applications.inception_v3 import InceptionV3, preprocess_input
    return InceptionV3(weights='imagenet', include_top=False).predict(preprocess_input(tensor))
#%%
# prepare the image for the VGG model
processed_image = vgg16.preprocess_input(image_batch.copy())
# get the predicted probabilities for each class
predictions = vgg_model.predict(processed_image)
# print(predictions)
# convert the probabilities to class labels
# we will get the top 5 predictions, which is the default
label = decode_predictions(predictions)
print(label)

#%%
# prepare the image for the ResNet model
processed_image = resnet50.preprocess_input(image_batch.copy())
# get the predicted probabilities for each class
predictions = resnet_model.predict(processed_image)
# print(predictions)
# convert the probabilities to class labels
# we will get the top 5 predictions, which is the default
label = decode_predictions(predictions)
print(label)

#%%
# prepare the image for the MobileNet model
processed_image = mobilenet.preprocess_input(image_batch.copy())
# get the predicted probabilities for each class
import numpy as np
from keras.preprocessing import image
from keras.applications import resnet50

model = resnet50.ResNet50()

img = image.load_img("image.jpg", target_size=(224, 224))
x = image.img_to_array(img)
x = np.expand_dims(x, axis=0)
x = resnet50.preprocess_input(x)

predictions = model.predict(x)
predicted_classes = resnet50.decode_predictions(predictions, top=9)

print("This is an image of:")
for imagenet_id, name, likelihood in predicted_classes[0]:
    print(" - {}: {:.2f} likelihood".format(name, likelihood))
from keras.preprocessing.image import load_img
from keras.preprocessing.image import img_to_array
from keras.applications import resnet50
from keras.applications.imagenet_utils import decode_predictions
import keras
import numpy as np

model = keras.applications.resnet50.ResNet50(weights="imagenet")
path = "../input/starfish/asd.jpg"

# load an image in PIL format
original_image = load_img(path, target_size=(224, 224))
numpy_image = img_to_array(original_image)
# Convert the image into a 4D tensor (samples, height, width, channels)
# by adding an extra dimension at axis 0.
input_image = np.expand_dims(numpy_image, axis=0)

# preprocess for resnet50
processed_image_resnet50 = resnet50.preprocess_input(input_image.copy())

# resnet50 predictions
predictions_resnet50 = model.predict(processed_image_resnet50)
label_resnet50 = decode_predictions(predictions_resnet50)
print(label_resnet50)
def predict_dog_label():
    """Predicts the dog breed and recommends similar dogs based on its characteristics"""
    url = input("Enter img url :")
    image_to_tensor = np.zeros((1, 224, 224, 3), dtype='float32')
    # converts image to centralized format for ResNet model
    image_to_tensor[0] = preprocess_input(
        np.expand_dims(read_img(url).copy(), axis=0))
    # predicts bottleneck features using ResNet model
    get_image_features = model_RN50.predict(image_to_tensor, verbose=1)
    # predicts image label based on features from ResNet model
    pred = ((model_2cr.predict(get_image_features) +
             model_2dr.predict(get_image_features) +
             model_2gr.predict(get_image_features) +
             model_2jr.predict(get_image_features) +
             model_2lr.predict(get_image_features)) / 5)
    # returns top 5 predicted labels
    top_5 = [
        i[0] for i in sorted(list(zip(list(y_table.columns), pred.tolist()[0])),
                             key=lambda x: x[1],
                             reverse=True)[:6]
    ]
    label = list(y_table.columns)[int(pred.argmax(axis=1))]
    show_image_pred(url, pred.argmax(axis=1))
    # finds most similar picture in local database based on cosine similarity of image features
    similar_pic = sorted(list(
        zip(list(train_breeds_names[0]),
            (cosine_similarity(get_image_features,
                               pd.DataFrame(train_breeds_features))).reshape(-1, 1))),
        key=lambda x: x[1],
        reverse=True)[1][0]
    time.sleep(0.5)
    sim = input('\nDo you want the most similar pic in database: (y/n) ')
    if sim == 'y':
        print('\n Most similar pic in database: \n')
        show_image(similar_pic)
    time.sleep(0.5)
    print('\nTop 5 visually most similar dogs:', top_5[1:])
    show = input('\nDo you want to see those dogs?: (y/n) ')
    if show == 'y':
        show_rand_image(top_5[1:])
    feat = input("\nDo you want to see the features of your dog? (y/n) ")
    if feat == 'y':
        if dog_features.loc[dog_features['label_name'] == label].values[0][1:-1][0] == 0:
            print('\nThis dog is not a pet or no information available\n')
            feat_vis = input('\nDo you want to see the features of visually similar dogs? (y/n) ')
            if feat_vis == 'y':
                find_features_table(top_5[1:])
            return label
        find_features_table([label])
    feat_sim = input('\nDo you want to find similar dogs based on its features? (y/n) ')
    if feat_sim == 'y':
        print('\n')
        # finds most similar dog based on cosine similarity of dog characteristics
        top_5_feat = [
            x[0] for x in sorted(list(
                zip(list(dog_features.label_name),
                    (cosine_similarity(dog_features[dog_features.columns[1:-1]])
                     [int(pred.argmax(axis=1))]))),
                key=lambda x: x[1],
                reverse=True)[1:6]
        ]
        find_features_table(top_5_feat)
        feat_sim2 = input("\nDo you want to see them? (y/n) ")
        if feat_sim2 == 'y':
            show_rand_image(top_5_feat)
    return label
def load_image(img_path):
    img = image.load_img(img_path, target_size=(299, 299))
    x = image.img_to_array(img)
    x = np.expand_dims(x, axis=0)
    x = preprocess_input(x)
    return x
resnet.summary()

# make a model to get the output before flatten
activation_layer = resnet.get_layer('activation_49')

# create a model object
model = Model(inputs=resnet.input, outputs=activation_layer.output)

# get the feature map weights
final_dense = resnet.get_layer('fc1000')
W = final_dense.get_weights()[0]

while True:
    img = image.load_img(np.random.choice(image_files), target_size=(224, 224))
    x = preprocess_input(np.expand_dims(img, 0))
    fmaps = model.predict(x)[0]  # 7 x 7 x 2048

    # get predicted class
    probs = resnet.predict(x)
    classnames = decode_predictions(probs)[0]
    print(classnames)
    classname = classnames[0][1]
    pred = np.argmax(probs[0])

    # get the 2048 weights for the relevant class
    w = W[:, pred]

    # "dot" w with fmaps
    cam = fmaps.dot(w)
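# A hedged continuation sketch: upsample the 7x7 class activation map to the
# 224x224 input resolution and overlay it on the image. scipy and matplotlib
# are assumed available; in the original script this would sit inside the
# loop body above (shown unindented here as a standalone fragment).
import scipy.ndimage
import matplotlib.pyplot as plt

cam_up = scipy.ndimage.zoom(cam, (32, 32), order=1)  # 7x7 -> 224x224
plt.imshow(img, alpha=0.8)
plt.imshow(cam_up, cmap='jet', alpha=0.5)
plt.title(classname)
plt.show()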
import random
from sklearn.model_selection import train_test_split
from keras.preprocessing import image
from keras.applications.resnet50 import preprocess_input
from sklearn.preprocessing import OneHotEncoder

random.shuffle(data_pair)
X = [item[0] for item in data_pair]
Y = [item[1] for item in data_pair]

image_data_list = []
for img_path in X:
    img = image.load_img(img_path, target_size=(224, 224))
    img_data = image.img_to_array(img)
    img_data = preprocess_input(img_data)
    image_data_list.append(np.array(img_data))
image_data_list = np.array(image_data_list)
# print(len(image_data_list))

Y = np.array(Y)
onehot_encoder = OneHotEncoder(sparse=False)
Y = Y.reshape(len(Y), 1)
onehot_encoded = onehot_encoder.fit_transform(Y)

X_train, X_valid, Y_train, Y_valid = train_test_split(image_data_list,
                                                      onehot_encoded,
                                                      test_size=0.30,
                                                      random_state=42)
encoder = preprocessing.LabelEncoder()
encoder.fit(Classes['Bird Class'].values)
print(encoder.classes_)
y_train = onehot(encoder.transform(y_train1))
y_valid = onehot(encoder.transform(y_valid1))
y_test = onehot(encoder.transform(y_test1))
print(np.sum(np.argmax(y_train, axis=1) == encoder.transform(y_train1)))
print(np.sum(np.argmax(y_valid, axis=1) == encoder.transform(y_valid1)))
print(np.sum(np.argmax(y_test, axis=1) == encoder.transform(y_test1)))

x_train, y_train = shuffle(x_train, y_train)
x_valid, y_valid = shuffle(x_valid, y_valid)
x_test, y_test = shuffle(x_test, y_test)

x_train = preprocess_input(x_train)
x_valid = preprocess_input(x_valid)
x_test = preprocess_input(x_test)

# In[113]:

# training process
nb_epoch = int(sys.argv[1])
batch_size = int(sys.argv[2])
print("Epochs: {}".format(nb_epoch), "Batch Size: {}".format(batch_size))

checkpointer = ModelCheckpoint(filepath=sys.argv[3],
                               verbose=1,
                               monitor='val_categorical_accuracy',
                               save_best_only=True)
model.fit_generator(datagen.flow(x_train, y_train, batch_size=batch_size),
                    steps_per_epoch=(x_train.shape[0] / batch_size) + 1,
# print(imgs.shape)
ag_imgs = []
for k in range(imgs.shape[0]):
    n = 0
    for ag in datagen.flow(imgs[k], batch_size=1):
        ag_imgs.append(ag)
        new_labels.append(label[k])
        n += 1
        if n > 5:  # generate 5 augmented copies per image
            break
ag_imgs = np.asarray(ag_imgs)
ag_imgs = ag_imgs.reshape(ag_imgs.shape[0], ag_imgs.shape[2],
                          ag_imgs.shape[3], ag_imgs.shape[4])
if i == 0:
    train_feature = model.predict(preprocess_input(ag_imgs))
else:
    print(train_feature.shape, model.predict(preprocess_input(ag_imgs)).shape)
    train_feature = np.vstack(
        (train_feature, model.predict(preprocess_input(ag_imgs))))
print(train_feature.shape, len(new_labels))
new_labels = np.asarray(new_labels)

list_2 = [test_data_path + name for name in os.listdir(test_data_path)]
for i in range(int(len(list_2) / BATCHSIZE) + 1):
    imgs = []
    for j in range(BATCHSIZE):
        if i * BATCHSIZE + j < len(list_2):
            imgs.append(
                img_to_array(
                    load_img(list_2[i * BATCHSIZE + j],
######################################################################
# Load a test image
# ------------------
# A single cat dominates the examples!
from PIL import Image
from matplotlib import pyplot as plt
from keras.applications.resnet50 import preprocess_input

img_url = 'https://github.com/dmlc/mxnet.js/blob/master/data/cat.png?raw=true'
img_path = download_testdata(img_url, 'cat.png', module='data')
img = Image.open(img_path).resize((224, 224))
plt.imshow(img)
plt.show()
# input preprocess
data = np.array(img)[np.newaxis, :].astype('float32')
data = preprocess_input(data).transpose([0, 3, 1, 2])

######################################################################
# Compile the model with Relay
# ----------------------------
# Convert the Keras model (NHWC layout) to Relay format (NCHW layout).
shape_dict = {'input_1': data.shape}
mod, params = relay.frontend.from_keras(keras_resnet50, shape_dict)
# compile the model
target = "llvm -mcpu=core-avx2"
with relay.build_config(opt_level=3):
    graph, lib, params = relay.build(mod, target, params=params)

from tvm.contrib import graph_runtime as runtime
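######################################################################
# Execute on TVM
# --------------
# A hedged sketch of the standard Relay deploy flow on CPU, continuing
# from the `runtime` import above; assumes `tvm` is importable.
import tvm

ctx = tvm.cpu(0)
m = runtime.create(graph, lib, ctx)
m.set_input('input_1', tvm.nd.array(data.astype('float32')))
m.set_input(**params)
m.run()
tvm_out = m.get_output(0)
print('Relay top-1 class id:', np.argmax(tvm_out.asnumpy()[0]))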
def main():
    parser = argparse.ArgumentParser(
        description='Find latent representation of reference images using perceptual loss')
    parser.add_argument('src_dir', help='Directory with images for encoding')
    parser.add_argument('generated_images_dir', help='Directory for storing generated images')
    parser.add_argument('dlatent_dir', help='Directory for storing dlatent representations')
    parser.add_argument('--data_dir', default='data', help='Directory for storing optional models')
    parser.add_argument('--mask_dir', default='masks', help='Directory for storing optional masks')
    parser.add_argument('--load_last', default='', help='Start with embeddings from directory')
    parser.add_argument('--dlatent_avg', default='',
                        help='Use dlatent from file specified here for truncation instead of dlatent_avg from Gs')
    parser.add_argument('--model_url', default='gdrive:networks/stylegan2-ffhq-config-f.pkl',
                        help='Fetch a StyleGAN model to train on from this URL')
    parser.add_argument('--model_res', default=1024, type=int,
                        help='The dimension of images in the StyleGAN model')
    parser.add_argument('--batch_size', default=1, type=int,
                        help='Batch size for generator and perceptual model')
    parser.add_argument('--optimizer', default='ggt',
                        help='Optimization algorithm used for optimizing dlatents')

    # Perceptual model params
    parser.add_argument('--vgg_url',
                        default='https://drive.google.com/uc?id=1N2-m9qszOeVC9Tq77WxsLnuWwOedQiD2',
                        help='Fetch VGG model from this URL')
    parser.add_argument('--image_size', default=256, type=int,
                        help='Size of images for perceptual model')
    parser.add_argument('--resnet_image_size', default=256, type=int,
                        help='Size of images for the Resnet model')
    parser.add_argument('--lr', default=0.25, type=float, help='Learning rate for perceptual model')
    parser.add_argument('--decay_rate', default=0.9, type=float, help='Decay rate for learning rate')
    parser.add_argument('--iterations', default=100, type=int,
                        help='Number of optimization steps for each batch')
    parser.add_argument('--decay_steps', default=4, type=float,
                        help='Decay steps for learning rate decay (as a percent of iterations)')
    parser.add_argument('--early_stopping', default=True, type=str2bool, nargs='?', const=True,
                        help='Stop early once training stabilizes')
    parser.add_argument('--early_stopping_threshold', default=0.5, type=float,
                        help='Stop after this threshold has been reached')
    parser.add_argument('--early_stopping_patience', default=10, type=int,
                        help='Number of iterations to wait below threshold')
    parser.add_argument('--load_effnet', default='data/finetuned_effnet.h5',
                        help='Model to load for EfficientNet approximation of dlatents')
    parser.add_argument('--load_resnet', default='data/finetuned_resnet.h5',
                        help='Model to load for ResNet approximation of dlatents')
    parser.add_argument('--use_preprocess_input', default=True, type=str2bool, nargs='?', const=True,
                        help='Call preprocess_input() first before using feed forward net')
    parser.add_argument('--use_best_loss', default=True, type=str2bool, nargs='?', const=True,
                        help='Output the lowest loss value found as the solution')
    parser.add_argument('--average_best_loss', default=0.25, type=float,
                        help='Do a running weighted average with the previous best dlatents found')
    parser.add_argument('--sharpen_input', default=True, type=str2bool, nargs='?', const=True,
                        help='Sharpen the input images')

    # Loss function options
    parser.add_argument('--use_vgg_loss', default=0.4, type=float,
                        help='Use VGG perceptual loss; 0 to disable, > 0 to scale.')
    parser.add_argument('--use_vgg_layer', default=9, type=int, help='Pick which VGG layer to use.')
    parser.add_argument('--use_pixel_loss', default=1.5, type=float,
                        help='Use logcosh image pixel loss; 0 to disable, > 0 to scale.')
    parser.add_argument('--use_mssim_loss', default=200, type=float,
                        help='Use MS-SSIM perceptual loss; 0 to disable, > 0 to scale.')
    parser.add_argument('--use_lpips_loss', default=100, type=float,
                        help='Use LPIPS perceptual loss; 0 to disable, > 0 to scale.')
    parser.add_argument('--use_l1_penalty', default=0.5, type=float,
                        help='Use L1 penalty on latents; 0 to disable, > 0 to scale.')
    parser.add_argument('--use_discriminator_loss', default=0.5, type=float,
                        help='Use trained discriminator to evaluate realism.')
    parser.add_argument('--use_adaptive_loss', default=False, type=str2bool, nargs='?', const=True,
                        help='Use the adaptive robust loss function from Google Research for pixel and VGG feature loss.')

    # Generator params
    parser.add_argument('--randomize_noise', default=False, type=str2bool, nargs='?', const=True,
                        help='Add noise to dlatents during optimization')
    parser.add_argument('--tile_dlatents', default=False, type=str2bool, nargs='?', const=True,
                        help='Tile dlatents to use a single vector at each scale')
    parser.add_argument('--clipping_threshold', default=2.0, type=float,
                        help='Stochastic clipping of gradient values outside of this threshold')

    # Masking params
    parser.add_argument('--load_mask', default=False, type=str2bool, nargs='?', const=True,
                        help='Load segmentation masks')
    parser.add_argument('--face_mask', default=True, type=str2bool, nargs='?', const=True,
                        help='Generate a mask for predicting only the face area')
    parser.add_argument('--use_grabcut', default=True, type=str2bool, nargs='?', const=True,
                        help='Use grabcut algorithm on the face mask to better segment the foreground')
    parser.add_argument('--scale_mask', default=1.4, type=float,
                        help='Look over a wider section of foreground for grabcut')
    parser.add_argument('--composite_mask', default=True, type=str2bool, nargs='?', const=True,
                        help='Merge the unmasked area back into the generated image')
    parser.add_argument('--composite_blur', default=8, type=int,
                        help='Size of blur filter to smoothly composite the images')

    # Video params
    parser.add_argument('--video_dir', default='videos', help='Directory for storing training videos')
    parser.add_argument('--output_video', default=False, type=bool,
                        help='Generate videos of the optimization process')
    parser.add_argument('--video_codec', default='MJPG', help='FOURCC-supported video codec name')
    parser.add_argument('--video_frame_rate', default=24, type=int, help='Video frames per second')
    parser.add_argument('--video_size', default=512, type=int, help='Video size in pixels')
    parser.add_argument('--video_skip', default=1, type=int,
                        help='Only write every n frames (1 = write every frame)')

    args, other_args = parser.parse_known_args()
    args.decay_steps *= 0.01 * args.iterations  # Calculate steps as a percent of total iterations

    if args.output_video:
        import cv2
        synthesis_kwargs = dict(output_transform=dict(func=tflib.convert_images_to_uint8,
                                                      nchw_to_nhwc=False),
                                minibatch_size=args.batch_size)

    ref_images = [os.path.join(args.src_dir, x) for x in os.listdir(args.src_dir)
                  if x[0] not in '._']
    ref_images = list(filter(os.path.isfile, ref_images))
    if len(ref_images) == 0:
        raise Exception('%s is empty' % args.src_dir)

    os.makedirs(args.data_dir, exist_ok=True)
    os.makedirs(args.mask_dir, exist_ok=True)
    os.makedirs(args.generated_images_dir, exist_ok=True)
    os.makedirs(args.dlatent_dir, exist_ok=True)
    os.makedirs(args.video_dir, exist_ok=True)

    # Initialize generator and perceptual model
    tflib.init_tf()
    generator_network, discriminator_network, Gs_network = pretrained_networks.load_networks(
        args.model_url)
    generator = Generator(Gs_network, args.batch_size, randomize_noise=args.randomize_noise)
    if (args.dlatent_avg != ''):
        generator.set_dlatent_avg(np.load(args.dlatent_avg))

    perc_model = None
    if (args.use_lpips_loss > 0.00000001):
        with dnnlib.util.open_url(args.vgg_url, cache_dir='.stylegan2-cache') as f:
            perc_model = pickle.load(f)
    perceptual_model = PerceptualModel(args, perc_model=perc_model, batch_size=args.batch_size)
    perceptual_model.build_perceptual_model(generator, discriminator_network)

    ff_model = None

    # Optimize (only) dlatents by minimizing perceptual loss between reference and generated images in feature space
    for images_batch in tqdm(split_to_batches(ref_images, args.batch_size),
                             total=len(ref_images) // args.batch_size):
        names = [os.path.splitext(os.path.basename(x))[0] for x in images_batch]
        if args.output_video:
            video_out = {}
            for name in names:
                video_out[name] = cv2.VideoWriter(
                    os.path.join(args.video_dir, f'{name}.avi'),
                    cv2.VideoWriter_fourcc(*args.video_codec),
                    args.video_frame_rate, (args.video_size, args.video_size))

        perceptual_model.set_reference_images(images_batch)
        dlatents = None
        if (args.load_last != ''):  # load previous dlatents for initialization
            for name in names:
                dl = np.expand_dims(np.load(os.path.join(args.load_last, f'{name}.npy')), axis=0)
                if (dlatents is None):
                    dlatents = dl
                else:
                    dlatents = np.vstack((dlatents, dl))
        else:
            if (ff_model is None):
                if os.path.exists(args.load_resnet):
                    from keras.applications.resnet50 import preprocess_input
                    print("Loading ResNet Model:")
                    ff_model = load_model(args.load_resnet)
            if (ff_model is None):
                if os.path.exists(args.load_effnet):
                    import efficientnet
                    from efficientnet import preprocess_input
                    print("Loading EfficientNet Model:")
                    ff_model = load_model(args.load_effnet)
            if (ff_model is not None):  # predict initial dlatents with ResNet model
                if (args.use_preprocess_input):
                    dlatents = ff_model.predict(
                        preprocess_input(load_images(images_batch,
                                                     image_size=args.resnet_image_size)))
                else:
                    dlatents = ff_model.predict(
                        load_images(images_batch, image_size=args.resnet_image_size))
        if dlatents is not None:
            generator.set_dlatents(dlatents)

        op = perceptual_model.optimize(generator.dlatent_variable,
                                       iterations=args.iterations,
                                       use_optimizer=args.optimizer)
        pbar = tqdm(op, leave=False, total=args.iterations)
        vid_count = 0
        best_loss = None
        best_dlatent = None
        avg_loss_count = 0
        if args.early_stopping:
            avg_loss = prev_loss = None
        for loss_dict in pbar:
            if args.early_stopping:  # early stopping feature
                if prev_loss is not None:
                    if avg_loss is not None:
                        avg_loss = 0.5 * avg_loss + (prev_loss - loss_dict["loss"])
                        if avg_loss < args.early_stopping_threshold:  # count while under threshold; else reset
                            avg_loss_count += 1
                        else:
                            avg_loss_count = 0
                        if avg_loss_count > args.early_stopping_patience:  # stop once threshold is reached
                            print("")
                            break
                    else:
                        avg_loss = prev_loss - loss_dict["loss"]
            pbar.set_description(" ".join(names) + ": " + "; ".join(
                ["{} {:.4f}".format(k, v) for k, v in loss_dict.items()]))
            if best_loss is None or loss_dict["loss"] < best_loss:
                if best_dlatent is None or args.average_best_loss <= 0.00000001:
                    best_dlatent = generator.get_dlatents()
                else:
                    best_dlatent = 0.25 * best_dlatent + 0.75 * generator.get_dlatents()
                if args.use_best_loss:
                    generator.set_dlatents(best_dlatent)
                best_loss = loss_dict["loss"]
            if args.output_video and (vid_count % args.video_skip == 0):
                batch_frames = generator.generate_images()
                for i, name in enumerate(names):
                    video_frame = PIL.Image.fromarray(batch_frames[i], 'RGB').resize(
                        (args.video_size, args.video_size), PIL.Image.LANCZOS)
                    video_out[name].write(cv2.cvtColor(
                        np.array(video_frame).astype('uint8'), cv2.COLOR_RGB2BGR))
            generator.stochastic_clip_dlatents()
            prev_loss = loss_dict["loss"]
        if not args.use_best_loss:
            best_loss = prev_loss
        print(" ".join(names), " Loss {:.4f}".format(best_loss))

        if args.output_video:
            for name in names:
                video_out[name].release()

        # Generate images from found dlatents and save them
        if args.use_best_loss:
            generator.set_dlatents(best_dlatent)
        generated_images = generator.generate_images()
        generated_dlatents = generator.get_dlatents()
        for img_array, dlatent, img_path, img_name in zip(generated_images, generated_dlatents,
                                                          images_batch, names):
            mask_img = None
            if args.composite_mask and (args.load_mask or args.face_mask):
                _, im_name = os.path.split(img_path)
                mask_img = os.path.join(args.mask_dir, f'{im_name}')
            if args.composite_mask and mask_img is not None and os.path.isfile(mask_img):
                orig_img = PIL.Image.open(img_path).convert('RGB')
                width, height = orig_img.size
                imask = PIL.Image.open(mask_img).convert('L').resize((width, height))
                imask = imask.filter(ImageFilter.GaussianBlur(args.composite_blur))
                mask = np.array(imask) / 255
                mask = np.expand_dims(mask, axis=-1)
                img_array = mask * np.array(img_array) + (1.0 - mask) * np.array(orig_img)
                img_array = img_array.astype(np.uint8)
                # img_array = np.where(mask, np.array(img_array), orig_img)
            img = PIL.Image.fromarray(img_array, 'RGB')
            img.save(os.path.join(args.generated_images_dir, f'{img_name}.png'), 'PNG')
            np.save(os.path.join(args.dlatent_dir, f'{img_name}.npy'), dlatent)
        generator.reset_dlatents()
# -*- coding: utf-8 -*-
from keras.applications.resnet50 import preprocess_input
import numpy as np
import cv2

from src.model_builder import CamModelBuilder
from src.utils import plot_img, list_files

if __name__ == "__main__":
    detector = CamModelBuilder().get_cam_model()
    detector.load_weights("weights.h5", by_name=True)
    detector.summary()

    imgs = list_files("dataset//train//text")
    for i, img_path in enumerate(imgs):
        original_img = cv2.cvtColor(cv2.imread(img_path), cv2.COLOR_BGR2RGB)
        img = cv2.resize(original_img, (224, 224))
        img = np.expand_dims(img, 0).astype(np.float64)

        cam_map = detector.predict(preprocess_input(img))
        cam_map = cam_map[0, :, :, 1]
        cam_map = cv2.resize(cam_map, (original_img.shape[1], original_img.shape[0]))
        plot_img(original_img, cam_map, show=False, save_filename="{}.png".format(i + 1))
print('the class', j)
path = 'D:\\sns_images\\class\\' + str(j)
for root, dirs, files in os.walk(path):
    imgbatch = zeros((batchsize, 224, 224, 3))
    wordindex = 0
    num = 0
    classdata = []
    for file in files:
        try:
            img = image.load_img(path + '\\' + file, target_size=(224, 224))
            if wordindex == batchsize:
                imgbatch = zeros((batchsize, 224, 224, 3))
                wordindex = 0
            img = image.img_to_array(img)
            img = expand_dims(img, axis=0)
            img = preprocess_input(img)
            imgbatch[wordindex] = img
            wordindex += 1
            if wordindex == batchsize:
                num += 1
                print(num)
                batchresult = model.predict(imgbatch)
                batchmean = list(mean(batchresult, axis=0))
                classdata.append(batchmean)
        except Exception:
            print("failed to load image:", file)
    classdata = mat(classdata)
    classmean = mean(classdata, axis=0)
    allclassmean[j] = classmean[0, :]

save('D:\\tencent\\miniproject\\allclassmean', allclassmean)
test_feature = np.array([0])
BATCHSIZE = 1024
target_size = (224, 224)
list_2 = ['./image2/image/' + name for name in os.listdir("./image2/image")]
for i in range(int(len(list_2) / BATCHSIZE) + 1):
    imgs = []
    for j in range(BATCHSIZE):
        imgs.append(
            img_to_array(
                load_img(list_2[i * BATCHSIZE + j], target_size=target_size)))
        if i * BATCHSIZE + j == len(list_2) - 1:
            break
    imgs = np.asarray(imgs)
    if i == 0:
        test_feature = model.predict(preprocess_input(imgs))
    else:
        test_feature = np.vstack(
            [test_feature, model.predict(preprocess_input(imgs))])
    print(test_feature.shape)

# y = np.asarray(y, dtype=np.uint8)
with h5py.File("./dense161_final_feature.h5", "w") as h:
    # h.create_dataset("train", data=train_feature)
    h.create_dataset("test", data=test_feature)
    # h.create_dataset("label", data=y)
def main():
    parser = argparse.ArgumentParser(
        description='Find latent representation of reference images using perceptual losses',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('src_dir', help='Directory with images for encoding')
    parser.add_argument('generated_images_dir', help='Directory for storing generated images')
    parser.add_argument('dlatent_dir', help='Directory for storing dlatent representations')
    parser.add_argument('--data_dir', default='data', help='Directory for storing optional models')
    parser.add_argument('--mask_dir', default='masks', help='Directory for storing optional masks')
    parser.add_argument('--load_last', default='', help='Start with embeddings from directory')
    parser.add_argument('--dlatent_avg', default='',
                        help='Use dlatent from file specified here for truncation instead of dlatent_avg from Gs')
    parser.add_argument('--model_url', default=config.Model,
                        help='Fetch a StyleGAN model to train on from this URL')  # karras2019stylegan-ffhq-1024x1024.pkl
    parser.add_argument('--model_res', default=1024, type=int,
                        help='The dimension of images in the StyleGAN model')
    parser.add_argument('--batch_size', default=1, type=int,
                        help='Batch size for generator and perceptual model')

    # Perceptual model params
    parser.add_argument('--image_size', default=256, type=int,
                        help='Size of images for perceptual model')
    parser.add_argument('--resnet_image_size', default=224, type=int,
                        help='Size of images for the Resnet model')
    parser.add_argument('--lr', default=0.02, type=float, help='Learning rate for perceptual model')
    parser.add_argument('--decay_rate', default=0.9, type=float, help='Decay rate for learning rate')
    parser.add_argument('--iterations', default=100, type=int,
                        help='Number of optimization steps for each batch')
    parser.add_argument('--decay_steps', default=10, type=float,
                        help='Decay steps for learning rate decay (as a percent of iterations)')
    parser.add_argument('--load_effnet', default='data/finetuned_effnet.h5',
                        help='Model to load for EfficientNet approximation of dlatents')
    parser.add_argument('--load_resnet', default='data/resnet_18.h5',
                        help='Model to load for ResNet approximation of dlatents')

    # Loss function options
    parser.add_argument('--use_vgg_loss', default=0.4, type=float,
                        help='Use VGG perceptual loss; 0 to disable, > 0 to scale.')
    parser.add_argument('--use_vgg_layer', default=9, type=int, help='Pick which VGG layer to use.')
    parser.add_argument('--use_pixel_loss', default=1.5, type=float,
                        help='Use logcosh image pixel loss; 0 to disable, > 0 to scale.')
    parser.add_argument('--use_mssim_loss', default=100, type=float,
                        help='Use MS-SSIM perceptual loss; 0 to disable, > 0 to scale.')
    parser.add_argument('--use_lpips_loss', default=100, type=float,
                        help='Use LPIPS perceptual loss; 0 to disable, > 0 to scale.')
    parser.add_argument('--use_l1_penalty', default=1, type=float,
                        help='Use L1 penalty on latents; 0 to disable, > 0 to scale.')

    # Generator params
    parser.add_argument('--randomize_noise', default=False, type=bool,
                        help='Add noise to dlatents during optimization')
    parser.add_argument('--tile_dlatents', default=False, type=bool,
                        help='Tile dlatents to use a single vector at each scale')
    parser.add_argument('--clipping_threshold', default=2.0, type=float,
                        help='Stochastic clipping of gradient values outside of this threshold')

    # Masking params
    parser.add_argument('--load_mask', default=False, type=bool, help='Load segmentation masks')
    parser.add_argument('--face_mask', default=False, type=bool,
                        help='Generate a mask for predicting only the face area')
    parser.add_argument('--use_grabcut', default=True, type=bool,
                        help='Use grabcut algorithm on the face mask to better segment the foreground')
    parser.add_argument('--scale_mask', default=1.5, type=float,
                        help='Look over a wider section of foreground for grabcut')

    # Video params
    parser.add_argument('--video_dir', default='videos', help='Directory for storing training videos')
    parser.add_argument('--output_video', default=False, type=bool,
                        help='Generate videos of the optimization process')
    parser.add_argument('--video_codec', default='MJPG', help='FOURCC-supported video codec name')
    parser.add_argument('--video_frame_rate', default=24, type=int, help='Video frames per second')
    parser.add_argument('--video_size', default=512, type=int, help='Video size in pixels')
    parser.add_argument('--video_skip', default=1, type=int,
                        help='Only write every n frames (1 = write every frame)')

    # parse_known_args() does not fail on options meant for later stages; the
    # extras are returned separately and kept for later use
    args, other_args = parser.parse_known_args()

    # steps for learning-rate decay
    args.decay_steps *= 0.01 * args.iterations  # Calculate steps as a percent of total iterations

    if args.output_video:
        import cv2
        synthesis_kwargs = dict(output_transform=dict(func=tflib.convert_images_to_uint8,
                                                      nchw_to_nhwc=False),
                                minibatch_size=args.batch_size)

    # Collect every image file under src_dir into ref_images (the list of
    # source images; a single image works too)
    ref_images = [os.path.join(args.src_dir, x) for x in os.listdir(args.src_dir)]
    ref_images = list(filter(os.path.isfile, ref_images))
    if len(ref_images) == 0:
        raise Exception('%s is empty' % args.src_dir)

    # Create the working directories
    os.makedirs(args.data_dir, exist_ok=True)
    os.makedirs(args.mask_dir, exist_ok=True)
    os.makedirs(args.generated_images_dir, exist_ok=True)
    os.makedirs(args.dlatent_dir, exist_ok=True)
    os.makedirs(args.video_dir, exist_ok=True)

    # Initialize generator and perceptual model
    tflib.init_tf()
    # Load the StyleGAN model
    model_file = glob.glob(args.model_url)
    if len(model_file) == 1:
        model_file = open(model_file[0], "rb")
    else:
        raise Exception('Failed to find the model')
    generator_network, discriminator_network, Gs_network = pickle.load(model_file)

    # Build the Generator, which participates in constructing the VGG16
    # perceptual model and produces generated_image; generated_image is turned
    # into generated_img_features by perceptual_model and enters the loss
    generator = Generator(Gs_network, args.batch_size,
                          clipping_threshold=args.clipping_threshold,
                          tiled_dlatent=args.tile_dlatents,
                          model_res=args.model_res,
                          randomize_noise=args.randomize_noise)
    if (args.dlatent_avg != ''):
        generator.set_dlatent_avg(np.load(args.dlatent_avg))

    perc_model = None
    if (args.use_lpips_loss > 0.00000001):  # '--use_lpips_loss', default = 100
        # Load the VGG16 perceptual model
        model_file = glob.glob('./models/vgg16_zhang_perceptual.pkl')
        if len(model_file) == 1:
            model_file = open(model_file[0], "rb")
        else:
            raise Exception('Failed to find the model')
        perc_model = pickle.load(model_file)

    # Create the VGG16 perceptual model
    perceptual_model = PerceptualModel(args, perc_model=perc_model, batch_size=args.batch_size)
    perceptual_model.build_perceptual_model(generator)

    ff_model = None

    # Optimize (only) dlatents by minimizing perceptual loss between reference and generated images in feature space
    # tqdm is a fast, extensible progress bar for long Python loops.
    # Split ref_images into batches of size args.batch_size and solve each
    # source image's optimal dlatents with perceptual_model.optimize().
    # For each source image, optimization starts from initial dlatents and,
    # sampling normally distributed values within a region, uses the Adam
    # optimizer to step toward the dlatents that minimize the loss
    # (the stochastic clipping method).
    for images_batch in tqdm(split_to_batches(ref_images, args.batch_size),
                             total=len(ref_images) // args.batch_size):
        print('Starting timer')
        starttime = time.time()
        # Read the file names in this batch
        names = [os.path.splitext(os.path.basename(x))[0] for x in images_batch]
        if args.output_video:
            video_out = {}
            for name in names:
                video_out[name] = cv2.VideoWriter(
                    os.path.join(args.video_dir, f'{name}.avi'),
                    cv2.VideoWriter_fourcc(*args.video_codec),
                    args.video_frame_rate, (args.video_size, args.video_size))

        # Set the reference images and their VGG16 features (the baseline
        # against which the loss is computed)
        perceptual_model.set_reference_images(images_batch)
        dlatents = None
        if (args.load_last != ''):  # load previous dlatents for initialization
            for name in names:
                dl = np.expand_dims(np.load(os.path.join(args.load_last, f'{name}.npy')), axis=0)
                if (dlatents is None):
                    dlatents = dl
                else:
                    dlatents = np.vstack((dlatents, dl))
        else:
            if (ff_model is None):
                if os.path.exists(args.load_resnet):
                    print("Loading ResNet Model:")
                    ff_model = load_model(args.load_resnet)
                    from keras.applications.resnet50 import preprocess_input
            if (ff_model is None):
                if os.path.exists(args.load_effnet):
                    import efficientnet
                    print("Loading EfficientNet Model:")
                    ff_model = load_model(args.load_effnet)
                    from efficientnet import preprocess_input
            if (ff_model is not None):  # predict initial dlatents with ResNet model
                dlatents = ff_model.predict(
                    preprocess_input(load_images(images_batch, image_size=args.resnet_image_size)))
        # Set the initial dlatents for the perceptual_model iterations; they
        # are predicted from the source image with resnet50 or efficientnet
        if dlatents is not None:
            generator.set_dlatents(dlatents)

        # For each source image, build a tqdm progress bar over the optimization iterations
        op = perceptual_model.optimize(generator.dlatent_variable, iterations=args.iterations)
        pbar = tqdm(op, leave=False, total=args.iterations)
        vid_count = 0
        best_loss = None
        best_dlatent = None
        # Optimize with the VGG16 perceptual_model using stochastic clipping,
        # for iterations=args.iterations steps
        endtime = time.time()
        print('Time until start of iteration:', round(endtime - starttime, 2), 'secs')
        for loss_dict in pbar:
            pbar.set_description(" ".join(names) + ": " + "; ".join(
                ["{} {:.4f}".format(k, v) for k, v in loss_dict.items()]))
            if best_loss is None or loss_dict["loss"] < best_loss:
                best_loss = loss_dict["loss"]
                best_dlatent = generator.get_dlatents()
            if args.output_video and (vid_count % args.video_skip == 0):
                batch_frames = generator.generate_images()
                for i, name in enumerate(names):
                    video_frame = PIL.Image.fromarray(batch_frames[i], 'RGB').resize(
                        (args.video_size, args.video_size), PIL.Image.LANCZOS)
                    video_out[name].write(cv2.cvtColor(
                        np.array(video_frame).astype('uint8'), cv2.COLOR_RGB2BGR))
            # Update dlatent_variable with stochastic clipping
            generator.stochastic_clip_dlatents()
        print(" ".join(names), " Loss {:.4f}".format(best_loss))

        if args.output_video:
            for name in names:
                video_out[name].release()

        # Generate images from found dlatents and save them
        generator.set_dlatents(best_dlatent)
        generated_images = generator.generate_images()
        generated_dlatents = generator.get_dlatents()
        endtime = time.time()
        print('Time until computation finished:', round(endtime - starttime, 2), 'secs')
        for img_array, dlatent, img_name in zip(generated_images, generated_dlatents, names):
            img = PIL.Image.fromarray(img_array, 'RGB')
            img.save(os.path.join(args.generated_images_dir, f'{img_name}.png'), 'PNG')
            np.save(os.path.join(args.dlatent_dir, f'{img_name}.npy'), dlatent)
        generator.reset_dlatents()
        endtime = time.time()
        print('Time until write finished:', round(endtime - starttime, 2), 'secs')
def preprocess_image(img):
    img = image.load_img(img, target_size=(224, 224))
    img = image.img_to_array(img)
    img = np.expand_dims(img, axis=0)
    img = preprocess_input(img)
    return img
def extract_Resnet50(tensor):
    return ResNet50(weights='imagenet', include_top=False).predict(preprocess_input(tensor))
def ResNet50_predict_labels(img_path):
    # Return the index of the top prediction for the image at img_path
    img = preprocess_input(path_to_tensor(img_path))
    return np.argmax(ResNet50_model.predict(img))
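# A hedged usage sketch: in the ImageNet class ordering, dog breeds occupy
# indices 151-268, so the predicted index can drive a simple dog detector.
# `dog_detector` is an illustrative helper name, not from the original code.
def dog_detector(img_path):
    prediction = ResNet50_predict_labels(img_path)
    return (prediction >= 151) and (prediction <= 268)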
if __name__ == '__main__':
    # Output dim for your dataset
    output_dim = 257  # For Caltech256

    images_list = []
    images_names = []
    val_images_list = []
    val_images_names = []
    examples_per_class = 3
    validation_per_class = 1
    images_list, images_names, val_images_list, val_images_names = load_all_images(
        'C:/Users/Chetan/Documents/CSE253/PA3/256_ObjectCategories',
        '256_ObjectCategories', images_list, images_names, val_images_list,
        val_images_names, examples_per_class, validation_per_class)

    # Normalization
    # Note: preprocess_input already normalizes for the pretrained network, so
    # the additional /255.0 below departs from the stock Keras preprocessing.
    images_list = preprocess_input(np.array(images_list))
    val_images_list = preprocess_input(np.array(val_images_list))
    images_list = images_list / 255.0
    val_images_list = val_images_list / 255.0

    # Get one hot representation
    image_category = get_one_hot(images_names)
    val_image_category = get_one_hot(val_images_names)

    # Shuffle
    X_train, y_train = shuffle(images_list, image_category)

    tl_model = getModel(output_dim)
    tl_model.summary()

    # Train the model
    tl_model.compile(loss='categorical_crossentropy',
                     optimizer=RMSprop(lr=0.001, decay=1e-2,
# More image pre-processing.
im_224 = image.img_to_array(im_224)
# Adding a batch dimension gives each image 4 dimensions, as required below.
im_224 = np.expand_dims(im_224, axis=0)
im_299 = image.img_to_array(im_299)
im_299 = np.expand_dims(im_299, axis=0)
im_331 = image.img_to_array(im_331)
im_331 = np.expand_dims(im_331, axis=0)

# Generate predictions for each model.
resnet_pred = imagenet_utils.decode_predictions(
    resnet.predict(resnet50.preprocess_input(im_224)), 5)
xc_pred = imagenet_utils.decode_predictions(
    xc.predict(xception.preprocess_input(im_299)), 5)
v19_pred = imagenet_utils.decode_predictions(
    v19.predict(vgg19.preprocess_input(im_224)), 5)
ic3_pred = imagenet_utils.decode_predictions(
    ic3.predict(inception_v3.preprocess_input(im_299)), 5)
ic_resnet_pred = imagenet_utils.decode_predictions(
    ic_resnet.predict(inception_resnet_v2.preprocess_input(im_299)), 5)
mobile_pred = imagenet_utils.decode_predictions(
    mobile.predict(mobilenet.preprocess_input(im_224)), 5)
nn_large_pred = imagenet_utils.decode_predictions(
    nn_large.predict(nasnet.preprocess_input(im_331)), 5)

# Update result dictionaries based on tags.
resnet_results = update_dict(preds=resnet_pred,
def do_POST(s):
    length = int(s.headers['Content-Length'])
    body = s.rfile.read(length).decode('utf-8')
    if s.headers['Content-type'] == 'application/json':
        post_data = json.loads(body)
    else:
        post_data = urllib.parse.parse_qs(body)
    modelid = post_data['model']
    try:
        model = model_impls[modelid]['class'](**model_impls[modelid]['params'])
    except Exception as e:
        logger.error("Unable to load model: {reason}".format(reason=e))
        s.send_response(300)
        s.send_header("Content-type", "application/json")
        s.end_headers()
        s.wfile.write(json.dumps({
            "status": 300,
            "message": str(e),
        }).encode())
        return
    target_size = (dict([(m["id"], m['image_size']) for m in models]))[modelid]
    concepts = []
    for annotation in post_data['annotations']:
        aid = annotation['annotationid']
        begin = annotation['begin']
        end = annotation['end']
        batch_x = np.zeros((len(annotation['frames']), target_size, target_size, 3),
                           dtype=np.float32)
        for i, frame in enumerate(annotation['frames']):
            # Load image to PIL format
            img = Image.open(BytesIO(base64.b64decode(frame['screenshot'])))
            # cache frame - FIXME: currently there is no means to identify the
            # video - the same timestamp will overwrite an old frame (hash?)
            img.save(os.path.join(CACHE_DIR, '{0}.png'.format(frame['timecode'])))
            if img.mode != 'RGB':
                img = img.convert('RGB')
            hw_tuple = (target_size, target_size)
            if img.size != hw_tuple:
                logger.warn("Scaling image to model size - this should be done in advene!")
                img = img.resize(hw_tuple)
            x = image.img_to_array(img)
            x = np.expand_dims(x, axis=0)
            x = preprocess_input(x)
            batch_x[i] = x[0, :, :, :]
        preds = model.predict_on_batch(np.asarray(batch_x))
        # decode the results into a list of tuples (class, description, probability)
        # (one such list for each sample in the batch)
        decoded = decode_predictions(preds, top=top_n_preds)
        confidences = dict()
        for t in itertools.chain.from_iterable(decoded):
            if t[1] in confidences:
                confidences[t[1]].append(float(t[2]))
            else:
                confidences[t[1]] = [float(t[2])]
        logger.debug(confidences)
        concepts.extend([{
            'annotationid': aid,
            'confidence': max(confidences[l]),
            # FIXME: set correct timecode - set timecode of frame with max confidence?
            'timecode': annotation['begin'],  # timestamp_in_ms,
            'label': l,
            'uri': 'http://concept.org/%s' % l
        } for l in confidences])
    logger.debug(concepts)
    s.send_response(200)
    s.send_header("Content-type", "application/json")
    s.end_headers()
    response = json.dumps({
        "status": 200,
        "message": "OK",
        "data": {
            'media_filename': post_data["media_filename"],
            'media_uri': post_data["media_uri"],
            'concepts': concepts
        }
    })
    s.wfile.write(response.encode())
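# For reference, a hedged sketch of the request payload do_POST expects,
# reconstructed from the field accesses above; all values are placeholders.
example_payload = {
    "model": "resnet50",
    "media_filename": "clip.mp4",
    "media_uri": "urn:example:clip",
    "annotations": [{
        "annotationid": "a1",
        "begin": 0,
        "end": 1000,
        "frames": [{
            "timecode": 0,
            "screenshot": "<base64-encoded image bytes>",
        }],
    }],
}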
def ResNet50_predict_labels(img_path):
    img = preprocess_input(to_tensor(img_path))
    return np.argmax(ResNet50_model.predict(img))
from keras.applications.resnet50 import ResNet50
from keras.preprocessing import image
from keras.applications.resnet50 import preprocess_input, decode_predictions
import numpy as np
from docopt import docopt

height = 224
width = 224
channels = 3
top = 5


def load_image(path):
    img = image.load_img(path, target_size=(224, 224))
    x = image.img_to_array(img)
    return x


if __name__ == '__main__':
    args = docopt(__doc__)
    paths = args['IMAGE_PATH']
    model = ResNet50(weights='imagenet')
    x = np.zeros((len(paths), height, width, channels))
    for i, path in enumerate(paths):
        x[i] = load_image(path)
    x = preprocess_input(x)
    predictions = decode_predictions(model.predict(x), top=top)
    for prediction in predictions:
        print(prediction)
def _preprocess(self, image_tensor):
    """Preprocess image data by modifying it directly"""
    # Relies on preprocess_input mutating float arrays in place, which holds
    # only for some Keras versions; see the safer variant below.
    resnet50.preprocess_input(image_tensor)
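# A hedged, version-robust variant: newer Keras releases do not guarantee that
# preprocess_input mutates its argument in place, so returning the result is
# safer. `_preprocess_safe` is an illustrative name, not from the original code.
def _preprocess_safe(self, image_tensor):
    """Preprocess image data, returning the (possibly new) array"""
    return resnet50.preprocess_input(image_tensor)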
def main():
    parser = argparse.ArgumentParser(
        description='Find latent representation of reference images using perceptual losses',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('src_dir', help='Directory with images for encoding')
    parser.add_argument('generated_images_dir', help='Directory for storing generated images')
    parser.add_argument('dlatent_dir', help='Directory for storing dlatent representations')
    parser.add_argument('--data_dir', default='data', help='Directory for storing optional models')
    parser.add_argument('--mask_dir', default='masks', help='Directory for storing optional masks')
    parser.add_argument('--load_last', default='', help='Start with embeddings from directory')
    parser.add_argument('--dlatent_avg', default='',
                        help='Use dlatent from file specified here for truncation instead of dlatent_avg from Gs')
    # parser.add_argument('--model_url', default='https://drive.google.com/uc?id=1MEGjdvVpUsu1jB4zrXZN7Y4kBBOzizDQ',
    #                     help='Fetch a StyleGAN model to train on from this URL')  # karras2019stylegan-ffhq-1024x1024.pkl
    parser.add_argument('--model_url', default='./cache/karras2019stylegan-ffhq-1024x1024.pkl',
                        help='load local model -- Jack12')  # karras2019stylegan-ffhq-1024x1024.pkl
    parser.add_argument('--model_res', default=1024, type=int,
                        help='The dimension of images in the StyleGAN model')
    parser.add_argument('--batch_size', default=1, type=int,
                        help='Batch size for generator and perceptual model')

    # Perceptual model params
    parser.add_argument('--image_size', default=256, type=int,
                        help='Size of images for perceptual model')
    parser.add_argument('--resnet_image_size', default=256, type=int,
                        help='Size of images for the Resnet model')
    parser.add_argument('--lr', default=0.02, type=float, help='Learning rate for perceptual model')
    parser.add_argument('--decay_rate', default=0.9, type=float, help='Decay rate for learning rate')
    parser.add_argument('--iterations', default=200, type=int,
                        help='Number of optimization steps for each batch')
    parser.add_argument('--decay_steps', default=10, type=float,
                        help='Decay steps for learning rate decay (as a percent of iterations)')
    parser.add_argument('--load_effnet', default='data/finetuned_effnet.h5',
                        help='Model to load for EfficientNet approximation of dlatents')
    parser.add_argument('--load_resnet', default='data/finetuned_resnet.h5',
                        help='Model to load for ResNet approximation of dlatents')

    # Loss function options
    parser.add_argument('--use_vgg_loss', default=0.4, type=float,
                        help='Use VGG perceptual loss; 0 to disable, > 0 to scale.')
    parser.add_argument('--use_vgg_layer', default=9, type=int, help='Pick which VGG layer to use.')
    parser.add_argument('--use_pixel_loss', default=1.5, type=float,
                        help='Use logcosh image pixel loss; 0 to disable, > 0 to scale.')
    parser.add_argument('--use_mssim_loss', default=100, type=float,
                        help='Use MS-SSIM perceptual loss; 0 to disable, > 0 to scale.')
    parser.add_argument('--use_lpips_loss', default=100, type=float,
                        help='Use LPIPS perceptual loss; 0 to disable, > 0 to scale.')
    parser.add_argument('--use_l1_penalty', default=1, type=float,
                        help='Use L1 penalty on latents; 0 to disable, > 0 to scale.')
    parser.add_argument('--use_tex_loss', default=3.0, type=float, help='Use texture space loss.')

    # Generator params
    parser.add_argument('--randomize_noise', default=False, type=bool,
                        help='Add noise to dlatents during optimization')
    parser.add_argument('--tile_dlatents', default=False, type=bool,
                        help='Tile dlatents to use a single vector at each scale')
parser.add_argument( '--clipping_threshold', default=2.0, help='Stochastic clipping of gradient values outside of this threshold', type=float) # Face Alignment Model parser.add_argument('--crop_face', default=False, help='Crop face image use face alignment model', type=bool) # Masking params parser.add_argument('--load_mask', default=False, help='Load segmentation masks', type=bool) parser.add_argument( '--face_mask', default=False, help='Generate a mask for predicting only the face area', type=bool) parser.add_argument( '--use_grabcut', default=True, help= 'Use grabcut algorithm on the face mask to better segment the foreground', type=bool) parser.add_argument( '--scale_mask', default=1.5, help='Look over a wider section of foreground for grabcut', type=float) # Video params parser.add_argument('--video_dir', default='videos', help='Directory for storing training videos') parser.add_argument('--output_video', default=False, help='Generate videos of the optimization process', type=bool) parser.add_argument('--video_codec', default='MJPG', help='FOURCC-supported video codec name') parser.add_argument('--video_frame_rate', default=24, help='Video frames per second', type=int) parser.add_argument('--video_size', default=512, help='Video size in pixels', type=int) parser.add_argument( '--video_skip', default=1, help='Only write every n frames (1 = write every frame)', type=int) args, other_args = parser.parse_known_args() args.decay_steps *= 0.01 * args.iterations # Calculate steps as a percent of total iterations if args.output_video: import cv2 synthesis_kwargs = dict(output_transform=dict( func=tflib.convert_images_to_uint8, nchw_to_nhwc=False), minibatch_size=args.batch_size) ref_images = [ os.path.join(args.src_dir, x) for x in os.listdir(args.src_dir) ] ref_images = list(filter(os.path.isfile, ref_images)) if len(ref_images) == 0: raise Exception('%s is empty' % args.src_dir) os.makedirs(args.data_dir, exist_ok=True) os.makedirs(args.mask_dir, exist_ok=True) os.makedirs(args.generated_images_dir, exist_ok=True) os.makedirs(args.dlatent_dir, exist_ok=True) os.makedirs(args.video_dir, exist_ok=True) # Initialize generator and perceptual model tflib.init_tf() with dnnlib.util.open_url(args.model_url, cache_dir=config.cache_dir) as f: print('Load model from %s' % (args.model_url)) generator_network, discriminator_network, Gs_network = pickle.load(f) generator = Generator(Gs_network, args.batch_size, clipping_threshold=args.clipping_threshold, tiled_dlatent=args.tile_dlatents, model_res=args.model_res, randomize_noise=args.randomize_noise) if (args.dlatent_avg != ''): generator.set_dlatent_avg(np.load(args.dlatent_avg)) perc_model = None if (args.use_lpips_loss > 0.00000001): # with dnnlib.util.open_url('https://drive.google.com/uc?id=1N2-m9qszOeVC9Tq77WxsLnuWwOedQiD2', cache_dir=config.cache_dir) as f: # perc_model = pickle.load(f) with dnnlib.util.open_url('./cache/vgg16_zhang_perceptual.pkl', cache_dir=config.cache_dir) as f: perc_model = pickle.load(f) # Jack12 load local model perceptual_model = PerceptualModel(args, perc_model=perc_model, batch_size=args.batch_size) perceptual_model.build_perceptual_model(generator) ff_model = None cur_batch_id = 0 # Optimize (only) dlatents by minimizing perceptual loss between reference and generated images in feature space for images_batch in tqdm(split_to_batches(ref_images, args.batch_size), total=len(ref_images) // args.batch_size): cur_batch_id += 1 names = [ os.path.splitext(os.path.basename(x))[0] for x in images_batch ] if args.output_video: 
            video_out = {}
            for name in names:
                video_out[name] = cv2.VideoWriter(
                    os.path.join(args.video_dir, f'{name}.avi'),
                    cv2.VideoWriter_fourcc(*args.video_codec),
                    args.video_frame_rate,
                    (args.video_size, args.video_size))

        perceptual_model.set_reference_images(images_batch, cur_batch_id)
        dlatents = None
        if args.load_last != '':  # load previous dlatents for initialization
            for name in names:
                dl = np.expand_dims(np.load(os.path.join(args.load_last, f'{name}.npy')), axis=0)
                if dlatents is None:
                    dlatents = dl
                else:
                    dlatents = np.vstack((dlatents, dl))
        else:
            if ff_model is None:
                if os.path.exists(args.load_resnet):
                    print("Loading ResNet Model:")
                    ff_model = load_model(args.load_resnet)
                    from keras.applications.resnet50 import preprocess_input
            if ff_model is None:
                if os.path.exists(args.load_effnet):
                    import efficientnet
                    print("Loading EfficientNet Model:")
                    ff_model = load_model(args.load_effnet)
                    from efficientnet import preprocess_input
            if ff_model is not None:  # predict initial dlatents with the ResNet/EfficientNet model
                dlatents = ff_model.predict(
                    preprocess_input(load_images(images_batch, image_size=args.resnet_image_size)))
        if dlatents is not None:
            generator.set_dlatents(dlatents)

        op = perceptual_model.optimize(generator.dlatent_variable, iterations=args.iterations)
        pbar = tqdm(op, leave=False, total=args.iterations)
        vid_count = 0
        best_loss = None
        best_dlatent = None
        for loss_dict in pbar:
            pbar.set_description(
                " ".join(names) + ": " +
                "; ".join(["{} {:.4f}".format(k, v) for k, v in loss_dict.items()]))
            if best_loss is None or loss_dict["loss"] < best_loss:
                best_loss = loss_dict["loss"]
                best_dlatent = generator.get_dlatents()
            if args.output_video and (vid_count % args.video_skip == 0):
                batch_frames = generator.generate_images()
                for i, name in enumerate(names):
                    video_frame = PIL.Image.fromarray(batch_frames[i], 'RGB').resize(
                        (args.video_size, args.video_size), PIL.Image.LANCZOS)
                    video_out[name].write(
                        cv2.cvtColor(np.array(video_frame).astype('uint8'), cv2.COLOR_RGB2BGR))
            vid_count += 1  # advance the frame counter so --video_skip takes effect (the increment was missing in the original snippet)
            generator.stochastic_clip_dlatents()
        print(" ".join(names), " Loss {:.4f}".format(best_loss))

        if args.output_video:
            for name in names:
                video_out[name].release()

        # Generate images from the best-found dlatents and save them
        generator.set_dlatents(best_dlatent)
        generated_images = generator.generate_images()
        generated_dlatents = generator.get_dlatents()
        for img_array, dlatent, img_name in zip(generated_images, generated_dlatents, names):
            img = PIL.Image.fromarray(img_array, 'RGB')
            img.save(os.path.join(args.generated_images_dir, f'{img_name}.png'), 'PNG')
            np.save(os.path.join(args.dlatent_dir, f'{img_name}.npy'), dlatent)

        generator.reset_dlatents()
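# Because every embedding is persisted as a .npy file above, a saved dlatent
# can be re-rendered later without re-optimizing. A minimal sketch, assuming
# the same Generator wrapper built in main(); the function name and paths are
# illustrative, not part of the original script.
import numpy as np
import PIL.Image

def render_saved_dlatent(generator, dlatent_path, out_path):
    # Reload in the same shape the --load_last branch uses: (1, layers, 512)
    dlatent = np.expand_dims(np.load(dlatent_path), axis=0)
    generator.set_dlatents(dlatent)
    img = PIL.Image.fromarray(generator.generate_images()[0], 'RGB')
    img.save(out_path, 'PNG')
    generator.reset_dlatents()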
x = Dense(702, activation='softmax', name='fc8',
          kernel_initializer=RandomNormal(mean=0.0, stddev=0.001))(x)
net = Model(inputs=base_model.input, outputs=x)  # `input=`/`output=` are deprecated Keras 1 keyword names
for layer in net.layers:
    layer.trainable = True

# load data
images, labels = [], []
with open(LIST, 'r') as f:
    for line in f:
        line = line.strip()
        img, lbl = line.split()
        img = image.load_img(os.path.join(TRAIN, img), target_size=[224, 224])
        img = image.img_to_array(img)
        img = np.expand_dims(img, axis=0)
        img = preprocess_input(img)
        images.append(img[0])
        labels.append(int(lbl))
images = np.array(images)
labels = to_categorical(labels)

# train
batch_size = 16
datagen = ImageDataGenerator(featurewise_center=False,
                             samplewise_center=False,
                             featurewise_std_normalization=False,
                             samplewise_std_normalization=False,
                             zca_whitening=False,
                             rotation_range=20,  # 0.
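# The snippet above breaks off inside the ImageDataGenerator call. One common
# continuation (a sketch with illustrative values, not the original author's
# settings) finishes the augmentation config and trains from the in-memory
# arrays via flow():
datagen = ImageDataGenerator(rotation_range=20,
                             width_shift_range=0.2,
                             height_shift_range=0.2,
                             horizontal_flip=True)
net.compile(optimizer='sgd', loss='categorical_crossentropy', metrics=['accuracy'])
net.fit_generator(datagen.flow(images, labels, batch_size=batch_size),
                  steps_per_epoch=len(images) // batch_size,
                  epochs=10)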
for img in images:
    x = Image.open(img_path + '/' + img)
    x = x.resize((224, 224))
    x = np.array(x, dtype=np.float32)
    x_.append(x)
    y_.append(dict_labels[img_folder])

X = np.array(x_)
Y = np.array(y_)
print(X.shape)
print(Y.shape)
print(type(X[3]))
X = preprocess_input(X)
print(type(X[3]))
Y = np_utils.to_categorical(Y)
print(Y.shape)
X, Y = shuffle(X, Y)
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2)
print(X_train.shape, X_test.shape, Y_train.shape, Y_test.shape)

resnet_model = ResNet50(input_shape=(224, 224, 3), include_top=False, weights='imagenet')
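# A sketch of one common continuation: freeze the ImageNet backbone and attach
# a small softmax head. Layer sizes and training settings here are
# illustrative assumptions, not from the original snippet.
from keras.layers import Dense, GlobalAveragePooling2D
from keras.models import Model

for layer in resnet_model.layers:
    layer.trainable = False  # train only the new head at first
h = GlobalAveragePooling2D()(resnet_model.output)
h = Dense(256, activation='relu')(h)
out = Dense(Y.shape[1], activation='softmax')(h)
model = Model(inputs=resnet_model.input, outputs=out)
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.fit(X_train, Y_train, validation_data=(X_test, Y_test), epochs=10, batch_size=32)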
model = ResNet50(
    weights="../weight/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5",
    include_top=False, pooling='avg')

for flower in flower_type:
    data = []
    flower_dir = "../train/" + flower  # renamed from `dir`, which shadows the builtin
    print("Feature Extraction (" + flower + ") begin")
    for img in os.listdir(flower_dir):
        path = os.path.join(flower_dir, img)
        img = image.load_img(path, target_size=(224, 224))
        data.append(np.array(img))
    X = np.array(data)
    X = preprocess_input(X)
    feature = model.predict(X)
    np.save("../feature/" + flower + "_resnet50", feature)
    print("Feature Extraction (" + flower + ") done")

test_dir = "../test"
data = []
print("Feature Extraction (test) begin")
for i in range(424):
    path = os.path.join(test_dir, str(i) + ".jpg")
    img = image.load_img(path, target_size=(224, 224))
    data.append(np.array(img))
test = np.array(data)
test = preprocess_input(test)
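# Since each class's features land in a separate .npy (np.save appends the
# extension), downstream training can reload and stack them. A minimal sketch
# assuming the directory layout above and the same flower_type list:
import numpy as np

features, labels = [], []
for idx, flower in enumerate(flower_type):
    feats = np.load("../feature/" + flower + "_resnet50.npy")  # (n_images, 2048) with pooling='avg'
    features.append(feats)
    labels.extend([idx] * len(feats))
X_feat = np.vstack(features)
y_feat = np.array(labels)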
def generator(batch_size, image_list, image_shape, coco_instance, id_to_index, is_training):
    """Generator that yields batches of images and one-hot segmentation labels.

    Args:
        batch_size: batch size
        image_list: list of image file names
        image_shape: target image shape
        coco_instance: COCO ground-truth instance
        id_to_index: dictionary mapping category id to class index
        is_training: whether to apply data augmentation

    Yields:
        all_img: shape (batch_size, image_shape[0], image_shape[1], 3)
        label:   shape (batch_size, image_shape[0], image_shape[1], classes)
    """
    aug, mask_hook = utilities.img_aug()

    def f(id):
        # Map category id to class index; id 0 (unlabeled) falls back to
        # category 183
        if id != 0:
            return id_to_index[id]
        else:
            return id_to_index[183]

    vfunc = np.vectorize(f)

    while True:
        # Random shuffling
        random.shuffle(image_list)
        index = 0
        while index + batch_size < len(image_list):
            all_img = np.zeros((batch_size, image_shape[0], image_shape[1], 3), dtype=np.float32)
            label = np.zeros((batch_size, image_shape[0], image_shape[1], len(id_to_index)))
            i = 0
            while i < batch_size:
                image = image_list[index]
                im = cv2.imread(image)
                # Skip images whose width or height is smaller than the crop size
                #if (im.shape[0] < image_shape[0] or im.shape[1] < image_shape[1]) or im.ndim == 2:
                #    i -= 1
                #    continue
                if im.shape[0] < image_shape[0] or im.shape[1] < image_shape[1]:
                    index += 1
                    continue
                im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
                # Extract the 12-digit id (000000XXXXXX) from the file name
                lbl_id = int(image.replace(".jpg", '')[-12:])
                lbl = cocoSegmentationToSegmentationMap(coco_instance, lbl_id)
                lbl = vfunc(lbl.astype(np.uint8))
                # Resize
                #im = transform.rescale(im, 0.5)
                # Random crop
                rnd_x = random.randint(0, im.shape[0] - image_shape[0])
                rnd_y = random.randint(0, im.shape[1] - image_shape[1])
                crop_im = im[rnd_x:rnd_x + image_shape[0], rnd_y:rnd_y + image_shape[1], :]
                crop_lbl = lbl[rnd_x:rnd_x + image_shape[0], rnd_y:rnd_y + image_shape[1]]
                # Convert to one-hot
                crop_lbl = keras.utils.to_categorical(crop_lbl, num_classes=len(id_to_index))
                # Save data
                all_img[i] = crop_im
                label[i] = crop_lbl
                index += 1
                i += 1
            if is_training:
                # Use the deterministic copy so images and masks receive identical
                # transforms (the snippet originally called `aug` directly,
                # leaving `aug_det` unused)
                aug_det = aug.to_deterministic()
                all_img = aug_det.augment_images(all_img)
                label = aug_det.augment_images(label, hooks=mask_hook)
            #all_img = preprocess_input(all_img, mode="torch")
            all_img = preprocess_input(all_img)
            yield all_img, label
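# A hedged usage sketch: wiring this generator into Keras training. The model,
# image lists, and COCO handles are placeholders for whatever the surrounding
# script defines; only the generator signature is taken from the code above.
train_gen = generator(8, train_image_list, (224, 224), coco_train, id_to_index, is_training=True)
val_gen = generator(8, val_image_list, (224, 224), coco_val, id_to_index, is_training=False)
model.fit_generator(train_gen,
                    steps_per_epoch=len(train_image_list) // 8,
                    validation_data=val_gen,
                    validation_steps=len(val_image_list) // 8,
                    epochs=20)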
def ResNet50_predict_labels(img_path):
    # Returns the index of the most likely ImageNet class for the image at
    # img_path (the original comment said "prediction vector", but the
    # function returns the argmax)
    img = preprocess_input(path_to_tensor(img_path))
    return np.argmax(ResNet50_model.predict(img))
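# A common use of this helper (a sketch, not necessarily the original
# author's): ImageNet class indices 151-268 correspond to dog breeds, so the
# argmax can drive a simple dog detector.
def dog_detector(img_path):
    prediction = ResNet50_predict_labels(img_path)
    return 151 <= prediction <= 268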
img_path = sys.argv[1]

try:
    # Load YAML and create the model
    with open('model.yaml', 'r') as yaml_file:
        loaded_model_yaml = yaml_file.read()
    model = model_from_yaml(loaded_model_yaml)
    # Load weights into the new model
    model.load_weights("model.h5")
    print("Loaded model from disk")
except Exception:  # fall back to downloading when no cached model exists (the original used a bare except)
    model = ResNet50(weights='imagenet')
    # Serialize model to YAML
    model_yaml = model.to_yaml()
    with open("model.yaml", "w") as yaml_file:
        yaml_file.write(model_yaml)
    # Serialize weights to HDF5
    model.save_weights("model.h5")
    print("Saved model to disk")

img = image.load_img(img_path, target_size=(224, 224))
x = image.img_to_array(img)
x = np.expand_dims(x, axis=0)
x = preprocess_input(x)
preds = model.predict(x)
# Decode the results into a list of (class, description, probability) tuples,
# one list per sample in the batch
print('Predicted:', decode_predictions(preds, top=3)[0])
def get_features(img, model):
    img_data = img_to_array(img.resize((224, 224)))
    img_data = expand_dims(img_data, axis=0)
    img_data = preprocess_input(img_data)
    return model.predict(img_data)
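# A hypothetical usage of get_features (file names and model choice are
# illustrative): compare two images by cosine similarity of their pooled
# ResNet50 features. Assumes img_to_array, expand_dims, and preprocess_input
# are in scope as above.
from numpy import dot
from numpy.linalg import norm
from PIL import Image
from keras.applications.resnet50 import ResNet50

feat_model = ResNet50(weights='imagenet', include_top=False, pooling='avg')
f1 = get_features(Image.open('a.jpg'), feat_model).flatten()
f2 = get_features(Image.open('b.jpg'), feat_model).flatten()
similarity = dot(f1, f2) / (norm(f1) * norm(f2))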