def get_embeddings(filenames):
    # Extract a face crop per file, then compute VGGFace2 resnet50 embeddings.
    faces = [extract_faces(f) for f in filenames]
    samples = asarray(faces, 'float32')
    samples = preprocess_input(samples, version=2)
    model = VGGFace(model='resnet50', include_top=False,
                    input_shape=(224, 224, 3), pooling='avg')
    yhat = model.predict(samples)
    return yhat
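# Hedged sketch: `extract_faces` is not defined in the snippet above. A common
# implementation in keras_vggface examples detects the face with MTCNN and
# returns a 224x224 crop; the detector choice and crop logic here are
# assumptions, not the original author's code.
def extract_faces(filename, required_size=(224, 224)):
    import numpy as np
    from PIL import Image
    from mtcnn import MTCNN

    pixels = np.asarray(Image.open(filename).convert('RGB'))
    detector = MTCNN()
    results = detector.detect_faces(pixels)
    x, y, w, h = results[0]['box']  # take the first detected face
    face = pixels[y:y + h, x:x + w]
    face = Image.fromarray(face).resize(required_size)
    return np.asarray(face)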
def percept_loss(input_image, reconstructed_image):
    # Perceptual loss: compare VGGFace (VGG16) conv1_1 activations of the
    # input and its reconstruction. `x_dim` and `gamma` are module-level names.
    vggface = VGGFace(include_top=False, input_shape=x_dim, model='vgg16')
    vgg_layers = ['conv1_1']
    outputs = [vggface.get_layer(l).output for l in vgg_layers]
    model = Model(inputs=vggface.input, outputs=outputs)
    for layer in model.layers:
        layer.trainable = False

    # Undo the [0, 1] scaling before applying the VGG-style preprocessing.
    input_image *= 255.0
    reconstructed_image *= 255.0
    input_image = preprocess_input(input_image, mode='tf',
                                   data_format='channels_last')
    reconstructed_image = preprocess_input(reconstructed_image, mode='tf',
                                           data_format='channels_last')

    h1_list = model(input_image)
    h2_list = model(reconstructed_image)
    if not isinstance(h1_list, list):
        h1_list = [h1_list]
        h2_list = [h2_list]

    p_loss = 0.0
    for h1, h2 in zip(h1_list, h2_list):
        h1 = K.batch_flatten(h1)
        h2 = K.batch_flatten(h2)
        p_loss += K.mean(K.square(h1 - h2), axis=-1)
    return gamma * p_loss
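# Hedged usage sketch for percept_loss: `x_dim`, `gamma`, `K`, `Model`, and
# `preprocess_input` are module-level names in the original snippet, pinned
# down here only for illustration. The autoencoder itself is a hypothetical
# model producing reconstructions in [0, 1].
from keras import backend as K
from keras.models import Model
from keras.applications.imagenet_utils import preprocess_input
from keras_vggface import VGGFace

x_dim = (224, 224, 3)
gamma = 1e-2
# autoencoder.compile(optimizer='adam', loss=percept_loss)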
def tensorflow_example():
    """This example uses TensorFlow instead of CoreML, and was found to give
    numbers consistent with CoreML."""
    model = VGGFace(model="senet50", pooling="avg", include_top=False)
    img = image.load_img('../image/ajb.jpg', target_size=(224, 224))
    x = image.img_to_array(img)
    x = np.expand_dims(x, axis=0)
    x = utils.preprocess_input(x, version=2)
    embeddings = model.predict(x)[0]
    print("TensorFlow embeddings: ", embeddings)
def testTHPrediction(self):
    keras.backend.set_image_dim_ordering('th')
    model = VGGFace()
    img = image.load_img('image/ak.jpg', target_size=(224, 224))
    x = image.img_to_array(img)
    x = np.expand_dims(x, axis=0)
    x = utils.preprocess_input(x)
    preds = model.predict(x)
    print('Predicted:', utils.decode_predictions(preds))
    # assertIn(member, container): the expected label is the member.
    self.assertIn('Aamir_Khan', utils.decode_predictions(preds)[0][0][0])
    self.assertAlmostEqual(
        utils.decode_predictions(preds)[0][0][1], 0.94938219)
def testRESNET50(self):
    keras.backend.set_image_dim_ordering('tf')
    model = VGGFace(model='resnet50')
    img = image.load_img('image/ajb.jpg', target_size=(224, 224))
    x = image.img_to_array(img)
    x = np.expand_dims(x, axis=0)
    x = utils.preprocess_input(x, version=2)
    preds = model.predict(x)
    self.assertIn('A._J._Buckley', utils.decode_predictions(preds)[0][0][0])
    self.assertAlmostEqual(
        utils.decode_predictions(preds)[0][0][1], 0.91819614, places=3)
def testSENET50(self):
    keras.backend.set_image_data_format('channels_last')
    model = VGGFace(model='senet50')
    img = image.load_img('image/ajb.jpg', target_size=(224, 224))
    x = image.img_to_array(img)
    x = np.expand_dims(x, axis=0)
    x = utils.preprocess_input(x, version=2)
    preds = model.predict(x)
    self.assertIn('A._J._Buckley', utils.decode_predictions(preds)[0][0][0])
    self.assertAlmostEqual(
        utils.decode_predictions(preds)[0][0][1], 0.9993529, places=3)
def testVGG16(self):
    keras.backend.set_image_data_format('channels_last')
    model = VGGFace(model='vgg16')
    img = image.load_img('image/ajb.jpg', target_size=(224, 224))
    x = image.img_to_array(img)
    x = np.expand_dims(x, axis=0)
    x = utils.preprocess_input(x, version=1)
    preds = model.predict(x)
    self.assertIn('A.J._Buckley', utils.decode_predictions(preds)[0][0][0])
    self.assertAlmostEqual(
        utils.decode_predictions(preds)[0][0][1], 0.9790116, places=3)
class EmbedderApp(EmbeddingServicer):
    model: VGGFace = VGGFace(model='resnet50', include_top=False,
                             input_shape=(224, 224, 3), pooling='avg')

    def deserializer_image(self, b_64_image) -> np.ndarray:
        # Decode the base64 payload into a (1, 224, 224, 3) float32 array.
        image = base64.b64decode(b_64_image)
        image = BytesIO(image)
        image = Image.open(image)
        image = image.resize((224, 224))
        image = np.asarray(image).astype(np.float32)
        return np.expand_dims(image, axis=0)

    def elaborate_image(self, b_64_image):
        image = self.deserializer_image(b_64_image)
        return preprocess_input(image, version=2)

    def embed(self, request: EmbeddingRequest, context):
        inputs = [self.elaborate_image(img) for img in request.images]
        inputs = np.concatenate(inputs, axis=0)
        predictions = self.model.predict(inputs).tolist()
        return EmbeddingResponse(results=[
            EmbeddingContainer(array=p) for p in predictions])
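# Hedged client-side sketch for EmbedderApp: images travel base64-encoded in
# an EmbeddingRequest, matching the deserializer above. The file path is
# illustrative and the gRPC stub setup is an assumption about the
# surrounding service.
import base64

with open('face.jpg', 'rb') as fh:
    payload = base64.b64encode(fh.read())
# request = EmbeddingRequest(images=[payload])
# response = stub.embed(request)  # stub: generated gRPC client, assumed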
def main():
    print('Loading model...')
    global sess
    global graph
    global vgg_features
    global image_size
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.1)
    with tf.Graph().as_default() as graph:
        with tf.Session(config=tf.ConfigProto(
                gpu_options=gpu_options)).as_default() as sess:
            vgg_features = VGGFace(model='resnet50', include_top=False,
                                   pooling='avg')
            # vgg_features = load_model('resnet_model/resnet50.h5')
            image_size = 224
            handler = FaceEmbeddingHandler()
            processor = FaceEmbedding.Processor(handler)
            transport = TSocket.TServerSocket(
                host='0.0.0.0', port=config.SERVER_THRIFT_PORT)
            tfactory = TTransport.TBufferedTransportFactory()
            pfactory = TBinaryProtocol.TBinaryProtocolFactory()
            server = TServer.TThreadedServer(processor, transport,
                                             tfactory, pfactory)
            print('READY')
            try:
                server.serve()
            except KeyboardInterrupt:
                pass
def feature_extraction(root_path, imglist, emb_dim=1024, model_name="vgg16"):
    # Features by Keras VGG16 trained on VGGFace2: take the activations of a
    # late fully-connected layer (layers[-4]) and L2-normalise each vector.
    vggface = VGGFace(model=model_name)
    get_output = K.function([vggface.layers[0].input],
                            [vggface.layers[-4].output])
    feat = []
    for i in range(len(imglist)):
        img = image.load_img(root_path + imglist[i], target_size=(224, 224))
        x = image.img_to_array(img)
        x = np.expand_dims(x, axis=0)
        x = x[:, :, :, ::-1]  # RGB -> BGR
        f_vec = get_output([x])[0]
        f_vec = np.squeeze(f_vec)
        norm = np.sqrt(f_vec.dot(f_vec))
        f_vec = f_vec / norm
        feat.append(f_vec)
    feat = np.array(feat)
    # Reduce to emb_dim dimensions with PCA, then L2-normalise the columns.
    pca = PCA(n_components=emb_dim)
    pca.fit(feat)
    features = pca.transform(feat)
    features = features.T
    features = features / np.linalg.norm(features, axis=0)
    return features
def create_model(params: Params) -> Model:
    input1: InputLayer = Input(shape=(params.image_size, params.image_size, 3))
    input2: InputLayer = Input(shape=(params.image_size, params.image_size, 3))
    base_model: Model = VGGFace(model='resnet50', include_top=False)
    # Freeze everything except the last 3 layers.
    for x in base_model.layers[:-3]:
        x.trainable = False
    # Transform image1.
    x1 = base_model(input1)
    x1 = Concatenate()([GlobalMaxPool2D()(x1), GlobalAvgPool2D()(x1)])
    # Transform image2.
    x2 = base_model(input2)
    x2 = Concatenate()([GlobalMaxPool2D()(x2), GlobalAvgPool2D()(x2)])
    _diff = Subtract()([x1, x2])
    diff_squared = Multiply()([_diff, _diff])
    # concat(x1*x2, (x1-x2)**2)
    x = Concatenate()([Multiply()([x1, x2]), diff_squared])
    x = Dense(100, activation="relu")(x)
    # TODO(dotslash): Not sure about the dropout prob.
    x = Dropout(params.dropout)(x)
    out = Dense(1, activation="sigmoid")(x)
    model = Model([input1, input2], out)
    model.compile(loss="binary_crossentropy", metrics=['acc'],
                  optimizer=Adam(params.optimizer_lr))
    return model
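# Hedged usage sketch for create_model: `Params` is defined elsewhere in the
# original project, so the field values below are illustrative assumptions.
# The model takes two face crops and predicts whether they show the same
# person (sigmoid output).
# params = Params(image_size=224, dropout=0.5, optimizer_lr=1e-4)
# siamese = create_model(params)
# siamese.fit([faces_a, faces_b], labels, batch_size=16, epochs=5)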
def get_predictions_from_png_image_example():
    """Example usage to get predictions (human identity) from an image."""
    from tensorflow.keras.preprocessing import image
    import numpy as np
    import keras_vggface.utils as libutils

    image_preprocessor = create_preprocessing_model()
    model = VGGFace(model='senet50')
    img = image.load_img('image/ajb-resized.jpg', target_size=(224, 224),
                         interpolation="bilinear")
    x = image.img_to_array(img)
    x = np.expand_dims(x, axis=0)
    preprocessed = image_preprocessor.predict(x)
    predictions = model.predict(preprocessed)
    print('Predicted:', libutils.decode_predictions(predictions))
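# Hedged sketch: create_preprocessing_model() is not defined in these
# snippets. For senet50 the library's version=2 preprocessing flips RGB to
# BGR and subtracts the VGGFace2 per-channel training means, which can be
# expressed as a tiny Keras model; the exact layer construction below is an
# assumption.
import tensorflow as tf


def create_preprocessing_model():
    inputs = tf.keras.Input(shape=(224, 224, 3))
    # RGB -> BGR, then subtract the per-channel means.
    bgr = tf.keras.layers.Lambda(lambda t: t[..., ::-1])(inputs)
    out = tf.keras.layers.Lambda(
        lambda t: t - tf.constant([91.4953, 103.8827, 131.0912]))(bgr)
    return tf.keras.Model(inputs, out)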
def tensorflow_custom_preprocessing_example():
    """Example usage to get face embeddings from a cropped image of a human
    face."""
    import numpy as np
    from tensorflow.keras.preprocessing import image

    image_preprocessor = create_preprocessing_model()
    embeddings_model = VGGFace(model="senet50", pooling="avg",
                               include_top=False, input_shape=(224, 224, 3))
    img = image.load_img('../image/ajb.jpg', target_size=(224, 224))
    x = image.img_to_array(img)
    x = np.expand_dims(x, axis=0)
    preprocessed = image_preprocessor.predict(x)
    embeddings = embeddings_model.predict(preprocessed)
    print("TensorFlow embeddings: ", embeddings)
def testVGG16(self):
    keras.backend.set_image_dim_ordering('tf')
    # decode_predictions() needs the classification top, so use the full
    # vgg16 model (a headless/pooling variant cannot be decoded) together
    # with version=1 preprocessing.
    model = VGGFace(model='vgg16')
    img = image.load_img('image/ajb.jpg', target_size=(224, 224))
    x = image.img_to_array(img)
    x = np.expand_dims(x, axis=0)
    x = utils.preprocess_input(x, version=1)
    preds = model.predict(x)
    print('\n', "VGG16")
    print('\n', preds)
    print('\n', 'Predicted:', utils.decode_predictions(preds))
    self.assertIn('A.J._Buckley', utils.decode_predictions(preds)[0][0][0])
    self.assertAlmostEqual(
        utils.decode_predictions(preds)[0][0][1], 0.9790116, places=3)
def create_core_ml_for_tensorflow_preprocessing():
    # Avoid shadowing the builtin `input` with the tensor type.
    input_type = ct.TensorType(shape=(1, 224, 224, 3))
    keras_model = VGGFace(model="senet50", pooling="avg", include_top=False,
                          input_shape=(224, 224, 3))
    coreml_model = ct.convert(keras_model, source='tensorflow',
                              inputs=[input_type])
    write_metadata(coreml_model)
    coreml_model.save("Face-without-preprocessing.mlmodel")
def model_from_vgg(last_layer='pool4'):
    """Returns a neural network with layers up to <last_layer> from VGG16,
    with the weights for the VGGFace layers preloaded."""
    vgg_model = VGGFace(model='vgg16', include_top=False,
                        input_shape=(224, 224, 3))
    X = vgg_model.get_layer(last_layer).output
    layer_shape = vgg_model.get_layer(last_layer).output_shape
    n_encoder_layers = int(np.log2(224 / layer_shape[2]))
    # Decoder: halve the channel count and double the resolution at each step
    # until the output is back at 224x224.
    for n in range(n_encoder_layers):
        X = Conv2DTranspose(int(layer_shape[3] / (2 ** (n + 1))), (3, 3),
                            activation='relu', padding='same',
                            name='deconv' + str(n + 1))(X)
        X = UpSampling2D(size=(2, 2), interpolation='bilinear',
                         name='unpool' + str(n + 1))(X)
    mask = Conv2D(1, (3, 3), activation='sigmoid', padding='same',
                  name='mask')(X)
    custom_model = Model(vgg_model.input, mask)
    return custom_model
class face_analyzer:
    def __init__(self):
        self.face_not_found = 0
        self.model = VGGFace(model='resnet50', include_top=False,
                             input_shape=(224, 224, 3), pooling='avg')

    def get_face(self, img):
        # Detect the first face with a Haar cascade and return a 224x224 crop.
        face_cascade = cv2.CascadeClassifier(
            "cascades\\data\\haarcascade_frontalface_default.xml")
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        coords = face_cascade.detectMultiScale(gray, 1.1, 4)
        x, y, w, h = coords[0]
        img = img[y:y + h, x:x + w]
        img = cv2.resize(img, (224, 224))
        return img

    def find_best_match(self):
        self.scores = {}
        for person in os.listdir("userdata"):
            self.get_cos_dif("userdata" + "\\" + person + "\\" + "userface.jpg")
        mini = 1
        mini_link = ""
        for person in self.scores:
            prs = person.split('\\')[-2]
            print(f"{prs} -> {self.scores[person]}")
            if self.scores[person] < mini:
                mini_link = person
                mini = self.scores[person]
        if mini < 0.4:
            return "\\".join(mini_link.split("\\")[:-1]) + "\\" + "text.txt"
        return None

    def get_cos_dif(self, cnt_person):
        # Cosine distance between the stored face and the captured one.
        self.face_not_found = 0
        self.ppl = [cv2.imread(cnt_person), cv2.imread("auth_pers.jpg")]
        faces = [self.get_face(img) for img in self.ppl]
        faces = asarray(faces, 'float32')
        faces = preprocess_input(faces, version=2)
        preds = self.model.predict(faces)
        self.scores[cnt_person] = cosine(preds[0], preds[1])
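# Hedged usage sketch for face_analyzer: following the conventions above, the
# probe image is saved as "auth_pers.jpg" and compared against every
# userdata\<person>\userface.jpg. The webcam capture is an assumption.
import cv2

cap = cv2.VideoCapture(0)
ret, frame = cap.read()
cap.release()
if ret:
    cv2.imwrite("auth_pers.jpg", frame)
    analyzer = face_analyzer()
    match = analyzer.find_best_match()
    print("match:", match)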
def get_classification_face_match(target_image):
    nb_class = 2
    hidden_dim = 512
    vggmodel = VGGFace(include_top=False, input_shape=(224, 224, 3))
    last_layer = vggmodel.get_layer('pool5').output
    x = Flatten(name='flatten')(last_layer)
    x = Dense(hidden_dim, activation='relu', name='fc6')(x)
    x = Dense(hidden_dim, activation='relu', name='fc7')(x)
    out = Dense(nb_class, activation='softmax', name='fc8')(x)
    identity_model = Model(vggmodel.input, out)

    # Train the model here: the new fc6/fc7/fc8 layers are randomly
    # initialised, so predictions are meaningless until training.

    # Predict
    img = image.load_img(target_image, target_size=(224, 224))
    x = image.img_to_array(img)
    x = np.expand_dims(x, axis=0)
    x = utils.preprocess_input(x, version=1)
    preds = identity_model.predict(x)
    print('\n', "VGG16")
    print('\n', preds)
    # Note: utils.decode_predictions() only accepts the canonical VGGFace
    # class counts, so for this 2-class head return the argmax index instead.
    return np.argmax(preds[0])
class BlackboxModel(metaclass=Singleton):
    """Singleton class representing the blackbox model."""

    def __init__(self, architecture):
        self.model = VGGFace(model=architecture)

    def predict(self, batch):
        # version=2 preprocessing matches the resnet50/senet50 architectures.
        preprocessed_batch = utils.preprocess_input(batch, version=2)
        preds = self.model.predict(preprocessed_batch)
        return preds

    def __call__(self, batch):
        return self.model(batch)
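# Hedged usage sketch for BlackboxModel: the Singleton metaclass (defined
# elsewhere in the original project) is assumed to make repeated construction
# return the same instance, so the weights load only once. The random batch
# is illustrative.
import numpy as np

blackbox = BlackboxModel('resnet50')
assert blackbox is BlackboxModel('resnet50')  # same underlying instance
batch = np.random.uniform(0, 255, (2, 224, 224, 3)).astype(np.float32)
preds = blackbox.predict(batch)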
def extract_cnn_ft(dataset):
    alignDir = ('/media/tunguyen/Others/Dataset/FacialExpressions/'
                'processed_data/' + dataset + '/aligned_faces_extracted')
    if dataset == 'SPOS':
        alignDir = ('/media/tunguyen/Others/Dataset/FacialExpressions/'
                    'processed_data/' + dataset + '/frames')
    # Extract CNN features of each aligned face using VGGFace.
    out_ft = ('/media/tunguyen/Others/Dataset/FacialExpressions/'
              'processed_data/' + dataset + '/cnn_features_by_frame/')
    keras.backend.set_image_dim_ordering('tf')
    model = VGGFace(include_top=False, model='vgg16',
                    input_shape=(224, 224, 3),
                    pooling='avg')  # pooling: None, avg or max
    model.summary()
    for expression in os.listdir(alignDir):
        expression_path = os.path.join(alignDir, expression)
        # Map Happy to Genuine and the other labelled expressions to Fake.
        if expression in ['Happy', 'Neutral', 'Sad_']:
            if expression == 'Happy':
                out_ftDir = os.path.join(out_ft, 'Genuine')
            else:
                out_ftDir = os.path.join(out_ft, 'Fake')
        else:
            out_ftDir = os.path.join(out_ft, expression)
        if expression in ['0', '1']:
            for session in os.listdir(expression_path):
                if session not in ['49_664_676']:
                    session_path = os.path.join(expression_path, session)
                    # Output folder to save features.
                    ft_f_dir = os.path.join(out_ftDir, session)
                    if not os.path.exists(ft_f_dir):
                        os.makedirs(ft_f_dir)
                    get_cnn_features(session_path, ft_f_dir, model)
def senet50(num_classes=params.NUM_CLASSES_VGGFACE, trained=False,
            weights_path=params.SENET50_WEIGHTS_PATH):
    tf.keras.backend.set_session(sess)
    optimizer = keras.optimizers.Adam()
    model = VGGFace(model='senet50', include_top=True, weights=None,
                    classes=num_classes)
    model.compile(optimizer=optimizer,
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
    if trained:
        model.build(input_shape=[None, 224, 224, 3])
        model.load_weights(weights_path)
    return model
from keras_vggface import VGGFace

VGGFace(model="resnet50")
def init_model(self):
    # The original snippet was truncated here ("last_layer = vgg_model" with
    # no such name in scope); the completion below is a minimal assumption.
    vgg_model = VGGFace(model='vgg16', include_top=False,
                        input_shape=(224, 224, 3), pooling='max')
    last_layer = vgg_model.output
from keras.utils import plot_model
from keras_vggface import VGGFace

# Plot all three VGGFace architectures as PNG diagrams.
model = VGGFace(model='vgg16')
plot_model(model, to_file='vgg16.png', show_shapes=True)

model = VGGFace(model='resnet50')
plot_model(model, to_file='resnet50.png', show_shapes=True)

model = VGGFace(model='senet50')
plot_model(model, to_file='senet50.png', show_shapes=True)
def __init__(self, architecture):
    self.model = VGGFace(model=architecture)
# image = combine_images(generated_images)
# image = image * 127.5 + 127.5
# Image.fromarray(image.astype(np.uint8)).save(
#     "./image_result/generated_image.png")


def get_args():
    parser = argparse.ArgumentParser()
    parser.add_argument("--mode", type=str)
    parser.add_argument("--batch_size", type=int, default=128)
    parser.add_argument("--nice", dest="nice", action="store_true")
    parser.set_defaults(nice=False)
    args = parser.parse_args()
    return args


if __name__ == "__main__":
    args = get_args()
    if args.mode == "train":
        image_input = Input(shape=(3, 112, 96))
        vgg_model = VGGFace(input_tensor=image_input, include_top=False,
                            pooling='avg')  # pooling: None, avg or max
        out = vgg_model.get_layer('pool5').output
        vgg_conv = Model(image_input, out)
        train(vgg_conv)
    elif args.mode == "generate":
        generate(BATCH_SIZE=args.batch_size, nice=args.nice)
    elif args.mode == "mse":
        MSE(BATCH_SIZE=args.batch_size)
def train(BATCH_SIZE):
    # Load the training data.
    print('Data loading..')
    X_train, y_train, X_test, y_test = Data.loadData('data.h5')
    print('Data Loaded. Now normalizing..')
    X_train = (X_train.astype(np.float32) - 127.5) / 127.5
    y_train = (y_train.astype(np.float32) - 127.5) / 127.5
    print('Data Normalized.')

    # Optimization settings.
    d_optim = SGD(lr=0.0001, decay=1e-6, momentum=0.9, nesterov=True)
    g_optim = SGD(lr=0.001, decay=1e-6, momentum=0.9, nesterov=True)
    g_vgg_optim = SGD(lr=0.001, decay=1e-6, momentum=0.9, nesterov=True)

    # VGG model: frozen VGGFace pool5 features used for the perceptual
    # (feature-matching) loss.
    image_input = Input(shape=(112, 96, 3))
    vgg_model = VGGFace(input_tensor=image_input, include_top=False,
                        pooling='avg')  # pooling: None, avg or max
    out = vgg_model.get_layer('pool5').output
    vgg_conv = Model(image_input, out)

    # Generator model.
    generator = res_net((112, 96, 3))
    generator.compile(loss='mean_squared_error', optimizer=g_optim)

    # Discriminator model.
    discriminator = discriminator_model()
    discriminator.trainable = True
    discriminator.compile(loss='binary_crossentropy', optimizer=d_optim)

    # Generator + VGG model.
    generator_vgg = generator_containing_discriminator(generator, vgg_conv)
    generator_vgg.compile(loss=conv_loss, optimizer=g_vgg_optim)

    # Generator + discriminator model.
    generator_discriminator = \
        generator_containing_discriminator(generator, discriminator)
    generator_discriminator.compile(loss='binary_crossentropy',
                                    optimizer=g_optim)

    for epoch in range(100):
        print("Epoch is", epoch)
        print("Number of batches", int(X_train.shape[0] / BATCH_SIZE))
        for index in range(int(X_train.shape[0] / BATCH_SIZE)):
            lr_image_batch = X_train[index * BATCH_SIZE:
                                     (index + 1) * BATCH_SIZE]
            hr_image_batch = y_train[index * BATCH_SIZE:
                                     (index + 1) * BATCH_SIZE]
            generated_images = generator.predict(lr_image_batch, verbose=0)
            if index % 10 == 0:
                image = save_image(generated_images)
                image = image * 127.5 + 127.5
                im = Image.fromarray(image.astype(np.uint8))
                im.save("./image_result/" + str(epoch) + "_" +
                        str(index) + ".png")

            X = np.concatenate((hr_image_batch, generated_images))
            y = [1] * BATCH_SIZE + [0] * BATCH_SIZE

            # Discriminator training.
            d_loss = discriminator.train_on_batch(X, y)
            print("batch %d d_loss : %f" % (index, d_loss))
            discriminator.trainable = False

            # Generator training (pixel-wise MSE).
            g_loss1 = generator.train_on_batch(lr_image_batch, hr_image_batch)
            print("batch %d generator loss : %f" % (index, g_loss1))

            # Generator + discriminator training (adversarial loss).
            g_loss2 = generator_discriminator.train_on_batch(
                lr_image_batch, [1] * BATCH_SIZE)
            discriminator.trainable = True
            print("batch %d gene_discri_loss : %f" % (index, g_loss2))

            if epoch < 5:
                # Generate feature labels for the hr_images and train the
                # generator against the VGG features (perceptual loss).
                labels = vgg_conv.predict(hr_image_batch)
                g_loss2 = generator_vgg.train_on_batch(lr_image_batch, labels)
                print("batch %d gene_vgg_loss : %f" % (index, g_loss2))

            if index % 10 == 9:
                generator.save_weights('generator', True)
                discriminator.save_weights('discriminator', True)
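# Hedged sketch: conv_loss is not shown in this snippet. Since generator_vgg
# is trained to match the vgg_conv feature labels, a plain feature-space MSE
# is the usual choice; this definition is an assumption, not the original.
from keras import backend as K


def conv_loss(y_true, y_pred):
    # Mean squared error between VGGFace pool5 feature vectors.
    return K.mean(K.square(y_true - y_pred), axis=-1)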
loss_config = {}
# "mixup_LSGAN" or "relativistic_avg_LSGAN"
loss_config["gan_training"] = "mixup_LSGAN"
loss_config['use_PL'] = False
loss_config["PL_before_activ"] = False
loss_config['use_mask_hinge_loss'] = False
loss_config['m_mask'] = 0.
loss_config['lr_factor'] = 1.
loss_config['use_cyclic_loss'] = False

model = FaceswapGANModel(**arch_config)
model.load_weights(path=models_dir)

# VGGFace ResNet50
vggface = VGGFace(include_top=False, model='resnet50',
                  input_shape=(224, 224, 3))
# vggface.summary()

model.build_pl_model(vggface_model=vggface,
                     before_activ=loss_config["PL_before_activ"])
model.build_train_functions(loss_weights=loss_weights, **loss_config)
model.load_weights(path=models_dir)


def showG(test_A, test_B, path_A, path_B, batchSize):
    figure_A = np.stack([
        test_A,
# MIT License
# Copyright (c) 2021 Loyio

from keras_vggface import VGGFace

model = VGGFace(model='resnet50')
print('Inputs: %s' % model.inputs)
print('Outputs: %s' % model.outputs)
def double_faces_extraction(double_face_video_path, detections,
                            temp_dir_double_extraction):
    list_det_final = []
    frame_counter = 0
    face_model = VGGFace(model='resnet50', include_top=False,
                         input_shape=(224, 224, 3), pooling='avg')
    cv2video = cv2.VideoCapture(double_face_video_path)
    video_double = os.path.basename(double_face_video_path)
    video_name = video_double[:-4]
    video_frames_extraction_folder = os.path.join(
        temp_dir_double_extraction, video_name)

    # First pass (analysis): sample frames to build an average embedding for
    # the left and right faces. Second pass: assign each detection to a side.
    analysis_step = True
    right_face = {'best_img': None, 'features': [],
                  'avg_feature': None, 'rois': []}
    left_face = {'best_img': None, 'features': [],
                 'avg_feature': None, 'rois': []}
    current_annotation = 'right'
    video_frames_extraction_folder_annotation_left = \
        video_frames_extraction_folder + "_left"
    Path(video_frames_extraction_folder_annotation_left).mkdir(
        parents=True, exist_ok=True)
    video_frames_extraction_folder_annotation_right = \
        video_frames_extraction_folder + "_right"
    Path(video_frames_extraction_folder_annotation_right).mkdir(
        parents=True, exist_ok=True)
    counter_for_analysis = 0
    counter_maximum_frame_analysis = 20
    pbar = tqdm(total=len(detections))
    total = len(detections)
    previous_f = {'roi': None}
    detections_final = {}

    while frame_counter < total:
        # Capture frame-by-frame.
        ret, frame = cv2video.read()
        if ret is True:
            if analysis_step and frame_counter % 30 == 0:
                faces = detections[frame_counter]
                if len(faces) == 2:
                    extraction_condition = check_left_right_from_center(
                        faces, frame.shape[1])
                    if extraction_condition:
                        left_face_from_frame, right_face_from_frame = \
                            extract_left_right_faces(faces)
                        if current_annotation == 'right':
                            f = right_face_from_frame
                        else:
                            f = left_face_from_frame
                        resized_left_face = cv2.resize(
                            left_face_from_frame['img'], (224, 224))
                        y_left = face_model.predict(
                            np.expand_dims(resized_left_face, axis=0))
                        left_face['features'].append(y_left)
                        resized_right_face = cv2.resize(
                            right_face_from_frame['img'], (224, 224))
                        y_right = face_model.predict(
                            np.expand_dims(resized_right_face, axis=0))
                        right_face['features'].append(y_right)
            elif not analysis_step:
                faces = detections[frame_counter]
                if len(faces) > 1:
                    f, pred = findFaceOnSide(
                        face_model, faces, (left_face, right_face),
                        current_annotation == 'right', frame.shape[1],
                        previous_f['roi'])
                elif len(faces) == 1:
                    checked_similarity, pred = check_face_similarity(
                        face_model, faces[0], (left_face, right_face))
                    if (checked_similarity == 0 and
                            current_annotation == 'left') or \
                            (checked_similarity == 1 and
                             current_annotation == 'right'):
                        f = faces[0]
                    else:
                        # Detection fails; handled by the recovery below.
                        f = None
                else:
                    # Detection problem at this frame.
                    f = None

                if (f is not None) and (f['img'].size != 0):
                    detections_final[frame_counter] = [f]
                    previous_f = f
                    f['roi'] = add_margin(f['roi'], 0.9 * f['roi'][2])
                    f['roi'] = enclosing_square(f['roi'])
                    img = cut(frame, f['roi'])
                    cv2.imwrite(
                        video_frames_extraction_folder + "_" +
                        current_annotation +
                        "/frame-{:06}.png".format(frame_counter), img)
                else:
                    # Recovery: reuse the previous ROI, or a placeholder if
                    # there is none yet.
                    if previous_f['roi'] is None:
                        roi_prov = (100, 100, 100, 100)
                        img_recover = cut(frame, roi_prov)
                        detections_final[frame_counter] = [{'roi': roi_prov}]
                    else:
                        img_recover = cut(frame, previous_f['roi'])
                        detections_final[frame_counter] = [previous_f]
                    cv2.imwrite(
                        video_frames_extraction_folder + "_" +
                        current_annotation +
                        "/frame-{:06}.png".format(frame_counter), img_recover)
        # else: the cv2 read failed for this frame; leave the gap here and
        # interpolate it after this stage.

        frame_counter += 1
        if (frame_counter == total and analysis_step) or \
                counter_for_analysis > counter_maximum_frame_analysis:
            # Analysis pass finished: average the collected features and
            # rewind the video for the extraction pass.
            frame_counter = 0
            cv2video.set(cv2.CAP_PROP_POS_FRAMES, 0)
            analysis_step = False
            left_face['avg_feature'] = np.mean(left_face['features'], axis=0)
            right_face['avg_feature'] = np.mean(right_face['features'], axis=0)
            counter_for_analysis = 0
            pbar = tqdm(total=total)
        if frame_counter == total and not analysis_step and \
                current_annotation == 'right':
            # Right side done: rewind and extract the left side.
            cv2video.set(cv2.CAP_PROP_POS_FRAMES, 0)
            frame_counter = 0
            current_annotation = 'left'
            pbar = tqdm(total=total)
            list_det_final.append(detections_final)
            detections_final = {}
            previous_f = {'roi': None}
        pbar.update(1)
    pbar.close()

    # When everything is done, release the video capture object and close
    # all OpenCV windows.
    cv2video.release()
    cv2.destroyAllWindows()
    list_det_final.append(detections_final)
    return list_det_final