class ExtractLandmarksFixLength(DataProcessBase):

    def __init__(self, source_dir, target_dir, data_file_index=0, dimension=224,
                 limit_input_dirs=None, generate_data_file_only=False, seq_length=40):
        super(ExtractLandmarksFixLength, self).__init__(
            source_dir, target_dir, data_file_index, dimension,
            limit_input_dirs, generate_data_file_only)
        self.process_description = 'Fixing Landmark Sequence Count'
        self.seq_length = seq_length
        # Get the dataset.
        self.data = DataSet(source_dir + '/data.csv', target_dir,
                            seq_length=seq_length, class_limit=None)

    def do_process(self, source_row_tuple):
        # Unpack the row into variables.
        input_dir, class_name, filename_no_ext, nb_sub_samples = source_row_tuple

        if int(nb_sub_samples) < self.seq_length:
            return

        # Get the path to the fixed-length sequence file for this sample.
        path = self.target_dir + '/' + input_dir + '/' + class_name + '/' + \
            os.path.basename(filename_no_ext) + '-' + str(self.seq_length) + SQ_LM_FILE_SUFFIX

        # Check if we already have it.
        if os.path.isfile(path):
            return

        if not os.path.exists(os.path.dirname(path)):
            os.makedirs(os.path.dirname(path))

        # Load the landmark sub-samples.
        sub_samples = np.load(self.source_dir + '/' + input_dir + '/' + class_name + '/' +
                              os.path.basename(filename_no_ext) + LM_FILE_SUFFIX)

        # Now downsample to just the ones we need.
        sub_samples = self.data.rescale_list(sub_samples, self.seq_length)

        sequence = []
        for sub_sample in sub_samples:
            sequence.append(sub_sample)

        # Save the sequence.
        np.save(path, np.array(sequence))
        return

    def get_nb_sub_samples(self, sample_tuple):
        """
        Return the generated number of sub-samples for the sample.
        :param sample_tuple: has the structure input_dir, class_name, filename_no_ext, nb_sub_samples
        :return: number of sub-samples
        """
        # If the file exists we know there are exactly seq_length sub-samples inside it.
        # If the file does not exist there are no sub-samples.
        if self.check_already_extracted(sample_tuple, self.target_dir):
            return self.seq_length
        return 0

    def check_already_extracted(self, sample_tuple, target_dir):
        """Check whether the fixed-length sequence file for this sample already exists."""
        input_dir, class_name, filename_no_ext, _ = sample_tuple
        return bool(os.path.exists(
            target_dir + '/' + input_dir + '/' + class_name + '/' +
            filename_no_ext + '-' + str(self.seq_length) + SQ_LM_FILE_SUFFIX))
class ExtractLandmarks(DataProcessBase):

    def __init__(self, source_dir, target_dir, data_file_index=0, dimension=224,
                 limit_input_dirs=None, generate_data_file_only=False, seq_length=40,
                 use_padding=False, nb_min_subsample=None):
        super(ExtractLandmarks, self).__init__(
            source_dir, target_dir, data_file_index, dimension,
            limit_input_dirs, generate_data_file_only)
        self.process_description = 'Extracting Landmarks'
        self.seq_length = seq_length
        # Obtain detector and predictor.
        self.detector = dlib.get_frontal_face_detector()
        self.predictor = dlib.shape_predictor(PREDICTOR_PATH)
        # Get the dataset.
        self.data = DataSet(source_dir + '/data.csv', target_dir,
                            seq_length=seq_length, class_limit=None)
        self.use_padding = use_padding
        self.nb_min_subsample = seq_length
        if use_padding:
            if nb_min_subsample:
                self.nb_min_subsample = nb_min_subsample
            else:
                self.nb_min_subsample = seq_length // 2

    def do_process(self, source_row_tuple):
        # Unpack the row into variables.
        input_dir, class_name, filename_no_ext, nb_sub_samples = source_row_tuple

        if int(nb_sub_samples) < self.nb_min_subsample:
            return

        # Get the path to the landmark sequence file for this sample.
        path = self.target_dir + '/' + input_dir + '/' + class_name + '/' + \
            os.path.basename(filename_no_ext) + LM_FILE_SUFFIX

        # Check if we already have it.
        if os.path.exists(path):
            return

        if not os.path.exists(os.path.dirname(path)):
            os.makedirs(os.path.dirname(path))

        # List the sub-samples (frames) for this sample.
        sub_samples = glob.glob(self.source_dir + '/' + input_dir + '/' + class_name + '/' +
                                filename_no_ext + '*.*')

        # Now downsample to just the ones we need.
        if self.seq_length:
            sub_samples = self.data.rescale_list(sub_samples, self.seq_length)

        sequence = []
        for sub_sample in sub_samples:
            # Extract and normalize landmarks to build the sequence.
            landmarks = self.__detect_landmarks(sub_sample)
            if landmarks:
                landmarks = self.__normalize_landmarks(landmarks)
                sequence.append(landmarks)

        # Pad with zeros when the sequence is shorter than seq_length but long enough to keep.
        if self.use_padding and self.nb_min_subsample < len(sequence) < self.seq_length:
            nb_padding_needed = max(0, self.seq_length - len(sequence))
            for i in range(nb_padding_needed):
                sequence.append([0 for x in range(0, 23)])  # pad with zeros

        if len(sequence) == self.seq_length:
            # Save the sequence.
            np.save(path, np.array(sequence))
        return

    def get_nb_sub_samples(self, sample_tuple):
        """
        Return the generated number of sub-samples for the sample.
        :param sample_tuple: has the structure input_dir, class_name, filename_no_ext, nb_sub_samples
        :return: number of sub-samples
        """
        # If the file exists, the saved array tells us how many sub-samples it holds.
        # If the file does not exist there are no sub-samples.
        if ExtractLandmarks.check_already_extracted(sample_tuple, self.target_dir):
            input_dir, class_name, filename_no_ext, _ = sample_tuple
            lm_arr = np.load(self.target_dir + '/' + input_dir + '/' + class_name + '/' +
                             filename_no_ext + LM_FILE_SUFFIX)
            return len(lm_arr)
        return 0

    def __detect_landmarks(self, image_path):
        image = Image.open(image_path)
        # Convert the image to a numpy array.
        img = np.asanyarray(image)
        img.flags.writeable = True
        # Output list.
        face_landmark_tuples = []
        # Obtain landmarks.
        dets = self.detector(img, 1)
        print("Number of faces detected: {}".format(len(dets)))
        for k, rect in enumerate(dets):
            print("Detection {}: Left: {} Top: {} Right: {} Bottom: {}".format(
                k, rect.left(), rect.top(), rect.right(), rect.bottom()))
            # Get the landmarks/parts for the face in box rect.
            shape = self.predictor(img, rect)
            face_landmark_tuples.append([shape.part(x) for x in range(68)])
        return face_landmark_tuples

    @staticmethod
    def check_already_extracted(sample_tuple, target_dir):
        """Check whether the landmark file for this sample already exists."""
        input_dir, class_name, filename_no_ext, _ = sample_tuple
        return bool(os.path.exists(target_dir + '/' + input_dir + '/' + class_name + '/' +
                                   filename_no_ext + LM_FILE_SUFFIX))

    def __normalize_landmarks(self, landmarks):
        # Use only the first detected face.
        landmarks = landmarks[0]
        top_point = landmarks[TOP_OF_FACE_INDEX]
        bottom_point = landmarks[BOTTOM_OF_FACE_INDEX]
        height = FACE_HEIGHT_MULTIPLIER * (bottom_point.y - top_point.y)
        center_y = landmarks[CENTER_OF_FACE_INDEX].y
        # Collect the indices of the landmarks we keep.
        lm_indices = []
        lm_indices.extend(LEFT_EYEBROW_INDICES)
        lm_indices.extend(LEFT_EYE_INDICES)
        lm_indices.extend(LEFT_LIPS_INDICES)
        lm_indices.extend(LEFT_MOUTH_INDICES)
        # Normalize each kept landmark's vertical offset from the face center by face height.
        normalized_landmarks = []
        for idx in lm_indices:
            lm_point_y = landmarks[idx].y
            normalized_landmarks.append(abs(lm_point_y - center_y) * 1.0 / height)
        return normalized_landmarks
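# A minimal usage sketch, not part of the source: driving ExtractLandmarks and
# ExtractLandmarksFixLength by hand for a single data.csv row. The directory paths
# and the sample tuple values below are made-up assumptions for illustration; in the
# project these rows come from the source data.csv file.
extract = ExtractLandmarks('/data/frames', '/data/landmarks',
                           seq_length=40, use_padding=True)
fix_length = ExtractLandmarksFixLength('/data/landmarks', '/data/landmarks-40',
                                       seq_length=40)

# Row structure: (input_dir, class_name, filename_no_ext, nb_sub_samples).
sample_row = ('train', 'happy', 'subject01_take03', '52')  # hypothetical values
extract.do_process(sample_row)      # writes the landmark .npy file for this sample
fix_length.do_process(sample_row)   # resamples that file to exactly 40 entries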
class DrawLandmarks(DataProcessBase):

    def __init__(self, source_dir, target_dir, data_file_index=0, dimension=224,
                 limit_input_dirs=None, generate_data_file_only=False, seq_length=40,
                 pretrained_model=None, layer_name=None):
        super(DrawLandmarks, self).__init__(
            source_dir, target_dir, data_file_index, dimension,
            limit_input_dirs, generate_data_file_only)
        self.process_description = 'Drawing Landmarks'
        self.seq_length = seq_length
        # Obtain detector and predictor.
        self.detector = dlib.get_frontal_face_detector()
        self.predictor = dlib.shape_predictor(PREDICTOR_PATH)
        # Get the dataset.
        self.data = DataSet(source_dir + '/data.csv', target_dir,
                            seq_length=seq_length, class_limit=None)

    def do_process(self, source_row_tuple):
        # Unpack the row into variables.
        input_dir, class_name, filename_no_ext, nb_sub_samples = source_row_tuple

        if int(nb_sub_samples) < self.seq_length:
            return

        # Get the target directory for this sample's annotated frames.
        target_class_path = self.target_dir + '/' + input_dir + '/' + class_name + '/'
        if not os.path.exists(target_class_path):
            os.makedirs(target_class_path)

        # List the sub-samples (frames) for this sample.
        sub_samples = glob.glob(self.source_dir + '/' + input_dir + '/' + class_name + '/' +
                                filename_no_ext + '*.*')

        # Now downsample to just the ones we need.
        if self.seq_length:
            sub_samples = self.data.rescale_list(sub_samples, self.seq_length)

        for sub_sample in sub_samples:
            sub_sample_img = Image.open(sub_sample)
            # Detect landmarks and draw them onto a copy of the frame.
            landmarks = self.__detect_landmarks(sub_sample_img)
            if landmarks:
                sub_sample_img_lm = self.draw_landmarks(sub_sample_img, landmarks)
                sub_sample_img_lm.save(
                    target_class_path +
                    os.path.splitext(os.path.basename(sub_sample))[0] + LM_SUFFIX)
        return

    def get_nb_sub_samples(self, sample_tuple):
        """
        Return the generated number of sub-samples for the sample.
        :param sample_tuple: has the structure input_dir, class_name, filename_no_ext, nb_sub_samples
        :return: number of sub-samples
        """
        input_dir, class_name, filename_no_ext, nb_sub_samples = sample_tuple
        sub_samples = glob.glob(self.target_dir + '/' + input_dir + '/' + class_name + '/' +
                                filename_no_ext + '*.*')
        return len(sub_samples)

    def __detect_landmarks(self, sub_sample_img):
        # Convert the image to a numpy array.
        img = np.asanyarray(sub_sample_img)
        img.flags.writeable = True
        # Output list.
        face_landmark_tuples = []
        # Obtain landmarks.
        dets = self.detector(img, 1)
        print("Number of faces detected: {}".format(len(dets)))
        for k, rect in enumerate(dets):
            print("Detection {}: Left: {} Top: {} Right: {} Bottom: {}".format(
                k, rect.left(), rect.top(), rect.right(), rect.bottom()))
            # Get the landmarks/parts for the face in box rect.
            shape = self.predictor(img, rect)
            face_landmark_tuples.append([shape.part(x) for x in range(68)])
        return face_landmark_tuples

    def draw_landmarks(self, image, parts):
        radius = 1
        # Copy the original image; do not touch it.
        out_image = image.copy()
        if out_image.mode != "RGB":
            out_image = out_image.convert("RGB")
        # Draw a circle for each landmark of the first detected face.
        draw = ImageDraw.Draw(out_image)
        for part in parts[0]:
            x = part.x
            y = part.y
            draw.ellipse([x - radius, y - radius, x + radius, y + radius], fill=(250, 0, 0))
        return out_image
class AdjustSubsampleCount(DataProcessBase):

    def __init__(self, source_dir, target_dir, data_file_index=0, dimension=224,
                 limit_input_dirs=None, generate_data_file_only=False, seq_length=40,
                 use_padding=False, nb_min_subsample=None):
        super(AdjustSubsampleCount, self).__init__(
            source_dir, target_dir, data_file_index, dimension,
            limit_input_dirs, generate_data_file_only)
        self.process_description = 'Adjusting Subsample Count to ' + str(seq_length)
        self.seq_length = seq_length
        # Get the dataset.
        self.data = DataSet(source_dir + '/data.csv', target_dir,
                            seq_length=seq_length, class_limit=None)
        self.use_padding = use_padding
        self.nb_min_subsample = seq_length
        if use_padding:
            if nb_min_subsample:
                self.nb_min_subsample = nb_min_subsample
            else:
                self.nb_min_subsample = seq_length // 2

    def do_process(self, source_row_tuple):
        # Unpack the row into variables.
        input_dir, class_name, filename_no_ext, nb_sub_samples = source_row_tuple

        if int(nb_sub_samples) < self.nb_min_subsample:
            return

        # If the number of sub-samples is less than seq_length, find how many padding
        # sub-samples are required. If padding is not required (there are enough or more
        # than enough samples), the count is zero. If padding is disabled and would be
        # needed, the check above has already returned before reaching this point.
        nb_padding_needed = max(0, self.seq_length - int(nb_sub_samples))

        # Path used to check whether this sample has already been processed.
        path = self.target_dir + '/' + input_dir + '/' + class_name + '/' + \
            os.path.basename(filename_no_ext) + FEATURE_FILE_SUFFIX

        # Check if we already have it.
        if os.path.isfile(path):
            return

        if not os.path.exists(os.path.dirname(path)):
            os.makedirs(os.path.dirname(path))

        # List the sub-samples (frames) for this sample.
        sub_samples = glob.glob(self.source_dir + '/' + input_dir + '/' + class_name + '/' +
                                filename_no_ext + '*.*')

        # Now downsample to just the ones we need.
        if nb_padding_needed == 0:
            sub_samples = self.data.rescale_list(sub_samples, self.seq_length)

        # Write empty (black) padding frames so every sample ends up with seq_length frames.
        # self.dimension is expected to be a (width, height) tuple here.
        for i in range(nb_padding_needed):
            empty_image = Image.new('RGB', self.dimension)
            target_file = self.target_dir + '/' + input_dir + '/' + class_name + '/' + \
                os.path.basename(filename_no_ext) + '_' + '{:08d}'.format(0) + \
                '_' + '{:02d}'.format(i) + '.jpg'
            empty_image.save(target_file)

        for sub_sample in sub_samples:
            # Resize the frame if needed and copy it to the target directory.
            img = cv2.imread(sub_sample)
            height, width = img.shape[:2]
            if self.dimension[0] != height or self.dimension[1] != width:
                img = cv2.resize(img, self.dimension)
            target_file = self.target_dir + '/' + input_dir + '/' + class_name + '/' + \
                os.path.basename(sub_sample)
            cv2.imwrite(target_file, img)
        return

    def get_nb_sub_samples(self, sample_tuple):
        """
        Return the generated number of sub-samples for the sample.
        :param sample_tuple: has the structure input_dir, class_name, filename_no_ext, nb_sub_samples
        :return: number of sub-samples
        """
        input_dir, class_name, filename_no_ext, _ = sample_tuple
        sub_samples = glob.glob(self.target_dir + '/' + input_dir + '/' + class_name + '/' +
                                filename_no_ext + '*.*')
        return len(sub_samples)
def train(source_dir, work_root_dir, data_type, seq_length, model, saved_model=None,
          concat=False, image_shape=None, load_to_memory=False):
    if not os.path.exists(work_root_dir):
        os.makedirs(work_root_dir)
        os.makedirs(work_root_dir + '/checkpoints')
        os.makedirs(work_root_dir + '/logs')

    # Set variables.
    nb_epoch = 2000
    batch_size = 8
    data_file = source_dir + '/data.csv'

    # Helper: TensorBoard.
    tb = TensorBoard(log_dir=work_root_dir + '/logs')

    # Helper: Save the model.
    checkpointer = ModelCheckpoint(
        save_weights_only=False,
        filepath=work_root_dir + '/checkpoints/w.{epoch:03d}-{val_acc:.4f}-{val_loss:.2f}.hdf5',
        verbose=1,
        save_best_only=True,
        monitor='val_acc')

    # Helper: Stop when we stop learning.
    early_stopper = EarlyStopping(patience=20, monitor='val_acc')

    # Helper: Save results.
    timestamp = time.time()
    csv_logger = CSVLogger(work_root_dir + '/logs/' + model + '-' + 'training-' +
                           str(timestamp) + '.log')

    # Get the data and process it.
    if image_shape is None:
        features_length = determine_feature_count(source_dir)
        data = DataSet(data_file=data_file, sequence_dir=source_dir,
                       seq_length=seq_length, class_limit=None, given_classes=None)
    else:
        features_length = None
        data = DataSet(data_file=data_file, sequence_dir=source_dir,
                       seq_length=seq_length, class_limit=None,
                       image_shape=image_shape, given_classes=None)

    # Get samples per epoch.
    # Multiply by 0.7 to attempt to guess how much of data.data is the train set.
    steps_per_epoch = (len(data.data) * 0.7) // batch_size

    X = None
    y = None
    X_test = None
    y_test = None
    generator = None
    val_generator = None

    if load_to_memory:
        # Get data.
        X, y = data.get_all_sequences_in_memory(True, data_type, concat)
        X_test, y_test = data.get_all_sequences_in_memory(False, data_type, concat)
        print("Train samples %d, test samples %d" % (len(X), len(X_test)))
    else:
        # Get generators.
        generator = data.frame_generator(batch_size, True, data_type, concat)
        val_generator = data.frame_generator(1, False, data_type, concat)

    # Get the model.
    rm = ResearchModels(len(data.classes), model, seq_length, saved_model,
                        features_length=features_length, dimension=image_shape)

    # Fit!
    if load_to_memory:
        # Use standard fit.
        history = rm.model.fit(
            X, y,
            batch_size=batch_size,
            validation_data=(X_test, y_test),
            verbose=1,
            callbacks=[checkpointer, tb, early_stopper, csv_logger],
            epochs=nb_epoch)
    else:
        # Use fit_generator.
        history = rm.model.fit_generator(
            generator=generator,
            steps_per_epoch=steps_per_epoch,
            epochs=nb_epoch,
            verbose=1,
            callbacks=[checkpointer, tb, early_stopper, csv_logger],
            validation_data=val_generator,
            validation_steps=365)

    if val_generator:
        _, test = data.split_train_test()
        present_results_generator(work_root_dir, rm.model, history, val_generator,
                                  len(test), classes=data.classes)
    else:
        present_results(work_root_dir, rm.model, history, X_test=X_test,
                        Y_test=y_test, classes=data.classes)
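# A minimal invocation sketch, not from the source: calling train() on pre-extracted
# feature sequences. The paths, data_type value, and model name are assumptions for
# illustration; they must match what DataSet and ResearchModels actually accept.
train(source_dir='/data/features',      # directory holding data.csv and the sequences
      work_root_dir='/runs/lstm-40',    # checkpoints and logs are written here
      data_type='features',             # assumed flag understood by DataSet
      seq_length=40,
      model='lstm',                     # assumed to be a model key in ResearchModels
      load_to_memory=False)             # stream batches via frame_generator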
class ExtractFeatures(DataProcessBase):

    def __init__(self, source_dir, target_dir, data_file_index=0, dimension=224,
                 limit_input_dirs=None, generate_data_file_only=False, seq_length=40,
                 pretrained_model=None, layer_name=None):
        super(ExtractFeatures, self).__init__(
            source_dir, target_dir, data_file_index, dimension,
            limit_input_dirs, generate_data_file_only)
        self.process_description = 'Extracting Features'
        self.seq_length = seq_length
        # Get the feature extractor model.
        self.extractor = Extractor(pretrained_model, layer_name, (dimension, dimension))
        # Get the dataset.
        self.data = DataSet(source_dir + '/data.csv', target_dir,
                            seq_length=seq_length, class_limit=None)

    def do_process(self, source_row_tuple):
        # Unpack the row into variables.
        input_dir, class_name, filename_no_ext, nb_sub_samples = source_row_tuple

        if int(nb_sub_samples) < self.seq_length:
            return

        # Get the path to the feature sequence file for this sample.
        path = self.target_dir + '/' + input_dir + '/' + class_name + '/' + \
            os.path.basename(filename_no_ext) + FEATURE_FILE_SUFFIX

        # Check if we already have it.
        if os.path.isfile(path):
            return

        if not os.path.exists(os.path.dirname(path)):
            os.makedirs(os.path.dirname(path))

        # List the sub-samples (frames) for this sample.
        sub_samples = glob.glob(self.source_dir + '/' + input_dir + '/' + class_name + '/' +
                                filename_no_ext + '*.*')

        # Now downsample to just the ones we need.
        sub_samples = self.data.rescale_list(sub_samples, self.seq_length)

        sequence = []
        for sub_sample in sub_samples:
            # Extract features to build the sequence.
            features = self.extractor.extract(sub_sample)
            sequence.append(features)

        # Save the sequence.
        np.savetxt(path, np.array(sequence).reshape((self.seq_length, -1)))
        return

    def get_nb_sub_samples(self, sample_tuple):
        """
        Return the generated number of sub-samples for the sample.
        :param sample_tuple: has the structure input_dir, class_name, filename_no_ext, nb_sub_samples
        :return: number of sub-samples
        """
        # If the file exists we know there are exactly seq_length sub-samples inside it.
        # If the file does not exist there are no sub-samples.
        if ExtractFeatures.check_already_extracted(sample_tuple, self.target_dir):
            return self.seq_length
        return 0

    @staticmethod
    def check_already_extracted(sample_tuple, target_dir):
        """Check whether the feature file for this sample already exists."""
        input_dir, class_name, filename_no_ext, _ = sample_tuple
        return bool(os.path.exists(
            target_dir + '/' + input_dir + '/' + class_name + '/' +
            filename_no_ext + FEATURE_FILE_SUFFIX))
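# A minimal construction sketch, not from the source: building the feature extraction
# step that produces the sequences train() later consumes. The pretrained model name,
# layer name, paths, and sample row are hypothetical; whatever Extractor actually
# accepts determines the valid values.
extract_features = ExtractFeatures('/data/frames-224', '/data/features',
                                   dimension=224, seq_length=40,
                                   pretrained_model='vgg16',   # assumed model id
                                   layer_name='fc6')           # assumed layer name
# Row structure: (input_dir, class_name, filename_no_ext, nb_sub_samples).
extract_features.do_process(('train', 'happy', 'subject01_take03', '52'))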