def __init__(self, config, episode_filename):
    logger.info('Initializing phase 2 parameters')
    self.episode_number = get_episode_number_from_filename(episode_filename)
    # input
    self.input_directory_path = os.path.join(config['input_directory_path'],
                                             f'episode{self.episode_number}')
    # prepare directory for caching
    self.cache_dir = TemporaryDirectory()
    results_file_path = os.path.join(self.input_directory_path, 'results.csv')
    self.results = Results.read(results_file_path)
    # prepare directory for local saving
    self.save_images = config.getboolean('save_images')
    self.save_results = config.getboolean('save_results')
    if self.save_images or self.save_results:
        out_dir_path = os.path.join(config['output_directory_path'],
                                    f'episode{self.episode_number}')
        os.makedirs(out_dir_path, exist_ok=True)
        self.output_directory_path = out_dir_path
    # for uploading cached files
    self.upload_labelled = config.getboolean('upload_images')
    self.upload_results = config.getboolean('upload_results')
    # for google drive
    self.gdrive = GDrive(
        token_path=os.environ['IC_GDRIVE_AUTH_TOKEN_PATH'],
        client_secrets_path=os.environ['IC_GDRIVE_CLIENT_SECRETS_PATH'])
    # for face recognition
    self.person_group_id = config['person_group_id']
    self.faceclient = afr.authenticate_client(config['endpoint'],
                                              os.environ['IC_AZURE_KEY_FACE'])
def cache_episode_from_s3(region, bucket_name, episode_name):
    video = s3_download(region, bucket_name, episode_name)
    cache_dir = TemporaryDirectory()
    path = os.path.join(cache_dir.name, episode_name)
    # write the downloaded bytes out, then reopen the cached copy for reading
    with open(path, 'wb') as out:
        out.write(video)
    file = open(path, 'rb')
    # keep the TemporaryDirectory alive for as long as the handle is in use;
    # otherwise the directory is removed as soon as it is garbage-collected
    file.cache_dir = cache_dir
    logger.info(f'Episode will be cached @ {file.name}')
    return file
def run(self):
    try:
        logger.info('Estimating burned members...')
        self.process_results()
        logger.info('Updating database...')
        self.update_database()
        self.upload_cached_files()
        self.save_cached_files()
    except Exception:
        logger.error('Phase 3 failed')
        raise
def save_cached_files(self):
    out_dir_path = self.output_directory_path
    if not os.path.isdir(out_dir_path):
        raise FileNotFoundError(
            f'The specified output path is not a directory: {out_dir_path}')
    for file in os.listdir(self.cache_dir.name):
        if file.endswith('.jpg') and self.save_images:
            dst = os.path.abspath(os.path.join(out_dir_path, file))
            logger.info(f'Saving {file} to {dst}')
            shutil.move(os.path.join(self.cache_dir.name, file), dst)
    if self.save_results:
        self.results.write(os.path.join(out_dir_path, 'results.csv'))
def s3_download(region_name, bucket_name, filename):
    # adapted from
    # https://www.thetechnologyupdates.com/image-processing-opencv-with-aws-lambda/
    s3 = boto3.client('s3', region_name=region_name)
    logger.info(f'Downloading: [{bucket_name}/{filename}]')
    try:
        file_obj = s3.get_object(Bucket=bucket_name, Key=filename)
        file_data = file_obj['Body'].read()
        logger.info(f'Complete: [{bucket_name}/{filename}]')
        return file_data
    except Exception:
        logger.error(f'Download failed: [{bucket_name}/{filename}]')
        raise
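
# A minimal sketch of how the two S3 helpers above compose (illustrative
# only: the region, bucket, and key below are hypothetical placeholders,
# not values from this project).
def _example_cache_episode():
    handle = cache_episode_from_s3('ap-southeast-2', 'episode-bucket',
                                   'episode1.mp4')
    logger.info(f'Cached episode available at {handle.name}')
    handle.close()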
def train(fc, person_group_id):
    logger.info('Training the person group...')
    # Train the person group
    fc.person_group.train(person_group_id)
    while True:
        training_status = fc.person_group.get_training_status(person_group_id)
        logger.info('Training status: {}.'.format(training_status.status))
        # compare by equality: the status is an enum value, not a singleton
        if training_status.status == TrainingStatusType.succeeded:
            break
        elif training_status.status == TrainingStatusType.failed:
            sys.exit('Training the person group has failed.')
        time.sleep(5)
def add_skull_entry(self, ep, time, skull_list):
    idx = pandas.MultiIndex.from_tuples([(ep, time)],
                                        names=Results.INDEX_FIELDS)
    entry = pandas.DataFrame([[skull_list, None, None, None]],
                             index=idx, columns=Results.VALUE_FIELDS)
    try:
        # DataFrame.append was removed in pandas 2.0; concat with
        # verify_integrity=True also raises ValueError on a duplicate index
        self.data = pandas.concat([self.data, entry], verify_integrity=True)
        logger.info(f'Entry for [{time} @ ep{ep}] was created.')
    except ValueError:
        logger.info(f'Entry for [{time} @ ep{ep}] already has skull detection '
                    f'results, overwriting...')
        self.data.update(entry)
def __init__(self, db_endpoint, db_name, db_uid, db_pw):
    connection_string = ('DRIVER={ODBC Driver 17 for SQL Server}; '
                         f'SERVER={db_endpoint}; '
                         f'DATABASE={db_name}; '
                         f'UID={db_uid}; '
                         f'PWD={db_pw}')
    try:
        logger.debug(connection_string)
        self.conn = pyodbc.connect(connection_string)
        self.cursor = self.conn.cursor()
        logger.info('Able to connect.')
    except pyodbc.Error:
        logger.error('Failed to connect.')
        raise
def update_burned_member(self, ep, time, burned):
    idx = pandas.MultiIndex.from_tuples([(ep, time)],
                                        names=Results.INDEX_FIELDS)
    entry = pandas.DataFrame([[None, None, None, burned]],
                             index=idx, columns=Results.VALUE_FIELDS)
    try:
        self.data.update(entry, errors='raise')
        logger.info(f'Entry for [{time} @ ep{ep}] was updated with burned '
                    f'member results.')
    except ValueError:
        logger.info(f'Entry for [{time} @ ep{ep}] already has burned member '
                    f'results, overwriting...')
        self.data.update(entry)
def update_face_entry(self, ep, time, face_list, name_list):
    idx = pandas.MultiIndex.from_tuples([(ep, time)],
                                        names=Results.INDEX_FIELDS)
    entry = pandas.DataFrame([[None, face_list, name_list, None]],
                             index=idx, columns=Results.VALUE_FIELDS)
    try:
        self.data.update(entry, errors='raise')
        logger.info(f'Entry for [{time} @ ep{ep}] was updated with face '
                    f'recognition results.')
    except ValueError:
        logger.info(f'Entry for [{time} @ ep{ep}] already has face '
                    f'recognition results, overwriting...')
        self.data.update(entry)
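
# A minimal sketch of the Results entry lifecycle across the three phases
# (illustrative: the timestamp, bounding boxes, and member name are made-up
# values; Results.blank() is the empty constructor used by phase 1).
def _example_results_flow():
    results = Results.blank()
    # phase 1: record skull detections for a frame
    results.add_skull_entry(1, '00:01:02.500', [(10, 20, 30, 40)])
    # phase 2: add face locations and recognized names for the same frame
    results.update_face_entry(1, '00:01:02.500', [(12, 22, 32, 42)], ['alice'])
    # phase 3: record the estimated burned member
    results.update_burned_member(1, '00:01:02.500', 'alice')
    return results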
def process_images(self, image_paths):
    mappings = {}
    for path in image_paths:
        in_dir_path, filename = os.path.split(path)
        logger.info(f'Processing {filename}')
        faces = afr.recognise_faces(self.faceclient, path,
                                    self.person_group_id)
        mappings[filename] = faces
        logger.info('Caching labelled images')
        # splitext is robust to filenames containing extra dots
        name, ext = os.path.splitext(filename)
        suffix = 'face' if faces else 'noface'
        face_labelled_image_path = os.path.join(self.cache_dir.name,
                                                f'{name}_{suffix}{ext}')
        skull_labelled_image_path = os.path.join(in_dir_path,
                                                 f'{name}_skull{ext}')
        # overlay face labels over skull labels from previous phase
        afr.label_image(faces, skull_labelled_image_path,
                        face_labelled_image_path)
    return mappings
def execute(self, query, query_type):
    logger.info(query)
    try:
        self.cursor.execute(query)
        # SELECT OPERATION
        if query_type == 'SELECT':
            columns = [column[0] for column in self.cursor.description]
            results = [dict(zip(columns, row))
                       for row in self.cursor.fetchall()]
            logger.info(results)
            return results
        # SINGLE INSERT OPERATION
        elif query_type == 'INSERT':
            self.conn.commit()
    except pyodbc.Error as ex:
        logger.error(ex)
        raise
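
# A hedged sketch of both operations execute supports (the table and column
# names here are hypothetical, not the project schema).
def _example_queries(db):
    rows = db.execute('SELECT * FROM detections', 'SELECT')
    db.execute('INSERT INTO detections (episode) VALUES (1)', 'INSERT')
    return rows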
def download_file(self, remote_filepath, output_path):
    """Attempts to download the specified file to the specified output path

    :param remote_filepath: Filepath of the file in the drive to be downloaded
    :param output_path: Destination where the downloaded file will be saved
    """
    _, file_name = os.path.split(remote_filepath)
    file_id = self._get_file_id(remote_filepath)
    request = self.drive.files().get_media(fileId=file_id)
    with FileIO(output_path, 'wb') as fh:
        downloader = MediaIoBaseDownload(fh, request)
        done = False
        while not done:
            status, done = downloader.next_chunk()
            if done:
                logger.info(f'Downloading {file_name}[{file_id}] completed.')
            else:
                logger.info(f'Downloading {file_name}[{file_id}] '
                            f'{int(status.progress() * 100)}%')
def run(self):
    try:
        logger.info('Phase 2 start')
        paths = self.get_imagepaths()
        logger.info('Processing images in input directory')
        results = self.process_images(paths)
        logger.info('Updating result CSV file')
        self.update_results(results)
        self.upload_cached_files()
        self.save_cached_files()
        logger.info('Phase 2 complete')
    except Exception:
        logger.error('Phase 2 failed')
        raise
def recognise_faces_many(fc, img_dir_path, person_group_id, out_dir_path,
                         label_and_save=False):
    """
    Identify a face against a defined PersonGroup for all images in a
    specified directory
    """
    logger.info(f'Preparing images in {img_dir_path} ...')
    # only jpg frames are processed, so only count those
    test_image_array = glob.glob('{}/*.jpg'.format(img_dir_path))
    no_files = len(test_image_array)
    no_fails = 0
    result_dict = {}
    for image_path in test_image_array:
        basename = os.path.basename(image_path)
        logger.info(f'Processing {image_path}...')
        try:
            faces = recognise_faces(fc, image_path, person_group_id)
            if label_and_save:
                label_image(faces, image_path,
                            os.path.join(out_dir_path, basename))
            result_dict[basename] = faces
        except (APIErrorException, APIError) as ex:
            logger.error(f'Failed to process {basename}: {ex}')
            no_fails += 1
    logger.info('Result: Total {} images, {} processing failed...'.format(
        no_files, no_fails))
    # Returns the face & coord dict
    return result_dict
def bulk_insert_csv(self, file_path, table_name, cols):
    try:
        # Reading from CSV file
        df = pd.read_csv(file_path, encoding='utf8', usecols=cols)
        rows = df.values.tolist()
        if not rows:
            logger.info('No entries to insert into database.')
            return
        logger.info('Successfully read {} rows from CSV file {}'.format(
            len(rows), file_path))
    except pd.errors.EmptyDataError as ex:
        logger.error(ex)
        raise
    try:
        column_str = str(tuple(cols)).replace("'", '"')
        wildcard_str = str(tuple('?' for _ in cols)).replace("'", '')
        query_template = 'INSERT INTO {} {} VALUES {}'.format(
            table_name, column_str, wildcard_str)
        logger.debug(f"executemany query template: '{query_template}'")
        # Performing Bulk Insert into RDS
        logger.debug(rows)
        self.cursor.executemany(query_template, rows)
        self.cursor.commit()
        logger.info('Insert success.')
    except pyodbc.Error as ex:
        logger.error(ex)
        raise
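
# Illustrative call only: the CSV path, table name, and column list are
# placeholders standing in for the results.csv produced by Results.write.
def _example_bulk_insert(db):
    db.bulk_insert_csv('results.csv', 'detections',
                       ['episode', 'timestamp', 'burned_member'])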
def locate_faces(image, detection_method):
    logger.info('Resizing image...')
    # convert to rgb
    rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    # resize
    rgb = imutils.resize(rgb, width=280)
    # resize factor
    r = image.shape[1] / float(rgb.shape[1])
    # Detect the coordinates of the bounding boxes corresponding to each face
    # in the input image, then compute the facial embeddings for each face
    logger.info('Recognizing faces...')
    resized_boxes = face_recognition.face_locations(rgb,
                                                    model=detection_method)
    encodings_of_detected_faces = face_recognition.face_encodings(
        rgb, resized_boxes)
    # scale the boxes back up to the original image size
    boxes = []
    for (top, right, bottom, left) in resized_boxes:
        boxes.append((int(top * r), int(right * r),
                      int(bottom * r), int(left * r)))
    return boxes, encodings_of_detected_faces
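
# A short sketch of locate_faces on a single frame (the image path is a
# placeholder; 'hog' and 'cnn' are the detection methods face_recognition
# accepts).
def _example_locate_faces(image_path='frame.jpg'):
    image = cv2.imread(image_path)
    boxes, encodings = locate_faces(image, 'hog')
    logger.info(f'Found {len(boxes)} face(s)')
    return boxes, encodings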
def __init__(self, config, episode_filename):
    logger.info('Initializing phase 1 parameters')
    self.episode_filename = episode_filename
    self.episode_number = get_episode_number_from_filename(episode_filename)
    # prepare directory for caching
    self.cache_dir = TemporaryDirectory()
    self.results = Results.blank()
    # prepare directory for local saving
    self.save_images = config.getboolean('save_images')
    self.save_results = config.getboolean('save_results')
    if self.save_images or self.save_results:
        out_dir_path = os.path.join(config['output_directory_path'],
                                    f'episode{self.episode_number}')
        os.makedirs(out_dir_path, exist_ok=True)
        self.output_directory_path = out_dir_path
    # for uploading cached files
    self.upload_unlabelled = config.getboolean('upload_unlabelled')
    self.upload_labelled = config.getboolean('upload_labelled')
    self.upload_results = config.getboolean('upload_results')
    # for video processing
    self.display = config.getboolean('display')
    self.video_sample_rate = config.getint('video_sample_rate')
    self.skull_confidence_threshold = config.getfloat(
        'skull_confidence_threshold')
    self.skull_model_version = config['skull_model_version']
    try:
        self.azure_key = os.environ['IC_AZURE_KEY_SKULL']
    except KeyError:
        logger.error('Missing required environment variable')
        raise
    # for google drive
    self.gdrive = GDrive(
        token_path=os.environ['IC_GDRIVE_AUTH_TOKEN_PATH'],
        client_secrets_path=os.environ['IC_GDRIVE_CLIENT_SECRETS_PATH'])
def __init__(self, config, episode_filename):
    logger.info('Initializing phase 3 parameters')
    self.episode_number = get_episode_number_from_filename(episode_filename)
    input_directory_path = os.path.join(config['input_directory_path'],
                                        f'episode{self.episode_number}')
    # parse flags as booleans: the raw config string is truthy even for 'False'
    self.save_results = config.getboolean('save_results')
    self.upload_results = config.getboolean('upload_results')
    self.output_directory_path = os.path.join(config['output_directory_path'],
                                              f'episode{self.episode_number}')
    if self.save_results:
        os.makedirs(self.output_directory_path, exist_ok=True)
    self.results = Results.read(os.path.join(input_directory_path,
                                             'results.csv'))
    self.database = SqlConnector(config['db_endpoint'], config['db_name'],
                                 config['db_username'],
                                 os.environ['IC_RDS_PASSWORD'])
    self.db_tablename = config['db_tablename']
    # for google drive
    self.gdrive = GDrive(
        token_path=os.environ['IC_GDRIVE_AUTH_TOKEN_PATH'],
        client_secrets_path=os.environ['IC_GDRIVE_CLIENT_SECRETS_PATH'])
def upload_file(self, filepath, remote_filepath=None):
    """
    Uploads a file located at the specified filepath to the remote_filepath
    specified. If no remote_filepath is specified, the file is uploaded to
    the root directory on the drive

    :param filepath: The path of the local file to be uploaded
    :param remote_filepath: The filepath where the file is to be uploaded
    :return: the metadata of the uploaded file
    """
    if remote_filepath is None:
        filename = os.path.basename(filepath)
        folder_name = 'root'
    else:
        folder_name, filename = os.path.split(remote_filepath)
    logger.info(f'Uploading {filename}')
    try:
        folder_id = self._get_folder_id(folder_name)
    except FileNotFoundError:
        logger.info(f'Target folder [{folder_name}] not found, '
                    f'creating directory')
        folder_id = self.mkdir(folder_name)
    # 'name' must be a plain string; 'parents' is a list of folder ids
    file_metadata = {'name': filename, 'parents': [folder_id]}
    media = MediaFileUpload(filepath, mimetype=guess_type(filepath)[0])
    response_fields = 'id, name, parents'
    try:
        # attempt to update file if one with the same name exists
        file_id = self._get_file_id(remote_filepath, folder_id=folder_id)
        # if FileNotFoundError was not raised
        metadata = self.drive.files().get(fileId=file_id).execute()
        del metadata['id']
        file = self.drive.files().update(fileId=file_id,
                                         body=metadata,
                                         media_body=media,
                                         fields=response_fields).execute()
        logger.info(f"[{file['name']}] already exists and was overwritten")
    except FileNotFoundError:
        file = self.drive.files().create(body=file_metadata,
                                         media_body=media,
                                         fields=response_fields).execute()
        logger.info(f"[{file['name']}] was uploaded to {folder_name}")
    return file
def list_files(self, page_size=10):
    """
    Lists the first {page_size} files in the drive

    :param page_size: The number of items to list
    :return: None
    """
    results = self.drive.files().list(
        pageSize=page_size,
        fields='nextPageToken, files(id, name, mimeType)').execute()
    items = results.get('files', [])
    if not items:
        logger.info('No files found.')
    else:
        logger.info('Files:')
        for item in items:
            logger.info('{0} ({1} || {2})'.format(item['name'],
                                                  item['mimeType'],
                                                  item['id']))
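
# A hedged end-to-end sketch of the GDrive wrapper (the remote paths are
# placeholders; the constructor arguments mirror how the phase initializers
# build the client).
def _example_gdrive():
    gdrive = GDrive(
        token_path=os.environ['IC_GDRIVE_AUTH_TOKEN_PATH'],
        client_secrets_path=os.environ['IC_GDRIVE_CLIENT_SECRETS_PATH'])
    gdrive.list_files(page_size=5)
    gdrive.upload_file('results.csv', 'episode1/results.csv')
    gdrive.download_file('episode1/results.csv', 'results_copy.csv')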
def recognise_faces(fc, image_path, person_group_id):
    """
    Recognize faces in an image

    :param fc: FaceClient
    :param image_path: path to image to recognize faces in
    :param person_group_id: the id of the trained person group
    :return: results of detect and identify
    """
    face_ids = []
    faces = {}
    logger.info('Detecting faces using Azure Face Client...')
    with open(image_path, 'rb') as data:
        detect_results = fc.face.detect_with_stream(data)
    for face in detect_results:
        rect = face.face_rectangle
        left = rect.left
        top = rect.top
        right = left + rect.width
        bottom = top + rect.height
        bounding_box = (top, right, bottom, left)
        face_id = face.face_id
        face_ids.append(face_id)
        faces[face_id] = {'bounding_box': bounding_box}
    if not faces:
        logger.info('No faces to identify')
        return []
    logger.info('Identifying faces using Azure Face Client...')
    identify_results = fc.face.identify(face_ids,
                                        person_group_id=person_group_id)
    for person in identify_results:
        name = 'unknown'
        face_id = person.face_id
        logger.info(f'person: {person}')
        try:
            # get the highest probability person_id
            person_id = person.candidates[0].person_id
            name = get_name_by_id(fc, person_id, person_group_id)
            logger.info(f'{name} was identified at '
                        f'{faces[face_id]["bounding_box"]}')
        except IndexError:
            logger.info(f'Unable to recognize face at '
                        f'{faces[face_id]["bounding_box"]}.')
        faces[face_id]['name'] = name
    return [faces[face_id] for face_id in faces]
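
# Minimal single-image recognition sketch (the endpoint, group id, and image
# path are hypothetical; the key is read from the same environment variable
# the phase 2 initializer uses).
def _example_recognise():
    fc = authenticate_client('https://example.cognitiveservices.azure.com/',
                             os.environ['IC_AZURE_KEY_FACE'])
    for face in recognise_faces(fc, 'frame.jpg', 'my-person-group'):
        logger.info(f"{face['name']} @ {face['bounding_box']}")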
def run(self):
    try:
        logger.info('Phase 1 start')
        ep_no = self.episode_number
        # get episode from google drive
        logger.info(f'Downloading episode {ep_no} from Google Drive')
        episode_filepath = self.download_episode()
        # process episode
        logger.info(f'Finding frames with skulls in episode {ep_no}')
        extracted_frames = self.process_episode(episode_filepath)
        # update results and cache image locally on container
        logger.info(f'Caching frames with skulls in episode {ep_no}')
        self.cache_extracted_frames(extracted_frames)
        logger.info('Updating results CSV file')
        self.update_results(extracted_frames)
        self.upload_cached_files()
        self.save_cached_files()
        logger.info('Phase 1 complete')
    except Exception:
        logger.error('Phase 1 failed')
        raise
def write(self, file_path):
    self.data.to_csv(file_path)
    logger.info(f"Results have been saved to '{file_path}'.")
def process_image(image_path, known_face_encoding_data, detection_method,
                  display=False):
    episode_number, timestamp, image = fetch_unprocessed_img(image_path)
    boxes, encodings = locate_faces(image, detection_method)
    names = process_recognition(known_face_encoding_data, encodings)
    processed = ProcessedImage(image, episode_number, timestamp, names, boxes)
    if display:
        display_results(processed)
        cv2.destroyAllWindows()
    return processed


if __name__ == '__main__':
    import argparse
    import pickle

    # Initialize arguments
    ap = argparse.ArgumentParser()
    ap.add_argument('-e', '--encodings', required=True,
                    help='path to serialized db of facial encodings')
    ap.add_argument('-y', '--display', type=int, default=1,
                    help='whether or not to display output frame to screen')
    ap.add_argument('-i', '--image', required=True,
                    help='path to input image for recognition '
                         '<ep_num>_<hr>_<min>_<sec>_<ms>.jpg')
    ap.add_argument('-d', '--detection-method', type=str, default='cnn')
    args = vars(ap.parse_args())

    logger.info('loading encodings...')
    with open(args['encodings'], 'rb') as f:
        data = pickle.loads(f.read())

    logger.info('processing image...')
    process_image(args['image'], data, args['detection_method'],
                  args['display'])
def authenticate_client(endpoint, key):
    logger.info('Authenticating Azure Face Client at {}...'.format(endpoint))
    fc = FaceClient(endpoint, CognitiveServicesCredentials(key))
    return fc
def init_person_group(fc, person_group_id, known_faces_dir):
    # decide up front whether the group already exists: re-listing after
    # creation would always be non-empty and the members would never be added
    person_group_list = fc.person_group.list()
    if len(person_group_list) != 0:
        logger.info('Person Group initialized for {}. People objects are '
                    'already added, skipping creation...'.format(
                        person_group_id))
        logger.info(person_group_list)
        return False
    logger.info('Person Group ID {} does not exist, creating a new one in '
                'azure...'.format(person_group_id))
    fc.person_group.create(person_group_id=person_group_id,
                           name=person_group_id)
    for member in os.listdir(known_faces_dir):
        logger.info('Creating person object in azure: ' + member)
        member_obj = fc.person_group_person.create(person_group_id, member)
        member_path = os.path.join(known_faces_dir, member)
        member_images = glob.glob('{}/*.*'.format(member_path))
        count = 0
        for member_image in member_images:
            with open(member_image, 'r+b') as ch:
                try:
                    fc.person_group_person.add_face_from_stream(
                        person_group_id, member_obj.person_id, ch)
                except Exception as ex:
                    logger.info(ex)
                    continue
                count += 1
        logger.info('Member {} total {} images.. added in person group'.format(
            member, count))
    # training is required after a fresh group is populated
    return True
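
# End-to-end sketch of preparing the person group (the endpoint, group id,
# and directory are placeholders; known_faces/ is assumed to hold one
# sub-directory of images per member, as init_person_group expects).
def _example_prepare_person_group():
    fc = authenticate_client('https://example.cognitiveservices.azure.com/',
                             os.environ['IC_AZURE_KEY_FACE'])
    if init_person_group(fc, 'my-person-group', 'known_faces/'):
        train(fc, 'my-person-group')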