def read_list_from_file(
    files_list_path: str = FILES_LIST_PATH,
    files_list_filename: str = FILES_LIST_FILENAME,
) -> list:
    """
    Import list from file
    """
    if os.path.exists(f"{files_list_path}/{files_list_filename}"):
        try:
            with open(f"{files_list_path}/{files_list_filename}", "r") as r:
                data = r.read().splitlines()
            statistics.append(["read_list_from_file", len(data)])
            logger.info(f"{len(data)} items imported from file.")
        except Exception as e:
            logger.error(e)
            raise
        return data
    else:
        logger.critical(
            f'Cannot open the file "{files_list_path}/{files_list_filename}", looks like it does not exist.'
        )
        return False

def seed_db_table(
    db_objects: list = None,
    table_name: str = TABLE_NAME,
    aws_region: str = AWS_REGION,
) -> bool:
    """
    Insert DB objects into table
    """
    logger.info("Inserting data into DB...")
    logger.debug(
        f"Context Parameters: {seed_db_table.__name__} => {seed_db_table.__code__.co_varnames}"
    )
    try:
        dynamodb = boto3.resource("dynamodb", region_name=aws_region)
        table = dynamodb.Table(table_name)
        with table.batch_writer() as batch:
            for item in db_objects:
                batch.put_item(Item=item)
        statistics.append(["seed_db_table", len(db_objects)])
        logger.info(f"{len(db_objects)} item(s) were inserted in DB.")
    except Exception as e:
        logger.error(e)
        raise
    return True

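# Illustrative usage sketch (an assumption, not part of the module's call flow): seed_db_table()
# hands each element of db_objects straight to DynamoDB's batch_writer().put_item(), so it
# expects a list of plain dicts such as the "card" objects built by build_card_objects() below.
# The table name and region values here are hypothetical placeholders.
#
#     cards = build_card_objects(media_list=media_objects)
#     seed_db_table(db_objects=cards, table_name="media-cards", aws_region="eu-west-1")
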
def export_to_json(
    db_data: list = None,
    display: bool = False,
    files_list_path: str = FILES_LIST_PATH,
) -> bool:
    """
    Export DB objects to JSON
    """
    try:
        data = json.dumps(db_data, indent=4)
        data_count = len(json.loads(data))
        statistics.append(["export_to_json", data_count])
        with open(f"{files_list_path}/cards.json", "w") as f:
            f.write(data)
        logger.info(
            f"DB objects exported to JSON file successfully: {files_list_path}/cards.json"
        )
        if display:
            print(data)
    except Exception as e:
        logger.error(e)
        raise
    return True

def build_card_objects(media_list: list = None) -> list:
    """
    Creates DB objects from S3 objects list
    """
    logger.info("Crafting list of DB objects...")
    logger.debug(
        f"Context Parameters: {build_card_objects.__name__} => {build_card_objects.__code__.co_varnames}"
    )
    medias_list = defaultdict(list)
    try:
        for item in media_list:
            medias_list[item["ts"]].append({
                "name": item["name"],
                "path": item["path"],
                "url": item["url"],
                "kind": item["kind"],
            })
        medias = [{"ts": k, "medias": v} for k, v in medias_list.items()]
        statistics.append(["build_card_objects", len(medias)])
        logger.info(f'{len(medias)} "card" objects generated successfully.')
    except Exception as e:
        logger.error(e)
        raise
    return medias

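# For reference, a sketch of the transformation build_card_objects() performs. Field names come
# from the code above; the concrete values are invented for illustration.
#
#     input (one element of media_list):
#         {"ts": "2016-08-23", "name": "img.jpg", "path": "a/b/20160823",
#          "url": "https://...", "kind": "picture"}
#     output (one "card" per ts, grouping all medias sharing that date):
#         {"ts": "2016-08-23", "medias": [{"name": "img.jpg", "path": "a/b/20160823",
#                                          "url": "https://...", "kind": "picture"}]}
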
def get_s3_files(
    bucket_name: str = BUCKET_NAME,
    save_to_disk: bool = True,
    files_list_path: str = FILES_LIST_PATH,
    files_list_filename: str = FILES_LIST_FILENAME,
    aws_region: str = AWS_REGION,
    s3_prefix: str = S3_PREFIX,
) -> list:
    """
    Get S3 objects and creates list
    """
    logger.info("Building media list from S3 objects...")
    logger.debug(
        f"Context Parameters: {get_s3_files.__name__} => {get_s3_files.__code__.co_varnames}"
    )
    data = []
    # Accepted key layout: <segment>/<segment>/<YYYYMMDD>/<filename>.<ext>
    # e.g.: .../assets/20160823/img.jpg
    pattern = re.compile(
        "^[a-zA-Z0-9-]+/[a-zA-Z0-9-]+/[0-9]{8}/.+[.][a-zA-Z0-9]+$"
    )
    try:
        s3 = boto3.client("s3", region_name=aws_region)
        paginator = s3.get_paginator("list_objects_v2")
        pages = paginator.paginate(Bucket=bucket_name, Prefix=s3_prefix)
        for page in pages:
            # An empty result page carries no "Contents" key
            for obj in page.get("Contents", []):
                if pattern.match(obj["Key"]):
                    data.append(obj["Key"])
                else:
                    logger.warning(
                        f'Wrong filename format, object "{obj["Key"]}", not added to the list.'
                    )
        statistics.append(["get_s3_files", len(data)])
        logger.info("Media Objects list generated successfully.")
        logger.debug(f"Media objects count: {len(data)}.")
        if save_to_disk:
            logger.info("Writing media list to disk...")
            export_data = [f"{item}\n" for item in data]
            with open(f"{files_list_path}/{files_list_filename}", "w") as w:
                w.writelines(export_data)
            logger.info(
                f'List successfully saved to disk: "{files_list_path}/{files_list_filename}".'
            )
    except Exception as e:
        logger.error(e)
        raise
    return data

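# A minimal sketch of keys get_s3_files() keeps or skips, assuming S3_PREFIX points at the top
# of the media tree (bucket and folder names are hypothetical):
#
#     prefix/assets/20160823/img.jpg   -> kept: matches <segment>/<segment>/<YYYYMMDD>/<file>.<ext>
#     prefix/assets/notes.txt          -> skipped with a "Wrong filename format" warning
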
def medias_copy(
    local_path: str = LOCAL_MEDIA_OUTPUT_PATH,
    video_encode: str = VIDEO_ENCODE,
    media_encode_platform: str = MEDIA_ENCODE_PLATFORM,
) -> bool:
    """
    Copy media files to S3
    """
    logger.info("Starting copy...")
    try:
        medias = get_local_medias_files(path=local_path, save_to_disk=False)
        logger.debug(medias)
        for media in medias:
            media_type = get_media_type(basename(media))
            ts = media.split("/")[-2]
            if media_type == "movie":
                # video_encode is carried as the string "True"/"False"; movies are only
                # sent to the bucket from here when cloud encoding is selected.
                if video_encode == "True" and media_encode_platform == "cloud":
                    send_to_bucket(media, ts)
                elif video_encode == "True" and media_encode_platform == "local":
                    logger.info(
                        f"Skipping copy of {media} for local re-encoding."
                    )
            elif media_type == "picture":
                send_to_bucket(media, ts)
            else:
                logger.warning(f"Media type is: {media_type} !")
        logger.info(
            f"{len(medias)} media files have been successfully copied."
        )
    except Exception as e:
        logger.error(e)
        raise
    statistics.append(["medias_copy", len(medias)])
    logger.info("...done.")
    return True

def build_media_files_from_list(
    local_files_list: list = None,
    output_image_width: int = OUTPUT_IMAGE_WIDTH,
    output_image_height: int = OUTPUT_IMAGE_HEIGHT,
    output_path: str = LOCAL_MEDIA_OUTPUT_PATH,
    log_path: str = LOG_PATH,
) -> bool:
    """
    Generate web-friendly resized images and copy other media files
    """
    logger.info("Generating web friendly images...")
    processed_files_count = 0
    unprocessed_files = []
    path_pattern = re.compile("^.*?/[0-9]{8}/.*[.][a-zA-Z0-9]+$")
    ts_pattern = re.compile("^[0-9]{8}$")
    try:
        for media in local_files_list:
            ts = media.split("/")[-2]
            if path_pattern.match(media):
                media_ts = ts
            elif not ts_pattern.match(ts):
                media_ts = media_ts_format(ts, media)
            else:
                logger.warning(
                    'The file path format should be like e.g.: "path/ts/image.jpg".'
                )
                logger.critical(
                    f'Input file path format "{media}" is incorrect! Stopping here!'
                )
                return False
            if not media_ts:
                unprocessed_files.append(media)
                logger.warning("Could not identify the date format. Skipping.")
            else:
                gen = media_generate(
                    media=media,
                    output_path=output_path,
                    media_ts=media_ts,
                    output_image_width=output_image_width,
                    output_image_height=output_image_height,
                    processed_files_count=processed_files_count,
                    unprocessed_files=unprocessed_files,
                )
                processed_files_count, unprocessed_files = gen
        statistics.append(
            ["build_media_files_from_list", processed_files_count]
        )
        logger.info(
            f"{processed_files_count} images have been generated successfully."
        )
        log_file = f"{log_path}/unprocessed_files.log"
        if len(unprocessed_files) > 0:
            up_files = [item + "\n" for item in unprocessed_files]
            with open(log_file, "w") as w:
                w.writelines(up_files)
            logger.warning(f"{len(unprocessed_files)} unprocessed file(s)!")
            logger.debug(f"Unprocessed file(s): {unprocessed_files}")
        elif os.path.exists(log_file):
            # Clear any stale log left over from a previous run
            with open(log_file, "r+") as t:
                t.truncate(0)
        logger.info("Image files tree generation done.")
        if len(unprocessed_files) > 0:
            logger.info(
                f'Some files were not processed, please review the list: "{log_path}/unprocessed_files.log".'
            )
    except Exception as e:
        logger.error(e)
        raise
    return True

def get_local_medias_files(
    path: str = LOCAL_MEDIA_PATH,
    save_to_disk: bool = True,
    files_list_path: str = FILES_LIST_PATH,
    files_list_filename: str = FILES_LIST_FILENAME,
    config_path: str = CONFIG_PATH,
) -> list:
    """
    Generates a list of local media files
    """
    if os.path.exists(path):
        local_medias = []
        filtered_files = []
        try:
            logger.info("Generating list of local files...")
            for dirpath, _, files in os.walk(path):
                for filename in files:
                    fname = os.path.join(dirpath, filename)
                    if is_filtered(filename):
                        filtered_files.append(fname)
                    else:
                        local_medias.append(fname)
            if len(local_medias) > 0:
                statistics.append(
                    ["get_local_medias_files", len(local_medias)]
                )
                logger.info("List successfully generated.")
                logger.debug(f"Count: {len(local_medias)} local files.")
            else:
                logger.critical(
                    f'No files found in source directory: "{path}".'
                )
                return False
            if save_to_disk:
                logger.info("Writing local files list to disk...")
                data_to_write = [item + "\n" for item in local_medias]
                with open(
                    f"{files_list_path}/{files_list_filename}", "w"
                ) as w:
                    w.writelines(data_to_write)
                logger.info(
                    f'The list has been saved successfully: "{files_list_path}/{files_list_filename}".'
                )
            if len(filtered_files) > 0:
                logger.info(
                    f'Number of file(s) excluded by filter specified in "{config_path}/exclude_local.txt": {len(filtered_files)}.'
                )
                logger.debug(f"excluded by filter: {filtered_files}")
        except Exception as e:
            logger.error(e)
            raise
        return local_medias
    else:
        logger.critical(f'Input path "{path}" does not exist! Stopping here!')
        return False

def s3_clean(
    bucket_name: str = BUCKET_NAME, aws_region: str = AWS_REGION
) -> bool:
    """
    Delete incomplete multi-part uploads
    """
    logger.info("Getting list of incomplete uploads...")
    try:
        multipart_uploads_cmd = f"aws s3api list-multipart-uploads --bucket {bucket_name} --region {aws_region}"
        proc = subprocess.run(
            multipart_uploads_cmd,
            shell=True,
            check=True,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            universal_newlines=True,
        )
        logger.debug(
            f'"s3api list-multipart-uploads" returns code: {proc.returncode} => OK'
        )
    except Exception as e:
        if "exit status 254" in str(e):
            logger.warning(
                f"Bucket {bucket_name} does not exist. Stopping here."
            )
            return False
        else:
            logger.error(e)
            raise
    if proc.returncode == 0 and proc.stdout:
        multipart_uploads_list = proc.stdout.strip()
        multipart_uploads_list = json.loads(multipart_uploads_list)["Uploads"]
        logger.info("Delete in progress...")
        try:
            for item in multipart_uploads_list:
                proc_cmd = f"aws s3api abort-multipart-upload --bucket {bucket_name} --region {aws_region} --key \"{item['Key']}\" --upload-id {item['UploadId']}"
                proc = subprocess.run(
                    proc_cmd,
                    shell=True,
                    check=True,
                    stdout=subprocess.PIPE,
                    stderr=subprocess.STDOUT,
                    universal_newlines=True,
                )
                logger.debug(
                    f'"s3api abort-multipart-upload" returns code: {proc.returncode} => OK'
                )
                logger.info(f"Deleted incomplete upload: \"{item['Key']}\".")
            statistics.append(["s3_clean", len(multipart_uploads_list)])
            logger.debug(
                f"{len(multipart_uploads_list)} incomplete upload(s) deleted."
            )
        except Exception as e:
            logger.error(e)
            raise
        return True
    else:
        logger.info("Nothing to clean.")
        return True

def media_sync(
    local_path: str = LOCAL_MEDIA_OUTPUT_PATH,
    bucket_name: str = BUCKET_NAME,
    remote_path_prefix: str = S3_PREFIX,
    log_path: str = LOG_PATH,
    aws_region: str = AWS_REGION,
    config_path: str = CONFIG_PATH,
) -> bool:
    """
    Synchronize local/S3 media files tree
    """
    exclude_s3_file_path = f"{config_path}/exclude_s3.txt"
    if os.path.exists(exclude_s3_file_path):
        with open(exclude_s3_file_path, "r") as r:
            common_oses_filter = r.read().splitlines()
        # Build one `--exclude "<pattern>"` argument per non-empty, non-comment line
        cli_filter_args = "".join(
            [
                f' --exclude "{item}"'
                for item in common_oses_filter
                if item and not item.startswith("#")
            ]
        )
    else:
        cli_filter_args = ""
    logger.info("Starting sync...")
    logger.info(f"S3 sync task log => tail -F {log_path}/s3_sync.log")
    try:
        cli_cmd = f"aws s3 sync {local_path}/ s3://{bucket_name}/{remote_path_prefix}/ --delete --region {aws_region} {cli_filter_args}"
        logger.debug(f"cli command: {cli_cmd}")
        with open(f"{log_path}/s3_sync.log", "w") as w:
            proc = subprocess.run(
                cli_cmd,
                shell=True,
                check=True,
                stdout=w,
                stderr=subprocess.STDOUT,
                universal_newlines=True,
            )
        if proc.returncode == 0:
            with open(f"{log_path}/s3_sync.log", "r") as r:
                processed_objects = r.read().splitlines()
            processed_objects = [
                item for item in processed_objects if "upload" in item
            ]
            statistics.append(["media_sync", len(processed_objects)])
            logger.info("Sync completed successfully.")
            logger.debug(
                f"{len(processed_objects)} files have been synchronized successfully."
            )
            logger.debug(f"S3 CLI returned code: {proc.returncode} => OK")
        else:
            logger.critical("Something wrong happened during sync operation!")
            return False
    except Exception as e:
        logger.error(e)
        raise
    return True

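# media_sync() turns every non-empty, non-comment line of exclude_s3.txt into an
# `--exclude "<pattern>"` argument for `aws s3 sync`. A hypothetical exclude file (these
# patterns are only an example, not the project's actual configuration) could look like:
#
#     # OS artefacts that should never reach the bucket
#     *.DS_Store
#     *Thumbs.db
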
def build_media_objects(
    items: list = None,
    aws_region: str = AWS_REGION,
    bucket_name: str = BUCKET_NAME,
) -> list:
    """
    Build media objects
    """
    mediaItems: list = []
    ts = None
    logger.info("Building media list dictionaries...")
    logger.debug(
        f"Context Parameters: {build_media_objects.__name__} => {build_media_objects.__code__.co_varnames}"
    )
    try:
        for item in items:
            key = item.split("/")
            name = key[3]
            ts = key[2]
            ts = f"{ts[0:4]}-{ts[4:6]}-{ts[6:8]}"
            path = f"{key[0]}/{key[1]}/{key[2]}"
            url = f"https://s3-{aws_region}.amazonaws.com/{bucket_name}/{path}/{name}"
            media_type = get_media_type(name)
            if ts != "" and name != "":
                media = {}
                media["ts"] = ts
                media["name"] = name
                media["kind"] = media_type
                media["path"] = path
                media["url"] = url
                mediaItems.append(media)
            else:
                logger.warning(f"ts = {ts} and name = {name}. Stopping here.")
                return False
        data = sorted(mediaItems, key=itemgetter("ts"), reverse=False)
        nbr_data = len(data)
        nbr_items = len(items)
        statistics.append(["build_media_objects", len(data)])
        logger.info("Media list dictionaries built successfully.")
        logger.debug(f"{nbr_data} objects in media list.")
        if nbr_data != nbr_items:
            logger.critical(
                "Inconsistency found between data input and output! Stopping here!"
            )
            logger.debug(
                f"Input objects list count [{nbr_items}] and generated media objects count [{nbr_data}] are uneven. Stopping here."
            )
            return False
    except Exception as e:
        logger.error(e)
        raise
    return data

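# A worked example of the key parsing above (the key itself is invented for illustration):
#
#     item = "prefix/assets/20160823/img.jpg"
#     key  = item.split("/")      # ["prefix", "assets", "20160823", "img.jpg"]
#     ts   = "2016-08-23"         # key[2] reformatted as YYYY-MM-DD
#     path = "prefix/assets/20160823"
#     url  = f"https://s3-{aws_region}.amazonaws.com/{bucket_name}/{path}/img.jpg"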