Example 1
def read_list_from_file(
    files_list_path: str = FILES_LIST_PATH,
    files_list_filename: str = FILES_LIST_FILENAME,
) -> list:
    """ Import list from file """

    if os.path.exists(f"{files_list_path}/{files_list_filename}"):
        try:
            with open(f"{files_list_path}/{files_list_filename}", "r") as r:
                data = r.read().splitlines()

            statistics.append(["read_list_from_file", len(data)])
            logger.info(f"{len(data)} items imported from file.")
        except Exception as e:
            logger.error(e)
            raise
        return data
    else:
        logger.critical(
            f'Cannot open the file "{files_list_path}/{files_list_filename}"; it looks like it does not exist.'
        )
        return False

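
All of these excerpts lean on module-level names the snippets themselves do not define (the logger, the statistics list, and configuration constants such as FILES_LIST_PATH). A minimal sketch of that assumed context, with placeholder values:

import logging

# Hypothetical stand-ins for the module-level configuration the excerpts assume.
FILES_LIST_PATH = "/tmp/media-site"       # placeholder directory
FILES_LIST_FILENAME = "files_list.txt"    # placeholder filename

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Shared run counters: each function appends ["step_name", item_count].
statistics: list = []
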
Example 2
def seed_db_table(
    db_objects: list = None,
    table_name: str = TABLE_NAME,
    aws_region: str = AWS_REGION,
) -> bool:
    """ Insert DB objects into table """

    logger.info("Inserting data into DB...")
    logger.debug(
        f"Context Parameters: {seed_db_table.__name__} => {seed_db_table.__code__.co_varnames}"
    )

    try:
        dynamodb = boto3.resource("dynamodb", region_name=aws_region)
        table = dynamodb.Table(table_name)

        with table.batch_writer() as batch:
            for item in db_objects:
                batch.put_item(Item=item)

        statistics.append(["seed_db_table", len(db_objects)])

        logger.info(f"{len(db_objects)} item(s) were inserted in DB.")
    except Exception as e:
        logger.error(e)
        raise

    return True
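
For context, batch_writer() buffers put_item calls, sends them in BatchWriteItem chunks (at most 25 items per request), and retries unprocessed items automatically. A self-contained sketch of the same write path; the table name, region, and sample item are placeholders:

import boto3

def put_items_in_batches(items, table_name="my-table", region="eu-west-1"):
    """Write items to DynamoDB with automatic batching and retries."""
    table = boto3.resource("dynamodb", region_name=region).Table(table_name)
    with table.batch_writer() as batch:  # flushes any remaining items on exit
        for item in items:
            batch.put_item(Item=item)

# put_items_in_batches([{"ts": "2016-08-23", "name": "img.jpg"}])
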
Example 3
def export_to_json(
    db_data: list = None,
    display: bool = False,
    files_list_path: str = FILES_LIST_PATH,
) -> bool:
    """ Export DB objects to JSON """

    try:
        data = json.dumps(db_data, indent=4)
        data_count = len(json.loads(data))

        statistics.append(["export_to_json", data_count])

        with open(f"{files_list_path}/cards.json", "w") as f:
            f.write(data)
        logger.info(
            f"DB objects exported to JSON file successfully: {files_list_path}/cards.json"
        )
        if display:
            print(data)

    except Exception as e:
        logger.error(e)
        raise

    return True
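
Note that serialising with json.dumps and immediately re-parsing with json.loads just to count items yields the same number as len(db_data) when db_data is a list. A leaner sketch of the same export; the output path is a placeholder:

import json

def export_json(db_data, out_path="/tmp/cards.json", display=False):
    """Write db_data as pretty-printed JSON and return the item count."""
    with open(out_path, "w") as f:
        json.dump(db_data, f, indent=4)  # stream straight to the file
    if display:
        print(json.dumps(db_data, indent=4))
    return len(db_data)  # no re-parse needed for the count
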
Example 4
def build_card_objects(media_list: list = None) -> list:
    """ Create card DB objects from the S3 objects list """

    logger.info("Crafting list of DB objects...")
    logger.debug(
        f"Context Parameters: {build_card_objects.__name__} => {build_card_objects.__code__.co_varnames}"
    )
    medias_list = defaultdict(list)
    try:
        for item in media_list:
            medias_list[item["ts"]].append({
                "name": item["name"],
                "path": item["path"],
                "url": item["url"],
                "kind": item["kind"],
            })
        medias = [{"ts": k, "medias": v} for k, v in medias_list.items()]

        statistics.append(["build_card_objects", len(medias)])

        logger.info(f'{len(medias)} "card" objects generated successfully.')
    except Exception as e:
        logger.error(e)
        raise

    return medias
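
The grouping step is the standard defaultdict(list) pattern: bucket flat records by a key, then flatten the buckets into {key, group} dictionaries. A toy run with made-up records:

from collections import defaultdict

records = [
    {"ts": "2016-08-23", "name": "a.jpg", "path": "p", "url": "u", "kind": "picture"},
    {"ts": "2016-08-23", "name": "b.mov", "path": "p", "url": "u", "kind": "movie"},
]

grouped = defaultdict(list)
for rec in records:
    grouped[rec["ts"]].append({k: rec[k] for k in ("name", "path", "url", "kind")})

cards = [{"ts": ts, "medias": medias} for ts, medias in grouped.items()]
# -> [{"ts": "2016-08-23", "medias": [<both entries>]}]
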
Example 5
def get_s3_files(
    bucket_name: str = BUCKET_NAME,
    save_to_disk: bool = True,
    files_list_path: str = FILES_LIST_PATH,
    files_list_filename: str = FILES_LIST_FILENAME,
    aws_region: str = AWS_REGION,
    s3_prefix: str = S3_PREFIX,
) -> list:
    """ Get S3 objects and build a list of keys """

    logger.info("Building media list from S3 objects...")
    logger.debug(
        f"Context Parameters: {get_s3_files.__name__} => {get_s3_files.__code__.co_varnames}"
    )

    data = []

    # expected key format, e.g.: "top-prefix/album-dir/20160823/img.jpg"
    pattern = re.compile(
        "^[A-Za-z0-9-]+/[A-Za-z0-9-]+/[0-9]{8}/.+[.][A-Za-z0-9-]+$"
    )

    try:
        s3 = boto3.client("s3", region_name=aws_region)
        paginator = s3.get_paginator("list_objects_v2")
        pages = paginator.paginate(Bucket=bucket_name, Prefix=s3_prefix)

        for page in pages:
            for obj in page.get("Contents", []):  # "Contents" absent on empty pages
                if pattern.match(obj["Key"]):
                    data.append(obj["Key"])
                else:
                    logger.warning(
                        f'Wrong filename format, object "{obj["Key"]}", not added to the list.'
                    )

        statistics.append(["get_s3_files", len(data)])

        logger.info("Media Objects list generated successfully.")
        logger.debug(f"Media objects count: {len(data)}.")

        if save_to_disk:
            logger.info("Writing media list to disk...")
            export_data = [f"{item}\n" for item in data]
            with open(f"{files_list_path}/{files_list_filename}", "w") as w:
                w.writelines(export_data)
            logger.info(
                f'List successfully saved to disk: "{files_list_path}/{files_list_filename}".'
            )
    except Exception as e:
        logger.error(e)
        raise

    return data
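
The key filter is easiest to verify in isolation. A quick check of which keys the pattern accepts; the sample keys are made up:

import re

pattern = re.compile("^[A-Za-z0-9-]+/[A-Za-z0-9-]+/[0-9]{8}/.+[.][A-Za-z0-9-]+$")

samples = [
    "assets/album/20160823/img.jpg",  # accepted: dir/dir/YYYYMMDD/file.ext
    "assets/20160823/img.jpg",        # rejected: missing a directory level
    "assets/album/2016/img.jpg",      # rejected: date is not 8 digits
]
for key in samples:
    print(key, "->", bool(pattern.match(key)))
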
Example 6
def medias_copy(
    local_path: str = LOCAL_MEDIA_OUTPUT_PATH,
    video_encode: str = VIDEO_ENCODE,  # compared as the string "True" below
    media_encode_platform: str = MEDIA_ENCODE_PLATFORM,
) -> bool:
    """ Copy media files to S3 """

    logger.info("Starting copy...")

    try:
        medias = get_local_medias_files(path=local_path, save_to_disk=False)
        if not medias:  # the helper returns False when nothing is found
            logger.critical("No local media files found. Stopping here.")
            return False
        logger.debug(medias)
        for media in medias:
            media_type = get_media_type(basename(media))
            ts = media.split("/")[-2]

            if media_type == "movie":
                if video_encode == "True" and media_encode_platform == "cloud":
                    send_to_bucket(media, ts)
                elif video_encode == "True" and media_encode_platform == "local":
                    logger.info(
                        f"Skipping copy of {media} for local re-encoding."
                    )
            elif media_type == "picture":
                send_to_bucket(media, ts)
            else:
                logger.warning(f"Unhandled media type: {media_type}!")

        logger.info(f"{len(medias)} media file(s) processed for copy.")
    except Exception as e:
        logger.error(e)
        raise

    statistics.append(["medias_copy", len(medias)])

    logger.info("...done.")

    return True
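
The video_encode == "True" comparisons suggest the flag travels as a string (for instance, read from an environment variable) rather than a real boolean. One way to normalise such flags at the configuration boundary; the helper and variable name are hypothetical:

import os

def env_flag(name: str, default: str = "False") -> bool:
    """Interpret an environment variable as a boolean flag."""
    return os.environ.get(name, default).strip().lower() in ("1", "true", "yes")

# VIDEO_ENCODE = env_flag("VIDEO_ENCODE")  # then test "if video_encode:" directly
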
Example 7
def build_media_files_from_list(
    local_files_list: list = None,
    output_image_width: int = OUTPUT_IMAGE_WIDTH,
    output_image_height: int = OUTPUT_IMAGE_HEIGHT,
    output_path: str = LOCAL_MEDIA_OUTPUT_PATH,
    log_path: str = LOG_PATH,
) -> bool:
    """ Generate web-friendly resized images and copy other media files """

    logger.info("Generating web friendly images...")
    processed_files_count = 0
    unprocessed_files = []
    path_pattern = re.compile("^.*?/[0-9]{8}/.*[.][A-Za-z0-9-]+$")
    ts_pattern = re.compile("^[0-9]{8}$")

    try:
        for media in local_files_list:
            ts = media.split("/")[-2]

            if path_pattern.match(media):
                media_ts = ts
            elif not ts_pattern.match(ts):
                media_ts = media_ts_format(ts, media)
            else:
                logger.warning(
                    'The file path format should look like, e.g., "path/ts/image.jpg".'
                )
                logger.critical(
                    f'Input file path format "{media}" is incorrect! Stopping here!'
                )
                return False

            if not media_ts:
                unprocessed_files.append(media)
                logger.warning("Could not identify the date format. Skipping.")
            else:
                gen = media_generate(
                    media=media,
                    output_path=output_path,
                    media_ts=media_ts,
                    output_image_width=output_image_width,
                    output_image_height=output_image_height,
                    processed_files_count=processed_files_count,
                    unprocessed_files=unprocessed_files,
                )
                processed_files_count, unprocessed_files = gen

        statistics.append(
            ["build_media_files_from_list", processed_files_count]
        )
        logger.info(
            f"{processed_files_count} images have been generated successfully."
        )

        log_file = f"{log_path}/unprocessed_files.log"

        if len(unprocessed_files) > 0:
            up_files = [item + "\n" for item in unprocessed_files]

            with open(log_file, "w") as w:
                w.writelines(up_files)

            logger.warning(f"{len(unprocessed_files)} unprocessed file(s)!")
            logger.debug(f"Unprocessed file(s): {unprocessed_files}")
        elif os.path.exists(log_file):
            with open(log_file, "r+") as t:
                t.truncate(0)

        logger.info("Image files tree generation done.")

        if len(unprocessed_files) > 0:
            logger.info(
                f'Some files were not processed, please review the list: "{log_file}".'
            )
    except Exception as e:
        logger.error(e)
        raise

    return True
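
The timestamp routing hinges on two regexes: one validating the whole path (".../YYYYMMDD/file.ext") and one validating the extracted directory name alone. The extraction step in isolation, with toy inputs (media_ts_format is the project's fallback parser and is not shown in the excerpt):

import re

PATH_RE = re.compile(r"^.*?/[0-9]{8}/.*[.][A-Za-z0-9-]+$")
TS_RE = re.compile(r"^[0-9]{8}$")

def extract_ts(media_path: str):
    """Return the YYYYMMDD directory component, or None if it is absent."""
    ts = media_path.split("/")[-2]
    if PATH_RE.match(media_path) and TS_RE.match(ts):
        return ts
    return None  # caller would fall back to media_ts_format(ts, media_path)

# extract_ts("photos/20160823/img.jpg") -> "20160823"
# extract_ts("photos/holiday/img.jpg")  -> None
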
Example 8
def get_local_medias_files(
    path: str = LOCAL_MEDIA_PATH,
    save_to_disk: bool = True,
    files_list_path: str = FILES_LIST_PATH,
    files_list_filename: str = FILES_LIST_FILENAME,
    config_path: str = CONFIG_PATH,
) -> list:
    """ Generates a list of local media files """

    if os.path.exists(path):
        local_medias = []
        filtered_files = []

        try:
            logger.info("Generating list of local files...")
            for dirpath, _, files in os.walk(path):
                for filename in files:
                    fname = os.path.join(dirpath, filename)
                    if is_filtered(filename):
                        filtered_files.append(fname)
                    else:
                        local_medias.append(fname)

            if len(local_medias) > 0:
                statistics.append(
                    ["get_local_medias_files", len(local_medias)]
                )

                logger.info("List successfully generated.")
                logger.debug(f"Count: {len(local_medias)} local files.")
            else:
                logger.critical(
                    f'No files found in source directory: "{path}".'
                )
                return False

            if save_to_disk:
                logger.info("Writing local files list to disk...")
                data_to_write = [item + "\n" for item in local_medias]

                with open(
                    f"{files_list_path}/{files_list_filename}", "w"
                ) as w:
                    w.writelines(data_to_write)

                logger.info(
                    f'The list has been saved successfully: "{files_list_path}/{files_list_filename}".'
                )

            if len(filtered_files) > 0:
                logger.info(
                    f'Number of file(s) excluded by filter specified in "{config_path}/exclude_local.txt": {len(filtered_files)}.'
                )
                logger.debug(f"excluded by filter: {filtered_files}")
        except Exception as e:
            logger.error(e)
            raise
        return local_medias
    else:
        logger.critical(f'Input path "{path}" does not exist! Stopping here!')
        return False
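
The directory scan is a plain os.walk partition: each file lands in one of two lists depending on an exclusion predicate. A self-contained sketch with an inline fnmatch filter standing in for the project's is_filtered helper; the patterns are placeholders:

import fnmatch
import os

EXCLUDE_PATTERNS = [".DS_Store", "Thumbs.db", "*.tmp"]  # placeholder filter

def scan_media(path):
    """Split files under path into (kept, excluded) by filename pattern."""
    kept, excluded = [], []
    for dirpath, _, files in os.walk(path):
        for filename in files:
            full = os.path.join(dirpath, filename)
            if any(fnmatch.fnmatch(filename, p) for p in EXCLUDE_PATTERNS):
                excluded.append(full)
            else:
                kept.append(full)
    return kept, excluded
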
Example 9
def s3_clean(
    bucket_name: str = BUCKET_NAME, aws_region: str = AWS_REGION
) -> bool:
    """ Delete incomplete multipart uploads """

    logger.info("Getting list of incomplete uploads...")
    try:
        multipart_uploads_cmd = f"aws s3api list-multipart-uploads --bucket {bucket_name} --region {aws_region}"
        proc = subprocess.run(
            multipart_uploads_cmd,
            shell=True,
            check=True,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            universal_newlines=True,
        )
        logger.debug(
            f'"s3api list-multipart-uploads" returns code: {proc.returncode} => OK'
        )
    except Exception as e:
        # The AWS CLI reports service-level errors (e.g. a missing bucket) with exit status 254.
        if "exit status 254" in str(e):
            logger.warning(
                f"Bucket {bucket_name} does not exist. Stopping here."
            )
            return False
        else:
            logger.error(e)
            raise

    if proc.returncode == 0 and proc.stdout:
        # "Uploads" is absent when there are no incomplete uploads.
        multipart_uploads_list = json.loads(proc.stdout).get("Uploads", [])
        logger.info("Delete in progress...")
        try:
            for item in multipart_uploads_list:
                proc_cmd = f"aws s3api abort-multipart-upload --bucket {bucket_name} --region {aws_region} --key \"{item['Key']}\" --upload-id {item['UploadId']}"

                proc = subprocess.run(
                    proc_cmd,
                    shell=True,
                    check=True,
                    stdout=subprocess.PIPE,
                    stderr=subprocess.STDOUT,
                    universal_newlines=True,
                )

                logger.debug(
                    f'"s3api abort-multipart-upload" returns code: {proc.returncode} => OK'
                )
                logger.info(f"Deleted incomplete upload: \"{item['Key']}\".")

            # Record the total once, after the loop, rather than per item.
            statistics.append(["s3_clean", len(multipart_uploads_list)])
            logger.debug(
                f"{len(multipart_uploads_list)} incomplete upload(s) deleted."
            )
        except Exception as e:
            logger.error(e)
            raise

        return True
    else:
        logger.info("Nothing to clean.")
        return True
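
Shelling out to aws s3api works, but the same cleanup can be done in-process. A boto3 sketch of the same two calls; the bucket name is a placeholder, and a single list_multipart_uploads page (up to 1000 uploads) is assumed:

import boto3

def abort_incomplete_uploads(bucket="my-bucket", region="eu-west-1"):
    """Abort every in-progress multipart upload in the bucket."""
    s3 = boto3.client("s3", region_name=region)
    uploads = s3.list_multipart_uploads(Bucket=bucket).get("Uploads", [])
    for upload in uploads:
        s3.abort_multipart_upload(
            Bucket=bucket, Key=upload["Key"], UploadId=upload["UploadId"]
        )
    return len(uploads)
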
Example 10
def media_sync(
    local_path: str = LOCAL_MEDIA_OUTPUT_PATH,
    bucket_name: str = BUCKET_NAME,
    remote_path_prefix: str = S3_PREFIX,
    log_path: str = LOG_PATH,
    aws_region: str = AWS_REGION,
    config_path: str = CONFIG_PATH,
) -> bool:
    """ Synchronize local/S3 media files tree """

    exclude_s3_file_path = f"{config_path}/exclude_s3.txt"
    if os.path.exists(exclude_s3_file_path):
        with open(exclude_s3_file_path, "r") as r:
            common_oses_filter = r.read().splitlines()
        # splitlines() already strips newlines; drop blanks and "#" comments.
        cli_filter_args = "".join(
            f' --exclude "{item}"'
            for item in common_oses_filter
            if item.strip() and not item.startswith("#")
        )
    else:
        cli_filter_args = ""

    logger.info("Starting sync...")
    logger.info(f"S3 sync task log => tail -F {log_path}/s3_sync.log")

    try:
        cli_cmd = f"aws s3 sync {local_path}/ s3://{bucket_name}/{remote_path_prefix}/ --delete --region {aws_region} {cli_filter_args}"
        logger.debug(f"cli command: {cli_cmd}")
        with open(f"{log_path}/s3_sync.log", "w") as w:
            proc = subprocess.run(
                cli_cmd,
                shell=True,
                check=True,
                stdout=w,
                stderr=subprocess.STDOUT,
                universal_newlines=True,
            )

        if proc.returncode == 0:
            with open(f"{log_path}/s3_sync.log", "r") as r:
                processed_objects = r.read().splitlines()

            processed_objects = [
                item for item in processed_objects if "upload" in item
            ]
            statistics.append(["media_sync", len(processed_objects)])

            logger.info("Sync completed successfully.")
            logger.debug(
                f"{len(processed_objects)} files have been synchronized successfully."
            )
            logger.debug(f"S3 CLI returned code: {proc.returncode} => OK")
        else:
            logger.critical("Something wrong happened during sync operation!")
            return False
    except Exception as e:
        logger.error(e)
        raise

    return True
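
The exclude file is a one-pattern-per-line list in which "#" marks a comment, so blank and comment lines must be dropped before the patterns become --exclude arguments. The parsing step in isolation:

def build_exclude_args(lines):
    """Turn exclude-file lines into --exclude CLI arguments."""
    patterns = [ln for ln in lines if ln.strip() and not ln.startswith("#")]
    return "".join(f' --exclude "{p}"' for p in patterns)

# build_exclude_args(["# macOS", ".DS_Store", "", "*.tmp"])
# -> ' --exclude ".DS_Store" --exclude "*.tmp"'
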
Example 11
def build_media_objects(
    items: list = None,
    aws_region: str = AWS_REGION,
    bucket_name: str = BUCKET_NAME,
) -> list:
    """ Build media objects """

    media_items: list = []
    ts = None
    logger.info("Building media list dictionaries...")
    logger.debug(
        f"Context Parameters: {build_media_objects.__name__} => {build_media_objects.__code__.co_varnames}"
    )

    try:
        for item in items:
            key = item.split("/")
            name = key[3]
            ts = key[2]
            ts = f"{ts[0:4]}-{ts[4:6]}-{ts[6:8]}"
            path = f"{key[0]}/{key[1]}/{key[2]}"
            url = f"https://s3-{aws_region}.amazonaws.com/{bucket_name}/{path}/{name}"

            media_type = get_media_type(name)

            if ts != "" and name != "":
                media = {
                    "ts": ts,
                    "name": name,
                    "kind": media_type,
                    "path": path,
                    "url": url,
                }
                media_items.append(media)
            else:
                logger.warning(
                    f"Empty ts or name (ts={ts!r}, name={name!r}). Stopping here."
                )
                return False

        data = sorted(media_items, key=itemgetter("ts"))

        nbr_data = len(data)
        nbr_items = len(items)

        statistics.append(["build_media_objects", len(data)])

        logger.info("Media list dictionaries built successfully.")
        logger.debug(f"{nbr_data} objects in media list.")

        if nbr_data != nbr_items:
            logger.critical(
                "Inconsistency found between data input and output! Stopping here!"
            )
            logger.debug(
                f"Input objects list count [{nbr_items}] and generated media objects count [{nbr_data}] are uneven. Stopping here."
            )
            return False
    except Exception as e:
        logger.error(e)
        raise

    return data
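
The parsing assumes keys of exactly four segments (prefix/album/YYYYMMDD/name), and the final sorted(..., key=itemgetter("ts")) orders objects chronologically because the reformatted YYYY-MM-DD strings sort lexicographically. A toy run of the same steps with made-up keys:

from operator import itemgetter

keys = ["assets/trip/20160823/b.jpg", "assets/trip/20150101/a.jpg"]

objects = []
for key in keys:
    prefix, album, raw_ts, name = key.split("/")
    objects.append({
        "ts": f"{raw_ts[0:4]}-{raw_ts[4:6]}-{raw_ts[6:8]}",  # YYYY-MM-DD
        "name": name,
        "path": f"{prefix}/{album}/{raw_ts}",
    })

objects.sort(key=itemgetter("ts"))  # "2015-01-01" sorts before "2016-08-23"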