コード例 #1
0
def test_get_from_event():
    """Check the bucket, event name and tenant read from the mock essence event."""
    parsed_event = json.loads(S3_MOCK_ESSENCE_EVENT)

    # One lookup per key, each compared against the value in the mock event.
    assert get_from_event(parsed_event, "bucket") == "MAM_HighresVideo"
    assert get_from_event(parsed_event, "event_name") == "ObjectCreated:Put"
    assert get_from_event(parsed_event, "tenant") == "OR-rf5kf25"
コード例 #2
0
def construct_fts_params_dict(event, dest_filename, dest_path, ctx):
    """Build the parameter dict for a file transfer request.

    Args:
        event: The s3 event; host, tenant and object key are read from it
            through get_from_event.
        dest_filename: File name to use at the destination.
        dest_path: Path to use at the destination.
        ctx: Context whose config provides the MediaHaven FTP host.

    Returns:
        dict -- With a "source" and a "destination" section and "move" set
        to False.
    """
    # NOTE(review): the source "file" is filled with the tenant and the
    # source "path" with the object key -- confirm this is intended.
    source = {
        "host": get_from_event(event, 'host'),
        "user": get_from_event(event, 'tenant'),
        "password": "******",
        "file": get_from_event(event, 'tenant'),
        "path": get_from_event(event, 'object_key'),
    }
    destination = {
        "host": ctx.config['mediahaven']['ftp']['host'],
        "user": get_from_event(event, 'tenant'),
        "password": "******",
        "file": dest_filename,
        "path": dest_path,
    }
    return {"source": source, "destination": destination, "move": False}
コード例 #3
0
ファイル: main.py プロジェクト: viaacode/s3-events-handler
def construct_fts_params_dict(event, pid, file_extension, dest_path, ctx):
    """Build the parameter dict for a file transfer request.

    Args:
        event: The s3 event; domain, bucket and object key are read from it
            through get_from_event.
        pid: Used as the base name of the destination file.
        file_extension: Appended directly after the pid in the destination
            file name.
        dest_path: Inserted between the fixed sidecar root and the file name.
        ctx: Context whose app config provides the MediaHaven FTP host.

    Returns:
        dict -- With a "source" section (domain, bucket, object) and a
        "destination" section (path, host).
    """
    source = {
        "domain": {"name": get_from_event(event, "domain")},
        "bucket": {"name": get_from_event(event, "bucket")},
        "object": {"key": get_from_event(event, "object_key")},
    }
    target_path = f"/mnt/STORAGE/INGEST/SIDECAR{dest_path}/{pid}{file_extension}"
    destination = {
        "path": target_path,
        "host": ctx.config.app_cfg["mediahaven"]["ftp"]["host"],
    }
    return {"source": source, "destination": destination}
コード例 #4
0
ファイル: main.py プロジェクト: viaacode/s3-events-handler
def calculate_handler(event: dict):
    """Factory method returning the handler function for an s3 event.

    The part of the event name before the first ":" selects the handler:
    "ObjectCreated" maps to handle_create_event and "ObjectRemoved" to
    handle_remove_event.

    Raises:
        NackException -- When the event name has any other base type.
    """
    event_name = get_from_event(event, "event_name")
    # Keep only the text before the first ":" of the event name.
    base_type, _, _ = event_name.partition(":")

    if base_type == "ObjectCreated":
        return handle_create_event
    if base_type == "ObjectRemoved":
        return handle_remove_event

    raise NackException(f"Unknown type of s3 event: {event_name}",
                        s3_event=event)
コード例 #5
0
ファイル: main.py プロジェクト: viaacode/s3-events-handler
def query_params_item_ingested(event: dict,
                               cp_name: str) -> List[Tuple[str, str]]:
    """ Construct the query parameters to check if an item is already in the MAM.

    A check on S3 object key is always needed.

    A check on md5 is only executed if there is an md5 and
    the CP is not VRT unless the item is a collateral.

    Arguments:
        event {dict} -- The s3 event; object key and md5 are read from it.
        cp_name {str} -- Name of the CP, compared case-insensitively to "VRT".

    Returns:
        List[Tuple[str, str]] -- The query params.
    """
    # Check based on the S3 object key
    query_params = [("s3_object_key", get_from_event(event, "object_key"))]

    # Check based on md5 if:
    #  - the md5 is available and
    #  - the CP is not VRT unless the item is a collateral
    #
    # The CP name is compared with equality: `not in ("VRT")` tested against
    # a plain string (no trailing comma, so not a tuple), which made it a
    # substring check that also matched names such as "", "V" or "RT".
    md5 = get_from_event(event, "md5")
    if md5 and (cp_name.upper() != "VRT" or is_collateral(event)):
        query_params.append(("md5", md5))

    return query_params
コード例 #6
0
ファイル: main.py プロジェクト: viaacode/s3-events-handler
def construct_collateral_sidecar(event, pid, media_id, cp_name, object_use):
    """Build the sidecar XML document for a collateral.

    Args:
        event: The s3 event; object key, tenant, domain, bucket, user and md5
            are read from it through get_from_event.
        pid: Written to the title, the PID property and the
            "is_verwant_aan" relation.
        media_id: Written as "dc_identifier_localid".
        cp_name: Written as "CP" and mentioned in the description.
        object_use: Written as "object_use".

    Returns:
        The result of etree.tostring for the document: pretty-printed,
        UTF-8 encoded and with an XML declaration.
    """
    s3_object_key = get_from_event(event, "object_key")

    root = etree.Element("MediaHAVEN_external_metadata")
    etree.SubElement(root, "title").text = f"Collateral: pid: {pid}"

    description = f"""Subtitles for essence:
    - filename: {s3_object_key}
    - CP: {cp_name}
    """
    etree.SubElement(root, "description").text = description

    mdprops = etree.SubElement(root, "MDProperties")
    etree.SubElement(mdprops, "CP").text = cp_name
    etree.SubElement(mdprops, "CP_id").text = get_from_event(event, "tenant")
    etree.SubElement(mdprops, "sp_name").text = "s3"
    etree.SubElement(mdprops, "PID").text = pid
    etree.SubElement(mdprops,
                     "s3_domain").text = get_from_event(event, "domain")
    etree.SubElement(mdprops,
                     "s3_bucket").text = get_from_event(event, "bucket")
    etree.SubElement(mdprops, "s3_object_key").text = s3_object_key
    etree.SubElement(mdprops, "dc_source").text = s3_object_key
    etree.SubElement(mdprops,
                     "s3_object_owner").text = get_from_event(event, "user")
    etree.SubElement(mdprops, "dc_identifier_localid").text = media_id
    etree.SubElement(mdprops, "object_level").text = "file"
    etree.SubElement(mdprops, "object_use").text = object_use
    etree.SubElement(mdprops, "ie_type").text = "n/a"

    # Only add md5 if valid and available from the event.
    # The value is looked up once and checked to be a string first, so an
    # event without md5 skips the element instead of raising a TypeError in
    # the regex call. fullmatch also rejects a value with a trailing newline,
    # which "$" would have accepted.
    md5 = get_from_event(event, "md5")
    if isinstance(md5, str) and re.fullmatch(r"[a-fA-F0-9]{32}", md5):
        etree.SubElement(mdprops, "md5").text = md5

    relations = etree.SubElement(mdprops, "dc_relations")
    etree.SubElement(relations, "is_verwant_aan").text = pid

    return etree.tostring(root,
                          pretty_print=True,
                          encoding="UTF-8",
                          xml_declaration=True)
コード例 #7
0
 def test_get_from_event(self):
     """Check the bucket read from the mock S3 event."""
     parsed_event = json.loads(S3_MOCK_EVENT)
     actual_bucket = get_from_event(parsed_event, 'bucket')
     self.assertEqual(actual_bucket, 'MAM_HighresVideo')
コード例 #8
0
def event_handler(event, context):
    """Main event handler function.

    NOTE(review): only the bucket lookup is visible in this excerpt; the
    value is assigned but not used and `context` is not read here. The rest
    of the body is presumably cut off -- confirm against the full source.
    """
    # Get the bucket (which conveniently also is the organisation ID)
    bucket = get_from_event(event, 'bucket')
コード例 #9
0
#
#  @author: https://github.com/maartends
#
#######################################################################
#
#  ./tests/unit_tests.py
#
#######################################################################

import json
from meemoo.helpers import get_from_event
from tests.resources import S3_MOCK_EVENT

# Parse the mock S3 event JSON into a Python object.
event = json.loads(S3_MOCK_EVENT)

# Look up several fields on the parsed event; the return values are discarded.
get_from_event(event, "host")
get_from_event(event, "object_key")
get_from_event(event, "tenant")
get_from_event(event, "bucket")

# Node names accepted by the sidecar check at the bottom of this block.
ALLOWED_NODES = ["Dynamic", "Technical"]
XML_ENCODING = "UTF-8"
# Version segment interpolated into the namespace URIs below.
MHS_VERSION = "19.4"
# Namespace prefix -> namespace URI.
MH_NAMESPACES = {
    "mhs": f"https://zeticon.mediahaven.com/metadata/{MHS_VERSION}/mhs/",
    "mh": f"https://zeticon.mediahaven.com/metadata/{MHS_VERSION}/mh/",
}

# Reject any key of metadata_dict that is not an allowed node name.
# NOTE(review): metadata_dict is not defined in the visible code --
# presumably it comes from surrounding code; confirm.
for k in metadata_dict:
    assert k in ALLOWED_NODES, f'Unknown sidecar node: "{k}"'
コード例 #10
0
ファイル: main.py プロジェクト: viaacode/s3-events-handler
def handle_remove_event(event: dict, properties, ctx: Context) -> bool:
    """Handler for s3 removed events

    First we query MH with the s3_object_key and s3_bucket

    This results in the main object (= essence) and its real fragments.
    The real fragments potentially have collaterals linked that need to be deleted.
    So we query all the objects with the media ID of the fragments.
    Of those we filter out the real fragments. We should end up with only the
    collaterals. These will be deleted one by one. Finally, delete the essence.

    Arguments:
        event {dict} -- The s3 event; bucket and object key are read from it.
        properties -- Not used by this handler.
        ctx {Context} -- Passed to the MediahavenService constructor.

    Returns:
        None on every path (the annotation says bool, but no value is
        returned).

    Raises:
        NackException -- When a MediaHaven query fails; only failures that
        are not HTTP errors are raised with requeue=True.
    """

    mediahaven_service = MediahavenService(ctx)

    # Query MH with the s3_bucket and s3_object_key
    s3_bucket = get_from_event(event, "bucket")
    s3_object_key = get_from_event(event, "object_key")
    query_params = [
        ("s3_object_key", s3_object_key),
        ("s3_bucket", s3_bucket),
    ]
    try:
        result = mediahaven_service.get_fragment(query_params, or_params=False)
    except HTTPError as error:
        # HTTPError is handled first: with the `requests` exception classes it
        # is a subclass of RequestException, so listing it second made this
        # branch unreachable and HTTP errors were requeued as connection
        # errors. NOTE(review): confirm both names come from `requests`.
        raise NackException(
            "Error occurred when querying MediaHaven",
            query_params=query_params,
            error=error,
            error_message=error.response.text,
        ) from error
    except RequestException as error:
        raise NackException(
            "Error connecting to MediaHaven, retrying....",
            error=error,
            requeue=True,
        ) from error

    if not result["MediaDataList"]:
        log.info(
            f"No media object found with s3 bucket: {s3_bucket} and object key: {s3_object_key}"
        )
        return

    log.info(
        f"Removing media object with s3 bucket: {s3_bucket} and object key: {s3_object_key}"
    )
    items = result["MediaDataList"]

    # Collect the Media ID (and Fragment ID) of the fragments. These will be used to remove the collaterals.
    fragments = {}
    for item in items:
        if item["Internal"]["IsFragment"]:
            fragments[item["Dynamic"]["dc_identifier_localid"]] = item[
                "Internal"]["FragmentId"]

    try:
        # Get the Fragment ID of the essence to delete
        fragment_id_essence = next(item["Internal"]["FragmentId"]
                                   for item in items
                                   if not item["Internal"]["IsFragment"])
    except StopIteration:
        # Should not occur; leave a trace instead of returning silently.
        log.warning(
            f"No essence found among the media objects with s3 bucket: {s3_bucket} and object key: {s3_object_key}"
        )
        return

    if fragments:
        # Query all the objects with the media IDs of the fragments
        query_params_media_ids = [("dc_identifier_localid", media_id)
                                  for media_id in fragments]
        try:
            response = mediahaven_service.get_fragment(query_params_media_ids)
        except HTTPError as error:
            # Same ordering as above: the specific HTTP error comes first.
            raise NackException(
                "Error occurred when querying MediaHaven",
                query_params=query_params_media_ids,
                error=error,
                error_message=error.response.text,
            ) from error
        except RequestException as error:
            raise NackException(
                "Error connecting to MediaHaven, retrying....",
                error=error,
                requeue=True,
            ) from error

        # Collect the Fragment IDs of the collaterals. The Media ID is used in the delete reason.
        fragments_collateral = [(item["Internal"]["FragmentId"],
                                 item["Dynamic"]["dc_identifier_localid"])
                                for item in response["MediaDataList"]
                                if not item["Internal"]["IsFragment"]]

        # Delete the collaterals
        for collateral_fragment_id, local_id in fragments_collateral:
            # Get the Fragment ID of the fragment to which this collateral is linked to
            linked_fragment_id = fragments.get(local_id)
            delete_media_object(
                mediahaven_service, collateral_fragment_id,
                f'Deleted collateral with local_id: "{local_id}" linked to fragment with fragment_id: "{linked_fragment_id}". Essence was deleted via s3 delete-object.'
            )
            time.sleep(0.2)  # Sleep to not hit rate limit

    # Delete the essence
    delete_media_object(
        mediahaven_service, fragment_id_essence,
        f's3 delete-object for bucket: "{s3_bucket}" and key: "{s3_object_key}"'
    )
コード例 #11
0
ファイル: main.py プロジェクト: viaacode/s3-events-handler
def handle_create_event(event: dict, properties, ctx: Context) -> bool:
    """Handler for s3 create events

    Steps:
      1. Look up the CP name for the tenant of the event.
      2. Query MediaHaven to see if the item is already there; if so, log a
         warning and stop.
      3. For a collateral: derive collateral type and media id from the
         object key, look up the linked item, build the collateral sidecar
         and update the metadata of the linked item.
         For an essence: request a new PID and build the essence sidecar.
      4. Put the sidecar on the FTP server.
      5. Publish a file transfer request on the outgoing queue.

    Arguments:
        event {dict} -- The s3 event.
        properties -- Its correlation_id is passed along when publishing.
        ctx {Context} -- Provides the app config and is passed to the
            services used here.

    Returns:
        None on every path (the annotation says bool, but no value is
        returned).

    Raises:
        NackException -- When a MediaHaven call, the PID request or the FTP
        transfer fails, or when a collateral object key or its linked item
        is not usable.
    """

    # Get cp_name for or_id
    or_id = get_from_event(event, "tenant")
    cp_name = get_cp_name(or_id, ctx)

    # Check if item already in mediahaven
    mediahaven_service = MediahavenService(ctx)
    query_params = query_params_item_ingested(event, cp_name)

    try:
        result = mediahaven_service.get_fragment(query_params)
    except HTTPError as error:
        # HTTPError is handled first: with the `requests` exception classes it
        # is a subclass of RequestException, so listing it second made this
        # branch unreachable and HTTP errors were requeued as connection
        # errors. NOTE(review): confirm both names come from `requests`.
        raise NackException(
            "Error occurred when querying MediaHaven",
            query_params=query_params,
            error=error,
            error_message=error.response.text,
        ) from error
    except RequestException as error:
        raise NackException(
            "Error connecting to MediaHaven, retrying....",
            error=error,
            requeue=True,
        ) from error

    if result["MediaDataList"]:
        log.warning("Item already archived",
                    s3_object_key=get_from_event(event, "object_key"))
        return

    # Check if we are dealing with essence or collateral
    if is_collateral(event):
        # Handle collateral
        object_key = get_from_event(event, "object_key")
        try:
            key_parts = object_key.split("/")
            collateral_type = key_parts[0]
            media_id = key_parts[1]
        except IndexError as error:
            # Object key is not properly formatted as <collateral_type>/<media_id>/
            raise NackException(
                f"Non-compliant object key for collateral: {object_key}",
                error=error,
            ) from error

        log.debug(f"Received a {collateral_type} for media id: {media_id}")

        query_params = [
            ("dc_identifier_localid", media_id),
        ]
        try:
            result = mediahaven_service.get_fragment(query_params)
        except HTTPError as error:
            # Specific HTTP error first, see the note on the first query.
            raise NackException(
                "Error occurred when querying MediaHaven",
                query_params=query_params,
                error=error,
                error_message=error.response.text,
            ) from error
        except RequestException as error:
            raise NackException(
                "Error connecting to MediaHaven, retrying....",
                error=error,
                requeue=True,
            ) from error

        try:
            item_pid = result["MediaDataList"][0]["Dynamic"]["PID"]
            item_fragment_id = result["MediaDataList"][0]["Internal"][
                "FragmentId"]
        except (IndexError, KeyError) as error:
            raise NackException(
                f"Item not found in MediaHaven for dc_identifier_localid: {media_id}",
                error=error,
            ) from error

        log.debug(f"Found pid: {item_pid} for media id: {media_id}")

        # The collateral gets its own pid: the pid of the linked item
        # suffixed with the collateral type.
        pid = f"{item_pid}_{collateral_type}"
        dest_path = construct_destination_path(
            ctx.config.app_cfg["environment"], cp_name, "collateral")
        dest_filename = f"{pid}.xml"

        if collateral_type in ("openOt", "closedOt"):
            object_use = "subtitle"
        else:
            object_use = "collateral"

        sidecar_xml = construct_collateral_sidecar(event, item_pid, media_id,
                                                   cp_name, object_use)

        essence_update_sidecar = construct_fragment_update_sidecar(pid)
        try:
            mediahaven_service.update_metadata(item_fragment_id,
                                               essence_update_sidecar)
        except HTTPError as error:
            # Specific HTTP error first, see the note on the first query.
            raise NackException(
                "Error occurred when updating metadata of collateral",
                fragment_id=item_fragment_id,
                sidecar=essence_update_sidecar,
                error=error,
                error_message=error.response.text,
            ) from error
        except RequestException as error:
            raise NackException(
                "Error connecting to MediaHaven, retrying....",
                error=error,
                requeue=True,
            ) from error
    else:
        # Handle essence
        try:
            pid_service = PIDService(ctx)
            pid = pid_service.get_pid()
        except (RequestException, IndexError, KeyError) as error:
            raise NackException(
                "Unable to get a PID, retrying...",
                error=error,
                requeue=True,
            ) from error

        log.info(f"PID received: {pid}")

        dest_path = construct_destination_path(
            ctx.config.app_cfg["environment"], cp_name, "essence")
        dest_filename = f"{pid}.xml"

        sidecar_xml = construct_essence_sidecar(event, pid, cp_name)

    log.debug(f"Destination: path={dest_path}, file_name={dest_filename}")

    # Transfer sidecar to FTP TRA
    try:
        ftp = FTP(ctx)
        ftp.put(sidecar_xml, dest_path, dest_filename)
    except Exception as error:
        # Potential destructive action has happened, allowed to requeue?
        raise NackException("Error transferring sidecar via FTP",
                            sidecar=sidecar_xml,
                            error=error) from error

    # Request file transfer
    file_extension = os.path.splitext(get_from_event(event, "object_key"))[1]
    param_dict = construct_fts_params_dict(event, pid, file_extension,
                                           dest_path, ctx)

    events = Events(ctx.config.app_cfg["rabbitmq"]["outgoing"], ctx)
    events.publish(json.dumps(param_dict), properties.correlation_id)
コード例 #12
0
ファイル: main.py プロジェクト: viaacode/s3-events-handler
def is_collateral(event: dict) -> bool:
    """Return True when the bucket of the event is the collaterals bucket."""
    bucket_name = get_from_event(event, "bucket")
    return bucket_name == "mam-collaterals"
コード例 #13
0
#
#  @author: https://github.com/maartends
#
#######################################################################
#
#  ./tests/unit_tests.py
#
#######################################################################

import json
from meemoo.helpers import get_from_event
from tests.resources import S3_MOCK_EVENT

# Parse the mock S3 event JSON into a Python object.
event = json.loads(S3_MOCK_EVENT)

# Look up several fields on the parsed event; the return values are discarded.
get_from_event(event, 'host')
get_from_event(event, 'object_key')
get_from_event(event, 'tenant')
get_from_event(event, 'bucket')

# Node names accepted by the sidecar check at the bottom of this block.
ALLOWED_NODES = ['Dynamic', 'Technical']
XML_ENCODING = 'UTF-8'
# Version segment interpolated into the namespace URIs below.
MHS_VERSION = '19.4'
# Namespace prefix -> namespace URI.
MH_NAMESPACES = {
    "mhs": f"https://zeticon.mediahaven.com/metadata/{MHS_VERSION}/mhs/",
    "mh": f"https://zeticon.mediahaven.com/metadata/{MHS_VERSION}/mh/"
}

# Reject any key of metadata_dict that is not an allowed node name.
# NOTE(review): metadata_dict is not defined in the visible code --
# presumably it comes from surrounding code; confirm.
for k in metadata_dict:
    assert k in ALLOWED_NODES, f'Unknown sidecar node: "{k}"'