コード例 #1
0
def build_condition_table(db_session, service):
    """
    Populate the Conditions table with the condition keys documented for a service.

    Every table returned by get_html() for the service is inspected. A table that
    exposes the 'Condition Keys', 'Description' and 'Type' columns contributes one
    ConditionTable record per data row; each record is committed right after it is
    added to the session.

    :param db_session: SQLAlchemy database session
    :param service: AWS Service Prefix
    """
    module_dir = os.path.abspath(os.path.dirname(__file__))
    frame_groups = get_html(module_dir + '/data/docs/', service)
    required_columns = ('Condition Keys', 'Description', 'Type')
    for frame_group in frame_groups:
        # Entries are presumably pandas DataFrames (they offer to_json and
        # column membership tests) - confirm against get_html.
        for frame in frame_group:
            parsed = json.loads(frame.to_json(orient='split'))
            if not all(column in frame for column in required_columns):
                continue
            for row in parsed['data']:
                # The description cell is sometimes empty (e.g. the S3 conditions
                # table). Store the literal string 'None' so the database never
                # receives a NULL description.
                description = 'None' if row[1] is None else row[1]
                # Key names occasionally contain stray spaces, such as
                # 'aws:RequestTag/$  {  TagKey}'; strip them for the stored name.
                key_name = row[0].replace(" ", "")
                record = ConditionTable(
                    service=service,
                    condition_key_name=key_name,
                    # The service prefix is derived from the raw (unstripped) cell.
                    condition_key_service=get_service_from_condition_key(row[0]),
                    description=description,
                    condition_value_type=str.lower(row[2]),
                )
                db_session.add(record)
                db_session.commit()
コード例 #2
0
def build_arn_table(db_session, service):
    """
    Builds the ARN Table - the table of resource types - in the SQLite database.

    Each table returned by get_html() that has both a 'Resource Types' and an 'ARN'
    column yields one ArnTable record per data row. The ARN cell is stripped of
    spaces and then broken into its components by the get_*_from_arn helpers.

    Row cells are read by position: 0 is the resource type name, 1 the raw ARN,
    2 the condition keys.

    :param db_session: SQLAlchemy database session.
    :param service: The AWS service prefix
    """
    directory = os.path.abspath(os.path.dirname(__file__)) + "/data/docs/"
    for df_list in get_html(directory, service):
        for df in df_list:  # pylint: disable=invalid-name
            # Skip unrelated tables before paying for the JSON round-trip.
            if not ("Resource Types" in df and "ARN" in df):
                continue
            table = json.loads(df.to_json(orient="split"))
            for row in table["data"]:
                # Replace the random spaces in the ARN
                raw_arn = row[1].replace(" ", "")
                # Resolve the resource path once; store an empty string when the
                # helper returns nothing usable.
                resource_path = get_resource_path_from_arn(raw_arn) or ""
                # Multiple condition keys arrive separated by two spaces; turn
                # those into a comma separated value. A missing cell stays None
                # and a single key is stored as-is.
                condition_keys = row[2]
                if condition_keys is not None and "  " in condition_keys:
                    condition_keys = get_comma_separated_condition_keys(
                        condition_keys
                    )
                db_session.add(
                    ArnTable(
                        resource_type_name=row[0],
                        raw_arn=str(raw_arn),
                        arn="arn",
                        partition=get_partition_from_arn(raw_arn),
                        service=get_service_from_arn(raw_arn),
                        region=get_region_from_arn(raw_arn),
                        account=get_account_from_arn(raw_arn),
                        resource=get_resource_from_arn(raw_arn),
                        resource_path=resource_path,
                        condition_keys=condition_keys,
                    )
                )
                # NOTE(review): committing after every row is kept so rows added
                # before a failure remain persisted; batching per table would be
                # faster if callers do not rely on that.
                db_session.commit()
コード例 #3
0
def _first_arn_row(db_session, service, resource_type_name):
    """
    Return the first ArnTable result (raw_arn column only) for a service/resource type.

    :param db_session: Database session object
    :param service: AWS service prefix, compared with ILIKE
    :param resource_type_name: Resource type name, compared with LIKE
    :return: The first matching result row, or None when the query matches nothing
    """
    # NOTE(review): LIKE/ILIKE treat '%' and '_' in the supplied values as
    # wildcards; confirm that is acceptable for resource type names.
    return db_session.query(ArnTable.raw_arn).filter(
        and_(ArnTable.service.ilike(service),
             ArnTable.resource_type_name.like(resource_type_name))).first()


def _resolve_resource(db_session, service, resource_cell):
    """
    Work out the resource fields of an action row from its resource type cell.

    :param db_session: Database session object
    :param service: AWS service prefix
    :param resource_cell: Content of the resource type cell, or None when blank
    :return: Tuple (resource_type_name, append_wildcard_flag, resource_arn_format),
        or None when the resource type has no ArnTable entry and the row must be skipped
    """
    # A blank cell means the action is not tied to a resource type: use wildcard.
    if resource_cell is None:
        return 'None', 'False', '*'

    # A '*' marks the resource type as required (parameter* instead of parameter).
    # Record that in the flag and drop the last character so the stored name is
    # easier to search for.
    if '*' in resource_cell:
        resource_type_name = resource_cell[:-1]
        match = _first_arn_row(db_session, service, resource_type_name)
        # For EC2 RunInstances the resource types column has duplicates whose
        # lookup finds nothing; such rows are skipped.
        if match is None:
            return None
        return resource_type_name, 'True', match.raw_arn

    match = _first_arn_row(db_session, service, resource_cell)
    if match is None:
        return None
    resource_arn_format = match.raw_arn
    # NOTE(review): the last character is dropped whenever the ARN contains a
    # '*' anywhere, not only at the end - behavior kept as-is; confirm intent.
    if '*' in resource_arn_format:
        resource_arn_format = resource_arn_format[:-1]
    return resource_cell, 'False', resource_arn_format


def _split_multi_value_cell(cell):
    """
    Normalise a cell that may hold several values separated by two spaces.

    :param cell: Cell content, or None when blank
    :return: None for a blank cell, the result of get_comma_separated_condition_keys
        when the cell contains a double space, otherwise the cell unchanged
    """
    if cell is not None and '  ' in cell:
        return get_comma_separated_condition_keys(cell)
    return cell


def build_action_table(db_session, service, access_level_overrides_file):
    """
    Builds the action table in the SQLite database.
    See the first Table on any service-specific page in the Actions, Resources, and Condition Keys documentation.
    That information is scraped, parsed, and stored in the SQLite database using this function.

    Only tables that have both an 'Actions' and an 'Access Level' column are processed.
    Row cells are read by position: 0 action name, 1 description, 2 access level,
    3 resource type, 4 condition keys, 5 dependent actions. Rows whose resource type
    cannot be found in ArnTable are skipped.

    :param db_session: Database session object
    :param service: AWS Service to query. This can be called in a loop or for a single service.
    :param access_level_overrides_file: The path to the file that we use for overriding access levels that are incorrect in the AWS documentation
    """
    directory = os.path.abspath(os.path.dirname(__file__)) + '/data/docs/'
    html_list = get_html(directory, service)
    access_level_overrides_cfg = get_action_access_level_overrides_from_yml(
        service, access_level_overrides_file)
    for df_list in html_list:
        for df in df_list:  # pylint: disable=invalid-name
            # Anything that is not the Actions table (e.g. the resource types
            # table) is ignored, before paying for the JSON round-trip.
            if not ('Actions' in df and 'Access Level' in df):
                continue
            table = json.loads(df.to_json(orient='split'))
            for row in table['data']:
                resolved = _resolve_resource(db_session, service, row[3])
                if resolved is None:
                    continue
                resource_type_name, append_wildcard, resource_arn_format = resolved

                # For lambda:InvokeFunction, the cell is
                # 'lambda:InvokeFunction [permission only]'. Keep only the text
                # before the first space; a cell without a space is kept whole.
                action_name = row[0].partition(' ')[0]

                # The overrides config is only truthy when the service is
                # present in the overrides YML file; otherwise the documented
                # access level is used unchanged.
                access_level = row[2]
                if access_level_overrides_cfg:
                    override_result = determine_access_level_override(
                        service, str.lower(action_name), row[2], access_level_overrides_cfg)
                    if override_result:
                        access_level = override_result
                        print(
                            f"Override: Setting access level for {service}:{action_name} to {access_level}")

                # Dependent actions use the same double-space convention as
                # condition keys, so both go through the same normalisation.
                condition_keys = _split_multi_value_cell(row[4])
                dependent_actions = _split_multi_value_cell(row[5])

                db_session.add(ActionTable(
                    service=service,
                    name=str.lower(action_name),
                    description=row[1],
                    access_level=access_level,
                    resource_type_name=resource_type_name,
                    resource_type_name_append_wildcard=append_wildcard,
                    resource_arn_format=str(resource_arn_format),
                    condition_keys=condition_keys,
                    dependent_actions=dependent_actions
                ))
                db_session.commit()
    db_session.commit()