Ejemplo n.º 1
0
def build_condition_table(db_session, service):
    """
    Build the Conditions table - the condition keys available to a service.

    Every table that get_html returns for the service is inspected. Tables that
    contain the 'Condition Keys', 'Description' and 'Type' columns are stored
    row by row as ConditionTable entries, committing after each row.

    :param db_session: Database session the ConditionTable rows are added to
    :param service: AWS service prefix whose documentation tables are parsed
    """
    directory = os.path.abspath(os.path.dirname(__file__)) + '/data/docs/'
    html_list = get_html(directory, service)
    for df_list in html_list:
        for df in df_list:
            # NOTE(review): df is presumably a pandas DataFrame, so `in` tests
            # its column labels - confirm against get_html.
            table = json.loads(df.to_json(orient='split'))
            if 'Condition Keys' in df and 'Description' in df and 'Type' in df:
                # Cells are read by position: 0 = key name, 1 = description,
                # 2 = value type.
                for row in table['data']:
                    condition_key_name = row[0]
                    # Description: sometimes it is empty, like the conditions
                    # table for S3. In order to avoid errors with NULL Database
                    # entries, set to 'None'
                    description = 'None' if row[1] is None else row[1]

                    db_session.add(ConditionTable(
                        service=service,
                        condition_key_name=condition_key_name,
                        condition_key_service=get_service_from_condition_key(
                            condition_key_name),
                        description=description,
                        condition_value_type=str.lower(row[2])
                    ))
                    db_session.commit()
Ejemplo n.º 2
0
def build_arn_table(db_session, service):
    """
    Build the ARN table - one entry per resource type documented for a service.

    Every table that get_html returns for the service is inspected. Tables that
    contain the 'Resource Types' and 'ARN' columns are stored row by row as
    ArnTable entries, committing after each row.

    :param db_session: Database session the ArnTable rows are added to
    :param service: AWS service prefix whose documentation tables are parsed
    """
    directory = os.path.abspath(os.path.dirname(__file__)) + '/data/docs/'
    html_list = get_html(directory, service)
    for df_list in html_list:
        for df in df_list:
            # NOTE(review): df is presumably a pandas DataFrame, so `in` tests
            # its column labels - confirm against get_html.
            table = json.loads(df.to_json(orient='split'))
            if 'Resource Types' in df and 'ARN' in df:
                # Cells are read by position: 0 = resource type name, 1 = ARN.
                for row in table['data']:
                    resource_type_name = row[0]
                    arn_cell = row[1]
                    # Parse the path once; store an empty string when the
                    # helper yields nothing usable.
                    resource_path = get_resource_path_from_arn(arn_cell) or ''
                    db_session.add(ArnTable(
                        resource_type_name=resource_type_name,
                        # The partition placeholder is pinned to 'aws'
                        raw_arn=str(arn_cell).replace("${Partition}", "aws"),
                        arn='arn',
                        partition='aws',
                        service=get_service_from_arn(arn_cell),
                        region=get_region_from_arn(arn_cell),
                        account=get_account_from_arn(arn_cell),
                        resource=get_resource_from_arn(arn_cell),
                        resource_path=resource_path
                    ))
                    db_session.commit()
Ejemplo n.º 3
0
def build_condition_table(db_session, service):
    """
    Populate the Conditions table with the condition keys available to a service.

    Each table returned by get_html for the service is checked for the
    'Condition Keys', 'Description' and 'Type' columns; every row of a matching
    table is added to the session as a ConditionTable entry and committed.

    :param db_session: SQLAlchemy database session
    :param service: AWS Service Prefix
    """
    docs_directory = os.path.abspath(os.path.dirname(__file__)) + '/data/docs/'
    wanted_columns = ('Condition Keys', 'Description', 'Type')
    for frames in get_html(docs_directory, service):
        for frame in frames:
            parsed = json.loads(frame.to_json(orient='split'))
            # Skip any table that is not a condition-keys table
            if not all(column in frame for column in wanted_columns):
                continue
            for row in parsed['data']:
                key_name = row[0]
                # The description cell can be empty (the S3 conditions table,
                # for example); store the string 'None' rather than a NULL
                # database entry.
                description = row[1]
                if description is None:
                    description = 'None'
                entry = ConditionTable(
                    service=service,
                    condition_key_name=key_name,
                    condition_key_service=get_service_from_condition_key(key_name),
                    description=description,
                    condition_value_type=str.lower(row[2]),
                )
                db_session.add(entry)
                db_session.commit()
Ejemplo n.º 4
0
def build_arn_table(db_session, service):
    """
    Builds the ARN Table - the table of resource types - in the SQLite database.

    Every table that get_html returns for the service is inspected. Tables that
    contain the 'Resource Types' and 'ARN' columns are stored row by row as
    ArnTable entries, committing after each row.

    :param db_session: SQLAlchemy database session.
    :param service: The AWS service prefix
    """
    directory = os.path.abspath(os.path.dirname(__file__)) + '/data/docs/'
    html_list = get_html(directory, service)
    for df_list in html_list:
        for df in df_list:  # pylint: disable=invalid-name
            table = json.loads(df.to_json(orient='split'))
            if 'Resource Types' in df and 'ARN' in df:
                # Cells are read by position: 0 = resource type name, 1 = ARN,
                # 2 = condition keys.
                for row in table['data']:
                    resource_type_name = row[0]
                    arn_cell = row[1]
                    condition_keys = row[2]

                    # Handle resource ARN path: parse it once and store an
                    # empty string when the helper yields nothing usable.
                    resource_path = get_resource_path_from_arn(arn_cell) or ''

                    # Handle condition keys: a missing cell stays None. If
                    # there are multiple condition keys they arrive separated
                    # by two spaces, so make them comma separated instead.
                    if condition_keys is not None and '  ' in condition_keys:
                        condition_keys = get_comma_separated_condition_keys(
                            condition_keys)

                    db_session.add(
                        ArnTable(
                            resource_type_name=resource_type_name,
                            # The partition placeholder is pinned to 'aws'
                            raw_arn=str(arn_cell).replace(
                                "${Partition}", "aws"),
                            arn='arn',
                            partition='aws',
                            service=get_service_from_arn(arn_cell),
                            region=get_region_from_arn(arn_cell),
                            account=get_account_from_arn(arn_cell),
                            resource=get_resource_from_arn(arn_cell),
                            resource_path=resource_path,
                            condition_keys=condition_keys
                        ))
                    db_session.commit()
Ejemplo n.º 5
0
def _first_arn_row(db_session, service, resource_type_name):
    """Return the first ArnTable.raw_arn row matching the service and resource type, or None."""
    return db_session.query(ArnTable.raw_arn).filter(
        and_(ArnTable.service.ilike(service),
             ArnTable.resource_type_name.like(resource_type_name))).first()


def _normalize_multi_value_cell(cell, empty_value):
    """Return empty_value for a missing cell; comma-separate cells holding several values."""
    if cell is None:
        return empty_value
    # Multiple values are ingested separated by two spaces
    if '  ' in cell:
        return get_comma_separated_condition_keys(cell)
    return cell


def build_action_table(db_session, service):
    """
    Builds the action table in the SQLite database.

    See the first Table on any service-specific page in the Actions, Resources,
    and Condition Keys documentation. That information is scraped, parsed, and
    stored in the SQLite database using this function.

    Rows whose resource type cannot be found in ArnTable for this service are
    skipped, so the ARN table for the service is expected to be populated first.

    :param db_session: Database session object
    :param service: AWS Service to query. This can be called in a loop or for a single service.
    :return: None
    """
    directory = os.path.abspath(os.path.dirname(__file__)) + '/data/docs/'
    html_list = get_html(directory, service)
    for df_list in html_list:
        for df in df_list:
            table = json.loads(df.to_json(orient='split'))
            # Only the Actions table is handled here; every other table
            # (including the resource types / ARN table) is skipped.
            if not ('Actions' in df and 'Access Level' in df):
                continue
            # Cells are read by position: 0 = action, 1 = description,
            # 2 = access level, 3 = resource type, 4 = condition keys,
            # 5 = dependent actions.
            for row in table['data']:
                resource_cell = row[3]
                # If the cell is blank, that indicates it needs wildcard
                if resource_cell is None:
                    resource_type_name = 'None'
                    resource_type_name_append_wildcard = 'False'
                    resource_arn_format = '*'
                # Check if resource type name has wildcard suffix - i.e., parameter* instead of parameter
                # If it does, set the append_wildcard flag to true,
                # and set the resource name to that but without the wildcard to make searching easier
                elif '*' in resource_cell:
                    # NOTE(review): drops the last character whenever '*'
                    # appears anywhere in the cell - assumes it is a suffix.
                    resource_type_name = resource_cell[:-1]
                    resource_type_name_append_wildcard = 'True'
                    arn_row = _first_arn_row(
                        db_session, service, resource_type_name)
                    # For EC2 RunInstances, ResourceTypes have some duplicates.
                    # The Resource Types (*required) column has duplicates and the Access Level has `nan`
                    if arn_row is None:
                        continue
                    resource_arn_format = arn_row.raw_arn
                else:
                    resource_type_name = resource_cell
                    resource_type_name_append_wildcard = 'False'
                    arn_row = _first_arn_row(
                        db_session, service, resource_type_name)
                    if arn_row is None:
                        continue
                    raw_arn = arn_row.raw_arn
                    # NOTE(review): same suffix assumption as above - the last
                    # character is dropped if '*' occurs anywhere in the ARN.
                    resource_arn_format = raw_arn[:-1] if '*' in raw_arn else raw_arn

                # For lambda:InvokeFunction, the cell is 'lambda:InvokeFunction [permission only]'.
                # Keep only the text before the first space; a name without
                # a space is returned unchanged by partition.
                action_name = row[0].partition(' ')[0]

                # Condition keys: in order to avoid errors with NULL Database
                # entries, a missing cell is stored as the string 'None'
                condition_keys = _normalize_multi_value_cell(row[4], 'None')

                # Dependent actions: same separator handling as condition
                # keys, but a missing cell stays None
                dependent_actions = _normalize_multi_value_cell(row[5], None)

                db_session.add(ActionTable(
                    service=service,
                    name=str.lower(action_name),
                    description=row[1],
                    access_level=row[2],
                    resource_type_name=resource_type_name,
                    resource_type_name_append_wildcard=resource_type_name_append_wildcard,
                    resource_arn_format=str(resource_arn_format),
                    condition_keys=condition_keys,
                    dependent_actions=dependent_actions
                ))
                db_session.commit()
    db_session.commit()