def build_condition_table(db_session, service):
    """
    Build the Conditions table - the condition keys listed for a service.

    Each table returned by ``get_html`` that has 'Condition Keys',
    'Description' and 'Type' columns contributes one ConditionTable row
    per table row. All other tables are skipped.

    :param db_session: SQLAlchemy database session
    :param service: AWS service prefix
    :return: None. Rows are added to the session and committed.
    """
    directory = os.path.abspath(os.path.dirname(__file__)) + '/data/docs/'
    html_list = get_html(directory, service)
    required_columns = ('Condition Keys', 'Description', 'Type')
    for df_list in html_list:
        for df in df_list:
            # Check the columns first so irrelevant tables are never
            # serialised to JSON.
            if not all(column in df for column in required_columns):
                continue
            # orient='split' exposes the rows as a list under the 'data' key.
            table = json.loads(df.to_json(orient='split'))
            for row in table['data']:
                # Description: sometimes it is empty, like the conditions
                # table for S3. In order to avoid errors with NULL Database
                # entries, set to 'None'
                description = 'None' if row[1] is None else row[1]
                db_session.add(ConditionTable(
                    service=service,
                    condition_key_name=row[0],
                    condition_key_service=get_service_from_condition_key(
                        row[0]),
                    description=description,
                    condition_value_type=str.lower(row[2])
                ))
    db_session.commit()
def build_arn_table(db_session, service):
    """
    Build the ARN table - the table of resource types - for a service.

    Each table returned by ``get_html`` that has 'Resource Types' and 'ARN'
    columns contributes one ArnTable row per table row. All other tables are
    skipped.

    :param db_session: SQLAlchemy database session
    :param service: AWS service prefix
    :return: None. Rows are added to the session and committed.
    """
    directory = os.path.abspath(os.path.dirname(__file__)) + '/data/docs/'
    html_list = get_html(directory, service)
    for df_list in html_list:
        for df in df_list:
            # Check the columns first so irrelevant tables are never
            # serialised to JSON.
            if 'Resource Types' not in df or 'ARN' not in df:
                continue
            # orient='split' exposes the rows as a list under the 'data' key.
            table = json.loads(df.to_json(orient='split'))
            for row in table['data']:
                resource_type_name = row[0]
                raw_arn = row[1]
                # Parse the resource path once; store '' when the helper
                # returns nothing usable.
                resource_path = get_resource_path_from_arn(raw_arn) or ''
                db_session.add(ArnTable(
                    resource_type_name=resource_type_name,
                    raw_arn=str(raw_arn).replace("${Partition}", "aws"),
                    arn='arn',
                    partition='aws',
                    service=get_service_from_arn(raw_arn),
                    region=get_region_from_arn(raw_arn),
                    account=get_account_from_arn(raw_arn),
                    resource=get_resource_from_arn(raw_arn),
                    resource_path=resource_path
                ))
    db_session.commit()
def build_condition_table(db_session, service):
    """
    Populate the Conditions table with the condition keys of one service.

    A ConditionTable row is added for every row of each scraped table that
    carries the 'Condition Keys', 'Description' and 'Type' columns.

    :param db_session: SQLAlchemy database session
    :param service: AWS Service Prefix
    """
    docs_dir = os.path.abspath(os.path.dirname(__file__)) + '/data/docs/'
    wanted_columns = ('Condition Keys', 'Description', 'Type')
    for frames in get_html(docs_dir, service):
        for frame in frames:
            parsed = json.loads(frame.to_json(orient='split'))
            if not all(column in frame for column in wanted_columns):
                continue
            for row in parsed['data']:
                # The description cell can be empty (the S3 conditions table
                # is one example); store the string 'None' rather than a
                # NULL database entry.
                description = row[1]
                if description is None:
                    description = 'None'
                key_name = row[0]
                entry = ConditionTable(
                    service=service,
                    condition_key_name=key_name,
                    condition_key_service=get_service_from_condition_key(
                        key_name),
                    description=description,
                    condition_value_type=str.lower(row[2]))
                db_session.add(entry)
    db_session.commit()
def build_arn_table(db_session, service):
    """
    Builds the ARN Table - the table of resource types - in the SQLite database.

    Each table returned by ``get_html`` that has 'Resource Types' and 'ARN'
    columns contributes one ArnTable row per table row. All other tables are
    skipped.

    :param db_session: SQLAlchemy database session.
    :param service: The AWS service prefix
    :return: None. Rows are added to the session and committed.
    """
    directory = os.path.abspath(os.path.dirname(__file__)) + '/data/docs/'
    html_list = get_html(directory, service)
    for df_list in html_list:
        for df in df_list:  # pylint: disable=invalid-name
            # Check the columns first so irrelevant tables are never
            # serialised to JSON.
            if 'Resource Types' not in df or 'ARN' not in df:
                continue
            # orient='split' exposes the rows as a list under the 'data' key.
            table = json.loads(df.to_json(orient='split'))
            for row in table['data']:
                resource_type_name = row[0]
                raw_arn = row[1]
                raw_condition_keys = row[2]

                # Handle resource ARN path: parse once, store '' when the
                # helper returns nothing usable.
                resource_path = get_resource_path_from_arn(raw_arn) or ''

                # Handle condition keys: an empty cell stays None. If there
                # are multiple condition keys, make them comma separated
                # instead of ingesting the space-separated cell as-is.
                if raw_condition_keys is not None and ' ' in raw_condition_keys:
                    condition_keys = get_comma_separated_condition_keys(
                        raw_condition_keys)
                else:
                    condition_keys = raw_condition_keys

                db_session.add(
                    ArnTable(
                        resource_type_name=resource_type_name,
                        raw_arn=str(raw_arn).replace("${Partition}", "aws"),
                        arn='arn',
                        partition='aws',
                        service=get_service_from_arn(raw_arn),
                        region=get_region_from_arn(raw_arn),
                        account=get_account_from_arn(raw_arn),
                        resource=get_resource_from_arn(raw_arn),
                        resource_path=resource_path,
                        condition_keys=condition_keys
                    ))
    db_session.commit()
def _first_arn_row(db_session, service, resource_type_name):
    """Return the first ArnTable.raw_arn result for this service and resource type, or None."""
    return db_session.query(ArnTable.raw_arn).filter(
        ArnTable.service.ilike(service),
        ArnTable.resource_type_name.like(resource_type_name)).first()


def _normalize_multi_value_cell(cell, missing):
    """Return `missing` for an empty cell, comma-separate a cell containing a space, else return it as-is."""
    if cell is None:
        return missing
    if ' ' in cell:
        # Same helper is used for condition keys and dependent actions.
        return get_comma_separated_condition_keys(cell)
    return cell


def build_action_table(db_session, service):
    """
    Builds the action table in the SQLite database.

    See the first Table on any service-specific page in the Actions, Resources,
    and Condition Keys documentation. That information is scraped, parsed, and
    stored in the SQLite database using this function.

    Only tables with both 'Actions' and 'Access Level' columns are processed.
    Rows whose resource type has no matching ArnTable entry are skipped.

    :param db_session: Database session object
    :param service: AWS Service to query. This can be called in a loop or for
        a single service.
    :return: None. Rows are added to the session and committed.
    """
    directory = os.path.abspath(os.path.dirname(__file__)) + '/data/docs/'
    html_list = get_html(directory, service)
    for df_list in html_list:
        for df in df_list:
            # Actions table only; every other table (including the resource
            # types table) is ignored by this function.
            if 'Actions' not in df or 'Access Level' not in df:
                continue
            # orient='split' exposes the rows as a list under the 'data' key.
            table = json.loads(df.to_json(orient='split'))
            for row in table['data']:
                resource_cell = row[3]
                if resource_cell is None:
                    # If the cell is blank, that indicates it needs wildcard
                    resource_type_name = 'None'
                    resource_type_name_append_wildcard = 'False'
                    resource_arn_format = '*'
                elif '*' in resource_cell:
                    # Resource type name has a wildcard suffix - i.e.,
                    # parameter* instead of parameter. Set the append_wildcard
                    # flag and drop the final character to make searching
                    # easier (assumes the '*' is the last character).
                    resource_type_name = resource_cell[:-1]
                    resource_type_name_append_wildcard = 'True'
                    arn_row = _first_arn_row(
                        db_session, service, resource_type_name)
                    if arn_row is None:
                        # For EC2 RunInstances, ResourceTypes have some
                        # duplicates. The Resource Types (*required) column
                        # has duplicates and the Access Level has `nan`;
                        # rows without a matching ARN entry are skipped.
                        continue
                    resource_arn_format = arn_row.raw_arn
                else:
                    resource_type_name = resource_cell
                    resource_type_name_append_wildcard = 'False'
                    arn_row = _first_arn_row(
                        db_session, service, resource_cell)
                    if arn_row is None:
                        continue
                    raw_arn = arn_row.raw_arn
                    # Drop the final character when the stored ARN contains
                    # a wildcard.
                    resource_arn_format = (
                        raw_arn[:-1] if '*' in raw_arn else raw_arn)

                # For lambda:InvokeFunction, the cell is
                # 'lambda:InvokeFunction [permission only]'. Keep only the
                # text before the first space; partition() returns the whole
                # string when there is no space.
                action_name = row[0].partition(' ')[0]

                # Condition keys: in order to avoid errors with NULL Database
                # entries, an empty cell is stored as the string 'None'.
                condition_keys = _normalize_multi_value_cell(row[4], 'None')
                # Dependent actions: an empty cell stays None.
                dependent_actions = _normalize_multi_value_cell(row[5], None)

                db_session.add(ActionTable(
                    service=service,
                    name=str.lower(action_name),
                    description=row[1],
                    access_level=row[2],
                    resource_type_name=resource_type_name,
                    resource_type_name_append_wildcard=resource_type_name_append_wildcard,
                    resource_arn_format=str(resource_arn_format),
                    condition_keys=condition_keys,
                    dependent_actions=dependent_actions
                ))
                # Committed once per stored row.
                db_session.commit()
    db_session.commit()