Example #1
def get_rules():
    if not hmac.compare_digest(request.cookies.get("sid", ""), SECRET):
        return jsonify(rules=[])

    rule_type = request.args.get('type', '%').upper()
    rule_target = request.args.get('target', '%').upper()

    logger.info(f'Fetching {rule_target}_{rule_type} rules...')

    oauth = json.loads(request.headers.get('Authorization') or '{}')
    if not oauth and not dbconfig.PRIVATE_KEY:
        return jsonify(success=False, message='please log in')

    ctx = db.connect(oauth=oauth)
    rules = db.fetch(
        ctx, f"SHOW VIEWS LIKE '%_{rule_target}\_{rule_type}' IN rules")

    return jsonify(rules=[{
        "title": re.sub(
            '_(alert|violation|policy)_(query|suppression|definition)$',
            '', rule['name'], flags=re.I),
        "target": rule['name'].split('_')[-2].upper(),
        "type": rule['name'].split('_')[-1].upper(),
        "body": replace_config_vals(rule['text']),
        "results": (
            list(db.fetch(ctx, f"SELECT * FROM rules.{rule['name']};"))
            if rule['name'].endswith("_POLICY_DEFINITION") else None
        ),
    } for rule in rules if db.is_valid_rule_name(rule['name'])])
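
A standalone sketch of the naming convention get_rules relies on: a rule view is named TITLE_TARGET_TYPE, and the handler recovers the three parts with re.sub and split('_'). The helper below, parse_rule_name, is illustrative and not part of the project:

import re

def parse_rule_name(name):
    # Mirrors the re.sub / split('_') logic in get_rules above.
    title = re.sub(
        '_(alert|violation|policy)_(query|suppression|definition)$',
        '', name, flags=re.I)
    parts = name.split('_')
    return {'title': title, 'target': parts[-2].upper(), 'type': parts[-1].upper()}

# parse_rule_name('FAILED_LOGINS_ALERT_QUERY')
# -> {'title': 'FAILED_LOGINS', 'target': 'ALERT', 'type': 'QUERY'}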
Example #2
def run_suppression_query(squelch_name):
    try:
        query = SUPPRESSION_QUERY.format(suppression_name=squelch_name)
        return next(db.fetch(query, fix_errors=False))['number of rows updated']
    except Exception:
        log.info(f"{squelch_name} warning: query broken, might need 'id' column, trying 'alert:ALERT_ID'.")
        query = OLD_SUPPRESSION_QUERY.format(suppression_name=squelch_name)
        return next(db.fetch(query))['number of rows updated']
Example #3
def handle(alert,
           correlation_id,
           project=PROJECT,
           assignee=None,
           custom_field=None):
    global PROJECT
    PROJECT = project
    if PROJECT == '':
        return "No Jira Project defined"
    if URL == '':
        return "No Jira URL defined."

    CORRELATION_QUERY = f"""
    SELECT *
    FROM results.alerts
    WHERE correlation_id = '{correlation_id}'
      AND iff(alert:HANDLERS is null, ticket is not null, handled is not null)
    ORDER BY EVENT_TIME DESC
    LIMIT 1
    """
    alert_id = alert['ALERT_ID']

    # We check against the correlation ID for alerts in that correlation with the same ticket
    correlated_results = list(
        db.fetch(CORRELATION_QUERY)) if correlation_id else []
    log.info(f"Discovered {len(correlated_results)} correlated results")

    if len(correlated_results) > 0:
        # There is a correlation with a ticket that exists, so we should append to that ticket
        ticket_id = correlated_results[0]['TICKET']
        try:
            ticket_status = check_ticket_status(ticket_id)
        except Exception:
            log.error(f"Failed to get ticket status for {ticket_id}")
            return

        if ticket_status == 'To Do':
            try:
                append_to_body(ticket_id, alert)
            except Exception as e:
                log.error(
                    f"Failed to append alert {alert_id} to ticket {ticket_id}.",
                    e)
                try:
                    ticket_id = create_jira_ticket(alert)
                except Exception as e:
                    log.error(e,
                              f"Failed to create ticket for alert {alert_id}")
                    return e
    else:
        # There is no correlation with a ticket that exists
        # Create a new ticket in JIRA for the alert
        try:
            ticket_id = create_jira_ticket(alert, assignee, custom_field)
        except Exception as e:
            log.error(e, f"Failed to create ticket for alert {alert_id}")
            return e

    record_ticket_id(ticket_id, alert_id)
    return ticket_id
Example #4
def pull_aws_data():
    finished = False
    offset = 0
    limit = 1000000
    aws_writer = None
    with open('aws_inventory.csv', 'w') as fou:
        while not finished:
            data = db.fetch(f'''
                SELECT DISTINCT INSTANCE:InstanceId::string AS instance_id,
                       MIN(DISTINCT CASE WHEN value:"Key" = 'SFROLE' THEN value:"Value" ELSE NULL END) AS role
                FROM (
                    SELECT DISTINCT INSTANCE
                    FROM SNOWALERT.BASE_DATA.EC2_INSTANCE_SNAPSHOTS_T
                    WHERE timestamp > DATEADD(day, -30, CURRENT_TIMESTAMP)
                      AND INSTANCE:"Tags" NOT LIKE '%{{"Key":"SFROLE","Value":"XP"}}%'
                      AND INSTANCE:"Tags" NOT LIKE '%{{"Key":"SFROLE","Value":"IMS_PENDING_SHUTDOWN"}}%'
                ), LATERAL FLATTEN(input => INSTANCE:"Tags")
                GROUP BY instance_id
                HAVING role != 'XP' AND role != 'IMS_PENDING_SHUTDOWN'
                LIMIT {limit} OFFSET {offset}
            ''')
            num_results = 0
            for row in data:
                num_results += 1
                if aws_writer is None:
                    aws_writer = csv.DictWriter(fou, row.keys())
                    aws_writer.writeheader()
                aws_writer.writerow(row)
            if num_results < limit:
                finished = True
            offset += limit
Example #5
def create_metadata_table(table, cols, addition):
    db.create_table(table, cols, ifnotexists=True)
    db.execute(f"GRANT INSERT, SELECT ON {table} TO ROLE {SA_ROLE}")
    table_names = (row['name'] for row in db.fetch(f'desc table {table}'))
    if any(name == addition[0].upper() for name in table_names):
        return
    db.execute(f'ALTER TABLE {table} ADD COLUMN {addition[0]} {addition[1]}')
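
A usage sketch for create_metadata_table; the table name, the cols format, and the added column below are illustrative assumptions, not taken from the project:

# Hypothetical call -- names and the cols format are assumptions for illustration.
create_metadata_table(
    table='data.ingest_metadata',
    cols=[('event_time', 'TIMESTAMP_LTZ'), ('row_count', 'NUMBER')],
    addition=('error_message', 'STRING'),  # only added if the column is missing
)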
Example #6
def create_rule():
    if not hmac.compare_digest(request.cookies.get("sid", ""), SECRET):
        return jsonify(success=False, message="bad sid", rule={})

    data = request.get_json()
    rule_title, rule_type, rule_target, rule_body = data['title'], data['type'], data['target'], data['body']
    logger.info(f'Creating rule {rule_title}_{rule_target}_{rule_type}')

    # support full queries from the frontend, which include the CREATE line and a COMMENT clause
    rule_body = re.sub(r"^CREATE [^\n]+\n", "", rule_body, flags=re.I)
    m = re.match(r"^  COMMENT='((?:\\'|[^'])*)'\nAS\n", rule_body)
    comment, rule_body = (m.group(1), rule_body[m.span()[1]:]) if m else ('', rule_body)
    comment_clause = f"\n  COMMENT='{comment}'\n"

    view_name = f"{rule_title}_{rule_target}_{rule_type}"
    rule_body = f"CREATE OR REPLACE VIEW {RULES_SCHEMA}.{view_name} COPY GRANTS{comment_clause}AS\n{rule_body}"

    try:
        oauth = json.loads(request.headers.get('Authorization') or '{}')
        ctx = db.connect(oauth=oauth, run_preflight_checks=False)
        ctx.cursor().execute(rule_body).fetchall()

        if 'body' in data and 'savedBody' in data:
            data['savedBody'] = rule_body

        data['results'] = (
            list(db.fetch(ctx, f"SELECT * FROM {RULES_SCHEMA}.{view_name};"))
            if view_name.endswith("_POLICY_DEFINITION")
            else None
        )

    except snowflake.connector.errors.ProgrammingError as e:
        return jsonify(success=False, message=e.msg, rule=data)

    return jsonify(success=True, rule=data)
Example #7
def run_suppression(squelch_name):
    metadata = {
        'QUERY_NAME': squelch_name,
        'RUN_ID': RUN_ID,
        'ATTEMPTS': 1,
        'START_TIME': datetime.datetime.utcnow(),
        'ROW_COUNT': {
            'SUPPRESSED': 0
        }
    }
    log.info(f"{squelch_name} processing...")
    try:
        query = VIOLATION_SUPPRESSION_QUERY.format(squelch_name=squelch_name)
        num_violations_suppressed = next(
            db.fetch(query))['number of rows updated']
        log.info(f"{squelch_name} updated {num_violations_suppressed} rows.")
        metadata['ROW_COUNT']['SUPPRESSED'] = num_violations_suppressed
        db.record_metadata(metadata, table=QUERY_METADATA_TABLE)
        RULE_METADATA_RECORDS.append(metadata)

    except Exception as e:
        db.record_metadata(metadata, table=QUERY_METADATA_TABLE, e=e)
        log.error("Suppression query {squelch_name} execution failed.", e)

    print(f"Suppression query {squelch_name} executed")
Example #8
def main():
    RUN_METADATA = {
        'RUN_TYPE': 'VIOLATION SUPPRESSION',
        'START_TIME': datetime.datetime.utcnow(),
        'RUN_ID': RUN_ID,
    }

    for squelch_name in db.load_rules(VIOLATION_SQUELCH_POSTFIX):
        run_suppression(squelch_name)

    num_violations_passed = next(
        db.fetch(SET_SUPPRESSED_FALSE))['number of rows updated']
    RUN_METADATA['ROW_COUNT'] = {
        'SUPPRESSED': sum(
            rmr['ROW_COUNT']['SUPPRESSED'] for rmr in RULE_METADATA_RECORDS),
        'PASSED': num_violations_passed,
    }
    db.record_metadata(RUN_METADATA, table=RUN_METADATA_TABLE)

    if CLOUDWATCH_METRICS:
        log.metric('Run', 'SnowAlert', [{
            'Name': 'Component',
            'Value': 'Violation Suppression Runner'
        }], 1)
Example #9
def main(squelch_name=None):
    RUN_METADATA = {
        'RUN_TYPE': 'ALERT SUPPRESSION',
        'START_TIME': datetime.datetime.utcnow(),
        'RUN_ID': RUN_ID,
    }

    rules = (
        db.load_rules(ALERT_SQUELCH_POSTFIX) if squelch_name is None else [squelch_name]
    )
    for squelch_name in rules:
        run_suppressions(squelch_name)

    num_rows_updated = next(db.fetch(SET_SUPPRESSED_FALSE))['number of rows updated']
    log.info(
        f'All suppressions done, {num_rows_updated} remaining alerts marked suppressed=FALSE.'
    )

    RUN_METADATA['ROW_COUNT'] = {
        'PASSED': num_rows_updated,
        'SUPPRESSED': sum(m['ROW_COUNT']['SUPPRESSED'] for m in METADATA_HISTORY),
    }

    db.record_metadata(RUN_METADATA, table=RUN_METADATA_TABLE)

    try:
        if CLOUDWATCH_METRICS:
            log.metric(
                'Run',
                'SnowAlert',
                [{'Name': 'Component', 'Value': 'Alert Suppression Runner'}],
                1,
            )
    except Exception as e:
        log.error("Cloudwatch metric logging failed: ", e)
Example #10
def finalize(connection_name):
    base_name = f'ldap_{connection_name}'
    pipe = f'data.{base_name}_pipe'
    table = next(
        db.fetch(f"SHOW TABLES LIKE '{base_name}_connection' IN data"))
    options = yaml.safe_load(table['comment'])
    stage = options.get('existing_stage', f'data.{base_name}_stage')

    # IAM change takes 5-15 seconds to take effect
    sleep(5)
    db.retry(
        lambda: db.create_pipe(
            name=pipe,
            sql=(f"COPY INTO data.{base_name}_connection "
                 f"FROM (SELECT $1, $2, $3, $4,"
                 f" to_timestamp_ltz($5, 'mm/dd/yyyy hh24:mi:ss (UTC)'),"
                 f" to_timestamp_ltz($6, 'mm/dd/yyyy hh24:mi:ss (UTC)'),"
                 f" to_timestamp_ltz($7, 'mm/dd/yyyy hh24:mi:ss (UTC)'),"
                 f" to_timestamp_ltz($8, 'mm/dd/yyyy hh24:mi:ss (UTC)') "
                 f"FROM @{stage}/)"),
            replace=True,
            autoingest=True,
        ),
        n=10,
        sleep_seconds_btw_retry=1,
    )

    pipe_description = next(db.fetch(f'DESC PIPE {pipe}'), None)
    if pipe_description is None:
        return {
            'newStage': 'error',
            'newMessage': f"{pipe} does not exist; please reach out to Snowflake Security for assistance.",
        }

    else:
        sqs_arn = pipe_description['notification_channel']
        return {
            'newStage': 'finalized',
            'newMessage': (
                f"Please add this SQS Queue ARN to the bucket event notification "
                f"channel for all object create events:\n\n  {sqs_arn}\n\n"
                f"If you'd like to backfill the table, please run\n\n  ALTER PIPE {pipe} REFRESH;"
            ),
        }
Example #11
def finalize(connection_name):
    base_name = f'osquery_log_{connection_name}'
    table = next(
        db.fetch(f"SHOW TABLES LIKE '{base_name}_connection' IN data"))
    options = yaml.safe_load(table['comment'])
    stage = options.get('existing_stage', f'data.{base_name}_stage')
    pipe = f'data.{base_name}_pipe'

    # IAM change takes 5-15 seconds to take effect
    sleep(5)
    db.retry(
        lambda: db.create_pipe(
            name=pipe,
            sql=(f'COPY INTO data.{base_name}_connection '
                 f'FROM (SELECT PARSE_JSON($1), HASH($1), $1:unixTime::TIMESTAMP_LTZ(9), $1:action, '
                 f'$1:calendarTime, $1:columns, $1:counter, $1:epoch, $1:hostIdentifier, $1:instance_id, '
                 f'$1:name, $1:unixTime, $1:decorations '
                 f'FROM @{stage}/)'),
            replace=True,
            autoingest=True,
        ),
        n=10,
        sleep_seconds_btw_retry=1,
    )

    pipe_description = next(db.fetch(f'DESC PIPE {pipe}'), None)
    if pipe_description is None:
        return {
            'newStage': 'error',
            'newMessage': f"{pipe} does not exist; please reach out to Snowflake Security for assistance.",
        }

    else:
        sqs_arn = pipe_description['notification_channel']
        return {
            'newStage': 'finalized',
            'newMessage': (
                f"Please add this SQS Queue ARN to the bucket event notification "
                f"channel for all object create events:\n\n  {sqs_arn}\n\n"
                f"If you'd like to backfill the table, please run\n\n  ALTER PIPE {pipe} REFRESH;"
            ),
        }
Example #12
def main():
    db.connect()
    for table in db.fetch(f"show tables like '%_BASELINE' in {DATA_SCHEMA}"):
        name = table['name']
        comment = table['comment']
        log.info(f'{name} started...')
        run_baseline(name, comment)
        log.info(f'{name} done.')
Example #13
def get_newest_timestamp():
    # check table in snowflake and get most recent timestamp
    query = f"SELECT raw FROM {AGARI_TABLE} ORDER BY event_time DESC LIMIT 1"
    try:
        return list(db.fetch(query))[0]['RAW']['date']
    except Exception:
        log.error("no earlier data found")
        return None
Example #14
def grab_osquery_details(deployments):
    osquery_schema = environ.get('SECURITY_SCHEMA')
    osquery_query = db.fetch("SHOW VIEWS LIKE 'osquery_v' IN {}".format(osquery_schema))
    query_text = None
    for row in osquery_query:
        query_text = row["text"]
    query_text = query_text.split('union all')
    for query in query_text:
        deployments.append(re.findall('from (.*)', query)[0])
Example #15
def run_baseline(name, comment):
    from rpy2 import robjects as ro
    metadata = None  # defined up front so the error handler below can reference it
    try:
        metadata = yaml.safe_load(comment)
        assert type(metadata) is dict

        source = metadata['log source']
        required_values = metadata['required values']
        code_location = metadata['module name']
        time_filter = metadata['filter']
        time_column = metadata['history']

    except Exception as e:
        log.error(e, f"{name} has invalid metadata: >{metadata}<, skipping")
        return

    os.mkdir(FORMATTED_CODE_DIRECTORY)
    files = os.listdir(f'../baseline_modules/{code_location}')

    shutil.copyfile("../baseline_modules/run_module.R",
                    f"{FORMATTED_CODE_DIRECTORY}/run_module.R")

    for file in files:
        print(file)
        if not file.startswith('.'):
            with open(f"../baseline_modules/{code_location}/{file}") as f:
                r_code = f.read()
            r_code = format_code(r_code, required_values)
            with open(f"{FORMATTED_CODE_DIRECTORY}/{file}", 'w+') as ff:
                ff.write(r_code)

    with open(f"{FORMATTED_CODE_DIRECTORY}/run_module.R") as fr:
        r_code = fr.read()
    frame = query_log_source(source, time_filter, time_column)
    ro.globalenv['input_table'] = frame
    ro.r(f"setwd('./{FORMATTED_CODE_DIRECTORY}')")
    output = ro.r(r_code)
    output = output.to_dict()

    results = unpack(output)

    # Get the columns of the baseline table; find the timestamp column and pop it from the list

    columns = [
        row['name'] for row in db.fetch(f'desc table {DATA_SCHEMA}.{name}')
    ]
    columns.remove('EXPORT_TIME')
    try:
        log.info(f"{name} generated {len(results)} rows")
        db.insert(f"{DATA_SCHEMA}.{name}",
                  results,
                  columns=columns,
                  overwrite=True)
    except Exception as e:
        log.error("Failed to insert the results into the target table", e)
    finally:
        shutil.rmtree(f"../{FORMATTED_CODE_DIRECTORY}")
Example #16
def run_suppression_query(squelch_name):
    try:
        query = SUPPRESSION_QUERY.format(suppression_name=squelch_name)
        return next(db.fetch(query, fix_errors=False))['number of rows updated']

    except Exception as e:
        log.info(e, f"{squelch_name} query broken, attempting fallback")
        query = OLD_SUPPRESSION_QUERY.format(suppression_name=squelch_name)
        try:
            result = next(db.fetch(query))
        except StopIteration:
            result = None

        if result is None:
            # if neither query worked, re-raise the original error
            raise

        return result['number of rows updated']
Example #17
def get_timestamp(table):

    timestamp_query = f"""
        SELECT loaded_on
        FROM {table}
        ORDER BY loaded_on DESC
        LIMIT 1
        """
    ts = next(db.fetch(timestamp_query), None)
    return ts['LOADED_ON'] if ts else None
Example #18
def create(options):
    base_table = options['base_table']
    groups = list(
        filter(None, [g.strip() for g in options.get('groups', '').split(',')])
    )
    days = int(options.get('history_size_days', '30'))
    return [
        next(db.fetch(sql, fix_errors=False), {}).get('status')
        for sql in generate_baseline_sql(base_table, groups, days)
    ]
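
A usage sketch for create above; the keys match those the function reads, while the table and column values are made-up examples:

# Illustrative options -- 'base_table' and 'groups' values are hypothetical.
statuses = create({
    'base_table': 'data.login_history',
    'groups': 'user_name, client_ip',   # comma-separated; whitespace is stripped
    'history_size_days': '90',          # parsed with int(); defaults to '30'
})
# statuses holds one 'status' value (or None) per generated baseline statement.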
Example #19
def ingest(table_name, options: dict):
    landing_table = f'data.{table_name}'
    connection_type = options['connection_type']

    aws_access_key = options.get('aws_access_key')
    aws_secret_key = options.get('aws_secret_key')

    source_role_arn = options.get('source_role_arn')
    destination_role_name = options.get('destination_role_name')
    external_id = options.get('external_id')
    accounts_connection_name = options.get('accounts_connection_name', '')

    if not accounts_connection_name.startswith('data.'):
        accounts_connection_name = 'data.' + accounts_connection_name

    ingest_of_type = {
        'EC2': ec2_dispatch,
        'SG': sg_dispatch,
        'ELB': elb_dispatch,
        'IAM': iam_dispatch,
        'AMI': ami_dispatch,
    }[connection_type]

    if (
        source_role_arn
        and destination_role_name
        and external_id
        and accounts_connection_name
    ):
        # get accounts list, pass list into ingest ec2
        query = (
            f"SELECT account_id, account_alias "
            f"FROM {accounts_connection_name} "
            f"WHERE created_at = ("
            f"  SELECT MAX(created_at)"
            f"  FROM {accounts_connection_name}"
            f")"
        )
        accounts = db.fetch(query)
        count = ingest_of_type(
            landing_table,
            accounts=accounts,
            source_role_arn=source_role_arn,
            destination_role_name=destination_role_name,
            external_id=external_id,
        )

    elif aws_access_key and aws_secret_key:
        count = ingest_of_type(
            landing_table, aws_access_key=aws_access_key, aws_secret_key=aws_secret_key
        )
        log.info(f'Inserted {count} rows.')
        yield count
    else:
        log.error()
Example #20
def test_violation_tags_in_rule_tags_view(violation_queries):
    test_violation_tag_row = next(
        db.fetch(
            "SELECT * FROM data.rule_tags WHERE tag='test-violation-tag'"))
    assert test_violation_tag_row == {
        'TYPE': 'QUERY',
        'TARGET': 'VIOLATION',
        'RULE_NAME': '_TEST1_VIOLATION_QUERY',
        'RULE_ID': 'test-violation-query-id',
        'TAG': 'test-violation-tag',
    }
Example #21
def query_log_source(source, time_filter, time_column):
    cutoff = f"DATEADD(day, -{time_filter}, CURRENT_TIMESTAMP())"
    query = f"SELECT * FROM {source} WHERE {time_column} > {cutoff};"
    data = []  # fall back to an empty result set if the query fails
    try:
        data = list(db.fetch(query))
    except Exception as e:
        log.error("Failed to query log source: ", e)
    f = pack(data)
    frame = pandas.DataFrame(f)
    pandas2ri.activate()
    r_dataframe = pandas2ri.py2rpy(frame)
    return r_dataframe
Example #22
def main(connection_table=None, run_now=False):
    if connection_table is not None:
        # for a single table, we ignore schedule and run now
        run_now = True
    else:
        connection_table = "%_CONNECTION"

    tables = list(db.fetch(f"SHOW TABLES LIKE '{connection_table}' IN data"))
    if len(tables) == 1:
        connection_run(tables[0], run_now=run_now)
    else:
        Pool(DC_POOLSIZE).map(connection_run, tables)
Example #23
def main(baseline='%_BASELINE'):
    db.connect()
    baseline_tables = list(
        db.fetch(f"show tables like '{baseline}' in {DATA_SCHEMA}"))
    for table in baseline_tables:
        name = table['name']
        comment = table['comment']
        log.info(f'{name} started...')
        if len(baseline_tables) > 1:
            subprocess.call(f"python ./run.py baseline {name}", shell=True)
        else:
            run_baseline(name, comment)
        log.info(f'{name} done.')
Example #24
def create_rule():
    if not hmac.compare_digest(request.cookies.get("sid", ""), SECRET):
        return jsonify(success=False, message="bad sid", rule={})

    data = request.get_json()
    rule_title, rule_type, rule_target, rule_body = (
        data['title'],
        data['type'],
        data['target'],
        data['body'],
    )
    logger.info(f'Creating rule {rule_title}_{rule_target}_{rule_type}')

    for name, value in CONFIG_VARS:
        rule_body = rule_body.replace(name, value)

    # support full queries from the frontend, which include the CREATE line and a COMMENT clause
    rule_body = re.sub(r"^CREATE [^\n]+\n", "", rule_body, flags=re.I)
    m = re.match(r"^  COMMENT='((?:\\'|[^'])*)'\nAS\n", rule_body)
    comment, rule_body = ((m.group(1), rule_body[m.span()[1]:]) if m else
                          ('', rule_body))
    comment_clause = f"\n  COMMENT='{comment}'\n"

    view_name = f"rules.{rule_title}_{rule_target}_{rule_type}"
    rule_body = (
        f"CREATE OR REPLACE VIEW {view_name} COPY GRANTS{comment_clause}AS\n{rule_body}"
    )

    try:
        oauth = json.loads(request.headers.get('Authorization') or '{}')
        ctx = db.connect(oauth=oauth)
        ctx.cursor().execute(rule_body).fetchall()

        try:  # errors expected, e.g. if permissions managed by future grants on schema
            ctx.cursor().execute(
                f"GRANT SELECT ON VIEW {view_name} TO ROLE {dbconfig.ROLE}"
            ).fetchall()
        except Exception:
            pass

        if 'body' in data and 'savedBody' in data:
            data['savedBody'] = replace_config_vals(rule_body)

        data['results'] = (list(db.fetch(ctx, f"SELECT * FROM {view_name};"))
                           if view_name.endswith("_POLICY_DEFINITION") else
                           None)

    except snowflake.connector.errors.ProgrammingError as e:
        return jsonify(success=False, message=e.msg, rule=data)

    return jsonify(success=True, rule=data)
Example #25
def main():
    reqenv = {'ZENGRC_ID', 'ZENGRC_SECRET', 'ZENGRC_URL', 'ZENGRC_TABLE'}
    missingenv = reqenv - set(environ)
    if missingenv:
        log.fatal(f"missing env vars: {missingenv}")

    print("starting")

    last_time = list(db.fetch(GET_FRESH_ENTRIES_QUERY))

    if len(last_time) == 0:
        for e in ENDPOINTS:
            process_endpoint(e)
    else:
        log.info("Not time to ingest ZenGRC data")
Example #26
def main(connection_table="%_CONNECTION"):
    for table in db.fetch(f"SHOW TABLES LIKE '{connection_table}' IN data"):
        table_name = table['name']
        table_comment = table['comment']

        log.info(f"-- START DC {table_name} --")
        try:
            metadata = {'START_TIME': datetime.utcnow()}
            options = yaml.safe_load(table_comment) or {}

            if 'module' in options:
                module = options['module']

                metadata.update({
                    'RUN_ID': RUN_ID,
                    'TYPE': module,
                    'LANDING_TABLE': table_name,
                    'INGEST_COUNT': 0
                })

                connector = importlib.import_module(f"connectors.{module}")

                for module_option in connector.CONNECTION_OPTIONS:
                    name = module_option['name']
                    if module_option.get('secret') and name in options:
                        options[name] = vault.decrypt_if_encrypted(options[name])
                        if module_option.get('type') == 'json':
                            options[name] = json.loads(options[name])

                if callable(getattr(connector, 'ingest', None)):
                    ingested = connector.ingest(table_name, options)
                    if isinstance(ingested, int):
                        metadata['INGEST_COUNT'] += ingested
                    elif isinstance(ingested, GeneratorType):
                        for n in ingested:
                            metadata['INGEST_COUNT'] += n
                    else:
                        metadata['INGESTED'] = ingested

                db.record_metadata(metadata, table=DC_METADATA_TABLE)

        except Exception as e:
            log.error(f"Error loading logs into {table_name}: ", e)
            db.record_metadata(metadata, table=DC_METADATA_TABLE, e=e)

        log.info(f"-- END DC --")
Example #27
def finalize(connection_name):
    base_name = f'GITHUB_WEBHOOKS_S3_{connection_name}_EVENTS'.upper()
    pipe = f'data.{base_name}_PIPE'

    # IAM change takes 5-15 seconds to take effect
    sleep(5)
    db.retry(
        lambda: db.create_pipe(
            name=pipe,
            sql=(f"COPY INTO data.{base_name}_connection "
                 f"FROM (SELECT current_timestamp, $1, HASH($1), $1:ref, $1:before, $1:after, $1:created, $1:deleted,"
                 f"$1:forced, $1:base_ref, $1:compare, $1:commits, $1:head_commit,"
                 f"$1:repository, $1:pusher, $1:organization, $1:sender, $1:action, $1:check_run, $1:check_suite, $1:number, $1:pull_request,"
                 f"$1:label, $1:requested_team, $1:ref_type, $1:master_branch, $1:description, $1:pusher_type, $1:review, $1:changes, $1:comment, "
                 f"$1:issue, $1:id, $1:sha, $1:name, $1:target_url, $1:context, $1:state, $1:commit, $1:branches, $1:created_at, $1:updated_at, $1:assignee, "
                 f"$1:release, $1:membership, $1:alert, $1:scope, $1:member, $1:requested_reviewer, $1:team, $1:starred_at, $1:pages, $1:project_card, "
                 f"$1:build, $1:deployment_status, $1:deployment, $1:forkee, $1:milestone, $1:key, $1:project_column, $1:status, $1:avatar_url "
                 f"FROM @data.{base_name}_stage/)"),
            replace=True,
            autoingest=True,
        ),
        n=10,
        sleep_seconds_btw_retry=1,
    )

    pipe_description = next(db.fetch(f'DESC PIPE {pipe}'), None)
    if pipe_description is None:
        return {
            'newStage': 'error',
            'newMessage': f"{pipe} does not exist; please reach out to Snowflake Security for assistance.",
        }
    else:
        sqs_arn = pipe_description['notification_channel']

    return {
        'newStage': 'finalized',
        'newMessage': (
            f"Please add this SQS Queue ARN to the bucket event notification "
            f"channel for all object create events:\n\n  {sqs_arn}\n\n"
            f"To backfill the landing table with existing data, please run:\n\n  ALTER PIPE {pipe} REFRESH;\n\n"
        ),
    }
Example #28
def ingest_agents(table_name, options):
    last_export_time = next(
        db.fetch(
            f'SELECT MAX(export_at) as time FROM data.{table_name}'))['TIME']
    timestamp = datetime.now(timezone.utc)

    if (last_export_time is None
            or (timestamp - last_export_time).total_seconds() > 86400):
        agents = {a['uuid']: a for a in get_agent_data()}.values()
        for page in groups_of(10000, agents):
            db.insert(
                table=f'data.{table_name}',
                values=[(agent, timestamp) for agent in page],
                select=db.derive_insert_select(AGENT_LANDING_TABLE),
                columns=db.derive_insert_columns(AGENT_LANDING_TABLE),
            )
    else:
        log.info('Not time to import Tenable Agents')
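
groups_of is not defined in these examples; a minimal sketch of a chunking helper with the shape implied by its use above (successive groups of up to n agents), which may differ from the project's own implementation:

from itertools import islice

def groups_of(n, iterable):
    # Yield successive lists of up to n items from iterable.
    it = iter(iterable)
    while True:
        chunk = list(islice(it, n))
        if not chunk:
            return
        yield chunk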
Example #29
def ingest_agents(table_name, options):
    last_export_time = next(
        db.fetch(
            f'SELECT MAX(export_at) as time FROM data.{table_name}'))['TIME']
    timestamp = datetime.now(timezone.utc)

    if (last_export_time is None
            or (timestamp - last_export_time).total_seconds() > 86400):
        all_agents = sorted(get_agent_data(),
                            key=lambda a: a.get('last_connect', 0))
        unique_agents = {a['uuid']: a for a in all_agents}.values()
        rows = [{'raw': ua, 'export_at': timestamp} for ua in unique_agents]
        log.debug(f'inserting {len(unique_agents)} unique (by uuid) agents')
        db.insert(f'data.{table_name}', rows)
        return len(rows)
    else:
        log.info('Not time to import Tenable Agents')
        return 0
Example #30
def query_snowflake(query):
    global writer  # , lock
    finished = False
    offset = 0
    limit = 10000000
    while not finished:
        num_results = 0
        query_with_limit = query + " limit %s offset %s" % (limit, offset)
        data = db.fetch(query_with_limit)
        for row in data:
            num_results += 1
            # with lock:
            if writer is None:
                writer = csv.DictWriter(sys.stdout, row.keys())
                writer.writeheader()
            writer.writerow(row)
        if num_results < limit:
            finished = True
        offset += limit
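
Examples #4 and #30 share the same pagination idiom: fetch LIMIT/OFFSET pages until a page comes back with fewer rows than the limit. A generic sketch of that loop, assuming only a fetch(sql) callable that yields dict-like rows:

import csv
import sys

def paginate_to_csv(fetch, base_query, out=sys.stdout, limit=10000):
    # Page through base_query with LIMIT/OFFSET, writing rows as CSV;
    # the header is taken from the keys of the first row seen.
    writer = None
    offset = 0
    while True:
        num_results = 0
        for row in fetch(f"{base_query} limit {limit} offset {offset}"):
            num_results += 1
            if writer is None:
                writer = csv.DictWriter(out, row.keys())
                writer.writeheader()
            writer.writerow(row)
        if num_results < limit:
            break
        offset += limit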