def main(rule_name=None):
    RUN_METADATA = {
        'RUN_ID': RUN_ID,
        'RUN_TYPE': 'ALERT QUERY',
        'START_TIME': datetime.datetime.utcnow(),
    }

    if rule_name:
        metadata = [create_alerts(rule_name)]
    else:
        rules = list(db.load_rules(ALERT_QUERY_POSTFIX))
        metadata = Pool(POOLSIZE).map(create_alerts, rules)

    RUN_METADATA['ROW_COUNT'] = {
        'INSERTED': sum(q['ROW_COUNT']['INSERTED'] for q in metadata),
        'UPDATED': sum(q['ROW_COUNT']['UPDATED'] for q in metadata),
    }
    db.record_metadata(RUN_METADATA, table=RUN_METADATA_TABLE)

    try:
        if CLOUDWATCH_METRICS:
            log.metric(
                'Run',
                'SnowAlert',
                [{'Name': 'Component', 'Value': 'Alert Query Runner'}],
                1,
            )
    except Exception as e:
        log.error("Cloudwatch metric logging failed: ", e)
def create_jira_ticket(alert_id, query_id, query_name, environment, sources, actor,
                       object, action, title, event_time, alert_time, description,
                       detector, event_data, severity):
    encrypted_auth = os.environ['JIRA_PASSWORD']

    if len(encrypted_auth) < 100:  # then we treat it as an unencrypted password
        password = encrypted_auth
    else:
        kms = boto3.client('kms', region_name=REGION)
        binary_auth = b64decode(encrypted_auth)
        decrypted_auth = kms.decrypt(CiphertextBlob=binary_auth)
        password = decrypted_auth['Plaintext'].decode()

    user = os.environ['JIRA_USER']
    jira = JIRA(URL, basic_auth=(user, password))

    try:
        event_data = yaml.dump(event_data, indent=4, default_flow_style=False)
    except Exception as e:
        log.error("Error while creating ticket", e)

    body = jira_ticket_body(locals())

    print(f'Creating new JIRA ticket for {title} in project', PROJECT)
    new_issue = jira.create_issue(project=PROJECT,
                                  issuetype={'name': 'Story'},
                                  summary=title,
                                  description=body)
    return new_issue
def handle(alert, type='sms', recipient_phone=None, sender_phone=None, message=None):
    if not os.environ.get('TWILIO_API_SID'):
        log.info("No TWILIO_API_SID in env, skipping handler.")
        return None

    twilio_sid = os.environ["TWILIO_API_SID"]
    twilio_token = vault.decrypt_if_encrypted(os.environ['TWILIO_API_TOKEN'])

    # the recipient phone number and message must be present for Twilio to deliver the SMS
    if recipient_phone is None:
        log.error('Cannot identify assignee phone number')
        return None

    if message is None:
        log.error('SMS Message is empty')
        return None

    log.debug(
        f'Twilio message for recipient with phone number {recipient_phone}', message
    )

    client = Client(twilio_sid, twilio_token)
    response = client.messages.create(body=message, from_=sender_phone, to=recipient_phone)

    return response
def ingest_users(url, headers, landing_table, now):
    while 1:
        response = requests.get(url=url, headers=headers)
        if response.status_code != 200:
            log.error('OKTA REQUEST FAILED: ', response.text)
            return

        result = response.json()
        if result == []:
            break

        db.insert(
            landing_table,
            [{'raw': row, 'event_time': now} for row in result],
        )
        log.info(f'Inserted {len(result)} rows.')
        yield len(result)

        url = ''
        links = requests.utils.parse_header_links(response.headers['Link'])
        for link in links:
            if link['rel'] == 'next':
                url = link['url']

        if len(url) == 0:
            break
def GET(resource, key=None, limit=100, offset=0):
    if key is None:
        key = resource

    log.debug(f'GET {resource} limit={limit} offset={offset}')
    response = requests.get(
        url=f'https://cloud.tenable.com/{resource}',
        params={'limit': limit, 'offset': offset},
        headers={"X-ApiKeys": f"accessKey={token}; secretKey={secret}"},
    )

    if response.status_code != 200:
        log.info(f'response status {response.status_code}: {response.text}')
        return

    result = response.json()
    elements = result.get(key)
    if elements is None:
        log.error(f'no {key} in :', result)
        return

    yield from elements

    pages = result.get('pagination', {})
    total = pages.get('total', 0)
    limit = pages.get('limit', 0)
    offset = pages.get('offset', 0)
    if total > limit + offset:
        yield from GET(resource, key, limit, offset + limit)
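# Hypothetical usage sketch (not part of the original source): the GET generator
# above yields elements one at a time and follows Tenable's pagination by
# recursing whenever total > limit + offset, so callers can simply iterate.
# The resource path and key below are illustrative assumptions.
def example_count_agents():
    return sum(1 for _ in GET('scanners/1/agents', key='agents', limit=500))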
def bail_out(alert_id):
    query = f"UPDATE results.alerts SET handled='no handler' WHERE alert:ALERT_ID='{alert_id}'"
    print('Updating alert table:', query)
    try:
        db.execute(query)
    except Exception as e:
        log.error(e, f"Failed to update alert {alert_id} with handler status")
def create_jira_ticket(alert, assignee=None, custom_field=None):
    if not user:
        return

    try:
        alert['EVENT_DATA'] = yaml.dump(
            alert['EVENT_DATA'], indent=4, default_flow_style=False
        )
    except Exception as e:
        log.error("Error while creating ticket", e)

    body = jira_ticket_body(alert)

    log.info(f'Creating new JIRA ticket for "{alert["TITLE"]}" in project {PROJECT}')

    new_issue = jira.create_issue(
        project=PROJECT,
        issuetype={'name': 'Story'},
        summary=alert['TITLE'],
        description=body,
    )

    if custom_field:
        new_issue.update(fields={custom_field['id']: {'value': custom_field['value']}})

    if assignee:
        jira.assign_issue(new_issue, assignee)

    return new_issue
def main():
    for pipe in db.get_pipes('data'):
        metadata = yaml.load(pipe['comment'])
        if not metadata or metadata.get('type') != 'Azure':
            log.info(f"{pipe['name']} is not an Azure pipe, and will be skipped.")
            continue

        blob_name = metadata['blob']
        account_name = metadata['account']
        pipe_name = pipe['name']
        table = metadata['target']

        sas_token_envar = 'AZURE_SAS_TOKEN_' + metadata.get('suffix', '')
        if sas_token_envar in environ:
            encrypted_sas_token = environ.get(sas_token_envar)
        elif 'encrypted_sas_token' in metadata:
            encrypted_sas_token = metadata['encrypted_sas_token']
        else:
            log.info(f"{pipe['name']} has no azure auth")
            continue

        sas_token = vault.decrypt_if_encrypted(encrypted_sas_token)

        log.info(f"Now working on pipe {pipe_name}")

        endpoint_suffix = metadata.get('endpoint_suffix', 'core.windows.net')
        block_blob_service = BlockBlobService(
            account_name=account_name,
            sas_token=sas_token,
            endpoint_suffix=endpoint_suffix,
        )

        files = block_blob_service.list_blobs(blob_name)

        newest_time = get_timestamp(table)
        new_files = []
        if newest_time:
            for file in files:
                if file.properties.creation_time > newest_time:
                    new_files.append(StagedFile(file.name, None))
        else:
            for file in files:
                new_files.append(StagedFile(file.name, None))

        log.info(new_files)

        # Proxy object that abstracts the Snowpipe REST API
        ingest_manager = SimpleIngestManager(
            account=environ.get('SNOWFLAKE_ACCOUNT'),
            host=f'{environ.get("SNOWFLAKE_ACCOUNT")}.snowflakecomputing.com',
            user=environ.get('SA_USER'),
            pipe=f'SNOWALERT.DATA.{pipe_name}',
            private_key=load_pkb_rsa(PRIVATE_KEY, PRIVATE_KEY_PASSWORD),
        )

        if len(new_files) > 0:
            try:
                response = ingest_manager.ingest_files(new_files)
                log.info(response)
            except Exception as e:
                log.error(e)
                return
def record_ticket_id(ticket_id, alert_id):
    query = f"UPDATE results.alerts SET ticket='{ticket_id}' WHERE alert:ALERT_ID='{alert_id}'"
    print('Updating alert table:', query)
    try:
        db.execute(query)
    except Exception as e:
        log.error(e, f"Failed to update alert {alert_id} with ticket id {ticket_id}")
def run_baseline(name, comment):
    try:
        metadata = yaml.load(comment)
        assert type(metadata) is dict
        source = metadata['log source']
        required_values = metadata['required values']
        code_location = metadata['module name']
        time_filter = metadata['filter']
        time_column = metadata['history']
    except Exception as e:
        log.error(e, f"{name} has invalid metadata: >{metadata}<, skipping")
        return

    with open(f"../baseline_modules/{code_location}/{code_location}.R") as f:
        r_code = f.read()

    r_code = format_code(r_code, required_values)
    frame = query_log_source(source, time_filter, time_column)
    ro.globalenv['input_table'] = frame
    output = ro.r(r_code)
    output = output.to_dict()
    results = unpack(output)

    try:
        log.info(f"{name} generated {len(results)} rows")
        db.insert(f"{DATA_SCHEMA}.{name}", results, overwrite=True)
    except Exception as e:
        log.error("Failed to insert the results into the target table", e)
def log_failure(ctx, suppression_name, e, event_data=None, description=None):
    if event_data is None:
        event_data = f"The suppression '{suppression_name}' failed to execute with error: {e}"
    if description is None:
        description = f"The suppression '{suppression_name}' failed to execute with error: {e}"

    alert = {}
    alert['ALERT_ID'] = uuid.uuid4().hex
    alert['QUERY_ID'] = 'b1d02051dd2c4d62bb75274f2ee5996a'
    alert['QUERY_NAME'] = 'Suppression Runner Failure'
    alert['ENVIRONMENT'] = 'Suppressions'
    alert['SOURCES'] = 'Suppression Runner'
    alert['ACTOR'] = 'Suppression Runner'
    alert['OBJECT'] = suppression_name
    alert['ACTION'] = 'Suppression Execution'
    alert['TITLE'] = 'Suppression Runner Failure'
    alert['EVENT_TIME'] = str(datetime.datetime.utcnow())
    alert['ALERT_TIME'] = str(datetime.datetime.utcnow())
    alert['DESCRIPTION'] = description
    alert['DETECTOR'] = 'Suppression Runner'
    alert['EVENT_DATA'] = event_data
    alert['SEVERITY'] = 'High'

    alerts = []
    alerts.append(json.dumps(alert))

    try:
        log_alerts(ctx, alerts)
        log.error(f"Suppression {suppression_name} failure successfully logged", e)
    except Exception as e:
        print("Failed to log suppression failure")
        log.error("Failed to log suppression failure", e)
def ingest_users(url, headers, landing_table, timestamp):
    while 1:
        response = requests.get(url=url, headers=headers)
        if response.status_code != 200:
            log.error('OKTA REQUEST FAILED: ', response.text)
            return

        result = response.json()
        if result == []:
            break

        db.insert(
            landing_table,
            values=[(row, timestamp) for row in result],
            select='PARSE_JSON(column1), column2',
        )
        log.info(f'Inserted {len(result)} rows.')
        yield len(result)

        url = ''
        links = requests.utils.parse_header_links(response.headers['Link'])
        for link in links:
            if link['rel'] == 'next':
                url = link['url']

        if len(url) == 0:
            break
def log_failure(ctx, query_name, e, event_data=None, description=None):
    if event_data is None:
        event_data = f"The query '{query_name}' failed to execute with error: {e!r}"
    if description is None:
        description = f"The query '{query_name}' failed to execute with error: {e!r}"

    alerts = [
        json.dumps({
            'ALERT_ID': uuid.uuid4().hex,
            'QUERY_ID': '3a3d173a64ca4fcab2d13ac3e6d08522',
            'QUERY_NAME': 'Query Runner Failure',
            'ENVIRONMENT': 'Queries',
            'SOURCES': ['Query Runner'],
            'ACTOR': 'Query Runner',
            'OBJECT': query_name,
            'ACTION': 'Query Execution',
            'TITLE': 'Query Runner Failure',
            'ALERT_TIME': str(datetime.datetime.utcnow()),
            'EVENT_TIME': str(datetime.datetime.utcnow()),
            'EVENT_DATA': event_data,
            'DESCRIPTION': description,
            'DETECTOR': 'Query Runner',
            'SEVERITY': 'High',
        })
    ]

    try:
        log_alerts(ctx, alerts)
        log.info("Query failure logged.", e)
    except Exception as e:
        log.error("Failed to log query failure", e)
def log_failure(suppression_name, e, event_data=None, description=None):
    if event_data is None:
        event_data = f"The suppression '{suppression_name}' failed to execute with error: {e}"
    if description is None:
        description = f"The suppression '{suppression_name}' failed to execute with error: {e}"

    ctx = db.connect()
    alerts = [json.dumps({
        'ALERT_ID': uuid.uuid4().hex,
        'QUERY_ID': 'b1d02051dd2c4d62bb75274f2ee5996a',
        'QUERY_NAME': 'Suppression Runner Failure',
        'ENVIRONMENT': 'Suppressions',
        'SOURCES': 'Suppression Runner',
        'ACTOR': 'Suppression Runner',
        'OBJECT': suppression_name,
        'ACTION': 'Suppression Execution',
        'TITLE': 'Suppression Runner Failure',
        'EVENT_TIME': str(datetime.datetime.utcnow()),
        'ALERT_TIME': str(datetime.datetime.utcnow()),
        'DESCRIPTION': description,
        'DETECTOR': 'Suppression Runner',
        'EVENT_DATA': event_data,
        'SEVERITY': 'High',
    })]

    try:
        log_alerts(ctx, alerts)
        log.error(f"{suppression_name} failure successfully logged", e)
    except Exception as e:
        log.error("Failed to log suppression failure", e)
def main(squelch_name=None):
    RUN_METADATA = {
        'RUN_TYPE': 'ALERT SUPPRESSION',
        'START_TIME': datetime.datetime.utcnow(),
        'RUN_ID': RUN_ID,
    }

    rules = (
        db.load_rules(ALERT_SQUELCH_POSTFIX) if squelch_name is None else [squelch_name]
    )
    for squelch_name in rules:
        run_suppressions(squelch_name)

    num_rows_updated = next(db.fetch(SET_SUPPRESSED_FALSE))['number of rows updated']
    log.info(
        f'All suppressions done, {num_rows_updated} remaining alerts marked suppressed=FALSE.'
    )

    RUN_METADATA['ROW_COUNT'] = {
        'PASSED': num_rows_updated,
        'SUPPRESSED': sum(m['ROW_COUNT']['SUPPRESSED'] for m in METADATA_HISTORY),
    }
    db.record_metadata(RUN_METADATA, table=RUN_METADATA_TABLE)

    try:
        if CLOUDWATCH_METRICS:
            log.metric(
                'Run',
                'SnowAlert',
                [{'Name': 'Component', 'Value': 'Alert Suppression Runner'}],
                1,
            )
    except Exception as e:
        log.error("Cloudwatch metric logging failed: ", e)
def message_template(vars):
    payload = None

    # if we have Slack user data, send it to template
    if 'user' in vars:
        params = {
            'alert': vars['alert'],
            'properties': vars['properties'],
            'user': vars['user'],
        }
    else:
        params = {'alert': vars['alert'], 'properties': vars['properties']}

    try:
        # retrieve Slack message structure from javascript UDF
        rows = db.connect_and_fetchall(
            "select " + vars['template'] + "(parse_json('" + json.dumps(params) + "'))"
        )
        row = rows[1]

        if len(row) > 0:
            log.debug(f"Template {vars['template']}", ''.join(row[0]))
            payload = json.loads(''.join(row[0]))
        else:
            log.error(f"Error loading javascript template {vars['template']}")
            raise Exception("Error loading javascript template " + vars['template'])
    except Exception as e:
        log.error("Error loading javascript template", e)
        raise

    return payload
def get_timestamp():
    # Once pipelines are more strongly integrated with the installer, this table should be a variable
    timestamp_query = f"""
        SELECT EVENT_TIME FROM {OKTA_TABLE}
        WHERE EVENT_TIME IS NOT NULL
        ORDER BY EVENT_TIME DESC
        LIMIT 1
        """
    try:
        _, ts = db.connect_and_fetchall(timestamp_query)
        log.info(ts)
        ts = ts[0][0]
        ts = ts.strftime("%Y-%m-%dT%H:%M:%S.000Z")
        log.info(ts)
        if len(ts) < 1:
            log.error(
                "The okta timestamp is too short or doesn't exist; defaulting to one hour ago"
            )
            ts = datetime.datetime.now() - datetime.timedelta(hours=1)
            ts = ts.strftime("%Y-%m-%dT%H:%M:%S.000Z")
    except Exception as e:
        log.error(
            "Unable to find a timestamp of most recent okta log, defaulting to one hour ago",
            e,
        )
        ts = datetime.datetime.now() - datetime.timedelta(hours=1)
        ts = ts.strftime("%Y-%m-%dT%H:%M:%S.000Z")

    ret = {'since': ts}
    log.info(ret)
    return ret
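# Hypothetical usage sketch (not part of the original source): get_timestamp above
# returns a dict shaped for the Okta API's 'since' query parameter, so a poller can
# resume from the most recent event already landed. OKTA_URL and the header
# construction here are assumptions.
#
#     response = requests.get(
#         url=OKTA_URL,
#         headers={'Authorization': f'SSWS {api_key}'},
#         params=get_timestamp(),
#     )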
def main(rule_name=None):
    RUN_METADATA = {
        'RUN_ID': RUN_ID,
        'RUN_TYPE': 'ALERT QUERY',
        'START_TIME': datetime.datetime.utcnow(),
    }

    ctx = db.connect_and_execute("ALTER SESSION SET USE_CACHED_RESULT=FALSE;")

    if rule_name:
        create_alerts(ctx, rule_name)
    else:
        for rule_name in db.load_rules(ctx, ALERT_QUERY_POSTFIX):
            create_alerts(ctx, rule_name)

    RUN_METADATA['ROW_COUNT'] = {
        'INSERTED': sum(q['ROW_COUNT']['INSERTED'] for q in QUERY_HISTORY),
        'UPDATED': sum(q['ROW_COUNT']['UPDATED'] for q in QUERY_HISTORY),
    }
    log.metadata_record(ctx, RUN_METADATA, table=RUN_METADATA_TABLE)

    try:
        if CLOUDWATCH_METRICS:
            log.metric(
                'Run',
                'SnowAlert',
                [{'Name': 'Component', 'Value': 'Alert Query Runner'}],
                1,
            )
    except Exception as e:
        log.error("Cloudwatch metric logging failed: ", e)
def run_suppression(squelch_name):
    metadata = {
        'QUERY_NAME': squelch_name,
        'RUN_ID': RUN_ID,
        'ATTEMPTS': 1,
        'START_TIME': datetime.datetime.utcnow(),
        'ROW_COUNT': {'SUPPRESSED': 0},
    }
    log.info(f"{squelch_name} processing...")

    try:
        query = VIOLATION_SUPPRESSION_QUERY.format(squelch_name=squelch_name)
        num_violations_suppressed = next(db.fetch(query))['number of rows updated']
        log.info(f"{squelch_name} updated {num_violations_suppressed} rows.")
        metadata['ROW_COUNT']['SUPPRESSED'] = num_violations_suppressed
        db.record_metadata(metadata, table=QUERY_METADATA_TABLE)
        RULE_METADATA_RECORDS.append(metadata)
    except Exception as e:
        db.record_metadata(metadata, table=QUERY_METADATA_TABLE, e=e)
        log.error(f"Suppression query {squelch_name} execution failed.", e)

    print(f"Suppression query {squelch_name} executed")
def GET(resource, name, limit=100, offset=0):
    response = requests.get(
        url=f'https://cloud.tenable.com/scanners/1/{resource}',
        params={'limit': limit, 'offset': offset},
        headers={"X-ApiKeys": f"accessKey={token}; secretKey={secret}"},
    )
    result = response.json()
    elements = result.get(name)
    if elements is None:
        log.error(f'no {name} in :', result)
        return

    yield from elements

    pages = result.get('pagination', {})
    total = pages.get('total', 0)
    limit = pages.get('limit', 0)
    offset = pages.get('offset', 0)
    if total > limit + offset:
        yield from GET(resource, name, limit, offset + limit)
def connection_run(connection_table, run_now=False):
    table_name = connection_table['name']
    table_comment = connection_table['comment']

    log.info(f"-- START DC {table_name} --")
    try:
        metadata = {'START_TIME': datetime.utcnow()}
        options = yaml.safe_load(table_comment) or {}

        if 'schedule' in options:
            schedule = options['schedule']
            now = datetime.now()
            if not run_now and not time_to_run(schedule, now):
                log.info(f'not scheduled: {schedule} at {now}')
                log.info("-- END DC --")
                return

        if 'module' not in options:
            log.info('no module in options')
            log.info("-- END DC --")
            return

        module = options['module']

        metadata.update({
            'RUN_ID': RUN_ID,
            'TYPE': module,
            'LANDING_TABLE': table_name,
        })

        connector = importlib.import_module(f"connectors.{module}")

        for module_option in connector.CONNECTION_OPTIONS:
            name = module_option['name']
            if module_option.get('secret') and name in options:
                options[name] = vault.decrypt_if_encrypted(options[name])
            if module_option.get('type') == 'json':
                options[name] = json.loads(options[name])
            if module_option.get('type') == 'list':
                if type(options[name]) is str:
                    options[name] = options[name].split(',')
            if module_option.get('type') == 'int':
                options[name] = int(options[name])

        if callable(getattr(connector, 'ingest', None)):
            db.record_metadata(metadata, table=DC_METADATA_TABLE)
            result = do_ingest(connector, table_name, options)
            if result is not None:
                metadata['INGEST_COUNT'] = result
            else:
                metadata['INGESTED'] = result
            db.record_metadata(metadata, table=DC_METADATA_TABLE)

    except Exception as e:
        log.error(f"Error loading logs into {table_name}: ", e)
        db.record_metadata(metadata, table=DC_METADATA_TABLE, e=e)

    log.info("-- END DC --")
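# Hypothetical sketch (not from the original source): connection_run above expects
# each connector module to expose CONNECTION_OPTIONS describing its options, which
# it decrypts and coerces by declared type before calling the connector's ingest.
# The option names and this example connector are assumptions, shown only to
# illustrate the coercion rules applied above.
CONNECTION_OPTIONS = [
    {'name': 'api_token', 'secret': True},      # decrypted via vault.decrypt_if_encrypted
    {'name': 'accounts', 'type': 'list'},       # 'a,b,c' is split into ['a', 'b', 'c']
    {'name': 'page_size', 'type': 'int'},       # coerced with int()
    {'name': 'extra_filters', 'type': 'json'},  # parsed with json.loads()
]

def ingest(table_name, options):
    # connection_run calls this (via do_ingest) once the options are normalized
    yield 0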
def flag_remaining_alerts(ctx) -> List[str]:
    try:
        query = f"UPDATE {ALERTS_TABLE} SET suppressed=FALSE WHERE suppressed IS NULL;"
        suppression_view_list = ctx.cursor().execute(query)
    except Exception as e:
        log.error("Failed to flag remaining alerts as unsuppressed", e)
        return []

    return [name[1] for name in suppression_view_list]
def get_newest_timestamp():
    # check table in snowflake and get most recent timestamp
    query = f"SELECT raw FROM {AGARI_TABLE} ORDER BY event_time DESC LIMIT 1"
    try:
        return list(db.fetch(query))[0]['RAW']['date']
    except Exception:
        log.error("no earlier data found")
        return None
def run_baseline(name, comment):
    from rpy2 import robjects as ro

    try:
        metadata = yaml.safe_load(comment)
        assert type(metadata) is dict
        source = metadata['log source']
        required_values = metadata['required values']
        code_location = metadata['module name']
        time_filter = metadata['filter']
        time_column = metadata['history']
    except Exception as e:
        log.error(e, f"{name} has invalid metadata: >{metadata}<, skipping")
        return

    os.mkdir(FORMATTED_CODE_DIRECTORY)
    files = os.listdir(f'../baseline_modules/{code_location}')
    shutil.copyfile(
        "../baseline_modules/run_module.R",
        f"{FORMATTED_CODE_DIRECTORY}/run_module.R",
    )
    for file in files:
        print(file)
        if not file.startswith('.'):
            with open(f"../baseline_modules/{code_location}/{file}") as f:
                r_code = f.read()
            r_code = format_code(r_code, required_values)
            with open(f"{FORMATTED_CODE_DIRECTORY}/{file}", 'w+') as ff:
                ff.write(r_code)

    with open(f"{FORMATTED_CODE_DIRECTORY}/run_module.R") as fr:
        r_code = fr.read()

    frame = query_log_source(source, time_filter, time_column)
    ro.globalenv['input_table'] = frame
    ro.r(f"setwd('./{FORMATTED_CODE_DIRECTORY}')")
    output = ro.r(r_code)
    output = output.to_dict()
    results = unpack(output)

    # Get the columns of the baseline table; find the timestamp column and pop it from the list
    columns = [row['name'] for row in db.fetch(f'desc table {DATA_SCHEMA}.{name}')]
    columns.remove('EXPORT_TIME')

    try:
        log.info(f"{name} generated {len(results)} rows")
        db.insert(f"{DATA_SCHEMA}.{name}", results, columns=columns, overwrite=True)
    except Exception as e:
        log.error("Failed to insert the results into the target table", e)
    finally:
        shutil.rmtree(f"../{FORMATTED_CODE_DIRECTORY}")
def main():
    for name in os.listdir('../ingestion'):
        log.info(f"--- Ingesting using {name}")
        try:
            res = subprocess.call(f"python ../ingestion/{name}", shell=True)
            log.info("subprocess returns: ", res)
            log.info(f"{name} invoked")
        except Exception as e:
            log.error(f"failed to run {name}", e)
def ami_dispatch(
    landing_table,
    aws_access_key='',
    aws_secret_key='',
    accounts=None,
    source_role_arn='',
    destination_role_name='',
    external_id='',
):
    results = 0
    if accounts:
        for account in accounts:
            id = account['ACCOUNT_ID']
            name = account['ACCOUNT_ALIAS']
            target_role = f'arn:aws:iam::{id}:role/{destination_role_name}'
            log.info(f"Using role {target_role}")
            try:
                session = sts_assume_role(source_role_arn, target_role, external_id)
                results += ingest_ami(landing_table, session=session, account=account)
                db.insert(
                    AWS_ACCOUNTS_METADATA,
                    values=[(datetime.utcnow(), RUN_ID, id, name, results)],
                    columns=[
                        'snapshot_at',
                        'run_id',
                        'account_id',
                        'account_alias',
                        'ami_count',
                    ],
                )
            except Exception as e:
                db.insert(
                    AWS_ACCOUNTS_METADATA,
                    values=[(datetime.utcnow(), RUN_ID, id, name, 0, e)],
                    columns=[
                        'snapshot_at',
                        'run_id',
                        'account_id',
                        'account_alias',
                        'ami_count',
                        'error',
                    ],
                )
                log.error(f"Unable to assume role {target_role} with error", e)
    else:
        results += ingest_ami(
            landing_table, aws_access_key=aws_access_key, aws_secret_key=aws_secret_key
        )

    return results
def record_status(results, alert_id):
    try:
        db.execute(
            f"UPDATE results.alerts "
            f"SET handled=PARSE_JSON(%s) "
            f"WHERE alert:ALERT_ID='{alert_id}'",
            params=[json_dumps(results)],
        )
    except Exception as e:
        log.error(e, f"Failed to update alert {alert_id} with status {results}")
def ingest(table_name, options: dict):
    landing_table = f'data.{table_name}'
    connection_type = options['connection_type']

    aws_access_key = options.get('aws_access_key')
    aws_secret_key = options.get('aws_secret_key')

    source_role_arn = options.get('source_role_arn')
    destination_role_name = options.get('destination_role_name')
    external_id = options.get('external_id')
    accounts_connection_name = options.get('accounts_connection_name', '')

    if not accounts_connection_name.startswith('data.'):
        accounts_connection_name = 'data.' + accounts_connection_name

    ingest_of_type = {
        'EC2': ec2_dispatch,
        'SG': sg_dispatch,
        'ELB': elb_dispatch,
        'IAM': iam_dispatch,
        'AMI': ami_dispatch,
    }[connection_type]

    if (
        source_role_arn
        and destination_role_name
        and external_id
        and accounts_connection_name
    ):
        # get accounts list, pass list into ingest ec2
        query = (
            f"SELECT account_id, account_alias "
            f"FROM {accounts_connection_name} "
            f"WHERE created_at = ("
            f"  SELECT MAX(created_at)"
            f"  FROM {accounts_connection_name}"
            f")"
        )
        accounts = db.fetch(query)
        count = ingest_of_type(
            landing_table,
            accounts=accounts,
            source_role_arn=source_role_arn,
            destination_role_name=destination_role_name,
            external_id=external_id,
        )
    elif aws_access_key and aws_secret_key:
        count = ingest_of_type(
            landing_table, aws_access_key=aws_access_key, aws_secret_key=aws_secret_key
        )
        log.info(f'Inserted {count} rows.')
        yield count
    else:
        log.error("No AWS access key pair or cross-account role configuration provided")
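# Hypothetical sketch (not from the original source): ingest above supports two
# authentication modes, chosen from the connection options. Either static keys:
#
#     options = {'connection_type': 'EC2',
#                'aws_access_key': 'AKIA...', 'aws_secret_key': '...'}
#
# or cross-account role assumption driven by an accounts connection table:
#
#     options = {'connection_type': 'AMI',
#                'source_role_arn': 'arn:aws:iam::111111111111:role/audit',
#                'destination_role_name': 'audit-reader',
#                'external_id': 'example-external-id',
#                'accounts_connection_name': 'aws_accounts_connection'}
#
# The specific ARNs, role names, and table name here are illustrative only.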
def query_log_source(source, time_filter, time_column):
    cutoff = f"DATEADD(day, -{time_filter}, CURRENT_TIMESTAMP())"
    query = f"SELECT * FROM {source} WHERE {time_column} > {cutoff};"
    try:
        data = list(db.fetch(query))
    except Exception as e:
        log.error("Failed to query log source: ", e)
        return

    f = pack(data)
    frame = pandas.DataFrame(f)
    pandas2ri.activate()
    r_dataframe = pandas2ri.py2rpy(frame)
    return r_dataframe
def main(script=None):
    scripts = [script] if script else os.listdir('../ingestion')

    for name in scripts:
        log.info(f"--- Ingesting using {name}")
        try:
            res = subprocess.call(f"python ../ingestion/{name}", shell=True)
            log.info("subprocess returns:", res)
        except Exception as e:
            log.error("exception raised:", e)
        finally:
            log.info(f"--- {name} finished")