def run(self):
    logger = Logger.getInstance()
    logger.info(f'Daily Traffic Rule Engine: {self._INPUT_TRAFFIC_LOG}')
    self._read_csv()
    self._preprocess()
    self._extract_rules_from_traffic_log()
    logger.info('fh_stg_trfc_rule_f loaded successfully')
def run(self):
    logger = Logger.getInstance()
    logger.info(f'Daily Threat MIS Engine: {self._INPUT_THREAT_LOG}')
    self._read_csv()
    self._preprocess()
    logger.info('log successfully loaded')
    self._write_new_firewall_rules_to_db()
    logger.info('fh_prd_fw_rule_f successfully loaded')
def run(self):
    logger = Logger.getInstance()
    for csv in self._csv_paths:
        try:
            logger.info(f'Rule Engine: {csv}')
            print('****Processing File:', csv)
            self._run_for_one(csv)
        except Exception:
            # Log the full traceback and move on to the next file
            logger.error(traceback.format_exc())
            logger.info(f'Skipping {csv}')
            continue
def _run(self, callback, csvs, verbose=False, message='Processing:'):
    logger = Logger.getInstance()
    for csv in csvs:
        try:
            logger.info(f'DC Engine: {csv}')
            if verbose:
                print(message, csv)
            callback(csv)
        except Exception:
            logger.error(traceback.format_exc())
            logger.info(f'Skipping {csv}')
            continue
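# Sketch, not existing code: the per-file loop in the Rule Engine's run()
# above duplicates this generic _run() helper, so run() could delegate to
# it. The names _csv_paths and _run_for_one come from the Rule Engine's
# run() shown earlier; note that _run() logs a 'DC Engine' prefix, so the
# log messages would change unless the prefix were also parameterized.
def run(self):
    self._run(self._run_for_one,
              self._csv_paths,
              verbose=True,
              message='****Processing File:')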
def run(self):
    logger = Logger.getInstance()
    logger.info('TT Engine started')
    for csv in self._csvs:
        try:
            logger.info(f'TT Engine: {csv}')
            df = self._spark.read.csv(csv, header=True, inferSchema=True)
            # 'mm' means minutes in Spark date patterns; the month is 'MM'
            df = df.withColumn(
                'logged_datetime',
                to_timestamp(df.logged_datetime, 'yyyy/MM/dd'))
            firewall_rules = self._read_table_from_postgres(
                'core_firewallrule')
            logs = self._read_table_from_postgres('core_trafficlog')
            # Replace the log name with its id from core_trafficlog
            mapped = df.join(
                logs,
                on=[df.log_name == logs.log_name],
            ).drop('log_name').withColumnRenamed(
                'id', 'log_id').drop(*logs.columns)
            # Replace the firewall rule name with its id from core_firewallrule
            mapped = mapped.join(
                firewall_rules,
                on=[mapped.firewall_rule_id == firewall_rules.name]
            ).drop('firewall_rule_id').withColumnRenamed(
                'id', 'firewall_rule_id').drop(*firewall_rules.columns)
            mapped = mapped.drop('virtual_system_id',
                                 'inbound_interface_id',
                                 'outbound_interface_id')\
                .withColumnRenamed('source_ip_id', 'source_ip')\
                .withColumnRenamed('destination_ip_id', 'destination_ip')\
                .withColumnRenamed('application_id', 'application')\
                .withColumnRenamed('source_zone_id', 'source_zone')\
                .withColumnRenamed('destination_zone_id', 'destination_zone')\
                .withColumnRenamed('protocol_id', 'protocol')\
                .withColumnRenamed('action_id', 'action')\
                .withColumnRenamed('session_end_reason_id', 'session_end_reason')\
                .withColumnRenamed('category_id', 'category')\
                .withColumn('created_datetime', lit(datetime.datetime.now()))\
                .withColumn('is_closed', lit(False))
            self._write_df_to_postgres(
                mapped, 'troubleticket_troubleticketanomaly')
        except Exception:
            logger.error(traceback.format_exc())
            logger.info(f'Skipping {csv}')
            continue
    logger.info('TT Engine: Done')
def run(self):
    logger = Logger.getInstance()
    logger.info('TT Engine started')
    csv = self._csv
    try:
        logger.info(f'TT Engine: {csv}')
        df = self._spark.read.csv(csv, header=True, inferSchema=True)
        df = self._preprocess(df)
        mapped = self._map_log_name_and_firewall_rule(df)
        mapped = mapped.drop('vsys', 'inbound_interface', 'outbound_interface')\
            .withColumn('created_datetime', lit(datetime.datetime.now()))\
            .withColumn('is_closed', lit(False))
        self._write_df_to_postgres(mapped, 'fh_stg_tt_anmly_f')
    except Exception:
        # Echo the traceback to the console as well as the log
        tb = traceback.format_exc()
        print(tb)
        logger.error(tb)
        logger.info(f'Skipping {csv}')
    logger.info('TT Engine: Done')
def run(self):
    logger = Logger.getInstance()
    csv = self._csv
    try:
        logger.info(f'Chart Engine: {csv}')
        df = self._spark.read.csv(csv, header=True)
        df = self._preprocess(df)
        print('**Writing new items to db**')
        logger.info('Chart Engine: Writing new items to db')
        self._write_new_items_to_db(df)
        print('**Mapping to Foreign Keys**')
        logger.info('Chart Engine: Mapping Foreign Keys')
        df = self._map_df_to_fk(df)
        # Persist the dataframe for faster processing
        df.cache()
        print('**Processing Filters**')
        logger.info('Chart Engine: Processing Filters')
        self._process_filters(df)
        # Create all the necessary charts
        print('**Writing Time Series Chart Data**')
        logger.info('Chart Engine: Writing Time Series Chart Data')
        TimeSeriesChart(df, spark=self._spark).run()
        print('**Writing IP Profile Chart Data**')
        logger.info('Chart Engine: Writing IP Profile Chart Data')
        IPChart(df, spark=self._spark).run()
        print('**Writing Sankey Chart Data**')
        logger.info('Chart Engine: Writing Sankey Chart Data')
        SankeyChart(df, spark=self._spark).run()
        # Unpersist the dataframe to free space
        df.unpersist()
    except Exception:
        logger.error(traceback.format_exc())
        logger.info(f'Skipping {csv}')
    logger.info('Chart Engine: Done')
    print('Chart Engine finished running on:', datetime.datetime.now())
def run(self):
    logger = Logger.getInstance()
    logger.info('Threat Engine Running')
    for root, dirs, files in os.walk(self._INPUT_DIR):
        for file in files:
            # Skip non-CSV files before logging or processing anything
            if not file.endswith('.csv'):
                continue
            try:
                logger.info(f'Threat Engine: {file}')
                print(f'processing threat log: {file} inside threat engine')
                self._read_csv(file)
                self._preprocess()
                self._set_firewall_rules_id_to_data()
                self._resolve_country_from_ip()
                # self._show_df(10)
                self._write_df_to_db()
            except Exception:
                logger.error(traceback.format_exc())
                logger.info(f'Skipping {file}')
                continue
    logger.info('Threat Engine: Done')
def run(self):
    logger = Logger.getInstance()
    logger.info(f'Daily Traffic MIS Engine: {self._INPUT_TRAFFIC_LOG}')
    self._read_csv()
    self._preprocess()
    logger.info('log successfully loaded')
    self._write_new_firewall_rules_to_db()
    logger.info('fh_prd_fw_rule_f successfully loaded')
    self._set_firewall_rules_id_to_data()
    self._extract_mis_daily()
    logger.info('fh_stg_trfc_mis_dy_a successfully loaded')
    self._extract_mis_new_source_address()
    logger.info('fh_stg_trfc_mis_new_src_ip_dy_a successfully loaded')
    self._extract_mis_new_destination_address()
    logger.info('fh_stg_trfc_mis_new_dst_ip_dy_a successfully loaded')
    self._extract_mis_new_application()
    logger.info('fh_stg_trfc_mis_new_app_dy_a successfully loaded')
    self._extract_mis_requests_from_blacklisted_ip_event()
    logger.info('fh_stg_trfc_mis_req_frm_blip_dy_a successfully loaded')
    self._extract_mis_responses_to_blacklisted_ip_event()
    logger.info('fh_stg_trfc_mis_res_to_blip_dy_a successfully loaded')
def run(self):
    logger = Logger.getInstance()
    logger.info('Running Log Engine')
    TRAFFIC_LOG_TABLE = 'core_trafficlog'
    THREAT_LOG_TABLE = 'core_threatlog'
    traffic_logs_in_db = self._read_table_from_postgres(TRAFFIC_LOG_TABLE)
    threat_logs_in_db = self._read_table_from_postgres(THREAT_LOG_TABLE)
    traffic_logs = self.get_log_info(
        self._traffic_csvs, 'core_processedtrafficlogdetail')
    threat_logs = self.get_log_info(
        self._threat_csvs, 'core_processedthreatlogdetail')
    traffic_logs = self.get_df_from_list(traffic_logs, traffic_logs_in_db)
    threat_logs = self.get_df_from_list(threat_logs, threat_logs_in_db)
    self._write_df_to_postgres(traffic_logs, TRAFFIC_LOG_TABLE)
    self._write_df_to_postgres(threat_logs, THREAT_LOG_TABLE)
    logger.info('Log Engine Done')
def run(self, verbose=True):
    logger = Logger.getInstance()
    logger.info('DB Engine: Started')
    if verbose:
        print('******RUNNING DB ENGINE******')
    for csv in self._granular_csvs:
        try:
            if verbose:
                print(f'\t ******Processing: {csv}******')
            logger.info(f'DB Engine: {csv}')
            df = self._spark.read.csv(csv, header=True, inferSchema=True)
            csv_name = os.path.basename(csv)
            firewall_rules = self._read_table_from_postgres(
                'core_firewallrule')
            logs = self._read_table_from_postgres('core_trafficlog')
            # Assumes the Log Engine has already inserted a row for this
            # file; fail fast with a clear message if it has not.
            matching = logs.where(logs.log_name == csv_name).collect()
            if not matching:
                raise ValueError(f'No core_trafficlog entry for {csv_name}')
            log_id = matching[0].id
            mapped = df.join(
                firewall_rules,
                on=[df.firewall_rule_id == firewall_rules.name])\
                .drop('source_port', 'firewall_rule_id', 'name',
                      'tenant_id', 'virtual_system_id')\
                .withColumnRenamed('id', 'firewall_rule_id')\
                .withColumnRenamed('source_ip_id', 'source_ip')\
                .withColumnRenamed('destination_ip_id', 'destination_ip')\
                .withColumnRenamed('application_id', 'application')\
                .withColumnRenamed('source_zone_id', 'source_zone')\
                .withColumnRenamed('destination_zone_id', 'destination_zone')\
                .withColumnRenamed('protocol_id', 'protocol')\
                .withColumnRenamed('inbound_interface_id', 'inbound_interface')\
                .withColumnRenamed('outbound_interface_id', 'outbound_interface')\
                .withColumnRenamed('action_id', 'action')\
                .withColumnRenamed('session_end_reason_id', 'session_end_reason')\
                .withColumnRenamed('category_id', 'category')
            mapped = mapped.withColumn('traffic_log_id', lit(log_id))
            self._write_df_to_postgres(
                mapped, 'core_trafficlogdetailgranularhour')
        except Exception:
            logger.error(traceback.format_exc())
            logger.info(f'Skipping {csv}')
            continue
    logger.info('DB Engine: Done')
def run(self):
    logger = Logger.getInstance()
    logger.info('MIS Engine running')
    for root, dirs, files in os.walk(self._INPUT_DIR):
        for file in files:
            try:
                if not file.endswith('.csv'):
                    continue
                logger.info(f'MIS Engine: {file}')
                print(f'processing {file} inside mis engine')
                raw_df = self._read_csv(file)
                df = self._preprocess()
                self._write_new_firewall_rules_to_db()
                firewall_rules_from_db = self._read_firewall_rules_from_db()
                df = self._set_firewall_rules_id_to_data(
                    df, firewall_rules_from_db)
                self._write_raw_log_to_cassandra(raw_df)
                del raw_df
                print('*** processing finished ****')

                self._mis_daily = self._extract_mis_daily(df)
                self._mis_daily.show(5)
                print('*** mis daily extracting finished ****')
                self._write_csv_to_postgres(
                    self._mis_daily, 'mis_daily', 'append')
                print('*** writing mis daily to postgres finished ****')
                self._write_csv_to_cassandra(
                    self._mis_daily, 'mis_daily', 'append')
                print('*** writing mis daily to cassandra finished ****')

                self._mis_new_source_ip = self._extract_mis_new_source_ip(df)
                self._mis_new_source_ip.show(5)
                print('*** mis daily new source ip extracting finished ****')
                self._write_csv_to_postgres(
                    self._mis_new_source_ip, 'mis_dailysourceip', 'append')
                print('*** writing mis daily new source ip to postgres finished ****')
                self._write_csv_to_cassandra(
                    self._mis_new_source_ip, 'mis_daily_source_ip', 'append')
                print('*** writing mis daily new source ip to cassandra finished ****')

                self._mis_new_destination_ip = \
                    self._extract_mis_new_destination_ip(df)
                self._mis_new_destination_ip.show(5)
                print('*** mis daily new destination ip extracting finished ****')
                self._write_csv_to_postgres(
                    self._mis_new_destination_ip,
                    'mis_dailydestinationip', 'append')
                print('*** writing mis daily new destination ip to postgres finished ****')
                self._write_csv_to_cassandra(
                    self._mis_new_destination_ip,
                    'mis_daily_destination_ip', 'append')
                print('*** writing mis daily new destination ip to cassandra finished ****')

                self._mis_new_application = \
                    self._extract_mis_new_application_ip(df)
                self._mis_new_application.show(5)
                print('*** mis daily new application extracting finished ****')
                self._write_csv_to_postgres(
                    self._mis_new_application, 'mis_dailyapplication', 'append')
                print('*** writing mis daily new application to postgres finished ****')
                self._write_csv_to_cassandra(
                    self._mis_new_application, 'mis_daily_application', 'append')
                print('*** writing mis daily new application to cassandra finished ****')

                self._mis_requests_from_blacklisted_ip = \
                    self._extract_mis_requests_from_blacklisted_ip_event(df)
                self._mis_requests_from_blacklisted_ip.show(5)
                print('*** mis daily new blacklist request extracting finished ****')
                self._write_csv_to_postgres(
                    self._mis_requests_from_blacklisted_ip,
                    'mis_dailyrequestfromblacklistevent', 'append')
                print('*** writing mis daily requests from blacklist ip to postgres finished ****')
                self._write_csv_to_cassandra(
                    self._mis_requests_from_blacklisted_ip,
                    'mis_daily_request_from_black_list_event', 'append')
                print('*** writing mis daily requests from blacklist ip to cassandra finished ****')

                self._mis_response_to_blacklisted_ip = \
                    self._extract_mis_responses_to_blacklisted_ip_event(df)
                self._mis_response_to_blacklisted_ip.show(5)
                print('*** mis daily new blacklist response extracting finished ****')
                self._write_csv_to_postgres(
                    self._mis_response_to_blacklisted_ip,
                    'mis_dailyresponsetoblacklistevent', 'append')
                print('*** writing mis daily response to blacklist ip to postgres finished ****')
                self._write_csv_to_cassandra(
                    self._mis_response_to_blacklisted_ip,
                    'mis_daily_response_to_black_list_event', 'append')
                print('*** writing mis daily response to blacklist ip to cassandra finished ****')

                self._mis_new_private_source_destination_pair = \
                    self._extract_mis_new_private_source_destination_pair(df)
                self._mis_new_private_source_destination_pair.show(5)
                print('*** mis daily new source destination pair extracting finished ****')
                self._write_csv_to_postgres(
                    self._mis_new_private_source_destination_pair,
                    'mis_dailypersourcedestinationpair', 'append')
                print('*** writing mis daily per source destination pair to postgres finished ****')
                self._write_csv_to_cassandra(
                    self._mis_new_private_source_destination_pair,
                    'mis_daily_per_source_destination_pair', 'append')
                print('*** writing mis daily per source destination pair to cassandra finished ****')
            except Exception:
                logger.error(traceback.format_exc())
                logger.info(f'Skipping {file}')
                continue
    logger.info('MIS Engine: Done')
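# Sketch only, not existing code: the extract -> show -> write-to-Postgres ->
# write-to-Cassandra sequence in the MIS Engine's run() above repeats once per
# report, so it could be driven by a table of (extract_fn, postgres_table,
# cassandra_table) tuples. The method and table names below are taken from
# run(); the helper itself (_export_mis_reports) is an assumed addition, and it
# deliberately skips the per-report instance attributes and prints.
def _export_mis_reports(self, df):
    reports = [
        (self._extract_mis_daily,
         'mis_daily', 'mis_daily'),
        (self._extract_mis_new_source_ip,
         'mis_dailysourceip', 'mis_daily_source_ip'),
        (self._extract_mis_new_destination_ip,
         'mis_dailydestinationip', 'mis_daily_destination_ip'),
        (self._extract_mis_new_application_ip,
         'mis_dailyapplication', 'mis_daily_application'),
        (self._extract_mis_requests_from_blacklisted_ip_event,
         'mis_dailyrequestfromblacklistevent',
         'mis_daily_request_from_black_list_event'),
        (self._extract_mis_responses_to_blacklisted_ip_event,
         'mis_dailyresponsetoblacklistevent',
         'mis_daily_response_to_black_list_event'),
        (self._extract_mis_new_private_source_destination_pair,
         'mis_dailypersourcedestinationpair',
         'mis_daily_per_source_destination_pair'),
    ]
    for extract, pg_table, cassandra_table in reports:
        report = extract(df)
        report.show(5)
        self._write_csv_to_postgres(report, pg_table, 'append')
        self._write_csv_to_cassandra(report, cassandra_table, 'append')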