parser.add_argument("-iz", "--input_zookeeper", help="input zookeeper hostname:port", type=str, required=True) parser.add_argument("-it", "--input_topic", help="input kafka topic", type=str, required=True) parser.add_argument("-oz", "--output_zookeeper", help="output zookeeper hostname:port", type=str, required=True) parser.add_argument("-ot", "--output_topic", help="output kafka topic", type=str, required=True) parser.add_argument("-m", "--microbatch", help="microbatch duration", type=int, required=False, default=5) parser.add_argument("-w", "--window", help="analysis window duration", type=int, required=False, default=60) # arguments for detection parser.add_argument("-t", "--threshold", help="min amount of targets which trigger detection", type=int, required=False, default=20) # Parse arguments args = parser.parse_args() # Initialize input stream and parse it into JSON ssc, parsed_input_stream = kafkaIO\ .initialize_and_parse_input_stream(args.input_zookeeper, args.input_topic, args.microbatch) # Check for port scans processed_input = process_input(parsed_input_stream, args.threshold, args.window, args.microbatch) # Initialize kafka producer kafka_producer = kafkaIO.initialize_kafka_producer(args.output_zookeeper) # Process computed data and send them to the output kafkaIO.process_data_and_send_result(processed_input, kafka_producer, args.output_topic, args.window, process_results) # Start Spark streaming context kafkaIO.spark_start(ssc)
"--threshold", help="min amount of flows which we consider being an attack", type=int, required=False, default=10) # Parse arguments args = parser.parse_args() # Initialize input stream and parse it into JSON ssc, parsed_input_stream = kafkaIO\ .initialize_and_parse_input_stream(args.input_zookeeper, args.input_topic, args.microbatch) # Check for SSH attacks attacks = check_for_attacks_ssh(parsed_input_stream, args.min_packets, args.max_packets, args.min_bytes, args.max_bytes, args.max_duration, args.threshold, args.window, args.microbatch) # Initialize kafka producer kafka_producer = kafkaIO.initialize_kafka_producer(args.output_zookeeper) # Process computed data and send them to the output kafkaIO.process_data_and_send_result(attacks, kafka_producer, args.output_topic, args.window, process_results) # Start Spark streaming context kafkaIO.spark_start(ssc)
# Load the list of filtered domains, if provided (default to an empty list so the
# name is always defined for the mapping below)
filtered_domains = []
if args.filtered_domains:
    with open(args.filtered_domains, 'r') as f:
        strings = f.readlines()
    filtered_domains = [line.strip() for line in strings]

# Initialize the input stream and parse it into JSON
ssc, parsed_input_stream = kafkaIO \
    .initialize_and_parse_input_stream(args.input_zookeeper, args.input_topic, args.microbatch)

# Get flows with DNS elements
dns_stream = parsed_input_stream.filter(lambda flow_json: ("ipfix.DNSName" in flow_json.keys()))

# Get the mapping of DNS statistics
dns_stream_map = dns_stream \
    .flatMap(lambda record: get_dns_stats_mapping(record, args.local_network, filtered_domains))

# Get statistics within the given window
dns_statistics = dns_stream_map.reduceByKey(lambda actual, update: (actual + update)) \
    .window(args.window, args.window) \
    .reduceByKey(lambda actual, update: (actual + update))

# Initialize the Kafka producer
kafka_producer = kafkaIO.initialize_kafka_producer(args.output_zookeeper)

# Process the computed data and send the results to the output topic
kafkaIO.process_data_and_send_result(dns_statistics, kafka_producer, args.output_topic, process_results)

# Start the Spark streaming context
kafkaIO.spark_start(ssc)
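
# --- Illustrative sketch only: the actual get_dns_stats_mapping() used above is
# defined elsewhere in this application. The sketch shows one plausible mapping of a
# DNS flow record to (key, 1) pairs suitable for the reduceByKey/window pipeline
# above: count queried domains originating from the monitored local network and skip
# whitelisted (filtered) domains. The "ipfix.sourceIPv4Address" key, the meaning of
# local_network as a CIDR prefix, and the emitted key names are assumptions; it also
# assumes the Python 3 standard library ipaddress module.
import ipaddress

def get_dns_stats_mapping_sketch(record, local_network, filtered_domains):
    domain = record["ipfix.DNSName"].rstrip(".").lower()

    # Ignore whitelisted domains.
    if domain in filtered_domains:
        return []

    # Only count queries originating from the monitored local network (assumed field).
    source = record.get("ipfix.sourceIPv4Address")
    if source is None or \
            ipaddress.ip_address(source) not in ipaddress.ip_network(local_network, strict=False):
        return []

    # Emit one counter per statistic of interest; further keys can be added the same way.
    return [(("queried_domain", domain), 1),
            (("queried_by", source), 1)]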