def _zmqSendBuffer(self, milterContext, numRetries, REQUEST_TIMEOUT, SERVER_ENDPOINT):
    gotResponseFromScanner = -1
    self.client = Client(SERVER_ENDPOINT)
    log = milterContext.uuid + " Sending " + str(milterContext.qid) + " to " + SERVER_ENDPOINT
    self.logger.writeLog(syslog.LOG_DEBUG, "%s" % (str(log)))
    myhostname = socket.gethostname()
    externalObject = ExternalObject(
        buffer=milterContext.fileBuffer,
        externalVars=ExternalVars(
            filename=milterContext.archiveFileName,
            source=milterContext.milterConfig.milterName + "-" + myhostname.split(".")[0],
            ephID=milterContext.qid,
            uniqID=milterContext.messageID),
        level=level_metadata)
    result = self.client.send(externalObject, retry=numRetries, timeout=REQUEST_TIMEOUT)
    if result:
        # Roll up flags, attachment names, and dispositions from the scan result
        self.match = flagRollup(result)
        if not self.match:
            self.match = []
        self.attachements = ','.join(getAttachmentList(result))
        strScanResult = finalDispositionFromResult(result)
        strScanResults = " ".join(dispositionFromResult(result))
        if strScanResult:
            self.strScanResult = strScanResult
        try:
            self.dispositions = strScanResults
        except:
            self.logger.writeLog(syslog.LOG_ERR,
                                 milterContext.uuid + " ERROR getting dispositions via client lib")
        gotResponseFromScanner = 1
    else:
        self.logger.writeLog(syslog.LOG_ERR,
                             milterContext.uuid + " " + str(milterContext.qid) +
                             "| no result object from scanner, returning SCAN ERROR")
    return gotResponseFromScanner
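
# Illustrative sketch (not part of the milter): a standalone round trip through
# the same client API that _zmqSendBuffer() uses above. The broker endpoint,
# the laikaboss.clientLib/laikaboss.constants import paths, and all literal
# values here are assumptions for demonstration; adjust to your deployment.
def _example_send_buffer():
    import socket
    from laikaboss.clientLib import Client, flagRollup, finalDispositionFromResult
    from laikaboss.objectmodel import ExternalObject, ExternalVars
    from laikaboss.constants import level_metadata

    client = Client("tcp://127.0.0.1:5558")  # assumed broker endpoint
    externalObject = ExternalObject(
        buffer="hello world",  # any buffer to scan
        externalVars=ExternalVars(
            filename="example.txt",
            source="example-" + socket.gethostname().split(".")[0]),
        level=level_metadata)
    # retry/timeout mirror the numRetries/REQUEST_TIMEOUT arguments above
    result = client.send(externalObject, retry=3, timeout=30000)
    if result:
        print "flags: %s" % flagRollup(result)
        print "disposition: %s" % finalDispositionFromResult(result)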
def perform_scan(self, poll_timeout):
    '''
    Wait for work from the broker, then perform the scan. If a timeout occurs,
    no scan is performed and no result is returned.

    Arguments:
    poll_timeout -- The amount of time to wait for work.

    Returns:
    The result of the scan, or None if no scan was performed.
    '''
    from laikaboss.dispatch import Dispatch
    from laikaboss.objectmodel import ScanResult, ExternalObject, ExternalVars, QuitScanException
    from laikaboss.util import log_result, log_debug

    # If a task is found, perform the scan
    try:
        logging.debug("Worker (%s): checking for work", self.identity)
        tasks = dict(self.broker_poller.poll(poll_timeout))
        if tasks.get(self.broker) == zmq.POLLIN:
            logging.debug("Worker (%s): performing scan", self.identity)
            # The task should be in the following format:
            #   ['', client_id, '', request_type, '', request]
            # where:
            #   client_id    -- ZMQ identifier of the client socket
            #   request_type -- The type of the request (json/pickle/zlib)
            #   request      -- Object to be scanned
            task = self.broker.recv_multipart()
            client_id = task[1]
            if len(task) == 6:
                request_type = task[3]
                request = task[5]
                if request_type in [REQ_TYPE_PICKLE, REQ_TYPE_PICKLE_ZLIB]:
                    #logging.debug("Worker: received work %s", str(task))
                    if request_type == REQ_TYPE_PICKLE_ZLIB:
                        externalObject = pickle.loads(zlib.decompress(request))
                    else:
                        externalObject = pickle.loads(request)
                elif request_type in [REQ_TYPE_JSON, REQ_TYPE_JSON_ZLIB]:
                    if request_type == REQ_TYPE_JSON_ZLIB:
                        jsonRequest = json.loads(zlib.decompress(request))
                    else:
                        jsonRequest = json.loads(request)

                    # Set default values for any fields omitted from the request
                    if 'buffer' not in jsonRequest:
                        jsonRequest['buffer'] = ''
                    else:
                        try:
                            jsonRequest['buffer'] = base64.b64decode(jsonRequest['buffer'])
                        except:
                            # This should never happen unless invalid input is given
                            jsonRequest['buffer'] = ''
                    if 'filename' not in jsonRequest:
                        jsonRequest['filename'] = ''
                    if 'ephID' not in jsonRequest:
                        jsonRequest['ephID'] = ''
                    if 'uniqID' not in jsonRequest:
                        jsonRequest['uniqID'] = ''
                    if 'contentType' not in jsonRequest:
                        jsonRequest['contentType'] = []
                    if 'timestamp' not in jsonRequest:
                        jsonRequest['timestamp'] = ''
                    if 'source' not in jsonRequest:
                        jsonRequest['source'] = ''
                    if 'origRootUID' not in jsonRequest:
                        jsonRequest['origRootUID'] = ''
                    if 'extMetaData' not in jsonRequest:
                        jsonRequest['extMetaData'] = {}
                    if 'level' not in jsonRequest:
                        jsonRequest['level'] = 2

                    externalVars = ExternalVars(filename=jsonRequest['filename'],
                                                ephID=jsonRequest['ephID'],
                                                uniqID=jsonRequest['uniqID'],
                                                contentType=jsonRequest['contentType'],
                                                timestamp=jsonRequest['timestamp'],
                                                source=jsonRequest['source'],
                                                origRootUID=jsonRequest['origRootUID'],
                                                extMetaData=jsonRequest['extMetaData'])
                    externalObject = ExternalObject(buffer=jsonRequest['buffer'],
                                                    level=jsonRequest['level'],
                                                    externalVars=externalVars)
                else:
                    return [client_id, '', 'INVALID REQUEST']

                result = ScanResult(source=externalObject.externalVars.source,
                                    level=externalObject.level)
                result.startTime = time.time()
                try:
                    Dispatch(externalObject.buffer, result, 0,
                             externalVars=externalObject.externalVars)
                except QuitScanException:
                    raise
                except:
                    exc_type, exc_value, exc_traceback = sys.exc_info()
                    log_debug("exception on file: %s, detailed exception: %s" % (
                        externalObject.externalVars.filename,
                        repr(traceback.format_exception(exc_type, exc_value, exc_traceback))))
                if self.logresult:
                    log_result(result)

                # Encode the reply to match the request type
                if request_type == REQ_TYPE_PICKLE_ZLIB:
                    result = zlib.compress(pickle.dumps(result, pickle.HIGHEST_PROTOCOL))
                elif request_type == REQ_TYPE_PICKLE:
                    result = pickle.dumps(result, pickle.HIGHEST_PROTOCOL)
                elif request_type == REQ_TYPE_JSON_ZLIB:
                    result = zlib.compress(json.dumps(result, cls=ResultEncoder))
                elif request_type == REQ_TYPE_JSON:
                    result = json.dumps(result, cls=ResultEncoder)
                return [client_id, '', result]
            else:
                return [client_id, '', 'INVALID REQUEST']
    except zmq.ZMQError as zmqerror:
        if "Interrupted system call" not in str(zmqerror):
            logging.exception("Worker (%s): Received ZMQError", self.identity)
        else:
            logging.debug("Worker (%s): ZMQ interrupted by shutdown signal", self.identity)
    return None
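
# Illustrative sketch (not part of the worker): building the JSON request body
# that perform_scan() accepts for REQ_TYPE_JSON / REQ_TYPE_JSON_ZLIB work
# items. The field names and defaults mirror the parsing code above; the
# multipart framing (['', client_id, '', request_type, '', request]) is added
# by the broker before the task reaches the worker, so only the request
# payload is shown. The function name and literal values are hypothetical.
def _example_build_json_request(path):
    import base64
    import json
    import zlib

    with open(path, 'rb') as f:
        raw = f.read()
    request = json.dumps({
        'buffer': base64.b64encode(raw),  # perform_scan() b64-decodes this field
        'filename': path,
        'source': 'example-client',
        'contentType': [],
        'extMetaData': {},
        'level': 2,  # same default perform_scan() applies when omitted
    })
    # Compress the payload only when sending it as REQ_TYPE_JSON_ZLIB
    return request, zlib.compress(request)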
def main():
    parser = OptionParser(usage="usage: %prog [options] (/path/to/file | stdin)")
    parser.add_option("-d", "--debug",
                      action="store_true", dest="debug",
                      help="enable debug messages to the console.")
    parser.add_option("-r", "--remove-limit",
                      action="store_true", dest="nolimit",
                      help="disable 20MB size limit (be careful!)")
    parser.add_option("-t", "--timeout",
                      action="store", type="int", dest="timeout",
                      help="adjust request timeout period (in seconds)")
    parser.add_option("-c", "--config-path",
                      action="store", type="string", dest="config_path",
                      help="specify a path to si-cloudscan.conf.")
    parser.add_option("-a", "--address",
                      action="store", type="string", dest="broker_host",
                      help="specify an IP and port to connect to the broker")
    parser.add_option("-f", "--file-list",
                      action="store", type="string", dest="file_list",
                      help="specify a list of files to scan")
    parser.add_option("-s", "--ssh-host",
                      action="store", type="string", dest="ssh_host",
                      help="specify a host for the SSH tunnel")
    parser.add_option("-p", "--num-procs",
                      action="store", type="int", default=6, dest="num_procs",
                      help="specify the number of worker processes to use for recursive scanning")
    parser.add_option("-u", "--source",
                      action="store", type="string", dest="source",
                      help="specify a custom source")
    parser.add_option("--ssh",
                      action="store_true", default=False, dest="use_ssh",
                      help="use SSH tunneling")
    parser.add_option("-l", "--level",
                      action="store", type="string", dest="return_level",
                      help="return level: minimal, metadata, full [default: metadata]")
    parser.add_option("-o", "--out-path",
                      action="store", type="string", dest="save_path",
                      help="if return level full has been specified, provide a path to "
                           "save the results to [default: current directory]")
    parser.add_option("-b", "--buffer",
                      action="store_true", dest="stdin_buffer",
                      help="specify to allow a buffer to be collected by stdin.")
    parser.add_option("-e", "--ephID",
                      action="store", type="string", dest="ephID", default="",
                      help="specify an ephID to send to Laika.")
    parser.add_option("-m", "--ext-metadata",
                      action="store", dest="ext_metadata",
                      help="specify external metadata to be passed into the scanner.")
    parser.add_option("-z", "--log",
                      action="store_true", dest="log_db",
                      help="specify to turn on logging results.")
    parser.add_option("-R", "--recursive",
                      action="store_true", default=False, dest="recursive",
                      help="enable recursive directory scanning. If enabled, all files "
                           "in the specified directory will be scanned. Results will "
                           "be output to si-cloudscan.log in the current directory.")
    (options, args) = parser.parse_args()

    # Define the default configuration location
    CONFIG_PATH = "/etc/si-cloudscan/si-cloudscan.conf"
    if options.config_path:
        CONFIG_PATH = options.config_path

    Config = ConfigParser.ConfigParser()
    Config.read(CONFIG_PATH)

    # Parse the config file and merge each section into a single dictionary
    global configs
    for section in Config.sections():
        configs.update(dict(Config.items(section)))

    # Set the working path; this is used for file output if another path is
    # not specified
    WORKING_PATH = os.getcwd()

    if options.use_ssh:
        USE_SSH = True
    else:
        USE_SSH = bool(strtobool(getConfig('use_ssh')))

    if options.ssh_host:
        SSH_HOST = options.ssh_host
    else:
        SSH_HOST = getConfig('ssh_host')

    if options.broker_host:
        BROKER_HOST = options.broker_host
    else:
        BROKER_HOST = getConfig('broker_host')

    if options.debug:
        logging.basicConfig(level=logging.DEBUG)
    logging.debug("Host: %s" % BROKER_HOST)

    if options.return_level:
        RETURN_LEVEL = options.return_level
    else:
        RETURN_LEVEL = getConfig('return_level')

    if options.source:
        SOURCE = options.source
    else:
        SOURCE = "si-cloudscan"

    if not options.log_db:
        SOURCE += "-nolog"

    if options.save_path:
        SAVE_PATH = options.save_path
    else:
        SAVE_PATH = WORKING_PATH

    if options.num_procs:
        num_procs = int(options.num_procs)
    else:
        num_procs = int(getConfig('num_procs'))

    if options.timeout:
        logging.debug("default timeout changed to %i" % options.timeout)
        REQUEST_TIMEOUT = options.timeout * 1000  # the client timeout is in milliseconds
    else:
        REQUEST_TIMEOUT = int(getConfig('request_timeout'))

    if options.ext_metadata:
        try:
            ext_metadata = json.loads(options.ext_metadata)
            assert isinstance(ext_metadata, dict)
        except:
            print "External metadata must be a JSON dictionary!"
            sys.exit(1)
    else:
        ext_metadata = dict()

    REQUEST_RETRIES = int(getConfig('request_retries'))

    # Attempt to get the hostname
    try:
        hostname = gethostname().split('.')[0]
    except:
        hostname = "none"

    # Attempt to resolve the return level; exit if it isn't valid
    try:
        return_level = globals()["level_%s" % RETURN_LEVEL]
    except KeyError:
        print "Please specify a valid return level: minimal, metadata or full"
        sys.exit(1)

    if not options.recursive:
        try:
            file_buffer = ''
            # Try to read the file, falling back to stdin
            if len(args) > 0:
                file_buffer = open(args[0], 'rb').read()
                file_len = len(file_buffer)
                logging.debug("opened file %s with len %i" % (args[0], file_len))
            else:
                while sys.stdin in select.select([sys.stdin], [], [], 0)[0]:
                    line = sys.stdin.readline()
                    if not line:
                        break
                    else:
                        file_buffer += line
                if not file_buffer:
                    parser.print_usage()
                    sys.exit(1)
                file_len = len(file_buffer)
            if file_len > 20971520 and not options.nolimit:
                print "You're trying to scan a file larger than 20MB... Are you sure?"
                print "Use the --remove-limit flag if you really want to do this."
                sys.exit(1)
        except IOError:
            print "\nERROR: The file does not exist: %s\n" % (args[0],)
            sys.exit(1)
    else:
        try:
            fileList = []
            if options.file_list:
                fileList = open(options.file_list).read().splitlines()
            else:
                if len(args) > 0:
                    rootdir = args[0]
                    for root, subFolders, files in os.walk(rootdir):
                        for fname in files:
                            fileList.append(os.path.join(root, fname))
                else:
                    while sys.stdin in select.select([sys.stdin], [], [], 0)[0]:
                        line = sys.stdin.readline()
                        if not line:
                            break
                        else:
                            # Strip the newline so the path can be opened later
                            fileList.append(line.rstrip('\n'))
                    if not fileList:
                        parser.print_usage()
                        sys.exit(1)
            if len(fileList) > 1000 and not options.nolimit:
                print "You're trying to scan over 1000 files... Are you sure?"
                print "Use the --remove-limit flag if you really want to do this."
                sys.exit(1)
        except IOError:
            print "\nERROR: Directory does not exist: %s\n" % (args[0],)
            sys.exit(1)

    if not options.recursive:
        # Construct the object to be sent for scanning
        if args:
            filename = args[0]
        else:
            filename = "stdin"
        ext_metadata['server'] = hostname
        ext_metadata['user'] = getpass.getuser()
        externalObject = ExternalObject(
            buffer=file_buffer,
            externalVars=ExternalVars(
                filename=filename,
                ephID=options.ephID,
                extMetaData=ext_metadata,
                source="%s-%s-%s" % (SOURCE, hostname, getpass.getuser())),
            level=return_level)

    try:
        if not options.recursive:
            # Set up the connection to the broker
            if USE_SSH:
                try:
                    logging.debug("attempting to connect to broker at %s and SSH host %s"
                                  % (BROKER_HOST, SSH_HOST))
                    client = Client(BROKER_HOST, useSSH=True, sshHost=SSH_HOST, useGevent=True)
                except RuntimeError:
                    logging.exception("could not set up SSH tunnel to %s" % SSH_HOST)
                    sys.exit(1)
            else:
                logging.debug("SSH has been disabled.")
                client = Client(BROKER_HOST, useGevent=True)

            starttime = time.time()
            result = client.send(externalObject, retry=REQUEST_RETRIES, timeout=REQUEST_TIMEOUT)
            logging.debug("got reply in %s seconds" % str(time.time() - starttime))
            rootObject = getRootObject(result)
            try:
                jsonResult = getJSON(result)
                print jsonResult
            except:
                logging.exception("error occurred collecting results")
                return

            if return_level == level_full:
                SAVE_PATH = "%s/%s" % (SAVE_PATH, get_scanObjectUID(rootObject))
                if not os.path.exists(SAVE_PATH):
                    try:
                        os.makedirs(SAVE_PATH)
                        print "\nWriting results to %s...\n" % SAVE_PATH
                    except (OSError, IOError):
                        print "\nERROR: unable to write to %s...\n" % SAVE_PATH
                        return
                else:
                    print "\nOutput folder already exists! Skipping results output...\n"
                    return
                # Write out each scanned object's buffer, with best-effort
                # symlinks from the original filenames to the object UIDs
                for uid, scanObject in result.files.iteritems():
                    f = open("%s/%s" % (SAVE_PATH, uid), "wb")
                    f.write(scanObject.buffer)
                    f.close()
                    try:
                        if scanObject.filename and scanObject.parent:
                            linkPath = "%s/%s" % (SAVE_PATH, scanObject.filename.replace("/", "_"))
                            if not os.path.lexists(linkPath):
                                os.symlink("%s" % (uid), linkPath)
                        elif scanObject.filename:
                            filenameParts = scanObject.filename.split("/")
                            os.symlink("%s" % (uid), "%s/%s" % (SAVE_PATH, filenameParts[-1]))
                    except:
                        print "Unable to create symlink for %s" % (uid)
                f = open("%s/%s" % (SAVE_PATH, "results.log"), "wb")
                f.write(jsonResult)
                f.close()
                sys.exit(1)
        else:
            # Truncate any existing si-cloudscan.log before the workers append
            try:
                fh = open('si-cloudscan.log', 'w')
                fh.close()
            except:
                pass

            for fname in fileList:
                job_queue.put(fname)
            # One STOP sentinel per worker so each job loop terminates
            for i in range(num_procs):
                job_queue.put("STOP")

            print "File list length: %s" % len(fileList)

            for i in range(num_procs):
                Process(target=worker,
                        args=(options.nolimit, REQUEST_RETRIES, REQUEST_TIMEOUT,
                              SAVE_PATH, SOURCE, return_level, hostname, USE_SSH,
                              BROKER_HOST, SSH_HOST, ext_metadata,
                              options.ephID)).start()

            results_processed = 0
            while results_processed < len(fileList):
                logging.debug("Files left: %s" % (len(fileList) - results_processed))
                resultText = result_queue.get()
                # Append the result to the log
                fh = open('si-cloudscan.log', 'ab')
                fh.write('%s\n' % resultText)
                fh.close()
                results_processed += 1

            print 'Wrote results to si-cloudscan.log'
    except KeyboardInterrupt:
        print "Interrupted by user, exiting..."
        sys.exit(1)
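
# Example invocations (illustrative; the script name si-cloudscan.py is an
# assumption - the flags correspond to the OptionParser definitions in main()):
#
#   si-cloudscan.py /tmp/sample.exe                  # scan one file at the default return level
#   si-cloudscan.py -l full -o /tmp/results sample   # full results, buffers written to disk
#   si-cloudscan.py -R -p 8 /tmp/corpus              # recursive scan using 8 worker processes
#   cat sample.bin | si-cloudscan.py -b              # scan a buffer collected from stdin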
def worker(nolimit, REQUEST_RETRIES, REQUEST_TIMEOUT, SAVE_PATH, SOURCE,
           return_level, hostname, USE_SSH, BROKER_HOST, SSH_HOST,
           ext_metadata, ephID):
    # Set up the connection to the broker
    if USE_SSH:
        try:
            logging.debug("attempting to connect to broker at %s and SSH host %s"
                          % (BROKER_HOST, SSH_HOST))
            client = Client(BROKER_HOST, useSSH=True, sshHost=SSH_HOST)
        except RuntimeError:
            logging.exception("could not set up SSH tunnel to %s" % SSH_HOST)
            sys.exit(1)
    else:
        logging.debug("SSH has been disabled.")
        client = Client(BROKER_HOST)

    randNum = randint(1, 10000)

    # Consume file names from the job queue until the STOP sentinel is seen
    for fname in iter(job_queue.get, 'STOP'):
        print "Worker %s: Starting new request" % randNum
        try:
            # Try to read the file
            file_buffer = open(fname, 'rb').read()
            file_len = len(file_buffer)
            logging.debug("opened file %s with len %i" % (fname, file_len))
            if file_len > 20971520 and not nolimit:
                print "You're trying to scan a file larger than 20MB... Are you sure?"
                print "Use the --remove-limit flag if you really want to do this."
                print "File has not been scanned: %s" % fname
                result_queue.put("~~~~~~~~~~~~~~~~~~~~\nFile has not been scanned due to size: %s\n~~~~~~~~~~~~~~~~~~~~" % fname)
                continue
        except IOError:
            print "\nERROR: The file does not exist: %s\n" % (fname,)
            print "Moving to next file..."
            result_queue.put("~~~~~~~~~~~~~~~~~~~~\nFile has not been scanned due to an IO Error: %s\n~~~~~~~~~~~~~~~~~~~~" % fname)
            continue

        try:
            # Construct the object to be sent for scanning
            externalObject = ExternalObject(
                buffer=file_buffer,
                externalVars=ExternalVars(
                    filename=fname,
                    ephID=ephID,
                    extMetaData=ext_metadata,
                    source="%s-%s-%s" % (SOURCE, hostname, getpass.getuser())),
                level=return_level)

            starttime = time.time()
            result = client.send(externalObject, retry=REQUEST_RETRIES, timeout=REQUEST_TIMEOUT)
            if not result:
                result_queue.put("~~~~~~~~~~~~~~~~~~~~\nFile timed out in the scanner: %s\n~~~~~~~~~~~~~~~~~~~~" % fname)
                continue
            logging.debug("got reply in %s seconds" % str(time.time() - starttime))
            rootObject = getRootObject(result)
            jsonResult = getJSON(result)
            resultText = '%s\n' % jsonResult

            if return_level == level_full:
                FILE_SAVE_PATH = "%s/%s" % (SAVE_PATH, get_scanObjectUID(rootObject))
                if not os.path.exists(FILE_SAVE_PATH):
                    try:
                        os.makedirs(FILE_SAVE_PATH)
                        print "Writing results to %s..." % FILE_SAVE_PATH
                    except (OSError, IOError):
                        print "\nERROR: unable to write to %s...\n" % FILE_SAVE_PATH
                        return
                else:
                    # NOTE: returning here ends this worker process; any files
                    # still queued are left unscanned
                    print "\nOutput folder already exists! Skipping results output...\n"
                    return
                # Write out each scanned object's buffer, with best-effort
                # symlinks from the original filenames to the object UIDs
                for uid, scanObject in result.files.iteritems():
                    f = open("%s/%s" % (FILE_SAVE_PATH, uid), "wb")
                    f.write(scanObject.buffer)
                    f.close()
                    if scanObject.filename and scanObject.depth != 0:
                        linkPath = "%s/%s" % (FILE_SAVE_PATH, scanObject.filename.replace("/", "_"))
                        if not os.path.lexists(linkPath):
                            os.symlink("%s" % (uid), linkPath)
                    elif scanObject.filename:
                        filenameParts = scanObject.filename.split("/")
                        linkPath = "%s/%s" % (FILE_SAVE_PATH, filenameParts[-1])
                        if not os.path.lexists(linkPath):
                            os.symlink("%s" % (uid), linkPath)
                f = open("%s/%s" % (FILE_SAVE_PATH, "results.json"), "wb")
                f.write(jsonResult)
                f.close()

            result_queue.put(resultText)
        except:
            #logging.exception("error occurred collecting results")
            result_queue.put("~~~~~~~~~~~~~~~~~~~~\nUNKNOWN ERROR OCCURRED: %s\n~~~~~~~~~~~~~~~~~~~~" % fname)
            continue
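
# Illustrative sketch (not part of this module's flow): minimal wiring for the
# module-level job_queue/result_queue that worker() consumes. It mirrors how
# main() feeds the queues, using the same "STOP" sentinel. All literal values
# (file paths, broker address, timeout) are hypothetical, and level_metadata
# is assumed to be imported from laikaboss.constants at module level.
def _example_queue_wiring():
    from multiprocessing import Process

    files = ['/tmp/a.bin', '/tmp/b.bin']  # hypothetical inputs
    for fname in files:
        job_queue.put(fname)
    num_procs = 2
    for _ in range(num_procs):
        job_queue.put('STOP')  # one sentinel per worker so each job loop exits
    for _ in range(num_procs):
        # Positional args match worker()'s signature above
        Process(target=worker,
                args=(False, 3, 30000, '/tmp/results', 'si-cloudscan',
                      level_metadata, 'examplehost', False,
                      'tcp://127.0.0.1:5558', '', {}, '')).start()
    for _ in files:
        print result_queue.get()  # one result per submitted file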
def main(laika_broker, redis_host, redis_port):
    # Register signal handlers
    signal.signal(signal.SIGINT, handler)
    signal.signal(signal.SIGTERM, handler)

    # Connect to Redis
    r = redis.StrictRedis(host=redis_host, port=redis_port)

    # Create the Laika BOSS client object
    client = Client(laika_broker, async=True)

    while True:
        # Pop the next item off the queue (blocks until one is available)
        q_item = r.blpop('suricata_queue', timeout=0)
        key = q_item[1]
        print("Popped object: %s" % key)

        # Look up the file buffer and file metadata
        file_buffer = r.get("%s_buf" % key)
        file_meta = r.get("%s_meta" % key)

        if not file_buffer or not file_meta:
            print("File buffer or meta for key: %s not found. Skipping this object." % key)
            delete_keys(r, key)
            continue

        try:
            file_meta_dict = json.loads(file_meta)
        except:
            print("JSON decode error for key: %s. Skipping this object." % key)
            delete_keys(r, key)
            continue

        # Extract the file name from the HTTP request URI
        # Note: this is best effort - it will not always work
        filename = os.path.basename(file_meta_dict['http_request'].get('request', ""))
        filename = filename.split('?')[0]

        # Get the Content-Type for the direction the file traveled
        http_direction = file_meta_dict['http_direction']
        if http_direction == 'request':
            content_type = file_meta_dict['http_request'].get('Content-Type', [])
        elif http_direction == 'response':
            content_type = file_meta_dict['http_response'].get('Content-Type', [])
        else:
            content_type = []

        externalObject = ExternalObject(
            buffer=file_buffer,
            externalVars=ExternalVars(filename=filename,
                                      source="%s-%s" % ("suricata", "redis"),
                                      extMetaData=file_meta_dict,
                                      contentType=content_type),
            level=level_minimal)

        # Send to Laika BOSS for asynchronous scanning - no response expected
        client.send(externalObject)
        print("Sent %s for scanning...\n" % key)

        # Clean up the Redis keys for this object
        delete_keys(r, key)
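
# Illustrative sketch (producer side): the Redis key layout this consumer
# expects, derived from the reads above - the raw file at <key>_buf, JSON
# metadata at <key>_meta, and the bare key pushed onto the suricata_queue
# list. The function name, key, and metadata values are hypothetical.
def _example_enqueue(r, key, buf, meta_dict):
    r.set("%s_buf" % key, buf)
    r.set("%s_meta" % key, json.dumps(meta_dict))
    r.rpush('suricata_queue', key)  # blpop() in main() pops from the left

# Usage:
#   r = redis.StrictRedis(host='127.0.0.1', port=6379)
#   _example_enqueue(r, 'file-0001', open('sample.bin', 'rb').read(),
#                    {'http_direction': 'response',
#                     'http_request': {'request': '/downloads/sample.bin'},
#                     'http_response': {'Content-Type': ['application/octet-stream']}})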