def run(): """This client pulls PCAP 'views' (view summarize what's in a sample).""" # Grab server args args = client_helper.grab_server_args() # Start up workbench connection workbench = zerorpc.Client(timeout=300, heartbeat=60) workbench.connect('tcp://'+args['server']+':'+args['port']) # Test out getting the raw Bro logs from a PCAP file # Note: you can get a super nice 'generator' python list of dict by using # 'stream_sample' instead of 'get_sample'. data_path = os.path.join(os.path.dirname(os.path.realpath(__file__)),'../data/pcap') file_list = [os.path.join(data_path, child) for child in os.listdir(data_path)] for filename in file_list: # Skip OS generated files if '.DS_Store' in filename: continue # Process the pcap file with open(filename,'rb') as f: base_name = os.path.basename(filename) md5 = workbench.store_sample(f.read(), base_name, 'pcap') results = workbench.work_request('view_pcap', md5) print '\n<<< %s >>>' % base_name pprint.pprint(results)
def run():
    """This client tests workbench support for short md5s """

    # Server connection parameters
    args = client_helper.grab_server_args()

    # Connect to the workbench server
    workbench = zerorpc.Client(timeout=300, heartbeat=60)
    workbench.connect('tcp://' + args['server'] + ':' + args['port'])

    # Pull in both the bad and good PE samples
    file_list = []
    for subdir in ('../data/pe/bad', '../data/pe/good'):
        data_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), subdir)
        file_list += [os.path.join(data_path, child) for child in os.listdir(data_path)]

    for filename in file_list:
        # Skip OS generated files
        if '.DS_Store' in filename:
            continue
        with open(filename, 'rb') as sample:
            base_name = os.path.basename(filename)
            md5 = workbench.store_sample(sample.read(), base_name, 'exe')
            # Deliberately issue the request with only the first 6 hex chars
            results = workbench.work_request('meta', md5[:6])
            pprint.pprint(results)
def run():
    """This client looks for PEid signatures in PE Files."""

    # Server connection parameters
    args = client_helper.grab_server_args()

    # Connect to the workbench server
    workbench = zerorpc.Client(timeout=300, heartbeat=60)
    workbench.connect('tcp://' + args['server'] + ':' + args['port'])

    # Test out PEFile -> peid (two samples each from 'bad' and 'good')
    file_list = []
    for subdir in ('../data/pe/bad', '../data/pe/good'):
        data_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), subdir)
        file_list += [os.path.join(data_path, child) for child in os.listdir(data_path)][:2]

    for filename in file_list:
        # Skip OS generated files
        if '.DS_Store' in filename:
            continue
        with open(filename, 'rb') as sample:
            base_name = os.path.basename(filename)
            md5 = workbench.store_sample(sample.read(), base_name, 'exe')
            results = workbench.work_request('pe_peid', md5)
            pprint.pprint(results)
def run():
    """This client tests workbench support for short md5s """

    # Grab server args
    args = client_helper.grab_server_args()

    # Start up workbench connection
    workbench = zerorpc.Client(timeout=300, heartbeat=60)
    workbench.connect('tcp://'+args['server']+':'+args['port'])

    # Gather both the bad and the good PE samples
    base_dir = os.path.dirname(os.path.realpath(__file__))
    sample_paths = [os.path.join(base_dir, '../data/pe/bad', child)
                    for child in os.listdir(os.path.join(base_dir, '../data/pe/bad'))]
    sample_paths += [os.path.join(base_dir, '../data/pe/good', child)
                     for child in os.listdir(os.path.join(base_dir, '../data/pe/good'))]

    for sample_path in sample_paths:
        # Skip OS generated files
        if '.DS_Store' in sample_path:
            continue
        with open(sample_path, 'rb') as sample:
            base_name = os.path.basename(sample_path)
            md5 = workbench.store_sample(sample.read(), base_name, 'exe')
            # Only the first 6 hex chars of the md5 are sent on purpose
            results = workbench.work_request('meta', md5[:6])
            pprint.pprint(results)
def run():
    """This client looks for PEid signatures in PE Files."""

    # Grab server args
    args = client_helper.grab_server_args()

    # Start up workbench connection
    workbench = zerorpc.Client(timeout=300, heartbeat=60)
    workbench.connect('tcp://'+args['server']+':'+args['port'])

    # Test out PEFile -> peid: first two samples from each directory
    base_dir = os.path.dirname(os.path.realpath(__file__))
    sample_paths = [os.path.join(base_dir, '../data/pe/bad', child)
                    for child in os.listdir(os.path.join(base_dir, '../data/pe/bad'))][:2]
    sample_paths += [os.path.join(base_dir, '../data/pe/good', child)
                     for child in os.listdir(os.path.join(base_dir, '../data/pe/good'))][:2]

    for sample_path in sample_paths:
        # Skip OS generated files
        if '.DS_Store' in sample_path:
            continue
        with open(sample_path, 'rb') as sample:
            base_name = os.path.basename(sample_path)
            md5 = workbench.store_sample(sample.read(), base_name, 'exe')
            results = workbench.work_request('pe_peid', md5)
            pprint.pprint(results)
def run(): """This client shows workbench extacting files from a zip file.""" # Grab server args args = client_helper.grab_server_args() # Start up workbench connection workbench = zerorpc.Client(timeout=300, heartbeat=60) workbench.connect("tcp://" + args["server"] + ":" + args["port"]) # Test out zip data data_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "../data/zip") file_list = [os.path.join(data_path, child) for child in os.listdir(data_path)] for filename in file_list: with open(filename, "rb") as f: base_name = os.path.basename(filename) md5 = workbench.store_sample(f.read(), base_name, "zip") results = workbench.work_request("view", md5) print "Filename: %s " % (base_name) pprint.pprint(results) # The unzip worker gives you a list of md5s back # Run meta on all the unzipped files. results = workbench.work_request("unzip", md5) print "\n*** Filename: %s ***" % (base_name) for child_md5 in results["unzip"]["payload_md5s"]: pprint.pprint(workbench.work_request("meta", child_md5))
def run(): """This client pulls PCAP meta data.""" # Grab server args args = client_helper.grab_server_args() # Start up workbench connection workbench = zerorpc.Client(timeout=300, heartbeat=60) workbench.connect('tcp://' + args['server'] + ':' + args['port']) # Test out PCAP data data_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), '../data/pcap') file_list = [ os.path.join(data_path, child) for child in os.listdir(data_path) ] for filename in file_list: # Skip OS generated files if '.DS_Store' in filename: continue with open(filename, 'rb') as f: base_name = os.path.basename(filename) md5 = workbench.store_sample(f.read(), base_name, 'pcap') results = workbench.work_request('view_pcap', md5) print 'Filename: %s results:' % (base_name) pprint.pprint(results)
def run():
    """This client pulls PCAP files for building report.

    Returns:
        A list with `view_pcap` , `meta` and `filename` objects.
    """
    global WORKBENCH

    # Grab server args
    args = client_helper.grab_server_args()

    # Start up workbench connection (kept in the module-level WORKBENCH global)
    WORKBENCH = zerorpc.Client(timeout=300, heartbeat=60)
    WORKBENCH.connect('tcp://'+args['server']+':'+args['port'])

    data_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), '../data/pcap')

    results = []
    for child in os.listdir(data_path):
        pcap_path = os.path.join(data_path, child)

        # Skip OS generated files
        if '.DS_Store' in pcap_path:
            continue

        # Process the pcap file (the full path is used as the stored filename)
        with open(pcap_path, 'rb') as pcap_file:
            md5 = WORKBENCH.store_sample(pcap_file.read(), pcap_path, 'pcap')
            result = WORKBENCH.work_request('view_pcap', md5)
            # Merge in the 'meta' output and a short filename for the report
            result.update(WORKBENCH.work_request('meta', result['view_pcap']['md5']))
            result['filename'] = result['meta']['filename'].split('/')[-1]
            results.append(result)
    return results
def run(): """This client pulls PCAP 'views' (view summarize what's in a sample).""" # Grab server args args = client_helper.grab_server_args() # Start up workbench connection workbench = zerorpc.Client(timeout=300, heartbeat=60) workbench.connect('tcp://' + args['server'] + ':' + args['port']) # Test out getting the raw Bro logs from a PCAP file # Note: you can get a super nice 'generator' python list of dict by using # 'stream_sample' instead of 'get_sample'. data_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), '../data/pcap') file_list = [ os.path.join(data_path, child) for child in os.listdir(data_path) ] for filename in file_list: # Skip OS generated files if '.DS_Store' in filename: continue # Process the pcap file with open(filename, 'rb') as f: base_name = os.path.basename(filename) md5 = workbench.store_sample(f.read(), base_name, 'pcap') results = workbench.work_request('view_pcap', md5) print '\n<<< %s >>>' % base_name pprint.pprint(results)
def run(): """This client pushes PCAPs -> MetaDaa -> ELS Indexer.""" # Grab server args args = client_helper.grab_server_args() # Start up workbench connection workbench = zerorpc.Client(timeout=300, heartbeat=60) workbench.connect('tcp://'+args['server']+':'+args['port']) # Test out PCAP data data_path = os.path.join(os.path.dirname(os.path.realpath(__file__)),'../data/pcap') file_list = [os.path.join(data_path, child) for child in os.listdir(data_path)] for filename in file_list: # Skip OS generated files if '.DS_Store' in filename: continue with open(filename,'rb') as pcap_file: base_name = os.path.basename(filename) md5 = workbench.store_sample(pcap_file.read(), base_name, 'pcap') # Index the view_pcap output (notice we can ask for any worker output) # Also (super important) it all happens on the server side. workbench.index_worker_output('view_pcap', md5, 'view_pcap', None) print '\n\n<<< PCAP Data: %s Indexed>>>' % (base_name)
def run():
    """This client pushes a file into Workbench."""

    # Grab server args
    args = client_helper.grab_server_args()

    # Start up workbench connection
    workbench = zerorpc.Client(timeout=300, heartbeat=60)
    workbench.connect('tcp://'+args['server']+':'+args['port'])

    # Upload the files into workbench
    my_file = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                           '../data/pcap/gold_xxx.pcap')
    with open(my_file, 'rb') as sample:
        filename = os.path.basename(my_file)
        raw_bytes = sample.read()

        # Upload the file in 1MB chunks to workbench
        md5_list = [workbench.store_sample(chunk, filename, 'exe')
                    for chunk in chunks(raw_bytes, 1024*1024)]

        # Now we just ask Workbench to combine these chunks and verify
        # the combined md5 matches a single whole-file store
        combined_md5 = workbench.combine_samples(md5_list, filename, 'exe')
        real_md5 = workbench.store_sample(raw_bytes, filename, 'exe')
        assert(combined_md5 == real_md5)
def run(): """This client gets metadata about log files.""" # Grab server args args = client_helper.grab_server_args() # Start up workbench connection workbench = zerorpc.Client(timeout=300, heartbeat=60) workbench.connect('tcp://'+args['server']+':'+args['port']) # Test out some log files data_path = os.path.join(os.path.dirname(os.path.realpath(__file__)),'../data/log') file_list = [os.path.join(data_path, child) for child in os.listdir(data_path)] for filename in file_list: with open(filename,'rb') as f: # Skip OS generated files base_name = os.path.basename(filename) if base_name == '.DS_Store': continue md5 = workbench.store_sample(f.read(), base_name, 'log') results = workbench.work_request('view_log_meta', md5) print 'Filename: %s\n' % (base_name) pprint.pprint(results) stream_log = workbench.stream_sample(md5, {'max_rows':20}) for row in stream_log: print row
def run(): """This client shows workbench extacting files from a zip file.""" # Grab server args args = client_helper.grab_server_args() # Start up workbench connection workbench = zerorpc.Client(timeout=300, heartbeat=60) workbench.connect('tcp://'+args['server']+':'+args['port']) # Test out zip data data_path = os.path.join(os.path.dirname(os.path.realpath(__file__)),'../data/zip') file_list = [os.path.join(data_path, child) for child in os.listdir(data_path)] for filename in file_list: with open(filename,'rb') as f: base_name = os.path.basename(filename) md5 = workbench.store_sample(f.read(), base_name, 'zip') results = workbench.work_request('view', md5) print 'Filename: %s ' % (base_name) pprint.pprint(results) # The unzip worker gives you a list of md5s back # Run meta on all the unzipped files. results = workbench.work_request('unzip', md5) print '\n*** Filename: %s ***' % (base_name) for child_md5 in results['unzip']['payload_md5s']: pprint.pprint(workbench.work_request('meta', child_md5))
def run(): """This client pushes PCAPs -> MetaDaa -> ELS Indexer.""" # Grab server args args = client_helper.grab_server_args() # Start up workbench connection workbench = zerorpc.Client(timeout=300, heartbeat=60) workbench.connect('tcp://' + args['server'] + ':' + args['port']) # Test out PCAP data data_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), '../data/pcap') file_list = [ os.path.join(data_path, child) for child in os.listdir(data_path) ] for filename in file_list: # Skip OS generated files if '.DS_Store' in filename: continue with open(filename, 'rb') as pcap_file: base_name = os.path.basename(filename) md5 = workbench.store_sample(pcap_file.read(), base_name, 'pcap') # Index the view_pcap output (notice we can ask for any worker output) # Also (super important) it all happens on the server side. workbench.index_worker_output('view_pcap', md5, 'view_pcap', None) print '\n\n<<< PCAP Data: %s Indexed>>>' % (base_name)
def run(): """This client gets the raw bro logs from PCAP files.""" # Grab server args args = client_helper.grab_server_args() # Start up workbench connection workbench = zerorpc.Client(timeout=300, heartbeat=60) workbench.connect('tcp://'+args['server']+':'+args['port']) # Test out getting the raw Bro logs from a PCAP file # Note: you can get a super nice 'generator' python list of dict by using # 'stream_sample' instead of 'get_sample'. data_path = os.path.join(os.path.dirname(os.path.realpath(__file__)),'../data/pcap') file_list = [os.path.join(data_path, child) for child in os.listdir(data_path)] for filename in file_list: # Skip OS generated files if '.DS_Store' in filename: continue with open(filename,'rb') as f: base_name = os.path.basename(filename) md5 = workbench.store_sample(f.read(), base_name, 'pcap') results = workbench.work_request('pcap_bro', md5) # Results is just a dictionary of Bro log file names and their MD5s in workbench for log_name, md5 in results['pcap_bro'].iteritems(): # Just want the logs if log_name.endswith('_log'): bro_log = workbench.get_sample(md5)['sample']['raw_bytes'] print '\n\n<<< Bro log: %s >>>\n %s' % (log_name, str(bro_log)[:500])
def run(): """This client gets extracts URLs from PCAP files (via Bro logs).""" # Grab server args args = client_helper.grab_server_args() # Start up workbench connection workbench = zerorpc.Client(timeout=300, heartbeat=60) workbench.connect('tcp://'+args['server']+':'+args['port']) # Loop through all the pcaps and collect a set of urls(hosts) from the http_log files urls = set() data_path = os.path.join(os.path.dirname(os.path.realpath(__file__)),'../data/pcap') file_list = [os.path.join(data_path, child) for child in os.listdir(data_path)] for filename in file_list: # Skip OS generated files if '.DS_Store' in filename: continue with open(filename,'rb') as f: base_name = os.path.basename(filename) pcap_md5 = workbench.store_sample(f.read(), base_name, 'pcap') results = workbench.work_request('pcap_bro', pcap_md5) # Just grab the http log if 'http_log' in results['pcap_bro']: log_md5 = results['pcap_bro']['http_log'] http_data = workbench.stream_sample(log_md5) # None Means all data urls = set( row['host'] for row in http_data) print '<<< %s >>>' % filename pprint.pprint(list(urls)) print
def run(): """This client gets metadata about log files.""" # Grab server args args = client_helper.grab_server_args() # Start up workbench connection workbench = zerorpc.Client(timeout=300, heartbeat=60) workbench.connect('tcp://' + args['server'] + ':' + args['port']) # Test out some log files data_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), '../data/log') file_list = [ os.path.join(data_path, child) for child in os.listdir(data_path) ] for filename in file_list: with open(filename, 'rb') as f: # Skip OS generated files base_name = os.path.basename(filename) if base_name == '.DS_Store': continue md5 = workbench.store_sample(f.read(), base_name, 'log') results = workbench.work_request('view_log_meta', md5) print 'Filename: %s\n' % (base_name) pprint.pprint(results) stream_log = workbench.stream_sample(md5, {'max_rows': 20}) for row in stream_log: print row
def run():
    """This client pushes PE Files -> ELS Indexer."""
    # Grab server args
    args = client_helper.grab_server_args()

    # Start up workbench connection
    workbench = zerorpc.Client(timeout=300, heartbeat=60)
    workbench.connect('tcp://'+args['server']+':'+args['port'])

    # Test out PEFile -> strings -> indexer -> search (first 20 'bad' samples only)
    data_path = os.path.join(os.path.dirname(os.path.realpath(__file__)),'../data/pe/bad')
    file_list = [os.path.join(data_path, child) for child in os.listdir(data_path)][:20]
    for filename in file_list:

        # Skip OS generated files
        if '.DS_Store' in filename:
            continue

        with open(filename, 'rb') as f:
            base_name = os.path.basename(filename)
            md5 = workbench.store_sample(f.read(), base_name, 'exe')

            # Index the strings and features output (notice we can ask for any worker output)
            # Also (super important) it all happens on the server side.
            workbench.index_worker_output('strings', md5, 'strings', None)
            print '\n<<< Strings for PE: %s Indexed>>>' % (base_name)
            workbench.index_worker_output('pe_features', md5, 'pe_features', None)
            print '<<< Features for PE: %s Indexed>>>' % (base_name)

    # Well we should execute some queries against ElasticSearch at this point but as of
    # version 1.2+ the dynamic scripting disabled by default, see
    # http://www.elasticsearch.org/guide/en/elasticsearch/reference/current/modules-scripting.html#_enabling_dynamic_scripting

    # Now actually do something interesting with our ELS index
    # ES Facets are kewl (http://www.elasticsearch.org/guide/en/elasticsearch/reference/current/search-facets.html)
    facet_query = '{"facets" : {"tag" : {"terms" : {"field" : "string_list"}}}}'
    results = workbench.search_index('strings', facet_query)
    try:
        print '\nQuery: %s' % facet_query
        print 'Number of hits: %d' % results['hits']['total']
        print 'Max Score: %f' % results['hits']['max_score']
        pprint.pprint(results['facets'])
    except TypeError:
        # A Stub Indexer returns something non-subscriptable here
        print 'Probably using a Stub Indexer, if you want an ELS Indexer see the readme'

    # Fuzzy is kewl (http://www.elasticsearch.org/guide/en/elasticsearch/reference/current/query-dsl-fuzzy-query.html)
    fuzzy_query = '{"fields":["md5","sparse_features.imported_symbols"],' \
                  '"query": {"fuzzy" : {"sparse_features.imported_symbols" : "loadlibrary"}}}'
    results = workbench.search_index('pe_features', fuzzy_query)
    try:
        print '\nQuery: %s' % fuzzy_query
        print 'Number of hits: %d' % results['hits']['total']
        print 'Max Score: %f' % results['hits']['max_score']
        pprint.pprint([(hit['fields']['md5'], hit['fields']['sparse_features.imported_symbols'])
                       for hit in results['hits']['hits']])
    except TypeError:
        # Same Stub Indexer fallback as above
        print 'Probably using a Stub Indexer, if you want an ELS Indexer see the readme'
def run(): """This client pushes a big directory of different files into Workbench.""" # Grab server args args = client_helper.grab_server_args() # Start up workbench connection workbench = zerorpc.Client(timeout=300, heartbeat=60) workbench.connect('tcp://'+args['server']+':'+args['port']) # Grab all the filenames from the data directory data_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)),'../data') file_list = all_files_in_directory(data_dir) # Upload the files into workbench md5_list = [] for path in file_list: # Skip OS generated files if '.DS_Store' in path: continue with open(path,'rb') as f: filename = os.path.basename(path) # Here we're going to save network traffic by asking # Workbench if it already has this md5 raw_bytes = f.read() md5 = hashlib.md5(raw_bytes).hexdigest() md5_list.append(md5) if workbench.has_sample(md5): print 'Workbench already has this sample %s' % md5 else: # Store the sample into workbench md5 = workbench.store_sample(raw_bytes, filename, 'unknown') print 'Filename %s uploaded: type_tag %s, md5 %s' % (filename, 'unknown', md5) # Okay now explode any container types zip_files = workbench.generate_sample_set('zip') _foo = workbench.set_work_request('unzip', zip_files); list(_foo) # See Issue #306 pcap_files = workbench.generate_sample_set('pcap') _foo = workbench.set_work_request('pcap_bro', pcap_files); list(_foo) # See Issue #306 mem_files = workbench.generate_sample_set('mem') _foo = workbench.set_work_request('mem_procdump', mem_files); list(_foo) # See Issue #306 # Make sure all files are properly identified print 'Info: Ensuring File Identifications...' type_tag_set = set() all_files = workbench.generate_sample_set() meta_all = workbench.set_work_request('meta', all_files) for meta in meta_all: type_tag_set.add(meta['type_tag']) if meta['type_tag'] in ['unknown', 'own']: print meta pprint.pprint(type_tag_set)
def run(): """This client generates customer reports on all the samples in workbench.""" # Grab server args args = client_helper.grab_server_args() # Start up workbench connection workbench = zerorpc.Client(timeout=300, heartbeat=60) workbench.connect('tcp://'+args['server']+':'+args['port']) all_set = workbench.generate_sample_set() results = workbench.set_work_request('view_customer', all_set) for customer in results: print customer['customer']
def run(): """This client generates customer reports on all the samples in workbench.""" # Grab server args args = client_helper.grab_server_args() # Start up workbench connection workbench = zerorpc.Client(timeout=300, heartbeat=60) workbench.connect('tcp://' + args['server'] + ':' + args['port']) all_set = workbench.generate_sample_set() results = workbench.set_work_request('view_customer', all_set) for customer in results: print customer['customer']
def run(): ''' This client calls a bunch of help commands from workbench ''' # Grab server args args = client_helper.grab_server_args() # Start up workbench connection workbench = zerorpc.Client(timeout=300, heartbeat=60) workbench.connect('tcp://'+args['server']+':'+args['port']) # Call help methods print workbench.help() print workbench.help('basic') print workbench.help('commands') print workbench.help('store_sample') print workbench.help('workers') print workbench.help('meta') # Call a test worker print workbench.test_worker('meta')
def run(): ''' This client calls a bunch of help commands from workbench ''' # Grab server args args = client_helper.grab_server_args() # Start up workbench connection workbench = zerorpc.Client(timeout=300, heartbeat=60) workbench.connect('tcp://' + args['server'] + ':' + args['port']) # Call help methods print workbench.help() print workbench.help('basic') print workbench.help('commands') print workbench.help('store_sample') print workbench.help('workers') print workbench.help('meta') # Call a test worker print workbench.test_worker('meta')
def run(): """This client pushes a file into Workbench.""" # Grab server args args = client_helper.grab_server_args() # Start up workbench connection workbench = zerorpc.Client(timeout=300, heartbeat=60) workbench.connect('tcp://'+args['server']+':'+args['port']) # Upload the file into workbench my_file = os.path.join(os.path.dirname(os.path.realpath(__file__)), '../data/pe/bad/033d91aae8ad29ed9fbb858179271232') with open(my_file,'rb') as f: # Throw file into workbench filename = os.path.basename(my_file) raw_bytes = f.read() md5 = workbench.store_sample(raw_bytes, filename, 'exe') results = workbench.work_request('view', md5) print 'Filename: %s' % filename pprint.pprint(results)
def run(): """This client pushes a file into Workbench.""" # Grab server args args = client_helper.grab_server_args() # Start up workbench connection workbench = zerorpc.Client(timeout=300, heartbeat=60) workbench.connect('tcp://' + args['server'] + ':' + args['port']) # Upload the file into workbench my_file = os.path.join(os.path.dirname(os.path.realpath(__file__)), '../data/pe/bad/033d91aae8ad29ed9fbb858179271232') with open(my_file, 'rb') as f: # Throw file into workbench filename = os.path.basename(my_file) raw_bytes = f.read() md5 = workbench.store_sample(raw_bytes, filename, 'exe') results = workbench.work_request('view', md5) print 'Filename: %s' % filename pprint.pprint(results)
def __init__(self): ''' Workbench CLI Initialization ''' # Workbench CLI version self.version = version.__version__ # Workbench CLI Help self.help = help_content.WorkbenchShellHelp() # Grab server arguments self.server_info = client_helper.grab_server_args() # Spin up workbench server self.workbench = None self._connect(self.server_info) # Create a user session self.session = self.Session() # We have a command_set for our Interactive Shell self.command_dict = self._generate_command_dict() self.command_set = set(self.command_dict.keys()) # Our Interactive IPython shell self.ipshell = None # Our File Streamer self.streamer = file_streamer.FileStreamer(self.workbench, self._progress_print) # Register infomation (for help and other stuff) with Workbench self._register_info() # Help decorator self.help_deco = repr_to_str_decorator.r_to_s(self.workbench.help) # What OS/Version do we have? self.beer = '\360\237\215\272' if sys.platform == 'darwin' else ' '
def run(): """This client gets the raw bro logs from PCAP files.""" # Grab server args args = client_helper.grab_server_args() # Start up workbench connection workbench = zerorpc.Client(timeout=300, heartbeat=60) workbench.connect('tcp://' + args['server'] + ':' + args['port']) # Test out getting the raw Bro logs from a PCAP file # Note: you can get a super nice 'generator' python list of dict by using # 'stream_sample' instead of 'get_sample'. data_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), '../data/pcap') file_list = [ os.path.join(data_path, child) for child in os.listdir(data_path) ] for filename in file_list: # Skip OS generated files if '.DS_Store' in filename: continue with open(filename, 'rb') as f: base_name = os.path.basename(filename) md5 = workbench.store_sample(f.read(), base_name, 'pcap') results = workbench.work_request('pcap_bro', md5) # Results is just a dictionary of Bro log file names and their MD5s in workbench for log_name, md5 in results['pcap_bro'].iteritems(): # Just want the logs if log_name.endswith('_log'): bro_log = workbench.get_sample(md5)['sample']['raw_bytes'] print '\n\n<<< Bro log: %s >>>\n %s' % (log_name, str(bro_log)[:500])
def run(): """This client pulls PCAP meta data.""" # Grab server args args = client_helper.grab_server_args() # Start up workbench connection workbench = zerorpc.Client(timeout=300, heartbeat=60) workbench.connect('tcp://'+args['server']+':'+args['port']) # Test out PCAP data data_path = os.path.join(os.path.dirname(os.path.realpath(__file__)),'../data/pcap') file_list = [os.path.join(data_path, child) for child in os.listdir(data_path)] for filename in file_list: # Skip OS generated files if '.DS_Store' in filename: continue with open(filename,'rb') as f: base_name = os.path.basename(filename) md5 = workbench.store_sample(f.read(), base_name, 'pcap') results = workbench.work_request('view_pcap', md5) print 'Filename: %s results:' % (base_name) pprint.pprint(results)
def run(): """This client gets extracts URLs from PCAP files (via Bro logs).""" # Grab server args args = client_helper.grab_server_args() # Start up workbench connection workbench = zerorpc.Client(timeout=300, heartbeat=60) workbench.connect('tcp://' + args['server'] + ':' + args['port']) # Loop through all the pcaps and collect a set of urls(hosts) from the http_log files urls = set() data_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), '../data/pcap') file_list = [ os.path.join(data_path, child) for child in os.listdir(data_path) ] for filename in file_list: # Skip OS generated files if '.DS_Store' in filename: continue with open(filename, 'rb') as f: base_name = os.path.basename(filename) pcap_md5 = workbench.store_sample(f.read(), base_name, 'pcap') results = workbench.work_request('pcap_bro', pcap_md5) # Just grab the http log if 'http_log' in results['pcap_bro']: log_md5 = results['pcap_bro']['http_log'] http_data = workbench.stream_sample( log_md5) # None Means all data urls = set(row['host'] for row in http_data) print '<<< %s >>>' % filename pprint.pprint(list(urls)) print
def run():
    """This client generates a similarity graph from features in PE Files."""
    # Grab server args
    args = client_helper.grab_server_args()

    # Start up workbench connection
    workbench = zerorpc.Client(timeout=300, heartbeat=60)
    workbench.connect('tcp://' + args['server'] + ':' + args['port'])

    # Test out PEFile -> pe_deep_sim -> pe_jaccard_sim -> graph
    # (first 5 samples each from the 'bad' and 'good' directories)
    data_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), '../data/pe/bad')
    bad_files = [os.path.join(data_path, child) for child in os.listdir(data_path)][:5]
    data_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), '../data/pe/good')
    good_files = [os.path.join(data_path, child) for child in os.listdir(data_path)][:5]

    # Clear any graph in the Neo4j database
    workbench.clear_graph_db()

    # First throw them into workbench and add them as nodes into the graph
    all_md5s = add_it(workbench, bad_files, ['exe', 'bad']) + add_it(workbench, good_files, ['exe', 'good'])

    # Make a sample set
    sample_set = workbench.store_sample_set(all_md5s)

    # Compute pe_features on all files of type pe, just pull back the sparse features
    import_gen = workbench.set_work_request('pe_features', sample_set,
                                            ['md5', 'sparse_features.imported_symbols'])
    imports = [{'md5': r['md5'], 'features': r['imported_symbols']} for r in import_gen]

    # Compute pe_features on all files of type pe, just pull back the sparse features
    warning_gen = workbench.set_work_request('pe_features', sample_set,
                                             ['md5', 'sparse_features.pe_warning_strings'])
    warnings = [{'md5': r['md5'], 'features': r['pe_warning_strings']} for r in warning_gen]

    # Compute strings on all files of type pe, just pull back the string_list
    string_gen = workbench.set_work_request('strings', sample_set, ['md5', 'string_list'])
    strings = [{'md5': r['md5'], 'features': r['string_list']} for r in string_gen]

    # Compute pe_peid on all files of type pe, just pull back the match_list
    # Fixme: commenting this out until we figure out why peid is SO slow
    ''' peid_gen = workbench.set_work_request('pe_peid', sample_set, ['md5', 'match_list']})
        peids = [{'md5': r['md5'], 'features': r['match_list']} for r in peid_gen] '''

    # Compute the Jaccard Index between imported symbols and store as relationships
    sims = jaccard_sims(imports)
    for sim_info in sims:
        workbench.add_rel(sim_info['source'], sim_info['target'], 'imports')

    # Compute the Jaccard Index between warnings and store as relationships
    sims = jaccard_sims(warnings)
    for sim_info in sims:
        workbench.add_rel(sim_info['source'], sim_info['target'], 'warnings')

    # Compute the Jaccard Index between strings and store as relationships
    sims = jaccard_sims(strings)
    for sim_info in sims:
        workbench.add_rel(sim_info['source'], sim_info['target'], 'strings')

    # Compute the Jaccard Index between peids and store as relationships
    # Fixme: commenting this out until we figure out why peid is SO slow
    ''' sims = jaccard_sims(peids)
        for sim_info in sims:
            workbench.add_rel(sim_info['source'], sim_info['target'], 'peids') '''

    # Compute pe_deep_sim on all files of type pe
    results = workbench.set_work_request('pe_deep_sim', sample_set)

    # Store the ssdeep sims as relationships
    for result in list(results):
        for sim_info in result['sim_list']:
            workbench.add_rel(result['md5'], sim_info['md5'], 'ssdeep')

    # Let them know where they can get their graph
    print 'All done: go to http://localhost:7474/browser and execute this query: "%s"' % \
        ('match (n)-[r]-() return n,r')
def run(): """This client generates a similarity graph from features in PE Files.""" # Grab server args args = client_helper.grab_server_args() # Start up workbench connection workbench = zerorpc.Client(timeout=300, heartbeat=60) workbench.connect('tcp://'+args['server']+':'+args['port']) # Test out PEFile -> pe_deep_sim -> pe_jaccard_sim -> graph data_path = os.path.join(os.path.dirname(os.path.realpath(__file__)),'../data/pe/bad') bad_files = [os.path.join(data_path, child) for child in os.listdir(data_path)][:5] data_path = os.path.join(os.path.dirname(os.path.realpath(__file__)),'../data/pe/good') good_files = [os.path.join(data_path, child) for child in os.listdir(data_path)][:5] # Clear any graph in the Neo4j database workbench.clear_graph_db() # First throw them into workbench and add them as nodes into the graph all_md5s = add_it(workbench, bad_files, ['exe', 'bad']) + add_it(workbench, good_files, ['exe', 'good']) # Make a sample set sample_set = workbench.store_sample_set(all_md5s) # Compute pe_features on all files of type pe, just pull back the sparse features import_gen = workbench.set_work_request('pe_features', sample_set, ['md5', 'sparse_features.imported_symbols']) imports = [{'md5': r['md5'], 'features': r['imported_symbols']} for r in import_gen] # Compute pe_features on all files of type pe, just pull back the sparse features warning_gen = workbench.set_work_request('pe_features', sample_set, ['md5', 'sparse_features.pe_warning_strings']) warnings = [{'md5': r['md5'], 'features': r['pe_warning_strings']} for r in warning_gen] # Compute strings on all files of type pe, just pull back the string_list string_gen = workbench.set_work_request('strings', sample_set, ['md5', 'string_list']) strings = [{'md5': r['md5'], 'features': r['string_list']} for r in string_gen] # Compute pe_peid on all files of type pe, just pull back the match_list # Fixme: commenting this out until we figure out why peid is SO slow ''' peid_gen = 
workbench.set_work_request('pe_peid', sample_set, ['md5', 'match_list']}) peids = [{'md5': r['md5'], 'features': r['match_list']} for r in peid_gen] ''' # Compute the Jaccard Index between imported systems and store as relationships sims = jaccard_sims(imports) for sim_info in sims: workbench.add_rel(sim_info['source'], sim_info['target'], 'imports') # Compute the Jaccard Index between warnings and store as relationships sims = jaccard_sims(warnings) for sim_info in sims: workbench.add_rel(sim_info['source'], sim_info['target'], 'warnings') # Compute the Jaccard Index between strings and store as relationships sims = jaccard_sims(strings) for sim_info in sims: workbench.add_rel(sim_info['source'], sim_info['target'], 'strings') # Compute the Jaccard Index between peids and store as relationships # Fixme: commenting this out until we figure out why peid is SO slow ''' sims = jaccard_sims(peids) for sim_info in sims: workbench.add_rel(sim_info['source'], sim_info['target'], 'peids') ''' # Compute pe_deep_sim on all files of type pe results = workbench.set_work_request('pe_deep_sim', sample_set) # Store the ssdeep sims as relationships for result in list(results): for sim_info in result['sim_list']: workbench.add_rel(result['md5'], sim_info['md5'], 'ssdeep') # Let them know where they can get there graph print 'All done: go to http://localhost:7474/browser and execute this query: "%s"' % \ ('match (n)-[r]-() return n,r')
def run(): """This client pushes PE Files -> ELS Indexer.""" # Grab server args args = client_helper.grab_server_args() # Start up workbench connection workbench = zerorpc.Client(timeout=300, heartbeat=60) workbench.connect('tcp://' + args['server'] + ':' + args['port']) # Test out PEFile -> strings -> indexer -> search data_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), '../data/pe/bad') file_list = [ os.path.join(data_path, child) for child in os.listdir(data_path) ][:20] for filename in file_list: # Skip OS generated files if '.DS_Store' in filename: continue with open(filename, 'rb') as f: base_name = os.path.basename(filename) md5 = workbench.store_sample(f.read(), base_name, 'exe') # Index the strings and features output (notice we can ask for any worker output) # Also (super important) it all happens on the server side. workbench.index_worker_output('strings', md5, 'strings', None) print '\n<<< Strings for PE: %s Indexed>>>' % (base_name) workbench.index_worker_output('pe_features', md5, 'pe_features', None) print '<<< Features for PE: %s Indexed>>>' % (base_name) # Well we should execute some queries against ElasticSearch at this point but as of # version 1.2+ the dynamic scripting disabled by default, see # 'http://www.elasticsearch.org/guide/en/elasticsearch/reference/current/modules-scripting.html#_enabling_dynamic_scripting # Now actually do something interesing with our ELS index # ES Facets are kewl (http://www.elasticsearch.org/guide/en/elasticsearch/reference/current/search-facets.html) facet_query = '{"facets" : {"tag" : {"terms" : {"field" : "string_list"}}}}' results = workbench.search_index('strings', facet_query) try: print '\nQuery: %s' % facet_query print 'Number of hits: %d' % results['hits']['total'] print 'Max Score: %f' % results['hits']['max_score'] pprint.pprint(results['facets']) except TypeError: print 'Probably using a Stub Indexer, if you want an ELS Indexer see the readme' # Fuzzy is kewl 
(http://www.elasticsearch.org/guide/en/elasticsearch/reference/current/query-dsl-fuzzy-query.html) fuzzy_query = '{"fields":["md5","sparse_features.imported_symbols"],' \ '"query": {"fuzzy" : {"sparse_features.imported_symbols" : "loadlibrary"}}}' results = workbench.search_index('pe_features', fuzzy_query) try: print '\nQuery: %s' % fuzzy_query print 'Number of hits: %d' % results['hits']['total'] print 'Max Score: %f' % results['hits']['max_score'] pprint.pprint([(hit['fields']['md5'], hit['fields']['sparse_features.imported_symbols']) for hit in results['hits']['hits']]) except TypeError: print 'Probably using a Stub Indexer, if you want an ELS Indexer see the readme'