def readrows(self):
    """Yield rows from every file in self._files, one BroLogReader at a time.

    Plain log files are read directly; gzipped files ('.gz') are first
    decompressed into a temporary file, which is removed after reading.

    Yields:
        dict: one parsed Bro log row at a time
    """
    # For each file (may be just one) create a BroLogReader and use it
    for self._filepath in self._files:
        # Sentinel: only clean up a temp file when this iteration created one.
        # (The original unconditionally removed tmp.name, which raised a
        # NameError for non-gz files / removed a stale temp from a previous
        # iteration, silently swallowed by a bare except.)
        tmp = None

        # Check if the file is zipped
        if self._filepath.endswith('.gz'):
            tmp = tempfile.NamedTemporaryFile(delete=False)
            with gzip.open(self._filepath, 'rb') as f_in, open(tmp.name, 'wb') as f_out:
                shutil.copyfileobj(f_in, f_out)
            tmp.close()  # release the handle; only the path is needed from here on

            # Set the file path to the new temp file
            self._filepath = tmp.name

        # Create a BroLogReader and yield its rows
        reader = bro_log_reader.BroLogReader(self._filepath)
        for row in reader.readrows():
            yield row

        # Best-effort removal of the temp file (if any)
        if tmp is not None:
            try:
                os.remove(tmp.name)
                print('Removed temporary file {:s}...'.format(tmp.name))
            except OSError:
                pass
def test_bro_files(self):
    """Read http.log with a BroLogReader and verify that an analysis
    session can be created from the parsed rows.
    """
    filename = 'http.log'
    weblogs = []
    key_list = None
    current_user = self.user
    type_file = AnalysisSession.TYPE_FILES.bro_http_log
    uuid_str = str(uuid.uuid4())
    num_lines = 1
    rows = []
    path_filename = str(os.path.join(self.path_dir_files, filename))
    reader = bro_log_reader.BroLogReader(path_filename)
    for row in reader.readrows():
        if not key_list:
            # list() so this also works on Python 3, where keys() is a view
            key_list = list(row.keys())
        row['ts'] = str(row['ts'])
        # list() is required on Python 3: dict views have no .append()
        # (on Python 2 this is a no-op copy of the values list)
        value = list(row.values())
        value.append('undefined')
        value.append(1)
        value.append(str(num_lines))
        value.append(uuid_str)
        weblogs.append(value)
        rows.append(row)
        num_lines += 1
    self.__assert_for_creation_analysis_session__(filename, key_list, weblogs,
                                                  current_user, type_file,
                                                  uuid_str, num_lines)
def __init__(self, log_filename, ts_index=True):
    """Build a Pandas DataFrame from the rows of a Bro log file.

    Args:
        log_filename (string): Path to the Bro log to load
        ts_index (bool): Use the 'ts' field as the DataFrame index (default = True)
    """
    # Stream the parsed log rows straight into the DataFrame constructor
    row_stream = bro_log_reader.BroLogReader(log_filename).readrows()
    super(LogToDataFrame, self).__init__(row_stream)

    # Optionally index the frame on the timestamp column
    if ts_index:
        self.set_index('ts', inplace=True)
def find_traffic(log, flags):
    """Collect originating ip/port pairs for rows whose server_name or
    subject contains any of the given flag substrings.

    Args:
        log (str): Path to the Bro log to scan
        flags (iterable of str): Substrings to search for

    Returns:
        list of dict: [{'ip': ..., 'port': ...}, ...] sorted by ip
    """
    reader = bro_log_reader.BroLogReader(log)
    traffic = []
    for row in reader.readrows():
        for flag in flags:
            # BUG FIX: the original used `flag in (row['server_name'] or
            # row['subject'])`, which only searched the subject when
            # server_name was falsy. Search BOTH fields, which is almost
            # certainly the intent.
            if flag in row['server_name'] or flag in row['subject']:
                traffic.append({
                    'ip': row['id.orig_h'],
                    'port': row['id.orig_p']
                })
                break  # one entry per row even if several flags match
    return sorted(traffic, key=lambda k: k['ip'])
def __init__(self, filepath, eps=10, max_rows=None):
    """Initialization for the LiveSimulator Class

    Args:
        eps (int): Events Per Second that the simulator will emit events (default = 10)
        max_rows (int): The maximum number of rows to generate (default = None (go forever))
    """
    # Build the inter-event delay schedule:
    #   - draw 1000 samples from a normal distribution centered on 1/eps
    #   - clamp negatives to zero
    #   - cycle through the precomputed deltas forever
    mean_delay = 1.0 / float(eps)
    deltas = np.random.normal(mean_delay, 0.5 * mean_delay, size=1000)
    self.eps_timer = itertools.cycle(max(0, d) for d in deltas)

    # Bro log reader over the given file (no tailing for simulation)
    self.log_reader = bro_log_reader.BroLogReader(filepath, tail=False)

    # Optional cap on the number of rows to emit
    self.max_rows = max_rows
def create_dataframe(self, log_filename, ts_index=True, aggressive_category=True):
    """Create a Pandas dataframe from a Bro/Zeek log file.

    Args:
        log_filename (string): The full path to the Bro log
        ts_index (bool): Set the index to the 'ts' field (default = True)
        aggressive_category (bool): Passed through to pd_column_types when
            mapping Bro types to Pandas dtypes
    """
    # Parse just the Bro header to learn the column names and Bro types
    header_reader = bro_log_reader.BroLogReader(log_filename)
    _, field_names, field_types, _ = header_reader._parse_bro_header(log_filename)

    # Map the Bro types onto appropriate Pandas dtypes
    pandas_types = self.pd_column_types(field_names, field_types, aggressive_category)

    # Bulk-load the log body with Pandas' CSV reader (skipping '#' header lines)
    self._df = pd.read_csv(log_filename, sep='\t', names=field_names,
                           dtype=pandas_types, comment="#", na_values='-')

    # Convert Bro 'time' columns to datetime and 'interval' columns to timedelta
    for name, bro_type in zip(field_names, field_types):
        if bro_type == 'time':
            self._df[name] = pd.to_datetime(self._df[name], unit='s')
        if bro_type == 'interval':
            self._df[name] = pd.to_timedelta(self._df[name], unit='s')

    # Optionally index the frame on the timestamp column
    if ts_index and not self._df.empty:
        self._df.set_index('ts', inplace=True)

    return self._df
# Load the VirusTotal query cache if one exists; otherwise start a fresh one.
# FIX: use a context manager so the pickle file handle is always closed
# (the original `pickle.load(open(...))` leaked the handle).
try:
    with open('vtq.pkl', 'rb') as cache_file:
        vtq = pickle.load(cache_file)
    print('Opening VirusTotal Query Cache (cache_size={:d})...'.format(vtq.size))
except IOError:
    vtq = vt_query.VTQuery(max_cache_time=60*24*7)  # One week cache

# See our 'Risky Domains' Notebook for the analysis and
# statistical methods used to compute this risky set of TLDs
risky_tlds = set(['info', 'tk', 'xyz', 'online', 'club', 'ru', 'website', 'in', 'ws',
                  'top', 'site', 'work', 'biz', 'name', 'tech', 'loan', 'win', 'pro'])

# Launch long lived process with signal catcher
with signal_utils.signal_catcher(save_vtq):

    # Run the bro reader on the dns.log file looking for risky TLDs
    reader = bro_log_reader.BroLogReader(args.bro_log)
    for row in reader.readrows():

        # Pull out the TLD
        query = row['query']
        tld = tldextract.extract(query).suffix

        # Check if the TLD is in the risky group
        if tld in risky_tlds:
            # Make the query with the full query
            results = vtq.query_url(query)
            if results.get('positives', 0) > 3:  # At least four hits
                print('\nRisky Domain DNS Query Found')
                print('From: {:s} To: {:s} QType: {:s} RCode: {:s}'.format(row['id.orig_h'],
                                                                           row['id.resp_h'],
                                                                           row['qtype_name'],
                                                                           row['rcode_name']))
                pprint(results)
# Bail out on any arguments the parser did not recognize
if commands:
    print('Unrecognized args: %s' % commands)
    sys.exit(1)

# This example only makes sense on an ssl log
if not args.bro_log.endswith('ssl.log'):
    print('This example only works with Bro ssl.log files..')
    sys.exit(1)

# Expand a leading tilde in the log path
if args.bro_log:
    args.bro_log = os.path.expanduser(args.bro_log)

# Watch the ssl.log for potential Tor connections
# NOTE(review): tail=args.t — parser not visible here; confirm the option's
# dest really is 't' and not 'tail'
reader = bro_log_reader.BroLogReader(args.bro_log, tail=args.t)

number = 0   # running count of possible Tor connections identified
ports = []   # destination ports seen, collected for statistics

for row in reader.readrows():
    # Track the destination port of every connection
    ports.append(row['id.resp_p'])

    # Pull out the Certificate Issuer (not all logs carry the field)
    try:
        issuer = row['issuer']
    except KeyError:
        print(
            'Could not find the issuer field in your ssl.log. Please verify your log file.'
        )
from pprint import pprint

from bat.log_to_dataframe import LogToDataFrame
from bat import bro_log_reader
from bat.utils import vt_query

# FIX: the project's VirusTotal API is class-based (cf. the dns example,
# which does vt_query.VTQuery(...).query_url(...)); the original
# `vt_query.query.file(...)` referenced a non-existent module attribute.
# Create one VTQuery instance up front and reuse it for every row.
vtq = vt_query.VTQuery()

reader = bro_log_reader.BroLogReader('files.log', tail=True)  # This will dynamically monitor this Bro log
for row in reader.readrows():
    pprint(vtq.query_file(row['sha256']))
# Collect command-line arguments
parser = argparse.ArgumentParser()
parser.add_argument('bro_log', type=str, help='Specify a bro log to run BroLogReader test on')
parser.add_argument('-t', '--tail', action='store_true', help='Turn on log tailing')
args, commands = parser.parse_known_args()

# Bail out on anything the parser did not recognize
if commands:
    print('Unrecognized args: %s' % commands)
    sys.exit(1)

# With no arguments at all, show usage and exit
if len(sys.argv) == 1:
    parser.print_help()
    sys.exit(1)

# Expand a leading tilde in the log path
if args.bro_log:
    args.bro_log = os.path.expanduser(args.bro_log)

# Stream and pretty-print every row of the given log (strict parsing,
# optional tailing)
reader = bro_log_reader.BroLogReader(args.bro_log, tail=args.tail, strict=True)
for row in reader.readrows():
    pprint(row)