Example No. 1
0
    def readrows(self):
        """Yield rows from every file in self._files, gunzipping as needed.

        For each path in self._files a BroLogReader is created and its rows
        are yielded. Files ending in '.gz' are first decompressed into a
        temporary file, which is removed (best effort) after reading.
        """

        # For each file (may be just one) create a BroLogReader and use it
        for self._filepath in self._files:

            # Track the temp file (if any) so cleanup only runs when one
            # was actually created -- the original code relied on a bare
            # except swallowing a NameError for non-gzipped files
            tmp_name = None

            # Check if the file is zipped
            if self._filepath.endswith('.gz'):
                tmp = tempfile.NamedTemporaryFile(delete=False)
                with gzip.open(self._filepath,
                               'rb') as f_in, open(tmp.name, 'wb') as f_out:
                    shutil.copyfileobj(f_in, f_out)

                # Set the file path to the new temp file
                self._filepath = tmp.name
                tmp_name = tmp.name

            # Create a BroLogReader
            reader = bro_log_reader.BroLogReader(self._filepath)
            for row in reader.readrows():
                yield row

            # Clean up the temp file (best effort: ignore removal failures)
            if tmp_name:
                try:
                    os.remove(tmp_name)
                    print('Removed temporary file {:s}...'.format(tmp_name))
                except OSError:
                    pass
Example No. 2
0
    def test_bro_files(self):
        """Read http.log with BroLogReader and verify analysis-session creation.

        Builds the weblogs payload row by row, appending the extra columns
        the analysis session expects, then delegates the assertions to
        __assert_for_creation_analysis_session__.
        """
        filename = 'http.log'
        weblogs = []
        key_list = None
        current_user = self.user
        type_file = AnalysisSession.TYPE_FILES.bro_http_log
        uuid_str = str(uuid.uuid4())
        num_lines = 1
        rows = []
        path_filename = str(os.path.join(self.path_dir_files, filename))
        reader = bro_log_reader.BroLogReader(path_filename)
        for row in reader.readrows():
            if not key_list:
                # Materialize: on Python 3 keys() is a view, not a list
                key_list = list(row.keys())
            row['ts'] = str(row['ts'])
            # Materialize values: dict views have no append() on Python 3
            value = list(row.values())
            value.append('undefined')
            value.append(1)
            value.append(str(num_lines))
            value.append(uuid_str)
            weblogs.append(value)
            rows.append(row)
            num_lines += 1

        self.__assert_for_creation_analysis_session__(filename, key_list,
                                                      weblogs, current_user,
                                                      type_file, uuid_str,
                                                      num_lines)
Example No. 3
0
    def __init__(self, log_filename, ts_index=True):
        """Build a Pandas DataFrame directly from a Bro log file.

        Args:
            log_filename (str): path to the Bro log to load
            ts_index (bool): use the 'ts' column as the index (default True)
        """
        # Stream rows out of the Bro log straight into the DataFrame
        log_rows = bro_log_reader.BroLogReader(log_filename).readrows()
        super(LogToDataFrame, self).__init__(log_rows)

        # Optionally index the frame on the timestamp column
        if ts_index:
            self.set_index('ts', inplace=True)
Example No. 4
0
def find_traffic(log, flags):
    """Collect origin ip/port pairs whose server_name or subject matches a flag.

    Args:
        log (str): path to a Bro SSL log file
        flags (iterable of str): substrings to search for
    Returns:
        list: dicts with 'ip' and 'port' keys, sorted by 'ip'
    """
    reader = bro_log_reader.BroLogReader(log)
    traffic = []

    for row in reader.readrows():
        for flag in flags:
            # Check both fields. The original `flag in (a or b)` only ever
            # inspected server_name when it was truthy (Bro uses '-' for
            # missing values, which is truthy), so subject was never checked.
            if flag in row['server_name'] or flag in row['subject']:
                traffic.append({
                    'ip': row['id.orig_h'],
                    'port': row['id.orig_p']
                })

    return sorted(traffic, key=lambda k: k['ip'])
Example No. 5
0
    def __init__(self, filepath, eps=10, max_rows=None):
        """Initialization for the LiveSimulator Class
           Args:
               filepath (str): The Bro log file to replay
               eps (int): Events Per Second that the simulator will emit events (default = 10)
               max_rows (int): The maximum number of rows to generate (default = None (go forever))
        """

        # Precompute 1000 inter-event delays drawn from a normal distribution
        # centered on 1/eps, clamped at zero, and cycle over them forever
        mean_delay = 1.0 / float(eps)
        deltas = np.random.normal(mean_delay, 0.5 * mean_delay, size=1000)
        self.eps_timer = itertools.cycle(max(0, delay) for delay in deltas)

        # Reader over the Bro log (no tailing: replay the file as-is)
        self.log_reader = bro_log_reader.BroLogReader(filepath, tail=False)

        # Upper bound on emitted rows (None = unlimited)
        self.max_rows = max_rows
Example No. 6
0
    def create_dataframe(self,
                         log_filename,
                         ts_index=True,
                         aggressive_category=True):
        """Create a Pandas dataframe from a Bro/Zeek log file.

        Args:
            log_filename (string): The full path to the Bro log
            ts_index (bool): Set the index to the 'ts' field (default = True)
            aggressive_category (bool): Passed through to pd_column_types when
                mapping Bro types to Pandas dtypes (default = True)
        Returns:
            pandas.DataFrame: the loaded, type-converted log
        """

        # Create a Bro log reader just to read in the header for names and types
        _bro_reader = bro_log_reader.BroLogReader(log_filename)
        _, field_names, field_types, _ = _bro_reader._parse_bro_header(
            log_filename)

        # Get the appropriate types for the Pandas Dataframe
        pandas_types = self.pd_column_types(field_names, field_types,
                                            aggressive_category)

        # Bulk-load the log with Pandas read_csv ('-' marks missing values,
        # '#' lines are the Bro header/footer)
        self._df = pd.read_csv(log_filename,
                               sep='\t',
                               names=field_names,
                               dtype=pandas_types,
                               comment="#",
                               na_values='-')

        # Convert 'time' fields to datetime and 'interval' fields to timedelta
        # (a column has exactly one Bro type, so the branches are exclusive)
        for name, bro_type in zip(field_names, field_types):
            if bro_type == 'time':
                self._df[name] = pd.to_datetime(self._df[name], unit='s')
            elif bro_type == 'interval':
                self._df[name] = pd.to_timedelta(self._df[name], unit='s')

        # Set the index (skip on an empty frame: there is no 'ts' data to index)
        if ts_index and not self._df.empty:
            self._df.set_index('ts', inplace=True)
        return self._df
Example No. 7
0
        # Load the pickled VirusTotal query cache from disk if present;
        # otherwise start a fresh cache whose entries expire after one week.
        # NOTE(review): pickle.load(open(...)) never closes the file handle,
        # and unpickling is only safe because vtq.pkl is locally produced.
        try:
            vtq = pickle.load(open('vtq.pkl', 'rb'))
            print('Opening VirusTotal Query Cache (cache_size={:d})...'.format(vtq.size))
        except IOError:
            vtq = vt_query.VTQuery(max_cache_time=60*24*7) # One week cache

        # See our 'Risky Domains' Notebook for the analysis and
        # statistical methods used to compute this risky set of TLDs
        risky_tlds = set(['info', 'tk', 'xyz', 'online', 'club', 'ru', 'website', 'in', 'ws',
                          'top', 'site', 'work', 'biz', 'name', 'tech', 'loan', 'win', 'pro'])

        # Launch long lived process with signal catcher
        # (presumably save_vtq persists the cache on exit -- defined elsewhere)
        with signal_utils.signal_catcher(save_vtq):

            # Run the bro reader on the dns.log file looking for risky TLDs
            reader = bro_log_reader.BroLogReader(args.bro_log)
            for row in reader.readrows():

                # Pull out the TLD
                query = row['query']
                tld = tldextract.extract(query).suffix

                # Check if the TLD is in the risky group
                if tld in risky_tlds:
                    # Make the query with the full query
                    results = vtq.query_url(query)
                    if results.get('positives', 0) > 3: # At least four hits
                        print('\nRisky Domain DNS Query Found')
                        print('From: {:s} To: {:s} QType: {:s} RCode: {:s}'.format(row['id.orig_h'],
                               row['id.resp_h'], row['qtype_name'], row['rcode_name']))
                        pprint(results)
Example No. 8
0
    # Check for unknown args
    if commands:
        print('Unrecognized args: %s' % commands)
        sys.exit(1)

    # Sanity check that this is a ssl log
    if not args.bro_log.endswith('ssl.log'):
        print('This example only works with Bro ssl.log files..')
        sys.exit(1)

    # File may have a tilde in it
    if args.bro_log:
        args.bro_log = os.path.expanduser(args.bro_log)

        # Run the bro reader on the ssl.log file looking for potential Tor connections
        reader = bro_log_reader.BroLogReader(args.bro_log, tail=args.t)
        # Just a counter to keep an eye on how many possible Tor connections we identify
        number = 0
        # A empty list to use for the port statistics
        ports = []

        for row in reader.readrows():
            # Add the destination port to the list of ports
            ports.append(row['id.resp_p'])
            # Pull out the Certificate Issuer
            # NOTE(review): on KeyError a warning is printed but 'issuer' stays
            # unbound; the code that uses it is outside this view -- verify it
            # handles that case (continue/skip) rather than raising NameError.
            try:
                issuer = row['issuer']
            except KeyError:
                print(
                    'Could not find the issuer field in your ssl.log. Please verify your log file.'
                )
Example No. 9
0
from bat.log_to_dataframe import LogToDataFrame
from bat import bro_log_reader
from bat.utils import vt_query
from pprint import pprint  # was missing: pprint is called in the loop below

# Dynamically monitor files.log (tail=True) and query VirusTotal for each
# observed file hash
reader = bro_log_reader.BroLogReader('files.log', tail=True)
for row in reader.readrows():
    # NOTE(review): 'vt_query.query.file' looks unusual -- presumably a
    # module-level VTQuery helper; confirm against the bat.utils.vt_query API
    pprint(vt_query.query.file(row['sha256']))
Example No. 10
0
    # Command-line interface: a Bro log path plus an optional tail flag
    parser = argparse.ArgumentParser()
    parser.add_argument('bro_log',
                        type=str,
                        help='Specify a bro log to run BroLogReader test on')
    parser.add_argument('-t',
                        '--tail',
                        action='store_true',
                        help='Turn on log tailing')

    # If no args just call help -- this must run BEFORE parsing: argparse
    # exits with an error about the missing positional 'bro_log' otherwise,
    # which made the original post-parse help branch unreachable
    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(1)

    args, commands = parser.parse_known_args()

    # Check for unknown args
    if commands:
        print('Unrecognized args: %s' % commands)
        sys.exit(1)

    # File may have a tilde in it
    if args.bro_log:
        args.bro_log = os.path.expanduser(args.bro_log)

        # Run the bro reader on a given log file (strict: complain on bad rows)
        reader = bro_log_reader.BroLogReader(args.bro_log,
                                             tail=args.tail,
                                             strict=True)
        for row in reader.readrows():
            pprint(row)