def update_files(self):
    """Ensures all files are properly loaded.

    Detects new files, file removals, file rotation, and truncation.
    On non-linux platforms, it will also manually reload the file for
    tailing. Note that this hack is necessary because EOF is cached on
    BSD systems.
    """
    self._update_time = int(time.time())

    ls = []
    files = []
    if len(self._beaver_config.get('globs')) > 0:
        # Explicit globs configured: expand each pattern (honoring its
        # exclude) and register the expansion with the config + callback.
        for name, exclude in self._beaver_config.get('globs').items():
            globbed = [os.path.realpath(filename) for filename in eglob(name, exclude)]
            files.extend(globbed)
            self._file_config.addglob(name, globbed)
            self._callback(("addglob", (name, globbed)))
    else:
        # No globs: watch everything directly under the configured folder.
        for name in self.listdir():
            files.append(os.path.realpath(os.path.join(self._folder, name)))

    for absname in files:
        try:
            st = os.stat(absname)
        # FIX: 'except E, err' is Python-2-only syntax; the 'as' form is
        # valid on Python 2.6+ (which this file already targets) and 3.x.
        except EnvironmentError as err:
            # A vanished file (ENOENT) is expected during rotation; any
            # other stat failure is a real error.
            if err.errno != errno.ENOENT:
                raise
        else:
            if not stat.S_ISREG(st.st_mode):
                continue  # skip directories, sockets, fifos, ...
            fid = self.get_file_id(st)
            ls.append((fid, absname))
def update_files(self):
    """Ensures all files are properly loaded.

    Detects new files, file removals, file rotation, and truncation.
    On non-linux platforms, it will also manually reload the file for
    tailing. Note that this hack is necessary because EOF is cached on
    BSD systems.
    """
    self._update_time = int(time.time())

    ls = []
    files = []
    if len(self._beaver_config.get('globs')) > 0:
        # Explicit globs configured: expand each pattern (honoring its
        # exclude) and register the expansion with the config + callback.
        for name, exclude in self._beaver_config.get('globs').items():
            globbed = [
                os.path.realpath(filename) for filename in eglob(name, exclude)
            ]
            files.extend(globbed)
            self._beaver_config.addglob(name, globbed)
            self._callback(("addglob", (name, globbed)))
    else:
        # No globs: watch everything directly under the configured folder.
        for name in self.listdir():
            files.append(os.path.realpath(os.path.join(self._folder, name)))

    for absname in files:
        try:
            st = os.stat(absname)
        # FIX: 'except E, err' is Python-2-only syntax; the 'as' form is
        # valid on Python 2.6+ (which this file already targets) and 3.x.
        except EnvironmentError as err:
            # A vanished file (ENOENT) is expected during rotation; any
            # other stat failure is a real error.
            if err.errno != errno.ENOENT:
                raise
        else:
            if not stat.S_ISREG(st.st_mode):
                continue  # skip directories, sockets, fifos, ...
            fid = self.get_file_id(st)
            ls.append((fid, absname))
def update_files(self):
    """Ensures all files are properly loaded.

    Detects new files, file removals, file rotation, and truncation.
    On non-linux platforms, it will also manually reload the file for
    tailing. Note that this hack is necessary because EOF is cached on
    BSD systems.

    Throttled: returns early if called again within
    ``self._discover_interval`` seconds of the previous run.
    """
    if self._update_time and int(time.time()) - self._update_time < self._discover_interval:
        return

    self._update_time = int(time.time())

    possible_files = []
    files = []
    if len(self._beaver_config.get('globs')) > 0:
        extend_files = files.extend
        # Explicit globs configured: expand each pattern (honoring its
        # exclude) and register the expansion with the config + callback.
        for name, exclude in self._beaver_config.get('globs').items():
            globbed = [
                os.path.realpath(filename) for filename in eglob(name, exclude)
            ]
            extend_files(globbed)
            self._beaver_config.addglob(name, globbed)
            self._callback(("addglob", (name, globbed)))
    else:
        append_files = files.append
        # No globs: watch everything directly under the configured folder.
        for name in self.listdir():
            append_files(os.path.realpath(os.path.join(self._folder, name)))

    # FIX: hoist loop-invariant work out of the per-file loop. The
    # original rebound possible_files.append and re-read/re-converted the
    # three ignore_old_files_* settings on every iteration.
    ignore_days = int(self._beaver_config.get('ignore_old_files_days'))
    ignore_hours = int(self._beaver_config.get('ignore_old_files_hours'))
    ignore_minutes = int(self._beaver_config.get('ignore_old_files_minutes'))
    cutoff = None
    if ignore_days > 0 or ignore_hours > 0 or ignore_minutes > 0:
        cutoff = datetime.datetime.today() - datetime.timedelta(
            days=ignore_days, hours=ignore_hours, minutes=ignore_minutes)

    append_possible_files = possible_files.append
    for absname in files:
        try:
            st = os.stat(absname)
        # FIX: 'except E, err' is Python-2-only syntax; the 'as' form is
        # valid on Python 2.6+ (which this file already targets) and 3.x.
        except EnvironmentError as err:
            # A vanished file (ENOENT) is expected during rotation; any
            # other stat failure is a real error.
            if err.errno != errno.ENOENT:
                raise
        else:
            if not stat.S_ISREG(st.st_mode):
                continue  # skip directories, sockets, fifos, ...
            if cutoff is not None and datetime.datetime.fromtimestamp(st.st_mtime) < cutoff:
                # FIX: message typo corrected ("older then" -> "older than").
                self._logger.debug(
                    '[{0}] - file {1} older than {2} days {3} hours {4} minutes so ignoring it'
                    .format(
                        self.get_file_id(st),
                        absname,
                        ignore_days,
                        ignore_hours,
                        ignore_minutes))
                continue
            fid = self.get_file_id(st)
            append_possible_files((fid, absname))
def _parse(self):
    """Resolve every config stanza into concrete file paths.

    Returns a ``(files, glob_paths)`` pair: ``files`` maps each
    realpath of a globbed file to its stanza's config dict, and
    ``glob_paths`` maps each stanza name (a glob pattern) to that
    same config dict.
    """
    glob_paths = {}
    files = {}

    for section in self._config.sections():
        # Every stanza must declare a 'type'.
        if not self._config.get(section, 'type'):
            raise Exception('%s: missing mandatory config "type"' % section)

        section_config = dict((item[0], item[1]) for item in self._config.items(section))
        glob_paths[section] = section_config

        matched = eglob(section)
        if not matched:
            self._logger.debug('Skipping glob due to no files found: %s' % section)
            continue

        for match in matched:
            files[os.path.realpath(match)] = section_config

    return files, glob_paths
def _parse(self, args):
    """Build beaver's runtime configuration.

    Feeds the nested ``_main_parser``/``_section_parser`` callbacks into
    ``Configuration``, stores the resulting main config and per-section
    configs on ``self``, and resolves each section's glob into
    ``self._files`` (realpath -> section config).
    """
    def _main_parser(config):
        # Overlay non-empty command-line args onto the config defaults.
        transpose = [
            'config', 'confd_path', 'debug', 'daemonize', 'files',
            'format', 'fqdn', 'hostname', 'path', 'pid', 'transport'
        ]
        namspace_dict = vars(args)
        for key in transpose:
            if key not in namspace_dict or namspace_dict[
                    key] is None or namspace_dict[key] == '':
                continue
            config[key] = namspace_dict[key]

        if args.mode:
            config['zeromq_bind'] = args.mode

        # HACK: Python 2.6 ConfigParser does not properly
        # handle non-string values
        for key in config:
            if config[key] == '':
                config[key] = None

        # Coerce typed settings. NOTE(review): the bool keys are coerced
        # unconditionally (int(None) would raise) — presumably the main
        # defaults always provide them; verify against _main_defaults.
        require_bool = [
            'debug', 'daemonize', 'fqdn', 'rabbitmq_exchange_durable',
            'rabbitmq_queue_durable', 'rabbitmq_ha_queue'
        ]
        for key in require_bool:
            config[key] = bool(int(config[key]))

        require_int = [
            'max_failure',
            'max_queue_size',
            'queue_timeout',
            'rabbitmq_port',
            'respawn_delay',
            'subprocess_poll_sleep',
            'tcp_port',
            'udp_port',
            'wait_timeout',
            'zeromq_hwm',
        ]
        for key in require_int:
            if config[key] is not None:
                config[key] = int(config[key])

        require_float = [
            'update_file_mapping_time',
            'discover_interval',
        ]
        for key in require_float:
            if config[key] is not None:
                config[key] = float(config[key])

        # 'null' is an alias for the raw (pass-through) format.
        if config.get('format') == 'null':
            config['format'] = 'raw'

        if config['files'] is not None and type(config['files']) == str:
            config['files'] = config['files'].split(',')

        if config['path'] is not None:
            config['path'] = os.path.realpath(config['path'])
            if not os.path.isdir(config['path']):
                raise LookupError('{0} does not exist'.format(
                    config['path']))

        # Default the hostname, honoring the fqdn flag.
        if config.get('hostname') is None:
            if config.get('fqdn') is True:
                config['hostname'] = socket.getfqdn()
            else:
                config['hostname'] = socket.gethostname()

        if config.get('sincedb_path'):
            config['sincedb_path'] = os.path.realpath(
                config.get('sincedb_path'))

        # Populated later by update_files()/addglob().
        config['globs'] = {}

        return config

    def _section_parser(config, raise_exceptions=True):
        '''Parse a given INI-style config file using ConfigParser module.

        Stanza's names match file names, and properties are defaulted as in
        http://logstash.net/docs/1.1.1/inputs/file

        Config file example:

        [/var/log/syslog]
        type: syslog
        tags: sys,main

        [/var/log/auth]
        type: syslog
        ;tags: auth,main
        '''
        # add_field is a flat comma-separated k,v,k,v,... list; turn it
        # into config['fields'] = {k: [v], ...}. An odd element count is
        # an error (unless raise_exceptions is off, e.g. for defaults).
        fields = config.get('add_field', '')
        if type(fields) != dict:
            try:
                if type(fields) == str:
                    fields = filter(None, fields.split(','))
                if len(fields) == 0:
                    config['fields'] = {}
                elif (len(fields) % 2) == 1:
                    if raise_exceptions:
                        raise Exception(
                            'Wrong number of values for add_field')
                else:
                    fieldkeys = fields[0::2]
                    fieldvalues = [[x] for x in fields[1::2]]
                    config['fields'] = dict(zip(fieldkeys, fieldvalues))
            except TypeError:
                config['fields'] = {}

        if 'add_field' in config:
            del config['add_field']

        # tags: comma-separated string -> list (empty list if unset).
        try:
            tags = config.get('tags', '')
            if type(tags) == str:
                tags = filter(None, tags.split(','))
            if len(tags) == 0:
                tags = []
            config['tags'] = tags
        except TypeError:
            config['tags'] = []

        # 'null' is an alias for the raw (pass-through) format.
        if config.get('format') == 'null':
            config['format'] = 'raw'

        file_type = config.get('type', None)
        if not file_type:
            config['type'] = 'file'

        require_bool = ['debug', 'ignore_empty', 'ignore_truncate']
        for k in require_bool:
            config[k] = bool(int(config[k]))

        require_int = [
            'sincedb_write_interval', 'stat_interval', 'tail_lines'
        ]
        for k in require_int:
            config[k] = int(config[k])

        return config

    conf = Configuration(name='beaver',
                         path=self._configfile,
                         main_defaults=self._main_defaults,
                         section_defaults=self._section_defaults,
                         main_parser=_main_parser,
                         section_parser=_section_parser,
                         path_from_main='confd_path')

    config = conf.raw()
    self._beaver_config = config['beaver']
    self._file_config = config['sections']

    # Also keep parsed copies of the raw defaults.
    self._main_parser = _main_parser(self._main_defaults)
    self._section_defaults = _section_parser(self._section_defaults,
                                             raise_exceptions=False)

    # Map each globbed file's realpath to its section config.
    self._files = {}
    for section in config['sections']:
        globs = eglob(section,
                      config['sections'][section].get('exclude', ''))
        if not globs:
            self._logger.debug('Skipping glob due to no files found: %s' %
                               section)
            continue

        for globbed_file in globs:
            self._files[os.path.realpath(
                globbed_file)] = config['sections'][section]
def _parse(self, args):
    """Build beaver's runtime configuration.

    Feeds the nested ``_main_parser``/``_section_parser`` callbacks into
    ``Configuration``, stores the resulting main config and per-section
    configs on ``self``, and resolves each section's glob into
    ``self._files`` (realpath -> section config).
    """
    def _main_parser(config):
        # Overlay non-empty command-line args onto the config defaults.
        transpose = ['config', 'confd_path', 'debug', 'daemonize', 'files',
                     'format', 'fqdn', 'hostname', 'path', 'pid', 'transport']
        namspace_dict = vars(args)
        for key in transpose:
            if key not in namspace_dict or namspace_dict[key] is None or namspace_dict[key] == '':
                continue
            config[key] = namspace_dict[key]

        if args.mode:
            config['zeromq_bind'] = args.mode

        # HACK: Python 2.6 ConfigParser does not properly
        # handle non-string values
        for key in config:
            if config[key] == '':
                config[key] = None

        # Coerce typed settings. NOTE(review): the bool keys are coerced
        # unconditionally (int(None) would raise) — presumably the main
        # defaults always provide them; verify against _main_defaults.
        require_bool = ['debug', 'daemonize', 'fqdn', 'rabbitmq_exchange_durable',
                        'rabbitmq_queue_durable', 'rabbitmq_ha_queue', 'rabbitmq_ssl',
                        'tcp_ssl_enabled', 'tcp_ssl_verify']

        for key in require_bool:
            config[key] = bool(int(config[key]))

        require_int = [
            'max_failure',
            'max_queue_size',
            'queue_timeout',
            'rabbitmq_port',
            'rabbitmq_timeout',
            'rabbitmq_delivery_mode',
            'respawn_delay',
            'subprocess_poll_sleep',
            'refresh_worker_process',
            'tcp_port',
            'udp_port',
            'wait_timeout',
            'zeromq_hwm',
            'logstash_version',
            'kafka_batch_n',
            'kafka_batch_t',
            'kafka_ack_timeout',
            'number_of_consumer_processes',
            'ignore_old_files'
        ]
        for key in require_int:
            if config[key] is not None:
                config[key] = int(config[key])

        require_float = [
            'update_file_mapping_time',
            'discover_interval',
        ]

        for key in require_float:
            if config[key] is not None:
                config[key] = float(config[key])

        # 'null' is an alias for the raw (pass-through) format.
        if config.get('format') == 'null':
            config['format'] = 'raw'

        if config['files'] is not None and type(config['files']) == str:
            config['files'] = config['files'].split(',')

        if config['path'] is not None:
            config['path'] = os.path.realpath(config['path'])
            if not os.path.isdir(config['path']):
                raise LookupError('{0} does not exist'.format(config['path']))

        # Default the hostname, honoring the fqdn flag.
        if config.get('hostname') is None:
            if config.get('fqdn') is True:
                config['hostname'] = socket.getfqdn()
            else:
                config['hostname'] = socket.gethostname()

        if config.get('sincedb_path'):
            config['sincedb_path'] = os.path.realpath(config.get('sincedb_path'))

        # zeromq_address: comma-separated string -> list of addresses.
        if config['zeromq_address'] and type(config['zeromq_address']) == str:
            config['zeromq_address'] = [x.strip() for x in config.get('zeromq_address').split(',')]

        # ssh_options: comma-separated string -> list of '-o <opt>' flags.
        if config.get('ssh_options') is not None:
            csv = config.get('ssh_options')
            config['ssh_options'] = []
            if type(csv) == str:
                for opt in csv.split(','):
                    config['ssh_options'].append('-o %s' % opt.strip())
        else:
            config['ssh_options'] = []

        # Populated later by update_files()/addglob().
        config['globs'] = {}

        return config

    def _section_parser(config, raise_exceptions=True):
        '''Parse a given INI-style config file using ConfigParser module.

        Stanza's names match file names, and properties are defaulted as in
        http://logstash.net/docs/1.1.1/inputs/file

        Config file example:

        [/var/log/syslog]
        type: syslog
        tags: sys,main

        [/var/log/auth]
        type: syslog
        ;tags: auth,main
        '''
        # add_field is a flat comma-separated k,v,k,v,... list; turn it
        # into config['fields'] = {k: [v], ...}. An odd element count is
        # an error (unless raise_exceptions is off, e.g. for defaults).
        fields = config.get('add_field', '')
        if type(fields) != dict:
            try:
                if type(fields) == str:
                    fields = filter(None, fields.split(','))
                if len(fields) == 0:
                    config['fields'] = {}
                elif (len(fields) % 2) == 1:
                    if raise_exceptions:
                        raise Exception('Wrong number of values for add_field')
                else:
                    fieldkeys = fields[0::2]
                    fieldvalues = [[x] for x in fields[1::2]]
                    config['fields'] = dict(zip(fieldkeys, fieldvalues))
            except TypeError:
                config['fields'] = {}

        if 'add_field' in config:
            del config['add_field']

        # add_field_env: like add_field, but each value names an
        # environment variable whose value is merged into config['fields'].
        envFields = config.get('add_field_env', '')
        if type(envFields) != dict:
            try:
                if type(envFields) == str:
                    envFields = envFields.replace(" ","")
                    envFields = filter(None, envFields.split(','))
                if len(envFields) == 0:
                    config['envFields'] = {}
                elif (len(envFields) % 2) == 1:
                    if raise_exceptions:
                        raise Exception('Wrong number of values for add_field_env')
                else:
                    envFieldkeys = envFields[0::2]
                    envFieldvalues = []
                    for x in envFields[1::2]:
                        envFieldvalues.append(os.environ.get(x))
                    config['fields'].update(dict(zip(envFieldkeys, envFieldvalues)))
            except TypeError:
                config['envFields'] = {}

        if 'add_field_env' in config:
            del config['add_field_env']

        # tags: comma-separated string -> list (empty list if unset).
        try:
            tags = config.get('tags', '')
            if type(tags) == str:
                tags = filter(None, tags.split(','))
            if len(tags) == 0:
                tags = []
            config['tags'] = tags
        except TypeError:
            config['tags'] = []

        # 'null' is an alias for the raw (pass-through) format.
        if config.get('format') == 'null':
            config['format'] = 'raw'

        file_type = config.get('type', None)
        if not file_type:
            config['type'] = 'file'

        require_bool = ['debug', 'ignore_empty', 'ignore_truncate']
        for k in require_bool:
            config[k] = bool(int(config[k]))

        # Interpret backslash escapes in the delimiter (Python-2-only
        # str.decode('string-escape')).
        config['delimiter'] = config['delimiter'].decode('string-escape')

        # Pre-compile the multiline regexes once at parse time.
        if config['multiline_regex_after']:
            config['multiline_regex_after'] = re.compile(config['multiline_regex_after'])
        if config['multiline_regex_before']:
            config['multiline_regex_before'] = re.compile(config['multiline_regex_before'])

        require_int = ['sincedb_write_interval', 'stat_interval', 'tail_lines']
        for k in require_int:
            config[k] = int(config[k])

        return config

    conf = Configuration(
        name='beaver',
        path=self._configfile,
        main_defaults=self._main_defaults,
        section_defaults=self._section_defaults,
        main_parser=_main_parser,
        section_parser=_section_parser,
        path_from_main='confd_path',
        config_parser=self._config_parser
    )

    config = conf.raw()
    self._beaver_config = config['beaver']
    self._file_config = config['sections']

    # Also keep parsed copies of the raw defaults.
    self._main_parser = _main_parser(self._main_defaults)
    self._section_defaults = _section_parser(self._section_defaults, raise_exceptions=False)

    # Map each globbed file's realpath to its section config.
    self._files = {}
    for section in config['sections']:
        globs = eglob(section, config['sections'][section].get('exclude', ''))
        if not globs:
            self._logger.debug('Skipping glob due to no files found: %s' % section)
            continue

        for globbed_file in globs:
            self._files[os.path.realpath(globbed_file)] = config['sections'][section]
def _parse(self, args):
    """Build beaver's runtime configuration.

    Feeds the nested ``_main_parser``/``_section_parser`` callbacks into
    ``Configuration``, stores the resulting main config and per-section
    configs on ``self``, and resolves each section's glob into
    ``self._files`` (realpath -> section config).
    """
    def _main_parser(config):
        # Overlay non-empty command-line args onto the config defaults.
        transpose = [
            "config",
            "confd_path",
            "debug",
            "daemonize",
            "files",
            "format",
            "fqdn",
            "hostname",
            "path",
            "pid",
            "transport",
        ]
        namspace_dict = vars(args)
        for key in transpose:
            if key not in namspace_dict or namspace_dict[key] is None or namspace_dict[key] == "":
                continue
            config[key] = namspace_dict[key]

        if args.mode:
            config["zeromq_bind"] = args.mode

        # HACK: Python 2.6 ConfigParser does not properly
        # handle non-string values
        for key in config:
            if config[key] == "":
                config[key] = None

        # Coerce typed settings. NOTE(review): the bool keys are coerced
        # unconditionally (int(None) would raise) — presumably the main
        # defaults always provide them; verify against _main_defaults.
        require_bool = [
            "debug",
            "daemonize",
            "fqdn",
            "rabbitmq_exchange_durable",
            "rabbitmq_queue_durable",
            "rabbitmq_ha_queue",
            "rabbitmq_ssl",
            "tcp_ssl_enabled",
            "tcp_ssl_verify",
        ]

        for key in require_bool:
            config[key] = bool(int(config[key]))

        require_int = [
            "max_failure",
            "max_queue_size",
            "queue_timeout",
            "rabbitmq_port",
            "rabbitmq_timeout",
            "rabbitmq_delivery_mode",
            "respawn_delay",
            "subprocess_poll_sleep",
            "refresh_worker_process",
            "tcp_port",
            "udp_port",
            "wait_timeout",
            "zeromq_hwm",
            "logstash_version",
            "kafka_batch_n",
            "kafka_batch_t",
            "kafka_ack_timeout",
            "number_of_consumer_processes",
        ]
        for key in require_int:
            if config[key] is not None:
                config[key] = int(config[key])

        require_float = ["update_file_mapping_time", "discover_interval"]

        for key in require_float:
            if config[key] is not None:
                config[key] = float(config[key])

        # "null" is an alias for the raw (pass-through) format.
        if config.get("format") == "null":
            config["format"] = "raw"

        if config["files"] is not None and type(config["files"]) == str:
            config["files"] = config["files"].split(",")

        if config["path"] is not None:
            config["path"] = os.path.realpath(config["path"])
            if not os.path.isdir(config["path"]):
                raise LookupError("{0} does not exist".format(config["path"]))

        # Default the hostname, honoring the fqdn flag.
        if config.get("hostname") is None:
            if config.get("fqdn") is True:
                config["hostname"] = socket.getfqdn()
            else:
                config["hostname"] = socket.gethostname()

        if config.get("sincedb_path"):
            config["sincedb_path"] = os.path.realpath(config.get("sincedb_path"))

        # zeromq_address: comma-separated string -> list of addresses.
        if config["zeromq_address"] and type(config["zeromq_address"]) == str:
            config["zeromq_address"] = [x.strip() for x in config.get("zeromq_address").split(",")]

        # ssh_options: comma-separated string -> list of "-o <opt>" flags.
        if config.get("ssh_options") is not None:
            csv = config.get("ssh_options")
            config["ssh_options"] = []
            if type(csv) == str:
                for opt in csv.split(","):
                    config["ssh_options"].append("-o %s" % opt.strip())
        else:
            config["ssh_options"] = []

        # Populated later by update_files()/addglob().
        config["globs"] = {}

        return config

    def _section_parser(config, raise_exceptions=True):
        """Parse a given INI-style config file using ConfigParser module.

        Stanza's names match file names, and properties are defaulted as in
        http://logstash.net/docs/1.1.1/inputs/file

        Config file example:

        [/var/log/syslog]
        type: syslog
        tags: sys,main

        [/var/log/auth]
        type: syslog
        ;tags: auth,main
        """
        # add_field is a flat comma-separated k,v,k,v,... list; turn it
        # into config["fields"] = {k: [v], ...}. An odd element count is
        # an error (unless raise_exceptions is off, e.g. for defaults).
        fields = config.get("add_field", "")
        if type(fields) != dict:
            try:
                if type(fields) == str:
                    fields = filter(None, fields.split(","))
                if len(fields) == 0:
                    config["fields"] = {}
                elif (len(fields) % 2) == 1:
                    if raise_exceptions:
                        raise Exception("Wrong number of values for add_field")
                else:
                    fieldkeys = fields[0::2]
                    fieldvalues = [[x] for x in fields[1::2]]
                    config["fields"] = dict(zip(fieldkeys, fieldvalues))
            except TypeError:
                config["fields"] = {}

        if "add_field" in config:
            del config["add_field"]

        # add_field_env: like add_field, but each value names an
        # environment variable whose value is merged into config["fields"].
        envFields = config.get("add_field_env", "")
        if type(envFields) != dict:
            try:
                if type(envFields) == str:
                    envFields = envFields.replace(" ", "")
                    envFields = filter(None, envFields.split(","))
                if len(envFields) == 0:
                    config["envFields"] = {}
                elif (len(envFields) % 2) == 1:
                    if raise_exceptions:
                        raise Exception("Wrong number of values for add_field_env")
                else:
                    envFieldkeys = envFields[0::2]
                    envFieldvalues = []
                    for x in envFields[1::2]:
                        envFieldvalues.append(os.environ.get(x))
                    config["fields"].update(dict(zip(envFieldkeys, envFieldvalues)))
            except TypeError:
                config["envFields"] = {}

        if "add_field_env" in config:
            del config["add_field_env"]

        # tags: comma-separated string -> list (empty list if unset).
        try:
            tags = config.get("tags", "")
            if type(tags) == str:
                tags = filter(None, tags.split(","))
            if len(tags) == 0:
                tags = []
            config["tags"] = tags
        except TypeError:
            config["tags"] = []

        # "null" is an alias for the raw (pass-through) format.
        if config.get("format") == "null":
            config["format"] = "raw"

        file_type = config.get("type", None)
        if not file_type:
            config["type"] = "file"

        require_bool = ["debug", "ignore_empty", "ignore_truncate"]
        for k in require_bool:
            config[k] = bool(int(config[k]))

        # Interpret backslash escapes in the delimiter (Python-2-only
        # str.decode("string-escape")).
        config["delimiter"] = config["delimiter"].decode("string-escape")

        # Pre-compile the multiline regexes once at parse time.
        if config["multiline_regex_after"]:
            config["multiline_regex_after"] = re.compile(config["multiline_regex_after"])
        if config["multiline_regex_before"]:
            config["multiline_regex_before"] = re.compile(config["multiline_regex_before"])

        require_int = ["sincedb_write_interval", "stat_interval", "tail_lines"]
        for k in require_int:
            config[k] = int(config[k])

        return config

    conf = Configuration(
        name="beaver",
        path=self._configfile,
        main_defaults=self._main_defaults,
        section_defaults=self._section_defaults,
        main_parser=_main_parser,
        section_parser=_section_parser,
        path_from_main="confd_path",
        config_parser=self._config_parser,
    )

    config = conf.raw()
    self._beaver_config = config["beaver"]
    self._file_config = config["sections"]

    # Also keep parsed copies of the raw defaults.
    self._main_parser = _main_parser(self._main_defaults)
    self._section_defaults = _section_parser(self._section_defaults, raise_exceptions=False)

    # Map each globbed file's realpath to its section config.
    self._files = {}
    for section in config["sections"]:
        globs = eglob(section, config["sections"][section].get("exclude", ""))
        if not globs:
            self._logger.debug("Skipping glob due to no files found: %s" % section)
            continue

        for globbed_file in globs:
            self._files[os.path.realpath(globbed_file)] = config["sections"][section]
def _parse(self, args):
    """Build beaver's runtime configuration.

    Feeds the nested ``_main_parser``/``_section_parser`` callbacks into
    ``Configuration``, stores the resulting main config and per-section
    configs on ``self``, and resolves each section's glob into
    ``self._files`` (realpath -> section config).
    """
    def _main_parser(config):
        # Overlay non-empty command-line args onto the config defaults.
        transpose = ['config', 'debug', 'daemonize', 'files', 'format',
                     'fqdn', 'hostname', 'path', 'pid', 'transport']
        namespace_dict = vars(args)
        for key in transpose:
            if key not in namespace_dict or namespace_dict[key] is None or namespace_dict[key] == '':
                continue
            config[key] = namespace_dict[key]

        if args.mode:
            config['zeromq_bind'] = args.mode

        # HACK: Python 2.6 ConfigParser does not properly
        # handle non-string values
        for key in config:
            if config[key] == '':
                config[key] = None

        # Coerce typed settings. NOTE(review): the bool keys are coerced
        # unconditionally (int(None) would raise) — presumably the main
        # defaults always provide them; verify against _main_defaults.
        require_bool = ['debug', 'daemonize', 'fqdn', 'rabbitmq_exchange_durable',
                        'rabbitmq_queue_durable', 'rabbitmq_ha_queue']
        for key in require_bool:
            config[key] = bool(int(config[key]))

        require_int = [
            'max_failure',
            'max_queue_size',
            'queue_timeout',
            'rabbitmq_port',
            'respawn_delay',
            'subprocess_poll_sleep',
            'udp_port',
            'wait_timeout',
            'zeromq_hwm',
        ]
        for key in require_int:
            if config[key] is not None:
                config[key] = int(config[key])

        require_float = [
            'update_file_mapping_time',
            'discover_interval',
        ]
        for key in require_float:
            if config[key] is not None:
                config[key] = float(config[key])

        if config['files'] is not None and type(config['files']) == str:
            config['files'] = config['files'].split(',')

        config['path'] = os.path.realpath(config['path'])
        if not os.path.isdir(config['path']):
            raise LookupError('{0} does not exist'.format(config['path']))

        # Default the hostname, honoring the fqdn flag.
        if config.get('hostname') is None:
            if config.get('fqdn') is True:
                config['hostname'] = socket.getfqdn()
            else:
                config['hostname'] = socket.gethostname()

        if config.get('sincedb_path'):
            config['sincedb_path'] = os.path.realpath(config.get('sincedb_path'))

        # Populated later by update_files()/addglob().
        config['globs'] = {}

        return config

    def _section_parser(config, raise_exceptions=True):
        '''Parse a given INI-style config file using ConfigParser module.

        Stanza's names match file names, and properties are defaulted as in
        http://logstash.net/docs/1.1.1/inputs/file

        Config file example:

        [/var/log/syslog]
        type: syslog
        tags: sys,main

        [/var/log/auth]
        type: syslog
        ;tags: auth,main
        '''
        # add_field is a flat comma-separated k,v,k,v,... list; turn it
        # into config['fields'] = {k: [v], ...}. An odd element count is
        # an error (unless raise_exceptions is off, e.g. for defaults).
        fields = config.get('add_field', '')
        if type(fields) != dict:
            try:
                if type(fields) == str:
                    fields = filter(None, fields.split(','))
                if len(fields) == 0:
                    config['fields'] = {}
                elif (len(fields) % 2) == 1:
                    if raise_exceptions:
                        raise Exception('Wrong number of values for add_field')
                else:
                    fieldkeys = fields[0::2]
                    fieldvalues = [[x] for x in fields[1::2]]
                    config['fields'] = dict(zip(fieldkeys, fieldvalues))
            except TypeError:
                config['fields'] = {}

        if 'add_field' in config:
            del config['add_field']

        # tags: comma-separated string -> list (empty list if unset).
        try:
            tags = config.get('tags', '')
            if type(tags) == str:
                tags = filter(None, tags.split(','))
            if len(tags) == 0:
                tags = []
            config['tags'] = tags
        except TypeError:
            config['tags'] = []

        try:
            file_type = config.get('type', 'file')
            if not file_type:
                file_type = 'file'
            config['type'] = file_type
        # FIX: narrowed from a bare 'except:', which would also swallow
        # KeyboardInterrupt/SystemExit.
        except Exception:
            config['type'] = 'file'

        # FIX: the original tested 'if config["type"]:' here, which raised
        # 'Missing mandatory config "type"' precisely when a type WAS set
        # (and, since the try/except above always sets one, it raised for
        # every section parsed with raise_exceptions=True). The intent is
        # to raise only when no type could be determined.
        if not config['type']:
            if raise_exceptions:
                raise Exception('Missing mandatory config "type"')

        require_bool = ['debug', 'ignore_empty', 'ignore_truncate']
        for k in require_bool:
            config[k] = bool(int(config[k]))

        require_int = ['sincedb_write_interval', 'stat_interval', 'tail_lines']
        for k in require_int:
            config[k] = int(config[k])

        return config

    conf = Configuration(
        name='beaver',
        path=self._configfile,
        main_defaults=self._main_defaults,
        section_defaults=self._section_defaults,
        main_parser=_main_parser,
        section_parser=_section_parser,
    )

    config = conf.raw()
    self._beaver_config = config['beaver']
    self._file_config = config['sections']

    # Also keep parsed copies of the raw defaults.
    self._main_parser = _main_parser(self._main_defaults)
    self._section_defaults = _section_parser(self._section_defaults, raise_exceptions=False)

    # Map each globbed file's realpath to its section config.
    self._files = {}
    for section in config['sections']:
        globs = eglob(section, config['sections'][section].get('exclude', ''))
        if not globs:
            self._logger.debug('Skipping glob due to no files found: %s' % section)
            continue

        for globbed_file in globs:
            self._files[os.path.realpath(globbed_file)] = config['sections'][section]