def __init__(self, conf, logger):
    """Load delivery settings from *conf* and build the file buffer.

    :param conf: dict-like config; requires 'log_source_account',
                 everything else has a default
    :param logger: logger handed through to the base class and FileBuffer
    :raises KeyError: if 'log_source_account' is missing from conf
    """
    super(AccessLogDelivery, self).__init__(conf, logger,
                                            'access-log-delivery')
    self.frequency = int(conf.get('frequency', '3600'))
    self.metadata_key = conf.get(
        'metadata_key', 'x-container-meta-access-log-delivery').lower()
    self.server_name = conf.get('server_name', 'proxy-server')
    self.working_dir = conf.get('working_dir', '/tmp/swift').rstrip('/')
    # Convert like the other numeric settings: a str limit would never
    # satisfy a numeric size comparison in the buffer (TODO: confirm
    # against FileBuffer, which is defined elsewhere).
    buffer_limit = int(conf.get('buffer_limit', '10485760'))
    self.file_buffer = FileBuffer(buffer_limit, logger)
    self.hidden_ips = [x.strip() for x in
                       conf.get('hidden_ips', '').split(',') if x.strip()]
    self.source_account = conf['log_source_account']
    self.source_container = conf.get('log_source_container_name',
                                     'log_data')
def __init__(self, conf, logger):
    """Load delivery settings from *conf* and build the file buffer.

    :param conf: dict-like config; requires "log_source_account",
                 everything else has a default
    :param logger: logger handed through to the base class and FileBuffer
    :raises KeyError: if "log_source_account" is missing from conf
    """
    super(AccessLogDelivery, self).__init__(conf, logger,
                                            "access-log-delivery")
    self.frequency = int(conf.get("frequency", "3600"))
    self.metadata_key = conf.get(
        "metadata_key", "x-container-meta-access-log-delivery").lower()
    self.server_name = conf.get("server_name", "proxy-server")
    self.working_dir = conf.get("working_dir", "/tmp/swift").rstrip("/")
    # Convert like the other numeric settings: a str limit would never
    # satisfy a numeric size comparison in the buffer (TODO: confirm
    # against FileBuffer, which is defined elsewhere).
    buffer_limit = int(conf.get("buffer_limit", "10485760"))
    self.file_buffer = FileBuffer(buffer_limit, logger)
    self.hidden_ips = [x.strip() for x in
                       conf.get("hidden_ips", "").split(",") if x.strip()]
    self.source_account = conf["log_source_account"]
    self.source_container = conf.get("log_source_container_name",
                                     "log_data")
class AccessLogDelivery(LogProcessorCommon):
    """Deliver proxy access logs to the containers they describe.

    Lists raw log objects in a source account/container, parses each log
    line, and buffers CLF-formatted lines into per-account/per-container
    files under ``working_dir`` — but only for containers that opted in
    via the ``metadata_key`` container-metadata flag.
    """

    def __init__(self, conf, logger):
        """Load delivery settings from *conf* and build the file buffer.

        :raises KeyError: if 'log_source_account' is missing from conf
        """
        super(AccessLogDelivery, self).__init__(conf, logger,
                                                'access-log-delivery')
        self.frequency = int(conf.get('frequency', '3600'))
        self.metadata_key = conf.get(
            'metadata_key', 'x-container-meta-access-log-delivery').lower()
        self.server_name = conf.get('server_name', 'proxy-server')
        self.working_dir = conf.get('working_dir', '/tmp/swift').rstrip('/')
        # Convert like the other numeric settings: a str limit would never
        # satisfy a numeric size comparison in the buffer (TODO: confirm
        # against FileBuffer, which is defined elsewhere).
        buffer_limit = int(conf.get('buffer_limit', '10485760'))
        self.file_buffer = FileBuffer(buffer_limit, logger)
        self.hidden_ips = [x.strip() for x in
                           conf.get('hidden_ips', '').split(',')
                           if x.strip()]
        self.source_account = conf['log_source_account']
        self.source_container = conf.get('log_source_container_name',
                                         'log_data')

    def get_logs_to_process(self, already_processed_files):
        """Return (account, container, object_name) tuples to process.

        Listing is bounded by the lookback window and excludes anything
        in *already_processed_files*.
        """
        lookback_start, lookback_end = self.calculate_lookback()
        logs_to_process = self.get_container_listing(
            self.source_account, self.source_container, lookback_start,
            lookback_end, already_processed_files)
        logs_to_process = [(self.source_account, self.source_container, x)
                           for x in logs_to_process]
        self.logger.info(_('loaded %d files to process') %
                         len(logs_to_process))
        return logs_to_process

    def process_one_file(self, account, container, object_name):
        """Parse one log object and buffer its lines for delivery.

        :param object_name: expected as 'year/month/day/hour/...'
        :returns: set of local filenames that received data, or None when
                  the object name does not match the expected layout
        """
        files_to_upload = set()
        try:
            year, month, day, hour, _unused = object_name.split('/', 4)
        except ValueError:
            # interpolate AFTER _() so the message catalog can match
            self.logger.info(_('Odd object name: %s. Skipping') %
                             object_name)
            return
        filename_pattern = '%s/%%s/%%s/%s/%s/%s/%s' % (
            self.working_dir, year, month, day, hour)
        self.logger.debug(_('Processing %s') % object_name)
        # get an iter of the object data
        compressed = object_name.endswith('.gz')
        stream = self.get_object_data(account, container, object_name,
                                      compressed=compressed)
        for line in stream:
            clf, account, container = self.convert_log_line(line)
            if not clf or not account or not container:
                # bad log line
                continue
            if self.get_container_save_log_flag(account, container):
                filename = filename_pattern % (account, container)
                self.file_buffer.write(filename, clf)
                files_to_upload.add(filename)
        self.file_buffer.flush()
        return files_to_upload

    @memoize
    def get_container_save_log_flag(self, account, container):
        """True when the container's metadata opts in to log delivery.

        Results are cached in memcache for ``frequency`` seconds.
        """
        key = 'save-access-logs-%s-%s' % (account, container)
        flag = self.memcache.get(key)
        if flag is None:
            metadata = self.internal_proxy.get_container_metadata(
                account, container)
            val = metadata.get(self.metadata_key, '')
            flag = val.lower() in TRUE_VALUES
            self.memcache.set(key, flag, timeout=self.frequency)
        return flag

    def convert_log_line(self, raw_log):
        """Return (clf_line, account, container), or Nones on a bad line."""
        parts = self.log_line_parser(raw_log)
        if parts == {}:
            return None, None, None
        return (make_clf_from_parts(parts),
                parts.get('account'),
                parts.get('container_name'))

    def log_line_parser(self, raw_log):
        '''given a raw access log line, return a dict of the good parts'''
        d = {}
        try:
            log_arr = raw_log[16:].split(' ')
            if len(log_arr) > 18:
                log_source = log_arr[18]
                if log_source != '-':
                    # internal proxy log
                    return {}
            (unused, server, client_ip, lb_ip, timestamp, method, request,
             http_version, code, referrer, user_agent, auth_token,
             bytes_in, bytes_out, etag, trans_id, headers,
             processing_time) = (unquote(x) for x in log_arr[:18])
        except ValueError:
            self.logger.debug(_('Bad line data: %s') % repr(raw_log))
            return {}
        if server != self.server_name:
            # incorrect server name in log line
            self.logger.debug(_('Bad server name: found "%(found)s" '
                                'expected "%(expected)s"') %
                              {'found': server,
                               'expected': self.server_name})
            return {}
        try:
            (version, account, container_name, object_name) = \
                split_path(request, 2, 4, True)
        except ValueError as e:
            self.logger.debug(
                _('Invalid path: %(error)s from data: %(log)s') %
                {'error': e, 'log': repr(raw_log)})
            return {}
        # drop any query string from the path components
        if container_name is not None:
            container_name = container_name.split('?', 1)[0]
        if object_name is not None:
            object_name = object_name.split('?', 1)[0]
        account = account.split('?', 1)[0]
        if client_ip in self.hidden_ips:
            # mask configured addresses before the log leaves the cluster
            client_ip = '0.0.0.0'
        d['client_ip'] = client_ip
        d['lb_ip'] = lb_ip
        d['method'] = method
        d['request'] = request
        d['http_version'] = http_version
        d['code'] = code
        d['referrer'] = referrer
        d['user_agent'] = user_agent
        d['auth_token'] = auth_token
        d['bytes_in'] = bytes_in
        d['bytes_out'] = bytes_out
        d['etag'] = etag
        d['trans_id'] = trans_id
        d['processing_time'] = processing_time
        day, month, year, hour, minute, second = timestamp.split('/')
        d['day'] = day
        # month arrives as a name; store its zero-padded month_map index
        month = ('%02s' % month_map.index(month)).replace(' ', '0')
        d['month'] = month
        d['year'] = year
        d['hour'] = hour
        d['minute'] = minute
        d['second'] = second
        d['tz'] = '+0000'
        d['account'] = account
        d['container_name'] = container_name
        d['object_name'] = object_name
        # '-' means "none" in the log; treat it as zero bytes
        d['bytes_out'] = int(d['bytes_out'].replace('-', '0'))
        d['bytes_in'] = int(d['bytes_in'].replace('-', '0'))
        d['code'] = int(d['code'])
        return d
class AccessLogDelivery(LogProcessorCommon):
    """Deliver proxy access logs to the containers they describe.

    Lists raw log objects in a source account/container, parses each log
    line, and buffers CLF-formatted lines into per-account/per-container
    files under ``working_dir`` — but only for containers that opted in
    via the ``metadata_key`` container-metadata flag.
    """

    def __init__(self, conf, logger):
        """Load delivery settings from *conf* and build the file buffer.

        :raises KeyError: if "log_source_account" is missing from conf
        """
        super(AccessLogDelivery, self).__init__(conf, logger,
                                                "access-log-delivery")
        self.frequency = int(conf.get("frequency", "3600"))
        self.metadata_key = conf.get(
            "metadata_key", "x-container-meta-access-log-delivery").lower()
        self.server_name = conf.get("server_name", "proxy-server")
        self.working_dir = conf.get("working_dir", "/tmp/swift").rstrip("/")
        # Convert like the other numeric settings: a str limit would never
        # satisfy a numeric size comparison in the buffer (TODO: confirm
        # against FileBuffer, which is defined elsewhere).
        buffer_limit = int(conf.get("buffer_limit", "10485760"))
        self.file_buffer = FileBuffer(buffer_limit, logger)
        self.hidden_ips = [x.strip() for x in
                           conf.get("hidden_ips", "").split(",")
                           if x.strip()]
        self.source_account = conf["log_source_account"]
        self.source_container = conf.get("log_source_container_name",
                                         "log_data")

    def get_logs_to_process(self, already_processed_files):
        """Return (account, container, object_name) tuples to process.

        Listing is bounded by the lookback window and excludes anything
        in *already_processed_files*.
        """
        lookback_start, lookback_end = self.calculate_lookback()
        logs_to_process = self.get_container_listing(
            self.source_account, self.source_container, lookback_start,
            lookback_end, already_processed_files)
        logs_to_process = [(self.source_account, self.source_container, x)
                           for x in logs_to_process]
        self.logger.info(_("loaded %d files to process") %
                         len(logs_to_process))
        return logs_to_process

    def process_one_file(self, account, container, object_name):
        """Parse one log object and buffer its lines for delivery.

        :param object_name: expected as "year/month/day/hour/..."
        :returns: set of local filenames that received data, or None when
                  the object name does not match the expected layout
        """
        files_to_upload = set()
        try:
            year, month, day, hour, _unused = object_name.split("/", 4)
        except ValueError:
            # interpolate AFTER _() so the message catalog can match
            self.logger.info(_("Odd object name: %s. Skipping") %
                             object_name)
            return
        filename_pattern = "%s/%%s/%%s/%s/%s/%s/%s" % (
            self.working_dir, year, month, day, hour)
        self.logger.debug(_("Processing %s") % object_name)
        # get an iter of the object data
        compressed = object_name.endswith(".gz")
        stream = self.get_object_data(account, container, object_name,
                                      compressed=compressed)
        for line in stream:
            clf, account, container = self.convert_log_line(line)
            if not clf or not account or not container:
                # bad log line
                continue
            if self.get_container_save_log_flag(account, container):
                filename = filename_pattern % (account, container)
                self.file_buffer.write(filename, clf)
                files_to_upload.add(filename)
        self.file_buffer.flush()
        return files_to_upload

    @memoize
    def get_container_save_log_flag(self, account, container):
        """True when the container's metadata opts in to log delivery.

        Results are cached in memcache for ``frequency`` seconds.
        """
        key = "save-access-logs-%s-%s" % (account, container)
        flag = self.memcache.get(key)
        if flag is None:
            metadata = self.internal_proxy.get_container_metadata(
                account, container)
            val = metadata.get(self.metadata_key, "")
            flag = val.lower() in TRUE_VALUES
            self.memcache.set(key, flag, timeout=self.frequency)
        return flag

    def convert_log_line(self, raw_log):
        """Return (clf_line, account, container), or Nones on a bad line."""
        parts = self.log_line_parser(raw_log)
        # guard bad lines before handing parts to make_clf_from_parts
        if parts == {}:
            return None, None, None
        return (make_clf_from_parts(parts),
                parts.get("account"),
                parts.get("container_name"))

    def log_line_parser(self, raw_log):
        """given a raw access log line, return a dict of the good parts"""
        d = {}
        try:
            log_arr = raw_log[16:].split(" ")
            # skip lines stamped with a log_source: internal proxy traffic
            if len(log_arr) > 18:
                log_source = log_arr[18]
                if log_source != "-":
                    # internal proxy log
                    return {}
            (unused, server, client_ip, lb_ip, timestamp, method, request,
             http_version, code, referrer, user_agent, auth_token,
             bytes_in, bytes_out, etag, trans_id, headers,
             processing_time) = (unquote(x) for x in log_arr[:18])
        except ValueError:
            self.logger.debug(_("Bad line data: %s") % repr(raw_log))
            return {}
        if server != self.server_name:
            # incorrect server name in log line
            self.logger.debug(_('Bad server name: found "%(found)s" '
                                'expected "%(expected)s"') %
                              {"found": server,
                               "expected": self.server_name})
            return {}
        try:
            (version, account, container_name, object_name) = \
                split_path(request, 2, 4, True)
        except ValueError as e:
            self.logger.debug(
                _("Invalid path: %(error)s from data: %(log)s") %
                {"error": e, "log": repr(raw_log)})
            return {}
        # drop any query string from the path components
        if container_name is not None:
            container_name = container_name.split("?", 1)[0]
        if object_name is not None:
            object_name = object_name.split("?", 1)[0]
        account = account.split("?", 1)[0]
        if client_ip in self.hidden_ips:
            # mask configured addresses before the log leaves the cluster
            client_ip = "0.0.0.0"
        d["client_ip"] = client_ip
        d["lb_ip"] = lb_ip
        d["method"] = method
        d["request"] = request
        d["http_version"] = http_version
        d["code"] = code
        d["referrer"] = referrer
        d["user_agent"] = user_agent
        d["auth_token"] = auth_token
        d["bytes_in"] = bytes_in
        d["bytes_out"] = bytes_out
        d["etag"] = etag
        d["trans_id"] = trans_id
        d["processing_time"] = processing_time
        day, month, year, hour, minute, second = timestamp.split("/")
        d["day"] = day
        # month arrives as a name; store its zero-padded month_map index
        month = ("%02s" % month_map.index(month)).replace(" ", "0")
        d["month"] = month
        d["year"] = year
        d["hour"] = hour
        d["minute"] = minute
        d["second"] = second
        d["tz"] = "+0000"
        d["account"] = account
        d["container_name"] = container_name
        d["object_name"] = object_name
        # "-" means "none" in the log; treat it as zero bytes
        d["bytes_out"] = int(d["bytes_out"].replace("-", "0"))
        d["bytes_in"] = int(d["bytes_in"].replace("-", "0"))
        d["code"] = int(d["code"])
        return d