def __init__(self, conf, logger):
    '''Read the delivery settings from ``conf`` and build the file buffer.

    :param conf: mapping of option names to values; must contain
                 ``log_source_account``. Every other option read here
                 falls back to the default given in the code below.
    :param logger: logger passed to the parent initializer and to
                   ``FileBuffer``.
    :raises KeyError: if ``log_source_account`` is missing from ``conf``.
    :raises ValueError: if ``frequency`` or ``buffer_limit`` cannot be
                        parsed as an integer.
    '''
    super(AccessLogDelivery, self).__init__(conf, logger,
                                            'access-log-delivery')
    self.frequency = int(conf.get('frequency', '3600'))
    self.metadata_key = conf.get(
        'metadata_key', 'x-container-meta-access-log-delivery').lower()
    self.server_name = conf.get('server_name', 'proxy-server')
    # Trailing slashes are dropped from the configured directory.
    self.working_dir = conf.get('working_dir', '/tmp/swift').rstrip('/')
    # The default is a numeric string, so parse it the same way as
    # ``frequency`` instead of handing FileBuffer a str limit.
    # NOTE(review): FileBuffer is defined elsewhere; this assumes it
    # treats the limit as a number -- confirm.
    buffer_limit = int(conf.get('buffer_limit', '10485760'))
    self.file_buffer = FileBuffer(buffer_limit, logger)
    # Comma-separated option; whitespace is trimmed, empty items dropped.
    self.hidden_ips = [ip.strip() for ip in
                       conf.get('hidden_ips', '').split(',') if ip.strip()]
    self.source_account = conf['log_source_account']
    self.source_container = conf.get('log_source_container_name',
                                     'log_data')
 def __init__(self, conf, logger):
     """Read the delivery settings from ``conf`` and build the file buffer.

     :param conf: mapping of option names to values; must contain
                  ``log_source_account``. Every other option read here
                  falls back to the default given in the code below.
     :param logger: logger passed to the parent initializer and to ``FileBuffer``.
     :raises KeyError: if ``log_source_account`` is missing from ``conf``.
     :raises ValueError: if ``frequency`` or ``buffer_limit`` cannot be parsed as an integer.
     """
     super(AccessLogDelivery, self).__init__(conf, logger, "access-log-delivery")
     self.frequency = int(conf.get("frequency", "3600"))
     self.metadata_key = conf.get("metadata_key", "x-container-meta-access-log-delivery").lower()
     self.server_name = conf.get("server_name", "proxy-server")
     # Trailing slashes are dropped from the configured directory.
     self.working_dir = conf.get("working_dir", "/tmp/swift").rstrip("/")
     # The default is a numeric string, so parse it the same way as ``frequency``
     # instead of handing FileBuffer a str limit.
     # NOTE(review): FileBuffer is defined elsewhere; this assumes it treats the
     # limit as a number -- confirm.
     buffer_limit = int(conf.get("buffer_limit", "10485760"))
     self.file_buffer = FileBuffer(buffer_limit, logger)
     # Comma-separated option; whitespace is trimmed, empty items dropped.
     self.hidden_ips = [ip.strip() for ip in conf.get("hidden_ips", "").split(",") if ip.strip()]
     self.source_account = conf["log_source_account"]
     self.source_container = conf.get("log_source_container_name", "log_data")
class AccessLogDelivery(LogProcessorCommon):
    '''Deliver proxy access-log lines to per-container files.

    Each stored log object is read line by line; every line is parsed by
    ``log_line_parser``, converted with ``make_clf_from_parts`` and written
    through ``self.file_buffer`` when the target container's metadata
    enables delivery.
    '''

    def __init__(self, conf, logger):
        '''Read the delivery settings from ``conf`` and build the buffer.

        :param conf: mapping of option names to values; must contain
                     ``log_source_account``. Every other option read here
                     falls back to the default given in the code below.
        :param logger: logger passed to the parent initializer and to
                       ``FileBuffer``.
        :raises KeyError: if ``log_source_account`` is missing.
        :raises ValueError: if ``frequency`` or ``buffer_limit`` cannot be
                            parsed as an integer.
        '''
        super(AccessLogDelivery, self).__init__(conf, logger,
                                                'access-log-delivery')
        self.frequency = int(conf.get('frequency', '3600'))
        self.metadata_key = conf.get(
            'metadata_key', 'x-container-meta-access-log-delivery').lower()
        self.server_name = conf.get('server_name', 'proxy-server')
        # Trailing slashes are dropped from the configured directory.
        self.working_dir = conf.get('working_dir', '/tmp/swift').rstrip('/')
        # The default is a numeric string, so parse it the same way as
        # ``frequency`` instead of handing FileBuffer a str limit.
        # NOTE(review): FileBuffer is defined elsewhere; this assumes it
        # treats the limit as a number -- confirm.
        buffer_limit = int(conf.get('buffer_limit', '10485760'))
        self.file_buffer = FileBuffer(buffer_limit, logger)
        # Comma-separated option; whitespace trimmed, empty items dropped.
        self.hidden_ips = [ip.strip() for ip in
                           conf.get('hidden_ips', '').split(',')
                           if ip.strip()]
        self.source_account = conf['log_source_account']
        self.source_container = conf.get('log_source_container_name',
                                         'log_data')

    def get_logs_to_process(self, already_processed_files):
        '''Return the log objects that still have to be processed.

        :param already_processed_files: passed through unchanged to
                                        ``get_container_listing``.
        :returns: list of ``(source_account, source_container, name)``
                  tuples, one per entry of the container listing.
        '''
        lookback_start, lookback_end = self.calculate_lookback()
        listing = self.get_container_listing(self.source_account,
                                             self.source_container,
                                             lookback_start,
                                             lookback_end,
                                             already_processed_files)
        logs_to_process = [(self.source_account, self.source_container, x)
                           for x in listing]
        self.logger.info(_('loaded %d files to process') %
                         len(logs_to_process))
        return logs_to_process

    def process_one_file(self, account, container, object_name):
        '''Split one stored log object into per-container buffered files.

        :param account: account holding the log object.
        :param container: container holding the log object.
        :param object_name: name of the log object; it must split on '/'
                            into year, month, day, hour and a remainder.
        :returns: set of file names written to the buffer, or ``None``
                  when ``object_name`` does not have the expected shape.
        '''
        try:
            year, month, day, hour, _unused = object_name.split('/', 4)
        except ValueError:
            # Interpolate after the gettext lookup so the message id is
            # the constant template, not the already formatted text.
            self.logger.info(_('Odd object name: %s. Skipping') %
                             object_name)
            return
        # The two escaped slots are filled per line with the account and
        # container taken from that log line.
        filename_pattern = '%s/%%s/%%s/%s/%s/%s/%s' % (
            self.working_dir, year, month, day, hour)
        self.logger.debug(_('Processing %s') % object_name)
        # get an iter of the object data
        compressed = object_name.endswith('.gz')
        stream = self.get_object_data(account, container, object_name,
                                      compressed=compressed)
        files_to_upload = set()
        for line in stream:
            # Distinct names keep the method parameters from being
            # rebound by values parsed out of the log line.
            clf, log_account, log_container = self.convert_log_line(line)
            if not clf or not log_account or not log_container:
                # bad log line
                continue
            if self.get_container_save_log_flag(log_account, log_container):
                filename = filename_pattern % (log_account, log_container)
                self.file_buffer.write(filename, clf)
                files_to_upload.add(filename)
        self.file_buffer.flush()
        return files_to_upload

    @memoize
    def get_container_save_log_flag(self, account, container):
        '''Return whether access logs are to be saved for a container.

        Memcache is consulted first. On a miss (``None``) the container
        metadata is fetched through ``self.internal_proxy``, the value
        under ``self.metadata_key`` is lower-cased and tested against
        ``TRUE_VALUES``, and the result is stored in memcache with
        ``timeout=self.frequency``.
        '''
        key = 'save-access-logs-%s-%s' % (account, container)
        flag = self.memcache.get(key)
        if flag is None:
            metadata = self.internal_proxy.get_container_metadata(account,
                                                                  container)
            val = metadata.get(self.metadata_key, '')
            flag = val.lower() in TRUE_VALUES
            self.memcache.set(key, flag, timeout=self.frequency)
        return flag

    def convert_log_line(self, raw_log):
        '''Turn a raw log line into ``(clf, account, container_name)``.

        Returns ``(None, None, None)`` when ``log_line_parser`` rejects
        the line (signalled by an empty dict).
        '''
        parts = self.log_line_parser(raw_log)
        if not parts:
            return None, None, None
        return (make_clf_from_parts(parts),
                parts.get('account'),
                parts.get('container_name'))

    def log_line_parser(self, raw_log):
        '''given a raw access log line, return a dict of the good parts

        An empty dict is returned (after a debug log, except for the
        internal-proxy case) when the line has a 19th field other than
        '-', has fewer than 18 space-separated fields, names a server
        other than ``self.server_name``, or carries a request path that
        ``split_path`` rejects.

        :raises ValueError: if the timestamp does not split into six
                            '/'-separated fields, the month is not in
                            ``month_map``, or the byte counts / status
                            code are not integers.
        '''
        try:
            # The first 16 characters are skipped before splitting;
            # presumably a fixed-width line prefix -- confirm.
            log_arr = raw_log[16:].split(' ')
            if len(log_arr) > 18:
                log_source = log_arr[18]
                if log_source != '-':
                    # internal proxy log
                    return {}
            (unused,
             server,
             client_ip,
             lb_ip,
             timestamp,
             method,
             request,
             http_version,
             code,
             referrer,
             user_agent,
             auth_token,
             bytes_in,
             bytes_out,
             etag,
             trans_id,
             headers,
             processing_time) = (unquote(x) for x in log_arr[:18])
        except ValueError:
            self.logger.debug(_('Bad line data: %s') % repr(raw_log))
            return {}
        if server != self.server_name:
            # incorrect server name in log line
            self.logger.debug(
                _('Bad server name: found "%(found)s" '
                  'expected "%(expected)s"') %
                {'found': server, 'expected': self.server_name})
            return {}
        try:
            (version, account, container_name, object_name) = \
                split_path(request, 2, 4, True)
        except ValueError as e:
            self.logger.debug(
                _('Invalid path: %(error)s from data: %(log)s') %
                {'error': e, 'log': repr(raw_log)})
            return {}
        # Drop any query string that is still attached to a path segment.
        if container_name is not None:
            container_name = container_name.split('?', 1)[0]
        if object_name is not None:
            object_name = object_name.split('?', 1)[0]
        account = account.split('?', 1)[0]
        if client_ip in self.hidden_ips:
            client_ip = '0.0.0.0'
        day, month, year, hour, minute, second = timestamp.split('/')
        # Two-digit, zero-padded position of the month name in month_map.
        month = '%02d' % month_map.index(month)
        # A '-' in a byte count is replaced by '0' before conversion.
        bytes_out = int(bytes_out.replace('-', '0'))
        bytes_in = int(bytes_in.replace('-', '0'))
        code = int(code)
        return {
            'client_ip': client_ip,
            'lb_ip': lb_ip,
            'method': method,
            'request': request,
            'http_version': http_version,
            'code': code,
            'referrer': referrer,
            'user_agent': user_agent,
            'auth_token': auth_token,
            'bytes_in': bytes_in,
            'bytes_out': bytes_out,
            'etag': etag,
            'trans_id': trans_id,
            'processing_time': processing_time,
            'day': day,
            'month': month,
            'year': year,
            'hour': hour,
            'minute': minute,
            'second': second,
            'tz': '+0000',
            'account': account,
            'container_name': container_name,
            'object_name': object_name,
        }
class AccessLogDelivery(LogProcessorCommon):
    """Deliver proxy access-log lines to per-container files.

    Each stored log object is read line by line; every line is parsed by
    ``log_line_parser``, converted with ``make_clf_from_parts`` and written
    through ``self.file_buffer`` when the target container's metadata
    enables delivery.
    """

    def __init__(self, conf, logger):
        """Read the delivery settings from ``conf`` and build the buffer.

        :param conf: mapping of option names to values; must contain
                     ``log_source_account``. Every other option read here
                     falls back to the default given in the code below.
        :param logger: logger passed to the parent initializer and to ``FileBuffer``.
        :raises KeyError: if ``log_source_account`` is missing.
        :raises ValueError: if ``frequency`` or ``buffer_limit`` cannot be parsed as an integer.
        """
        super(AccessLogDelivery, self).__init__(conf, logger, "access-log-delivery")
        self.frequency = int(conf.get("frequency", "3600"))
        self.metadata_key = conf.get("metadata_key", "x-container-meta-access-log-delivery").lower()
        self.server_name = conf.get("server_name", "proxy-server")
        # Trailing slashes are dropped from the configured directory.
        self.working_dir = conf.get("working_dir", "/tmp/swift").rstrip("/")
        # The default is a numeric string, so parse it the same way as ``frequency``
        # instead of handing FileBuffer a str limit.
        # NOTE(review): FileBuffer is defined elsewhere; this assumes it treats the
        # limit as a number -- confirm.
        buffer_limit = int(conf.get("buffer_limit", "10485760"))
        self.file_buffer = FileBuffer(buffer_limit, logger)
        # Comma-separated option; whitespace trimmed, empty items dropped.
        self.hidden_ips = [ip.strip() for ip in conf.get("hidden_ips", "").split(",") if ip.strip()]
        self.source_account = conf["log_source_account"]
        self.source_container = conf.get("log_source_container_name", "log_data")

    def get_logs_to_process(self, already_processed_files):
        """Return the log objects that still have to be processed.

        :param already_processed_files: passed through unchanged to ``get_container_listing``.
        :returns: list of ``(source_account, source_container, name)`` tuples,
                  one per entry of the container listing.
        """
        lookback_start, lookback_end = self.calculate_lookback()
        listing = self.get_container_listing(
            self.source_account, self.source_container, lookback_start, lookback_end, already_processed_files
        )
        logs_to_process = [(self.source_account, self.source_container, x) for x in listing]
        self.logger.info(_("loaded %d files to process") % len(logs_to_process))
        return logs_to_process

    def process_one_file(self, account, container, object_name):
        """Split one stored log object into per-container buffered files.

        :param account: account holding the log object.
        :param container: container holding the log object.
        :param object_name: name of the log object; it must split on "/" into
                            year, month, day, hour and a remainder.
        :returns: set of file names written to the buffer, or ``None`` when
                  ``object_name`` does not have the expected shape.
        """
        try:
            year, month, day, hour, _unused = object_name.split("/", 4)
        except ValueError:
            # Interpolate after the gettext lookup so the message id is the
            # constant template, not the already formatted text.
            self.logger.info(_("Odd object name: %s. Skipping") % object_name)
            return
        # The two escaped slots are filled per line with the account and
        # container taken from that log line.
        filename_pattern = "%s/%%s/%%s/%s/%s/%s/%s" % (self.working_dir, year, month, day, hour)
        self.logger.debug(_("Processing %s") % object_name)
        # get an iter of the object data
        compressed = object_name.endswith(".gz")
        stream = self.get_object_data(account, container, object_name, compressed=compressed)
        files_to_upload = set()
        for line in stream:
            # Distinct names keep the method parameters from being rebound by
            # values parsed out of the log line.
            clf, log_account, log_container = self.convert_log_line(line)
            if not clf or not log_account or not log_container:
                # bad log line
                continue
            if self.get_container_save_log_flag(log_account, log_container):
                filename = filename_pattern % (log_account, log_container)
                self.file_buffer.write(filename, clf)
                files_to_upload.add(filename)
        self.file_buffer.flush()
        return files_to_upload

    @memoize
    def get_container_save_log_flag(self, account, container):
        """Return whether access logs are to be saved for a container.

        Memcache is consulted first. On a miss (``None``) the container
        metadata is fetched through ``self.internal_proxy``, the value under
        ``self.metadata_key`` is lower-cased and tested against
        ``TRUE_VALUES``, and the result is stored in memcache with
        ``timeout=self.frequency``.
        """
        key = "save-access-logs-%s-%s" % (account, container)
        flag = self.memcache.get(key)
        if flag is None:
            metadata = self.internal_proxy.get_container_metadata(account, container)
            val = metadata.get(self.metadata_key, "")
            flag = val.lower() in TRUE_VALUES
            self.memcache.set(key, flag, timeout=self.frequency)
        return flag

    def convert_log_line(self, raw_log):
        """Turn a raw log line into ``(clf, account, container_name)``.

        Returns ``(None, None, None)`` when ``log_line_parser`` rejects the
        line (signalled by an empty dict).
        """
        parts = self.log_line_parser(raw_log)
        if not parts:
            # A rejected line comes back as an empty dict; report it as three
            # Nones instead of handing the empty dict to make_clf_from_parts.
            return None, None, None
        return (make_clf_from_parts(parts), parts.get("account"), parts.get("container_name"))

    def log_line_parser(self, raw_log):
        """given a raw access log line, return a dict of the good parts

        An empty dict is returned (after a debug log) when the line has fewer
        than 18 space-separated fields, names a server other than
        ``self.server_name``, or carries a request path that ``split_path``
        rejects.

        :raises ValueError: if the timestamp does not split into six
                            "/"-separated fields, the month is not in
                            ``month_map``, or the byte counts / status code
                            are not integers.
        """
        try:
            # The first 16 characters are skipped before splitting; presumably
            # a fixed-width line prefix -- confirm.
            (
                unused,
                server,
                client_ip,
                lb_ip,
                timestamp,
                method,
                request,
                http_version,
                code,
                referrer,
                user_agent,
                auth_token,
                bytes_in,
                bytes_out,
                etag,
                trans_id,
                headers,
                processing_time,
            ) = (unquote(x) for x in raw_log[16:].split(" ")[:18])
        except ValueError:
            self.logger.debug(_("Bad line data: %s") % repr(raw_log))
            return {}
        if server != self.server_name:
            # incorrect server name in log line
            self.logger.debug(
                _('Bad server name: found "%(found)s" expected "%(expected)s"')
                % {"found": server, "expected": self.server_name}
            )
            return {}
        try:
            (version, account, container_name, object_name) = split_path(request, 2, 4, True)
        except ValueError as e:
            self.logger.debug(_("Invalid path: %(error)s from data: %(log)s") % {"error": e, "log": repr(raw_log)})
            return {}
        # Drop any query string that is still attached to a path segment.
        if container_name is not None:
            container_name = container_name.split("?", 1)[0]
        if object_name is not None:
            object_name = object_name.split("?", 1)[0]
        account = account.split("?", 1)[0]
        if client_ip in self.hidden_ips:
            client_ip = "0.0.0.0"
        day, month, year, hour, minute, second = timestamp.split("/")
        # Two-digit, zero-padded position of the month name in month_map.
        month = "%02d" % month_map.index(month)
        # A "-" in a byte count is replaced by "0" before conversion.
        bytes_out = int(bytes_out.replace("-", "0"))
        bytes_in = int(bytes_in.replace("-", "0"))
        code = int(code)
        return {
            "client_ip": client_ip,
            "lb_ip": lb_ip,
            "method": method,
            "request": request,
            "http_version": http_version,
            "code": code,
            "referrer": referrer,
            "user_agent": user_agent,
            "auth_token": auth_token,
            "bytes_in": bytes_in,
            "bytes_out": bytes_out,
            "etag": etag,
            "trans_id": trans_id,
            "processing_time": processing_time,
            "day": day,
            "month": month,
            "year": year,
            "hour": hour,
            "minute": minute,
            "second": second,
            "tz": "+0000",
            "account": account,
            "container_name": container_name,
            "object_name": object_name,
        }