Esempio n. 1
0
 def handle(self, fn, retry, **options):
     """Run parse_logs on the log named ``fn`` and pass the outcome to process_results."""
     # '%(key)s' stays in the path as a placeholder for parse_logs to fill in.
     location = os.path.join(get_setting('ROOT'), '%(key)s', fn)
     parsed = parse_logs(location, progress=progress_bar, retry=retry)
     process_results(parsed, progress=progress_bar)
Esempio n. 2
0
def parse_file(key, log, result=None, protect=True, retry=False, **kwargs):
    """
    Parse a log file with the given parser (key) and return a dictionary of
    paths, by a dictionary of dates, with a dictionary of metrics that may
    be a list of all values parsed.

    key     -- name of the parser entry to look up in the 'LOGS' setting.
    log     -- filesystem path of the log file to read.
    result  -- existing results mapping to extend; a fresh nested
               defaultdict is created when None.
    protect -- when true, record the file's inode in LogFile after parsing.
    retry   -- when true, skip the "already processed" inode check.
    kwargs  -- passed through unchanged to matches_in.

    Raises ValueError when the parser key is unknown and IOError when the
    log file is missing or its inode has already been recorded.
    """
    parsers = get_setting('LOGS')
    if key not in parsers:
        raise ValueError("Log parser '%s' not found in LOGBOOK_PARSERS" % key)
    parser = parsers[key]

    if result is None:
        result = defaultdict(lambda: defaultdict(lambda: defaultdict(list)))

    if not os.path.isfile(log):
        raise IOError("No %s log to process: %s" % (key, log))

    # Files are tracked by inode rather than by name.
    inode = os.stat(log).st_ino
    # exists() asks only whether a row is present instead of counting them all.
    if not retry and LogFile.objects.filter(inode=inode).exists():
        raise IOError("Log file already processed: %s" % log)

    # Use a dedicated loop name so the ``kwargs`` parameter is not rebound.
    for match in matches_in(key, log, parser['rex'], **kwargs):
        add_result(result, **run(match, *parser.get('ignore', ())))

    if protect:
        LogFile.objects.get_or_create(
            inode=inode, defaults={'filename': os.path.basename(log)})

    return result
Esempio n. 3
0
def url(path):
    """Separate out language and standardise url.

    Returns a tuple ``(local, lang, path, qs)``:

    * ``local`` is True when a language prefix was recognised or when the
      url pointed at a host listed in ``settings.ALLOWED_HOSTS``.
    * ``lang`` is the leading path segment when it is one of the
      'LANGUAGES' setting (with 'zh-tw' mapped to 'zh-hant'), else None.
    * ``path`` is the remaining path with surrounding slashes stripped.
    * ``qs`` is everything after the first '?', or '' when there is none.
    """
    qs = ''
    local = False
    path = urllib.unquote(path)

    if '?' in path:
        (path, qs) = path.split('?', 1)

    if '://' in path:
        # For refers, we want to shorten them down to local urls.
        server = path.split('://')[-1].split('/')[0]
        if server in settings.ALLOWED_HOSTS:
            # "scheme://host/rest" splits into four parts; a bare
            # "scheme://host" has only three, and taking the last part
            # would wrongly use the host name as the path. Treat that case
            # as the site root instead.
            parts = path.split('/', 3)
            path = '/' + (parts[3] if len(parts) > 3 else '')
            local = True

    if path.startswith('/'):
        path = path.lstrip('/')
        if '/' in path:
            (lang, rest) = path.split('/', 1)
            lang = lang.replace('zh-tw', 'zh-hant')
            if lang in get_setting('LANGUAGES'):
                return (True, lang, rest.strip('/'), qs)

    return (local, None, path.strip('/'), qs)
Esempio n. 4
0
def get_search(url):
    """Classify ``url`` against the patterns in the 'SEARCHES' setting.

    On the first pattern that matches, returns
    ``("search://" + site, query)`` where ``site`` is the pattern's 'site'
    group and ``query`` is the lower-cased 'q' group, or 'unknown' when the
    pattern has no 'q' group or that group did not take part in the match.

    When nothing matches, returns ``(url without its query string, None)``.
    """
    for pattern in get_setting('SEARCHES'):
        found = re.match(pattern, url)
        if not found:
            continue
        groups = found.groupdict()
        # groupdict() maps an optional group that did not participate in
        # the match to None, so a .get() default alone would still hand
        # None to .lower().
        query = groups.get('q')
        if query is None:
            query = 'unknown'
        return ("search://" + groups['site'], query.lower())
    return (url.split('?', 1)[0], None)
Esempio n. 5
0
def process_results(result, progress=None):
    """Store every parsed value as a metric against its request and date.

    ``result`` maps path -> date -> metric key -> value. Existing metrics
    are cleared first. ``progress``, when given, is called before each path
    and once more at the end.
    """
    LogMetric.objects.clear_metrics()
    count = 0
    done = 0
    total = float(len(result))

    for count, path in enumerate(result):
        if progress:
            progress("save ", count / total, count, done)

        # A path whose request row cannot be fetched or created is skipped.
        try:
            request, _ = LogRequest.objects.get_or_create(path=path)
        except (utils.IntegrityError, utils.OperationalError):
            continue

        for d_ate, data in result[path].items():
            period, _ = LogPeriod.objects.get_or_create(
                period=0, date=d_ate, request_id=request.pk)
            for key, value in data.items():
                unit = get_setting('UNITS').get(key)
                metric = LogMetric.objects.get_metric(key, unit)
                period.values.create_or_update(metric, value)
                done += 1

    if progress:
        progress("save ", 1.0, count, done)
Esempio n. 6
0
def parse_logs(location, **kwargs):
    """
    Run parse_file once per key in the 'LOGS' setting, accumulating into a
    single result.

    ``location`` is a %-format template containing a ``%(key)s`` field that
    is filled with each key in turn. Returns None when 'LOGS' is empty.
    """
    combined = None
    for parser_key in get_setting('LOGS'):
        log_path = location % {'key': parser_key}
        combined = parse_file(parser_key, log_path, combined, **kwargs)
    return combined
Esempio n. 7
0
def add_result(result, path, **data):
    """Fold one parsed entry into the results matrix.

    Entries without a path or without a 'date' field are dropped. Every
    non-ignored field is appended to the site-wide bucket (path ``None``);
    fields listed in 'PATH_FIELDS' are also appended to the bucket for
    ``path``.
    """
    if path is None or 'date' not in data:
        return

    # Buckets are indexed by request path, then by date.
    day = data.pop('date')
    per_path = result[path][day]
    site_wide = result[None][day]

    skipped = get_setting('IGNORE')
    path_fields = get_setting('PATH_FIELDS')

    for name, value in data.items():
        if name not in skipped:
            if name in path_fields:
                per_path[name].append(value)
            site_wide[name].append(value)
Esempio n. 8
0
def agent_filter(items):
    """Rewrite user agent items through the configured 'FILTERS'.

    Each item is a ``(kind, family, version)`` triple. The family and the
    normalised version are joined as ``family__version`` and passed through
    every (regex, replacement) pair registered for the kind. Yields
    ``(kind, (family, version))``. An item whose label is reduced to the
    empty string is dropped. Raises KeyError when a filter leaves a label
    without the ``__`` separator.
    """
    rules = get_setting('FILTERS', {})
    for (kind, family, version) in items:
        label = "%s__%s" % (family, agent_version(version))
        for (pattern, replacement) in rules.get(kind, []):
            label = re.sub(pattern, replacement, label, flags=re.IGNORECASE)
            if label == '':
                break
            if '__' not in label:
                raise KeyError("Filter %s not family__version pair." %
                               str(pattern))
        if label == '':
            continue
        yield (kind, tuple(label.split('__', 1)))
Esempio n. 9
0
def run(data, *junk):
    """Attempt to format various keys and remove the junk.

    ``data`` is the dict of fields captured for one log entry; it is
    modified in place and returned. ``junk`` names extra keys to remove.

    Returns ``{'path': None}`` instead when the entry's path matches one of
    the 'EXCLUSIONS' patterns.

    Raises ValueError when the month abbreviation is unknown or when a
    tuple value is not a two-item pair.
    """
    # Convert the month abbreviation to its number; the empty first entry
    # makes 'Jan' index 1.
    data['M'] = [
        '', 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep',
        'Oct', 'Nov', 'Dec'
    ].index(data['M'])
    dtm = datetime(*[int(data.pop(k)) for k in ['Y', 'M', 'D', 'h', 'm', 's']])
    data['date'] = dtm.date()

    (local, data['lang'], data['path'], _) = url(data['path'])

    for m in get_setting('EXCLUSIONS'):
        if re.match(m, data['path']):
            return {'path': None}

    (local, _, data['refer'], query) = url(data.get('refer', '-'))
    if local:
        # A referrer on one of our own hosts is recorded as an internal link.
        data['link'] = data.pop('refer')
    else:
        (data['refer'],
         data['search']) = get_search(data['refer'] + '?' + query)

    data['count'] = data['path']
    data['country'] = country(data.pop('ip'))
    data.update(dict(get_agent(data.pop('agent', None))))

    if data['status'] != '200':
        junk += ('count', 'country', 'agent')

    for key in junk:
        data.pop(key, None)

    # Iterate over a snapshot: entries are removed and replaced inside the
    # loop, which is not allowed while iterating a live items() view.
    for key, value in list(data.items()):
        if value is None or value == '-':
            data.pop(key)
            continue
        # Usually the refer can be long
        if isinstance(value, str) and len(value) > 255:
            data[key] = value[:255]
        elif isinstance(value, tuple):
            if len(value) != 2:
                raise ValueError("Family, name pair error: %s" % str(value))
            data[key] = (value[0][:128], value[1][:255])

    return data
Esempio n. 10
0
 def test_override(self):
     """The 'TEST' setting resolves to 'Foo'."""
     actual = get_setting('TEST')
     self.assertEqual(actual, 'Foo')
Esempio n. 11
0
 def test_regex(self):
     """The 'nginx' entry of the 'LOGS' setting contains a 'rex' key."""
     nginx = get_setting('LOGS')['nginx']
     self.assertIn('rex', nginx)
Esempio n. 12
0
 def test_default_setting(self):
     """The 'UNITS' setting contains a 'size' entry."""
     units = get_setting('UNITS')
     self.assertIn('size', units)