Esempio n. 1
0
def report(request, classes):
    """Render one page of the classification report.

    Selects rows from ``hits_mv`` joined with ``main_hitgroupclass`` and
    ``main_hitgroupcontent`` whose ``classes`` bitmask shares at least one
    bit with ``classes`` and whose ``start_time`` lies in the interval
    returned by ``get_time_interval``, then renders
    ``main/classification_report.html``.

    Request parameters:
        page -- 1-based page number (default 1).
        size -- number of rows per page (default 5).

    Raises ``ValueError``/``TypeError`` if ``classes`` is not convertible
    to an integer.
    """
    def _int_param(name, default):
        # Malformed or non-positive values fall back to the default rather
        # than failing the request (a page below 1 would otherwise produce
        # a negative OFFSET).
        try:
            value = int(request.REQUEST.get(name, default))
        except (TypeError, ValueError):
            return default
        return value if value >= 1 else default

    ctx = {}
    date_from, date_to = get_time_interval(request.REQUEST, ctx, days_ago=1)
    page = _int_param("page", 1)
    size = _int_param("size", 5)

    # Every value is passed as a bound parameter instead of being formatted
    # into the SQL text, so request-controlled input cannot alter the query.
    query = """
        SELECT hmv.crawl_id, hmv.start_time, hgcls.classes,
               hgcnt.group_id, hgcnt.title, hgcnt.description
        FROM hits_mv AS hmv
        JOIN main_hitgroupclass AS hgcls ON hmv.group_id = hgcls.group_id
        JOIN main_hitgroupcontent AS hgcnt ON hgcls.group_id = hgcnt.group_id
        WHERE hmv.start_time >= %s AND hmv.start_time < %s AND
              hgcls.classes & %s <> 0
        ORDER BY start_time ASC
        LIMIT %s
        OFFSET %s
    """
    offset = (page - 1) * size
    # int() guarantees the bitmask operand reaches the database as a number.
    data = query_to_dicts(query, date_from, date_to, int(classes), size,
                          offset)

    ctx["data"] = data
    ctx["classes"] = classes
    ctx["first_page"] = page == 1
    ctx["last_page"] = False  # TODO finish it.
    ctx["next_page"] = page + 1
    ctx["prev_page"] = page - 1
    # The page size is exposed to the template only when it is not the
    # default.
    if size != 5:
        ctx["size"] = size
    return direct_to_template(request, 'main/classification_report.html', ctx)
Esempio n. 2
0
def arrivals(request, tab_slug=None):
    """Render the arrivals timeline for the tab identified by ``tab_slug``.

    A missing or unknown slug falls back to ``ArrivalsTabEnum.ALL``. The
    chart rows are produced lazily from ``DayStats`` records whose date lies
    within the interval returned by ``get_time_interval``.
    """
    tab = ArrivalsTabEnum.value_for_slug.get(tab_slug, ArrivalsTabEnum.ALL)

    ctx = dict(
        multichart=False,
        columns=ArrivalsTabEnum.get_graph_columns(tab),
        title=ArrivalsTabEnum.get_tab_title(tab),
        top_tabs=ArrivalsTabEnum.enum_dict.values(),
        current_tab=ArrivalsTabEnum.enum_dict[tab],
    )

    def _timeline_rows(stats):
        # One dict per daily record; the row is built by the processor
        # registered for the current tab.
        for day in stats:
            date = day.date
            row = ArrivalsTabEnum.data_set_processor[tab](day)
            yield {'date': date, 'row': row}

    date_from, date_to = get_time_interval(request.GET, ctx)
    stats = DayStats.objects.filter(date__gte=date_from, date__lte=date_to)
    ctx['data'] = _timeline_rows(stats)
    return direct_to_template(request, 'main/graphs/timeline.html', ctx)
Esempio n. 3
0
def general(request, tab_slug=None):
    """Render the general timeline for the tab identified by ``tab_slug``.

    A missing or unknown slug falls back to ``GeneralTabEnum.ALL``. Rows are
    read from ``main_crawlagregates`` for the interval returned by
    ``get_time_interval`` (7 days back by default) and, when
    ``settings.DATASMOOTHING`` is set, passed through ``plot.repair``
    before the per-tab processor is applied.
    """
    tab = GeneralTabEnum.value_for_slug.get(tab_slug, GeneralTabEnum.ALL)

    ctx = dict(
        multichart=(tab == GeneralTabEnum.ALL),
        columns=GeneralTabEnum.get_graph_columns(tab),
        title=GeneralTabEnum.get_tab_title(tab),
        current_tab=GeneralTabEnum.enum_dict[tab],
        top_tabs=GeneralTabEnum.enum_dict.values(),
    )

    date_from, date_to = get_time_interval(request.GET, ctx, days_ago=7)

    records = query_to_dicts('''
            SELECT reward, hits, projects as "count", spam_projects, start_time
            FROM main_crawlagregates
            WHERE start_time >= %s AND start_time <= %s
            ORDER BY start_time ASC
        ''', date_from, date_to)
    data = data_formater(records)

    def _mean_of_first(entries):
        # Average of the first row value over the given entries.
        return sum(int(entry['row'][0]) for entry in entries) / len(entries)

    def _is_anomaly(a, others):
        # A point is anomalous when its first value is more than 7000 away
        # from the average of the points it is compared against.
        return abs(_mean_of_first(others) - int(a['row'][0])) > 7000

    def _fixer(a, others):
        # Replace the first value with the average of the other points and
        # keep the remaining three values unchanged.
        val = _mean_of_first(others)
        row = a['row']
        a['row'] = (str(val), row[1], row[2], row[3])
        return a

    smoothing = settings.DATASMOOTHING
    series = list(data)
    if smoothing:
        # NOTE(review): the meaning of the trailing 2 is defined by
        # plot.repair, which is not visible here.
        series = plot.repair(series, _is_anomaly, _fixer, 2)

    ctx['data'] = GeneralTabEnum.data_set_processor[tab](series)
    return direct_to_template(request, 'main/graphs/timeline.html', ctx)
Esempio n. 4
0
def aggregates(request, classes=None):
    """Display charts with the variability of classes in time.

    ``classes`` is a bitmask (or its string form) selecting which of the
    classes listed in ``LABELS`` get a chart column. ``None``, zero or a
    negative value selects a single column holding the sum of
    ``hits_available`` over all classes.

    Raises ``Http404`` when ``classes`` is not a number, exceeds the sum of
    all known class values, or is positive but matches no known class.
    """
    all_classes = sorted(LABELS.keys())
    max_classes = sum(all_classes)
    try:
        classes = int(classes) if classes is not None else 0
    except (TypeError, ValueError):
        # A mask that is not a number cannot identify any page.
        raise Http404
    if classes > max_classes:
        raise Http404
    top_tabs = [ClassificationTab(classes, c) for c in all_classes]
    if classes > 0:
        chosen_classes = [cls for cls in all_classes if classes & cls]
        if not chosen_classes:
            # Without any column the pivot query below would be invalid SQL
            # ("SELECT start_time, FROM ...").
            raise Http404
    else:
        chosen_classes = [0]
    num_classes = len(chosen_classes)
    ctx = {
        "top_tabs": top_tabs,
        "multitabs": True,
        "multichart": False,
        "columns": (("date", "Date"),) +
                   # Create a column description for a quantity of each class.
                   tuple(("number", str(c)) for c in chosen_classes),
        "title": "Classification",
        "active_tabs": chosen_classes,
    }

    def _data_formatter(rows):
        # Each result row is (start_time, value_1, ..., value_n); the values
        # are emitted as a tuple of strings so the row can be iterated more
        # than once.
        for cc in rows:
            yield {
                "date": cc[0],
                "row": tuple(str(cc[i + 1]) for i in range(num_classes)),
            }

    date_from, date_to = get_time_interval(request.GET, ctx, days_ago=7)
    if classes > 0:
        # Create a list of columns corresponding to the available classes.
        # This is a pivot query. For example it translates records from:
        # crawl_id | classes | hits_available
        # 735      | 0       | 12
        # 735      | 1       | 18
        # 735      | 2       | 98
        # 736      | 0       | 7
        # 736      | 1       | 17
        # 736      | 2       | 99
        # to the form that is easy to use in templates:
        # crawl_id | 0  | 1  | 2
        # 735      | 12 | 18 | 98
        # 736      | 7  | 17 | 99
        # Taken from http://sykosomatic.org/2011/09/pivot-tables-in-postgresql/
        columns = ["COALESCE(MAX(CASE classes "
                       "WHEN {0} THEN hits_available END"
                   "), 0) AS \"{0}\"".format(cls) for cls in chosen_classes]
        query_prefix = \
        """
            SELECT start_time, {}
            FROM main_hitgroupclassaggregate
        """.format(", ".join(columns))
    else:
        query_prefix = \
        """
            SELECT start_time, sum(hits_available) AS "0"
            FROM main_hitgroupclassaggregate
        """
    # NOTE(review): the interval bounds are formatted into the SQL text;
    # if query_to_lists accepts bound parameters they should be passed that
    # way instead -- confirm against its definition.
    query = \
    """ {}
        WHERE start_time >= '{}' AND start_time <= '{}'
        GROUP BY crawl_id, start_time
        ORDER BY start_time ASC
    """.format(query_prefix, date_from, date_to)
    data = query_to_lists(query)

    def _anomalies(row, others):
        # Return the indexes (1-based, index 0 is the date) of the values
        # that differ from the average of the same column in ``others`` by
        # more than 7000.
        lgt = len(others)
        siz = len(row)
        mids = [sum(o[i] for o in others) / lgt for i in range(1, siz)]
        return [i for i in range(1, siz) if abs(mids[i - 1] - row[i]) > 7000]

    def _fixer(row, others, anomalies):
        # Overwrite every anomalous value, in place, with the average of the
        # same column in ``others``.
        lgt = len(others)
        for a in anomalies:
            row[a] = sum(o[a] for o in others) / lgt
        return row

    if settings.DATASMOOTHING:
        # NOTE(review): the meaning of the trailing 8 is defined by
        # plot.vrepair, which is not visible here.
        data = plot.vrepair(list(data), _anomalies, _fixer, 8)
    else:
        data = list(data)
    ctx['data'] = _data_formatter(data)
    return direct_to_template(request, 'main/graphs/timeline.html', ctx)