Example #1
0
def test_celery_success_to_task(input_json: str,
                                expected_events: List[TaskEvent]):
    """Replay raw celery events into a State and verify the TaskEvent
    derived after each step matches the expected sequence."""
    state: celery_state.State = cr.app.events.State()

    task_id = expected_events[0].id

    task_description = model.TaskDescription(
        type_="fc.test",
        task_dt=None,
        events_path=None,
        logs_path=None,
        parameters={},
        # Task-app framework
        runtime_state=model.TaskAppState(
            config_path=None,
            task_serialisation_path=None,
        ))

    observed = []
    for raw_event in JSONLIterator(StringIO(input_json)):
        state.event(raw_event)

        tracked_task: celery_state.Task = state.tasks[task_id]
        observed.append(
            _celery_event_to_task(task_description,
                                  tracked_task,
                                  user='******'))

    # Compare one event at a time: pytest's failure diff for a single
    # object is far more readable than for a whole-list comparison.
    assert len(observed) == len(expected_events)
    for actual, expected in zip(observed, expected_events):
        assert actual == expected
Example #2
0
def main():
    """Report which of the 20 most populous cities appear in briefing texts.

    Reads CITIES_FILE (a JSON array of city records) and BRIEFINGS_FILE
    (JSON lines of briefing records), prints each city/title match, and
    collates per-city mention counts.
    """
    # Close the cities file deterministically instead of leaking the handle.
    with open(CITIES_FILE) as cities_f:
        cities_json = json.load(cities_f)
    # BUG FIX: the original passed a key-style lambda as sorted()'s second
    # positional argument, which is `cmp` in Python 2 (and a TypeError in
    # Python 3) -- so the list was never actually ordered by population.
    pop_cities = sorted(cities_json,
                        key=lambda o: int(o['population']),
                        reverse=True)
    cities = [o['city'] for o in pop_cities[:20]]

    res = defaultdict(list)

    with open(BRIEFINGS_FILE) as briefings_f:
        for obj in JSONLIterator(briefings_f):
            title = obj['title']
            briefing_html = obj['content']

            # Parse the briefing HTML and extract the text of the body pane.
            content_tree = soupparser.fromstring(briefing_html)
            pane_tree = content_tree.cssselect('.pane-node-field-forall-body')
            briefing_text = pane_tree[0].text_content()

            # Naive verbatim substring search for each city name.
            for city in cities:
                if city in briefing_text:
                    res[city].append(title)
                    # Python 3 print function (original used the Py2 statement).
                    print('found', repr(city), 'in', repr(title))

    # Collate counts per city, most-mentioned first.
    omd = OMD()
    for k in res:
        omd.addlist(k, res[k])
    top_items = sorted(omd.counts().items(), key=lambda x: x[1], reverse=True)

    # NOTE(review): debugger breakpoint preserved on purpose -- the script
    # apparently ends with an interactive inspection of `top_items`.
    import pdb
    pdb.set_trace()
Example #3
0
def mismatches_from_file(path: Path):
    """Yield Mismatch objects parsed from a JSON-lines file.

    Empty/falsy rows are skipped.
    """
    with path.open('r') as fobj:
        for record in JSONLIterator(fobj):
            if record:
                yield Mismatch.from_dict(record)
Example #4
0
def test_jsonl_iterator():
    """JSONLIterator with reverse=True yields records last-line-first."""
    ref = [{u'4': 4}, {u'3': 3}, {u'2': 2}, {u'1': 1}, {}]
    # FIX: use a context manager so the file handle is closed even when
    # the assertion fails (the original leaked the open handle).
    with open(JSONL_DATA_PATH) as f:
        jsonl_list = list(JSONLIterator(f, reverse=True))
    assert jsonl_list == ref
Example #5
0
def export_metrics(plist,
                   earliest,
                   metrics_dir,
                   metrics=None,
                   output_path=None,
                   output_format=None,
                   _show_exportable=False):
    """Export a csv with metrics collated from previous collect-metrics runs.

    For each (metric, project) pair the newest record (by 'pull_date')
    found in the *.jsonl files under metrics_dir is kept; the flattened
    leaf paths of those results become the CSV columns, and one row per
    project is written to 'apatite_export.csv'.

    :param plist: project list object; only ``plist.project_list`` is read.
    :param earliest: date/datetime; files whose newest-entry timestamp
        (taken from the filename) predates this are skipped entirely.
    :param metrics_dir: directory scanned (via iter_find_files) for
        '*.jsonl' metric files.
    :param metrics: optional collection of metric module names to export;
        defaults to all known metric modules.
    :param output_path: currently unused (output goes to
        'apatite_export.csv' in the working directory).
    :param output_format: currently unused (reserved).
    :param _show_exportable: if True, print the exportable column paths
        and return without writing a CSV.
    """
    metric_mods = all_metric_mods = _get_all_metric_mods(check_reqs=False)
    if metrics:
        # Restrict to the explicitly requested metric modules.
        metric_mods = [m for m in metric_mods if m.__name__ in metrics]
    if not metric_mods:
        print_err(
            'failed to collect data. no known metrics selected (available: %s)'
            % ', '.join([m.__name__ for m in all_metric_mods]))
        return

    # (metric_name, project_slug) -> newest jsonl record seen so far;
    # None until a record for that pair is found.
    metrics_map = {(m.__name__, p.name_slug): None
                   for m in all_metric_mods for p in plist.project_list}

    metrics_files = iter_find_files(metrics_dir, '*.jsonl')
    # Timestamps are compared as ISO-8601 text, which sorts
    # lexicographically in chronological order.
    earliest_text = earliest.isoformat()
    files_to_search = []
    for metric_file in metrics_files:
        # Filename convention: <name>__<run_dt>__<newest_dt>__<oldest_dt>.jsonl
        metric_base_fn = os.path.basename(os.path.splitext(metric_file)[0])
        _, run_dt_text, newest_dt_text, oldest_dt_text = metric_base_fn.split(
            '__')
        if newest_dt_text < earliest_text:
            # Every record in this file predates the cutoff.
            print('skipping', metric_file)
            continue
        files_to_search.append(metric_file)

        with open(metric_file) as f:
            # TODO: possible optimization when searching for a
            # specific project/metric. search for the project name
            # slug and metric name in the part of the line before the
            # result begins (the jsonl keys are carefully chosen to
            # sort nicely)
            for line_data in JSONLIterator(f):
                metric_name, proj_slug = line_data['metric_name'], line_data[
                    'project']
                try:
                    cur_data = metrics_map[metric_name, proj_slug]
                except KeyError:
                    # not a tracked project/metric
                    continue
                # Keep only the most recent record per (metric, project).
                if cur_data is None or cur_data['pull_date'] < line_data[
                        'pull_date']:
                    metrics_map[metric_name, proj_slug] = line_data

    # Discover every scalar leaf path across all collected results;
    # these become the candidate CSV columns.
    possible_paths = IndexedSet()
    for (metric_name, proj_slug), data in metrics_map.items():
        if data is None:
            continue

        def _visit(path, key, value):
            # Record only scalar leaves; containers are traversed further.
            # Capturing metric_name from the enclosing loop is safe here
            # because remap() invokes _visit immediately below.
            if not isinstance(value, (list, dict)):
                possible_paths.add((metric_name, ) + path + (key, ))
            return True

        remap(data['result'], visit=_visit)

    # TODO: deal with missing metrics
    # TODO: output csv or something
    ''' --cols 'license.total,evcs.*, sloc.TOTAL_*  --cols-file

    if col.endswith('*'):
       pop the segment with the star, fetch up until that point, then fetch/flatten everything underneath
    '''
    possible_paths = sorted(possible_paths)
    # Dotted column names, e.g. 'metricname.some.result.key'.
    path_texts = ['.'.join('%s' % s for s in path) for path in possible_paths]

    # NOTE(review): pprint is imported but never used -- debug leftover.
    from pprint import pprint
    if _show_exportable:
        print('\n'.join(path_texts))
        print('Showing %s exportable columns.' % len(possible_paths))
        return

    # for each project, output project_name, ...cols..., pull_date
    cols = path_texts
    all_proj_dicts = []
    for project in plist.project_list:
        cur_proj_dict = {'name': project.name_slug}
        for col in cols:
            # First dotted segment names the metric module; the remainder
            # is a glom path into that metric's 'result' dict.
            metric_mod_name, glom_path = col.split('.', 1)
            cur_result_dict = (metrics_map[metric_mod_name, project.name_slug]
                               or {
                                   'result': {}
                               })['result']
            cur_proj_dict[col] = glom.glom(cur_result_dict,
                                           glom_path,
                                           default='')
            # Normalize explicit None results to empty CSV cells.
            cur_proj_dict[col] = cur_proj_dict[col] if cur_proj_dict[
                col] is not None else ''
        all_proj_dicts.append(cur_proj_dict)

    all_cols = [
        'name'
    ] + cols  # TODO: + ['pull_date'] (oldest of all the collated metrics or?

    with open('apatite_export.csv', 'w') as f:
        w = csv.DictWriter(f, all_cols)
        w.writeheader()
        for proj_dict in all_proj_dicts:
            w.writerow(proj_dict)

    print('exported %s columns for %s projects across %s metrics (%s)' %
          (len(all_cols), len(all_proj_dicts), len(metric_mods), ', '.join(
              sorted(m.__name__ for m in metric_mods))))

    return