Example #1
def editor(self, index):
    """Gets the proper key to look up a member of a create_test_cohort result"""
    return str(WikiUserKey(
        self.editors[index].user_id,
        mediawiki_project,
        self.cohort.id,
    ))
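The WikiUserKey class itself is not shown on this page. A minimal sketch that reproduces only the behaviour exercised in these examples (the '|'-joined str() form used above, and the fromstr() parsing asserted in Examples #4 and #5) could look like this; the real implementation may differ:

from collections import namedtuple

# Hypothetical stand-in for WikiUserKey, reconstructed from the tests below:
# str() joins the three fields with '|', fromstr() splits them back out,
# and tuple semantics make instances usable as dictionary keys.
class WikiUserKey(namedtuple('WikiUserKey', 'user_id user_project cohort_id')):

    def __str__(self):
        return '{}|{}|{}'.format(self.user_id, self.user_project, self.cohort_id)

    @classmethod
    def fromstr(cls, key_str):
        # all three parts come back as strings, matching Example #5
        return cls(*key_str.split('|'))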
Example #2
def get_usernames_for_task_result(task_result):
    """
    Parameters
        task_result : the result dictionary from Celery
    Returns
         user_names : dictionary of user names (keyed by WikiUserKey)
                      empty if results are not detailed by user

    TODO: this function should move outside the controller,
          at the time of writing the function we are
          consolidating code that wasduplicated
    """
    user_names = {}
    if Aggregation.IND in task_result:
        session = db.get_session()
        # cohort should be the same for all users
        # get cohort from first key
        cohort_id = None

        for wiki_user_key_str, row in task_result[Aggregation.IND].iteritems():
            wiki_user_key = WikiUserKey.fromstr(wiki_user_key_str)
            cohort_id = wiki_user_key.cohort_id
            break

        user_names = g.cohort_service.get_wikiusernames_for_cohort(cohort_id, session)

    return user_names
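None of the examples show the Celery task_result itself. Judging only from how this function iterates it, a minimal individual-results payload might be shaped like this; the literal 'ind' value standing in for Aggregation.IND is an assumption:

# Hypothetical task_result shaped the way get_usernames_for_task_result
# reads it: Aggregation.IND maps WikiUserKey strings to per-user rows.
task_result = {
    'ind': {  # assumed value of Aggregation.IND
        '123|wiki|444': {'edits': 10},
        '456|wiki|444': {'edits': 3},
    },
}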
Example #3
def add_user_names_to_json(json_result, user_names):
    """
    Parameters
        json_result : the result dictionary from pj.get_json_result
        user_names  : dictionary of user names (keyed by WikiUserKey)
    Returns
        The result dict, with user names added to the WikiUserKey id strings
    """
    new_individual_ids = {}
    for individual in json_result['result'][Aggregation.IND]:
        user_name = user_names[WikiUserKey.fromstr(individual)]
        new_id_string = '{}|{}'.format(user_name, individual)
        new_individual_ids[individual] = new_id_string

    json_with_names = deepcopy(json_result)
    json_with_names['result'][Aggregation.IND] = {
        new_individual_ids[key]: value for (key, value) in
        json_result['result'][Aggregation.IND].items()}
    return json_with_names
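To see what the rewriting does, here is a toy run under the same assumptions as above (the Aggregation stub, the WikiUserKey sketch from Example #1, and the name 'Dan' are all illustrative):

from copy import deepcopy  # add_user_names_to_json depends on it

class Aggregation(object):  # minimal stub; the real constant values are assumptions
    IND = 'ind'

json_result = {'result': {Aggregation.IND: {'123|wiki|444': {'edits': 10}}}}
user_names = {WikiUserKey.fromstr('123|wiki|444'): 'Dan'}

renamed = add_user_names_to_json(json_result, user_names)
# renamed['result'][Aggregation.IND] == {'Dan|123|wiki|444': {'edits': 10}}
# json_result itself is unchanged, because the function deep-copies it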
Example #4
def test_repr(self):
    wuk = WikiUserKey(123, 'wiki', 444)
    assert_equal(str(wuk), '123|wiki|444')
Example #5
def test_from_string(self):
    wuk = WikiUserKey.fromstr('123|wiki|444')
    assert_equal(wuk.user_id, '123')
    assert_equal(wuk.user_project, 'wiki')
    assert_equal(wuk.cohort_id, '444')
Example #6
def get_simple_csv(task_result, pj, parameters):
    """
    Parameters
        task_result : the result dictionary from Celery
        pj          : a pointer to the permanent job
        parameters  : a dictionary of pj.parameters

    Returns
        A StringIO instance representing simple CSV
    """

    csv_io = StringIO()
    if task_result:
        columns = []

        if Aggregation.IND in task_result:
            columns = task_result[Aggregation.IND].values()[0].keys()
        elif Aggregation.SUM in task_result:
            columns = task_result[Aggregation.SUM].keys()
        elif Aggregation.AVG in task_result:
            columns = task_result[Aggregation.AVG].keys()
        elif Aggregation.STD in task_result:
            columns = task_result[Aggregation.STD].keys()

        # if task_result is not empty find header in first row
        fieldnames = ['user_id', 'user_name'] + columns
    else:
        fieldnames = ['user_id', 'user_name']
    writer = DictWriter(csv_io, fieldnames)

    # collect rows to output in CSV
    task_rows = []

    # open the session before the try block so the finally clause
    # can't hit an unbound `session` if get_session() itself raises
    session = db.get_session()
    try:
        # Individual Results
        if Aggregation.IND in task_result:
            # fold user_id into dict so we can use DictWriter to escape things
            for wiki_user_key_str, row in task_result[Aggregation.IND].iteritems():
                wiki_user_key = WikiUserKey.fromstr(wiki_user_key_str)
                user_id = wiki_user_key.user_id
                user_name = get_user_name(session, wiki_user_key)
                task_row = row.copy()
                task_row['user_id'] = user_id
                task_row['user_name'] = user_name
                task_rows.append(task_row)
    finally:
        session.close()

    # Aggregate Results
    if Aggregation.SUM in task_result:
        task_row = task_result[Aggregation.SUM].copy()
        task_row['user_id'] = Aggregation.SUM
        task_rows.append(task_row)

    if Aggregation.AVG in task_result:
        task_row = task_result[Aggregation.AVG].copy()
        task_row['user_id'] = Aggregation.AVG
        task_rows.append(task_row)

    if Aggregation.STD in task_result:
        task_row = task_result[Aggregation.STD].copy()
        task_row['user_id'] = Aggregation.STD
        task_rows.append(task_row)

    # generate some empty rows to separate the result
    # from the parameters
    task_rows.append({})
    task_rows.append({})
    task_rows.append({'user_id': 'parameters'})

    for key, value in sorted(parameters.items()):
        task_rows.append({'user_id': key, fieldnames[1]: value})

    writer.writeheader()
    writer.writerows(task_rows)
    return csv_io
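Two details here are easy to miss: DictWriter pads missing fields with restval ('' by default), which is what turns the empty {} rows into blank separator lines, and each parameter lands in the first two columns. A small self-contained demo of just that CSV mechanics (written for Python 3's io.StringIO; the examples on this page are Python 2 and use StringIO.StringIO and iteritems):

from csv import DictWriter
from io import StringIO

csv_io = StringIO()
writer = DictWriter(csv_io, ['user_id', 'user_name', 'edits'])
writer.writeheader()
writer.writerows([
    {'user_id': '123', 'user_name': 'Dan', 'edits': 10},  # individual row
    {'user_id': 'sum', 'edits': 13},                      # aggregate row
    {},                                                   # blank separator
    {},
    {'user_id': 'parameters'},
    {'user_id': 'metric', 'user_name': 'edits'},          # parameter key/value
])
print(csv_io.getvalue())
# user_id,user_name,edits
# 123,Dan,10
# sum,,13
# ,,
# ,,
# parameters,,
# metric,edits,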
Example #7
def get_simple_csv(task_result, pj, parameters, user_names):
    """
    Parameters
        task_result : the result dictionary from Celery
        pj          : a pointer to the permanent job
        parameters  : a dictionary of pj.parameters
        user_names  : dictionary of user names (keyed by WikiUserKey)

    Returns
        A StringIO instance representing simple CSV
    """

    csv_io = StringIO()
    if task_result:
        columns = []

        if Aggregation.IND in task_result:
            columns = task_result[Aggregation.IND].values()[0].keys()
        elif Aggregation.SUM in task_result:
            columns = task_result[Aggregation.SUM].keys()
        elif Aggregation.AVG in task_result:
            columns = task_result[Aggregation.AVG].keys()
        elif Aggregation.STD in task_result:
            columns = task_result[Aggregation.STD].keys()

        # if task_result is not empty find header in first row
        fieldnames = ['user_id', 'user_name', 'project'] + columns
    else:
        fieldnames = ['user_id', 'user_name', 'project']
    writer = DictWriter(csv_io, fieldnames)

    # collect rows to output in CSV
    task_rows = []
    # Individual Results
    if Aggregation.IND in task_result:
        # fold user_id into dict so we can use DictWriter to escape things
        for wiki_user_key_str, row in task_result[Aggregation.IND].iteritems():
            wiki_user_key = WikiUserKey.fromstr(wiki_user_key_str)
            user_id = wiki_user_key.user_id
            project = wiki_user_key.user_project

            # careful: the WikiUserKey tuple stores user_id as a string
            user_name = user_names.get(wiki_user_key, '')
            task_row = row.copy()
            task_row['user_id'] = user_id
            task_row['user_name'] = user_name
            task_row['project'] = project
            task_rows.append(task_row)

    # Aggregate Results
    if Aggregation.SUM in task_result:
        task_row = task_result[Aggregation.SUM].copy()
        task_row['user_id'] = Aggregation.SUM
        task_rows.append(task_row)

    if Aggregation.AVG in task_result:
        task_row = task_result[Aggregation.AVG].copy()
        task_row['user_id'] = Aggregation.AVG
        task_rows.append(task_row)

    if Aggregation.STD in task_result:
        task_row = task_result[Aggregation.STD].copy()
        task_row['user_id'] = Aggregation.STD
        task_rows.append(task_row)

    # generate some empty rows to separate the result
    # from the parameters
    task_rows.append({})
    task_rows.append({})
    task_rows.append({'user_id': 'parameters'})

    for key, value in sorted(parameters.items()):
        task_rows.append({'user_id': key, fieldnames[1]: value})

    writer.writeheader()
    writer.writerows(task_rows)
    return csv_io
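Unlike Example #6, this version opens no database session of its own: the caller resolves the names once, e.g. with get_usernames_for_task_result from Example #2, and it also emits a project column. A plausible call site, where task_result and pj come from the surrounding job machinery and are assumptions here:

# resolve user names once, then build the CSV without touching the database
user_names = get_usernames_for_task_result(task_result)
csv_io = get_simple_csv(task_result, pj, pj.parameters, user_names)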
Example #8
def get_timeseries_csv(task_result, pj, parameters, user_names):
    """
    Parameters
        task_result : the result dictionary from Celery
        pj          : a pointer to the permanent job
        parameters  : a dictionary of pj.parameters
        user_names  : dictionary of user names (keyed by WikiUserKey)

    Returns
        A StringIO instance representing timeseries CSV
    """
    csv_io = StringIO()
    if task_result:
        columns = []

        if Aggregation.IND in task_result:
            columns = task_result[Aggregation.IND].values()[0].values()[0].keys()
        elif Aggregation.SUM in task_result:
            columns = task_result[Aggregation.SUM].values()[0].keys()
        elif Aggregation.AVG in task_result:
            columns = task_result[Aggregation.AVG].values()[0].keys()
        elif Aggregation.STD in task_result:
            columns = task_result[Aggregation.STD].values()[0].keys()

        # if task_result is not empty find header in first row
        fieldnames = ['user_id', 'user_name', 'project', 'submetric'] + sorted(columns)
    else:
        fieldnames = ['user_id', 'user_name', 'project', 'submetric']
    writer = DictWriter(csv_io, fieldnames)

    # collect rows to output in CSV
    task_rows = []

    # Individual Results
    if Aggregation.IND in task_result:
        # fold user_id into dict so we can use DictWriter to escape things
        for wiki_user_key_str, row in task_result[Aggregation.IND].iteritems():
            wiki_user_key = WikiUserKey.fromstr(wiki_user_key_str)
            user_id = wiki_user_key.user_id
            project = wiki_user_key.user_project
            # careful: the WikiUserKey tuple stores user_id as a string
            user_name = user_names.get(wiki_user_key, '')
            for subrow in row.keys():
                task_row = row[subrow].copy()
                task_row['user_id'] = user_id
                task_row['user_name'] = user_name
                task_row['project'] = project
                task_row['submetric'] = subrow
                task_rows.append(task_row)

    # Aggregate Results
    if Aggregation.SUM in task_result:
        row = task_result[Aggregation.SUM]
        for subrow in row.keys():
            task_row = row[subrow].copy()
            task_row['user_id'] = Aggregation.SUM
            task_row['submetric'] = subrow
            task_rows.append(task_row)

    if Aggregation.AVG in task_result:
        row = task_result[Aggregation.AVG]
        for subrow in row.keys():
            task_row = row[subrow].copy()
            task_row['user_id'] = Aggregation.AVG
            task_row['submetric'] = subrow
            task_rows.append(task_row)

    if Aggregation.STD in task_result:
        row = task_result[Aggregation.STD]
        for subrow in row.keys():
            task_row = row[subrow].copy()
            task_row['user_id'] = Aggregation.STD
            task_row['submetric'] = subrow
            task_rows.append(task_row)

    # generate some empty rows to separate the result
    # from the parameters
    task_rows.append({})
    task_rows.append({})
    task_rows.append({'user_id': 'parameters'})

    for key, value in sorted(parameters.items()):
        task_rows.append({'user_id': key, fieldnames[1]: value})

    writer.writeheader()
    writer.writerows(task_rows)
    return csv_io
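get_timeseries_csv expects one more level of nesting than Example #7: each per-user row maps a submetric to a dict of timestamp columns, which is why the header is read with .values()[0].values()[0].keys(). A hypothetical payload in that shape (every literal key is an assumption):

task_result = {
    'ind': {  # assumed value of Aggregation.IND
        '123|wiki|444': {
            'edits': {'2014-01-01': 3, '2014-02-01': 7},  # submetric -> timeseries
        },
    },
    'sum': {  # assumed value of Aggregation.SUM
        'edits': {'2014-01-01': 3, '2014-02-01': 7},
    },
}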