def editor(self, index):
    """Gets the proper key to look up a member of a create_test_cohort result"""
    return str(WikiUserKey(
        self.editors[index].user_id,
        mediawiki_project,
        self.cohort.id,
    ))
def get_usernames_for_task_result(task_result):
    """
    Parameters
        task_result : the result dictionary from Celery

    Returns
        user_names : dictionary of user names (keyed by WikiUserKey),
                     empty if results are not detailed by user

    TODO: this function should move outside the controller; at the time of
    writing it we are consolidating code that was duplicated
    """
    user_names = {}
    if Aggregation.IND in task_result:
        session = db.get_session()
        # the cohort should be the same for all users,
        # so get the cohort id from the first key
        cohort_id = None
        for wiki_user_key_str, row in task_result[Aggregation.IND].iteritems():
            wiki_user_key = WikiUserKey.fromstr(wiki_user_key_str)
            cohort_id = wiki_user_key.cohort_id
            break
        user_names = g.cohort_service.get_wikiusernames_for_cohort(
            cohort_id, session)
    return user_names
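# Illustrative shape of the Celery task_result consumed above (ids and
# metric values invented for this sketch): individual results are keyed by
# the string form of a WikiUserKey, so the cohort id can be recovered from
# any single key.
example_task_result = {
    Aggregation.IND: {
        '123|wiki|444': {'edits': 10},
        '124|wiki|444': {'edits': 3},
    },
}
# get_usernames_for_task_result(example_task_result) would then look up
# cohort 444 and return something shaped like
# {WikiUserKey('123', 'wiki', '444'): u'SomeUserName', ...}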
def add_user_names_to_json(json_result, user_names):
    """
    Parameters
        json_result : the result dictionary from pj.get_json_result
        user_names  : dictionary of user names (keyed by WikiUserKey)

    Returns
        The result dict, with user names added to the WikiUserKey id strings
    """
    new_individual_ids = {}
    for individual in json_result['result'][Aggregation.IND]:
        user_name = user_names[WikiUserKey.fromstr(individual)]
        new_id_string = '{}|{}'.format(user_name, individual)
        new_individual_ids[individual] = new_id_string

    json_with_names = deepcopy(json_result)
    json_with_names['result'][Aggregation.IND] = {
        new_individual_ids[key]: value
        for (key, value) in json_result['result'][Aggregation.IND].items()
    }
    return json_with_names
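# An illustrative run of add_user_names_to_json (inputs invented for this
# sketch): each WikiUserKey id string gains a 'user_name|' prefix while the
# metric values are left untouched.
example_json_result = {
    'result': {
        Aggregation.IND: {
            '123|wiki|444': {'edits': 10},
        },
    },
}
example_user_names = {WikiUserKey.fromstr('123|wiki|444'): 'Example'}
renamed = add_user_names_to_json(example_json_result, example_user_names)
# renamed['result'][Aggregation.IND]
# == {'Example|123|wiki|444': {'edits': 10}}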
def test_repr(self):
    wuk = WikiUserKey(123, 'wiki', 444)
    assert_equal(str(wuk), '123|wiki|444')
def test_from_string(self):
    wuk = WikiUserKey.fromstr('123|wiki|444')
    assert_equal(wuk.user_id, '123')
    assert_equal(wuk.user_project, 'wiki')
    assert_equal(wuk.cohort_id, '444')
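# A minimal sketch of a WikiUserKey consistent with the two tests above (the
# real class may differ): it serializes as 'user_id|project|cohort_id',
# fromstr() parses that string back with every field kept as a string, and
# the namedtuple base makes keys hashable, which the user_names dictionaries
# above rely on.
from collections import namedtuple

_WikiUserKeyBase = namedtuple('WikiUserKey',
                              ['user_id', 'user_project', 'cohort_id'])

class WikiUserKey(_WikiUserKeyBase):
    SEPARATOR = '|'

    def __str__(self):
        return self.SEPARATOR.join(
            str(part)
            for part in (self.user_id, self.user_project, self.cohort_id))

    @classmethod
    def fromstr(cls, wiki_user_key_str):
        return cls(*wiki_user_key_str.split(cls.SEPARATOR))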
def get_simple_csv(task_result, pj, parameters):
    """
    Parameters
        task_result : the result dictionary from Celery
        pj          : a pointer to the permanent job
        parameters  : a dictionary of pj.parameters

    Returns
        A StringIO instance representing simple CSV
    """
    csv_io = StringIO()
    if task_result:
        # task_result is not empty, so find the header in the first row
        columns = []
        if Aggregation.IND in task_result:
            columns = task_result[Aggregation.IND].values()[0].keys()
        elif Aggregation.SUM in task_result:
            columns = task_result[Aggregation.SUM].keys()
        elif Aggregation.AVG in task_result:
            columns = task_result[Aggregation.AVG].keys()
        elif Aggregation.STD in task_result:
            columns = task_result[Aggregation.STD].keys()
        fieldnames = ['user_id', 'user_name'] + columns
    else:
        fieldnames = ['user_id', 'user_name']
    writer = DictWriter(csv_io, fieldnames)

    # collect rows to output in CSV
    task_rows = []
    # get the session before the try block, so the finally clause can not
    # reference an unbound name if db.get_session() raises
    session = db.get_session()
    try:
        # Individual Results
        if Aggregation.IND in task_result:
            # fold user_id into the dict so we can use DictWriter
            # to escape things
            for wiki_user_key_str, row in task_result[Aggregation.IND].iteritems():
                wiki_user_key = WikiUserKey.fromstr(wiki_user_key_str)
                user_id = wiki_user_key.user_id
                user_name = get_user_name(session, wiki_user_key)
                task_row = row.copy()
                task_row['user_id'] = user_id
                task_row['user_name'] = user_name
                task_rows.append(task_row)
    finally:
        session.close()

    # Aggregate Results
    if Aggregation.SUM in task_result:
        task_row = task_result[Aggregation.SUM].copy()
        task_row['user_id'] = Aggregation.SUM
        task_rows.append(task_row)
    if Aggregation.AVG in task_result:
        task_row = task_result[Aggregation.AVG].copy()
        task_row['user_id'] = Aggregation.AVG
        task_rows.append(task_row)
    if Aggregation.STD in task_result:
        task_row = task_result[Aggregation.STD].copy()
        task_row['user_id'] = Aggregation.STD
        task_rows.append(task_row)

    # generate some empty rows to separate the result from the parameters
    task_rows.append({})
    task_rows.append({})
    task_rows.append({'user_id': 'parameters'})
    for key, value in sorted(parameters.items()):
        task_rows.append({'user_id': key, fieldnames[1]: value})

    writer.writeheader()
    writer.writerows(task_rows)
    return csv_io
def get_simple_csv(task_result, pj, parameters, user_names):
    """
    Parameters
        task_result : the result dictionary from Celery
        pj          : a pointer to the permanent job
        parameters  : a dictionary of pj.parameters
        user_names  : dictionary of user names (keyed by WikiUserKey)

    Returns
        A StringIO instance representing simple CSV
    """
    csv_io = StringIO()
    if task_result:
        # task_result is not empty, so find the header in the first row
        columns = []
        if Aggregation.IND in task_result:
            columns = task_result[Aggregation.IND].values()[0].keys()
        elif Aggregation.SUM in task_result:
            columns = task_result[Aggregation.SUM].keys()
        elif Aggregation.AVG in task_result:
            columns = task_result[Aggregation.AVG].keys()
        elif Aggregation.STD in task_result:
            columns = task_result[Aggregation.STD].keys()
        fieldnames = ['user_id', 'user_name', 'project'] + columns
    else:
        fieldnames = ['user_id', 'user_name', 'project']
    writer = DictWriter(csv_io, fieldnames)

    # collect rows to output in CSV
    task_rows = []
    # Individual Results
    if Aggregation.IND in task_result:
        # fold user_id into the dict so we can use DictWriter to escape things
        for wiki_user_key_str, row in task_result[Aggregation.IND].iteritems():
            wiki_user_key = WikiUserKey.fromstr(wiki_user_key_str)
            user_id = wiki_user_key.user_id
            project = wiki_user_key.user_project
            # careful: the key tuple stores user_id as a string
            user_name = user_names.get(wiki_user_key, '')
            task_row = row.copy()
            task_row['user_id'] = user_id
            task_row['user_name'] = user_name
            task_row['project'] = project
            task_rows.append(task_row)

    # Aggregate Results
    if Aggregation.SUM in task_result:
        task_row = task_result[Aggregation.SUM].copy()
        task_row['user_id'] = Aggregation.SUM
        task_rows.append(task_row)
    if Aggregation.AVG in task_result:
        task_row = task_result[Aggregation.AVG].copy()
        task_row['user_id'] = Aggregation.AVG
        task_rows.append(task_row)
    if Aggregation.STD in task_result:
        task_row = task_result[Aggregation.STD].copy()
        task_row['user_id'] = Aggregation.STD
        task_rows.append(task_row)

    # generate some empty rows to separate the result from the parameters
    task_rows.append({})
    task_rows.append({})
    task_rows.append({'user_id': 'parameters'})
    for key, value in sorted(parameters.items()):
        task_rows.append({'user_id': key, fieldnames[1]: value})

    writer.writeheader()
    writer.writerows(task_rows)
    return csv_io
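# A minimal, self-contained sketch (Python 2, matching the iteritems() usage
# above) of the DictWriter pattern both CSV helpers rely on: rows may omit
# fields, and the missing cells render as empty strings via DictWriter's
# default restval, which is what makes the blank separator rows and the
# trailing parameter rows work. All names and values here are illustrative.
from csv import DictWriter
from StringIO import StringIO

demo_io = StringIO()
demo_writer = DictWriter(demo_io, ['user_id', 'user_name', 'edits'])
demo_writer.writeheader()
demo_writer.writerows([
    {'user_id': '123', 'user_name': 'Example', 'edits': 10},
    {},                          # blank separator row
    {'user_id': 'parameters'},   # section marker row
    {'user_id': 'start', 'user_name': '2014-01-01'},
])
print demo_io.getvalue()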
def get_timeseries_csv(task_result, pj, parameters, user_names):
    """
    Parameters
        task_result : the result dictionary from Celery
        pj          : a pointer to the permanent job
        parameters  : a dictionary of pj.parameters
        user_names  : dictionary of user names (keyed by WikiUserKey)

    Returns
        A StringIO instance representing timeseries CSV
    """
    csv_io = StringIO()
    if task_result:
        # task_result is not empty, so find the header in the first row
        columns = []
        if Aggregation.IND in task_result:
            columns = task_result[Aggregation.IND].values()[0].values()[0].keys()
        elif Aggregation.SUM in task_result:
            columns = task_result[Aggregation.SUM].values()[0].keys()
        elif Aggregation.AVG in task_result:
            columns = task_result[Aggregation.AVG].values()[0].keys()
        elif Aggregation.STD in task_result:
            columns = task_result[Aggregation.STD].values()[0].keys()
        fieldnames = ['user_id', 'user_name', 'project', 'submetric'] + sorted(columns)
    else:
        fieldnames = ['user_id', 'user_name', 'project', 'submetric']
    writer = DictWriter(csv_io, fieldnames)

    # collect rows to output in CSV
    task_rows = []
    # Individual Results
    if Aggregation.IND in task_result:
        # fold user_id into the dict so we can use DictWriter to escape things
        for wiki_user_key_str, row in task_result[Aggregation.IND].iteritems():
            wiki_user_key = WikiUserKey.fromstr(wiki_user_key_str)
            user_id = wiki_user_key.user_id
            project = wiki_user_key.user_project
            # careful: the key tuple stores user_id as a string
            user_name = user_names.get(wiki_user_key, '')
            for subrow in row.keys():
                task_row = row[subrow].copy()
                task_row['user_id'] = user_id
                task_row['user_name'] = user_name
                task_row['project'] = project
                task_row['submetric'] = subrow
                task_rows.append(task_row)

    # Aggregate Results
    if Aggregation.SUM in task_result:
        row = task_result[Aggregation.SUM]
        for subrow in row.keys():
            task_row = row[subrow].copy()
            task_row['user_id'] = Aggregation.SUM
            task_row['submetric'] = subrow
            task_rows.append(task_row)
    if Aggregation.AVG in task_result:
        row = task_result[Aggregation.AVG]
        for subrow in row.keys():
            task_row = row[subrow].copy()
            task_row['user_id'] = Aggregation.AVG
            task_row['submetric'] = subrow
            task_rows.append(task_row)
    if Aggregation.STD in task_result:
        row = task_result[Aggregation.STD]
        for subrow in row.keys():
            task_row = row[subrow].copy()
            task_row['user_id'] = Aggregation.STD
            task_row['submetric'] = subrow
            task_rows.append(task_row)

    # generate some empty rows to separate the result from the parameters
    task_rows.append({})
    task_rows.append({})
    task_rows.append({'user_id': 'parameters'})
    for key, value in sorted(parameters.items()):
        task_rows.append({'user_id': key, fieldnames[1]: value})

    writer.writeheader()
    writer.writerows(task_rows)
    return csv_io
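# Illustrative shape of a timeseries task_result (keys and values invented
# for this sketch): each individual entry maps a submetric name to a
# {timestamp: value} dict, which is why the columns above come from
# task_result[Aggregation.IND].values()[0].values()[0].keys() and end up as
# the sorted timestamps.
example_timeseries_result = {
    Aggregation.IND: {
        '123|wiki|444': {
            'edits': {
                '2014-01-01 00:00:00': 2,
                '2014-02-01 00:00:00': 5,
            },
        },
    },
}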