Esempio n. 1
0
def process_forum_entries():
    """
    Summarise forum posting activity per course and per user.

    input file: "forum_entries_courses.csv"

    header: service,course_id,kurs,semester,description,user,name,nid,id,parent_id,date,subject_length,text_length

    returns the nested grouping, documented as a dict with:

        (kurs,semester) -> "username" -> (written count [int],
                                          avg subject length,
                                          median text length)

    Usernames rejected by is_valid_matrikel_nummer are left out.
    """
    kurs_idx, semester_idx = 2, 3
    user_idx = 5
    subject_len_idx, text_len_idx = 11, 12

    def summarise_posts(posts):
        # One tuple per user: number of posts plus the two length statistics.
        post_count = len(posts)
        subject_sizes = [int(post[subject_len_idx]) for post in posts]
        text_sizes = [int(post[text_len_idx]) for post in posts]
        return (post_count, average(subject_sizes), median(text_sizes))

    def handle_forum_entries(grouped):
        # Only users with a valid Matrikelnummer are reported.
        return dict(
            (user, summarise_posts(grouped[user]))
            for user in grouped
            if is_valid_matrikel_nummer(user)
        )

    rows = load_lines("forum_entries_courses.csv")
    per_course = groupby(rows, [kurs_idx, semester_idx])
    return groupby(per_course, [user_idx], handle_forum_entries)
Esempio n. 2
0
def process_entries_read(kurs_mapping):
    """
    Count how many forum entries each user has read, per course.

    input:

        kurs_mapping - dict with: forum_course_id -> (kurs,semester)

    input file: "forum_readlist.csv"

    header: service,course_id,username,nid,id

    returns a dict with

        (kurs,semester) -> "username" -> read count [integer]

    Every course id found in the read list is looked up in kurs_mapping
    by subscript, so an id missing from the mapping raises the mapping's
    lookup error. Usernames rejected by is_valid_matrikel_nummer are
    left out.
    """
    course_id_idx = 1
    username_idx = 2

    def calculate_read(grouped):
        # The read count is simply the number of rows per valid username.
        counts = {}
        for user in grouped:
            if is_valid_matrikel_nummer(user):
                counts[user] = len(grouped[user])
        return counts

    rows = load_lines("forum_readlist.csv")
    rows_by_course = groupby(rows, [course_id_idx])
    reads_by_course = groupby(rows_by_course, [username_idx], calculate_read)

    # Re-key the per-course counts from forum course id to (kurs,semester).
    return dict(
        (kurs_mapping[course_id], reads_by_course[course_id])
        for course_id in rows_by_course
    )
Esempio n. 3
0
def process_median_feedback_length():
    """
    Compute the median feedback comment length per course and per user.

    input file: "abgabe_feedback_courses.csv"

    header: service,course_id,kurs,semester,description,user_id,task,subtask,author,comment_length

    returns the nested grouping, documented as a dict with

        (kurs,semester) -> "username" -> median feedback length

    Usernames rejected by is_valid_matrikel_nummer are left out.
    """
    kurs_idx, semester_idx = 2, 3
    user_idx = 5
    comment_len_idx = 9

    def median_comment_length(rows):
        # comment_length is parsed as an integer before taking the median.
        return median([int(row[comment_len_idx]) for row in rows])

    def calculate_feedback(grouped):
        return dict(
            (user, median_comment_length(grouped[user]))
            for user in grouped
            if is_valid_matrikel_nummer(user)
        )

    rows = load_lines("abgabe_feedback_courses.csv")
    per_course = groupby(rows, [kurs_idx, semester_idx])
    return groupby(per_course, [user_idx], calculate_feedback)
Esempio n. 4
0
def process_avg_score():
    """
    Compute per-user assessment score statistics for every course.

    input file: "abgabe_assessment_results_courses.csv"

    header: service,course_id,kurs,semester,description,user_id,result_id,result_value

    returns a dict with

        (kurs,semester) -> "username" -> (quality score [float],
                                          avg score [float],
                                          number of scores [int])

    where quality score:

        sum of scores / highest score   (0.0 when the highest score is 0)

    Only one score is counted per result_id: the result_value of the
    first row in each result_id group. Usernames rejected by
    is_valid_matrikel_nummer are left out.
    """
    KURS_COL = 2
    SEMESTER_COL = 3
    USER_COL = 5
    RESULT_ID_COL = 6
    RESULT_SCORE_COL = 7

    def calculate_avg_score(grouped):
        result = {}
        for username in grouped:
            if not is_valid_matrikel_nummer(username):
                continue
            by_result_id = groupby(grouped[username], [RESULT_ID_COL])
            # One score per result_id: take the first row of each group.
            result_scores = [float(rows[0][RESULT_SCORE_COL])
                             for rows in by_result_id.values()]
            avg_score = average(result_scores)
            num_scores = len(result_scores)
            max_score = max(result_scores)
            if max_score == 0:
                # Avoid dividing by zero; stay a float like the other branch.
                quality_score = 0.0
            else:
                # The scores are already floats, so this is true division.
                quality_score = sum(result_scores) / max_score
            result[username] = (quality_score, avg_score, num_scores)
        return result

    lines = load_lines("abgabe_assessment_results_courses.csv")
    by_kurs_semester = groupby(lines, [KURS_COL, SEMESTER_COL])
    return groupby(by_kurs_semester, [USER_COL], calculate_avg_score)
Esempio n. 5
0
def process_plus_count():
    """
    Count the pluses each user received, per course.

    input file: "abgabe_assessment_pluses_courses.csv"

    header: service,course_id,kurs,semester,description,user_id,plus_date

    returns a dict with

        (kurs,semester) -> "username" -> plus count [integer]

    Usernames rejected by is_valid_matrikel_nummer are left out.
    """
    kurs_idx, semester_idx, user_idx = 2, 3, 5

    def count_pluses(d):
        # Each row is one plus, so the count is the group size.
        counts = {}
        for user in d:
            if is_valid_matrikel_nummer(user):
                counts[user] = len(d[user])
        return counts

    rows = load_lines("abgabe_assessment_pluses_courses.csv")
    per_course = groupby(rows, [kurs_idx, semester_idx])
    return groupby(per_course, [user_idx], count_pluses)
Esempio n. 6
0
def process_kurs_mapping():
    """
    Build the lookup from forum course id to its course key.

    input file: "forum_entries_courses.csv"

    header: service,course_id,kurs,semester,description,user,name,nid,id,parent_id,date,subject_length,text_length

    returns a dict with:
        (course_id) -> (kurs,semester)

    The course_id is read from the first row of each (kurs,semester)
    group only.
    NOTE(review): presumably every row of a group shares one course_id;
    verify against the data.
    """
    course_id_idx = 1
    kurs_idx = 2
    semester_idx = 3

    rows = load_lines("forum_entries_courses.csv")
    rows_by_course = groupby(rows, [kurs_idx, semester_idx])

    mapping = {}
    for kurs_semester in rows_by_course:
        first_row = rows_by_course[kurs_semester][0]
        mapping[first_row[course_id_idx]] = kurs_semester
    return mapping
Esempio n. 7
0
 def calculate_avg_score(grouped):
     """
     Compute score statistics for every valid username in `grouped`.

     Returns a dict: username -> (quality score, avg score, number of scores),
     where quality score is the sum of scores divided by the highest score
     (0.0 when the highest score is 0). Only the first row of each
     result_id group contributes a score.

     Reads RESULT_ID_COL and RESULT_SCORE_COL from the enclosing scope.
     """
     result = {}
     for username in grouped:
         if not is_valid_matrikel_nummer(username):
             continue
         by_result_id = groupby(grouped[username], [RESULT_ID_COL])
         # One score per result_id: take the first row of each group.
         result_scores = [float(rows[0][RESULT_SCORE_COL])
                          for rows in by_result_id.values()]
         avg_score = average(result_scores)
         num_scores = len(result_scores)
         max_score = max(result_scores)
         if max_score == 0:
             # Avoid dividing by zero; stay a float like the other branch.
             quality_score = 0.0
         else:
             # The scores are already floats, so this is true division.
             quality_score = sum(result_scores) / max_score
         result[username] = (quality_score, avg_score, num_scores)
     return result
Esempio n. 8
0
from common import transpose, first, groupby, cat
import unittest

if __name__ == '__main__':
    # Quick sanity checks for the helpers imported from `common`, run
    # before handing control to unittest's own test discovery.
    columns = tuple(transpose(((1, 2, 3), (4, 5, 6))))
    assert columns == ((1, 4), (2, 5), (3, 6))

    head_of_string = first('abc')
    head_of_list = first(['a', 'b', 'c'])
    assert head_of_string == head_of_list == 'a'

    joined = cat(['a', 'b', 'c'])
    assert joined == 'abc'

    words_by_length = groupby(['test', 'one', 'two', 'three', 'four'], key=len)
    expected_by_length = {
        3: ['one', 'two'],
        4: ['test', 'four'],
        5: ['three'],
    }
    assert words_by_length == expected_by_length

    unittest.main()