# Collect the timestamps of every 'Test 1' event, grouped by
# (username, session).
cursor = collection['format_tests'].find(
    {'parent_data.chapter_display_name': 'Test 1'})
users_sessions = defaultdict(list)
for item in cursor:
    users_sessions[(item['username'], item['session'])].append(item['time'])


def _parse_time(raw_time):
    '''
    Parse a tracking-log timestamp string into a naive datetime. Everything
    from the first '+' onwards (presumably the UTC offset) is discarded
    before parsing
    '''
    return datetime.strptime(raw_time.split('+')[0], "%Y-%m-%dT%H:%M:%S.%f")


# Count the sequential navigation events (seq_goto / seq_prev / seq_next)
# that fall between the first and last 'Test 1' event of each session.
users_tests_events = defaultdict(int)
for (username, session), times in users_sessions.items():
    # min()/max() compare the raw timestamp strings as text.
    start_time = _parse_time(min(times))
    end_time = _parse_time(max(times))
    cursor = collection['tracking'].find({
        'username': username,
        'session': session,
        'event_type': {'$in': ['seq_goto', 'seq_prev', 'seq_next']},
    })
    for index, document in enumerate(cursor):
        try:
            time_stamp = _parse_time(document['time'])
            if not start_time <= time_stamp <= end_time:
                continue
            parent_data = document['parent_data']
            # Prefer the sequential name stored in parent_data; fall back to
            # the event's own display name, and to None when both are empty.
            sequential_display_name = (
                parent_data.get('sequential_display_name')
                or document['metadata']['display_name']
                or None)
            event_key = (username, session,
                         parent_data.get('chapter_display_name', None),
                         sequential_display_name)
            users_tests_events[event_key] += 1
        except (KeyError, TypeError, ValueError, AttributeError):
            # Malformed event (missing field or unparsable time): report it
            # and keep going instead of aborting the whole report.
            print("%s %s" % (index, document))

result = [
    [username, session, chapter_name, sequential_name, count]
    for (username, session, chapter_name, sequential_name), count
    in users_tests_events.items()
]
output = CSV(result, ['Username', 'Session ID', 'Chapter Display Name', 'Sequential Display Name', 'Navigation Count'], output_file='test1_analysis.csv')
output.generate_csv()
'''
This module determines how many chapters were accessed by each user for a 
given course

Usage:

python chapters_accessed_per_user.py

'''
from collections import defaultdict

from base_edx import EdXConnection
from generate_csv_report import CSV

connection = EdXConnection('tracking', 'course_structure')
collection = connection.get_access_to_collection()

# Every distinct chapter display name in the course structure becomes one
# column of the report.
chapters = collection['course_structure'].distinct(
    'parent_data.chapter_display_name')

tracking = collection['tracking'].find()
result = []
for document in tracking:
    if 'parent_data' in document:
        # TODO: per-user chapter accesses are not computed yet, so `result`
        # stays empty.
        pass

# list.extend() mutates in place and returns None, so the header list has to
# be built by concatenation before it is handed to CSV.
headers = ['Username'] + list(chapters)
output = CSV(result, headers,
             output_file='atoc185x_chapters_accesses_per_user.csv')
output.generate_csv()
    '$match': {
        'username': {
            '$in': usernames
        },
        '$or': [{
            'event_type': 'play_video'
        }, {
            'event_type': 'problem_check',
            'event_source': 'server'
        }]
    }
}, {
    '$group': {
        '_id': {
            "username": "******",
            "chapter_name": "$parent_data.chapter_display_name",
            "sequential_name": "$parent_data.sequential_display_name",
            "vertical_name": "$parent_data.vertical_display_name"
        }
    }
}])  #, {'$out' : 'students_50_to_59_events'}])

# One CSV row per aggregation group; groups without a chapter name are
# left out.
result = []
for document in cursor['result']:
    group_key = document['_id']
    if 'chapter_name' not in group_key:
        continue
    result.append([
        group_key['username'],
        group_key['chapter_name'],
        group_key['sequential_name'],
        group_key['vertical_name'],
    ])

column_names = [
    'Username', 'Chapter Name', 'Sequential Name', 'Vertical Name'
]
output = CSV(result, column_names,
             output_file='failure_analysis_50_to_59.csv')
output.generate_csv()
Beispiel #4
0
        item['_id'], item['parent_data']['chapter_display_name'],
        item['metadata']['display_name'],
        len(children)
    ]
    aggregate_vertical = defaultdict(int)
    aggregate_category = defaultdict(int)
    for _id in children:
        try:
            vertical = collection['course_structure'].find_one({'_id': _id})
            aggregate_vertical[vertical['category']] += 1
            for _id in vertical['children']:
                child = collection['course_structure'].find_one({'_id': _id})
                aggregate_category[child['category']] += 1
        except Exception:
            pass
    temp_result.extend([
        aggregate_category['video'], aggregate_category['html'],
        aggregate_category['problem'], aggregate_category['discussion'],
        aggregate_category['poll_question'], aggregate_category['word_cloud']
    ])
    result.append(temp_result)

output = CSV(result, [
    'Sequential ID', 'Chapter Display Name', 'Sequential Name',
    'Number of Verticals', 'Number of Videos', 'Number of HTML',
    'Number of Problems', 'Number of Discussions', 'Number of Poll Questions',
    'Number of Word Cloud'
],
             output_file='sequential_aggregation.csv')
output.generate_csv()
Beispiel #5
0
        try:
            country_code = geoip.country(value)
            country = country_code_to_country[country_code]
            if not key:
                key = 'anonymous'
                ip_to_country.append([key, value, country_code, country])
            elif (key, country) not in country_set:
                country_set.add((key, country))
                ip_to_country.append([key, value, country_code, country])
        except:
            # IMPORTANT
            # The following code for an exception are hardcoded for those IPs which do have a mapping to a
            # country code but they were not available in GeoIP.dat (most probably because it was not updated)
            # People using this script can either report this code (under except) and or additional conditions
            # IP addresses which cannot be mapped to a country code stored in GeoIP.dat
            if value == '41.79.120.29':
                country = country_code_to_country['SS']
                if not key:
                    key = 'anonymous'
                    ip_to_country.append(
                        [key, value, 'SS', country_code_to_country['SS']])
                elif (key, country) not in country_set:
                    country_set.add((key, country))
                    ip_to_country.append(
                        [key, value, 'SS', country_code_to_country['SS']])

output = CSV(ip_to_country,
             ['Username', 'IP Address', 'Country Code', 'Country'],
             output_file=db_name + '_ip_to_country.csv')
output.generate_csv()
                else:
                    student_answers_values[key] = values 
            else:
                if "choice_" in values:
                    mapped_values = answer_keys[key].values()
                    student_answers_values[key] = mapped_values
                else:
                    student_answers_values[key] = values
        if username not in output:
            output[username] = {'problem_id' : problem_id, 'student_answers' :  student_answers_values, 'attempts' : student_attempts}
        else: 
            if student_attempts > output[username]['attempts']:
                output[username]['problem_id'] = problem_id
                output[username]['student_answers'] = student_answers_values
                output[username]['attempts'] = student_attempts
    except:
        print "Key Error!!!", key, values, problem_id

# Flatten {username: {'problem_id': ..., 'student_answers': {...}}} into one
# CSV row per answered question.
result = []
for username, values in output.items():
    problem_id = values['problem_id']
    for question, answer in values['student_answers'].items():
        result.append([username, problem_id, question, answer])

# Every row holds four fields, so the header needs four names: the question
# key gets its own column title next to the answer.
output = CSV(result, ['Username', 'Problem ID', 'Question ID', 'Student Answers'], output_file=db_name + '_student_answers.csv')
output.generate_csv()

#with open(db_name + '_student_answers', 'w') as f:
#    json.dump(output, f)
'''

import csv

from base_edx import EdXConnection
from generate_csv_report import CSV

connection = EdXConnection('tracking_atoc185x')
collection = connection.get_access_to_collection()

# The csv file can be swapped for any csv file listing the usernames of the
# students who completed the course and earned a certificate. Alternatively,
# that list could be stored in another MongoDB collection and extracted
# from there.
with open('atoc185x/course_completers.csv', 'r') as csv_file:
    reader = csv.reader(csv_file)
    next(reader)  # skip the header row
    usernames = set(row[1] for row in reader)

cursor = collection['tracking_atoc185x'].find(
    {'event_type': 'show_transcript'})

# Record each completer once, in the order of their first show_transcript
# event.
result = []
seen = set()
for document in cursor:
    name = document['username']
    if name not in usernames or name in seen:
        continue
    seen.add(name)
    result.append([name])

output = CSV(result, ['Username'],
             output_file='show_transcript_completers.csv')
output.generate_csv()
Beispiel #8
0
def _generate_name_from_problem_id(problem_id):
    '''
    Generate name of csv output file from problem id
    '''
    return '_'.join(problem_id.split('/')[3:]) + '.csv'


# All events recorded for the problem id given on the command line.
cursor = collection['atoc185x_problem_ids'].find(
    {'event.problem_id': sys.argv[1]})

# One row per event, in cursor order.
result = [
    [
        document['username'],
        document['event']['attempts'],
        document['module']['display_name'],
        document['time'],
        document['event']['success'],
        document['event']['grade'],
        document['event']['max_grade'],
        document['event']['answers'],
    ]
    for document in cursor
]

csv_report_name = _generate_name_from_problem_id(sys.argv[1])
column_names = [
    'Username', 'Attempt Number', 'Module', 'Time', 'Success',
    'Grade Achieved', 'Max Grade', 'Answers'
]
output = CSV(result, column_names, output_file=csv_report_name)
output.generate_csv()
'''
This module extracts the student IDs from the collection certificates_generatedcertificate
of the students who completed the course and achieved a certificate. The ids
are then used to extract the usernames of the course completers

Usage:

python course_completers.py

'''

from collections import defaultdict

from base_edx import EdXConnection
from generate_csv_report import CSV

connection = EdXConnection('certificates_generatedcertificate', 'auth_user')
collection = connection.get_access_to_collection()

# Certificates with status 'downloadable' identify the course completers.
completers = collection['certificates_generatedcertificate'].find({'status' : 'downloadable'})

result = []
for document in completers:
    user_document = collection['auth_user'].find_one({"id" : document['user_id']})
    if user_document is None:
        # find_one() returns None when no auth_user record has this id;
        # skip the certificate instead of failing on the subscript below.
        continue
    result.append([user_document['id'], user_document['username'], document['name'], document['grade']])

output = CSV(result, ['User ID','Username', 'Name', 'Grade'], output_file='course_completers.csv')
output.generate_csv()
Beispiel #10
0
from base_edx import EdXConnection
from generate_csv_report import CSV

connection = EdXConnection('seek_video')
collection = connection.get_access_to_collection()
# Defined but not applied to the query below.
sort_parameters = [('parent_data.chapter_display_name', 1),
                   ('parent_data.sequential_display_name', 1),
                   ('parent_data.vertical_display_name', 1)]
cursor = collection['seek_video'].find()

# One row per seek event; a missing 'old_time' is reported as 0.
result = []
for item in cursor:
    old_time = item['event'].get('old_time', 0)
    row = [
        item['username'],
        item['parent_data']['chapter_display_name'],
        item['parent_data']['sequential_display_name'],
        item['parent_data']['vertical_display_name'],
        old_time,
        item['event']['new_time'],
    ]
    result.append(row)

column_names = [
    'Username', 'Chapter Name', 'Sequential Name', 'Vertical Name', 'Old Time',
    'New Time'
]
output = CSV(result, column_names,
             output_file='seek_video.csv',
             row_limit=200000)
output.generate_csv()
Beispiel #11
0
username, video associated with load_video event, parent_data: {chapter_display_name, sequential_display_name, vertical_display_name,}, edx_video_id, video watch segments

get the event_types : load_video, play_video, pause_video, seek_video

sort by "time": "" so that the events are chronologically ordered

for each load_video new video watch segment should include ONLY:

- time between play_video -> next video event in time (pause_video or seek_video)
- time between seek_video : {'new_time' : Time}  -> pause video (only with new_time > old_time, this is to avoid including rewinds)

watch periods:
    event_type : pause_video - "event_type":"play_video" {"event":{"currentTime":TIME}} = new video watch segment
    if seek_video : {'old_time' : TIME} < seek_video : {'new_time' : TIME} 
      "pause_video" {"event":{"currentTime":TIME}} - seek_video : {'new_time': TIME } = new video watch segment

    if seek_video : {'old_time' : TIME} > seek_video : {'new_time' : TIME} = rewind (exclude from watch segments)
'''

from base_edx import EdXConnection
from generate_csv_report import CSV

# Open the collection that holds the video events.
connection = EdXConnection('video_watch_duration_collection')
collection = connection.get_access_to_collection()
cursor = collection['video_watch_duration_collection'].find()

# The cursor is not consumed yet, so no rows are produced.
result = []

column_names = ['Username']
output = CSV(result,
             column_names,
             output_file='video_watch_duration.csv',
             row_limit=200000)
output.generate_csv()
Beispiel #12
0
from collections import defaultdict
import json
import sys

from base_edx import EdXConnection
from generate_csv_report import CSV

db_name = sys.argv[1]

# Adjust the collection name here if the forum data lives elsewhere.
connection = EdXConnection(db_name, 'forum')
collection = connection.get_access_to_collection()

forum_data = collection['forum'].find()

# One row per forum document; documents without a title get an empty one.
csv_data = [
    [
        document['_id']['oid'],
        document['author_username'],
        document['_type'],
        document.get('title', ''),
        document['body'],
        document['created_at']['date'],
    ]
    for document in forum_data
]

headers = ['ID', 'Author Username', 'Type', 'Title', 'Body', 'Created At Date']
output = CSV(csv_data, headers, output_file=db_name + '_forum_data.csv')
output.generate_csv()
        print "Fail -> %s" % item
        fail.append(item)

# Report the failures collected above; when there are any, they are also
# dumped as JSON into report.txt.
print("Number of fail: " + str(len(fail)))
if not fail:
    print("no fail")
else:
    import json
    with open('report.txt', 'w') as outfile:
        json.dump(fail, outfile)

# One row per (user, session): event count, first and last event time and
# the span between them.
result = []
for username in users_to_sessions:
    for session_id in users_to_sessions[username]:
        times = users_to_sessions[username][session_id]
        # Everything from the first '+' onwards is dropped before parsing.
        end_time = datetime.strptime(
            max(times).split('+')[0], "%Y-%m-%dT%H:%M:%S.%f")
        start_time = datetime.strptime(
            min(times).split('+')[0], "%Y-%m-%dT%H:%M:%S.%f")
        result.append([
            username,
            session_id,
            len(times),
            start_time,
            end_time,
            end_time - start_time,
        ])

column_names = [
    'Username', 'Session ID', 'Number of Events', 'Start Time', 'End Time',
    'Time Spent'
]
output = CSV(result, column_names, output_file='session_info.csv')
output.generate_csv()
Usage:

python activities_with_lower_completion.py

'''
from collections import defaultdict

from base_edx import EdXConnection
from generate_csv_report import CSV

# Connect to MongoDB and open the user_attempts_per_problem_id collection
connection = EdXConnection('user_attempts_per_problem_id')
collection = connection.get_access_to_collection()

cursor = collection['user_attempts_per_problem_id'].find()

# problem id -> {'correct': n, 'incorrect': m}
result = defaultdict(lambda: defaultdict(int))
for document in cursor:
    # A document counts as correct when any of its attempts is 'correct';
    # otherwise it counts as incorrect, once per document.
    outcome = 'correct' if 'correct' in document['attempts'] else 'incorrect'
    result[document['_id']['problem_id']][outcome] += 1

csv_result = []
for problem_id, counts in result.items():
    csv_result.append([problem_id, counts['correct'], counts['incorrect']])

output = CSV(csv_result, ['Problem Id', 'Correct Count', 'Incorrect Count'],
             output_file='activities_with_lower_completion.csv')
output.generate_csv()
Beispiel #15
0
# Usernames are read from the third column of the grade report; the header
# row is skipped.
with open('csv_files/McGillX_CHEM181x_1T2014_grade_report_2014-04-24-1030.csv', 'r') as csv_file:
    reader = csv.reader(csv_file)
    next(reader)
    usernames = [row[2] for row in reader]

# Event type (or keyword) -> short tab name.
NAVIGATION_TABS = {
    '/courses/McGillX/ATOC185x/2T2014/info': 'info',
    '/courses/McGillX/ATOC185x/2T2014/progress': 'progress',
    '/courses/McGillX/ATOC185x/2T2014/109d5374b52040e2a8b737cf90c5618a/': 'syllabus',
    '/courses/McGillX/ATOC185x/2T2014/441b2c519f5c464883e2ddceb26c5559/': 'maps',
    '/courses/McGillX/ATOC185x/2T2014/84f630e833eb4dbabe0a6c45c52bb443/': 'scoreboard',
    '/courses/McGillX/ATOC185x/2T2014/e75195cb39fa4e3890a613a1b3c04c7d/': 'faq',
    'courseware': 'courseware',
    'discussion': 'discussion',
    '/courses/McGillX/ATOC185x/2T2014/instructor': 'instructor',
}

tab_filter = {
    'username': {'$in': usernames},
    'event_type': {
        '$regex': '^/courses/McGillX/ATOC185x/2T2014/(info$|progress$|instructor$|109d5374b52040e2a8b737cf90c5618a/$|441b2c519f5c464883e2ddceb26c5559/$|84f630e833eb4dbabe0a6c45c52bb443/$|e75195cb39fa4e3890a613a1b3c04c7d/$|courseware|discussion)'
    },
}
cursor = collection['tracking_atoc185x'].find(tab_filter)

# (date, tab) -> number of events. Any event type containing 'courseware'
# or 'discussion' is folded into that single tab.
tab_events_per_date = defaultdict(int)
for doc in cursor:
    date = datetime.strptime(doc['time'].split('T')[0], "%Y-%m-%d").date()
    event_type = doc['event_type']
    if 'courseware' in event_type:
        tab = 'courseware'
    elif 'discussion' in event_type:
        tab = 'discussion'
    else:
        tab = event_type
    tab_events_per_date[(date, tab)] += 1

result = [[date, tab, count]
          for (date, tab), count in tab_events_per_date.items()]
output = CSV(result, ['Date','Tab ID','Number of Events'], output_file='number_of_tab_events_per_date_completers.csv')
output.generate_csv()

#with open('csv_files/number_of_tab_events_per_date_completers.csv', 'w') as csv_file:
#    writer = csv.writer(csv_file)
#    writer.writerow(['Date','Tab ID','Number of Events'])
#    for date,tab in tab_events_per_date:
#        writer.writerow([date,tab, tab_events_per_date[(date,tab)] ])
from generate_csv_report import CSV

connection = EdXConnection('tracking_atoc185x')
collection = connection.get_access_to_collection()

# Usernames are read from the third column of the grade report.
with open('csv_files/McGillX_CHEM181x_1T2014_grade_report_2014-04-24-1030.csv', 'r') as csv_file:
    reader = csv.reader(csv_file)
    # Skip the header row so its column title is not treated as a username.
    next(reader)
    usernames = [row[2] for row in reader]

# Count browser-side sequential navigation events of those users, grouped by
# chapter, display name, event type and the old/new position.
cursor = collection['tracking_atoc185x'].aggregate([
    {"$match" : {"event_source" : "browser", "$or" : [{"event_type" : "seq_prev"},{"event_type" : "seq_goto"},{"event_type" : "seq_next"}], 'username' : {'$in' : usernames}}},
    {"$group" : {"_id" : {'chapter_name' : "$parent_data.chapter_display_name", "display_name" :  "$metadata.display_name", "event_type"  : "$event_type", "event_old" : "$event.old", "event_new" : "$event.new"}, "count" : {"$sum" : 1}}},
])

result = []
for item in cursor['result']:
    try:
        group_key = item['_id']
        result.append([
            group_key['chapter_name'], group_key['display_name'],
            group_key['event_type'], group_key.get('event_old', 0),
            group_key['event_new'], item['count']
        ])
    except KeyError:
        # Groups lacking one of the required fields are left out of the
        # report.
        continue

output = CSV(result, ['Chapter Name', 'Display Name', 'Event Type', 'Event Old', 'Event New', 'Count'], output_file='navigation_frequency_completers.csv')
output.generate_csv()
from datetime import datetime

from base_edx import EdXConnection
from generate_csv_report import CSV

connection = EdXConnection('student_courseenrollment')
collection = connection.get_access_to_collection()

# The csv file can be replaced with any csv file listing the students who
# completed the course and earned a certificate. Alternatively, that list
# could be stored in another MongoDB collection and extracted from there.
with open('atoc185x/course_completers.csv') as csv_file:
    reader = csv.reader(csv_file)
    next(reader)  # skip the header row
    # first column -> second column; keys are strings as read from the file
    users = {row[0]: row[1] for row in reader}

result = []
student_courseenrollment = collection['student_courseenrollment'].find()
seen = set()
for document in student_courseenrollment:
    user_id = document['user_id']
    lookup_key = str(user_id)
    # Keep only the first enrollment document of each completer.
    if lookup_key in users and user_id not in seen:
        seen.add(user_id)
        result.append([
            user_id, users[lookup_key],
            # first whitespace-separated token of 'created'
            document['created'].split()[0]
        ])

# Rows carry three fields (user id, username, date), so the header names all
# three.
output = CSV(result, ['User ID', 'Username', 'Date of Registration'],
             output_file='date_of_registration_completers.csv')
output.generate_csv()
Beispiel #18
0
'''
import sys
import csv

from base_edx import EdXConnection
from generate_csv_report import CSV 

# Imported here because this part of the script is the only user of os.path.
import os

db_name = sys.argv[1]

# Change name of collection as required
connection = EdXConnection(db_name, 'user_id_map')
collection = connection.get_access_to_collection()

# Read the header through the csv reader as well, so it is split the same
# way as the data rows and carries no trailing newline.
with open(sys.argv[2]) as f:
    reader = csv.reader(f)
    headers = next(reader)
    data = list(reader)

# Insert the username and hash id right after the first column of each row.
result = []
for row in data:
    # int() accepts values of any size, so long() is not needed.
    cursor = collection['user_id_map'].find_one({'id' : int(row[0])})
    hash_id = cursor['hash_id']
    username = cursor['username']
    result.append([row[0], username, hash_id] + row[1:])

# splitext() splits on the last dot of the file name only, so paths such as
# './data/report.v2.csv' or names without an extension work as well.
input_file, extension = os.path.splitext(sys.argv[2])
output = CSV(result, headers[:1] + ['Username', 'User Hash ID'] + headers[1:], output_file=input_file + '_userid_anon' + extension)
output.generate_csv()
    

        '^/courses/McGillX/ATOC185x/2T2014/(info$|progress$|instructor$|109d5374b52040e2a8b737cf90c5618a/$|441b2c519f5c464883e2ddceb26c5559/$|84f630e833eb4dbabe0a6c45c52bb443/$|e75195cb39fa4e3890a613a1b3c04c7d/$|courseware|discussion)'
    }
})
# tab -> set of usernames that triggered at least one event on it. Any event
# type containing 'courseware' or 'discussion' is folded into that tab.
unique_users_per_tab = defaultdict(set)
for doc in cursor:
    event_type = doc['event_type']
    if 'courseware' in event_type:
        tab = 'courseware'
    elif 'discussion' in event_type:
        tab = 'discussion'
    else:
        tab = event_type
    unique_users_per_tab[tab].add(doc['username'])

# One row per tab: its key, its short name and the number of distinct users.
result = [[key, NAVIGATION_TABS[key], len(tab_users)]
          for key, tab_users in unique_users_per_tab.items()]

column_names = ['Navigation Tab', 'Tab', 'Number of Unique Users']
output = CSV(result, column_names,
             output_file='number_of_unique_users_per_navigation_tab.csv')
output.generate_csv()
Beispiel #20
0
            not_in_auth_user.add(document['student_id'])

# Collect the ids of all survey questions. A student may not have filled in
# every page, so the largest set of question ids found is used.
# NOTE(review): only students with exactly 5 survey entries are considered
# here -- confirm that 5 is the full number of survey pages.
survey_question_ids = set()
for value in result.values():
    if len(value) == 5:
        question_ids = {key for item in value for key in item}
        if len(question_ids) > len(survey_question_ids):
            survey_question_ids = question_ids

survey_question_ids = sorted(survey_question_ids)
# question id -> column position, so each answer is placed with one dict
# lookup instead of a list scan.
column_of = {
    question_id: position
    for position, question_id in enumerate(survey_question_ids)
}

csv_data = []
for username, survey_info in result.items():
    answers = [''] * len(survey_question_ids)
    for item in survey_info:
        for key, value in item.items():
            position = column_of.get(key)
            # Answers to questions outside the chosen id set are ignored.
            if position is not None:
                answers[position] = value
    csv_data.append([username] + answers)

output = CSV(csv_data, ['Username'] + survey_question_ids,
             output_file=db_name + '_entrance_exit_surveys.csv')
output.generate_csv()
                    student_answers_values[key] = mapped_values
                else:
                    student_answers_values[key] = values
        if username not in output:
            output[username] = {
                'problem_id': problem_id,
                'student_answers': student_answers_values,
                'attempts': student_attempts
            }
        else:
            if student_attempts > output[username]['attempts']:
                output[username]['problem_id'] = problem_id
                output[username]['student_answers'] = student_answers_values
                output[username]['attempts'] = student_attempts
    except:
        print "Key Error!!!", key, values, problem_id

# Flatten {username: {'problem_id': ..., 'student_answers': {...}}} into one
# CSV row per answered question.
result = []
for username, values in output.items():
    problem_id = values['problem_id']
    for question, answer in values['student_answers'].items():
        result.append([username, problem_id, question, answer])

# Every row holds four fields, so the header needs four names: the question
# key gets its own column title next to the answer.
output = CSV(result,
             ['Username', 'Problem ID', 'Question ID', 'Student Answers'],
             output_file=db_name + '_student_answers.csv')
output.generate_csv()

#with open(db_name + '_student_answers', 'w') as f:
#    json.dump(output, f)
Beispiel #22
0
from base_edx import EdXConnection
from generate_csv_report import CSV

connection = EdXConnection('certificates_generatedcertificate',
                           'auth_userprofile')
collection = connection.get_access_to_collection()
documents = collection['auth_userprofile'].find()

# One row per user profile that has a certificate record with a grade.
result = []
for document in documents:
    user_id = document['user_id']
    certificate = collection['certificates_generatedcertificate'].find_one(
        {'user_id': user_id})
    if certificate is None:
        # Users with no certificate record have no grade to report.
        continue
    try:
        result.append([
            user_id, document['name'], certificate['grade'],
            document['gender'], document['year_of_birth'],
            document['level_of_education'], document['country'],
            document['city']
        ])
    except KeyError:
        # A missing grade or profile field: leave the user out. Other
        # errors (for example database failures) are no longer hidden.
        continue

# NOTE(review): the 'Username' column is filled from the profile's 'name'
# field -- confirm that the header matches the data.
output = CSV(result, [
    'User ID', 'Username', 'Final Grade', 'Gender', 'Year of Birth',
    'Level of Education', 'Country', 'City'
],
             output_file='atoc185x_user_info.csv')
output.generate_csv()
Beispiel #23
0
Since we will need to sort a very large number of documents, you should create a separate collection to 
aggregate all required documents in one collection and then extract results from the new collection.

Command to run on the mongo shell to create new collection:

db.tracking_atoc185x.aggregate([{$match : {$and : [{"event_type" : "speed_change_video"},{ "parent_data": { $exists: true } }]}}, {$sort : {"parent_data.chapter_display_name" : 1, "parent_data.sequential_display_name" : 1, "parent_data.vertical_display_name" : 1}}, {$out : "speed_change_video_data"}], {allowDiskUse : true})

Usage: 
python speed_change_video.py

'''

from base_edx import EdXConnection
from generate_csv_report import CSV

connection = EdXConnection('speed_change_video_data')
collection = connection.get_access_to_collection()
cursor = collection['speed_change_video_data'].find()

# One row per speed-change event, in cursor order.
result = []
for item in cursor:
    row = [
        item['username'],
        item['parent_data']['chapter_display_name'],
        item['parent_data']['sequential_display_name'],
        item['parent_data']['vertical_display_name'],
        item['event']['old_speed'],
        item['event']['new_speed'],
    ]
    result.append(row)

column_names = [
    'Username', 'Chapter Name', 'Sequential Name', 'Vertical Name',
    'Old Speed', 'New Speed'
]
output = CSV(result, column_names, output_file='speed_change.csv')
output.generate_csv()