db.tracking_atoc185x.aggregate([{$match : {event_type : 'problem_check', 'event_source': 'server'}}, {$group : {_id : {"username" : "$username", "problem_id" : "$event.problem_id"}, attempts : {$push : "$event.success"}}}, {$out : "user_attempts_per_problem_id"}])

Then run this script on the above collection

Usage:

python activities_with_lower_completion.py

'''
from collections import defaultdict

from common.base_edx import EdXConnection
from common.generate_csv_report import CSV

# Connect to MongoDB and extract the collection created by the aggregation
# command given in the module docstring
connection = EdXConnection('user_attempts_per_problem_id')
collection = connection.get_access_to_collection()

cursor = collection['user_attempts_per_problem_id'].find()

# Maps problem id -> {'correct': <count>, 'incorrect': <count>}
result = defaultdict(lambda: defaultdict(int))
for document in cursor:
    # Each document holds every attempt of one student on one problem. The
    # student is counted once per problem: as correct if any attempt was
    # 'correct', otherwise as incorrect.
    outcome = 'correct' if 'correct' in document['attempts'] else 'incorrect'
    result[document['_id']['problem_id']][outcome] += 1

# One csv row per problem id: [problem id, correct count, incorrect count]
csv_result = [[problem_id, counts['correct'], counts['incorrect']]
              for problem_id, counts in result.items()]
output = CSV(csv_result, ['Problem Id', 'Correct Count', 'Incorrect Count'],
             output_file='activities_with_lower_completion.csv')
output.generate_csv()
Example #2
0
'''
This module calculates the number of forum threads and posts for a given course
stored in the MongoDB database

Usage:

python <path_to_script> 

'''
from common.base_edx import EdXConnection

connection = EdXConnection('forum')
collection = connection.get_access_to_collection()
forum = collection['forum']

# Number of documents with _type CommentThread.
# A CommentThread represents the first level of interaction: a post that opens
# a new thread, often a student question of some sort
thread_filter = {'_type': 'CommentThread'}
number_of_comment_threads = forum.find(thread_filter).count()

# Total number of documents in the forum collection (threads included)
number_of_posts = forum.find().count()

# A single pre-formatted string prints the same text ("<posts> <threads>")
# under both the Python 2 print statement and the Python 3 print function
print('%s %s' % (number_of_posts, number_of_comment_threads))
'''
This module gets the number of completers who did each activity
'''
import csv
from datetime import datetime
from collections import defaultdict
import sys

from common.base_edx import EdXConnection

connection = EdXConnection('tracking')
collection = connection.get_access_to_collection()

# The username is read from the third column of every row of the grade report.
# NOTE(review): the first row is not skipped, so if the file has a header row
# its third cell ends up in the list as well - confirm this is harmless.
with open('csv_files/McGillX_CHEM181x_1T2014_grade_report_2014-04-24-1030.csv', 'r') as csv_file:
    usernames = [row[2] for row in csv.reader(csv_file)]

tracking = collection['tracking']

# Stage 1: keep only the events of the users listed in the grade report
match_completers = {'$match': {'username': {'$in': usernames}}}

# Stage 2: collect the distinct usernames seen for each activity, where an
# activity is identified by its chapter, sequential and vertical display names
group_students_by_activity = {
    '$group': {
        '_id': {
            "chapter_name": "$parent_data.chapter_display_name",
            "sequential_name": "$parent_data.sequential_display_name",
            "vertical_name": "$parent_data.vertical_display_name",
        },
        'students': {'$addToSet': "$username"},
    }
}

# Stages 3 and 4: one document per (activity, student), then count them per
# activity.
# NOTE(review): the output field name starts with a space (' num_of_students');
# kept as is because readers of the output collection may rely on it - confirm.
unwind_students = {'$unwind': "$students"}
count_students = {'$group': {'_id': "$_id", ' num_of_students': {'$sum': 1}}}

# Stage 5: write the result to the collection named by the first command line
# argument
write_output = {'$out': sys.argv[1]}

cursor = tracking.aggregate([
    match_completers,
    group_students_by_activity,
    unwind_students,
    count_students,
    write_output,
])
db.tracking_atoc185x.aggregate([{$match : {event_type : 'problem_check', 'event_source': 'server'}}, {$group : {_id : {"username" : "$username", "problem_id" : "$event.problem_id"}, attempts : {$push : "$event.success"}}}, {$out : "user_attempts_per_problem_id"}])

Then run this script on the above collection

Usage:

python activities_with_lower_completion.py

'''
from collections import defaultdict

from common.base_edx import EdXConnection
from common.generate_csv_report import CSV

# Connect to MongoDB and extract the collection created by the aggregation
# command given in the module docstring
connection = EdXConnection('user_attempts_per_problem_id')
collection = connection.get_access_to_collection()

cursor = collection['user_attempts_per_problem_id'].find()

# Maps problem id -> {'correct': <count>, 'incorrect': <count>}
result = defaultdict(lambda: defaultdict(int))
for document in cursor:
    # Each document holds every attempt of one student on one problem. The
    # student is counted once per problem: as correct if any attempt was
    # 'correct', otherwise as incorrect.
    outcome = 'correct' if 'correct' in document['attempts'] else 'incorrect'
    result[document['_id']['problem_id']][outcome] += 1

# One csv row per problem id: [problem id, correct count, incorrect count]
csv_result = [[problem_id, counts['correct'], counts['incorrect']]
              for problem_id, counts in result.items()]
output = CSV(csv_result, ['Problem Id', 'Correct Count', 'Incorrect Count'],
Example #5
0
'''
This module gets all the events per user while watching videos. Since we will
need to sort a very large number of documents, user should create a separate
collection to aggregate all required documents in one collection and then 
extract results from the new collection
Command to run on the mongo shell to create the new collection:

db.tracking_atoc185x.aggregate([{$match : {$and : [{"event_type" : "seek_video"},{ "parent_data": { $exists: true } }]}}, {$sort : {"parent_data.chapter_display_name" : 1, "parent_data.sequential_display_name" : 1, "parent_data.vertical_display_name" : 1}}, {$out : "seek_video"}], {allowDiskUse : true})

'''

from common.base_edx import EdXConnection
from common.generate_csv_report import CSV

# Get access to the 'seek_video' collection (see the module docstring for the
# shell command that creates it)
connection = EdXConnection('seek_video')
collection = connection.get_access_to_collection()
# Same keys and directions as the $sort stage of the shell command in the
# module docstring.
# NOTE(review): not passed to the find() call below - confirm whether it is
# still needed.
sort_parameters = [('parent_data.chapter_display_name', 1),
                   ('parent_data.sequential_display_name', 1),
                   ('parent_data.vertical_display_name', 1)]
cursor = collection['seek_video'].find()
# Rows collected for the report, one list per document
result = []
for index, item in enumerate(cursor):
    if 'old_time' in item['event']:
        old_time = item['event']['old_time']
    else:
        old_time = 0
    result.append([
        item['username'], item['parent_data']['chapter_display_name'],
        item['parent_data']['sequential_display_name'],
        item['parent_data']['vertical_display_name'], old_time,
        item['event']['new_time']
# Message shown when the required command line arguments are missing
usage_message = """
    No problem id given as a command line argument. Please provide a problem_id

    Usage:
    python -m problem_ids.get_csv_report_by_problem_id <db_name> <problem_id> [--final_attempts]

    """

# Both <db_name> and <problem_id> are required
if len(sys.argv) < 3:
    sys.stderr.write(usage_message)
    sys.exit(1)

db_name, problem_id = sys.argv[1], sys.argv[2]
# Final-attempts mode is selected by the mere presence of exactly one extra
# argument; its value is not inspected
final_attempts = len(sys.argv) == 4
connection = EdXConnection(db_name, 'problem_ids')
collection = connection.get_access_to_collection()


def _generate_name_from_problem_id(problem_id, display_name):
    '''Build the name of the csv output file for a problem.

    The name is made of, in order: the '/'-separated components of
    problem_id from the fourth one onwards joined by underscores, an
    underscore, the alphanumeric characters of display_name, a suffix that
    depends on the module-level final_attempts flag ('_FinalAttempts' or
    '_AllAttempts'), and the '.csv' extension.
    '''
    if final_attempts:
        suffix = '_FinalAttempts'
    else:
        suffix = '_AllAttempts'
    id_part = '_'.join(problem_id.split('/')[3:])
    # Drop spaces, punctuation and anything else that is not a letter or digit
    name_part = ''.join([char for char in display_name if char.isalnum()])
    return id_part + '_' + name_part + suffix + '.csv'


# All stored documents whose event refers to the requested problem id
cursor = collection['problem_ids'].find({'event.problem_id': problem_id})
# The first matching document supplies the display name of the problem and a
# sample event.
# NOTE(review): cursor[0] presumably fails when no document matches the
# problem id - confirm and consider reporting that case explicitly.
display_name = cursor[0]['module']['display_name']
one_record = cursor[0]['event']
problem_ids_keys = sorted(one_record['correct_map'].keys(),
to their hash ids and return a new csv_report

Usage:
python username_to_hash_id_reports.py db_name csv_report

'''
import sys
import csv

from common.base_edx import EdXConnection
from common.generate_csv_report import CSV

# First command line argument: name of the database
db_name = sys.argv[1]

# Change name of collection as required
connection = EdXConnection(db_name, 'user_id_map')
collection = connection.get_access_to_collection()

# Second command line argument: path of the csv report to convert
with open(sys.argv[2]) as f:
    # The first line is consumed as raw text (it is not parsed by the csv
    # reader); every remaining line becomes one parsed row in data
    headers = next(f)
    reader = csv.reader(f)
    data = list(reader)

# Rows of the converted report
result = []
for row in data:
    username = row[0]
    if username.isdigit():
        username = int(username)
    cursor = collection['user_id_map'].find_one({'username': username})
    if cursor:
        hash_id = cursor['hash_id']
from common.generate_csv_report import CSV

# If you have access to the grade report provided by edX, you can use the following
# 7 lines of code to get all usernames with grades between 50% and 59% inclusive
#with open('csv_files/grades_report.csv') as f:
#    reader = csv.reader(f)
#    header = reader.next()
#    usernames = [row[2] for row in reader if '0.5' <= row[3] <= '0.59']
#connection = EdXConnection('tracking')
#collection = connection.get_access_to_collection()
#cursor = collection['tracking'].aggregate([{'$match' : {'username' : {'$in' : usernames}, '$or': [{'event_type' : 'play_video'},{'event_type' : 'problem_check', 'event_source' : 'server'}]}},{'$group' : { '_id' : { "username" : "$username", "chapter_name" : "$parent_data.chapter_display_name" ,"sequential_name" : "$parent_data.sequential_display_name","vertical_name" : "$parent_data.vertical_display_name"}}}, {'$out' : 'students_50_to_59_events'}])

# Otherwise, the students with grades between 50% and 59% inclusive can be
# looked up in the collection certificates_generatedcertificate with the
# following lines of code
connection = EdXConnection('tracking_atoc185x', 'auth_user',
                           'certificates_generatedcertificate')
collection = connection.get_access_to_collection()

# Get all user ids of students with grades between 50% and 59% inclusive from
# the collection certificates_generatedcertificate
user_ids = {
    document['user_id']
    for document in collection['certificates_generatedcertificate'].find(
        {'$and': [{
            'grade': {
                '$gte': 0.5
            }
        }, {
            'grade': {
                '$lte': 0.59
            }
Example #9
0
Usage:

python navigation_tabs_data_date.py 

'''

import csv
from datetime import datetime
from collections import defaultdict
import sys

from common.base_edx import EdXConnection
from common.generate_csv_report import CSV

connection = EdXConnection('tracking_atoc185x')
collection = connection.get_access_to_collection()

# Get all users who completed the course. If you do not have a CSV with the
# list of users who completed the course, you will have to extract it from the
# MongoDB database
with open('csv_files/McGillX_CHEM181x_1T2014_grade_report_2014-04-24-1030.csv', 'r') as csv_file:
    reader = csv.reader(csv_file)
    next(reader)  # skip the first row of the report
    usernames = [row[2] for row in reader]

# Maps an event_type (or the keyword found in it) to the name of the
# navigation tab it stands for
NAVIGATION_TABS = {
    '/courses/McGillX/ATOC185x/2T2014/info': 'info',
    '/courses/McGillX/ATOC185x/2T2014/progress': 'progress',
    '/courses/McGillX/ATOC185x/2T2014/109d5374b52040e2a8b737cf90c5618a/': 'syllabus',
    '/courses/McGillX/ATOC185x/2T2014/441b2c519f5c464883e2ddceb26c5559/': 'maps',
    '/courses/McGillX/ATOC185x/2T2014/84f630e833eb4dbabe0a6c45c52bb443/': 'scoreboard',
    '/courses/McGillX/ATOC185x/2T2014/e75195cb39fa4e3890a613a1b3c04c7d/': 'faq',
    'courseware': 'courseware',
    'discussion': 'discussion',
    '/courses/McGillX/ATOC185x/2T2014/instructor': 'instructor',
}

# Matches the event types of the tabs above: exact paths for most tabs, and
# any path under the course that starts with courseware or discussion
tab_event_pattern = (
    '^/courses/McGillX/ATOC185x/2T2014/('
    'info$|progress$|instructor$|'
    '109d5374b52040e2a8b737cf90c5618a/$|'
    '441b2c519f5c464883e2ddceb26c5559/$|'
    '84f630e833eb4dbabe0a6c45c52bb443/$|'
    'e75195cb39fa4e3890a613a1b3c04c7d/$|'
    'courseware|discussion)'
)
tab_events_query = {
    'username': {'$in': usernames},
    'event_type': {'$regex': tab_event_pattern},
}
cursor = collection['tracking_atoc185x'].find(tab_events_query)

tab_events_per_date = defaultdict(int)
Example #10
0
Since we will need to sort a very large number of documents, you should create a separate collection to 
aggregate all required documents in one collection and then extract results from the new collection.

Command to run on the mongo shell to create new collection:

db.tracking_atoc185x.aggregate([{$match : {$and : [{"event_type" : "speed_change_video"},{ "parent_data": { $exists: true } }]}}, {$sort : {"parent_data.chapter_display_name" : 1, "parent_data.sequential_display_name" : 1, "parent_data.vertical_display_name" : 1}}, {$out : "speed_change_video_data"}], {allowDiskUse : true})

Usage: 
python speed_change_video.py

'''

from common.base_edx import EdXConnection
from common.generate_csv_report import CSV

connection = EdXConnection('speed_change_video_data')
collection = connection.get_access_to_collection()
cursor = collection['speed_change_video_data'].find()

# One report row per speed change event: who changed the speed, where in the
# course, and the speed before and after the change
result = []
for document in cursor:
    username = document['username']
    location = document['parent_data']
    row = [
        username,
        location['chapter_display_name'],
        location['sequential_display_name'],
        location['vertical_display_name'],
    ]
    speeds = document['event']
    row.append(speeds['old_speed'])
    row.append(speeds['new_speed'])
    result.append(row)

column_names = [
    'Username', 'Chapter Name', 'Sequential Name', 'Vertical Name',
    'Old Speed', 'New Speed'
]
output = CSV(result, column_names, output_file='speed_change.csv')
output.generate_csv()
Example #11
0
db.tracking.aggregate([{$match:{$and:[{'event_source':'browser'},{$or:[{'event_type':'play_video'},{'event_type':'speed_change_video'},{'event_type':'seq_goto'}, {'event_type':'seq_next'}, {'event_type':'seq_prev'}, {'event_type':'page_close'}, {'event_type':'play_video'},{'event_type':'pause_video'}, {'event_type':'seek_video'}, {'event_type':'pause_video'}]}]}}, {$project:{"username":1, "event_type":1, 'time':1,"event":1}},{ $sort: {'username':1,'time':1}},{$out: "video_watching"}],{allowDiskUse:true})
or
db.runCommand({aggregate:'tracking',pipeline:[{$match:{$and:[{'event_source':'browser'},{$or:[{'event_type':'play_video'},{'event_type':'speed_change_video'},{'event_type':'seq_goto'}, {'event_type':'seq_next'}, {'event_type':'seq_prev'}, {'event_type':'page_close'}, {'event_type':'play_video'},{'event_type':'pause_video'}, {'event_type':'seek_video'}, {'event_type':'pause_video'}]}]}}, {$project:{"username":1, "event_type":1, 'time':1,"event":1}},{ $sort: {'username':1,'time':1}},{$out: "video_watching"}],allowDiskUse:true})
'''
import sys
import csv
import time
from datetime import datetime

from common.base_edx import EdXConnection
from common.generate_csv_report import CSV

# First command line argument: name of the database
db_name = sys.argv[1]

# Collection produced by the aggregation command in the module docstring
eventCollection = 'video_watching'
connection = EdXConnection(db_name, eventCollection)
collection = connection.get_access_to_collection()

# Distinct usernames found in the collection; print a blank line, the list
# itself and its length
students = collection[eventCollection].distinct('username')
print
print students
print len(students)

watch_durations = []
# NOTE(review): {'blank'} is a set literal containing the string 'blank', not
# a dict or a string - presumably a placeholder; confirm the intended type.
start_event_time = {'blank'}
end_event_time = {'blank'}
errors = []
count_errors = 0

for student in students:
    cursor = collection[eventCollection].find({'username': student})
Example #12
0
'''
This module gets the date of registration of all users who completed the course

Usage:

python date_of_registration_completers.py 

'''

import csv
from datetime import datetime

from common.base_edx import EdXConnection
from common.generate_csv_report import CSV

connection = EdXConnection('student_courseenrollment')
collection = connection.get_access_to_collection()

# The csv file can be replaced with any csv file that contains the list of
# users who completed the course and achieved a certificate. Alternatively, one
# can save that info in another collection in MongoDB and extract it from there
with open('atoc185x/course_completers.csv') as csv_file:
    reader = csv.reader(csv_file)
    # Skip the first row of the file (presumably the header)
    reader.next()
    # Maps the first column of each row to its second column
    users = {row[0]: row[1] for row in reader}

result = []
student_courseenrollment = collection['student_courseenrollment'].find()
# Filled by the loop below so that each user_id is handled only once
seen = set()
for document in student_courseenrollment:
    if str(document['user_id']) in users and document['user_id'] not in seen:
Example #13
0
'''
This module retrieves the first activity of all users who completed a course.
The list of users who have completed the course is provided in a given csv file,
McGillX_CHEM181x_1T2014_grade_report_2014-04-24-1030.csv. Once these users are
extracted, we query the database for all of them and detect their
first activity. First activity is defined by the event_type '/courses/McGillX/CHEM181x/1T2014/info'

'''
import csv
from datetime import datetime
from collections import defaultdict

from common.base_edx import EdXConnection

# Connect to MongoDB and get access to the tracking collections
connection = EdXConnection('tracking', 'tracking_before_jan22')
collection = connection.get_access_to_collection()

# Retrieve the users who have completed the course. This could be done in any
# way, depending on what is provided
with open('csv_files/McGillX_CHEM181x_1T2014_grade_report_2014-04-24-1030.csv',
          'r') as csv_file:
    reader = csv.reader(csv_file)
    # The username is read from the third column of every row.
    # NOTE(review): the first row is not skipped - confirm whether the report
    # has a header row.
    usernames = [row[2] for row in reader]

# Retrieve the time of the first activity of all users who completed the course
time_events = defaultdict(list)
cursor = collection['tracking'].find(
    {'event_type': '/courses/McGillX/CHEM181x/1T2014/info'})
#cursor_before_jan_22 = collection['tracking_before_jan22'].find({'event_type' : '/courses/McGillX/CHEM181x/1T2014/info'})
with open('csv_files/first_activity_completers.csv', 'w') as csv_file:
Example #14
0
'''
This module gets the number of navigation events for each user while they are taking Test 1
'''
from collections import defaultdict
import time
from datetime import datetime

from common.base_edx import EdXConnection
from common.generate_csv_report import CSV

connection = EdXConnection('format_tests', 'tracking')
collection = connection.get_access_to_collection()

# Every document of format_tests that belongs to the chapter named 'Test 1'
cursor = collection['format_tests'].find(
    {'parent_data.chapter_display_name': 'Test 1'})

# Maps (username, session) -> list of the 'time' values of that user's
# Test 1 documents in that session
users_sessions = defaultdict(list)
for item in cursor:
    users_sessions[(item['username'], item['session'])].append(item['time'])

users_tests_events = defaultdict(int)
for (username, session), times in users_sessions.iteritems():
    end_time = datetime.strptime(
        max(times).split('+')[0], "%Y-%m-%dT%H:%M:%S.%f")
    start_time = datetime.strptime(
        min(times).split('+')[0], "%Y-%m-%dT%H:%M:%S.%f")
    cursor = collection['tracking'].find({
        'username':
        username,
        'session':
        session,
Usage:

python -m problem_ids.create_problem_ids_collection.py <db_name> 

'''
from common.base_edx import EdXConnection
import sys

# First command line argument: name of the database
db_name = sys.argv[1]

# The second argument of the EdXConnection call below is the name of the new
# collection which will contain the results of this script. Each new document
# will be inserted into this new collection. The name of the resulting
# collection could be anything, preferably relevant to the course.
# NOTE(review): the code below drops the 'problem_ids' collection rather than
# the second argument ('test1') - confirm which one is the output collection.
connection = EdXConnection(db_name, 'test1', 'tracking', 'user_id_map', 'problem_ids')
collection = connection.get_access_to_collection()

# Drop the problem_ids collection if it exists
collection['problem_ids'].drop()

# Server-side problem_check events from the tracking collection
cursor = collection['tracking'].find({'event_type' : 'problem_check', 'event_source' : 'server'})
for document in cursor:
    doc_result = {}
    username = document['username']
    if username.isdigit():
        username = int(username)
    doc_result['username'] = username
    user_id_map = collection['user_id_map'].find_one({'username' : username})
    if not user_id_map:
        print "Username {0} not found in collection user_id_map".format(username)