def main():
    '''
    This function plots the number of observed events every day from 2012-03-04
    '''    

    sql = '''
    -- Takes 160 seconds to execute
     SELECT DATEDIFF(observed_events.observed_event_timestamp, '2012-03-03 00:00:01') AS Day, 
         COUNT(*)  AS `Number of observed events`
         FROM moocdb.observed_events AS observed_events 
         -- WHERE observed_events.observed_event_duration < 200
         GROUP BY day
        ORDER BY day ASC 
    ;
    '''
    
    options = google_charts_wrapper.options()
    options.set_data(google_charts_wrapper.get_data(sql))
    options.set_chart_type("area_chart")   
    options.set_chart_title("Number of observed events every day from March 3, 2012")
    options.set_height(500)
    options.set_width(900)
    options.set_page_title("Number of observed events every day from March 3, 2012")
    options.set_h_axis("{title: 'Day #' ,  titleTextStyle: {color: 'blue'}}")
    options.set_v_axis("{title: 'Number of observed events every day' ,  titleTextStyle: {color: 'blue'}}")
    options.set_output_file("./output/observed_events_per_day.html")
    print options.get_data()
    google_charts_wrapper.generate_html(options)  
Exemple #2
0
def main():
    '''
    This function plots the Average number of submissions per country (countries with less than 50 students were removed)
    '''    

    data_sql = '''
    -- Takes 1 seconds to execute
    SELECT users.user_country, (COUNT(*) / countries.country_number_of_users) AS `Average number of submissions per country for students who got a certificate (countries with less than 50 students were removed)`
    FROM moocdb.submissions AS submissions, moocdb.users AS users, moocdb.countries AS countries
    WHERE submissions.user_id = users.user_id
        AND users.user_country = countries.country_code
        AND countries.country_number_of_users > 50
        AND users.user_final_grade >= 0.5
    GROUP BY users.user_country
    ORDER BY `Average number of submissions per country for students who got a certificate (countries with less than 50 students were removed)` DESC
    ;

    ;
    '''
    
    options = google_charts_wrapper.options()
    options.set_data(google_charts_wrapper.get_data(data_sql))
    options.set_chart_type("geo_map")   
    options.set_chart_title("Average number of submissions per country for students who got a certificate (countries with less than 50 students were removed)")
    options.set_height(600)
    options.set_width(1000)
    options.set_page_title("Average number of submissions per country for students who got a certificate (countries with less than 50 students were removed)")
    options.set_output_file("./output/submissions_per_country_with_cert.html")
    print options.get_data()
    google_charts_wrapper.generate_html(options)  
Exemple #3
0
def main():
    '''
    This function plots the number of students per number of observed_events
    '''

    sql = '''
    -- Takes 32 seconds to execute
    SELECT number_of_events_per_user.`Number of observed events`, COUNT(*) AS `Number of students`
    FROM 
        (
        SELECT COUNT(*) AS `Number of observed events`, observed_events.user_id
        FROM moocdb.observed_events AS observed_events
        GROUP BY observed_events.user_id
        ) number_of_events_per_user
    GROUP BY number_of_events_per_user.`Number of observed events`
    ;
    '''

    options = google_charts_wrapper.options()
    options.set_data(google_charts_wrapper.get_data(sql))
    options.set_chart_type("area_chart")
    options.set_chart_title("Number of students per number of observed events")
    options.set_height(500)
    options.set_width(1200)
    options.set_page_title("Number of students per number of observed events")
    options.set_h_axis(
        "{title: 'Number of observed events' ,  titleTextStyle: {color: 'blue'}}"
    )
    options.set_v_axis(
        "{title: 'Number of students' ,  titleTextStyle: {color: 'blue'}}")
    options.set_output_file(
        "./output/number_of_users_per_number_of_observed_events.html")
    google_charts_wrapper.generate_html(options)
Exemple #4
0
def main():
    '''
    This function plots the number of submissions per user
    '''    

    sql = '''
    -- Takes 5 seconds to run
    -- http://stackoverflow.com/questions/5351628/how-can-i-add-a-new-column-which-counts-the-number-of-rows-as-serial-number
    SELECT @n := @n + 1 `User #`,  t.*
    FROM (SELECT @n:=0) initvars, 
        ( SELECT COUNT(*) AS `Number of submissions` FROM moocdb.submissions GROUP BY user_id ORDER BY `Number of submissions` DESC
        ) t
    ;
    '''
    
    options = google_charts_wrapper.options()
    options.set_data(google_charts_wrapper.get_data(sql))
    options.set_chart_type("line_chart")   
    options.set_chart_title("Number of submissions per user")
    options.set_height(500)
    options.set_width(900)
    options.set_page_title("Number of submissions per user")
    options.set_h_axis("{title: 'User #' ,  titleTextStyle: {color: 'blue'}}")
    options.set_v_axis("{title: 'Number of submissions' ,  titleTextStyle: {color: 'blue'}}")
    options.set_output_file("./output/submissions_nb_of_attempts_per_user.html")
    print options.get_data()
    google_charts_wrapper.generate_html(options)  
def main():
    '''
    This function plots the number of observed events per submission by country
    '''    

    data_sql = '''
    -- Takes 1 seconds to execute   
    SELECT users.user_country AS `Country`,
    AVG(users.user_observed_events_count / users.user_submissions_count) AS `Number observed events per submission for students who got a certificate`
    FROM moocdb.users AS users
    WHERE users.user_observed_events_count > 0
        AND users.user_submissions_count > 0
        AND users.user_final_grade >= 0.5
    GROUP BY `Country`
    HAVING COUNT(*) > 50
    ORDER BY `Number observed events per submission for students who got a certificate` DESC
    ;
    '''
    
    options = google_charts_wrapper.options()
    options.set_data(google_charts_wrapper.get_data(data_sql))
    options.set_chart_type("geo_map")   
    options.set_chart_title("Number observed events per submission for students who got a certificate by country having at least 50 students")
    options.set_height(600)
    options.set_width(1000)
    options.set_page_title("Number observed events per submission for students who got a certificate by country having at least 50 students")
    options.set_output_file("./output/observed_events_per_submission_by_country_with_cert_cutoff50students.html")
    print options.get_data()
    google_charts_wrapper.generate_html(options)  
Exemple #6
0
def main():
    '''
    This function plots the number of students per number of observed events
    '''    

    sql = '''
    -- Takes 3600 seconds to execute
    SELECT number_of_events_per_user.`Number of observed events`, COUNT(*) AS `Number of students`
    FROM 
        (
        SELECT COUNT(*) AS `Number of observed events`, observed_events.user_id,
            MIN(observed_events.observed_event_timestamp) AS first_event,
            MAX(observed_events.observed_event_timestamp) AS last_event
        FROM moocdb.observed_events AS observed_events
        GROUP BY observed_events.user_id
        ) number_of_events_per_user
    WHERE DATEDIFF(last_event, first_event) >= 1
        AND number_of_events_per_user.`Number of observed events` < 1000
    GROUP BY number_of_events_per_user.`Number of observed events`
    ;
    '''
    
    options = google_charts_wrapper.options()
    options.set_data(google_charts_wrapper.get_data(sql))
    options.set_chart_type("area_chart")   
    options.set_chart_title("Number of students per number of observed events for students having dropped out at least one day after their first event - Cutoff 1000")
    options.set_height(500)
    options.set_width(1200)
    options.set_page_title("Number of students per number of observed events for students having dropped out at least one day after their first event - Cutoff 1000")
    options.set_h_axis("{title: 'Number of observed events' ,  titleTextStyle: {color: 'blue'}}")
    options.set_v_axis("{title: 'Number of students' ,  titleTextStyle: {color: 'blue'}}")
    options.set_output_file("./output/number_of_users_per_number_of_observed_events_cutoff_mintime1day_cutoff1000.html")
    google_charts_wrapper.generate_html(options)  
def main():
    '''
    This function plots the number of edits on the wiki every day from 2012-03-04
    '''    

    sql = '''
    -- Takes 5 seconds to execute
    SELECT DATEDIFF(collaborations.collaboration_timestamp, '2012-03-03 00:00:01') AS Day, COUNT(*) AS `Number of edits on the wiki`
    FROM moocdb.collaborations AS collaborations
    WHERE collaborations.collaboration_parent_id = 2
    GROUP BY day
    ORDER BY day ASC ;
    ;
    '''
    
    options = google_charts_wrapper.options()
    options.set_data(google_charts_wrapper.get_data(sql))
    options.set_chart_type("area_chart")   
    options.set_chart_title("Number of edits on the wiki every day from March 3, 2012")
    options.set_height(500)
    options.set_width(900)
    options.set_page_title("Number of edits on the wiki every day from March 3, 2012")
    options.set_h_axis("{title: 'Day #' ,  titleTextStyle: {color: 'blue'}}")
    options.set_v_axis("{title: 'Number of edits on the wiki' ,  titleTextStyle: {color: 'blue'}}")
    options.set_output_file("./output/wiki_edits_per_day.html")
    print options.get_data()
    google_charts_wrapper.generate_html(options)  
def main():
    '''
    This function plots the number of observed events per submission by country
    '''    

    data_sql = '''
    -- Takes 1 seconds to execute   
    SELECT users.user_country AS `Country`,
    AVG(users.user_observed_events_count / users.user_submissions_count) AS `Number observed events per submission`
    FROM moocdb.users AS users
    WHERE users.user_observed_events_count > 0
        AND users.user_submissions_count > 0
    GROUP BY `Country`
    HAVING COUNT(*) > 200
    ORDER BY `Number observed events per submission` DESC
    ;
    '''
    
    options = google_charts_wrapper.options()
    options.set_data(google_charts_wrapper.get_data(data_sql))
    options.set_chart_type("geo_map")   
    options.set_chart_title("Number observed events per submission by country")
    options.set_height(600)
    options.set_width(1000)
    options.set_page_title("Number observed events per submission by country")
    options.set_output_file("./output/observed_events_per_submission_by_country.html")
    print options.get_data()
    google_charts_wrapper.generate_html(options)  
def main():
    '''
    This function plots the distribution of the length of the posts on the forum
    '''

    sql = '''
    -- Takes 5 seconds to execute
    SELECT @n := @n + 1 `Number of Submissions`,  t.*
    FROM (SELECT @n:=0) initvars, 
        ( SELECT LOG(LENGTH(collaborations.collaboration_content)) AS `Logarithmic length`
        FROM moocdb.collaborations AS collaborations
        WHERE LENGTH(collaborations.collaboration_content) > 0
        ORDER BY LENGTH(collaborations.collaboration_content) DESC 
        ) t;
    '''

    options = google_charts_wrapper.options()
    options.set_data(google_charts_wrapper.get_data(sql))
    options.set_chart_type("line_chart")
    options.set_chart_title(
        "Distribution of the length of the posts on the forum")
    options.set_height(500)
    options.set_width(900)
    options.set_page_title(
        "Distribution of the length of the posts on the forum")
    options.set_h_axis("{title: 'Post #' ,  titleTextStyle: {color: 'blue'}}")
    options.set_v_axis(
        "{title: 'Length of the post (logarithm)' ,  titleTextStyle: {color: 'blue'}}"
    )
    options.set_output_file("./output/forum_post_length.html")
    print options.get_data()
    google_charts_wrapper.generate_html(options)
def main():
    '''
    This function plots the number of new registered students every day from February 13, 2012
    '''    

    sql = '''
    -- Takes 1 second to execute
    SELECT CONCAT(CAST(users.user_join_timestamp AS DATE), '')  AS `Day`, 
     COUNT(*) AS `Number of new registered students`
     FROM moocdb.users AS users
     WHERE users.user_join_timestamp IS NOT NULL
        AND DATEDIFF(users.user_join_timestamp, '2012-02-13 00:00:01') >= 0
     GROUP BY `Day`
    ORDER BY `Day` ASC 
    ;
    '''
    
    options = google_charts_wrapper.options()
    options.set_data(google_charts_wrapper.get_data(sql))
    options.set_chart_type("area_chart")   
    options.set_chart_title("New registered students by date")
    options.set_height(500)
    options.set_width(900)
    options.set_page_title("New registered students by date")
    options.set_h_axis("{title: 'Date' ,  titleTextStyle: {color: 'blue'}}")
    options.set_v_axis("{title: 'New registered students' ,  titleTextStyle: {color: 'blue'}}")
    options.set_output_file("./output/users_join_date_date_labels.html")
    print options.get_data()
    google_charts_wrapper.generate_html(options)  
Exemple #11
0
def main():
    '''
    This function plots the number of edits on the wiki every day from 2012-03-04
    '''

    sql = '''
    -- Takes 5 seconds to execute
    SELECT CONCAT(CAST( collaborations.collaboration_timestamp AS DATE) , '') AS Day, COUNT(*) AS `Number of edits on the wiki for users with certificate`
    FROM moocdb.collaborations AS collaborations,
        moocdb.users AS users
    WHERE collaborations.collaboration_parent_id = 2
        AND users.user_final_grade >= 0.5
        AND users.user_id = collaborations.user_id
    GROUP BY day
    ORDER BY day ASC ;
    ;
    '''

    options = google_charts_wrapper.options()
    options.set_data(google_charts_wrapper.get_data(sql))
    options.set_chart_type("area_chart")
    options.set_chart_title("Wiki edits by date for users with certificate")
    options.set_height(500)
    options.set_width(900)
    options.set_page_title("Wiki edits by date for users with certificate")
    options.set_h_axis("{title: 'Date' ,  titleTextStyle: {color: 'blue'}}")
    options.set_v_axis(
        "{title: 'Number of edits on the wiki for users with certificate' ,  titleTextStyle: {color: 'blue'}}"
    )
    options.set_output_file(
        "./output/wiki_edits_per_day_date_label_with_cert.html")
    print options.get_data()
    google_charts_wrapper.generate_html(options)
def main():
    '''
    This function plots the average duration of observed events every day from 2012-03-04
    '''

    sql = '''
    -- Takes 160 seconds to execute
     SELECT CONCAT(CAST( observed_events.observed_event_timestamp AS DATE), '') AS Day, 
        AVG(observed_events.observed_event_duration) AS `Average duration of observed events`
         FROM moocdb.observed_events AS observed_events 
         -- WHERE observed_events.observed_event_duration < 200
         GROUP BY day
        ORDER BY day ASC 
    ;
    '''

    options = google_charts_wrapper.options()
    options.set_data(google_charts_wrapper.get_data(sql))
    options.set_chart_type("area_chart")
    options.set_chart_title("Average duration of observed events by date")
    options.set_height(500)
    options.set_width(900)
    options.set_page_title("Average duration of observed events by date")
    options.set_h_axis("{title: 'Date' ,  titleTextStyle: {color: 'blue'}}")
    options.set_v_axis(
        "{title: 'Average duration of observed events' ,  titleTextStyle: {color: 'blue'}}"
    )
    options.set_output_file(
        "./output/observed_events_avg_duration_per_day_date_label.html")
    print options.get_data()
    google_charts_wrapper.generate_html(options)
def main():
    '''
    This function plots the number of posts on the forum every day from 2012-03-04
    '''    

    # http://dev.mysql.com/doc/refman/5.0/en/cast-functions.html
    # If you use a number in string context, the number automatically is converted to a string 
    # This way x does not blow Google Chart API
    sql = '''
    -- Takes 5 seconds to execute
    SELECT CONCAT(CAST(collaborations.collaboration_timestamp AS DATE), '') AS day, COUNT(*) AS `Number of posts on the forum`
    FROM moocdb.collaborations AS collaborations
    WHERE collaborations.collaboration_parent_id = 1
        AND DATEDIFF(collaborations.collaboration_timestamp, '2012-03-03 00:00:01') < 120
    GROUP BY day
    ORDER BY day ASC ;
    ;
    '''
    
    options = google_charts_wrapper.options()
    options.set_data(google_charts_wrapper.get_data(sql))
    options.set_chart_type("area_chart")   
    options.set_chart_title("Number of posts on the forum by date")
    options.set_height(500)
    options.set_width(900)
    options.set_page_title("Number of posts on the forum by date")
    options.set_h_axis("{title: 'Date' ,  titleTextStyle: {color: 'blue'}}")
    options.set_v_axis("{title: 'Number of posts' ,  titleTextStyle: {color: 'blue'}}")
    options.set_output_file("./output/forum_posts_per_day_date_labels_cutoff120.html")
    print options.get_data()
    google_charts_wrapper.generate_html(options)  
Exemple #14
0
def main():
    '''
    This function plots the distribution of the number of dropouts by week
    '''

    sql = '''
    -- Takes 1 second to execute
    SELECT CONCAT(users.user_dropout_week, '') AS `Week #`, COUNT(*) AS `Number of dropouts`
    FROM moocdb.users
    WHERE users.user_dropout_week IS NOT NULL 
    GROUP BY users.user_dropout_week
    ;
        '''

    # Careful, wrapper needs to be fixed: transpose result matrix
    options = google_charts_wrapper.options()
    options.set_data(google_charts_wrapper.get_data(sql))
    options.set_chart_type("column_chart")
    options.set_chart_title("Number of dropouts by week")
    options.set_height(500)
    options.set_width(1200)
    options.set_page_title("Number of dropouts by week")
    options.set_h_axis("{title: 'Week #' ,  titleTextStyle: {color: 'blue'}}")
    options.set_v_axis(
        "{title: 'Number of dropouts',  titleTextStyle: {color: 'blue'}}")
    options.set_output_file("./output/dropout_per_week.html")
    print options.get_data()
    google_charts_wrapper.generate_html(options)
Exemple #15
0
def main():
    '''
    This function plots the distribution of the days between the last submission and the deadline
    '''

    sql = '''
    -- Takes 300 seconds to execute    
    SELECT 
           FLOOR((UNIX_TIMESTAMP(problems.problem_hard_deadline) 
                - UNIX_TIMESTAMP(submissions.submission_timestamp)) / (3600 * 24 )) AS day,
         COUNT(*) AS `Number of days between the last submission and the deadline for all homework`
    FROM moocdb.submissions AS submissions
     INNER JOIN moocdb.problems AS problems
     ON problems.problem_id = submissions.problem_id
    WHERE submissions.submission_attempt_number = ( -- We just want the last attempt
                SELECT MAX(submissions2.submission_attempt_number)
                FROM moocdb.submissions AS submissions2
                WHERE submissions2.problem_id = submissions.problem_id
                    AND submissions2.user_id = submissions.user_id
            )
        AND problems.problem_type_id = 1 -- = 1 means Homework
     GROUP BY day
     ORDER BY day ASC 
    ;
    ;
        '''

    # Careful, wrapper needs to be fixed: transpose result matrix
    options = google_charts_wrapper.options()
    options.set_data(google_charts_wrapper.get_data(sql))
    options.set_chart_type("column_chart")
    options.set_chart_title(
        "Number of days between the last submission and the deadline for all homework"
    )
    options.set_height(500)
    options.set_width(1200)
    options.set_page_title(
        "Number of days between the last submission and the deadline for all homework"
    )
    options.set_h_axis("{title: 'Day #' ,  titleTextStyle: {color: 'blue'}}")
    options.set_v_axis(
        "{title: 'Number of submissions',  titleTextStyle: {color: 'blue'}}")
    options.set_output_file(
        "./output/days_last_submission_before_deadline_homework.html")
    print options.get_data()
    google_charts_wrapper.generate_html(options)
Exemple #16
0
def main():
    '''
    This function plots the number of submissions per user
    '''

    sql = '''
    -- Takes 7 seconds to run
    -- http://stackoverflow.com/questions/5351628/how-can-i-add-a-new-column-which-counts-the-number-of-rows-as-serial-number
    SELECT @n := @n + 1 `Submission before deadline`,  t.*
        FROM (SELECT @n:=0) initvars, 
            (
            SELECT -- submissions.submission_timestamp, problems.problem_hard_deadline,
                TIME_TO_SEC(TIMEDIFF(problems.problem_hard_deadline, submissions.submission_timestamp))/3600 AS diff
            FROM moocdb.submissions AS submissions,
                moocdb.problems AS problems,
                moocdb.problem_types AS problem_types,
                moocdb.assessments AS assessments
            WHERE submissions.problem_id = problems.problem_id
                 AND problems.problem_type_id = problem_types.problem_type_id
                 AND problem_types.problem_type_name = 'Final Exam'
                 AND assessments.submission_id = submissions.submission_id
                 AND assessments.assessment_grade = 1
                AND TIME_TO_SEC(TIMEDIFF(problems.problem_hard_deadline, submissions.submission_timestamp))/3600 > 0
             ORDER BY diff ASC 
            -- LIMIT 1000
            ) t
    -- LIMIT 1000
    ;

    '''

    options = google_charts_wrapper.options()
    options.set_data(google_charts_wrapper.get_data(sql))
    options.set_chart_type("line_chart")
    options.set_chart_title("Submission before deadline")
    options.set_height(500)
    options.set_width(900)
    options.set_page_title("Submission before deadline")
    options.set_h_axis("{title: 'User #' ,  titleTextStyle: {color: 'blue'}}")
    options.set_v_axis(
        "{title: 'Number of submissions' ,  titleTextStyle: {color: 'blue'}}")
    options.set_output_file(
        "./output/final_exam_submissions_before_deadline.html")
    print options.get_data()
    google_charts_wrapper.generate_html(options)
Exemple #17
0
def main():
    '''
    This function plots the number of edits on the wiki every day from 2012-03-04
    '''

    sql = '''
    -- Takes 10 seconds to execute
    SELECT wiki.day, `Number of edits on the wiki (multiplied by 10 for readibility)`, `Number of posts on the forum`
    FROM (
        -- Takes 5 seconds to execute
        SELECT CONCAT(CAST( collaborations.collaboration_timestamp AS DATE) , '') AS day, COUNT(*) * 10 AS `Number of edits on the wiki (multiplied by 10 for readibility)`
        FROM moocdb.collaborations AS collaborations
        WHERE collaborations.collaboration_parent_id = 2
        GROUP BY day
        ORDER BY day ASC
    ) wiki,
        (-- Takes 5 seconds to execute
        SELECT CONCAT(CAST(collaborations.collaboration_timestamp AS DATE), '') AS day, COUNT(*) AS `Number of posts on the forum`
        FROM moocdb.collaborations AS collaborations
        WHERE collaborations.collaboration_parent_id = 1
        GROUP BY day
        ORDER BY day ASC
    ) forum
    WHERE forum.day = wiki.day
    
    ;
    '''

    options = google_charts_wrapper.options()
    options.set_data(google_charts_wrapper.get_data(sql))
    options.set_chart_type("area_chart")
    options.set_chart_title(
        "Number of edits on the wiki and posts on the forum by date")
    options.set_height(500)
    options.set_width(900)
    options.set_page_title(
        "Number of edits on the wiki and posts on the forum by date")
    options.set_h_axis("{title: 'Date' ,  titleTextStyle: {color: 'blue'}}")
    options.set_v_axis(
        "{title: 'Number of edits on wiki and posts on forum' ,  titleTextStyle: {color: 'blue'}}"
    )
    options.set_output_file(
        "./output/wiki_editsx10_and_forum_posts_per_day_date_label.html")
    print options.get_data()
    google_charts_wrapper.generate_html(options)
Exemple #18
0
def main():
    '''
    This function plots the number of edits on the wiki every day from 2012-03-04
    '''

    preprocess_sql = '''
    -- Takes 1 seconds to execute    
    -- we first create a temporary table that contains the students of certificates per country
    CREATE OR REPLACE VIEW temp_students_per_country AS
    SELECT users.user_country AS country, COUNT(*) AS count
            FROM moocdb.users AS users
            GROUP BY users.user_country;
    '''

    data_sql = '''
    -- 
    SELECT users.user_country, COUNT(*) / temp_students_per_country.count * 100 AS `Percentage of students who got a certificate`
    FROM moocdb.users AS users, temp_students_per_country
    WHERE users.user_final_grade >= 0.5
    AND temp_students_per_country.country = users.user_country
    AND temp_students_per_country.count >= 100
    GROUP BY users.user_country
    ORDER BY count DESC 
    ;
    '''

    postprocess_sql = '''
    DROP VIEW temp_students_per_country;    
    ;
    '''

    google_charts_wrapper.run_query(preprocess_sql)
    options = google_charts_wrapper.options()
    options.set_data(google_charts_wrapper.get_data(data_sql))
    options.set_chart_type("geo_map")
    options.set_chart_title("Percentage of students who got a certificate")
    options.set_height(600)
    options.set_width(1000)
    options.set_page_title("Percentage of students who got a certificate")
    options.set_output_file(
        "./output/user_certifcate_per_country_normalized_cutoff100.html")
    print options.get_data()
    google_charts_wrapper.generate_html(options)
    google_charts_wrapper.run_query(postprocess_sql)
def main():
    '''
    This function plots the distribution of the length of the posts on the forum
    '''

    sql = '''
    -- Takes 5 seconds to execute
    SELECT no_cert.`Average length of posts on forum for students who did not get a certificate`,
    with_cert.`Average length of posts on forum for students who got a certificate`

    FROM
    (SELECT AVG(LENGTH(collaborations.collaboration_content)) AS `Average length of posts on forum for students who did not get a certificate`
        FROM moocdb.collaborations AS collaborations,
             moocdb.users AS users
        WHERE LENGTH(collaborations.collaboration_content) > 0
            AND collaborations.collaboration_parent_id = 1
            AND (users.user_final_grade IS NULL OR users.user_final_grade < 0.5)
            AND users.user_id = collaborations.user_id) no_cert,

    (SELECT AVG(LENGTH(collaborations.collaboration_content)) AS `Average length of posts on forum for students who got a certificate`
            FROM moocdb.collaborations AS collaborations,
                 moocdb.users AS users
            WHERE LENGTH(collaborations.collaboration_content) > 0
                AND collaborations.collaboration_parent_id = 1
                AND users.user_final_grade >= 0.5
                AND users.user_id = collaborations.user_id) with_cert
;
        '''

    # Careful, wrapper needs to be fixed: transpose result matrix
    options = google_charts_wrapper.options()
    options.set_data(google_charts_wrapper.get_data(sql))
    options.set_chart_type("column_chart")
    options.set_chart_title("Length of the posts on the forum")
    options.set_height(500)
    options.set_width(900)
    options.set_page_title("Length of the posts on the forum")
    options.set_h_axis("{title: '' ,  titleTextStyle: {color: 'blue'}}")
    options.set_v_axis(
        "{title: 'Average length of the posts',  titleTextStyle: {color: 'blue'}}"
    )
    options.set_output_file("./output/forum_post_length_cert_vs_no_cert.html")
    print options.get_data()
    google_charts_wrapper.generate_html(options)
def main():
    '''
    This function plots the distribution of the length of the posts on the forum
    '''    

    sql = '''
    -- duration_per_grade.sql
    -- Takes 900 seconds to execute
    SELECT letter_grade.grade,
        (COUNT(*) / letter_grade.number_of_students) AS occurrences,
        MIN(observed_events.observed_event_duration),
        MAX(observed_events.observed_event_duration),
        AVG(observed_events.observed_event_duration),
        STD(observed_events.observed_event_duration),
        SUM(observed_events.observed_event_duration) / letter_grade.number_of_students
        
    FROM moocdb.observed_events AS observed_events,
         moocdb.users AS users,
        moocdb.letter_grade AS letter_grade
    WHERE observed_events.user_id = users.user_id
        AND letter_grade.grade = users.user_final_grade
    GROUP BY letter_grade.grade
    ORDER BY SUM(observed_events.observed_event_duration) DESC
    ;

    '''
    
    options = google_charts_wrapper.options()
    options.set_data(google_charts_wrapper.get_data(sql))
    options.set_chart_type("line_chart")   
    options.set_chart_title("Distribution of the length of the posts on the forum")
    options.set_height(500)
    options.set_width(900)
    options.set_page_title("Distribution of the length of the posts on the forum")
    options.set_h_axis("{title: 'Post #' ,  titleTextStyle: {color: 'blue'}}")
    options.set_v_axis("{title: 'Length of the post (logarithm)' ,  titleTextStyle: {color: 'blue'}}")
    options.set_output_file("./output/forum_post_length.html")
    print options.get_data()
    google_charts_wrapper.generate_html(options)  
Exemple #21
0
def main():
    '''
    This function plots the final grade distribution
    '''    

    sql = '''
    -- Takes 1 second to execute
    SELECT letter_grade.letter AS `Grade`,  COUNT(*) AS `Number of students` 
    FROM moocdb.users AS users, moocdb.letter_grade AS letter_grade
    WHERE users.user_final_grade = letter_grade.grade
    GROUP BY letter_grade.grade
    ORDER BY letter_grade.grade DESC 
    ;
    '''
    
    options = google_charts_wrapper.options()
    options.set_data(google_charts_wrapper.get_data(sql))
    options.set_chart_type("pie_chart")   
    options.set_chart_title("Final grade distribution")
    options.set_height(500)
    options.set_width(900)
    options.set_page_title("Final grade distribution")
    options.set_output_file("./output/final_grade_distribution.html")
    google_charts_wrapper.generate_html(options)