Example #1
def feedback_tmc():
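    '''create team member contribution (tmc) charts and feedback files for each team'''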

    cfg = f.load_config()

    f.pnt_notice(c.msg['console_start'], os.path.basename(__file__))
    f.pnt_info(c.msg["console_loading"])

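    # build the list of unique teams from the tmc data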
    teams = f.load_tsv('data_tmc')
    teams.drop_duplicates(subset=['list_team'], keep='first', inplace=True)
    teams = teams['list_team']

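    # load the tmc data and swap the configured labels for their replacement values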
    data_tmc = f.load_tsv('data_tmc')
    data_tmc.replace(cfg['tmc_chart']['find_labels'],
                     cfg['tmc_chart']['replace_values'],
                     inplace=True)

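    # collect the contribution columns and derive the matching id columns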
    tm_cols_tmc = [col for col in data_tmc.columns if 'contribution' in col]
    tm_cols_tmc.sort()
    tm_cols_tmc.insert(0, 'username')
    tm_cols_id = [w.replace('contribution', 'id') for w in tm_cols_tmc]

    f.pnt_info(c.msg["console_tmc"])

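    # build anon and conf charts and feedback for each team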
    for team in teams:
        print(team)

        this_data = f.filter_row('data_tmc', 'list_team', team)
        team_header = this_data[tm_cols_id].values.tolist()[0]
        team_header[0] = 'reviews'

        team_data = this_data[tm_cols_tmc].values.tolist()

        this_conf_df = DataFrame.from_records(
            team_data,
            columns=team_header).set_index('reviews').dropna(axis=1, how='all')
        this_conf_df = this_conf_df.rename(
            columns=lambda x: re.sub(' - .*', '', x)).T
        this_anon_df = this_conf_df.rename(columns=lambda x: re.sub(
            'u.*', cfg['tmc_chart']['anon_legend'], x))

        shape = this_anon_df.shape

        this_anon_df['average'] = this_anon_df.mean(axis=1)
        this_conf_df['average'] = this_conf_df.mean(axis=1)

        f.make_tmc_chart(this_anon_df, c.d['charts'] + team + "_tmc_anon.png")
        f.make_tmc_chart(this_conf_df, c.d['charts'] + team + "_tmc_conf.png")

        format_tmc_feedback(team, 'conf', shape, this_data)
        format_tmc_feedback(team, 'anon', shape, this_data)

    f.pnt_notice(c.msg['console_complete'], os.path.basename(__file__))

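    # merge all the confidential pdfs into a single document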
    all_conf = glob.glob(c.d['pdf'] + '/*_conf.pdf')
    all_conf.sort()
    merger = PdfFileMerger()
    for pdf in all_conf:
        merger.append(pdf)
    merger.write(c.f['all_conf'])
    merger.close()
def load_data():
    '''load and clean all the csvs in the files directory'''

    cfg = f.load_config()

    f.pnt_notice(c.msg['console_start'], os.path.basename(__file__))
    f.pnt_info(c.msg["console_loading"])
    f.make_directories(c.d)

    # load in all csvs in the files dir
    for file in glob.glob('./files/*.csv'):
        this_csv = Path(file).stem
        this_rename = cfg['load_file'][this_csv]['rename']
        this_required = cfg['load_file'][this_csv]['required']
        this_index = cfg['load_file'][this_csv]['index']
        this_expected = cfg['load_file'][this_csv]['expected']

        f.load_csv(this_csv)

        # if this_csv == 'data_client':
            # remove first two lines of the qualtrics default csv

        if this_csv == 'feedback_course':
            c.df[this_csv]['tutor_name'] = c.df[this_csv]['tutor'].str.replace(' ', '_')

        if this_csv == 'fields':
            f.col_to_lower(this_csv, "field")

        if this_rename:
            f.rename_header(this_csv, this_rename)
        if this_csv == 'marks':
            c.df[this_csv]['marker_name'] = c.df[this_csv]['marker'].str.replace('\'', '').str.replace(' ', '_')
            print(c.df[this_csv]['marker_name'])
            if not cfg['feedback_type']['group']:
                c.df[this_csv][['user', 'name']] = c.df[this_csv]['list_name'].str.split(r'\s+-\s+', expand=True)
        if this_index:
            f.check_for_duplicates(this_csv, this_index)
        if this_required:
            f.check_for_empty_cells(this_csv, this_required)
        if this_expected == "crit":
            f.check_for_columns(this_csv)
        elif this_expected == "labels":
            f.check_for_labels(this_csv)

        f.save_tsv(this_csv)

    # print message to console - complete!
    f.pnt_notice(c.msg['console_complete'], os.path.basename(__file__))
def feedback_marks():
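    '''create marks feedback files (md, html, pdf) for each student or team'''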
    cfg = f.load_config()
    f.pnt_notice(c.msg['console_start'], os.path.basename(__file__))
    f.pnt_info(c.msg["console_loading"])
    
    # load in tsvs of needed fields
    marks_df = f.load_tsv('marks')

    # create a df of just the crit and the comments
    crit = f.filter_row('fields', 'field', 'crit_')
    comm = f.filter_row('fields', 'field', 'comment_')

    f.pnt_info(c.msg["console_creating_feedback_files"])
    
    # create distribution charts for later
    if cfg['crit_display']['graph']:
        stats = f.make_crit_list(crit, marks_df)
        f.make_crit_chart(crit, stats, "na")

    # iterate through the marks file
    for record_row in marks_df.itertuples():

        # decide whether to use the list_team or list_name field
        if cfg['feedback_type']['group']:
            this_record = record_row.list_team
            this_record_name = record_row.list_team
        else:
            this_record = record_row.user
            this_record_name = record_row.list_name

        f.pnt_console(this_record)
                
        # create the pandoc header
        with open(c.d['yaml'] + this_record + '.yaml', 'w') as out:
            f.pandoc_yaml(out, this_record_name)

        with open(c.d['css'] + this_record + '.css', 'w') as out:
            f.pandoc_css(out, this_record_name, 'anon')

        #open up a file to print to
        with open(c.d['md'] + this_record + '.md', 'w') as out:
            print("## " + cfg['pdf_messages']['comment_title'] + "{-}\n\n", file=out)
            for loop_row in comm.itertuples():
                f.print_results_header(loop_row, out)
                f.print_results_text(loop_row, record_row, out)

            #loop through the crit columns according to app_config
            if cfg['crit_display']['text']\
                or cfg['crit_display']['scale']\
                or cfg['crit_display']['graph']:
                
                # start with indicator title and notes
                print("# " + cfg['pdf_messages']['indicator_title'] + "{-}\n\n", file=out)
                print(cfg['pdf_messages']['indicator_note'] + "\n\n", file=out)
                print(cfg['pdf_messages']['chart_note'] + "\n\n", file=out)

            for loop_row in crit.itertuples():
                if cfg['crit_display']['text'] \
                    or cfg['crit_display']['scale'] \
                    or cfg['crit_display']['graph']:
                    f.print_results_header(loop_row, out)
                if cfg['crit_display']['text']:
                    f.print_results_text(loop_row, record_row, out)
                if cfg['crit_display']['scale']:
                    f.print_results_scale(loop_row, record_row, out)
                if cfg['crit_display']['graph']:
                    f.print_results_graph(loop_row, record_row, out)
                # if cfg['crit_display']['rubric_new_page']:
                #     f.print_new_page(out)

            if cfg['crit_display']['rubric']:
                if cfg['rubric_display']['rubric_new_page']:
                    print("# " + cfg['pdf_messages']['rubric_title'] + "{-}\n\n", file=out)
                else:
                    print("## " + cfg['pdf_messages']['rubric_title'] + "{-}\n\n", file=out)
                print(cfg['pdf_messages']['rubric_note'] + "\n", file=out)
                f.print_results_rubric(record_row, this_record)
                print("\n", file=out)

        f.pandoc_html_single(this_record)

        # add the rubric
        if cfg['crit_display']['rubric']:
            files = [c.d['rubric'] + this_record + ".html"]
            with open(c.d['html'] + this_record + '.html', 'a') as outfile:
                for fname in files:
                    with open(fname) as infile:
                        outfile.write(infile.read())

        f.pandoc_pdf(this_record)
 
    # print message to console - complete!
    f.pnt_notice(c.msg['console_complete'], os.path.basename(__file__))
def feedback_many_eyes():
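    '''create team audit charts and feedback from the self, shadow, tutor and client reviews'''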

    cfg = f.load_config()

    # print message to console - starting!
    f.pnt_notice(c.msg['console_start'], os.path.basename(__file__))

    # print message to console
    f.pnt_info(c.msg["console_loading"])

    # load in tsvs of needed fields
    fields = f.load_tsv('fields')
    crit_levels = f.load_tsv('crit_levels')

    data_self = f.load_tsv('data_self')
    data_shadow = f.load_tsv('data_shadow')
    data_tutor = f.load_tsv('data_tutor')
    data_client = f.load_tsv('data_client')

    # create a df of just the crit and the comments
    crit = f.filter_row('fields', 'field', 'crit_')
    comm = f.filter_row('fields', 'field', 'comment_')

    # print message to console
    f.pnt_info(c.msg["console_creating_feedback_files"])

    # load data and create list of teams
    teams = f.load_tsv('students')
    teams.drop_duplicates(subset=['group'], keep='first', inplace=True)

    # create a list of crit for access
    crit_list = []
    for j, row in crit.iterrows():
        crit_list.append(row['label'])

    # create a df for access for self and shadow
    f.many_eyes_dataframe_sort('data_self')
    f.many_eyes_dataframe_sort('data_shadow')

    self_df = f.load_tsv('data_self_sorted')
    shadow_df = f.load_tsv('data_shadow_sorted')

    # create a df to access for tutor comments
    tutor_df = c.df['data_tutor']
    tutor_df.replace(cfg['audit_chart']['find_labels'],
                     cfg['audit_chart']['replace_values'],
                     inplace=True)

    # create a df to access for client comments
    client_df = c.df['data_client']
    client_df.replace(cfg['audit_chart']['find_client_labels'],
                      cfg['audit_chart']['replace_client_values'],
                      inplace=True)

    # start a list of class averages for each eye
    # (assigning via vars() inside a function does not create local variables,
    # so the four lists are declared explicitly)
    class_self_ave = []
    class_shadow_ave = []
    class_tutor_ave = []
    class_client_ave = []

    #loop through the crit columns
    for j, row in crit.iterrows():
        class_self_crit_df = self_df[self_df['crit_text'].str.contains(
            row['label'])]
        class_shadow_crit_df = shadow_df[shadow_df['crit_text'].str.contains(
            row['label'])]
        class_self_ave.append(class_self_crit_df['crit_val'].mean())
        class_shadow_ave.append(class_shadow_crit_df['crit_val'].mean())
        class_tutor_ave.append(tutor_df[row['field']].mean())

        try:
            class_client_ave.append(client_df[row['field']].mean())
        except KeyError:
            print("there was a key error")
            class_client_ave.append(0)

    # build a df for class averages
    class_ave_df = pd.DataFrame()
    class_ave_df['criterion'] = crit_list
    class_ave_df['self'] = class_self_ave
    class_ave_df['shadow'] = class_shadow_ave
    class_ave_df['tutor'] = class_tutor_ave
    class_ave_df['client'] = class_client_ave
    class_ave_df['class_ave'] = class_ave_df.mean(axis=1)

    class_ave_list = class_ave_df["class_ave"].values

    # create a team list to iterate through
    team_list = []
    for i, row in teams.iterrows():
        this_team = row['group']
        team_list.append(this_team)

    f.pnt_info(c.msg["console_many_eyes"])

    # for each team
    for team in team_list:
        # report to the console
        print(team)

        # create a df for summary stats
        this_team_self_df = self_df[self_df['team'].str.contains(team)]
        this_team_shadow_df = shadow_df[shadow_df['team'].str.contains(team)]
        this_team_tutor_df = tutor_df[tutor_df['team'].str.contains(team)]
        this_team_client_df = client_df[client_df['team'].str.contains(
            team, na=False)]

        # create lists for the averages
        crit_self_ave = []
        crit_shadow_ave = []
        crit_tutor_ave = []
        crit_client_ave = []

        #loop through the crit columns
        for j, row in crit.iterrows():

            this_team_self_crit_df = this_team_self_df[
                this_team_self_df['crit_text'].str.contains(row['label'])]
            this_team_shadow_crit_df = this_team_shadow_df[
                this_team_shadow_df['crit_text'].str.contains(row['label'])]
            # add the mean to the list
            crit_self_ave.append(this_team_self_crit_df['crit_val'].mean())
            crit_shadow_ave.append(this_team_shadow_crit_df['crit_val'].mean())
            crit_tutor_ave.append(this_team_tutor_df[row['field']].mean())

        # deal with empty client df entries
        if this_team_client_df.empty:
            for _, row in crit.iterrows():
                crit_client_ave.append(0)
        else:
            for _, row in crit.iterrows():
                try:
                    crit_client_ave.append(
                        this_team_client_df[row['field']].mean())
                except KeyError:
                    crit_client_ave.append(0)

        # create a df for the team to generate a graph
        this_team_ave_df = pd.DataFrame()
        this_team_ave_df['criterion'] = crit_list
        this_team_ave_df['self'] = crit_self_ave
        this_team_ave_df['shadow'] = crit_shadow_ave
        this_team_ave_df['tutor'] = crit_tutor_ave
        this_team_ave_df['client'] = crit_client_ave

        # report the team ave and the class ave
        this_team_ave_df['team_ave'] = this_team_ave_df.mean(axis=1)
        this_team_ave_df['class_ave'] = class_ave_list

        # now create the team chart
        this_team_ave_df.set_index("criterion", drop=True, inplace=True)
        f.make_audit_chart(this_team_ave_df,
                           c.d['charts'] + team + "_audit.png")

        # run the format audit feedback for anon/conf as defined below
        format_audit_feedback(team, 'conf')
        format_audit_feedback(team, 'anon')

        # compile the anon feedback for students
        files = [
            c.d['files'] + 'text_preamble.md',
            c.d['md'] + team + "_tmc_anon.md",
            c.d['md'] + team + "_audit_anon.md",
            c.d['files'] + 'text_changelog.md'
        ]
        with open(
                c.d['md'] + team + "_" +
                cfg['assignment']['assignment_short'] + "_audit_anon.md",
                'w') as outfile:
            for fname in files:
                with open(fname) as infile:
                    outfile.write(infile.read())

        # compile the conf feedback for teaching team
        files = [
            c.d['md'] + team + "_tmc_conf.md",
            c.d['md'] + team + "_audit_conf.md"
        ]
        with open(
                c.d['md'] + team + "_" +
                cfg['assignment']['assignment_short'] + "_audit_conf.md",
                'w') as outfile:
            for fname in files:
                with open(fname) as infile:
                    outfile.write(infile.read())

        # convert md to pdf
        f.pandoc_html_toc(
            team + "_" + cfg['assignment']['assignment_short'] + "_audit_anon",
            team, 'anon')
        f.pandoc_html_toc(
            team + "_" + cfg['assignment']['assignment_short'] + "_audit_conf",
            team, 'conf')

        f.pandoc_pdf(team + "_" + cfg['assignment']['assignment_short'] +
                     "_audit_anon")
        f.pandoc_pdf(team + "_" + cfg['assignment']['assignment_short'] +
                     "_audit_conf")
def format_audit_feedback(team, kind):
    # (only the tail of this helper is shown here; the earlier part of the
    # body, which opens `out` and sets `this_field` and the per-team
    # dataframes, is elided)
                for k, c_row in this_team_tutor_df.iterrows():
                    this_text = str(c_row[this_field])
                    print("**Tutor**\n\n" + this_text + "\n\n", file=out)
                for k, c_row in this_team_client_df.iterrows():
                    this_text = str(c_row[this_field])
                    print("**Client**\n\n" + this_text + "\n\n", file=out)

        # print the confidential comments
        if kind == 'conf':
            print("\n\n# Confidential feedback about the team progress",
                  file=out)
            self_conf_df = c.df['data_self'].copy()
            this_team_self_conf_df = self_conf_df[
                self_conf_df['team'].str.contains(team)]

            for j, row in this_team_self_conf_df.iterrows():
                if (str(row['comment_confidential']) == ""):
                    # print messages if the comment is empty
                    print("**" + row['user'] + " (" + row['username'] + ")" +
                          "**\n\n" + cfg['tmc_pdf']['member_no_comment'] +
                          "\n\n",
                          file=out)
                else:
                    print("**" + row['user'] + " (" + row['username'] + ")" +
                          "**\n\n" + str(row['comment_confidential']) + "\n\n",
                          file=out)


# print message to console - complete!
f.pnt_notice(c.msg['console_complete'], os.path.basename(__file__))
Example #6
def wattle_csv():
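    '''organise the feedback pdfs for upload and create the csv for wattle'''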
    cfg = f.load_config()

    # print message to console - starting!
    f.pnt_notice(c.msg['console_start'], os.path.basename(__file__))

    # organising the files for uploading to wattle
    # print message to console
    f.pnt_info(c.msg["console_wattle"])

    # get the list of students
    f.load_tsv('students')
    f.load_tsv('marks')

    if cfg['feedback_type']['tmc']:
        for i, row in c.df['students'].iterrows():
            user = row['user']
            group = row['group']
            secret = hashlib.sha1(row['user'].encode('utf-8')).hexdigest()
            secret_file = user + "-" + secret + ".pdf"

            # cp pdf to secret here
            file_from = c.d['pdf'] + group + "_tmc_anon.pdf"
            file_to = c.d['upload'] + secret_file

            if path.exists(file_from):
                copyfile(file_from, file_to)
                comment = "<a href=\"" + cfg['assignment'][
                    'feedback_url'] + "/" + user + "-" + secret + ".pdf\">PDF Feedback</a>"
            else:
                comment = "No Team Member Contribution received from the team"

            # update the df
            c.df['students'].at[i, 'secret'] = comment
        wattle_out = c.df['students'][['user', 'secret']]

    # decide whether to use the list_team or list_name field
    elif not cfg['feedback_type']['group']:
        # print message to console - creating secrets
        f.pnt_info(c.msg['console_secrets'])

        # loop through each row and create a secret for each student
        for i, row in c.df['marks'].iterrows():
            user = row['user']
            secret = hashlib.sha1(row['user'].encode('utf-8')).hexdigest()
            secret_file = user + "-" + secret + ".pdf"
            comment = "<a href=\"" + cfg['assignment'][
                'feedback_url'] + "/" + user + "-" + secret + ".pdf\">PDF Feedback</a>"

            # update the df
            c.df['marks'].at[i, 'secret'] = comment

            # cp pdf to secret here
            file_from = c.d['pdf'] + user + ".pdf"
            file_to = c.d['upload'] + secret_file
            copyfile(file_from, file_to)
        marks_out = c.df['marks'][['user', 'grade_final', 'secret']]
        wattle_out = marks_out.merge(c.df['students'], on='user', how='left')[[
            'user', 'grade_final', 'secret'
        ]]

    else:  # use the list_team
        # loop through each row and create a secret for each student
        for i, row in c.df['marks'].iterrows():
            group = row['list_team']
            if 'suggested_indicator' in c.df['marks']:
                comment = "Team Progress Indicator: " + row[
                    'suggested_indicator'] + "<br /><a href=\"" + cfg['assignment'][
                        'feedback_url'] + "/" + group + ".pdf\">PDF Feedback for " + group + "</a>"
            else:
                comment = "<a href=\"" + cfg['assignment'][
                    'feedback_url'] + "/" + group + ".pdf\">PDF Feedback for " + group + "</a>"

            c.df['marks'].at[i, 'secret'] = comment

            # cp pdf to secret here
            file_from = c.d['pdf'] + group + ".pdf"
            file_to = c.d['upload'] + group + ".pdf"
            copyfile(file_from, file_to)

        marks_out = c.df['marks'][['list_team', 'grade_final', 'secret']]
        print(marks_out)
        wattle_out = marks_out.merge(
            c.df['students'],
            left_on='list_team',
            right_on='group',
            how='left')[['user', 'grade_final', 'secret', 'group']]
        print(wattle_out)

    f.pnt_info(c.msg['console_upload'])
    wattle_out.to_csv(c.f['wattle'], index=False)
    f.pnt_notice(c.msg['console_complete'], os.path.basename(__file__))
def feedback_course():
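    '''create per-tutor and whole-course feedback reports from the course feedback data'''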

    cfg = f.load_config()
    f.pnt_notice(c.msg['console_start'], os.path.basename(__file__))
    f.pnt_info(c.msg["console_loading"])

    tutors = f.load_tsv('feedback_course')
    tutors.drop_duplicates(subset=['tutor_name'], keep='first', inplace=True)
    feedback_course_df = f.load_tsv('feedback_course')
    crit_levels = f.load_tsv('crit_levels')
    crit = f.filter_row('fields', 'field', 'crit_')
    comm = f.filter_row('fields', 'field', 'comment_')

    tutor_list = f.create_list(tutors, 'tutor_name')
    crit_levels_list = f.create_list(crit_levels, 'index')

    f.pnt_info(c.msg["console_creating_feedback_files"])
    for i, row in crit.iterrows():
        this_crit = row['field']
        this_crit_list = []
        for tutor in tutor_list:
            this_tutor_df = feedback_course_df[
                feedback_course_df['tutor_name'].str.contains(tutor)]
            this_tutor_crit = []
            this_header = [tutor]
            for val in crit_levels_list:
                this_sum = (this_tutor_df[this_crit] == val).sum()
                this_tutor_crit.append(this_sum)
            this_crit_list.append(this_tutor_crit)
            this_crit_this_tutor = pd.DataFrame(this_tutor_crit,
                                                columns=this_header,
                                                index=crit_levels_list)
            f.make_feedback_chart(
                this_crit_this_tutor,
                c.d['charts'] + this_crit + "_" + tutor + ".png")

        this_crit_all_tutors = pd.DataFrame(this_crit_list,
                                            columns=crit_levels_list,
                                            index=tutor_list)
        this_crit_all_tutors = this_crit_all_tutors.T
        f.make_feedback_chart(this_crit_all_tutors,
                              c.d['charts'] + this_crit + "_all.png")

    with open(c.d['yaml'] + 'all.yaml', 'w') as out:
        f.pandoc_yaml(out, 'All Tutors')

    with open(c.d['css'] + 'all_conf.css', 'w') as out:
        f.pandoc_css(out, 'Course Feedback', 'conf')

    # open up a file to print to
    with open(c.d['md'] + 'all_charts.md', 'w') as out:
        print("# Quantitative Feedback\n\n", file=out)
        for i, row in crit.iterrows():
            this_crit = str(row['field'])
            this_text = str(row['description'])
            this_image = c.d['charts'] + this_crit + "_all.png"
            print("### " + this_text + "\n\n", file=out)
            print("![](../../." + this_image + ")\n\n", file=out)

    confidential_files = [c.d['md'] + 'all_charts.md']

    for tutor in tutor_list:
        print(tutor)
        this_tutor_df = feedback_course_df[
            feedback_course_df['tutor_name'].str.contains(tutor)]
        with open(c.d['yaml'] + tutor + '.yaml', 'w') as out:
            f.pandoc_yaml(out, tutor)

        with open(c.d['css'] + tutor + '.css', 'w') as out:
            f.pandoc_css(out, tutor, 'anon')

        # open up a file to print to
        with open(c.d['md'] + tutor + '.md', 'w') as out:
            for i, row in crit.iterrows():
                this_crit = str(row['field'])
                this_text = str(row['description'])
                this_image = c.d['charts'] + this_crit + "_" + tutor + ".png"
                print("### " + this_text + "\n", file=out)
                print("![](../../." + this_image + ")\n", file=out)

            for i, row in comm.iterrows():
                this_field = str(row['field'])
                this_description = str(row['description'])

                this_df = this_tutor_df[['tutor_name',
                                         this_field]].dropna()

                # skip the confidential comment field here
                if this_field != 'comment_confidential':
                    print("\n\n## " + this_description + "\n\n", file=out)
                    for j, i_row in this_df.iterrows():
                        this_text = str(i_row[this_field])
                        # only print non-empty, non-placeholder comments
                        if this_text not in ("", "nan", "N/A"):
                            this_text_clean = BeautifulSoup(
                                this_text, features="html5lib")
                            print("**Student Comment**\n\n" +
                                  this_text_clean.get_text() + "\n\n",
                                  file=out)

        f.pandoc_html_single(tutor)
        f.pandoc_pdf(tutor)

        with open(c.d['md'] + tutor + '_conf.md', 'w') as out:
            print("\n\n# Feedback for " + tutor + "\n\n", file=out)
            confidential_files.append(c.d['md'] + tutor + '_conf.md')

            # loop through the comment columns
            for i, row in comm.iterrows():
                this_field = str(row['field'])
                this_description = str(row['description'])

                this_df = this_tutor_df[['tutor_name', 'user',
                                         this_field]].dropna()

                print("\n\n## " + this_description + "\n\n", file=out)
                for i, i_row in this_df.iterrows():
                    this_text = str(i_row[this_field])
                    this_user = str(i_row['user'])
                    # only print non-empty, non-placeholder comments
                    if this_text not in ("", "nan", "N/A"):
                        this_text_clean = BeautifulSoup(this_text,
                                                        features="html5lib")
                        print("**" + this_user + "**\n\n" +
                              this_text_clean.get_text() + "\n\n",
                              file=out)

    with open(c.d['md'] + "all.md", 'w') as outfile:
        for fname in confidential_files:
            with open(fname) as infile:
                outfile.write(infile.read())

    f.pandoc_html_toc('all', 'all', 'conf')
    f.pandoc_pdf('all')

    f.pnt_notice(c.msg['console_complete'], os.path.basename(__file__))
Example #8
def analysis_many_eyes():
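    '''generate analytics (ratings, word counts, readability) on each student's reviews'''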

    cfg = f.load_config()

    # print message to console - starting!
    f.pnt_notice(c.msg['console_start'], os.path.basename(__file__))

    # print message to console
    f.pnt_info(c.msg["console_loading"])

    # load in tsvs of needed fields
    student = f.load_tsv('students')
    student.drop_duplicates(keep='first', inplace=True)
    student.dropna(how='any', subset=['user'], inplace=True)
    students = student.copy()

    fields = f.load_tsv('fields')
    crit_levels = f.load_tsv('crit_levels')

    data_self = f.load_tsv('data_self')
    data_shadow = f.load_tsv('data_shadow')
    data_tutor = f.load_tsv('data_tutor')
    data_client = f.load_tsv('data_client')

    # create a df of just the crit and the comments
    crit = f.filter_row('fields', 'field', 'crit_')
    comm = f.filter_row('fields', 'field', 'comment_')

    # print message to console
    f.pnt_info(c.msg["console_reading_feedback_files"])

    # load data and create list of teams
    teams = f.load_tsv('students')
    teams.drop_duplicates(subset=['group'], keep='first', inplace=True)

    # create a team list to iterate through
    team_list = []
    for i, row in teams.iterrows():
        this_team = row['group']
        team_list.append(this_team)

    # create a list of crit for access
    crit_list = []
    crit_list_header = ['team', 'role']
    for j, row in crit.iterrows():
        crit_list.append(row['label'])
        crit_list_header.append(row['label'])

    # create a list of comments for access
    comment_list = f.create_list(comm, 'field')

    # create a df for access for self and shadow
    f.many_eyes_dataframe_sort('data_self')
    f.many_eyes_dataframe_sort('data_shadow')

    self_df = f.load_tsv('data_self_sorted')
    shadow_df = f.load_tsv('data_shadow_sorted')

    # create a df to access for tutor comments
    tutor_df = c.df['data_tutor']
    tutor_df.replace(cfg['audit_chart']['find_labels'],
                     cfg['audit_chart']['replace_values'],
                     inplace=True)

    # create a df to access for client comments
    client_df = c.df['data_client']
    client_df.replace(cfg['audit_chart']['find_client_labels'],
                      cfg['audit_chart']['replace_client_values'],
                      inplace=True)

    f.pnt_info(c.msg["console_many_eyes"])

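    # add the configured readability stats to each self and shadow review comment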
    print("Cleaning the self data...")
    for i, row in self_df.iterrows():
        for readability_list in cfg['analytics']['readability_stats']:
            f.readability_stats('data_self_sorted', row, i, 'crit_comment',
                                readability_list[0], readability_list[1],
                                readability_list[2])

    print("Cleaning the shadow data...")
    for i, row in shadow_df.iterrows():
        for readability_list in cfg['analytics']['readability_stats']:
            f.readability_stats('data_shadow_sorted', row, i, 'crit_comment',
                                readability_list[0], readability_list[1],
                                readability_list[2])

    team_stats_list = []

    # report to the console
    print('Generating team stats...')

    # for each team
    for team in team_list:
        # work through the many eyes data
        for eye in cfg['many_eyes']['eyes']:
            this_stats_list = [team, eye]
            this_df = locals()[eye + "_df"]
            this_crit_df = this_df[this_df['team'].str.contains(team,
                                                                na=False)]

            if not this_crit_df.empty:
                for j, row in crit.iterrows():

                    if eye == 'self' or eye == 'shadow':
                        this_crit_df = this_df[
                            this_df['crit_text'].str.contains(row['label'])]
                        this_stats_list.append(this_crit_df['crit_val'].mean())
                    else:
                        try:
                            this_stats_list.append(
                                this_crit_df[row['field']].mean())
                        except KeyError:
                            this_stats_list.append(0)

            team_stats_list.append(this_stats_list)

    team_stats = pd.DataFrame(team_stats_list, columns=crit_list_header)
    team_stats_ave_list = []
    for team in team_list:

        this_team_stats = team_stats[team_stats['team'].str.contains(team)]

        this_team_ave_list = [team, 'average']
        for crit in crit_list:
            this_team_crit_mean = this_team_stats[crit].mean()
            this_team_ave_list.append(this_team_crit_mean)

        team_stats_ave_list.append(this_team_ave_list)

    team_stats_ave_df = pd.DataFrame(team_stats_ave_list,
                                     columns=crit_list_header)

    frames = [team_stats, team_stats_ave_df]
    team_stats_df = pd.concat(frames)

    crit = f.filter_row('fields', 'field', 'crit_')

    # report to the console
    print('Generating team charts...')
    for team in team_list:
        print(team)
        #loop through the crit columns
        for i, row in crit.iterrows():
            this_crit_label = row['label']
            this_crit_field = row['field']

            this_team_crit_list = []
            this_team_crit_header = ['role', 'value']
            for eye in cfg['many_eyes']['eyes']:
                this_df = locals()[eye + "_df"]
                this_team_df = this_df[this_df['team'].str.contains(team,
                                                                    na=False)]
                if not this_team_df.empty:
                    if eye == 'self' or eye == 'shadow':
                        this_team_crit_df = this_team_df[this_team_df[
                            'crit_text'].str.contains(this_crit_label)]
                        for j, row in this_team_crit_df.iterrows():
                            this_team_crit_list.append([eye, row['crit_val']])
                    else:
                        for j, row in this_team_df.iterrows():
                            try:
                                this_team_crit_list.append(
                                    [eye, row[this_crit_field]])
                            except KeyError:
                                this_team_crit_list.append([eye, 0])
            this_team_crit_df = pd.DataFrame(this_team_crit_list,
                                             columns=this_team_crit_header).T
            this_team_crit_df.columns = this_team_crit_df.iloc[0]
            this_team_crit_df = this_team_crit_df.iloc[1:].rename_axis(None,
                                                                       axis=1)
            this_team_crit_df['average'] = this_team_crit_df.mean(axis=1)
            f.make_audit_crit_chart(
                this_team_crit_df,
                c.d['charts'] + team + "_" + this_crit_field + "_audit.png")

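    # for each self review: rating difference from the average and words per sentence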
    self_crit_val_diff_list = []
    self_crit_val_abs_list = []
    self_wps_list = []
    for i, row in self_df.iterrows():
        this_crit_val = row['crit_val']
        this_crit_text = row['crit_text']
        this_team = row['team']
        this_wc = row['wc']
        this_sc = row['sc']

        this_team_row = team_stats_ave_df[
            team_stats_ave_df['team'].str.contains(this_team)]
        # this_crit_ave = this_team_row.iloc[0][this_crit_text]
        this_crit_ave = 0

        this_diff = this_crit_val - this_crit_ave
        this_diff_abs = abs(this_diff)

        self_crit_val_diff_list.append(this_diff)
        self_crit_val_abs_list.append(this_diff_abs)

        this_wps = this_wc / this_sc
        self_wps_list.append(this_wps)

    self_df['crit_val_diff'] = self_crit_val_diff_list
    self_df['crit_val_abs'] = self_crit_val_abs_list
    self_df['wps'] = self_wps_list

    shadow_crit_val_diff_list = []
    shadow_crit_val_abs_list = []
    shadow_wps_list = []
    print(shadow_df)
    for i, row in shadow_df.iterrows():
        this_crit_val = row['crit_val']
        this_crit_text = row['crit_text']
        this_team = row['team']
        this_wc = row['wc']
        this_sc = row['sc']

        this_team_row = team_stats_ave_df[
            team_stats_ave_df['team'].str.contains(this_team)]
        if not this_team_row.empty:
            this_crit_ave = this_team_row.iloc[0][this_crit_text]
        else:
            this_crit_ave = 0

        this_diff = this_crit_val - this_crit_ave
        this_diff_abs = abs(this_diff)

        shadow_crit_val_diff_list.append(this_diff)
        shadow_crit_val_abs_list.append(this_diff_abs)

        this_wps = this_wc / this_sc
        shadow_wps_list.append(this_wps)

    shadow_df['crit_val_diff'] = shadow_crit_val_diff_list
    shadow_df['crit_val_abs'] = shadow_crit_val_abs_list
    shadow_df['wps'] = shadow_wps_list

    self_df['wc_pct_rank'] = self_df.wc.rank(pct=True)
    self_df['wps_pct_rank'] = self_df.wps.rank(pct=True)
    self_df['flesch_pct_rank'] = self_df.flesch.rank(pct=True)
    self_df['crit_val_diff_rank'] = self_df.crit_val_diff.rank(pct=True)

    shadow_df['wc_pct_rank'] = shadow_df.wc.rank(pct=True)
    shadow_df['wps_pct_rank'] = shadow_df.wps.rank(pct=True)
    shadow_df['flesch_pct_rank'] = shadow_df.flesch.rank(pct=True)
    shadow_df['crit_val_diff_rank'] = shadow_df.crit_val_diff.rank(pct=True)

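    # score bands: percentile ranks use the score_pct thresholds and raw word
    # counts use score_act; flesch and rating-difference scores run in reverse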
    wc_score = [-2, -1, 0, 1, 2]
    wps_score = [-2, -1, 0, 1, 2]
    flesch_score = [2, 1, 0, -1, -2]
    cvd_score = [2, 1, 0, -1, -2]
    wc_score_actual = [-2, -1, 0, 1, 2]

    score_pct = [0.2, 0.4, 0.6, 0.8, 1.0]
    score_act = [25, 50, 100, 200, 10000]

    score_dataframe(self_df, 'wc_pct_rank', 'wc_score', wc_score, score_pct)
    score_dataframe(shadow_df, 'wc_pct_rank', 'wc_score', wc_score, score_pct)
    score_dataframe(self_df, 'wps_pct_rank', 'wps_score', wps_score, score_pct)
    score_dataframe(shadow_df, 'wps_pct_rank', 'wps_score', wps_score,
                    score_pct)
    score_dataframe(self_df, 'flesch_pct_rank', 'flesch_score', flesch_score,
                    score_pct)
    score_dataframe(shadow_df, 'flesch_pct_rank', 'flesch_score', flesch_score,
                    score_pct)
    score_dataframe(self_df, 'crit_val_diff_rank', 'crit_val_score', cvd_score,
                    score_pct)
    score_dataframe(shadow_df, 'crit_val_diff_rank', 'crit_val_score',
                    cvd_score, score_pct)

    score_dataframe(self_df, 'wc', 'wc_score_act', wc_score, score_act)
    score_dataframe(shadow_df, 'wc', 'wc_score_act', wc_score, score_act)

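    # histogram charts for each metric across the self and shadow reviews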
    col_chart_list = ["wc", "wps", "flesch", "crit_val_diff"]

    for col in col_chart_list:

        if col == "flesch":
            chart_min = 0
            chart_max = 100
        elif col == "crit_val_diff":
            chart_min = -5
            chart_max = 5
        else:
            chart_min = 0
            this_max = max(self_df[col].max(), shadow_df[col].max())
            chart_max = int(round(this_max / 20)) * 20

        f.make_col_chart(self_df, col, 'self', chart_min, chart_max)
        f.make_col_chart(shadow_df, col, 'shadow', chart_min, chart_max)

    # for each student
    for i, row in students.iterrows():
        # report to the console
        this_user = str(row['user'])
        this_first = str(row['firstname'])
        this_last = str(row['lastname'])
        self_team = str(row['group'])
        shadow_team = str(row['shadowteam'])

        print(this_user)

        this_self_df = self_df[self_df['username'].str.contains(this_user)]
        this_shadow_df = shadow_df[shadow_df['username'].str.contains(
            this_user)]

        if this_self_df.empty and this_shadow_df.empty:
            with open(c.d["md"] + str(this_user) + '.md', 'w') as out:
                print("## " + this_user + " - " + this_first + " " +
                      this_last + "\n\n",
                      file=out)
                print("**No reviews received.**\n\n", file=out)
        else:
            try:

                local_text_analysis(this_self_df, this_user, 'self')
                local_text_analysis(this_shadow_df, this_user, 'shadow')

                with open(c.d["md"] + str(this_user) + '.md', 'w') as out:
                    print("## " + this_user + " - " + this_first + " " +
                          this_last + "\n\n",
                          file=out)
                    print(
                        "**This feedback report shows analytics on the feedback you gave in the review.**\n\n",
                        file=out)
                    print(
                        "By considering the analytics in this report, you should get a sense of how your feedback compares to others.\n\n",
                        file=out)
                    print("# Project Audits\n\n", file=out)
                    print("*Compare your review to others' perspectives.*\n\n",
                          file=out)
                    print("## Project Team: " + self_team + "\n\n", file=out)
                    if this_self_df.empty:
                        print("* No review submitted", file=out)
                    else:
                        for i, row in this_self_df.iterrows():

                            print("*Your review of " + str(row['crit_text']) +
                                  ":* **" + str(row['crit_desc']) + "**\n\n",
                                  file=out)
                            this_crit = f.filter_row('fields', 'label',
                                                     row['crit_text'])
                            for j, j_row in this_crit.iterrows():
                                this_crit_field = j_row['field']
                                print("![](../../." + c.d['charts'] +
                                      self_team + "_" + this_crit_field +
                                      "_audit.png)\n\n",
                                      file=out)
                            print("\n*Chart of all reviews of " + self_team +
                                  " for " + str(row['crit_text']) + "*\n\n",
                                  file=out)

                    print("<div class=\"new-page\">.</div>", file=out)
                    print("## Shadow Team: " + shadow_team + "\n\n", file=out)
                    if this_shadow_df.empty:
                        print("* No review submitted", file=out)
                    else:
                        for i, row in this_shadow_df.iterrows():
                            print("*Your review of " + str(row['crit_text']) +
                                  ":* **" + str(row['crit_desc']) + "**\n\n",
                                  file=out)
                            this_crit = f.filter_row('fields', 'label',
                                                     row['crit_text'])
                            for j, j_row in this_crit.iterrows():
                                this_crit_field = j_row['field']
                                print("![](../../." + c.d['charts'] +
                                      shadow_team + "_" + this_crit_field +
                                      "_audit.png)\n\n",
                                      file=out)
                            print("\n*Chart of all reviews of " + shadow_team +
                                  " for " + str(row['crit_text']) + "*\n\n",
                                  file=out)

                    print("<div class=\"new-page\">.</div>", file=out)
                    print("## Difference in review evaluations\n\n", file=out)
                    print(
                        "*A negative value means that your review was below the average of reviews; positive, above. '2' represents a 'full band', such as the difference between 'Baseline' and 'Acceptable'.*\n\n",
                        file=out)

                    this_head = ["Team", "Role"]
                    this_self_list = [self_team, "Self"]
                    this_shadow_list = [shadow_team, "Shadow"]

                    if not this_self_df.empty:
                        for i, row in this_self_df.iterrows():
                            try:
                                this_head.append(str(row['crit_text']))
                                this_self_list.append("**" + str(
                                    int(round(row['crit_val_diff'] * 4) / 4)) +
                                                      "** half-bands")
                            except:
                                this_self_list.append("**N/A**")
                    this_head.append("Class Average")
                    try:
                        this_self_list.append(
                            str(
                                int(
                                    round(this_self_df['crit_val_abs'].mean() *
                                          4) / 4)) + " (self)")
                    except:
                        this_self_list.append("**N/A**")

                    if not this_shadow_df.empty:
                        for i, row in this_shadow_df.iterrows():
                            try:
                                this_head.append(str(row['crit_text']))
                                this_shadow_list.append("**" + str(
                                    int(round(row['crit_val_abs'] * 4) / 4)) +
                                                        "** half-bands")
                            except:
                                this_shadow_list.append("**N/A**")

                    this_head.append("Class Average")
                    try:
                        this_shadow_list.append(
                            str(
                                int(
                                    round(this_shadow_df['crit_val_abs'].mean(
                                    ) * 4) / 4)) + " (shadow)")
                    except:
                        this_shadow_list.append("**N/A**")

                    this_html_df = pd.DataFrame(
                        list(zip(this_head, this_self_list,
                                 this_shadow_list))).T
                    this_html_df.columns = this_html_df.iloc[0]
                    this_html_df = this_html_df.iloc[1:].rename_axis(None,
                                                                     axis=1)
                    # this_html_df.set_index("Team",drop=True,inplace=True)
                    print(this_html_df.to_html(), file=out)

                    print("![](../../." + c.d['charts'] +
                          "crit_val_diff_self.png)\n",
                          file=out)
                    print("![](../../." + c.d['charts'] +
                          "crit_val_diff_shadow.png)\n",
                          file=out)
                    print(
                        "\n*Top: Histogram of differences for self reviews.*",
                        file=out)
                    print(
                        "*Bottom: Histogram of differences for shadow reviews.*\n\n",
                        file=out)
                    print("<div class=\"new-page\">.</div>", file=out)

                    print("# Descriptive statistics about your comments\n\n",
                          file=out)
                    print("## Word Count\n\n", file=out)
                    print(
                        "The course guide requests you to complete 250-500 words per review.\n\n",
                        file=out)
                    print("*Table of your review word count statistics*\n\n",
                          file=out)

                    this_head = ["Team", "Role"]
                    this_self_list = [self_team, "Self"]
                    this_shadow_list = [shadow_team, "Shadow"]

                    if not this_self_df.empty:
                        for i, row in this_self_df.iterrows():
                            try:
                                this_head.append(str(row['crit_text']))
                                this_self_list.append(
                                    "**" + str(int(round(row['wc']))) +
                                    "** words")
                            except:
                                this_self_list.append("**N/A**")

                    this_head.append("Class Average")
                    try:
                        this_self_list.append(
                            str(int(round(this_self_df['wc'].mean()))) +
                            " (self)")
                    except:
                        this_self_list.append("**N/A**")

                    if not this_shadow_df.empty:
                        for i, row in this_shadow_df.iterrows():
                            try:
                                this_head.append(str(row['crit_text']))
                                this_shadow_list.append(
                                    "**" + str(int(round(row['wc']))) +
                                    "** words")
                            except:
                                this_shadow_list.append("**N/A**")

                    this_head.append("Class Average")
                    try:
                        this_shadow_list.append(
                            str(int(round(this_shadow_df['wc'].mean()))) +
                            " (shadow)")
                    except:
                        this_shadow_list.append("**N/A**")
                    this_html_df = pd.DataFrame(
                        list(zip(this_head, this_self_list,
                                 this_shadow_list))).T
                    this_html_df.columns = this_html_df.iloc[0]
                    this_html_df = this_html_df.iloc[1:].rename_axis(None,
                                                                     axis=1)
                    # this_html_df.set_index("Team",drop=True,inplace=True)
                    print(this_html_df.to_html(), file=out)

                    print("![](../../." + c.d['charts'] + "wc_self.png)\n",
                          file=out)
                    print("![](../../." + c.d['charts'] + "wc_shadow.png)\n",
                          file=out)
                    print(
                        "\n*Top: Histogram of word counts for self reviews.*",
                        file=out)
                    print(
                        "*Bottom: Histogram of word counts for shadow reviews.*\n\n",
                        file=out)
                    print("<div class=\"new-page\">.</div>", file=out)
                    print("## Words per Sentence Count\n\n", file=out)
                    print(
                        "Generally, shorter sentences are easier to read.\n\n",
                        file=out)
                    print(
                        "*Table of your review words per sentence count statistics*\n\n",
                        file=out)

                    this_head = ["Team", "Role"]
                    this_self_list = [self_team, "Self"]
                    this_shadow_list = [shadow_team, "Shadow"]

                    if not this_self_df.empty:
                        for i, row in this_self_df.iterrows():
                            try:
                                this_head.append(str(row['crit_text']))
                                this_self_list.append(
                                    "**" + str(int(round(row['wps']))) +
                                    "** wps")
                            except:
                                this_self_list.append("**N/A**")

                    this_head.append("Class Average")
                    try:
                        this_self_list.append(
                            str(int(round(this_self_df['wps'].mean()))) +
                            " (self)")
                    except:
                        this_self_list.append("**N/A**")

                    if not this_shadow_df.empty:
                        for i, row in this_shadow_df.iterrows():
                            try:
                                this_head.append(str(row['crit_text']))
                                this_shadow_list.append(
                                    "**" + str(int(round(row['wps']))) +
                                    "** wps")
                            except:
                                this_shadow_list.append("**N/A**")

                    this_head.append("Class Average")
                    try:
                        this_shadow_list.append(
                            str(int(round(this_shadow_df['wps'].mean()))) +
                            " (shadow)")
                    except:
                        this_shadow_list.append("**N/A**")

                    this_html_df = pd.DataFrame(
                        list(zip(this_head, this_self_list,
                                 this_shadow_list))).T
                    this_html_df.columns = this_html_df.iloc[0]
                    this_html_df = this_html_df.iloc[1:].rename_axis(None,
                                                                     axis=1)
                    # this_html_df.set_index("Team",drop=True,inplace=True)
                    print(this_html_df.to_html(), file=out)

                    print("![](../../." + c.d['charts'] + "wps_self.png)\n",
                          file=out)
                    print("![](../../." + c.d['charts'] + "wps_shadow.png)\n",
                          file=out)
                    print(
                        "\n*Top: Histogram of words per sentence for self reviews.*",
                        file=out)
                    print(
                        "*Bottom: Histogram of words per sentence for shadow reviews.*\n\n",
                        file=out)

                    print("<div class=\"new-page\">.</div>", file=out)
                    print("## Flesch–Kincaid readability test\n\n", file=out)
                    print(
                        "Based on a score out of 100, lower scores are typically harder to read: scores below 50 are considered difficult to read. [More information on Wikipedia](https://en.wikipedia.org/wiki/Flesch%E2%80%93Kincaid_readability_tests)\n\n",
                        file=out)
                    print(
                        "*Table of your Flesch–Kincaid readability score statistics*\n\n",
                        file=out)

                    this_head = ["Team", "Role"]
                    this_self_list = [self_team, "Self"]
                    this_shadow_list = [shadow_team, "Shadow"]

                    if not this_self_df.empty:
                        for i, row in this_self_df.iterrows():
                            try:
                                this_head.append(str(row['crit_text']))
                                this_self_list.append(
                                    "**" + str(int(round(row['flesch']))) +
                                    "**")
                            except:
                                this_self_list.append("**N/A**")

                    this_head.append("Class Average")
                    try:
                        this_self_list.append(
                            str(int(round(this_self_df['flesch'].mean()))) +
                            " (self)")
                    except:
                        this_self_list.append("**N/A**")

                    if not this_shadow_df.empty:
                        for i, row in this_shadow_df.iterrows():
                            try:
                                this_head.append(str(row['crit_text']))
                                this_shadow_list.append(
                                    "**" + str(int(round(row['flesch']))) +
                                    "**")
                            except:
                                this_shadow_list.append("**N/A**")

                    this_head.append("Class Average")
                    try:
                        this_shadow_list.append(
                            str(int(round(this_shadow_df['flesch'].mean()))) +
                            " (shadow)")
                    except:
                        this_shadow_list.append("**N/A**")

                    this_html_df = pd.DataFrame(
                        list(zip(this_head, this_self_list,
                                 this_shadow_list))).T
                    this_html_df.columns = this_html_df.iloc[0]
                    this_html_df = this_html_df.iloc[1:].rename_axis(None,
                                                                     axis=1)
                    # this_html_df.set_index("Team",drop=True,inplace=True)
                    print(this_html_df.to_html(), file=out)

                    print("![](../../." + c.d['charts'] + "flesch_self.png)\n",
                          file=out)
                    print("![](../../." + c.d['charts'] +
                          "flesch_shadow.png)\n",
                          file=out)
                    print(
                        "\n*Top: Histogram of Flesch-Kincaid readability for self reviews.*",
                        file=out)
                    print(
                        "*Bottom: Histogram of Flesch-Kincaid readability for shadow reviews.*\n\n",
                        file=out)

                    role_list = ["self", "shadow"]

                    print("# Analytics generated from your comments", file=out)
                    print("\n*" + cfg['analytics']['nlp_source_comment'] +
                          "*\n",
                          file=out)

                    # using enumerate to access list indices for name and title
                    # work through the defined nlp endpoints
                    for num, endpoint in enumerate(cfg['aylien']['endpoints'],
                                                   start=0):
                        name = (cfg['aylien']['endpoint_name'][num])
                        title = (cfg['aylien']['endpoint_title'][num])

                        print("<article id=\"columns\">", file=out)
                        print("\n## Comment " + title + "\n\n", file=out)
                        if endpoint == "sentiment":
                            print("<section>", file=out)

                        # loop through the analysis for each comment
                        for role in role_list:

                            # load the nlp json response to read from
                            with open(c.d['nlp'] + this_user + "_comment_" +
                                      role + ".json") as json_file:
                                this_nlp = json.load(json_file)

                                # print a header to out
                                print("\n**" + title + " for " + role +
                                      " comments**"
                                      "\n\n",
                                      file=out)
                                if endpoint == "sentiment":
                                    item_out = ""
                                    for item in this_nlp[name]:
                                        item_out += item
                                    print("* " + item_out, file=out)
                                else:
                                    try:
                                        item_check = []
                                        item_out = ""
                                        for item in this_nlp[name]:
                                            # replace hashes so that they are not interpreted in markdown
                                            if endpoint != 'entities':
                                                item_out = item.replace(
                                                    "#", "\\#")
                                                print("* " + item_out,
                                                      file=out)
                                            else:
                                                this_item = item.split(' ')
                                                item_out = this_item[
                                                    0].replace("#", "\\#")
                                                if item_out not in item_check:
                                                    # print to out each item in a list
                                                    print("* " + item_out,
                                                          file=out)
                                                    item_check.append(item_out)
                                    except Exception:
                                        # if there's nothing there, print N/A
                                        print("* N/A", file=out)

                        if endpoint == "sentiment":
                            print("</section>", file=out)
                        print("</article>", file=out)
                    print("\n\n# Wordclouds generated from your comments\n",
                          file=out)

                    # loop through the analysis for each comment
                    for role in role_list:
                        print("**Your " + role + " review**\n\n", file=out)
                        print("![](../../." + c.d['wordcloud'] + this_user +
                              "_" + role + ".png)\n",
                              file=out)
                        print(
                            "\n*Above: Wordcloud generated from feedback you gave in "
                            + role + " reviews.*\n",
                            file=out)

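                    # reproduce the full text of each review the student submitted, for both teams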
                    print("\n\n# Record of submissions", file=out)

                    if this_self_df.empty:
                        print("* No review submitted", file=out)
                    else:
                        for i, row in this_self_df.iterrows():

                            print("**Your review of " + str(row['crit_text']) +
                                  " for " + self_team + "**\n\n",
                                  file=out)
                            print(str(row['crit_comment']) + "\n\n", file=out)

                    if this_shadow_df.empty:
                        print("* No review submitted", file=out)
                    else:
                        for i, row in this_shadow_df.iterrows():

                            print("**Your review of " + str(row['crit_text']) +
                                  " for " + shadow_team + "**\n\n",
                                  file=out)
                            print(str(row['crit_comment']) + "\n\n", file=out)
            except Exception:
                # any failure while compiling this student's feedback falls back to an error page
                with open(c.d["md"] + str(this_user) + '.md', 'w') as out:
                    print("## " + this_user + " - " + this_first + " " +
                          this_last + "\n\n",
                          file=out)
                    print("**Error in review compilation.**\n\n", file=out)

        # create the weasyprint variables
        with open(c.d['yaml'] + str(this_user) + '.yaml', 'w') as out:
            f.pandoc_yaml(out, str(this_user))

        # create the weasyprint stylesheet
        with open(c.d['css'] + str(this_user) + ".css", 'w') as out:
            f.pandoc_css(out, str(this_user), 'anon')

        # convert the markdown to html and then to pdf via pandoc

        f.pandoc_html_single(str(this_user))
        f.pandoc_pdf(str(this_user))
    # loop through each row and create a secret for each student

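    # the sha1 hash of the username gives each pdf an unguessable public filename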
    for i, row in students.iterrows():
        this_user = row['user']
        secret = hashlib.sha1(row['user'].encode('utf-8')).hexdigest()
        secret_file = this_user + "-" + secret + ".pdf"
        comment = "<a href=\"" + cfg['assignment'][
            'feedback_url'] + "reviews/" + this_user + "-" + secret + ".pdf\">PDF Feedback on your Reviews</a>"

        # update the df
        students.loc[i, 'secret'] = comment

        # cp pdf to secret here
        file_from = c.d['pdf'] + this_user + ".pdf"
        file_to = c.d['review'] + secret_file
        copyfile(file_from, file_to)

        this_self_df = self_df[self_df['username'].str.contains(this_user)]
        this_shadow_df = shadow_df[shadow_df['username'].str.contains(
            this_user)]

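        # sum this student's per-review scores for word count, words per sentence,
        # flesch readability and criteria values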
        self_wc_score = this_self_df['wc_score'].sum()
        shadow_wc_score = this_shadow_df['wc_score'].sum()
        self_wps_score = this_self_df['wps_score'].sum()
        shadow_wps_score = this_shadow_df['wps_score'].sum()
        self_flesch_score = this_self_df['flesch_score'].sum()
        shadow_flesch_score = this_shadow_df['flesch_score'].sum()

        self_crit_score = this_self_df['crit_val_score'].sum()
        shadow_crit_score = this_shadow_df['crit_val_score'].sum()

        this_text_score = (self_wc_score + shadow_wc_score + self_wps_score +
                           shadow_wps_score + self_flesch_score +
                           shadow_flesch_score)
        this_crit_score = self_crit_score + shadow_crit_score
        students.loc[i, 'text_score'] = this_text_score
        students.loc[i, 'crit_score'] = this_crit_score

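    # convert the raw scores into percentile ranks across the cohort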
    students['text_rank'] = students.text_score.rank(pct=True)
    students['crit_rank'] = students.crit_score.rank(pct=True)

    for i, row in students.iterrows():
        this_text_rank = row['text_rank']
        this_crit_rank = row['crit_rank']
        this_combined_rank = this_text_rank + this_crit_rank

        students.loc[i, 'combined_rank'] = this_combined_rank

    students['score_rank'] = students.combined_rank.rank(pct=True)

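    # write the per-student summary, including the secret feedback link, to the analysis csv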
    this_out = students[[
        'user', 'secret', 'text_score', 'crit_score', 'score_rank'
    ]]
    this_out.to_csv(c.f['wattle_analysis'], index=False)
def wattle_csv_many_eyes():
    cfg = f.load_config()

    f.pnt_notice(c.msg['console_start'], os.path.basename(__file__))
    f.pnt_info(c.msg["console_wattle"])
    f.load_tsv('data_tutor')

    teams = f.load_tsv('students')
    teams.drop_duplicates(subset=['group'], keep='first', inplace=True)
    for i, row in teams.iterrows():
        this_team = row['group']
        # copy the team's anonymised audit pdf into the upload directory
        file_from = (c.d['pdf'] + this_team + "_" +
                     cfg['assignment']['assignment_short'] + "_audit_anon.pdf")
        file_to = (c.d['upload'] + this_team + "_" +
                   cfg['assignment']['assignment_short'] + ".pdf")
        copyfile(file_from, file_to)

    f.load_tsv('students')
    students = f.filter_row('students', 'role', 'Student')
    user_list = []
    comment_list = []

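    # build an upload row for each student with links to their team and shadow-team feedback pdfs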
    for i, row in students.iterrows():
        user = row['user']
        role = row['role']
        project_team = row['group']
        team_row = f.filter_row('data_tutor', 'team', project_team)
        if not team_row.empty:
            team_performance = team_row.iloc[0]['suggestedindicator']
        else:
            team_performance = "TBA"

        if role != 'student':
            project_team = row['group']
            shadow_team = row['shadowteam']
            comment = "Your Team's Progress Indicator:"
            comment += "<ul><li><strong>" + team_performance + "</strong></li></ul>"
            comment += "Feedback for Your Teams:"
            comment += "<ul><li><a href=\"" + cfg['assignment'][
                'feedback_url'] + "/" + str(
                    project_team
                ) + "_" + cfg['assignment'][
                    'assignment_short'] + ".pdf\">PDF Feedback for your team: " + str(
                        project_team) + "</a></li>"
            comment += "<li><a href=\"" + cfg['assignment'][
                'feedback_url'] + "/" + str(
                    shadow_team
                ) + "_" + cfg['assignment'][
                    'assignment_short'] + ".pdf\">PDF Feedback for shadow team: " + str(
                        shadow_team) + "</a></li></ul>"

            user_list.append(user)
            comment_list.append(comment)

    f.pnt_info(c.msg['console_upload'])

    this_out = pd.DataFrame()
    this_out['users'] = user_list
    this_out['feedback'] = comment_list
    this_out.to_csv(c.f['wattle'], index=False)

    f.pnt_notice(c.msg['console_complete'], os.path.basename(__file__))
def analysis_marks():

    cfg = f.load_config()

    f.pnt_notice(c.msg['console_start'], os.path.basename(__file__))
    f.pnt_info(c.msg["console_loading"])

    marks_df = f.load_tsv('marks')
    crit_levels = f.load_tsv('crit_levels')

    # create dataframes of the criterion fields and the comment fields
    crit = f.filter_row('fields', 'field', 'crit_')
    comm = f.filter_row('fields', 'field', 'comment_')

    crit_list = f.create_list(crit, 'field')
    comment_list = f.create_list(comm, 'field')

    f.pnt_info(c.msg["console_creating_feedback_files"])
    # clean up marks df
    c.df['marks'].loc[c.df['marks'].grade_final == 0, ['marker_name', 'marker_id']] = 'No_Submission', 'nil'

    for comment in comment_list:
        c.df['marks'][comment + "_txt"] = c.df['marks'][comment].apply(f.html_to_text)

    f.pnt_console("Processing the marks file")
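    # compute each configured readability statistic for every comment field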
    for i, row in c.df['marks'].iterrows():
        for comment in comment_list:
            for readability_list in cfg['analytics']['readability_stats']:
                f.readability_stats('marks', row, i, comment + '_txt', comment + "_" + readability_list[0], readability_list[1], readability_list[2])

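    # express the final grade as a percentage, then compute how the final and
    # calculated grades differ from the suggested grade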
    c.df['marks']['grade_final_pct'] = ( c.df['marks']['grade_final'] / int(cfg['assignment']['grade_final_out_of']) * 100 )
    c.df['marks']['diff_final_sugg'] = (c.df['marks']['grade_final_pct'] - c.df['marks']['grade_suggested']).round(decimals=1)
    c.df['marks']['diff_calc_sugg'] = (c.df['marks']['grade_calculated'] - c.df['marks']['grade_suggested']).round(decimals=1)

    print(c.df['marks']['grade_final'])
    f.make_hist_chart(c.df['marks'], 'grade_final_pct')
    f.make_boxplot_chart(c.df['marks'], 'grade_final_pct')


    # generate a dataframe of the mean final grade for each marker,
    # then drop the placeholder 'No_Submission' marker
    marker = c.df['marks']['grade_final'].groupby([c.df['marks']['marker_name']]).mean().reset_index()
    marker = marker[marker.marker_name != 'No_Submission']
    # create a marker_list for iteration
    marker_list = f.create_list(marker, 'marker_name')

    f.pnt_info("Building crit graphs...")
    # work through each criterion
    for criterion in crit_list:
        # print the criterion to the console
        print(criterion)
        this_crit_df = crit_levels['index']
        for i, row in marker.iterrows():

            this_marker_name = row['marker_name']
            this_marker_df = f.filter_row('marks', 'marker_name', this_marker_name)
            this_marker_stats = f.make_crit_list(crit, this_marker_df)
            
            this_col_sum = this_marker_stats[criterion].sum()
            this_marker_stats[this_marker_name] = this_marker_stats[criterion].apply(lambda x: x/this_col_sum*100)
            this_crit_df = pd.merge(this_crit_df, this_marker_stats[this_marker_name], on='index')

        print(this_crit_df)
        f.make_stacked_chart(this_crit_df, criterion, True)

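    # bin the grade differences into symmetric buckets for the stacked distribution charts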
    bin_values = [-10, -5.5, -2.5, -1.5, -0.5, 0.5, 1.5, 2.5, 5.5, 10]
    bin_labels = [-10, -5, -2, -1, 0, 1, 2, 5, 10]

    calc_sugg_df = pd.DataFrame(bin_labels,columns=['bin'])
    final_sugg_df = pd.DataFrame(bin_labels,columns=['bin'])

    c.df['marks']['bin_calc_sugg'] = pd.cut(c.df['marks']['diff_calc_sugg'], bins=bin_values, labels=bin_labels)
    c.df['marks']['bin_final_sugg'] = pd.cut(c.df['marks']['diff_final_sugg'], bins=bin_values, labels=bin_labels)

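    # express each marker's calculated-vs-suggested differences as percentages of the items they marked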
    for i, row in marker.iterrows():
        this_marker_name = row['marker_name']
        this_marker_df = f.filter_row('marks', 'marker_name', this_marker_name)

        this_marker_calc_df = pd.DataFrame(this_marker_df.bin_calc_sugg.value_counts()).reset_index()
        this_marker_calc_df = this_marker_calc_df.rename(columns={'index': 'bin'})

        this_col_sum = this_marker_calc_df['bin_calc_sugg'].sum()
        this_marker_calc_df[this_marker_name] = this_marker_calc_df['bin_calc_sugg'].apply(lambda x: x/this_col_sum*100)
        this_marker_calc_df = this_marker_calc_df.drop(['bin_calc_sugg'], axis=1)
        diff_calc_sugg_df = pd.merge(calc_sugg_df, this_marker_calc_df, on='bin')
        calc_sugg_df = diff_calc_sugg_df.set_index("bin")

    calc_sugg_df.reset_index(inplace=True)
    calc_sugg_df = calc_sugg_df.rename(columns={'bin': 'index'})
    print(calc_sugg_df)
    f.make_stacked_chart(calc_sugg_df, 'suggested', False)

    f.pnt_info("Analysing each marker...")

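    # per-marker analysis: grade statistics, readability means, nlp analysis and wordclouds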
    for i, row in marker.iterrows():
        this_marker_name = row['marker_name']
        print(this_marker_name)
        this_group_df=f.filter_row('marks', 'marker_name', row['marker_name'])

        marker.at[i, 'grade_count'] = this_group_df['grade_final'].count()
        marker.at[i, 'grade_mean'] = this_group_df['grade_final'].mean()
        marker.at[i, 'grade_min'] = this_group_df['grade_final'].min()
        marker.at[i, 'grade_max'] = this_group_df['grade_final'].max()
        marker.at[i, 'grade_std'] = this_group_df['grade_final'].std()
        marker.at[i, 'grade_skew'] = this_group_df['grade_final'].skew()

        # generate the readability stats
        for comment in comment_list:
            for readability_list in cfg['analytics']['readability_stats']:
                marker.at[i, comment + "_" + readability_list[0]] = this_group_df[comment + "_" + readability_list[0]].mean()

            # get the comment to pass to the nlp
            this_comment = " ".join(f.create_list(this_group_df, comment + "_txt"))

            # store the comment so that it can be accessed for the word cloud
            with open(c.d['txt'] + this_marker_name + "_" + comment + ".txt", 'w') as out:
                print(this_comment, file=out)

            this_nlp_file = Path(c.d['nlp'] + this_marker_name + "_" + comment + ".json")
            # check if the nlp data already exists;
            # this avoids repeated calls to the api during local testing
            if not this_nlp_file.is_file():
                # get the results from the api
                this_nlp = f.text_analysis_api(this_comment, 'comment', row['marker_name'])
                with open(this_nlp_file, 'w') as out:
                    print(this_nlp, file=out)

            # create a wordcloud using the wordcloud_cli interface
            subprocess.call("wordcloud_cli --width 1000 --height 250 --text " + c.d['txt'] + this_marker_name + "_" + comment + ".txt --imagefile " + c.d['wordcloud'] + this_marker_name + "_" + comment + ".png --fontfile ./includes/fonts/Roboto-Medium.ttf --background white --color blue", shell=True)

        # open a markdown file for each marker;
        # its contents are appended to the end of the analysis later
        with open(c.d['md'] + this_marker_name + '.md', 'w') as out:
            print("\n\n## Analysis of " + this_marker_name + "'s Feedback\n\n", file=out)

            # loop through the analysis for each comment
            for i, row in comm.iterrows():
                comment = row['field']
                field_text = row['label']
                print("\n\n## Wordcloud for " + field_text + "\n\n", file=out)
                print("![](../../." + c.d['wordcloud'] + this_marker_name + "_" + comment + ".png) \n\n", file=out)

        # using enumerate to access list indices for name and title
        # work through the defined nlp endpoints
        for num, endpoint in enumerate(cfg['aylien']['endpoints'], start=0):
            name = (cfg['aylien']['endpoint_name'][num])
            title = (cfg['aylien']['endpoint_title'][num])
            
            # treat sentiment differently, as it is better presented in a table
            if endpoint != 'sentiment':
                with open(c.d['md'] + this_marker_name + '.md', 'a') as out:
                    print("\n### Comment " + title + "\n\n", file=out)
                    print("\n*" + cfg['analytics']['nlp_source_comment']+  "*\n", file=out)

                    # loop through the analysis for each comment
                    for i, row in comm.iterrows():
                        comment = row['field']
                        field_text = row['label']


                        # load the nlp json response to read from
                        with open(c.d['nlp'] + this_marker_name + "_" + comment + ".json") as json_file:
                            this_nlp = json.load(json_file)

                            # print a header to out
                            print("\n**" + title + " for " + field_text +
                                  "**\n\n", file=out)
                            try:
                                item_out = ""
                                for item in this_nlp[name]:
                                    # escape hashes so they are not interpreted in markdown
                                    item_out = item.replace("#", "\\#")
                                    print("* " + item_out, file=out)
                            except Exception:
                                print("* N/A", file=out)
    
    # create a stat_chart for the marker means
    f.make_stat_chart(marker, 'marker_name', 'grade_mean', 'grade_mean')


    # work through the readability stats to create a chart
    for readability_list in cfg['analytics']['readability_stats']:

        columns_old = ['marker_name']
        columns_new = ['marker_name']

        for i, row in comm.iterrows():
            field = row['field']
            text = row['label']
            columns_old.append(field + "_" + readability_list[0])
            columns_new.append(text)

        this_marker = marker[columns_old].copy()
        this_marker.columns = columns_new
        f.make_stat_chart(this_marker, 'marker_name', columns_new, readability_list[0])

    # start by creating a file to compile everything into
    with open(c.d['md'] + cfg['analytics']['filename'] + ".md", 'w') as out:
        print("## " + cfg['analytics']['analytics_header'] + "\n\n", file=out)
        print("### " + cfg['analytics']['grade_table_header'] + "\n\n", file=out)
        print("*" + cfg['analytics']['grade_table_comment'] + "*\n\n", file=out)
        # create a summary table for display

        # treat zero grades (no submission) as missing when computing overall stats
        grade_final_nonzero = c.df['marks']['grade_final'].replace(0, np.NaN)
        overall_grade_count = grade_final_nonzero.count().round(1)
        overall_grade_mean = grade_final_nonzero.mean().round(1)
        overall_grade_min = grade_final_nonzero.min().round(1)
        overall_grade_max = grade_final_nonzero.max().round(1)
        overall_grade_std = grade_final_nonzero.std().round(1)
        overall_grade_skew = grade_final_nonzero.skew().round(1)


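        # assemble the marker summary table, appending an overall row and a no-submission row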
        marker_html = marker[
            ['marker_name', 'grade_count', 'grade_final', 'grade_std', 'grade_min', 'grade_max', 'grade_skew']].round(1)
        overall_row = ["Overall", overall_grade_count, overall_grade_mean, overall_grade_std, overall_grade_min, overall_grade_max, overall_grade_skew]
        marker_html.loc[-1] = overall_row
        last_row = len(marker_html.index)
        no_submission_count = len(c.df['marks'][c.df['marks']['grade_final'] == 0])
        no_submission_row = ["No_Submission", no_submission_count, 'NA', 'NA', 'NA', 'NA', 'NA']
        marker_html.loc[last_row] = no_submission_row  # adding a row
        marker_html = marker_html.sort_index()  # sorting by index

        marker_html.columns = ['Marker', 'Count', 'Mean', 'StDev', 'Min', 'Max', 'Skew']
        marker_html.set_index('Marker', inplace=True)
        print(marker_html.to_html(), file=out)

        print("## Boxplot of final grades \n\n", file=out)
        print("![](../../." + c.d['charts'] + "grade_final_pct_boxplot.png)\n\n", file=out)

        print("## Histogram of final grades \n\n", file=out)
        print("![](../../." + c.d['charts'] + "grade_final_pct.png)\n\n", file=out)

        print("<div class=\"no-break\">\n\n", file=out)
        print("## Difference between suggested and calculated grade \n\n", file=out)
        print("*This highlights the tendancy of markers to drift from the suggested mark provided in the Database*\n\n", file=out)
        print("![](../../." + c.d['charts'] + "count_suggested.png)\n\n", file=out)
        print("</div>\n\n", file=out)

        print("# " + cfg['analytics']['rubric_header'] + "\n\n", file=out)
        for loop_row in crit.itertuples():
            print("<div class=\"no-break\">\n\n", file=out)
            criterion = loop_row.field
            f.print_results_header(loop_row, out)
            print('*' + cfg['analytics']['rubric_comment']+ '*\n\n', file=out)
            print("![](../../." + c.d['charts'] + "count_" + criterion + ".png)\n\n", file=out)
            print("</div>\n\n", file=out)

        print("## " + cfg['analytics']['readability_header'] + "\n\n", file=out)
        for readability_list in cfg['analytics']['readability_stats']:
            print("<div class=\"no-break\">\n\n", file=out)
            print("\n\n### " + cfg['crit_chart'][readability_list[0]], file=out)
            print("![](../../." + c.d['charts']  + readability_list[0] + ".png)\n\n", file=out)
            print("*" + cfg['crit_chart'][readability_list[0]] + cfg['analytics']['readability_comment'] + "*\n\n", file=out)
            print("</div>\n\n", file=out)

        print("\n\n## " + cfg['analytics']['sentiment_header'] + "\n\n", file=out)

        sentiment_df = f.sentiment_table(comm, marker)
        sentiment_df.set_index('Name', inplace=True)
        print(sentiment_df.to_html(), file=out)

        print("# " + cfg['analytics']['summary_header']+ "\n\n", file=out)

    # combine the individual marker files
    with open(c.d['md'] + cfg['analytics']['filename'] + '.md', 'a') as out_file:
        for i, row in marker.iterrows():
            this_marker_name = row['marker_name']
            print(this_marker_name)
            with open(c.d['md'] + this_marker_name + '.md') as in_file:
                out_file.write(in_file.read())

    with open(c.d['md'] + cfg['analytics']['filename'] + '.md', 'a') as out:
        print("\n\n\n\n*** **END OF ANALYSIS** ***\n\n", file=out)

    with open(c.d['yaml'] + cfg['analytics']['filename'] + '.yaml', 'w') as out:
        f.pandoc_yaml(out, cfg['analytics']['filename'])
        
    with open(c.d['css'] + cfg['analytics']['filename'] + ".css", 'w') as out:
        f.pandoc_css(out, cfg['analytics']['filename'], 'anon')

    f.pandoc_html_single(cfg['analytics']['filename'])
    f.pandoc_pdf(cfg['analytics']['filename'])

    f.pnt_notice(c.msg['console_complete'], os.path.basename(__file__))