def main(opts, commline_list):
    """(main):
        Driver of the grade_master script (skeleton version).

        Opens the logfile and error file named in opts, writes the banner and
        the invoked options, lists the processing steps that are still to be
        implemented, and wraps up with timing information.
        Returns 0 on successful termination.
    """
    started_at = time.time()

    # standard script setup: log and error file are opened in append mode
    # with buffering switched off (third argument 0)
    logfile = open(opts.logfile, 'a', 0)
    error_file = open(opts.error_file, 'a', 0)

    def echo(message):
        # mirror one message on the screen and in the logfile
        print(message)
        logfile.write(message + '\n')

    banner(logfile, SCRIPT_NAME, SCRIPT_VERSION, REVISION_DATE, AUTHOR,
           DESCRIPTION)

    # document the options of this run
    print_invoked_opts(logfile, opts, commline_list)

    home_dir = os.getcwd()

    echo("------------------------------------------------------------------------------ ")

    #################################################################################################
    # Planned data acquisition (not implemented yet):
    #   - open the CSV file with the raw grade data, read it line by line, close it
    #   - split each line at the commas and parse the raw data
    #   - store the result in a useful data structure (which one is still open)
    #
    # Planned analysis (not implemented yet):
    #   - statistics, analysis, projections
    #   - current average according to the grading rules
    #   - translation of points into grades
    #   - comparison of different grading schemes
    #   - ranking of students, identification of best and worst students
    #   - details of what we want to do are still to be figured out
    #################################################################################################

    # wrap up section: the screen gets three trailing newlines, the logfile four
    closing_note = tot_exec_time_str(started_at) + "\n" + std_datetime_str()
    print(closing_note + '\n\n\n')
    logfile.write(closing_note + '\n\n\n\n')
    for handle in (logfile, error_file):
        handle.close()

    # check whether error_file contains content
    chk_rmfile(opts.error_file)

    # successful termination of program
    return 0
def main(opts, commline_list):
    """(main):
        Driver of the grade_master script.

        Reads the grade CSV file named by opts.data_file, stores the raw data
        in a dictionary keyed by student ID, prints a summary of the acquired
        data, and computes an overall grade and letter grade after each
        assignment column.

        Args:
            opts: parsed options; the attributes logfile, error_file and
                data_file are read here.
            commline_list: command line, passed on to print_invoked_opts.

        Returns:
            0 on successful termination. At present this is never reached
            because the function exits via sys.exit after the test printout
            of the grades.

        Raises:
            SystemExit: if no data file is specified, a standard key is
                missing, a data row has the wrong number of entries, a key is
                unknown, or no usable grades have been reported.

        NOTE: this is Python 2 code (unbuffered text-mode open, xrange).
    """
    time_start = time.time()

    # now the standard part of the script begins
    logfile = open(opts.logfile, 'a', 0)
    error_file = open(opts.error_file, 'a', 0)

    separator = "------------------------------------------------------------------------------ "

    def log(msg):
        # make a screen entry and a logfile entry so that we know where we stand
        print(msg)
        logfile.write(msg + '\n')

    def abort(reason, detail=None):
        # detail (if given) goes to screen, logfile and error_file;
        # reason goes to logfile and error_file and is handed to sys.exit
        if detail is not None:
            log(detail)
            error_file.write(detail + '\n')
        logfile.write(reason + '\n')
        error_file.write(reason + '\n')
        sys.exit(reason)

    banner(logfile, SCRIPT_NAME, SCRIPT_VERSION, REVISION_DATE, AUTHOR,
           DESCRIPTION)

    # give out options of this run
    print_invoked_opts(logfile, opts, commline_list)

    log(separator)

    #################################################################################################
    # read in the CSV file with the raw data of grades

    log("Starting data acquisition...")

    # check that a data file was specified, get filename from optparse
    if opts.data_file is None:
        abort("Aborting due to missing data file!",
              "... data file not specified!")
    # TODO: This should better be done by exception handling

    # suitable data structure for raw and derived data: dictionary of dictionaries with mixed arguments -> stackoverflow
    # template:
    # data_dict['ID']['first_name'] = "xxx"
    # data_dict['ID']['last_name'] = "yyy"
    # data_dict['ID']['hw_grades'] = []    list of variable entries
    # data_dict['ID']['midterm_grades'] = []    list of variable entries
    # data_dict['ID']['final_grade'] = z   some number
    data_dict = defaultdict(lambda: defaultdict(
        int))  # note: we use the anonymous function construct lambda here

    # make ID list since this is our distinguishing dictionary key
    id_list = []

    # open CSV file with raw data; the with-block closes it on every exit
    # path, including the aborts below
    with open(opts.data_file, 'r') as data_file:
        log("   ...reading in data structure...")

        # read top line of data file, which defines the keys; strip the line
        # ending first so that both '\r\n' and '\n' terminated files work,
        # then split at the commas and get rid of empty entries
        header = data_file.readline()
        keys_list = [
            word for word in header.rstrip('\r\n').split(',') if word != ''
        ]
        n_keys = len(keys_list)

        log("   ...checking validity of data structure...")
        # check that the standard keys are amongst the first three keys, because that's all we have implemented so far
        for required_key in ("Last name", "First name", "Student ID"):
            if required_key not in keys_list[0:3]:
                abort("Aborting due to unknown data structure!",
                      "   ...'" + required_key + "' missing in data structure!")

        # index of the list element in keys_list that contains the id
        id_index = keys_list.index("Student ID")

        log("   ...reading in bulk of data...")

        for line in data_file:
            # temporary data list without junk (empty entries, line ending)
            data_list = [
                word for word in line.rstrip('\r\n').split(',') if word != ''
            ]

            # skip lines that contain no data at all
            if not data_list:
                continue
            # data_list and keys_list have to have the same length
            if len(data_list) != n_keys:
                abort("Aborting due to invalid data entry!",
                      "   ...invalid data entry (wrong number of data entries): "
                      + line)
            # TODO: think about a more sophisticated handling in case of problems

            student_id = data_list[id_index]
            id_list.append(student_id)
            # set up hw and midterm lists to get added to dictionary later
            hw_list = []
            midterm_list = []

            for key, data in zip(keys_list, data_list):
                if key == "Last name":
                    data_dict[student_id]['last_name'] = data
                elif key == "First name":
                    data_dict[student_id]['first_name'] = data
                elif key == "Student ID":
                    continue
                elif 'HW' in key:
                    # don't forget to convert string to float
                    hw_list.append(float(data))
                elif key in ('M1', 'M2'):
                    midterm_list.append(float(data))
                elif key == 'Final':
                    data_dict[student_id]['final_grade'] = float(data)
                else:
                    abort("Aborting due to unknown key!")

            # now we have to put lists into dictionary
            data_dict[student_id]['hw_grades'] = hw_list
            data_dict[student_id]['midterm_grades'] = midterm_list

    log("...data acquisition finished.")

    #################################################################################################

    # create assignment keys list for better readability; note: we trade resources for readability
    # (the first three keys are the standard keys checked above)
    assignment_keys_list = keys_list[3:]
    n_assignment_keys = len(assignment_keys_list)

    # count the assignment types from the header keys rather than from
    # variables left over from the read loop, which do not exist when the
    # file contains no student rows
    n_hws = sum(1 for key in assignment_keys_list if 'HW' in key)
    n_midterms = sum(1 for key in assignment_keys_list if key in ('M1', 'M2'))
    n_finals = sum(1 for key in assignment_keys_list if key == 'Final')

    log(separator)
    log("Summary of acquired data:")
    log("   Number of students:  " + str(len(id_list)))
    log("   Number of homeworks: " + str(n_hws))
    log("   Number of midterms:  " + str(n_midterms))
    log("   Number of finals:    " + str(n_finals))
    # TODO: this should be better formatted

    #################################################################################################

    log(separator)
    log("Starting calculation of grades and grade projections...")

    # create lists of various grades
    for student_id in id_list:
        data_dict[student_id]['hw_grade_av'] = []
        data_dict[student_id]['overall_grade'] = []
        data_dict[student_id]['overall_lettergrade'] = []

    # we want grades for every point during the semester, so we successively go through list of assignments and compute grade after each
    for i in xrange(n_assignment_keys):
        reported_keys = assignment_keys_list[0:i + 1]
        # determine number of homeworks and midterms at this point in semester
        n_hw = sum(1 for key in reported_keys if "HW" in key)
        n_mt = sum(1 for key in reported_keys if key in ('M1', 'M2'))
        # distinguish different cases for grade projections, depending on where we stand in the semester
        has_final = 'Final' in reported_keys
        has_midterm = 'M2' in reported_keys or 'M1' in reported_keys
        has_hw1 = 'HW1' in reported_keys

        for student_id in id_list:
            student = data_dict[student_id]
            if not (has_final or has_midterm or has_hw1):
                abort("Aborting due to lack of reported grades!")

            # NOTE: this divides by zero if an exam column precedes every
            # homework column (n_hw == 0)
            hw_grades = student['hw_grades'][0:n_hw]
            hw_average = sum(hw_grades) / len(hw_grades)

            if has_final:
                # i.e., this is the final grade after all assignments are in
                midterm_grades = student['midterm_grades']
                midterm_max = max(midterm_grades)
                midterm_min = min(midterm_grades)
                final = student['final_grade']
            elif has_midterm:
                # only the midterms reported so far enter the projection
                # (sliced like the homeworks); the final is projected as
                # their average
                midterm_grades = student['midterm_grades'][0:n_mt]
                midterm_max = max(midterm_grades)
                midterm_min = min(midterm_grades)
                final = sum(midterm_grades) / len(midterm_grades)
            else:
                # only homework so far: project all exams from the hw average
                midterm_max = hw_average
                midterm_min = hw_average
                final = hw_average

            # implement grading scheme: HW: 20%, better midterm 35%, worse midterm 15%, final: 30%
            overall_grade = 0.2 * hw_average + 0.35 * midterm_max + 0.15 * midterm_min + 0.3 * final
            # TODO: instead of hardwiring, we may want to build more flexibility in here

            overall_lettergrade = percent2lettergrade(overall_grade)

            # add computed information to data dictionary
            student['hw_grade_av'].append(hw_average)
            student['overall_grade'].append(round(overall_grade, 1))
            student['overall_lettergrade'].append(overall_lettergrade)

    # test if this works
    for student_id in id_list:
        print(str(student_id) + ' ' +
              str(data_dict[student_id]['overall_grade']) + ' ' +
              str(data_dict[student_id]['overall_lettergrade']))

    sys.exit("This is as far as it goes right now.")
    # ok this is a mess by, now, so we should really clean up

    # NOTE: everything below is unreachable because of the exit above. It is
    # draft code that still uses a name that is never assigned in this
    # function (grade_total_average) and dictionary keys that are never set
    # ('grade_total', 'letter_grade'); it needs rework before it is enabled.

    # perform statistics, analysis, projections
    # compute current average according to grading rules

    # course average
    grade_total_list = []
    grade_total_average_list = []
    grade_total_average_letter_list = []
    for j in xrange(n_assignment_keys):
        grade_total_list.append([])
        for student_id in id_list:
            grade_total_list[j].append(data_dict[student_id]['grade_total'])

        grade_total_average_list.append(
            sum(grade_total_list[j]) / len(grade_total_list[j]))

        if round(grade_total_average) >= 96:
            grade_total_average_letter = 'A'
        elif round(grade_total_average) >= 91:
            grade_total_average_letter = 'A-'
        elif round(grade_total_average) >= 86:
            grade_total_average_letter = 'B+'
        elif round(grade_total_average) >= 81:
            grade_total_average_letter = 'B'
        elif round(grade_total_average) >= 76:
            grade_total_average_letter = 'B-'
        elif round(grade_total_average) >= 71:
            grade_total_average_letter = 'C+'
        elif round(grade_total_average) >= 66:
            grade_total_average_letter = 'C'
        elif round(grade_total_average) >= 61:
            grade_total_average_letter = 'C-'
        elif round(grade_total_average) >= 56:
            grade_total_average_letter = 'D+'
        elif round(grade_total_average) >= 51:
            grade_total_average_letter = 'D'
        else:
            grade_total_average_letter = 'F'

    print(separator)

    print(str(grade_total_average) + '  ' + grade_total_average_letter)

    # rank students
    # identify best, worst students
    # note: there is no good way to sort a nested dictionary by value, so we just create an auxiliary list of tuples
    tmp_list = []
    for student_id in id_list:
        tmp_tuple = (student_id, data_dict[student_id]['grade_total'])
        tmp_list.append(tmp_tuple)

    print(tmp_list)
    print('')
    print('')

    sorted_tmp_list = sorted(tmp_list, key=itemgetter(1))
    print(sorted_tmp_list)

    # count grades
    a0_count = 0
    am_count = 0
    bp_count = 0
    b0_count = 0
    bm_count = 0
    cp_count = 0
    c0_count = 0
    cm_count = 0
    dp_count = 0
    d0_count = 0
    f0_count = 0
    for student_id in id_list:
        if data_dict[student_id]['letter_grade'] == 'A':
            a0_count += 1
        elif data_dict[student_id]['letter_grade'] == 'A-':
            am_count += 1
        elif data_dict[student_id]['letter_grade'] == 'B+':
            bp_count += 1
        elif data_dict[student_id]['letter_grade'] == 'B':
            b0_count += 1
        elif data_dict[student_id]['letter_grade'] == 'B-':
            bm_count += 1
        elif data_dict[student_id]['letter_grade'] == 'C+':
            cp_count += 1
        elif data_dict[student_id]['letter_grade'] == 'C':
            c0_count += 1
        elif data_dict[student_id]['letter_grade'] == 'C-':
            cm_count += 1
        elif data_dict[student_id]['letter_grade'] == 'D+':
            dp_count += 1
        elif data_dict[student_id]['letter_grade'] == 'D':
            d0_count += 1
        elif data_dict[student_id]['letter_grade'] == 'F':
            f0_count += 1

    print('a0_count   ' + str(a0_count))
    print('am_count   ' + str(am_count))
    print('bp_count   ' + str(bp_count))
    print('b0_count   ' + str(b0_count))
    print('bm_count   ' + str(bm_count))
    print('cp_count   ' + str(cp_count))
    print('c0_count   ' + str(c0_count))
    print('cm_count   ' + str(cm_count))
    print('dp_count   ' + str(dp_count))
    print('d0_count   ' + str(d0_count))
    print('f0_count   ' + str(f0_count))

    # test CSV files at different stages of semester

    # figure out in detail what we want to do
    # follow progress throughout the semester - here we will need an order criterion
    # test different grading schemes

    log("... finished.")
    log(separator)

    #################################################################################################

    # wrap up section
    tmp_str = tot_exec_time_str(time_start) + "\n" + std_datetime_str()
    print(tmp_str + 3 * '\n')
    logfile.write(tmp_str + 4 * '\n')
    logfile.close()
    error_file.close()

    # check whether error_file contains content
    chk_rmfile(opts.error_file)

    return 0  #successful termination of program
Exemple #3
0
def main(opts, commline_list):
    """(main):
        Driver of the grade_master script (early data acquisition version).

        Reads the top line of the CSV file named by opts.data_file, extracts
        the keys from it, prints them together with the (still empty) data
        dictionary, and then stops.

        Args:
            opts: parsed options; the attributes logfile, error_file and
                data_file are read here.
            commline_list: command line, passed on to print_invoked_opts.

        Returns:
            0 on successful termination. At present this is never reached
            because the function exits via sys.exit after the debug printout.

        Raises:
            SystemExit: if no data file is specified, and unconditionally
                after the debug printout.
    """
    time_start = time.time()

    # now the standard part of the script begins
    logfile = open(opts.logfile, 'a', 0)
    error_file = open(opts.error_file, 'a', 0)

    separator = "------------------------------------------------------------------------------ "

    def log(msg):
        # make a screen entry and a logfile entry so that we know where we stand
        print(msg)
        logfile.write(msg + '\n')

    banner(logfile, SCRIPT_NAME, SCRIPT_VERSION, REVISION_DATE, AUTHOR,
           DESCRIPTION)

    # give out options of this run
    print_invoked_opts(logfile, opts, commline_list)

    log(separator)

    #################################################################################################

    # read in the CSV file with the raw data of grades
    log("Start data aquisition... ")
    # TODO: make use of different print levels

    # check that a data file was specified, get filename from optparse
    if opts.data_file is None:
        tmp_str = "... data file not specified."
        log(tmp_str)
        # the handle is called error_file (an earlier misspelling raised a
        # NameError here instead of reporting the problem)
        error_file.write(tmp_str + '\n')
        sys.exit("Aborting due to missing data file.")
    # TODO: This should better be done by exception handling

    # open file; the with-block closes it again even though we bail out below
    with open(opts.data_file, 'r') as data_file:
        # read top line of data file
        line = data_file.readline()
        # TODO: read bulk of data file here
        # use standard infinite loop construct

    # use commas for split operation
    words = line.split(',')

    # extract keys, get rid of empty entries
    keys_list = [word for word in words if word != '' and word != '\r\n']

    print(words)
    print(keys_list)

    # think how I want to organize data! what would be a useful data structure?
    # how about a dictionary of dictionaries with mixed arguments
    # I want this logic:
    # data['ID']['first_name'] = "xxx"
    # data['ID']['last_name'] = "yyy"
    # data['ID']['hw_grades'] = []    list of variable entries
    # data['ID']['midterm_grades'] = []    list of variable entries
    # data['ID']['final_grade'] = z   some number
    # how do we realize this? -> stackoverflow: dictionary of dictionaries

    data_dict = defaultdict(lambda: defaultdict(int))

    print(data_dict)

    sys.exit("This is as far as it goes right now.")

    # NOTE: everything below is unreachable until the exit above is removed

    log("... finished.")
    log(separator)

    # parse and process the raw data, put it in useful data structure
    # read into our data structure

    # perform statistics, analysis, projections
    # compute current average according to grading rules
    # translate points into grades
    # test different grading schemes
    # rank students
    # identify best, worst students
    # figure out in detail what we want to do

    #################################################################################################

    # wrap up section
    tmp_str = tot_exec_time_str(time_start) + "\n" + std_datetime_str()
    print(tmp_str + 3 * '\n')
    logfile.write(tmp_str + 4 * '\n')
    logfile.close()
    error_file.close()

    # check whether error_file contains content
    chk_rmfile(opts.error_file)

    return 0  #successful termination of program
Exemple #4
0
def main(args, commline_list):
    """(main):
        Driver of the grademaster script.
    """
    time_start = time.time()

    # now the standard part of the script begins
    logfile = open(args.logfile, 'a')
    error_file = open(args.error_file, 'a')

    banner(logfile, SCRIPT_NAME, SCRIPT_VERSION, REVISION_DATE, AUTHOR,
           DESCRIPTION)

    # give out options of this run
    print_invoked_opts(logfile, args, commline_list)

    home_dir = os.getcwd()

    tmp_str = "------------------------------------------------------------------------------ "
    print(tmp_str)
    logfile.write(tmp_str + '\n')

    #################################################################################################
    # read in the CSV file with the raw data of grades

    # make a logfile entry and screen entry so that we know where we stand
    tmp_str = "Starting data acquisition..."
    print(tmp_str)
    logfile.write(tmp_str + '\n')

    # check that file exists, get filename from optparse
    if args.data_file is None or os.path.getsize(args.data_file) == 0:
        tmp_str = "... data file not specified/empty!"
        print(tmp_str)
        logfile.write(tmp_str + '\n')
        error_file.write(tmp_str + '\n')

        tmp_str = "Aborting due to missing data file!"
        logfile.write(tmp_str + '\n')
        error_file.write(tmp_str + '\n')
        sys.exit(tmp_str)
# TODO: This should better be done by exception handling

    tmp_str = "   ...reading in data..."
    print(tmp_str)
    logfile.write(tmp_str + '\n')

    std_filename = args.data_file.replace('.csv',
                                          '')  #Filename prefix for outputs
    os.mkdir(std_filename)
    std_filename = std_filename + '/'
    # open CSV file with raw data
    rawdata_df = pd.read_csv(args.data_file)
    print(rawdata_df)
    #sys.exit("print(rawdata_df)")

    tmp_str = "   ...cleaning data structure..."
    print(tmp_str)
    logfile.write(tmp_str + '\n')

    # remove empty entries
    for i in rawdata_df.columns:
        if 'Unnamed' in i:
            rawdata_df = rawdata_df.drop(i, 1)
    rawdata_df = rawdata_df.dropna(how='all')
    #    print(rawdata_df)

    tmp_str = "   ...identify keys..."
    print(tmp_str)
    logfile.write(tmp_str + '\n')

    # read top line of data file, which defines the keys
    keys_list = list(rawdata_df.columns)
    n_keys = len(keys_list)
    #    print keys_list

    #Ensuring all scores are between 0 and 100
    for i in range(4, n_keys):
        rawdata_df[keys_list[i]] = rawdata_df[keys_list[i]].clip(lower=0,
                                                                 upper=100)

#     # OLD VERSION
#     # open CSV file with raw data
#     data_file = open(opts.data_file,'r')
#
#     # read top line of data file, which defines the keys
#     line = data_file.readline()
#     # use commas for split operation
#     words = line.split(',')
#     # extract keys, get rid of empty entries
#     keys_list = []
#     for word in words:
#         if word != '' and word != '\r\n':
#             keys_list.append(word)

# TODO: we should make this more general purpose
# TODO: rewrite this in a more elegant form
    tmp_str = "   ...checking validity of data structure..."
    print(tmp_str)
    logfile.write(tmp_str + '\n')
    # check that the standard keys are amongst the first three keys, because that's all we have implemented so far

    #    if "Last name" not in keys_list[0:4]:
    #        tmp_str = "   ...'Last name' missing in data structure!"
    #        print(tmp_str)
    #        logfile.write(tmp_str + '\n')
    #        error_file.write(tmp_str + '\n')
    #    elif "First name" not in keys_list[0:4]:
    #        tmp_str = "   ...'First name' missing in data structure!"
    #        print(tmp_str)
    #        logfile.write(tmp_str + '\n')
    #        error_file.write(tmp_str + '\n')
    #    elif "Student ID" not in keys_list[0:4]:
    #        tmp_str = "   ...'Student ID' missing in data structure!"
    #        print(tmp_str)
    #        logfile.write(tmp_str + '\n')
    #        error_file.write(tmp_str + '\n')
    #    elif "email" not in keys_list[0:4]:
    #        tmp_str = "   ...'email' missing in data structure!"
    #        print(tmp_str)
    #        logfile.write(tmp_str + '\n')
    #        error_file.write(tmp_str + '\n')

    strucheck('Last name', keys_list)
    strucheck('First name', keys_list)
    strucheck('Student ID', keys_list)
    strucheck('email', keys_list)

    # check if all the grades are in float type (not object)
    for i in keys_list[4:]:
        if rawdata_df[i].dtypes == object:
            tmp_str = "Aborting due to unknown grade format in column %s!" % i
            logfile.write(tmp_str + '\n')
            error_file.write(tmp_str + '\n')
            sys.exit(tmp_str)

    course_id = str(input('Enter the course number: '))

    #     # OLD VERSION
    # suitable data structure for raw and derived data: dictionary of dictionaries with mixed arguments -> stackoverflow
    # template:
    # data_dict['ID']['first_name'] = "xxx"
    # data_dict['ID']['last_name'] = "yyy"
    # data_dict['ID']['hw_grades'] = []    list of variable entries
    # data_dict['ID']['midterm_grades'] = []    list of variable entries
    # data_dict['ID']['final_grade'] = z   some number
    #     data_dict = defaultdict(lambda : defaultdict(int))  # note: we use the anonymous function construct lambda here

    # make ID list since this is our distinguishing dictionary key
    #     id_list = []
    #     tmp_str = "   ...reading in bulk of data..."
    #     print tmp_str
    #     logfile.write(tmp_str + '\n')

    # use standard read in with infinite loop construct
    #     while 1:
    #         line = data_file.readline()
    #         if not line: break
    #         words = line.split(',')
    #         # temporary data list
    #         data_list = []
    #         for word in words:
    #             # get rid of junk data
    #             if word != '' and '\r\n' not in word:   # note: we had to make junk removal more general
    #                 # populate the temporary data_list
    #                 data_list.append(word)

    #         # continue if data_list is emptycheck that we don't have an empty list
    #         if len(data_list) == 0:
    #             continue
    #         # check that the data_list and key_list have to have the same lenght
    #         elif len(data_list) != n_keys:
    #             tmp_str = "   ...invalid data entry (wrong number of data entries): " + line
    #             print tmp_str
    #             logfile.write(tmp_str + '\n')
    #             error_file.write(tmp_str + '\n')
    #             tmp_str = "Aborting due to invalid data entry!"
    #             logfile.write(tmp_str + '\n')
    #             error_file.write(tmp_str + '\n')
    #             sys.exit(tmp_str)
    #         # TODO: think about a more sophisticated handling in case of problems
    #
    #
    #         # find index of list element in keys_list that contains the id
    #         id_index = keys_list.index("Student ID")
    #         # get id
    #         id = data_list[id_index]
    #         # add id to id_list
    #         id_list.append(id)
    #         # set up hw and midterm lists to get added to dictionary later
    #         hw_list = []
    #         midterm_list = []
    #
    #         for i in range(n_keys):    # note that we use range instead of range
    #             key = keys_list[i]
    #             data = data_list[i]
    #             if key == "Last name":
    #                 data_dict[id]['last_name'] = data
    #             elif key == "First name":
    #                 data_dict[id]['first_name'] = data
    #             elif key == "Student ID":
    #                 continue
    #             elif 'HW' in key:
    #                 hw_list.append(float(data))         # don't forget to convert string to float
    #             elif (key == 'M1') or (key == 'M2'):
    #                 midterm_list.append(float(data))    # don't forget to convert string to float
    #             elif key == 'Final':
    #                 data_dict[id]['final_grade'] = float(data)  # don't forget to convert string to float
    #             else:
    #                 tmp_str = "Aborting due to unknown key!"
    #                 logfile.write(tmp_str + '\n')
    #                 error_file.write(tmp_str + '\n')
    #                 sys.exit(tmp_str)
    #
    #         # now we have to put lists into dictionary
    #         data_dict[id]['hw_grades'] = hw_list
    #         data_dict[id]['midterm_grades'] = midterm_list
    #
    #
    #     # close file
    #     data_file.close()

    # some bookkeeping on where we stand in the semester
    n_hws = 0
    n_midterms = 0
    n_final = 0
    for key in keys_list[4:]:
        if "HW" in key:
            n_hws += 1
        elif "M" in key:
            n_midterms += 1
        elif "Final" in key:
            n_final += 1
        else:
            tmp_str = "Aborting due to unknown key!"
            logfile.write(tmp_str + '\n')
            error_file.write(tmp_str + '\n')
            sys.exit(tmp_str)

#    print n_hws
#    print n_midterms
#    print n_final

    tmp_str = "...data acquisition finished."
    print(tmp_str)
    logfile.write(tmp_str + '\n')

    #################################################################################################

    tmp_str = "------------------------------------------------------------------------------ "
    print(tmp_str)
    logfile.write(tmp_str + '\n')
    tmp_str = "Summary of acquired data for course " + course_id + ":"
    print(tmp_str)
    logfile.write(tmp_str + '\n')

    tmp_str = "   Number of students:  " + str(len(rawdata_df))
    print(tmp_str)
    logfile.write(tmp_str + '\n')
    tmp_str = "   Number of homeworks: " + str(n_hws)
    print(tmp_str)
    logfile.write(tmp_str + '\n')
    tmp_str = "   Number of midterms:  " + str(n_midterms)
    print(tmp_str)
    logfile.write(tmp_str + '\n')
    tmp_str = "   Number of finals:    " + str(n_final)
    print(tmp_str)
    logfile.write(tmp_str + '\n')
    # TODO: this should be better formatted

    #################################################################################################

    #     print "Info"
    #     print rawdata_df.info()
    #     print "Keys"
    #     print rawdata_df.keys()
    #     print "Index"
    #     print rawdata_df.index
    #     print "Columns"
    #     print rawdata_df.columns
    #     print "Values"
    #     print rawdata_df.values
    #     print "Describe"
    #     print rawdata_df.describe()

    # TODO: this is very inelegant and should be changed
    # Set up projection dataframe
    #    hwdata_df = rawdata_df.copy()
    #    examdata_df = rawdata_df.copy()
    # empty all data fields in projection_df
    #    hwdata_df['Final'] = 0
    #    for i in range(4,n_keys):
    #        key = keys_list[i]
    #        if 'HW' in key:
    #            examdata_df.drop(key, axis=1, inplace=True)
    #        elif key in ('M1', 'M2','F'):
    #            hwdata_df.drop(key, axis=1, inplace=True)

    #    print hwdata_df
    #    print examdata_df

    #    hwkeys_list = list(hwdata_df.columns)
    #    n_hwkeys = len(hwkeys_list)

    #    examkeys_list = list(examdata_df.columns)
    #    n_examkeys = len(examkeys_list)

    #    acc_hwdata_df = hwdata_df.copy()
    #    acc_examdata_df = examdata_df.copy()

    #    for i in range(4,n_hwkeys):
    #        key = hwkeys_list[i]
    #        if key == 'HW1':
    #            continue
    #        else:
    #            prevkey = hwkeys_list[i-1]
    #            acc_hwdata_df[key] += acc_hwdata_df[prevkey]

    #    for i in range(4,n_examkeys):
    #        key = examkeys_list[i]
    #        if key == 'M1':
    #            continue
    #        else:
    #            prevkey = examkeys_list[i-1]
    #            acc_examdata_df[key] += acc_examdata_df[prevkey]

    #    print acc_hwdata_df
    #    print acc_examdata_df

    #    av_hwdata_df = acc_hwdata_df.copy()
    #    av_examdata_df = acc_examdata_df.copy()
    #    minmax_midtermdata_df = examdata_df.copy()

    #    for i in range(4,n_hwkeys):
    #        key = hwkeys_list[i]
    #hw_n = int(key[2:])
    #        av_hwdata_df[key] = 1.0*av_hwdata_df[key]/n_hws

    #    for i in range(4,n_examkeys):
    #        key = examkeys_list[i]
    #        if key == 'F':
    #            av_examdata_df[key] = 1.0*av_examdata_df[key]/3
    #        else:
    #exam_n = int(key[1:])
    #            av_examdata_df[key] = 1.0*av_examdata_df[key]/(n_midterms)

    #    print("Are we there yet?")

    #    if n_midterms == 2:
    #        print("Here we are now")
    #        print(minmax_midtermdata_df)
    #        print(examdata_df)
    #        print(acc_examdata_df)
    #        print(av_examdata_df)
    #        print(hwdata_df)
    #        print(acc_hwdata_df)
    #        print(av_hwdata_df)
    #        sys.exit()

    #    print av_hwdata_df
    #    print av_examdata_df

    #    for i in range(4,n_keys):
    #        key = keys_list[i]
    #        projection_df[key] = 0
    #        if key in ('HW1','HW2','HW3','HW4'):
    #            projection_df[key] = av_hwdata_df[key]
    #        elif key == 'M1':
    #            projection_df[key] = 0.2*av_hwdata_df['HW4']+0.8*av_examdata_df['M1']
    #        elif key in ('HW5', 'HW6','HW7','HW8'):
    #            projection_df[key] = 0.2*av_hwdata_df[key]+0.8*av_examdata_df['M1']
    #        elif key == 'M2':
    #            projection_df[key] = 0.2*av_hwdata_df['HW8']+0.3*av_examdata_df['M1']
    #        else:
    #            sys.exit("Not yet implemented!")

    # NOTE: the course statistics section is deliberately placed before the prediction section.
    # The prediction section calls sys.exit() when there are too few data points or the semester is already over, so anything placed below it would not be executed in those cases.

    #Course Statistics Section Redux
    #Rewriting section to use dataframes
    tmp_str = "------------------------------------------------------------------------------ "
    print(tmp_str)
    logfile.write(tmp_str + '\n')
    tmp_str = "Starting calculation of course statistics..."
    print(tmp_str)
    logfile.write(tmp_str + '\n')
    tmp_str = "------------------------------------------------------------------------------ "
    print(tmp_str)

    hw_average = 0
    midterm_max = 0
    midterm_min = 0
    final = 0

    hwdata_df = rawdata_df.copy()
    examdata_df = rawdata_df.copy()

    #Removing student ID to help with mean calculation
    hwdata_df.drop('Student ID', axis=1, inplace=True)
    examdata_df.drop('Student ID', axis=1, inplace=True)

    #hwdata_df['Final'] = 0
    for i in range(4, n_keys):
        key = keys_list[i]
        if 'HW' in key:
            examdata_df.drop(key, axis=1, inplace=True)
        elif key in ('M1', 'M2'):
            hwdata_df.drop(key, axis=1, inplace=True)
        elif 'F' in key:
            hwdata_df.drop(key, axis=1, inplace=True)
            examdata_df.drop(key, axis=1, inplace=True)

#    print(hwdata_df)
#    print(examdata_df)

    hwkeys_list = list(hwdata_df.columns)
    n_hwkeys = len(hwkeys_list)

    examkeys_list = list(examdata_df.columns)
    n_examkeys = len(examkeys_list)

    hwdata_df = hwdata_df.assign(mean=hwdata_df.mean(axis=1,
                                                     numeric_only=True),
                                 std_dev=hwdata_df.std(axis=1,
                                                       numeric_only=True))
    examdata_df = examdata_df.assign(mean=examdata_df.mean(axis=1,
                                                           numeric_only=True),
                                     std_dev=examdata_df.std(
                                         axis=1, numeric_only=True))
    #    print(hwdata_df)
    if 'M1' in examkeys_list and 'M2' in examkeys_list:
        examdata_df = examdata_df.assign(max=examdata_df.max(axis=1),
                                         min=examdata_df[['M1',
                                                          'M2']].min(axis=1))
    elif 'M1' in examkeys_list and 'M2' not in examkeys_list:
        pass

    #participation grade: Just a randomized distribution with max of 5 and min of 0
    participate = np.random.uniform(low=0,
                                    high=5,
                                    size=(len(list(hwdata_df.index))))
    hwdata_df['Participation'] = participate
    hwdata_df = hwdata_df.assign(adj_mean=hwdata_df['mean'] +
                                 hwdata_df['Participation'])
    hwdata_df['adj_mean'] = hwdata_df['adj_mean'].clip(lower=0, upper=100)

    tmp_str = "Course status:\n"
    print(tmp_str)
    logfile.write(tmp_str + '\n')
    if n_midterms > 0 or n_final > 0:
        tmp_str = "Exam data:\n"
        print(tmp_str)
        logfile.write(tmp_str + '\n')
        print(examdata_df)
    tmp_str = "Homework data:\n"
    print(tmp_str)
    logfile.write(tmp_str + '\n')
    print(hwdata_df)
    plt.plot(hwdata_df['mean'])
    plt.savefig(std_filename + 'grade_plot.png')

    finaldata_df = rawdata_df.copy()
    finaldata_df = finaldata_df.assign(hw_mean=hwdata_df['adj_mean'],
                                       exam_mean=examdata_df['mean'])

    # Final grading scheme: HW: 20%, better midterm 35%, worse midterm 15%, final: 30%
    hw_grad = 0.2
    midmax_grad = 0.35
    midmin_grad = 0.15
    fin_grad = 0.3
    #overall_grade = hw_grad*hw_average + midmax_grad*midterm_max + midmin_grad*midterm_min + fin_grad*final
    #overall_lettergrade = percent2lettergrade(overall_grade,'absolute')
    # TODO: instead of hardwiring, we may want to build more flexibility in here
    # Change: Done.

    print("Grading scheme:")
    print("Homework: " + str(hw_grad * 100) + '%')
    print("Highest midterm: " + str(midmax_grad * 100) + '%')
    print("Lowest midterm: " + str(midmin_grad * 100) + '%')
    print("Final: " + str(fin_grad * 100) + '%')

    #Now to write this into the database:
    if n_final > 0:
        finaldata_df = finaldata_df.assign(
            overall_mean=hwdata_df['adj_mean'] * hw_grad +
            midmax_grad * examdata_df['max'] +
            midmin_grad * examdata_df['min'] + fin_grad * rawdata_df['Final'])
    elif n_midterms == 2:  #If there's no final yet, compensate the grading scheme by making the sum of the remaining grades 1.
        grad_adj = 1 / (hw_grad + midmax_grad + midmin_grad)
        hw_grad = grad_adj * hw_grad
        midmax_grad = grad_adj * midmax_grad
        midmin_grad = grad_adj * midmin_grad
        finaldata_df = finaldata_df.assign(
            overall_mean=hwdata_df['adj_mean'] * hw_grad +
            midmax_grad * examdata_df['max'] +
            midmin_grad * examdata_df['min'])
    elif n_midterms == 1:  #If only one midterm has happened, all midterm weights are applied to this.
        grad_adj = 1 / (hw_grad + midmax_grad + midmin_grad)
        hw_grad = grad_adj * hw_grad
        midmax_grad = grad_adj * midmax_grad
        midmin_grad = grad_adj * midmin_grad
        finaldata_df = finaldata_df.assign(
            overall_mean=hwdata_df['adj_mean'] * hw_grad +
            midmax_grad * examdata_df['mean'] +
            midmin_grad * examdata_df['mean'])
    else:  #If there have been no exams yet, just use the homework average without weights.
        finaldata_df = finaldata_df.assign(overall_mean=hwdata_df['adj_mean'])
    finaldata_df['grade'] = 0
    for i in finaldata_df.index:
        finaldata_df.loc[i, 'grade'] = percent2lettergrade(
            finaldata_df.loc[i, 'overall_mean'], 'absolute')
    finaldata_df = finaldata_df.sort_values(by='overall_mean', ascending=False)
    finaldata_df.dropna(axis=1)

    tmp_str = "Sorted and graded list:\n"
    print(tmp_str)
    logfile.write(tmp_str + '\n')
    print(finaldata_df)

    tmp_str = "Congratulations to the toppers!"
    print(tmp_str)
    logfile.write(tmp_str + '\n')

    if n_hws < 5:
        tmp_str = "To those with lower grades, don't lose heart! There's still time to make up!"
        print(tmp_str)
        logfile.write(tmp_str + '\n')

    tmp_str = "Exporting sorted data to CSV."
    print(tmp_str)
    logfile.write(tmp_str + '\n')
    finaldata_df.to_csv(path_or_buf=std_filename + 'Results.csv' +
                        std_datetime_str('date'))

    # add computed information to data dictionary
    #    data_dict[id]['hw_grade_av'].append(hw_average)
    # TODO: we should take out the rounding here
    #    data_dict[id]['overall_grade'].append(round(overall_grade,3))
    #    data_dict[id]['overall_lettergrade'].append(overall_lettergrade)

    #     # output for testing
    #     for id in id_list:
    #         print str(id) + ' ' + str(data_dict[id]['overall_grade'])+ ' ' + str(data_dict[id]['overall_lettergrade'])

    #################################################################################################

    #    Deprecated sorting section
    #     tmp_str = "   ...computing basic distribution statistics..."
    #     print tmp_str
    #     logfile.write(tmp_str + '\n')

    # create lists of lists with all the overall grades
    #    course_overall_grade_list = []
    #    course_overall_lettergrade_list = []
    #    course_overall_grade_stat_list = []

    # iterate through all assignments
    #    for j in range(n_assignment_keys):
    #        course_overall_grade_list.append([])
    #        course_overall_lettergrade_list.append([])
    #        for id in id_list:
    #            course_overall_grade_list[j].append(data_dict[id]['overall_grade'][j])
    #            course_overall_lettergrade_list[j].append(data_dict[id]['overall_lettergrade'][j])

    #        stat = distribution_stat(course_overall_grade_list[j])
    #        course_overall_grade_stat_list.append(stat)

    #        course_overall_grade_stat_list[j]['letter_av'] = percent2lettergrade(course_overall_grade_stat_list[j]['av'],'absolute')
    #        course_overall_grade_stat_list[j]['letter_median'] = percent2lettergrade(course_overall_grade_stat_list[j]['median'],'absolute')
    #        course_overall_grade_stat_list[j]['letter_min'] = percent2lettergrade(course_overall_grade_stat_list[j]['min'],'absolute')
    #        course_overall_grade_stat_list[j]['letter_max'] = percent2lettergrade(course_overall_grade_stat_list[j]['max'],'absolute')

    #        course_overall_grade_stat_list[j]['letter_dist'] = histogram(course_overall_lettergrade_list[j])

    # TODO: here we need a proper print statement now.
    #        print(course_overall_grade_stat_list[j])
    #        print()
    #        sys.exit("This is as far as it goes right now.")

    #    tmp_str = "   ...computing letter grade distribution..."
    #    print(tmp_str)
    #    logfile.write(tmp_str + '\n')

    # perform statistics, analysis, projections
    # compute current average according to grading rules
    # rank students
    # identify best, worst students
    # compile info for each student
    # visualize trends
    # add course participation into grading scheme
    # test different grading schemes

    #     print str(grade_total_average) + '  ' +  grade_total_average_letter

    # rank students
    # identify best, worst students
    # note: there is no good way to sort a nested dictionary by value, so we just create an auxiliary list of (id, grade_total) tuples
    #    tmp_list = []
    #    for id in id_list:
    #        tmp_tuple = (id,data_dict[id]['grade_total'])
    #        tmp_list.append(tmp_tuple)

    #    print(tmp_list)

    #    sorted_tmp_list = sorted(tmp_list, key=itemgetter(1))
    #    print(sorted_tmp_list)

    # Prediction Section 1.0 - A simple gradient based predictor, can be replaced by a better prediction algorithm later.
    # Director's Note:  I thought of using some kind of regression, but there's too little data to make any sensible pattern.
    #                   I'm most likely doing something wrong, this can be fixed in future versions.

    tmp_str = "------------------------------------------------------------------------------ "
    print(tmp_str)
    logfile.write(tmp_str + '\n')
    tmp_str = "Starting calculation of grade projections..."
    print(tmp_str)
    logfile.write(tmp_str + '\n')

    projection_df = rawdata_df.copy()

    n_hws_total = 10
    n_midterms_total = 2
    n_finals_total = 1
    #emptyrows, emptycolumns = np.where(pd.isnull(projection_df))
    emptykeys = []
    #populating prediction db with current data

    for i in range(4, n_keys):
        key = keys_list[i]
        projection_df[key] = 0
        projection_df[key] = rawdata_df[key]

#Prediction Algorithm:
#   1. Check which columns are missing. (Complete!)
#   2. Determine gradient based on final 3 entries. (Could be refined in a future version)
#   3. Predict scores based on no. of assignments completed and gradient. (no. of assignments predicted <= no. of assignments completed)

#   1. Generating list of empty columns
    for i in range(n_hws + 1, n_hws_total + 1):
        emptykeys.append('HW' + str(i))
    for i in range(len(emptykeys)):
        if emptykeys[i - 1] == 'HW4' and emptykeys[i] != 'M1' or emptykeys[
                i] == 'HW5' and emptykeys[i - 1] != 'M1':
            emptykeys.insert(i, 'M1')
        if emptykeys[i - 1] == 'HW8' and emptykeys[i] != 'M2' or emptykeys[
                i] == 'HW9' and emptykeys[i - 1] != 'M2':
            emptykeys.insert(i, 'M2')
    if emptykeys[:0] != 'F':
        emptykeys.append('F')
    #print(emptykeys)
    endkey = keys_list[n_keys - 1]
    #print(endkey)
    if n_keys < 6:
        tmp_str = "Too few datapoints!"
        logfile.write(tmp_str + '\n')
        sys.exit(tmp_str)
    elif n_keys >= 17:
        tmp_str = "Semester is already over, just look at the data!"
        logfile.write(tmp_str + '\n')
        sys.exit(tmp_str)

#   2. Gradient determination

    gradient_samplesize = min(
        3, n_keys - 4)  # this setting is a lot of fun, tweak to check accuracy
    for i in range(n_keys - gradient_samplesize, n_keys):
        projection_df['Gradient' + str(i)] = projection_df[
            keys_list[i]] - projection_df[keys_list[i - 1]]

    proj_keys_list = list(projection_df.columns)
    proj_n_keys = len(proj_keys_list)

    col_tmp = projection_df.loc[:, 'Gradient' +
                                str(n_keys - gradient_samplesize):'Gradient' +
                                str(n_keys - 1)]
    projection_df['MeanGradient'] = col_tmp.mean(axis=1)

    #    print(projection_df)    #Gradients included, uncomment for diagnostics

    count = 0
    #   3. Score prediction and entry
    j = n_keys
    #print(n_keys+(n_keys-4+count), 4+n_hws_total+n_midterms_total+n_finals_total+count,j)
    #print(range(n_keys,min(n_keys+(n_keys-4+count),4+n_hws_total+n_midterms_total+n_finals_total+count)))
    while j in range(
            n_keys,
            min(n_keys + (n_keys - 4 + count),
                4 + n_hws_total + n_midterms_total + n_finals_total + count)):
        #print(n_keys+(n_keys-4+count), 4+n_hws_total+n_midterms_total+count, j)
        if 'Gradient' not in proj_keys_list[j - 1]:
            projection_df.insert(
                j, emptykeys[j - n_keys - count],
                projection_df[proj_keys_list[j - 1]] +
                projection_df['MeanGradient'])
            proj_keys_list.append(emptykeys[j - n_keys - count])
        #    print(emptykeys[j-n_keys-1-count])
        #    print(proj_keys_list[j-1])
        else:
            count += 1

        j += 1

    proj_keys_list = list(projection_df.columns)
    proj_n_keys = len(proj_keys_list)

    #Conditioning the projection database:
    for i in range(4, proj_n_keys):
        if 'Gradient' in proj_keys_list[i]:
            projection_df.drop(proj_keys_list[i], axis=1, inplace=True)
        else:
            projection_df[proj_keys_list[i]] = projection_df[
                proj_keys_list[i]].clip(0, 100)
            projection_df[proj_keys_list[i]] = projection_df[
                proj_keys_list[i]].astype(int)

    tmp_str = "------------------------------------------------------------------------------ "
    print(tmp_str)
    logfile.write(tmp_str + '\n')
    tmp_str = "Projected scores"
    print(tmp_str)
    logfile.write(tmp_str + '\n')

    print(projection_df)  #Final prediction database
    projection_df.to_csv(path_or_buf=std_filename + 'Predictions.csv')
    proj_keys_list = list(projection_df.columns)
    proj_n_keys = len(proj_keys_list)

    #     print keys_list

    #     # OLD VERSION
    #     # empty all data fields in projection_df
    #     for i in range(4,n_keys):
    #         key = keys_list[i]
    #         accumulateddata_df[key] = 0
    #         projection_df[key] = 0
    #         if key == 'HW1':
    #             projection_df[key] = rawdata_df[key]
    #         elif key in ('HW2', 'HW3','HW4'):
    #             for j in range(4,i+1):
    #                 keytmp = keys_list[j]
    #                 projection_df[key] += rawdata_df[keytmp]
    #             projection_df[key] = projection_df[key]/(i-3)
    #         elif key == 'M1':
    #             projection_df[key] = 0.2*projection_df['HW4']+0.8*rawdata_df['M1']
    #         elif key in ('HW5', 'HW6','HW7'):
    #             for j in range(4,i+1):
    #                 keytmp = keys_list[j]
    #                 projection_df[key] += rawdata_df[keytmp]
    #             projection_df[key] = projection_df[key]/(i-3)

    tmp_str = "------------------------------------------------------------------------------ "
    print(tmp_str)
    logfile.write(tmp_str + '\n')
    tmp_str = "Starting automated message generation.\n"
    print(tmp_str)
    logfile.write(tmp_str + '\n')

    # open text dump file
    messagefile_name = std_filename + '_messagefile_' + std_datetime_str(
        'date') + '.txt'
    messagefile = open(messagefile_name, 'w')

    for index in projection_df.index:
        tmp_str = rawdata_df.loc[index, 'email']
        messagefile.write(tmp_str + '\n')
        update_n = n_hws + n_midterms + n_final
        tmp_str = "Grade summary and projection for " + course_id + " (#" + str(
            update_n) + ")"
        messagefile.write(tmp_str + '\n\n')

        firstname = rawdata_df.loc[index, 'First name'].split()[0]
        if firstname == ".":
            firstname = rawdata_df.loc[index, 'Last name'].split()[0]

        tmp_str = "Dear " + firstname + ","
        messagefile.write(tmp_str + '\n\n')

        tmp_str = "I'm writing to give you a brief update on where you stand in " + course_id + ". Here are the marks I have on record for you so far:"
        messagefile.write(tmp_str + '\n')

        #         tmp_str = str(rawdata_df.loc[index,'HW1':])
        # #         tmp_str = str(rawdata_df[index, 4:])
        #         print tmp_str
        #         sys.exit()
        #         messagefile.write(tmp_str + '\n\n')
        for i in range(4, n_keys):
            key = keys_list[i]
            tmp_str = key + ": "
            if len(key) == 2:
                tmp_str += " "
            tmp_str += " %5.1f " % (rawdata_df.iloc[index, i])
            messagefile.write(tmp_str + '\n')
        messagefile.write('\n\n')

        tmp_str = "In the following you can find the class statistics for each assignment/exam:"
        messagefile.write(tmp_str + '\n\n')

        pd.options.display.float_format = '{:7.2f}'.format
        tmp_str = str(rawdata_df.loc[:, 'HW1':].describe())
        #         tmp_str = str(rawdata_df.describe())
        messagefile.write(tmp_str + '\n\n\n')

        tmp_str = "Based on your assignment marks, I arrived at the following grade projections:"
        messagefile.write(tmp_str + '\n')

        for i in range(n_keys, proj_n_keys):
            key = proj_keys_list[i]
            if 'Gradient' not in key:
                tmp_str = "Grade projection after " + key + ": "
                if len(key) == 2:
                    tmp_str += " "
                tmp_str += " %5.1f " % (projection_df.iloc[index, i])
                tmp_str += "(" + percent2lettergrade(
                    projection_df.iloc[index, i], 'absolute') + ")"
                messagefile.write(tmp_str + '\n')
        messagefile.write('\n')

        if percent2lettergrade(projection_df.iloc[index, i],
                               'absolute') == 'A':
            tmp_str = "Well done - excellent job, " + firstname + "! Keep up the good work!"
            messagefile.write(tmp_str + '\n\n')

        tmp_str = "Note: These grade projections are based on default 5-point lettergrade brackets as well as the weights for exams and homeworks indicated in the course syllabus. "
        tmp_str += "Your prior homework and exam averages are used as placeholders for the missing homeworks and exams, respectively. \n"
        tmp_str += "They do NOT yet incorporate extra credit for in-class participation, nor do they consider potential adjustments to the grade brackets. \n"
        tmp_str += "I'm providing the grades after each assignment to give you an idea about your progress. "
        tmp_str += "It is worth noting that grades tend to pick up after the first midterm.\n"
        tmp_str += "Please let me know if you have any questions or concerns."
        messagefile.write(tmp_str + '\n\n')

        if args.requestmeeting is True:
            if projection_df.iloc[index, i] < 66:
                tmp_str = firstname + ", since you are current not doing so great, I wanted to offer to have a meeting with you to see what we can do to improve things. Please let me know what you think."
                messagefile.write(tmp_str + '\n\n\n')

        tmp_str = "Best wishes,"
        messagefile.write(tmp_str + '\n\n')

        tmp_str = "JH"
        messagefile.write(tmp_str + '\n\n\n')
        tmp_str = "------------------------------------------------------------------------------ "
        messagefile.write(tmp_str + '\n\n')

    messagefile.close()
    tmp_str = "Message file successfully generated. Check data directory.\nClosing..."
    print(tmp_str)
    logfile.write(tmp_str + '\n')
    tmp_str = "------------------------------------------------------------------------------ "
    print(tmp_str)
    logfile.write(tmp_str + '\n')
    #    sys.exit("test 14")

    tmp_str = "...calculation of grades and grade projections finished."
    print(tmp_str)
    logfile.write(tmp_str + '\n')
    tmp_str = "------------------------------------------------------------------------------ "
    print(tmp_str)
    logfile.write(tmp_str + '\n')

    # Old course statistics section

    # TODO compute other cases

    #     classsummary_df = rawdata_df
    #     print(classsummary_df)

    #    id_list = []
    #    data_dict = []
    #    data_dict.append([])
    #    for i in range(n_keys):
    #        if 'id' in keys_list[i].lower():
    #            id_list = rawdata_df[keys_list[i]].tolist()

    # create lists of various grades
    #    for id in id_list:
    #        data_dict[id]['hw_grade_av'] = []
    #        data_dict[id]['overall_grade'] = []
    #        data_dict[id]['overall_lettergrade'] = []

    # create assignment keys list for better readability; introduce assignment keys list; note: we trade resources for readability
    #    assignment_keys_list = keys_list[3:]
    #    n_assignment_keys = len(assignment_keys_list)

    # we want grades for every point during the semester, so we successively go through list of assignments and compute grade after each
    #    for i in range(n_assignment_keys):
    # determine number of homeworks at any point in semester
    #        n_hw = 0
    #        for key in assignment_keys_list[0:i+1]:
    #            if "HW" in key: n_hw +=1
    #        for id in id_list:
    #        # distinguish different cases for grade projections, depending on where we stand in the semester
    #            if 'Final' in assignment_keys_list[0:i+1]:  # i.e., this is the final grade after all assignments are in
    #                hw_average = sum(data_dict[id]['hw_grades'][0:n_hw])/len(data_dict[id]['hw_grades'][0:n_hw])
    #                midterm_max = max(data_dict[id]['midterm_grades'])
    #                midterm_min = min(data_dict[id]['midterm_grades'])
    #                final = data_dict[id]['final_grade']    # this is really for readability
    #            elif 'M2' in assignment_keys_list[0:i+1]:
    #                hw_average = sum(data_dict[id]['hw_grades'][0:n_hw])/len(data_dict[id]['hw_grades'][0:n_hw])
    #                midterm_max = max(data_dict[id]['midterm_grades'])
    #                midterm_min = min(data_dict[id]['midterm_grades'])
    #                final = sum(data_dict[id]['midterm_grades'])/len(data_dict[id]['midterm_grades'])
    #            elif 'M1' in assignment_keys_list[0:i+1]:
    #                hw_average = sum(data_dict[id]['hw_grades'][0:n_hw])/len(data_dict[id]['hw_grades'][0:n_hw])
    #                midterm_max = max(data_dict[id]['midterm_grades'])
    #                midterm_min = min(data_dict[id]['midterm_grades'])
    #                final = sum(data_dict[id]['midterm_grades'])/len(data_dict[id]['midterm_grades'])
    #            elif 'HW1' in assignment_keys_list[0:i+1]:
    #                hw_average = sum(data_dict[id]['hw_grades'][0:n_hw])/len(data_dict[id]['hw_grades'][0:n_hw])
    #                midterm_max = hw_average
    #                midterm_min = hw_average
    #                final = hw_average
    #            else:
    #                tmp_str = "Aborting due to lack of reported grades!"
    #                logfile.write(tmp_str + '\n')
    #                error_file.write(tmp_str + '\n')
    #                sys.exit(tmp_str)
    #################################################################################################

    # wrap up section
    tmp_str = tot_exec_time_str(time_start) + "\n" + std_datetime_str()
    print(tmp_str + 3 * '\n')
    logfile.write(tmp_str + 4 * '\n')
    logfile.close()
    error_file.close()

    # check whether error_file contains content
    chk_rmfile(args.error_file)

    return 0  #successful termination of program
def _clean_fields(words):
    """Strip surrounding whitespace/line endings from CSV fields, drop empty ones."""
    stripped = (word.strip() for word in words)
    return [word for word in stripped if word]


def _letter_grade(percent):
    """Translate a percentage into a letter grade using 5-point brackets.

    The percentage is rounded before it is compared against the bracket
    thresholds; anything that rounds below 51 is an 'F'.
    """
    brackets = ((96, 'A'), (91, 'A-'), (86, 'B+'), (81, 'B'), (76, 'B-'),
                (71, 'C+'), (66, 'C'), (61, 'C-'), (56, 'D+'), (51, 'D'))
    rounded = round(percent)
    for threshold, letter in brackets:
        if rounded >= threshold:
            return letter
    return 'F'


def main(opts, commline_list):
    """(main):
        Driver of the grade_master script.

        Reads the CSV grade file named by opts.data_file, computes a weighted
        total for every student (HW 20%, better midterm 35%, worse midterm
        15%, final 30%; the midterm average stands in for a missing final and
        the homework average is used alone when no exam column exists),
        translates the totals into letter grades and prints the course
        average and the letter-grade distribution.

        opts: parsed command-line options; opts.logfile, opts.error_file and
            opts.data_file are read here.
        commline_list: the invoking command line, handed to print_invoked_opts.

        Terminates through sys.exit: with an error message when the data file
        is missing, contains an unknown key, no grades or no students, and
        with a work-in-progress message after the statistics are printed. The
        'return 0' at the end is therefore currently not reached.
    """
    time_start = time.time()

    # now the standard part of the script begins
    logfile = open(opts.logfile, 'a', 0)
    error_file = open(opts.error_file, 'a', 0)

    banner(logfile, SCRIPT_NAME, SCRIPT_VERSION, REVISION_DATE, AUTHOR,
           DESCRIPTION)

    # give out options of this run
    print_invoked_opts(logfile, opts, commline_list)

    separator = "------------------------------------------------------------------------------ "
    print(separator)
    logfile.write(separator + '\n')

    #################################################################################################

    # read in the CSV file with the raw data of grades
    # make a logfile entry and screen entry so that we know where we stand
    tmp_str = "Start data aquisition... "
    print(tmp_str)
    logfile.write(tmp_str + '\n')
    # TODO: make use of different print levels

    # the data file name comes from the command-line options
    if opts.data_file is None:
        tmp_str = "... data file not specified."
        print(tmp_str)
        logfile.write(tmp_str + '\n')
        error_file.write(tmp_str + '\n')
        sys.exit("Aborting due to missing data file.")
    # TODO: This should better be done by exception handling

    # data_dict[student_id] holds 'first_name', 'last_name', 'hw_grades' (list),
    # 'midterm_grades' (list) and 'final_grade' (number)
    data_dict = defaultdict(lambda: defaultdict(int))
    # student ids in the order in which they appear in the file
    id_list = []

    # the context manager closes the file even if a row aborts the script
    with open(opts.data_file, 'r') as data_file:
        # the top line of the data file defines the keys
        words = data_file.readline().split(',')
        keys_list = _clean_fields(words)
        n_keys = len(keys_list)

        print(words)
        print(keys_list)

        for line in data_file:
            data_list = _clean_fields(line.split(','))

            # TODO: we should have some check here to account for non-uniformity issues
            # rows whose field count differs from the header are skipped silently
            if len(data_list) != n_keys:
                continue

            # the student id is hardwired to the third column
            student_id = data_list[2]
            id_list.append(student_id)
            record = data_dict[student_id]
            hw_list = []
            midterm_list = []
            for key, data in zip(keys_list, data_list):
                if key == "Last name":
                    record['last_name'] = data
                elif key == "First name":
                    record['first_name'] = data
                elif key == "Student ID":
                    continue
                elif 'HW' in key:
                    hw_list.append(float(data))
                elif key in ('M1', 'M2'):
                    # careful, the midterm names are hardwired here
                    midterm_list.append(float(data))
                elif key == 'Final':
                    record['final_grade'] = float(data)
                else:
                    sys.exit(
                        "There is something funny going on here. We have an unknown key!"
                    )

            record['hw_grades'] = hw_list
            record['midterm_grades'] = midterm_list

            print(record)
            print('')

    # TODO: well, this is not yet the data structure we want, so we have to rewrite a little

    # perform statistics, analysis, projections
    # grading rules: HW: 20%, better midterm 35%, worse midterm 15%, final: 30%
    has_final = 'Final' in keys_list
    has_midterm = has_final or 'M2' in keys_list or 'M1' in keys_list
    if not has_midterm and 'HW1' not in keys_list:
        sys.exit("No grades given right now, so there is not much we can do.")

    if not id_list:
        # without this guard the course average below divides by zero
        tmp_str = "No valid student records found in data file."
        logfile.write(tmp_str + '\n')
        error_file.write(tmp_str + '\n')
        sys.exit(tmp_str)

    # every scenario is evaluated for every student
    for student_id in id_list:
        record = data_dict[student_id]
        hw_average = sum(record['hw_grades']) / len(record['hw_grades'])
        record['hw_grade_av'] = hw_average

        if not has_midterm:
            # only homework so far: the homework average is the grade
            record['grade_total'] = hw_average
            continue

        midterms = record['midterm_grades']
        midterm_max = max(midterms)
        midterm_min = min(midterms)
        if has_final:
            final = record['final_grade']
        else:
            # no final yet: the midterm average is used as a placeholder
            final = sum(midterms) / len(midterms)

        record['midterm_grade_max'] = midterm_max
        record['midterm_grade_min'] = midterm_min
        record['grade_total'] = (0.2 * hw_average + 0.35 * midterm_max +
                                 0.15 * midterm_min + 0.3 * final)

    print(id_list)
    # test if this works
    for student_id in id_list:
        print(str(student_id) + ' ' + str(data_dict[student_id]['grade_total']))

    # translate points into grades
    for student_id in id_list:
        record = data_dict[student_id]
        record['letter_grade'] = _letter_grade(record['grade_total'])

    # test if this works
    for student_id in id_list:
        record = data_dict[student_id]
        print(str(student_id) + ' ' + str(record['grade_total']) + ' ' +
              record['letter_grade'])

    # course average
    grade_total_list = [data_dict[student_id]['grade_total']
                        for student_id in id_list]
    grade_total_average = sum(grade_total_list) / len(grade_total_list)
    grade_total_average_letter = _letter_grade(grade_total_average)

    print(separator)

    print(str(grade_total_average) + '  ' + grade_total_average_letter)

    # rank students
    # identify best, worst students
    # note: there is no good way to sort a nested dictionary by value, so we just create an auxillary dictionary

    # count grades
    letters = [data_dict[student_id]['letter_grade'] for student_id in id_list]
    count_labels = (('a0_count', 'A'), ('am_count', 'A-'), ('bp_count', 'B+'),
                    ('b0_count', 'B'), ('bm_count', 'B-'), ('cp_count', 'C+'),
                    ('c0_count', 'C'), ('cm_count', 'C-'), ('dp_count', 'D+'),
                    ('d0_count', 'D'), ('f0_count', 'F'))
    for label, letter in count_labels:
        print(label + '   ' + str(letters.count(letter)))

    # NOTE(review): work-in-progress stop kept on purpose; everything below
    # is unreachable until this line is removed.
    sys.exit("This is as far as it goes right now.")

    # test CSV files at different stages of semester

    # figure out in detail what we want to do
    # follow progress throughout the semester - here we will need an order criterion
    # test different grading schemes

    tmp_str = "... finished."
    print(tmp_str)
    logfile.write(tmp_str + '\n')
    print(separator)
    logfile.write(separator + '\n')

    #################################################################################################

    # wrap up section
    tmp_str = tot_exec_time_str(time_start) + "\n" + std_datetime_str()
    print(tmp_str + 3 * '\n')
    logfile.write(tmp_str + 4 * '\n')
    logfile.close()
    error_file.close()

    # check whether error_file contains content
    chk_rmfile(opts.error_file)

    return 0  #successful termination of program
# Example #6
# 0
def main(opts,commline_list):
    """(main):
        Driver of the grade_master script.

        Reads the CSV grade file named by opts.data_file into a dictionary of
        per-student dictionaries and prints every record. The statistics part
        is not implemented yet.

        opts: parsed command-line options; opts.logfile, opts.error_file and
            opts.data_file are read here.
        commline_list: the invoking command line, handed to print_invoked_opts.

        Terminates through sys.exit: with an error message when the data file
        is missing or contains an unknown key, and with a work-in-progress
        message once the data has been read. The 'return 0' at the end is
        therefore currently not reached.
    """
    time_start = time.time()

    # now the standard part of the script begins
    logfile = open(opts.logfile,'a',0)
    error_file = open(opts.error_file,'a',0)

    banner(logfile, SCRIPT_NAME, SCRIPT_VERSION, REVISION_DATE, AUTHOR, DESCRIPTION)

    # give out options of this run
    print_invoked_opts(logfile,opts,commline_list)

    separator = "------------------------------------------------------------------------------ "
    print(separator)
    logfile.write(separator + '\n')

    #################################################################################################

    # read in the CSV file with the raw data of grades
    # make a logfile entry and screen entry so that we know where we stand
    tmp_str = "Start data aquisition... "
    print(tmp_str)
    logfile.write(tmp_str + '\n')
    # TODO: make use of different print levels

    # the data file name comes from the command-line options
    if opts.data_file is None:
        tmp_str = "... data file not specified."
        print(tmp_str)
        logfile.write(tmp_str + '\n')
        error_file.write(tmp_str + '\n')
        sys.exit("Aborting due to missing data file.")
    # TODO: This should better be done by exception handling

    # data_dict[student_id] holds 'first_name', 'last_name', 'hw_grades' (list),
    # 'midterm_grades' (list) and 'final_grade' (number)
    data_dict = defaultdict(lambda : defaultdict(int))

    # the context manager closes the file even if a row aborts the script
    with open(opts.data_file,'r') as data_file:
        # the top line of the data file defines the keys; fields are stripped
        # so that line endings never end up inside a key
        words = data_file.readline().split(',')
        keys_list = [word.strip() for word in words if word.strip()]
        n_keys = len(keys_list)

        print(words)
        print(keys_list)

        for line in data_file:
            data_list = [word.strip() for word in line.split(',') if word.strip()]

            # TODO: we should have some check here to account for non-uniformity issues
            # rows whose field count differs from the header are skipped silently
            if len(data_list) != n_keys:
                continue

            # the student id is hardwired to the third column
            student_id = data_list[2]
            record = data_dict[student_id]
            hw_list = []
            midterm_list = []
            for key, data in zip(keys_list, data_list):
                if key == "Last name":
                    record['last_name'] = data
                elif key == "First name":
                    record['first_name'] = data
                elif key == "Student ID":
                    continue
                elif 'HW' in key:
                    hw_list.append(float(data))
                elif key in ('M1', 'M2'):
                    # careful, the midterm names are hardwired here
                    midterm_list.append(float(data))
                elif key == 'Final':
                    record['final_grade'] = float(data)
                else:
                    sys.exit("There is something funny going on here. We have an unknown key!")

            record['hw_grades'] = hw_list
            record['midterm_grades'] = midterm_list

            print(record)
            print('')

    # TODO: well, this is not yet the data structure we want, so we have to rewrite a little

    # NOTE(review): work-in-progress stop kept on purpose; everything below
    # is unreachable until this line is removed.
    sys.exit("This is as far as it goes right now.")

    # perform statistics, analysis, projections
    # compute current average according to grading rules
    # translate points into grades
    # test different grading schemes
    # rank students
    # identify best, worst students
    # figure out in detail what we want to do

    tmp_str = "... finished."
    print(tmp_str)
    logfile.write(tmp_str + '\n')
    print(separator)
    logfile.write(separator + '\n')

    #################################################################################################

    # wrap up section
    tmp_str = tot_exec_time_str(time_start) + "\n" + std_datetime_str()
    print(tmp_str + 3*'\n')
    logfile.write(tmp_str + 4*'\n')
    logfile.close()
    error_file.close()

    # check whether error_file contains content
    chk_rmfile(opts.error_file)

    return 0    #successful termination of program
# Example #7
# 0
    tmp_str = "------------------------------------------------------------------------------ "
    print tmp_str
    logfile.write(tmp_str + '\n')


    #################################################################################################

    # wrap up section
    tmp_str = tot_exec_time_str(time_start) + "\n" + std_datetime_str()
    print tmp_str + 3*'\n'
    logfile.write(tmp_str + 4*'\n')
    logfile.close()    
    error_file.close()
    
    # check whether error_file contains content
    chk_rmfile(opts.error_file)
        
    return 0    #successful termination of program"""
    
#################################################################################################

# Command-line interface, built with argparse.
if __name__=="__main__":
    # argparse prepends "usage: " itself and expands %(prog)s (not optparse's %prog)
    usage_str = "%(prog)s [options] arg"
    version_str = "%(prog)s " + SCRIPT_VERSION
    parser = argparse.ArgumentParser(usage=usage_str)

    # ArgumentParser(version=...) is not accepted by Python 3; an explicit
    # 'version' action provides the same -v/--version switch
    parser.add_argument('-v', '--version',
                      action='version',
                      version=version_str)

    parser.add_argument('--data_file', 
                      dest='data_file', 
                      type=str, 
                      help="specifies the name of the raw data file in CSV format")
# Example #8
# 0
def main(args, commline_list):
    """(main):
        Driver of the grademaster script.

        Reads the CSV grade file named by args.data_file with pandas, checks
        its layout (the first four columns must be 'Last name', 'First name',
        'Student ID' and 'email'; all remaining columns must be numeric HW*,
        M* or Final columns), computes a grade projection after every
        assignment/exam, mails an individual summary to the students and
        finally saves a histogram of the latest projection.

        args: parsed command-line arguments; args.logfile, args.error_file,
            args.data_file and args.requestmeeting are read here.
        commline_list: the invoking command line, handed to print_invoked_opts.

        Returns 0 on success; aborts through sys.exit with an error message
        when the data file is missing or its structure is invalid.
    """
    # imported locally because the module-level import block is outside this function
    import getpass

    time_start = time.time()

    # now the standard part of the script begins
    logfile = open(args.logfile, 'a', 0)
    error_file = open(args.error_file, 'a', 0)

    def report(message):
        """Echo a progress message to the screen and to the logfile."""
        print(message)
        logfile.write(message + '\n')

    def abort(message):
        """Record a fatal error in logfile and error file, then terminate."""
        logfile.write(message + '\n')
        error_file.write(message + '\n')
        sys.exit(message)

    separator = "------------------------------------------------------------------------------ "

    banner(logfile, SCRIPT_NAME, SCRIPT_VERSION, REVISION_DATE, AUTHOR,
           DESCRIPTION)

    # give out options of this run
    print_invoked_opts(logfile, args, commline_list)

    report(separator)
    report("Starting data acquisition...")

    # the data file name comes from the command-line arguments
    if args.data_file is None:
        tmp_str = "... data file not specified!"
        report(tmp_str)
        error_file.write(tmp_str + '\n')
        abort("Aborting due to missing data file!")
    # TODO: This should better be done by exception handling

    report("   ...reading in data...")

    # open CSV file with raw data
    rawdata_df = pd.read_csv(args.data_file)

    report("   ...cleaning data structure...")

    # remove empty entries: unnamed columns and rows without any value
    unnamed_columns = [col for col in rawdata_df.columns if 'Unnamed' in col]
    rawdata_df = rawdata_df.drop(unnamed_columns, axis=1)
    rawdata_df = rawdata_df.dropna(how='all')

    report("   ...identify keys...")

    # the column names of the data file define the keys
    keys_list = list(rawdata_df.columns)
    # everything after the four identification columns is a grade column
    grade_keys = keys_list[4:]

    report("   ...checking validity of data structure...")
    for required in ("Last name", "First name", "Student ID", "email"):
        if required not in keys_list[0:4]:
            tmp_str = "   ...'%s' missing in data structure!" % required
            report(tmp_str)
            error_file.write(tmp_str + '\n')
            abort("Aborting due to invalid data structure!")

    # check if all the grades are in float type (not object)
    for key in grade_keys:
        if rawdata_df[key].dtypes == object:
            abort("Aborting due to unknown grade format in column %s!" % key)

    # some bookkeeping on where we stand in the semester
    n_hws = 0
    n_midterms = 0
    n_final = 0
    for key in grade_keys:
        if "HW" in key:
            n_hws += 1
        elif "M" in key:
            n_midterms += 1
        elif "Final" in key:
            n_final += 1
        else:
            abort("Aborting due to unknown key!")

    report("...data acquisition finished.")

    #################################################################################################

    report(separator)
    report("Summary of acquired data:")
    report("   Number of students:  " + str(len(rawdata_df)))
    report("   Number of homeworks: " + str(n_hws))
    report("   Number of midterms:  " + str(n_midterms))
    report("   Number of finals:    " + str(n_final))

    #################################################################################################

    report(separator)
    report("Starting calculation of grades and grade projections...")

    # split the raw data into a homework-only and an exam-only dataframe
    hwdata_df = rawdata_df.copy()
    examdata_df = rawdata_df.copy()
    for key in grade_keys:
        if 'HW' in key[0:2]:
            examdata_df.drop(key, axis=1, inplace=True)
        elif key in ('M1', 'M2', 'Final'):
            hwdata_df.drop(key, axis=1, inplace=True)

    hwkeys_list = list(hwdata_df.columns)
    examkeys_list = list(examdata_df.columns)

    # cumulative sums: every column adds the (already accumulated) previous one
    acc_hwdata_df = hwdata_df.copy()
    acc_examdata_df = examdata_df.copy()
    for prevkey, key in zip(hwkeys_list[3:], hwkeys_list[4:]):
        if key != 'HW1':
            acc_hwdata_df[key] += acc_hwdata_df[prevkey]
    for prevkey, key in zip(examkeys_list[3:], examkeys_list[4:]):
        if key != 'M1':
            acc_examdata_df[key] += acc_examdata_df[prevkey]

    # running averages after each assignment/exam; the divisor is taken
    # from the number in the column name ('Final' counts as the third exam)
    av_hwdata_df = acc_hwdata_df.copy()
    av_examdata_df = acc_examdata_df.copy()
    for key in hwkeys_list[4:]:
        av_hwdata_df[key] = 1.0 * av_hwdata_df[key] / int(key[2:])
    for key in examkeys_list[4:]:
        n_exams = 3 if key == 'Final' else int(key[1:])
        av_examdata_df[key] = 1.0 * av_examdata_df[key] / n_exams

    projection_df = rawdata_df.copy()
    for key in grade_keys:
        projection_df[key] = 0
        if key in ('HW1', 'HW2', 'HW3', 'HW4'):
            projection_df[key] = av_hwdata_df[key]
        elif key == 'M1':
            projection_df[key] = (0.2 * av_hwdata_df['HW4'] +
                                  0.8 * av_examdata_df['M1'])
        elif key in ('HW5', 'HW6', 'HW7', 'HW8'):
            projection_df[key] = (0.2 * av_hwdata_df[key] +
                                  0.8 * av_examdata_df['M1'])
        elif key == 'M2':
            projection_df[key] = (0.2 * av_hwdata_df['HW8'] +
                                  0.8 * av_examdata_df['M2'])
        elif key in ('HW9', 'HW10'):
            # use the average up to this homework (as for HW5-HW8), so the
            # HW9 projection neither looks ahead nor requires an HW10 column
            projection_df[key] = (0.2 * av_hwdata_df[key] +
                                  0.8 * av_examdata_df['M2'])
        elif key == 'Final':
            # NOTE(review): these weights add up to 0.9, not 1.0 - confirm
            projection_df[key] = (0.2 * av_hwdata_df['HW10'] +
                                  0.2 * av_examdata_df['M1'] +
                                  0.2 * av_examdata_df['M2'] +
                                  0.3 * av_examdata_df['Final'])

    print("\n\nThese are the grade projections as of now\n\n" +
          str(projection_df))

    print("\n\n\nSending individual mails to all the students... \n\n")

    email_id = raw_input(
        "\n Please enter the Buffalo email id with which you want to send the mails: \n"
    )
    # getpass keeps the password from being echoed on the terminal
    password = getpass.getpass(
        "\n Please enter the password for the given email id: \n")

    fromaddress = str(email_id)
    update_n = n_hws + n_midterms + n_final
    latest_key = keys_list[-1]

    # one connection serves all mails and is closed even if sending fails
    server = smtplib.SMTP('smtp.buffalo.edu', 25)
    try:
        server.starttls()
        server.login(fromaddress, str(password))

        for index in rawdata_df.index:
            # NOTE(review): only rows whose index label is <= 5 are mailed;
            # looks like a leftover test limit - confirm before a real run
            if index > 5:
                break

            toaddress = rawdata_df.loc[index, 'email']

            firstname = rawdata_df.loc[index, 'First name'].split()[0]
            if firstname == ".":
                firstname = rawdata_df.loc[index, 'Last name'].split()[0]

            body_parts = [toaddress]
            body_parts.append("\n Grade summary and projection for CE 317 (#" +
                              str(update_n) + ")")
            body_parts.append("\n Dear " + firstname + ",")
            body_parts.append(
                "\n \n I'm writing to give you a brief update on where you stand in CE 317. Here are the marks I have on record for you so far: \n"
            )

            # rows are addressed by label (.loc): positions and labels differ
            # once dropna has removed a row
            for key in grade_keys:
                body_parts.append(key + ": ")
                if len(key) == 2:
                    body_parts.append(" ")
                body_parts.append("\n \n %5.1f " % (rawdata_df.loc[index, key]))

            body_parts.append(
                "\n In the following you can find the class statistics for each assignment/exam:"
            )

            pd.options.display.float_format = '{:7.2f}'.format
            body_parts.append("\n" + str(rawdata_df.loc[:, 'HW1':].describe()))

            body_parts.append(
                "\n Based on your assignment marks, I arrived at the following grade projections: \n"
            )

            for key in grade_keys:
                projected = projection_df.loc[index, key]
                body_parts.append("\n Grade projection after " + key + ": " + "\n")
                if len(key) == 2:
                    body_parts.append(" ")
                body_parts.append(" %5.1f " % (projected))
                body_parts.append("(" + percent2lettergrade(projected) + ")" + "\n")

            latest_projection = projection_df.loc[index, latest_key]

            if percent2lettergrade(latest_projection) == 'A':
                body_parts.append("Well done - excellent job, " + firstname +
                                  "! Keep up the good work! \n")

            body_parts.append(
                "\n\n Note: These grade projections are based on default 5-point lettergrade brackets as well as the weights for exams and homeworks indicated in the course syllabus. "
            )
            body_parts.append(
                "\n Your prior homework and exam averages are used as placeholders for the missing homeworks and exams, respectively. \n"
            )
            body_parts.append(
                "They do NOT yet incorporate extra credit for in-class participation, nor do they consider potential adjustments to the grade brackets. \n"
            )
            body_parts.append(
                "I'm providing the grades after each assignment to give you an idea about your progress. "
            )
            body_parts.append(
                "It is worth noting that grades tend to pick up after the first midterm.\n"
            )
            body_parts.append(
                "Please let me know if you have any questions or concerns.")

            if args.requestmeeting is True and latest_projection < 66:
                # appended to the message; a plain assignment here would
                # throw away everything composed so far
                body_parts.append(
                    "\n \n" + firstname +
                    ", since you are current not doing so great, I wanted to offer to have a meeting with you to see what we can do to improve things. Please let me know what you think."
                )

            body_parts.append("\n\n Best wishes,")
            body_parts.append("\n JH")
            body_parts.append(
                "\n ------------------------------------------------------------------------------ "
            )

            msg = MIMEMultipart()
            msg['From'] = fromaddress
            msg['To'] = toaddress
            msg['Subject'] = "Grade summary and projections"
            msg.attach(MIMEText("".join(body_parts), 'plain'))
            server.sendmail(fromaddress, toaddress, msg.as_string())
    finally:
        server.quit()

    report("...calculation of grades and grade projections finished.")

    #################################################################################################

    report(separator)
    report("\n\nStarting calculation of course statistics...\n")
    # the statistics are based on the projection after the latest assignment
    distribution_stat(projection_df[latest_key].values.tolist())
    print("\n\nHistogram for the data\n\n")
    histogram(projection_df[latest_key].values.tolist())
    plt.hist(projection_df[latest_key].values.tolist())
    plt.title('Histogram')
    plt.xlabel('Marks')
    plt.ylabel('Number of students')
    plt.savefig('histogram')
    report("\n\n\n...computing letter grade distribution...")
    print(separator)
    report("... finished.")
    report(separator)

    #################################################################################################

    # wrap up section
    tmp_str = tot_exec_time_str(time_start) + "\n" + std_datetime_str()
    print(tmp_str + 3 * '\n')
    logfile.write(tmp_str + 4 * '\n')
    logfile.close()
    error_file.close()

    # check whether error_file contains content
    chk_rmfile(args.error_file)

    return 0  #successful termination of program
def main(opts, commline_list):
    """(main):
        Driver of the grademaster script.
    """
    time_start = time.time()

    # now the standard part of the script begins
    logfile = open(opts.logfile, 'a', 0)
    error_file = open(opts.error_file, 'a', 0)

    banner(logfile, SCRIPT_NAME, SCRIPT_VERSION, REVISION_DATE, AUTHOR,
           DESCRIPTION)

    # give out options of this run
    print_invoked_opts(logfile, opts, commline_list)

    home_dir = os.getcwd()

    tmp_str = "------------------------------------------------------------------------------ "
    print tmp_str
    logfile.write(tmp_str + '\n')

    #################################################################################################
    # read in the CSV file with the raw data of grades

    # make a logfile entry and screen entry so that we know where we stand
    tmp_str = "Starting data acquisition..."
    print tmp_str
    logfile.write(tmp_str + '\n')

    # check that file exists, get filename from optparse
    if opts.data_file is None:
        tmp_str = "... data file not specified!"
        print tmp_str
        logfile.write(tmp_str + '\n')
        error_file.write(tmp_str + '\n')

        tmp_str = "Aborting due to missing data file!"
        logfile.write(tmp_str + '\n')
        error_file.write(tmp_str + '\n')
        sys.exit(tmp_str)
# TODO: This should better be done by exception handling

    tmp_str = "   ...reading in data..."
    print tmp_str
    logfile.write(tmp_str + '\n')

    # open CSV file with raw data
    rawdata_df = pd.read_csv(opts.data_file)
    #    print_df(rawdata_df)

    tmp_str = "   ...cleaning data structure..."
    print tmp_str
    logfile.write(tmp_str + '\n')

    # remove empty entries
    for i in rawdata_df.columns:
        if 'Unnamed' in i:
            rawdata_df = rawdata_df.drop(i, 1)
    rawdata_df = rawdata_df.dropna(how='all')
    #    print_df(rawdata_df)

    tmp_str = "   ...identify keys..."
    print tmp_str
    logfile.write(tmp_str + '\n')

    # read top line of data file, which defines the keys
    keys_list = list(rawdata_df.columns)
    n_keys = len(keys_list)
    #    print keys_list

    #     # OLD VERSION
    #     # open CSV file with raw data
    #     data_file = open(opts.data_file,'r')
    #
    #     # read top line of data file, which defines the keys
    #     line = data_file.readline()
    #     # use commas for split operation
    #     words = line.split(',')
    #     # extract keys, get rid of empty entries
    #     keys_list = []
    #     for word in words:
    #         if word != '' and word != '\r\n':
    #             keys_list.append(word)

    # TODO: we should make this more general purpose
    # TODO: rewrite this in a more elegant form
    tmp_str = "   ...checking validity of data structure..."
    print tmp_str
    logfile.write(tmp_str + '\n')
    # check that the standard keys are amongst the first three keys, because that's all we have implemented so far
    if "Last name" not in keys_list[0:4]:
        tmp_str = "   ...'Last name' missing in data structure!"
        print tmp_str
        logfile.write(tmp_str + '\n')
        error_file.write(tmp_str + '\n')
        tmp_str = "Aborting due to invalid data structure!"
        logfile.write(tmp_str + '\n')
        error_file.write(tmp_str + '\n')
        sys.exit(tmp_str)
    elif "First name" not in keys_list[0:4]:
        tmp_str = "   ...'First name' missing in data structure!"
        print tmp_str
        logfile.write(tmp_str + '\n')
        error_file.write(tmp_str + '\n')
        tmp_str = "Aborting due to invalid data structure!"
        logfile.write(tmp_str + '\n')
        error_file.write(tmp_str + '\n')
        sys.exit(tmp_str)
    elif "Student ID" not in keys_list[0:4]:
        tmp_str = "   ...'Student ID' missing in data structure!"
        print tmp_str
        logfile.write(tmp_str + '\n')
        error_file.write(tmp_str + '\n')
        tmp_str = "Aborting due to invalid data structure!"
        logfile.write(tmp_str + '\n')
        error_file.write(tmp_str + '\n')
        sys.exit(tmp_str)
    elif "email" not in keys_list[0:4]:
        tmp_str = "   ...'email' missing in data structure!"
        print tmp_str
        logfile.write(tmp_str + '\n')
        error_file.write(tmp_str + '\n')
        tmp_str = "Aborting due to invalid data structure!"
        logfile.write(tmp_str + '\n')
        error_file.write(tmp_str + '\n')
        sys.exit(tmp_str)

    # check if all the grades are in float type (not object)
    for i in keys_list[4:]:
        if rawdata_df[i].dtypes == object:
            tmp_str = "Aborting due to unknown grade format in column %s!" % i
            logfile.write(tmp_str + '\n')
            error_file.write(tmp_str + '\n')
            sys.exit(tmp_str)

#     # OLD VERSION
# suitable data structure for raw and derived data: dictionary of dictionaries with mixed arguments -> stackoverflow
# template:
# data_dict['ID']['first_name'] = "xxx"
# data_dict['ID']['last_name'] = "yyy"
# data_dict['ID']['hw_grades'] = []    list of variable entries
# data_dict['ID']['midterm_grades'] = []    list of variable entries
# data_dict['ID']['final_grade'] = z   some number
#     data_dict = defaultdict(lambda : defaultdict(int))  # note: we use the anonymous function construct lambda here

# make ID list since this is our distinguishing dictionary key
#     id_list = []
#     tmp_str = "   ...reading in bulk of data..."
#     print tmp_str
#     logfile.write(tmp_str + '\n')

# use standard read in with infinite loop construct
#     while 1:
#         line = data_file.readline()
#         if not line: break
#         words = line.split(',')
#         # temporary data list
#         data_list = []
#         for word in words:
#             # get rid of junk data
#             if word != '' and '\r\n' not in word:   # note: we had to make junk removal more general
#                 # populate the temporary data_list
#                 data_list.append(word)

#         # continue if data_list is emptycheck that we don't have an empty list
#         if len(data_list) == 0:
#             continue
#         # check that the data_list and key_list have to have the same lenght
#         elif len(data_list) != n_keys:
#             tmp_str = "   ...invalid data entry (wrong number of data entries): " + line
#             print tmp_str
#             logfile.write(tmp_str + '\n')
#             error_file.write(tmp_str + '\n')
#             tmp_str = "Aborting due to invalid data entry!"
#             logfile.write(tmp_str + '\n')
#             error_file.write(tmp_str + '\n')
#             sys.exit(tmp_str)
#         # TODO: think about a more sophisticated handling in case of problems
#
#
#         # find index of list element in keys_list that contains the id
#         id_index = keys_list.index("Student ID")
#         # get id
#         id = data_list[id_index]
#         # add id to id_list
#         id_list.append(id)
#         # set up hw and midterm lists to get added to dictionary later
#         hw_list = []
#         midterm_list = []
#
#         for i in xrange(n_keys):    # note that we use xrange instead of range
#             key = keys_list[i]
#             data = data_list[i]
#             if key == "Last name":
#                 data_dict[id]['last_name'] = data
#             elif key == "First name":
#                 data_dict[id]['first_name'] = data
#             elif key == "Student ID":
#                 continue
#             elif 'HW' in key:
#                 hw_list.append(float(data))         # don't forget to convert string to float
#             elif (key == 'M1') or (key == 'M2'):
#                 midterm_list.append(float(data))    # don't forget to convert string to float
#             elif key == 'Final':
#                 data_dict[id]['final_grade'] = float(data)  # don't forget to convert string to float
#             else:
#                 tmp_str = "Aborting due to unknown key!"
#                 logfile.write(tmp_str + '\n')
#                 error_file.write(tmp_str + '\n')
#                 sys.exit(tmp_str)
#
#         # now we have to put lists into dictionary
#         data_dict[id]['hw_grades'] = hw_list
#         data_dict[id]['midterm_grades'] = midterm_list
#
#
#     # close file
#     data_file.close()

# some bookkeeping on where we stand int the semester
    n_hws = 0
    n_midterms = 0
    n_final = 0
    for key in keys_list[4:]:
        if "HW" in key:
            n_hws += 1
        elif "M" in key:
            n_midterms += 1
        elif "Final" in key:
            n_final += 1
        else:
            tmp_str = "Aborting due to unknown key!"
            logfile.write(tmp_str + '\n')
            error_file.write(tmp_str + '\n')
            sys.exit(tmp_str)

#    print n_hws
#    print n_midterms
#    print n_final

    tmp_str = "...data acquisition finished."
    print tmp_str
    logfile.write(tmp_str + '\n')

    #################################################################################################

    tmp_str = "------------------------------------------------------------------------------ "
    print tmp_str
    logfile.write(tmp_str + '\n')
    tmp_str = "Summary of acquired data:"
    print tmp_str
    logfile.write(tmp_str + '\n')

    tmp_str = "   Number of students:  " + str(len(rawdata_df))
    print tmp_str
    logfile.write(tmp_str + '\n')
    tmp_str = "   Number of homeworks: " + str(n_hws)
    print tmp_str
    logfile.write(tmp_str + '\n')
    tmp_str = "   Number of midterms:  " + str(n_midterms)
    print tmp_str
    logfile.write(tmp_str + '\n')
    tmp_str = "   Number of finals:    " + str(n_final)
    print tmp_str
    logfile.write(tmp_str + '\n')
    # TODO: this should be better formatted

    #################################################################################################

    tmp_str = "------------------------------------------------------------------------------ "
    print tmp_str
    logfile.write(tmp_str + '\n')
    tmp_str = "Starting calculation of grades and grade projections..."
    print tmp_str
    logfile.write(tmp_str + '\n')

    #     print "Info"
    #     print rawdata_df.info()
    #     print "Keys"
    #     print rawdata_df.keys()
    #     print "Index"
    #     print rawdata_df.index
    #     print "Columns"
    #     print rawdata_df.columns
    #     print "Values"
    #     print rawdata_df.values
    #     print "Describe"
    #     print rawdata_df.describe()

    # TODO: this is very inelegant and should be changed
    # Set up projection dataframe
    hwdata_df = rawdata_df.copy()
    examdata_df = rawdata_df.copy()
    # empty all data fields in projection_df
    for i in xrange(4, n_keys):
        key = keys_list[i]
        if 'HW' in key:
            examdata_df.drop(key, axis=1, inplace=True)
        elif key in ('M1', 'M2', 'F'):
            hwdata_df.drop(key, axis=1, inplace=True)

#    print hwdata_df
#    print examdata_df

    hwkeys_list = list(hwdata_df.columns)
    n_hwkeys = len(hwkeys_list)

    examkeys_list = list(examdata_df.columns)
    n_examkeys = len(examkeys_list)

    acc_hwdata_df = hwdata_df.copy()
    acc_examdata_df = examdata_df.copy()

    for i in xrange(4, n_hwkeys):
        key = hwkeys_list[i]
        if key == 'HW1':
            continue
        else:
            prevkey = hwkeys_list[i - 1]
            acc_hwdata_df[key] += acc_hwdata_df[prevkey]

    for i in xrange(4, n_examkeys):
        key = examkeys_list[i]
        if key == 'M1':
            continue
        else:
            prevkey = examkeys_list[i - 1]
            acc_examdata_df[key] += acc_examdata_df[prevkey]

#    print acc_hwdata_df
#    print acc_examdata_df

    av_hwdata_df = acc_hwdata_df.copy()
    av_examdata_df = acc_examdata_df.copy()
    minmax_midtermdata_df = examdata_df.copy()

    for i in xrange(4, n_hwkeys):
        key = hwkeys_list[i]
        hw_n = int(key[2:])
        av_hwdata_df[key] = 1.0 * av_hwdata_df[key] / hw_n

    for i in xrange(4, n_examkeys):
        key = examkeys_list[i]
        if key == 'F':
            av_examdata_df[key] = 1.0 * av_examdata_df[key] / 3
        else:
            exam_n = int(key[1:])
            av_examdata_df[key] = 1.0 * av_examdata_df[key] / exam_n

    print "Here we there yet?"

    if n_midterms == 2:
        print "Here we are now"
        print minmax_midtermdata_df
        sys.exit()

#    print av_hwdata_df
#    print av_examdata_df

    projection_df = rawdata_df.copy()
    for i in xrange(4, n_keys):
        key = keys_list[i]
        projection_df[key] = 0
        if key in ('HW1', 'HW2', 'HW3', 'HW4'):
            projection_df[key] = av_hwdata_df[key]
        elif key == 'M1':
            projection_df[
                key] = 0.2 * av_hwdata_df['HW4'] + 0.8 * av_examdata_df['M1']
        elif key in ('HW5', 'HW6', 'HW7', 'HW8'):
            projection_df[
                key] = 0.2 * av_hwdata_df[key] + 0.8 * av_examdata_df['M1']
        elif key == 'M2':
            projection_df[
                key] = 0.2 * av_hwdata_df['HW8'] + 0.3 * av_examdata_df['M1']
        else:
            sys.exit("Not yet implemented!")

#     print_df(projection_df)

#     print keys_list

#     # OLD VERSION
#     # empty all data fields in projection_df
#     for i in xrange(4,n_keys):
#         key = keys_list[i]
#         accumulateddata_df[key] = 0
#         projection_df[key] = 0
#         if key == 'HW1':
#             projection_df[key] = rawdata_df[key]
#         elif key in ('HW2', 'HW3','HW4'):
#             for j in xrange(4,i+1):
#                 keytmp = keys_list[j]
#                 projection_df[key] += rawdata_df[keytmp]
#             projection_df[key] = projection_df[key]/(i-3)
#         elif key == 'M1':
#             projection_df[key] = 0.2*projection_df['HW4']+0.8*rawdata_df['M1']
#         elif key in ('HW5', 'HW6','HW7'):
#             for j in xrange(4,i+1):
#                 keytmp = keys_list[j]
#                 projection_df[key] += rawdata_df[keytmp]
#             projection_df[key] = projection_df[key]/(i-3)

# open text dump file
    messagefile_name = 'messagefile_' + std_datetime_str('date') + '.txt'
    messagefile = open(messagefile_name, 'w', 0)

    for index in rawdata_df.index:
        tmp_str = rawdata_df.loc[index, 'email']
        messagefile.write(tmp_str + '\n')
        update_n = n_hws + n_midterms + n_final
        tmp_str = "Grade summary and projection for CE 317 (#" + str(
            update_n) + ")"
        messagefile.write(tmp_str + '\n\n')

        firstname = rawdata_df.loc[index, 'First name'].split()[0]
        if firstname == ".":
            firstname = rawdata_df.loc[index, 'Last name'].split()[0]

        tmp_str = "Dear " + firstname + ","
        messagefile.write(tmp_str + '\n\n')

        tmp_str = "I'm writing to give you a brief update on where you stand in CE 317. Here are the marks I have on record for you so far:"
        messagefile.write(tmp_str + '\n')

        #         tmp_str = str(rawdata_df.loc[index,'HW1':])
        # #         tmp_str = str(rawdata_df[index, 4:])
        #         print tmp_str
        #         sys.exit()
        #         messagefile.write(tmp_str + '\n\n')
        for i in xrange(4, n_keys):
            key = keys_list[i]
            tmp_str = key + ": "
            if len(key) == 2:
                tmp_str += " "
            tmp_str += " %5.1f " % (rawdata_df.iloc[index, i])
            messagefile.write(tmp_str + '\n')
        messagefile.write('\n\n')

        tmp_str = "In the following you can find the class statistics for each assignment/exam:"
        messagefile.write(tmp_str + '\n\n')

        pd.options.display.float_format = '{:7.2f}'.format
        tmp_str = str(rawdata_df.loc[:, 'HW1':].describe())
        #         tmp_str = str(rawdata_df.describe())
        messagefile.write(tmp_str + '\n\n\n')

        tmp_str = "Based on your assignment marks, I arrived at the following grade projections:"
        messagefile.write(tmp_str + '\n')

        for i in xrange(4, n_keys):
            key = keys_list[i]
            tmp_str = "Grade projection after " + key + ": "
            if len(key) == 2:
                tmp_str += " "
            tmp_str += " %5.1f " % (projection_df.iloc[index, i])
            tmp_str += "(" + percent2lettergrade(projection_df.iloc[index,
                                                                    i]) + ")"
            messagefile.write(tmp_str + '\n')
        messagefile.write('\n')

        if percent2lettergrade(projection_df.iloc[index, i]) == 'A':
            tmp_str = "Well done - excellent job, " + firstname + "! Keep up the good work!"
            messagefile.write(tmp_str + '\n\n')

        tmp_str = "Note: These grade projections are based on default 5-point lettergrade brackets as well as the weights for exams and homeworks indicated in the course syllabus. "
        tmp_str += "Your prior homework and exam averages are used as placeholders for the missing homeworks and exams, respectively. \n"
        tmp_str += "They do NOT yet incorporate extra credit for in-class participation, nor do they consider potential adjustments to the grade brackets. \n"
        tmp_str += "I'm providing the grades after each assignment to give you an idea about your progress. "
        tmp_str += "It is worth noting that grades tend to pick up after the first midterm.\n"
        tmp_str += "Please let me know if you have any questions or concerns."
        messagefile.write(tmp_str + '\n\n')

        if opts.requestmeeting is True:
            if projection_df.iloc[index, i] < 66:
                tmp_str = firstname + ", since you are current not doing so great, I wanted to offer to have a meeting with you to see what we can do to improve things. Please let me know what you think."
                messagefile.write(tmp_str + '\n\n\n')

        tmp_str = "Best wishes,"
        messagefile.write(tmp_str + '\n\n')

        tmp_str = "JH"
        messagefile.write(tmp_str + '\n\n\n')
        tmp_str = "------------------------------------------------------------------------------ "
        messagefile.write(tmp_str + '\n\n')

    messagefile.close()
    sys.exit("test 14")

    # TODO compute other cases

    print_df(projection_df)
    print_df(rawdata_df)

    #     classsummary_df = rawdata_df
    #     print_df(classsummary_df)

    # create lists of various grades
    for id in id_list:
        data_dict[id]['hw_grade_av'] = []
        data_dict[id]['overall_grade'] = []
        data_dict[id]['overall_lettergrade'] = []

    # create assignment keys list for better readability; introduce assignment keys list; note: we trade resources for readability
    assignment_keys_list = keys_list[3:]
    n_assignment_keys = len(assignment_keys_list)

    # we want grades for every point during the semester, so we successively go through list of assignments and compute grade after each
    for i in xrange(n_assignment_keys):
        # determine number of homeworks at any point in semester
        n_hw = 0
        for key in assignment_keys_list[0:i + 1]:
            if "HW" in key: n_hw += 1
        for id in id_list:
            # distinguish different cases for grade projections, depending on where we stand in the semester
            if 'Final' in assignment_keys_list[
                    0:i +
                    1]:  # i.e., this is the final grade after all assignments are in
                hw_average = sum(data_dict[id]['hw_grades'][0:n_hw]) / len(
                    data_dict[id]['hw_grades'][0:n_hw])
                midterm_max = max(data_dict[id]['midterm_grades'])
                midterm_min = min(data_dict[id]['midterm_grades'])
                final = data_dict[id][
                    'final_grade']  # this is really for readability
            elif 'M2' in assignment_keys_list[0:i + 1]:
                hw_average = sum(data_dict[id]['hw_grades'][0:n_hw]) / len(
                    data_dict[id]['hw_grades'][0:n_hw])
                midterm_max = max(data_dict[id]['midterm_grades'])
                midterm_min = min(data_dict[id]['midterm_grades'])
                final = sum(data_dict[id]['midterm_grades']) / len(
                    data_dict[id]['midterm_grades'])
            elif 'M1' in assignment_keys_list[0:i + 1]:
                hw_average = sum(data_dict[id]['hw_grades'][0:n_hw]) / len(
                    data_dict[id]['hw_grades'][0:n_hw])
                midterm_max = max(data_dict[id]['midterm_grades'])
                midterm_min = min(data_dict[id]['midterm_grades'])
                final = sum(data_dict[id]['midterm_grades']) / len(
                    data_dict[id]['midterm_grades'])
            elif 'HW1' in assignment_keys_list[0:i + 1]:
                hw_average = sum(data_dict[id]['hw_grades'][0:n_hw]) / len(
                    data_dict[id]['hw_grades'][0:n_hw])
                midterm_max = hw_average
                midterm_min = hw_average
                final = hw_average
            else:
                tmp_str = "Aborting due to lack of reported grades!"
                logfile.write(tmp_str + '\n')
                error_file.write(tmp_str + '\n')
                sys.exit(tmp_str)

            # implement grading scheme: HW: 20%, better midterm 35%, worse midterm 15%, final: 30%
            overall_grade = 0.2 * hw_average + 0.35 * midterm_max + 0.15 * midterm_min + 0.3 * final
            # TODO: instead of hardwiring, we may want to build more flexibility in here

            overall_lettergrade = percent2lettergrade(overall_grade)

            # add computed information to data dictionary
            data_dict[id]['hw_grade_av'].append(hw_average)
            # TODO: we should take out the rounding here
            data_dict[id]['overall_grade'].append(round(overall_grade, 1))
            data_dict[id]['overall_lettergrade'].append(overall_lettergrade)

#     # output for testing
#     for id in id_list:
#         print str(id) + ' ' + str(data_dict[id]['overall_grade'])+ ' ' + str(data_dict[id]['overall_lettergrade'])

    tmp_str = "...calculation of grades and grade projections finished."
    print tmp_str
    logfile.write(tmp_str + '\n')

    #################################################################################################

    tmp_str = "------------------------------------------------------------------------------ "
    print tmp_str
    logfile.write(tmp_str + '\n')
    tmp_str = "Starting calculation of course statistics..."
    print tmp_str
    logfile.write(tmp_str + '\n')

    #     tmp_str = "   ...computing basic distribution statistics..."
    #     print tmp_str
    #     logfile.write(tmp_str + '\n')

    # create lists of lists with all the overall grades
    course_overall_grade_list = []
    course_overall_lettergrade_list = []
    course_overall_grade_stat_list = []

    # iterate through all assignments
    for j in xrange(n_assignment_keys):
        course_overall_grade_list.append([])
        course_overall_lettergrade_list.append([])
        for id in id_list:
            course_overall_grade_list[j].append(
                data_dict[id]['overall_grade'][j])
            course_overall_lettergrade_list[j].append(
                data_dict[id]['overall_lettergrade'][j])

        stat = distribution_stat(course_overall_grade_list[j])
        course_overall_grade_stat_list.append(stat)

        course_overall_grade_stat_list[j]['letter_av'] = percent2lettergrade(
            course_overall_grade_stat_list[j]['av'])
        course_overall_grade_stat_list[j][
            'letter_median'] = percent2lettergrade(
                course_overall_grade_stat_list[j]['median'])
        course_overall_grade_stat_list[j]['letter_min'] = percent2lettergrade(
            course_overall_grade_stat_list[j]['min'])
        course_overall_grade_stat_list[j]['letter_max'] = percent2lettergrade(
            course_overall_grade_stat_list[j]['max'])

        course_overall_grade_stat_list[j]['letter_dist'] = histogram(
            course_overall_lettergrade_list[j])

        # TODO: here we need a proper print statement now.
        print course_overall_grade_stat_list[j]
        print
        sys.exit("This is as far as it goes right now.")

    tmp_str = "   ...computing letter grade distribution..."
    print tmp_str
    logfile.write(tmp_str + '\n')

    # perform statistics, analysis, projections
    # compute current average according to grading rules
    # rank students
    # identify best, worst students
    # compile info for each student
    # visualize trends
    # add course participation into grading scheme
    # test different grading schemes

    tmp_str = "------------------------------------------------------------------------------ "
    print tmp_str

    #     print str(grade_total_average) + '  ' +  grade_total_average_letter

    # rank students
    # identify best, worst students
    # note: there is no good way to sort a nested dictionary by value, so we just create an auxillary dictionary
    tmp_list = []
    for id in id_list:
        tmp_tuple = (id, data_dict[id]['grade_total'])
        tmp_list.append(tmp_tuple)

    print tmp_list
    print
    print

    sorted_tmp_list = sorted(tmp_list, key=itemgetter(1))
    print sorted_tmp_list

    tmp_str = "... finished."
    print tmp_str
    logfile.write(tmp_str + '\n')
    tmp_str = "------------------------------------------------------------------------------ "
    print tmp_str
    logfile.write(tmp_str + '\n')

    #################################################################################################

    # wrap up section
    tmp_str = tot_exec_time_str(time_start) + "\n" + std_datetime_str()
    print tmp_str + 3 * '\n'
    logfile.write(tmp_str + 4 * '\n')
    logfile.close()
    error_file.close()

    # check whether error_file contains content
    chk_rmfile(opts.error_file)

    return 0  #successful termination of program