"""
Extend the original grade files.

Adds three columns: course category, class size, teacher.
"""
import difflib
import os

import pandas as pd

from DB.DBUtil import DB

# Look up a course by grade (enrollment year), course name and department
# together; if nothing is found, fall back to grade and course name only.
if __name__ == "__main__":
    root = "C://zxl/Data/GPA-large/"
    gpa_dir = root + "grade/records/"
    save_dir = root + "grade/records_completed/"
    profile_path = root + "stu/profile.csv"

    db = DB()

    # Every file already present in save_dir marks its student as processed;
    # the key is the file name without its last four characters
    # (presumably the ".csv" extension -- confirm).
    processed_dic = {
        file_name[:-4]: True for file_name in os.listdir(save_dir)
    }

    profile_df = pd.read_csv(profile_path)
    # stu_id -> first four characters of enroll_time
    # (presumably the enrollment year -- confirm against profile.csv).
    stu_enrolltime = {
        x: y[:4] for x, y in zip(profile_df.stu_id, profile_df.enroll_time)
    }
    # stu_id -> dep with its first 2 and last 8 characters removed.
    # NOTE(review): the meaning of the stripped prefix/suffix is not visible
    # here -- confirm against the format of the dep column.
    stu_dep = {x: y[2:-8] for x, y in zip(profile_df.stu_id, profile_df.dep)}

    for file_name in os.listdir(gpa_dir):
        stu_id = file_name[:-4]
        # Skip students whose output file already exists.
        if stu_id in processed_dic:
            continue
# NOTE(review): the first two statements below are the tail of a function
# whose header lies above this chunk (presumably ExtractConsumeFea, which is
# called further down -- confirm). Their nesting depth is reconstructed from
# context and must be checked against the code that precedes them.
        # One feature row: the semester followed by the break/lunch/dinner
        # counts at index 0, then index 1, then index 2
        # (`break_count` presumably means breakfast -- confirm).
        res.append([
            semester, break_count[0], lunch_count[0], dinner_count[0],
            break_count[1], lunch_count[1], dinner_count[1], break_count[2],
            lunch_count[2], dinner_count[2]
        ])
    return res


# Script entry point: reads every per-student CSV in complete_consume_dir and
# gathers the feature rows returned by ExtractConsumeFea.
# NOTE(review): `os` is used below but its import is not visible in this
# chunk -- confirm it is imported at the top of the file.
if __name__ == "__main__":
    root = "C://zxl/Data/GPA-large/"
    stu_file = root + "stu/stu_list.csv"
    # save_dir, statistic_path, db and stu_df are not used in the visible part
    # of the script; they are presumably consumed further down -- confirm.
    save_dir = root + "consume/records/"
    complete_consume_dir = root + "new_consume/records/"
    statistic_path = root + "processed/consume2.csv"
    db = DB()
    stu_df = pd.read_csv(stu_file)
    # Collect the per-semester consumption features of every student.
    stu_lst = []
    fea_m = []
    for file_name in os.listdir(complete_consume_dir):
        # File name minus its last four characters
        # (presumably the ".csv" extension -- confirm).
        stu_id = file_name[:-4]
        df = pd.read_csv(complete_consume_dir + file_name)
        fea = ExtractConsumeFea(df)
        # Append stu_id once per feature row so that stu_lst stays aligned,
        # entry for entry, with the rows added to fea_m below.
        i = 0
        while i < len(fea):
            stu_lst.append(stu_id)
            i += 1
        fea_m.extend(fea)
# -*- coding: utf-8 -*-
# @Time : 2019/11/19 15:29
# @Author : zxl
# @FileName: InsertCourse.py
import pandas as pd

from DB.DBUtil import DB

# Reads the course CSV, renames its columns and walks the rows, cleaning the
# text fields of each one.
# NOTE(review): the file name suggests the rows are inserted into the
# database, but that step is not visible in this chunk -- confirm below.
if __name__ == "__main__":
    db = DB()

    file_path = "C://zxl/Data/StudyRelated/course.csv"
    df = pd.read_csv(file_path)

    # Column names assigned to the frame, in file order.
    column_names = [
        'idx1', 'dep_id', 'dep_name', 'c_id', 'c_name', 'stu_num', 'grade',
        'type', 'teacher', 'week', 'week_num', 'section', 'classroom',
        'campus',
    ]
    df.columns = column_names

    # Iterate every column except the leading 'idx1', row by row.
    row_values = zip(*[df[name] for name in column_names[1:]])

    i = 1
    for (dep_id, dep_name, c_id, c_name, stu_num, grade, type, teacher, week,
         week_num, section, classroom, campus) in row_values:
        # Force both names to text and drop single quotes (presumably so they
        # can later be embedded in a quoted SQL string -- confirm).
        c_name = str(c_name).replace('\'', '')
        teacher = str(teacher).replace('\'', '')

        # A missing week_num stringifies to 'nan'; replace it with '0'.
        if str(week_num) == 'nan':
            week_num = '0'

        # Progress output: 1-based index of the current row.
        print(i)
        i += 1