def weekly2all(data_overall_processed_file, data_overall_raw_file, data_weekly_json, data_weekly_raw_file):
    """Fold one weekly JSON export into the overall raw and processed CSVs.

    Steps, in order:
      1. Convert ``data_weekly_json`` to ``data_weekly_raw_file`` with
         ``parse_json_2_csv.json2csv``.
      2. Load the weekly raw CSV and the existing overall raw CSV.
      3. Combine them with ``process_overlap`` and drop exact duplicate rows.
      4. Run ``preprocess_data`` on the combined raw frame.
      5. Overwrite ``data_overall_raw_file`` with the combined raw frame and
         write the processed frame to ``data_overall_processed_file``.

    The row/column shape of the combined frame is printed before and after
    de-duplication.

    Args:
        data_overall_processed_file: Output path for the processed CSV.
        data_overall_raw_file: Path of the overall raw CSV; read, then
            overwritten with the combined data.
        data_weekly_json: Path of the weekly JSON export to add.
        data_weekly_raw_file: Path where the weekly raw CSV is written.
    """
    # Materialise the weekly JSON as a CSV before anything is read.
    parse_json_2_csv.json2csv(data_weekly_json, data_weekly_raw_file)

    weekly_frame = pd.read_csv(data_weekly_raw_file, encoding='utf8')
    overall_frame = pd.read_csv(data_overall_raw_file, encoding='utf8')

    # NOTE(review): process_overlap is defined elsewhere; presumably it
    # reconciles rows present in both frames -- confirm against its definition.
    combined = process_overlap(overall_frame, weekly_frame)
    print(combined.shape)

    # Rebuild the frame from the de-duplicated value array so the result
    # carries a fresh 0..n-1 index instead of the surviving original labels.
    unique_rows = combined.drop_duplicates().values
    combined = pd.DataFrame(unique_rows, columns=combined.columns)
    print(combined.shape)

    processed = preprocess_data(combined)

    combined.to_csv(data_overall_raw_file, index=False, encoding='utf8')
    processed.to_csv(data_overall_processed_file, index=False, encoding='utf8')
def main_old():
    """Rebuild the processed teacher and student CSVs from the full JSON exports.

    For each dataset (teacher first, then student) the JSON events file is
    converted to a raw CSV with ``parse_json_2_csv.json2csv``, the raw CSV is
    loaded and passed through ``preprocess_data``, and the result is written
    to the processed CSV without an index column.
    """
    # To add weekly data to the current overall data instead, use weekly2all:
    # weekly2all('teacher.csv','raw/teacher_raw.csv','data/anonymous-teacher-events_1-12.json', 'raw/teacher_1-12_raw.csv')
    # weekly2all('student.csv','raw/student_raw.csv','data/anonymous-student-events_1-12.json', 'raw/student_1-12_raw.csv')

    # (events JSON, raw CSV, processed CSV) for every dataset to pre-process.
    datasets = (
        ('data/anonymous-teacher-events.json', 'raw/teacher_raw.csv', 'teacher.csv'),
        ('data/anonymous-student-events.json', 'raw/student_raw.csv', 'student.csv'),
    )

    for events_json, raw_csv, processed_csv in datasets:
        parse_json_2_csv.json2csv(events_json, raw_csv)
        raw_frame = pd.read_csv(raw_csv)
        cleaned = preprocess_data(raw_frame)
        cleaned.to_csv(processed_csv, index=False, encoding='utf8')