def process_downloaded_table(table_name): table_file_name = table_name + '.download' downloaded_table_full_path = os.path.join(browser_download_directory, table_file_name) downloaded_file_reader = open(downloaded_table_full_path, 'r') raw_line_at_a_time = downloaded_file_reader.readlines() directory_name = os.path.dirname(downloaded_table_full_path) cleaned_file_name = table_name + '.clean_data' cleaned_file_path = os.path.join(directory_name, cleaned_file_name) clean_file_writer = open(cleaned_file_path, 'w') report_string = '{0} opened for processing'.format( downloaded_table_full_path) line_counter = 0 for raw_line in raw_line_at_a_time: cleaned_line = tools.clean_string_for_sql(raw_line) # replacing |s with \n will put individual records on lines by themselves split_records = cleaned_line.replace('|', '\n') #print split_records clean_file_writer.writelines(split_records) line_counter = line_counter + 1 downloaded_file_reader.close() clean_file_writer.close() report_string = report_string + '\n{0} cleaned lines written to:{1}'.format( line_counter, cleaned_file_path) return report_string
def process_downloaded_table(table_name): table_file_name = table_name + ".download" downloaded_table_full_path = os.path.join(browser_download_directory, table_file_name) downloaded_file_reader = open(downloaded_table_full_path, "r") raw_line_at_a_time = downloaded_file_reader.readlines() directory_name = os.path.dirname(downloaded_table_full_path) cleaned_file_name = table_name + ".clean_data" cleaned_file_path = os.path.join(directory_name, cleaned_file_name) clean_file_writer = open(cleaned_file_path, "w") report_string = "{0} opened for processing".format(downloaded_table_full_path) line_counter = 0 for raw_line in raw_line_at_a_time: cleaned_line = tools.clean_string_for_sql(raw_line) # replacing |s with \n will put individual records on lines by themselves split_records = cleaned_line.replace("|", "\n") # print split_records clean_file_writer.writelines(split_records) line_counter = line_counter + 1 downloaded_file_reader.close() clean_file_writer.close() report_string = report_string + "\n{0} cleaned lines written to:{1}".format(line_counter, cleaned_file_path) return report_string
def download_students_calculated(): calculated_field_list = "Id\nStudent_Number\n*gpa Method=simple\n*gpa Method=weighted\n*gpa Term=s1\n*gpa Term=s2\n*gpa Term=1700\n*gpa Term=1800\n*gpa Term=1900\n*gpa Term=2000\n*gpa Term=2100\n^(*period_info;1;teacher_name)\n^(*period_info;1;course_name)\n^(*period_info;1;course_number)\n^(*period_info;1;section_number)\n^(*period_info;1;current_grade)\n^(*period_info;1;room)\n^(*period_info;2;teacher_name)\n^(*period_info;2;course_name)\n^(*period_info;2;course_number)\n^(*period_info;2;section_number)\n^(*period_info;2;current_grade)\n^(*period_info;2;room)\n^(*period_info;3;teacher_name)\n^(*period_info;3;course_name)\n^(*period_info;3;course_number)\n^(*period_info;3;section_number)\n^(*period_info;3;current_grade)\n^(*period_info;3;room)\n^(*period_info;4;teacher_name)\n^(*period_info;4;course_name)\n^(*period_info;4;course_number)\n^(*period_info;4;section_number)\n^(*period_info;4;current_grade)\n^(*period_info;4;room)\n^(*period_info;5;teacher_name)\n^(*period_info;5;course_name)\n^(*period_info;5;course_number)\n^(*period_info;5;section_number)\n^(*period_info;5;current_grade)\n^(*period_info;5;room)\n^(*period_info;6;teacher_name)\n^(*period_info;6;course_name)\n^(*period_info;6;course_number)\n^(*period_info;6;section_number)\n^(*period_info;6;current_grade)\n^(*period_info;6;room)" # Clear out partial downloads and other files that might interfere for file_name in os.listdir(browser_download_directory): if ("student" in file_name) or ("calculated" in file_name) or ("part" in file_name): full_path_to_doomed_file = os.path.join(browser_download_directory, file_name) os.remove(full_path_to_doomed_file) if os.path.exists(full_path_to_doomed_file): print "{0} still lives!".format(full_path_to_doomed_file) else: print "Removed file:{0}".format(full_path_to_doomed_file) driver = webdriver.Chrome("/home/scott/chromedriver") driver.implicitly_wait(30) base_url = config_server_root url_of_admin_page = base_url + config_pw_page print "Getting {0}".format(url_of_admin_page) driver.get(url_of_admin_page) # Login page driver.find_element_by_id("fieldPassword").clear() driver.find_element_by_id("fieldPassword").send_keys(config_user_password) driver.find_element_by_id("btnEnter").click() # Main page print "choosing school:{0}".format("District Office") driver.find_element_by_id("schoolContext").click() select = Select(driver.find_element_by_name("Schoolid")) select.select_by_visible_text("District Office") print "pausing 10 seconds" time.sleep(10) driver.find_element_by_id("navSetupSystem").click() print "pausing 10 seconds" time.sleep(10) driver.find_element_by_link_text("Direct Database Export (DDE)").click() print "choosing table:{0}".format("1") select = Select(driver.find_element_by_name("filenum")) select.select_by_value("1") print "table {0} selected".format("1") print "pausing 15 seconds" time.sleep(15) print "finding element by name searchselectall" driver.find_element_by_name("searchselectall").click() print "clicking Export Records" driver.find_element_by_link_text("Export Records").click() driver.find_element_by_id("tt").clear() driver.find_element_by_id("tt").send_keys(calculated_field_list) select = Select(driver.find_element_by_name("fielddelim")) select.select_by_visible_text("Other:") driver.find_element_by_name("custfielddelim").clear() driver.find_element_by_name("custfielddelim").send_keys("^") select = Select(driver.find_element_by_name("recdelim")) select.select_by_visible_text("Other:") driver.find_element_by_name("custrecdelim").clear() driver.find_element_by_name("custrecdelim").send_keys("|") driver.find_element_by_name("columntitles").click() print "pausing 15 more seconds" time.sleep(15) print "downloading calculated values from the students table" driver.find_element_by_id("btnSubmit").click() print "pausing 30 seconds for sluggish download starts" time.sleep(30) while os.path.exists(browser_partial_download): print "{0} exists. Waiting.".format(browser_partial_download) time.sleep(30) print "{0} does not exist. Moving on.".format(browser_partial_download) print "pausing 15 seconds for the file system to settle" time.sleep(15) new_download_path = os.path.join(browser_download_directory, "students_calculated.download") print "will rename: {0}".format(browser_completed_download) print "to: {0}".format(new_download_path) os.rename(browser_completed_download, new_download_path) print "download renamed to:{0}".format(new_download_path) print "pausing 10 seconds" time.sleep(10) print "quitting web driver" driver.quit() print "pausing another 15 seconds for everything to settle" time.sleep(15) downloaded_file_reader = open(new_download_path, "r") raw_line_at_a_time = downloaded_file_reader.readlines() directory_name = os.path.dirname(new_download_path) cleaned_file_name = "students_calculated.clean_data" cleaned_file_path = os.path.join(directory_name, cleaned_file_name) clean_file_writer = open(cleaned_file_path, "w") report_string = "{0} opened for processing".format(new_download_path) line_counter = 0 for raw_line in raw_line_at_a_time: cleaned_line = tools.clean_string_for_sql(raw_line) # replacing |s with \n will put individual records on lines by themselves split_records = cleaned_line.replace("|", "\n") # print split_records clean_file_writer.writelines(split_records) line_counter = line_counter + 1 downloaded_file_reader.close() clean_file_writer.close() db_connection = MySQLdb.connect(host=db_host, user=db_user, passwd=db_password, db=db_name) cursor = db_connection.cursor() delete_statement = "DELETE FROM students_calculated" print 'executing delete statement "{0}"'.format(delete_statement) cursor.execute(delete_statement) try: print "re-opening the data file {0}".format(cleaned_file_path) cleaned_file_reader = open(cleaned_file_path, "r") clean_line_at_a_time = cleaned_file_reader.readlines() except: print 'Strange, I can not open the file "{0}"'.format(cleaned_file_path) clean_line_at_a_time = "" sql_statement_counter = 0 for clean_line in clean_line_at_a_time: data_list = clean_line.split("^") sql_data_string = "('" + "','".join(data_list) + "')" sql_string = ( "INSERT INTO students_calculated (student_id,student_number,gpa_simple,gpa_weighted,gpa_s1,gpa_s2,gpa_2007,gpa_2008,gpa_2009,gpa_2010,gpa_2011,period_1_teacher,period_1_course_name,period_1_course_number,period_1_section_number,period_1_current_grade,period_1_room,period_2_teacher,period_2_course_name,period_2_course_number,period_2_section_number,period_2_current_grade,period_2_room,period_3_teacher,period_3_course_name,period_3_course_number,period_3_section_number,period_3_current_grade,period_3_room,period_4_teacher,period_4_course_name,period_4_course_number,period_4_section_number,period_4_current_grade,period_4_room,period_5_teacher,period_5_course_name,period_5_course_number,period_5_section_number,period_5_current_grade,period_5_room,period_6_teacher,period_6_course_name,period_6_course_number,period_6_section_number,period_6_current_grade,period_6_room) VALUES " + sql_data_string ) print sql_string cursor = db_connection.cursor() cursor.execute(sql_string) db_connection.commit() sql_statement_counter = sql_statement_counter + 1 try: cleaned_file_reader.close() db_connection.close() except: print "Error trying to close the cleaned_file_reader and/or close the connection" print "Executed {0} SQL statements on table students_calculated" return sql_statement_counter
def download_students_calculated(): calculated_field_list = 'Id\nStudent_Number\n*gpa Method=simple\n*gpa Method=weighted\n*gpa Term=s1\n*gpa Term=s2\n*gpa Term=1700\n*gpa Term=1800\n*gpa Term=1900\n*gpa Term=2000\n*gpa Term=2100\n^(*period_info;1;teacher_name)\n^(*period_info;1;course_name)\n^(*period_info;1;course_number)\n^(*period_info;1;section_number)\n^(*period_info;1;current_grade)\n^(*period_info;1;room)\n^(*period_info;2;teacher_name)\n^(*period_info;2;course_name)\n^(*period_info;2;course_number)\n^(*period_info;2;section_number)\n^(*period_info;2;current_grade)\n^(*period_info;2;room)\n^(*period_info;3;teacher_name)\n^(*period_info;3;course_name)\n^(*period_info;3;course_number)\n^(*period_info;3;section_number)\n^(*period_info;3;current_grade)\n^(*period_info;3;room)\n^(*period_info;4;teacher_name)\n^(*period_info;4;course_name)\n^(*period_info;4;course_number)\n^(*period_info;4;section_number)\n^(*period_info;4;current_grade)\n^(*period_info;4;room)\n^(*period_info;5;teacher_name)\n^(*period_info;5;course_name)\n^(*period_info;5;course_number)\n^(*period_info;5;section_number)\n^(*period_info;5;current_grade)\n^(*period_info;5;room)\n^(*period_info;6;teacher_name)\n^(*period_info;6;course_name)\n^(*period_info;6;course_number)\n^(*period_info;6;section_number)\n^(*period_info;6;current_grade)\n^(*period_info;6;room)' # Clear out partial downloads and other files that might interfere for file_name in os.listdir(browser_download_directory): if ('student' in file_name) or ('calculated' in file_name) or ('part' in file_name): full_path_to_doomed_file = os.path.join(browser_download_directory, file_name) os.remove(full_path_to_doomed_file) if os.path.exists(full_path_to_doomed_file): print '{0} still lives!'.format(full_path_to_doomed_file) else: print 'Removed file:{0}'.format(full_path_to_doomed_file) driver = webdriver.Chrome('/home/scott/chromedriver') driver.implicitly_wait(30) base_url = config_server_root url_of_admin_page = base_url + config_pw_page print 'Getting {0}'.format(url_of_admin_page) driver.get(url_of_admin_page) # Login page driver.find_element_by_id("fieldPassword").clear() driver.find_element_by_id("fieldPassword").send_keys(config_user_password) driver.find_element_by_id("btnEnter").click() # Main page print 'choosing school:{0}'.format('District Office') driver.find_element_by_id("schoolContext").click() select = Select(driver.find_element_by_name("Schoolid")) select.select_by_visible_text('District Office') print 'pausing 10 seconds' time.sleep(10) driver.find_element_by_id("navSetupSystem").click() print 'pausing 10 seconds' time.sleep(10) driver.find_element_by_link_text("Direct Database Export (DDE)").click() print 'choosing table:{0}'.format('1') select = Select(driver.find_element_by_name("filenum")) select.select_by_value('1') print 'table {0} selected'.format('1') print 'pausing 15 seconds' time.sleep(15) print 'finding element by name searchselectall' driver.find_element_by_name("searchselectall").click() print 'clicking Export Records' driver.find_element_by_link_text("Export Records").click() driver.find_element_by_id("tt").clear() driver.find_element_by_id("tt").send_keys(calculated_field_list) select = Select(driver.find_element_by_name("fielddelim")) select.select_by_visible_text("Other:") driver.find_element_by_name("custfielddelim").clear() driver.find_element_by_name("custfielddelim").send_keys("^") select = Select(driver.find_element_by_name("recdelim")) select.select_by_visible_text("Other:") driver.find_element_by_name("custrecdelim").clear() driver.find_element_by_name("custrecdelim").send_keys("|") driver.find_element_by_name("columntitles").click() print 'pausing 15 more seconds' time.sleep(15) print 'downloading calculated values from the students table' driver.find_element_by_id("btnSubmit").click() print 'pausing 30 seconds for sluggish download starts' time.sleep(30) while os.path.exists(browser_partial_download): print '{0} exists. Waiting.'.format(browser_partial_download) time.sleep(30) print '{0} does not exist. Moving on.'.format(browser_partial_download) print 'pausing 15 seconds for the file system to settle' time.sleep(15) new_download_path = os.path.join(browser_download_directory, 'students_calculated.download') print 'will rename: {0}'.format(browser_completed_download) print 'to: {0}'.format(new_download_path) os.rename(browser_completed_download, new_download_path) print 'download renamed to:{0}'.format(new_download_path) print 'pausing 10 seconds' time.sleep(10) print 'quitting web driver' driver.quit() print 'pausing another 15 seconds for everything to settle' time.sleep(15) downloaded_file_reader = open(new_download_path, 'r') raw_line_at_a_time = downloaded_file_reader.readlines() directory_name = os.path.dirname(new_download_path) cleaned_file_name = 'students_calculated.clean_data' cleaned_file_path = os.path.join(directory_name, cleaned_file_name) clean_file_writer = open(cleaned_file_path, 'w') report_string = '{0} opened for processing'.format(new_download_path) line_counter = 0 for raw_line in raw_line_at_a_time: cleaned_line = tools.clean_string_for_sql(raw_line) # replacing |s with \n will put individual records on lines by themselves split_records = cleaned_line.replace('|', '\n') #print split_records clean_file_writer.writelines(split_records) line_counter = line_counter + 1 downloaded_file_reader.close() clean_file_writer.close() db_connection = MySQLdb.connect(host=db_host, user=db_user, passwd=db_password, db=db_name) cursor = db_connection.cursor() delete_statement = 'DELETE FROM students_calculated' print 'executing delete statement "{0}"'.format(delete_statement) cursor.execute(delete_statement) try: print 're-opening the data file {0}'.format(cleaned_file_path) cleaned_file_reader = open(cleaned_file_path, 'r') clean_line_at_a_time = cleaned_file_reader.readlines() except: print 'Strange, I can not open the file "{0}"'.format( cleaned_file_path) clean_line_at_a_time = '' sql_statement_counter = 0 for clean_line in clean_line_at_a_time: data_list = clean_line.split('^') sql_data_string = "('" + "','".join(data_list) + "')" sql_string = 'INSERT INTO students_calculated (student_id,student_number,gpa_simple,gpa_weighted,gpa_s1,gpa_s2,gpa_2007,gpa_2008,gpa_2009,gpa_2010,gpa_2011,period_1_teacher,period_1_course_name,period_1_course_number,period_1_section_number,period_1_current_grade,period_1_room,period_2_teacher,period_2_course_name,period_2_course_number,period_2_section_number,period_2_current_grade,period_2_room,period_3_teacher,period_3_course_name,period_3_course_number,period_3_section_number,period_3_current_grade,period_3_room,period_4_teacher,period_4_course_name,period_4_course_number,period_4_section_number,period_4_current_grade,period_4_room,period_5_teacher,period_5_course_name,period_5_course_number,period_5_section_number,period_5_current_grade,period_5_room,period_6_teacher,period_6_course_name,period_6_course_number,period_6_section_number,period_6_current_grade,period_6_room) VALUES ' + sql_data_string print sql_string cursor = db_connection.cursor() cursor.execute(sql_string) db_connection.commit() sql_statement_counter = sql_statement_counter + 1 try: cleaned_file_reader.close() db_connection.close() except: print 'Error trying to close the cleaned_file_reader and/or close the connection' print 'Executed {0} SQL statements on table students_calculated' return sql_statement_counter