def makeDistancedTable(): conn, cursor = establishConnection() cursor.execute( f'''CREATE TABLE IF NOT EXISTS Distance_Table (id, type, url, created_at, company, location, title, latitude REAL, longitude REAL);''') conn.commit() jobs.close_db(conn)
def makeFiftyMileRadiusTable(): conn, cursor = establishConnection() cursor.execute( f''' CREATE TABLE IF NOT EXISTS Fifty_Mile_Radius(id, type, url, created_at, company, location, title, latitude REAL, longitude REAL);''') conn.commit() jobs.close_db(conn)
def make_combined_map(): conn, cursor = establishConnection() cursor.execute( f'''CREATE TABLE IF NOT EXISTS combined_table (id, type, url, created_at, company, location, title, latitude, longitude);''') conn.commit() jobs.close_db(conn)
def insertIntoFiftyMileRadiusTable(): makeFiftyMileRadiusTable() conn, cursor = establishConnection() cursor.execute(f'''DELETE FROM Fifty_Mile_Radius;''') cursor.execute( f'''INSERT INTO Fifty_Mile_Radius SELECT * FROM github_jobs;''') conn.commit() jobs.close_db(conn)
def test_open_db(): # Checking to see if the database exists, so we can delete it to check # if the open_db function actually creates the database. if os.path.exists(databaseFileName): os.remove(databaseFileName) connection, cursor = jobs.open_db(databaseFileName) assert os.path.exists('jobs.db') jobs.close_db(connection)
def test_excel_import_jobs(): jobs_data = jobs.excel_jobs_import("state_job_data.xlsx") assert len(jobs_data) > 1000 conn, cursor = jobs.open_db("excel_import_test.sqlite") jobs.setup_db(cursor) jobs.insert_data(jobs_data, "jobs", cursor) jobs.close_db(conn) conn, cursor = jobs.open_db("excel_import_test.sqlite") state_query = jobs.query_run("SELECT count(DISTINCT state_name) from" + " jobs;", cursor) for element in state_query: assert element[0] > 50
def text_visualization(self): self.list_control.clear() conn, cursor = jobs.open_db("jobs_db.sqlite") if self.order_selector_text.currentText() == "ASC": data_visualization_per_state = jobs.query_run('''SELECT state_abrev, state_name, ''' + ''' total(jobs.employment) as employment, total(school.size_2018/4), round(avg(school.repayment_cohort),3) as repayment_cohort, round(avg(jobs.salary_25th_percentile)) as averge_entry_salary FROM school JOIN states using(state_abrev) JOIN jobs using(state_name) GROUP BY state_name ORDER BY employment ASC;''', cursor) elif self.order_selector_text.currentText() == "DESC": data_visualization_per_state = jobs.query_run('''SELECT state_abrev, state_name, ''' + ''' total(jobs.employment) as employment, total(school.size_2018/4), round(avg(school.repayment_cohort),3) as repayment_cohort, round(avg(jobs.salary_25th_percentile)) as averge_entry_salary FROM school JOIN states using(state_abrev) JOIN jobs using(state_name) GROUP BY state_name ORDER BY employment DESC;''', cursor) else: data_visualization_per_state = jobs.query_run('''SELECT state_abrev, state_name, ''' + ''' total(jobs.employment) as employment, total(school.size_2018/4), round(avg(school.repayment_cohort),3) as repayment_cohort, round(avg(jobs.salary_25th_percentile)) as averge_entry_salary FROM school JOIN states using(state_abrev) JOIN jobs using(state_name) GROUP BY state_name ;''', cursor) QListWidgetItem("State", listview=self.list_control) for state in data_visualization_per_state: state_display_data = f"{state[0]}, {state[1]}" grad_employ_data = f"Employment/Graduates: {state[2]/state[3]}" repayment_data = f"Average Entry Salary/Average Declining Balance Percent: {state[5]/state[4]}" state_item = QListWidgetItem(state_display_data, listview=self.list_control) grad_item = QListWidgetItem(grad_employ_data, listview=self.list_control) repayment_item = QListWidgetItem(repayment_data, listview=self.list_control) grad_item.setForeground(Qt.darkGreen) repayment_item.setForeground(Qt.blue) state_item.setForeground(Qt.white) state_item.setBackground(Qt.black) self.list_control.show() jobs.close_db(conn)
def test_create_table(): # Checking to see if the database exists, so we can delete it to check # if the create_table function actually creates the database table with the appropriate columns. if os.path.exists(databaseFileName): os.remove(databaseFileName) connection, cursor = jobs.open_db(databaseFileName) jobs.create_table(connection, cursor) columnExists = False try: cursor.execute('ALTER TABLE jobs ADD COLUMN Description;') except sqlite3.OperationalError: columnExists = True assert columnExists jobs.close_db(connection)
def insertIntoDistance(): starting_coords = (41.9667679, -70.9661533) insertIntoFiftyMileRadiusTable() conn, cursor = establishConnection() cursor.execute(f'''DELETE FROM Distance_Table;''') cursor.execute( f'''SELECT location, latitude, longitude FROM Fifty_Mile_Radius;''') rows = cursor.fetchall() for data in rows: comparative_coord = (data[1], data[2]) distance = geoDist.distance(comparative_coord, starting_coords).miles if distance <= 50: print(data[0]) cursor.execute( f'''INSERT OR IGNORE INTO Distance_Table SELECT * FROM Fifty_Mile_Radius where location = "{data[0]}"''' ) conn.commit() jobs.close_db(conn)
def test_db_creation(): school_test_data = [ {'school.name': 'Test School', '2017.student.size': 5, '2018.student.size': 1, 'school.state': 'NJ', 'id': 1, '2017.earnings.3_yrs_after_completion.overall_count_over_poverty_line': 1, '2016.repayment.3_yr_repayment.overall': 1, '2016.repayment.repayment_cohort.3_year_declining_balance': 0.5}, {'school.name': 'Test School 1', '2017.student.size': 4, '2018.student.size': 6, 'school.state': 'MA', 'id': 2, '2017.earnings.3_yrs_after_completion.overall_count_over_poverty_line': 1, '2016.repayment.3_yr_repayment.overall': 1, '2016.repayment.repayment_cohort.3_year_declining_balance': 0.5}] job_test_data = [{'state': 'Massachusetts', 'code': '00-0001', 'title': 'testttl', 'employment': 5, 'salary': 1000}] expected_tables = ['school', 'jobs'] conn, cursor = jobs.open_db("test_db.sqlite") jobs.setup_db(cursor) jobs.insert_data(job_test_data, "jobs", cursor) jobs.insert_data(school_test_data, "school", cursor) jobs.close_db(conn) conn, cursor = jobs.open_db("test_db.sqlite") tables_query = jobs.query_run("SELECT name FROM " + "sqlite_master WHERE type='table' and name NOT LIKE 'sqlite_%';" "", cursor) for (table, returned_tables) in zip(expected_tables, tables_query): assert table == returned_tables[0] school_query = jobs.query_run("SELECT * FROM" + " SCHOOL;", cursor) for (row, element) in zip(school_query, school_test_data): assert row[0] == element['id'] assert row[1] == element['school.name'] assert row[2] == element['school.state'] assert row[3] == element['2017.student.size'] assert row[4] == element['2018.student.size'] assert row[5] == element['2017.earnings.3_yrs_after_completion.overall_count_over_poverty_line'] assert row[6] == element['2016.repayment.3_yr_repayment.overall'] assert row[7] == element['2016.repayment.repayment_cohort.3_year_declining_balance'] jobs_query = jobs.query_run("SELECT * FROM" + " JOBS;", cursor) for (row, element) in zip(jobs_query, job_test_data): assert row[0] == 1 assert row[1] == element['state'] assert row[2] == element['code'] assert row[3] == element['title'] assert row[4] == element['employment'] assert row[5] == element['salary'] jobs.close_db(conn)
def run_map_visualization(self): conn, cursor = jobs.open_db("jobs_db.sqlite") data_visualization_per_state = jobs.query_run('''SELECT state_abrev, state_name, ''' + ''' total(jobs.employment) as employment, total(school.size_2018/4), round(avg(school.repayment_cohort),3) as repayment_cohort, round(avg(jobs.salary_25th_percentile)) as averge_entry_salary FROM school JOIN states using(state_abrev) JOIN jobs using(state_name) GROUP BY state_name ;''', cursor) state_abrev = [] state_grads = [] state_repayment = [] for state in data_visualization_per_state: state_abrev.append(state[0]) state_grads.append(state[2]/state[3]) state_repayment.append(state[5]/state[4]) if self.data_selector_map.currentText() == "Graduates to Employment": us_map = px.Figure(data=px.Choropleth(locations=state_abrev, z=state_grads, locationmode='USA-states', colorbar_title="Employment/Graduates" )) us_map.update_layout(geo_scope='usa', title_text='Employment VS Graduates By State') us_map.show() elif self.data_selector_map.currentText() == "Average Declining Balance Percent": us_map = px.Figure(data=px.Choropleth(locations=state_abrev, z=state_repayment, locationmode='USA-states', colorbar_title="Salary/Average Percent" )) us_map.update_layout(geo_scope='usa', title_text='Average Salary VS Average ' 'Percent of People with Declining Loans') us_map.show() else: us_map = px.Figure(data=px.Choropleth(locations=state_abrev, z=state_grads, locationmode='USA-states', colorbar_title="Graduates" )) us_map.update_layout(geo_scope='usa', title_text='Graduates By State') us_map.show() jobs.close_db(conn) self.list_control.hide()
def test_add_to_database_with_good_data(): # Adding good data to the database. additionalTestData = [ {"id": "TESTID1", "type": "Full Time", "url": "test1.com", "created_at": "Sat Feb 01 12:53:40 UTC 2020", "company": "TESTCOMPANY", "company_url": "test1.com", "location": "Boston, MA", "title": "Senior Python/Django Developer ", "description": "Test Description", "how_to_apply": "Yes.", "company_logo": "test.png"}] # Checking if the new data is added to the database. connection, cursor = jobs.open_db(databaseFileName) jobs.save_to_database(additionalTestData, connection, cursor) cursor.execute("SELECT * FROM jobs WHERE jobs.id = 'TESTID1'") assert cursor.fetchone() jobs.close_db(connection)
def test_update_from_list(): job_test_data = [{'state': 'Massachusetts', 'code': 99, 'title': 'testttl', 'employment': 5, 'salary': 1000}] jobs_data_to_replace = [1, 'Rhode Island', 10, 'title', 10, 500] conn, cursor = jobs.open_db("update_test.sqlite") jobs.setup_db(cursor) jobs.insert_data(job_test_data, "jobs", cursor) jobs.close_db(conn) conn, cursor = jobs.open_db("update_test.sqlite") original_data = jobs.query_run("SELECT * " + "FROM jobs;", cursor) for (element, query_data) in zip(job_test_data, original_data): assert query_data[0] == 1 assert query_data[1] == element['state'] assert query_data[2] == element['code'] assert query_data[3] == element['title'] assert query_data[4] == element['employment'] assert query_data[5] == element['salary'] jobs.close_db(conn) jobs.update_data_from_list(jobs_data_to_replace, "Jobs", "update_test.sqlite") conn, cursor = jobs.open_db("update_test.sqlite") updated_data = jobs.query_run("SELECT *"+" FROM jobs;", cursor) for element in updated_data: assert jobs_data_to_replace[0] == element[0] assert jobs_data_to_replace[1] == element[1] assert jobs_data_to_replace[2] == element[2] assert jobs_data_to_replace[3] == element[3] assert jobs_data_to_replace[4] == element[4] assert jobs_data_to_replace[5] == element[5] jobs.close_db(conn)
def test_save_to_database(get_data_github, get_data_stackoverflow): # Checking to see if the database exists, so we can delete it to check # if the save_to_database function actually saves the data to a fresh, new database. if os.path.exists(databaseFileName): os.remove(databaseFileName) # The amount of jobs is compressed to 30 per source, so the test doesn't take an eternity to run. connection, cursor = jobs.open_db(databaseFileName) jobs.create_table(connection, cursor) jobs.save_to_database(get_data_github[:30], connection, cursor) jobs.save_to_database(get_data_stackoverflow[:30], connection, cursor) # Checking if the database has some values that should be expected there. In this case, I know # that there is a job where the title is 'Lead Data Acquisition Design Engineer'. # It also picks a random one from the retrieved list and checks if it exists in the database. # UPDATE: It only checks for random titles from the StackOverFlow and Github lists. testTitle1 = random.choice(get_data_github[:30])['title'] testTitle2 = random.choice(get_data_stackoverflow[:30])['title'] cursor.execute("SELECT * FROM jobs WHERE jobs.Title = ?", (testTitle1,)) assert cursor.fetchone() cursor.execute("SELECT * FROM jobs WHERE jobs.Title = ?", (testTitle2,)) assert cursor.fetchone() jobs.close_db(connection)
def test_specific_excel_data(): excel_test_data = ["massachusetts", 0, "test", "major", 5, 5] excel_book = openpyxl.load_workbook(filename="test_workbook.xlsx") test_sheet = excel_book.active test_sheet['B2'] = excel_test_data[0] test_sheet['H2'] = excel_test_data[1] test_sheet['I2'] = excel_test_data[2] test_sheet['J2'] = excel_test_data[3] test_sheet['K2'] = excel_test_data[4] test_sheet['Y2'] = excel_test_data[5] excel_book.save(filename="test_workbook.xlsx") test_dict = jobs.excel_jobs_import("test_workbook.xlsx") conn, cursor = jobs.open_db("test_specific_excel_data.sqlite") jobs.setup_db(cursor) jobs.insert_data(test_dict, "jobs", cursor) jobs.close_db(conn) conn, cursor = jobs.open_db("test_specific_excel_data.sqlite") specific_data_query = jobs.query_run("SELECT * FROM " + "JOBS;", cursor) for element in specific_data_query: assert element[1] == excel_test_data[0] assert element[2] == excel_test_data[1] assert element[3] == excel_test_data[2] assert element[4] == excel_test_data[4] assert element[5] == excel_test_data[5]
def test_update_from_excel(): test_data = [{'state': 'Massachusetts', 'code': 99, 'title': 'testttl', 'employment': 5, 'salary': 1000}] data_to_replace = [1, 'Rhode Island', 10, 'title', 10, 500] conn, cursor = jobs.open_db("test_excel_update.sqlite") jobs.setup_db(cursor) jobs.insert_data(test_data, "jobs", cursor) jobs.close_db(conn) conn, cursor = jobs.open_db("test_excel_update.sqlite") original_data = jobs.query_run("SELECT * " + "FROM jobs;", cursor) for (element, query_data) in zip(test_data, original_data): assert query_data[0] == 1 assert query_data[1] == element['state'] assert query_data[2] == element['code'] assert query_data[3] == element['title'] assert query_data[4] == element['employment'] assert query_data[5] == element['salary'] jobs.close_db(conn) excel_book = openpyxl.load_workbook(filename="update_by_excel.xlsx") active_sheet = excel_book.active active_sheet['A1'] = data_to_replace[0] active_sheet['B1'] = data_to_replace[1] active_sheet['C1'] = data_to_replace[2] active_sheet['D1'] = data_to_replace[3] active_sheet['E1'] = data_to_replace[4] active_sheet['F1'] = data_to_replace[5] excel_book.save("update_by_excel.xlsx") jobs.update_data_from_excel("update_by_excel.xlsx", "Jobs", "test_excel_update.sqlite") conn, cursor = jobs.open_db("test_excel_update.sqlite") updated_data = jobs.query_run("SELECT * "+"FROM jobs", cursor) for element in updated_data: assert data_to_replace[0] == element[0] assert data_to_replace[1] == element[1] assert data_to_replace[2] == element[2] assert data_to_replace[3] == element[3] assert data_to_replace[4] == element[4] assert data_to_replace[5] == element[5]