continue for i in range(0, len(population), 3): try: n = names[i] place_population = int(population[i].replace(',', '')) employment_ratio = population[i + 1] unemployment_ratio = population[i + 2] print(n, place_population, employment_ratio, unemployment_ratio) cur.execute('INSERT INTO UNEMPLOYMENT VALUES(?, ?, ?, ?)', (n, place_population, employment_ratio, unemployment_ratio)) conn.commit() except: traceback.print_exc() print('') for header in headers: total_names.append(header) driver.execute_script("stepshift('h', 'f')") time.sleep(1) except: traceback.print_exc() print() driver = get_headless_driver(no_sandbox=True) driver.set_page_load_timeout(60) driver.get( 'https://factfinder.census.gov/faces/tableservices/jsf/pages/productview.xhtml?pid=ACS_16_5YR_S2301&prodType=table') modify_table(driver) extract_values(driver)
_TAX_SEARCH_URL = "https://propertytax.jpso.com/PropertyTax/propsrch.aspx#result"
_HISTORY_CELL_XPATH = (
    '//*[@id="ContentPlaceHolder1_body_dgHistory"]/tbody/tr[%d]/td[%d]'
)
# Rows 2-4 of the history grid are read (the original looped i = 1..3 and
# addressed tr[i + 1]); each row contributes its first seven cells.
_HISTORY_ROWS = (2, 3, 4)
_HISTORY_COLUMNS = (1, 2, 3, 4, 5, 6, 7)


def _count_lines(path):
    """Return the number of lines in *path*, or 0 if the file does not exist."""
    import errno

    try:
        with open(path, 'r') as handle:
            return sum(1 for _ in handle)
    except IOError as err:
        # Only a missing file means "nothing written yet"; anything else
        # (permissions, I/O failure) is a real error the caller must see.
        if err.errno == errno.ENOENT:
            return 0
        raise


def _open_search_page():
    """Start a headless driver and load the property-tax search page."""
    driver = get_headless_driver()
    driver.get(_TAX_SEARCH_URL)
    return driver


def _quit_best_effort(driver):
    """Shut *driver* down, logging (not raising) if the session is already gone."""
    import traceback

    try:
        driver.quit()
    except Exception:
        traceback.print_exc()


def _scrape_parcel(driver, parcel):
    """Search for one parcel and return its '"'-delimited output record.

    Leaves the browser on the history view; the caller navigates back.
    """
    search_by = driver.find_element_by_xpath(
        '//*[@id="ContentPlaceHolder1_body_cboSearchBy"]')
    search_by.send_keys('pp')
    parcel_box = driver.find_element_by_xpath(
        '//*[@id="ContentPlaceHolder1_body_txtParcel_In"]')
    # The search key is everything before the first '"' on the input line.
    parcel_box.send_keys(parcel.split('"')[0].strip())
    parcel_box.send_keys(Keys.ENTER)

    hex_label = driver.find_element_by_xpath(
        '//*[@id="ContentPlaceHolder1_body_lblHEX"]')
    tax_label = driver.find_element_by_xpath(
        '//*[@id="ContentPlaceHolder1_body_lblTaxAmt"]')
    pieces = [parcel.strip(), hex_label.text, tax_label.text]

    driver.find_element_by_xpath(
        '//*[@id="ContentPlaceHolder1_body_btViewHistory"]').click()
    # NOTE(review): the result of this lookup was never used. It is kept
    # because, if the driver has an implicit wait configured, it may be what
    # gives the history page time to render -- confirm before removing.
    driver.find_elements_by_class_name('text_smaller')

    for row in _HISTORY_ROWS:
        for column in _HISTORY_COLUMNS:
            pieces.append(get_selenium_xpath_if_exists(
                driver, _HISTORY_CELL_XPATH % (row, column)))
    return '"'.join(pieces)


def get_tax_data(input_file, output_file):
    """Scrape tax details for every parcel in ``input_file + '.txt'``.

    One record per successfully scraped parcel is appended to
    ``output_file + '.txt'``. A record is the stripped input line followed by
    the HEX label text, the tax-amount label text, and seven cells from each
    of three tax-history rows, all separated by '"'.

    The run is resumable: the number of lines already present in the output
    file is counted up front and that many leading input lines are skipped.
    NOTE(review): parcels that fail are not written, so after a run with
    failures the line count is smaller than the number of input lines already
    attempted, and a resumed run will revisit some parcels -- confirm whether
    that is acceptable.

    If any step for a parcel raises, the error is logged, the browser is
    restarted on the search page, and the parcel is counted as skipped.

    Args:
        input_file: Path of the parcel list, without the ``.txt`` suffix.
        output_file: Path of the output file, without the ``.txt`` suffix.
    """
    import traceback

    output_path = output_file + '.txt'
    # Count existing lines through a separate read-only handle: a file opened
    # with mode 'a' is write-only and cannot be iterated.
    already_written = _count_lines(output_path)
    counter = already_written
    skipped = 0

    driver = _open_search_page()
    try:
        with open(output_path, 'a') as real_output, \
                open(input_file + '.txt', 'r') as parcels:
            for index, parcel in enumerate(parcels):
                if index < already_written:
                    continue
                # Single-argument print behaves the same on Python 2 and 3.
                print('%d %d' % (counter, skipped))
                try:
                    record = _scrape_parcel(driver, parcel)
                    real_output.write(record + '\n')
                    # Resuming relies on the file contents, so do not leave
                    # finished records sitting in the write buffer.
                    real_output.flush()
                    print(record)
                    driver.find_element_by_xpath(
                        '//*[@id="MAIN_OUTLINE_TABLE"]/div[1]/div/div[2]/a/img'
                    ).click()
                    counter += 1
                except Exception:
                    # Exception (not a bare except) so Ctrl-C still stops the
                    # run instead of triggering a browser restart.
                    traceback.print_exc()
                    _quit_best_effort(driver)
                    driver = _open_search_page()
                    skipped += 1
    finally:
        _quit_best_effort(driver)