Beispiel #1
0
                continue

            for i in range(0, len(population), 3):
                try:
                    n = names[i]
                    place_population = int(population[i].replace(',', ''))
                    employment_ratio = population[i + 1]
                    unemployment_ratio = population[i + 2]
                    print(n, place_population, employment_ratio, unemployment_ratio)
                    cur.execute('INSERT INTO UNEMPLOYMENT VALUES(?, ?, ?, ?)', (n, place_population, employment_ratio, unemployment_ratio))
                    conn.commit()
                except:
                    traceback.print_exc()
                    print('')

            for header in headers:
                total_names.append(header)
            driver.execute_script("stepshift('h', 'f')")
            time.sleep(1)
        except:
            traceback.print_exc()
            print()


# Script entry point: open the census table page in a headless browser and
# hand the driver to modify_table() and extract_values().
driver = get_headless_driver(no_sandbox=True)
try:
    driver.set_page_load_timeout(60)
    driver.get(
        'https://factfinder.census.gov/faces/tableservices/jsf/pages/productview.xhtml?pid=ACS_16_5YR_S2301&prodType=table')
    modify_table(driver)
    extract_values(driver)
finally:
    # Always end the browser session, even when the page load or scraping
    # fails; otherwise the headless browser process is left running.
    driver.quit()
Beispiel #2
0
def _quit_driver_quietly(driver):
    """Shut down a browser session, ignoring errors from an already-dead one."""
    try:
        driver.quit()
    except Exception:
        # The session is being discarded anyway; a failure to quit it must
        # not mask the error that brought us here.
        pass


def get_tax_data(input_file, output_file):
    """Scrape tax details for every parcel listed in ``input_file + '.txt'``.

    For each input line, the text before the first ``"`` is typed into the
    parcel search box of the property-tax site. One line is appended to
    ``output_file + '.txt'`` per successfully scraped parcel, made of these
    fields joined by ``"``:

    * the stripped input line,
    * the text of the HEX label and of the tax-amount label,
    * 7 cells from each of table rows 2-4 of the history grid
      (3 rows x 7 columns).

    The function is resumable: the number of lines already present in the
    output file is counted and that many input lines are skipped.

    NOTE: parcels that fail are not written to the output, so after a run
    with failures the resume offset is smaller than the number of input
    lines actually processed.

    Args:
        input_file: Path of the input file, without the ``.txt`` suffix.
        output_file: Path of the output file, without the ``.txt`` suffix.

    Returns:
        None. Results are appended to the output file and echoed to stdout.
    """
    search_url = "https://propertytax.jpso.com/PropertyTax/propsrch.aspx#result"
    history_cell_xpath = (
        '//*[@id="ContentPlaceHolder1_body_dgHistory"]/tbody/tr[%d]/td[%d]')
    # Names of the history grid columns td[1]..td[7], in order.
    history_columns = (
        'year',
        'tax_notice',
        'assessment',
        'original_tax_due',
        'paid_amount',
        'date_paid',
        'balance_due',
    )

    # Count rows written by a previous run. This needs its own read-mode
    # handle: a file opened with mode 'a' cannot be iterated.
    try:
        with open(output_file + '.txt', 'r') as existing_output:
            counter = sum(1 for _ in existing_output)
    except IOError:
        # No readable output file yet: start from the first parcel.
        counter = 0
    skipped = 0

    driver = get_headless_driver()
    try:
        driver.get(search_url)
        with open(output_file + '.txt', 'a') as real_output, \
                open(input_file + '.txt', 'r') as parcels:
            for index, parcel in enumerate(parcels):
                if index < counter:
                    continue
                try:
                    # Formatted explicitly so the progress line is the same
                    # under Python 2 and Python 3.
                    print('%s %s' % (counter, skipped))
                    select = driver.find_element_by_xpath(
                        '//*[@id="ContentPlaceHolder1_body_cboSearchBy"]')
                    select.send_keys('pp')
                    bar = driver.find_element_by_xpath(
                        '//*[@id="ContentPlaceHolder1_body_txtParcel_In"]')
                    bar.send_keys(parcel.split('"')[0].strip())
                    bar.send_keys(Keys.ENTER)

                    hex_label = driver.find_element_by_xpath(
                        '//*[@id="ContentPlaceHolder1_body_lblHEX"]')
                    tax_label = driver.find_element_by_xpath(
                        '//*[@id="ContentPlaceHolder1_body_lblTaxAmt"]')
                    # Read the label texts before navigating to the history
                    # page, as the original statement order did.
                    fields = [parcel.strip(), hex_label.text, tax_label.text]

                    history_button = driver.find_element_by_xpath(
                        '//*[@id="ContentPlaceHolder1_body_btViewHistory"]')
                    history_button.click()
                    # Result unused. NOTE(review): kept because this lookup
                    # may act as a wait for the history page when implicit
                    # waits are configured - confirm before removing.
                    driver.find_elements_by_class_name('text_smaller')

                    # tr[1] is not read; rows 2-4 are collected.
                    for row_index in range(2, 5):
                        for column_index, _name in enumerate(history_columns, 1):
                            fields.append(get_selenium_xpath_if_exists(
                                driver,
                                history_cell_xpath % (row_index, column_index)))

                    string_to_concat = '"'.join(fields)
                    real_output.write(string_to_concat + '\n')
                    # Flush per row: the resume logic counts lines on disk,
                    # so buffered rows must not be lost on a crash.
                    real_output.flush()
                    print(string_to_concat)

                    back_button = driver.find_element_by_xpath(
                        '//*[@id="MAIN_OUTLINE_TABLE"]/div[1]/div/div[2]/a/img'
                    )
                    back_button.click()
                    counter += 1
                except Exception:
                    # Best-effort scraping: any failure on a parcel skips it
                    # and restarts the browser on the search page.
                    # `Exception` (not a bare except) lets Ctrl-C stop the run.
                    _quit_driver_quietly(driver)
                    driver = get_headless_driver()
                    driver.get(search_url)
                    skipped += 1
                    continue
    finally:
        # Release the browser however the loop ends.
        _quit_driver_quietly(driver)