Code Example #1
def run(input_sheet, output):
    print("==" * 30)
    print("Johnson Scrapping Started.")
    print("==" * 30)

    driver = initChromeDriver()
    driver.implicitly_wait(10)
    driver.refresh()

    for street in input_sheet:
        print(f"Scrapping {street} ......")
        driver.get(
            f"https://www.johnsoncountytaxoffice.org/Search/Results?Query.SearchField=5&Query.SearchText={street}&Query.SearchAction=&Query.PropertyType=&Query.PayStatus=Both"
        )
        time.sleep(2)
        do_scraping(driver=driver)

    # final_data is assumed to be a module-level list populated by do_scraping()
    final_dataframe = pd.DataFrame(final_data,
                                   columns=["Account", "Name", "Amount"])
    output.add_worksheet(rows=final_dataframe.shape[0],
                         cols=final_dataframe.shape[1],
                         title="Johnson")  # Create a new sheet
    work_sheet_instance = output.worksheet(
        "Johnson")  # Get the newly created sheet
    set_with_dataframe(work_sheet_instance,
                       final_dataframe)  # Write the collected data to the sheet

    print("==" * 30)
    print("Johnson Scrapping Ended.")
    print("==" * 30)
Code Example #2
def start_driver(account):
    try:
        driver = initChromeDriver()
        driver.implicitly_wait(10)
        driver.get(
            "https://taxonline.tarrantcounty.com/taxweb/accountsearch.asp?linklocation=Iwantto&linkname=Property%20Account"
        )
        time.sleep(1)
        driver.find_element(
            By.XPATH,
            "/html/body/table[1]/tbody/tr[3]/td/table/tbody/tr[1]/td/font/table/tbody/tr/td/table/tbody/tr[2]/td/form/input[1]"
        ).send_keys(account)
        return driver
    except Exception:
        print("==" * 30)
        print("Server is down, try again later!")
        print("==" * 30)
        raise SystemExit(1)
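
A note on the locator above: the absolute XPath breaks whenever the page layout shifts. If the account input exposes a stable name or id attribute, a shorter locator is far less brittle. A sketch under that assumption (the attribute name "account" is hypothetical, not taken from the real page):

from selenium.webdriver.common.by import By

# Hypothetical: assumes the search box exposes a stable name attribute
driver.find_element(By.NAME, "account").send_keys(account)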
Code Example #3
def run(input_sheets, output):

    print("==" * 30)
    print("Denton Scrapping Second Instance Starting")
    print("==" * 30)

    driver = initChromeDriver()
    driver.implicitly_wait(10)
    driver.refresh()
    time.sleep(40)  # Long initial wait kept from the original, presumably to let the session settle

    address = []
    cities = []
    names = []
    mailing_addresses = []
    mailing_cities = []
    assessed_value_list = []

    input_data = input_sheets.get_all_records()
    input_df = pd.DataFrame.from_dict(input_data)
    account_list = input_df["Account"].tolist()

    for account in account_list:
        account = str(int(account))  # Normalize account IDs that Sheets may return as numbers
        print(account)
        driver.get(
            f"https://propaccess.trueautomation.com/clientdb/Property.aspx?cid=19&prop_id={account}"
        )
        time.sleep(1)
        try:
            driver.find_element(
                By.XPATH,
                "/html/body/form/div/div[5]/div[1]/span/input").click()
            assessment_details = driver.find_element(
                By.XPATH,
                "/html/body/form/div/div[5]/div[5]").get_property("innerHTML")
            assessment_soup = BeautifulSoup(assessment_details, "lxml")
            assessed_value_list.append([
                i.text for i in assessment_soup.find_all("td", class_="currency")
            ][-1])

            property_details = driver.find_element(
                By.XPATH,
                "/html/body/form/div/div[5]/div[3]").get_property("innerHTML")
            property_soup = BeautifulSoup(property_details, "lxml")
            property_data = []
            for i in property_soup.find_all("tr"):
                for j in i.find_all("td"):
                    property_data.append([
                        s.text.replace("\n", "") for s in j
                        if s.text.replace("\n", "") != ""
                    ])

            for n, data in enumerate(property_data):
                if data == ['Address:']:
                    address.append(property_data[n + 1][0])
                    temp = " ".join(property_data[n + 1][1].split(" ")[:-1])
                    cities.append(temp)
                elif data == ['Name:']:
                    names.append(property_data[n + 1][0])
                elif data == ['Mailing Address:']:
                    mailing_addresses.append(property_data[n + 1][0])
                    mailing_cities.append(property_data[n + 1][1])
        except Exception:
            # On any failure, append blanks so all column lists stay aligned
            address.append("")
            cities.append("")
            names.append("")
            mailing_addresses.append("")
            mailing_cities.append("")
            assessed_value_list.append("")
        print(address)
        print(cities)
        print(names)
        print(mailing_addresses)
        print(mailing_cities)
        print(assessed_value_list)
        print("====" * 20)

    input_df["Address"] = address
    input_df["City"] = cities
    input_df["Owner(From 2nd Web)"] = names
    input_df["Mailing Address"] = mailing_addresses
    input_df["Mialing City"] = mailing_cities
    input_df["Assessed Value"] = assessed_value_list

    output.add_worksheet(rows=input_df.shape[0],
                         cols=input_df.shape[1],
                         title="Denton")  # Create a new sheet
    work_sheet_instance = output.worksheet(
        "Denton")  # Get the newly created sheet
    set_with_dataframe(work_sheet_instance,
                       input_df)  # Write the collected data to the sheet

    print("==" * 30)
    print("Denton Scrapping Second Instance Ended")
    print("==" * 30)
Code Example #4
File: ellis_run.py  Project: osamascience96/Python
def run(input_sheet, output):
    print("==" * 30)
    print("Ellis Scrapping is Started")
    print("==" * 30)

    driver = initChromeDriver()
    driver.implicitly_wait(10)
    driver.refresh()

    accounts_list = []
    owners = []
    addresses = []
    cities = []
    property_address = []
    total_due = []
    gross_value = []

    for street in input_sheet:
        print(f"Scrapping {street} ......")
        driver.get("https://actweb.acttax.com/act_webdev/ellis/index.jsp")
        time.sleep(1)
        driver.find_element(
            By.XPATH,
            "/html/body/div[1]/div/div[2]/table/tbody/tr[1]/td/table[2]/tbody/tr/td/center/form/table/tbody/tr[3]/td[2]/h3[4]/b/input[2]"
        ).click()
        time.sleep(0.5)
        driver.find_element(
            By.XPATH,
            "/html/body/div[1]/div/div[2]/table/tbody/tr[1]/td/table[2]/tbody/tr/td/center/form/table/tbody/tr[3]/td[2]/h3[2]/input"
        ).send_keys(street)
        time.sleep(0.5)
        driver.find_element(
            By.XPATH,
            "/html/body/div[1]/div/div[2]/table/tbody/tr[1]/td/table[2]/tbody/tr/td/center/form/table/tbody/tr[5]/td[2]/h3[2]/input"
        ).click()
        time.sleep(1)
        try:
            table = driver.find_element(
                By.XPATH,
                "/html/body/div/div/div[2]/table/tbody/tr[1]/td/form/div"
            ).get_property("innerHTML")
            df = pd.read_html(table)[0]
            accounts = list(df["Account Number"])[2:]

            for account in accounts:
                try:
                    driver.get(
                        f"https://actweb.acttax.com/act_webdev/ellis/showdetail2.jsp?can={account}"
                    )
                    time.sleep(0.5)
                    table_data = driver.find_element(
                        By.XPATH,
                        "/html/body/div/div/div[2]/table/tbody/tr[2]/td/table[2]/tbody/tr"
                    ).get_property("innerHTML")
                    soup = BeautifulSoup(table_data, "lxml")
                    table_text = [i for i in soup.find_all("h3")]
                    address_detail = [
                        i.text.replace("\t", "").replace("\n", "")
                        for i in table_text[1]
                        if i.text.replace("\t", "").replace("\n", "") != ""
                    ]
                    accounts_list.append(account)
                    owners.append(address_detail[2].replace("  ", ""))
                    addresses.append(address_detail[3])
                    cities.append(address_detail[-1].split("  ")[0])
                    property_address.append([
                        i.text.replace("\t", "").replace("\n", "")
                        for i in table_text[2]
                    ][2].replace("  ", ""))
                    table_text_list = [i.text for i in table_text]
                    for i in table_text_list:
                        if "Total Amount Due" in i:
                            total_due.append(i.split("\xa0")[1])
                        elif "Gross Value" in i:
                            gross_value.append(i.split("\xa0")[1])
                except Exception:
                    # On any failure, append blanks so all column lists stay aligned
                    accounts_list.append(account)
                    owners.append("")
                    addresses.append("")
                    cities.append("")
                    property_address.append("")
                    total_due.append("")
                    gross_value.append("")
        except Exception:
            # No results table for this street; move on to the next one
            pass

    final_dataframe = pd.DataFrame(zip(accounts_list, owners, addresses,
                                       cities, property_address, total_due,
                                       gross_value),
                                   columns=[
                                       "Account", "Owner", "Address", "City",
                                       "Property Site Address",
                                       "Total Amount Due", "Gross Value"
                                   ])

    output.add_worksheet(rows=final_dataframe.shape[0],
                         cols=final_dataframe.shape[1],
                         title="Ellis Scraping")  # Create a new sheet
    work_sheet_instance = output.worksheet(
        "Ellis Scraping")  # Get the newly created sheet
    set_with_dataframe(work_sheet_instance,
                       final_dataframe)  # Write the collected data to the sheet

    print("==" * 30)
    print("Ellis Scrapping is Finished")
    print("==" * 30)
Code Example #5
def run(list_df, output):
    print("=============Dallas Scrapping Started=============")
    df = list_df
    driver = initChromeDriver()

    continueCount = ReadDallasCount()

    # Minimum number of search results to process
    minimumSearchCount = 0
    minimumSearch = input("Enter Minimum Search Results in Number: ")
    # Treat an empty answer as 0 (input() always returns a string)
    minimumSearch = int(minimumSearch) if minimumSearch.strip() else 0

    for street_obj in df.values:
        if minimumSearchCount >= minimumSearch:
            break
        while True:
            driver.set_page_load_timeout(10)
            try:
                driver.get(
                    'https://www.dallasact.com/act_webdev/dallas/searchbyproperty.jsp'
                )
                street_num = int(street_obj[0])
                street_name = street_obj[1]

                driver.find_element(
                    By.XPATH,
                    '/html/body/table/tbody/tr[2]/td/table/tbody/tr[1]/td/table/tbody/tr/td/center/form/table/tbody/tr[2]/td[2]/h3/input'
                ).send_keys(street_num)
                driver.find_element(
                    By.XPATH,
                    '/html/body/table/tbody/tr[2]/td/table/tbody/tr[1]/td/table/tbody/tr/td/center/form/table/tbody/tr[3]/td[2]/h3/input'
                ).send_keys(street_name)
                time.sleep(0.5)
                driver.find_element(
                    By.XPATH,
                    '/html/body/table/tbody/tr[2]/td/table/tbody/tr[1]/td/table/tbody/tr/td/center/form/table/tbody/tr[5]/td/center/input'
                ).click()
                time.sleep(0.5)

                try:
                    # Get the searched Results
                    soup = BeautifulSoup(driver.page_source, 'lxml')
                    # Get the size of the data received
                    size = soup.find('span', id="mySize")
                    if size is not None:
                        size = int(size.get_text())
                        if size > 0:
                            ContinueWriteProcedure(driver, street_name)
                            break
                except Exception as e:
                    print(e)
                    time.sleep(1)
            except TimeoutException:
                driver.execute_script("window.stop();")

        minimumSearchCount += 1
        continueCount += 1

    # Write last count
    WriteDallasCount(continueCount)

    fileName = f"Dallas Scrapping {str(minimumSearch)} Searches"

    # final_data is assumed to be a module-level list populated by ContinueWriteProcedure()
    final_dataframe = pd.DataFrame(final_data,
                                   columns=[
                                       "Owner", "Address",
                                       "Property Site Address",
                                       "Current Tax Levy", "Total Amount due",
                                       "Market Value"
                                   ])
    output.add_worksheet(rows=final_dataframe.shape[0],
                         cols=final_dataframe.shape[1],
                         title=fileName)  # Create a new sheet
    work_sheet_instance = output.worksheet(
        fileName)  # Get the newly created sheet
    set_with_dataframe(work_sheet_instance,
                       final_dataframe)  # Write the collected data to the sheet
    driver.quit()  # Shut down the browser session

    print("=============Dallas Scrapping finished=============")
Code Example #6
def run(input_sheets, output):
    
    print("=="*30)
    print("Johnson Scrapping Second Instance Started")
    print("=="*30)

    driver = initChromeDriver()
    driver.implicitly_wait(10)
    driver.refresh()

    
    input_data = input_sheets.get_all_records()
    input_df = pd.DataFrame.from_dict(input_data)
    account_list = input_df["Account"].tolist()

    owners = []
    owner_address = []
    property_address = []
    land_state = []
    improve_value = []
    land_value = []
    
    for account in account_list:
        try:
            driver.get(f"http://search.johnson.manatron.com/search.php?searchStr={account}&searchType=account")
            time.sleep(1)
            driver.find_element(By.XPATH, "/html/body/div[3]/table/tbody/tr[2]/td[1]/a").click()
            table = driver.find_element(By.XPATH, "/html/body/div[3]").get_property("innerHTML")
            df = pd.read_html(table)[0]
            data_list = df.values.tolist()
            for data in data_list:
                if data[0] == "Owner Name:":
                    owners.append(data[1])
                elif data[0] == "Owner Address:":
                    owner_address.append(data[1])
                elif data[0] == "Property Location:":
                    property_address.append(data[1])
                elif data[0] == "Land State Code:":
                    land_state.append(data[1])
                elif data[0] == "Improvement Value":
                    improve_value.append(data[1])
                elif data[0] == "Land Market Value:":
                    land_value.append(data[1])
        except Exception:
            # On any failure, append blanks so all column lists stay aligned
            owners.append("")
            owner_address.append("")
            property_address.append("")
            land_state.append("")
            improve_value.append("")
            land_value.append("")
            
    input_df["Owner Name(From 2nd Web)"] = owners
    input_df["Owner Address"] = owner_address
    input_df["Property Location"] = property_address
    input_df["Land State Code"] = land_state
    input_df["Improvement Value"] = improve_value
    input_df["Land Market Value"] = land_value
    
    output.add_worksheet(rows=input_df.shape[0], cols=input_df.shape[1], title="Johnson")  # Create a new sheet
    work_sheet_instance = output.worksheet("Johnson")  # Get the newly created sheet
    set_with_dataframe(work_sheet_instance, input_df)  # Write the collected data to the sheet
            
    print("=="*30)
    print("Johnson Scrapping Second Instance Ended")
    print("=="*30)
Code Example #7
def run(input_streets, output):
    print(
        "================== Tarrant First Instance Started ===================="
    )
    driver = initChromeDriver()
    driver.implicitly_wait(10)
    driver.refresh()
    fake_input(driver)

    for street in input_streets:
        print(f"Scrapping {street} ..... ")
        driver.get("https://www.tad.org/property-search/")
        time.sleep(1.5)
        driver.find_element(
            By.XPATH,
            "/html/body/div[3]/div[3]/div[3]/form/div/div[3]/div[1]/div[1]/div[1]/input"
        ).clear()
        time.sleep(0.5)
        driver.find_element(
            By.XPATH,
            "/html/body/div[3]/div[3]/div[3]/form/div/div[3]/div[1]/div[1]/div[1]/input"
        ).send_keys(street)
        time.sleep(0.5)
        driver.find_element(
            By.XPATH,
            "/html/body/div[3]/div[3]/div[3]/form/div/div[3]/div[1]/div[5]/input"
        ).click()
        time.sleep(1)

        soup = BeautifulSoup(driver.page_source, 'lxml')
        pagination = soup.find(
            'div', class_="itemPagination property-search-pagination")
        pagination_list = get_pagination(pagination=pagination,
                                         url=driver.current_url)

        if pagination_list != []:
            for page in pagination_list:
                time.sleep(1)
                driver.get(page)
                soup = BeautifulSoup(driver.page_source, 'lxml')
                get_table(soup=soup)
        else:
            get_table(soup=soup)
    driver.quit()  # Shut down the browser session

    # all_tables is assumed to be a module-level list populated by get_table()
    final_dataframe = pd.DataFrame(all_tables,
                                   columns=[
                                       "Account", "Property Address",
                                       "Property City", "Primary Owner Name",
                                       "Market Value"
                                   ])
    final_dataframe.to_csv("oo.CSV", index=False)  # Keep a local CSV copy as well

    output.add_worksheet(rows=final_dataframe.shape[0],
                         cols=final_dataframe.shape[1],
                         title="Tarrant")  # Create a new sheet
    work_sheet_instance = output.worksheet(
        "Tarrant")  # Get the newly created sheet
    set_with_dataframe(work_sheet_instance,
                       final_dataframe)  # Write the collected data to the sheet

    print(
        "================== Tarrant First Instance Finished ===================="
    )
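
For context, each run() on this page takes some input data plus a gspread Spreadsheet to write into. A hedged sketch of the glue code that could drive one of these functions; the credential path and spreadsheet titles are placeholders, not taken from the project:

import gspread

# Hypothetical wiring; the filename and sheet titles are placeholders
gc = gspread.service_account(filename="service_account.json")
input_book = gc.open("Input Streets")      # spreadsheet holding the streets/accounts
output_book = gc.open("Scraping Results")  # spreadsheet the run() functions write into

input_streets = input_book.sheet1.col_values(1)  # e.g. one column of street names
run(input_streets, output_book)                  # any of the run() variants above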