def run(input_sheet, output):
    """Scrape Johnson County tax office results for each street and write them to a new 'Johnson' worksheet."""
    print("==" * 30)
    print("Johnson Scraping Started.")
    print("==" * 30)
    driver = initChromeDriver()
    driver.implicitly_wait(10)
    driver.refresh()
    for street in input_sheet:
        print(f"Scraping {street} ......")
        driver.get(
            f"https://www.johnsoncountytaxoffice.org/Search/Results?Query.SearchField=5&Query.SearchText={street}&Query.SearchAction=&Query.PropertyType=&Query.PayStatus=Both"
        )
        time.sleep(2)
        do_scraping(driver=driver)
    final_dataframe = pd.DataFrame(final_data, columns=["Account", "Name", "Amount"])
    output.add_worksheet(rows=final_dataframe.shape[0],
                         cols=final_dataframe.shape[1],
                         title="Johnson")  # Create a new sheet
    work_sheet_instance = output.worksheet("Johnson")  # get the newly created sheet
    set_with_dataframe(work_sheet_instance, final_dataframe)  # Write collected data to the sheet
    print("==" * 30)
    print("Johnson Scraping Ended.")
    print("==" * 30)
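# Every run() function in this repo calls an initChromeDriver() helper that is
# not shown in this section. The following is only a minimal sketch of what
# such a helper might look like, assuming Selenium 4 with a chromedriver
# available on PATH; the options below are assumptions, not taken from the
# original code.
def initChromeDriver():
    from selenium import webdriver

    options = webdriver.ChromeOptions()
    options.add_argument("--start-maximized")  # assumed option
    options.add_argument("--disable-gpu")      # assumed option
    return webdriver.Chrome(options=options)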
def start_driver(account):
    """Open the Tarrant County account search page and type the account number into the search box."""
    try:
        driver = initChromeDriver()
        driver.implicitly_wait(10)
        driver.get(
            "https://taxonline.tarrantcounty.com/taxweb/accountsearch.asp?linklocation=Iwantto&linkname=Property%20Account"
        )
        time.sleep(1)
        driver.find_element(
            By.XPATH,
            "/html/body/table[1]/tbody/tr[3]/td/table/tbody/tr[1]/td/font/table/tbody/tr/td/table/tbody/tr[2]/td/form/input[1]"
        ).send_keys(account)
        return driver
    except:
        print("==" * 30)
        print("Server is down, try again later!")
        print("==" * 30)
        exit()
def run(input_sheets, output):
    """Look up each Denton account on the propaccess site and append owner, address, and assessed value columns."""
    print("==" * 30)
    print("Denton Scraping Second Instance Started")
    print("==" * 30)
    driver = initChromeDriver()
    driver.implicitly_wait(10)
    driver.refresh()
    time.sleep(40)
    address = []
    cities = []
    names = []
    mailing_addresses = []
    mailing_cities = []
    assessed_value_list = []
    input_data = input_sheets.get_all_records()
    input_df = pd.DataFrame.from_dict(input_data)
    account_list = input_df["Account"].tolist()
    for account in account_list:
        account = str(int(account))
        print(account)
        driver.get(
            f"https://propaccess.trueautomation.com/clientdb/Property.aspx?cid=19&prop_id={account}"
        )
        time.sleep(1)
        data = []
        try:
            driver.find_element(
                By.XPATH, "/html/body/form/div/div[5]/div[1]/span/input").click()
            assert_details = driver.find_element(
                By.XPATH, "/html/body/form/div/div[5]/div[5]").get_property("innerHTML")
            assert_soup = BeautifulSoup(assert_details, "lxml")
            assessed_value_list.append([
                i.text for i in assert_soup.find_all("td", class_="currency")
            ][-1])
            property_details = driver.find_element(
                By.XPATH, "/html/body/form/div/div[5]/div[3]").get_property("innerHTML")
            property_soup = BeautifulSoup(property_details, "lxml")
            property_data = []
            for i in property_soup.find_all("tr"):
                for j in i.find_all("td"):
                    property_data.append([
                        s.text.replace("\n", "") for s in j
                        if s.text.replace("\n", "") != ""
                    ])
            for n, data in enumerate(property_data):
                if data == ['Address:']:
                    address.append(property_data[n + 1][0])
                    temp = " ".join(property_data[n + 1][1].split(" ")[:-1])
                    cities.append(temp)
                elif data == ['Name:']:
                    names.append(property_data[n + 1][0])
                elif data == ['Mailing Address:']:
                    mailing_addresses.append(property_data[n + 1][0])
                    mailing_cities.append(property_data[n + 1][1])
        except:
            address.append("")
            cities.append("")
            names.append("")
            mailing_addresses.append("")
            mailing_cities.append("")
            assessed_value_list.append("")
        print(address)
        print(cities)
        print(names)
        print(mailing_addresses)
        print(mailing_cities)
        print(assessed_value_list)
        print("====" * 20)
    input_df["Address"] = address
    input_df["City"] = cities
    input_df["Owner(From 2nd Web)"] = names
    input_df["Mailing Address"] = mailing_addresses
    input_df["Mailing City"] = mailing_cities
    input_df["Assessed Value"] = assessed_value_list
    output.add_worksheet(rows=input_df.shape[0],
                         cols=input_df.shape[1],
                         title="Denton")  # Create a new sheet
    work_sheet_instance = output.worksheet("Denton")  # get the newly created sheet
    set_with_dataframe(work_sheet_instance, input_df)  # Write collected data to the sheet
    print("==" * 30)
    print("Denton Scraping Second Instance Ended")
    print("==" * 30)
def run(input_sheet, output):
    """Search the Ellis County tax site by street, follow each account's detail page, and write the results to a new worksheet."""
    print("==" * 30)
    print("Ellis Scraping Started")
    print("==" * 30)
    driver = initChromeDriver()
    driver.implicitly_wait(10)
    driver.refresh()
    accounts_list = []
    owners = []
    addresses = []
    cities = []
    property_address = []
    total_due = []
    gross_value = []
    for street in input_sheet:
        print(f"Scraping {street} ......")
        driver.get("https://actweb.acttax.com/act_webdev/ellis/index.jsp")
        time.sleep(1)
        driver.find_element(
            By.XPATH,
            "/html/body/div[1]/div/div[2]/table/tbody/tr[1]/td/table[2]/tbody/tr/td/center/form/table/tbody/tr[3]/td[2]/h3[4]/b/input[2]"
        ).click()
        time.sleep(0.5)
        driver.find_element(
            By.XPATH,
            "/html/body/div[1]/div/div[2]/table/tbody/tr[1]/td/table[2]/tbody/tr/td/center/form/table/tbody/tr[3]/td[2]/h3[2]/input"
        ).send_keys(street)
        time.sleep(0.5)
        driver.find_element(
            By.XPATH,
            "/html/body/div[1]/div/div[2]/table/tbody/tr[1]/td/table[2]/tbody/tr/td/center/form/table/tbody/tr[5]/td[2]/h3[2]/input"
        ).click()
        time.sleep(1)
        try:
            table = driver.find_element(
                By.XPATH,
                "/html/body/div/div/div[2]/table/tbody/tr[1]/td/form/div"
            ).get_property("innerHTML")
            df = pd.read_html(table)[0]
            accounts = list(df["Account Number"])[2:]
            for account in accounts:
                try:
                    driver.get(
                        f"https://actweb.acttax.com/act_webdev/ellis/showdetail2.jsp?can={account}"
                    )
                    time.sleep(0.5)
                    table_data = driver.find_element(
                        By.XPATH,
                        "/html/body/div/div/div[2]/table/tbody/tr[2]/td/table[2]/tbody/tr"
                    ).get_property("innerHTML")
                    soup = BeautifulSoup(table_data, "lxml")
                    table_text = [i for i in soup.find_all("h3")]
                    address_detail = [
                        i.text.replace("\t", "").replace("\n", "")
                        for i in table_text[1]
                        if i.text.replace("\t", "").replace("\n", "") != ""
                    ]
                    accounts_list.append(account)
                    owners.append(address_detail[2].replace(" ", ""))
                    addresses.append(address_detail[3])
                    cities.append(address_detail[-1].split(" ")[0])
                    property_address.append([
                        i.text.replace("\t", "").replace("\n", "")
                        for i in table_text[2]
                    ][2].replace(" ", ""))
                    table_text_list = [i.text for i in table_text]
                    for i in table_text_list:
                        if "Total Amount Due" in i:
                            total_due.append(i.split("\xa0")[1])
                        elif "Gross Value" in i:
                            gross_value.append(i.split("\xa0")[1])
                except:
                    accounts_list.append(account)
                    owners.append("")
                    addresses.append("")
                    cities.append("")
                    property_address.append("")
                    total_due.append("")
                    gross_value.append("")
        except:
            pass
    final_dataframe = pd.DataFrame(zip(accounts_list, owners, addresses, cities,
                                       property_address, total_due, gross_value),
                                   columns=[
                                       "Account", "Owner", "Address", "City",
                                       "Property Site Address",
                                       "Total Amount Due", "Gross Value"
                                   ])
    output.add_worksheet(rows=final_dataframe.shape[0],
                         cols=final_dataframe.shape[1],
                         title="Ellis Scraping")  # Create a new sheet
    work_sheet_instance = output.worksheet("Ellis Scraping")  # get the newly created sheet
    set_with_dataframe(work_sheet_instance, final_dataframe)  # Write collected data to the sheet
    print("==" * 30)
    print("Ellis Scraping Finished")
    print("==" * 30)
def run(list_df, output):
    """Search the Dallas tax site by street number and name, retrying on timeouts, until the requested number of searches is reached."""
    print("=============Dallas Scraping Started=============")
    df = list_df
    driver = initChromeDriver()
    continueCount = ReadDallasCount()
    # minimum search results
    minimumSearchCount = 0
    minimumSearch = input("Enter Minimum Search Results in Number: ")
    if minimumSearch is not None:
        minimumSearch = int(minimumSearch)
    for street_obj in df.values:
        if minimumSearchCount >= minimumSearch:
            break
        while True:
            driver.set_page_load_timeout(10)
            try:
                driver.get(
                    'https://www.dallasact.com/act_webdev/dallas/searchbyproperty.jsp'
                )
                street_num = int(street_obj[0])
                street_name = street_obj[1]
                driver.find_element(
                    By.XPATH,
                    '/html/body/table/tbody/tr[2]/td/table/tbody/tr[1]/td/table/tbody/tr/td/center/form/table/tbody/tr[2]/td[2]/h3/input'
                ).send_keys(street_num)
                driver.find_element(
                    By.XPATH,
                    '/html/body/table/tbody/tr[2]/td/table/tbody/tr[1]/td/table/tbody/tr/td/center/form/table/tbody/tr[3]/td[2]/h3/input'
                ).send_keys(street_name)
                time.sleep(0.5)
                driver.find_element(
                    By.XPATH,
                    '/html/body/table/tbody/tr[2]/td/table/tbody/tr[1]/td/table/tbody/tr/td/center/form/table/tbody/tr[5]/td/center/input'
                ).click()
                time.sleep(0.5)
                try:
                    # Get the searched results
                    soup = BeautifulSoup(driver.page_source, 'lxml')
                    # Get the size of the data received
                    size = soup.find('span', id="mySize")
                    if size is not None:
                        size = int(size.get_text())
                        if size > 0:
                            ContinueWriteProcedure(driver, street_name)
                    break
                except Exception as e:
                    print(e)
                    time.sleep(1)
            except TimeoutException:
                driver.execute_script("window.stop();")
        minimumSearchCount += 1
        continueCount += 1
    # Write last count
    WriteDallasCount(continueCount)
    fileName = f"Dallas Scraping {minimumSearch} Searches"
    final_dataframe = pd.DataFrame(final_data,
                                   columns=[
                                       "Owner", "Address",
                                       "Property Site Address",
                                       "Current Tax Levy", "Total Amount Due",
                                       "Market Value"
                                   ])
    output.add_worksheet(rows=final_dataframe.shape[0],
                         cols=final_dataframe.shape[1],
                         title=fileName)  # Create a new sheet
    work_sheet_instance = output.worksheet(fileName)  # get the newly created sheet
    set_with_dataframe(work_sheet_instance, final_dataframe)  # Write collected data to the sheet
    driver.close()  # Close the driver
    print("=============Dallas Scraping Finished=============")
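# ReadDallasCount() and WriteDallasCount() above persist the last processed
# count between runs but are not defined in this section. A minimal sketch of
# what they might look like, assuming a plain text file; the file name
# "dallas_count.txt" is an assumption, not taken from the original code.
def ReadDallasCount():
    try:
        with open("dallas_count.txt", "r") as f:
            return int(f.read().strip())
    except (FileNotFoundError, ValueError):
        return 0  # start from zero if no previous count was saved


def WriteDallasCount(count):
    with open("dallas_count.txt", "w") as f:
        f.write(str(count))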
def run(input_sheets, output):
    """Look up each Johnson account on the manatron search site and append owner, address, and value columns."""
    print("==" * 30)
    print("Johnson Scraping Second Instance Started")
    print("==" * 30)
    driver = initChromeDriver()
    driver.implicitly_wait(10)
    driver.refresh()
    input_data = input_sheets.get_all_records()
    input_df = pd.DataFrame.from_dict(input_data)
    account_list = input_df["Account"].tolist()
    owners = []
    owner_address = []
    property_address = []
    land_state = []
    improve_value = []
    land_value = []
    for account in account_list:
        try:
            driver.get(
                f"http://search.johnson.manatron.com/search.php?searchStr={account}&searchType=account"
            )
            time.sleep(1)
            driver.find_element(
                By.XPATH, "/html/body/div[3]/table/tbody/tr[2]/td[1]/a").click()
            table = driver.find_element(
                By.XPATH, "/html/body/div[3]").get_property("innerHTML")
            df = pd.read_html(table)[0]
            data_list = df.values.tolist()
            for data in data_list:
                if data[0] == "Owner Name:":
                    owners.append(data[1])
                elif data[0] == "Owner Address:":
                    owner_address.append(data[1])
                elif data[0] == "Property Location:":
                    property_address.append(data[1])
                elif data[0] == "Land State Code:":
                    land_state.append(data[1])
                elif data[0] == "Improvement Value":
                    improve_value.append(data[1])
                elif data[0] == "Land Market Value:":
                    land_value.append(data[1])
        except:
            owners.append("")
            owner_address.append("")
            property_address.append("")
            land_state.append("")
            improve_value.append("")
            land_value.append("")
    input_df["Owner Name(From 2nd Web)"] = owners
    input_df["Owner Address"] = owner_address
    input_df["Property Location"] = property_address
    input_df["Land State Code"] = land_state
    input_df["Improvement Value"] = improve_value
    input_df["Land Market Value"] = land_value
    output.add_worksheet(rows=input_df.shape[0],
                         cols=input_df.shape[1],
                         title="Johnson")  # Create a new sheet
    work_sheet_instance = output.worksheet("Johnson")  # get the newly created sheet
    set_with_dataframe(work_sheet_instance, input_df)  # Write collected data to the sheet
    print("==" * 30)
    print("Johnson Scraping Second Instance Ended")
    print("==" * 30)
def run(input_streets, output):
    """Search tad.org by street, walk every pagination page, and write the collected tables to a new 'Tarrant' worksheet."""
    print("================== Tarrant First Instance Started ====================")
    driver = initChromeDriver()
    driver.implicitly_wait(10)
    driver.refresh()
    fake_input(driver)
    for street in input_streets:
        print(f"Scraping {street} ..... ")
        driver.get("https://www.tad.org/property-search/")
        time.sleep(1)
        time.sleep(0.5)
        driver.find_element(
            By.XPATH,
            "/html/body/div[3]/div[3]/div[3]/form/div/div[3]/div[1]/div[1]/div[1]/input"
        ).clear()
        time.sleep(0.5)
        driver.find_element(
            By.XPATH,
            "/html/body/div[3]/div[3]/div[3]/form/div/div[3]/div[1]/div[1]/div[1]/input"
        ).send_keys(street)
        time.sleep(0.5)
        driver.find_element(
            By.XPATH,
            "/html/body/div[3]/div[3]/div[3]/form/div/div[3]/div[1]/div[5]/input"
        ).click()
        time.sleep(1)
        soup = BeautifulSoup(driver.page_source, 'lxml')
        pagination = soup.find(
            'div', class_="itemPagination property-search-pagination")
        pagination_list = get_pagination(pagination=pagination,
                                         url=driver.current_url)
        if pagination_list:
            for page in pagination_list:
                time.sleep(1)
                driver.get(page)
                soup = BeautifulSoup(driver.page_source, 'lxml')
                get_table(soup=soup)
        else:
            get_table(soup=soup)
    driver.close()  # Close the driver
    # final_dataframe = pd.concat(all_tables)
    final_dataframe = pd.DataFrame(all_tables,
                                   columns=[
                                       "Account", "Property Address",
                                       "Property City", "Primary Owner Name",
                                       "Market Value"
                                   ])
    final_dataframe.to_csv("oo.CSV", index=False)
    output.add_worksheet(rows=final_dataframe.shape[0],
                         cols=final_dataframe.shape[1],
                         title="Tarrant")  # Create a new sheet
    work_sheet_instance = output.worksheet("Tarrant")  # get the newly created sheet
    set_with_dataframe(work_sheet_instance, final_dataframe)  # Write collected data to the sheet
    print("================== Tarrant First Instance Finished ====================")
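# The run(input..., output) functions above all expect `output` to be a
# gspread Spreadsheet, and the "second instance" scrapers expect a gspread
# Worksheet as input. A minimal sketch of how one of them might be driven,
# assuming a service-account credentials file; the file path, spreadsheet
# title, and example streets below are placeholders, not taken from the
# original code.
if __name__ == "__main__":
    import gspread

    client = gspread.service_account(filename="credentials.json")  # assumed path
    output = client.open("Tax Scraping Output")                    # assumed spreadsheet title
    streets = ["MAIN ST", "OAK AVE"]                               # example input streets
    run(streets, output)  # e.g. the Tarrant first-instance scraper above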