def parse(self, response):
    """Yield ``Official`` items for the mayor, council and tax collector.

    A URL whose second-to-last character is ``"t"`` is handled as the page
    listing the mayor and the council members; any other URL is handled as
    the tax collector page.
    """
    page_url = response.url
    if page_url[-2] != "t":
        for para in response.xpath('//article[@id="post-129"]/div/p[3]'):
            collector = para.xpath('strong/text()').get()
            mail = para.xpath('a/text()').get()
            # Third bare text node: keep what follows the first ": ".
            number = para.xpath('./text()').getall()[2].split(": ")[1]
            # First two bare text nodes together form the address.
            street = [
                piece.strip()
                for piece in para.xpath('./text()').getall()[0:2]
            ]
            yield Official(
                muniName=self.muniName,
                muniType=self.muniType,
                office="TAX COLLECTOR",
                name=collector,
                email=mail,
                phone=number,
                address=", ".join(street),
                url=page_url,
            )
        return

    for para in response.xpath('//article[@id="post-28"]/div/p[2]'):
        yield Official(
            muniName=self.muniName,
            muniType=self.muniType,
            office="MAYOR",
            name=para.xpath("text()").get(),
            url=page_url,
        )

    member_lines = response.xpath(
        '//article[@id="post-28"]/div/p[1]/text()'
    ).getall()
    for line in member_lines:
        # Only the text before the en dash is used as the name.
        yield Official(
            muniName=self.muniName,
            muniType=self.muniType,
            office="MEMBER OF COUNCIL",
            name=line.split("–")[0],
            url=page_url,
        )
def parse(self, response):
    """Yield the mayor (first table row) and one council member per later row.

    Row ``n`` (``n >= 1``) of the table is reported as ``"WARD n"``.
    """
    rows = response.xpath(
        '//div[@class="fusion-text fusion-text-2"]/table/tbody/tr'
    )
    for position, row in enumerate(rows):
        if position == 0:
            cell = row.xpath("td[2]/text()")
            # The first word of the first text node is dropped from the name.
            mayor_name = " ".join(cell.get().split(" ")[1:])
            lines = cell.getall()
            yield Official(
                muniName=self.muniName,
                muniType=self.muniType,
                office="MAYOR",
                name=mayor_name,
                phone=lines[1].strip(),
                email=lines[2],
                termEnd=self._termEnd(lines[3]),
                url=response.url,
            )
            continue

        # Non-blank text fragments, in order: name, phone, email, term end.
        fields = [
            text.strip()
            for text in row.xpath("td[2]/p//text()").getall()
            if text.strip()
        ]
        yield Official(
            muniName=self.muniName,
            muniType=self.muniType,
            office="MEMBER OF COUNCIL",
            district="WARD {}".format(position),
            name=fields[0],
            phone=fields[1],
            email=fields[2],
            termEnd=self._termEnd(fields[3]),
            url=response.url,
        )
def parse(self, response):
    """Yield the mayor, council members and tax collector from ``<table>``s.

    Each office is located through the ``<h3>`` heading text of its table.
    """
    for table in response.xpath('//table[contains(.//h3,"Mayor")]'):
        raw_name = table.xpath("tr[2]//strong/text()").get()
        yield Official(
            muniName=self.muniName,
            muniType=self.muniType,
            office="MAYOR",
            name=self._name(raw_name),
            email=table.xpath("tr[2]//a/@href").get(),
            termEnd=table.xpath("tr[2]/td[2]/p/text()").get(),
            url=response.url,
        )

    # Only rows whose <strong> text contains "(" are treated as members.
    council_rows = response.xpath(
        '//table[contains(.//h3,"Churchill Borough Council")]/tr[contains(.//strong, "(")]'
    )
    for row in council_rows:
        raw_name = row.xpath(".//strong/text()").get()
        yield Official(
            muniName=self.muniName,
            muniType=self.muniType,
            office="MEMBER OF COUNCIL",
            name=self._name(raw_name),
            email=row.xpath(".//a/@href").get(),
            termEnd=row.xpath("./td[2]/p/text()").get(),
            url=response.url,
        )

    for table in response.xpath(
            '//table[contains(.//h3,"Real Estate Tax Collector")]'):
        raw_name = table.xpath("tr[2]//strong/text()").get()
        yield Official(
            muniName=self.muniName,
            muniType=self.muniType,
            office="TAX COLLECTOR",
            name=self._name(raw_name),
            termEnd=table.xpath("tr[2]/td[2]/p/text()").get(),
            url=response.url,
        )
def parse(self, response):
    """Yield the mayor and council members from fixed paragraph positions.

    The mayor is read from paragraph 7, one council member (``"WARD 1"``)
    from paragraphs 15-17, and the remaining members from the paragraphs
    listed in ``member_paragraphs``.
    """
    for para in response.xpath('//div[@class="entry-summary"]/p[7]'):
        yield Official(
            muniName=self.muniName,
            muniType=self.muniType,
            office="MAYOR",
            name=para.xpath("text()[1]").get().split("–")[0],
            address=self._address(para.xpath("text()[2]").get().strip()),
            phone=para.xpath("text()[3]").get().split("(")[0],
            email=para.xpath("text()[3]").get().split(")")[1].strip(),
            url=response.url,
        )

    for summary in response.xpath('//div[@class="entry-summary"]'):
        yield Official(
            muniName=self.muniName,
            muniType=self.muniType,
            office="MEMBER OF COUNCIL",
            name=summary.xpath("p[15]/text()").get(),
            address=self._address(summary.xpath("p[16]/text()").get().strip()),
            phone=summary.xpath("p[17]/text()").get().split("(")[0],
            district="WARD 1",
            url=response.url,
        )

    member_paragraphs = [9, 10, 11, 12, 13, 14, 18]
    for index in member_paragraphs:
        selector = '//div[@class="entry-summary"]/p[{}]'.format(index)
        for para in response.xpath(selector):
            member_name = para.xpath("text()[1]").get().split("–")[0]
            # The ward is looked up from the member's last name.
            surname = member_name.strip().split(" ")[-1]
            third_line = para.xpath("text()[3]").get()
            if third_line is None:
                member_phone = None
            else:
                member_phone = third_line.split("(")[0]
            yield Official(
                muniName=self.muniName,
                muniType=self.muniType,
                office="MEMBER OF COUNCIL",
                name=member_name,
                address=self._address(para.xpath("text()[2]").get().strip()),
                phone=member_phone,
                district=self._wards(surname),
                url=response.url,
            )
def parse(self, response):
    """Yield the tax collector or the supervisors from a directory table.

    URLs ending in ``"7"`` produce the tax collector; URLs ending in ``"2"``
    produce one supervisor per table row.  Names listed as "Last, First"
    are turned into "First Last".
    """
    address_xpath = "//span[@class='DirectoryNormalText'][1]/p[1]/text()"
    last_char = response.url[-1]

    if last_char == "7":
        for row in response.xpath('//tr/td[contains(span/text(),"Tax Coll")]/..'):
            name_parts = row.xpath("td[1]/span/a/text()").get().split(", ")
            full_name = " ".join(reversed(name_parts))
            yield Official(
                muniName=self.muniName,
                muniType=self.muniType,
                office="TAX COLLECTOR",
                name=full_name,
                phone=row.xpath("td[4]/span/text()").get(),
                email=self._email(row.xpath(".//script").get()),
                address=", ".join(
                    [line.strip() for line in row.xpath(address_xpath).getall()]),
                url=response.url,
            )
    elif last_char == '2':
        for row in response.xpath('//table/tr'):
            number = row.xpath("td[4]/span/text()").get().strip()
            name_parts = row.xpath("td[1]/span/a/text()").get().split(", ")
            full_name = " ".join(reversed(name_parts))
            yield Official(
                muniName=self.muniName,
                muniType=self.muniType,
                office="SUPERVISOR",
                name=full_name,
                # An empty phone cell is reported as None.
                phone=number if number != '' else None,
                email=self._email(row.xpath(".//script").get()),
                address=", ".join(
                    [line.strip() for line in row.xpath(address_xpath).getall()]),
                url=response.url,
            )
def parse(self, response):
    """Dispatch on the URL path to scrape the mayor, controller or council.

    * ``/mayor/``      -- yields a MAYOR ``Official``.
    * ``/controller/`` -- yields a follow-up request handled by
      ``self.controllerParse`` with the scraped fields as ``cb_kwargs``.
    * ``/council/``    -- yields MEMBER OF COUNCIL items; the district
      number is the character at ``response.url[-10]``.
    """

    def long_texts(node):
        # Stripped text fragments longer than one character.
        return [
            text.strip()
            for text in node.xpath(".//text()").getall()
            if len(text.strip()) > 1
        ]

    def nonblank(selection):
        # Stripped, non-empty strings of an already selected text node list.
        return [text.strip() for text in selection.getall() if text.strip() != '']

    page_url = response.url

    if "/mayor/" in page_url:
        for box in response.xpath('//*[@class="contacts-content"]'):
            info = self._getall(long_texts(box))
            yield Official(
                muniName=self.muniName,
                muniType=self.muniType,
                office="MAYOR",
                name=info["name"],
                address=info["address"],
                url=response.url,
            )

    elif "/controller/" in page_url:
        for box in response.xpath('//*[@class="contacts-content"]'):
            info = self._getall(long_texts(box))
            yield scrapy.Request(
                url="https://pittsburghpa.gov/controller/controller-bio",
                callback=self.controllerParse,
                cb_kwargs=info,
            )

    elif "/council/" in page_url:
        district_digit = response.url[-10]
        found_contact_box = False
        council_boxes = response.xpath(
            '//*[@class="contacts-content" and contains(., "Council")]'
        )[0:1]
        for box in council_boxes:
            found_contact_box = True
            info = self._getall(long_texts(box))

            # Some districts need individual fields patched from another box.
            if district_digit == '8':
                first_box = nonblank(
                    response.xpath('//*[@class="contacts-content"][1]//text()'))
                info["address"] = self._address(first_box[1:4])
                info["phone"] = first_box[4]
            elif district_digit == '7':
                second_box = nonblank(
                    response.xpath('//*[@class="contacts-content"]')[1:2]
                    .xpath('.//text()'))
                info["email"] = self._email(second_box)
            elif district_digit == '6':
                second_box = nonblank(
                    response.xpath('//*[@class="contacts-content"]')[1:2]
                    .xpath('.//text()'))
                info["phone"] = self._phone(second_box)

            yield Official(
                muniName=self.muniName,
                muniType=self.muniType,
                office="MEMBER OF COUNCIL",
                district="DISTRICT {}".format(district_digit),
                name=info["name"],
                address=info["address"],
                phone=info["phone"],
                email=info["email"],
                url=response.url,
            )

        if not found_contact_box:
            # Fallback layout: fragments 5..9 of each "col-md-6" column.
            for column in response.xpath('//div[@class="col-md-6"]'):
                info = self._getall(long_texts(column)[5:10])
                yield Official(
                    muniName=self.muniName,
                    muniType=self.muniType,
                    office="MEMBER OF COUNCIL",
                    district="DISTRICT {}".format(district_digit),
                    name=info["name"],
                    address=info["address"],
                    phone=info["phone"],
                    email=info["email"],
                    url=response.url,
                )
def parse(self, response):
    """Yield mayor/council items or the tax collector, depending on the URL.

    The second-to-last URL character selects the page type: ``"2"`` for the
    mayor and council page, ``"s"`` for the tax collector page.
    """
    marker = response.url[-2]

    if marker == "2":
        for panel in response.xpath('//div[@id="panel-2-0-1-0"]'):
            yield Official(
                muniName=self.muniName,
                muniType=self.muniType,
                office="MAYOR",
                name=panel.xpath("h3/text()").get(),
                email=panel.xpath(".//a/@href").get(),
                url=response.url,
            )
        # Child divs 1..7 of the container are read as council members.
        council_panels = response.xpath('//div[@id="pl-2"]/div')[1:8]
        for panel in council_panels:
            yield Official(
                muniName=self.muniName,
                muniType=self.muniType,
                office="MEMBER OF COUNCIL",
                name=panel.xpath('div[2]//h3/text()').get(),
                email=panel.xpath('div[2]//a/@href').get(),
                url=response.url,
            )
    elif marker == "s":
        for para in response.xpath('//article[@id="post-291"]/div/p[6]'):
            collector = para.xpath("./text()").get()
            # Second space-separated token of the third text node.
            number = para.xpath("./text()").getall()[2].strip().split(" ")[1]
            yield Official(
                muniName=self.muniName,
                muniType=self.muniType,
                office="TAX COLLECTOR",
                name=collector,
                phone=number,
                url=response.url,
            )
def parse(self, response):
    """Yield ward commissioners or the tax collector, depending on the URL.

    URLs containing ``"officials"`` yield one COMMISSIONER per matching ward
    block; URLs containing ``"tax"`` yield a single TAX COLLECTOR.  Other
    URLs yield nothing.
    """
    if "officials" in response.url:
        for quote in response.xpath(
                "//div[contains(h4/strong/u/text(),'Ward ')]"):
            # Text fragments in order: ward, term end, name, two address
            # lines, phone.
            alltext = getAllText(quote)
            yield Official(muniName=self.muniName,
                           muniType=self.muniType,
                           office="COMMISSIONER",
                           name=alltext[2],
                           district=alltext[0].upper(),
                           termEnd=alltext[1],
                           address=alltext[3] + ", " + alltext[4],
                           phone=alltext[5],
                           url=response.url)
    elif "tax" in response.url:
        # The heading reads "<name>-Tax Collector"; keep the part before "-".
        name = response.xpath(
            "//h3[contains(u/text(),'-Tax Collector')]/u/text()"
        ).get().split("-")[0]
        address = getAllText(
            response.xpath("//div[contains(strong/text(),'Tax Office')]"))[1]
        # The stray debug print of the phone fragments was removed so the
        # callback no longer writes to stdout.
        phone = getAllText(
            response.xpath("//div[strong/text()='Phone:']"))[1]
        email = getAllText(
            response.xpath("//div[contains(strong/text(),'Email:')]"))[1]
        yield Official(muniName=self.muniName,
                       muniType=self.muniType,
                       office="TAX COLLECTOR",
                       name=name,
                       address=address,
                       phone=phone,
                       email=email,
                       url=response.url)
def parse(self, response):
    """Yield commissioners or the tax collector, depending on the URL.

    The second-to-last URL character selects the page: ``'r'`` for the
    commissioners page, ``'l'`` for the tax collector page.
    """
    marker = response.url[-2]

    if marker == 'r':
        for panel in response.xpath('//div[@class="cpTabPanels"]'):
            fragments = [
                text.strip()
                for text in panel.xpath('.//text()').getall()
                if len(text.strip()) > 0 and '$' not in text
            ]
            # Group the fragments into records; a fragment containing "@"
            # closes the current record.
            records = []
            current = []
            for fragment in fragments:
                current.append(fragment)
                if '@' in fragment:
                    records.append(current)
                    current = []

            for record in records:
                # A record whose second entry lacks "Commissioner" is
                # reported as a vacant seat.
                if "Commissioner" in record[1]:
                    name = self._name(record[1])
                else:
                    name = None
                yield Official(
                    muniName=self.muniName,
                    muniType=self.muniType,
                    office="COMMISSIONER",
                    district=record[0].upper(),
                    name=name,
                    email=record[-1],
                    vacant=name is None,
                    url=response.url,
                )
    elif marker == 'l':
        for para in response.xpath(
                '//div[contains(h2/text(),"Ross Tax Collector")]/p[1]'):
            yield Official(
                muniName=self.muniName,
                muniType=self.muniType,
                office="TAX COLLECTOR",
                name=para.xpath('text()[1]').get(),
                email=para.xpath('a/@href').get(),
                phone=para.xpath('text()[2]').get(),
                url=response.url,
            )
def parse(self, response):
    """Yield mayor, council and tax collector items from ``twocolbig`` divs.

    Each ``<h4>`` heading is located inside the div's text fragments and the
    fragments following it are interpreted according to the heading.
    """
    for section in response.xpath("//div[@class='twocolbig']"):
        fragments = getAllText(section)
        # don't try to make the next line interact w section it wouldn't work
        headings = getAllText(
            response.xpath("//div[@class='twocolbig']/h4"))
        for heading in headings:
            # Index of the first fragment equal to the heading.
            start = [
                idx for idx, fragment in enumerate(fragments)
                if fragment == heading
            ][0]
            lowered = heading.lower()

            if "mayor" in lowered:
                yield Official(muniName=self.muniName,
                               muniType=self.muniType,
                               office="MAYOR",
                               name=fragments[start + 1],
                               url=response.url)

            if heading == "MEMBERS OF COUNCIL":
                # Up to seven fragments after the heading are members.
                for entry in fragments[start + 1:start + 8]:
                    yield Official(
                        muniName=self.muniName,
                        muniType=self.muniType,
                        office="MEMBER OF COUNCIL",
                        # drop a position (e.g. pres or VP) after the comma
                        name=entry.split(",")[0],
                        url=response.url)

            if "tax" in heading.lower():
                yield Official(muniName=self.muniName,
                               muniType=self.muniType,
                               office="TAX COLLECTOR",
                               phone=fragments[start + 3].split(":")[1],
                               name=fragments[start + 2],
                               url=response.url)
def parse(self, response):
    """Yield the tax collector or the commissioners, depending on the URL.

    URLs containing ``"tax"`` yield the TAX COLLECTOR; URLs containing
    ``"commissioners"`` yield one COMMISSIONER per ward block.
    """
    page_url = response.url

    if "tax" in page_url:
        for block in response.xpath("//div[p/text()='Tax Collector']/.."):
            yield Official(
                muniName=self.muniName,
                muniType=self.muniType,
                office="TAX COLLECTOR",
                name=block.xpath("div[2]/h1/span/span/span/text()").get(),
                phone=block.xpath("div[4]/p/text()").get(),
                url=response.url,
            )
    elif "commissioners" in page_url:
        heading_names = [
            getAllText(heading)[0].split(" -")[0]
            for heading in response.xpath("//h1")
        ]
        ward_blocks = [
            getAllText(block)
            for block in response.xpath("//p[contains(text(),'Ward ')]/../..")
        ]
        for fields in ward_blocks:
            # A block with fewer than four fragments lacks its name; the
            # third <h1> name is put in front in that case.
            if len(fields) < 4:
                fields = [heading_names[2]] + fields
            yield Official(
                muniName=self.muniName,
                muniType=self.muniType,
                office="COMMISSIONER",
                name=fields[0].split(" -")[0],
                email=fields[1],
                phone=fields[2],
                district=fields[3].upper(),
                url=response.url,
            )
def parse(self, response):
    """Yield mayor/council items or the tax collector, depending on the URL.

    The character at ``response.url[-6]`` selects the page: ``"x"`` for the
    table with mayor and council members, ``"q"`` for the FAQ answer naming
    the tax collector.
    """
    marker = response.url[-6]

    if marker == "x":
        for row in response.xpath('//tr[contains(th/text(),"Mayor")]'):
            # Anything from the first "(" onwards is not part of the name.
            mayor_name = row.xpath("td/text()").get().split("(")[0]
            yield Official(
                muniName=self.muniName,
                muniType=self.muniType,
                office="MAYOR",
                name=mayor_name,
                url=response.url,
            )

        council_lines = response.xpath(
            '//tr[contains(th/text(),"Council Members")]/td/text()'
        ).getall()
        for line in council_lines:
            # Members are comma separated; blank pieces are skipped.
            members = [piece for piece in line.split(",") if piece.strip()]
            for member in members:
                yield Official(
                    muniName=self.muniName,
                    muniType=self.muniType,
                    office="MEMBER OF COUNCIL",
                    name=member,
                    url=response.url,
                )
    elif marker == "q":
        for answer in response.xpath('//div[@id="FAQ7ans"]'):
            # The first two words of the answer text are used as the name.
            words = answer.xpath("./text()").get().strip().split(" ")
            yield Official(
                muniName=self.muniName,
                muniType=self.muniType,
                office="TAX COLLECTOR",
                name=" ".join(words[0:2]),
                url=response.url,
            )
def parse(self, response):
    """Yield the tax collector, the mayor, or requests for council pages.

    The character at ``response.url[-6]`` selects the page: ``'n'`` for the
    tax collector, ``'r'`` for the mayor, and ``'l'`` for the sidebar list
    whose links are followed with ``self.councilParse``.
    """
    marker = response.url[-6]

    if marker == 'n':
        for para in response.xpath("//div[@itemprop='articleBody']/p[1]"):
            collector_name = self._taxCollector(
                para.xpath("text()").get(), "name")
            collector_phone = self._taxCollector(
                para.xpath("text()").get(), "phone")
            yield Official(muniName=self.muniName,
                           muniType=self.muniType,
                           office="TAX COLLECTOR",
                           name=collector_name,
                           phone=collector_phone,
                           url=response.url)
    elif marker == 'r':
        for title in response.xpath("//div[@class='custom-title']"):
            yield Official(muniName=self.muniName,
                           muniType=self.muniType,
                           office="MAYOR",
                           name=title.xpath("div[1]/text()").get(),
                           phone=title.xpath("div[3]/text()").get(),
                           url=response.url)
    elif marker == 'l':
        sidebar_items = response.xpath(
            "//div[@id='rt-sidebar-a']/div[1]/div/div[2]/ul/li")
        for item in sidebar_items:
            target = response.urljoin(item.xpath('a/@href').get())
            yield scrapy.Request(url=target, callback=self.councilParse)
def parse(self, response):
    """Yield mayor/council items or the tax collector, depending on the URL.

    URLs ending in ``"h"`` are handled as the mayor and council page; URLs
    ending in ``"e"`` as the tax collector page.
    """
    last_char = response.url[-1]

    if last_char == "h":
        for box in response.xpath('//div[@id="comp-jf8d75h8"]'):
            yield Official(muniName=self.muniName,
                           muniType=self.muniType,
                           office="MAYOR",
                           name=box.xpath("p[1]//text()").get(),
                           email=box.xpath("p[3]//text()").get(),
                           url=response.url)

        columns = response.xpath(
            '//h6[contains(span//text(),"COUNCIL MEMBERS")]/../../div'
        )[1:4]
        for column in columns:
            underlined = column.xpath(
                './/span[contains(@style,"text-decoration:underline")]//text()'
            ).getall()
            cleaned = [detox(raw) for raw in underlined if len(detox(raw)) > 0]
            # Cleaned fragments alternate name, email, name, email, ...
            for member_name, member_email in zip(cleaned[0::2], cleaned[1::2]):
                yield Official(muniName=self.muniName,
                               muniType=self.muniType,
                               office="MEMBER OF COUNCIL",
                               name=member_name,
                               email=member_email,
                               url=response.url)
    elif last_char == "e":
        for box in response.xpath('//div[@id="comp-j589ncus"]'):
            # The third heading is split on "-" and the second piece is
            # used as the name.
            collector = box.xpath("h6[3]//text()").get().split("-")[1]
            yield Official(
                muniName=self.muniName,
                muniType=self.muniType,
                office="TAX COLLECTOR",
                name=collector,
                email=box.xpath("h6[5]//text()").get(),
                phone=box.xpath("h6[4]//text()").get(),
                url=response.url,
            )
def parse(self, response):
    """Yield the mayor, council members or tax collector by URL suffix.

    The last URL character selects the page: ``"s"`` mayor, ``"/"`` council
    members, ``"f"`` tax collector.
    """
    last_char = response.url[-1]

    if last_char == "s":
        mayor_boxes = response.xpath(
            "//div[contains(@class, 'row container-box-med') and contains(.//div/@class, 'it-grid-one start bl')]"
        )
        for box in mayor_boxes:
            bio_lines = box.xpath('div/p/text()').getall()
            # Words two and three of the heading form the name.
            heading_words = box.xpath("div/h1/text()").get().split(" ")
            yield Official(
                muniName=self.muniName,
                muniType=self.muniType,
                office="MAYOR",
                name=" ".join(heading_words[1:3]),
                phone=bio_lines[-1],
                email=bio_lines[-3].replace(" ", ""),
                url=response.url,
            )
    elif last_char == "/":
        # The first team box is skipped.
        team_boxes = response.xpath(
            "//div[@class='med information-text']//div[@class='itg-teambox']"
        )[1:]
        for box in team_boxes:
            member = box.xpath("h3/text()").get()
            # The district is looked up from the last word of the name.
            yield Official(muniName=self.muniName,
                           muniType=self.muniType,
                           office="MEMBER OF COUNCIL",
                           name=member,
                           district=self._districts(member.split(" ")[-1]),
                           url=response.url)
    elif last_char == "f":
        for box in response.xpath(
                "//p[contains(text(), 'Tax Department Manager')]/.."):
            yield Official(muniName=self.muniName,
                           muniType=self.muniType,
                           office="TAX COLLECTOR",
                           name=box.xpath("h3/text()").get(),
                           url=response.url)
def parse(self, response):
    """Yield elected officials or the tax collector, depending on the URL.

    URLs containing ``"officials"`` yield the mayor and council members;
    URLs containing ``"taxes"`` yield the tax collector parsed out of one
    sentence.
    """
    page_url = response.url

    if "officials" in page_url:
        cards = response.xpath(
            "//p[contains(text(),'Council Member') or contains(text(),'Mayor')]/.."
        )
        for card in cards:
            if "Mayor" in card.xpath("p/text()").get():
                office = "MAYOR"
            else:
                office = "MEMBER OF COUNCIL"
            yield Official(
                muniName=self.muniName,
                muniType=self.muniType,
                office=office,
                name=card.xpath("h3/text()").get().split(",")[0],
                url=response.url,
            )
    elif "taxes" in page_url:
        sentence = response.xpath(
            "//text()[contains(.,'Whitehall Borough Tax Collector')]"
        ).get()
        # Words of the last ", "-separated clause: the first two form the
        # name, the third from the end is the phone, the last is the email.
        words = sentence.split(", ")[-1].split(" ")
        yield Official(muniName=self.muniName,
                       muniType=self.muniType,
                       office="TAX COLLECTOR",
                       name=" ".join(words[0:2]),
                       email=words[-1],
                       phone=words[-3],
                       url=response.url)
def parse(self, response):
    """Yield the mayor, council members and tax collector from one page.

    Street addresses of the mayor and council members get a fixed
    ``", Pittsburgh, PA 15205"`` suffix appended.
    """
    city_suffix = ", Pittsburgh, PA 15205"

    for para in response.xpath('//div[@class="pf-content"]/p[1]'):
        mayor = getAllText(para)
        yield Official(muniName=self.muniName,
                       muniType=self.muniType,
                       office="MAYOR",
                       name=mayor[1],
                       url=response.url,
                       address=mayor[2] + city_suffix,
                       phone=mayor[3])

    for row in response.xpath('//div[@class="pf-content"]/table/tbody/tr'):
        member = getAllText(row)
        # Only the text before the en dash is used as the name.
        yield Official(muniName=self.muniName,
                       muniType=self.muniType,
                       office="MEMBER OF COUNCIL",
                       name=member[0].split("–")[0],
                       url=response.url,
                       address=member[1] + city_suffix,
                       phone=member[2])

    for para in response.xpath('//div[@class="pf-content"]/p[5]'):
        collector = getAllText(para)
        yield Official(muniName=self.muniName,
                       muniType=self.muniType,
                       office="TAX COLLECTOR",
                       name=collector[1],
                       url=response.url,
                       email=collector[2],
                       phone=collector[3])
def parse(self, response):
    """Yield the supervisors or the tax collector, depending on the URL.

    URLs containing ``"supervisors"`` yield one SUPERVISOR per list item;
    URLs containing ``"tax"`` yield the TAX COLLECTOR.
    """
    page_url = response.url

    if "supervisors" in page_url:
        items = response.xpath(
            "//div[contains(h2/text(),'Current Board of Supervisors')]/ul/li"
        )
        for item in items:
            # Only the text before the en dash is used as the name.
            supervisor = item.xpath("text()").get().split("–")[0]
            yield Official(muniName=self.muniName,
                           muniType=self.muniType,
                           office="SUPERVISOR",
                           name=supervisor,
                           district=self._district(supervisor),
                           url=response.url)
    elif "tax" in page_url:
        for entry in response.xpath("//div[@class='entry']"):
            lines = getAllText(entry.xpath('p[13]'))
            mail = entry.xpath('p[14]/a/@href').get()
            # Rebuild the city line from its first and third words with
            # "PA" in between.
            city_words = lines[3].split(" ")
            city_line = " ".join([city_words[0], "PA", city_words[2]])
            yield Official(muniName=self.muniName,
                           muniType=self.muniType,
                           office="TAX COLLECTOR",
                           name=lines[0],
                           address=lines[2] + ", " + city_line,
                           phone=lines[4].replace(".", ""),
                           email=mail,
                           url=response.url)
def parse(self, response):
    """Yield mayor/council items or the tax collector from directory widgets.

    URLs ending in ``"l"`` are handled as the elected officials list, whose
    first entry is the mayor and whose remaining entries are council
    members.  Any other URL is handled as the tax collector page.
    """
    if response.url[-1] != "l":
        first_entry = response.xpath(
            '//div[@id="cityDirectoryWidgetba8decb7-37d2-4a77-9496-5d8ce39b7341"]/ol/li[1]'
        )
        for entry in first_entry:
            yield Official(muniName=self.muniName,
                           muniType=self.muniType,
                           office="TAX COLLECTOR",
                           name=entry.xpath("h4/text()").get(),
                           url=response.url,
                           phone=entry.xpath('div[2]/text()').get())
        return

    entries = response.xpath(
        '//div[@id="cityDirectoryWidget3e4ca0ff-5ed1-4ca6-86bc-f4ebc4b5ed38"]//ol/li'
    )
    for position, entry in enumerate(entries):
        if position == 0:
            # The mayor's phone is the text after ": " in the third div.
            yield Official(muniName=self.muniName,
                           muniType=self.muniType,
                           office="MAYOR",
                           name=entry.xpath("h4/text()").get(),
                           email=entry.xpath('div[2]/a/text()').get(),
                           url=response.url,
                           phone=entry.xpath(
                               'div[3]/text()').get().split(": ")[1])
        else:
            yield Official(muniName=self.muniName,
                           muniType=self.muniType,
                           office="MEMBER OF COUNCIL",
                           name=entry.xpath("h4/text()").get(),
                           email=entry.xpath('div[2]/a/text()').get(),
                           url=response.url)
def parse(self, response):
    """Yield supervisors, the tax collector and auditors from one page."""
    table_text = getAllText(response.xpath("//table[@id='Table1']"))
    # Entry i is paired with entry i + 3 (name and phone) for the first
    # half of the table fragments.
    for idx in range(len(table_text) // 2):
        yield Official(muniName=self.muniName,
                       muniType=self.muniType,
                       office="SUPERVISOR",
                       name=table_text[idx].split(",")[0],
                       phone=table_text[idx + 3],
                       url=response.url)

    for block in response.xpath(
            "//b[contains(text(),'Real Estate Tax Collector')]/.."):
        collector = block.xpath("./u/strong/text()").get().strip()
        nonblank = [
            text.strip()
            for text in block.xpath("./text()").getall()
            if len(text.strip()) != 0
        ]
        # The third non-blank text node is split into words; address and
        # phone are taken from fixed word positions.
        words = nonblank[2].split(" ")
        yield Official(muniName=self.muniName,
                       muniType=self.muniType,
                       office="TAX COLLECTOR",
                       name=collector,
                       address=" ".join(words[10:19])[:-1],
                       phone=words[-7],
                       url=response.url)

    auditor_text = getAllText(
        response.xpath("//font[contains(b/text(),'Board of Auditors')]"))
    # The second auditor entry loses its trailing character.
    auditor_names = [auditor_text[3], auditor_text[5][:-1], auditor_text[7]]
    for auditor in auditor_names:
        yield Official(muniName=self.muniName,
                       muniType=self.muniType,
                       office="AUDITOR",
                       name=auditor,
                       url=response.url)
def parse(self, response):
    """Yield seven commissioners and the tax collector from one page.

    The commissioner block is flattened to text, split on runs of two or
    more whitespace characters, and read in groups of six fields.
    """
    for span in response.xpath(
            '//div[@id="divEditor909176e4-b373-45cd-af7f-deacc7efb43e"]/span'):
        flat = " ".join(getAllText(span))
        # Skip the first four chunks and drop "Vice President" labels so
        # that every commissioner occupies exactly six chunks.
        chunks = [
            chunk for chunk in split(r"\s{2,}", flat)[4:]
            if chunk != "Vice President"
        ]
        for seat in range(7):
            base = seat * 6
            yield Official(muniName=self.muniName,
                           muniType=self.muniType,
                           office="COMMISSIONER",
                           name=chunks[base].split(",")[0],
                           district=chunks[base + 1].upper(),
                           termEnd=chunks[base + 3],
                           phone=chunks[base + 5],
                           address=chunks[base + 2] + ", " + chunks[base + 4],
                           url=response.url)

    # Only the first matching table row is the tax collector.
    for row in response.xpath('//tr[@class="textContent"]')[0:1]:
        yield Official(muniName=self.muniName,
                       muniType=self.muniType,
                       office="TAX COLLECTOR",
                       name=row.xpath("td[1]/text()").get(),
                       termEnd=row.xpath("td[4]/text()").get(),
                       phone=row.xpath("td[3]/div[3]/text()").get(),
                       address=row.xpath("td[3]/text()").get() + ", " +
                       row.xpath("td[3]/div[1]/text()").get(),
                       url=response.url)
def parse(self, response):
    """Yield elected officials or the tax collector, depending on the URL.

    URLs containing ``"elected"`` yield the mayor and council members; URLs
    containing ``"taxes"`` yield the tax collector.  Other URLs yield
    nothing.
    """
    if "elected" in response.url:
        for quote in response.xpath(
                "//div[@class='et_pb_blurb_container']"):
            allText = getAllText(quote)
            # The e-mail address is read from the image alt text; the
            # misspelling "mifflon" is rewritten to "mifflin".
            alts = quote.xpath(".//img/@alt").getall()
            email = alts[0].replace("mifflon", "mifflin") if alts else None
            yield Official(muniName=self.muniName,
                           muniType=self.muniType,
                           office="MAYOR"
                           if allText[0] == "Mayor" else "MEMBER OF COUNCIL",
                           name=allText[-1],
                           email=email,
                           url=response.url)
    elif "taxes" in response.url:
        # Only the first text block is used.  The stray debug print of its
        # fragments was removed so the callback no longer writes to stdout.
        for quote in response.xpath(
                "//div[@class='et_pb_text_inner']")[0:1]:
            allText = getAllText(quote)
            yield Official(muniName=self.muniName,
                           muniType=self.muniType,
                           office="TAX COLLECTOR",
                           phone=allText[3],
                           name=allText[0].split(',')[0],
                           address=allText[1] + " " + allText[2],
                           url=response.url)
def parse(self, response):
    """Yield the supervisors or the tax collector, depending on the URL.

    URLs ending in ``'p'`` yield five SUPERVISOR items; URLs ending in
    ``'/'`` yield the TAX COLLECTOR.
    """
    last_char = response.url[-1]

    if last_char == 'p':
        # First list: names, second list: term ends (first entry of each
        # list is skipped).
        columns = [
            getAllText(listing)[1:]
            for listing in response.xpath("//ul[@class='listnone']")[0:2]
        ]
        addresses = [
            item.xpath('text()').get().strip()
            for item in response.xpath("//li[contains(strong/text(),'ail:')]")
        ]
        for idx in range(5):
            yield Official(muniName=self.muniName,
                           muniType=self.muniType,
                           office="SUPERVISOR",
                           name=columns[0][idx],
                           email=addresses[idx],
                           termEnd=columns[1][idx],
                           url=response.url)
    elif last_char == '/':
        for content in response.xpath('//div[@id="mainContent"]'):
            # Words four and five of the first paragraph form the name.
            words = content.xpath('p[1]/text()').get().strip().split(" ")
            yield Official(
                muniName=self.muniName,
                muniType=self.muniType,
                office="TAX COLLECTOR",
                name=" ".join(words[3:5]),
                phone=content.xpath('text()').get(),
                # The address is taken from the last footer paragraph.
                address=", ".join(
                    response.xpath("//div[@id='footer']/div/p/text()").
                    getall()[-1].split(' · ')[1:3]),
                url=response.url,
            )
# INCOMPLETE
# Expected offices: 3 auditors, unable to be found on website
def parse(self, response):
    """Yield mayor, ward council members, tax collector and auditors.

    All offices are read from fixed ``<h6>`` positions of one container.
    The shared address and phone are stored on ``self.address`` and
    ``self.phone`` and attached to every non-vacant item.
    """
    # <h6> positions of the council members, grouped by ward.
    ward_positions = [[10, 11, 12], [15, 16, 17], [20, 21, 22]]

    self.address = self._address(
        response.xpath('//div[@id="comp-j45vf6qp"]/p[2]//text()').getall()[1:])
    self.phone = self._phone(
        response.xpath('//div[@id="comp-j45vf6qp"]/p[3]//text()').get())

    for span in response.xpath('//div[@id="comp-j45vf6qp"]/h6[5]/span'):
        yield Official(muniName=self.muniName,
                       muniType=self.muniType,
                       office="MAYOR",
                       name=span.xpath("./text()").get(),
                       email=span.xpath(".//a/@href").get(),
                       url=response.url,
                       address=self.address,
                       phone=self.phone)

    for ward_number, positions in enumerate(ward_positions, start=1):
        for position in positions:
            heading_xpath = '//div[@id="comp-j45vf6qp"]/h6[{}]'.format(position)
            for heading in response.xpath(heading_xpath):
                yield Official(muniName=self.muniName,
                               muniType=self.muniType,
                               office="MEMBER OF COUNCIL",
                               district="WARD {}".format(ward_number),
                               email=heading.xpath(".//a/@href").get(),
                               name=heading.xpath("span/text()").get(),
                               url=response.url,
                               address=self.address,
                               phone=self.phone)

    for heading in response.xpath('//div[@id="comp-j45vf6qp"]/h6[25]'):
        yield Official(muniName=self.muniName,
                       muniType=self.muniType,
                       office="TAX COLLECTOR",
                       email=heading.xpath(".//a/@href").get(),
                       name=heading.xpath("span/text()").get(),
                       url=response.url,
                       address=self.address,
                       phone=self.phone)

    for heading in response.xpath('//div[@id="comp-j45vf6qp"]/h6')[27:30]:
        # A heading holding only a zero-width space is a vacant seat.
        if quote_is_blank := (
                heading.xpath("span/text()").get().strip() == "\u200b"):
            yield Official(muniName=self.muniName,
                           muniType=self.muniType,
                           office="AUDITOR",
                           name=None,
                           url=response.url,
                           vacant=quote_is_blank)
        else:
            yield Official(muniName=self.muniName,
                           muniType=self.muniType,
                           office="AUDITOR",
                           name=heading.xpath("span/text()").get(),
                           url=response.url,
                           address=self.address,
                           phone=self.phone)
def parse(self, response):
    """Yield mayor/council items or a vacant tax collector, by URL.

    The character at ``response.url[-6]`` selects the page: ``'t'`` for the
    government page, ``"y"`` for the tax page.
    """
    marker = response.url[-6]

    if marker == 't':
        # One tuple per council member with relative XPath pieces for
        # (name, district, email, phone).  The name piece gets "/text()"
        # appended and the email piece "/a/@href", so a trailing "/" in a
        # piece turns the appended step into a descendant ("//") step.
        # ``None`` means the field is not scraped for that member.
        member_paths = [('p[1]//b/a/font', 'p[1]/font/font[2]/text()[1]',
                         'p[1]//b', 'p[1]/font/font[2]/text()[2]'),
                        ('p[2]//b/a/font', 'p[2]/font/font[2]/text()[1]',
                         'p[2]//b', None),
                        ('p[3]/b/', 'p[3]/font[1]//text()', None,
                         'p[5]//text()'),
                        ('p[6]/font/span/', 'p[6]/font/font//text()', 'p[6]/',
                         'p[8]/font/font/text()'),
                        ('p[9]/b//a/font', 'p[9]/font[1]//text()', 'p[9]/',
                         'p[9]/span/text()'),
                        ('p[10]//span/a/', 'p[10]/font[2]/text()',
                         'p[10]//span', 'p[12]/font/text()')]

        for para in response.xpath(
                '//p[contains(font/font/b/text(),"Mayor")]'):
            yield Official(
                muniName=self.muniName,
                muniType=self.muniType,
                office="MAYOR",
                name=para.xpath("b/font/a/font/text()").get(),
                email=para.xpath("b//a/@href").get(),
                phone=para.xpath("font[2]/text()").getall()[-1],
                url=response.url)

        for cell in response.xpath(
                '//td[contains(p/b/font/text(),"Borough of Carnegie Government")]/font'
        ):
            for name_path, district_path, email_path, phone_path in member_paths:
                member_name = cell.xpath("%s/text()" % name_path).get()
                member_district = cell.xpath(district_path).get().upper().strip()
                if email_path is None:
                    member_email = None
                else:
                    member_email = cell.xpath("%s/a/@href" % email_path).get()
                if phone_path is None:
                    member_phone = None
                else:
                    member_phone = cell.xpath(phone_path).get()
                yield Official(
                    muniName=self.muniName,
                    muniType=self.muniType,
                    office="MEMBER OF COUNCIL",
                    name=member_name,
                    district=member_district,
                    email=member_email,
                    phone=member_phone,
                    url=response.url)
    elif marker == "y":
        # Called the Carnegie Borough office on 10/21/2020
        # At that time, the position of tax collector was vacant
        for para in response.xpath(
                '//p[contains(font/font/b/text(),"Tax Collector")]'):
            listed = para.xpath('font[2]/b/text()').get()
            if "Jordan Tax Service" in listed:
                yield Official(muniName=self.muniName,
                               muniType=self.muniType,
                               office="TAX COLLECTOR",
                               name=None,
                               vacant=True,
                               url=response.url)
def parse(self, response):
    """Yield council members or the mayor, depending on the URL.

    The second-to-last URL character selects the page: ``'l'`` for the
    council page, ``'e'`` for the mayor page.
    """
    marker = response.url[-2]

    if marker == 'l':
        for article in response.xpath('//article[@id="post-346"]'):
            bits = getAllText(article)
            # Each entry is the fragment list for one council member, taken
            # from fixed positions of the article text.  Several members
            # share a leading fragment (bits[15], bits[24], bits[31]).
            members = [
                bits[8:10],
                [bits[10], bits[14]],
                bits[15:17],
                [bits[15], bits[20], bits[23]],
                bits[24:26],
                [bits[24]] + bits[27:30],
                bits[31:34],
                [bits[31], bits[34]],
                [bits[31]] + bits[-2:],
            ]
            for fragments in members:
                yield self._member(fragments, response)
    elif marker == 'e':
        for article in response.xpath('//article[@id="post-343"]'):
            contact_line = article.xpath("p[16]/text()").get()
            # The last space-separated word of the line is used as the
            # email; the whole line is stored as the phone.
            yield Official(muniName=self.muniName,
                           muniType=self.muniType,
                           office="MAYOR",
                           email=contact_line.strip().split(" ")[-1],
                           name=article.xpath("h3[2]/text()").get(),
                           phone=contact_line,
                           url=response.url)
def parse(self, response):
    """Yield the treasurer or follow-up requests for council pages.

    URLs ending in ``'e'`` yield the TREASURER.  URLs ending in ``'s'``
    store a shared address on ``self.comAddress`` and yield one request per
    selected link, handled by ``self.councilParse``.
    """
    last_char = response.url[-1]

    if last_char == 'e':
        # Only the first widget item is used.
        for item in response.xpath(
                "//li[@class='InfoAdvanced widgetItem ']")[0:1]:
            lines = [
                text.strip()
                for text in item.xpath("text()").getall()
                if text.strip()
            ]
            yield Official(muniName=self.muniName,
                           muniType=self.muniType,
                           office="TREASURER",
                           name=item.xpath("span[1]/text()").get(),
                           address=lines[3] + ", " + lines[4],
                           phone=lines[5],
                           url=response.url)
    elif last_char == 's':
        # The last two text nodes of the editor block form the address.
        address_lines = response.xpath(
            "//div[@id='divEditor369e008b-89c1-44c3-9afe-c47cabcfe8eb']/text()"
        ).getall()[-2:]
        self.comAddress = ", ".join(address_lines)

        links = response.xpath(
            "//div[@id='divEditor369e008b-89c1-44c3-9afe-c47cabcfe8eb']/a"
        )[3:8]
        for link in links:
            target = response.urljoin(link.xpath('./@href').get())
            yield scrapy.Request(url=target, callback=self.councilParse)
def parse(self, response):
    """Yield the commissioners and the tax collector from staff blocks."""
    board_blocks = response.xpath(
        '//div[@class="baldwin-staff" and contains(strong/text(),"Board ")]'
    )
    for block in board_blocks:
        yield Official(muniName=self.muniName,
                       muniType=self.muniType,
                       office="COMMISSIONER",
                       name=block.xpath("h4/text()").get(),
                       url=response.url)

    tax_headings = response.xpath('//div[contains(strong/text(),"Taxes")]/h4')
    for heading in tax_headings:
        yield Official(muniName=self.muniName,
                       muniType=self.muniType,
                       office="TAX COLLECTOR",
                       name=heading.xpath("text()").get(),
                       url=response.url)
def _member(self, person, response):
    """Build a MEMBER OF COUNCIL ``Official`` from a list of text fragments.

    Args:
        person: list of strings.  ``person[0]`` is searched for
            "<word> ward" to determine the district; the name is the part
            of ``person[0]`` before the first comma, or, when it has no
            comma, the part of ``person[1]`` before the first "(".
        response: the response being parsed; only ``response.url`` is used.

    Returns:
        An ``Official`` whose ``district`` is ``"WARD 1"``..``"WARD 3"``, or
        ``None`` when no recognised ward label is found.  ``phone`` and
        ``email`` are the last phone-like / "@"-containing values found in
        any fragment, or ``None``.
    """
    # Ward labels are compared case-insensitively, matching the re.I search
    # below, and "3rd" is accepted alongside "1st" and "2nd".  Previously an
    # unrecognised label leaked the raw match object into ``district`` and a
    # missing label raised TypeError.
    ward_numbers = {
        "1st": 1, "first": 1,
        "2nd": 2, "second": 2,
        "3rd": 3, "third": 3,
    }
    match = re.search(r"([a-z1-3]+?) ward", person[0], re.I)
    number = ward_numbers.get(match[1].lower()) if match else None
    district = "WARD {}".format(number) if number is not None else None

    if "," in person[0]:
        name = person[0].split(",")[0]
    else:
        name = person[1].split("(")[0]

    email = None
    phone = None
    for fragment in person:
        found_phone = re.search(
            r"\(?\d{3}\)?[ \-–]\d{3} ?[–\-] ?\d{4}", fragment)
        if found_phone is not None:
            phone = found_phone[0]
        for word in fragment.split(" "):
            if "@" in word:
                email = word

    return Official(muniName=self.muniName,
                    muniType=self.muniType,
                    office="MEMBER OF COUNCIL",
                    district=district,
                    email=email,
                    name=name,
                    phone=phone,
                    url=response.url)
def parse(self, response):
    """Yield a mayor follow-up request and four council member items.

    The mayor's name and phone are scraped here and passed as ``cb_kwargs``
    to ``self.mayorParse`` on a second page.  Council members are entries
    2-5 of the names, phones and titles found in the same block.
    """
    council_seats = (2, 3, 4, 5)

    for block in response.xpath('//article[@id="post-551"]/div[1]'):
        mayor_fields = {
            "name": block.xpath("div[1]/strong/text()").get(),
            "phone": block.xpath("div[4]/text()").get(),
            "url": response.url,
        }
        yield scrapy.Request(
            url="http://cityofclairton.com/mayor-of-clairton/",
            callback=self.mayorParse,
            cb_kwargs=mayor_fields,
        )

        for seat in council_seats:
            idx = seat - 1
            member_name = block.xpath(".//strong/text()").getall()[idx]
            member_phone = block.xpath(
                ".//div[contains(text(),'Phone:')]/text()").getall()[idx]
            # Only the first council member gets an email: the first link
            # in the block.
            if seat == 2:
                member_email = block.xpath(".//a/@href").get()
            else:
                member_email = None
            # The district is the text after the en dash in the <em> title.
            member_district = block.xpath(".//em/text()").getall()[
                idx].split("–")[1].strip().upper()
            yield Official(
                muniName=self.muniName,
                muniType=self.muniType,
                office="MEMBER OF COUNCIL",
                name=member_name,
                phone=member_phone,
                email=member_email,
                district=member_district,
                url=response.url,
            )