예제 #1
0
 def parse(self, response):
     """Scrape the mayor and council, or the tax collector, from a page.

     A URL whose second-to-last character is "t" is handled as the
     mayor/council page; every other URL is handled as the tax collector
     page.
     """
     if response.url[-2] != "t":
         # Tax collector paragraph: bold name, linked e-mail, and bare text
         # nodes used as two address lines followed by a "label: phone" line.
         for para in response.xpath('//article[@id="post-129"]/div/p[3]'):
             lines = para.xpath('./text()').getall()
             yield Official(
                 muniName=self.muniName,
                 muniType=self.muniType,
                 office="TAX COLLECTOR",
                 name=para.xpath('strong/text()').get(),
                 email=para.xpath('a/text()').get(),
                 phone=lines[2].split(": ")[1],
                 address=", ".join(part.strip() for part in lines[0:2]),
                 url=response.url)
         return

     for para in response.xpath('//article[@id="post-28"]/div/p[2]'):
         mayor = para.xpath("text()").get()
         yield Official(
             muniName=self.muniName,
             muniType=self.muniType,
             office="MAYOR",
             name=mayor,
             url=response.url)

     # Each text node of the first paragraph is one council member; only the
     # part before the dash is kept as the name.
     members = response.xpath(
         '//article[@id="post-28"]/div/p[1]/text()').getall()
     for line in members:
         yield Official(
             muniName=self.muniName,
             muniType=self.muniType,
             office="MEMBER OF COUNCIL",
             name=line.split("–")[0],
             url=response.url)
예제 #2
0
	def parse(self, response):
		"""Yield the mayor from the first table row and a council member per later row.

		Rows after the first are labelled "WARD n", where n is the row's
		zero-based position in the table.
		"""
		rows = response.xpath('//div[@class="fusion-text fusion-text-2"]/table/tbody/tr')
		for ward, row in enumerate(rows):
			if ward != 0:
				# Non-empty text fragments are used as name, phone, e-mail, term end.
				fragments = (t.strip() for t in row.xpath("td[2]/p//text()").getall())
				parts = [t for t in fragments if t]
				yield Official(
					muniName=self.muniName,
					muniType=self.muniType,
					office="MEMBER OF COUNCIL",
					district="WARD {}".format(ward),
					name=parts[0],
					phone=parts[1],
					email=parts[2],
					termEnd=self._termEnd(parts[3]),
					url=response.url)
				continue

			# Mayor row: the first word of the first text node is dropped from the name.
			cell = row.xpath("td[2]/text()")
			lines = cell.getall()
			yield Official(
				muniName=self.muniName,
				muniType=self.muniType,
				office="MAYOR",
				name=" ".join(cell.get().split(" ")[1:]),
				phone=lines[1].strip(),
				email=lines[2],
				termEnd=self._termEnd(lines[3]),
				url=response.url)
예제 #3
0
 def parse(self, response):
     """Yield the mayor, council members and tax collector from their tables.

     Each office is located by the heading text of the table it lives in.
     """
     for table in response.xpath('//table[contains(.//h3,"Mayor")]'):
         raw_name = table.xpath("tr[2]//strong/text()").get()
         yield Official(
             muniName=self.muniName,
             muniType=self.muniType,
             office="MAYOR",
             name=self._name(raw_name),
             email=table.xpath("tr[2]//a/@href").get(),
             termEnd=table.xpath("tr[2]/td[2]/p/text()").get(),
             url=response.url)

     # Only council rows whose bold text contains "(" are selected.
     council_rows = response.xpath(
         '//table[contains(.//h3,"Churchill Borough Council")]/tr[contains(.//strong, "(")]'
     )
     for row in council_rows:
         raw_name = row.xpath(".//strong/text()").get()
         yield Official(
             muniName=self.muniName,
             muniType=self.muniType,
             office="MEMBER OF COUNCIL",
             name=self._name(raw_name),
             email=row.xpath(".//a/@href").get(),
             termEnd=row.xpath("./td[2]/p/text()").get(),
             url=response.url)

     tax_tables = response.xpath(
         '//table[contains(.//h3,"Real Estate Tax Collector")]')
     for table in tax_tables:
         raw_name = table.xpath("tr[2]//strong/text()").get()
         yield Official(
             muniName=self.muniName,
             muniType=self.muniType,
             office="TAX COLLECTOR",
             name=self._name(raw_name),
             termEnd=table.xpath("tr[2]/td[2]/p/text()").get(),
             url=response.url)
예제 #4
0
	def parse(self, response):
		"""Yield the mayor and council members from fixed paragraph positions.

		The page is read positionally: paragraph 7 is the mayor, paragraphs
		15-17 describe the ward 1 member, and the remaining listed paragraphs
		each hold one council member.
		"""
		for para in response.xpath('//div[@class="entry-summary"]/p[7]'):
			# The third text node is split into a phone part (before "(")
			# and an e-mail part (after ")").
			contact = para.xpath("text()[3]").get()
			yield Official(
				muniName=self.muniName,
				muniType=self.muniType,
				office="MAYOR",
				name=para.xpath("text()[1]").get().split("–")[0],
				address=self._address(para.xpath("text()[2]").get().strip()),
				phone=contact.split("(")[0],
				email=contact.split(")")[1].strip(),
				url=response.url)

		for summary in response.xpath('//div[@class="entry-summary"]'):
			street = summary.xpath("p[16]/text()").get().strip()
			yield Official(
				muniName=self.muniName,
				muniType=self.muniType,
				office="MEMBER OF COUNCIL",
				name=summary.xpath("p[15]/text()").get(),
				address=self._address(street),
				phone=summary.xpath("p[17]/text()").get().split("(")[0],
				district="WARD 1",
				url=response.url)

		for index in (9, 10, 11, 12, 13, 14, 18):
			for para in response.xpath('//div[@class="entry-summary"]/p[{}]'.format(index)):
				member = para.xpath("text()[1]").get().split("–")[0]
				# The ward is looked up from the member's last name.
				surname = member.strip().split(" ")[-1]
				contact = para.xpath("text()[3]").get()
				yield Official(
					muniName=self.muniName,
					muniType=self.muniType,
					office="MEMBER OF COUNCIL",
					name=member,
					address=self._address(para.xpath("text()[2]").get().strip()),
					phone=None if contact is None else contact.split("(")[0],
					district=self._wards(surname),
					url=response.url)
예제 #5
0
	def parse(self, response):
		"""Yield the tax collector or the supervisors from a directory page.

		The last character of the URL selects the page kind: "7" is the tax
		collector listing and "2" is the supervisor listing. Any other URL
		yields nothing.
		"""
		page = response.url[-1]

		if page == "7":
			for row in response.xpath('//tr/td[contains(span/text(),"Tax Coll")]/..'):
				# Directory names are "Last, First"; flip them to "First Last".
				pieces = row.xpath("td[1]/span/a/text()").get().split(", ")
				yield Official(
					muniName=self.muniName,
					muniType=self.muniType,
					office="TAX COLLECTOR",
					name=" ".join(reversed(pieces)),
					phone=row.xpath("td[4]/span/text()").get(),
					email=self._email(row.xpath(".//script").get()),
					address=", ".join(
						line.strip()
						for line in row.xpath("//span[@class='DirectoryNormalText'][1]/p[1]/text()").getall()),
					url=response.url)
			return

		if page == '2':
			for row in response.xpath('//table/tr'):
				number = row.xpath("td[4]/span/text()").get().strip()
				pieces = row.xpath("td[1]/span/a/text()").get().split(", ")
				yield Official(
					muniName=self.muniName,
					muniType=self.muniType,
					office="SUPERVISOR",
					name=" ".join(reversed(pieces)),
					# An empty phone cell is reported as None.
					phone=number or None,
					email=self._email(row.xpath(".//script").get()),
					address=", ".join(
						line.strip()
						for line in row.xpath("//span[@class='DirectoryNormalText'][1]/p[1]/text()").getall()),
					url=response.url)
예제 #6
0
	def parse(self, response):
		"""Dispatch on the URL path and yield Officials or a follow-up request.

		* "/mayor/": yields a MAYOR built from each contacts-content block.
		* "/controller/": yields a scrapy.Request for the controller bio
		  page, passing the parsed contact dict to ``self.controllerParse``
		  through ``cb_kwargs``.
		* "/council/": yields a MEMBER OF COUNCIL whose district number is
		  the character at ``response.url[-10]``.

		The dict returned by ``self._getall`` is read with the keys "name",
		"address", "phone" and "email"; its exact construction is not
		visible here.
		"""
		if "/mayor/" in response.url:
			for quote in response.xpath('//*[@class="contacts-content"]'):
				# Keep only text fragments longer than one character after stripping.
				parts = [x.strip() for x in quote.xpath(".//text()").getall() if len(x.strip())>1]
				alldict = self._getall(parts)
				yield Official(
					muniName=self.muniName,
					muniType=self.muniType,
					office="MAYOR",
					name=alldict["name"],
					address=alldict["address"],
					url=response.url)
		elif "/controller/" in response.url:
			for quote in response.xpath('//*[@class="contacts-content"]'):
				parts = [x.strip() for x in quote.xpath(".//text()").getall() if len(x.strip())>1]
				alldict = self._getall(parts)
				# The contact details parsed here are forwarded as keyword
				# arguments to the callback that handles the bio page.
				req = scrapy.Request(url="https://pittsburghpa.gov/controller/controller-bio",
					callback=self.controllerParse, cb_kwargs=alldict)
				yield req
		elif "/council/" in response.url:
			# Tracks whether a contacts-content block mentioning "Council" was found.
			existed = False
			# The [0:1] slice limits processing to the first matching block, if any.
			for quote in response.xpath('//*[@class="contacts-content" and contains(., "Council")]')[0:1]:
				existed = True
				parts = [x.strip() for x in quote.xpath(".//text()").getall() if len(x.strip())>1]
				alldict = self._getall(parts)
				# Per-district overrides: some district pages take individual
				# fields from a different contacts-content block.
				if response.url[-10] == '8':
					# Address comes from items 1-3 and phone from item 4 of the
					# non-empty text of the `[1]`-indexed contacts-content block.
					tempAddr = [x.strip() for x in response.xpath('//*[@class="contacts-content"][1]//text()').getall() if x.strip()!='']
					alldict["address"] = self._address(tempAddr[1:4])
					tempPhone = [x.strip() for x in response.xpath('//*[@class="contacts-content"][1]//text()').getall() if x.strip()!='']
					alldict["phone"] = tempPhone[4]
				elif response.url[-10] == '7':
					# E-mail is extracted from the second contacts-content block.
					tempEmail = [x.strip() for x in response.xpath('//*[@class="contacts-content"]')[1:2].xpath('.//text()').getall() if x.strip()!='']
					alldict["email"] = self._email(tempEmail)
				elif response.url[-10] == '6':
					# Phone is extracted from the second contacts-content block.
					tempPhone = [x.strip() for x in response.xpath('//*[@class="contacts-content"]')[1:2].xpath('.//text()').getall() if x.strip()!='']
					alldict["phone"] = self._phone(tempPhone)
				yield Official(
					muniName=self.muniName,
					muniType=self.muniType,
					office="MEMBER OF COUNCIL",
					district="DISTRICT {}".format(response.url[-10]),
					name=alldict["name"],
					address=alldict["address"],
					phone=alldict["phone"],
					email=alldict["email"],
					url=response.url)
			if not existed:
				# Fallback layout: no matching contacts-content block, so the
				# contact details are taken from text items 5-9 of each
				# "col-md-6" div instead.
				for quote in response.xpath('//div[@class="col-md-6"]'):
					parts = [x.strip() for x in quote.xpath(".//text()").getall() if len(x.strip())>1][5:10]
					alldict = self._getall(parts)
					yield Official(
						muniName=self.muniName,
						muniType=self.muniType,
						office="MEMBER OF COUNCIL",
						district="DISTRICT {}".format(response.url[-10]),
						name=alldict["name"],
						address=alldict["address"],
						phone=alldict["phone"],
						email=alldict["email"],
						url=response.url)
예제 #7
0
 def parse(self, response):
     """Yield mayor and council, or the tax collector, based on the URL.

     The second-to-last URL character selects the page: "2" is the
     officials page and "s" is the tax page. Other URLs yield nothing.
     """
     marker = response.url[-2]

     if marker == "2":
         for panel in response.xpath('//div[@id="panel-2-0-1-0"]'):
             yield Official(
                 muniName=self.muniName,
                 muniType=self.muniType,
                 office="MAYOR",
                 name=panel.xpath("h3/text()").get(),
                 email=panel.xpath(".//a/@href").get(),
                 url=response.url)
         # Child divs 1 through 7 of the layout container hold the council.
         council_rows = response.xpath('//div[@id="pl-2"]/div')[1:8]
         for row in council_rows:
             yield Official(
                 muniName=self.muniName,
                 muniType=self.muniType,
                 office="MEMBER OF COUNCIL",
                 name=row.xpath('div[2]//h3/text()').get(),
                 email=row.xpath('div[2]//a/@href').get(),
                 url=response.url)
         return

     if marker == "s":
         for para in response.xpath('//article[@id="post-291"]/div/p[6]'):
             texts = para.xpath("./text()")
             # The phone is the second space-separated token of the third
             # text node.
             third_line = texts.getall()[2].strip()
             yield Official(
                 muniName=self.muniName,
                 muniType=self.muniType,
                 office="TAX COLLECTOR",
                 name=texts.get(),
                 phone=third_line.split(" ")[1],
                 url=response.url)
예제 #8
0
 def parse(self, response):
     """Yield commissioners or the tax collector depending on the URL.

     URLs containing "officials" produce one COMMISSIONER per ward block;
     URLs containing "tax" produce a single TAX COLLECTOR. Other URLs
     yield nothing.

     Raises:
         AttributeError: on the tax page, if the "-Tax Collector" heading
             is not found (``.get()`` returns None).
         IndexError: if a text list has fewer entries than the positions
             read from it.
     """
     if "officials" in response.url:
         for quote in response.xpath(
                 "//div[contains(h4/strong/u/text(),'Ward ')]"):
             # Positional layout of the block's text: ward, term end, name,
             # two address lines, phone.
             alltext = getAllText(quote)
             yield Official(muniName=self.muniName,
                            muniType=self.muniType,
                            office="COMMISSIONER",
                            name=alltext[2],
                            district=alltext[0].upper(),
                            termEnd=alltext[1],
                            address=alltext[3] + ", " + alltext[4],
                            phone=alltext[5],
                            url=response.url)
     elif "tax" in response.url:
         # The heading reads "<name>-Tax Collector"; keep the part before
         # the dash.
         name = response.xpath(
             "//h3[contains(u/text(),'-Tax Collector')]/u/text()").get(
             ).split("-")[0]
         # In each labelled div the value is the second text item, after
         # the bold label.
         address = getAllText(
             response.xpath(
                 "//div[contains(strong/text(),'Tax Office')]"))[1]
         phone = getAllText(
             response.xpath("//div[strong/text()='Phone:']"))[1]
         email = getAllText(
             response.xpath("//div[contains(strong/text(),'Email:')]"))[1]
         yield Official(muniName=self.muniName,
                        muniType=self.muniType,
                        office="TAX COLLECTOR",
                        name=name,
                        address=address,
                        phone=phone,
                        email=email,
                        url=response.url)
예제 #9
0
파일: ross_t.py 프로젝트: crocojim18/alleco
 def parse(self, response):
     """Yield commissioners or the tax collector depending on the URL.

     The second-to-last URL character selects the page: 'r' is the
     commissioner listing and 'l' is the tax collector page. Other URLs
     yield nothing.
     """
     page = response.url[-2]

     if page == 'r':
         for panel in response.xpath('//div[@class="cpTabPanels"]'):
             # Text fragments are grouped into one record per person; a
             # fragment containing '@' closes the current record.
             record = []
             for raw in panel.xpath('.//text()').getall():
                 fragment = raw.strip()
                 if not fragment or '$' in raw:
                     continue
                 record.append(fragment)
                 if '@' not in fragment:
                     continue
                 pers, record = record, []
                 title = pers[1]
                 # A record whose second entry lacks "Commissioner" is
                 # reported as a vacant seat.
                 name = self._name(title) if "Commissioner" in title else None
                 yield Official(
                     muniName=self.muniName,
                     muniType=self.muniType,
                     office="COMMISSIONER",
                     district=pers[0].upper(),
                     name=name,
                     email=pers[-1],
                     vacant=name is None,
                     url=response.url)
         return

     if page == 'l':
         paragraphs = response.xpath(
             '//div[contains(h2/text(),"Ross Tax Collector")]/p[1]')
         for para in paragraphs:
             yield Official(
                 muniName=self.muniName,
                 muniType=self.muniType,
                 office="TAX COLLECTOR",
                 name=para.xpath('text()[1]').get(),
                 email=para.xpath('a/@href').get(),
                 phone=para.xpath('text()[2]').get(),
                 url=response.url)
예제 #10
0
 def parse(self, response):
     """Yield mayor, council members and tax collector from headed sections.

     Each h4 header is located inside the section's flattened text, and
     the entries following it are read by position.
     """
     for section in response.xpath("//div[@class='twocolbig']"):
         alltext = getAllText(section)
         # The headers are deliberately selected from the response, not
         # relative to the section: per the original author's note, the
         # relative lookup does not work.
         headers = getAllText(
             response.xpath("//div[@class='twocolbig']/h4"))
         for header in headers:
             # Position of the first text entry equal to this header.
             hits = [
                 idx for idx, text in enumerate(alltext) if header == text
             ]
             loc = hits[0]
             lowered = header.lower()

             if "mayor" in lowered:
                 yield Official(
                     muniName=self.muniName,
                     muniType=self.muniType,
                     office="MAYOR",
                     name=alltext[loc + 1],
                     url=response.url)

             if header == "MEMBERS OF COUNCIL":
                 # The seven entries after the header are the members.
                 for member in alltext[loc + 1:loc + 8]:
                     yield Official(
                         muniName=self.muniName,
                         muniType=self.muniType,
                         office="MEMBER OF COUNCIL",
                         # Drop a trailing position (e.g. president or
                         # vice president) that follows a comma.
                         name=member.split(",")[0],
                         url=response.url)

             if "tax" in lowered:
                 yield Official(
                     muniName=self.muniName,
                     muniType=self.muniType,
                     office="TAX COLLECTOR",
                     phone=alltext[loc + 3].split(":")[1],
                     name=alltext[loc + 2],
                     url=response.url)
예제 #11
0
	def parse(self, response):
		"""Yield the tax collector or the commissioners depending on the URL.

		URLs containing "tax" give the tax collector; URLs containing
		"commissioners" give one COMMISSIONER per ward block. Other URLs
		yield nothing.
		"""
		if "tax" in response.url:
			for card in response.xpath("//div[p/text()='Tax Collector']/.."):
				yield Official(
					muniName=self.muniName,
					muniType=self.muniType,
					office="TAX COLLECTOR",
					name=card.xpath("div[2]/h1/span/span/span/text()").get(),
					phone=card.xpath("div[4]/p/text()").get(),
					url=response.url)
			return

		if "commissioners" in response.url:
			# First text entry of every h1, cut at " -".
			names = [getAllText(heading)[0].split(" -")[0] for heading in response.xpath("//h1")]
			blocks = [getAllText(node) for node in response.xpath("//p[contains(text(),'Ward ')]/../..")]
			for fields in blocks:
				# A block with fewer than four entries is missing its name;
				# the third h1 name is inserted in its place.
				if len(fields) < 4:
					fields.insert(0, names[2])
				yield Official(
					muniName=self.muniName,
					muniType=self.muniType,
					office="COMMISSIONER",
					name=fields[0].split(" -")[0],
					email=fields[1],
					phone=fields[2],
					district=fields[3].upper(),
					url=response.url)
예제 #12
0
	def parse(self, response):
		"""Yield mayor and council, or the tax collector, based on the URL.

		The sixth-from-last URL character selects the page: "x" is the
		officials table and "q" is the FAQ page naming the tax collector.
		Other URLs yield nothing.
		"""
		marker = response.url[-6]

		if marker == "x":
			for row in response.xpath('//tr[contains(th/text(),"Mayor")]'):
				# Anything from the first "(" onward is dropped from the name.
				mayor = row.xpath("td/text()").get().split("(")[0]
				yield Official(
					muniName=self.muniName,
					muniType=self.muniType,
					office="MAYOR",
					name=mayor,
					url=response.url)
			council_lines = response.xpath('//tr[contains(th/text(),"Council Members")]/td/text()').getall()
			for line in council_lines:
				# Members are comma-separated; blank pieces are skipped.
				for name in line.split(","):
					if not name.strip():
						continue
					yield Official(
						muniName=self.muniName,
						muniType=self.muniType,
						office="MEMBER OF COUNCIL",
						name=name,
						url=response.url)
			return

		if marker == "q":
			for answer in response.xpath('//div[@id="FAQ7ans"]'):
				# The name is the first two words of the answer text.
				words = answer.xpath("./text()").get().strip().split(" ")
				yield Official(
					muniName=self.muniName,
					muniType=self.muniType,
					office="TAX COLLECTOR",
					name=" ".join(words[0:2]),
					url=response.url)
예제 #13
0
 def parse(self, response):
     """Yield the tax collector, the mayor, or council page requests.

     The sixth-from-last URL character selects the page: 'n' is the tax
     collector page, 'r' is the mayor page, and 'l' is the council index,
     whose links are followed with ``self.councilParse`` as callback.
     Other URLs yield nothing.
     """
     marker = response.url[-6]

     if marker == 'n':
         for para in response.xpath("//div[@itemprop='articleBody']/p[1]"):
             # The same text is handed to the helper twice, once per field.
             blurb = para.xpath("text()").get()
             yield Official(
                 muniName=self.muniName,
                 muniType=self.muniType,
                 office="TAX COLLECTOR",
                 name=self._taxCollector(blurb, "name"),
                 phone=self._taxCollector(
                     para.xpath("text()").get(), "phone"),
                 url=response.url)
         return

     if marker == 'r':
         for card in response.xpath("//div[@class='custom-title']"):
             yield Official(
                 muniName=self.muniName,
                 muniType=self.muniType,
                 office="MAYOR",
                 name=card.xpath("div[1]/text()").get(),
                 phone=card.xpath("div[3]/text()").get(),
                 url=response.url)
         return

     if marker == 'l':
         links = response.xpath(
             "//div[@id='rt-sidebar-a']/div[1]/div/div[2]/ul/li")
         for item in links:
             target = response.urljoin(item.xpath('a/@href').get())
             yield scrapy.Request(url=target, callback=self.councilParse)
예제 #14
0
 def parse(self, response):
     """Yield mayor and council, or the tax collector, based on the URL.

     The last URL character selects the page: "h" is the officials page
     and "e" is the tax page. Other URLs yield nothing.
     """
     last = response.url[-1]

     if last == "h":
         for box in response.xpath('//div[@id="comp-jf8d75h8"]'):
             yield Official(
                 muniName=self.muniName,
                 muniType=self.muniType,
                 office="MAYOR",
                 name=box.xpath("p[1]//text()").get(),
                 email=box.xpath("p[3]//text()").get(),
                 url=response.url)

         columns = response.xpath(
             '//h6[contains(span//text(),"COUNCIL MEMBERS")]/../../div'
         )[1:4]
         for column in columns:
             underlined = column.xpath(
                 './/span[contains(@style,"text-decoration:underline")]//text()'
             ).getall()
             cleaned = [
                 detox(bit) for bit in underlined if len(detox(bit)) > 0
             ]
             # Entries alternate name, e-mail; an unpaired trailing entry
             # is ignored.
             for member, address in zip(cleaned[0::2], cleaned[1::2]):
                 yield Official(
                     muniName=self.muniName,
                     muniType=self.muniType,
                     office="MEMBER OF COUNCIL",
                     name=member,
                     email=address,
                     url=response.url)
         return

     if last == "e":
         for box in response.xpath('//div[@id="comp-j589ncus"]'):
             # The third heading is split on "-" and the second piece is
             # taken as the name.
             heading = box.xpath("h6[3]//text()").get()
             yield Official(
                 muniName=self.muniName,
                 muniType=self.muniType,
                 office="TAX COLLECTOR",
                 name=heading.split("-")[1],
                 email=box.xpath("h6[5]//text()").get(),
                 phone=box.xpath("h6[4]//text()").get(),
                 url=response.url)
예제 #15
0
 def parse(self, response):
     """Yield the mayor, the council, or the tax collector based on the URL.

     The last URL character selects the page: "s" is the mayor page, "/"
     is the council page, and "f" is the tax page. Other URLs yield
     nothing.
     """
     last = response.url[-1]

     if last == "s":
         boxes = response.xpath(
             "//div[contains(@class, 'row container-box-med') and contains(.//div/@class, 'it-grid-one start bl')]"
         )
         for box in boxes:
             bio = box.xpath('div/p/text()').getall()
             # The name is the second and third words of the heading.
             heading_words = box.xpath("div/h1/text()").get().split(" ")
             yield Official(
                 muniName=self.muniName,
                 muniType=self.muniType,
                 office="MAYOR",
                 name=" ".join(heading_words[1:3]),
                 phone=bio[-1],
                 email=bio[-3].replace(" ", ""),
                 url=response.url)
         return

     if last == "/":
         # The first team box is skipped.
         team = response.xpath(
             "//div[@class='med information-text']//div[@class='itg-teambox']"
         )[1:]
         for box in team:
             member = box.xpath("h3/text()").get()
             yield Official(
                 muniName=self.muniName,
                 muniType=self.muniType,
                 office="MEMBER OF COUNCIL",
                 name=member,
                 # The district is looked up from the member's last word.
                 district=self._districts(member.split(" ")[-1]),
                 url=response.url)
         return

     if last == "f":
         holders = response.xpath(
             "//p[contains(text(), 'Tax Department Manager')]/..")
         for holder in holders:
             yield Official(
                 muniName=self.muniName,
                 muniType=self.muniType,
                 office="TAX COLLECTOR",
                 name=holder.xpath("h3/text()").get(),
                 url=response.url)
예제 #16
0
 def parse(self, response):
     """Yield mayor and council, or the tax collector, based on the URL.

     URLs containing "officials" give the elected officials; URLs
     containing "taxes" give the tax collector. Other URLs yield nothing.
     """
     if "officials" in response.url:
         cards = response.xpath(
             "//p[contains(text(),'Council Member') or contains(text(),'Mayor')]/.."
         )
         for card in cards:
             # The card's first paragraph text decides the office.
             if "Mayor" in card.xpath("p/text()").get():
                 office = "MAYOR"
             else:
                 office = "MEMBER OF COUNCIL"
             yield Official(
                 muniName=self.muniName,
                 muniType=self.muniType,
                 office=office,
                 name=card.xpath("h3/text()").get().split(",")[0],
                 url=response.url)
         return

     if "taxes" in response.url:
         sentence = response.xpath(
             "//text()[contains(.,'Whitehall Borough Tax Collector')]"
         ).get()
         # Work on the words after the last ", ": the first two are the
         # name, the last is the e-mail and the third-from-last the phone.
         part = sentence.split(", ")[-1].split(" ")
         yield Official(
             muniName=self.muniName,
             muniType=self.muniType,
             office="TAX COLLECTOR",
             name=" ".join(part[0:2]),
             email=part[-1],
             phone=part[-3],
             url=response.url)
예제 #17
0
 def parse(self, response):
     """Yield the mayor, council members and tax collector from one page.

     Street addresses on the page are completed with a fixed
     city/state/zip suffix.
     """
     addressSuffix = ", Pittsburgh, PA 15205"

     # First paragraph: the mayor's details, read by position.
     for para in response.xpath('//div[@class="pf-content"]/p[1]'):
         mayorBits = getAllText(para)
         yield Official(
             muniName=self.muniName,
             muniType=self.muniType,
             office="MAYOR",
             name=mayorBits[1],
             url=response.url,
             address=mayorBits[2] + addressSuffix,
             phone=mayorBits[3])

     # One table row per council member; the name is the part before the
     # dash.
     rows = response.xpath('//div[@class="pf-content"]/table/tbody/tr')
     for row in rows:
         memberBits = getAllText(row)
         yield Official(
             muniName=self.muniName,
             muniType=self.muniType,
             office="MEMBER OF COUNCIL",
             name=memberBits[0].split("–")[0],
             url=response.url,
             address=memberBits[1] + addressSuffix,
             phone=memberBits[2])

     # Fifth paragraph: the tax collector's details, read by position.
     for para in response.xpath('//div[@class="pf-content"]/p[5]'):
         taxBits = getAllText(para)
         yield Official(
             muniName=self.muniName,
             muniType=self.muniType,
             office="TAX COLLECTOR",
             name=taxBits[1],
             url=response.url,
             email=taxBits[2],
             phone=taxBits[3])
예제 #18
0
 def parse(self, response):
     """Yield supervisors or the tax collector depending on the URL.

     URLs containing "supervisors" give the board members; URLs
     containing "tax" give the tax collector. Other URLs yield nothing.
     """
     if "supervisors" in response.url:
         items = response.xpath(
             "//div[contains(h2/text(),'Current Board of Supervisors')]/ul/li"
         )
         for item in items:
             # Keep the part before the dash as the name.
             member = item.xpath("text()").get().split("–")[0]
             yield Official(
                 muniName=self.muniName,
                 muniType=self.muniType,
                 office="SUPERVISOR",
                 name=member,
                 district=self._district(member),
                 url=response.url)
         return

     if "tax" in response.url:
         for entry in response.xpath("//div[@class='entry']"):
             info = getAllText(entry.xpath('p[13]'))
             mail = entry.xpath('p[14]/a/@href').get()
             # The fourth entry is split on spaces; its second token is
             # replaced by the literal "PA".
             tokens = info[3].split(" ")
             locality = " ".join([tokens[0], "PA", tokens[2]])
             yield Official(
                 muniName=self.muniName,
                 muniType=self.muniType,
                 office="TAX COLLECTOR",
                 name=info[0],
                 address=info[2] + ", " + locality,
                 phone=info[4].replace(".", ""),
                 email=mail,
                 url=response.url)
예제 #19
0
 def parse(self, response):
     """Yield mayor and council, or the tax collector, based on the URL.

     A URL ending in "l" is the officials directory, whose first entry is
     the mayor and whose remaining entries are council members. Any other
     URL is handled as the tax collector directory.
     """
     if response.url[-1] != "l":
         entries = response.xpath(
             '//div[@id="cityDirectoryWidgetba8decb7-37d2-4a77-9496-5d8ce39b7341"]/ol/li[1]'
         )
         for entry in entries:
             yield Official(
                 muniName=self.muniName,
                 muniType=self.muniType,
                 office="TAX COLLECTOR",
                 name=entry.xpath("h4/text()").get(),
                 url=response.url,
                 phone=entry.xpath('div[2]/text()').get())
         return

     entries = response.xpath(
         '//div[@id="cityDirectoryWidget3e4ca0ff-5ed1-4ca6-86bc-f4ebc4b5ed38"]//ol/li'
     )
     for position, entry in enumerate(entries):
         if position > 0:
             yield Official(
                 muniName=self.muniName,
                 muniType=self.muniType,
                 office="MEMBER OF COUNCIL",
                 name=entry.xpath("h4/text()").get(),
                 email=entry.xpath('div[2]/a/text()').get(),
                 url=response.url)
             continue
         # Only the first entry (the mayor) has its phone read; the value
         # follows a ": " separator.
         yield Official(
             muniName=self.muniName,
             muniType=self.muniType,
             office="MAYOR",
             name=entry.xpath("h4/text()").get(),
             email=entry.xpath('div[2]/a/text()').get(),
             url=response.url,
             phone=entry.xpath('div[3]/text()').get().split(": ")[1])
예제 #20
0
 def parse(self, response):
     """Yield supervisors, the tax collector and auditors from one page.

     All three groups are read by fixed positions in the page text.
     """
     supervisors = getAllText(response.xpath("//table[@id='Table1']"))
     # The first half of the table text is iterated; each entry is paired
     # with the entry three positions later as its phone.
     for idx in range(len(supervisors) // 2):
         yield Official(
             muniName=self.muniName,
             muniType=self.muniType,
             office="SUPERVISOR",
             name=supervisors[idx].split(",")[0],
             phone=supervisors[idx + 3],
             url=response.url)

     holders = response.xpath(
         "//b[contains(text(),'Real Estate Tax Collector')]/..")
     for holder in holders:
         taxman = holder.xpath("./u/strong/text()").get().strip()
         # The third non-empty text node is split into words, which are
         # then addressed by position.
         stripped = (t.strip() for t in holder.xpath("./text()").getall())
         nonblank = [t for t in stripped if len(t) != 0]
         taxinfo = nonblank[2].split(" ")
         yield Official(
             muniName=self.muniName,
             muniType=self.muniType,
             office="TAX COLLECTOR",
             name=taxman,
             # The final character of the joined address is dropped.
             address=" ".join(taxinfo[10:19])[:-1],
             phone=taxinfo[-7],
             url=response.url)

     auditors = getAllText(
         response.xpath("//font[contains(b/text(),'Board of Auditors')]"))
     # Auditor names sit at fixed positions; the second has its last
     # character removed.
     for person in (auditors[3], auditors[5][:-1], auditors[7]):
         yield Official(
             muniName=self.muniName,
             muniType=self.muniType,
             office="AUDITOR",
             name=person,
             url=response.url)
예제 #21
0
 def parse(self, response):
     """Yield seven commissioners and the tax collector from one page.

     Commissioner details are read from a flattened text list in groups
     of six fields; the tax collector comes from the first "textContent"
     table row.
     """
     editors = response.xpath(
         '//div[@id="divEditor909176e4-b373-45cd-af7f-deacc7efb43e"]/span'
     )
     for editor in editors:
         joined = "  ".join(getAllText(editor))
         # Split on runs of two or more whitespace characters, skip the
         # first four pieces and drop "Vice President" so every person
         # occupies exactly six fields.
         parts = [
             piece for piece in split(r"\s{2,}", joined)[4:]
             if piece != "Vice President"
         ]
         for seat in range(7):
             base = seat * 6
             yield Official(
                 muniName=self.muniName,
                 muniType=self.muniType,
                 office="COMMISSIONER",
                 name=parts[base].split(",")[0],
                 district=parts[base + 1].upper(),
                 termEnd=parts[base + 3],
                 phone=parts[base + 5],
                 address=parts[base + 2] + ", " + parts[base + 4],
                 url=response.url)

     for row in response.xpath('//tr[@class="textContent"]')[0:1]:
         yield Official(
             muniName=self.muniName,
             muniType=self.muniType,
             office="TAX COLLECTOR",
             name=row.xpath("td[1]/text()").get(),
             termEnd=row.xpath("td[4]/text()").get(),
             phone=row.xpath("td[3]/div[3]/text()").get(),
             address=(row.xpath("td[3]/text()").get() + ", " +
                      row.xpath("td[3]/div[1]/text()").get()),
             url=response.url)
예제 #22
0
 def parse(self, response):
     """Yield mayor and council, or the tax collector, based on the URL.

     URLs containing "elected" produce one Official per blurb container
     (MAYOR when the first text entry is "Mayor", otherwise MEMBER OF
     COUNCIL). URLs containing "taxes" produce the TAX COLLECTOR from the
     first text block. Other URLs yield nothing.

     Raises:
         IndexError: if a text list has fewer entries than the positions
             read from it.
     """
     if "elected" in response.url:
         for quote in response.xpath(
                 "//div[@class='et_pb_blurb_container']"):
             allText = getAllText(quote)
             # E-mail addresses are taken from the image alt text.
             email = quote.xpath(".//img/@alt").getall()
             if email:
                 # Correct the "mifflon" misspelling found in the source
                 # data.
                 email[0] = email[0].replace("mifflon", "mifflin")
             yield Official(muniName=self.muniName,
                            muniType=self.muniType,
                            office="MAYOR" if allText[0] == "Mayor" else
                            "MEMBER OF COUNCIL",
                            name=allText[-1],
                            email=email[0] if email else None,
                            url=response.url)
     elif "taxes" in response.url:
         for quote in response.xpath(
                 "//div[@class='et_pb_text_inner']")[0:1]:
             # Positional layout: "name, ..." line, two address lines,
             # phone.
             allText = getAllText(quote)
             yield Official(muniName=self.muniName,
                            muniType=self.muniType,
                            office="TAX COLLECTOR",
                            phone=allText[3],
                            name=allText[0].split(',')[0],
                            address=allText[1] + " " + allText[2],
                            url=response.url)
예제 #23
0
파일: moon_t.py 프로젝트: crocojim18/alleco
 def parse(self, response):
     """Yield supervisors or the tax collector depending on the URL.

     A URL ending in 'p' is the supervisors page; a URL ending in '/' is
     the tax collector page. Other URLs yield nothing.
     """
     last = response.url[-1]

     if last == 'p':
         # The first two "listnone" lists are read in parallel, each with
         # its first text entry dropped: list 0 supplies names and list 1
         # supplies term ends.
         namesdates = [
             getAllText(block)[1:]
             for block in response.xpath("//ul[@class='listnone']")[0:2]
         ]
         emails = [
             item.xpath('text()').get().strip()
             for item in response.xpath(
                 "//li[contains(strong/text(),'ail:')]")
         ]
         # Exactly five supervisors are emitted.
         for seat in range(5):
             yield Official(
                 muniName=self.muniName,
                 muniType=self.muniType,
                 office="SUPERVISOR",
                 name=namesdates[0][seat],
                 email=emails[seat],
                 termEnd=namesdates[1][seat],
                 url=response.url)
     elif last == '/':
         for content in response.xpath('//div[@id="mainContent"]'):
             # The name is the fourth and fifth words of the first
             # paragraph.
             intro_words = content.xpath('p[1]/text()').get().strip().split(
                 " ")
             yield Official(
                 muniName=self.muniName,
                 muniType=self.muniType,
                 office="TAX COLLECTOR",
                 name=" ".join(intro_words[3:5]),
                 phone=content.xpath('text()').get(),
                 # The address is pieced together from the last footer
                 # line, which is separated by middle dots.
                 address=", ".join(
                     response.xpath("//div[@id='footer']/div/p/text()")
                     .getall()[-1].split(' · ')[1:3]),
                 url=response.url)
     # INCOMPLETE
     # Expected offices: 3 auditors, unable to be found on website
예제 #24
0
 def parse(self, response):
     """Yield the mayor, ward council members, tax collector and auditors.

     Everything is read from the ``comp-j45vf6qp`` div. The office address
     and phone are parsed once via ``self._address`` / ``self._phone``,
     stored on ``self.address`` and ``self.phone``, and attached to every
     item except vacant auditor seats.
     """
     # XPath (1-based) positions of the council members' h6 elements; the
     # first inner list becomes WARD 1, the second WARD 2, and so on.
     wardPositions = [[10, 11, 12], [15, 16, 17], [20, 21, 22]]

     addressLines = response.xpath(
         '//div[@id="comp-j45vf6qp"]/p[2]//text()').getall()
     self.address = self._address(addressLines[1:])
     phoneText = response.xpath(
         '//div[@id="comp-j45vf6qp"]/p[3]//text()').get()
     self.phone = self._phone(phoneText)

     for mayorNode in response.xpath(
             '//div[@id="comp-j45vf6qp"]/h6[5]/span'):
         yield Official(muniName=self.muniName,
                        muniType=self.muniType,
                        office="MAYOR",
                        name=mayorNode.xpath("./text()").get(),
                        email=mayorNode.xpath(".//a/@href").get(),
                        url=response.url,
                        address=self.address,
                        phone=self.phone)

     for wardNumber, positions in enumerate(wardPositions, start=1):
         for position in positions:
             memberPath = '//div[@id="comp-j45vf6qp"]/h6[{}]'.format(
                 position)
             for memberNode in response.xpath(memberPath):
                 yield Official(muniName=self.muniName,
                                muniType=self.muniType,
                                office="MEMBER OF COUNCIL",
                                district="WARD {}".format(wardNumber),
                                email=memberNode.xpath(".//a/@href").get(),
                                name=memberNode.xpath("span/text()").get(),
                                url=response.url,
                                address=self.address,
                                phone=self.phone)

     for collectorNode in response.xpath(
             '//div[@id="comp-j45vf6qp"]/h6[25]'):
         yield Official(muniName=self.muniName,
                        muniType=self.muniType,
                        office="TAX COLLECTOR",
                        email=collectorNode.xpath(".//a/@href").get(),
                        name=collectorNode.xpath("span/text()").get(),
                        url=response.url,
                        address=self.address,
                        phone=self.phone)

     # List slice is 0-based: this takes the 28th-30th h6 elements.
     auditorNodes = response.xpath('//div[@id="comp-j45vf6qp"]/h6')[27:30]
     for auditorNode in auditorNodes:
         auditorName = auditorNode.xpath("span/text()").get()
         # A lone zero-width space marks an empty (vacant) seat.
         if auditorName.strip() == "\u200b":
             yield Official(muniName=self.muniName,
                            muniType=self.muniType,
                            office="AUDITOR",
                            name=None,
                            url=response.url,
                            vacant=True)
         else:
             yield Official(muniName=self.muniName,
                            muniType=self.muniType,
                            office="AUDITOR",
                            name=auditorName,
                            url=response.url,
                            address=self.address,
                            phone=self.phone)
예제 #25
0
 def parse(self, response):
     """Yield the mayor and council members, or a vacant tax collector.

     The page is selected by the sixth-from-last character of the URL:

     * 't': yields one MAYOR item per paragraph whose nested bold text
       contains "Mayor", then one MEMBER OF COUNCIL item per entry of
       ``councilBits`` for each matching ``font`` container.
     * 'y': yields a vacant TAX COLLECTOR item when the "Tax Collector"
       paragraph names "Jordan Tax Service".

     Any other URL yields nothing.
     """
     pageKey = response.url[-6]
     if pageKey == 't':
         # Per council member: (name base path, district path,
         # email base path, phone path). The name base gets "/text()"
         # appended and the email base gets "/a/@href" appended, so a
         # trailing "/" on a base deliberately produces a "//" descendant
         # step. None means the field is not available for that member.
         councilBits = [('p[1]//b/a/font', 'p[1]/font/font[2]/text()[1]',
                         'p[1]//b', 'p[1]/font/font[2]/text()[2]'),
                        ('p[2]//b/a/font', 'p[2]/font/font[2]/text()[1]',
                         'p[2]//b', None),
                        ('p[3]/b/', 'p[3]/font[1]//text()', None,
                         'p[5]//text()'),
                        ('p[6]/font/span/', 'p[6]/font/font//text()',
                         'p[6]/', 'p[8]/font/font/text()'),
                        ('p[9]/b//a/font', 'p[9]/font[1]//text()', 'p[9]/',
                         'p[9]/span/text()'),
                        ('p[10]//span/a/', 'p[10]/font[2]/text()',
                         'p[10]//span', 'p[12]/font/text()')]
         for quote in response.xpath(
                 '//p[contains(font/font/b/text(),"Mayor")]'):
             yield Official(
                 muniName=self.muniName,
                 muniType=self.muniType,
                 office="MAYOR",
                 name=quote.xpath("b/font/a/font/text()").get(),
                 email=quote.xpath("b//a/@href").get(),
                 # The phone is the last text node of the second font tag.
                 phone=quote.xpath("font[2]/text()").getall()[-1],
                 url=response.url)
         for quote in response.xpath(
                 '//td[contains(p/b/font/text(),"Borough of Carnegie Government")]/font'
         ):
             for nameBase, districtPath, emailBase, phonePath in councilBits:
                 yield Official(
                     muniName=self.muniName,
                     muniType=self.muniType,
                     office="MEMBER OF COUNCIL",
                     name=quote.xpath("%s/text()" % nameBase).get(),
                     district=quote.xpath(districtPath).get().upper().strip(),
                     email=None if emailBase is None else quote.xpath(
                         "%s/a/@href" % emailBase).get(),
                     phone=None
                     if phonePath is None else quote.xpath(phonePath).get(),
                     url=response.url)
     elif pageKey == "y":
         # Called the Carnegie Borough office on 10/21/2020
         # At that time, the position of tax collector was vacant
         for quote in response.xpath(
                 '//p[contains(font/font/b/text(),"Tax Collector")]'):
             if "Jordan Tax Service" in quote.xpath(
                     'font[2]/b/text()').get():
                 yield Official(muniName=self.muniName,
                                muniType=self.muniType,
                                office="TAX COLLECTOR",
                                name=None,
                                vacant=True,
                                url=response.url)
예제 #26
0
 def parse(self, response):
     """Yield council members or the mayor, depending on the page URL.

     The page is selected by the second-from-last character of the URL:
     'l' is handled as the council page (article ``post-346``) and 'e' as
     the mayor page (article ``post-343``). Any other URL yields nothing.
     """
     pageKey = response.url[-2]
     if pageKey == 'l':
         for article in response.xpath('//article[@id="post-346"]'):
             bits = getAllText(article)
             # One list of text fragments per council member, picked by
             # fixed position. Several members reuse the same leading
             # fragment (e.g. bits[15], bits[24], bits[31]) -- presumably
             # a shared ward heading; confirm against the page.
             peeps = [
                 bits[8:10],
                 [bits[10], bits[14]],
                 bits[15:17],
                 [bits[15], bits[20], bits[23]],
                 bits[24:26],
                 [bits[24]] + bits[27:30],
                 bits[31:34],
                 [bits[31], bits[34]],
                 [bits[31]] + bits[-2:],
             ]
             for person in peeps:
                 yield self._member(person, response)
     elif pageKey == 'e':
         for article in response.xpath('//article[@id="post-343"]'):
             contactLine = article.xpath("p[16]/text()").get()
             # The e-mail is the last space-separated word of the contact
             # line; the whole line is passed through as the phone value.
             lastWord = contactLine.strip().split(" ")[-1]
             yield Official(muniName=self.muniName,
                            muniType=self.muniType,
                            office="MAYOR",
                            email=lastWord,
                            name=article.xpath("h3[2]/text()").get(),
                            phone=contactLine,
                            url=response.url)
예제 #27
0
 def parse(self, response):
     """Yield the treasurer, or schedule requests for council member pages.

     A URL ending in 'e' yields one TREASURER item from the first
     ``InfoAdvanced widgetItem`` list entry. A URL ending in 's' stores the
     commissioners' address on ``self.comAddress`` and yields one
     ``scrapy.Request`` per selected link, handled by ``self.councilParse``.
     Any other URL yields nothing.
     """
     lastChar = response.url[-1]
     if lastChar == 'e':
         entries = response.xpath(
             "//li[@class='InfoAdvanced widgetItem ']")[0:1]
         for entry in entries:
             # Direct text nodes, stripped, with blank ones dropped.
             stripped = (text.strip()
                         for text in entry.xpath("text()").getall())
             bits = [text for text in stripped if text]
             yield Official(muniName=self.muniName,
                            muniType=self.muniType,
                            office="TREASURER",
                            name=entry.xpath("span[1]/text()").get(),
                            address=bits[3] + ", " + bits[4],
                            phone=bits[5],
                            url=response.url)
     elif lastChar == 's':
         # The last two text nodes of the editor div form the address.
         editorTexts = response.xpath(
             "//div[@id='divEditor369e008b-89c1-44c3-9afe-c47cabcfe8eb']/text()"
         ).getall()
         self.comAddress = ", ".join(editorTexts[-2:])
         # Links 4-8 (0-based slice 3:8) of the editor div are followed.
         memberLinks = response.xpath(
             "//div[@id='divEditor369e008b-89c1-44c3-9afe-c47cabcfe8eb']/a"
         )[3:8]
         for link in memberLinks:
             target = response.urljoin(link.xpath('./@href').get())
             yield scrapy.Request(url=target, callback=self.councilParse)
예제 #28
0
 def parse(self, response):
     """Yield the commissioners followed by the tax collector(s).

     Each section is described by the XPath selecting its nodes, the
     office assigned to them, and the XPath (relative to each node) that
     holds the person's name.
     """
     sections = (
         ('//div[@class="baldwin-staff" and contains(strong/text(),"Board ")]',
          "COMMISSIONER", "h4/text()"),
         ('//div[contains(strong/text(),"Taxes")]/h4',
          "TAX COLLECTOR", "text()"),
     )
     for selector, office, namePath in sections:
         for node in response.xpath(selector):
             yield Official(muniName=self.muniName,
                            muniType=self.muniType,
                            office=office,
                            name=node.xpath(namePath).get(),
                            url=response.url)
예제 #29
0
 def _member(self, person, response):
     """Build a MEMBER OF COUNCIL Official from a list of text fragments.

     Args:
         person: sequence of strings. ``person[0]`` is searched for a
             "<token> ward" phrase to derive the district; the name is the
             part of ``person[0]`` before the first comma when it has one,
             otherwise the part of ``person[1]`` before the first "(".
             Every fragment is scanned for a phone number and an e-mail.
         response: the page response; only ``response.url`` is read.

     Returns:
         An Official whose ``district`` is "WARD 1", "WARD 2" or "WARD 3",
         or None when no recognised ward token is found. ``phone`` and
         ``email`` are None when no fragment contains one; when several
         fragments do, the last occurrence wins.
     """
     wardNames = {
         "1st": "WARD 1",
         "first": "WARD 1",
         "2nd": "WARD 2",
         "second": "WARD 2",
         "3rd": "WARD 3",
         "third": "WARD 3",
     }
     wardMatch = re.search(r"([a-z1-3]+?) ward", person[0], re.I)
     # The search is case-insensitive, so normalise the token before the
     # lookup. A missing match or an unknown token gives None instead of
     # raising or passing the raw match object on as the district.
     district = None
     if wardMatch is not None:
         district = wardNames.get(wardMatch[1].lower())

     if "," in person[0]:
         name = person[0].split(",")[0]
     else:
         name = person[1].split("(")[0]

     email = None
     phone = None
     for fragment in person:
         phoneMatch = re.search(r"\(?\d{3}\)?[ \-–]\d{3} ?[–\-] ?\d{4}",
                                fragment)
         if phoneMatch is not None:
             phone = phoneMatch[0]
         for word in fragment.split(" "):
             if "@" in word:
                 email = word
     return Official(muniName=self.muniName,
                     muniType=self.muniType,
                     office="MEMBER OF COUNCIL",
                     district=district,
                     email=email,
                     name=name,
                     phone=phone,
                     url=response.url)
예제 #30
0
 def parse(self, response):
     """Request the mayor's page and yield the council members.

     For each first ``div`` of article ``post-551``, the mayor's name and
     phone found here are forwarded as keyword arguments to
     ``self.mayorParse`` via a request for the mayor page, and one
     MEMBER OF COUNCIL item is yielded per entry of ``councilNums``.
     """
     # 1-based positions of the council members among the <strong>, phone
     # and <em> entries; each is used below as index ``num - 1``.
     councilNums = [2, 3, 4, 5]
     for quote in response.xpath('//article[@id="post-551"]/div[1]'):
         # Passed to mayorParse as keyword arguments (cb_kwargs).
         mayor = {
             "name": quote.xpath("div[1]/strong/text()").get(),
             "phone": quote.xpath("div[4]/text()").get(),
             "url": response.url
         }
         req = scrapy.Request(
             url="http://cityofclairton.com/mayor-of-clairton/",
             callback=self.mayorParse,
             cb_kwargs=mayor)
         yield req
         for num in councilNums:
             yield Official(
                 muniName=self.muniName,
                 muniType=self.muniType,
                 office="MEMBER OF COUNCIL",
                 name=quote.xpath(".//strong/text()").getall()[num - 1],
                 phone=quote.xpath(
                     ".//div[contains(text(),'Phone:')]/text()").getall()[
                         num - 1],
                 # Only the member at position 2 gets an e-mail: the first
                 # link href found under the container.
                 email=None
                 if num != 2 else quote.xpath(".//a/@href").get(),
                 # The district is the upper-cased text after the en dash
                 # in the matching <em> entry.
                 district=quote.xpath(".//em/text()").getall()[
                     num - 1].split("–")[1].strip().upper(),
                 url=response.url)