Python getAllTextの例、alleco.objects.official.getAllText Pythonの例

コード例 #1

0

ファイルを表示

ファイル: north_versailles_t.py プロジェクト: crocojim18/alleco

 def parse(self, response):
     """Yield Official records from the officials page or the tax page.

     The branch is selected by a substring test on ``response.url``:
     "officials" yields one COMMISSIONER per matching ward ``div``, "tax"
     yields a single TAX COLLECTOR, and any other URL yields nothing.

     The positional indexes into the ``getAllText`` results follow the
     page layout the spider was written against (presumably a list of
     text fragments in document order -- confirm against getAllText).
     """
     if "officials" in response.url:
         for quote in response.xpath(
                 "//div[contains(h4/strong/u/text(),'Ward ')]"):
             alltext = getAllText(quote)
             yield Official(muniName=self.muniName,
                            muniType=self.muniType,
                            office="COMMISSIONER",
                            name=alltext[2],
                            district=alltext[0].upper(),
                            termEnd=alltext[1],
                            address=alltext[3] + ", " + alltext[4],
                            phone=alltext[5],
                            url=response.url)
     elif "tax" in response.url:
         # The heading contains "-Tax Collector"; keep the text before
         # the first "-" as the name.
         name = response.xpath(
             "//h3[contains(u/text(),'-Tax Collector')]/u/text()").get(
             ).split("-")[0]
         address = getAllText(
             response.xpath(
                 "//div[contains(strong/text(),'Tax Office')]"))[1]
         # A leftover debugging print of the phone fragments was removed;
         # the second fragment is the one used.
         phone = getAllText(
             response.xpath("//div[strong/text()='Phone:']"))[1]
         email = getAllText(
             response.xpath("//div[contains(strong/text(),'Email:')]"))[1]
         yield Official(muniName=self.muniName,
                        muniType=self.muniType,
                        office="TAX COLLECTOR",
                        name=name,
                        address=address,
                        phone=phone,
                        email=email,
                        url=response.url)

コード例 #2

0

ファイルを表示

ファイル: south_park_t.py プロジェクト: crocojim18/alleco

 def parse(self, response):
     """Yield supervisors, the tax collector and auditors from one page.

     Three sections are read in order: the ``Table1`` table (SUPERVISOR),
     the parent of the "Real Estate Tax Collector" ``<b>`` element
     (TAX COLLECTOR) and the "Board of Auditors" ``<font>`` element
     (AUDITOR).  Every positional index is tied to the page layout.
     """
     board = getAllText(response.xpath("//table[@id='Table1']"))
     # One record per index in the first half of the list; the phone is
     # read three entries after the name.
     for idx in range(len(board) // 2):
         supervisor_name = board[idx].split(",")[0]
         supervisor_phone = board[idx + 3]
         yield Official(muniName=self.muniName,
                        muniType=self.muniType,
                        office="SUPERVISOR",
                        name=supervisor_name,
                        phone=supervisor_phone,
                        url=response.url)

     for section in response.xpath(
             "//b[contains(text(),'Real Estate Tax Collector')]/.."):
         taxman = section.xpath("./u/strong/text()").get().strip()
         # Keep only the non-blank direct text nodes, stripped.
         nonblank = []
         for raw in section.xpath("./text()").getall():
             cleaned = raw.strip()
             if len(cleaned) != 0:
                 nonblank.append(cleaned)
         # The third non-blank node is split into space-separated tokens;
         # address and phone are picked out of it by position.
         taxinfo = nonblank[2].split(" ")
         yield Official(muniName=self.muniName,
                        muniType=self.muniType,
                        office="TAX COLLECTOR",
                        name=taxman,
                        address=" ".join(taxinfo[10:19])[:-1],
                        phone=taxinfo[-7],
                        url=response.url)

     auditors = getAllText(
         response.xpath("//font[contains(b/text(),'Board of Auditors')]"))
     # The middle entry has its last character trimmed.
     for person in (auditors[3], auditors[5][:-1], auditors[7]):
         yield Official(muniName=self.muniName,
                        muniType=self.muniType,
                        office="AUDITOR",
                        name=person,
                        url=response.url)

コード例 #3

0

ファイルを表示

ファイル: pine_t.py プロジェクト: crocojim18/alleco

	def parse(self, response):
		"""Yield one SUPERVISOR record per member listed on the page.

		The "Board of Supervisors" sidebar supplies a shared address and
		phone (from its first ``li``) plus one link per remaining ``li``;
		each link is stored under the member's last name and later used as
		that member's ``email`` value.
		"""
		sidelinks = getAllLinks(response.xpath("//h4[contains(text(),'Board of Supervisors')]/../.."))
		sidearr = [
			getAllText(item)
			for item in response.xpath("//h4[contains(text(),'Board of Supervisors')]/../../li")
		]
		# Map surname (last space-separated word of the first fragment) to
		# the link at the same position.
		surnames = {}
		for position, person in enumerate(sidearr[1:]):
			surnames[person[0].split(" ")[-1]] = sidelinks[position]
		contact = sidearr[0]
		addr = contact[2]+", "+"".join(contact[3:5])+" "+contact[5]+" "+contact[6]
		phone = contact[7]
		for member in response.xpath("//h2[contains(text(),'Members')]/../../../../../..//li"):
			bits = getAllText(member)
			name = bits[0].split(",")[0]
			last_name = name.split(" ")[-1]
			yield Official(
				muniName=self.muniName,
				muniType=self.muniType,
				office="SUPERVISOR",
				name=name,
				termEnd=bits[1],
				email=surnames[last_name],
				address=addr,
				phone=phone,
				url=response.url)

コード例 #4

0

ファイルを表示

 def parse(self, response):
     """Yield Official records from the elected-officials or taxes page.

     "elected" in the URL yields a MAYOR or MEMBER OF COUNCIL per blurb
     container; "taxes" yields one TAX COLLECTOR from the first
     ``et_pb_text_inner`` div.  Any other URL yields nothing.
     """
     if "elected" in response.url:
         for quote in response.xpath(
                 "//div[@class='et_pb_blurb_container']"):
             allText = getAllText(quote)
             # The email is read from the image ``alt`` attribute.
             alts = quote.xpath(".//img/@alt").getall()
             # "mifflon" is rewritten to "mifflin" (presumably a typo on
             # the page -- confirm).  No image means no email.
             email = alts[0].replace("mifflon", "mifflin") if alts else None
             yield Official(muniName=self.muniName,
                            muniType=self.muniType,
                            office="MAYOR" if allText[0] == "Mayor" else
                            "MEMBER OF COUNCIL",
                            name=allText[-1],
                            email=email,
                            url=response.url)
     elif "taxes" in response.url:
         # Only the first matching div is used.
         for quote in response.xpath(
                 "//div[@class='et_pb_text_inner']")[0:1]:
             allText = getAllText(quote)
             yield Official(muniName=self.muniName,
                            muniType=self.muniType,
                            office="TAX COLLECTOR",
                            phone=allText[3],
                            name=allText[0].split(',')[0],
                            address=allText[1] + " " + allText[2],
                            url=response.url)

コード例 #5

0

ファイルを表示

 def parse(self, response):
     """Yield mayor, council and tax collector records per section.

     For every ``twocolbig`` div, each ``h4`` heading text is located in
     the div's text fragments and the fragments following it are read by
     fixed offsets.
     """
     for section in response.xpath("//div[@class='twocolbig']"):
         fragments = getAllText(section)
         # Headings are queried from the whole response rather than
         # relative to this div; the original author notes that the
         # relative form did not work.
         headings = getAllText(
             response.xpath("//div[@class='twocolbig']/h4"))
         for heading in headings:
             # Index of the first fragment equal to this heading.
             positions = [
                 pos for pos, text in enumerate(fragments)
                 if heading == text
             ]
             loc = positions[0]
             lowered = heading.lower()
             if "mayor" in lowered:
                 yield Official(muniName=self.muniName,
                                muniType=self.muniType,
                                office="MAYOR",
                                name=fragments[loc + 1],
                                url=response.url)
             if heading == "MEMBERS OF COUNCIL":
                 # Up to seven fragments after the heading are members.
                 council = fragments[loc + 1:loc + 8]
                 for member in council:
                     # Drop anything after a comma (e.g. a position title).
                     member_name = member.split(",")[0]
                     yield Official(
                         muniName=self.muniName,
                         muniType=self.muniType,
                         office="MEMBER OF COUNCIL",
                         name=member_name,
                         url=response.url)
             if "tax" in lowered:
                 yield Official(muniName=self.muniName,
                                muniType=self.muniType,
                                office="TAX COLLECTOR",
                                phone=fragments[loc + 3].split(":")[1],
                                name=fragments[loc + 2],
                                url=response.url)

コード例 #6

0

ファイルを表示

ファイル: pennsbury_village_b.py プロジェクト: crocojim18/alleco

 def parse(self, response):
     """Yield the mayor, council members and tax collector from one page.

     The mayor comes from the first paragraph of ``pf-content``, council
     members from its table rows, and the tax collector from the fifth
     paragraph.  Street addresses get a fixed city/state/ZIP suffix.
     """
     suffix = ", Pittsburgh, PA 15205"
     content = '//div[@class="pf-content"]'

     for paragraph in response.xpath(content + '/p[1]'):
         fields = getAllText(paragraph)
         yield Official(muniName=self.muniName,
                        muniType=self.muniType,
                        office="MAYOR",
                        name=fields[1],
                        url=response.url,
                        address=fields[2] + suffix,
                        phone=fields[3])

     for row in response.xpath(content + '/table/tbody/tr'):
         fields = getAllText(row)
         # Keep the text before the first en dash as the name.
         member_name = fields[0].split("–")[0]
         yield Official(muniName=self.muniName,
                        muniType=self.muniType,
                        office="MEMBER OF COUNCIL",
                        name=member_name,
                        url=response.url,
                        address=fields[1] + suffix,
                        phone=fields[2])

     for paragraph in response.xpath(content + '/p[5]'):
         fields = getAllText(paragraph)
         yield Official(muniName=self.muniName,
                        muniType=self.muniType,
                        office="TAX COLLECTOR",
                        name=fields[1],
                        url=response.url,
                        email=fields[2],
                        phone=fields[3])

コード例 #7

0

ファイルを表示

	def parse(self, response):
		"""Yield the tax collector or the commissioners, depending on the URL.

		"tax" in the URL yields TAX COLLECTOR records; otherwise
		"commissioners" yields one COMMISSIONER per ward block.  Any other
		URL yields nothing.
		"""
		if "tax" in response.url:
			for block in response.xpath("//div[p/text()='Tax Collector']/.."):
				collector = block.xpath("div[2]/h1/span/span/span/text()").get()
				number = block.xpath("div[4]/p/text()").get()
				yield Official(
					muniName=self.muniName,
					muniType=self.muniType,
					office="TAX COLLECTOR",
					name=collector,
					phone=number,
					url=response.url)
			return
		if "commissioners" not in response.url:
			return
		# Heading texts, each cut at the first " -".
		names = []
		for heading in response.xpath("//h1"):
			names.append(getAllText(heading)[0].split(" -")[0])
		data = []
		for block in response.xpath("//p[contains(text(),'Ward ')]/../.."):
			data.append(getAllText(block))
		for fields in data:
			# Blocks with fewer than four fragments get the third heading
			# name prepended (presumably the block lacking its own name --
			# confirm against the page).
			if len(fields)<4:
				fields.insert(0, names[2])
			yield Official(
				muniName=self.muniName,
				muniType=self.muniType,
				office="COMMISSIONER",
				name=fields[0].split(" -")[0],
				email=fields[1],
				phone=fields[2],
				district=fields[3].upper(),
				url=response.url)

コード例 #8

0

ファイルを表示

 def parse(self, response):
     """Yield one Official per person in the "Elected Officials" section.

     The section's text fragments are split into per-person groups, a new
     group starting at each fragment that also appears among the ``h3``
     texts.  The second fragment of a group decides the office.  The last
     fragment of the whole section is used as the phone number, and only
     for council members.
     """
     for quote in response.xpath(
             "//h1[contains(text(),'Elected Officials')]/../../div"):
         names = getTextOfType(quote, "h3")
         allText = getAllText(quote)
         phone = allText[-1]
         # Named ``groups`` rather than ``split`` so the local cannot
         # shadow any ``split`` helper the module may import.
         groups = []
         current = []
         for text in allText:
             if text in names and current:
                 groups.append(current)
                 current = []
             current.append(text)
         groups.append(current)
         for person in groups:
             title = person[1]
             if "Tax" in title:
                 office = "TAX COLLECTOR"
             elif title == "Mayor":
                 office = "MAYOR"
             else:
                 office = "MEMBER OF COUNCIL"
             yield Official(
                 muniName=self.muniName,
                 muniType=self.muniType,
                 office=office,
                 name=person[0],
                 phone=phone if office == "MEMBER OF COUNCIL" else None,
                 url=response.url)

コード例 #9

0

ファイルを表示

ファイル: shaler_t.py プロジェクト: crocojim18/alleco

 def parse(self, response):
     """Yield seven commissioners and the tax collector from one page.

     Commissioner data is flattened into one string, re-split on runs of
     two or more whitespace characters, and read in strides of six fields.
     The tax collector comes from the first ``textContent`` table row.
     """
     editor_spans = response.xpath(
         '//div[@id="divEditor909176e4-b373-45cd-af7f-deacc7efb43e"]/span'
     )
     for span in editor_spans:
         flattened = "  ".join(getAllText(span))
         # Skip the first four pieces and drop "Vice President" entries
         # so every commissioner occupies exactly six fields.
         fields = []
         for piece in split(r"\s{2,}", flattened)[4:]:
             if piece != "Vice President":
                 fields.append(piece)
         for number in range(7):
             start = number * 6
             yield Official(muniName=self.muniName,
                            muniType=self.muniType,
                            office="COMMISSIONER",
                            name=fields[start].split(",")[0],
                            district=fields[start + 1].upper(),
                            termEnd=fields[start + 3],
                            phone=fields[start + 5],
                            address=fields[start + 2] + ", " +
                            fields[start + 4],
                            url=response.url)

     # Only the first matching row is used.
     for row in response.xpath('//tr[@class="textContent"]')[0:1]:
         collector = row.xpath("td[1]/text()").get()
         term_end = row.xpath("td[4]/text()").get()
         number = row.xpath("td[3]/div[3]/text()").get()
         street = row.xpath("td[3]/text()").get()
         city = row.xpath("td[3]/div[1]/text()").get()
         yield Official(muniName=self.muniName,
                        muniType=self.muniType,
                        office="TAX COLLECTOR",
                        name=collector,
                        termEnd=term_end,
                        phone=number,
                        address=street + ", " + city,
                        url=response.url)

コード例 #10

0

ファイルを表示

 def parse(self, response):
     """Yield council members or the mayor, chosen by the URL's
     second-to-last character ('l' or 'e').

     Council members are assembled from fixed positions in the article's
     text fragments and handed to ``self._member``; the mayor is read
     directly from article ``post-343``.
     """
     page_key = response.url[-2]
     if page_key == 'l':
         for article in response.xpath('//article[@id="post-346"]'):
             bits = getAllText(article)
             # Each entry is the fragment list for one member; positions
             # are fixed by the page layout.  Several entries reuse the
             # same leading fragment (bits[15], bits[24], bits[31]).
             peeps = [
                 bits[8:10],
                 [bits[10], bits[14]],
                 bits[15:17],
                 [bits[15], bits[20], bits[23]],
                 bits[24:26],
                 [bits[24]] + bits[27:30],
                 bits[31:34],
                 [bits[31], bits[34]],
                 [bits[31]] + bits[-2:],
             ]
             for peep in peeps:
                 yield self._member(peep, response)
     elif page_key == 'e':
         for article in response.xpath('//article[@id="post-343"]'):
             contact = article.xpath("p[16]/text()").get()
             # The email is the last space-separated token of the contact
             # text; the whole text is passed as the phone value.
             yield Official(muniName=self.muniName,
                            muniType=self.muniType,
                            office="MAYOR",
                            email=contact.strip().split(" ")[-1],
                            name=article.xpath("h3[2]/text()").get(),
                            phone=contact,
                            url=response.url)

コード例 #11

0

ファイルを表示

ファイル: moon_t.py プロジェクト: crocojim18/alleco

 def parse(self, response):
     """Yield five supervisors or the tax collector, chosen by the last
     character of the URL ('p' or '/').
     """
     last_char = response.url[-1]
     if last_char == 'p':
         # The first two ``listnone`` lists hold names and term ends;
         # the leading fragment of each list is dropped.
         namesdates = [
             getAllText(listing)[1:]
             for listing in response.xpath("//ul[@class='listnone']")[0:2]
         ]
         emails = [
             item.xpath('text()').get().strip()
             for item in response.xpath(
                 "//li[contains(strong/text(),'ail:')]")
         ]
         member_names, term_ends = namesdates[0], namesdates[1]
         for slot in range(5):
             yield Official(muniName=self.muniName,
                            muniType=self.muniType,
                            office="SUPERVISOR",
                            name=member_names[slot],
                            email=emails[slot],
                            termEnd=term_ends[slot],
                            url=response.url)
     elif last_char == '/':
         for content in response.xpath('//div[@id="mainContent"]'):
             # The name is the fourth and fifth words of the first
             # paragraph.
             intro_words = content.xpath(
                 'p[1]/text()').get().strip().split(" ")
             collector = " ".join(intro_words[3:5])
             number = content.xpath('text()').get()
             # The address is taken from the last footer text node, split
             # on " · ", keeping the second and third parts.
             footer = response.xpath(
                 "//div[@id='footer']/div/p/text()").getall()
             location = ", ".join(footer[-1].split(' · ')[1:3])
             yield Official(
                 muniName=self.muniName,
                 muniType=self.muniType,
                 office="TAX COLLECTOR",
                 name=collector,
                 phone=number,
                 address=location,
                 url=response.url)
     # INCOMPLETE
     # Expected offices: 3 auditors, unable to be found on website

コード例 #12

0

ファイルを表示

 def parse(self, response):
     """Yield supervisors or the tax collector, depending on the URL.

     "supervisors" in the URL yields one SUPERVISOR per list item, with
     the district derived by ``self._district``; "tax" yields a
     TAX COLLECTOR per ``entry`` div.  Any other URL yields nothing.
     """
     if "supervisors" in response.url:
         items = response.xpath(
             "//div[contains(h2/text(),'Current Board of Supervisors')]/ul/li"
         )
         for item in items:
             # Keep the text before the first en dash.
             member = item.xpath("text()").get().split("–")[0]
             yield Official(muniName=self.muniName,
                            muniType=self.muniType,
                            office="SUPERVISOR",
                            name=member,
                            district=self._district(member),
                            url=response.url)
         return
     if "tax" not in response.url:
         return
     for entry in response.xpath("//div[@class='entry']"):
         details = getAllText(entry.xpath('p[13]'))
         email = entry.xpath('p[14]/a/@href').get()
         # Rebuild the city line from its first and third tokens with
         # "PA" in between, replacing whatever the second token was.
         city_tokens = details[3].split(" ")
         city_line = " ".join([city_tokens[0], "PA", city_tokens[2]])
         yield Official(muniName=self.muniName,
                        muniType=self.muniType,
                        office="TAX COLLECTOR",
                        name=details[0],
                        address=details[2] + ", " + city_line,
                        phone=details[4].replace(".", ""),
                        email=email,
                        url=response.url)

コード例 #13

0

ファイルを表示

 def parse(self, response):
     """Yield elected officials, or the treasurer and controller.

     "elected" in the URL groups the text fragments into one list per
     official, a new group starting at each fragment containing "Mayor"
     or "City Council".  Otherwise "city" in the URL yields the TREASURER
     and, while the word "Controller" is absent from the page, a
     hard-coded CONTROLLER record.
     """
     if "elected" in response.url:
         folks = []
         current = []
         for text in getAllText(response.xpath('//td[@id="esbCr2x1"]/..')):
             if "Mayor" in text or "City Council" in text:
                 if current:
                     folks.append(current)
                 current = [text]
             else:
                 current.append(text)
         # Flush the final group.
         folks.append(current)
         for folk in folks:
             yield Official(muniName=self.muniName,
                            muniType=self.muniType,
                            office="MEMBER OF COUNCIL"
                            if folk[0] == "City Council" else "MAYOR",
                            name=folk[1],
                            email=None if len(folk) < 3 else folk[2],
                            url=response.url)
     elif "city" in response.url:
         bits = getAllText(
             response.xpath(
                 '//span[contains(text(),"Treasurer")]/../../..'))
         yield Official(muniName=self.muniName,
                        muniType=self.muniType,
                        office="TREASURER",
                        name=bits[1],
                        url=response.url)
         if response.xpath(
                 '//*[contains(text(),"Controller")]').get() is None:
             # Emailed Duquesne Manager concerning the Controller, who I
             # could not find listed on the website.  On 11/15/20 he
             # responded that the Duquesne Controller was Maureen Strahl.
             # While ideally we would scrape this, this will be returned
             # as long as 'Controller' is not on this webpage.
             yield Official(muniName=self.muniName,
                            muniType=self.muniType,
                            office="CONTROLLER",
                            name="Maureen Strahl",
                            url=None)

コード例 #14

0

ファイルを表示

 def parse(self, response):
     """Yield the tax collector, or council members plus a mayor request.

     "taxcollector" in the URL yields TAX COLLECTOR records.  Otherwise
     "contact" yields MEMBER OF COUNCIL records and, for each "Mayor"
     paragraph, a follow-up ``scrapy.Request`` handled by
     ``self.mayorParse`` with the mayor's email passed via ``cb_kwargs``.
     """
     if "taxcollector" in response.url:
         addr = getAllText(
             response.xpath(
                 "//p[text()[contains(.,'Tax Office Address')]]"))[-2:]
         # Normalise the second address line: "Pa" is upper-cased and
         # "McKeesport" gets a trailing comma.
         words = []
         for word in addr[1].split(" "):
             if word == "Pa":
                 words.append(word.upper())
             elif word == "McKeesport":
                 words.append(word + ",")
             else:
                 words.append(word)
         addr[1] = " ".join(words)
         email = response.xpath(
             "//p[text()[contains(.,'E-mail')]]/a/@href").get()
         phone = response.xpath(
             "//p[contains(text(),'Phone')]/text()").get()
         for paragraph in response.xpath(
                 '//p[contains(text(), "Tax Collector")]'):
             collector = paragraph.xpath("./text()").get().split("-")[0]
             yield Official(
                 muniName=self.muniName,
                 muniType=self.muniType,
                 office="TAX COLLECTOR",
                 name=collector,
                 address=", ".join(addr),
                 email=email,
                 phone=phone,
                 url=response.url)
     elif "contact" in response.url:
         # Fragments alternate between a member line and an email line;
         # the last two fragments are discarded.
         parts = getAllText(
             response.xpath('//p[contains(text(), "Council")]'))[:-2]
         for pair in range(len(parts) // 2):
             line = parts[pair * 2]
             # The name sits between the first ":" and the next "-".
             member = line.split(":")[1].split("-")[0]
             yield Official(muniName=self.muniName,
                            muniType=self.muniType,
                            office="MEMBER OF COUNCIL",
                            name=member,
                            email=parts[pair * 2 + 1],
                            url=response.url)
         for paragraph in response.xpath('//p[contains(text(), "Mayor")]'):
             # The link's href is split on ":" and the second part kept
             # as the email address.
             href = paragraph.xpath("a/@href").get()
             extra = {"email": href.split(":")[1]}
             yield scrapy.Request(
                 url="http://eastmckeesportboro.com/leaders.htm",
                 callback=self.mayorParse,
                 cb_kwargs=extra)

コード例 #15

0

ファイルを表示

 def parse(self, response):
     """Yield one Official per body row of the table with a "Name" header.

     The second cell decides the office (TREASURER when it contains
     "Treasurer", otherwise MEMBER OF COUNCIL) and is also passed to
     ``self._district``.  The phone is optional.
     """
     rows = response.xpath(
         "//th[contains(text(),'Name')]/../../../tbody/tr")
     for row in rows:
         cells = getAllText(row)
         if "Treasurer" in cells[1]:
             office = "TREASURER"
         else:
             office = "MEMBER OF COUNCIL"
         district = self._district(cells[1])
         name = cells[0]
         # Rows with fewer than three fragments carry no phone number.
         phone = cells[2] if len(cells) >= 3 else None
         yield Official(muniName=self.muniName,
                        muniType=self.muniType,
                        office=office,
                        district=district,
                        name=name,
                        phone=phone,
                        url=response.url)

コード例 #16

0

ファイルを表示

ファイル: pleasant_hills_b.py プロジェクト: crocojim18/alleco

	def parse(self, response):
		"""Yield the mayor and council members, or the tax collector.

		"contact" in the URL yields a record for each ``adminForm`` list
		item whose second fragment mentions Council, President or Mayor;
		"taxes" yields a TAX COLLECTOR.  Any other URL yields nothing.
		"""
		if "contact" in response.url:
			for entry in response.xpath("//form[@id='adminForm']/ul/li"):
				fields = getAllText(entry)
				title = fields[1]
				if not ("Council" in title or "President" in title or "Mayor" in title):
					continue
				# Only an exact "Mayor" title is the mayor.
				office = "MAYOR" if "Mayor"==title else "MEMBER OF COUNCIL"
				yield Official(
					muniName=self.muniName,
					muniType=self.muniType,
					office=office,
					name=fields[0],
					url=response.url)
		elif "taxes" in response.url:
			for span in response.xpath("//span[contains(strong/text(),'Mercantile Tax Collector')]"):
				fields = getAllText(span)
				# The second fragment is comma-separated: the first piece
				# is the name, the middle pieces form the address, and the
				# last piece is dropped.
				pieces = fields[1].split(",")
				street = ", ".join([piece.strip() for piece in pieces[1:-1]])
				yield Official(
					muniName=self.muniName,
					muniType=self.muniType,
					office="TAX COLLECTOR",
					name=pieces[0],
					address=street,
					phone=fields[2],
					url=response.url)

コード例 #17

0

ファイルを表示

ファイル: franklin_park_b.py プロジェクト: crocojim18/alleco

	def parse(self, response):
		"""Yield council members, the mayor or the tax collector by URL.

		The branch is chosen by the first of "Council", "Mayor" or "Tax"
		found in ``response.url``; any other URL yields nothing.
		"""
		if "Council" in response.url:
			# Only the sixth and seventh ``fr-view`` divs are read.
			for section in response.xpath("//div[@class='fr-view']")[5:7]:
				for item in section.xpath('.//li'):
					parts = getAllText(item)
					# A four-fragment item has its first two fragments
					# merged into one.
					if len(parts)==4:
						parts = [parts[0]+parts[1]]+parts[2:]
					if 'Junior Council Person' in parts[0]:
						continue
					heading = parts[0]
					yield Official(
						muniName=self.muniName,
						muniType=self.muniType,
						office="MEMBER OF COUNCIL",
						name=heading.split(",")[0],
						district=heading.split(",")[-1].strip().upper(),
						termEnd=parts[1],
						email=parts[2],
						url=response.url)
		elif "Mayor" in response.url:
			for paragraph in response.xpath("//div[contains(h2/text(),'Responsibilities')]/p[2]"):
				mayor = paragraph.xpath("text()").get().split(",")[0]
				term_end = paragraph.xpath("text()[2]").get()
				email = paragraph.xpath("a/@href").get()
				yield Official(
					muniName=self.muniName,
					muniType=self.muniType,
					office="MAYOR",
					name=mayor,
					termEnd=term_end,
					email=email,
					url=response.url)
		elif "Tax" in response.url:
			for listing in response.xpath("//ol[contains(li/div/text(),'Real Estate Tax Collector')]"):
				lines = getAllText(listing.xpath("li[2]/div[1]"))[2:]
				# NOTE(review): no separator is inserted between lines[1]
				# and the joined remainder -- confirm this is intended.
				address = lines[0]+", "+lines[1]+" ".join(lines[2:])
				yield Official(
					muniName=self.muniName,
					muniType=self.muniType,
					office="TAX COLLECTOR",
					name=listing.xpath("li[1]/h4/text()").get(),
					phone=listing.xpath("li[2]/div[3]/text()").get(),
					address=address,
					email=listing.xpath("li[1]/div/a/@href").get(),
					url=response.url)

コード例 #18

0

ファイルを表示

	def parse(self, response):
		"""Yield one council member and follow every link in the caption.

		For each div whose ``strong`` text contains "Sitting left to
		right", the second text fragment supplies a name, and each anchor
		inside the div is requested with ``self.linkParse`` as callback.
		"""
		captions = response.xpath('//div[contains(strong/text(),"Sitting left to right")]')
		for caption in captions:
			fragments = getAllText(caption)
			# Keep the text before the first comma, without surrounding
			# colons.
			first_name = fragments[1].split(",")[0].strip(":")
			yield Official(
				muniName=self.muniName,
				muniType=self.muniType,
				office="MEMBER OF COUNCIL",
				name=first_name,
				url=response.url)
			for anchor in caption.xpath(".//a"):
				target = anchor.xpath("./@href").get()
				yield scrapy.Request(url=target, callback=self.linkParse)

コード例 #19

0

ファイルを表示

 def parse(self, response):
     """Yield commissioners or the tax collector, depending on the URL.

     'commissioners' in the URL reads the fragments in groups of four
     (name first, email fourth); 'tax' reads a single record.  Any other
     URL yields nothing.
     """
     if 'commissioners' in response.url:
         blocks = response.xpath(
             '//h4[contains(text(),"Township Commissioners")]/../div')
         for block in blocks:
             fields = getAllText(block)
             group_count = len(fields) // 4
             for group in range(group_count):
                 start = group * 4
                 yield Official(muniName=self.muniName,
                                muniType=self.muniType,
                                office="COMMISSIONER",
                                name=fields[start],
                                email=fields[start + 3],
                                url=response.url)
         return
     if 'tax' not in response.url:
         return
     for block in response.xpath(
             '//h4[contains(text(),"Tax Collector")]/../div'):
         fields = getAllText(block)
         yield Official(muniName=self.muniName,
                        muniType=self.muniType,
                        office="TAX COLLECTOR",
                        name=fields[0],
                        phone=fields[2],
                        email=fields[3],
                        url=response.url)

コード例 #20

0

ファイルを表示

 def parse(self, response):
     """Yield the mayor or the council members, chosen by the last
     character of the URL ("7" or "9").

     Both branches read a shared address from the "Physical Address"
     span, at different fragment offsets; the council branch also takes a
     shared phone number from it.
     """
     address_xpath = "//span[@class='DirectoryNormalText' and contains(label/text(),'Physical Address')]"
     page_id = response.url[-1]
     if page_id == "7":
         shared = getAllText(response.xpath(address_xpath))
         address = ", ".join(shared[1:4])
         for row in response.xpath("//tr[td/span/text()='Mayor']"):
             # The email is passed through ``self._email``, which
             # receives the text of a script element.
             email = self._email(
                 row.xpath('td[3]/span/script/text()').get())
             phone = row.xpath('td[4]/span/text()').get()
             name = self._name(row.xpath('td[1]/span/a/text()').get())
             yield Official(
                 muniName=self.muniName,
                 muniType=self.muniType,
                 address=address,
                 office="MAYOR",
                 email=email,
                 phone=phone,
                 name=name,
                 url=response.url)
     elif page_id == "9":
         shared = getAllText(response.xpath(address_xpath))
         address = ", ".join(shared[5:8])
         phone = shared[-2]
         council_rows = response.xpath(
             "//table[@id='cityDirectoryDepartmentDetails']//tr[contains(td[2]/span/text(), 'Council')]"
         )
         for row in council_rows:
             name = self._name(row.xpath('td[1]/span/a/text()').get())
             yield Official(muniName=self.muniName,
                            muniType=self.muniType,
                            address=address,
                            office="MEMBER OF COUNCIL",
                            phone=phone,
                            name=name,
                            url=response.url)

コード例 #21

0

ファイルを表示

ファイル: ingram_b.py プロジェクト: crocojim18/alleco

 def parse(self, response):
     """Yield the mayor and council members from the council table.

     Only URLs containing "council" produce records; each table body row
     supplies name, title, term end and email in that order.
     """
     if "council" not in response.url:
         return
     for row in response.xpath('//tbody/tr'):
         cells = getAllText(row)
         if cells[1] == "Mayor":
             office = "MAYOR"
         else:
             office = "MEMBER OF COUNCIL"
         yield Official(muniName=self.muniName,
                        muniType=self.muniType,
                        office=office,
                        name=cells[0],
                        email=cells[3],
                        termEnd=cells[2],
                        url=response.url)
     ##EXPECTED: 1 tax collector
     ## It may be Lorraine Rehtoric given section 8(h)
     ##in the September 14, 2020 agenda of the borough council

コード例 #22

0

ファイルを表示

ファイル: monroeville_b.py プロジェクト: crocojim18/alleco

	def parse(self, response):
		"""Yield one Official per four-fragment group in each table.

		Every table under the 609-wide cell is read in groups of four
		whitespace-normalised fragments (name, role, phone, one unused);
		emails come from the table's ``mailto:`` links in the same order.
		"""
		tables = response.xpath('//td[@width="609"]/table')
		for table_index, table in enumerate(tables):
			names = [sub(r"\s+"," ",text) for text in getAllText(table)]
			# The first table contributes only its fragments 1 to 4.
			if table_index==0:
				names = names[1:5]
			emails = []
			for href in table.xpath('.//a/@href').getall():
				if "mailto:" in href:
					emails.append(href)
			for group in range(len(names)//4):
				start = group*4
				role = names[start+1]
				# Roles mentioning "Council" are council members; any
				# other role is used upper-cased as the office itself.
				office = "MEMBER OF COUNCIL" if "Council" in role else role.upper()
				person = names[start]
				email = emails[group]
				# The district is the first six characters of a "Ward"
				# role, otherwise AT-LARGE.
				district = role[:6].upper() if "Ward" in role else "AT-LARGE"
				yield Official(
					muniName=self.muniName,
					muniType=self.muniType,
					office=office,
					name=person,
					email=email,
					district=district,
					phone=names[start+2],
					url=response.url)

コード例 #23

0

ファイルを表示

ファイル: jefferson_hills_b.py プロジェクト: crocojim18/alleco

	def parse(self, response):
		"""Yield the mayor and council members, or the tax collector.

		"Officials" in the URL yields one record per table row; "Taxes"
		yields a TAX COLLECTOR per matching row.  Any other URL yields
		nothing.
		"""
		if "Officials" in response.url:
			for quote in response.xpath("//span[@id='ContentPage1_ctl04_lblText']/table//tr"):
				allText = getAllText(quote)
				yield Official(
					muniName=self.muniName,
					muniType=self.muniType,
					office="MAYOR" if allText[0]=="Mayor" else "MEMBER OF COUNCIL",
					name=allText[1]+" "+allText[2],
					#email has to be manual because the site has weird server-side protections
					# (first initial + last name at jeffersonhills.net, lower-cased)
					email=(allText[1][0]+allText[2]+"@jeffersonhills.net").lower(),
					url=response.url)
		elif "Taxes" in response.url:
			for quote in response.xpath("//tr[td/text()='Real Estate Tax Collector\xa0']"):
				# The same cell text supplies both values, so look it up once.
				cell = quote.xpath("td[2]/text()").get()
				yield Official(
					muniName=self.muniName,
					muniType=self.muniType,
					office="TAX COLLECTOR",
					# The name follows the first "("; the final character
					# of that stripped remainder is dropped.
					name=cell.split("(")[1].strip()[:-1],
					phone=cell,
					url=response.url)

コード例 #24

0

ファイルを表示

ファイル: dravosburg_b.py プロジェクト: crocojim18/alleco

 def parse(self, response):
     """Yield one Official per group of fragments in the grid container.

     A fragment for which ``str.isupper()`` is true starts a new group
     and serves as its title; the following fragments are the name and,
     optionally, a phone number.
     """
     containers = response.xpath(
         '//div[@data-mesh-id="Containerc1qrainlineContent-gridContainer"]'
     )
     for container in containers:
         folks = []
         group = []
         for text in getAllText(container):
             if not text.isupper():
                 group.append(text)
                 continue
             if len(group) > 0:
                 folks.append(group)
             group = [text]
         # NOTE(review): the group still being built when the loop ends
         # is never appended to ``folks``, so it yields no record --
         # confirm whether that is intentional.
         for person in folks:
             title = person[0]
             if title in ["MAYOR", "TAX COLLECTOR"]:
                 office = title
             else:
                 office = "MEMBER OF COUNCIL"
             # A third fragment counts as a phone only if it contains "(".
             if len(person) >= 3 and "(" in person[2]:
                 phone = person[2]
             else:
                 phone = None
             yield Official(
                 muniName=self.muniName,
                 muniType=self.muniType,
                 office=office,
                 phone=phone,
                 name=person[1],
                 url=response.url)

コード例 #25

0

ファイルを表示

 def parse(self, response):
     """Yield elected officials or the tax collector, chosen by the last
     character of the URL ('t' or 'x').
     """
     suffix = response.url[-1]
     if suffix == 't':
         rows = response.xpath(
             '//table[@class="views-table cols-3"]/tbody/tr')
         for row in rows:
             title = row.xpath("td[2]/text()").get().strip()
             if title == "Mayor":
                 office = "MAYOR"
             else:
                 office = "MEMBER OF COUNCIL"
             yield Official(muniName=self.muniName,
                            muniType=self.muniType,
                            office=office,
                            name=row.xpath("td[1]/a/text()").get(),
                            phone=row.xpath("td[3]/text()").get(),
                            url=response.url)
     elif suffix == 'x':
         paragraphs = response.xpath(
             '//div[contains(h5/text(),"Real Estate Property Tax")]/p[3]'
         )
         for paragraph in paragraphs:
             lines = getAllText(paragraph)
             # First line: name before the first comma; then two address
             # lines and the phone.
             collector = lines[0].split(",")[0]
             yield Official(muniName=self.muniName,
                            muniType=self.muniType,
                            office="TAX COLLECTOR",
                            name=collector,
                            phone=lines[3],
                            address=lines[1] + ", " + lines[2],
                            url=response.url)

コード例 #26

0

ファイルを表示

ファイル: dormont_b.py プロジェクト: crocojim18/alleco

 def parse(self, response):
     """Yield the mayor or the council members, chosen by the URL's
     second-to-last character ("r" or "l").

     Council members are keyed by surname: the email links supply the
     surname and address, and the full name is filled in from the text
     fragments whose last word matches a known surname.
     """
     if response.url[-2] == "r":
         for quote in response.xpath('//article[@id="post-137"]'):
             yield Official(
                 muniName=self.muniName,
                 muniType=self.muniType,
                 office="MAYOR",
                 # Keep the text after the last en dash in the heading.
                 name=quote.xpath("h1/text()").get().split("–")[-1],
                 email=quote.xpath('div/div[2]/p[2]/a/@href').get(),
                 url=response.url)
     elif response.url[-2] == "l":
         for quote in response.xpath('//div[@class="entry-content"]'):
             fullNames = getAllText(quote.xpath("div"))
             emails = [(i.xpath("a/text()").get(), i.xpath("a/@href").get())
                       for i in quote.xpath("p")]
             # surname -> {"name": full name or None, "email": href}
             names = {
                 link_text.split(" ")[-1]: {
                     "name": None,
                     "email": href
                 }
                 for link_text, href in emails
             }
             for full in fullNames:
                 surname = full.split(" ")[-1]
                 if surname in names:
                     # The full name is the last two words of the fragment.
                     names[surname]["name"] = " ".join(full.split(" ")[-2:])
             #first name taken from 2019 election returns; was not on website when spider was first made
             # Guarded lookup: an unconditional names["Moore"] raised
             # KeyError whenever that surname was not among the links.
             moore = names.get("Moore")
             if moore is not None and moore["name"] is None:
                 moore["name"] = "John Moore"
             for person in names.values():
                 yield Official(muniName=self.muniName,
                                muniType=self.muniType,
                                office="MEMBER OF COUNCIL",
                                name=person["name"],
                                email=person["email"],
                                url=response.url)

コード例 #27

0

ファイルを表示

 def parse(self, response):
     """Yield commissioners or the tax collector, depending on the URL.

     "board" in the URL reads the "Board Members" section in groups of
     five fragments; 'taxes' reads the four text nodes that follow the
     "Delinquent Earned Income Tax Collector:" paragraph.  Any other URL
     yields nothing.
     """
     if "board" in response.url:
         for quote in response.xpath(
                 "//h2[contains(text(),'Board Members')]/.."):
             # Drop the leading fragment, bare commas and any fragment
             # mentioning "Chairman" so every member spans five entries.
             alltext = [
                 i for i in getAllText(quote)[1:]
                 if i != ',' and "Chairman" not in i
             ]
             for i in range(len(alltext) // 5):
                 base = i * 5
                 # The second entry is either an email or a phone number;
                 # "@" and "(" tell them apart.
                 contact = alltext[base + 1]
                 yield Official(
                     muniName=self.muniName,
                     muniType=self.muniType,
                     office="COMMISSIONER",
                     name=alltext[base],
                     district="WARD " + alltext[base + 2],
                     email=contact if "@" in contact else None,
                     phone=contact if "(" in contact else None,
                     # The term end is the last four characters.
                     termEnd=alltext[base + 4][-4:],
                     url=response.url)
     elif 'taxes' in response.url:
         quote = response.xpath(
             "//p[contains(strong/text(),'Delinquent Earned Income Tax Collector:')]/following-sibling::p/text()"
         ).getall()[0:4]
         # Strip non-breaking spaces and surrounding whitespace.
         quote = [i.replace("\xa0", "").strip() for i in quote]
         yield Official(muniName=self.muniName,
                        muniType=self.muniType,
                        office="TAX COLLECTOR",
                        name=quote[0],
                        phone=quote[1],
                        address=quote[2] + ", " + quote[3],
                        url=response.url)