Esempi in Python per CanadianLegislator, esempi in Python per utils.CanadianLegislator

Esempio n. 1

0

Mostra file

  def get_people(self):
    """Yield one Legislator per row of the CSV served at COUNCIL_CSV_URL.

    The row whose 'Elected office' is 'Mayor' is posted to 'Ottawa'; every
    other row uses its 'District name' column. Known misspellings of district
    names are corrected before the Legislator is built.
    """
    # Correct typos. The City has been notified of the errors.
    district_fixes = {
      u'Knoxdale Merivale': u'Knoxdale-Merivale',
      u'Rideau Vanier': u'Rideau-Vanier',
      u'Orleans': u'Orléans',
    }

    for row in DictReader(urlopen(COUNCIL_CSV_URL)):
      name = '%s %s' % (row['First name'], row['Last name'])
      role = row['Elected office']
      district = 'Ottawa' if role == 'Mayor' else row['District name']
      district = district_fixes.get(district, district)

      email = row['Email']
      address_parts = [
        row['Address line 1'],
        row['Address line 2'],
        row['Locality'],
        row['Postal code'],
        row['Province'],
      ]
      address = ', '.join(address_parts)
      phone = row['Phone']
      photo_url = row['Photo URL']

      legislator = Legislator(name=name, post_id=district, role=role)
      legislator.add_source(COUNCIL_CSV_URL)
      legislator.add_contact('email', email, None)
      legislator.add_contact('address', address, 'legislature')
      legislator.add_contact('voice', phone, 'legislature')
      legislator.image = photo_url
      yield legislator

Esempio n. 2

0

Mostra file

File: people.py Progetto: fchagnon/scrapers-ca

  def get_people(self):
    """Yield a Legislator for every table cell found on COUNCIL_PAGE.

    A cell whose heading mentions 'Mayor' (but not 'Deputy Mayor') becomes the
    mayor of 'Fredericton'. Every other cell becomes a 'Councillor' whose
    district is the parenthesised part of its 'Ward:' text, with a trailing
    ' Area' dropped.
    """
    page = lxmlize(COUNCIL_PAGE)

    for councillor in page.xpath('//table/tbody/tr/td'):
      heading = councillor.xpath('.//strong/text()')[0]
      # The cell is never modified below, so flatten its text only once.
      content = councillor.text_content()

      name = heading.split(',')[0].replace('Name:', '').strip()
      if 'Mayor' in heading and 'Deputy Mayor' not in heading:
        role = 'Mayor'
        district = 'Fredericton'
      else:
        district = re.findall(r'(Ward:.*)(?=Address:)', content)[0].replace(':', '').strip()
        # Raw string: '\(' and '\)' are not valid escapes in a plain literal.
        district = re.search(r'\((.+?)(?: Area)?\)', district).group(1)
        role = 'Councillor'

      p = Legislator(name=name, post_id=district, role=role)
      p.add_source(COUNCIL_PAGE)

      p.image = councillor.xpath('.//img/@src')[0]

      address = re.findall(r'(?<=Address:).*(?=Home:)', content)[0].strip()
      p.add_contact('address', address, 'legislature')

      # Home number: text between 'Home: (' and 'Fax:'; the ')' plus separator
      # between digit groups is replaced with '-', and only the first token kept.
      phone = re.findall(r'(?<=Home: \().*(?=Fax:)', content)[0]
      phone = re.sub(r'(?<=[0-9])(\)\D{1,2})(?=[0-9])', '-', phone).split()[0]
      p.add_contact('voice', phone, 'residence')

      # The office number is optional.
      office = re.findall(r'(?<=Office: \().*(?=Fax:)', content)
      if office:
        p.add_contact('voice', office[0].replace(') ', '-'), 'legislature')

      yield p

Esempio n. 3

0

Mostra file

File: people.py Progetto: fchagnon/scrapers-ca

    def get_people(self):
        """Yield a Legislator for each table row inside the content div.

        A row whose text contains 'Maire' is the mayor of
        'Sainte-Anne-de-Bellevue'; any other row is a 'Conseiller' whose
        district number is the first digit found in the row's first cell.
        The e-mail contact is added only when the row contains a link.
        """
        page = lxmlize(COUNCIL_PAGE)

        for councillor in page.xpath('//div[@id="content"]//tr'):
            is_mayor = 'Maire' in councillor.text_content()
            cells = councillor.xpath('./td')
            # The name is in the second cell for mayor and councillors alike.
            name = cells[1].text_content()
            if is_mayor:
                district = 'Sainte-Anne-de-Bellevue'
                role = 'Maire'
            else:
                district = 'District ' + re.findall(
                    r'\d', cells[0].text_content())[0]
                role = 'Conseiller'

            p = Legislator(name=name, post_id=district, role=role)
            p.add_source(COUNCIL_PAGE)

            links = councillor.xpath('.//a')
            if links:
                email = links[0].attrib['href'].replace('mailto:', '')
                p.add_contact('email', email, None)
            yield p

Esempio n. 4

0

Mostra file

File: people.py Progetto: fchagnon/scrapers-ca

    def scrape_mayor(self):
        """Return the mayor's Legislator built from MAYOR_PAGE.

        The page is loaded with the 'iso-8859-1' argument. Name, photo, phone,
        postal address and e-mail are read from fixed positions in the
        article title and article body paragraphs.
        """
        page = lxmlize(MAYOR_PAGE, 'iso-8859-1')

        title = page.xpath('//div[@class="articletitle"]/h1')[0]
        name = title.text_content().replace('Mayor', '')

        mayor = Legislator(name=name, post_id='Summerside', role='Mayor')
        mayor.add_source(MAYOR_PAGE)
        image_src = page.xpath(
            '//div[@class="articlebody-inside"]/p/img/@src')[0]
        mayor.image = image_src.replace('..', '')

        paragraphs = page.xpath('//div[@class="articlebody-inside"]/p')
        phone = re.findall(r'to (.*)', paragraphs[1].text_content())[0]
        # The address spans two paragraphs; the first carries a label.
        first_line = paragraphs[3].text_content().replace('by mail: ', '')
        address = first_line + ' ' + paragraphs[4].text_content()
        mail_link = paragraphs[5].xpath(
            './/a[contains(@href, "mailto:")]')[0]
        email = mail_link.text_content()

        mayor.add_contact('voice', phone, 'legislature')
        mayor.add_contact('address', address, 'legislature')
        mayor.add_contact('email', email, None)

        return mayor

Esempio n. 5

0

Mostra file

File: people.py Progetto: rhymeswithcycle/scrapers-ca

    def get_people(self):
        """Yield a Legislator for each first table row under the WebPartWPQ1 div.

        The heading text gives the name and, once the name is removed, the
        role (defaulting to "Councillor"). The district is the text after the
        first comma of the comma-bearing paragraph, minus municipality-type
        words. Residence and municipal office details are handed to
        self.get_contacts.
        """
        page = lxmlize(COUNCIL_PAGE)

        for row in page.xpath('//div[@id="WebPartWPQ1"]/table/tbody/tr[1]'):
            # Prefer the deeply nested <strong>; fall back to any <strong>.
            heading = row.xpath(".//td[1]//strong//strong//strong//strong")
            if not heading:
                heading = row.xpath(".//td[1]//strong")
            text = heading[0].text_content()

            name = text.strip()
            for title in ("Deputy ", "Warden ", "Mayor"):
                name = name.replace(title, "")
            role = text.replace(name, "").strip() or "Councillor"
            if "," in name:
                name = name.split(",")[0].strip()

            place_line = row.xpath('.//td[1]//p[contains(text(),",")]/text()')[0]
            district = place_line.split(",")[1].strip()
            district = re.sub(r"\A(?:City|Municipality|Town|Township|Village) of\b| Township\Z", "", district)

            person = Legislator(name=name, post_id=district, role=role)
            person.add_source(COUNCIL_PAGE)

            person.image = row.xpath(".//td[1]//img/@src")[0]

            details = row.xpath(".//td[2]")[0].text_content()
            residence = re.findall(r"(?<=Residence:)(.*)(?=Municipal Office:)", details, flags=re.DOTALL)[0]
            self.get_contacts(residence, "residence", person)
            office = re.findall(r"(?<=Municipal Office:)(.*)", details, flags=re.DOTALL)[0]
            self.get_contacts(office, "legislature", person)

            yield person

Esempio n. 6

0

Mostra file

File: people.py Progetto: fchagnon/scrapers-ca

  def get_people(self):
    """Yield a Legislator for each sub-navigation link on COUNCIL_PAGE.

    The first link is treated as the mayor of 'Ajax'. Every other link's own
    page heading supplies the ward and the '(Regional) Councillor' role.
    Rows of the first data table become contacts (Phone/Fax/Email) or links.
    """
    index_page = lxmlize(COUNCIL_PAGE)

    links = index_page.xpath('//ul[@class="subNav top"]/li/ul//li/a')
    for link in links:
      name = link.text_content()

      url = link.attrib['href']
      profile = lxmlize(url)

      if link == links[0]:
        district = 'Ajax'
        role = 'Mayor'
      else:
        heading = profile.xpath('//div[@id="printAreaContent"]//h1')[0].text_content()
        district = re.findall(r'Ward.*', heading)[0].strip()
        role = re.findall('((Regional)? ?(Councillor))', heading)[0][0]

      person = Legislator(name=name, post_id=district, role=role)
      person.add_source(COUNCIL_PAGE)
      person.add_source(url)

      person.image = profile.xpath('//div[@class="intQuicklinksPhoto"]/img/@src')[0]

      # Skip the first row of the contact table.
      for row in profile.xpath('//table[@class="datatable"][1]//tr')[1:]:
        cells = row.xpath('./td')
        label = cells[0].text_content().strip()
        value = cells[1].text_content().strip()
        if re.match(r'(Phone)|(Fax)|(Email)', label):
          kind = CONTACT_DETAIL_TYPE_MAP[label]
          note = None if kind == 'email' else 'legislature'
          person.add_contact(kind, value, note)
        else:
          person.add_link(value, None)
      yield person

Esempio n. 7

0

Mostra file

def scrape_mayor(url):
    """Return the mayor's Legislator scraped from `url` and its contact page.

    The name and photo come from the page at `url`. The address, phone and
    fax come from the page linked as "Contact the Mayor", read from the 4th,
    5th and 6th paragraphs of its rich-HTML field.
    """
    bio_page = lxmlize(url)
    name_node = bio_page.xpath('//tr/td/p')[-1]
    name = name_node.text_content().replace('Mayor', '')
    image = bio_page.xpath('//div[@class="sask_ArticleBody"]//img/@src')[0]

    contact_url = bio_page.xpath(
        '//a[contains(text(), "Contact the Mayor")]/@href')[0]
    contact_page = lxmlize(contact_url)

    # The first text node of the address paragraph is dropped.
    address_lines = contact_page.xpath(
        '//div[@id="ctl00_PlaceHolderMain_RichHtmlField1__ControlWrapper_RichHtmlField"]/p[4]/text()'
    )
    address = ' '.join(address_lines[1:])

    phone_text = contact_page.xpath(
        '//div[@id="ctl00_PlaceHolderMain_RichHtmlField1__ControlWrapper_RichHtmlField"]/p[5]/span/text()'
    )[0]
    phone = phone_text.replace('(', '').replace(') ', '-')

    fax_text = contact_page.xpath(
        '//div[@id="ctl00_PlaceHolderMain_RichHtmlField1__ControlWrapper_RichHtmlField"]/p[6]/span/text()'
    )[0]
    fax = fax_text.replace('(', '').replace(') ', '-')

    mayor = Legislator(name=name, post_id='Saskatoon', role='Mayor')
    mayor.add_source(url)
    mayor.image = image
    mayor.add_contact('address', address, 'legislature')
    mayor.add_contact('voice', phone, 'legislature')
    mayor.add_contact('fax', fax, 'legislature')
    return mayor

Esempio n. 8

0

Mostra file

File: people.py Progetto: fchagnon/scrapers-ca

  def get_people(self):
    """Yield the mayor, then a 'Conseiller' Legislator per remaining cell.

    The first matching cell is passed to scrape_mayor. Later cells without a
    link are skipped; for the rest, the first link gives the name and detail
    URL and the second link gives the district. Phone numbers, links and the
    e-mail are read from the detail page.
    """
    listing = lxmlize(COUNCIL_PAGE)

    cells = listing.xpath('//div[@class="article-content"]//td[@class="ms-rteTableOddCol-0"]')
    yield scrape_mayor(cells[0])
    for cell in cells[1:]:
      anchors = cell.xpath('.//a')
      if not anchors:
        continue

      name = anchors[0].text_content().strip()
      district = anchors[1].text_content()
      url = cell.xpath('.//a/@href')[0]
      detail = lxmlize(url)

      person = Legislator(name=name, post_id=district, role='Conseiller')
      person.add_source(COUNCIL_PAGE)
      person.add_source(url)

      # The photo is taken from the nearest preceding sibling cell with one.
      person.image = cell.xpath('./preceding-sibling::td//img/@src')[-1]

      # Any text node containing a digit is recorded as a phone number.
      for text in detail.xpath('.//td[@class="ms-rteTableOddCol-0"]//text()'):
        if re.findall(r'[0-9]', text):
          person.add_contact('voice', text.strip().replace(' ', '-'), 'legislature')
      get_links(person, detail.xpath('.//td[@class="ms-rteTableOddCol-0"]')[0])

      mailto = detail.xpath('string(//a[contains(@href, "mailto:")]/@href)')
      person.add_contact('email', mailto[len('mailto:'):], None)
      yield person

Esempio n. 9

0

Mostra file

File: people.py Progetto: fchagnon/scrapers-ca

  def get_people(self):
    """Yield a Legislator for each non-empty paragraph in the centred cells.

    The name is looked up in up to three markup variants. A name containing
    'Mayor' marks the mayor; everyone else is a 'Councillor'. All posts are
    "LaSalle". The office phone is optional; the home phone is required.
    """
    page = lxmlize(COUNCIL_PAGE)

    for block in page.xpath('//div[@align="center" and not(@class="background")]//td/p'):
      body = block.text_content()
      if not body.strip():
        continue

      names = block.xpath('./font/b/text()') or block.xpath('./font/text()')
      if 'e-mail' in names[0]:
        # The first text node was the e-mail label, not the name.
        names = block.xpath('./b/font/text()')
      name = names[0]

      if 'Mayor' in name:
        name = name.replace('Mayor', '')
        role = 'Mayor'
      else:
        role = 'Councillor'

      person = Legislator(name=name, post_id="LaSalle", role=role)
      person.add_source(COUNCIL_PAGE)

      person.image = block.xpath('./parent::td//img/@src')[0]

      email = block.xpath('.//a[contains(@href, "mailto:")]/text()')[0]
      person.add_contact('email', email, None)

      office = re.findall(r'(?<=phone:)(.*)(?=home)', body, flags=re.DOTALL)
      if office:
        person.add_contact('voice', office[0].strip(), 'legislature')

      home = re.findall(r'(?<=home phone:)(.*)', body, flags=re.DOTALL)[0]
      person.add_contact('voice', home.strip(), 'residence')
      yield person

Esempio n. 10

0

Mostra file

File: people.py Progetto: fchagnon/scrapers-ca

    def get_people(self):
        """Yield a Legislator for each non-empty <strong> name on COUNCIL_PAGE.

        The page is requested with an MSIE user-agent string. A name
        containing 'maire' is posted to 'Montréal-Est'; otherwise the district
        number is the digits of the last <strong> in the sibling cells. The
        role is 'Maire' for the first matched element and 'Conseiller' for
        the rest.
        """
        page = lxmlize(
            COUNCIL_PAGE,
            user_agent=
            'Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; Trident/6.0)')

        councillors = page.xpath('//table[last()]//tr/td[1]//strong')
        for i, councillor in enumerate(councillors):
            name = councillor.text_content().strip()
            if not name:
                continue
            if 'maire' in name:
                name = name.split('maire')[1].strip()
                district = u'Montréal-Est'
            else:
                district = councillor.xpath(
                    './ancestor::td/following-sibling::td//strong'
                )[-1].text_content()
                # Raw string: '\D' is not a valid escape in a plain literal.
                district = 'District %s' % re.sub(r'\D+', '', district)
            # The e-mail link is in a later row than the name.
            email = councillor.xpath(
                './ancestor::tr/following-sibling::tr//a[contains(@href, "mailto:")]'
            )[0].text_content().strip()
            role = 'Maire' if i == 0 else 'Conseiller'
            p = Legislator(name=name, post_id=district, role=role)
            p.add_source(COUNCIL_PAGE)
            p.add_contact('email', email, None)
            yield p

Esempio n. 11

0

Mostra file

File: people.py Progetto: fchagnon/scrapers-ca

  def get_people(self):
    """Yield a Legislator for each <strong> entry that is not an e-mail address.

    The page is loaded with the 'iso-8859-1' argument. Everyone shares the
    general phone and fax read from the paragraphs after the large title. An
    entry containing 'Mayor' is the 'Maire' of 'Dollard-Des Ormeaux'; other
    entries are split on their first digit into district number and name.
    """
    page = lxmlize(COUNCIL_PAGE, 'iso-8859-1')

    shared = page.xpath('//p[@class="large_title"]/following-sibling::p/text()')
    general_phone, general_fax = shared[0], shared[1]

    entries = [
      node for node in page.xpath('//tr/td/p/strong')
      if "@" not in node.text_content()
    ]
    for entry in entries:
      label = entry.text_content()

      if 'Mayor' in label:
        name = label.replace('Mayor', '')
        district = 'Dollard-Des Ormeaux'
        role = 'Maire'
      else:
        name = re.split(r'[0-9]', label)[1]
        district = 'District ' + re.findall(r'[0-9]', label)[0]
        role = 'Conseiller'

      person = Legislator(name=name, post_id=district, role=role)
      person.add_source(COUNCIL_PAGE)
      person.image = entry.xpath('./parent::p/parent::td/parent::tr/preceding-sibling::tr//img/@src')[0]

      mail_links = entry.xpath('./parent::p/following-sibling::p//a[contains(@href, "mailto:")]')
      if mail_links:
        person.add_contact('email', mail_links[0].text_content(), None)

      person.add_contact('voice', general_phone, 'legislature')
      person.add_contact('fax', general_fax, 'legislature')

      yield person

Esempio n. 12

0

Mostra file

  def get_people(self):
    """Yield the mayor, then a 'Conseiller' Legislator per councillor row.

    The mayor and councillor pages are found through links on COUNCIL_PAGE.
    Each councillor row (all rows containing an image, except the last) is
    expected to hold exactly two cells: the image cell and the info cell.
    """
    page = lxmlize(COUNCIL_PAGE)

    mayor_url = page.xpath('//a[contains(text(), "Mayor")]/@href')[0]
    yield self.scrape_mayor(mayor_url)

    councillors_url = page.xpath('//a[contains(text(), "Councillors")]/@href')[0]
    cpage = lxmlize(councillors_url)

    councillor_rows = cpage.xpath('//tr[td//img]')[:-1]
    for councillor_row in councillor_rows:
      img_cell, info_cell = tuple(councillor_row)
      name = info_cell.xpath(
          'string(.//span[contains(text(), "Councillor")])')[len('Councillor '):]
      district = info_cell.xpath('string(.//p[contains(text(), "District")])')
      email = info_cell.xpath('string(.//a[contains(@href, "mailto:")])')
      if not email:
        # Fall back to the plain text that follows the "E-mail" label.
        email = info_cell.xpath('string(.//strong[contains(text(), "E-mail")]/following-sibling::text())')
      phone = info_cell.xpath(
          'string(.//p[contains(.//text(), "Telephone:")])').split(':')[1]
      # Search relative to this row's image cell and read the image's `src`.
      # The previous '//img/@href' searched the whole document for an
      # attribute images do not normally carry, so it never returned the
      # councillor's own photo.
      img_url_rel = img_cell.xpath('string(.//img/@src)')
      img_url = urljoin(councillors_url, img_url_rel)

      p = Legislator(name=name, post_id=district, role='Conseiller')
      p.add_source(COUNCIL_PAGE)
      p.add_source(councillors_url)
      p.add_contact('email', email, None)
      p.add_contact('voice', phone, 'legislature')
      p.image = img_url
      yield p

Esempio n. 13

0

Mostra file

File: people.py Progetto: fchagnon/scrapers-ca

    def get_people(self):
        """Yield an 'MHA' Legislator for each member row on COUNCIL_PAGE.

        Party membership is looked up by stripped name in the mapping built
        from PARTY_PAGE. Rows must have four cells (name, district, phone
        cell, e-mail); a row with no link in its first cell is skipped.
        """
        member_parties = dict(process_parties(lxmlize(PARTY_PAGE)))

        listing = lxmlize(COUNCIL_PAGE)
        rows = listing.xpath('//table[not(@id="footer")]/tr')
        for row in rows[1:]:
            # Flatten every cell to text, turning non-breaking spaces into spaces.
            texts = [
                cell.xpath('string(.)').replace(u'\xa0', u' ') for cell in row
            ]
            name, district, _, email = texts
            phone = row[2].xpath('string(text()[1])')

            photo_links = row[0].xpath('./a/@href')
            if not photo_links:
                continue  # there is a vacant district
            photo_page_url = photo_links[0]

            photo_page = lxmlize(photo_page_url)
            photo_url = photo_page.xpath('string(//table//img/@src)')
            district = district.replace(' - ', u'—')  # m-dash
            party = get_party(member_parties[name.strip()])

            member = Legislator(name=name,
                                post_id=district,
                                role='MHA',
                                party=party,
                                image=photo_url)
            member.add_source(COUNCIL_PAGE)
            member.add_source(photo_page_url)
            member.add_contact('email', email, None)
            # TODO: either fix phone regex or tweak phone value
            member.add_contact('voice', phone, 'legislature')
            yield member

Esempio n. 14

0

Mostra file

  def get_people(self):
    """Yield an 'MLA' Legislator for each member cell on COUNCIL_PAGE.

    A member cell is the <td> parent of a picture field. Its children give
    the detail link, name and riding, in that order. The photo, e-mail and
    phone come from the member's detail page.
    """
    listing = lxmlize(COUNCIL_PAGE)

    cells = listing.xpath(
        '//div[@class="views-field views-field-field-picture"]/'
        'parent::td')
    for cell in cells:
      name = cell[1].text_content().replace(' .', '. ') # typo on page
      riding = cell[2].text_content()
      if 'Mackenzie Delta' in riding:
        riding = 'Mackenzie-Delta'

      detail_url = cell[0].xpath('string(.//a/@href)')
      detail = lxmlize(detail_url)
      photo_url = detail.xpath(
          'string(//div[@class="field-item even"]/img/@src)')
      email = detail.xpath('string(//a[contains(@href, "mailto:")])')

      # The phone number follows 'P:' or 'Phone:' in the first paragraph.
      contact_text = detail.xpath(
          'string(//div[@property="content:encoded"]/p[1])')
      phone_match = re.search(r'P(hone)?: ([-0-9]+)', contact_text)
      phone = phone_match.group(2)

      member = Legislator(name=name, post_id=riding, role='MLA', image=photo_url)
      member.add_source(COUNCIL_PAGE)
      member.add_source(detail_url)
      member.add_contact('email', email, None)
      member.add_contact('voice', phone, 'legislature')
      yield member

Esempio n. 15

0

Mostra file

    def get_people(self):
        """Yield an 'MLA' Legislator for each district row of the first table.

        The header row is skipped and exactly 27 rows must remain. Each row
        must have four cells; the second holds the district link and the
        third holds the member link. E-mail, phone and photo come from
        scrape_extended_info on the member URL.
        """
        page = lxmlize(COUNCIL_PAGE)
        first_table = page.cssselect('table')[0]
        rows = first_table.cssselect('tr')[1:]
        assert len(rows) == 27  # There should be 27 districts

        for row in rows:
            # Four cells are required; only the middle two are used.
            _number_cell, district_cell, member_cell, _unused = row.cssselect(
                'td')

            district_link = district_cell.cssselect('a')[0]
            district = district_link.text_content().strip().replace(' - ', '-')

            member_link = member_cell.cssselect('a')[0]
            name = member_link.text_content()
            # Drop the honorific and party suffixes from the link text.
            for marker in ('Hon. ', ' (LIB)', ' (PC)'):
                name = name.replace(marker, '')
            name = name.strip()

            url = member_cell.cssselect('a')[0].get('href')
            email, phone, photo_url = scrape_extended_info(url)

            member = Legislator(name=name,
                                post_id=district,
                                role='MLA',
                                image=photo_url)
            member.add_source(COUNCIL_PAGE)
            member.add_source(url)
            member.add_contact('email', email, None)
            member.add_contact('voice', phone, 'legislature')
            yield member

Esempio n. 16

0

Mostra file

    def scrape_mayor(self, url):
        """Return the mayor's Legislator scraped from the page at `url`.

        The name is the third and fourth space-separated words of the second
        paragraph. The e-mail is derived from it: first initial plus the
        second word, lower-cased, at langleycity.ca. Address and phone are
        parsed from the text of the last paragraph.
        """
        item = lxmlize(url).xpath('//div[@class="item-page"]')[0]

        words = item.xpath('p[2]/text()')[0].split(' ')
        name = ' '.join(words[2:4])
        lowered = name.lower().split(' ')
        email = lowered[0][0] + lowered[1] + '@langleycity.ca'
        photo_url = item.xpath('p[1]/img/@src')[0]

        mayor = Legislator(name=name,
                           post_id='Langley',
                           role='Mayor',
                           image=photo_url)
        mayor.add_source(url)
        mayor.add_contact('email', email, None)

        lines = item.xpath('p[last()]/text()')

        # Phone is matched line by line; the address is matched on the lines
        # joined with spaces, up to the word 'Phone'.
        phone_matches = re.findall(r'Phone(:?) (.*)', '\n'.join(lines))
        phone = phone_matches[0][1]
        address = re.findall(r'Address: (.*) Phone', ' '.join(lines))[0]
        mayor.add_contact('address', address, 'office')
        mayor.add_contact('voice', phone, 'office')

        return mayor

Esempio n. 17

0

Mostra file

File: people.py Progetto: fchagnon/scrapers-ca

    def get_people(self):
        """Yield a Legislator for each member block in the two 'img_four' divs.

        Only the first child div of the first 'img_four' div is used, plus
        every child div of the second. The bold heading is 'name, position';
        a position with a space is split into role and a post number
        (converted by post_number), otherwise the post is 'Wellesley'.
        """
        page = lxmlize(COUNCIL_PAGE)

        first_group = page.xpath('//div[@class="img_four"][1]/div[1]')
        second_group = page.xpath('//div[@class="img_four"][2]/div')
        for member in first_group + second_group:
            heading = member.xpath('string(./p/strong)')
            name, position = heading.split(',')
            position = position.strip()
            if ' ' not in position:
                post_id = 'Wellesley'
            else:
                position, post_text = position.split(' ', 1)
                post_id = post_number(post_text)

            address_lines = [
                line.strip() for line in member.xpath('./p/text()')
            ]
            addr = '\n'.join(address_lines).strip()
            phone = member.xpath(
                'string(.//a[starts-with(@href, "tel:")])')
            email = member.xpath(
                'string(.//a[starts-with(@href, "mailto:")])')
            image = member.xpath('string(.//img[1]/@src)')

            person = Legislator(name=name,
                                post_id=post_id,
                                role=position,
                                image=image)
            person.add_source(COUNCIL_PAGE)
            person.add_contact('address', addr, 'legislature')
            person.add_contact('voice', phone, 'legislature')
            person.add_contact('email', email, None)
            yield person

Esempio n. 18

0

Mostra file

File: people.py Progetto: fchagnon/scrapers-ca

def mayor_data(url):
  """Return the 'Mississauga' mayor's Legislator scraped from `url`.

  The name is everything after the first three whitespace-separated words of
  the paragraph containing "Worship Mayor". The e-mail is the text of the
  first mailto link.
  """
  page = lxmlize(url)

  # TODO: Consider getting photo. It's on a separate page.
  heading = page.xpath('//p[contains(text(), "Worship Mayor")]/text()')[0]
  words = heading.split()
  name = ' '.join(words[3:]) # TODO: probably too brittle
  email = page.xpath('//a[contains(@href, "mailto")]/text()')[0]

  mayor = Legislator(name=name, post_id='Mississauga', role='Mayor')
  mayor.add_source(url)
  mayor.add_contact('email', email, None)

  return mayor

Esempio n. 19

0

Mostra file

File: people.py Progetto: fchagnon/scrapers-ca

def mayor_data(url):
    """Return the 'Mississauga' mayor's Legislator scraped from `url`.

    The name is everything after the first three whitespace-separated words
    of the paragraph containing "Worship Mayor". The e-mail is the text of
    the first mailto link.
    """
    page = lxmlize(url)

    # TODO: Consider getting photo. It's on a separate page.
    heading = page.xpath('//p[contains(text(), "Worship Mayor")]/text()')[0]
    words = heading.split()
    name = ' '.join(words[3:])  # TODO: probably too brittle
    email = page.xpath('//a[contains(@href, "mailto")]/text()')[0]

    mayor = Legislator(name=name, post_id='Mississauga', role='Mayor')
    mayor.add_source(url)
    mayor.add_contact('email', email, None)

    return mayor

Esempio n. 20

0

Mostra file

def scrape_mayor(url):
  """Return the 'Caledon' mayor's Legislator scraped from `url`.

  The name comes from the bold page heading with 'Mayor' removed. The address
  and phone are taken from the second text node of the paragraphs whose bold
  label contains "mail" and "phone" respectively.
  """
  page = lxmlize(url)

  heading = page.xpath('//div[@id="printAreaContent"]/h1/strong/text()')[0]
  name = heading.replace('Mayor', '').strip()

  mail_text = page.xpath('//strong[contains(text(), "mail")]/parent::p/text()')[1]
  address = mail_text.replace(':', '').strip()

  phone_text = page.xpath('//strong[contains(text(), "phone")]/parent::p/text()')[1]
  phone = phone_text.split()[1]

  mayor = Legislator(name=name, post_id='Caledon', role='Mayor')
  mayor.add_source(COUNCIL_PAGE)
  mayor.add_source(url)
  mayor.image = page.xpath('//h2[contains(text(), "About me")]/img/@src')[0]
  mayor.add_contact('address', address, 'legislature')
  mayor.add_contact('voice', phone, 'legislature')
  return mayor

Esempio n. 21

0

Mostra file

File: people.py Progetto: fchagnon/scrapers-ca

    def get_people(self):
        """Yield a Legislator for each two-cell first row of a table.

        The last matching row is dropped. A row's non-blank text nodes end
        with name, phone and e-mail, in that order. A row with exactly three
        text nodes is the 'Maire' of 'Saint-Jérôme'; otherwise the first text
        node (minus 'numéro ') is the district of a 'Conseiller'.
        """
        page = lxmlize(COUNCIL_PAGE)

        councillor_trs = [
            tr for tr in page.xpath('//table//tr[1]') if len(tr) == 2
        ][:-1]
        for councillor_tr in councillor_trs:
            desc = [
                text.strip()
                for text in councillor_tr.xpath('.//text()[normalize-space()]')
                if text.strip()
            ]

            if len(desc) == 3:
                role = 'Maire'
                district = u'Saint-Jérôme'
            else:
                role = 'Conseiller'
                district = desc[0].replace(u'numéro ', '')

            name, phone, email = desc[-3], desc[-2], desc[-1]

            # XPath string() already returns the whole attribute value as a
            # string. Indexing it with [0] kept only the first character and
            # raised IndexError when the row had no image.
            image = councillor_tr.xpath('string(.//img/@src)')

            p = Legislator(name=name, post_id=district, role=role)
            p.add_source(COUNCIL_PAGE)
            p.image = image
            p.add_contact('voice', phone, 'legislature')
            p.add_contact('email', email, None)
            yield p

Esempio n. 22

0

Mostra file

File: people.py Progetto: fchagnon/scrapers-ca

    def get_people(self):
        """Yield the 'Abbotsford' councillors, then the mayor.

        Councillors are the 3rd to 10th navigation links whose text starts
        with 'Councillor'. Each linked page's first titled anchor supplies
        the name (title) and e-mail (href). The mayor is read from MAYOR_URL
        and gets no e-mail contact.
        """
        index_page = lxmlize(COUNCIL_PAGE)
        nav_links = index_page.xpath('//li[@id="pageid2117"]/ul/li/a')[2:10]
        for nav_link in nav_links:
            if not nav_link.text.startswith('Councillor'):
                continue
            url = nav_link.attrib['href']
            profile = lxmlize(url)
            titled_anchor = profile.xpath('//a[@title]')[0]
            name = titled_anchor.attrib['title']
            email = titled_anchor.attrib['href'][len('mailto:'):]
            photo_url = profile.xpath(
                'string(//div[@class="pageContent"]//img[@align="right"]/@src)'
            )
            councillor = Legislator(name=name,
                                    post_id='Abbotsford',
                                    role='Councillor',
                                    image=photo_url)
            councillor.add_source(url)
            councillor.add_contact('email', email, None)
            yield councillor

        mayor_page = lxmlize(MAYOR_URL)
        # Drop the first word of the heading; the rest is the name.
        heading = mayor_page.xpath('string(//h1)')
        name = heading.split(' ', 1)[1]
        photo_url = mayor_page.xpath('string(//img[@hspace=10]/@src)')
        # email is hidden behind a form
        mayor = Legislator(name=name,
                           post_id='Abbotsford',
                           role='Mayor',
                           image=photo_url)
        mayor.add_source(MAYOR_URL)
        yield mayor

Esempio n. 23

0

Mostra file

File: people.py Progetto: fchagnon/scrapers-ca

  def get_people(self):
    """Yield the 'Abbotsford' councillors, then the mayor.

    Councillors are the 3rd to 10th navigation links whose text starts with
    'Councillor'. Each linked page's first titled anchor supplies the name
    (title) and e-mail (href). The mayor is read from MAYOR_URL and gets no
    e-mail contact.
    """
    index_page = lxmlize(COUNCIL_PAGE)
    nav_links = index_page.xpath('//li[@id="pageid2117"]/ul/li/a')[2:10]
    for nav_link in nav_links:
      if not nav_link.text.startswith('Councillor'):
        continue
      url = nav_link.attrib['href']
      profile = lxmlize(url)
      titled_anchor = profile.xpath('//a[@title]')[0]
      name = titled_anchor.attrib['title']
      email = titled_anchor.attrib['href'][len('mailto:'):]
      photo_url = profile.xpath('string(//div[@class="pageContent"]//img[@align="right"]/@src)')
      councillor = Legislator(name=name, post_id='Abbotsford', role='Councillor',
                              image=photo_url)
      councillor.add_source(url)
      councillor.add_contact('email', email, None)
      yield councillor

    mayor_page = lxmlize(MAYOR_URL)
    # Drop the first word of the heading; the rest is the name.
    heading = mayor_page.xpath('string(//h1)')
    name = heading.split(' ', 1)[1]
    photo_url = mayor_page.xpath('string(//img[@hspace=10]/@src)')
    # email is hidden behind a form
    mayor = Legislator(name=name, post_id='Abbotsford', role='Mayor', image=photo_url)
    mayor.add_source(MAYOR_URL)
    yield mayor

Esempio n. 24

0

Mostra file

File: people.py Progetto: rhymeswithcycle/scrapers-ca

    def get_people(self):
        """Yield the mayor of Trois-Rivières, then one 'Conseiller' per district.

        The mayor is read from MAYOR_URL and has no e-mail contact. District
        names and relative URLs are pulled by regex out of the raw text of
        COUNCIL_PAGE. A leading 'de', 'de la', 'des' or 'du' is removed from
        the district name, except for four districts kept verbatim.
        """
        # mayor first, can't find email
        page = lxmlize(MAYOR_URL)
        photo_url = page.xpath('string(//img/@src[contains(., "Maire")])')
        name = page.xpath('string(//td[@class="contenu"]/text()[last()])')
        p = Legislator(name=name, post_id=u"Trois-Rivières", role="Maire", image=photo_url)
        p.add_source(MAYOR_URL)
        yield p

        resp = requests.get(COUNCIL_PAGE)
        # page rendering through JS on the client
        page_re = re.compile(r'createItemNiv3.+"District (.+?)".+(index.+)\\"')
        for district, url_rel in page_re.findall(resp.text):
            if district not in ("des Estacades", "des Plateaux", "des Terrasses", "du Sanctuaire"):
                # Raw string: '\A' is not a valid escape in a plain literal.
                district = re.sub(r"\A(?:de(?: la)?|des|du) ", "", district)

            url = urljoin(COUNCIL_PAGE, url_rel)
            page = lxmlize(url)
            name = page.xpath("string(//h2)")
            email = page.xpath('string(//a/@href[contains(., "mailto:")])')[len("mailto:") :]
            photo_url = page.xpath('string(//img/@src[contains(., "Conseiller")])')
            p = Legislator(name=name, post_id=district, role="Conseiller", image=photo_url)
            p.add_source(url)
            p.add_contact("email", email, None)
            yield p

Esempio n. 25

0

Mostra file

    def get_people(self):
        """Yield the mayor of Trois-Rivières, then one 'Conseiller' per district.

        The mayor is read from MAYOR_URL and has no e-mail contact. District
        names and relative URLs are pulled by regex out of the raw text of
        COUNCIL_PAGE. A leading 'de', 'de la', 'des' or 'du' is removed from
        the district name, except for four districts kept verbatim.
        """
        # mayor first, can't find email
        page = lxmlize(MAYOR_URL)
        photo_url = page.xpath('string(//img/@src[contains(., "Maire")])')
        name = page.xpath('string(//td[@class="contenu"]/text()[last()])')
        p = Legislator(name=name,
                       post_id=u"Trois-Rivières",
                       role="Maire",
                       image=photo_url)
        p.add_source(MAYOR_URL)
        yield p

        resp = requests.get(COUNCIL_PAGE)
        # page rendering through JS on the client
        page_re = re.compile(r'createItemNiv3.+"District (.+?)".+(index.+)\\"')
        for district, url_rel in page_re.findall(resp.text):
            if district not in ('des Estacades', 'des Plateaux',
                                'des Terrasses', 'du Sanctuaire'):
                # Raw string: '\A' is not a valid escape in a plain literal.
                district = re.sub(r'\A(?:de(?: la)?|des|du) ', '', district)

            url = urljoin(COUNCIL_PAGE, url_rel)
            page = lxmlize(url)
            name = page.xpath('string(//h2)')
            email = page.xpath(
                'string(//a/@href[contains(., "mailto:")])')[len('mailto:'):]
            photo_url = page.xpath(
                'string(//img/@src[contains(., "Conseiller")])')
            p = Legislator(name=name,
                           post_id=district,
                           role='Conseiller',
                           image=photo_url)
            p.add_source(url)
            p.add_contact('email', email, None)
            yield p

Esempio n. 26

0

Mostra file

File: people.py Progetto: fchagnon/scrapers-ca

  def get_people(self):
    """Yield a Legislator for each two-cell first row of a table.

    The last matching row is dropped. A row's non-blank text nodes end with
    name, phone and e-mail, in that order. A row with exactly three text nodes
    is the 'Maire' of 'Saint-Jérôme'; otherwise the first text node (minus
    'numéro ') is the district of a 'Conseiller'.
    """
    page = lxmlize(COUNCIL_PAGE)

    councillor_trs = [tr for tr in page.xpath('//table//tr[1]') if len(tr) == 2][:-1]
    for councillor_tr in councillor_trs:
      desc = [text.strip() for text in councillor_tr.xpath('.//text()[normalize-space()]') if text.strip()]

      if len(desc) == 3:
        role = 'Maire'
        district = u'Saint-Jérôme'
      else:
        role = 'Conseiller'
        district = desc[0].replace(u'numéro ', '')

      name, phone, email = desc[-3], desc[-2], desc[-1]

      # XPath string() already returns the whole attribute value as a string.
      # Indexing it with [0] kept only the first character and raised
      # IndexError when the row had no image.
      image = councillor_tr.xpath('string(.//img/@src)')

      p = Legislator(name=name, post_id=district, role=role)
      p.add_source(COUNCIL_PAGE)
      p.image = image
      p.add_contact('voice', phone, 'legislature')
      p.add_contact('email', email, None)
      yield p

Esempio n. 27

0

Mostra file

File: people.py Progetto: fchagnon/scrapers-ca

  def get_people(self):
    """Yield a Legislator for each 'WSIndent' paragraph link on COUNCIL_PAGE.

    A link whose text contains 'Ward N' is a 'Councillor' for that ward; any
    other link is the 'Mayor' of 'Kawartha Lakes'. The e-mail and photo come
    from the linked page.
    """
    listing = lxmlize(COUNCIL_PAGE)

    for link in listing.xpath('//p[@class="WSIndent"]/a'):
      label = link.text_content()
      wards = re.findall(r'(Ward [0-9]{1,2})', label)
      if not wards:
        district = 'Kawartha Lakes'
        name = label.replace('Mayor', '').strip()
        role = 'Mayor'
      else:
        district = wards[0]
        name = label.replace(district, '').strip()
        role = 'Councillor'

      url = link.attrib['href']
      profile = lxmlize(url)
      # Keep whatever follows the last ':' of the mailto href.
      mailto = profile.xpath('//a[contains(@href, "mailto:")]/@href')[0]
      email = mailto.rsplit(':', 1)[1].strip()
      image = profile.xpath('//img[@class="image-right"]/@src')[0]

      person = Legislator(name=name, post_id=district, role=role)
      person.add_source(COUNCIL_PAGE)
      person.add_source(url)
      person.add_contact('email', email, None)
      person.image = image
      yield person

Esempio n. 28

0

Mostra file

File: people.py Progetto: fchagnon/scrapers-ca

  def get_people(self):
    """Yield the mayor, then a 'Councillor' Legislator per councillor paragraph.

    The page is requested with an MSIE user-agent string and the mayor is
    built by self.scrape_mayor from the same page. A councillor paragraph is
    one with a bold "Councillor" label. Its district is the text after
    'Ward N, '. The phone and e-mail may sit in a later sibling paragraph.
    """
    page = lxmlize(COUNCIL_PAGE, user_agent='Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; Trident/6.0)')

    yield self.scrape_mayor(page)

    councillors = page.xpath('//strong[contains(text(), "Councillor")]/parent::p|//b[contains(text(), "Councillor")]/parent::p')
    for councillor in councillors:

      name = councillor.xpath('./strong/text()|./b/text()')[0].replace('Councillor', '').strip()
      # Raw string: '\d' is not a valid escape in a plain literal.
      district = re.findall(r'(?<=Ward \d, ).*', councillor.text_content())[0].strip()

      p = Legislator(name=name, post_id=district, role='Councillor')
      p.add_source(COUNCIL_PAGE)

      p.image = councillor.xpath('.//img/@src')[0]

      # Step through following paragraphs until one mentions 'Phone'.
      phone = re.findall(r'Phone(.*)', councillor.text_content())
      node = councillor
      while not phone:
        node = node.xpath('./following-sibling::p')[1]
        phone = re.findall(r'Phone(.*)', node.text_content())
      phone = phone[0].strip()

      email = councillor.xpath('.//a[contains(@href, "mailto:")]')
      if not email:
        email = councillor.xpath('./following-sibling::p//a[contains(@href, "mailto")]')
      email = email[0].text_content()

      # Seven-digit numbers are prefixed with '902-'.
      if len(re.sub(r'\D', '', phone)) == 7:
        phone = '902-%s' % phone
      p.add_contact('voice', phone, 'legislature')
      p.add_contact('email', email, None)

      yield p

Esempio n. 29

0

Mostra file

  def get_people(self):
    """Yield a Legislator for each non-empty cell of the PageContent table.

    The page is loaded with the 'iso-8859-1' argument. The first cell is the
    'Maire' of 'Kirkland'. Other cells are 'Conseiller' entries whose district
    is the part of the <h2> after '- ', with ' Ouest'/' Est' lower-cased.
    """
    page = lxmlize(COUNCIL_PAGE, 'iso-8859-1')

    cells = page.xpath('//div[@id="PageContent"]/table/tbody/tr/td')
    for cell in cells:
      if not cell.text_content().strip():
        continue

      if cell == cells[0]:
        district = 'Kirkland'
        role = 'Maire'
      else:
        heading = cell.xpath('.//h2')[0].text_content()
        district = re.search('- (.+)', heading).group(1).strip()
        district = district.replace(' Ouest', ' ouest').replace(' Est', ' est')
        role = 'Conseiller'

      name = cell.xpath('.//strong/text()')[0]

      # Normalise 'T 514 …, # 123' style text into a dashed number with ' x' extension.
      raw_phone = cell.xpath('.//div[contains(text(), "#")]/text()')[0]
      phone = raw_phone.replace('T ', '').replace(' ', '-').replace(',-#-', ' x')
      email = cell.xpath('.//a[contains(@href, "mailto:")]')[0].text_content()

      person = Legislator(name=name, post_id=district, role=role)
      person.add_source(COUNCIL_PAGE)
      person.add_contact('voice', phone, 'legislature')
      person.add_contact('email', email, None)
      person.image = cell.xpath('.//img/@src')[0]
      yield person

Esempio n. 30

0

Mostra file

    def get_people(self):
        """Yield a Legislator for each 'WSIndent' paragraph link on COUNCIL_PAGE.

        A link whose text contains 'Ward N' is a 'Councillor' for that ward;
        any other link is the 'Mayor' of 'Kawartha Lakes'. The e-mail and
        photo come from the linked page.
        """
        listing = lxmlize(COUNCIL_PAGE)

        for link in listing.xpath('//p[@class="WSIndent"]/a'):
            label = link.text_content()
            wards = re.findall(r'(Ward [0-9]{1,2})', label)
            if not wards:
                district = 'Kawartha Lakes'
                name = label.replace('Mayor', '').strip()
                role = 'Mayor'
            else:
                district = wards[0]
                name = label.replace(district, '').strip()
                role = 'Councillor'

            url = link.attrib['href']
            profile = lxmlize(url)
            # Keep whatever follows the last ':' of the mailto href.
            mailto = profile.xpath(
                '//a[contains(@href, "mailto:")]/@href')[0]
            email = mailto.rsplit(':', 1)[1].strip()
            image = profile.xpath('//img[@class="image-right"]/@src')[0]

            person = Legislator(name=name, post_id=district, role=role)
            person.add_source(COUNCIL_PAGE)
            person.add_source(url)
            person.add_contact('email', email, None)
            person.image = image
            yield person

Esempio n. 31

0

Mostra file

File: people.py Progetto: fchagnon/scrapers-ca

def mayor_data(url, name):
  """Return the Legislator record for the mayor of Regina.

  The photo is the first image inside the "contentcontainer" div of the
  page at `url`; the email is the text of the first link containing "@" on
  MAYOR_CONTACT_URL.
  """
  mayor_page = lxmlize(url)
  photo_src = mayor_page.xpath(
      'string((//div[@id="contentcontainer"]//img)[1]/@src)')
  photo_url = urljoin(url, photo_src)

  email = lxmlize(MAYOR_CONTACT_URL).xpath(
      'string(//a[contains(., "@")][1])')

  mayor = Legislator(name=name, post_id='Regina', role='Mayor')
  for source in (COUNCIL_PAGE, url, MAYOR_CONTACT_URL):
    mayor.add_source(source)
  mayor.add_contact('email', email, None)
  mayor.image = photo_url

  return mayor

Esempio n. 32

0

Mostra file

File: people.py Progetto: fchagnon/scrapers-ca

def councillor_data(url, name, ward):
  """Return the Legislator record for one councillor from the page at `url`."""
  detail = lxmlize(url)

  photo_src = detail.xpath('string(//img[@class="bio_pic"]/@src)')
  photo_url = urljoin(url, photo_src)
  phone = detail.xpath(
      'string(//td[contains(., "Phone")]/following-sibling::td)')

  # The email link is, sadly, a form: the address is rebuilt from the part
  # of its href after the first '=' plus the '@winnipeg.ca' domain.
  form_href = detail.xpath('string(//tr[contains(., "Email")]//a/@href)')
  email = form_href.split('=')[1] + '@winnipeg.ca'

  councillor = Legislator(name=name, post_id=ward, role='Councillor')
  for source in (COUNCIL_PAGE, url):
    councillor.add_source(source)
  councillor.add_contact('email', email, None)
  councillor.add_contact('voice', phone, 'legislature')
  councillor.image = photo_url

  return councillor

Esempio n. 33

0

Mostra file

def mayor_data(url):
  """Return the Legislator record for the mayor of Strathcona County.

  The name is read from the page's <h1>, taking everything after the first
  '-' and trimming surrounding whitespace. The photo, email and phone number
  come from the "usercontent" div.

  Raises IndexError if the heading contains no '-' or the "usercontent"
  div is missing.
  """
  page = lxmlize(url)
  # Split only once so a hyphenated name is kept whole, and strip the
  # whitespace that surrounds the separator.
  name = page.xpath('string(//h1)').split('-', 1)[1].strip()
  content_node = page.xpath('//div[@class="usercontent"]')[0]
  photo_url = urljoin(url, content_node.xpath('string(.//img[1]/@src)'))
  email = content_node.xpath('string(.//a/text()[contains(., "@")])')
  phone = content_node.xpath('string(.//strong[contains(., "Phone")]/'
      'following-sibling::text()[1])').strip()

  p = Legislator(name=name, post_id='Strathcona County', role='Mayor')
  p.add_source(COUNCIL_PAGE)
  p.add_source(url)
  p.add_contact('voice', phone, 'legislature')
  p.add_contact('email', email, None)
  p.image = photo_url
  return p

Esempio n. 34

0

Mostra file

File: people.py Progetto: fchagnon/scrapers-ca

def mayor_data(url, name):
    """Build and return the Legislator entry for Regina's mayor.

    Reads the photo from the page at `url` and the email address from
    MAYOR_CONTACT_URL.
    """
    profile = lxmlize(url)
    image_src = profile.xpath(
        'string((//div[@id="contentcontainer"]//img)[1]/@src)')
    image_url = urljoin(url, image_src)

    contact = lxmlize(MAYOR_CONTACT_URL)
    address = contact.xpath('string(//a[contains(., "@")][1])')

    legislator = Legislator(name=name, post_id='Regina', role='Mayor')
    legislator.add_source(COUNCIL_PAGE)
    legislator.add_source(url)
    legislator.add_source(MAYOR_CONTACT_URL)
    legislator.add_contact('email', address, None)
    legislator.image = image_url

    return legislator

Esempio n. 35

0

Mostra file

File: people.py Progetto: fchagnon/scrapers-ca

  def get_people(self):
    """Yield a Legislator for each member page linked from COUNCIL_PAGE.

    Links ending in 'address.html' are skipped. The first word of a member
    page's heading is used as the role; the remaining words, minus the last
    one, form the name.
    """
    index_page = lxmlize(COUNCIL_PAGE)
    for anchor in index_page.xpath('//div[@class="section"]//a'):
      url = anchor.attrib['href']
      if url.endswith('address.html'):
        continue

      member_page = lxmlize(url)
      heading = member_page.xpath('string(//div[@id="content"]/h1)')
      role, remainder = heading.split(' ', 1)
      words = remainder.split()
      name = ' '.join(words[:-1])
      photo_url = member_page.xpath('string(//img[@class="float-right"]/@src)')
      email = member_page.xpath('string(//a[starts-with(@href, "mailto:")])')

      member = Legislator(name=name, post_id='Saanich', role=role,
                          image=photo_url)
      member.add_source(COUNCIL_PAGE)
      member.add_contact('email', email, None)
      yield member

Esempio n. 36

0

Mostra file

def councillor_data(url, name, role):
    """Return the Legislator record for a Victoria council member.

    Email, phone and photo are scraped from the member page at `url`. The
    phone number is the text after the first ':' of the "Phone" line.
    """
    detail = lxmlize(url)
    email = detail.xpath('string(//a[contains(@href, "mailto")])')

    phone_line = detail.xpath(
        'string(//div[@id="content"]//strong[1]/'
        'following-sibling::text()[contains(., "Phone")])')
    phone = phone_line.split(':')[1]

    image_src = detail.xpath('string(//div[@id="content"]//img[1]/@src)')
    photo_url = urljoin(url, image_src)

    # TODO: should post_id be "Neighborhood Liaison"?
    member = Legislator(name=name, post_id='Victoria', role=role)
    for source in (COUNCIL_PAGE, url):
        member.add_source(source)
    member.add_contact('email', email, None)
    member.add_contact('voice', phone, 'legislature')
    member.image = photo_url
    return member

Esempio n. 37

0

Mostra file

File: people.py Progetto: fchagnon/scrapers-ca

    def get_people(self):
        """Yield the mayor, then one Legislator per councillor cell.

        Councillor cells are the "cityfonts" table cells of COUNCIL_PAGE;
        cells without any link are skipped. Email and phone numbers are read
        from the page that the cell's last link points to.
        """
        council_page = lxmlize(COUNCIL_PAGE, 'iso-8859-1')

        mayor_url = council_page.xpath('//li[@id="pageid193"]//a/@href')[0]
        yield scrape_mayor(mayor_url)

        for cell in council_page.xpath('//td[@class="cityfonts"]'):
            links = cell.xpath('.//a')
            if not links:
                continue
            name = links[0].text_content()

            # Keep only span texts that are non-blank once non-breaking
            # spaces are treated as ordinary spaces.
            labels = []
            for text in cell.xpath('.//span/text()'):
                if re.sub(u'\xa0', ' ', text).strip():
                    labels.append(text.strip())

            district = labels[1]
            if district == 'At Large':
                district = 'Moncton'
            elif district == 'Deputy Mayor':
                # For the deputy mayor the district is the following label.
                district = labels[2]

            url = links[-1].attrib['href']
            detail_page = lxmlize(url)

            person = Legislator(name=name, post_id=district, role='Councillor')
            person.add_source(COUNCIL_PAGE)
            person.add_source(url)
            person.image = cell.xpath('.//img/@src')[0]

            mailto = detail_page.xpath(
                'string(.//a[contains(@href, "mailto:")]/@href)')
            person.add_contact('email', mailto[len('mailto:'):], None)

            contact_info = detail_page.xpath(
                './/table[@class="whiteroundedbox"]//td/p[contains(text()," ")]'
            )[0].text_content()
            matches = re.findall(r'(([0-9]{3}-)?([0-9]{3}-[0-9]{4}))',
                                 contact_info)
            for number, _area, _local in matches:
                # Seven-digit numbers lack an area code; prefix 506.
                if len(re.sub(r'\D', '', number)) == 7:
                    number = '506-%s' % number
                person.add_contact('voice', number, 'legislature')
            yield person

Esempio n. 38

0

Mostra file

File: people.py Progetto: fchagnon/scrapers-ca

 def get_people(self):
   """Yield one MLA Legislator per row of the members table on COUNCIL_PAGE.

   The first three text nodes of a row are taken as the member's name
   ("Last, First"), party abbreviation and post. Photo, phone and email
   come from get_details() on the page linked from the row's first cell.

   Raises ValueError if a row has fewer than three text nodes.
   """
   page = lxmlize(COUNCIL_PAGE)
   for row in page.xpath('//div[@id="content"]/table/tbody/tr'):
     full_name, party_abbr, post = row.xpath('./td//text()')[:3]
     # Strip each comma-separated part before reordering, otherwise the
     # space after the comma ends up at the front of the name.
     parts = [part.strip() for part in full_name.split(',')]
     name = ' '.join(part for part in reversed(parts) if part)
     detail_url = row[0][0].attrib['href']
     image, phone, email = get_details(detail_url)
     p = Legislator(name=name, post_id=post, role='MLA',
         party=get_party(party_abbr), image=image)
     p.add_source(COUNCIL_PAGE)
     p.add_source(detail_url)
     p.add_contact('voice', phone, 'legislature')
     p.add_contact('email', email, None)
     yield p

Esempio n. 39

0

Mostra file

File: people.py Progetto: fchagnon/scrapers-ca

    def get_people(self):
        """Yield a Legislator for every table row found on COUNCIL_PAGE.

        The row whose post text is 'Maire de Laval' becomes the mayor of
        Laval. For every other row the district is the post text with the
        "Circonscription (no) N - " prefix, "L'" and all spaces removed.
        """
        page = lxmlize(COUNCIL_PAGE)
        for councillor_row in page.xpath('//tr'):
            post = councillor_row.xpath('string(./td[2]/p/text())')
            if post == 'Maire de Laval':
                district = 'Laval'
                role = 'Maire'
            else:
                # Raw string: '\d' and '\D' are regex escapes, not valid
                # Python string escapes.
                district = re.sub(r'^C.?irconscription (?:no )?\d+\D- ', '',
                                  post)
                district = district.replace("L'", '').replace(' ', '')
                district = district.replace('bois', 'Bois')
                role = 'Conseiller'
            full_name = councillor_row.xpath(
                'string(./td[2]/p/text()[2])').strip()
            # Drop the first word of the name line.
            name = ' '.join(full_name.split()[1:])

            phone = councillor_row.xpath(
                'string(.//span[@class="icon-phone"]/following::text())')
            mailto = councillor_row.xpath(
                'string(.//a[contains(@href, "mailto:")]/@href)')
            email = mailto[len('mailto:'):]
            photo_url = councillor_row[0][0].attrib['src']
            p = Legislator(name=name,
                           post_id=district,
                           role=role,
                           image=photo_url)
            p.add_source(COUNCIL_PAGE)
            p.add_contact('voice', phone, 'legislature')
            p.add_contact('email', email, None)
            yield p

Esempio n. 40

0

Mostra file

File: people.py Progetto: fchagnon/scrapers-ca

  def get_people(self):
    """Yield one MLA Legislator per non-vacant row of the members table.

    Each row after the header of the first table on COUNCIL_PAGE must hold
    exactly three cells: name ("Last, First"), constituency and party.
    Photo and email come from get_details() on the member's linked page.

    Raises ValueError if a row does not have exactly three cells or a name
    does not contain exactly one comma.
    """
    member_page = lxmlize(COUNCIL_PAGE)
    table = member_page.xpath('//table')[0]
    rows = table.cssselect('tr')[1:]  # skip the header row
    for row in rows:
      (namecell, constitcell, partycell) = row.cssselect('td')
      full_name = namecell.text_content().strip()
      if full_name.lower() == 'vacant':
        continue
      (last, first) = full_name.split(',')
      name = first.replace('Hon.', '').strip() + ' ' + last.title().strip()
      # Collapse runs of whitespace in the constituency name.
      district = ' '.join(constitcell.text_content().split())
      party = get_party(partycell.text)

      url = namecell.cssselect('a')[0].get('href')
      photo, email = get_details(url)

      p = Legislator(name=name, post_id=district, role='MLA',
          party=party, image=photo)
      p.add_source(COUNCIL_PAGE)
      p.add_source(url)
      p.add_contact('email', email, None)
      yield p

Esempio n. 41

0

Mostra file

  def get_people(self):
    """Yield the mayor, then up to two councillors for each ward heading.

    Ward headings are the <h3> elements of COUNCIL_PAGE; the district name
    is the heading text without its leading "WARD N - " prefix. Only the
    first two <p> siblings following a heading are read as councillors
    (presumably because later siblings belong to later wards -- confirm
    against the page layout).
    """
    page = lxmlize(COUNCIL_PAGE)

    mayor_info = page.xpath('//h2[contains(text(), "MAYOR")]//following-sibling::p')[0]
    yield self.scrape_mayor(mayor_info)

    wards = page.xpath('//h3')
    for ward in wards:
      district = re.sub(r'\AWARD \d+ - ', '', ward.text_content())
      # Slicing instead of comparing against councillors[1] avoids an
      # IndexError when a heading is followed by a single paragraph.
      councillors = ward.xpath('following-sibling::p')[:2]
      for councillor in councillors:
        name = councillor.xpath('./strong')[0].text_content()

        p = Legislator(name=name, post_id=district, role='Councillor')
        p.add_source(COUNCIL_PAGE)

        info = councillor.xpath('./text()')
        address = info.pop(0)
        p.add_contact('address', address, 'legislature')

        # get phone numbers
        for line in info:
          # The backslashes are escaped so the regex engine, not the string
          # literal, interprets \xbb and \xa0; unlike the ur'' prefix this
          # form is valid on both Python 2 and Python 3.
          stuff = re.split(u'(\\xbb)|(\\xa0)', line)
          tmp = [y for y in stuff if y and not re.match(u'\\xa0', y)]
          self.get_tel_numbers(tmp, p)

        email = councillor.xpath('string(./a)')
        p.add_contact('email', email, None)

        yield p

Esempio n. 42

0

Mostra file

    def get_people(self):
        """Yield one MLA Legislator per non-vacant row of the members table.

        Each row after the header of the first table on COUNCIL_PAGE must
        hold exactly three cells: name ("Last, First"), constituency and
        party. Photo and email come from get_details() on the member's
        linked page.

        Raises ValueError if a row does not have exactly three cells or a
        name does not contain exactly one comma.
        """
        member_page = lxmlize(COUNCIL_PAGE)
        table = member_page.xpath('//table')[0]
        rows = table.cssselect('tr')[1:]  # skip the header row
        for row in rows:
            (namecell, constitcell, partycell) = row.cssselect('td')
            full_name = namecell.text_content().strip()
            if full_name.lower() == 'vacant':
                continue
            (last, first) = full_name.split(',')
            name = first.replace('Hon.',
                                 '').strip() + ' ' + last.title().strip()
            # Collapse runs of whitespace in the constituency name.
            district = ' '.join(constitcell.text_content().split())
            party = get_party(partycell.text)

            url = namecell.cssselect('a')[0].get('href')
            photo, email = get_details(url)

            p = Legislator(name=name,
                           post_id=district,
                           role='MLA',
                           party=party,
                           image=photo)
            p.add_source(COUNCIL_PAGE)
            p.add_source(url)
            p.add_contact('email', email, None)
            yield p

Esempio n. 43

0

Mostra file

File: people.py Progetto: fchagnon/scrapers-ca

  def get_people(self):
    """Yield a Legislator for each council slide found on COUNCIL_PAGE.

    Name, position and photo come from the slide element. Email and phone
    are looked up, best effort, from the mailto link whose text contains
    the member's name; a member with no matching link is yielded without
    contact details.
    """
    page = lxmlize(COUNCIL_PAGE)

    councillor_elems = page.xpath('//a[contains(@class, "slide item-")]')
    email_links = page.xpath('//a[contains(@href, "mailto:")]')
    for elem in councillor_elems:
      name_elem = elem.xpath('.//strong')[0]
      # Drop an optional leading "Mr. " honorific.
      name = re.search(r'(Mr\. )?(.+)', name_elem.text).group(2)
      position = name_elem.xpath('string(following-sibling::text())')
      role = 'Conseiller'
      if 'Mayor' in position:
        district = 'Brossard'
        role = 'Maire'
      else:
        # Keep the position text up to and including its first digit.
        district = re.sub(r'(?<=[0-9]).+', '', position).strip()

      photo = re.search(r'url\((.+)\)', elem.attrib['style']).group(1)

      p = Legislator(name=name, post_id=district, role=role, image=photo)
      p.add_source(COUNCIL_PAGE)

      try:
        email_elem = [link for link in email_links
                      if name in link.text_content().replace(u'\u2019', "'")][0]
        # Capture everything after the "mailto:" scheme. The previous
        # pattern was a character class that matched a single character.
        email_match = re.match(r'mailto:(.+)', email_elem.attrib['href'])
        if email_match:
          p.add_contact('email', email_match.group(1), None)
        phone = email_elem.xpath(
            './following-sibling::text()[contains(., "450")]')[0]
        p.add_contact('voice', phone, 'legislature')
      except IndexError: # oh Francyne/Francine Raymond, who are you, really?
        # Deliberate best effort: no matching link or no phone text.
        pass

      yield p

Esempio n. 44

0

Mostra file

def mayor_data(page):
  """Return the Legislator record for the mayor of Brampton.

  `page` is an already-parsed document. The name and photo come from the
  "mayorsPic" image; email, address and phone come from the "address" div.
  """
  portrait = page.xpath('//img[@class="mayorsPic"]')[0]
  # Strip the word "Mayor" from the photo label to get the name.
  alt_text = portrait.xpath('string(./@alt)')
  name = alt_text.replace('Mayor ', '')
  photo_url = portrait.xpath('string(./@src)')

  contact_block = page.xpath('//div[@class="address"]')[0]
  email = contact_block.xpath('string(.//a)')
  # The first three text nodes form the address; the fourth is the phone.
  address_lines = contact_block.xpath('./p/text()')[:3]
  address = ''.join(address_lines)
  phone = contact_block.xpath('string(./p/text()[4])')

  mayor = Legislator(name=name, post_id='Brampton', role='Mayor')
  mayor.add_source(MAYOR_PAGE)
  mayor.add_contact('voice', phone, 'legislature')
  mayor.add_contact('address', address, 'legislature')
  mayor.add_contact('email', email, None)
  mayor.image = photo_url
  return mayor

Esempio n. 45

0

Mostra file

File: people.py Progetto: fchagnon/scrapers-ca

def councillor_data(url, name, role):
  """Build the Legislator entry for a Victoria council member.

  Scrapes the member page at `url` for the email link text, the "Phone"
  line (the part after its first ':') and the first content image.
  """
  member_page = lxmlize(url)
  email = member_page.xpath('string(//a[contains(@href, "mailto")])')

  phone_text = member_page.xpath(
      'string(//div[@id="content"]//strong[1]/'
      'following-sibling::text()[contains(., "Phone")])')
  phone = phone_text.split(':')[1]

  relative_photo = member_page.xpath(
      'string(//div[@id="content"]//img[1]/@src)')
  photo_url = urljoin(url, relative_photo)

  # TODO: should post_id be "Neighborhood Liaison"?
  legislator = Legislator(name=name, post_id='Victoria', role=role)
  legislator.add_source(COUNCIL_PAGE)
  legislator.add_source(url)
  legislator.add_contact('email', email, None)
  legislator.add_contact('voice', phone, 'legislature')
  legislator.image = photo_url
  return legislator

Esempio n. 46

0

Mostra file

File: people.py Progetto: fchagnon/scrapers-ca

def scrape_mayor(url):
  """Yield a single Legislator for the mayor of Markham.

  This is a generator: callers must iterate over the result. Name, address,
  phone and email are all read from the "interiorContentWrapper" div of the
  page at `url`.
  """
  wrapper = '//div[@class="interiorContentWrapper"]'
  page = lxmlize(url)

  name = page.xpath(wrapper + '/p/strong/text()')[0]

  # The second and third text nodes hold the address; collapse the runs of
  # whitespace left after joining them.
  address_parts = page.xpath(wrapper + '/p/text()')[1:3]
  address = re.sub(r'\s{2,}', ' ', ' '.join(address_parts))

  contact = page.xpath(wrapper + '/p[3]')[0]
  phone = contact.text.split(':')[1].strip()
  email = contact.xpath('string(./a)')

  mayor = Legislator(name=name, post_id='Markham', role='Mayor')
  mayor.add_source(url)
  mayor.add_contact('address', address, 'legislature')
  mayor.add_contact('voice', phone, 'legislature')
  mayor.add_contact('email', email, None)
  yield mayor

Esempio n. 47

0

Mostra file

File: people.py Progetto: fchagnon/scrapers-ca

def scrape_person(url):
  """Return the Legislator record for the Burnaby member page at `url`.

  The page title's first word is used as the role and the rest as the name.
  A phone contact is added only when a "Phone:" list item is found.
  """
  doc = lxmlize(url)

  title = doc.xpath('string(//title)')
  role, name = title.split(' ', 1)
  photo_url = doc.xpath('string(//div[@id="content"]//img[@style]/@src)')
  email = doc.xpath('string(//a[contains(@href, "mailto:")])')
  phone = doc.xpath('string(//li[contains(text(), "Phone:")])')

  person = Legislator(name=name, post_id='Burnaby', role=role,
                      image=photo_url)
  for source in (COUNCIL_PAGE, url):
    person.add_source(source)
  person.add_contact('email', email, None)
  if phone:
    person.add_contact('voice', phone, 'legislature')
  return person

Esempio n. 48

0

Mostra file

File: people.py Progetto: fchagnon/scrapers-ca

  def get_people(self):
    """Yield a Legislator for every <h2> heading on COUNCIL_PAGE.

    Headings have the form "Role - Name (POST)". When the parenthesised post
    is absent, the whole remainder is the name and the post is "Clarington".
    """
    page = lxmlize(COUNCIL_PAGE)

    for heading in page.xpath('//h2'):
      role, name_post = heading.text.split(' - ')
      parsed = re.match(r'(.+) \((.+)\)', name_post)
      if parsed is None:
        name, post = name_post, "Clarington"
      else:
        name, caps_post = parsed.groups()
        post = caps_post.title()

      mailto = heading.xpath('string(./following-sibling::a[1]/@href)')
      email = mailto[len('mailto:'):]
      photo_url = heading.xpath('string(./following-sibling::img[1]/@src)')

      member = Legislator(name=name, post_id=post, role=role, image=photo_url)
      member.add_source(COUNCIL_PAGE)
      member.add_contact('email', email, None)
      yield member

Esempio n. 49

0

Mostra file

def councillor_data(url):
  """Return the Legislator record for the councillor page at `url`.

  Name and ward are parsed from an <h1> of the form
  "Councillor <name> - <ward>". A phone contact is added only when a phone
  number is found in the "usercontent" div.
  """
  doc = lxmlize(url)
  heading = doc.xpath('string(//h1)')
  name, ward = re.match('Councillor (.+) - (.+)', heading).groups()

  content = doc.xpath('//div[@class="usercontent"]')[0]
  # The photo path is resolved against COUNCIL_PAGE, not the member page.
  photo_url = urljoin(COUNCIL_PAGE, content.xpath('string(.//img[1]/@src)'))
  email = content.xpath('string(.//a/text()[contains(., "@")])')
  raw_phone = content.xpath('string(.//strong[contains(., "Phone")]/'
      'following-sibling::text()[1])')
  phone = raw_phone.strip()

  councillor = Legislator(name=name, post_id=ward, role='Councillor')
  councillor.add_source(COUNCIL_PAGE)
  councillor.add_source(url)
  if phone:
    councillor.add_contact('voice', phone, 'legislature')
  councillor.add_contact('email', email, None)
  councillor.image = photo_url
  return councillor