Python CanadianPerson.gender Examples

Programming Language: Python

Namespace/Package Name: utils

Class/Type: CanadianPerson

Method/Function: gender

Examples at hotexamples.com: 2

Python CanadianPerson.gender - 2 examples found. These are the top-rated real-world Python examples of utils.CanadianPerson.gender, extracted from open source projects. You can rate examples to help us improve their quality.

Frequently Used Methods

Show Hide

add_contact(30)

add_source(30)

image(30)

add_link(22)

add_membership(8)

birth_date(5)

gender(2)

add_name(1)

validate(1)

Example #1

Show file

File: people.py Project: spatialbits/scrapers-ca

    def scrape(self):
        """Yield organizations, posts and people built from the CSV at ``self.csv_url``.

        For every row accepted by ``self.is_valid_row``:

        * the values are passed through ``self.corrections`` (each entry is
          either a callable applied to the value or a dict of replacements);
        * an ``Organization`` is yielded the first time its lower-cased name
          is seen;
        * a ``Post`` is yielded for the row's role;
        * a ``Person`` is populated, validated and yielded.

        Any exception raised while handling a row is printed and the row is
        skipped, so one bad row does not abort the whole scrape.
        """
        organizations = {}
        # role -> district -> number of people seen so far, used to build
        # "<district> (seat N)" labels when several posts share one area.
        seat_numbers = defaultdict(lambda: defaultdict(int))

        reader = self.csv_reader(self.csv_url,
                                 delimiter=self.delimiter,
                                 header=True,
                                 encoding=self.encoding,
                                 skip_rows=self.skip_rows)
        reader.fieldnames = [
            self.header_converter(field) for field in reader.fieldnames
        ]
        for row in reader:
            try:
                if not self.is_valid_row(row):
                    continue

                for key, corrections in self.corrections.items():
                    if not isinstance(corrections, dict):
                        row[key] = corrections(row[key])
                    elif row[key] in corrections:
                        row[key] = corrections[row[key]]

                organization_classification = 'legislature'

                organization_name = row['organization']
                organization_key = organization_name.lower()
                if organization_key in organizations:
                    organization = organizations[organization_key]
                else:
                    organization = Organization(
                        organization_name,
                        classification=organization_classification)
                    organization.add_source(self.csv_url)
                    yield organization
                    organizations[organization_key] = organization

                # An empty role falls back to the generic council role.
                if not row['primary role']:
                    row['primary role'] = 'Councillor'

                role = row['primary role']

                post = Post(role=role,
                            label=organization_name,
                            organization_id=organization._id)
                yield post

                name = row['name'].strip(' .,')

                district = row['district name']

                if self.many_posts_per_area and role not in self.unique_roles:
                    seat_numbers[role][district] += 1
                    district = '{} (seat {})'.format(
                        district, seat_numbers[role][district])

                p = Person(primary_org=organization_classification,
                           name=name,
                           district=district,
                           role=role,
                           party=row.get('party name'))
                p.add_source(self.csv_url)

                if row.get('gender'):
                    p.gender = row['gender']
                if row.get('photo url'):
                    p.image = row['photo url']

                if row.get('source url'):
                    p.add_source(row['source url'].strip(' .,'))

                if row.get('website'):
                    p.add_link(row['website'], note='web site')
                if row.get('facebook'):
                    # Drop any query string or fragment from the profile URL.
                    p.add_link(re.sub(r'[#?].+', '', row['facebook']))
                if row.get('twitter'):
                    p.add_link(row['twitter'])

                # Use .get() like every other optional column: a CSV that
                # lacks an "email" or "address" column must not raise
                # KeyError and silently drop the person.
                if row.get('email'):
                    p.add_contact('email', row['email'].strip(' .,'))
                if row.get('address'):
                    p.add_contact('address', row['address'], 'legislature')
                if row.get('phone'):
                    p.add_contact('voice', row['phone'], 'legislature')
                if row.get('fax'):
                    p.add_contact('fax', row['fax'], 'legislature')
                if row.get('cell'):
                    p.add_contact('cell', row['cell'], 'legislature')
                if row.get('birth date'):
                    p.birth_date = row['birth date']

                if row.get('incumbent'):
                    p.extras['incumbent'] = row['incumbent']

                if name in self.other_names:
                    for other_name in self.other_names[name]:
                        p.add_name(other_name)

                # Validate person entity so that we can catch the exception if needed.
                p.validate()

                yield p
            except Exception as e:
                # Deliberate best effort: report the problem and move on to
                # the next row.
                print(repr(e))

Example #2

Show file

File: people.py Project: jboakye/scrapers-ca

    def scrape_people(self, rows, gender):
        """Yield one ``Person`` per member tile in ``rows``.

        Each row is queried with XPath for the member's name, constituency,
        province, party and profile URL. The profile page is then fetched with
        ``self.lxmlize`` to collect the email, photo, personal web site,
        preferred languages and the Hill / constituency office contacts.

        :param rows: member tile elements supporting ``.xpath()``; must be
            non-empty.
        :param gender: value assigned to ``gender`` on every yielded person.
        :raises AssertionError: if ``rows`` is empty.
        """

        def split_phone_and_fax(element):
            """Return ``(voice, fax)`` from a "Telephone / Fax" paragraph.

            The first text line holds the telephone and the optional second
            line holds the fax. A missing value comes back as ``''`` so that
            callers never see a stale or unbound value.
            """
            lines = element.text_content().strip().splitlines()
            voice = ''
            fax = ''
            if lines:
                voice = lines[0].replace('Telephone:', '').replace(
                    'Téléphone :', '').strip()
            if len(lines) > 1:
                fax = lines[1].replace('Fax:', '').replace(
                    'Télécopieur :', '').strip()
            return voice, fax

        assert len(rows), 'No members found'
        for row in rows:
            name = row.xpath(
                './/div[@class="ce-mip-mp-name"][1]')[0].text_content()
            constituency = row.xpath(
                './/div[@class="ce-mip-mp-constituency"][1]')[0].text_content(
                )
            constituency = constituency.replace('–', '—')  # n-dash, m-dash
            if constituency == 'Mont-Royal':
                constituency = 'Mount Royal'

            province = row.xpath(
                './/div[@class="ce-mip-mp-province"][1]')[0].text_content()

            party = row.xpath(
                './/div[@class="ce-mip-mp-party"][1]')[0].text_content()

            url = row.xpath('.//a[@class="ce-mip-mp-tile"]/@href')[0]

            # Members from Québec are scraped from the French profile page.
            if province == 'Québec':
                url = url.replace('/en/', '/fr/')

            mp_page = self.lxmlize(url)
            email = self.get_email(mp_page,
                                   '//*[@id="contact"]/div/p/a',
                                   error=False)

            photo = mp_page.xpath(
                './/div[@class="ce-mip-mp-profile-container"]//img/@src')[0]

            m = Person(primary_org='lower',
                       name=name,
                       district=constituency,
                       role='MP',
                       party=party)
            m.add_source(COUNCIL_PAGE)
            m.add_source(url)
            m.gender = gender
            # @see https://www.ourcommons.ca/Members/en/ziad-aboultaif(89156)
            if email:
                m.add_contact('email', email)

            if photo:
                # Determine whether the photo is actually a generic silhouette
                photo_response = self.get(photo)
                if (photo_response.status_code == 200
                        and hashlib.sha1(photo_response.content).hexdigest()
                        not in IMAGE_PLACEHOLDER_SHA1):
                    m.image = photo

            # I don't think the new parliament website has personal web sites anymore
            personal_url = mp_page.xpath(
                './/a[contains(@title, "Personal Web Site")]/@href')
            if personal_url:
                m.add_link(personal_url[0])

            preferred_languages = mp_page.xpath(
                './/dt[contains(., "Preferred Language")]/following-sibling::dd/text()'
            )
            if preferred_languages:
                m.extras['preferred_languages'] = [
                    language.replace('/', '').strip()
                    for language in preferred_languages
                ]

            if province == 'Québec':
                m.add_contact('address',
                              'Chambre des communes\nOttawa ON  K1A 0A6',
                              'legislature')
            else:
                m.add_contact('address',
                              'House of Commons\nOttawa ON  K1A 0A6',
                              'legislature')

            # Hill Office contacts
            # Now phone and fax are in the same element
            # <p>
            #   Telephone: xxx-xxx-xxxx<br/>
            #   Fax: xxx-xxx-xxx
            # </p>
            phone_and_fax_el = mp_page.xpath(
                './/h4[contains(., "Hill Office")]/../p[contains(., "Telephone")]|.//h4[contains(., "Hill Office")]/../p[contains(., "Téléphone :")]'
            )
            if phone_and_fax_el:
                # The helper tolerates a paragraph without a fax line, which
                # previously raised IndexError here.
                voice, fax = split_phone_and_fax(phone_and_fax_el[0])
                if voice:
                    m.add_contact('voice', voice, 'legislature')

                if fax:
                    m.add_contact('fax', fax, 'legislature')

            # Constituency Office contacts
            # Some people have more than one, e.g. https://www.ourcommons.ca/Members/en/ben-lobb(35600)#contact
            for i, constituency_office_el in enumerate(
                    mp_page.xpath(
                        './/div[@class="ce-mip-contact-constituency-office-container"]/div'
                    )):
                # First office is "constituency", later ones "constituency (2)", ...
                note = 'constituency'
                if i:
                    note += ' ({})'.format(i + 1)

                address = constituency_office_el.xpath('./p[1]')[0]
                address = address.text_content().strip().splitlines()
                address = list(map(str.strip, address))
                m.add_contact('address', '\n'.join(address), note)

                phone_and_fax_el = constituency_office_el.xpath(
                    './p[contains(., "Telephone")]|./p[contains(., "Téléphone")]'
                )
                if phone_and_fax_el:
                    # Note that https://www.ourcommons.ca/Members/en/michael-barrett(102275)#contact
                    # has an empty value - "Telephone:". So the search / replace cannot include space.
                    #
                    # voice and fax are recomputed for every office, so an
                    # office without a fax line no longer inherits the fax
                    # parsed for the Hill Office or a previous office.
                    voice, fax = split_phone_and_fax(phone_and_fax_el[0])

                    if voice:
                        m.add_contact('voice', voice, note)

                    if fax:
                        m.add_contact('fax', fax, note)

            yield m