Ejemplo n.º 1
0
    def get_people(self):
        """Yield a committee, its subcommittee, and the committee's chairman.

        Objects are yielded in that order; each one carries the same
        example source URL.
        """
        source_url = 'https://example.com'

        # Parent committee, with a post registered for its chairman.
        committee = Organization('Technology')
        committee.add_post('Chairman', 'chairman')
        committee.add_source(source_url)
        yield committee

        # Subcommittee whose parent is the committee above.
        subcommittee = Organization('Subcommittee on E-Commerce',
                                    parent=committee)
        subcommittee.add_source(source_url)
        yield subcommittee

        # The person who holds the chairman role on the committee.
        chairman = Person('Paul Tagliamonte', district='6', chamber='upper')
        chairman.add_membership(committee, role='chairman')
        chairman.add_source(source_url)
        yield chairman
Ejemplo n.º 2
0
def test_add_contact_information():
    """ Check that a person still validates after gaining a contact detail """
    person = Person(
        "John Q. Public, Esq.",
        gender="male",
        image="http://example.com/john.jpg",
        summary="Some person",
    )
    person.add_source(url='foo')

    # Baseline: the person validates before any contact detail is attached.
    person.validate()

    contact = dict(type='voice', value='876-5309', note='Jenny Cell')
    person.add_contact_detail(**contact)

    # Validation must keep passing with the contact detail present.
    person.validate()
Ejemplo n.º 3
0
    def get_people(self):
        """Yield the Temecula City Council, then one Person per table row.

        Rows come from the second table on the listing page; the first row
        of that table is skipped. Every person gets a council membership
        carrying a work e-mail address, plus the listing page and the
        person's detail link as sources.
        """
        urls = Urls(dict(list=legislators_url), self)

        council = Organization(
            'Temecula City Council', classification='legislature')
        council.add_source(urls.list.url)
        yield council

        rows = urls.list.xpath('//table[2]//tr')
        for row in rows[1:]:

            # Each row is expected to hold exactly two text nodes here
            # (name, role); anything else fails the unpacking.
            name, role = row.xpath('td/p[1]//font/text()')
            image = row.xpath('td/img/@src').pop()

            person = Person(name, image=image)
            membership = person.add_membership(council, role=role)

            # Likewise exactly two links per row: the e-mail href first,
            # then the detail page.
            mailto_href, detail_url = row.xpath('td//a/@href')
            # Drop the first seven characters of the href
            # (presumably the "mailto:" prefix).
            address = mailto_href[7:]
            membership.contact_details.append(
                {'type': 'email', 'value': address, 'note': 'work'})

            person.add_source(urls.list.url)
            person.add_source(detail_url)

            yield person
Ejemplo n.º 4
0
    def get_people(self):
        """Yield the Temecula City Council, then one Person per table row.

        Rows come from the second table on the listing page; the first row
        of that table is skipped. Every person gets a council membership
        carrying a work e-mail address, plus the listing page and the
        person's detail link as sources.
        """
        urls = Urls(dict(list=legislators_url), self)

        council = Organization(
            'Temecula City Council', classification='legislature')
        council.add_source(urls.list.url)
        yield council

        rows = urls.list.xpath('//table[2]//tr')
        for row in rows[1:]:

            # Each row is expected to hold exactly two text nodes here
            # (name, role); anything else fails the unpacking.
            name, role = row.xpath('td/p[1]//font/text()')
            image = row.xpath('td/img/@src').pop()

            person = Person(name, image=image)
            membership = person.add_membership(council, role=role)

            # Likewise exactly two links per row: the e-mail href first,
            # then the detail page.
            mailto_href, detail_url = row.xpath('td//a/@href')
            # Drop the first seven characters of the href
            # (presumably the "mailto:" prefix).
            address = mailto_href[7:]
            membership.contact_details.append(
                {'type': 'email', 'value': address, 'note': 'work'})

            person.add_source(urls.list.url)
            person.add_source(detail_url)

            yield person
Ejemplo n.º 5
0
def test_basic_invalid_person():
    """ Check that validation fails once a valid person loses its name """
    person = Person("Bob B. Johnson")
    person.add_source(url='foo')

    # The freshly built person must validate before it is broken.
    person.validate()

    person.name = None

    @raises(ValidationError)
    def validate_nameless_person():
        person.validate()

    validate_nameless_person()
Ejemplo n.º 6
0
    def get_people(self):
        """Yield the Boise City Council, then one Person per detail page.

        Detail-page links are read from the listing page; the first link is
        skipped. Each person gets a council membership (sourced from the
        detail page) with a work e-mail address, and both the listing and
        detail pages as sources.
        """
        urls = Urls(dict(list=legislators_url), self)

        council = Organization('Boise City Council')
        council.add_source(legislators_url)
        yield council

        detail_hrefs = urls.list.xpath('//div[@id="content"]/div/a/@href')

        # Skip the mayor because his page has no name or email.
        for detail_href in detail_hrefs[1:]:

            urls.add(detail=detail_href)

            image = urls.detail.xpath('//div[@id="content"]/p/img/@src').pop()
            heading = urls.detail.xpath('//h1/text()').pop()

            # Remove "Council " from the heading, then split at the first
            # space: the leading word is the role, the remainder the name.
            role, _, name = heading.replace('Council ', '').partition(' ')

            person = Person(name, image=image)

            membership = person.add_membership(council, role=role)
            membership.add_source(urls.detail.url)

            # Take the last mailto link on the page and drop its first seven
            # characters (presumably the "mailto:" prefix).
            mailto_href = urls.detail.xpath(
                '//a[contains(@href, "mailto")]/@href').pop()
            membership.contact_details.append(
                {'type': 'email', 'value': mailto_href[7:], 'note': 'work'})

            person.add_source(urls.list.url)
            person.add_source(urls.detail.url)

            yield person
Ejemplo n.º 7
0
def test_magic_methods():
    """ Check that add_link and add_name record what they are given """
    person = Person(
        "John Q. Public, Esq.",
        gender="male",
        image="http://example.com/john.jpg",
        summary="Some person",
    )
    person.add_source(url='foo')
    person.validate()

    # A link is stored as a url/note pair.
    person.add_link("http://twitter.com/ev", "Twitter Account")
    expected_links = [
        {"url": "http://twitter.com/ev", "note": "Twitter Account"},
    ]
    assert person.links == expected_links

    # A name added with only a note stores just the name and the note.
    nickname = {"name": "Thiston", "note": "What my friends call me"}
    person.add_name("Thiston", note="What my friends call me")
    assert person.other_names == [nickname]

    # A name added with dates is appended after the earlier one and keeps
    # both dates.
    birth_name = {
        "name": "Johnseph Q. Publico",
        "note": "Birth name",
        "start_date": "1920-01",
        "end_date": "1949-12-31",
    }
    person.add_name("Johnseph Q. Publico",
                    note="Birth name",
                    start_date="1920-01",
                    end_date="1949-12-31")
    assert person.other_names == [nickname, birth_name]
Ejemplo n.º 8
0
    def get_people(self):
        """Yield the Denver City Council, then one Person per council member.

        The listing page supplies image URLs, names, detail-page URLs and
        post names. Those four lists are zipped together, so the shortest
        one bounds how many people are yielded. Each member's detail page is
        searched for a phone number and an e-mail address; whichever is
        found is attached to the council membership as a work contact.

        Yields:
            The council ``Organization`` (with one post per post name
            found), followed by a ``Person`` for each council member.
        """
        urls = Urls(dict(list=legislators_url), self)

        council = Organization('Denver City Council')
        council.add_source(legislators_url)

        # Every lookup on the listing page keys off links whose href
        # contains "councildistrict".
        district_link = '//a[contains(@href, "councildistrict")]'

        image_urls = urls.list.xpath(district_link + '/img/@src')

        # The last matching link is dropped and empty link texts (e.g. links
        # that only wrap an image) are discarded.
        # NOTE(review): why the last link is excluded is not visible here.
        names = [a.text_content() for a in urls.list.xpath(district_link)][:-1]
        names = [name for name in names if name]

        person_urls = urls.list.xpath(district_link + '/@href')

        # Raw string: "\d" in a plain literal is an invalid escape sequence.
        district_re = re.compile(r'Council District \d+')
        post_ids = []
        for td in urls.list.xpath(district_link + '/img/ancestor::td'):
            text = td.text_content()
            # A numbered district takes precedence over the at-large label.
            m = district_re.search(text)
            if m:
                post_ids.append(m.group())
            elif 'Council At-Large' in text:
                post_ids.append('Council At-Large')

        for post_id in post_ids:
            council.add_post(post_id, post_id)
        yield council

        phone_re = re.compile(r'\(\d{3}\)[ -]*\d{3}-\d{4}')
        # NOTE(review): the previous pattern was the literal text
        # "[email protected]" (apparently an e-mail obfuscation artifact in
        # the copied source), which cannot match a real address. A generic
        # address pattern is used instead -- confirm against the live page.
        email_re = re.compile(r'[\w.+-]+@[\w-]+(?:\.[\w-]+)+')

        data = zip(image_urls, names, person_urls, post_ids)
        for image_url, name, person_url, post_id in data:

            # Create legislator.
            person = Person(name, image=image_url)

            # Add sources.
            urls.add(detail=person_url)
            person.add_source(urls.list.url, note='list')
            person.add_source(urls.detail.url, note='detail')

            # Add membership on council.
            memb = person.add_membership(council, post_id=post_id.strip())
            memb.add_source(urls.detail.url)

            xpath = '//div[@id="dnn_column3"]'
            contact_text = urls.detail.xpath(xpath)[0].text_content()

            # Fall back to the wide right-hand pane when the first container
            # holds no text.
            if not contact_text.strip():
                xpath = '//div[contains(@id, "dnn_RightPaneWide")]'
                contact_text = urls.detail.xpath(xpath)[0].text_content()

            # A page without a phone number or e-mail address should not
            # abort the whole scrape, so each detail is added only if found.
            phone_match = phone_re.search(contact_text)
            if phone_match:
                memb.contact_details.append(
                    dict(type='phone', value=phone_match.group(), note='work'))

            email_match = email_re.search(contact_text)
            if email_match:
                memb.contact_details.append(
                    dict(type='email', value=email_match.group(), note='work'))

            yield person
Ejemplo n.º 9
0
    def migrate_people(self, state):
        """Convert billy legislator documents into Person/Membership objects.

        Iterates ``self.billy_db.legislators`` (restricted to ``state`` when
        it is truthy). For each entry it builds a ``Person``, saves it with
        ``self.save_object``, removes the person's existing memberships, and
        then saves ``Membership`` objects for the entry's party, its current
        legislature seat, and every role listed under ``old_roles``.

        Entries whose ``chamber`` is ``"joint"`` are skipped before anything
        is saved.

        Raises:
            Exception: if an entry has a chamber with no matching
                ``_hot_cache`` entry, or if an old role names a term that is
                not listed in ``meta['terms']``.
        """
        # An empty spec matches every legislator; a truthy state narrows it.
        spec = {}
        if state:
            spec["state"] = state
        for entry in self.billy_db.legislators.find(spec, timeout=False):
            jurisdiction_id = openstates_to_jid(entry['_id'])

            # Build the person and carry over the original id and timestamps.
            who = Person(entry['full_name'])
            who.identifiers = [{'scheme': 'openstates',
                               'identifier': entry['_id']}]
            who._openstates_id = entry['_id']
            who.created_at = entry['created_at']
            who.updated_at = entry['updated_at']
            # Keep the scraped name as an alternate name when it differs.
            if who.name != entry['_scraped_name']:
                who.add_name(entry['_scraped_name'])

            # Map of entry key -> Person attribute; only truthy values are
            # copied.
            for k, v in {
                "photo_url": "image",
                "chamber": "chamber",
                "district": "district",
            }.items():
                if entry.get(k, None):
                    setattr(who, v, entry[k])

            who.sources = entry['sources']

            home = entry.get('url', None)
            if home:
                who.add_link(home, "Homepage")

            vsid = entry.get('votesmart_id')
            if vsid:
                who.add_identifier(vsid, 'votesmart-id')

            tdid = entry.get('transparencydata_id')
            if tdid:
                who.add_identifier(tdid, 'transparencydata-id')

            blacklist = ["photo_url", "chamber", "district", "url",
                         "roles", "offices", "party", "state", "sources",
                         "active", "old_roles", "_locked_fields", "created_at",
                         "updated_at", "transparencydata_id", "votesmart_id",
                         "leg_id", "email", "phone", "fax", "_scraped_name",
                         "_id", "_all_ids", "full_name", "country", "level",
                         "office_address", "suffixes", "_type"]
            # The above is a list of keys we move over by hand later.

            # Every remaining key with a truthy value is kept in extras.
            for key, value in entry.items():
                if key in blacklist or not value:  # or key.startswith("_"):
                    continue
                who.extras[key] = value

            who.add_meta('locked_fields', entry.get('_locked_fields', []))

            # Joint-chamber entries are dropped here, before the person is
            # saved.
            chamber = entry.get('chamber')
            if chamber == "joint":
                continue

            # Look up the legislature for this state/chamber pair; a chamber
            # without a cached legislature is treated as a fatal error.
            legislature = None
            if chamber:
                legislature = _hot_cache.get('{state}-{chamber}'.format(
                    state=entry['state'],
                    chamber=chamber,
                ))

                if legislature is None:
                    print(chamber, entry['state'], entry['_id'])
                    raise Exception("Someone's in the void.")

            self.save_object(who)  # gives who an id, btw.

            party = entry.get('party', None)
            # Remove the person's existing memberships before recreating
            # them below.
            db.memberships.remove({"person_id": who._id}, safe=True)

            # Party membership.
            if party:
                m = Membership(who._id, self.create_or_get_party(entry['party']))
                self.save_object(m)

            # Current seat in the legislature, dated from the current term's
            # start year.
            if legislature:
                term = get_current_term(jurisdiction_id)
                m = Membership(who._id, legislature,
                               chamber=chamber,
                               start_date=str(term['start_year']))
                m.add_extra('term', term['name'])

                # Re-reads chamber and district from the entry (this rebinds
                # the outer ``chamber`` local to the same entry value).
                chamber, district = (entry.get(x, None)
                                     for x in ['chamber', 'district'])

                if chamber:
                    m.chamber = chamber

                if district:
                    m.post_id = district

                # Top-level contact fields use their own key as the note.
                for key in ['email', 'fax', 'phone']:
                    if key in entry and entry[key]:
                        m.add_contact_detail(type=key,
                                             value=entry[key],
                                             note=key)

                if entry.get("office_address"):
                    m.add_contact_detail(type='office',
                                         value=entry['office_address'],
                                         note='Office Address')

                # Each office contributes one contact detail per truthy
                # field other than "name" and "type", noted with the office
                # name.
                for office in entry.get('offices', []):
                    note = office['name']
                    for key, value in office.items():
                        if not value or key in ["name", "type"]:
                            continue
                        m.add_contact_detail(type=key, value=value, note=note)

                self.save_object(m)

            # Historical roles, grouped under old_roles by session key.
            for session in entry.get('old_roles', []):
                roles = entry['old_roles'][session]
                meta = get_metadata(obj_to_jid(who))
                term = None

                for role in roles:
                    # Resolve the role's term name to the matching term dict
                    # in the metadata; the for/else raises when none matches.
                    term = role['term']
                    to = None
                    for to in meta['terms']:
                        if term == to['name']:
                            term = to
                            break
                    else:
                        raise Exception("No term found?")

                    start_year = term['start_year']
                    end_year = term['end_year']

                    # Committee role: only recorded when the committee id
                    # resolves through _hot_cache.
                    if 'committee' in role:
                        cid = role.get('committee_id')
                        if cid:
                            jid = _hot_cache.get(cid)
                            if jid:
                                m = Membership(who._id, jid,
                                               role='member',
                                               start_date=str(start_year),
                                               end_date=str(end_year),
                                               chamber=role['chamber'])
                                m.add_extra('term', term['name'])

                                if "position" in role:
                                    m.role = role['position']

                                self.save_object(m)

                                if m.role != 'member':
                                    # In addition to being the (chair|vice-chair),
                                    # they should also be noted as a member.
                                    m = Membership(who._id,
                                                   jid,
                                                   role='member',
                                                   start_date=str(start_year),
                                                   end_date=str(end_year),
                                                   chamber=role['chamber'])
                                    m.add_extra('term', term['name'])
                                    self.save_object(m)

                    # Legislative seat held during that term: only recorded
                    # when the role's state/chamber resolves through
                    # _hot_cache.
                    if 'district' in role:
                        oid = "{state}-{chamber}".format(**role)
                        leg_ocdid = _hot_cache.get(oid)
                        if leg_ocdid:
                            m = Membership(who._id, leg_ocdid,
                                           start_date=str(start_year),
                                           end_date=str(end_year),
                                           post_id=role['district'],
                                           chamber=role['chamber'])
                            m.add_extra('term', term['name'])
                            self.save_object(m)
Ejemplo n.º 10
0
    def get_people(self):
        """Yield the Denver City Council, then one Person per council member.

        The listing page supplies image URLs, names, detail-page URLs and
        post names. Those four lists are zipped together, so the shortest
        one bounds how many people are yielded. Each member's detail page is
        searched for a phone number and an e-mail address; whichever is
        found is attached to the council membership as a work contact.

        Yields:
            The council ``Organization`` (with one post per post name
            found), followed by a ``Person`` for each council member.
        """
        urls = Urls(dict(list=legislators_url), self)

        council = Organization('Denver City Council')
        council.add_source(legislators_url)

        # Every lookup on the listing page keys off links whose href
        # contains "councildistrict".
        district_link = '//a[contains(@href, "councildistrict")]'

        image_urls = urls.list.xpath(district_link + '/img/@src')

        # The last matching link is dropped and empty link texts (e.g. links
        # that only wrap an image) are discarded.
        # NOTE(review): why the last link is excluded is not visible here.
        names = [a.text_content() for a in urls.list.xpath(district_link)][:-1]
        names = [name for name in names if name]

        person_urls = urls.list.xpath(district_link + '/@href')

        # Raw string: "\d" in a plain literal is an invalid escape sequence.
        district_re = re.compile(r'Council District \d+')
        post_ids = []
        for td in urls.list.xpath(district_link + '/img/ancestor::td'):
            text = td.text_content()
            # A numbered district takes precedence over the at-large label.
            m = district_re.search(text)
            if m:
                post_ids.append(m.group())
            elif 'Council At-Large' in text:
                post_ids.append('Council At-Large')

        for post_id in post_ids:
            council.add_post(post_id, post_id)
        yield council

        phone_re = re.compile(r'\(\d{3}\)[ -]*\d{3}-\d{4}')
        # NOTE(review): the previous pattern was the literal text
        # "[email protected]" (apparently an e-mail obfuscation artifact in
        # the copied source), which cannot match a real address. A generic
        # address pattern is used instead -- confirm against the live page.
        email_re = re.compile(r'[\w.+-]+@[\w-]+(?:\.[\w-]+)+')

        data = zip(image_urls, names, person_urls, post_ids)
        for image_url, name, person_url, post_id in data:

            # Create legislator.
            person = Person(name, image=image_url)

            # Add sources.
            urls.add(detail=person_url)
            person.add_source(urls.list.url, note='list')
            person.add_source(urls.detail.url, note='detail')

            # Add membership on council.
            memb = person.add_membership(council, post_id=post_id.strip())
            memb.add_source(urls.detail.url)

            xpath = '//div[@id="dnn_column3"]'
            contact_text = urls.detail.xpath(xpath)[0].text_content()

            # Fall back to the wide right-hand pane when the first container
            # holds no text.
            if not contact_text.strip():
                xpath = '//div[contains(@id, "dnn_RightPaneWide")]'
                contact_text = urls.detail.xpath(xpath)[0].text_content()

            # A page without a phone number or e-mail address should not
            # abort the whole scrape, so each detail is added only if found.
            phone_match = phone_re.search(contact_text)
            if phone_match:
                memb.contact_details.append(
                    dict(type='phone', value=phone_match.group(), note='work'))

            email_match = email_re.search(contact_text)
            if email_match:
                memb.contact_details.append(
                    dict(type='email', value=email_match.group(), note='work'))

            yield person