Example #1
0
    def handle(self, *args, **options):
        """Export representatives to CSV and upload the files to S3.

        For every report with an empty ``exception`` whose module name does
        not end in ``_candidates`` or ``_municipalities``, one CSV is saved as
        ``csv/<slug>.csv``; a combined ``csv/complete.csv`` is saved last.
        If an ``ImportError`` is raised while handling a report, that report
        is deleted.
        """
        def save(key, body):
            # Store ``body`` under ``key`` in the bucket, publicly readable.
            s3_key = Key(bucket)
            s3_key.key = key
            s3_key.set_contents_from_string(body)
            s3_key.set_acl('public-read')

        sys.path.append(os.path.abspath('scrapers'))

        bucket = S3Connection().get_bucket('represent.opennorth.ca')

        # Organization names that map to a fixed slug instead of
        # ``slugify(name)``.
        names = {
            'Parliament of Canada': 'house-of-commons',
            'Legislative Assembly of Alberta': 'alberta-legislature',
            'Legislative Assembly of British Columbia': 'bc-legislature',
            'Legislative Assembly of Manitoba': 'manitoba-legislature',
            'Legislative Assembly of New Brunswick':
            'new-brunswick-legislature',
            'Newfoundland and Labrador House of Assembly':
            'newfoundland-labrador-legislature',
            'Nova Scotia House of Assembly': 'nova-scotia-legislature',
            'Legislative Assembly of Ontario': 'ontario-legislature',
            'Legislative Assembly of Prince Edward Island': 'pei-legislature',
            'Assemblée nationale du Québec': 'quebec-assemblee-nationale',
            'Legislative Assembly of Saskatchewan': 'saskatchewan-legislature',
        }

        default_headers = [
            'District name',
            'Primary role',
            'Name',  # not in CSV schema
            'First name',
            'Last name',
            'Gender',
            'Party name',
            'Email',
            'Photo URL',
            'Source URL',
            'Website',
            'Facebook',
            'Instagram',
            'Twitter',
            'LinkedIn',
            'YouTube',
        ]
        office_headers = [
            'Office type',  # not in CSV schema
            'Address',  # not in CSV schema
            'Phone',
            'Fax',
        ]

        # Last two labels of a link's host, mapped to the social network
        # column the link belongs to.
        social_networks = {
            'facebook.com': 'facebook',
            'fb.com': 'facebook',
            'instagram.com': 'instagram',
            'linkedin.com': 'linkedin',
            'twitter.com': 'twitter',
            'youtube.com': 'youtube',
        }
        gender_codes = {'male': 'M', 'female': 'F'}
        office_keys = ('type', 'postal', 'tel', 'fax')

        all_rows = []
        max_offices_count = 0

        reports = Report.objects.filter(exception='')
        reports = reports.exclude(module__endswith='_candidates')
        reports = reports.exclude(module__endswith='_municipalities')
        for report in reports.order_by('module'):
            try:
                metadata = module_name_to_metadata(report.module)

                rows = []
                offices_count = 0

                # Exclude party memberships.
                memberships = Membership.objects.filter(
                    organization__jurisdiction_id=metadata['jurisdiction_id'])
                memberships = memberships.exclude(
                    role__in=('member', 'candidate'))
                memberships = memberships.prefetch_related(
                    'contact_details', 'person', 'person__links',
                    'person__sources')
                for membership in memberships:
                    person = membership.person

                    try:
                        party_membership = Membership.objects.get(
                            organization__classification='party',
                            role='member',
                            person=person)
                        party_name = party_membership.organization.name
                    except Membership.DoesNotExist:
                        party_name = None

                    # The last link seen for each network wins.
                    social = {}
                    for link in person.links.all():
                        host_labels = urlsplit(link.url).netloc.split('.')
                        network = social_networks.get(
                            '.'.join(host_labels[-2:]))
                        if network is not None:
                            social[network] = link.url

                    gender = gender_codes.get(person.gender)

                    # Everything after the last space is the last name; a
                    # name without spaces has no first name.
                    first_name, space, last_name = person.name.rpartition(' ')
                    if not space:
                        first_name = None

                    label = membership.post.label
                    email = next(
                        (detail.value
                         for detail in membership.contact_details.all()
                         if detail.type == 'email'),
                        None)
                    sources = list(person.sources.all())
                    source_url = sources[-1].url if len(sources) > 1 else None

                    # @see https://represent.opennorth.ca/api/#fields
                    row = [
                        remove_suffix_re.sub('', label),  # District name
                        membership.role,  # Elected office
                        person.name,  # Name
                        first_name,  # First name
                        last_name,  # Last name
                        gender,  # Gender
                        party_name,  # Party name
                        email,  # Email
                        person.image,  # Photo URL
                        source_url,  # Source URL
                        get_personal_url(person),  # Website
                        social.get('facebook'),  # Facebook
                        social.get('instagram'),  # Instagram
                        social.get('twitter'),  # Twitter
                        social.get('linkedin'),  # LinkedIn
                        social.get('youtube'),  # YouTube
                    ]

                    offices = get_offices(membership)
                    offices_count = max(offices_count, len(offices))

                    # One group of four trailing columns per office.
                    for office in offices:
                        row.extend(office.get(key) for key in office_keys)

                    # If the person is associated to multiple boundaries,
                    # emit one copy of the row per ward number.
                    if re.search(r'\AWards\b', label):
                        rows.extend(
                            ['Ward %s' % district_id] + row[1:]
                            for district_id in re.findall(r'\d+', label))
                    else:
                        rows.append(row)

                rows.sort()

                headers = default_headers + office_headers * offices_count

                name = metadata['name']
                slug = names[name] if name in names else slugify(name)

                buffer = StringIO()
                writer = csv.writer(buffer)
                writer.writerow(headers)
                writer.writerows(rows)
                save('csv/%s.csv' % slug,
                     codecs.encode(buffer.getvalue(), 'windows-1252'))

                max_offices_count = max(max_offices_count, offices_count)

                # Prefix each row with the organization name for the
                # combined file.
                for row in rows:
                    row.insert(0, name)
                all_rows.extend(rows)
            except ImportError:
                report.delete()  # delete reports for old modules

        headers = (['Organization'] + default_headers +
                   office_headers * max_offices_count)

        buffer = StringIO()
        writer = csv.writer(buffer)
        writer.writerow(headers)
        writer.writerows(all_rows)
        save('csv/complete.csv',
             codecs.encode(buffer.getvalue(), 'windows-1252'))
Example #2
0
def represent(request, module_name):
    """Serve the memberships of a scraper module's jurisdiction as JSON.

    Each membership becomes one object keyed by the Represent API field
    names (see the link in the body). A membership whose post label starts
    with "Wards" yields one object per number found in the label.

    :param request: the incoming request; not read by this view.
    :param module_name: scraper module name, passed to
        ``module_name_to_metadata``. If it ends in ``_candidates``, only
        memberships with the role "candidate" are included; otherwise the
        roles "member" and "candidate" are excluded.
    :returns: an ``HttpResponse`` containing the JSON-encoded list, with
        content type ``application/json``.
    """
    sys.path.append(os.path.abspath('scrapers'))

    metadata = module_name_to_metadata(module_name)

    representatives = []

    queryset = Membership.objects.filter(organization__jurisdiction_id=metadata['jurisdiction_id'])

    # QuerySet methods return a new QuerySet instead of modifying the
    # receiver, so the result must be re-assigned for the filter to apply.
    if module_name.endswith('_candidates'):
        # Include only candidates.
        queryset = queryset.filter(role='candidate')
    else:
        # Exclude candidates and party memberships.
        queryset = queryset.exclude(role__in=('member', 'candidate'))

    for membership in queryset.prefetch_related('contact_details', 'person', 'person__links', 'person__sources', 'post'):
        person = membership.person

        # Not sure why this is necessary.
        if not isinstance(membership.extras, dict):
            membership.extras = json.loads(membership.extras)
        if not isinstance(person.extras, dict):
            person.extras = json.loads(person.extras)

        try:
            party_name = Membership.objects.select_related('organization').get(organization__classification='party', role='member', person=person).organization.name
        except Membership.DoesNotExist:
            party_name = None

        if person.gender == 'male':
            gender = 'M'
        elif person.gender == 'female':
            gender = 'F'
        else:
            gender = None

        # Candidates only. Popped before get_extra(person) is called below,
        # so the key is no longer in person.extras at that point.
        incumbent = person.extras.pop('incumbent', None)

        # @see https://represent.opennorth.ca/api/#fields
        representative = {
            'name': person.name,
            'elected_office': membership.role,
            'party_name': party_name,
            'email': next((contact_detail.value for contact_detail in membership.contact_details.all() if contact_detail.type == 'email'), None),
            'photo_url': person.image or None,
            'personal_url': get_personal_url(person),
            'gender': gender,
            'offices': json.dumps(get_offices(membership)),
            'extra': json.dumps(get_extra(person)),
        }

        sources = list(person.sources.all())

        # The first URL ought to be the most generic source.
        representative['source_url'] = sources[0].url

        if len(sources) > 1:
            # The last URL ought to be the most specific source.
            representative['url'] = sources[-1].url

        if incumbent:
            representative['incumbent'] = True

        match = re.search(r'^(\S+) (Ward \d+)$', membership.post.label)

        # If the person is part of Peel Regional Council.
        if match:
            parent = Division.objects.get(subtype1='csd', subtype2='', name=match.group(1))
            division = Division.objects.get(subtype1='csd', subid1=parent.subid1, name=match.group(2))
            boundary_set_slug = '{}-wards'.format(parent.name.lower())
            representative['district_name'] = membership.post.label
            representative['boundary_url'] = '/boundaries/{}/ward-{}/'.format(boundary_set_slug, division.subid2)
            representatives.append(representative)

        # If the person is associated to multiple boundaries.
        elif re.search(r'^Wards\b', membership.post.label):
            for district_id in re.findall(r'\d+', membership.post.label):
                # Copy so that each ward gets its own dictionary.
                representative = representative.copy()
                representative['district_id'] = district_id
                representative['district_name'] = 'Ward {}'.format(district_id)
                representatives.append(representative)

        else:
            division_id = metadata['division_id']

            # Raw strings keep \d and \Z as regex escapes rather than
            # (invalid) string escapes.
            if re.search(r'^ocd-division/country:ca/csd:(\d{7})\Z', division_id):
                geographic_code = division_id[-7:]
            elif re.search(r'^ocd-division/country:ca/cd:(\d{4})\Z', division_id):
                geographic_code = division_id[-4:]
            else:
                geographic_code = None

            post_label = remove_suffix_re.sub('', membership.post.label)

            # If the post label is numeric.
            if re.search(r'^\d+\Z', post_label):
                representative['district_id'] = post_label

            # If the person has a boundary URL.
            elif 'boundary_url' in membership.extras:
                representative['district_name'] = post_label
                representative['boundary_url'] = membership.extras['boundary_url']

            # If the post label is a Census geographic name.
            elif post_label == metadata['division_name'] and geographic_code:
                representative['district_name'] = post_label
                if len(geographic_code) == 7:
                    representative['boundary_url'] = '/boundaries/census-subdivisions/{}/'.format(geographic_code)
                elif len(geographic_code) == 4:
                    representative['boundary_url'] = '/boundaries/census-divisions/{}/'.format(geographic_code)

            else:
                representative['district_name'] = post_label
                district_id = re.search(r'^(?:District|Division|Ward) (\d+)\Z', post_label)
                if district_id:
                    representative['district_id'] = district_id.group(1)

            representatives.append(representative)

    return HttpResponse(json.dumps(representatives), content_type='application/json')
Example #3
0
        def process(report, *, candidates=False):
            """Build and save the CSV for one report.

            With ``candidates`` true only memberships whose role is
            "candidate" are exported; otherwise the roles "member" and
            "candidate" are excluded. If an ``ImportError`` is raised, the
            report is deleted.

            Returns ``[rows, offices_count]``; after a successful save each
            row is prefixed with the organization name.
            """
            rows = []
            offices_count = 0

            # Last two labels of a link's host, mapped to the social network
            # column the link belongs to.
            social_networks = {
                'facebook.com': 'facebook',
                'fb.com': 'facebook',
                'instagram.com': 'instagram',
                'linkedin.com': 'linkedin',
                'twitter.com': 'twitter',
                'youtube.com': 'youtube',
            }
            gender_codes = {'male': 'M', 'female': 'F'}
            office_keys = ('type', 'postal', 'tel', 'fax')

            try:
                metadata = module_name_to_metadata(report.module)

                # Exclude party memberships.
                memberships = Membership.objects.filter(
                    organization__jurisdiction_id=metadata['jurisdiction_id'])
                if candidates:
                    memberships = memberships.filter(role='candidate')
                else:
                    memberships = memberships.exclude(
                        role__in=('member', 'candidate'))
                memberships = memberships.prefetch_related(
                    'contact_details', 'person', 'person__links',
                    'person__sources')

                for membership in memberships:
                    person = membership.person

                    try:
                        party_membership = Membership.objects.get(
                            organization__classification='party',
                            role='member',
                            person=person)
                        party_name = party_membership.organization.name
                    except Membership.DoesNotExist:
                        party_name = None

                    # The last link seen for each network wins.
                    social = {}
                    for link in person.links.all():
                        host_labels = urlsplit(link.url).netloc.split('.')
                        network = social_networks.get(
                            '.'.join(host_labels[-2:]))
                        if network is not None:
                            social[network] = link.url

                    gender = gender_codes.get(person.gender)

                    # Everything after the last space is the last name; a
                    # name without spaces has no first name.
                    first_name, space, last_name = person.name.rpartition(' ')
                    if not space:
                        first_name = None

                    label = membership.post.label
                    email = next(
                        (detail.value
                         for detail in membership.contact_details.all()
                         if detail.type == 'email'),
                        None)
                    sources = list(person.sources.all())
                    source_url = sources[-1].url if len(sources) > 1 else None

                    # @see https://represent.opennorth.ca/api/#fields
                    row = [
                        remove_suffix_re.sub('', label),  # District name
                        membership.role,  # Elected office
                        person.name,  # Name
                        first_name,  # First name
                        last_name,  # Last name
                        gender,  # Gender
                        party_name,  # Party name
                        email,  # Email
                        person.image,  # Photo URL
                        source_url,  # Source URL
                        get_personal_url(person),  # Website
                        social.get('facebook'),  # Facebook
                        social.get('instagram'),  # Instagram
                        social.get('twitter'),  # Twitter
                        social.get('linkedin'),  # LinkedIn
                        social.get('youtube'),  # YouTube
                    ]

                    offices = get_offices(membership)
                    offices_count = max(offices_count, len(offices))

                    # One group of four trailing columns per office.
                    for office in offices:
                        row.extend(office.get(key) for key in office_keys)

                    # If the person is associated to multiple boundaries,
                    # emit one copy of the row per ward number.
                    if re.search(r'\AWards\b', label):
                        rows.extend(
                            ['Ward {}'.format(district_id)] + row[1:]
                            for district_id in re.findall(r'\d+', label))
                    else:
                        rows.append(row)

                rows.sort()

                headers = (self.default_headers +
                           self.office_headers * offices_count)

                name = metadata['name']
                slug = (self.names[name] if name in self.names
                        else slugify(name))

                buffer = StringIO()
                writer = csv.writer(buffer)
                writer.writerow(headers)
                writer.writerows(rows)
                folder = 'candidates' if candidates else 'representatives'
                save('csv/{}/{}.csv'.format(folder, slug), buffer)

                # Prefix each row with the organization name.
                for row in rows:
                    row.insert(0, name)
            except ImportError:
                report.delete()  # delete reports for old modules

            return [rows, offices_count]
Example #4
0
def represent(request, module_name):
    """Serve the memberships of a scraper module's jurisdiction as JSON.

    Each membership becomes one object keyed by the Represent API field
    names (see the link in the body). A membership whose post label starts
    with "Wards" yields one object per number found in the label.

    :param request: the incoming request; not read by this view.
    :param module_name: scraper module name, passed to
        ``module_name_to_metadata``. If it ends in ``_candidates``, only
        memberships with the role "candidate" are included; otherwise the
        roles "member" and "candidate" are excluded.
    :returns: an ``HttpResponse`` containing the JSON-encoded list, with
        content type ``application/json``.
    """
    sys.path.append(os.path.abspath('scrapers'))

    metadata = module_name_to_metadata(module_name)

    representatives = []

    queryset = Membership.objects.filter(organization__jurisdiction_id=metadata['jurisdiction_id'])

    # QuerySet methods return a new QuerySet instead of modifying the
    # receiver, so the result must be re-assigned for the filter to apply.
    if module_name.endswith('_candidates'):
        # Include only candidates.
        queryset = queryset.filter(role='candidate')
    else:
        # Exclude candidates and party memberships.
        queryset = queryset.exclude(role__in=('member', 'candidate'))

    for membership in queryset.prefetch_related('contact_details', 'person', 'person__links', 'person__sources'):
        person = membership.person

        try:
            party_name = Membership.objects.get(organization__classification='party', role='member', person=person).organization.name
        except Membership.DoesNotExist:
            party_name = None

        if person.gender == 'male':
            gender = 'M'
        elif person.gender == 'female':
            gender = 'F'
        else:
            gender = None

        # Popped before get_extra(person) is called below, so the key is no
        # longer in person.extras at that point.
        incumbent = person.extras.pop('incumbent', None)

        # @see https://represent.opennorth.ca/api/#fields
        representative = {
            'name': person.name,
            'elected_office': membership.role,
            'party_name': party_name,
            'email': next((contact_detail.value for contact_detail in membership.contact_details.all() if contact_detail.type == 'email'), None),
            'photo_url': person.image or None,
            'personal_url': get_personal_url(person),
            'gender': gender,
            'offices': json.dumps(get_offices(membership)),
            'extra': json.dumps(get_extra(person)),
        }

        # The first source URL is omitted when longer than 200 characters.
        # @see https://github.com/opennorth/represent-canada/issues/81
        sources = list(person.sources.all())
        if len(sources[0].url) <= 200:
            representative['source_url'] = sources[0].url

        if len(sources) > 1:
            representative['url'] = sources[-1].url

        if incumbent:
            representative['incumbent'] = True

        match = re.search(r'^(\S+) (Ward \d+)$', membership.post.label)

        # If the person is part of Peel Regional Council.
        if match:
            parent = Division.objects.get(subtype1='csd', subtype2='', name=match.group(1))
            division = Division.objects.get(subtype1='csd', subid1=parent.subid1, name=match.group(2))
            # First boundary mapping whose prefix starts with the parent
            # division's ID, or None if there is no such mapping.
            boundary_set_slug = next((k for k, v in settings.IMAGO_BOUNDARY_MAPPINGS.items() if v['prefix'].startswith(parent.id)), None)
            representative['district_name'] = membership.post.label
            representative['boundary_url'] = '/boundaries/{}/ward-{}/'.format(boundary_set_slug, division.subid2)
            representatives.append(representative)
        # If the person is associated to multiple boundaries.
        elif re.search(r'^Wards\b', membership.post.label):
            for district_id in re.findall(r'\d+', membership.post.label):
                # Copy so that each ward gets its own dictionary.
                representative = representative.copy()
                representative['district_id'] = district_id
                representative['district_name'] = 'Ward %s' % district_id
                representatives.append(representative)
        else:
            division_id = metadata['division_id']
            # Raw strings keep \d and \Z as regex escapes rather than
            # (invalid) string escapes.
            if re.search(r'^ocd-division/country:ca/csd:(\d{7})\Z', division_id):
                geographic_code = division_id[-7:]
            elif re.search(r'^ocd-division/country:ca/cd:(\d{4})\Z', division_id):
                geographic_code = division_id[-4:]
            else:
                geographic_code = None
            post_label = remove_suffix_re.sub('', membership.post.label)
            # If the post label is numeric.
            if re.search(r'^\d+\Z', post_label):
                representative['district_id'] = post_label
            # If the person has a boundary URL.
            elif membership.extras.get('boundary_url'):
                representative['district_name'] = post_label
                representative['boundary_url'] = membership.extras['boundary_url']
            # If the post label is a census subdivision.
            elif post_label == metadata['division_name'] and geographic_code:
                representative['district_name'] = post_label
                if len(geographic_code) == 7:
                    representative['boundary_url'] = '/boundaries/census-subdivisions/%s/' % geographic_code
                elif len(geographic_code) == 4:
                    representative['boundary_url'] = '/boundaries/census-divisions/%s/' % geographic_code
            else:
                representative['district_name'] = post_label
                district_id = re.search(r'^(?:District|Division|Ward) (\d+)\Z', post_label)
                if district_id:
                    representative['district_id'] = district_id.group(1)

            representatives.append(representative)

    return HttpResponse(json.dumps(representatives), content_type='application/json')
Example #5
0
def represent(request, module_name):
    """Serve the memberships of a scraper module's jurisdiction as JSON.

    Each membership becomes one object keyed by the Represent API field
    names (see the link in the body). A membership whose post label starts
    with "Wards" yields one object per number found in the label.

    :param request: the incoming request; not read by this view.
    :param module_name: scraper module name, passed to
        ``module_name_to_metadata``. If it ends in ``_candidates``, only
        memberships with the role "candidate" are included; otherwise the
        roles "member" and "candidate" are excluded.
    :returns: an ``HttpResponse`` containing the JSON-encoded list, with
        content type ``application/json``.
    """
    sys.path.append(os.path.abspath('scrapers'))

    metadata = module_name_to_metadata(module_name)

    representatives = []

    queryset = Membership.objects.filter(
        organization__jurisdiction_id=metadata['jurisdiction_id'])

    # QuerySet methods return a new QuerySet instead of modifying the
    # receiver, so the result must be re-assigned for the filter to apply.
    if module_name.endswith('_candidates'):
        # Include only candidates.
        queryset = queryset.filter(role='candidate')
    else:
        # Exclude candidates and party memberships.
        queryset = queryset.exclude(role__in=('member', 'candidate'))

    for membership in queryset.prefetch_related('contact_details', 'person',
                                                'person__links',
                                                'person__sources'):
        person = membership.person

        try:
            party_name = Membership.objects.get(
                organization__classification='party',
                role='member',
                person=person).organization.name
        except Membership.DoesNotExist:
            party_name = None

        if person.gender == 'male':
            gender = 'M'
        elif person.gender == 'female':
            gender = 'F'
        else:
            gender = None

        # Popped before get_extra(person) is called below, so the key is no
        # longer in person.extras at that point.
        incumbent = person.extras.pop('incumbent', None)

        email = next((contact_detail.value
                      for contact_detail in membership.contact_details.all()
                      if contact_detail.type == 'email'), None)

        # @see https://represent.opennorth.ca/api/#fields
        representative = {
            'name': person.name,
            'elected_office': membership.role,
            'party_name': party_name,
            'email': email,
            'photo_url': person.image or None,
            'personal_url': get_personal_url(person),
            'gender': gender,
            'offices': json.dumps(get_offices(membership)),
            'extra': json.dumps(get_extra(person)),
        }

        # The first source URL is omitted when longer than 200 characters.
        # @see https://github.com/opennorth/represent-canada/issues/81
        sources = list(person.sources.all())
        if len(sources[0].url) <= 200:
            representative['source_url'] = sources[0].url

        if len(sources) > 1:
            representative['url'] = sources[-1].url

        if incumbent:
            representative['incumbent'] = True

        match = re.search(r'^(\S+) (Ward \d+)$', membership.post.label)

        # If the person is part of Peel Regional Council.
        if match:
            parent = Division.objects.get(subtype1='csd',
                                          subtype2='',
                                          name=match.group(1))
            division = Division.objects.get(subtype1='csd',
                                            subid1=parent.subid1,
                                            name=match.group(2))
            # First boundary mapping whose prefix starts with the parent
            # division's ID, or None if there is no such mapping.
            boundary_set_slug = next(
                (k for k, v in settings.IMAGO_BOUNDARY_MAPPINGS.items()
                 if v['prefix'].startswith(parent.id)), None)
            representative['district_name'] = membership.post.label
            representative['boundary_url'] = '/boundaries/{}/ward-{}/'.format(
                boundary_set_slug, division.subid2)
            representatives.append(representative)
        # If the person is associated to multiple boundaries.
        elif re.search(r'^Wards\b', membership.post.label):
            for district_id in re.findall(r'\d+', membership.post.label):
                # Copy so that each ward gets its own dictionary.
                representative = representative.copy()
                representative['district_id'] = district_id
                representative['district_name'] = 'Ward %s' % district_id
                representatives.append(representative)
        else:
            division_id = metadata['division_id']
            # Raw strings keep \d and \Z as regex escapes rather than
            # (invalid) string escapes.
            if re.search(r'^ocd-division/country:ca/csd:(\d{7})\Z',
                         division_id):
                geographic_code = division_id[-7:]
            elif re.search(r'^ocd-division/country:ca/cd:(\d{4})\Z',
                           division_id):
                geographic_code = division_id[-4:]
            else:
                geographic_code = None
            post_label = remove_suffix_re.sub('', membership.post.label)
            # If the post label is numeric.
            if re.search(r'^\d+\Z', post_label):
                representative['district_id'] = post_label
            # If the person has a boundary URL.
            elif membership.extras.get('boundary_url'):
                representative['district_name'] = post_label
                representative['boundary_url'] = (
                    membership.extras['boundary_url'])
            # If the post label is a census subdivision.
            elif post_label == metadata['division_name'] and geographic_code:
                representative['district_name'] = post_label
                if len(geographic_code) == 7:
                    representative['boundary_url'] = (
                        '/boundaries/census-subdivisions/%s/' %
                        geographic_code)
                elif len(geographic_code) == 4:
                    representative['boundary_url'] = (
                        '/boundaries/census-divisions/%s/' % geographic_code)
            else:
                representative['district_name'] = post_label
                district_id = re.search(r'^(?:District|Division|Ward) (\d+)\Z',
                                        post_label)
                if district_id:
                    representative['district_id'] = district_id.group(1)

            representatives.append(representative)

    return HttpResponse(json.dumps(representatives),
                        content_type='application/json')
Example #6
0
    def handle(self, *args, **options):
        def save(key, body):
            k = Key(bucket)
            k.key = key
            k.set_contents_from_string(body)
            k.set_acl('public-read')

        sys.path.append(os.path.abspath('scrapers'))

        bucket = S3Connection().get_bucket('represent.opennorth.ca')

        names = {
            'Parliament of Canada': 'house-of-commons',
            'Legislative Assembly of Alberta': 'alberta-legislature',
            'Legislative Assembly of British Columbia': 'bc-legislature',
            'Legislative Assembly of Manitoba': 'manitoba-legislature',
            'Legislative Assembly of New Brunswick': 'new-brunswick-legislature',
            'Newfoundland and Labrador House of Assembly': 'newfoundland-labrador-legislature',
            'Nova Scotia House of Assembly': 'nova-scotia-legislature',
            'Legislative Assembly of Ontario': 'ontario-legislature',
            'Legislative Assembly of Prince Edward Island': 'pei-legislature',
            'Assemblée nationale du Québec': 'quebec-assemblee-nationale',
            'Legislative Assembly of Saskatchewan': 'saskatchewan-legislature',
        }

        default_headers = [
            'District name',
            'Primary role',
            'Name',  # not in CSV schema
            'First name',
            'Last name',
            'Gender',
            'Party name',
            'Email',
            'Photo URL',
            'Source URL',
            'Website',
            'Facebook',
            'Instagram',
            'Twitter',
            'LinkedIn',
            'YouTube',
        ]
        office_headers = [
            'Office type',  # not in CSV schema
            'Address',  # not in CSV schema
            'Phone',
            'Fax',
        ]

        all_rows = []
        max_offices_count = 0

        reports = Report.objects.filter(exception='').exclude(module__endswith='_candidates').exclude(module__endswith='_municipalities').order_by('module')
        for report in reports:
            try:
                metadata = module_name_to_metadata(report.module)

                rows = []
                offices_count = 0

                # Exclude party memberships.
                queryset = Membership.objects.filter(organization__jurisdiction_id=metadata['jurisdiction_id']).exclude(role__in=('member', 'candidate'))
                for membership in queryset.prefetch_related('contact_details', 'person', 'person__links', 'person__sources'):
                    person = membership.person

                    try:
                        party_name = Membership.objects.get(organization__classification='party', role='member', person=person).organization.name
                    except Membership.DoesNotExist:
                        party_name = None

                    facebook = None
                    instagram = None
                    linkedin = None
                    twitter = None
                    youtube = None
                    for link in person.links.all():
                        domain = '.'.join(urlsplit(link.url).netloc.split('.')[-2:])
                        if domain in ('facebook.com', 'fb.com'):
                            facebook = link.url
                        elif domain == 'instagram.com':
                            instagram = link.url
                        elif domain == 'linkedin.com':
                            linkedin = link.url
                        elif domain == 'twitter.com':
                            twitter = link.url
                        elif domain == 'youtube.com':
                            youtube = link.url

                    if person.gender == 'male':
                        gender = 'M'
                    elif person.gender == 'female':
                        gender = 'F'
                    else:
                        gender = None

                    if ' ' in person.name:
                        first_name, last_name = person.name.rsplit(' ', 1)
                    else:
                        first_name, last_name = None, person.name

                    # @see https://represent.opennorth.ca/api/#fields
                    sources = list(person.sources.all())
                    row = [
                        remove_suffix_re.sub('', membership.post.label),  # District name
                        membership.role,  # Elected office
                        person.name,  # Name
                        first_name,  # First name
                        last_name,  # Last name
                        gender,  # Gender
                        party_name,  # Party name
                        next((contact_detail.value for contact_detail in membership.contact_details.all() if contact_detail.type == 'email'), None),  # Email
                        person.image,  # Photo URL
                        sources[-1].url if len(sources) > 1 else None,  # Source URL
                        get_personal_url(person),  # Website
                        facebook,  # Facebook
                        instagram,  # Instagram
                        twitter,  # Twitter
                        linkedin,  # LinkedIn
                        youtube,  # YouTube
                    ]

                    offices = get_offices(membership)
                    if len(offices) > offices_count:
                        offices_count = len(offices)

                    for office in offices:
                        for key in ('type', 'postal', 'tel', 'fax'):
                            row.append(office.get(key))

                    # If the person is associated to multiple boundaries.
                    if re.search(r'\AWards\b', membership.post.label):
                        for district_id in re.findall(r'\d+', membership.post.label):
                            row = row[:]
                            row[0] = 'Ward %s' % district_id
                            rows.append(row)
                    else:
                        rows.append(row)

                rows.sort()

                headers = default_headers[:]
                for _ in range(offices_count):
                    headers += office_headers

                name = metadata['name']
                if name in names:
                    slug = names[name]
                else:
                    slug = slugify(name)

                io = StringIO()
                body = csv.writer(io)
                body.writerow(headers)
                body.writerows(rows)
                save('csv/%s.csv' % slug, codecs.encode(io.getvalue(), 'windows-1252'))

                if offices_count > max_offices_count:
                    max_offices_count = offices_count

                for row in rows:
                    row.insert(0, name)
                    all_rows.append(row)
            except ImportError:
                report.delete()  # delete reports for old modules

        headers = ['Organization'] + default_headers
        for _ in range(max_offices_count):
            headers += office_headers

        io = StringIO()
        body = csv.writer(io)
        body.writerow(headers)
        body.writerows(all_rows)
        save('csv/complete.csv', codecs.encode(io.getvalue(), 'windows-1252'))
Example #7
0
def represent(request, module_name):
    """Serve a scraper module's memberships as a JSON list of representatives.

    For every selected membership in the module's jurisdiction, a dict is
    built with the person's name, role, party, email, photo, personal URL,
    gender, offices and extra data (the last two JSON-encoded), plus source
    URLs and district/boundary information derived from the post label.

    Modules whose name ends in ``_candidates`` yield only memberships with the
    ``candidate`` role; all other modules skip the ``member`` (party) and
    ``candidate`` roles.

    :param request: the incoming HTTP request (not inspected here)
    :param module_name: name of the scraper module to report on
    :returns: an ``HttpResponse`` whose body is the JSON-encoded list
    :raises IndexError: if a person has no sources at all
    """
    # Make the scrapers importable without growing sys.path on every request.
    scrapers_path = os.path.abspath('scrapers')
    if scrapers_path not in sys.path:
        sys.path.append(scrapers_path)

    metadata = module_name_to_metadata(module_name)

    representatives = []

    queryset = Membership.objects.filter(
        organization__jurisdiction_id=metadata['jurisdiction_id'])

    # filter()/exclude() return a *new* queryset, so the result has to be
    # rebound; calling them without assignment leaves the query unrestricted.
    if module_name.endswith('_candidates'):
        # Include only candidates.
        queryset = queryset.filter(role='candidate')
    else:
        # Exclude candidates and party memberships.
        queryset = queryset.exclude(role__in=('member', 'candidate'))

    gender_codes = {'male': 'M', 'female': 'F'}

    for membership in queryset.prefetch_related('contact_details', 'person',
                                                'person__links',
                                                'person__sources', 'post'):
        person = membership.person

        # Not sure why this is necessary: extras sometimes arrive as a JSON
        # string rather than an already-decoded dict.
        if not isinstance(membership.extras, dict):
            membership.extras = json.loads(membership.extras)
        if not isinstance(person.extras, dict):
            person.extras = json.loads(person.extras)

        try:
            party_name = Membership.objects.select_related('organization').get(
                organization__classification='party',
                role='member',
                person=person).organization.name
        except Membership.DoesNotExist:
            party_name = None

        # Any value other than 'male'/'female' is reported as None.
        gender = gender_codes.get(person.gender)

        # Candidates only. Popped so that the flag is removed from
        # person.extras before get_extra(person) is called below.
        incumbent = person.extras.pop('incumbent', None)

        # @see https://represent.opennorth.ca/api/#fields
        representative = {
            'name': person.name,
            'elected_office': membership.role,
            'party_name': party_name,
            'email': next(
                (contact_detail.value
                 for contact_detail in membership.contact_details.all()
                 if contact_detail.type == 'email'), None),
            'photo_url': person.image or None,
            'personal_url': get_personal_url(person),
            'gender': gender,
            'offices': json.dumps(get_offices(membership)),
            'extra': json.dumps(get_extra(person)),
        }

        sources = list(person.sources.all())

        # The first URL ought to be the most generic source.
        representative['source_url'] = sources[0].url

        if len(sources) > 1:
            # The last URL ought to be the most specific source.
            representative['url'] = sources[-1].url

        if incumbent:
            representative['incumbent'] = True

        match = re.search(r'^(\S+) (Ward \d+)$', membership.post.label)

        # If the person is part of Peel Regional Council.
        if match:
            parent = Division.objects.get(subtype1='csd',
                                          subtype2='',
                                          name=match.group(1))
            division = Division.objects.get(subtype1='csd',
                                            subid1=parent.subid1,
                                            name=match.group(2))
            boundary_set_slug = '{}-wards'.format(parent.name.lower())
            representative['district_name'] = membership.post.label
            representative['boundary_url'] = '/boundaries/{}/ward-{}/'.format(
                boundary_set_slug, division.subid2)
            representatives.append(representative)

        # If the person is associated to multiple boundaries.
        elif re.search(r'^Wards\b', membership.post.label):
            for district_id in re.findall(r'\d+', membership.post.label):
                # One independent copy of the record per ward number.
                representative = representative.copy()
                representative['district_id'] = district_id
                representative['district_name'] = 'Ward {}'.format(district_id)
                representatives.append(representative)

        else:
            division_id = metadata['division_id']

            # Raw strings: '\d' and '\Z' are regex escapes, not string escapes.
            if re.search(r'^ocd-division/country:ca/csd:(\d{7})\Z',
                         division_id):
                geographic_code = division_id[-7:]
            elif re.search(r'^ocd-division/country:ca/cd:(\d{4})\Z',
                           division_id):
                geographic_code = division_id[-4:]
            else:
                geographic_code = None

            post_label = remove_suffix_re.sub('', membership.post.label)

            # If the post label is numeric.
            if re.search(r'^\d+\Z', post_label):
                representative['district_id'] = post_label

            # If the person has a boundary URL.
            elif 'boundary_url' in membership.extras:
                representative['district_name'] = post_label
                representative['boundary_url'] = membership.extras[
                    'boundary_url']

            # If the post label is a Census geographic name.
            elif post_label == metadata['division_name'] and geographic_code:
                representative['district_name'] = post_label
                # 7 characters: census subdivision; 4: census division.
                if len(geographic_code) == 7:
                    representative['boundary_url'] = (
                        '/boundaries/census-subdivisions/{}/'.format(
                            geographic_code))
                elif len(geographic_code) == 4:
                    representative['boundary_url'] = (
                        '/boundaries/census-divisions/{}/'.format(
                            geographic_code))

            else:
                representative['district_name'] = post_label
                district_id = re.search(r'^(?:District|Division|Ward) (\d+)\Z',
                                        post_label)
                if district_id:
                    representative['district_id'] = district_id.group(1)

            representatives.append(representative)

    return HttpResponse(json.dumps(representatives),
                        content_type='application/json')