Esempio n. 1
0
  def handle(self, *args, **options):
    """Delete the people, memberships and organizations of each jurisdiction.

    Each positional argument is treated as a jurisdiction ID. The database
    location is read from the MONGOHQ_URL environment variable, falling back
    to a local "pupa" database. `options` is accepted but not used.
    """
    mongo_url = os.getenv('MONGOHQ_URL', 'mongodb://localhost:27017/pupa')
    url_parts = urlsplit(mongo_url)
    # The URL path is "/<name>"; drop the leading slash before passing it on.
    _configure_db(mongo_url, url_parts.port, url_parts.path[1:])

    for jurisdiction_id in args:
      known = db.jurisdictions.find({'_id': jurisdiction_id}).count()
      if not known:
        print("Couldn't find jurisdiction_id %s" % jurisdiction_id)
        continue

      # People carry no jurisdiction of their own here, so they are reached
      # through the jurisdiction's memberships before those are removed.
      for link in db.memberships.find({'jurisdiction_id': jurisdiction_id}):
        db.people.remove({'_id': link['person_id']})

      for collection in (db.memberships, db.organizations):
        collection.remove({'jurisdiction_id': jurisdiction_id})
Esempio n. 2
0
  def handle(self, *args, **options):
    """Export one CSV per jurisdiction, plus a combined CSV, to S3.

    For every report with an empty exception (candidate and municipality
    modules excluded), the report's module is imported and scanned for objects
    carrying a truthy `jurisdiction_id`. Each such jurisdiction's non-party
    memberships become rows of `csv/<slug>.csv`; all rows, prefixed with the
    jurisdiction name, also go into `csv/complete.csv`. Reports whose module
    can no longer be imported are deleted.

    `args` and `options` are accepted but not used.
    """
    def save(key, body):
      # Upload `body` under `key` in the bucket and make it publicly readable.
      k = Key(bucket)
      k.key = key
      k.set_contents_from_string(body)
      k.set_acl('public-read')

    def write_csv(key, headers, rows):
      # Serialize the header and rows through UnicodeWriter and upload the result.
      output = StringIO()
      writer = UnicodeWriter(output, encoding='windows-1252')
      writer.writerow(headers)
      writer.writerows(rows)
      save(key, output.getvalue())

    def build_row(jurisdiction_id, membership):
      # Return (row, number of offices) for one non-party membership.
      person = db.people.find_one({'_id': membership['person_id']})

      # The party is the organization of the person's 'member' membership, if any.
      party_membership = db.memberships.find_one({'jurisdiction_id': jurisdiction_id, 'role': 'member', 'person_id': membership['person_id']})
      if party_membership:
        party_name = db.organizations.find_one({'_id': party_membership['organization_id']})['name']
      else:
        party_name = None

      # Everything after the last space is taken as the last name.
      if ' ' in person['name']:
        first_name, last_name = person['name'].rsplit(' ', 1)
      else:
        first_name, last_name = None, person['name']

      # @see http://represent.opennorth.ca/api/#fields
      row = [
        person['post_id'], # District name
        membership['role'], # Elected office
        person['name'], # Name
        first_name, # First name
        last_name, # Last name
        genders.get(person['gender']), # Gender
        party_name, # Party name
        next((contact_detail['value'] for contact_detail in membership['contact_details'] if contact_detail['type'] == 'email'), None), # Email
        # Only set when there is more than one source; the last one is used.
        person['sources'][-1]['url'] if len(person['sources']) > 1 else None, # URL
        person['image'], # Photo URL
        get_personal_url(person), # Personal URL
      ]

      # Four columns per office, in the same order as office_headers.
      offices = get_offices(membership)
      for office in offices:
        row.extend(office.get(key) for key in office_keys)

      return row, len(offices)

    def expand_wards(row):
      # Return one copy of `row` per ward when the district names several wards.
      post_id = row[0]
      if multi_ward_re.search(post_id):
        return [['Ward %s' % district_id] + row[1:] for district_id in re.findall(r'\d+', post_id)]
      return [row]

    sys.path.append(os.path.abspath('scrapers'))

    url = os.getenv('MONGOHQ_URL', 'mongodb://localhost:27017/pupa')
    parsed = urlsplit(url)
    # The URL path is "/<name>"; drop the leading slash before passing it on.
    _configure_db(url, parsed.port, parsed.path[1:])

    bucket = S3Connection().get_bucket('represent.opennorth.ca')

    # Jurisdiction names whose CSV slug is fixed rather than derived by slugify().
    names = {
      'Parliament of Canada': 'house-of-commons',
      'Legislative Assembly of Alberta': 'alberta-legislature',
      'Legislative Assembly of British Columbia': 'bc-legislature',
      'Legislative Assembly of Manitoba': 'manitoba-legislature',
      'Legislative Assembly of New Brunswick': 'new-brunswick-legislature',
      'Newfoundland and Labrador House of Assembly': 'newfoundland-labrador-legislature',
      'Nova Scotia House of Assembly': 'nova-scotia-legislature',
      'Legislative Assembly of Ontario': 'ontario-legislature',
      'Legislative Assembly of Prince Edward Island': 'pei-legislature',
      'Assemblée nationale du Québec': 'quebec-assemblee-nationale',
      'Legislative Assembly of Saskatchewan': 'saskatchewan-legislature',
    }

    default_headers = [
      'District name',
      'Elected office',
      'Name',
      'First name',
      'Last name',
      'Gender',
      'Party name',
      'Email',
      'URL',
      'Photo URL',
      'Personal URL',
    ]
    office_headers = [
      'Office type',
      'Address',
      'Phone',
      'Fax',
    ]
    office_keys = ('type', 'postal', 'tel', 'fax')
    genders = {'male': 'M', 'female': 'F'}

    # Matches e.g. "Wards 1 & 2" or "Wards 10,11 and 12". The first number uses
    # \d+ like the following ones, so a multi-digit first ward is expanded too.
    multi_ward_re = re.compile(r'\AWards \d+(?:(?:,| & | and )\d+)+\Z')

    all_rows = []
    max_offices_count = 0

    reports = Report.objects.filter(exception='').exclude(module__endswith='_candidates').exclude(module__endswith='_municipalities').order_by('module')
    for report in reports:
      # Only the import itself is guarded, so an unrelated ImportError raised
      # later in the export cannot cause a report to be deleted.
      try:
        module = importlib.import_module(report.module)
      except ImportError:
        report.delete()  # delete reports for old modules
        continue

      for obj in module.__dict__.values():
        jurisdiction_id = getattr(obj, 'jurisdiction_id', None)
        if not jurisdiction_id:
          continue

        # We've found a jurisdiction definition in the module.
        name = getattr(obj, 'name', None)

        rows = []
        offices_count = 0

        # Skip 'member' (party) and 'candidate' memberships; the party is
        # looked up per person inside build_row().
        for membership in db.memberships.find({'jurisdiction_id': jurisdiction_id, 'role': {'$nin': ['member', 'candidate']}}):
          row, row_offices_count = build_row(jurisdiction_id, membership)
          offices_count = max(offices_count, row_offices_count)
          rows.extend(expand_wards(row))

        rows.sort()

        # One group of office columns for each office of the busiest row.
        headers = default_headers + office_headers * offices_count

        if name in names:
          slug = names[name]
        else:
          slug = slugify(name)

        write_csv('csv/%s.csv' % slug, headers, rows)

        max_offices_count = max(max_offices_count, offices_count)

        # The per-jurisdiction file is already written, so the rows can be
        # reused for the combined file with the organization name prepended.
        for row in rows:
          row.insert(0, name)
          all_rows.append(row)

    headers = ['Organization'] + default_headers + office_headers * max_offices_count
    write_csv('csv/complete.csv', headers, all_rows)