def _build_metadata(self, year, elections):
     """Build result-file metadata entries for Arizona elections.

     For each election, the rows of ``self._url_paths()`` are kept when
     their ``date`` equals the election's ``start_date`` and their
     ``special`` flag agrees with the election type (an election counts as
     special when ``'special'`` appears in its slug). Each kept row yields
     one metadata dict.

     ``year`` is passed through ``int()`` but the converted value is not
     used afterwards.

     Returns a list of dicts with the keys ``generated_filename``,
     ``raw_url``, ``pre_processed_url``, ``ocd_id``, ``name`` and
     ``election``.
     """
     state_ocd_id = 'ocd-division/country:us/state:az'
     entries = []
     year_int = int(year)
     for election in elections:
         is_special = 'special' in election['slug']
         matches = [
             row for row in self._url_paths()
             if row['date'] == election['start_date']
             and row['special'] == is_special
         ]
         for row in matches:
             filename = self._generate_filename(election['start_date'], row)
             # Only special elections are narrowed to a county division, and
             # only when the row actually names a county.
             if is_special and row['county']:
                 county_slug = row['county'].replace(' ', '_').lower()
                 ocd_id = state_ocd_id + '/county:' + county_slug
             else:
                 ocd_id = state_ocd_id
             entries.append({
                 "generated_filename": filename,
                 "raw_url": election['direct_links'][0],
                 "pre_processed_url": build_github_url(self.state, filename),
                 "ocd_id": ocd_id,
                 "name": 'Arizona',
                 "election": election['slug'],
             })
     return entries
    def _build_election_metadata_default(self, election):
        """Return a one-element list with the default mapping for ``election``.

        The raw URL is the election's first direct link. When that link
        starts with ``self.RESULTS_PORTAL_URL`` the standardized filename is
        requested with a precinct reporting level and a ``.tsv`` extension;
        otherwise no filename overrides are passed.

        Elections whose slug contains ``"2002"`` additionally get a
        ``pre_processed_url`` and have ``.pdf`` swapped for ``.csv`` in the
        generated filename.
        """
        raw_url = election['direct_links'][0]

        if raw_url.startswith(self.RESULTS_PORTAL_URL):
            # Report portal results are precinct-level, tab-delimited text.
            name_options = {'reporting_level': 'precinct', 'extension': '.tsv'}
        else:
            name_options = {}

        filename = self._standardized_filename(election, **name_options)
        entry = {
            "generated_filename": filename,
            "raw_url": raw_url,
            "ocd_id": 'ocd-division/country:us/state:ar',
            "name": 'Arkansas',
            "election": election['slug'],
        }

        if "2002" not in election['slug']:
            return [entry]

        csv_filename = filename.replace('.pdf', '.csv')
        entry['pre_processed_url'] = build_github_url(self.state, csv_filename)
        entry['generated_filename'] = csv_filename
        return [entry]
 def _build_metadata(self, year, elections):
     """Build result-file metadata entries for Connecticut elections.

     For each election, rows of ``self._url_paths()`` are selected when
     their ``date`` equals the election's ``start_date`` and their
     ``special`` flag matches the election: ``== True`` for special
     elections, ``!= True`` otherwise. Each selected row yields one dict.

     When the election has direct links, the first one becomes ``raw_url``
     and ``pre_processed_url`` is ``None``; otherwise ``raw_url`` is
     ``None`` and ``pre_processed_url`` comes from ``build_github_url``.

     ``year`` is passed through ``int()`` but the result is not used.
     """
     entries = []
     year_int = int(year)
     for election in elections:
         want_special = bool(election['special'])
         # ``(flag == True) == want_special`` keeps rows flagged True for
         # special elections and every other row for regular ones.
         matches = [
             row for row in self._url_paths()
             if row['date'] == election['start_date']
             and (row['special'] == True) == want_special
         ]
         for row in matches:
             filename = self._generate_filename(
                 election['start_date'], election['race_type'], row)
             links = election['direct_links']
             if links:
                 raw_url, github_url = links[0], None
             else:
                 raw_url, github_url = None, build_github_url(self.state, filename)
             entries.append({
                 "generated_filename": filename,
                 "raw_url": raw_url,
                 "pre_processed_url": github_url,
                 "ocd_id": 'ocd-division/country:us/state:ct',
                 "name": 'Connecticut',
                 "election": election['slug'],
             })
     return entries
 def _build_metadata(self, year, elections):
     """Build result-file metadata entries for West Virginia elections.

     Args:
         year: Election year, as an int or a numeric string.
         elections: Iterable of election dicts. The keys read here are
             ``direct_links``, ``start_date``, ``slug`` and, before 2008,
             ``race_type``.

     Returns:
         A list of dicts with the keys ``generated_filename``, ``raw_url``,
         ``pre_processed_url``, ``ocd_id``, ``name`` and ``election``.

     Before 2008 there is one statewide entry per ``self._url_paths()`` row
     whose ``date`` equals the election's ``start_date``. From 2008 on
     there is one entry per county, pairing ``self._jurisdictions()`` with
     the CSV links found at the election's first direct link.
     """
     meta = []
     # Branch on the integer year so a string such as "2012" also works;
     # comparing a str with an int raises TypeError on Python 3.
     year_int = int(year)
     if year_int < 2008:
         for election in elections:
             results = [
                 x for x in self._url_paths()
                 if x['date'] == election['start_date']
             ]
             for result in results:
                 generated_filename = self._generate_office_filename(
                     election['direct_links'][0], election['start_date'],
                     election['race_type'], result)
                 meta.append({
                     "generated_filename": generated_filename,
                     # ``year`` is forwarded exactly as the caller gave it.
                     "raw_url": self._build_raw_url(year, result['path']),
                     "pre_processed_url": build_github_url(self.state, generated_filename),
                     "ocd_id": 'ocd-division/country:us/state:wv',
                     "name": 'West Virginia',
                     "election": election['slug'],
                 })
     else:
         for election in elections:
             csv_links = self._find_csv_links(election['direct_links'][0])
             counties = self._jurisdictions()
             # The first CSV link is skipped; the rest are paired with the
             # jurisdictions positionally (zip stops at the shorter one).
             for county, csv_link in zip(counties, csv_links[1:]):
                 meta.append({
                     "generated_filename": self._generate_county_filename(county['county'], election),
                     "pre_processed_url": None,
                     "raw_url": csv_link,
                     "ocd_id": county['ocd_id'],
                     "name": county['county'],
                     "election": election['slug'],
                 })
     return meta
Example #5
0
    def _build_election_metadata_default(self, election):
        """Build the single default metadata mapping for ``election``.

        Uses the election's first direct link as ``raw_url``. Links under
        ``self.RESULTS_PORTAL_URL`` get a precinct-level ``.tsv`` filename;
        any other link uses the standardized filename with no overrides.
        If the slug contains ``"2002"``, ``.pdf`` in the filename is
        replaced by ``.csv`` and a ``pre_processed_url`` is attached.

        Returns a list containing exactly one dict.
        """
        first_link = election['direct_links'][0]

        # Report portal results are precinct-level and tab-delimited.
        from_portal = first_link.startswith(self.RESULTS_PORTAL_URL)
        overrides = (
            {'reporting_level': 'precinct', 'extension': '.tsv'}
            if from_portal else {}
        )

        name = self._standardized_filename(election, **overrides)
        mapping = {
            "generated_filename": name,
            "raw_url": first_link,
            "ocd_id": 'ocd-division/country:us/state:ar',
            "name": 'Arkansas',
            "election": election['slug'],
        }

        if "2002" in election['slug']:
            name = name.replace('.pdf', '.csv')
            mapping.update(pre_processed_url=build_github_url(self.state, name))
            mapping['generated_filename'] = name

        return [mapping]
Example #6
0
 def _build_metadata(self, year, elections):
     """Build result-file metadata entries for West Virginia elections.

     Args:
         year: Election year, as an int or a numeric string.
         elections: Iterable of election dicts. The keys read here are
             ``direct_links``, ``start_date``, ``slug`` and, before 2008,
             ``race_type``.

     Returns:
         A list of dicts with the keys ``generated_filename``, ``raw_url``,
         ``pre_processed_url``, ``ocd_id``, ``name`` and ``election``.

     Years before 2008 produce one statewide entry per matching row of
     ``self._url_paths()``; later years produce one entry per county by
     pairing ``self._jurisdictions()`` with the CSV links discovered at the
     election's first direct link.
     """
     meta = []
     # Use the converted year for branching: a str compared with an int
     # raises TypeError on Python 3.
     year_int = int(year)
     if year_int < 2008:
         for election in elections:
             matching = (
                 x for x in self._url_paths()
                 if x['date'] == election['start_date']
             )
             for result in list(matching):
                 generated_filename = self._generate_office_filename(
                     election['direct_links'][0], election['start_date'],
                     election['race_type'], result)
                 meta.append({
                     "generated_filename": generated_filename,
                     # The caller's ``year`` value is passed on unchanged.
                     "raw_url": self._build_raw_url(year, result['path']),
                     "pre_processed_url": build_github_url(self.state, generated_filename),
                     "ocd_id": 'ocd-division/country:us/state:wv',
                     "name": 'West Virginia',
                     "election": election['slug'],
                 })
     else:
         for election in elections:
             csv_links = self._find_csv_links(election['direct_links'][0])
             counties = self._jurisdictions()
             # Skip the first link, then pair links with jurisdictions in
             # order; zip stops at the shorter sequence.
             for county, csv_link in zip(counties, csv_links[1:]):
                 meta.append({
                     "generated_filename": self._generate_county_filename(county['county'], election),
                     "pre_processed_url": None,
                     "raw_url": csv_link,
                     "ocd_id": county['ocd_id'],
                     "name": county['county'],
                     "election": election['slug'],
                 })
     return meta
Example #7
0
 def _build_metadata(self, year, elections):
     """Return metadata dicts for Connecticut result files.

     Rows from ``self._url_paths()`` are matched to each election by
     ``date`` and by the ``special`` flag (``== True`` for special
     elections, ``!= True`` for all others); one dict is produced per
     matching row.

     An election with direct links uses its first link as ``raw_url`` and
     leaves ``pre_processed_url`` as ``None``. Without direct links the
     roles are reversed and ``pre_processed_url`` is obtained from
     ``build_github_url``.

     ``year`` is converted with ``int()`` but the value is otherwise unused.
     """
     collected = []
     year_int = int(year)
     for election in elections:
         special_election = bool(election['special'])
         # Keep rows flagged True for special elections, every other row
         # for regular ones.
         rows = [
             row for row in self._url_paths()
             if row['date'] == election['start_date']
             and (row['special'] == True) == special_election
         ]
         for row in rows:
             filename = self._generate_filename(
                 election['start_date'], election['race_type'], row)
             direct_links = election['direct_links']
             raw_url = direct_links[0] if direct_links else None
             github_url = (
                 None if direct_links
                 else build_github_url(self.state, filename)
             )
             collected.append({
                 "generated_filename": filename,
                 "raw_url": raw_url,
                 "pre_processed_url": github_url,
                 "ocd_id": 'ocd-division/country:us/state:ct',
                 "name": 'Connecticut',
                 "election": election['slug'],
             })
     return collected
Example #8
0
 def _build_metadata_preprocessed(self, election):
     """Return the metadata list for an election with preprocessed results.

     The list holds one dict. Its filename is the standardized filename
     with a ``.csv`` extension, and its ``raw_url`` is whatever
     ``build_github_url('wa', filename)`` returns.
     """
     csv_name = self._standardized_filename(election, extension=".csv")
     entry = {
         'generated_filename': csv_name,
         'raw_url': build_github_url('wa', csv_name),
         'ocd_id': 'ocd-division/country:us/state:wa',
         'name': "Washington",
         'election': election['slug'],
     }
     return [entry]
    def _add_preprocessed_urls(self, meta_entries):
        """Attach preprocessed-file details to entries whose raw file is a PDF.

        For each entry whose ``raw_url`` has a ``.pdf`` extension (as
        reported by ``self._filename_extension``), ``.pdf`` in
        ``generated_filename`` is replaced with ``.csv`` and a
        ``pre_processed_url`` is added. Entries are modified in place.

        Returns a new list containing the same entry objects, in order.
        """
        updated = []
        for entry in meta_entries:
            if self._filename_extension(entry['raw_url']) == ".pdf":
                csv_name = entry['generated_filename'].replace(".pdf", ".csv")
                entry['generated_filename'] = csv_name
                entry['pre_processed_url'] = build_github_url(self.state, csv_name)
            updated.append(entry)
        return updated
 def _build_metadata_preprocessed(self, election):
     """Build metadata for an election whose results are preprocessed.

     Returns a single-item list. The item's ``generated_filename`` is the
     standardized ``.csv`` filename for the election and its ``raw_url``
     is the value of ``build_github_url('wa', ...)`` for that filename.
     """
     filename = self._standardized_filename(election, extension=".csv")
     github_url = build_github_url('wa', filename)
     return [dict(
         generated_filename=filename,
         raw_url=github_url,
         ocd_id='ocd-division/country:us/state:wa',
         name="Washington",
         election=election['slug'],
     )]
Example #11
0
    def _add_preprocessed_urls(self, meta_entries):
        """Point PDF-backed entries at their preprocessed CSV.

        Every entry is copied by reference into the returned list. An entry
        whose ``raw_url`` extension is ``.pdf`` is first updated in place:
        its ``generated_filename`` has ``.pdf`` replaced with ``.csv`` and
        it gains a ``pre_processed_url`` from ``build_github_url``.
        """
        result = []
        for item in meta_entries:
            is_pdf = self._filename_extension(item['raw_url']) == ".pdf"
            if is_pdf:
                converted = item['generated_filename'].replace(".pdf", ".csv")
                item['generated_filename'] = converted
                item['pre_processed_url'] = build_github_url(
                    self.state, converted)
            result.append(item)
        return result
Example #12
0
    def _build_metadata_url_paths(self, election):
        """Return mappings for the election's result files in url_paths.csv.

        Paths with a truthy ``skip`` flag are ignored. For every remaining
        path one mapping dict is built. Paths whose file extension is
        ``.pdf`` or ``.mdb`` are named with a ``.csv`` extension instead and
        additionally receive a ``pre_processed_url``.
        """
        # Drop paths with the ``skip`` flag set in the mappings.
        candidates = [
            path for path in self._url_paths_for_election(election)
            if not path['skip']
        ]

        mappings = []
        for path in candidates:
            extension = self._filename_extension_for_url_path(path)
            # PDFs (and .mdb files) are preprocessed into CSVs, so the
            # downloaded file is named as a CSV.
            needs_preprocessing = extension in (".pdf", ".mdb")
            if needs_preprocessing:
                extension = ".csv"

            filename = self._standardized_filename(
                election,
                extension=extension,
                reporting_level=path['reporting_level'],
                jurisdiction=path['jurisdiction'],
                party=path['party'],
            )

            entry = {
                'generated_filename': filename,
                'raw_url': path['url'],
                'ocd_id': self._ocd_id_for_url_path(path),
                'name': path['jurisdiction'],
                'election': election['slug'],
                'raw_extracted_filename': path['raw_extracted_filename'],
                'parent_zipfile': path['parent_zipfile'],
            }
            if needs_preprocessing:
                entry['pre_processed_url'] = build_github_url(self.state, filename)

            mappings.append(entry)

        return mappings
    def _build_metadata_url_paths(self, election):
        """Build one mapping per non-skipped url_paths.csv row for ``election``.

        Rows whose ``skip`` flag is truthy are excluded up front. A row
        whose extension is ``.pdf`` or ``.mdb`` is treated as preprocessed:
        its generated filename uses ``.csv`` and the mapping also carries a
        ``pre_processed_url``.

        Returns the list of mapping dicts in row order.
        """
        preprocessed_extensions = (".pdf", ".mdb")
        # Exclude rows flagged ``skip`` in the mappings file.
        rows = [
            row for row in self._url_paths_for_election(election)
            if not row['skip']
        ]

        collected = []
        for row in rows:
            ext = self._filename_extension_for_url_path(row)
            # These formats are converted to CSV later, so name the
            # downloaded file accordingly.
            preprocessed = ext in preprocessed_extensions
            name_kwargs = {
                'extension': ".csv" if preprocessed else ext,
                'reporting_level': row['reporting_level'],
                'jurisdiction': row['jurisdiction'],
                'party': row['party'],
            }
            generated = self._standardized_filename(election, **name_kwargs)

            mapping = {
                'generated_filename': generated,
                'raw_url': row['url'],
                'ocd_id': self._ocd_id_for_url_path(row),
                'name': row['jurisdiction'],
                'election': election['slug'],
                'raw_extracted_filename': row['raw_extracted_filename'],
                'parent_zipfile': row['parent_zipfile'],
            }
            if preprocessed:
                mapping['pre_processed_url'] = build_github_url(
                    self.state, generated)
            collected.append(mapping)

        return collected
Example #14
0
 def _build_metadata(self, year, elections):
     """Build result-file metadata entries for Georgia elections.

     Args:
         year: Accepted for interface compatibility; not used here.
         elections: Iterable of election dicts. The keys read here are
             ``special``, ``start_date`` and ``slug``.

     Returns:
         A list of dicts with the keys ``generated_filename``, ``raw_url``,
         ``pre_processed_url``, ``ocd_id``, ``name`` and ``election``.

     Rows of ``self._url_paths()`` are matched to each election by ``date``
     and by a ``special`` flag equal to the election's. A special row
     yields two statewide entries (one per filename generator). A regular
     row yields one entry per jurisdiction from ``self._jurisdictions()``.
     """
     state_ocd_id = 'ocd-division/country:us/state:ga'

     def make_entry(generated_filename, ocd_id, name):
         # All entries for one row share raw_url and the election slug.
         return {
             "generated_filename": generated_filename,
             "raw_url": raw_url,
             "pre_processed_url": build_github_url(self.state, generated_filename),
             "ocd_id": ocd_id,
             "name": name,
             "election": election['slug'],
         }

     meta = []
     for election in elections:
         is_special = bool(election['special'])
         results = [
             x for x in self._url_paths()
             if x['date'] == election['start_date']
             and x['special'] == is_special
         ]
         for result in results:
             # An empty URL is normalised to None.
             raw_url = result['url'] or None
             if result['special']:
                 meta.append(make_entry(
                     self._generate_filename(election),
                     state_ocd_id, 'Georgia'))
                 meta.append(make_entry(
                     self._generate_special_filename(election, result),
                     state_ocd_id, 'Georgia'))
             else:
                 for jurisdiction in self._jurisdictions():
                     county = jurisdiction['county']
                     # Derive the division id from the jurisdiction being
                     # emitted so it agrees with the entry's name and
                     # filename.
                     county_ocd_id = '%s/county:%s' % (
                         state_ocd_id, county.lower().replace(" ", "_"))
                     meta.append(make_entry(
                         self._generate_county_filename(election, county, result),
                         county_ocd_id, county))
     return meta
Example #15
0
 def _build_metadata(self, year, elections):
     """Build per-county result-file metadata entries for Wyoming elections.

     Args:
         year: Election year, as an int or a numeric string.
         elections: Iterable of election dicts. The keys read here are
             ``start_date`` and ``slug``.

     Returns:
         A list of dicts, one per row of ``self._url_paths()`` whose
         ``date`` equals an election's ``start_date``.

     Raises:
         IndexError: if a row's ``county`` matches no entry of
             ``self._jurisdictions()``.

     The entry layout depends on the year:

     * after 2008: raw spreadsheet entries (``.xlsx`` for 2012, ``.xls``
       otherwise) carrying ``raw_extracted_filename`` and no
       ``pre_processed_url`` key;
     * 2006: ``.csv`` entries with a ``pre_processed_url``;
     * any other year (2008 included): entries whose URLs depend on the
       row's ``special`` flag and ``raw_extracted_filename``.
     """
     meta = []
     # Branch on the integer year so numeric strings work as well;
     # comparing a str with an int raises TypeError on Python 3.
     year_int = int(year)
     for election in elections:
         results = [
             x for x in self._url_paths()
             if x['date'] == election['start_date']
         ]
         for result in results:
             county = [
                 c for c in self._jurisdictions()
                 if c['county'] == result['county']
             ][0]
             if year_int > 2008:
                 extension = '.xlsx' if year_int == 2012 else '.xls'
                 generated_filename = self._generate_county_filename(
                     result, election, extension)
                 meta.append({
                     "generated_filename": generated_filename,
                     'raw_url': result['url'],
                     'raw_extracted_filename': result['raw_extracted_filename'],
                     "ocd_id": county['ocd_id'],
                     "name": county['county'],
                     "election": election['slug'],
                 })
             elif year_int == 2006:
                 generated_filename = self._generate_county_filename(
                     result, election, '.csv')
                 meta.append({
                     "generated_filename": generated_filename,
                     "pre_processed_url": build_github_url(self.state, generated_filename),
                     "raw_url": result['url'],
                     "ocd_id": county['ocd_id'],
                     "name": county['county'],
                     "election": election['slug'],
                 })
             else:
                 if result['special'] and result['raw_extracted_filename']:
                     # Fixed filename for this case, as in the data set.
                     generated_filename = '20021126__wy__special__general__natrona__state_house__36__precinct.xls'
                     raw_url = build_raw_github_url(
                         self.state,
                         election['start_date'].replace('-', ''),
                         result['raw_extracted_filename'])
                     pre_processed_url = ''
                 elif result['special']:
                     generated_filename = result['path']
                     raw_url = result['url']
                     pre_processed_url = ''
                 elif result['raw_extracted_filename'] != '':
                     # NOTE(review): split('.')[1] takes the text after the
                     # FIRST dot; confirm filenames never hold extra dots.
                     generated_filename = self._generate_county_filename(
                         result, election,
                         '.' + result['raw_extracted_filename'].split('.')[1])
                     pre_processed_url = build_raw_github_url(
                         self.state,
                         election['start_date'].replace('-', ''),
                         result['raw_extracted_filename'])
                     raw_url = ''
                 else:
                     generated_filename = self._generate_county_filename(
                         result, election, '.xls')
                     raw_url = build_raw_github_url(
                         self.state,
                         election['start_date'].replace('-', ''),
                         result['raw_extracted_filename'])
                     pre_processed_url = ''
                 meta.append({
                     "generated_filename": generated_filename,
                     "pre_processed_url": pre_processed_url,
                     "raw_url": raw_url,
                     "ocd_id": county['ocd_id'],
                     "name": county['county'],
                     "election": election['slug'],
                 })
     return meta
 def _build_metadata(self, year, elections):
     """Build per-county result-file metadata entries for Wyoming elections.

     Args:
         year: Election year, as an int or a numeric string.
         elections: Iterable of election dicts. The keys read here are
             ``start_date`` and ``slug``.

     Returns:
         A list of dicts, one per row of ``self._url_paths()`` whose
         ``date`` equals an election's ``start_date``.

     Raises:
         IndexError: if a row's ``county`` matches no entry of
             ``self._jurisdictions()``.

     The entry layout depends on the year:

     * after 2008: raw spreadsheet entries (``.xlsx`` after 2010, ``.xls``
       otherwise) carrying ``raw_extracted_filename`` and no
       ``pre_processed_url`` key;
     * 2006: ``.csv`` entries with a ``pre_processed_url``;
     * any other year (2008 included): entries whose URLs depend on the
       row's ``special`` flag and ``raw_extracted_filename``.
     """
     meta = []
     # Branch on the integer year so numeric strings work as well;
     # comparing a str with an int raises TypeError on Python 3.
     year_int = int(year)
     for election in elections:
         date_token = election['start_date'].replace('-', '')
         results = [x for x in self._url_paths() if x['date'] == election['start_date']]
         for result in results:
             county = [c for c in self._jurisdictions() if c['county'] == result['county']][0]
             if year_int > 2008:
                 extension = '.xlsx' if year_int > 2010 else '.xls'
                 generated_filename = self._generate_county_filename(result, election, extension)
                 meta.append({
                     "generated_filename": generated_filename,
                     'raw_url': result['url'],
                     'raw_extracted_filename': result['raw_extracted_filename'],
                     "ocd_id": county['ocd_id'],
                     "name": county['county'],
                     "election": election['slug'],
                 })
             elif year_int == 2006:
                 generated_filename = self._generate_county_filename(result, election, '.csv')
                 meta.append({
                     "generated_filename": generated_filename,
                     "pre_processed_url": build_github_url(self.state, generated_filename),
                     "raw_url": result['url'],
                     "ocd_id": county['ocd_id'],
                     "name": county['county'],
                     "election": election['slug'],
                 })
             else:
                 extracted = result['raw_extracted_filename']
                 if result['special'] and extracted:
                     # Fixed filename for this case, as in the data set.
                     generated_filename = '20021126__wy__special__general__natrona__state_house__36__precinct.xls'
                     raw_url = build_raw_github_url(self.state, date_token, extracted)
                     pre_processed_url = ''
                 elif result['special']:
                     generated_filename = result['path']
                     raw_url = result['url']
                     pre_processed_url = ''
                 elif extracted != '':
                     # NOTE(review): split('.')[1] takes the text after the
                     # FIRST dot; confirm filenames never hold extra dots.
                     generated_filename = self._generate_county_filename(
                         result, election, '.' + extracted.split('.')[1])
                     pre_processed_url = build_raw_github_url(self.state, date_token, extracted)
                     raw_url = ''
                 else:
                     generated_filename = self._generate_county_filename(result, election, '.xls')
                     raw_url = build_raw_github_url(self.state, date_token, extracted)
                     pre_processed_url = ''
                 meta.append({
                     "generated_filename": generated_filename,
                     "pre_processed_url": pre_processed_url,
                     "raw_url": raw_url,
                     "ocd_id": county['ocd_id'],
                     "name": county['county'],
                     "election": election['slug'],
                 })
     return meta