def _build_metadata(self, year, elections): meta = [] year_int = int(year) for election in elections: if 'special' in election['slug']: results = [x for x in self._url_paths() if x['date'] == election['start_date'] and x['special'] == True] for result in results: generated_filename = self._generate_filename(election['start_date'], result) if result['county']: ocd_id = 'ocd-division/country:us/state:az/county:' + result['county'].replace(' ','_').lower() else: ocd_id = 'ocd-division/country:us/state:az' meta.append({ "generated_filename": generated_filename, "raw_url": election['direct_links'][0], "pre_processed_url": build_github_url(self.state, generated_filename), "ocd_id": ocd_id, "name": 'Arizona', "election": election['slug'] }) else: results = [x for x in self._url_paths() if x['date'] == election['start_date'] and x['special'] == False] for result in results: generated_filename = self._generate_filename(election['start_date'], result) meta.append({ "generated_filename": generated_filename, "raw_url": election['direct_links'][0], "pre_processed_url": build_github_url(self.state, generated_filename), "ocd_id": 'ocd-division/country:us/state:az', "name": 'Arizona', "election": election['slug'] }) return meta
def _build_election_metadata_default(self, election): link = election['direct_links'][0] filename_kwargs = {} if link.startswith(self.RESULTS_PORTAL_URL): # Report portal results are precinct-level filename_kwargs['reporting_level'] = 'precinct' # And the format is tab-delimited text filename_kwargs['extension'] = '.tsv' generated_filename = self._standardized_filename(election, **filename_kwargs) mapping = { "generated_filename": generated_filename, "raw_url": link, "ocd_id": 'ocd-division/country:us/state:ar', "name": 'Arkansas', "election": election['slug'] } if "2002" in election['slug']: generated_filename = generated_filename.replace('.pdf', '.csv') mapping['pre_processed_url'] = build_github_url(self.state, generated_filename) mapping['generated_filename'] = generated_filename return [mapping]
def _build_metadata(self, year, elections): meta = [] year_int = int(year) for election in elections: if election['special']: results = [x for x in self._url_paths() if x['date'] == election['start_date'] and x['special'] == True] else: results = [x for x in self._url_paths() if x['date'] == election['start_date'] and x['special'] != True] for result in results: generated_filename = self._generate_filename(election['start_date'], election['race_type'], result) if election['direct_links']: raw_url = election['direct_links'][0] github_url = None else: raw_url = None github_url = build_github_url(self.state, generated_filename) meta.append({ "generated_filename": generated_filename, "raw_url": raw_url, "pre_processed_url": github_url, "ocd_id": 'ocd-division/country:us/state:ct', "name": 'Connecticut', "election": election['slug'] }) return meta
def _build_metadata(self, year, elections): meta = [] year_int = int(year) if year < 2008: for election in elections: results = [x for x in self._url_paths() if x['date'] == election['start_date']] for result in results: generated_filename = self._generate_office_filename(election['direct_links'][0], election['start_date'], election['race_type'], result) meta.append({ "generated_filename": generated_filename, "raw_url": self._build_raw_url(year, result['path']), "pre_processed_url": build_github_url(self.state, generated_filename), "ocd_id": 'ocd-division/country:us/state:wv', "name": 'West Virginia', "election": election['slug'] }) else: for election in elections: csv_links = self._find_csv_links(election['direct_links'][0]) counties = self._jurisdictions() results = list(zip(counties, csv_links[1:])) for result in results: meta.append({ "generated_filename": self._generate_county_filename(result[0]['county'], election), "pre_processed_url": None, "raw_url": result[1], "ocd_id": result[0]['ocd_id'], "name": result[0]['county'], "election": election['slug'] }) return meta
def _build_metadata(self, year, elections): meta = [] year_int = int(year) if year < 2008: for election in elections: results = [x for x in self._url_paths() if x['date'] == election['start_date']] for result in results: generated_filename = self._generate_office_filename(election['direct_links'][0], election['start_date'], election['race_type'], result) meta.append({ "generated_filename": generated_filename, "raw_url": self._build_raw_url(year, result['path']), "pre_processed_url": build_github_url(self.state, generated_filename), "ocd_id": 'ocd-division/country:us/state:wv', "name": 'West Virginia', "election": election['slug'] }) else: for election in elections: csv_links = self._find_csv_links(election['direct_links'][0]) counties = self._jurisdictions() results = zip(counties, csv_links[1:]) for result in results: meta.append({ "generated_filename": self._generate_county_filename(result[0]['county'], election), "pre_processed_url": None, "raw_url": result[1], "ocd_id": result[0]['ocd_id'], "name": result[0]['county'], "election": election['slug'] }) return meta
def _build_metadata_preprocessed(self, election):
    """Return election metadata for an election with preprocessed results.

    The result is a one-element list.  Its ``raw_url`` is the GitHub URL of
    the standardized ``.csv`` filename for the election.
    """
    csv_name = self._standardized_filename(election, extension=".csv")
    entry = {}
    entry['generated_filename'] = csv_name
    entry['raw_url'] = build_github_url('wa', csv_name)
    entry['ocd_id'] = 'ocd-division/country:us/state:wa'
    entry['name'] = "Washington"
    entry['election'] = election['slug']
    return [entry]
def _add_preprocessed_urls(self, meta_entries): new_entries = [] for meta_entry in meta_entries: ext = self._filename_extension(meta_entry['raw_url']) if ext == ".pdf": meta_entry['generated_filename'] = meta_entry['generated_filename'].replace(".pdf", ".csv") meta_entry['pre_processed_url'] = build_github_url(self.state, meta_entry['generated_filename']) new_entries.append(meta_entry) return new_entries
def _build_metadata_preprocessed(self, election):
    """Return election metadata for an election with preprocessed results.

    Produces a single-entry list; the entry's ``raw_url`` is the GitHub URL
    built for the election's standardized ``.csv`` filename.
    """
    filename = self._standardized_filename(election, extension=".csv")
    mapping = {
        'generated_filename': filename,
        'raw_url': build_github_url('wa', filename),
        'ocd_id': 'ocd-division/country:us/state:wa',
        'name': "Washington",
        'election': election['slug'],
    }
    return [mapping]
def _add_preprocessed_urls(self, meta_entries): new_entries = [] for meta_entry in meta_entries: ext = self._filename_extension(meta_entry['raw_url']) if ext == ".pdf": meta_entry['generated_filename'] = meta_entry[ 'generated_filename'].replace(".pdf", ".csv") meta_entry['pre_processed_url'] = build_github_url( self.state, meta_entry['generated_filename']) new_entries.append(meta_entry) return new_entries
def _build_metadata_url_paths(self, election): """Return mappings for result files from url_paths.csv""" meta_entries = [] # Exclude paths with the ``skip`` flag set in the mappings url_paths = [ url_path for url_path in self._url_paths_for_election(election) if not url_path['skip'] ] for url_path in url_paths: preprocessed_result = False filename_ext = self._filename_extension_for_url_path(url_path) # We'll eventually preprocess PDFs and convert them to CSVs. # So, the downloaded file will be a CSV. Set the filename # extension accordingly. if filename_ext == ".pdf" or filename_ext == ".mdb": filename_ext = ".csv" preprocessed_result = True filename_kwargs = { 'extension': filename_ext, 'reporting_level': url_path['reporting_level'], 'jurisdiction': url_path['jurisdiction'], 'party': url_path['party'], } generated_filename = self._standardized_filename( election, **filename_kwargs) mapping = { 'generated_filename': generated_filename, 'raw_url': url_path['url'], 'ocd_id': self._ocd_id_for_url_path(url_path), 'name': url_path['jurisdiction'], 'election': election['slug'], 'raw_extracted_filename': url_path['raw_extracted_filename'], 'parent_zipfile': url_path['parent_zipfile'], } if preprocessed_result: mapping['pre_processed_url'] = build_github_url( self.state, generated_filename) meta_entries.append(mapping) return meta_entries
def _build_metadata_url_paths(self, election): """Return mappings for result files from url_paths.csv""" meta_entries = [] # Exclude paths with the ``skip`` flag set in the mappings url_paths = [url_path for url_path in self._url_paths_for_election(election) if not url_path['skip']] for url_path in url_paths: preprocessed_result = False filename_ext = self._filename_extension_for_url_path(url_path) # We'll eventually preprocess PDFs and convert them to CSVs. # So, the downloaded file will be a CSV. Set the filename # extension accordingly. if filename_ext == ".pdf" or filename_ext == ".mdb": filename_ext = ".csv" preprocessed_result = True filename_kwargs = { 'extension': filename_ext, 'reporting_level': url_path['reporting_level'], 'jurisdiction': url_path['jurisdiction'], 'party': url_path['party'], } generated_filename = self._standardized_filename(election, **filename_kwargs) mapping = { 'generated_filename': generated_filename, 'raw_url': url_path['url'], 'ocd_id': self._ocd_id_for_url_path(url_path), 'name': url_path['jurisdiction'], 'election': election['slug'], 'raw_extracted_filename': url_path['raw_extracted_filename'], 'parent_zipfile': url_path['parent_zipfile'], } if preprocessed_result: mapping['pre_processed_url'] = build_github_url(self.state, generated_filename) meta_entries.append(mapping) return meta_entries
def _build_metadata(self, year, elections): meta = [] year_int = int(year) for election in elections: if election['special']: results = [ x for x in self._url_paths() if x['date'] == election['start_date'] and x['special'] == True ] else: results = [ x for x in self._url_paths() if x['date'] == election['start_date'] and x['special'] == False ] for result in results: if result['url']: raw_url = result['url'] else: raw_url = None if result['special']: ocd_id = 'ocd-division/country:us/state:ga' name = "Georgia" generated_filename = self._generate_filename(election) meta.append({ "generated_filename": generated_filename, "raw_url": raw_url, "pre_processed_url": build_github_url(self.state, generated_filename), "ocd_id": ocd_id, "name": 'Georgia', "election": election['slug'] }) generated_filename = self._generate_special_filename( election, result) meta.append({ "generated_filename": generated_filename, "raw_url": raw_url, "pre_processed_url": build_github_url(self.state, generated_filename), "ocd_id": ocd_id, "name": 'Georgia', "election": election['slug'] }) else: generated_filename = self._generate_filename(election) ocd_id = 'ocd-division/country:us/state:ga' name = "Georgia" for jurisdiction in self._jurisdictions(): generated_filename = self._generate_county_filename( election, jurisdiction['county'], result) ocd_id = 'ocd-division/country:us/state:ga/county:%s' % result[ 'county'].lower().replace(" ", "_") meta.append({ "generated_filename": generated_filename, "raw_url": raw_url, "pre_processed_url": build_github_url(self.state, generated_filename), "ocd_id": ocd_id, "name": jurisdiction['county'], "election": election['slug'] }) return meta
def _build_metadata(self, year, elections): meta = [] year_int = int(year) if year > 2008: for election in elections: results = [ x for x in self._url_paths() if x['date'] == election['start_date'] ] for result in results: county = [ c for c in self._jurisdictions() if c['county'] == result['county'] ][0] if year == 2012: generated_filename = self._generate_county_filename( result, election, '.xlsx') else: generated_filename = self._generate_county_filename( result, election, '.xls') meta.append({ "generated_filename": generated_filename, 'raw_url': result['url'], 'raw_extracted_filename': result['raw_extracted_filename'], "ocd_id": county['ocd_id'], "name": county['county'], "election": election['slug'] }) elif year == 2006: for election in elections: results = [ x for x in self._url_paths() if x['date'] == election['start_date'] ] for result in results: county = [ c for c in self._jurisdictions() if c['county'] == result['county'] ][0] generated_filename = self._generate_county_filename( result, election, '.csv') meta.append({ "generated_filename": generated_filename, "pre_processed_url": build_github_url(self.state, generated_filename), "raw_url": result['url'], "ocd_id": county['ocd_id'], "name": county['county'], "election": election['slug'] }) else: for election in elections: results = [ x for x in self._url_paths() if x['date'] == election['start_date'] ] for result in results: county = [ c for c in self._jurisdictions() if c['county'] == result['county'] ][0] if result['special'] and result['raw_extracted_filename']: generated_filename = '20021126__wy__special__general__natrona__state_house__36__precinct.xls' raw_url = build_raw_github_url( self.state, election['start_date'].replace('-', ''), result['raw_extracted_filename']) pre_processed_url = '' elif result['special']: generated_filename = result['path'] raw_url = result['url'] pre_processed_url = '' elif result['raw_extracted_filename'] != '': generated_filename = self._generate_county_filename( result, 
election, '.' + result['raw_extracted_filename'].split('.')[1]) pre_processed_url = build_raw_github_url( self.state, election['start_date'].replace('-', ''), result['raw_extracted_filename']) raw_url = '' else: generated_filename = self._generate_county_filename( result, election, '.xls') raw_url = build_raw_github_url( self.state, election['start_date'].replace('-', ''), result['raw_extracted_filename']) pre_processed_url = '' meta.append({ "generated_filename": generated_filename, "pre_processed_url": pre_processed_url, "raw_url": raw_url, "ocd_id": county['ocd_id'], "name": county['county'], "election": election['slug'] }) return meta
def _build_metadata(self, year, elections): meta = [] year_int = int(year) if year > 2008: for election in elections: results = [x for x in self._url_paths() if x['date'] == election['start_date']] for result in results: county = [c for c in self._jurisdictions() if c['county'] == result['county']][0] if year > 2010: generated_filename = self._generate_county_filename(result, election, '.xlsx') else: generated_filename = self._generate_county_filename(result, election, '.xls') meta.append({ "generated_filename": generated_filename, 'raw_url': result['url'], 'raw_extracted_filename': result['raw_extracted_filename'], "ocd_id": county['ocd_id'], "name": county['county'], "election": election['slug'] }) elif year == 2006: for election in elections: results = [x for x in self._url_paths() if x['date'] == election['start_date']] for result in results: county = [c for c in self._jurisdictions() if c['county'] == result['county']][0] generated_filename = self._generate_county_filename(result, election, '.csv') meta.append({ "generated_filename": generated_filename, "pre_processed_url": build_github_url(self.state, generated_filename), "raw_url": result['url'], "ocd_id": county['ocd_id'], "name": county['county'], "election": election['slug'] }) else: for election in elections: results = [x for x in self._url_paths() if x['date'] == election['start_date']] for result in results: county = [c for c in self._jurisdictions() if c['county'] == result['county']][0] if result['special'] and result['raw_extracted_filename']: generated_filename = '20021126__wy__special__general__natrona__state_house__36__precinct.xls' raw_url = build_raw_github_url(self.state, election['start_date'].replace('-',''), result['raw_extracted_filename']) pre_processed_url = '' elif result['special']: generated_filename = result['path'] raw_url = result['url'] pre_processed_url = '' elif result['raw_extracted_filename'] != '': generated_filename = self._generate_county_filename(result, election, '.' 
+ result['raw_extracted_filename'].split('.')[1]) pre_processed_url = build_raw_github_url(self.state, election['start_date'].replace('-',''), result['raw_extracted_filename']) raw_url = '' else: generated_filename = self._generate_county_filename(result, election, '.xls') raw_url = build_raw_github_url(self.state, election['start_date'].replace('-',''), result['raw_extracted_filename']) pre_processed_url = '' meta.append({ "generated_filename": generated_filename, "pre_processed_url": pre_processed_url, "raw_url": raw_url, "ocd_id": county['ocd_id'], "name": county['county'], "election": election['slug'] }) return meta