def all_reading_rooms():
    """Collect and store the reading room links of every agency.

    Walks AGENCIES in order; each entry is printed, looked up with
    reading_room() and the result is handed to save_agency_data().
    """
    for abbreviation in AGENCIES:
        print(abbreviation)
        save_agency_data(abbreviation, reading_room(abbreviation))
def layer_manual_data(agency_abbr):
    """Layer the manual data onto one agency's saved YAML data.

    Loads the agency's YAML file from the 'data' directory, passes the
    parsed content through scraper.apply_manual_data() and stores the
    result with scraper.save_agency_data().

    :param agency_abbr: agency abbreviation used to locate the YAML file.
    """
    filename = scraper.agency_yaml_filename('data', agency_abbr)
    # Keep the file open only while it is being parsed, so the handle is
    # released before save_agency_data() runs (presumably it rewrites this
    # same file -- confirm against scraper).
    with open(filename, 'r') as f:
        print(filename)
        # NOTE(review): yaml.load() without an explicit Loader can build
        # arbitrary Python objects and is rejected by PyYAML >= 6; consider
        # yaml.safe_load() if these files only ever hold plain data.
        agency_data = yaml.load(f)
    data = scraper.apply_manual_data(agency_abbr, agency_data)
    scraper.save_agency_data(agency_abbr, data)
def test_save_agency_data(self):
    """save_agency_data() writes a YAML file that loads back unchanged."""
    scraper.save_agency_data(
        'TEST', {'name': 'Test Agency'}, data_directory='/tmp/test/')
    self.assertTrue(os.path.isfile('/tmp/test/TEST.yaml'))
    # The context manager closes the file even when parsing raises, which
    # the previous open()/close() pair did not guarantee.
    with open('/tmp/test/TEST.yaml', 'r') as f:
        test_data = yaml.load(f)
    self.assertEqual({'name': 'Test Agency'}, test_data)
def test_save_agency_data(self):
    """Check that saved agency data lands in TEST.yaml and reads back equal."""
    scraper.save_agency_data(
        'TEST', {'name': 'Test Agency'}, data_directory='/tmp/test/')
    self.assertTrue(os.path.isfile('/tmp/test/TEST.yaml'))
    # Use a with-block so the handle is released on every path, including
    # a failure inside yaml.load().
    with open('/tmp/test/TEST.yaml', 'r') as saved_file:
        test_data = yaml.load(saved_file)
    self.assertEqual({'name': 'Test Agency'}, test_data)
# NOTE(review): this line is a whitespace-collapsed fragment holding three
# pieces; the original nesting cannot be recovered from it.
#   1. The tail of a function whose header is not visible here: each entry
#      of agency_data['departments'] is passed to process(); when links come
#      back, update_links() is applied to the department. Departments are
#      collected into a list that is stored back under
#      agency_data['departments'], and agency_data is returned.
#   2. all_reading_rooms(): for every entry in AGENCIES, prints it, calls
#      reading_room() and passes the result to save_agency_data().
#   3. The script entry point: if a command-line argument is present, only
#      that agency is fetched and saved; otherwise all_reading_rooms() runs.
departments = [] if 'departments' in agency_data: for department in agency_data['departments']: links = process(department) if links: department = update_links(department, links) departments.append(department) agency_data['departments'] = departments return agency_data def all_reading_rooms(): """ Get reading room links for ALL agencies. """ for agency in AGENCIES: print(agency) agency_data = reading_room(agency) save_agency_data(agency, agency_data) if __name__ == "__main__": agency_abbr = None if len(sys.argv) > 1: agency_abbr = sys.argv[1] if agency_abbr: agency_data = reading_room(agency_abbr) save_agency_data(agency_abbr, agency_data) else: all_reading_rooms()
def test_read_manual_data(self):
    """read_manual_data() returns what save_agency_data() stored."""
    scraper.save_agency_data(
        'TEST',
        {'name': 'Test Agency'},
        data_directory='/tmp/test/',
    )
    loaded = scraper.read_manual_data('TEST', manual_data_dir='/tmp/test')
    self.assertEqual({'name': 'Test Agency'}, loaded)
def test_read_manual_data(self):
    """Save a test agency, then read it back as manual data and compare."""
    scraper.save_agency_data(
        'TEST', {'name': 'Test Agency'}, data_directory='/tmp/test/')
    self.assertEqual(
        {'name': 'Test Agency'},
        scraper.read_manual_data('TEST', manual_data_dir='/tmp/test'),
    )
import scraper
import typos

if __name__ == "__main__":
    # This one-time use script is designed to take everything in typos.py
    # and create manual override YAML files for the agencies.

    # Start each agency listed in KEYWORDS with its keyword data.
    overrides = {
        abbr: scraper.add_keywords(abbr, {}) for abbr in typos.KEYWORDS
    }

    # Add the top-level departments, keeping any keyword data already there.
    for abbr, names in typos.TOP_LEVEL.items():
        top_level = [{'name': name, 'top_level': True} for name in names]
        overrides[abbr] = dict(overrides.get(abbr, {}), departments=top_level)

    # Write one manual override file per agency.
    for abbr, manual in overrides.items():
        scraper.save_agency_data(abbr, manual, 'manual_data')