session.close_all()

# Temporary main: pick the date to scan from, run the CSW scan, then record
# the time of this run as the new 'csw_last_scan_date' setting.

eccsw = CswScanner()
scan8601 = None
# scan_date stays None for --all; it is passed to get_all_ids() unchanged.
scan_date = None
if args.all:
    scan_date = None
elif args.monitor:
    # Resume from the stored last-scan date; fall back to "now" when no
    # date has been stored yet.
    monitor_date = get_setting('csw_last_scan_date')
    scan_date = datetime.now()
    if monitor_date.setting_value is not None:
        scan_date = dateutil.parser.parse(monitor_date.setting_value)
elif args.since != '':
    # dateutil signals an unparseable date by raising, not by returning
    # None, so the failure has to be caught for the error to be logged.
    try:
        scan_date = dateutil.parser.parse(args.since)
    except (ValueError, OverflowError):
        logging.error('Invalid date: ' + args.since)
        # Non-zero status so a calling job can tell the scan did not run.
        exit(1)

eccsw.get_all_ids(scan_date)
eccsw.load_naps()

# Re-read the setting after the scan and stamp it with the current time.
monitor_date = get_setting('csw_last_scan_date')
scan_date = datetime.now()
monitor_date.setting_value = scan_date.isoformat()
save_setting(monitor_date)
try: ckan_portal.action.package_show( id=scan_record.uuid) # If the record does not exist, then a NotFound exception will be thrown pkg_update_record.existing = True except NotFound, e: pass # Set the dataset for immediate release on the Registry geo_record.portal_release_date = time.strftime( "%Y-%m-%d") geo_record.ready_to_publish = True pkg_update_record.ckan_json = json.dumps( geo_record.as_dict()) current_time_str = time.strftime("%Y-%m-%d %H:%M:%S") if not pkg_update_record.created: pkg_update_record.created = current_time_str pkg_update_record.updated = current_time_str add_record(session, pkg_update_record) except Exception, e: logging.error(e.message) last_id = scan_record.id save_setting(setting) session.close() main()
def _save_feed_products(session, feed_page):
    """Save every product listed on one page of the Geogratis feed.

    A failure on one product is logged and the remaining products on the
    page are still processed.
    """
    if 'products' in feed_page:
        for product in feed_page['products']:
            # Don't crash on every call - log the error and continue
            try:
                save_geogratis_record(session, product['id'])
            except Exception as e:
                logging.error('{0} failed to load'.format(product['id']))
                logging.error(e)


def main(since='', start_index='', monitor=False):
    """Scan the Geogratis ess-sst JSON feed and save each product it lists.

    The first URL requested is chosen in this order:

    * ``monitor`` true: the stored ``monitor_link`` setting, or a fixed
      ``edited-min=2001-01-01`` query when no link has been stored;
    * ``since`` non-empty: an ``edited-min`` query using that value;
    * ``start_index`` non-empty: a ``start-index`` query using that value;
    * otherwise the unfiltered feed.

    Nothing is scanned unless the first request returns HTTP 200. The
    "next" link of each page is then followed until a page has none.

    Args:
        since: Value substituted into the ``edited-min`` query parameter.
        start_index: Value substituted into the ``start-index`` parameter.
        monitor: When true, resume from the stored monitor link.
    """
    geog_url = 'http://geogratis.gc.ca/api/en/nrcan-rncan/ess-sst?alt=json&max-results=100'
    monitor_setting = get_setting(u'monitor_link')
    if monitor:
        if monitor_setting.setting_value is None:
            geog_url =\
                'http://geogratis.gc.ca/api/en/nrcan-rncan/ess-sst?edited-min=2001-01-01&alt=json&max-results=100'
        else:
            geog_url = monitor_setting.setting_value
    elif since != '':
        geog_url =\
            'http://geogratis.gc.ca/api/en/nrcan-rncan/ess-sst?edited-min={0}&alt=json&max-results=100'.format(since)
    elif start_index != '':
        geog_url =\
            'http://geogratis.gc.ca/api/en/nrcan-rncan/ess-sst/?start-index={0}&alt=json&max-results=100'.\
            format(start_index)
    print('{0}Scanning: {1}{2}'.format(Fore.GREEN, Fore.BLUE, geog_url))

    r = requests.get(geog_url)
    logging.info('HTTP Response Status {0}'.format(r.status_code))

    session = None
    try:
        session = connect_to_database()

        # Get the first page of the feed
        if r.status_code == 200:
            feed_page = r.json()

            # Save the monitor link for future use
            monitor_link = _get_link(feed_page, 'monitor')
            if monitor_link != '':
                monitor_setting.setting_value = monitor_link
                save_setting(monitor_setting)
                print("{0}Next Monitor Link: {1}{2}".format(Fore.YELLOW, Fore.BLUE, monitor_setting.setting_value))
            next_link = _get_link(feed_page)

            print('{0}{1} Records Found'.format(Fore.BLUE, feed_page['count']))
            _save_feed_products(session, feed_page)

            # Keep polling until exhausted
            while next_link != '':
                geog_url = next_link
                r = requests.get(geog_url)
                # Apply the same status check as for the first page rather
                # than trying to parse an error response as a feed page.
                if r.status_code != 200:
                    logging.error('HTTP Response Status {0}'.format(r.status_code))
                    break
                feed_page = r.json()
                next_link = _get_link(feed_page)
                print('{0}Next page link: {1}{2}'.format(Fore.YELLOW, Fore.BLUE, next_link))
                _save_feed_products(session, feed_page)

            # NOTE(review): the setting is not modified while paging, so one
            # save after the loop stores the same value as a save per page.
            # Confirm the intended placement against the original layout.
            save_setting(monitor_setting)
    finally:
        # Release the database session on every exit path, including errors.
        if session is not None:
            session.close()
pkg_update_record.uuid = scan_record.uuid # Determine if the record is already on the OD portal try: ckan_portal.action.package_show(id=scan_record.uuid) # If the record does not exist, then a NotFound exception will be thrown pkg_update_record.existing = True except NotFound, e: pass # Set the dataset for immediate release on the Registry geo_record.portal_release_date = time.strftime("%Y-%m-%d") geo_record.ready_to_publish = True pkg_update_record.ckan_json = json.dumps(geo_record.as_dict()) current_time_str = time.strftime("%Y-%m-%d %H:%M:%S") if not pkg_update_record.created: pkg_update_record.created = current_time_str pkg_update_record.updated = current_time_str add_record(session, pkg_update_record) except Exception, e: logging.error(e.message) last_id = scan_record.id save_setting(setting) session.close() main()
def main(since='', start_index='', monitor=False): geog_url = 'http://geogratis.gc.ca/api/en/nrcan-rncan/ess-sst?alt=json&max-results=100' monitor_setting = get_setting('monitor_link') if monitor: if monitor_setting.setting_value is None: geog_url = 'http://geogratis.gc.ca/api/en/nrcan-rncan/ess-sst?edited-min=2015-01-01&alt=json&max-results=100' else: geog_url = monitor_setting.setting_value elif since != '': geog_url = 'http://geogratis.gc.ca/api/en/nrcan-rncan/ess-sst?edited-min={0}&alt=json&max-results=100'.format( since) elif start_index != '': geog_url = 'http://geogratis.gc.ca/api/en/nrcan-rncan/ess-sst/?start-index={0}&alt=json&max-results=100'.format( start_index) print('{0}Scanning: {1}{2}'.format(Fore.GREEN, Fore.BLUE, geog_url)) r = requests.get(geog_url) logging.info('HTTP Response Status {0}'.format(r.status_code)) session = None try: session = connect_to_database() # Get the first page of the feed if r.status_code == 200: feed_page = r.json() # Save the monitor link for future use monitor_link = _get_link(feed_page, 'monitor') if monitor_link != '': monitor_setting.setting_value = monitor_link save_setting(monitor_setting) print "{0}Next Monitor Link: {1}{2}".format( Fore.YELLOW, Fore.BLUE, monitor_setting.setting_value) next_link = _get_link(feed_page) print('{0}{1} Records Found'.format(Fore.BLUE, feed_page['count'])) if 'products' in feed_page: for product in feed_page['products']: try: save_geogratis_record(session, product['id']) except Exception, e: logging.error('{0} failed to load'.format( product['id'])) logging.error(e) # Keep polling until exhausted while next_link != '': geog_url = next_link r = requests.get(geog_url) feed_page = r.json() next_link = _get_link(feed_page) print '{0}Next page link: {1}{2}'.format( Fore.YELLOW, Fore.BLUE, next_link) if 'products' in feed_page: for product in feed_page['products']: # Don't crash on every call - log the error and continue try: save_geogratis_record(session, product['id']) except Exception, e: 
logging.error('{0} failed to load'.format( product['id'])) logging.error(e) save_setting(monitor_setting)