def load_naps(self):
    """Fetch the full NAP (gmd) record for every UUID in ``self.napids``
    from the CSW service and insert or update the matching ECRecord row.

    Side effects: opens a database session, writes one ECRecord per UUID,
    and closes all sessions when done.
    """
    ns = Namespaces()
    gmd = ns.get_namespace('gmd')
    session = connect_to_database()
    for napid in self.napids:
        print('{0}Full NAP Record for {1}{2}'.format(Fore.GREEN, Fore.CYAN, napid))
        # Populates self.csw.records[napid] with the full gmd record.
        self.csw.getrecordbyid(id=[napid], outputschema=gmd)
        ec_rec = find_record_by_uuid(session, napid, query_class=ECRecord)
        if ec_rec is None:
            ec_rec = ECRecord(
                uuid=self.csw.records[napid].identifier,
                title=self.csw.records[napid].identification.title,
                state='active',
                nap_record=self.csw.records[napid].xml,
                csw_scanned=datetime.now().isoformat())
        else:
            # BUG FIX: the original had trailing commas on the next three
            # assignments, which stored 1-tuples instead of scalar values.
            ec_rec.title = self.csw.records[napid].identification.title
            ec_rec.state = 'active'
            ec_rec.nap_record = self.csw.records[napid].xml
            ec_rec.csw_scanned = datetime.now().isoformat()
        add_record(session, ec_rec)
    session.close_all()
def main():
    """Compare every locally-stored Geogratis record against its CKAN
    counterpart and queue new or changed datasets in the Packages table.

    Side effects: reads and writes rows through the ORM session; logs errors.
    """
    factory = MetadataDatasetModelGeogratisFactory()
    # Potentially doing a VERY large ORM query. If we don't limit the read,
    # then SQLAlchemy will try to pull everything into memory. Therefore the
    # query must be paged. Paging requires keeping track of the sequential
    # record ID's
    session = connect_to_database()
    last_id = 0
    while True:
        known_records = find_all_records(session, query_limit=10, limit_id=last_id)
        if not known_records:
            break
        for geo_rec in known_records:
            # BUG FIX: advance the paging cursor unconditionally. The
            # original only advanced it at the end of the try block, so a
            # record that raised would be re-fetched forever.
            last_id = geo_rec.id
            print('ID: {0} UUID: {1}'.format(geo_rec.id, geo_rec.uuid))
            try:
                # In order to avoid multiple updates, only allow for one
                # instance of an update per uuid. Previous updates are
                # overridden with the latest update.
                pkg_update = find_record_by_uuid(session, geo_rec.uuid,
                                                 query_class=Packages)
                if pkg_update is None:
                    pkg_update = Packages()
                    pkg_update.status = 'new'
                if geo_rec.state == 'active':
                    ckan_record = factory.create_model_ckan(geo_rec.uuid)
                    geogratis_record = factory.create_model_geogratis(geo_rec.uuid)
                    pkg_update.uuid = geo_rec.uuid
                    # Set the dataset for immediate release on the Registry
                    geogratis_record.portal_release_date = time.strftime("%Y-%m-%d")
                    geogratis_record.ready_to_publish = True
                    if ckan_record is not None:
                        if not geogratis_record.equals(ckan_record):
                            diffs = geogratis_record.compare(
                                ckan_record,
                                self_label="Geogratis",
                                other_label="CKAN")
                            pkg_update.differences = "\n".join(diffs)
                            geo_rec.od_status = 'Needs Update'
                            pkg_update.ckan_json = json.dumps(geogratis_record.as_dict())
                            pkg_update.status = 'update'
                        else:
                            geo_rec.od_status = 'Current'
                    else:
                        # No CKAN record yet: queue the dataset as brand new.
                        pkg_update.ckan_json = json.dumps(geogratis_record.as_dict())
                        geo_rec.od_status = 'New Record'
                else:
                    geo_rec.od_status = 'Ineligible'
                pkg_update.last_comparison = datetime.now()
                add_record(session, geo_rec)
                if geo_rec.od_status in ('New Record', 'Needs Update'):
                    add_record(session, pkg_update)
            except Exception as e:
                # str(e) instead of the deprecated/removed e.message.
                logging.error(str(e))
def save_geogratis_record(session, uuid):
    """Create or update the local GeogratisRecord row for ``uuid``.

    Fetches the English and French records from Geogratis, derives the
    record state ('active', 'deleted' or 'missing french'), and upserts
    the row. Does nothing when the English record cannot be retrieved.

    :param session: open ORM session used for lookup and write
    :param uuid: Geogratis dataset identifier
    """
    msg = 'Retrieving data set {0}'.format(uuid)
    logging.info(msg)
    print(msg)
    geo_rec_en = get_geogratis_rec(uuid)
    geo_rec_fr = get_geogratis_rec(uuid, 'fr')
    if geo_rec_en is None:
        return
    state = u'deleted'
    title_fr = ''
    if geo_rec_en['deleted'] == 'false':
        state = u'active'
        if geo_rec_fr is None:
            state = u'missing french'
        else:
            title_fr = geo_rec_fr['title']
    new_rec = find_record_by_uuid(session, geo_rec_en['id'])
    # Placeholder dates: deleted records carry no usable timestamps.
    created_date = u'2000-01-01'
    updated_date = u'2000-01-01'
    edited_date = u'2000-01-01'
    geogratis_scanned = datetime.now().isoformat()
    if state != 'deleted':
        created_date = geo_rec_en['publishedDate']
        updated_date = geo_rec_en['updatedDate']
        edited_date = geo_rec_en['editedDate']
    if new_rec is None:
        new_rec = GeogratisRecord(uuid=geo_rec_en['id'],
                                  title_en=geo_rec_en['title'],
                                  title_fr=title_fr,
                                  json_record_en=json.dumps(geo_rec_en),
                                  json_record_fr=json.dumps(geo_rec_fr),
                                  created=created_date,
                                  updated=updated_date,
                                  edited=edited_date,
                                  state=state,
                                  geogratis_scanned=geogratis_scanned)
    else:
        new_rec.title_en = geo_rec_en['title']
        new_rec.title_fr = title_fr
        new_rec.json_record_en = json.dumps(geo_rec_en)
        # BUG FIX: the original had trailing commas on the next four
        # assignments, which stored 1-tuples instead of scalar values.
        new_rec.json_record_fr = json.dumps(geo_rec_fr)
        new_rec.created = created_date
        new_rec.updated = updated_date
        new_rec.edited = edited_date
        new_rec.state = state
        new_rec.geogratis_scanned = geogratis_scanned
    add_record(session, new_rec)
def save_geogratis_record(session, uuid):
    """Create or update the local GeogratisRecord row for ``uuid``.

    Fetches the English and French records from Geogratis, derives the
    record state ('active', 'deleted' or 'missing french'), and upserts
    the row. Does nothing when the English record cannot be retrieved.

    :param session: open ORM session used for lookup and write
    :param uuid: Geogratis dataset identifier
    """
    msg = 'Retrieving data set {0}'.format(uuid)
    logging.info(msg)
    print(msg)
    geo_rec_en = get_geogratis_rec(uuid)
    geo_rec_fr = get_geogratis_rec(uuid, 'fr')
    if geo_rec_en is None:
        return
    state = 'deleted'
    title_fr = ''
    if geo_rec_en['deleted'] == 'false':
        state = 'active'
        if geo_rec_fr is None:
            state = 'missing french'
        else:
            title_fr = geo_rec_fr['title']
    new_rec = find_record_by_uuid(session, geo_rec_en['id'])
    # Placeholder dates: deleted records carry no usable timestamps.
    created_date = '2000-01-01'
    updated_date = '2000-01-01'
    edited_date = '2000-01-01'
    geogratis_scanned = datetime.now().isoformat()
    if state != 'deleted':
        created_date = geo_rec_en['publishedDate']
        updated_date = geo_rec_en['updatedDate']
        edited_date = geo_rec_en['editedDate']
    if new_rec is None:
        new_rec = GeogratisRecord(uuid=geo_rec_en['id'],
                                  title_en=geo_rec_en['title'],
                                  title_fr=title_fr,
                                  json_record_en=json.dumps(geo_rec_en),
                                  json_record_fr=json.dumps(geo_rec_fr),
                                  created=created_date,
                                  updated=updated_date,
                                  edited=edited_date,
                                  state=state,
                                  geogratis_scanned=geogratis_scanned)
    else:
        new_rec.title_en = geo_rec_en['title']
        new_rec.title_fr = title_fr
        new_rec.json_record_en = json.dumps(geo_rec_en)
        # BUG FIX: the original had trailing commas on the next four
        # assignments, which stored 1-tuples instead of scalar values.
        new_rec.json_record_fr = json.dumps(geo_rec_fr)
        new_rec.created = created_date
        new_rec.updated = updated_date
        new_rec.edited = edited_date
        new_rec.state = state
        new_rec.geogratis_scanned = geogratis_scanned
    add_record(session, new_rec)
def main():
    """Post every queued 'new'/'update' package from the local Packages
    table to the remote CKAN portal and record the outcome on each row.

    Reads connection settings from geogratis.ini; throttles to one API
    write per minute.
    """
    ini_config = ConfigParser()
    ini_config.read('geogratis.ini')
    remote_url = ini_config.get('ckan', 'ckan.remote_portal')
    api_key = ini_config.get('ckan', 'ckan.api_key')
    user_agent = ini_config.get('ckan', 'ckan.user_agent')
    ckansite = ckanapi.RemoteCKAN(remote_url, apikey=api_key,
                                  user_agent=user_agent)
    session = connect_to_database()
    last_id = 0
    while True:
        package_stream = session.query(Packages).filter(Packages.id > last_id)
        package_stream = package_stream.filter(
            Packages.status.in_(["new", "update"])).order_by(Packages.id).all()
        if len(package_stream) == 0:
            break
        for r in package_stream:
            sleep(60)  # throttle: one CKAN write per minute
            print(u'Processing dataset {0}'.format(r.id))
            try:
                new_pkg_dict = json.loads(r.ckan_json.decode('utf-8'))
            except AttributeError as a:
                print(u'AttributeError {0}'.format(unicode(a)))
                continue
            # Probe the portal: NotFound means the package must be created.
            is_new = False
            try:
                ckansite.action.package_show(id=r.uuid)
            except ckanapi.NotFound:
                is_new = True
            try:
                if is_new:
                    ckansite.call_action('package_create', new_pkg_dict)
                else:
                    ckansite.call_action('package_update', new_pkg_dict)
                r.status = 'posted'
                r.status_message = ''
                r.latest_posted = datetime.now()
                add_record(session, r)
                continue
            except ckanapi.NotAuthorized as e:
                print(u'Not Authorized {0}'.format(unicode(e)))
                continue
            # BUG FIX: ValidationError subclasses CKANAPIError, so it must be
            # caught first — the original order made this branch unreachable.
            except ckanapi.errors.ValidationError as v:
                r.status = 'error'
                r.status_message = u'Validation error {0}'.format(unicode(v.error_dict))
                add_record(session, r)
                print(r.status_message)
                continue
            except ckanapi.CKANAPIError as c:
                r.status = 'error'
                r.status_message = u'CKAN API error {0}'.format(unicode(c))
                add_record(session, r)
                print(r.status_message)
                continue
        # NOTE(review): preserved from the original — this break makes the
        # while loop single-pass (last_id is never advanced); confirm intent.
        break
# NOTE(review): mid-function fragment — the enclosing loop/try that binds
# scan_record, geo_record, pkg_update_record, ckan_portal and setting is
# outside the visible span, so the code is left byte-identical.
# What it visibly does: probes the OD portal for the record (package_show
# raising NotFound means it does not exist yet), marks the dataset for
# immediate release, serializes it into pkg_update_record.ckan_json,
# stamps created/updated times, saves the row, then (after the outer loop)
# persists the paging setting, closes the session, and runs main().
try: ckan_portal.action.package_show( id=scan_record.uuid) # If the record does not exist, then a NotFound exception will be thrown pkg_update_record.existing = True except NotFound, e: pass # Set the dataset for immediate release on the Registry geo_record.portal_release_date = time.strftime( "%Y-%m-%d") geo_record.ready_to_publish = True pkg_update_record.ckan_json = json.dumps( geo_record.as_dict()) current_time_str = time.strftime("%Y-%m-%d %H:%M:%S") if not pkg_update_record.created: pkg_update_record.created = current_time_str pkg_update_record.updated = current_time_str add_record(session, pkg_update_record) except Exception, e: logging.error(e.message) last_id = scan_record.id save_setting(setting) session.close() main()
# NOTE(review): mid-function fragment (longer variant of the span above it
# in this file) — the enclosing loop/try that binds scan_record, geo_record,
# pkg_update_record, ckan_portal and setting is outside the visible span,
# so the code is left byte-identical.
# What it visibly does: records the uuid, probes the OD portal for an
# existing package (NotFound means none), marks the dataset for immediate
# release, serializes it into pkg_update_record.ckan_json, stamps
# created/updated times, saves the row, then (after the outer loop)
# persists the paging setting, closes the session, and runs main().
pkg_update_record.uuid = scan_record.uuid # Determine if the record is already on the OD portal try: ckan_portal.action.package_show(id=scan_record.uuid) # If the record does not exist, then a NotFound exception will be thrown pkg_update_record.existing = True except NotFound, e: pass # Set the dataset for immediate release on the Registry geo_record.portal_release_date = time.strftime("%Y-%m-%d") geo_record.ready_to_publish = True pkg_update_record.ckan_json = json.dumps(geo_record.as_dict()) current_time_str = time.strftime("%Y-%m-%d %H:%M:%S") if not pkg_update_record.created: pkg_update_record.created = current_time_str pkg_update_record.updated = current_time_str add_record(session, pkg_update_record) except Exception, e: logging.error(e.message) last_id = scan_record.id save_setting(setting) session.close() main()
def main():
    """Post every queued 'new'/'update' package from the local Packages
    table to the remote CKAN portal and record the outcome on each row.

    Reads connection settings from geogratis.ini; throttles to one API
    write per minute.
    """
    ini_config = ConfigParser()
    ini_config.read('geogratis.ini')
    remote_url = ini_config.get('ckan', 'ckan.remote_portal')
    api_key = ini_config.get('ckan', 'ckan.api_key')
    user_agent = ini_config.get('ckan', 'ckan.user_agent')
    ckansite = ckanapi.RemoteCKAN(remote_url, apikey=api_key,
                                  user_agent=user_agent)
    session = connect_to_database()
    last_id = 0
    while True:
        package_stream = session.query(Packages).filter(Packages.id > last_id)
        package_stream = package_stream.filter(
            Packages.status.in_(["new", "update"])).order_by(Packages.id).all()
        if len(package_stream) == 0:
            break
        for r in package_stream:
            sleep(60)  # throttle: one CKAN write per minute
            print(u'Processing dataset {0}'.format(r.id))
            try:
                new_pkg_dict = json.loads(r.ckan_json.decode('utf-8'))
            except AttributeError as a:
                print(u'AttributeError {0}'.format(unicode(a)))
                continue
            # Probe the portal: NotFound means the package must be created.
            is_new = False
            try:
                ckansite.action.package_show(id=r.uuid)
            except ckanapi.NotFound:
                is_new = True
            try:
                if is_new:
                    ckansite.call_action('package_create', new_pkg_dict)
                else:
                    ckansite.call_action('package_update', new_pkg_dict)
                r.status = 'posted'
                r.status_message = ''
                r.latest_posted = datetime.now()
                add_record(session, r)
                continue
            except ckanapi.NotAuthorized as e:
                print(u'Not Authorized {0}'.format(unicode(e)))
                continue
            # BUG FIX: ValidationError subclasses CKANAPIError, so it must be
            # caught first — the original order made this branch unreachable.
            except ckanapi.errors.ValidationError as v:
                r.status = 'error'
                r.status_message = u'Validation error {0}'.format(unicode(v.error_dict))
                add_record(session, r)
                print(r.status_message)
                continue
            except ckanapi.CKANAPIError as c:
                r.status = 'error'
                r.status_message = u'CKAN API error {0}'.format(unicode(c))
                add_record(session, r)
                print(r.status_message)
                continue
        # NOTE(review): preserved from the original — this break makes the
        # while loop single-pass (last_id is never advanced); confirm intent.
        break
def main():
    """Compare every locally-stored Geogratis record against its CKAN
    counterpart and queue new or changed datasets in the Packages table.

    Side effects: reads and writes rows through the ORM session; logs errors.
    """
    factory = MetadataDatasetModelGeogratisFactory()
    # Potentially doing a VERY large ORM query. If we don't limit the read,
    # then SQLAlchemy will try to pull everything into memory. Therefore the
    # query must be paged. Paging requires keeping track of the sequential
    # record ID's
    session = connect_to_database()
    last_id = 0
    while True:
        known_records = find_all_records(session, query_limit=10, limit_id=last_id)
        if not known_records:
            break
        for geo_rec in known_records:
            # BUG FIX: advance the paging cursor unconditionally. The
            # original only advanced it at the end of the try block, so a
            # record that raised would be re-fetched forever.
            last_id = geo_rec.id
            print('ID: {0} UUID: {1}'.format(geo_rec.id, geo_rec.uuid))
            try:
                # In order to avoid multiple updates, only allow for one
                # instance of an update per uuid. Previous updates are
                # overridden with the latest update.
                pkg_update = find_record_by_uuid(session, geo_rec.uuid,
                                                 query_class=Packages)
                if pkg_update is None:
                    pkg_update = Packages()
                    pkg_update.status = 'new'
                if geo_rec.state == 'active':
                    ckan_record = factory.create_model_ckan(geo_rec.uuid)
                    geogratis_record = factory.create_model_geogratis(geo_rec.uuid)
                    pkg_update.uuid = geo_rec.uuid
                    # Set the dataset for immediate release on the Registry
                    geogratis_record.portal_release_date = time.strftime("%Y-%m-%d")
                    geogratis_record.ready_to_publish = True
                    if ckan_record is not None:
                        if not geogratis_record.equals(ckan_record):
                            diffs = geogratis_record.compare(
                                ckan_record,
                                self_label="Geogratis",
                                other_label="CKAN")
                            pkg_update.differences = "\n".join(diffs)
                            geo_rec.od_status = 'Needs Update'
                            pkg_update.ckan_json = json.dumps(geogratis_record.as_dict())
                            pkg_update.status = 'update'
                        else:
                            geo_rec.od_status = 'Current'
                    else:
                        # No CKAN record yet: queue the dataset as brand new.
                        pkg_update.ckan_json = json.dumps(geogratis_record.as_dict())
                        geo_rec.od_status = 'New Record'
                else:
                    geo_rec.od_status = 'Ineligible'
                pkg_update.last_comparison = datetime.now()
                add_record(session, geo_rec)
                if geo_rec.od_status in ('New Record', 'Needs Update'):
                    add_record(session, pkg_update)
            except Exception as e:
                # str(e) instead of the deprecated/removed e.message.
                logging.error(str(e))