def migrate_extras():
    """Schema migration: add an ``extras`` text column to ``datarequests``."""
    connection = Session.connection()
    connection.execute('''
        ALTER TABLE datarequests ADD COLUMN extras text;
    ''')
    Session.commit()
def GetRecords(self, req):
    """Handle a CSW GetRecords request.

    Builds a csw:GetRecordsResponse, pages over the guids of current
    harvest objects belonging to active packages from gemini/csw-type
    sources and, when resultType == "results", embeds each stored
    record's XML in the response.

    :param req: parsed request dict with keys startPosition, maxRecords,
        elementSetName, resultType.
    :returns: serialised XML response string.
    """
    resp = etree.Element(ntag("csw:GetRecordsResponse"), nsmap=namespaces)
    etree.SubElement(resp, ntag("csw:SearchStatus"),
                     timestamp=datetime.utcnow().isoformat())
    cursor = Session.connection()  # NOTE(review): unused — confirm and remove
    # Distinct guids of current objects on active packages from the
    # supported harvest source types.
    q = Session.query(distinct(HarvestObject.guid)) \
        .join(Package) \
        .join(HarvestSource) \
        .filter(HarvestObject.current==True) \
        .filter(Package.state==u'active') \
        .filter(or_(HarvestSource.type=='gemini-single',
                    HarvestSource.type=='gemini-waf',
                    HarvestSource.type=='csw'))
    ### TODO Parse query instead of stupidly just returning whatever we like
    # Defaults when the client sends 0/absent paging values.
    startPosition = req["startPosition"] if req["startPosition"] > 0 else 1
    maxRecords = req["maxRecords"] if req["maxRecords"] > 0 else 10
    rset = q.offset(startPosition-1).limit(maxRecords)
    total = q.count()
    attrs = {
        "numberOfRecordsMatched": total,
        "elementSet": req["elementSetName"], # we lie here. it's always really "full"
    }
    if req["resultType"] == "results":
        returned = rset.count()
        attrs["numberOfRecordsReturned"] = returned
        # NOTE(review): CSW nextRecord arithmetic would usually be
        # total - (startPosition - 1) > returned; this subtracts 1 instead
        # of adding — possible off-by-one, confirm against the CSW spec.
        if (total-startPosition-1) > returned:
            attrs["nextRecord"] = startPosition + returned
        else:
            attrs["nextRecord"] = 0
    else:
        attrs["numberOfRecordsReturned"] = 0
    # lxml requires attribute values to be strings.
    attrs = dict((k, unicode(v)) for k,v in attrs.items())
    results = etree.SubElement(resp, ntag("csw:SearchResults"), **attrs)
    if req["resultType"] == "results":
        for guid, in Session.execute(rset):
            # Fetch the current harvest object carrying this guid's XML.
            doc = Session.query(HarvestObject) \
                .join(Package) \
                .filter(HarvestObject.guid==guid) \
                .filter(HarvestObject.current==True) \
                .filter(Package.state==u'active') \
                .first()
            try:
                record = etree.parse(StringIO(doc.content.encode("utf-8")))
                results.append(record.getroot())
            except:
                log.error("exception parsing document %s:\n%s", doc.id,
                          traceback.format_exc())
                raise
    data = self._render_xml(resp)
    log.info('GetRecords response: %r', truncate(data, LOG_XML_LENGTH))
    return data
def migrate_to_v0_6():
    """Schema migration to v0.6: add the ``create_as_private`` flag."""
    sql = """
        ALTER TABLE external_catalog
        ADD COLUMN create_as_private BOOLEAN NOT NULL DEFAULT FALSE;
    """
    Session.connection().execute(sql)
    Session.commit()
def migrate_to_v0_4():
    """Schema migration to v0.4: add ``ext_org_id`` to external_catalog."""
    sql = """
        ALTER TABLE external_catalog ADD COLUMN ext_org_id text;
    """
    Session.connection().execute(sql)
    Session.commit()
def migrate_visibility():
    """Schema migration: add a ``visibility`` integer column to datarequests."""
    connection = Session.connection()
    connection.execute('''
        ALTER TABLE datarequests ADD COLUMN visibility integer;
    ''')
    Session.commit()
def setup():
    """Create or migrate the harvester database tables.

    Idempotent: defines the table metadata if needed, defers creation
    until core CKAN tables exist, creates all harvest tables on first
    run, and otherwise inspects the live schema and applies any pending
    migrations (v2/v3 columns, harvest source datasets, harvest_log
    table, harvest_object index).
    """
    if harvest_source_table is None:
        define_harvester_tables()
        log.debug("Harvest tables defined in memory")
    if not model.package_table.exists():
        # Core CKAN schema not present yet — try again on a later call.
        log.debug("Harvest table creation deferred")
        return
    if not harvest_source_table.exists():
        # Create each table individually rather than
        # using metadata.create_all()
        harvest_source_table.create()
        harvest_job_table.create()
        harvest_object_table.create()
        harvest_gather_error_table.create()
        harvest_object_error_table.create()
        harvest_object_extra_table.create()
        harvest_log_table.create()
        log.debug("Harvest tables created")
    else:
        from ckan.model.meta import engine
        log.debug("Harvest tables already exist")
        # Check if existing tables need to be updated
        inspector = Inspector.from_engine(engine)
        columns = inspector.get_columns("harvest_source")
        column_names = [column["name"] for column in columns]
        if not "title" in column_names:
            log.debug("Harvest tables need to be updated")
            migrate_v2()
        if not "frequency" in column_names:
            log.debug("Harvest tables need to be updated")
            migrate_v3()
        # Check if this instance has harvest source datasets
        source_ids = Session.query(HarvestSource.id).filter_by(active=True).all()
        source_package_ids = Session.query(model.Package.id).filter_by(type=u"harvest", state="active").all()
        sources_to_migrate = set(source_ids) - set(source_package_ids)
        if sources_to_migrate:
            log.debug("Creating harvest source datasets for %i existing sources", len(sources_to_migrate))
            sources_to_migrate = [s[0] for s in sources_to_migrate]
            migrate_v3_create_datasets(sources_to_migrate)
        # Check if harvest_log table exist - needed for existing users
        if not "harvest_log" in inspector.get_table_names():
            harvest_log_table.create()
        # Check if harvest_object has a index
        index_names = [index["name"] for index in inspector.get_indexes("harvest_object")]
        if not "harvest_job_id_idx" in index_names:
            log.debug("Creating index for harvest_object")
            Index("harvest_job_id_idx", harvest_object_table.c.harvest_job_id).create()
def migrate_to_v0_3():
    """Schema migration to v0.3: add tracking columns to external_catalog."""
    sql = """
        ALTER TABLE external_catalog
        ADD COLUMN last_updated timestamp,
        ADD COLUMN status smallint not null default 0;
    """
    Session.connection().execute(sql)
    Session.commit()
def create(cls, message, job):
    """Create a gather-error record for *job*, persist it and log it."""
    error = cls(message=message, job=job)
    try:
        error.save()
    except InvalidRequestError:
        # The session is in a failed state — reset it and retry once.
        Session.rollback()
        error.save()
    finally:
        # No need to alert administrator so don't log as an error
        log.info(message)
def create(cls, message, object, stage=u'Fetch', line=None):
    '''Create an object-error record, persist it and log the message.'''
    error = cls(message=message, object=object, stage=stage, line=line)
    try:
        error.save()
    except InvalidRequestError:
        # Failed transaction in progress — reset and retry the save once.
        Session.rollback()
        error.save()
    finally:
        if line:
            log.debug('{0}, line {1}'.format(message, line))
        else:
            log.debug(message)
def migrate_v3():
    """Migrate the harvest tables to schema v3.

    Creates harvest_object_extra, adds import/state/report columns to
    harvest_object, frequency/next_run to harvest_source and finished to
    harvest_job; backfills state/report_status from package_id, and
    recreates the package FK as DEFERRABLE.
    """
    log.debug("Migrating harvest tables to v3. This may take a while...")
    conn = Session.connection()
    # One multi-statement DDL/DML batch executed in a single round trip.
    statement = """CREATE TABLE harvest_object_extra (
        id text NOT NULL,
        harvest_object_id text,
        "key" text,
        "value" text
    );

    ALTER TABLE harvest_object
        ADD COLUMN import_started timestamp without time zone,
        ADD COLUMN import_finished timestamp without time zone,
        ADD COLUMN "state" text,
        ADD COLUMN "report_status" text;

    ALTER TABLE harvest_source
        ADD COLUMN frequency text,
        ADD COLUMN next_run timestamp without time zone;

    ALTER TABLE harvest_job
        ADD COLUMN finished timestamp without time zone;

    ALTER TABLE harvest_object_extra
        ADD CONSTRAINT harvest_object_extra_pkey PRIMARY KEY (id);

    ALTER TABLE harvest_object_extra
        ADD CONSTRAINT harvest_object_extra_harvest_object_id_fkey
        FOREIGN KEY (harvest_object_id) REFERENCES harvest_object(id);

    UPDATE harvest_object set state = 'COMPLETE' where package_id is not null;
    UPDATE harvest_object set state = 'ERROR' where package_id is null;
    UPDATE harvest_object set retry_times = 0;
    UPDATE harvest_object set report_status = 'updated' where package_id is not null;
    UPDATE harvest_object set report_status = 'errored' where package_id is null;
    UPDATE harvest_source set frequency = 'MANUAL';

    ALTER TABLE harvest_object
        DROP CONSTRAINT harvest_object_package_id_fkey;

    ALTER TABLE harvest_object
        ADD CONSTRAINT harvest_object_package_id_fkey
        FOREIGN KEY (package_id) REFERENCES package(id) DEFERRABLE;

    ALTER TABLE harvest_object_error
        ADD COLUMN line integer;
    """
    conn.execute(statement)
    Session.commit()
    log.info("Harvest tables migrated to v3")
def GetRecordById(self, req):
    """Handle a CSW GetRecordById request.

    For each requested identifier, appends the stored metadata document
    of the most recently gathered object (active source, active package,
    has a package) to the response — full MD_Metadata documents only.

    :param req: parsed request dict; req["id"] is an iterable of guids.
    :returns: serialised XML response string.
    """
    resp = etree.Element(ntag("csw:GetRecordByIdResponse"), nsmap=namespaces)
    seen = set()  # NOTE(review): never used below — dead variable, confirm
    for ident in req["id"]:
        # Latest gathered object for this guid.
        doc = Session.query(HarvestObject) \
            .join(Package) \
            .join(HarvestJob).join(HarvestSource) \
            .filter(HarvestSource.active==True) \
            .filter(HarvestObject.guid==ident) \
            .filter(HarvestObject.package!=None) \
            .filter(Package.state==u'active') \
            .order_by(HarvestObject.gathered.desc()) \
            .limit(1).first()
        if doc is None:
            continue
        # Only emit ISO documents; other stored content is skipped.
        if 'MD_Metadata' in doc.content:
            try:
                record = etree.parse(StringIO(doc.content.encode("utf-8")))
                resp.append(record.getroot())
            except:
                log.error("exception parsing document %s:\n%s", doc.id,
                          traceback.format_exc())
                raise
    data = self._render_xml(resp)
    log.info('GetRecordById response: %r', truncate(data, LOG_XML_LENGTH))
    return data
def get_package_ids_for_showcase(cls, showcase_id):
    '''
    Return a list of package ids associated with the passed showcase_id.

    NB: each element is a one-column result row, not a bare string.
    '''
    query = Session.query(cls.package_id)
    return query.filter_by(showcase_id=showcase_id).all()
def get_showcase_ids_for_package(cls, package_id):
    '''
    Return a list of showcase ids associated with the passed package_id.

    NB: each element is a one-column result row, not a bare string.
    '''
    query = Session.query(cls.showcase_id)
    return query.filter_by(package_id=package_id).all()
def _do_drupal_login_by_key(self, environ, drupal_apikey, new_headers):
    # Look up the CKAN user whose apikey matches the key supplied by Drupal.
    # NOTE(review): on a successful lookup this falls straight through with
    # no further action — looks truncated; confirm against upstream source.
    from ckan import model
    from ckan.model.meta import Session
    try:
        user = Session.query(model.User).filter_by(apikey=drupal_apikey).one()
    except Exception, e:
        # .one() raises when zero or multiple users match.
        log.debug('Drupal user not found in CKAN: %s', e)
        return
def gen_new_name(title):
    """Derive a dataset name from *title*, de-duplicating with a uuid suffix.

    Collapses runs of dashes; if the munged name is already taken by a
    package, a 5-character uuid fragment is appended.
    """
    candidate = munge_title_to_name(title).replace("_", "-")
    while "--" in candidate:
        candidate = candidate.replace("--", "-")
    taken = Session.query(Package).filter(Package.name == candidate).first()
    if taken:
        return candidate + str(uuid.uuid4())[:5]
    return candidate
def gen_new_name(title):
    '''Turn *title* into a unique dataset name (uuid suffix on collision).'''
    base = munge_title_to_name(title).replace('_', '-')
    while '--' in base:
        base = base.replace('--', '-')
    existing = Session.query(Package).filter(Package.name == base).first()
    return base + str(uuid.uuid4())[:5] if existing else base
def create(cls, message, object, stage=u"Fetch", line=None):
    """
    Helper function to create an error object and save it.

    If the first save fails because the session is in a failed state,
    the session is reset (best-effort) and the save retried once.
    """
    err = cls(message=message, object=object, stage=stage, line=line)
    try:
        err.save()
    except InvalidRequestError:
        # Clear any in-progress sqlalchemy transactions.
        # Use `except Exception` rather than a bare `except:` so that
        # KeyboardInterrupt/SystemExit still propagate.
        try:
            Session.rollback()
        except Exception:
            pass
        try:
            Session.remove()
        except Exception:
            pass
        err.save()
def setup():
    '''Create or migrate the harvester database tables.

    Idempotent: defines table metadata if needed, defers creation until
    core CKAN tables exist, creates the harvest tables on first run, and
    otherwise inspects the live schema and applies pending migrations
    (v2/v3 columns, harvest source datasets).
    '''
    if harvest_source_table is None:
        define_harvester_tables()
        log.debug('Harvest tables defined in memory')
    if not model.package_table.exists():
        # Core CKAN schema not present yet — try again on a later call.
        log.debug('Harvest table creation deferred')
        return
    if not harvest_source_table.exists():
        # Create each table individually rather than
        # using metadata.create_all()
        harvest_source_table.create()
        harvest_job_table.create()
        harvest_object_table.create()
        harvest_gather_error_table.create()
        harvest_object_error_table.create()
        harvest_object_extra_table.create()
        log.debug('Harvest tables created')
    else:
        from ckan.model.meta import engine
        log.debug('Harvest tables already exist')
        # Check if existing tables need to be updated
        inspector = Inspector.from_engine(engine)
        columns = inspector.get_columns('harvest_source')
        column_names = [column['name'] for column in columns]
        if not 'title' in column_names:
            log.debug('Harvest tables need to be updated')
            migrate_v2()
        if not 'frequency' in column_names:
            log.debug('Harvest tables need to be updated')
            migrate_v3()
        # Check if this instance has harvest source datasets
        source_ids = Session.query(HarvestSource.id).filter_by(active=True).all()
        source_package_ids = Session.query(model.Package.id).filter_by(type=u'harvest', state='active').all()
        sources_to_migrate = set(source_ids) - set(source_package_ids)
        if sources_to_migrate:
            log.debug('Creating harvest source datasets for %i existing sources', len(sources_to_migrate))
            sources_to_migrate = [s[0] for s in sources_to_migrate]
            migrate_v3_create_datasets(sources_to_migrate)
def migrate_v2():
    """Migrate the harvest tables to schema v2.

    Adds title/current/harvest_source_id columns, backfills
    harvest_source_id from each object's job, then marks exactly one
    object per guid (the freshest one on an active package) as current.
    """
    log.debug("Migrating harvest tables to v2. This may take a while...")
    conn = Session.connection()
    statements = """
    ALTER TABLE harvest_source ADD COLUMN title text;
    ALTER TABLE harvest_object ADD COLUMN current boolean;
    ALTER TABLE harvest_object ADD COLUMN harvest_source_id text;
    ALTER TABLE harvest_object
        ADD CONSTRAINT harvest_object_harvest_source_id_fkey
        FOREIGN KEY (harvest_source_id) REFERENCES harvest_source(id);
    UPDATE harvest_object o SET harvest_source_id = j.source_id
        FROM harvest_job j WHERE o.harvest_job_id = j.id;
    """
    conn.execute(statements)
    # Flag current harvest_objects
    guids = (
        Session.query(distinct(HarvestObject.guid))
        .join(Package)
        .filter(HarvestObject.package != None)
        .filter(Package.state == u"active")
    )
    # NOTE(review): the guid is interpolated straight into the SQL text;
    # values come from our own table so injection risk is low, but bound
    # parameters would be safer — confirm before reuse.
    update_statement = """
    UPDATE harvest_object SET current = TRUE
    WHERE id = (
        SELECT o.id FROM harvest_object o
        JOIN package p ON p.id = o.package_id
        WHERE o.package_id IS NOT null AND p.state = 'active'
        AND o.guid = '%s'
        ORDER BY metadata_modified_date DESC, fetch_finished DESC, gathered DESC
        LIMIT 1)
    """
    for guid in guids:
        # guid is a one-element row tuple; "%s" % one-tuple interpolates
        # its single element.
        conn.execute(update_statement % guid)
    conn.execute("UPDATE harvest_object SET current = FALSE WHERE current IS NOT TRUE")
    Session.commit()
    log.info("Harvest tables migrated to v2")
def create(cls, message, object, stage=u'Fetch', line=None):
    '''
    Create an error record and persist it, retrying once after a
    best-effort reset of any broken session state; always logs.
    '''
    err = cls(message=message, object=object, stage=stage, line=line)
    try:
        err.save()
    except InvalidRequestError:
        # Clear any in-progress sqlalchemy transactions, ignoring
        # failures of the reset calls themselves.
        for reset in (Session.rollback, Session.remove):
            try:
                reset()
            except Exception:
                pass
        err.save()
    finally:
        if line:
            log.debug('{0}, line {1}'.format(message, line))
        else:
            log.debug(message)
def clean_harvest_log(condition):
    """Delete harvest log entries created on or before *condition*.

    :param condition: cut-off timestamp; rows with ``created`` at or
        before it are removed.
    """
    Session.query(HarvestLog).filter(HarvestLog.created <= condition) \
        .delete(synchronize_session=False)
    try:
        Session.commit()
    except InvalidRequestError:
        Session.rollback()
        log.error("An error occurred while trying to clean-up the harvest log table")
    else:
        # Bug fix: previously the success message was logged even after a
        # failed commit; only report success when the commit went through.
        log.info("Harvest log table clean-up finished successfully")
def sync_irods(params, id):
    """
    Fetches a resource from database with the same path as user specified
    and that matches an existing resource in CKAN.

    Copies the iRODS file's user metadata triples into the CKAN
    resource's extras, commits, and redirects back to the resource page
    with a flash message reporting the outcome.
    """
    from irods import getFileUserMetadata, rcModAccessControl
    rev = model.repo.new_revision()
    conn = get_connection_from_params(params)
    resource = Resource.get(id)
    path = params['path']
    extras = {}
    # Lets handle only resources with file names
    if resource.name:
        # iRODS path = <user-supplied path>/<basename of the resource name>
        fname = "%s/%s" % (path, resource.name.split('/')[-1])
        log.debug(fname)
        i = 0  # number of metadata triples imported
        access = rcModAccessControl()
        log.debug(access.getPath())
        if conn:
            # Each metadata item is a (key, value, units) triple; units ignored.
            for met in getFileUserMetadata(conn, fname):
                i += 1
                key, value, _ = met
                extras[key] = value
            resource.extras = extras
            Session.add(resource)
            conn.disconnect()
            model.repo.commit()
            rev.message = "Update from iRODS, matched file %s" % fname
            h.flash_success("iRODS import to resource OK! Imported %s metadatas" % i)
        else:
            h.flash_error("Could not connect to iRODS!")
    else:
        h.flash_error("Resource is an URL, cannot import!")
    h.redirect_to(controller='package', action='resource_read', \
                  id=resource.resource_group.package.name, \
                  resource_id=resource.id)
def import_collection_to_package(params, id):
    """
    Import a collection to dataset. Does not import whole file data but
    rather the metadata.

    Each readable object in the iRODS collection becomes (or updates) a
    CKAN resource whose extras hold the object's user metadata; the
    collection's own metadata is copied into the package extras.
    """
    from irods import irodsCollection
    path = params['path']
    pkg = Package.get(id)
    conn = get_connection_from_params(params)
    if (conn):
        coll = irodsCollection(conn, path)
        from irods import iRodsOpen
        rev = model.repo.new_revision()
        i = 0  # number of resources imported
        for obj in coll.getObjects():
            extras = {}
            fname, _ = obj
            fpath = "%s/%s" % (coll.getCollName(), fname)
            f = iRodsOpen(conn, fpath, 'r')
            if f:
                i += 1
                # Reuse an existing resource of the same name if present.
                res = Resource.by_name(fname)
                if not res:
                    res = Resource(url = '', name=fname, extras=extras, \
                                   resource_type='file')
                # File-level metadata triples -> resource extras.
                for met in f.getUserMetadata():
                    key, value, _ = met
                    extras[key] = value
                res.extras = extras
                resgrp = pkg.resource_groups[0]
                resgrp.resources.append(res)
                Session.add(res)
                Session.add(resgrp)
                rev.message = "Update from iRODS, matched file %s" % fname
        # Collection-level metadata triples -> package extras.
        for met in coll.getUserMetadata():
            key, value, _ = met
            pkg.extras[key] = value
        Session.add(pkg)
        model.repo.commit()
        conn.disconnect()
        h.flash_success("iRODS import to dataset OK! Imported %s resources." % i)
    else:
        h.flash_error("Could not connect to iRODS!")
    h.redirect_to(controller='package', action='read', id=id)
def get_showcase_admin_ids(cls):
    '''
    Return a list of showcase admin user ids.
    '''
    rows = Session.query(cls.user_id).all()
    return [user_id for (user_id,) in rows]
def get(self):
    """Fetch the Pipelines row matching this record's package/pipeline ids."""
    query = Session.query(Pipelines).filter_by(
        package_id=self.package_id,
        pipeline_id=self.pipeline_id)
    return query.first()
def get(cls, _type, ref_id):
    """Return the most recently modified metrics row for (_type, ref_id)."""
    query = (Session.query(cls)
             .filter_by(type=_type, ref_id=ref_id)
             .order_by(data_quality_metrics_table.c.modified_at.desc()))
    return query.first()
def GetRecords(self, req):
    """Handle a CSW GetRecords request (see also the sibling implementation).

    Pages over guids of current harvest objects on active packages from
    gemini/csw sources; when resultType == "results" the stored XML of
    each record is embedded in the csw:SearchResults element.

    :param req: parsed request dict with keys startPosition, maxRecords,
        elementSetName, resultType.
    :returns: serialised XML response string.
    """
    resp = etree.Element(ntag("csw:GetRecordsResponse"), nsmap=namespaces)
    etree.SubElement(resp, ntag("csw:SearchStatus"),
                     timestamp=datetime.utcnow().isoformat())
    cursor = Session.connection()  # NOTE(review): unused — confirm and remove
    q = Session.query(distinct(HarvestObject.guid)) \
        .join(Package) \
        .join(HarvestSource) \
        .filter(HarvestObject.current==True) \
        .filter(Package.state==u'active') \
        .filter(or_(HarvestSource.type=='gemini-single',
                    HarvestSource.type=='gemini-waf',
                    HarvestSource.type=='csw'))
    ### TODO Parse query instead of stupidly just returning whatever we like
    # Defaults when the client sends 0/absent paging values.
    startPosition = req["startPosition"] if req["startPosition"] > 0 else 1
    maxRecords = req["maxRecords"] if req["maxRecords"] > 0 else 10
    rset = q.offset(startPosition - 1).limit(maxRecords)
    total = q.count()
    attrs = {
        "numberOfRecordsMatched": total,
        "elementSet": req["elementSetName"], # we lie here. it's always really "full"
    }
    if req["resultType"] == "results":
        returned = rset.count()
        attrs["numberOfRecordsReturned"] = returned
        # NOTE(review): possible off-by-one — CSW nextRecord maths would
        # usually be total - (startPosition - 1) > returned; confirm.
        if (total - startPosition - 1) > returned:
            attrs["nextRecord"] = startPosition + returned
        else:
            attrs["nextRecord"] = 0
    else:
        attrs["numberOfRecordsReturned"] = 0
    # lxml requires string attribute values.
    attrs = dict((k, unicode(v)) for k, v in attrs.items())
    results = etree.SubElement(resp, ntag("csw:SearchResults"), **attrs)
    if req["resultType"] == "results":
        for guid, in Session.execute(rset):
            # Fetch the current harvest object carrying this guid's XML.
            doc = Session.query(HarvestObject) \
                .join(Package) \
                .filter(HarvestObject.guid==guid) \
                .filter(HarvestObject.current==True) \
                .filter(Package.state==u'active') \
                .first()
            try:
                record = etree.parse(StringIO(doc.content.encode("utf-8")))
                results.append(record.getroot())
            except:
                log.error("exception parsing document %s:\n%s", doc.id,
                          traceback.format_exc())
                raise
    data = self._render_xml(resp)
    log.info('GetRecords response: %r', truncate(data, LOG_XML_LENGTH))
    return data
def __call__(self, environ, start_response):
    """WSGI entry point: transparently log a Drupal user into CKAN.

    If the request carries a Drupal session cookie but no CKAN auth
    cookie, the user's details are fetched from Drupal (auto-creating
    the CKAN user if needed) and auth_tkt plus ckan_* cookies are added
    to the response headers.
    """
    if self.drupal_client is None:
        self.drupal_client = DrupalClient()
    # establish from the cookie whether ckan and drupal are signed in
    ckan_signed_in = [False]
    drupal_signed_in = [False]
    for k, v in environ.items():
        key = k.lower()
        if key == 'http_cookie':
            ckan_signed_in[0] = is_ckan_signed_in(v)
            drupal_signed_in[0] = drupal_extract_cookie(v)
    ckan_signed_in = ckan_signed_in[0]
    drupal_signed_in = drupal_signed_in[0]
    environ['drupal.uid'] = None
    environ['drupal.publishers'] = None
    new_start_response = start_response
    if drupal_signed_in and not ckan_signed_in:
        # get info about the user from drupal and store in environ for
        # use by main CKAN app
        user_id = self.drupal_client.get_user_id_from_session_id(
            drupal_signed_in)
        res = self.drupal_client.get_user_properties(user_id)
        environ['drupal.uid'] = res['uid']
        environ['drupal.publishers'] = res['publishers']
        environ['drupal.name'] = res['name']
        from ckan import model
        from ckan.model.meta import Session

        def munge(username):
            # NOTE(review): the lower/replace result is discarded — the
            # username is returned unchanged; confirm against upstream.
            username.lower().replace(' ', '_')
            return username

        # Add the new Drupal user if they don't already exist.
        query = Session.query(
            model.User).filter_by(name=unicode(environ['drupal.uid']))
        if not query.count():
            user = model.User(
                name=munge(unicode(environ['drupal.uid'])),
                fullname=unicode(environ['drupal.name']),
                about=u'Drupal auto-generated user',
            )
            Session.add(user)
            Session.commit()
        else:
            user = query.one()
        # We want to store values in the user's cookie, so
        # prepare the response header with this value,
        # using auth_tkt to sign it.
        new_header = environ['repoze.who.plugins']['auth_tkt'].remember(
            environ, {
                'repoze.who.userid': environ['drupal.uid'],
                'tokens': '',
                'userdata': '',
            })
        # e.g. new_header = [('Set-Cookie', 'bob=ab48fe; Path=/;')]
        # Reuse the auth_tkt cookie's attributes (Path etc.) as a
        # template for the extra ckan_* cookies below.
        cookie_template = new_header[0][1].split('; ')
        cookie_string = ''
        for name, value in [
                ('ckan_apikey', user.apikey),
                ('ckan_display_name', user.fullname),
                ('ckan_user', user.name),
        ]:
            cookie_string += '; %s="%s"' % (name, value)
            new_cookie = cookie_template[:]
            new_cookie[0] = '%s="%s"' % (name, value)
            new_header.append(('Set-Cookie', str('; '.join(new_cookie))))
        # Also need these cookies to work too:
        # ckan_apikey
        # Value "3a51edc6-6461-46b8-bfe2-57445cbdeb2b"
        # Host catalogue.dev.dataco.coi.gov.uk
        # Path /
        # Secure No
        # Expires At End Of Session
        #
        # Name ckan_display_name
        # Value "James Gardner"
        # Host catalogue.dev.dataco.coi.gov.uk
        # Path /
        # Secure No
        # Expires At End Of Session
        #
        # Name ckan_user
        # Value "4466"
        # Host catalogue.dev.dataco.coi.gov.uk
        # Path /
        # Secure No
        # Expires At End Of Session

        # @@@ Need to add the headers to the request too so that the rest
        # of the stack can sign the user in.

        # There is a bug(/feature?) in line 628 of Cookie.py that means
        # it can't load from unicode strings. This causes Beaker to fail
        # unless the value here is a string
        # NOTE(review): the branches below look swapped — appending when
        # HTTP_COOKIE is absent would raise KeyError, while an existing
        # cookie header gets replaced wholesale; confirm against upstream.
        if not environ.get('HTTP_COOKIE'):
            environ['HTTP_COOKIE'] += str(cookie_string)
        else:
            environ['HTTP_COOKIE'] = str(cookie_string[2:])

        def cookie_setting_start_response(status, headers, exc_info=None):
            # Inject the prepared Set-Cookie headers into the response.
            headers += new_header
            return start_response(status, headers, exc_info)

        new_start_response = cookie_setting_start_response
    return self.app(environ, new_start_response)
def get_organization_list():
    """Query for the names of all active organizations, ordered by title."""
    query = Session.query(Group.name)
    query = query.filter(Group.state == 'active', Group.type == 'organization')
    return query.order_by(Group.title)
def create(cls, **kwargs):
    """Insert a new row built from keyword args and return it as a dict."""
    record = cls(**kwargs)
    Session.add(record)
    Session.commit()
    return record.as_dict()
def get_gather_errors(self):
    """Return all gather errors for this harvest job, newest first."""
    errors = Session.query(HarvestGatherError)
    errors = errors.filter(HarvestGatherError.harvest_job_id == self.id)
    return errors.order_by(HarvestGatherError.created.desc()).all()
def update(cls, filter, data):
    """Bulk-update rows matching *filter* with *data*; return the first match."""
    matched = Session.query(cls).filter_by(**filter)
    matched.update(data)
    Session.commit()
    return matched.first()
def get_by_id(cls, ref_id):
    """Look up a row by primary key (None if absent)."""
    query = Session.query(cls)
    return query.get(ref_id)
def purge_datasets():
    '''
    Purge all existing datasets.
    '''
    packages = Session.query(Package)
    for package in packages:
        call_action('dataset_purge', id=package.id)
def delete(cls, **kw):
    """Delete every row matching the given keyword filters."""
    matches = Session.query(cls).autoflush(False).filter_by(**kw).all()
    for row in matches:
        Session.delete(row)
def get_all(cls):
    """Return every row of this model as a list."""
    query = Session.query(cls)
    return query.all()
def all(cls):
    """Return an (unevaluated) query over all Featurednumbers rows."""
    query = Session.query(Featurednumbers)
    return query
def get(cls, **kw):
    '''Finds a single entity in the register.

    NB despite the name this returns a list (``.all()``), with
    autoflush disabled on the query.
    '''
    query = Session.query(cls).autoflush(False)
    return query.filter_by(**kw).all()
def tearDown(self):
    # Roll back any uncommitted changes so test cases stay isolated.
    Session.rollback()
def count(cls):
    """Return the number of rows for this model."""
    query = Session.query(cls)
    return query.count()
def get(cls, ref):
    """Fetch the CommentsRefStats row for *ref* by primary key."""
    query = Session.query(CommentsRefStats)
    return query.get(ref)
def _do_wordpress_login(self, environ, wordpress_session_id, new_headers):
    '''Given a WordPress cookie's session ID, check it with WordPress,
    create/modify the equivalent CKAN user with properties copied from
    WordPress and log the person in with auth_tkt and its cookie.
    '''
    if self.wordpress_client is None:
        self.wordpress_client = WordPressClient(environ)
    else:
        # Warning! These must be called, or the user data will quite often be wrong.
        # self.wordpress_client may sometimes be a properly set up object with wrong data.
        self.wordpress_client.reset_data()
        self.wordpress_client.update_cookies(environ)
    # ask wp for the wordpress_user_id for this session
    wordpress_user_id = self.wordpress_client.get_user_id()
    if not wordpress_user_id:
        log.error(
            'WordPress said the session ID found in the cookie is not valid.'
        )
        return
    # ask wp about this user
    user_properties = self.wordpress_client.get_user_properties()
    # see if user already exists in CKAN
    ckan_user_name = WordPressUserMapping.wordpress_id_to_ckan_user_name(
        wordpress_user_id)
    log.debug('_do_wordpress_login ->')
    log.debug(str(wordpress_session_id))
    log.debug(str(wordpress_user_id))
    log.debug(str(ckan_user_name))
    log.debug('<- _do_wordpress_login')
    from ckan import model
    from ckan.model.meta import Session
    query = Session.query(
        model.User).filter_by(name=unicode(ckan_user_name))
    if not query.count():
        # need to add this user to CKAN
        #raise Exception('Got this userdata:' + str(user_properties))
        # http://stackoverflow.com/questions/1697815/how-do-you-convert-a-python-time-struct-time-object-into-a-datetime-object
        def convertSQLDateTimeToDatetime(value):
            # Parse WordPress's "YYYY-MM-DD HH:MM:SS" registration stamp.
            return datetime.datetime.fromtimestamp(
                time.mktime(time.strptime(value, '%Y-%m-%d %H:%M:%S')))

        date_created = convertSQLDateTimeToDatetime(
            user_properties['data']['user_registered'])
        user = model.User(
            name=ckan_user_name,
            fullname=unicode(
                user_properties['data']
                ['display_name']),  # NB may change in WordPress db
            about=u'User account imported from WordPress system.',
            email=user_properties['data']
            ['user_email'],  # NB may change in WordPress db
            created=date_created,
        )
        Session.add(user)
        Session.commit()
        log.debug('WordPress user added to CKAN as: %s', user.name)
    else:
        user = query.one()
        log.debug(
            'WordPress user found in CKAN: %s for ckan_user_name: %s',
            user.name, ckan_user_name)
    self.set_roles(ckan_user_name, user_properties['roles'])
    # There is a chance that on this request we needed to get authtkt
    # to log-out. This would have created headers like this:
    # 'Set-Cookie', 'auth_tkt="INVALID"...'
    # but since we are about to login again, which will create a header
    # setting that same cookie, we need to get rid of the invalidation
    # header first.
    new_headers[:] = [(key, value) for (key, value) in new_headers \
                      if (not (key=='Set-Cookie' and value.startswith('auth_tkt="INVALID"')))]
    #log.debug('Headers reduced to: %r', new_headers)
    # Ask auth_tkt to remember this user so that subsequent requests
    # will be authenticated by auth_tkt.
    # auth_tkt cookie template needs to also go in the response.
    identity = {
        'repoze.who.userid': str(ckan_user_name),
        'tokens': '',
        'userdata': wordpress_session_id
    }
    headers = environ['repoze.who.plugins']['hri_auth_tkt'].remember(
        environ, identity)
    if headers:
        new_headers.extend(headers)
    # Tell app during this request that the user is logged in
    environ['REMOTE_USER'] = user.name
    log.debug('Set REMOTE_USER = %r', user.name)
def get(cls, comment_id):
    """Fetch a comment by primary key (None if absent)."""
    query = Session.query(cls)
    return query.get(comment_id)
class DrupalAuthMiddleware(object):
    '''Allows CKAN user to login via Drupal.

    Looks for the Drupal session cookie on each request, fetches the
    user's details from Drupal over XMLRPC and signs them in/out with
    auth_tkt, so it works side-by-side with normal CKAN logins. Drupal
    users get CKAN names prefixed with ``user_d``.
    '''
    def __init__(self, app, app_conf):
        self.app = app
        self.drupal_client = None
        self._user_name_prefix = 'user_d'

    def _parse_cookies(self, environ):
        # Returns (is_ckan_cookie, drupal_session_id) read from the
        # request's Cookie header; either may be False/None.
        is_ckan_cookie = [False]
        drupal_session_id = [False]
        server_name = environ['SERVER_NAME']
        for k, v in environ.items():
            key = k.lower()
            if key == 'http_cookie':
                is_ckan_cookie[0] = self._is_this_a_ckan_cookie(v)
                drupal_session_id[0] = self._drupal_cookie_parse(
                    v, server_name)
        is_ckan_cookie = is_ckan_cookie[0]
        drupal_session_id = drupal_session_id[0]
        return is_ckan_cookie, drupal_session_id

    @staticmethod
    def _drupal_cookie_parse(cookie_string, server_name):
        '''Returns the Drupal Session ID from the cookie string.

        Drupal names its session cookie SESS<md5-of-server-name>;
        cookies with other SESS hashes are ignored (and logged).
        '''
        cookies = Cookie.SimpleCookie()
        cookies.load(str(cookie_string))
        similar_cookies = []
        for cookie in cookies:
            if cookie.startswith('SESS'):
                server_hash = hashlib.md5(server_name).hexdigest()
                if cookie == 'SESS%s' % server_hash:
                    log.debug('Drupal cookie found for server request %s',
                              server_name)
                    return cookies[cookie].value
                else:
                    similar_cookies.append(cookie)
        if similar_cookies:
            log.debug(
                'Drupal cookies ignored with incorrect hash for server %r: %r',
                server_name, similar_cookies)
        return None

    @staticmethod
    def _is_this_a_ckan_cookie(cookie_string):
        # True when an auth_tkt cookie is present in the header.
        cookies = Cookie.SimpleCookie()
        cookies.load(str(cookie_string))
        if not 'auth_tkt' in cookies:
            return False
        return True

    def _munge_drupal_id_to_ckan_user_name(self, drupal_id):
        # NOTE(review): the lower/replace result is discarded — the raw
        # drupal_id is used; presumably intended to sanitise it, confirm.
        drupal_id.lower().replace(' ', '_')
        return u'%s%s' % (self._user_name_prefix, drupal_id)

    def _log_out(self, environ, new_headers):
        # don't progress the user info for this request
        environ['REMOTE_USER'] = None
        environ['repoze.who.identity'] = None
        # tell auth_tkt to logout whilst adding the header to tell
        # the browser to delete the cookie
        identity = {}
        headers = environ['repoze.who.plugins']['dgu_auth_tkt'].forget(
            environ, identity)
        if headers:
            new_headers.extend(headers)
        # Remove cookie from request, so that if we are doing a login again in this request then
        # it is aware of the cookie removal
        #log.debug('Removing cookies from request: %r', environ.get('HTTP_COOKIE', ''))
        cookies = environ.get('HTTP_COOKIE', '').split('; ')
        cookies = '; '.join([
            cookie for cookie in cookies
            if not cookie.startswith('auth_tkt=')
        ])
        environ['HTTP_COOKIE'] = cookies
        #log.debug('Cookies in request now: %r', environ['HTTP_COOKIE'])
        log.debug('Logged out Drupal user')

    def __call__(self, environ, start_response):
        # WSGI entry point: perform any login/logout, then inject the
        # collected headers into the response.
        new_headers = []
        self.do_drupal_login_logout(environ, new_headers)
        #log.debug('New headers: %r', new_headers)

        def cookie_setting_start_response(status, headers, exc_info=None):
            if headers:
                headers.extend(new_headers)
            else:
                headers = new_headers
            return start_response(status, headers, exc_info)

        new_start_response = cookie_setting_start_response
        return self.app(environ, new_start_response)

    def do_drupal_login_logout(self, environ, new_headers):
        '''Looks at cookies and auth_tkt and may tell auth_tkt to log-in or
        log-out to a Drupal user.'''
        is_ckan_cookie, drupal_session_id = self._parse_cookies(environ)
        # Is there a Drupal cookie? We may want to do a log-in for it.
        if drupal_session_id:
            # Look at any authtkt logged in user details
            authtkt_identity = environ.get('repoze.who.identity')
            if authtkt_identity:
                authtkt_user_name = authtkt_identity[
                    'repoze.who.userid']  #same as environ.get('REMOTE_USER', '')
                authtkt_drupal_session_id = authtkt_identity['userdata']
            else:
                authtkt_user_name = ''
                authtkt_drupal_session_id = ''
            if not authtkt_user_name:
                # authtkt not logged in, so log-in with the Drupal cookie
                self._do_drupal_login(environ, drupal_session_id,
                                      new_headers)
                return
            elif authtkt_user_name.startswith(self._user_name_prefix):
                # A drupal user is logged in with authtkt.
                # See if that the authtkt matches the drupal cookie's session
                if authtkt_drupal_session_id != drupal_session_id:
                    # Drupal cookie session has changed, so tell authkit to forget the old one
                    # before we do the new login
                    log.debug('Drupal cookie session has changed.')
                    #log.debug('Drupal cookie session has changed from %r to %r.', authtkt_drupal_session_id, drupal_session_id)
                    self._log_out(environ, new_headers)
                    # since we are about to login again, we need to get rid of the headers like
                    # ('Set-Cookie', 'auth_tkt="INVALID"...' since we are about to set them again in this
                    # same request.)
                    new_headers[:] = [(key, value) for (key, value) in new_headers \
                                      if (not (key=='Set-Cookie' and value.startswith('auth_tkt="INVALID"')))]
                    #log.debug('Headers reduced to: %r', new_headers)
                    self._do_drupal_login(environ, drupal_session_id,
                                          new_headers)
                    #log.debug('Headers on log-out log-in result: %r', new_headers)
                    return
                else:
                    log.debug('Drupal cookie session stayed the same.')
                    # Drupal cookie session matches the authtkt - leave user logged in
                    return
            else:
                # There's a Drupal cookie, but user is logged in as a normal CKAN user.
                # Ignore the Drupal cookie.
                return
        elif not drupal_session_id and is_ckan_cookie:
            # Deal with the case where user is logged out of Drupal
            # i.e. user WAS were logged in with Drupal and the cookie was
            # deleted (probably because Drupal logged out)
            # Is the logged in user a Drupal user?
            user_name = environ.get('REMOTE_USER', '')
            if user_name and user_name.startswith(self._user_name_prefix):
                log.debug(
                    'Was logged in as Drupal user %r but Drupal cookie no longer there.',
                    user_name)
                self._log_out(environ, new_headers)

    def _do_drupal_login(self, environ, drupal_session_id, new_headers):
        # Validate the Drupal session, mirror the user into CKAN (creating
        # the account on first sight) and ask auth_tkt to remember them.
        if self.drupal_client is None:
            self.drupal_client = DrupalClient()
        # ask drupal for the drupal_user_id for this session
        try:
            drupal_user_id = self.drupal_client.get_user_id_from_session_id(
                drupal_session_id)
        except DrupalRequestError, e:
            log.error('Error checking session with Drupal: %s', e)
            return
        if drupal_user_id:
            # ask drupal about this user
            user_properties = self.drupal_client.get_user_properties(
                drupal_user_id)
            # see if user already exists in CKAN
            ckan_user_name = self._munge_drupal_id_to_ckan_user_name(
                drupal_user_id)
            from ckan import model
            from ckan.model.meta import Session
            query = Session.query(
                model.User).filter_by(name=unicode(ckan_user_name))
            if not query.count():
                # need to add this user to CKAN
                date_created = datetime.datetime.fromtimestamp(
                    int(user_properties['created']))
                user = model.User(
                    name=ckan_user_name,
                    fullname=unicode(
                        user_properties['name']),  # NB may change in Drupal db
                    about=u'User account imported from Drupal system.',
                    email=user_properties[
                        'mail'],  # NB may change in Drupal db
                    created=date_created,
                )
                Session.add(user)
                Session.commit()
                log.debug('Drupal user added to CKAN as: %s', user.name)
            else:
                user = query.one()
                log.debug('Drupal user found in CKAN: %s', user.name)
            self.set_roles(ckan_user_name, user_properties['roles'].values())
            # Ask auth_tkt to remember this user so that subsequent requests
            # will be authenticated by auth_tkt.
            # auth_tkt cookie template needs to also go in the response.
            identity = {
                'repoze.who.userid': str(ckan_user_name),
                'tokens': '',
                'userdata': drupal_session_id
            }
            headers = environ['repoze.who.plugins']['dgu_auth_tkt'].remember(
                environ, identity)
            if headers:
                new_headers.extend(headers)
            # Tell app during this request that the user is logged in
            environ['REMOTE_USER'] = user.name
            log.debug('Set REMOTE_USER = %r', user.name)
        else:
            log.debug(
                'Drupal said the session ID found in the cookie is not valid.')
def get_comments_count(cls, ref):
    """Return the number of comments recorded for *ref* in the stats table."""
    count_column = comments_refs_stats_table.c.comment_count
    return (Session.query(count_column)
            .filter(comments_refs_stats_table.c.ref == ref)
            .scalar())
def get_thread(cls, ref, thread_id):
    """Return all comments of thread *thread_id* under *ref*, oldest first."""
    cols = comments_table.c
    thread_query = (Session.query(cls)
                    .filter(cols.ref == ref)
                    .filter(cols.thread_id == thread_id)
                    .order_by(cols.created_at.asc()))
    return thread_query.all()
def filter(cls, **kwds):
    """Build a filter_by query over this model.

    Autoflush is disabled so that merely building/running the query does
    not flush pending (unsaved) objects to the database as a side effect.
    """
    return Session.query(cls).autoflush(False).filter_by(**kwds)
def by_pipeline_id(cls, pipeline_id):
    """Return the first record matching *pipeline_id*, or None if absent.

    Raises:
        ValueError: if *pipeline_id* is falsy. An ``assert`` was used here
            before, but asserts are stripped under ``python -O``, so the
            guard is now an explicit check.
    """
    if not pipeline_id:
        raise ValueError('pipeline_id is required')
    return Session.query(cls)\
        .filter_by(pipeline_id=pipeline_id).first()
def filter(cls, **kwargs):
    """Shorthand for a filter_by query over this model class."""
    base_query = Session.query(cls)
    return base_query.filter_by(**kwargs)
def by_dataset_id(cls, dataset_id):
    """Return all records whose package_id matches *dataset_id*.

    Raises:
        ValueError: if *dataset_id* is falsy. An ``assert`` was used here
            before, but asserts are stripped under ``python -O``, so the
            guard is now an explicit check (matching ``by_pipeline_id``).
    """
    if not dataset_id:
        raise ValueError('dataset_id is required')
    return Session.query(cls)\
        .filter_by(package_id=dataset_id).all()
def __call__(self, environ, start_response):
    """WSGI middleware: bridge a Drupal login cookie into a CKAN login.

    Scans the request cookies; when a Drupal session exists but no CKAN
    one, fetches the user's details from Drupal, mirrors the user into
    CKAN's user table if needed, stores Drupal info in ``environ``
    (``drupal.uid``/``drupal.publishers``/``drupal.name``), and injects
    signed CKAN cookies into both the response headers and the current
    request's ``HTTP_COOKIE`` so the rest of the stack sees the user as
    signed in.
    """
    # Lazily construct the Drupal client on first use.
    if self.drupal_client is None:
        self.drupal_client = DrupalClient()
    # establish from the cookie whether ckan and drupal are signed in
    # (single-element lists are used as mutable cells written in the loop)
    ckan_signed_in = [False]
    drupal_signed_in = [False]
    for k, v in environ.items():
        key = k.lower()
        if key == 'http_cookie':
            ckan_signed_in[0] = is_ckan_signed_in(v)
            # NB drupal_extract_cookie returns the session id (truthy),
            # not just a flag — it is reused as the session id below.
            drupal_signed_in[0] = drupal_extract_cookie(v)
    ckan_signed_in = ckan_signed_in[0]
    drupal_signed_in = drupal_signed_in[0]
    environ['drupal.uid'] = None
    environ['drupal.publishers'] = None
    new_start_response = start_response
    if drupal_signed_in and not ckan_signed_in:
        # get info about the user from drupal and store in environ for
        # use by main CKAN app
        user_id = self.drupal_client.get_user_id_from_session_id(drupal_signed_in)
        res = self.drupal_client.get_user_properties(user_id)
        environ['drupal.uid'] = res['uid']
        environ['drupal.publishers'] = res['publishers']
        environ['drupal.name'] = res['name']
        # Imported here, not at module level — presumably to avoid a
        # circular import with the CKAN model; confirm before moving.
        from ckan import model
        from ckan.model.meta import Session
        def munge(username):
            # NOTE(review): the result of lower()/replace() is discarded,
            # so this function is currently a no-op and returns its input
            # unchanged. Looks like a bug, but existing user names (and
            # the unmunged lookup query below) depend on the no-op —
            # confirm before fixing.
            username.lower().replace(' ', '_')
            return username
        # Add the new Drupal user if they don't already exist.
        query = Session.query(model.User).filter_by(name=unicode(environ['drupal.uid']))
        if not query.count():
            user = model.User(
                name=munge(unicode(environ['drupal.uid'])),
                fullname=unicode(environ['drupal.name']),
                about=u'Drupal auto-generated user',
            )
            Session.add(user)
            Session.commit()
        else:
            user = query.one()
        # We want to store values in the user's cookie, so
        # prepare the response header with this value,
        # using auth_tkt to sign it.
        new_header = environ['repoze.who.plugins']['auth_tkt'].remember(
            environ,
            {
                'repoze.who.userid': environ['drupal.uid'],
                'tokens': '',
                'userdata': '',
            }
        )
        # e.g.
        new_header = [('Set-Cookie', 'bob=ab48fe; Path=/;')]
        # NOTE(review): the assignment above clobbers the auth_tkt headers
        # just computed by remember() — it reads like a debug/sample line
        # that was left in. Confirm whether it should be deleted (or be
        # part of the "# e.g." comment) before changing.
        cookie_template = new_header[0][1].split('; ')
        cookie_string = ''
        # Build one Set-Cookie header per CKAN cookie, reusing the
        # Path/flags tail of the template header.
        for name, value in [
            ('ckan_apikey', user.apikey),
            ('ckan_display_name', user.fullname),
            ('ckan_user', user.name),
        ]:
            cookie_string += '; %s="%s"'%(name, value)
            new_cookie = cookie_template[:]
            new_cookie[0] = '%s="%s"'%(name, value)
            new_header.append(('Set-Cookie', str('; '.join(new_cookie))))
        # Also need these cookies to work too:
        # ckan_apikey
        # Value "3a51edc6-6461-46b8-bfe2-57445cbdeb2b"
        # Host catalogue.dev.dataco.coi.gov.uk
        # Path /
        # Secure No
        # Expires At End Of Session
        #
        #
        # Name ckan_display_name
        # Value "James Gardner"
        # Host catalogue.dev.dataco.coi.gov.uk
        # Path /
        # Secure No
        # Expires At End Of Session
        #
        #
        # Name ckan_user
        # Value "4466"
        # Host catalogue.dev.dataco.coi.gov.uk
        # Path /
        # Secure No
        # Expires At End Of Session
        # @@@ Need to add the headers to the request too so that the rest of the stack can sign the user in.
        #Cookie: __utma=217959684.178461911.1286034407.1286034407.1286178542.2; __utmz=217959684.1286178542.2.2.utmcsr=google|utmccn=(organic)|utmcmd=organic|utmctr=coi%20london; DRXtrArgs=James+Gardner; DRXtrArgs2=3e174e7f1e1d3fab5ca138c0a023e13a; SESS9854522e7c5dba5831db083c5372623c=4160a72a4d6831abec1ac57d7b5a59eb; auth_tkt="a578c4a0d21bdbde7f80cd271d60b66f4ceabc3f4466!"; ckan_apikey="3a51edc6-6461-46b8-bfe2-57445cbdeb2b"; ckan_display_name="James Gardner"; ckan_user="******"
        # There is a bug(/feature?) in line 628 of Cookie.py that means
        # it can't load from unicode strings. This causes Beaker to fail
        # unless the value here is a string
        if not environ.get('HTTP_COOKIE'):
            # NOTE(review): this branch looks inverted — when HTTP_COOKIE
            # is absent, ``+=`` on the missing key raises KeyError, and
            # the else branch overwrites (rather than extends) any
            # existing cookies. Expected form is probably
            # ``if environ.get('HTTP_COOKIE'): append else: set`` —
            # confirm before fixing.
            environ['HTTP_COOKIE'] += str(cookie_string)
        else:
            environ['HTTP_COOKIE'] = str(cookie_string[2:])
        def cookie_setting_start_response(status, headers, exc_info=None):
            # Inject the CKAN cookie headers into whatever response the
            # wrapped app produces.
            headers += new_header
            return start_response(status, headers, exc_info)
        new_start_response = cookie_setting_start_response
    return self.app(environ, new_start_response)
def _get_harvest_object(self,id):
    """Fetch the HarvestObject with the given id, or None if missing."""
    return (Session.query(HarvestObject)
            .filter(HarvestObject.id == id)
            .first())
def delete(self):
    """Remove this record from the database, committing immediately."""
    Session.delete(self)
    Session.commit()
def save(self):
    """Persist this object (committing immediately) and return it as a dict."""
    Session.add(self)
    Session.commit()
    return self.as_dict()
def old_locks(cls, since):
    """Return every lock whose timestamp predates *since*."""
    stale = Session.query(cls).filter(cls.timestamp < since)
    return stale.all()
def get_package_list():
    """Return a query for the names of active datasets, ordered by title."""
    names = Session.query(Package.name)
    names = names.filter(Package.state == 'active',
                         Package.type == 'dataset')
    return names.order_by(Package.title)