Example #1
0
def migrate_extras():
    """Add the ``extras`` text column to the ``datarequests`` table."""
    ddl = '''
    ALTER TABLE datarequests ADD COLUMN extras text;
    '''
    # Run the DDL on the session's connection, then commit the session.
    Session.connection().execute(ddl)
    Session.commit()
Example #2
0
    def GetRecords(self, req):
        """Render a ``csw:GetRecordsResponse`` for the request ``req``.

        ``req`` is indexed by ``startPosition``, ``maxRecords``,
        ``elementSetName`` and ``resultType``. Non-positive ``startPosition``
        and ``maxRecords`` fall back to 1 and 10. The request's constraints
        are not parsed: every distinct guid of a current harvest object that
        is joined to an active package and to a ``gemini-single``,
        ``gemini-waf`` or ``csw`` harvest source is matched.

        Returns whatever ``self._render_xml`` produces for the response
        element. An exception raised while parsing a stored document is
        logged and re-raised.
        """
        resp = etree.Element(ntag("csw:GetRecordsResponse"), nsmap=namespaces)
        etree.SubElement(resp, ntag("csw:SearchStatus"), timestamp=datetime.utcnow().isoformat())

        # NOTE(review): the returned connection was never used; the call is
        # retained in case something relies on it being opened here - confirm
        # and remove.
        Session.connection()

        q = Session.query(distinct(HarvestObject.guid)) \
                .join(Package) \
                .join(HarvestSource) \
                .filter(HarvestObject.current==True) \
                .filter(Package.state==u'active') \
                .filter(or_(HarvestSource.type=='gemini-single', \
                        HarvestSource.type=='gemini-waf', \
                        HarvestSource.type=='csw'))

        ### TODO Parse query instead of stupidly just returning whatever we like
        startPosition = req["startPosition"] if req["startPosition"] > 0 else 1
        maxRecords = req["maxRecords"] if req["maxRecords"] > 0 else 10
        rset = q.offset(startPosition-1).limit(maxRecords)

        total = q.count()
        attrs = {
            "numberOfRecordsMatched": total,
            "elementSet": req["elementSetName"], # we lie here. it's always really "full"
            }
        if req["resultType"] == "results":
            returned = rset.count()
            attrs["numberOfRecordsReturned"] = returned
            # nextRecord is the 1-based position of the first record after
            # this page, or 0 when the page reaches the end of the matches.
            # The previous test, (total-startPosition-1) > returned, reported
            # 0 while one or two records were still outstanding.
            next_position = startPosition + returned
            if next_position <= total:
                attrs["nextRecord"] = next_position
            else:
                attrs["nextRecord"] = 0
        else:
            attrs["numberOfRecordsReturned"] = 0

        # Attribute values are passed to lxml as unicode strings.
        attrs = dict((k, unicode(v)) for k,v in attrs.items())
        results = etree.SubElement(resp, ntag("csw:SearchResults"), **attrs)

        if req["resultType"] == "results":
            for guid, in Session.execute(rset):
                doc = Session.query(HarvestObject) \
                        .join(Package) \
                        .filter(HarvestObject.guid==guid) \
                        .filter(HarvestObject.current==True) \
                        .filter(Package.state==u'active') \
                        .first()
                try:
                    record = etree.parse(StringIO(doc.content.encode("utf-8")))
                    results.append(record.getroot())
                except:
                    # Log with the traceback, then propagate unchanged.
                    log.error("exception parsing document %s:\n%s", doc.id, traceback.format_exc())
                    raise

        data = self._render_xml(resp)
        log.info('GetRecords response: %r', truncate(data, LOG_XML_LENGTH))
        return data
def migrate_to_v0_6():
    """Add the ``create_as_private`` boolean column to ``external_catalog``."""
    ddl = """
    ALTER TABLE external_catalog
        ADD COLUMN create_as_private BOOLEAN NOT NULL DEFAULT FALSE;
    """
    # Run the DDL on the session's connection, then commit the session.
    Session.connection().execute(ddl)
    Session.commit()
def migrate_to_v0_4():
    """Add the ``ext_org_id`` text column to ``external_catalog``."""
    ddl = """
    ALTER TABLE external_catalog
        ADD COLUMN ext_org_id text;
    """
    # Run the DDL on the session's connection, then commit the session.
    Session.connection().execute(ddl)
    Session.commit()
Example #5
0
def migrate_visibility():
    """Add the ``visibility`` integer column to the ``datarequests`` table."""
    ddl = '''
    ALTER TABLE datarequests ADD COLUMN visibility integer;
    '''
    # Run the DDL on the session's connection, then commit the session.
    Session.connection().execute(ddl)
    Session.commit()
Example #6
0
def setup():
    """Create the harvest tables, or upgrade the ones already present.

    Does nothing beyond defining the tables in memory when the core package
    table does not exist yet. On an existing installation it runs the v2/v3
    migrations when their marker columns are missing, creates source datasets
    for active sources that lack one, and adds the ``harvest_log`` table and
    the ``harvest_job_id_idx`` index when absent.
    """
    if harvest_source_table is None:
        define_harvester_tables()
        log.debug("Harvest tables defined in memory")

    if not model.package_table.exists():
        log.debug("Harvest table creation deferred")
        return

    if not harvest_source_table.exists():
        # Tables are created one by one instead of via metadata.create_all()
        for table in (
            harvest_source_table,
            harvest_job_table,
            harvest_object_table,
            harvest_gather_error_table,
            harvest_object_error_table,
            harvest_object_extra_table,
            harvest_log_table,
        ):
            table.create()

        log.debug("Harvest tables created")
        return

    from ckan.model.meta import engine

    log.debug("Harvest tables already exist")

    # Missing marker columns tell us which schema migrations are pending
    inspector = Inspector.from_engine(engine)
    column_names = [
        column["name"] for column in inspector.get_columns("harvest_source")
    ]
    if "title" not in column_names:
        log.debug("Harvest tables need to be updated")
        migrate_v2()
    if "frequency" not in column_names:
        log.debug("Harvest tables need to be updated")
        migrate_v3()

    # Active sources whose id has no active "harvest" package still need a dataset
    source_ids = Session.query(HarvestSource.id).filter_by(active=True).all()
    source_package_ids = Session.query(model.Package.id).filter_by(
        type=u"harvest", state="active").all()
    sources_to_migrate = set(source_ids) - set(source_package_ids)
    if sources_to_migrate:
        log.debug("Creating harvest source datasets for %i existing sources", len(sources_to_migrate))
        migrate_v3_create_datasets([row[0] for row in sources_to_migrate])

    # Existing installations may predate the harvest_log table
    if "harvest_log" not in inspector.get_table_names():
        harvest_log_table.create()

    # Make sure harvest_object is indexed on harvest_job_id
    existing_indexes = [
        index["name"] for index in inspector.get_indexes("harvest_object")
    ]
    if "harvest_job_id_idx" not in existing_indexes:
        log.debug("Creating index for harvest_object")
        Index("harvest_job_id_idx", harvest_object_table.c.harvest_job_id).create()
def migrate_to_v0_3():
    """Add the ``last_updated`` and ``status`` columns to ``external_catalog``."""
    ddl = """
    ALTER TABLE external_catalog
        ADD COLUMN last_updated timestamp,
        ADD COLUMN status smallint not null default 0;
    """
    # Run the DDL on the session's connection, then commit the session.
    Session.connection().execute(ddl)
    Session.commit()
Example #8
0
 def create(cls, message, job):
     """
     Build an error record for ``job`` carrying ``message`` and save it.

     If saving raises ``InvalidRequestError`` the session is rolled back and
     the save is attempted once more. The message is logged at info level
     whether or not saving succeeded.
     """
     gather_error = cls(message=message, job=job)
     try:
         gather_error.save()
     except InvalidRequestError:
         # Roll the session back, then retry the save a single time
         Session.rollback()
         gather_error.save()
     finally:
         # Administrators need not be alerted, hence info rather than error
         log.info(message)
Example #9
0
 def create(cls, message, object, stage=u'Fetch', line=None):
     '''
     Build an error record for ``object`` and save it.

     If saving raises ``InvalidRequestError`` the session is rolled back and
     the save is attempted once more. The message (with the line number
     appended when ``line`` is truthy) is logged at debug level in all cases.
     '''
     object_error = cls(message=message, object=object,
                        stage=stage, line=line)
     try:
         object_error.save()
     except InvalidRequestError:
         # Roll the session back, then retry the save a single time
         Session.rollback()
         object_error.save()
     finally:
         if line:
             log_message = '{0}, line {1}'.format(message, line)
         else:
             log_message = message
         log.debug(log_message)
Example #10
0
def migrate_v3():
    """Apply the v3 schema changes to the harvest tables and commit.

    The SQL below creates ``harvest_object_extra`` (with primary and foreign
    keys), adds columns to ``harvest_object``, ``harvest_source``,
    ``harvest_job`` and ``harvest_object_error``, back-fills ``state``,
    ``retry_times``, ``report_status`` and ``frequency``, and re-creates the
    ``harvest_object`` -> ``package`` foreign key as DEFERRABLE.
    """
    log.debug("Migrating harvest tables to v3. This may take a while...")
    conn = Session.connection()

    # All statements are sent to the connection in a single execute() call.
    statement = """CREATE TABLE harvest_object_extra (
	id text NOT NULL,
	harvest_object_id text,
	"key" text,
	"value" text
);

ALTER TABLE harvest_object
	ADD COLUMN import_started timestamp without time zone,
	ADD COLUMN import_finished timestamp without time zone,
	ADD COLUMN "state" text,
	ADD COLUMN "report_status" text;

ALTER TABLE harvest_source
	ADD COLUMN frequency text,
    ADD COLUMN next_run timestamp without time zone;

ALTER TABLE harvest_job
    ADD COLUMN finished timestamp without time zone;

ALTER TABLE harvest_object_extra
	ADD CONSTRAINT harvest_object_extra_pkey PRIMARY KEY (id);

ALTER TABLE harvest_object_extra
	ADD CONSTRAINT harvest_object_extra_harvest_object_id_fkey FOREIGN KEY (harvest_object_id) REFERENCES harvest_object(id);

UPDATE harvest_object set state = 'COMPLETE' where package_id is not null;
UPDATE harvest_object set state = 'ERROR' where package_id is null;
UPDATE harvest_object set retry_times = 0;
UPDATE harvest_object set report_status = 'updated' where package_id is not null;
UPDATE harvest_object set report_status = 'errored' where package_id is null;
UPDATE harvest_source set frequency = 'MANUAL';

ALTER TABLE harvest_object DROP CONSTRAINT harvest_object_package_id_fkey;
ALTER TABLE harvest_object
    ADD CONSTRAINT harvest_object_package_id_fkey FOREIGN KEY (package_id) REFERENCES package(id) DEFERRABLE;

ALTER TABLE harvest_object_error
	ADD COLUMN line integer;

"""
    conn.execute(statement)
    Session.commit()
    log.info("Harvest tables migrated to v3")
Example #11
0
    def GetRecordById(self, req):
        """Render a ``csw:GetRecordByIdResponse`` for the ids in ``req["id"]``.

        For each id, the most recently gathered harvest object with that guid
        is looked up (active source, active package, package set). Ids with
        no match are skipped, as are documents whose content does not contain
        ``MD_Metadata``. Matching documents are parsed and appended to the
        response in request order.

        Returns whatever ``self._render_xml`` produces for the response
        element. An exception raised while parsing a stored document is
        logged and re-raised.
        """
        resp = etree.Element(ntag("csw:GetRecordByIdResponse"), nsmap=namespaces)
        for ident in req["id"]:
            doc = Session.query(HarvestObject) \
                    .join(Package) \
                    .join(HarvestJob).join(HarvestSource) \
                    .filter(HarvestSource.active==True) \
                    .filter(HarvestObject.guid==ident) \
                    .filter(HarvestObject.package!=None) \
                    .filter(Package.state==u'active') \
                    .order_by(HarvestObject.gathered.desc()) \
                    .limit(1).first()

            # Skip unknown ids and documents that do not mention MD_Metadata.
            if doc is None or 'MD_Metadata' not in doc.content:
                continue

            try:
                record = etree.parse(StringIO(doc.content.encode("utf-8")))
                resp.append(record.getroot())
            except:
                # Log with the traceback, then propagate unchanged.
                log.error("exception parsing document %s:\n%s", doc.id, traceback.format_exc())
                raise

        data = self._render_xml(resp)
        log.info('GetRecordById response: %r', truncate(data, LOG_XML_LENGTH))
        return data
Example #12
0
 def get_package_ids_for_showcase(cls, showcase_id):
     '''
     Return all ``package_id`` query results whose ``showcase_id`` equals
     the one passed in.
     '''
     matching = Session.query(cls.package_id).filter_by(showcase_id=showcase_id)
     return matching.all()
Example #13
0
 def get_showcase_ids_for_package(cls, package_id):
     '''
     Return all ``showcase_id`` query results whose ``package_id`` equals
     the one passed in.
     '''
     matching = Session.query(cls.showcase_id).filter_by(package_id=package_id)
     return matching.all()
Example #14
0
 def _do_drupal_login_by_key(self, environ, drupal_apikey, new_headers):
   '''Look up the CKAN user whose ``apikey`` equals ``drupal_apikey``.

   Returns None (after a debug log entry) when the lookup raises, which
   includes the zero-match and multiple-match errors raised by ``one()``.
   '''
   from ckan import model
   from ckan.model.meta import Session
   try:
     user = Session.query(model.User).filter_by(apikey=drupal_apikey).one()
   except Exception as e:
     # "except X as e" works on Python 2.6+ and 3; "except X, e" is 2-only.
     log.debug('Drupal user not found in CKAN: %s', e)
     return
Example #15
0
 def gen_new_name(title):
     """
     Derive a package name from ``title``.

     Underscores in the munged title become hyphens and runs of hyphens are
     collapsed. When a package with that name already exists, the first five
     characters of a random UUID are appended.
     """
     candidate = munge_title_to_name(title).replace("_", "-")
     while "--" in candidate:
         candidate = candidate.replace("--", "-")

     existing = Session.query(Package).filter(Package.name == candidate).first()
     if not existing:
         return candidate
     return candidate + str(uuid.uuid4())[:5]
Example #16
0
 def gen_new_name(title):
     '''
     Derive a package name from ``title``.

     Underscores in the munged title become hyphens and runs of hyphens are
     collapsed. When a package with that name already exists, the first five
     characters of a random UUID are appended.
     '''
     candidate = munge_title_to_name(title).replace('_', '-')
     while '--' in candidate:
         candidate = candidate.replace('--', '-')

     existing = Session.query(Package).filter(Package.name == candidate).first()
     if not existing:
         return candidate
     return candidate + str(uuid.uuid4())[:5]
Example #17
0
 def create(cls, message, object, stage=u"Fetch", line=None):
     """
     Helper function to create an error object and save it.

     If saving raises ``InvalidRequestError``, the session is rolled back and
     removed (each step best-effort) and the save is attempted once more; a
     failure of that second attempt propagates to the caller.
     """
     err = cls(message=message, object=object, stage=stage, line=line)
     try:
         err.save()
     except InvalidRequestError:
         # Clear any in-progress sqlalchemy transactions. Cleanup failures
         # are ignored on purpose, but only for ordinary exceptions: a bare
         # "except:" would also swallow KeyboardInterrupt and SystemExit.
         try:
             Session.rollback()
         except Exception:
             pass
         try:
             Session.remove()
         except Exception:
             pass
         err.save()
Example #18
0
def setup():
    '''Create the harvest tables, or upgrade the ones already present.

    Does nothing beyond defining the tables in memory when the core package
    table does not exist yet. On an existing installation it runs the v2/v3
    migrations when their marker columns are missing and creates source
    datasets for active sources that lack one.
    '''
    if harvest_source_table is None:
        define_harvester_tables()
        log.debug('Harvest tables defined in memory')

    if not model.package_table.exists():
        log.debug('Harvest table creation deferred')
        return

    if not harvest_source_table.exists():
        # Tables are created one by one instead of via metadata.create_all()
        for table in (
            harvest_source_table,
            harvest_job_table,
            harvest_object_table,
            harvest_gather_error_table,
            harvest_object_error_table,
            harvest_object_extra_table,
        ):
            table.create()

        log.debug('Harvest tables created')
        return

    from ckan.model.meta import engine
    log.debug('Harvest tables already exist')

    # Missing marker columns tell us which schema migrations are pending
    inspector = Inspector.from_engine(engine)
    column_names = [
        column['name'] for column in inspector.get_columns('harvest_source')
    ]
    if 'title' not in column_names:
        log.debug('Harvest tables need to be updated')
        migrate_v2()
    if 'frequency' not in column_names:
        log.debug('Harvest tables need to be updated')
        migrate_v3()

    # Active sources whose id has no active 'harvest' package still need a dataset
    source_ids = Session.query(HarvestSource.id).filter_by(active=True).all()
    source_package_ids = Session.query(model.Package.id).filter_by(
        type=u'harvest', state='active').all()
    sources_to_migrate = set(source_ids) - set(source_package_ids)
    if sources_to_migrate:
        log.debug('Creating harvest source datasets for %i existing sources', len(sources_to_migrate))
        migrate_v3_create_datasets([row[0] for row in sources_to_migrate])
Example #19
0
def migrate_v2():
    """Apply the v2 schema changes to the harvest tables and commit.

    Adds ``harvest_source.title``, ``harvest_object.current`` and
    ``harvest_object.harvest_source_id`` (with its foreign key), back-fills
    ``harvest_source_id`` from the owning job, then flags one harvest object
    per guid as current and every other object as not current.
    """
    # Function-scope import so the block does not depend on the file header.
    from sqlalchemy import text

    log.debug("Migrating harvest tables to v2. This may take a while...")
    conn = Session.connection()

    statements = """
    ALTER TABLE harvest_source ADD COLUMN title text;

    ALTER TABLE harvest_object ADD COLUMN current boolean;
    ALTER TABLE harvest_object ADD COLUMN harvest_source_id text;
    ALTER TABLE harvest_object ADD CONSTRAINT harvest_object_harvest_source_id_fkey FOREIGN KEY (harvest_source_id) REFERENCES harvest_source(id);

    UPDATE harvest_object o SET harvest_source_id = j.source_id FROM harvest_job j WHERE o.harvest_job_id = j.id;
    """
    conn.execute(statements)

    # Flag current harvest_objects
    guids = (
        Session.query(distinct(HarvestObject.guid))
        .join(Package)
        .filter(HarvestObject.package != None)
        .filter(Package.state == u"active")
    )

    # The guid is passed as a bound parameter rather than interpolated into
    # the SQL text: guids come from harvested records, and a value containing
    # a quote would otherwise break (or alter) the statement.
    update_statement = text("""
    UPDATE harvest_object
    SET current = TRUE
    WHERE id = (
        SELECT o.id
        FROM harvest_object o JOIN package p ON p.id = o.package_id
        WHERE o.package_id IS NOT null AND p.state = 'active'
            AND o.guid = :guid
        ORDER BY metadata_modified_date DESC, fetch_finished DESC, gathered DESC
        LIMIT 1)
    """)

    # Each result row carries a single column, the guid.
    for (guid,) in guids:
        conn.execute(update_statement, {"guid": guid})

    conn.execute("UPDATE harvest_object SET current = FALSE WHERE current IS NOT TRUE")

    Session.commit()
    log.info("Harvest tables migrated to v2")
Example #20
0
 def create(cls, message, object, stage=u'Fetch', line=None):
     '''
     Build an error record for ``object`` and save it.

     If saving raises ``InvalidRequestError``, the session is rolled back and
     removed (each step best-effort) before one more save attempt. The
     message (with the line number appended when ``line`` is truthy) is
     logged at debug level in all cases.
     '''
     object_error = cls(message=message, object=object,
                        stage=stage, line=line)
     try:
         object_error.save()
     except InvalidRequestError:
         # Best-effort reset of any in-progress sqlalchemy transaction
         try:
             Session.rollback()
         except Exception:
             pass
         try:
             Session.remove()
         except Exception:
             pass
         object_error.save()
     finally:
         if line:
             log_message = '{0}, line {1}'.format(message, line)
         else:
             log_message = message
         log.debug(log_message)
Example #21
0
def clean_harvest_log(condition):
    """Delete harvest log rows created at or before ``condition``.

    ``condition`` is compared against ``HarvestLog.created``. If the commit
    raises ``InvalidRequestError`` the session is rolled back and an error is
    logged; the success message is logged only when the commit went through.
    """
    Session.query(HarvestLog).filter(HarvestLog.created <= condition).delete(synchronize_session=False)
    try:
        Session.commit()
    except InvalidRequestError:
        Session.rollback()
        log.error("An error occurred while trying to clean-up the harvest log table")
    else:
        # Previously logged unconditionally, i.e. also after the failure above.
        log.info("Harvest log table clean-up finished successfully")
def sync_irods(params, id):
    """
    Copy the iRODS user metadata of a file into a CKAN resource's extras.

    The file looked up is ``params['path']`` joined with the last path
    segment of the resource's name. Resources without a name are rejected
    with a flash error, as is a failed iRODS connection. Always finishes by
    redirecting to the resource's read page.
    """
    from irods import getFileUserMetadata, rcModAccessControl
    rev = model.repo.new_revision()
    conn = get_connection_from_params(params)
    resource = Resource.get(id)
    path = params['path']
    extras = {}
    # Lets handle only resources with file names
    if resource.name:
        fname = "%s/%s" % (path, resource.name.split('/')[-1])
        log.debug(fname)
        i = 0
        access = rcModAccessControl()
        log.debug(access.getPath())
        if conn:
            for met in getFileUserMetadata(conn, fname):
                i += 1
                key, value, _ = met
                extras[key] = value
            resource.extras = extras
            Session.add(resource)
            conn.disconnect()
            # Set the revision message before committing, as
            # import_collection_to_package does; it used to be assigned
            # after the commit.
            rev.message = "Update from iRODS, matched file %s" % fname
            model.repo.commit()
            h.flash_success("iRODS import to resource OK! Imported %s metadatas" % i)
        else:
            h.flash_error("Could not connect to iRODS!")
    else:
        h.flash_error("Resource is an URL, cannot import!")
    h.redirect_to(controller='package', action='resource_read', \
              id=resource.resource_group.package.name, \
              resource_id=resource.id)
def import_collection_to_package(params, id):
    """
    Import the metadata of an iRODS collection into a dataset.

    Every object of the collection at ``params['path']`` that can be opened
    becomes (or updates) a resource in the package's first resource group,
    with the object's user metadata as resource extras. The collection's own
    user metadata is copied into the package extras. File contents are not
    imported. Always finishes by redirecting to the package read page.
    """
    from irods import irodsCollection
    path = params['path']
    pkg = Package.get(id)
    conn = get_connection_from_params(params)
    if not conn:
        h.flash_error("Could not connect to iRODS!")
    else:
        coll = irodsCollection(conn, path)
        from irods import iRodsOpen
        rev = model.repo.new_revision()
        imported = 0
        for fname, _ in coll.getObjects():
            extras = {}
            fpath = "%s/%s" % (coll.getCollName(), fname)
            f = iRodsOpen(conn, fpath, 'r')
            if not f:
                # Objects that cannot be opened are skipped
                continue
            imported += 1
            res = Resource.by_name(fname)
            if not res:
                res = Resource(url='', name=fname, extras=extras,
                               resource_type='file')
            for key, value, _ in f.getUserMetadata():
                extras[key] = value
            res.extras = extras
            resgrp = pkg.resource_groups[0]
            resgrp.resources.append(res)
            Session.add(res)
            Session.add(resgrp)
            rev.message = "Update from iRODS, matched file %s" % fname
        # Collection-level metadata goes onto the package itself
        for key, value, _ in coll.getUserMetadata():
            pkg.extras[key] = value
        Session.add(pkg)
        model.repo.commit()
        conn.disconnect()
        h.flash_success("iRODS import to dataset OK! Imported %s resources." % imported)
    h.redirect_to(controller='package', action='read', id=id)
Example #24
0
 def get_showcase_admin_ids(cls):
     '''
     Return the ``user_id`` of every stored row as a flat list.
     '''
     rows = Session.query(cls.user_id).all()
     # Each row holds a single column; unwrap it.
     return [user_id for (user_id, ) in rows]
Example #25
0
 def get(self):
     '''Return the first ``Pipelines`` row matching this object's
     ``package_id`` and ``pipeline_id``, or None when there is none.'''
     matching = Session.query(Pipelines).filter_by(
         package_id=self.package_id, pipeline_id=self.pipeline_id)
     return matching.first()
Example #26
0
 def get(cls, _type, ref_id):
     '''Return the most recently modified row with the given ``type`` and
     ``ref_id``, or None when there is none.'''
     newest_first = data_quality_metrics_table.c.modified_at.desc()
     matching = Session.query(cls).filter_by(type=_type, ref_id=ref_id)
     return matching.order_by(newest_first).first()
Example #27
0
    def GetRecords(self, req):
        """Render a ``csw:GetRecordsResponse`` for the request ``req``.

        ``req`` is indexed by ``startPosition``, ``maxRecords``,
        ``elementSetName`` and ``resultType``. Non-positive ``startPosition``
        and ``maxRecords`` fall back to 1 and 10. The request's constraints
        are not parsed: every distinct guid of a current harvest object that
        is joined to an active package and to a ``gemini-single``,
        ``gemini-waf`` or ``csw`` harvest source is matched.

        Returns whatever ``self._render_xml`` produces for the response
        element. An exception raised while parsing a stored document is
        logged and re-raised.
        """
        resp = etree.Element(ntag("csw:GetRecordsResponse"), nsmap=namespaces)
        etree.SubElement(resp,
                         ntag("csw:SearchStatus"),
                         timestamp=datetime.utcnow().isoformat())

        # NOTE(review): the returned connection was never used; the call is
        # retained in case something relies on it being opened here - confirm
        # and remove.
        Session.connection()

        q = Session.query(distinct(HarvestObject.guid)) \
                .join(Package) \
                .join(HarvestSource) \
                .filter(HarvestObject.current==True) \
                .filter(Package.state==u'active') \
                .filter(or_(HarvestSource.type=='gemini-single', \
                        HarvestSource.type=='gemini-waf', \
                        HarvestSource.type=='csw'))

        ### TODO Parse query instead of stupidly just returning whatever we like
        startPosition = req["startPosition"] if req["startPosition"] > 0 else 1
        maxRecords = req["maxRecords"] if req["maxRecords"] > 0 else 10
        rset = q.offset(startPosition - 1).limit(maxRecords)

        total = q.count()
        attrs = {
            "numberOfRecordsMatched": total,
            "elementSet":
            req["elementSetName"],  # we lie here. it's always really "full"
        }
        if req["resultType"] == "results":
            returned = rset.count()
            attrs["numberOfRecordsReturned"] = returned
            # nextRecord is the 1-based position of the first record after
            # this page, or 0 when the page reaches the end of the matches.
            # The previous test, (total - startPosition - 1) > returned,
            # reported 0 while one or two records were still outstanding.
            next_position = startPosition + returned
            if next_position <= total:
                attrs["nextRecord"] = next_position
            else:
                attrs["nextRecord"] = 0
        else:
            attrs["numberOfRecordsReturned"] = 0

        # Attribute values are passed to lxml as unicode strings.
        attrs = dict((k, unicode(v)) for k, v in attrs.items())
        results = etree.SubElement(resp, ntag("csw:SearchResults"), **attrs)

        if req["resultType"] == "results":
            for guid, in Session.execute(rset):
                doc = Session.query(HarvestObject) \
                        .join(Package) \
                        .filter(HarvestObject.guid==guid) \
                        .filter(HarvestObject.current==True) \
                        .filter(Package.state==u'active') \
                        .first()
                try:
                    record = etree.parse(StringIO(doc.content.encode("utf-8")))
                    results.append(record.getroot())
                except:
                    # Log with the traceback, then propagate unchanged.
                    log.error("exception parsing document %s:\n%s", doc.id,
                              traceback.format_exc())
                    raise

        data = self._render_xml(resp)
        log.info('GetRecords response: %r', truncate(data, LOG_XML_LENGTH))
        return data
Example #28
0
    def __call__(self, environ, start_response):
        """WSGI entry point: sign a Drupal-authenticated visitor into CKAN.

        When the request cookie shows a Drupal session but no CKAN sign-in,
        the Drupal user's properties are fetched and stored in ``environ``
        (``drupal.uid``, ``drupal.publishers``, ``drupal.name``), a CKAN user
        named after the Drupal uid is created if missing, and auth_tkt plus
        ``ckan_apikey`` / ``ckan_display_name`` / ``ckan_user`` cookies are
        added both to the response headers and to the request's
        ``HTTP_COOKIE``. In every case the wrapped app is then called and its
        result returned.
        """
        if self.drupal_client is None:
            self.drupal_client = DrupalClient()

        # establish from the cookie whether ckan and drupal are signed in
        ckan_signed_in = False
        drupal_signed_in = False
        for k, v in environ.items():
            if k.lower() == 'http_cookie':
                ckan_signed_in = is_ckan_signed_in(v)
                drupal_signed_in = drupal_extract_cookie(v)

        environ['drupal.uid'] = None
        environ['drupal.publishers'] = None
        new_start_response = start_response
        if drupal_signed_in and not ckan_signed_in:
            # get info about the user from drupal and store in environ for
            # use by main CKAN app
            user_id = self.drupal_client.get_user_id_from_session_id(
                drupal_signed_in)
            res = self.drupal_client.get_user_properties(user_id)
            environ['drupal.uid'] = res['uid']
            environ['drupal.publishers'] = res['publishers']
            environ['drupal.name'] = res['name']

            from ckan import model
            from ckan.model.meta import Session

            def munge(username):
                # NOTE(review): the munged value is discarded, so this
                # returns its input unchanged. Left as is because the lookup
                # below uses the un-munged name; fixing one without the other
                # would stop existing users from being found.
                username.lower().replace(' ', '_')
                return username

            # Add the new Drupal user if they don't already exist.
            query = Session.query(
                model.User).filter_by(name=unicode(environ['drupal.uid']))
            if not query.count():
                user = model.User(
                    name=munge(unicode(environ['drupal.uid'])),
                    fullname=unicode(environ['drupal.name']),
                    about=u'Drupal auto-generated user',
                )
                Session.add(user)
                Session.commit()
            else:
                user = query.one()

            # We want to store values in the user's cookie, so
            # prepare the response header with this value,
            # using auth_tkt to sign it.
            new_header = environ['repoze.who.plugins']['auth_tkt'].remember(
                environ, {
                    'repoze.who.userid': environ['drupal.uid'],
                    'tokens': '',
                    'userdata': '',
                })
            # e.g. new_header = [('Set-Cookie', 'bob=ab48fe; Path=/;')]
            cookie_template = new_header[0][1].split('; ')

            # One Set-Cookie response header per CKAN cookie, reusing the
            # attributes (path etc.) of the auth_tkt cookie; the same pairs
            # are also collected for the request's Cookie header.
            cookie_string = ''
            for name, value in [
                ('ckan_apikey', user.apikey),
                ('ckan_display_name', user.fullname),
                ('ckan_user', user.name),
            ]:
                cookie_string += '; %s="%s"' % (name, value)
                new_cookie = cookie_template[:]
                new_cookie[0] = '%s="%s"' % (name, value)
                new_header.append(('Set-Cookie', str('; '.join(new_cookie))))

            # @@@ Need to add the headers to the request too so that the rest of the stack can sign the user in.

            # There is a bug(/feature?) in line 628 of Cookie.py that means
            # it can't load from unicode strings. This causes Beaker to fail
            # unless the value here is a string
            #
            # cookie_string starts with '; ' so it can be appended to an
            # existing Cookie header; without one, that separator is
            # stripped. The two branches used to be swapped, which replaced
            # an existing header and would have raised KeyError for a
            # missing one.
            if environ.get('HTTP_COOKIE'):
                environ['HTTP_COOKIE'] += str(cookie_string)
            else:
                environ['HTTP_COOKIE'] = str(cookie_string[2:])

            def cookie_setting_start_response(status, headers, exc_info=None):
                headers += new_header
                return start_response(status, headers, exc_info)

            new_start_response = cookie_setting_start_response
        return self.app(environ, new_start_response)
Example #29
0
def get_organization_list():
    """Return a query for the names of active organization groups, ordered by title."""
    is_active = Group.state=='active'
    is_organization = Group.type=='organization'
    names = Session.query(Group.name).filter(is_active, is_organization)
    return names.order_by(Group.title)
Example #30
0
 def create(cls, **kwargs):
     '''Create a row from ``kwargs``, commit it and return its ``as_dict()``.'''
     record = cls(**kwargs)
     Session.add(record)
     Session.commit()
     return record.as_dict()
    def get_gather_errors(self):
        '''Return this job's gather errors as a list, newest first.'''
        belongs_to_job = HarvestGatherError.harvest_job_id == self.id
        newest_first = HarvestGatherError.created.desc()
        errors = Session.query(HarvestGatherError).filter(belongs_to_job)
        return errors.order_by(newest_first).all()
Example #32
0
    def update(cls, filter, data):
        '''Apply ``data`` to every row matching ``filter`` and commit.

        ``filter`` is expanded into ``filter_by`` keyword arguments. Returns
        the first row of the same query after the commit.
        '''
        matching = Session.query(cls).filter_by(**filter)
        matching.update(data)
        Session.commit()
        return matching.first()
Example #33
0
 def get_by_id(cls, ref_id):
     '''Return the result of ``Query.get`` for ``ref_id`` on this class.'''
     lookup = Session.query(cls)
     return lookup.get(ref_id)
Example #34
0
def purge_datasets():
    '''
    Call the ``dataset_purge`` action for every ``Package`` in the session.
    '''
    every_package = Session.query(Package)
    for package in every_package:
        call_action('dataset_purge', id=package.id)
Example #35
0
 def delete(cls, **kw):
     '''Mark every entity matching the keyword filters for deletion.

     The query runs with autoflush disabled; nothing is committed here.
     '''
     matches = Session.query(cls).autoflush(False).filter_by(**kw).all()
     for entity in matches:
         Session.delete(entity)
Example #36
0
 def get_all(cls):
     '''Return every row of this class as a list.'''
     everything = Session.query(cls)
     return everything.all()
Example #37
0
 def all(cls):
     '''Return an unevaluated query over ``Featurednumbers``.'''
     featured_numbers = Session.query(Featurednumbers)
     return featured_numbers
Example #38
0
 def get(cls, **kw):
     '''Return a list of every entity matching the keyword filters.

     The query runs with autoflush disabled.
     '''
     return Session.query(cls).autoflush(False).filter_by(**kw).all()
Example #39
0
 def tearDown(self):
     '''Roll back the session.'''
     Session.rollback()
Example #40
0
 def create(cls, **kwargs):
     '''Create a row from ``kwargs``, commit it and return its ``as_dict()``.'''
     record = cls(**kwargs)
     Session.add(record)
     Session.commit()
     return record.as_dict()
Example #41
0
 def count(cls):
     '''Return the number of rows of this class.'''
     everything = Session.query(cls)
     return everything.count()
Example #42
0
 def get(cls, ref):
     '''Return the result of ``Query.get`` for ``ref`` on ``CommentsRefStats``.'''
     lookup = Session.query(CommentsRefStats)
     return lookup.get(ref)
Example #43
0
    def _do_wordpress_login(self, environ, wordpress_session_id, new_headers):
        '''Log a WordPress-authenticated visitor into CKAN.

        Asks WordPress for the user behind the current session, creates the
        matching CKAN user when it does not exist yet, applies the user's
        roles, appends auth_tkt headers to ``new_headers`` (mutated in place)
        and sets ``REMOTE_USER`` in ``environ``. Returns None early, after
        logging an error, when WordPress reports no user id.
        '''
        if self.wordpress_client is not None:
            # Warning! These must be called, or the user data will quite often be wrong.
            # self.wordpress_client may sometimes be a properly set up object with wrong data.
            self.wordpress_client.reset_data()
            self.wordpress_client.update_cookies(environ)
        else:
            self.wordpress_client = WordPressClient(environ)

        # WordPress tells us which user id this session belongs to
        wordpress_user_id = self.wordpress_client.get_user_id()
        if not wordpress_user_id:
            log.error(
                'WordPress said the session ID found in the cookie is not valid.'
            )
            return

        # fetch the WordPress-side details of that user
        user_properties = self.wordpress_client.get_user_properties()

        # name the user has (or will get) on the CKAN side
        ckan_user_name = WordPressUserMapping.wordpress_id_to_ckan_user_name(
            wordpress_user_id)

        log.debug('_do_wordpress_login ->')
        for traced in (wordpress_session_id, wordpress_user_id,
                       ckan_user_name):
            log.debug(str(traced))
        log.debug('<- _do_wordpress_login')

        from ckan import model
        from ckan.model.meta import Session
        user_query = Session.query(
            model.User).filter_by(name=unicode(ckan_user_name))
        if user_query.count():
            user = user_query.one()
            log.debug(
                'WordPress user found in CKAN: %s for ckan_user_name: %s',
                user.name, ckan_user_name)
        else:
            # Unknown to CKAN so far: create the user from the WordPress data.
            # The registration date is parsed from its SQL text form, see
            # http://stackoverflow.com/questions/1697815/how-do-you-convert-a-python-time-struct-time-object-into-a-datetime-object
            wp_data = user_properties['data']
            registered = time.strptime(wp_data['user_registered'],
                                       '%Y-%m-%d %H:%M:%S')
            date_created = datetime.datetime.fromtimestamp(
                time.mktime(registered))
            user = model.User(
                name=ckan_user_name,
                # NB display name and email may change in WordPress db
                fullname=unicode(wp_data['display_name']),
                about=u'User account imported from WordPress system.',
                email=wp_data['user_email'],
                created=date_created,
            )
            Session.add(user)
            Session.commit()
            log.debug('WordPress user added to CKAN as: %s', user.name)

        self.set_roles(ckan_user_name, user_properties['roles'])

        # Earlier in this request auth_tkt may have been asked to log out,
        # leaving a header like
        #   'Set-Cookie', 'auth_tkt="INVALID"...'
        # The login below sets that same cookie again, so the invalidation
        # header has to go first.
        new_headers[:] = [
            (key, value) for (key, value) in new_headers
            if key != 'Set-Cookie'
            or not value.startswith('auth_tkt="INVALID"')
        ]

        # Have auth_tkt remember this user so that later requests are
        # authenticated by it; its cookie headers go into the response too.
        identity = {
            'repoze.who.userid': str(ckan_user_name),
            'tokens': '',
            'userdata': wordpress_session_id
        }
        remember_headers = environ['repoze.who.plugins']['hri_auth_tkt'].remember(
            environ, identity)
        if remember_headers:
            new_headers.extend(remember_headers)

        # For the remainder of this request the user counts as logged in
        environ['REMOTE_USER'] = user.name
        log.debug('Set REMOTE_USER = %r', user.name)
Example #44
0
 def get(cls, comment_id):
     '''Return the result of ``Query.get`` for ``comment_id`` on this class.'''
     lookup = Session.query(cls)
     return lookup.get(comment_id)
Example #45
0
class DrupalAuthMiddleware(object):
    '''Allows CKAN user to login via Drupal. It looks for the Drupal cookie
    and gets user details from Drupal using XMLRPC. It only touches auth_tkt
    logins whose user name carries the Drupal prefix, so works side-by-side
    with normal CKAN logins.'''

    def __init__(self, app, app_conf):
        '''Wrap the WSGI application ``app``.

        ``app_conf`` is accepted for interface compatibility but is not read
        here. The Drupal client is created lazily on the first login attempt.
        '''
        self.app = app
        self.drupal_client = None
        self._user_name_prefix = 'user_d'

    def _parse_cookies(self, environ):
        '''Inspect the request cookies.

        Returns a tuple ``(is_ckan_cookie, drupal_session_id)``. Both values
        are ``False`` when the request carries no cookie header at all;
        ``drupal_session_id`` is ``None`` when cookies are present but none of
        them is the Drupal session cookie for this server.
        '''
        is_ckan_cookie = False
        drupal_session_id = False
        server_name = environ['SERVER_NAME']
        for k, v in environ.items():
            # the key is matched case-insensitively
            if k.lower() == 'http_cookie':
                is_ckan_cookie = self._is_this_a_ckan_cookie(v)
                drupal_session_id = self._drupal_cookie_parse(
                    v, server_name)
        return is_ckan_cookie, drupal_session_id

    @staticmethod
    def _drupal_cookie_parse(cookie_string, server_name):
        '''Returns the Drupal Session ID from the cookie string.

        The cookie looked for is named ``SESS`` followed by the md5 hex digest
        of ``server_name``. Other ``SESS*`` cookies are logged and ignored.
        Returns ``None`` if no matching cookie is found.
        '''
        cookies = Cookie.SimpleCookie()
        cookies.load(str(cookie_string))
        similar_cookies = []
        for cookie in cookies:
            if cookie.startswith('SESS'):
                server_hash = hashlib.md5(server_name).hexdigest()
                if cookie == 'SESS%s' % server_hash:
                    log.debug('Drupal cookie found for server request %s',
                              server_name)
                    return cookies[cookie].value
                else:
                    similar_cookies.append(cookie)
        if similar_cookies:
            log.debug(
                'Drupal cookies ignored with incorrect hash for server %r: %r',
                server_name, similar_cookies)
        return None

    @staticmethod
    def _is_this_a_ckan_cookie(cookie_string):
        '''Returns True if the cookie string contains an ``auth_tkt`` cookie.'''
        cookies = Cookie.SimpleCookie()
        cookies.load(str(cookie_string))
        return 'auth_tkt' in cookies

    def _munge_drupal_id_to_ckan_user_name(self, drupal_id):
        '''Returns the CKAN user name for a Drupal user id: the id lower-cased,
        with spaces replaced by underscores, and the Drupal prefix prepended.
        '''
        # str methods return a new string, so the result must be kept
        # (previously it was computed and thrown away).
        drupal_id = drupal_id.lower().replace(' ', '_')
        return u'%s%s' % (self._user_name_prefix, drupal_id)

    def _log_out(self, environ, new_headers):
        '''Logs the user out of auth_tkt for this request and for the browser.

        Clears the identity from ``environ``, appends the headers returned by
        the auth_tkt plugin's ``forget`` to ``new_headers`` and strips the
        ``auth_tkt`` cookie from the request's ``HTTP_COOKIE``.
        '''
        # don't progress the user info for this request
        environ['REMOTE_USER'] = None
        environ['repoze.who.identity'] = None
        # tell auth_tkt to logout whilst adding the header to tell
        # the browser to delete the cookie
        identity = {}
        headers = environ['repoze.who.plugins']['dgu_auth_tkt'].forget(
            environ, identity)
        if headers:
            new_headers.extend(headers)
        # Remove cookie from request, so that if we are doing a login again
        # in this request then it is aware of the cookie removal
        cookies = environ.get('HTTP_COOKIE', '').split('; ')
        cookies = '; '.join([
            cookie for cookie in cookies if not cookie.startswith('auth_tkt=')
        ])
        environ['HTTP_COOKIE'] = cookies

        log.debug('Logged out Drupal user')

    def __call__(self, environ, start_response):
        '''WSGI entry point. Performs any Drupal log-in/log-out needed for this
        request, then calls the wrapped app with a ``start_response`` that
        adds the resulting cookie headers to the response.
        '''
        new_headers = []

        self.do_drupal_login_logout(environ, new_headers)

        def cookie_setting_start_response(status, headers, exc_info=None):
            if headers:
                headers.extend(new_headers)
            else:
                headers = new_headers
            return start_response(status, headers, exc_info)

        return self.app(environ, cookie_setting_start_response)

    def do_drupal_login_logout(self, environ, new_headers):
        '''Looks at cookies and auth_tkt and may tell auth_tkt to log-in or log-out
        to a Drupal user.

        Any headers that need to go in the response are appended to
        ``new_headers``.
        '''
        is_ckan_cookie, drupal_session_id = self._parse_cookies(environ)

        # Is there a Drupal cookie? We may want to do a log-in for it.
        if drupal_session_id:
            # Look at any authtkt logged in user details
            authtkt_identity = environ.get('repoze.who.identity')
            if authtkt_identity:
                # same as environ.get('REMOTE_USER', '')
                authtkt_user_name = authtkt_identity['repoze.who.userid']
                authtkt_drupal_session_id = authtkt_identity['userdata']
            else:
                authtkt_user_name = ''
                authtkt_drupal_session_id = ''

            if not authtkt_user_name:
                # authtkt not logged in, so log-in with the Drupal cookie
                self._do_drupal_login(environ, drupal_session_id, new_headers)
                return
            elif authtkt_user_name.startswith(self._user_name_prefix):
                # A drupal user is logged in with authtkt.
                # See if the authtkt matches the drupal cookie's session
                if authtkt_drupal_session_id != drupal_session_id:
                    # Drupal cookie session has changed, so tell authkit to
                    # forget the old one before we do the new login
                    log.debug('Drupal cookie session has changed.')
                    self._log_out(environ, new_headers)
                    # since we are about to login again, we need to get rid
                    # of the headers like
                    # ('Set-Cookie', 'auth_tkt="INVALID"...') since we are
                    # about to set them again in this same request.
                    new_headers[:] = [
                        (key, value) for (key, value) in new_headers
                        if not (key == 'Set-Cookie' and
                                value.startswith('auth_tkt="INVALID"'))]
                    self._do_drupal_login(environ, drupal_session_id,
                                          new_headers)
                    return
                else:
                    log.debug('Drupal cookie session stayed the same.')
                    # Drupal cookie session matches the authtkt - leave user
                    # logged in
                    return
            else:
                # There's a Drupal cookie, but user is logged in as a normal
                # CKAN user. Ignore the Drupal cookie.
                return
        elif not drupal_session_id and is_ckan_cookie:
            # Deal with the case where user is logged out of Drupal
            # i.e. user WAS logged in with Drupal and the cookie was
            # deleted (probably because Drupal logged out)

            # Is the logged in user a Drupal user?
            user_name = environ.get('REMOTE_USER', '')
            if user_name and user_name.startswith(self._user_name_prefix):
                log.debug(
                    'Was logged in as Drupal user %r but Drupal cookie no longer there.',
                    user_name)
                self._log_out(environ, new_headers)

    def _do_drupal_login(self, environ, drupal_session_id, new_headers):
        '''Logs in the Drupal user that owns ``drupal_session_id``.

        Asks Drupal which user the session belongs to, creates the matching
        CKAN user if it does not exist yet, sets the user's roles, asks
        auth_tkt to remember the user (headers are appended to
        ``new_headers``) and sets ``REMOTE_USER`` for this request.
        Does nothing beyond logging if Drupal rejects the session or the
        session lookup raises ``DrupalRequestError``.
        '''
        if self.drupal_client is None:
            self.drupal_client = DrupalClient()
        # ask drupal for the drupal_user_id for this session
        try:
            drupal_user_id = self.drupal_client.get_user_id_from_session_id(
                drupal_session_id)
        except DrupalRequestError as e:
            log.error('Error checking session with Drupal: %s', e)
            return
        if not drupal_user_id:
            log.debug(
                'Drupal said the session ID found in the cookie is not valid.')
            return

        # ask drupal about this user
        user_properties = self.drupal_client.get_user_properties(
            drupal_user_id)

        # see if user already exists in CKAN
        ckan_user_name = self._munge_drupal_id_to_ckan_user_name(
            drupal_user_id)
        from ckan import model
        from ckan.model.meta import Session
        query = Session.query(
            model.User).filter_by(name=unicode(ckan_user_name))
        if not query.count():
            # need to add this user to CKAN
            date_created = datetime.datetime.fromtimestamp(
                int(user_properties['created']))
            user = model.User(
                name=ckan_user_name,
                fullname=unicode(
                    user_properties['name']),  # NB may change in Drupal db
                about=u'User account imported from Drupal system.',
                email=user_properties[
                    'mail'],  # NB may change in Drupal db
                created=date_created,
            )
            Session.add(user)
            Session.commit()
            log.debug('Drupal user added to CKAN as: %s', user.name)
        else:
            user = query.one()
            log.debug('Drupal user found in CKAN: %s', user.name)

        # NOTE(review): set_roles is not defined in this class - presumably
        # supplied by a subclass or mixin; confirm.
        self.set_roles(ckan_user_name, user_properties['roles'].values())

        # Ask auth_tkt to remember this user so that subsequent requests
        # will be authenticated by auth_tkt.
        # auth_tkt cookie template needs to also go in the response.
        identity = {
            'repoze.who.userid': str(ckan_user_name),
            'tokens': '',
            'userdata': drupal_session_id
        }
        headers = environ['repoze.who.plugins']['dgu_auth_tkt'].remember(
            environ, identity)
        if headers:
            new_headers.extend(headers)

        # Tell app during this request that the user is logged in
        environ['REMOTE_USER'] = user.name
        log.debug('Set REMOTE_USER = %r', user.name)
Example #46
0
 def get_comments_count(cls, ref):
     '''Return the ``comment_count`` stored for ``ref``.

     Selects the ``comment_count`` column of ``comments_refs_stats_table``
     for the row whose ``ref`` equals the given value and returns the
     query's scalar result.
     '''
     stats = comments_refs_stats_table.c
     count_query = Session.query(stats.comment_count)
     count_query = count_query.filter(stats.ref == ref)
     return count_query.scalar()
Example #47
0
    def get_thread(cls, ref, thread_id):
        '''Return every ``cls`` row of one thread, oldest first.

        Rows are matched on both ``ref`` and ``thread_id`` and ordered by
        ascending ``created_at``.
        '''
        columns = comments_table.c
        thread_query = Session.query(cls)
        thread_query = thread_query.filter(columns.ref == ref)
        thread_query = thread_query.filter(columns.thread_id == thread_id)
        thread_query = thread_query.order_by(columns.created_at.asc())
        return thread_query.all()
Example #48
0
 def filter(cls, **kwds):
     '''Return a query over ``cls`` restricted by the given keyword filters.

     The query is built with autoflush switched off.
     '''
     return Session.query(cls).autoflush(False).filter_by(**kwds)
Example #49
0
 def by_pipeline_id(cls, pipeline_id):
     '''Return the first ``cls`` row with the given ``pipeline_id``.

     Asserts that ``pipeline_id`` is truthy before querying.
     '''
     assert pipeline_id
     matching = Session.query(cls).filter_by(pipeline_id=pipeline_id)
     return matching.first()
Example #50
0
 def filter(cls, **kwargs):
     '''Return a query over ``cls`` restricted by the given keyword filters.'''
     base_query = Session.query(cls)
     return base_query.filter_by(**kwargs)
Example #51
0
 def by_dataset_id(cls, dataset_id):
     '''Return all ``cls`` rows whose ``package_id`` equals ``dataset_id``.

     Asserts that ``dataset_id`` is truthy before querying.
     '''
     assert dataset_id
     matching = Session.query(cls).filter_by(package_id=dataset_id)
     return matching.all()
Example #52
0
 def get_showcase_admin_ids(cls):
     '''
     Return a list of showcase admin user ids.
     '''
     admin_ids = []
     # each result row is a one-element tuple holding the user id
     for (user_id, ) in Session.query(cls.user_id).all():
         admin_ids.append(user_id)
     return admin_ids
Example #53
0
    def __call__(self, environ, start_response):
        '''WSGI entry point: sign a Drupal user in to CKAN if needed.

        When the request carries a Drupal session cookie but no CKAN sign-in
        cookie, the user's details are fetched from Drupal and stored in
        ``environ`` (``drupal.uid``, ``drupal.publishers``, ``drupal.name``),
        a CKAN user is created if none exists, and auth_tkt plus the
        ``ckan_apikey`` / ``ckan_display_name`` / ``ckan_user`` cookies are
        added both to the response and to this request's ``HTTP_COOKIE``.
        In every other case the request is passed through unchanged apart
        from ``drupal.uid`` and ``drupal.publishers`` being set to ``None``.
        '''
        if self.drupal_client is None:
            self.drupal_client = DrupalClient()

        # establish from the cookie whether ckan and drupal are signed in
        ckan_signed_in = False
        drupal_signed_in = False
        for k, v in environ.items():
            if k.lower() == 'http_cookie':
                ckan_signed_in = is_ckan_signed_in(v)
                drupal_signed_in = drupal_extract_cookie(v)

        environ['drupal.uid'] = None
        environ['drupal.publishers'] = None
        if not drupal_signed_in or ckan_signed_in:
            return self.app(environ, start_response)

        # get info about the user from drupal and store in environ for
        # use by main CKAN app
        user_id = self.drupal_client.get_user_id_from_session_id(
            drupal_signed_in)
        res = self.drupal_client.get_user_properties(user_id)
        environ['drupal.uid'] = res['uid']
        environ['drupal.publishers'] = res['publishers']
        environ['drupal.name'] = res['name']

        from ckan import model
        from ckan.model.meta import Session

        def munge(username):
            # str methods return a new string; the result must be returned
            # (previously it was discarded and the input returned unchanged).
            return username.lower().replace(' ', '_')

        # Add the new Drupal user if they don't already exist. The same
        # munged name is used for the lookup and the creation so that the
        # two always agree.
        ckan_user_name = munge(unicode(environ['drupal.uid']))
        query = Session.query(model.User).filter_by(name=ckan_user_name)
        if not query.count():
            user = model.User(
                name=ckan_user_name,
                fullname=unicode(environ['drupal.name']),
                about=u'Drupal auto-generated user',
            )
            Session.add(user)
            Session.commit()
        else:
            user = query.one()

        # We want to store values in the user's cookie, so
        # prepare the response header with this value,
        # using auth_tkt to sign it.
        new_header = environ['repoze.who.plugins']['auth_tkt'].remember(
            environ,
            {
                'repoze.who.userid': environ['drupal.uid'],
                'tokens': '',
                'userdata': '',
            }
        )
        # e.g. new_header = [('Set-Cookie', 'bob=ab48fe; Path=/;')]
        # The attributes (path etc.) of the auth_tkt cookie are reused as a
        # template for the extra cookies below.
        cookie_template = new_header[0][1].split('; ')

        # These cookies need to work too: ckan_apikey, ckan_display_name and
        # ckan_user, each with a double-quoted value.
        cookie_string = ''
        for name, value in [
            ('ckan_apikey', user.apikey),
            ('ckan_display_name', user.fullname),
            ('ckan_user', user.name),
        ]:
            cookie_string += '; %s="%s"' % (name, value)
            new_cookie = cookie_template[:]
            new_cookie[0] = '%s="%s"' % (name, value)
            new_header.append(('Set-Cookie', str('; '.join(new_cookie))))

        # @@@ Need to add the headers to the request too so that the rest of
        # the stack can sign the user in.

        # There is a bug(/feature?) in line 628 of Cookie.py that means
        # it can't load from unicode strings. This causes Beaker to fail
        # unless the value here is a string
        existing_cookie = environ.get('HTTP_COOKIE')
        if existing_cookie:
            # keep the cookies the browser sent and append ours
            # (cookie_string already starts with the '; ' separator)
            environ['HTTP_COOKIE'] = existing_cookie + str(cookie_string)
        else:
            # nothing to append to: drop the leading '; '
            environ['HTTP_COOKIE'] = str(cookie_string[2:])

        def cookie_setting_start_response(status, headers, exc_info=None):
            headers += new_header
            return start_response(status, headers, exc_info)

        return self.app(environ, cookie_setting_start_response)
Example #54
0
    def _get_harvest_object(self,id):
        '''Return the first ``HarvestObject`` whose ``id`` column equals
        ``id``, as given by the query's ``first()``.'''
        matching = Session.query(HarvestObject).filter(HarvestObject.id==id)
        return matching.first()
Example #55
0
 def filter(cls, **kwargs):
     '''Build a ``cls`` query narrowed by the supplied keyword criteria.'''
     narrowed = Session.query(cls).filter_by(**kwargs)
     return narrowed
Example #56
0
 def delete(self):
     '''Delete this object through ``Session`` and commit straight away.'''
     for session_step in (lambda: Session.delete(self), Session.commit):
         session_step()
Example #57
0
 def filter(cls, **kwds):
     '''Build a ``cls`` query, with autoflush disabled, narrowed by the
     supplied keyword criteria.'''
     no_flush_query = Session.query(cls).autoflush(False)
     narrowed = no_flush_query.filter_by(**kwds)
     return narrowed
Example #58
0
 def save(self):
     '''Add this object to ``Session``, commit, and return ``self.as_dict()``.

     ``as_dict`` is called after the commit.
     '''
     Session.add(self)
     Session.commit()
     saved = self.as_dict()
     return saved
Example #59
0
 def old_locks(cls, since):
     '''Return all ``cls`` rows whose ``timestamp`` is strictly earlier than
     ``since``.'''
     is_older = cls.timestamp < since
     return Session.query(cls).filter(is_older).all()
Example #60
0
def get_package_list():
    '''Return a query of package names for active packages of type
    ``dataset``, ordered by package title.

    The query object itself is returned; it is not executed here.
    '''
    criteria = (Package.state=='active', Package.type=='dataset')
    name_query = Session.query(Package.name).filter(*criteria)
    return name_query.order_by(Package.title)