Exemple #1
0
    def _clean_test_database(self, package_name, id):

        base_location = self._get_ckan_base_api_url()
        api_key = self._get_user_api_key()
        testclient = CkanClient(base_location, api_key)
        # package_name ='spatialize_test_resource_3'
        testclient.package_entity_delete(package_name)

        # also remove table from database using id
        data_dict = {}
        data_dict['connection_url'] = pylons.config.get(
            'ckan.datastore.write_url',
            'postgresql://*****:*****@localhost/test_datastore')
        engine = db._get_engine(None, data_dict)
        connection = engine.connect()
        resources_sql = 'DROP TABLE IF EXISTS "' + id + '";'
        # resources_sql = 'DROP TABLE "b11351a2-5bbc-4f8f-8078-86a4eef1c7b0";'
        try:
            print '>>>>>>>>>>>>> Executing command: ', resources_sql
            trans = connection.begin()
            results = connection.execute(resources_sql)
            trans.commit()
        except Exception, e:
            print "exception", e
            assert False
 def get_package_list_for_all(self):
     """Open a client for each URL from ``get_ckan_api_list`` and begin
     listing its packages.

     NOTE(review): the inner loop stops at the first package and nothing is
     returned or stored -- presumably unfinished; confirm intended behaviour.
     """
     for api_url in self.get_ckan_api_list():
         client = CkanClient(base_location=api_url,
                             api_key=config.ckan_api_key)
         for _first_package in client.package_list():
             break
	def _setup_test_database(self, package_name):
		
		print ">>>>>>>>>>>>>>>>>> creating package: ", package_name
		base_location = self._get_ckan_base_api_url()
		api_key = self._get_user_api_key()
		testclient = CkanClient(base_location, api_key)
		print "base.. ", testclient.base_location
		file_url, status = testclient.upload_file("./testData/small_with_lat_long.csv")
		
		print "created file_url:", file_url
		print "status: ", status
		assert True
		  
	
		package_dict = {u'name': package_name, u'title': u'Serialize test 1', u'notes': u'dummy notes',
		'owner_org': 'public', u'private': u'False', u'state': u'active',
		'resources': [{'description': u'Resource Document Description', 'format': u'csv', 'url': file_url, 'name': u'Resource somewhere'}]}
		
		#print "package_dict: at test: ", package_dict
		 
		try:
			ret_pack = testclient.package_register_post(package_dict)
			resources = ret_pack['resources']
			self.database_id = resources[0]['id'] 
		
			print ">>>>>>>>>>>>>>>>>>>>>>>> database_id:", self.database_id
		except Exception, e:
			print "Exception: ", e
			assert False
			return ""
Exemple #4
0
def test_build_index():
    """Yield one ``fetch_package`` check per package found by a tag search.

    Each yielded tuple is ``(fetch_package, client, package_name, engine,
    table)``; ``fetch_package.description`` is set to name the package just
    before the tuple is yielded.
    """
    engine, table = connect()
    client = CkanClient(base_location='http://catalogue.data.gov.uk/api')
    search_options = {'limit': 5}
    found = client.package_search("tags:spend-transactions",
                                  search_options=search_options)
    for package_name in found['results']:
        fetch_package.description = 'metadata: %s' % package_name
        yield fetch_package, client, package_name, engine, table
Exemple #5
0
class CkanIndex(IndexBase):
    '''CKAN index.

    Where parameters not provided look them up in config.

    :param url: url for ckan API.
    :param api_key: API key.
    '''
    def __init__(self, url=None, api_key=None):
        # Accumulates every message passed to _print; used as the exception
        # text when list() fails.
        self.status_info = ''
        if url is not None:
            self.url = url
        else:
            self.url = datapkg.CONFIG.get('index:ckan', 'ckan.url')
        if api_key is not None:
            self.api_key = api_key
        else:
            self.api_key = datapkg.CONFIG.dictget('index:ckan', 'ckan.api_key', None)
        # Drop a single trailing slash from the API url.
        if self.url.endswith('/'):
            self.url = self.url[:-1]
        from ckanclient import CkanClient
        service_kwds = {}
        service_kwds['base_location'] = self.url
        service_kwds['api_key'] = self.api_key
        self._print("datapkg: CKAN config: %s" % service_kwds)
        self.ckan = CkanClient(**service_kwds)

    def _print(self, msg):
        '''Append msg to the status log and emit it at debug level.'''
        self.status_info += msg + '\n'
        logger.debug(msg)

    def list(self):
        '''Return the packages in the CKAN register.

        Each name in the client's response is converted with ``cvt_to_pkg``.

        :raises Exception: carrying the accumulated status text when the
            request did not return status 200 or returned no data.
        '''
        self.ckan.package_register_get()
        self.print_status()
        if self.ckan.last_status == 200:
            names = self.ckan.last_message
            if names is not None:
                return [self.cvt_to_pkg({'name': name}) for name in names]
            self._print("No response data. Check the resource location.")
            # TODO: convert to CKAN exception
        raise Exception(self.status_info)

    def search(self, query):
        '''Yield the result of ``get`` for every package name matching query.'''
        # TODO: think this automatically limits results to 20 or so
        for pkg_name in self.ckan.package_search(query)['results']:
            yield self.get(pkg_name)

    def has(self, name):
        '''Return True if ``get(name)`` succeeds, False on a 404.

        Any failure whose last client status is not 404 is re-raised.
        '''
        try:
            self.get(name)
        except Exception:
            if self.ckan.last_status == 404:
                return False
            raise
        return True
Exemple #6
0
 def init_ckanclient(self):
     """Init the CKAN client from options."""
     if not self.options.ckan_api_location:
         print "Warning: CKAN API location not provided."
     if not self.options.ckan_api_key:
         print "Warning: CKAN API key not provided."
     self.ckanclient = CkanClient(
         base_location=self.options.ckan_api_location,
         api_key=self.options.ckan_api_key,
     )
Exemple #7
0
    def __init__(self, config, queue_name=None, routing_key=None):
        """Initialise both bases: the CKAN client and the queue consumer.

        :param config: mapping read for ``ckan.api_key`` and ``ckan.site_url``;
            also handed to ``Consumer.__init__``.
        :param queue_name: forwarded to ``Consumer.__init__``.
        :param routing_key: forwarded to ``Consumer.__init__``.
        """
        if 'ckan.api_key' not in config:
            log.warn("No CKAN API key has been specified")

        # Keep the existing base_location unless the config names a site url,
        # in which case the API location is derived from it.
        api_location = self.base_location
        if 'ckan.site_url' in config:
            api_location = urljoin(config.get('ckan.site_url'), "api")

        CkanClient.__init__(
            self,
            base_location=api_location,
            api_key=config.get('ckan.api_key'),
        )
        consumer_kwargs = {'queue_name': queue_name, 'routing_key': routing_key}
        Consumer.__init__(self, config, **consumer_kwargs)
 def _setup_test_database(self, package_name):
     
     print ">>>>>>>>>>>>>>>>>> creating package: ",package_name
     try:
         base_location = self._get_ckan_base_api_url()
         api_key = self._get_user_api_key()
         testclient = CkanClient(base_location, api_key)
         file_url,status = testclient.upload_file("./testData/small_with_lat_long.csv")
     
         print "created file_url:",file_url
         
     except Exception, e:
         print "exception",e
         assert False   
Exemple #9
0
def traverse(pkg_func, query='*:*'):
    client = CkanClient(base_location=HOST, api_key=API_KEY)
    for page in count(1):
        results_page = client.package_search(query, search_options={
            'offset': page*PAGE_SIZE, 'limit': PAGE_SIZE})
        #pprint(results_page)
        if not len(results_page.get('results', [])): 
            break
        for pkg_name in results_page.get('results', []):
            print "Traversing", pkg_name
            pkg = client.package_entity_get(pkg_name)
            ret = pkg_func(client, pkg)
            if ret is not None:
                client.package_entity_put(ret, package_name=pkg_name)
Exemple #10
0
    def __init__(self, config, queue_name=None, routing_key=None):
        """Set up the CKAN client base, then the consumer base.

        :param config: mapping consulted for ``ckan.api_key`` and
            ``ckan.site_url``; passed on to ``Consumer.__init__``.
        :param queue_name: passed on to ``Consumer.__init__``.
        :param routing_key: passed on to ``Consumer.__init__``.
        """
        if 'ckan.api_key' not in config:
            log.warn("No CKAN API key has been specified")

        location = self.base_location
        if 'ckan.site_url' in config:
            # Derive the API location from the configured site url.
            site_url = config.get('ckan.site_url')
            location = urljoin(site_url, "api")

        api_key = config.get('ckan.api_key')
        CkanClient.__init__(self, base_location=location, api_key=api_key)
        Consumer.__init__(self, config, queue_name=queue_name,
                          routing_key=routing_key)
Exemple #11
0
    def _setup_test_database(self, package_name):

        print ">>>>>>>>>>>>>>>>>> creating package: ", package_name
        try:
            base_location = self._get_ckan_base_api_url()
            api_key = self._get_user_api_key()
            testclient = CkanClient(base_location, api_key)
            file_url, status = testclient.upload_file(
                "./testData/small_with_lat_long.csv")

            print "created file_url:", file_url

        except Exception, e:
            print "exception", e
            assert False
Exemple #12
0
 def oct10(self):
     """Run ``ChangeLicenses.change_oct_2010`` with the licence id from the options."""
     opts = self.options
     client = CkanClient(base_location=opts.api_url, api_key=opts.api_key)
     changer = ChangeLicenses(client, dry_run=opts.dry_run, force=opts.force)
     changer.change_oct_2010(opts.license_id)
def _process_upload(context, data):
    """
    When provided with a filename this function will process each row
    within the file and then return a tuple. The tuple will contain
        - a list of error messages (if any)
        - a list of dicts where each dict contains ...
                {
                 'package': 'a_package_id',
                 'action':  'Added' or 'Updated'
                }

    ``data`` supplies ``file`` (path of the upload) and ``publisher``;
    ``context`` supplies ``site_url`` and ``apikey`` for the CKAN client.

    NOTE(review): only the table-set loading is visible in this portion; the
    row processing and the return described above are not shown here.
    """
    log = inventory_upload.get_logger()

    errors = []
    results = []

    filename = data['file']
    publisher_name = data['publisher']

    import urlparse
    client = CkanClient(base_location=urlparse.urljoin(context['site_url'],
                                                       'api'),
                        api_key=context['apikey'])

    tableset = None
    # Bound before the try so the fallback below can reuse the open file.
    f = None
    try:
        _, ext = os.path.splitext(filename)
        f = open(filename, 'r')
        tableset = messytables.any_tableset(f, extension=ext[1:])
    except Exception as e:
        if f is not None and str(e) == "Unrecognized MIME type: text/plain":
            # Retry as CSV from the start of the file; the first attempt may
            # already have consumed part of it.
            f.seek(0)
            tableset = messytables.any_tableset(f, mimetype="text/csv")
        else:
            errors.append("Unable to load file: {0}".format(e))
Exemple #14
0
    def _setup_test_database(self, package_name):

        print ">>>>>>>>>>>>>>>>>> creating package: ", package_name
        base_location = self._get_ckan_base_api_url()
        api_key = self._get_user_api_key()
        testclient = CkanClient(base_location, api_key)
        print "base.. ", testclient.base_location
        file_url, status = testclient.upload_file(
            "./testData/small_with_lat_long.csv")

        print "created file_url:", file_url
        print "status: ", status
        assert True

        package_dict = {
            u'name':
            package_name,
            u'title':
            u'Serialize test 1',
            u'notes':
            u'dummy notes',
            'owner_org':
            'public',
            u'private':
            u'False',
            u'state':
            u'active',
            'resources': [{
                'description': u'Resource Document Description',
                'format': u'csv',
                'url': file_url,
                'name': u'Resource somewhere'
            }]
        }

        #print "package_dict: at test: ", package_dict

        try:
            ret_pack = testclient.package_register_post(package_dict)
            resources = ret_pack['resources']
            self.database_id = resources[0]['id']

            print ">>>>>>>>>>>>>>>>>>>>>>>> database_id:", self.database_id
        except Exception, e:
            print "Exception: ", e
            assert False
            return ""
Exemple #15
0
 def all(self):
     """Run ``ChangeLicenses.change_all_packages`` with the licence id from the options."""
     opts = self.options
     connection_kwargs = dict(base_location=opts.api_url,
                              api_key=opts.api_key,
                              http_user=opts.username,
                              http_pass=opts.password)
     client = CkanClient(**connection_kwargs)
     changer = ChangeLicenses(client, dry_run=opts.dry_run, force=opts.force)
     changer.change_all_packages(opts.license_id)
Exemple #16
0
    def command(self):
        """Run the base command, then run ``OnsAnalysis`` with a client built
        from the options."""
        super(OnsAnalysisCommand, self).command()

        # now do command
        opts = self.options
        client = CkanClient(
            base_location=opts.api_url,
            api_key=opts.api_key,
            http_user=opts.username,
            http_pass=opts.password,
        )
        OnsAnalysis(client).run()
Exemple #17
0
 def init_ckanclient(self):
     """Init the CKAN client from options."""
     if not self.options.ckan_api_location:
         print "Warning: CKAN API location not provided."
     if not self.options.ckan_api_key:
         print "Warning: CKAN API key not provided."
     self.ckanclient = CkanClient(
         base_location=self.options.ckan_api_location,
         api_key=self.options.ckan_api_key,
     )
Exemple #18
0
    def setup_class(self):
        """Start a CKAN server, reload its test data and open two clients.

        ``self.c`` and ``self.c2`` talk to the same API location with the
        ``tester`` and ``tester2`` API keys respectively.
        """
        self.pid = self._start_ckan_server()
        self.test_base_location = 'http://127.0.0.1:5000/api'
        self._wait_for_url(url=self.test_base_location)
        self._recreate_ckan_server_testdata(config_path)

        # this is api key created for tester user by create-test-data in ckan
        shared_kwargs = dict(base_location=self.test_base_location,
                             is_verbose=True)
        self.c = CkanClient(api_key='tester', **shared_kwargs)
        self.c2 = CkanClient(api_key='tester2', **shared_kwargs)
    def setup_class(self):
        """Start a CKAN server with fresh test data and create the two API
        clients ``self.c`` (key ``tester``) and ``self.c2`` (key ``tester2``)."""
        self.pid = self._start_ckan_server()
        api_url = "http://127.0.0.1:5000/api"
        self.test_base_location = api_url
        self._wait_for_url(url=self.test_base_location)
        self._recreate_ckan_server_testdata(config_path)
        # this is api key created for tester user by create-test-data in ckan
        for attr_name, api_key in (("c", "tester"), ("c2", "tester2")):
            client = CkanClient(base_location=self.test_base_location,
                                api_key=api_key,
                                is_verbose=True)
            setattr(self, attr_name, client)
Exemple #20
0
 def setup_class(self):
     """Create the test user and a client authenticated with its API key.

     With ``WSGI_CLIENT`` set the client wraps ``self.app``; otherwise a CKAN
     server is started from ``test.ini`` and an HTTP client is pointed at it.
     """
     parent = super(TestLoaderBase, self)
     if hasattr(parent, 'setup_class'):
         parent.setup_class()
     CreateTestData.create_arbitrary([], extra_user_names=[USER])
     user = model.User.by_name(USER)
     assert user
     if not WSGI_CLIENT:
         api_url = 'http://localhost:5000/api'
         self.sub_proc = self._start_ckan_server('test.ini')
         self.testclient = CkanClient(base_location=api_url,
                                      api_key=user.apikey)
         self._wait_for_url(url=api_url)
     else:
         self.testclient = WsgiCkanClient(self.app, api_key=user.apikey)
 def _clean_test_database(self, package_name, id):
     
     base_location = elf._get_ckan_base_api_url()
     api_key = self._get_user_api_key()
     testclient = CkanClient(base_location, api_key)
     #package_name ='spatialize_test_resource_3'
     testclient.package_entity_delete(package_name)
     
      
      #also remove table from database using id
     data_dict = {}
     data_dict['connection_url'] = pylons.config.get('ckan.datastore.write_url', 'postgresql://*****:*****@localhost/datastore')  
     engine = db._get_engine(None, data_dict)
     connection = engine.connect()
     resources_sql = 'DROP TABLE IF EXISTS "'+id+'";'
     #resources_sql = 'DROP TABLE "b11351a2-5bbc-4f8f-8078-86a4eef1c7b0";'
     try:
         print '>>>>>>>>>>>>> Executing command: ',resources_sql
         results = connection.execute(resources_sql) 
     except Exception, e:
         print "exception",e
         assert False
Exemple #22
0
    def command(self):
        """Check the licence id and argument count, then run ``TransferUrl``.

        Reports through ``self.parser.error`` when no licence id was given or
        when the number of positional arguments is not exactly one.
        """
        super(TransferUrlCommand, self).command()
        if self.options.license_id is None:
            self.parser.error("Please specify a license ID")
        if len(self.args) != 1:
            self.parser.error("Command is required")

        opts = self.options
        client = CkanClient(
            base_location=opts.api_url,
            api_key=opts.api_key,
            http_user=opts.username,
            http_pass=opts.password,
        )
        TransferUrl(client, dry_run=opts.dry_run, force=opts.force).transfer_url()
Exemple #23
0
 def __init__(self, url=None, api_key=None):
     """Create the CKAN client, taking missing settings from the config.

     :param url: CKAN API url; read from ``ckan.url`` in the ``index:ckan``
         config section when ``None``. One trailing ``/`` is removed.
     :param api_key: API key; read from ``ckan.api_key`` in the same section
         (defaulting to ``None``) when not given.
     """
     self.status_info = ''
     self.url = (url if url is not None
                 else datapkg.CONFIG.get('index:ckan', 'ckan.url'))
     self.api_key = (api_key if api_key is not None
                     else datapkg.CONFIG.dictget('index:ckan', 'ckan.api_key', None))
     if self.url.endswith('/'):
         self.url = self.url[:-1]
     from ckanclient import CkanClient
     service_kwds = {'base_location': self.url, 'api_key': self.api_key}
     self._print("datapkg: CKAN config: %s" % service_kwds )
     self.ckan = CkanClient(**service_kwds)
Exemple #24
0
    def command(self):
        """Validate the API options and create ``self.client``.

        Reports through ``self.parser.error`` when the API key or url is
        missing, when the url does not start with ``http://`` or ``https://``,
        or when it does not contain ``/api``. The client's user agent is
        ``self.user_agent`` when that attribute exists.
        """
        super(ApiCommand, self).command()
        opts = self.options
        if not opts.api_key:
            self.parser.error('Please specify an API Key')
        if not opts.api_url:
            self.parser.error('Please specify an API URL')
        if opts.api_url:
            if not opts.api_url.startswith(('http://', 'https://')):
                self.parser.error('--host must start with "http://"')
            if '/api' not in opts.api_url:
                self.parser.error('--host must have "/api" towards the end')
        if hasattr(self, 'user_agent'):
            user_agent = self.user_agent
        else:
            user_agent = 'ckanext-importlib/ApiCommand'

        self.client = CkanClient(
            base_location=opts.api_url,
            api_key=opts.api_key,
            http_user=opts.username,
            http_pass=opts.password,
            is_verbose=True,
            user_agent=user_agent,
        )
Exemple #25
0
 def process(self, *av, **kw):
     """Apply the stored removals and descriptions to each dataset on CKAN.

     All arguments are passed straight to ``CkanClient``. For every
     ``(dataset, descr)`` pair in ``self`` the package named by the last path
     segment of ``dataset`` is fetched, un-merged, merged, fixed up and put
     back; the package is then added to each of its groups that does not
     already list it. ``flush`` is called once at the end.
     """
     client = CkanClient(*av, **kw)
     for dataset, descr in self:
         _, pkgname = dataset.rsplit("/", 1)
         pkg = client.package_entity_get(pkgname)
         removals = self.removals.get(dataset, {})
         self.__unmerge__(pkg, removals)
         self.__merge__(pkg, descr)
         # Read the groups before the fix-up step runs.
         group_names = pkg.get("groups", [])
         self.__fixup__(pkg)
         client.package_entity_put(pkg)
         self.log_api_result(pkgname, client)
         for groupname in group_names:
             group = client.group_entity_get(groupname)
             members = group.setdefault("packages", [])
             if pkgname not in members:
                 members.append(pkgname)
             # The group is written back even when it already listed the package.
             client.group_entity_put(group)
             self.log_api_result(groupname, client)
     self.flush()
Exemple #26
0
    def _clean_all_tables_and_packages_in_database(self):

        base_location = self._get_ckan_base_api_url()
        api_key = self._get_user_api_key()
        testclient = CkanClient(base_location, api_key)

        # TODO: clean all packages

        # also remove table from database using id
        data_dict = {}
        data_dict['connection_url'] = pylons.config.get(
            'ckan.datastore.write_url',
            'postgresql://*****:*****@localhost/test_datastore')
        engine = db._get_engine(None, data_dict)
        connection = engine.connect()
        resources_sql = "SELECT * FROM pg_tables;"
        # resources_sql = 'DROP TABLE "b11351a2-5bbc-4f8f-8078-86a4eef1c7b0";'
        try:
            print '>>>>>>>>>>>>> Executing command: ', resources_sql
            trans = connection.begin()
            results_cursor = connection.execute(resources_sql)
            trans.commit()

            allTables = results_cursor.fetchall()
            filteredTables = []
            for table in allTables:
                tableName = table[1]
                if not "pg_" in tableName and not "sql_" in tableName and not tableName == "geometry_columns" and not tableName == "spatial_ref_sys":
                    filteredTables.append(tableName)

            trans = connection.begin()
            for name in filteredTables:
                print "dropping table: ", name
                resource_sql = 'DROP TABLE IF EXISTS "' + name + '";'
                results = connection.execute(resource_sql)
            trans.commit()

        except Exception, e:
            print "exception", e
            assert False
Exemple #27
0
def traverse(pkg_func, query='*:*'):
    client = CkanClient(base_location=HOST, api_key=API_KEY)
    for page in count(1):
        results_page = client.package_search(query,
                                             search_options={
                                                 'offset': page * PAGE_SIZE,
                                                 'limit': PAGE_SIZE
                                             })
        #pprint(results_page)
        if not len(results_page.get('results', [])):
            break
        for pkg_name in results_page.get('results', []):
            print "Traversing", pkg_name
            pkg = client.package_entity_get(pkg_name)
            ret = pkg_func(client, pkg)
            if ret is not None:
                client.package_entity_put(ret, package_name=pkg_name)
Exemple #28
0
class CkanLoader(object):
    """
    Directs a CKAN service client to put obtained packages on CKAN.

    ``run`` resets ``self.packages``, calls ``obtain_packages`` (which
    subclasses must implement and which is expected to populate
    ``self.packages``) and then registers or updates each package through
    the CKAN client.
    """

    usage = '''usage: %prog OPTIONS'''

    # Latin-1 code point -> 7-bit ASCII replacement, from
    # http://code.activestate.com/recipes/251871/
    _ASCII_EQUIVALENTS = {
        0xc0: 'A', 0xc1: 'A', 0xc2: 'A', 0xc3: 'A', 0xc4: 'A', 0xc5: 'A',
        0xc6: 'Ae', 0xc7: 'C',
        0xc8: 'E', 0xc9: 'E', 0xca: 'E', 0xcb: 'E',
        0xcc: 'I', 0xcd: 'I', 0xce: 'I', 0xcf: 'I',
        0xd0: 'Th', 0xd1: 'N',
        0xd2: 'O', 0xd3: 'O', 0xd4: 'O', 0xd5: 'O', 0xd6: 'O', 0xd8: 'O',
        0xd9: 'U', 0xda: 'U', 0xdb: 'U', 0xdc: 'U',
        0xdd: 'Y', 0xde: 'th', 0xdf: 'ss',
        0xe0: 'a', 0xe1: 'a', 0xe2: 'a', 0xe3: 'a', 0xe4: 'a', 0xe5: 'a',
        0xe6: 'ae', 0xe7: 'c',
        0xe8: 'e', 0xe9: 'e', 0xea: 'e', 0xeb: 'e',
        0xec: 'i', 0xed: 'i', 0xee: 'i', 0xef: 'i',
        0xf0: 'th', 0xf1: 'n',
        0xf2: 'o', 0xf3: 'o', 0xf4: 'o', 0xf5: 'o', 0xf6: 'o', 0xf8: 'o',
        0xf9: 'u', 0xfa: 'u', 0xfb: 'u', 0xfc: 'u',
        0xfd: 'y', 0xfe: 'th', 0xff: 'y',
        # Symbol mappings from the recipe are disabled, so these characters
        # are dropped rather than converted:
        #0xa1:'!', 0xa2:'{cent}', 0xa3:'{pound}', 0xa4:'{currency}',
        #0xa5:'{yen}', 0xa6:'|', 0xa7:'{section}', 0xa8:'{umlaut}',
        #0xa9:'{C}', 0xaa:'{^a}', 0xab:'<<', 0xac:'{not}',
        #0xad:'-', 0xae:'{R}', 0xaf:'_', 0xb0:'{degrees}',
        #0xb1:'{+/-}', 0xb2:'{^2}', 0xb3:'{^3}', 0xb4:"'",
        #0xb5:'{micro}', 0xb6:'{paragraph}', 0xb7:'*', 0xb8:'{cedilla}',
        #0xb9:'{^1}', 0xba:'{^o}', 0xbb:'>>',
        #0xbc:'{1/4}', 0xbd:'{1/2}', 0xbe:'{3/4}', 0xbf:'?',
        #0xd7:'*', 0xf7:'/'
    }

    def __init__(self):
        """Sets up options and init the CKAN service client."""
        parser = OptionParser(self.usage)
        self.add_options(parser)
        (self.options, self.args) = parser.parse_args()
        self.init_ckanclient()

    def add_options(self, parser):
        """Adds options for CKAN service location and REST API key."""
        parser.add_option('--ckan-api-location',
                          dest='ckan_api_location',
                          default='http://127.0.0.1:5000/api',
                          help="The location of working CKAN REST API.")
        parser.add_option('--ckan-api-key',
                          dest='ckan_api_key',
                          help="A valid CKAN REST API key.")
        # The misspelt dest names are kept: they are the attribute names
        # read from self.options below.
        parser.add_option(
            '--no-create-confirmation',
            dest='no_create_confimation',
            action='store_true',
            help="Don't prompt for confirmation when registering a new package.")
        parser.add_option(
            '--no-update-confirmation',
            dest='no_update_confimation',
            action='store_true',
            help="Don't prompt for confirmation when updating a registered package.")

    def init_ckanclient(self):
        """Init the CKAN client from options."""
        if not self.options.ckan_api_location:
            print("Warning: CKAN API location not provided.")
        if not self.options.ckan_api_key:
            print("Warning: CKAN API key not provided.")
        self.ckanclient = CkanClient(
            base_location=self.options.ckan_api_location,
            api_key=self.options.ckan_api_key,
        )

    def run(self):
        """Obtain packages and put them on CKAN.

        A ``KeyboardInterrupt`` ends the run with an "exiting..." message
        instead of propagating.
        """
        try:
            self.packages = []
            self.obtain_packages()
            print("Putting %s packages on CKAN running at %s" % (
                len(self.packages), self.options.ckan_api_location))
            self.put_packages_on_ckan()
        except KeyboardInterrupt:
            print("")
            print("exiting...")
            print("")

    def obtain_packages(self):
        """Abstract method for obtaining packages."""
        raise Exception("Abstract method not implemented.")

    def put_packages_on_ckan(self):
        """Uses CKAN client to register (or update) obtained packages.

        Each package is looked up by name first: status 200 leads to an
        update, 404 to a registration, anything else is reported as an
        error. Unless disabled by the options, the user is asked to confirm
        each update or registration.

        :raises Exception: when a request failed without the client
            recording an HTTP or URL error.
        """
        print("")
        sleep(1)
        for package in self.packages:
            try:
                self.ckanclient.package_entity_get(package['name'])
            except CkanApiError:
                # The outcome is read from the client's last_* attributes.
                pass
            if self.ckanclient.last_status == 200:
                print("Package '%s' is already registered" % package['name'])
                self._show_package(package)
                if not self.options.no_update_confimation:
                    if not self._confirm(
                            "Do you want to update this package with CKAN now? [y/N] ",
                            package):
                        continue
                print("Updating package...")
                self.ckanclient.package_entity_put(package)
                if self.ckanclient.last_status == 200:
                    print("Updated package '%s' OK." % package['name'])
                    sleep(1)
                else:
                    self._report_failure(check_auth=True)
            elif self.ckanclient.last_status == 404 or '404' in str(
                    self.ckanclient.last_url_error):
                print("Package '%s' not currently registered" % package['name'])
                self._show_package(package)
                if not self.options.no_create_confimation:
                    if not self._confirm(
                            "Do you want to register this package with CKAN now? [y/N] ",
                            package):
                        continue
                print("Registering package...")
                self.ckanclient.package_register_post(package)
                if self.ckanclient.last_status in [200, 201]:
                    print("Registered package '%s' OK." % package['name'])
                    sleep(1)
                else:
                    self._report_failure(check_auth=True)
            else:
                self._report_failure(check_auth=False)

    def _show_package(self, package):
        """Pretty-print a package between two blank lines."""
        print("")
        pprint.pprint(package)
        print("")

    def _confirm(self, question, package):
        """Ask a yes/no question; report the skip and return False unless the
        answer starts with 'y' (case-insensitive)."""
        answer = raw_input(question)
        if answer and answer.lower()[0] == 'y':
            return True
        print("Skipping '%s' package..." % package['name'])
        print("")
        sleep(1)
        return False

    def _report_failure(self, check_auth):
        """Print why the last CKAN request failed, then pause (four seconds
        for authorisation errors, three otherwise).

        :param check_auth: also recognise a 403 as "not authorised".
        :raises Exception: when the client recorded neither an HTTP nor a
            URL error.
        """
        client = self.ckanclient
        if check_auth and (client.last_status == 403
                           or '403' in str(client.last_url_error)):
            print("Error: Not authorised. Check your API key.")
            pause = 4
        elif client.last_http_error:
            print("Error: CKAN returned status code %s: %s" % (
                client.last_status, client.last_http_error))
            pause = 3
        elif client.last_url_error:
            print("Error: URL problems: %s" % client.last_url_error)
            pause = 3
        else:
            raise Exception("Error: CKAN request didn't work at all.")
        for _ in range(pause):
            sleep(1)

    def create_package(self,
                       name,
                       title='',
                       url='',
                       maintainer='',
                       maintainer_email='',
                       author='',
                       author_email='',
                       notes='',
                       tags=None,
                       extras=None,
                       license_id=None,
                       license=None,
                       resources=None):
        """Returns a CKAN REST API package from method arguments.

        ``tags``, ``extras`` and ``resources`` default to a fresh empty
        list/dict/list per call, so packages never share a container.
        ``license_id`` takes precedence over ``license``; neither key is set
        when both are ``None``.

        :raises Exception: if ``tags`` is not a list or ``extras`` is not a
            dict.
        """
        if tags is None:
            tags = []
        if extras is None:
            extras = {}
        if resources is None:
            resources = []
        if not isinstance(tags, list):
            raise Exception("Package tags must be a list: %s" % tags)
        if not isinstance(extras, dict):
            raise Exception("Package extras must be a dict: %s" % (extras,))
        package = {
            'name': self.coerce_package_name(name),
            'title': title,
            'url': url,
            'notes': notes,
            'maintainer': maintainer,
            'maintainer_email': maintainer_email,
            'author': author,
            'author_email': author_email,
            'tags': tags,
            'extras': extras,
        }
        # Pre and post licenses servicization.
        if license_id is not None:
            package['license_id'] = license_id
        elif license is not None:
            package['license'] = license
        package['resources'] = resources
        return package

    def coerce_package_name(self, name):
        """Converts unicode string to valid CKAN package name."""
        # Todo: Probably needs to be finished off.
        return self.substitute_ascii_equivalents(name).lower()

    def substitute_ascii_equivalents(self, unicrap):
        # Method taken from: http://code.activestate.com/recipes/251871/
        """This takes a UNICODE string and replaces Latin-1 characters with
            something equivalent in 7-bit ASCII. It returns a plain ASCII string.
            This function makes a best effort to convert Latin-1 characters into
            ASCII equivalents. It does not just strip out the Latin-1 characters.
            All characters in the standard 7-bit ASCII range are preserved.
            In the 8th bit range all the Latin-1 accented letters are converted
            to unaccented equivalents. Anything not converted is deleted.
        """
        pieces = []
        for char in unicrap:
            code = ord(char)
            if code in self._ASCII_EQUIVALENTS:
                pieces.append(self._ASCII_EQUIVALENTS[code])
            elif code < 0x80:
                pieces.append(str(char))
            # Unmapped characters at 0x80 and above are dropped.
        return ''.join(pieces)

    def create_package_resource(self,
                                url='',
                                format='',
                                hash='',
                                description=''):
        """Returns a package resource dict built from the arguments."""
        return {
            'url': url,
            'format': format,
            'hash': hash,
            'description': description,
        }
Exemple #29
0
 def __init__(self, api_base):
     """Keep a CKAN client for the API rooted at ``api_base`` on ``self.ckan``."""
     client = CkanClient(base_location=api_base)
     self.ckan = client
Exemple #30
0
class S3Bounce(OFSInterface):
    """
    Use ckanext-storage API to bounce to an S3 store

    Metadata and the signed request headers are obtained through a
    CkanClient (``storage_metadata_set`` / ``storage_auth_get``); the payload
    itself is streamed with a PUT issued through ``AWSAuthConnection``.
    """
    def __init__(self, api_base):
        # api_base is handed to CkanClient as its base_location.
        self.ckan = CkanClient(base_location=api_base)

    def put_stream(self, bucket, label, fp, metadata={}, cb=None, num_cb=None):
        """Upload the contents of `fp` to ``/<bucket>/<label>``.

        `fp` must be seekable: it is read once in full to compute the MD5
        digest and size, then rewound and read again while sending.

        `metadata` is passed to ``storage_metadata_set``; its ``_format``
        entry (default ``application/octet-stream``) becomes the
        Content-Type header.

        `cb`, when given, is called as ``cb(bytes_sent, total_size)`` before
        the first chunk, periodically while sending (frequency derived from
        `num_cb`), and once more after the last chunk.

        Side effects: sets ``self.size``, ``self.md5`` and, on a 2xx
        response, ``self.etag``; pretty-prints the metadata returned by
        ``storage_metadata_update`` to stdout.

        Raises ``Exception`` when the returned ETag differs from the computed
        MD5, or when the response status is neither 2xx, 500, 503 nor a
        redirect.
        """
        # NOTE(review): the default is `{}`, so this branch only runs when a
        # caller passes metadata=None explicitly -- confirm that is intended.
        if metadata is None:
            metadata = { "_owner": getpass.getuser()}

        path = "/" + bucket + "/" + label

        content_type = metadata.get("_format", "application/octet-stream")

        # From here on `metadata` is whatever the storage API returned.
        metadata = self.ckan.storage_metadata_set(path, metadata)
        BufferSize = 65536 ## set to something very small to make sure
                                       ## chunking is working properly

        headers = { 'Content-Type': content_type }

        #if content_type is None:
        #    content_type = mimetypes.guess_type(filename)[0] or "text/plain"
        #headers['Content-Type'] = content_type
        #if content_encoding is not None:
        #   headers['Content-Encoding'] = content_encoding

        # First pass over the stream: compute the MD5 digest and the size.
        m = md5()
        fp.seek(0)
        s = fp.read(BufferSize)
        while s:
            m.update(s)
            s = fp.read(BufferSize)
        self.size = fp.tell()
        fp.seek(0)

        self.md5 = m.hexdigest()
        headers['Content-MD5'] = base64.encodestring(m.digest()).rstrip('\n')
        headers['Content-Length'] = str(self.size)

        headers['Expect'] = '100-Continue'

        # The storage API returns the target host and the header set to use
        # for the PUT (replacing the headers built above).
        host, headers = self.ckan.storage_auth_get(path, headers)

        def sender(http_conn, method, path, data, headers):
            # Writes the request line and headers, then streams `fp` (taken
            # from the enclosing scope; the `data` argument is not used) in
            # BufferSize chunks.
            http_conn.putrequest(method, path)
            for key in headers:
                http_conn.putheader(key, headers[key])
            http_conn.endheaders()
            fp.seek(0)
            http_conn.set_debuglevel(0) ### XXX set to e.g. 4 to see what going on
            if cb:
                # cb_count is the number of chunks between progress
                # callbacks; -1 means "call back after every chunk", 0 means
                # no intermediate callbacks.
                # NOTE(review): with the default num_cb=None these
                # comparisons rely on Python 2 ordering of None against ints.
                if num_cb > 2:
                    cb_count = self.size / BufferSize / (num_cb-2)
                elif num_cb < 0:
                    cb_count = -1
                else:
                    cb_count = 0
                i = total_bytes = 0
                cb(total_bytes, self.size)
            l = fp.read(BufferSize)
            while len(l) > 0:
                http_conn.send(l)
                if cb:
                    total_bytes += len(l)
                    i += 1
                    if i == cb_count or cb_count == -1:
                        cb(total_bytes, self.size)
                        i = 0
                l = fp.read(BufferSize)
            if cb:
                # Final progress report once everything has been sent.
                cb(total_bytes, self.size)
            response = http_conn.getresponse()
            body = response.read()
            # Rewind so the stream can be sent again if the request is retried.
            fp.seek(0)
            if response.status == 500 or response.status == 503 or \
                    response.getheader('location'):
                # we'll try again
                return response
            elif response.status >= 200 and response.status <= 299:
                # Success: the ETag must equal the quoted hex MD5 computed
                # before sending.
                self.etag = response.getheader('etag')
                if self.etag != '"%s"'  % self.md5:
                    raise Exception('ETag from S3 did not match computed MD5')
                return response
            else:
                #raise provider.storage_response_error(
                #    response.status, response.reason, body)
                raise Exception(response.status, response.reason, body)

        # NOTE(review): "key_id"/"secret" look like placeholders; presumably
        # the real authorisation is carried by the headers returned from
        # storage_auth_get -- confirm.
        awsc = AWSAuthConnection(host,
                                 aws_access_key_id="key_id",
                                 aws_secret_access_key="secret")

        awsc._mexe('PUT', path, None, headers, sender=sender)

        # Update the stored metadata with an empty change set and print the
        # result to stdout.
        metadata = self.ckan.storage_metadata_update(path, {})
        from pprint import pprint
        pprint(metadata)
Exemple #31
0
class CkanIO():
    """Dump a CKAN instance to local files and read those dumps back.

    The ``update_*`` methods (re)build dump files through
    ``csv2rdf.database.DatabasePlainFiles``; the ``get_*`` methods load them.
    All file names and directories come from ``csv2rdf.config.config``.
    """

    def __init__(self):
        self.ckan = CkanClient(base_location=csv2rdf.config.config.ckan_api_url,
                               api_key=csv2rdf.config.config.ckan_api_key)

    def _resource_from_dump(self, resource):
        """Build a ``Resource`` object initialised from a dumped resource dict."""
        res = csv2rdf.ckan.resource.Resource(resource['id'])
        res.init_from_dump(resource)
        return res

    def get_package_list(self):
        """
            Returns the list of package names (unique identifiers)
        """
        return self.ckan.package_list()

    def update_full_package_list(self):
        """
            Load every per-package dump and save them together as one list.

            Packages whose dump cannot be loaded are logged and skipped.
        """
        config = csv2rdf.config.config
        full_package_list = []
        db = csv2rdf.database.DatabasePlainFiles(config.data_packages_path)
        package_list = self.get_package_list()
        logging.info("updating full package list file: %s", config.data_full_package_list)
        package_list_length = len(package_list)
        for num, package_id in enumerate(package_list):
            logging.info("Reading package number %d out of %d", num + 1, package_list_length)
            try:
                full_package_list.append(db.loadDbase(package_id))
            except Exception as e:
                # Best effort: a missing or corrupt dump must not abort the
                # run. Only Exception is caught so Ctrl-C / SystemExit still
                # stop the loop.
                logging.error("An exception occured, while loading package, try CkanIO.update_packages")
                logging.error(str(e))
        logging.info("Saving full package list to file, length is %s", len(full_package_list))
        # NOTE(review): the list is saved through the data_packages_path
        # database, while get_full_package_list() loads it through data_path
        # -- confirm both resolve to the same file.
        db.saveDbase(config.data_full_package_list, full_package_list)
        logging.info("DONE!")

    def update_packages(self):
        """
            Dump the CKAN instance into the files

            Packages that already have a dump file are skipped; failures for
            a single package are logged and the run continues.
        """
        config = csv2rdf.config.config
        package_list = self.get_package_list()
        db = csv2rdf.database.DatabasePlainFiles(config.data_packages_path)
        number_of_packages = len(package_list)
        for num, package_id in enumerate(package_list):
            logging.info("processing %d out of %d package", num + 1, number_of_packages)
            if os.path.exists(config.data_packages_path + package_id):
                continue
            try:
                package = csv2rdf.ckan.package.Package(package_id)
                # ckan object can not be pickled
                del package.ckan
                db.saveDbase(package_id, package)
            except Exception as e:
                # Only Exception is caught so the (long) dump can still be
                # interrupted with Ctrl-C.
                logging.info("An exception occured, while processing package %d, %s", num + 1, package_id)
                logging.info("Exception: %s", str(e))

        logging.info("DONE!")

    def get_full_package_list(self):
        """
            Returns the full package list (CKAN dump)
        """
        db = csv2rdf.database.DatabasePlainFiles(csv2rdf.config.config.data_path)
        return db.loadDbase(csv2rdf.config.config.data_full_package_list)

    def update_full_resource_list(self):
        """
            Read the full package list (CKAN full dump)
            and save the resources of all packages in one file
        """
        config = csv2rdf.config.config
        db = csv2rdf.database.DatabasePlainFiles(config.data_path)
        all_packages = self.get_full_package_list()
        logging.info("Updating full resource list: %s", config.data_full_resource_list)
        all_resources = []
        for package in all_packages:
            all_resources.extend(package.resources)
        logging.info("Dumping a full resource list to a file, length is %s", len(all_resources))
        db.saveDbase(config.data_full_resource_list, all_resources)
        logging.info("DONE!")

    def get_full_resource_list(self):
        """
            Returns the list of all resources (CKAN dump)
        """
        db = csv2rdf.database.DatabasePlainFiles(csv2rdf.config.config.data_path)
        return db.loadDbase(csv2rdf.config.config.data_full_resource_list)

    def update_csv_resource_list(self):
        """
            Read the full resource list and
            save the list of all CSV resources
        """
        config = csv2rdf.config.config
        all_resources = self.get_full_resource_list()
        db = csv2rdf.database.DatabasePlainFiles(config.data_path)

        logging.info("Updating CSV resource list: %s", config.data_csv_resource_list)
        # The raw dump dicts are stored, not the Resource objects used for
        # the is_csv() check.
        csv_resources = [resource for resource in all_resources
                         if self._resource_from_dump(resource).is_csv()]

        db.saveDbase(config.data_csv_resource_list, csv_resources)
        logging.info("DONE!")

    def get_csv_resource_list(self):
        """
            Returns the list of available csv CKAN resources (dumps)
        """
        db = csv2rdf.database.DatabasePlainFiles(csv2rdf.config.config.data_path)
        return db.loadDbase(csv2rdf.config.config.data_csv_resource_list)

    def update_rdf_resources_list(self):
        """
            Update the lists of the RDF resources

            Every resource of the full resource list is sorted by its
            'format' into the rdf / compressed / endpoint / html lists (a
            resource is added to every list whose format set contains its
            format) and each list is saved to its own file.
        """
        config = csv2rdf.config.config
        formats = csv2rdf.config.rdf_formats
        resource_list = self.get_full_resource_list()
        rdf = []
        rdf_compressed = []
        endpoints = []
        rdf_html = []
        logging.info("Updating RDF resource list: %s", config.data_rdf_resource_list)
        logging.info("Updating RDF resource list: %s", config.data_rdf_compressed_resource_list)
        logging.info("Updating RDF resource list: %s", config.data_endpoint_resource_list)
        logging.info("Updating RDF resource list: %s", config.data_rdf_html_resource_list)
        # (accepted formats, list collecting the matching resources)
        buckets = (
            (formats.rdf_formats, rdf),
            (formats.compressed_formats, rdf_compressed),
            (formats.endpoints, endpoints),
            (formats.html_formats, rdf_html),
        )
        for resource in resource_list:
            for accepted_formats, bucket in buckets:
                if resource['format'] in accepted_formats:
                    # A separate Resource object per list, as before.
                    bucket.append(self._resource_from_dump(resource))
        db = csv2rdf.database.DatabasePlainFiles(config.data_path)
        db.saveDbase(config.data_rdf_resource_list, rdf)
        db.saveDbase(config.data_rdf_compressed_resource_list, rdf_compressed)
        db.saveDbase(config.data_endpoint_resource_list, endpoints)
        db.saveDbase(config.data_rdf_html_resource_list, rdf_html)
        logging.info("DONE!")

    def get_resource_list(self, type):
        """
            Returns the dumped resource list of the given type

            `type` must be one of "rdf", "rdf_compressed", "endpoint" or
            "rdf_html"; any other value returns False.
        """
        types = ("rdf", "rdf_compressed", "endpoint", "rdf_html")
        if type not in types:
            return False
        db = csv2rdf.database.DatabasePlainFiles(csv2rdf.config.config.data_path)
        # Plain attribute lookup instead of eval(): same result for the
        # whitelisted names, without evaluating a constructed expression.
        list_name = getattr(csv2rdf.config.config,
                            "data_" + str(type) + "_resource_list")
        return db.loadDbase(list_name)
class TestCkanClient(CkanServerCase):
    """Functional tests for CkanClient against a locally started CKAN server.

    ``setup_class`` starts the server, recreates its test data and creates
    two clients (``c`` and ``c2``) with different API keys; the tests rely on
    the packages 'annakarenina' and 'warandpeace' from that test data.
    """

    @classmethod
    def setup_class(cls):
        cls.pid = cls._start_ckan_server()
        cls.test_base_location = 'http://127.0.0.1:5000/api'
        cls._wait_for_url(url=cls.test_base_location)
        cls._recreate_ckan_server_testdata(config_path)
        # this is api key created for tester user by create-test-data in ckan
        test_api_key = 'tester'
        test_api_key2 = 'tester2'

        cls.c = CkanClient(
            base_location=cls.test_base_location,
            api_key=test_api_key,
            is_verbose=True,
        )
        cls.c2 = CkanClient(
            base_location=cls.test_base_location,
            api_key=test_api_key2,
            is_verbose=True,
        )

    @classmethod
    def teardown_class(cls):
        cls._stop_ckan_server(cls.pid)

    def delete_relationships(self):
        """Delete every relationship currently registered for 'annakarenina'."""
        self.c.package_relationship_register_get('annakarenina')
        if self.c.last_status == 200 and self.c.last_message:
            for rel_dict in self.c.last_message:
                self.c.package_relationship_entity_delete(
                    rel_dict['subject'],
                    rel_dict['type'],
                    rel_dict['object'])

    def test_01_get_locations(self):
        rest_base = self.test_base_location + '/rest'
        search_base = self.test_base_location + '/search'
        url = self.c.get_location('Base')
        assert url == self.test_base_location, url
        url = self.c.get_location('Package Register')
        assert url == rest_base + '/package'
        url = self.c.get_location('Package Entity', 'myname')
        assert url == rest_base + '/package/myname'
        url = self.c.get_location('Package Entity', 'myname',
                                  'relationships')
        assert url == rest_base + '/package/myname/relationships'
        url = self.c.get_location('Package Entity', 'myname',
                                  'relationships', 'name2')
        assert url == rest_base + '/package/myname/relationships/name2'
        url = self.c.get_location('Package Entity', 'myname',
                                  'child_of', 'name2')
        assert url == rest_base + '/package/myname/child_of/name2'
        url = self.c.get_location('Group Register')
        assert url == rest_base + '/group'
        url = self.c.get_location('Group Entity', 'myname')
        assert url == rest_base + '/group/myname'
        url = self.c.get_location('Tag Register')
        assert url == rest_base + '/tag'
        url = self.c.get_location('Tag Entity', 'myname')
        assert url == rest_base + '/tag/myname'
        url = self.c.get_location('Package Search')
        assert url == search_base + '/package'

    def test_02_get_api_version(self):
        version = self.c.api_version_get()
        status = self.c.last_status
        assert status == 200
        body = self.c.last_body
        assert 'version' in body, body
        assert int(version) > 0, version

    def test_03_package_register_get(self):
        self.c.package_register_get()
        status = self.c.last_status
        assert status == 200
        body = self.c.last_body
        assert 'annakarenina' in body, body
        assert isinstance(self.c.last_message, list)
        assert 'annakarenina' in self.c.last_message

    def test_04_package_entity_get(self):
        # Check registered entity is found.
        self.c.package_entity_get('annakarenina')
        status = self.c.last_status
        assert status == 200, status
        body = self.c.last_body
        assert 'annakarenina' in body
        assert self.c.last_message
        message = self.c.last_message
        assert isinstance(message, dict)
        assert message['name'] == u'annakarenina'
        assert message['title'] == u'A Novel By Tolstoy'

    def test_05_package_entity_get_404(self):
        # Check unregistered entity is not found.
        assert_raises(CkanApiError,
                      self.c.package_entity_get,
                      'mycoffeecup')
        status = self.c.last_status
        assert status == 404, status

    @classmethod
    def _generate_pkg_name(cls):
        """Return a package name made unique by appending the current time."""
        import time
        timestr = str(time.time()).replace('.', '')
        return 'ckanclienttest' + timestr

    def test_06_package_register_post(self):
        pkg_name = self._generate_pkg_name()
        # Check package isn't registered.
        assert_raises(CkanApiError,
                      self.c.package_entity_get, pkg_name)
        status = self.c.last_status
        assert status == 404, status
        # Check registration of new package.
        package = {
            'name': pkg_name,
            'url': 'orig_url',
            'download_url': 'orig_download_url',
            'tags': ['russian', 'newtag'],
            'extras': {'genre': 'thriller', 'format': 'ebook'},
        }
        self.c.package_register_post(package)
        status = self.c.last_status
        assert status == 201, status

        # Check package is registered.
        self.c.package_entity_get(pkg_name)
        status = self.c.last_status
        assert status == 200, status
        message = self.c.last_message
        assert message
        assert 'name' in message, repr(message)
        name = message['name']
        assert name == pkg_name
        url = message['url']
        assert url == 'orig_url'
        download_url = message['download_url']
        assert download_url == 'orig_download_url'
        tags = message['tags']
        # order out is not guaranteed
        assert set(tags) == set(['newtag', 'russian']), tags
        extras = message['extras']
        assert extras == package['extras']

    def test_07_package_entity_put(self):
        # Register new package.
        pkg_name_test_07 = self._generate_pkg_name()
        package = {
            'name': pkg_name_test_07,
            'url': 'orig_url',
            'download_url': 'orig_download_url',
            'tags': ['russian'],
        }
        self.c.package_register_post(package)
        status = self.c.last_status
        assert status == 201, status

        # Check update of existing package.
        mytag = 'mytag' + pkg_name_test_07
        package = {
            'name': pkg_name_test_07,
            'url': 'new_url',
            'download_url': 'new_download_url',
            'tags': ['russian', 'tolstoy', mytag],
            'extras': {'genre': 'thriller', 'format': 'ebook'},
        }
        self.c.package_entity_put(package)
        status = self.c.last_status
        assert status == 200

        # Check package is updated.
        self.c.package_entity_get(pkg_name_test_07)
        status = self.c.last_status
        assert status == 200, status
        message = self.c.last_message
        name = message['name']
        assert name == pkg_name_test_07
        url = message['url']
        assert url == 'new_url'
        download_url = message['download_url']
        assert download_url == 'new_download_url'
        tags = message['tags']
        # order out is not guaranteed
        assert set(tags) == set(['russian', 'tolstoy', mytag]), tags
        extras = message['extras']
        assert extras == package['extras']

    def test_08_package_entity_delete(self):
        # create a package to be deleted
        pkg_name = self._generate_pkg_name()
        self.c.package_register_post({'name': pkg_name})
        status = self.c.last_status
        assert status == 201, status

        # check it is readable
        self.c.package_entity_get(pkg_name)
        assert self.c.last_status == 200, self.c.last_status

        # delete it
        self.c.package_entity_delete(pkg_name)

        # see it is not readable by another user
        assert_raises(CkanApiError,
                      self.c2.package_entity_get, pkg_name)
        # Report the status of the client that is actually being checked.
        assert self.c2.last_status == 403, self.c2.last_status

        # see it is still readable by the author (therefore pkg admin)
        self.c.package_entity_get(pkg_name)
        assert self.c.last_status == 200, self.c.last_status

    def test_09_tag_register_get(self):
        self.c.tag_register_get()
        status = self.c.last_status
        assert status == 200
        body = self.c.last_body
        assert 'russian' in body
        assert isinstance(self.c.last_message, list)
        assert 'russian' in self.c.last_message

    def test_10_pkg_search_basic(self):
        res = self.c.package_search('Novel')
        status = self.c.last_status
        assert status == 200, status
        assert_equal(list(res['results']), [u'annakarenina'])
        assert_equal(res['count'], 1)

    def test_10_pkg_search_paged(self):
        res = self.c.package_search('russian', search_options={'limit': 1})
        status = self.c.last_status
        assert status == 200, status
        all_results = list(res['results'])
        assert set(all_results) >= set([u'annakarenina', u'warandpeace']), all_results
        assert res['count'] >= 2, '%r %r' % (res, all_results)

    def test_10_pkg_search_options(self):
        res = self.c.package_search(None, search_options={'groups': 'roger'})
        status = self.c.last_status
        assert status == 200, status
        assert_equal(list(res['results']), [u'annakarenina'])
        assert_equal(res['count'], 1)

    def test_10_pkg_search_options_all_fields(self):
        res = self.c.package_search(None, search_options={'groups': 'roger',
                                                          'all_fields': True})
        status = self.c.last_status
        assert status == 200, status
        assert_equal(res['count'], 1)
        assert_equal(list(res['results'])[0]['name'], u'annakarenina')

    def test_11_package_relationship_post(self):
        self.c.package_relationship_register_get('annakarenina')
        assert self.c.last_status == 200, self.c.last_status
        assert not self.c.last_message, self.c.last_body

        # create relationship
        self.c.package_relationship_entity_post('annakarenina', 'child_of', 'warandpeace', 'some comment')
        try:
            assert self.c.last_status == 201, self.c.last_status
        finally:
            self.delete_relationships()

    def test_12_package_relationship_get(self):
        # create relationship
        self.c.package_relationship_entity_post('annakarenina', 'child_of', 'warandpeace', 'some comment')

        # read relationship
        try:
            self.c.package_relationship_register_get('annakarenina')
            assert self.c.last_status == 200, self.c.last_status
            rels = self.c.last_message
            assert len(rels) == 1, rels
            assert rels[0]['subject'] == 'annakarenina', rels[0]
            assert rels[0]['object'] == 'warandpeace', rels[0]
            assert rels[0]['type'] == 'child_of', rels[0]
            assert rels[0]['comment'] == 'some comment', rels[0]
        finally:
            self.delete_relationships()

    def test_13_package_relationship_put(self):
        # create relationship
        self.c.package_relationship_entity_post('annakarenina', 'child_of', 'warandpeace', 'some comment')
        # update relationship
        try:
            self.c.package_relationship_entity_put('annakarenina', 'child_of', 'warandpeace', 'new comment')
            assert self.c.last_status == 200, self.c.last_status

            # read relationship
            self.c.package_relationship_register_get('annakarenina')
            assert self.c.last_status == 200, self.c.last_status
            rels = self.c.last_message
            assert len(rels) == 1, rels
            assert rels[0]['comment'] == 'new comment', rels[0]
        finally:
            self.delete_relationships()

    def test_14_package_relationship_delete(self):
        # create relationship
        self.c.package_relationship_entity_post('annakarenina', 'child_of', 'warandpeace', 'some comment')
        try:
            self.c.package_relationship_entity_delete('annakarenina',
                                                      'child_of', 'warandpeace')

            # read relationship gives 404
            assert_raises(CkanApiError,
                          self.c.package_relationship_register_get,
                          'annakarenina', 'child_of', 'warandpeace')
            assert self.c.last_status == 404, self.c.last_status

            # and register of relationships is blank
            res = self.c.package_relationship_register_get('annakarenina', 'relationships', 'warandpeace')
            assert self.c.last_status == 200, self.c.last_status
            assert not res, res
        finally:
            self.delete_relationships()

    def test_15_package_edit_form_get(self):
        # The form API is only available with the dgu_form_api plugin; skip
        # when it is not installed or not enabled.
        try:
            import ckanext.dgu
        except ImportError:
            raise SkipTest('Need dgu_form_api plugin (from ckanext-dgu) installed to test form api client.')
        if 'dgu_form_api' not in config.get('ckan.plugins', ''):
            raise SkipTest('Need dgu_form_api plugin (from ckanext-dgu) enabled to test form api client.')

        res = self.c.package_edit_form_get('annakarenina')
        assert self.c.last_status == 200, self.c.last_status
        assert res, res
Exemple #33
0
def ckan_client():
    """Return a CkanClient for the configured CKAN API.

    The base location is taken from the 'ckan-api.url' config option. When
    that option is unset or empty, the data.gov.uk API URL is used.
    """
    ckan_api = config_get('ckan-api.url')
    # The option used to be read and then ignored in favour of the hard-coded
    # URL below; the URL is kept as the fallback so an unconfigured setup
    # behaves as before.
    return CkanClient(base_location=ckan_api or 'http://data.gov.uk/api')
Exemple #34
0
class Package(csv2rdf.interfaces.AuxilaryInterface):
    """ Reflects the CKAN package.
        CKAN package contains one or several CKAN resources
        Properties:
            maintainer, package_name, maintainer_email, 
            id, metadata_created, ckan, relationships,
            metadata_modified, author, author_email, 
            download_url, state, version, license_id, type,
            resources: [], tags: [], tracking_summary, name,
            isopen, license, notes_rendered, url, ckan_url,
            notes, license_title, ratings_average,
            extras: {geographic_coverage, temporal_coverage-from,
            temporal_granularity, date_updated, published_via,
            mandate, precision, geographic_granularity,
            published_by, taxonomy_url, update_frequency,
            temporal_coverage-to, date_update_future, date_released},
            license_url, ratings_count, title, revision_id
            
            ckan - <ckanclient.CkanClient object at 0x972ac8c>
    """
    def __init__(self, package_name):
        self.name = package_name
        self.ckan = CkanClient(base_location=csv2rdf.config.config.ckan_api_url,
                               api_key=csv2rdf.config.config.ckan_api_key)
        self.initialize()

    def initialize(self):
        """Fetch the package entity from CKAN and unpack it onto this object."""
        entity = self.ckan.package_entity_get(self.name)
        self.unpack_object_to_self(entity)

    def get_metadata(self, dataset=None):
        """Return the RDF metadata of `dataset` (default: this package).

        The metadata is served from the local cache when present; otherwise
        it is downloaded from ``<ckan_base_url>/dataset/<dataset>.rdf`` and
        cached.

        Raises AssertionError when the download does not answer with HTTP 200.
        """
        if dataset is None:
            dataset = self.name

        dataset_meta = self.cache_metadata_get(dataset)
        if not dataset_meta:
            url = csv2rdf.config.config.ckan_base_url + "/dataset/" + dataset + ".rdf"
            r = requests.get(url)
            # Explicit check instead of a bare `assert`, which is removed
            # under `python -O` and would let an error page be cached as
            # metadata. The exception type is unchanged for existing callers.
            if r.status_code != requests.codes.ok:
                raise AssertionError(
                    "Could not fetch metadata from %s (HTTP status %s)"
                    % (url, r.status_code))
            dataset_meta = r.content
            self.cache_metadata_put(dataset, dataset_meta)
        return dataset_meta

    def cache_metadata_get(self, dataset):
        """Return the cached metadata of `dataset`, or False when not cached."""
        db = csv2rdf.database.DatabasePlainFiles(csv2rdf.config.config.data_packages_metadata_path)
        if not db.is_exists(dataset):
            return False
        return db.loadDbase(dataset)

    def cache_metadata_put(self, dataset, metadata):
        """Store `metadata` in the metadata cache under the name `dataset`."""
        db = csv2rdf.database.DatabasePlainFiles(csv2rdf.config.config.data_packages_metadata_path)
        db.saveDbase(dataset, metadata)

    def download_all_resources(self):
        """
            Overwrites existing files!

            Downloads every resource of the package into the resource
            directory. A failing download is reported on stdout and the
            remaining resources are still processed.
        """
        db = csv2rdf.database.DatabasePlainFiles(csv2rdf.config.config.resource_dir)
        for resource in self.resources:
            url = resource['url']
            filename = self.extract_filename_url(url)
            try:
                r = requests.get(url, timeout=self.timeout)
                db.saveDbaseRaw(filename, r.content)
            except Exception as e:
                # Best effort per resource; Exception (not BaseException) so
                # that Ctrl-C / SystemExit still abort the whole download.
                print("Could not get the resource " + str(resource['id']) + " ! " + str(e))

    #
    # Interface - getters
    #

    def get_ckan_url(self):
        """Return the URL of this package's dataset page."""
        return str(self.ckan_base_url) + '/dataset/' + str(self.name)
Exemple #35
0
def command(ckan_api_url):
    """Check that every known ONS statistics source maps to a publisher.

    Each source name in the list below is resolved with
    ``OnsImporter._source_to_publisher_`` against the CKAN instance at
    `ckan_api_url`. Names that do not resolve are logged as errors, and a
    summary with the number of errors and of checked sources is logged at
    the end.
    """
    from ckanext.dgu.ons.importer import OnsImporter
    # sources pasted here from http://www.statistics.gov.uk/hub/statistics-producers/index.html
    sources = '''
Agri-Food and Biosciences Institute
Agriculture and Rural Development (Northern Ireland)
Business, Innovation and Skills
Cabinet Office
Communities and Local Government
Culture, Media and Sport
Defence
Education
Education (Northern Ireland)
Employment and Learning (Northern Ireland)
Energy and Climate Change
Enterprise, Trade and Investment (Northern Ireland)
Environment (Northern Ireland)
Environment, Food and Rural Affairs
Food Standards Agency
Forestry Commission
Health
Health and Safety Executive
Health Protection Agency
Health, Social Service and Public Safety (Northern Ireland)
HM Revenue and Customs
HM Treasury
Home Office
ISD Scotland (part of NHS National Services Scotland)
International Development
Justice
Justice (Northern Ireland)
Marine Management Organisation
National Records of Scotland
National Treatment Agency
Northern Ireland Statistics and Research Agency
Office for National Statistics
Office for Rail Regulation
Office for Standards in Education, Children\'s Services and Skills
Office of Qualifications and Examinations Regulation
Office of the First and Deputy First Minister
Passenger Focus
Police Service of Northern Ireland (PSNI)
Public Health England
Regional Development (Northern Ireland)
Scottish Government
Social Development (Northern Ireland)
Transport
Welsh Government
Work and Pensions
Cancer Registry (Northern Ireland)
Civil Aviation Authority
Child Maintenance and Enforcement Commission
Health and Social Care Information Centre
Higher Education Statistics Agency
Independent Police Complaints Commission
NHS England
Scottish Consortium for Learning Disability
Student Loans Company
Eurostat
'''
    # These are extra sources seen in the past ONS data, picked up from
    # the ons_merge_duplicates tool:
    sources += '''
Cancer Registry Northern Ireland
Welsh Assembly Government
        '''
    pasted_lines_to_ignore = (
        'Government Statistical Departments',
        'Other statistics producers',
        'International statistics organisations',
    )
    ckanclient = CkanClient(base_location=ckan_api_url)
    # Strip each pasted line first, then drop blanks and section headings, so
    # that both the ignore check and the final count operate on the cleaned
    # names (the raw split also contains empty and whitespace-only lines).
    stripped_lines = [line.strip() for line in sources.split('\n')]
    source_names = [name for name in stripped_lines
                    if name and name not in pasted_lines_to_ignore]
    num_errors = 0
    for source in source_names:
        publisher = OnsImporter._source_to_publisher_(source, ckanclient)
        if not publisher:
            log.error('Publisher not found: %s', source)
            num_errors += 1
    log.info('Completed with %i errors from %i sources', num_errors,
             len(source_names))
Exemple #36
0
class CkanLoader(object):
    """
    Directs a CKAN service client to put obtained datasets on CKAN.

    Subclasses override obtain_datasets() to fill self.datasets with
    dataset dicts (create_dataset() builds one); run() then registers or
    updates each of them through self.ckanclient.
    """

    usage = '''usage: %prog OPTIONS'''

    # Latin-1 letters mapped to 7-bit ASCII stand-ins. Table taken from
    # http://code.activestate.com/recipes/251871/ -- the recipe's mappings
    # for symbols (currency signs, fractions, quotes, ...) are not enabled
    # here, so substitute_ascii_equivalents() drops those characters.
    _ASCII_EQUIVALENTS = {
        0xc0: 'A', 0xc1: 'A', 0xc2: 'A', 0xc3: 'A', 0xc4: 'A', 0xc5: 'A',
        0xc6: 'Ae', 0xc7: 'C',
        0xc8: 'E', 0xc9: 'E', 0xca: 'E', 0xcb: 'E',
        0xcc: 'I', 0xcd: 'I', 0xce: 'I', 0xcf: 'I',
        0xd0: 'Th', 0xd1: 'N',
        0xd2: 'O', 0xd3: 'O', 0xd4: 'O', 0xd5: 'O', 0xd6: 'O', 0xd8: 'O',
        0xd9: 'U', 0xda: 'U', 0xdb: 'U', 0xdc: 'U',
        0xdd: 'Y', 0xde: 'th', 0xdf: 'ss',
        0xe0: 'a', 0xe1: 'a', 0xe2: 'a', 0xe3: 'a', 0xe4: 'a', 0xe5: 'a',
        0xe6: 'ae', 0xe7: 'c',
        0xe8: 'e', 0xe9: 'e', 0xea: 'e', 0xeb: 'e',
        0xec: 'i', 0xed: 'i', 0xee: 'i', 0xef: 'i',
        0xf0: 'th', 0xf1: 'n',
        0xf2: 'o', 0xf3: 'o', 0xf4: 'o', 0xf5: 'o', 0xf6: 'o', 0xf8: 'o',
        0xf9: 'u', 0xfa: 'u', 0xfb: 'u', 0xfc: 'u',
        0xfd: 'y', 0xfe: 'th', 0xff: 'y',
    }

    def __init__(self):
        """Sets up options and init the CKAN service client."""
        parser = OptionParser(self.usage)
        self.add_options(parser)
        (self.options, self.args) = parser.parse_args()
        self.init_ckanclient()

    def add_options(self, parser):
        """Adds options for CKAN service location and REST API key."""
        parser.add_option(
            '--ckan-api-location',
            dest='ckan_api_location',
            default='http://127.0.0.1:5000/api',
            help="""The location of working CKAN REST API.""")
        parser.add_option(
            '--ckan-api-key',
            dest='ckan_api_key',
            help="""A valid CKAN REST API key.""")
        # NOTE: the 'confimation' spelling of the two dest names below is
        # what the rest of the class reads from self.options; keep in sync.
        parser.add_option(
            '--no-create-confirmation',
            dest='no_create_confimation',
            action='store_true',
            help="""Don't prompt for confirmation when registering a new dataset.""")
        parser.add_option(
            '--no-update-confirmation',
            dest='no_update_confimation',
            action='store_true',
            help="""Don't prompt for confirmation when updating a registered dataset.""")

    def init_ckanclient(self):
        """Init the CKAN client from options (warns about missing ones)."""
        if not self.options.ckan_api_location:
            print("Warning: CKAN API location not provided.")
        if not self.options.ckan_api_key:
            print("Warning: CKAN API key not provided.")
        self.ckanclient = CkanClient(
            base_location=self.options.ckan_api_location,
            api_key=self.options.ckan_api_key,
        )

    def run(self):
        """Obtain datasets and put them on CKAN.

        A KeyboardInterrupt (Ctrl-C) at any point ends the run quietly.
        """
        try:
            self.datasets = []
            self.obtain_datasets()
            print("Putting %s datasets on CKAN running at %s" % (
                len(self.datasets), self.options.ckan_api_location))
            self.put_datasets_on_ckan()
        except KeyboardInterrupt:
            print("")
            print("exiting...")
            print("")

    def obtain_datasets(self):
        """Abstract method for obtaining datasets.

        Raises NotImplementedError (an Exception subclass) unless overridden.
        """
        raise NotImplementedError("Abstract method not implemented.")

    def put_datasets_on_ckan(self):
        """Uses CKAN client to register (or update) obtained datasets.

        Each dataset in self.datasets is looked up by name: a 200 leads to
        an update, a 404 to a registration, anything else is reported as an
        error. Raises Exception when a request leaves no status or error
        information on the client at all.
        """
        print("")
        sleep(1)
        for dataset in self.datasets:
            try:
                self.ckanclient.dataset_entity_get(dataset['name'])
            except CkanApiError:
                # Deliberately ignored: the outcome of the lookup is read
                # from the client's last_status / last_*_error just below.
                pass
            client = self.ckanclient
            if client.last_status == 200:
                self._update_dataset(dataset)
            elif client.last_status == 404 or '404' in str(client.last_url_error):
                self._register_dataset(dataset)
            else:
                # The lookup itself failed; no special "not authorised"
                # message is given at this level.
                self._report_failure(check_auth=False)

    def _update_dataset(self, dataset):
        """Update an already registered dataset, asking first unless disabled."""
        self._show_dataset(dataset, "Dataset '%s' is already registered")
        if not self.options.no_update_confimation and not self._confirmed(
                dataset,
                "Do you want to update this dataset with CKAN now? [y/N] "):
            return
        print("Updating dataset...")
        self.ckanclient.dataset_entity_put(dataset)
        if self.ckanclient.last_status == 200:
            print("Updated dataset '%s' OK." % dataset['name'])
            sleep(1)
        else:
            self._report_failure()

    def _register_dataset(self, dataset):
        """Register a dataset CKAN does not know, asking first unless disabled."""
        self._show_dataset(dataset, "Dataset '%s' not currently registered")
        if not self.options.no_create_confimation and not self._confirmed(
                dataset,
                "Do you want to register this dataset with CKAN now? [y/N] "):
            return
        print("Registering dataset...")
        self.ckanclient.dataset_register_post(dataset)
        if self.ckanclient.last_status in [200, 201]:
            print("Registered dataset '%s' OK." % dataset['name'])
            sleep(1)
        else:
            self._report_failure()

    def _show_dataset(self, dataset, status_template):
        """Print the status line for the dataset followed by its contents."""
        print(status_template % dataset['name'])
        print("")
        pprint.pprint(dataset)
        print("")

    def _confirmed(self, dataset, question):
        """Ask a y/N question; report the skip and return False unless 'y'."""
        answer = raw_input(question)
        if answer and answer.lower()[0] == 'y':
            return True
        print("Skipping '%s' dataset..." % dataset['name'])
        print("")
        sleep(1)
        return False

    def _report_failure(self, check_auth=True):
        """Print why the last CKAN request failed, then pause so it is seen.

        Raises Exception when the client recorded neither a status nor an
        error for the request.
        """
        client = self.ckanclient
        if check_auth and (client.last_status == 403
                           or '403' in str(client.last_url_error)):
            print("Error: Not authorised. Check your API key.")
            sleep(4)
        elif client.last_http_error:
            print("Error: CKAN returned status code %s: %s" % (
                client.last_status, client.last_http_error))
            sleep(3)
        elif client.last_url_error:
            print("Error: URL problems: %s" % client.last_url_error)
            sleep(3)
        else:
            raise Exception("Error: CKAN request didn't work at all.")

    def create_dataset(self, name, title='', url='', maintainer='',
            maintainer_email='', author='', author_email='', notes='',
            tags=None, extras=None, license_id=None, license=None,
            resources=None):
        """Returns a CKAN REST API dataset from method arguments.

        tags, extras and resources default to a fresh empty list / dict /
        list per call, so returned datasets never share containers.
        license_id takes precedence over license; if both are None neither
        key is set. Raises Exception if tags is not a list or extras is
        not a dict.
        """
        tags = [] if tags is None else tags
        extras = {} if extras is None else extras
        resources = [] if resources is None else resources
        # The values are wrapped in 1-tuples so that a tuple argument is
        # reported in the message instead of breaking the % formatting.
        if not isinstance(tags, list):
            raise Exception("Dataset tags must be a list: %s" % (tags,))
        if not isinstance(extras, dict):
            raise Exception("Dataset extras must be a dict: %s" % (extras,))
        dataset = {}
        dataset['name'] = self.coerce_dataset_name(name)
        dataset['title'] = title
        dataset['url'] = url
        dataset['notes'] = notes
        dataset['maintainer'] = maintainer
        dataset['maintainer_email'] = maintainer_email
        dataset['author'] = author
        dataset['author_email'] = author_email
        dataset['tags'] = tags
        dataset['extras'] = extras
        # Pre and post licenses servicization.
        if license_id is not None:
            dataset['license_id'] = license_id
        elif license is not None:
            dataset['license'] = license
        dataset['resources'] = resources
        return dataset

    def coerce_dataset_name(self, name):
        """Converts unicode string to valid CKAN dataset name.

        Currently: ASCII substitution followed by lower-casing.
        """
        # Todo: Probably needs to be finished off.
        name = self.substitute_ascii_equivalents(name)
        name = name.lower()
        return name

    def substitute_ascii_equivalents(self, unicrap):
        """This takes a UNICODE string and replaces Latin-1 characters with
            something equivalent in 7-bit ASCII. It returns a plain ASCII string.
            All characters in the standard 7-bit ASCII range are preserved.
            The Latin-1 accented letters listed in _ASCII_EQUIVALENTS are
            converted to unaccented equivalents. Anything else (Latin-1
            symbols, characters beyond Latin-1) is deleted.
        """
        pieces = []
        for char in unicrap:
            code = ord(char)
            if code in self._ASCII_EQUIVALENTS:
                pieces.append(self._ASCII_EQUIVALENTS[code])
            elif code < 0x80:
                pieces.append(str(char))
            # any other character has no ASCII stand-in and is dropped
        return ''.join(pieces)

    def create_dataset_resource(self, url='', format='', hash='', description=''):
        """Returns a CKAN resource dict built from the given fields."""
        return {
            'url': url,
            'format': format,
            'hash': hash,
            'description': description,
        }
Exemple #37
0
 def __init__(self, package_name):
     """Remember the package name, build the CKAN client, then initialize.

     The client's API URL and API key are read from csv2rdf.config.config.
     """
     settings = csv2rdf.config.config
     self.name = package_name
     self.ckan = CkanClient(
         base_location=settings.ckan_api_url,
         api_key=settings.ckan_api_key,
     )
     self.initialize()
Exemple #38
0
class TestCkanClient(CkanServerCase):
    """Tests for CkanClient run against a CKAN server started for the class.

    setup_class starts the server and recreates its test data; the tests
    then use two clients with different API keys (self.c and self.c2).
    """

    @classmethod
    def setup_class(cls):
        """Start the CKAN server, load the test data and create the clients."""
        cls.pid = cls._start_ckan_server()
        cls.test_base_location = 'http://127.0.0.1:5000/api'
        cls._wait_for_url(url=cls.test_base_location)
        cls._recreate_ckan_server_testdata(config_path)
        # this is api key created for tester user by create-test-data in ckan
        test_api_key = 'tester'
        test_api_key2 = 'tester2'

        cls.c = CkanClient(
            base_location=cls.test_base_location,
            api_key=test_api_key,
            is_verbose=True,
        )
        cls.c2 = CkanClient(
            base_location=cls.test_base_location,
            api_key=test_api_key2,
            is_verbose=True,
        )

    @classmethod
    def teardown_class(cls):
        """Stop the CKAN server started by setup_class."""
        cls._stop_ckan_server(cls.pid)

    def delete_relationships(self):
        """Delete every relationship currently registered for 'annakarenina'."""
        self.c.package_relationship_register_get('annakarenina')
        if self.c.last_status == 200:
            if self.c.last_message:
                for rel_dict in self.c.last_message:
                    self.c.package_relationship_entity_delete(
                        rel_dict['subject'],
                        rel_dict['type'],
                        rel_dict['object'])

    def test_01_get_locations(self):
        rest_base = self.test_base_location + '/rest'
        search_base = self.test_base_location + '/search'
        url = self.c.get_location('Base')
        assert url == self.test_base_location, url
        url = self.c.get_location('Package Register')
        assert url == rest_base + '/package'
        url = self.c.get_location('Package Entity', 'myname')
        assert url == rest_base + '/package/myname'
        url = self.c.get_location('Package Entity', 'myname', 'relationships')
        assert url == rest_base + '/package/myname/relationships'
        url = self.c.get_location('Package Entity', 'myname', 'relationships',
                                  'name2')
        assert url == rest_base + '/package/myname/relationships/name2'
        url = self.c.get_location('Package Entity', 'myname', 'child_of',
                                  'name2')
        assert url == rest_base + '/package/myname/child_of/name2'
        url = self.c.get_location('Group Register')
        assert url == rest_base + '/group'
        url = self.c.get_location('Group Entity', 'myname')
        assert url == rest_base + '/group/myname'
        url = self.c.get_location('Tag Register')
        assert url == rest_base + '/tag'
        url = self.c.get_location('Tag Entity', 'myname')
        assert url == rest_base + '/tag/myname'
        url = self.c.get_location('Package Search')
        assert url == search_base + '/package'

    def test_02_get_api_version(self):
        version = self.c.api_version_get()
        status = self.c.last_status
        assert status == 200
        body = self.c.last_body
        assert 'version' in body, body
        assert int(version) > 0, version

    def test_03_package_register_get(self):
        self.c.package_register_get()
        status = self.c.last_status
        assert status == 200
        body = self.c.last_body
        assert 'annakarenina' in body, body
        assert isinstance(self.c.last_message, list)
        assert 'annakarenina' in self.c.last_message

    def test_04_package_entity_get(self):
        # Check registered entity is found.
        self.c.package_entity_get('annakarenina')
        status = self.c.last_status
        assert status == 200, status
        body = self.c.last_body
        assert 'annakarenina' in body
        assert self.c.last_message
        message = self.c.last_message
        assert isinstance(message, dict)
        assert message['name'] == u'annakarenina'
        assert message['title'] == u'A Novel By Tolstoy'

    def test_05_package_entity_get_404(self):
        # Check unregistered entity is not found.
        assert_raises(CkanApiError, self.c.package_entity_get, 'mycoffeecup')
        status = self.c.last_status
        assert status == 404, status

    @classmethod
    def _generate_pkg_name(cls):
        """Return a package name made unique by appending the current time."""
        import time
        timestr = str(time.time()).replace('.', '')
        return 'ckanclienttest' + timestr

    def test_06_package_register_post(self):
        pkg_name = self._generate_pkg_name()
        # Check package isn't registered.
        assert_raises(CkanApiError, self.c.package_entity_get, pkg_name)
        status = self.c.last_status
        assert status == 404, status
        # Check registration of new package.
        package = {
            'name': pkg_name,
            'url': 'orig_url',
            'download_url': 'orig_download_url',
            'tags': ['russian', 'newtag'],
            'extras': {
                'genre': 'thriller',
                'format': 'ebook'
            },
        }
        self.c.package_register_post(package)
        status = self.c.last_status
        assert status == 201, status

        # Check package is registered.
        self.c.package_entity_get(pkg_name)
        status = self.c.last_status
        assert status == 200, status
        message = self.c.last_message
        assert message
        assert 'name' in message, repr(message)
        name = message['name']
        assert name == pkg_name
        url = message['url']
        assert url == 'orig_url'
        download_url = message['download_url']
        assert download_url == 'orig_download_url'
        tags = message['tags']
        # order out is not guaranteed
        assert set(tags) == set(['newtag', 'russian']), tags
        extras = message['extras']
        assert extras == package['extras']

    def test_07_package_entity_put(self):
        # Register new package.
        pkg_name_test_07 = self._generate_pkg_name()
        package = {
            'name': pkg_name_test_07,
            'url': 'orig_url',
            'download_url': 'orig_download_url',
            'tags': ['russian'],
        }
        self.c.package_register_post(package)
        status = self.c.last_status
        assert status == 201, status

        # Check update of existing package.
        mytag = 'mytag' + pkg_name_test_07
        package = {
            'name': pkg_name_test_07,
            'url': 'new_url',
            'download_url': 'new_download_url',
            'tags': ['russian', 'tolstoy', mytag],
            'extras': {
                'genre': 'thriller',
                'format': 'ebook'
            },
        }
        self.c.package_entity_put(package)
        status = self.c.last_status
        assert status == 200

        # Check package is updated.
        self.c.package_entity_get(pkg_name_test_07)
        status = self.c.last_status
        assert status == 200, status
        message = self.c.last_message
        name = message['name']
        assert name == pkg_name_test_07
        url = message['url']
        assert url == 'new_url'
        download_url = message['download_url']
        assert download_url == 'new_download_url'
        tags = message['tags']
        # order out is not guaranteed
        assert set(tags) == set(['russian', 'tolstoy', mytag]), tags
        extras = message['extras']
        assert extras == package['extras']

    def test_08_package_entity_delete(self):
        # create a package to be deleted
        pkg_name = self._generate_pkg_name()
        self.c.package_register_post({'name': pkg_name})
        status = self.c.last_status
        assert status == 201, status

        # check it is readable
        self.c.package_entity_get(pkg_name)
        assert self.c.last_status == 200, self.c.last_status

        # delete it
        self.c.package_entity_delete(pkg_name)

        # see it is not readable by another user
        assert_raises(CkanApiError, self.c2.package_entity_get, pkg_name)
        # report the status of the client that was actually checked (c2)
        assert self.c2.last_status == 403, self.c2.last_status

        # see it is still readable by the author (therefore pkg admin)
        self.c.package_entity_get(pkg_name)
        assert self.c.last_status == 200, self.c.last_status

    def test_09_tag_register_get(self):
        self.c.tag_register_get()
        status = self.c.last_status
        assert status == 200
        body = self.c.last_body
        assert 'russian' in body
        assert isinstance(self.c.last_message, list)
        assert 'russian' in self.c.last_message

    def test_10_pkg_search_basic(self):
        res = self.c.package_search('Novel')
        status = self.c.last_status
        assert status == 200, status
        assert_equal(list(res['results']), [u'annakarenina'])
        assert_equal(res['count'], 1)

    def test_10_pkg_search_paged(self):
        res = self.c.package_search('russian', search_options={'limit': 1})
        status = self.c.last_status
        assert status == 200, status
        all_results = list(res['results'])
        assert set(all_results) >= set([u'annakarenina', u'warandpeace'
                                        ]), all_results
        assert res['count'] >= 2, '%r %r' % (res, all_results)

    def test_10_pkg_search_options(self):
        res = self.c.package_search(None, search_options={'groups': 'roger'})
        status = self.c.last_status
        assert status == 200, status
        assert_equal(list(res['results']), [u'annakarenina'])
        assert_equal(res['count'], 1)

    def test_10_pkg_search_options_all_fields(self):
        res = self.c.package_search(None,
                                    search_options={
                                        'groups': 'roger',
                                        'all_fields': True
                                    })
        status = self.c.last_status
        assert status == 200, status
        assert_equal(res['count'], 1)
        assert_equal(list(res['results'])[0]['name'], u'annakarenina')

    def test_11_package_relationship_post(self):
        self.c.package_relationship_register_get('annakarenina')
        assert self.c.last_status == 200, self.c.last_status
        assert not self.c.last_message, self.c.last_body

        # create relationship
        self.c.package_relationship_entity_post('annakarenina',
                                                'child_of',
                                                'warandpeace',
                                                'some comment')
        try:
            assert self.c.last_status == 201, self.c.last_status
        finally:
            # always clean up so later tests start without relationships
            self.delete_relationships()

    def test_12_package_relationship_get(self):
        # create relationship
        self.c.package_relationship_entity_post('annakarenina',
                                                'child_of',
                                                'warandpeace',
                                                'some comment')

        # read relationship
        try:
            self.c.package_relationship_register_get('annakarenina')
            assert self.c.last_status == 200, self.c.last_status
            rels = self.c.last_message
            assert len(rels) == 1, rels
            assert rels[0]['subject'] == 'annakarenina', rels[0]
            assert rels[0]['object'] == 'warandpeace', rels[0]
            assert rels[0]['type'] == 'child_of', rels[0]
            assert rels[0]['comment'] == 'some comment', rels[0]
        finally:
            self.delete_relationships()

    def test_13_package_relationship_put(self):
        # create relationship
        self.c.package_relationship_entity_post('annakarenina',
                                                'child_of',
                                                'warandpeace',
                                                'some comment')
        # update relationship
        try:
            self.c.package_relationship_entity_put(
                'annakarenina', 'child_of', 'warandpeace', 'new comment')
            assert self.c.last_status == 200, self.c.last_status

            # read relationship
            self.c.package_relationship_register_get('annakarenina')
            assert self.c.last_status == 200, self.c.last_status
            rels = self.c.last_message
            assert len(rels) == 1, rels
            assert rels[0]['comment'] == 'new comment', rels[0]
        finally:
            self.delete_relationships()

    def test_14_package_relationship_delete(self):
        # create relationship
        self.c.package_relationship_entity_post('annakarenina',
                                                'child_of',
                                                'warandpeace',
                                                'some comment')
        try:
            self.c.package_relationship_entity_delete('annakarenina',
                                                      'child_of',
                                                      'warandpeace')

            # read relationship gives 404
            assert_raises(CkanApiError,
                          self.c.package_relationship_register_get,
                          'annakarenina', 'child_of', 'warandpeace')
            assert self.c.last_status == 404, self.c.last_status

            # and register of relationships is blank
            res = self.c.package_relationship_register_get(
                'annakarenina', 'relationships', 'warandpeace')
            assert self.c.last_status == 200, self.c.last_status
            assert not res, res
        finally:
            self.delete_relationships()

    def test_15_package_edit_form_get(self):
        # Skipped unless ckanext-dgu is importable and its dgu_form_api
        # plugin is listed in the 'ckan.plugins' config setting.
        try:
            import ckanext.dgu
        except ImportError:
            raise SkipTest(
                'Need dgu_form_api plugin (from ckanext-dgu) installed to test form api client.'
            )
        if 'dgu_form_api' not in config.get('ckan.plugins', ''):
            raise SkipTest(
                'Need dgu_form_api plugin (from ckanext-dgu) enabled to test form api client.'
            )

        res = self.c.package_edit_form_get('annakarenina')
        assert self.c.last_status == 200, self.c.last_status
        assert res, res
Exemple #39
0
 def __init__(self):
     """Build the CKAN client from the URL and key in csv2rdf.config.config."""
     settings = csv2rdf.config.config
     self.ckan = CkanClient(
         base_location=settings.ckan_api_url,
         api_key=settings.ckan_api_key,
     )
Exemple #40
0
class S3Bounce(OFSInterface):
    """
    Use ckanext-storage API to bounce to an S3 store

    The CKAN API (through CkanClient) supplies the upload host and request
    headers; the file content itself is then sent with an HTTP PUT.
    """
    def __init__(self, api_base):
        """Create the CKAN client for the API located at api_base."""
        self.ckan = CkanClient(base_location=api_base)

    def put_stream(self, bucket, label, fp, metadata={}, cb=None, num_cb=None):
        """Upload the content of the file object fp to "/<bucket>/<label>".

        Steps: store metadata through the CKAN storage API, read fp once to
        compute its MD5 and size, ask CKAN for the target host and headers,
        PUT the content, then fetch and pretty-print the stored metadata.

        fp must support seek(), tell() and read(). metadata may carry a
        "_format" entry, used as the Content-Type (default
        "application/octet-stream"). cb, when given, is called as
        cb(bytes_sent, total_size) to report progress, with num_cb
        controlling how often.

        Sets self.size, self.md5 and, after a 2xx response, self.etag.
        Raises Exception when the response is neither retryable nor 2xx,
        or when the returned ETag does not match the computed MD5.
        """
        # NOTE(review): the default is {} rather than None, so this
        # "_owner" fallback only applies when a caller passes
        # metadata=None explicitly -- confirm that is intended.
        if metadata is None:
            metadata = {"_owner": getpass.getuser()}

        path = "/" + bucket + "/" + label

        content_type = metadata.get("_format", "application/octet-stream")

        metadata = self.ckan.storage_metadata_set(path, metadata)
        BufferSize = 65536  ## set to something very small to make sure
        ## chunking is working properly

        headers = {'Content-Type': content_type}

        #if content_type is None:
        #    content_type = mimetypes.guess_type(filename)[0] or "text/plain"
        #headers['Content-Type'] = content_type
        #if content_encoding is not None:
        #   headers['Content-Encoding'] = content_encoding

        # First pass over the file: compute the MD5 digest and total size
        # in BufferSize chunks, then rewind for the actual upload.
        m = md5()
        fp.seek(0)
        s = fp.read(BufferSize)
        while s:
            m.update(s)
            s = fp.read(BufferSize)
        self.size = fp.tell()
        fp.seek(0)

        self.md5 = m.hexdigest()
        # encodestring() appends a newline, which must not end up in the header
        headers['Content-MD5'] = base64.encodestring(m.digest()).rstrip('\n')
        headers['Content-Length'] = str(self.size)

        headers['Expect'] = '100-Continue'

        # CKAN returns the host to upload to and the headers to send with it.
        host, headers = self.ckan.storage_auth_get(path, headers)

        def sender(http_conn, method, path, data, headers):
            """Send the request line, headers and file content over http_conn.

            Passed to the connection's _mexe() below. Returns the response
            for 500/503 or redirect replies (so the caller can retry) and
            for 2xx replies whose ETag matches the MD5; raises Exception
            otherwise. The data argument is not used: the body is read
            from fp.
            """
            http_conn.putrequest(method, path)
            for key in headers:
                http_conn.putheader(key, headers[key])
            http_conn.endheaders()
            fp.seek(0)
            http_conn.set_debuglevel(
                0)  ### XXX set to e.g. 4 to see what going on
            if cb:
                # cb_count is the number of chunks sent between two
                # progress callbacks; -1 means call back after every chunk.
                if num_cb > 2:
                    cb_count = self.size / BufferSize / (num_cb - 2)
                elif num_cb < 0:
                    cb_count = -1
                else:
                    cb_count = 0
                i = total_bytes = 0
                cb(total_bytes, self.size)
            l = fp.read(BufferSize)
            while len(l) > 0:
                http_conn.send(l)
                if cb:
                    total_bytes += len(l)
                    i += 1
                    if i == cb_count or cb_count == -1:
                        cb(total_bytes, self.size)
                        i = 0
                l = fp.read(BufferSize)
            if cb:
                # final callback once everything has been sent
                cb(total_bytes, self.size)
            response = http_conn.getresponse()
            body = response.read()
            # rewind so the content can be sent again if the request is retried
            fp.seek(0)
            if response.status == 500 or response.status == 503 or \
                    response.getheader('location'):
                # we'll try again
                return response
            elif response.status >= 200 and response.status <= 299:
                self.etag = response.getheader('etag')
                # the ETag is compared against the quoted hex MD5 digest
                if self.etag != '"%s"' % self.md5:
                    raise Exception('ETag from S3 did not match computed MD5')
                return response
            else:
                #raise provider.storage_response_error(
                #    response.status, response.reason, body)
                raise Exception(response.status, response.reason, body)

        # NOTE(review): the key id and secret are placeholder strings;
        # presumably the real authorization is carried by the headers
        # obtained from storage_auth_get above -- confirm.
        awsc = AWSAuthConnection(host,
                                 aws_access_key_id="key_id",
                                 aws_secret_access_key="secret")

        awsc._mexe('PUT', path, None, headers, sender=sender)

        # Fetch the stored metadata (empty update) and print it.
        metadata = self.ckan.storage_metadata_update(path, {})
        from pprint import pprint
        pprint(metadata)
Exemple #41
0
 def __init__(self, api_base):
     """Create the CKAN client for the API located at api_base."""
     client = CkanClient(base_location=api_base)
     self.ckan = client
class TestCkanClient(CkanServerCase):
    @classmethod
    def setup_class(self):
        """Start a CKAN server, load its test data and build two API clients.

        Sets ``pid``, ``test_base_location``, ``c`` and ``c2`` on the class.

        If any step after the server start fails, the server is stopped
        again before the error propagates: xunit-style runners do not call
        ``teardown_class`` when ``setup_class`` raises, so the process would
        otherwise be left running.
        """
        self.pid = self._start_ckan_server()
        try:
            self.test_base_location = "http://127.0.0.1:5000/api"
            self._wait_for_url(url=self.test_base_location)
            self._recreate_ckan_server_testdata(config_path)
            # These are the api keys created for the tester users by
            # create-test-data in ckan.
            test_api_key = "tester"
            test_api_key2 = "tester2"

            self.c = CkanClient(base_location=self.test_base_location, api_key=test_api_key, is_verbose=True)
            self.c2 = CkanClient(base_location=self.test_base_location, api_key=test_api_key2, is_verbose=True)
        except BaseException:
            # Mirror teardown_class so a failed setup does not leak the server.
            self._stop_ckan_server(self.pid)
            raise

    @classmethod
    def teardown_class(self):
        """Stop the CKAN server whose pid was stored on the class."""
        server_pid = self.pid
        self._stop_ckan_server(server_pid)

    def delete_relationships(self):
        """Delete each relationship listed in the "annakarenina" register.

        Nothing is deleted unless the register request ends with status 200
        and a non-empty decoded message.
        """
        client = self.c
        client.package_relationship_register_get("annakarenina")
        if client.last_status != 200:
            return
        if not client.last_message:
            return
        for relationship in client.last_message:
            client.package_relationship_entity_delete(
                relationship["subject"],
                relationship["type"],
                relationship["object"],
            )

    def test_01_get_locations(self):
        # Check the URL that get_location() builds for each named location.
        # Every case reports its arguments and the URL actually returned, so
        # a failure identifies the offending location.
        rest_base = self.test_base_location + "/rest"
        search_base = self.test_base_location + "/search"
        expectations = [
            (("Base",), self.test_base_location),
            (("Package Register",), rest_base + "/package"),
            (("Package Entity", "myname"), rest_base + "/package/myname"),
            (
                ("Package Entity", "myname", "relationships"),
                rest_base + "/package/myname/relationships",
            ),
            (
                ("Package Entity", "myname", "relationships", "name2"),
                rest_base + "/package/myname/relationships/name2",
            ),
            (
                ("Package Entity", "myname", "child_of", "name2"),
                rest_base + "/package/myname/child_of/name2",
            ),
            (("Group Register",), rest_base + "/group"),
            (("Group Entity", "myname"), rest_base + "/group/myname"),
            (("Tag Register",), rest_base + "/tag"),
            (("Tag Entity", "myname"), rest_base + "/tag/myname"),
            (("Package Search",), search_base + "/package"),
        ]
        for location_args, expected_url in expectations:
            url = self.c.get_location(*location_args)
            assert url == expected_url, (location_args, url)

    def test_02_get_api_version(self):
        # The version request must answer 200, mention "version" in the raw
        # body, and return something that converts to a positive integer.
        client = self.c
        reported_version = client.api_version_get()
        http_status = client.last_status
        assert http_status == 200
        raw_body = client.last_body
        assert "version" in raw_body, raw_body
        assert int(reported_version) > 0, reported_version

    def test_03_package_register_get(self):
        # The package register must list "annakarenina", both in the raw
        # response body and in the decoded message.
        self.c.package_register_get()
        status = self.c.last_status
        assert status == 200, status
        body = self.c.last_body
        assert "annakarenina" in body, body
        message = self.c.last_message
        # isinstance instead of an exact type() comparison, so a list
        # subclass is accepted and the failure message names the actual type.
        assert isinstance(message, list), type(message)
        assert "annakarenina" in message, message

    def test_04_package_entity_get(self):
        # Check registered entity is found.
        self.c.package_entity_get("annakarenina")
        status = self.c.last_status
        assert status == 200, status
        body = self.c.last_body
        assert "annakarenina" in body, body
        message = self.c.last_message
        assert message, message
        # isinstance instead of an exact type() comparison, so a dict
        # subclass is accepted and the failure message names the actual type.
        assert isinstance(message, dict), type(message)
        assert message["name"] == u"annakarenina", message["name"]
        assert message["title"] == u"A Novel By Tolstoy", message["title"]

    def test_05_package_entity_get_404(self):
        # Asking for a package that is not registered must raise
        # CkanApiError and leave a 404 as the client's last status.
        missing_name = "mycoffeecup"
        fetch_package = self.c.package_entity_get
        assert_raises(CkanApiError, fetch_package, missing_name)
        http_status = self.c.last_status
        assert http_status == 404, http_status

    @classmethod
    def _generate_pkg_name(self):
        """Return "ckanclienttest" followed by the current time, dots removed."""
        from time import time as current_time

        suffix = str(current_time()).replace(".", "")
        return "ckanclienttest" + suffix

    def test_06_package_register_post(self):
        # Register a freshly named package, then read it back and compare
        # every posted field.
        client = self.c
        new_name = self._generate_pkg_name()

        # Check package isn't registered.
        assert_raises(CkanApiError, client.package_entity_get, new_name)
        lookup_status = client.last_status
        assert lookup_status == 404, lookup_status

        # Check registration of new package.
        package = {
            "name": new_name,
            "url": "orig_url",
            "download_url": "orig_download_url",
            "tags": ["russian", "newtag"],
            "extras": {"genre": "thriller", "format": "ebook"},
        }
        client.package_register_post(package)
        post_status = client.last_status
        assert post_status == 201, post_status

        # Check package is registered.
        client.package_entity_get(new_name)
        get_status = client.last_status
        assert get_status == 200, get_status
        message = client.last_message
        assert message
        assert "name" in message, repr(message)
        scalar_fields = (
            ("name", new_name),
            ("url", "orig_url"),
            ("download_url", "orig_download_url"),
        )
        for field, expected in scalar_fields:
            assert message[field] == expected
        returned_tags = message["tags"]
        # order out is not guaranteed
        assert set(returned_tags) == set(["newtag", "russian"]), returned_tags
        returned_extras = message["extras"]
        assert returned_extras == package["extras"]

    def test_07_package_entity_put(self):
        # Register a package, overwrite it with a PUT and confirm that the
        # entity read back carries the new values.
        client = self.c
        name_07 = self._generate_pkg_name()

        # Register new package.
        original = {
            "name": name_07,
            "url": "orig_url",
            "download_url": "orig_download_url",
            "tags": ["russian"],
        }
        client.package_register_post(original)
        created_status = client.last_status
        assert created_status == 201, created_status

        # Check update of existing package.
        unique_tag = "mytag" + name_07
        updated = {
            "name": name_07,
            "url": "new_url",
            "download_url": "new_download_url",
            "tags": ["russian", "tolstoy", unique_tag],
            "extras": {"genre": "thriller", "format": "ebook"},
        }
        client.package_entity_put(updated)
        put_status = client.last_status
        assert put_status == 200

        # Check package is updated.
        client.package_entity_get(name_07)
        read_status = client.last_status
        assert read_status == 200, read_status
        message = client.last_message
        scalar_fields = (
            ("name", name_07),
            ("url", "new_url"),
            ("download_url", "new_download_url"),
        )
        for field, expected in scalar_fields:
            assert message[field] == expected
        returned_tags = message["tags"]
        # order out is not guaranteed
        assert set(returned_tags) == set(["russian", "tolstoy", unique_tag]), returned_tags
        returned_extras = message["extras"]
        assert returned_extras == updated["extras"]

    def test_08_package_entity_delete(self):
        # After deletion a package must be refused (403) to a second user
        # while remaining readable (200) to the client that created it.

        # create a package to be deleted
        pkg_name = self._generate_pkg_name()
        self.c.package_register_post({"name": pkg_name})
        status = self.c.last_status
        assert status == 201, status

        # check it is readable
        self.c.package_entity_get(pkg_name)
        assert self.c.last_status == 200, self.c.last_status

        # delete it
        self.c.package_entity_delete(pkg_name)

        # see it is not readable by another user
        assert_raises(CkanApiError, self.c2.package_entity_get, pkg_name)
        # Report the status of the client that made the request (c2); the
        # status of self.c would describe the earlier, unrelated request.
        assert self.c2.last_status == 403, self.c2.last_status

        # see it is still readable by the author (therefore pkg admin)
        self.c.package_entity_get(pkg_name)
        assert self.c.last_status == 200, self.c.last_status

    def test_09_tag_register_get(self):
        # The tag register must list "russian", both in the raw response
        # body and in the decoded message.
        self.c.tag_register_get()
        status = self.c.last_status
        assert status == 200, status
        body = self.c.last_body
        assert "russian" in body, body
        message = self.c.last_message
        # isinstance instead of an exact type() comparison, so a list
        # subclass is accepted and the failure message names the actual type.
        assert isinstance(message, list), type(message)
        assert "russian" in message, message

    def test_10_pkg_search_basic(self):
        # Searching for "Novel" must return exactly one package,
        # "annakarenina".
        found = self.c.package_search("Novel")
        http_status = self.c.last_status
        assert http_status == 200, http_status
        names = list(found["results"])
        assert_equal(names, [u"annakarenina"])
        total = found["count"]
        assert_equal(total, 1)

    def test_10_pkg_search_paged(self):
        # Search for "russian" with a limit of 1 and check that both
        # expected packages still appear once the results are listed out.
        found = self.c.package_search("russian", search_options={"limit": 1})
        http_status = self.c.last_status
        assert http_status == 200, http_status
        all_results = list(found["results"])
        required = set([u"annakarenina", u"warandpeace"])
        assert required <= set(all_results), all_results
        assert found["count"] >= 2, "%r %r" % (found, all_results)

    def test_10_pkg_search_options(self):
        # With no query and the "groups" option set to "roger", the search
        # must return exactly one package, "annakarenina".
        options = {"groups": "roger"}
        found = self.c.package_search(None, search_options=options)
        http_status = self.c.last_status
        assert http_status == 200, http_status
        names = list(found["results"])
        assert_equal(names, [u"annakarenina"])
        total = found["count"]
        assert_equal(total, 1)

    def test_10_pkg_search_options_all_fields(self):
        # Same group search as above but with "all_fields" switched on: the
        # single result is indexed by "name" rather than compared directly.
        options = {"groups": "roger", "all_fields": True}
        found = self.c.package_search(None, search_options=options)
        http_status = self.c.last_status
        assert http_status == 200, http_status
        total = found["count"]
        assert_equal(total, 1)
        first_result = list(found["results"])[0]
        assert_equal(first_result["name"], u"annakarenina")

    def test_11_package_relationship_post(self):
        # Starting from an empty relationship register, posting a new
        # relationship must answer 201.
        client = self.c
        client.package_relationship_register_get("annakarenina")
        assert client.last_status == 200, client.last_status
        assert not client.last_message, client.last_body

        # create relationship
        client.package_relationship_entity_post(
            "annakarenina", "child_of", "warandpeace", "some comment"
        )
        try:
            post_status = client.last_status
            assert post_status == 201, post_status
        finally:
            # Always clean up so later tests start without relationships.
            self.delete_relationships()

    def test_12_package_relationship_get(self):
        # A posted relationship must come back from the register with the
        # same subject, object, type and comment.
        client = self.c

        # create relationship
        client.package_relationship_entity_post(
            "annakarenina", "child_of", "warandpeace", "some comment"
        )

        # read relationship
        try:
            client.package_relationship_register_get("annakarenina")
            assert client.last_status == 200, client.last_status
            relationships = client.last_message
            assert len(relationships) == 1, relationships
            expected_fields = (
                ("subject", "annakarenina"),
                ("object", "warandpeace"),
                ("type", "child_of"),
                ("comment", "some comment"),
            )
            for key, expected in expected_fields:
                assert relationships[0][key] == expected, relationships[0]
        finally:
            self.delete_relationships()

    def test_13_package_relationship_put(self):
        # Updating a relationship's comment with a PUT must be visible when
        # the register is read afterwards.
        client = self.c

        # create relationship
        client.package_relationship_entity_post(
            "annakarenina", "child_of", "warandpeace", "some comment"
        )
        # update relationship
        try:
            client.package_relationship_entity_put(
                "annakarenina", "child_of", "warandpeace", "new comment"
            )
            assert client.last_status == 200, client.last_status

            # read relationship
            client.package_relationship_register_get("annakarenina")
            assert client.last_status == 200, client.last_status
            relationships = client.last_message
            assert len(relationships) == 1, relationships
            only = relationships[0]
            assert only["comment"] == "new comment", only
        finally:
            self.delete_relationships()

    def test_14_package_relationship_delete(self):
        # After deleting a relationship, reading it by type must raise with
        # a 404 and the generic "relationships" lookup must come back empty.
        client = self.c

        # create relationship
        client.package_relationship_entity_post(
            "annakarenina", "child_of", "warandpeace", "some comment"
        )
        try:
            client.package_relationship_entity_delete("annakarenina", "child_of", "warandpeace")

            # read relationship gives 404
            read_relationship = client.package_relationship_register_get
            assert_raises(
                CkanApiError, read_relationship, "annakarenina", "child_of", "warandpeace"
            )
            assert client.last_status == 404, client.last_status

            # and register of relationships is blank
            remaining = client.package_relationship_register_get(
                "annakarenina", "relationships", "warandpeace"
            )
            assert client.last_status == 200, client.last_status
            assert not remaining, remaining
        finally:
            self.delete_relationships()

    def test_15_package_edit_form_get(self):
        # Fetch the edit form for "annakarenina" through the form API. The
        # test is skipped unless ckanext.dgu is importable and the
        # "dgu_form_api" plugin appears in the "ckan.plugins" setting.
        try:
            import ckanext.dgu
        except ImportError:
            # The builtin ImportError with no "as"/comma binding parses on
            # both Python 2 and Python 3; the caught exception was never used.
            raise SkipTest("Need dgu_form_api plugin (from ckanext-dgu) installed to test form api client.")
        if "dgu_form_api" not in config.get("ckan.plugins", ""):
            raise SkipTest("Need dgu_form_api plugin (from ckanext-dgu) enabled to test form api client.")

        res = self.c.package_edit_form_get("annakarenina")
        assert self.c.last_status == 200, self.c.last_status
        assert res, res