Exemple #1
0
class CkanLoader(object):
    """
    Directs a CKAN service client to put obtained datasets on CKAN.

    Subclasses implement obtain_datasets() to fill ``self.datasets`` with
    dataset dicts (create_dataset() builds one); run() then registers or
    updates each of them through the CKAN client.
    """

    usage = '''usage: %prog OPTIONS'''

    # Latin-1 code point -> 7-bit ASCII replacement, used by
    # substitute_ascii_equivalents().
    # Table taken from: http://code.activestate.com/recipes/251871/
    # The recipe's symbol mappings are left commented out, so those
    # characters are dropped rather than translated.
    _LATIN1_TO_ASCII = {
        0xc0: 'A', 0xc1: 'A', 0xc2: 'A', 0xc3: 'A', 0xc4: 'A', 0xc5: 'A',
        0xc6: 'Ae', 0xc7: 'C',
        0xc8: 'E', 0xc9: 'E', 0xca: 'E', 0xcb: 'E',
        0xcc: 'I', 0xcd: 'I', 0xce: 'I', 0xcf: 'I',
        0xd0: 'Th', 0xd1: 'N',
        0xd2: 'O', 0xd3: 'O', 0xd4: 'O', 0xd5: 'O', 0xd6: 'O', 0xd8: 'O',
        0xd9: 'U', 0xda: 'U', 0xdb: 'U', 0xdc: 'U',
        0xdd: 'Y', 0xde: 'th', 0xdf: 'ss',
        0xe0: 'a', 0xe1: 'a', 0xe2: 'a', 0xe3: 'a', 0xe4: 'a', 0xe5: 'a',
        0xe6: 'ae', 0xe7: 'c',
        0xe8: 'e', 0xe9: 'e', 0xea: 'e', 0xeb: 'e',
        0xec: 'i', 0xed: 'i', 0xee: 'i', 0xef: 'i',
        0xf0: 'th', 0xf1: 'n',
        0xf2: 'o', 0xf3: 'o', 0xf4: 'o', 0xf5: 'o', 0xf6: 'o', 0xf8: 'o',
        0xf9: 'u', 0xfa: 'u', 0xfb: 'u', 0xfc: 'u',
        0xfd: 'y', 0xfe: 'th', 0xff: 'y',
        #0xa1:'!', 0xa2:'{cent}', 0xa3:'{pound}', 0xa4:'{currency}',
        #0xa5:'{yen}', 0xa6:'|', 0xa7:'{section}', 0xa8:'{umlaut}',
        #0xa9:'{C}', 0xaa:'{^a}', 0xab:'<<', 0xac:'{not}',
        #0xad:'-', 0xae:'{R}', 0xaf:'_', 0xb0:'{degrees}',
        #0xb1:'{+/-}', 0xb2:'{^2}', 0xb3:'{^3}', 0xb4:"'",
        #0xb5:'{micro}', 0xb6:'{paragraph}', 0xb7:'*', 0xb8:'{cedilla}',
        #0xb9:'{^1}', 0xba:'{^o}', 0xbb:'>>',
        #0xbc:'{1/4}', 0xbd:'{1/2}', 0xbe:'{3/4}', 0xbf:'?',
        #0xd7:'*', 0xf7:'/'
    }

    def __init__(self):
        """Sets up options and init the CKAN service client."""
        parser = OptionParser(self.usage)
        self.add_options(parser)
        (self.options, self.args) = parser.parse_args()
        self.init_ckanclient()

    def add_options(self, parser):
        """Adds options for CKAN service location and REST API key.

        Also adds the two flags that switch off the interactive
        confirmation prompts used by put_datasets_on_ckan().
        """
        parser.add_option(
            '--ckan-api-location',
            dest='ckan_api_location',
            default='http://127.0.0.1:5000/api',
            help="""The location of working CKAN REST API.""")
        parser.add_option(
            '--ckan-api-key',
            dest='ckan_api_key',
            help="""A valid CKAN REST API key.""")
        # NOTE: the 'confimation' spelling of the two dests below is kept
        # as-is so that existing readers of these option attributes keep
        # working.
        parser.add_option(
            '--no-create-confirmation',
            dest='no_create_confimation',
            action='store_true',
            help="""Don't prompt for confirmation when registering a new dataset.""")
        parser.add_option(
            '--no-update-confirmation',
            dest='no_update_confimation',
            action='store_true',
            help="""Don't prompt for confirmation when updating a registered dataset.""")

    def init_ckanclient(self):
        """Init the CKAN client from options.

        Prints a warning for each missing option, but still creates the
        client with whatever values are present.
        """
        if not self.options.ckan_api_location:
            print("Warning: CKAN API location not provided.")
        if not self.options.ckan_api_key:
            print("Warning: CKAN API key not provided.")
        self.ckanclient = CkanClient(
            base_location=self.options.ckan_api_location,
            api_key=self.options.ckan_api_key,
        )

    def run(self):
        """Obtain datasets and put them on CKAN.

        A KeyboardInterrupt (Ctrl-C) at any point ends the run with a short
        message instead of a traceback.
        """
        try:
            self.datasets = []
            self.obtain_datasets()
            print("Putting %s datasets on CKAN running at %s" % (
                len(self.datasets), self.options.ckan_api_location))
            self.put_datasets_on_ckan()
        except KeyboardInterrupt:
            print("")
            print("exiting...")
            print("")

    def obtain_datasets(self):
        """Abstract method for obtaining datasets.

        Implementations are expected to populate ``self.datasets``, which
        run() resets to an empty list beforehand.

        Raises NotImplementedError (an Exception subclass) unless overridden.
        """
        raise NotImplementedError("Abstract method not implemented.")

    def put_datasets_on_ckan(self):
        """Uses CKAN client to register (or update) obtained datasets.

        Each dataset is first looked up by name. A 200 response leads to an
        update, a 404 leads to a registration, and anything else is reported
        as an error. Raises Exception when a request failed without the
        client recording either an HTTP error or a URL error.
        """
        print("")
        sleep(1)
        for dataset in self.datasets:
            try:
                self.ckanclient.dataset_entity_get(dataset['name'])
            except CkanApiError:
                # Not fatal here: the outcome of the lookup is read from the
                # client's last_* attributes just below.
                pass
            client = self.ckanclient
            if client.last_status == 200:
                self._update_registered_dataset(dataset)
            elif client.last_status == 404 or '404' in str(client.last_url_error):
                self._register_new_dataset(dataset)
            else:
                # The lookup has no dedicated "not authorised" message.
                self._report_request_error(check_not_authorised=False)

    def _update_registered_dataset(self, dataset):
        """Shows an already registered dataset and, once confirmed, updates it."""
        name = dataset['name']
        self._show_dataset("Dataset '%s' is already registered" % name, dataset)
        if not self.options.no_update_confimation:
            question = "Do you want to update this dataset with CKAN now? [y/N] "
            if not self._confirmed(question, name):
                return
        print("Updating dataset...")
        self.ckanclient.dataset_entity_put(dataset)
        if self.ckanclient.last_status == 200:
            print("Updated dataset '%s' OK." % name)
            sleep(1)
        else:
            self._report_request_error()

    def _register_new_dataset(self, dataset):
        """Shows an unregistered dataset and, once confirmed, registers it."""
        name = dataset['name']
        self._show_dataset("Dataset '%s' not currently registered" % name, dataset)
        if not self.options.no_create_confimation:
            question = "Do you want to register this dataset with CKAN now? [y/N] "
            if not self._confirmed(question, name):
                return
        print("Registering dataset...")
        self.ckanclient.dataset_register_post(dataset)
        if self.ckanclient.last_status in [200, 201]:
            print("Registered dataset '%s' OK." % name)
            sleep(1)
        else:
            self._report_request_error()

    def _show_dataset(self, headline, dataset):
        """Prints a headline followed by the pretty-printed dataset."""
        print(headline)
        print("")
        pprint.pprint(dataset)
        print("")

    def _confirmed(self, question, dataset_name):
        """Asks a yes/no question; announces the skip and returns False on 'no'.

        Only an answer starting with 'y' or 'Y' counts as yes; an empty
        answer means no.
        """
        try:
            read_line = raw_input
        except NameError:
            # Python 3 has no raw_input; its input() behaves the same way.
            read_line = input
        answer = read_line(question)
        if answer.lower().startswith('y'):
            return True
        print("Skipping '%s' dataset..." % dataset_name)
        print("")
        sleep(1)
        return False

    def _report_request_error(self, check_not_authorised=True):
        """Prints why the client's last request failed, then pauses.

        The pause leaves the message on screen for a few seconds before the
        next dataset is processed. Raises Exception when the client recorded
        neither an HTTP error nor a URL error.
        """
        client = self.ckanclient
        if check_not_authorised and (
                client.last_status == 403 or '403' in str(client.last_url_error)):
            print("Error: Not authorised. Check your API key.")
            sleep(4)
        elif client.last_http_error:
            print("Error: CKAN returned status code %s: %s" % (
                client.last_status, client.last_http_error))
            sleep(3)
        elif client.last_url_error:
            print("Error: URL problems: %s" % client.last_url_error)
            sleep(3)
        else:
            raise Exception("Error: CKAN request didn't work at all.")

    def create_dataset(self, name, title='', url='', maintainer='',
            maintainer_email='', author='', author_email='', notes='',
            tags=None, extras=None, license_id=None, license=None,
            resources=None):
        """Returns a CKAN REST API dataset from method arguments.

        The name is passed through coerce_dataset_name(). ``tags`` must be
        a list and ``extras`` a dict; when omitted (or None), ``tags``,
        ``extras`` and ``resources`` each default to a fresh empty
        container. ``license_id`` takes precedence over ``license``; if both
        are None, neither key is set.

        Raises Exception if ``tags`` is not a list or ``extras`` is not a
        dict.
        """
        # Fresh containers per call: the defaults end up inside the returned
        # dataset, so they must not be shared between calls.
        if tags is None:
            tags = []
        if extras is None:
            extras = {}
        if resources is None:
            resources = []
        # The offending value is wrapped in a 1-tuple so that a tuple
        # argument is formatted rather than unpacked by the % operator.
        if not isinstance(tags, list):
            raise Exception("Dataset tags must be a list: %s" % (tags,))
        if not isinstance(extras, dict):
            raise Exception("Dataset extras must be a dict: %s" % (extras,))
        dataset = {}
        dataset['name'] = self.coerce_dataset_name(name)
        dataset['title'] = title
        dataset['url'] = url
        dataset['notes'] = notes
        dataset['maintainer'] = maintainer
        dataset['maintainer_email'] = maintainer_email
        dataset['author'] = author
        dataset['author_email'] = author_email
        dataset['tags'] = tags
        dataset['extras'] = extras
        # Pre and post licenses servicization.
        if license_id is not None:
            dataset['license_id'] = license_id
        elif license is not None:
            dataset['license'] = license
        dataset['resources'] = resources
        return dataset

    def coerce_dataset_name(self, name):
        """Converts unicode string to valid CKAN dataset name.

        Currently this replaces or drops non-ASCII characters and lowercases
        the result.
        """
        # Todo: Probably needs to be finished off.
        name = self.substitute_ascii_equivalents(name)
        name = name.lower()
        return name

    def substitute_ascii_equivalents(self, unicrap):
        # Method taken from: http://code.activestate.com/recipes/251871/
        """This takes a UNICODE string and replaces Latin-1 characters with
            something equivalent in 7-bit ASCII. It returns a plain ASCII string.
            All characters in the standard 7-bit ASCII range are preserved.
            In the 8th bit range the Latin-1 accented letters are converted
            to unaccented equivalents. Every other character at or above
            0x80 (symbols included) is deleted.
        """
        table = self._LATIN1_TO_ASCII
        pieces = []
        for char in unicrap:
            code = ord(char)
            if code in table:
                pieces.append(table[code])
            elif code < 0x80:
                pieces.append(str(char))
            # Anything else has no ASCII equivalent here and is dropped.
        return ''.join(pieces)

    def create_dataset_resource(self, url='', format='', hash='', description=''):
        """Returns a CKAN REST API resource dict from method arguments."""
        return {
            'url': url,
            'format': format,
            'hash': hash,
            'description': description,
        }