Example #1
0
    def update_config(self, config):
        """Cache datajson-related CKAN config directives as DataJsonPlugin
        class attributes and register this extension's template directory.

        Must use IConfigurer rather than IConfigurable because only IConfigurer
        is called before after_map, in which we need the configuration directives
        to know how to set the paths.
        """
        # TODO commenting out enterprise data inventory for right now
        # DataJsonPlugin.route_edata_path = config.get("ckanext.enterprisedatajson.path", "/enterprisedata.json")
        DataJsonPlugin.route_enabled = config.get("ckanext.datajson.url_enabled", "True") == 'True'
        DataJsonPlugin.route_path = config.get("ckanext.datajson.path", "/data.json")
        # BUG FIX: the key used to be " ckanext.datajsonld.path" (stray leading
        # space), so a user-supplied value could never be picked up and the
        # re.sub() default always won.
        DataJsonPlugin.route_ld_path = config.get("ckanext.datajsonld.path",
                                                  re.sub(r"\.json$", ".jsonld", DataJsonPlugin.route_path))
        DataJsonPlugin.ld_id = config.get("ckanext.datajsonld.id", config.get("ckan.site_url"))
        DataJsonPlugin.ld_title = config.get("ckan.site_title", "Catalog")
        DataJsonPlugin.site_url = config.get("ckan.site_url")

        # Absolute URL of the catalog endpoint, e.g. http://site/data.json.
        DataJsonPlugin.absolute_route_path = DataJsonPlugin.site_url + DataJsonPlugin.route_path
        DataJsonPlugin.xlsx_file_name = config.get("ckanext.datajson.xlsx_file_name", "catalog.xlsx")
        DataJsonPlugin.xlsx_route_path = config.get("ckanext.datajson.xlsx_path", "/%s" % DataJsonPlugin.xlsx_file_name)

        DataJsonPlugin.inventory_links_enabled = config.get("ckanext.datajson.inventory_links_enabled",
                                                            "False") == 'True'

        # Adds our local templates directory. It's smart. It knows it's
        # relative to the path of *this* file. Wow.
        p.toolkit.add_template_directory(config, "templates")
Example #2
0
 def __init__(self):
     """Initialize the main controller of the NeedUpdate plugin.

     Reads the extensions folder, the extension name prefix and the
     extension name suffix from the CKAN configuration.
     """
     self.ext_folder = ckan_config.get('ckanext.needupdate.ext_folder',
                                       '/usr/lib/ckan/default/src')
     # BUG FIX: both options below previously re-read the
     # 'ckanext.needupdate.ext_folder' key (copy-paste error), so any
     # configured prefix/suffix was silently ignored and the defaults
     # always applied.
     self.ext_prefix = ckan_config.get('ckanext.needupdate.ext_prefix',
                                       'ckanext-')
     self.ext_sufix = ckan_config.get('ckanext.needupdate.ext_sufix', '')
Example #3
0
    def command(self):
        '''
        Parse command line arguments and call appropriate method.

        Walks self.mapping (dataset name -> {format: relative path}),
        downloads each mapped resource from the 'ottawa.geo_url' base URL
        into temp_data/, and replaces any resource that update_required()
        reports as changed. A single revision is committed at the end,
        and only if at least one resource was actually updated.
        '''
        if not self.args or self.args[0] in ['--help', '-h', 'help']:
            print self.__doc__
            return

        cmd = self.args[0]  # NOTE(review): read but never used below — confirm intent
        self._load_config()

        # Base URL all relative resource paths in self.mapping hang off.
        resource_base_url = config.get('ottawa.geo_url')
        dirty = False  # becomes True once at least one resource was replaced
        writelog("running geo update...")

        model.repo.new_revision()
        for dataset, resources in self.mapping.iteritems():
            package = model.Package.get(dataset)

            if package is None:
                writelog("no such package: %s" % dataset)
                continue

            writelog("%s" % package.name)
            for existing_resource in package.resources:
                if existing_resource.format in resources:
                    # Shapefiles map to a dict of component files; every other
                    # format maps straight to a single relative path.
                    if existing_resource.format == 'shp':
                        resource_path = resource_base_url + resources[existing_resource.format]['shp']
                    else:
                        resource_path = resource_base_url + resources[existing_resource.format]

                    file_name = 'temp_data/' + existing_resource.name + '.' + existing_resource.format
                    resource_exists = self.download_temp_file(resource_path, file_name)

                    if not resource_exists:
                        writelog("resource cannot be found in data repository: %s" % resource_path)
                        continue

                    # Only touch the resource when the freshly downloaded file
                    # differs from what is stored (presumably checksum-based,
                    # given update_checksum below — confirm in update_required).
                    if self.update_required(existing_resource, file_name):
                        writelog("Updating resource: %s" % resource_path)
                        if existing_resource.format == 'shp':
                            self.replace_shape_files(existing_resource, resources['shp'])
                        else:
                            self.replace_resource(existing_resource, file_name)

                        self.update_checksum(existing_resource, file_name)
                        self.update_dates(existing_resource)
                        dirty = True
                    else:
                        writelog("update not required for: %s" % resource_path)

        if dirty:
            model.Session.commit()
            writelog("geo update commited")
        else:
            writelog("no new resources detected")
Example #4
0
    def replace_resource(self, existing_resource, temp_file):
        """Archive *temp_file* under a timestamped directory in the geo
        storage area and repoint the resource's URL at the new copy."""
        storage_root = config.get('ottawa.geo_storage_dir')
        stamp = datetime.now().strftime('%Y-%m-%dT%H%M%S')
        target_dir = os.path.join(storage_root, stamp)
        if not os.path.exists(target_dir):
            os.makedirs(target_dir)

        # Shapefiles are stored as a zipped bundle; everything else keeps
        # its native extension.
        if existing_resource.format == 'shp':
            final_name = existing_resource.name + '.shp.zip'
        else:
            final_name = existing_resource.name + '.' + existing_resource.format

        shutil.copyfile(temp_file, os.path.join(target_dir, final_name))

        # Public URL mirrors the on-disk layout: <base><timestamp>/<file>.
        existing_resource.url = "%s%s/%s" % (
                                config.get('ottawa.geo_storage_url'),
                                stamp,
                                final_name,
                            )

        writelog("saved new resource for %s" % existing_resource.id)
        return ""
Example #5
0
    def replace_shape_files(self, existing_resource, shape_file_locations):
        resource_base_url = config.get('ottawa.geo_url')

        shape_destination_dir = os.path.join('temp_data', existing_resource.name + '_shp')
        if not os.path.exists(shape_destination_dir):
            os.makedirs(shape_destination_dir)

        for shape_format, shape_location in shape_file_locations.iteritems():
            resource_location = resource_base_url + shape_location
            file_name = existing_resource.name + '.' + shape_format
            download_location = os.path.join(shape_destination_dir, file_name)
            self.download_temp_file(resource_location, download_location)

        zip_filename = os.path.join('temp_data', existing_resource.name + '.shp.zip')
        zip = zipfile.ZipFile(zip_filename, 'w')
        for root, dirs, files in os.walk(shape_destination_dir):
            for file in files:
                print 'writing file %s to %s' % (os.path.join(root, file), zip)
                zip.write(os.path.join(root, file), file)
        zip.close()

        self.replace_resource(existing_resource, zip_filename)
Example #6
0
    def make_json(self, export_type='datajson', owner_org=None):
        # Error handler for creating error log
        stream = StringIO.StringIO()
        eh = logging.StreamHandler(stream)
        eh.setLevel(logging.WARN)
        formatter = logging.Formatter('%(asctime)s - %(message)s')
        eh.setFormatter(formatter)
        logger.addHandler(eh)

        data = ''
        output = []
        errors_json = []
        Package2Pod.seen_identifiers = set()

        try:
            # Build the data.json file.
            if owner_org:
                if 'datajson' == export_type:
                    # we didn't check ownership for this type of export, so never load private datasets here
                    packages = DataJsonController._get_ckan_datasets(org=owner_org)
                    if not packages:
                        packages = self.get_packages(owner_org=owner_org, with_private=False)
                else:
                    packages = self.get_packages(owner_org=owner_org, with_private=True)
            else:
                # TODO: load data by pages
                # packages = p.toolkit.get_action("current_package_list_with_resources")(
                # None, {'limit': 50, 'page': 300})
                packages = DataJsonController._get_ckan_datasets()
                # packages = p.toolkit.get_action("current_package_list_with_resources")(None, {})
            import re
            for i in range(0, len(packages)):
                j = 0
                for extra in packages[i]['extras']:
                    if extra.get('key') == 'language':
                        print 'Key: {}, Value: {}'.format(extra.get('key'), extra.get('value'))
                        if not isinstance(extra.get('value'), (unicode, str)):
                            # Solo puedo operar si value es una instancia de UNICODE o STR
                            logger.warn('No fue posible renderizar el campo: \"Language\".')
                        else:
                            language = []
                            try:
                                # intento convertir directamente el valor de
                                # Language a una lista.
                                language = json.loads(extra['value'])
                            except ValueError:
                                # La traduccion no es posible, limpiar y reintentar
                                if "{" or "}" in extra.get('value'):
                                    lang = extra['value'].replace('{', '').replace('}', '').split(',')
                                else:
                                    lang = extra.get('value')
                                if ',' in lang:
                                    lang = lang.split(',')
                                else:
                                    lang = [lang]
                                language = json.loads(lang)
                            packages[i]['extras'][j]['value'] = language
                    j += 1
                try:
                    for index, resource in enumerate(packages[i]['resources']):
                        try:
                            fixed_attrDesc = json.loads(resource['attributesDescription'])
                            packages[i]['resources'][index]['attributesDescription'] = fixed_attrDesc
                        except ValueError:
                            logger.error('Fallo render de \'attributesDescription\'.')
                except KeyError:
                    pass
                # Obtengo el ckan.site_url para chequear la propiedad del recurso.
                ckan_site_url = config.get('ckan.site_url')
                try:
                    for index, resource in enumerate(packages[i]['resources']):
                        resource = packages[i]['resources'][index]
                        if not resource.get("accessURL", None):
                            accessURL = os.path.join(ckan_site_url, 'dataset', packages[i]['id'], 'resource', resource['id'])
                            resource.update({'accessURL': accessURL})

                except KeyError:
                    pass
                ckan_host = ''
                try:
                    ckan_host = re.match(
                        r'(?:http)s?:\/\/([\w][^\/=\s]+)\/?|(^w{3}[\.\w][^\/\=\s]{2,})\/?',
                        packages[i]['resources'][0]['url']).group(0)
                except Exception:
                    pass

                themes = self.safely_map(dict.get, packages[i]['groups'], 'name')
                packages[i]['groups'] = themes

                try:
                    packages[i]['author'] = {
                        'name': packages[i]['author'],
                        'mbox': packages[i]['author_email']
                    }
                except KeyError:
                    pass

                tags = self.safely_map(dict.get, packages[i]['tags'], 'display_name')
                packages[i]['tags'] = tags

                # packages[i] = json.loads(packages[i][0]['extras']['language'])
                try:
                    if len(packages[i]['url']) < 1:
                        packages[i]['url'] = '{host}/dataset/{dataset_id}'.format(
                            host=ckan_host[:-1],
                            dataset_id=packages[i]['name'])
                    logger.info("landingPage generado para el dataset_id: %s.", packages[i]['name'])
                except TypeError:
                    prepare_url = 'unknow'
                    try:
                        prepare_url = packages[i]['resources'][0]['url']
                        prepare_url = prepare_url.split('resource')[0]
                        logger.info("landingPage generado para el dataset_id: %s, Tipo de datos: \" harvest\".", packages[i]['name'])
                    except IndexError:
                        logger.error("autogen \"landingpage\" fails.")
                    packages[i].update({'url': prepare_url})

            json_export_map = get_export_map_json('export.map.json')

            if json_export_map:
                for pkg in packages:
                    if json_export_map.get('debug'):
                        output.append(pkg)

                    extras = dict([(x['key'], x['value']) for x in pkg.get('extras', {})])

                    if export_type in ['unredacted', 'redacted']:
                        if 'Draft' == extras.get('publishing_status'):
                            continue
                    elif 'draft' == export_type:
                        if 'publishing_status' not in extras.keys() or extras.get('publishing_status') != 'Draft':
                            continue

                    redaction_enabled = ('redacted' == export_type)
                    datajson_entry = Package2Pod.convert_package(pkg, json_export_map, redaction_enabled)
                    errors = None
                    if 'errors' in datajson_entry.keys():
                        errors_json.append(datajson_entry)
                        errors = datajson_entry.get('errors')
                        datajson_entry = None

                    if datajson_entry and \
                            (not json_export_map.get('validation_enabled') or self.is_valid(datajson_entry)):
                        # logger.debug("writing to json: %s" % (pkg.get('title')))
                        output.append(datajson_entry)
                    else:
                        publisher = detect_publisher(extras)
                        if errors:
                            logger.warn("Dataset id=[%s], title=[%s], organization=[%s] omitted, reason below:\n\t%s\n",
                                        pkg.get('id', None), pkg.get('title', None), publisher, errors)
                        else:
                            logger.warn("Dataset id=[%s], title=[%s], organization=[%s] omitted, reason above.\n",
                                        pkg.get('id', None), pkg.get('title', None), publisher)
                try:
                    # CLEAN Not requiered fields
                    for d in output:
                        del d["@type"]
                except Exception:
                    pass

                data = Package2Pod.wrap_json_catalog(output, json_export_map)
        except Exception as e:
            exc_type, exc_obj, exc_tb = sys.exc_info()
            filename = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
            logger.error("%s : %s : %s : %s", exc_type, filename, exc_tb.tb_lineno, unicode(e))

        # Get the error log
        eh.flush()
        error = stream.getvalue()
        eh.close()
        logger.removeHandler(eh)
        stream.close()

        # Skip compression if we export whole /data.json catalog
        if 'datajson' == export_type:
            return data

        return self.write_zip(data, error, errors_json, zip_name=export_type)