def __init__(self, resource_id=""):
    """Load a resource record and validate the file it points to.

    :param resource_id: id handed to the ``resource_show`` action.

    Sets three attributes, in this order:

    * ``self.resource`` -- the result of the ``resource_show`` action;
    * ``self.file_path`` -- ``file_path_from_url`` applied to the
      resource's ``url`` entry;
    * ``self.is_valid`` -- whatever ``self.validate()`` returns (the
      original comment describes this as a valid-zip-file check).
    """
    # Resolve the action once, then invoke it with no explicit context.
    show_resource = toolkit.get_action("resource_show")
    self.resource = show_resource(None, {"id": resource_id})

    # Translate the resource's URL into a path to the file.
    self.file_path = file_path_from_url(self.resource["url"])

    # Validation runs last because it happens after both attributes
    # above have been populated.
    self.is_valid = self.validate()
Example #2
0
    def __init__(self, resource_id=""):
        """Fetch the resource, locate its file and record its validity.

        :param resource_id: id handed to the ``resource_show`` action.

        After construction the instance carries ``resource`` (the action
        result), ``file_path`` (derived from the resource's ``url`` via
        ``file_path_from_url``) and ``is_valid`` (the return value of
        ``self.validate()``; the original comment describes that step as
        a valid-zip-file check).
        """
        # Look the action up first, then call it with a ``None`` context.
        resource_show = toolkit.get_action("resource_show")
        query = {"id": resource_id}
        self.resource = resource_show(None, query)

        # Map the stored URL onto a file path.
        resource_url = self.resource["url"]
        self.file_path = file_path_from_url(resource_url)

        # Run validation once the attributes above are in place.
        self.is_valid = self.validate()
Example #3
0
    def before_index(self, pkg_dict):
        """Add per-resource fields to ``pkg_dict`` before it is indexed.

        For every resource found in the JSON stored under
        ``pkg_dict['data_dict']`` this method:

        * sets an empty ``resource_file_<resource id>`` entry on ``pkg_dict``;
        * appends the resource's ``distributor``, ``protocol``, ``layer``,
          ``resource_format`` and ``content_model`` values to the matching
          ``res_*`` lists on ``pkg_dict``;
        * when the ``ngds.full_text_indexing`` config value is ``'true'``,
          collects resources whose ``resource_format`` is ``'unstructured'``
          and whose ``format`` is not in the ignore list, and hands them to
          ``helpers.create_package_resource_document_index``.

        :param pkg_dict: package dictionary about to be indexed; it is
            modified in place.
        :returns: the same ``pkg_dict``.  NOTE(review): returning the dict
            follows CKAN's ``IPackageController.before_index`` contract --
            confirm the enclosing class implements that interface.
        """
        import json

        full_text_enabled = ckan_config.get('ngds.full_text_indexing',
                                            'false') == 'true'

        # Must be a real tuple: ``('csv')`` is just the string 'csv', which
        # turns ``not in`` into a substring test and wrongly treats formats
        # such as '' or 'c' as ignored.
        file_formats_to_ignore = ('csv',)

        # (resource key, package-level list key) pairs copied for indexing.
        field_map = (
            ('distributor', 'res_distributor'),
            ('protocol', 'res_protocol'),
            ('layer', 'res_layer'),
            ('resource_format', 'res_resource_format'),
            ('content_model', 'res_content_model'),
        )

        # Default to None so a package without 'data_dict' is simply skipped
        # instead of raising UnboundLocalError further down.
        resources = None
        raw_data_dict = pkg_dict.get('data_dict')
        if raw_data_dict:
            resources = json.loads(raw_data_dict).get('resources')

        if not resources:
            return pkg_dict

        document_index_list = []
        for resource in resources:
            pkg_dict['resource_file_%s' % resource.get("id")] = ''

            # Best effort: a failure on one resource is reported and must not
            # abort indexing of the whole package.
            try:
                for okey, nkey in field_map:
                    pkg_dict[nkey] = pkg_dict.get(
                        nkey, []) + [resource.get(okey, u'')]

                wants_full_text = (
                    full_text_enabled
                    and resource.get('resource_format', '') == 'unstructured'
                    and str(resource.get('format', u'')).lower()
                    not in file_formats_to_ignore
                )
                if wants_full_text:
                    file_path = helpers.file_path_from_url(
                        resource.get("url"))
                    if file_path:
                        document_index_list.append({
                            'package_id': pkg_dict.get('id'),
                            'resource_id': resource.get("id"),
                            'file_path': file_path,
                        })
            except Exception as ex:
                print("Exception while getting some full text indexing values: %s" % ex)

        if full_text_enabled and document_index_list:
            helpers.create_package_resource_document_index(
                pkg_dict.get('id'), document_index_list)

        return pkg_dict
Example #4
0
    def before_index(self, pkg_dict):
        """Add per-resource fields to ``pkg_dict`` before it is indexed.

        For every resource found in the JSON stored under
        ``pkg_dict['data_dict']`` this method:

        * sets an empty ``resource_file_<resource id>`` entry on ``pkg_dict``;
        * appends the resource's ``distributor``, ``protocol``, ``layer``,
          ``resource_format`` and ``content_model`` values to the matching
          ``res_*`` lists on ``pkg_dict``;
        * when the ``ngds.full_text_indexing`` config value is ``'true'``,
          collects resources whose ``resource_format`` is ``'unstructured'``
          and whose ``format`` is not in the ignore list, and hands them to
          ``helpers.create_package_resource_document_index``.

        :param pkg_dict: package dictionary about to be indexed; it is
            modified in place.
        :returns: the same ``pkg_dict``.  NOTE(review): returning the dict
            follows CKAN's ``IPackageController.before_index`` contract --
            confirm the enclosing class implements that interface.
        """
        import json

        full_text_enabled = ckan_config.get('ngds.full_text_indexing', 'false') == 'true'

        # Must be a real tuple: ``('csv')`` is just the string 'csv', which
        # turns ``not in`` into a substring test and wrongly treats formats
        # such as '' or 'c' as ignored.
        file_formats_to_ignore = ('csv',)

        # (resource key, package-level list key) pairs copied for indexing.
        field_map = (
            ('distributor', 'res_distributor'),
            ('protocol', 'res_protocol'),
            ('layer', 'res_layer'),
            ('resource_format', 'res_resource_format'),
            ('content_model', 'res_content_model'),
        )

        # Default to None so a package without 'data_dict' is simply skipped
        # instead of raising UnboundLocalError further down.
        resources = None
        raw_data_dict = pkg_dict.get('data_dict')
        if raw_data_dict:
            resources = json.loads(raw_data_dict).get('resources')

        if not resources:
            return pkg_dict

        document_index_list = []
        for resource in resources:
            pkg_dict['resource_file_%s' % resource.get("id")] = ''

            # Best effort: a failure on one resource is reported and must not
            # abort indexing of the whole package.
            try:
                for okey, nkey in field_map:
                    pkg_dict[nkey] = pkg_dict.get(nkey, []) + [resource.get(okey, u'')]

                wants_full_text = (
                    full_text_enabled
                    and resource.get('resource_format', '') == 'unstructured'
                    and str(resource.get('format', u'')).lower() not in file_formats_to_ignore
                )
                if wants_full_text:
                    file_path = helpers.file_path_from_url(resource.get("url"))
                    if file_path:
                        document_index_list.append({
                            'package_id': pkg_dict.get('id'),
                            'resource_id': resource.get("id"),
                            'file_path': file_path,
                        })
            except Exception as ex:
                print("Exception while getting some full text indexing values: %s" % ex)

        if full_text_enabled and document_index_list:
            helpers.create_package_resource_document_index(pkg_dict.get('id'), document_index_list)

        return pkg_dict