def __init__(self, resource_id=""):
    """Load a resource, resolve its file path and record whether it validates.

    :param resource_id: id handed to the ``resource_show`` action.
    """
    # Fetch the resource record through the toolkit's "resource_show" action.
    show_resource = toolkit.get_action("resource_show")
    self.resource = show_resource(None, {"id": resource_id})

    # Translate the resource URL into a path to the file.
    resource_url = self.resource["url"]
    self.file_path = file_path_from_url(resource_url)

    # validate() runs last, once the resource and path attributes are set;
    # per the original comment it checks that the file is a valid zip.
    self.is_valid = self.validate()
def __init__(self, resource_id=""):
    """Look up the resource, derive its file path and store the validation result.

    :param resource_id: id passed to the ``resource_show`` action.
    """
    # Get the resource record via the "resource_show" action.
    lookup = {"id": resource_id}
    self.resource = toolkit.get_action("resource_show")(None, lookup)

    # Derive the path to the file from the resource's URL.
    self.file_path = file_path_from_url(self.resource["url"])

    # Store the result of validate(); the original comment describes this
    # as checking that the file is a valid zip.
    self.is_valid = self.validate()
def before_index(self, pkg_dict):
    """Add per-resource fields to ``pkg_dict`` before the package is indexed.

    For every resource found in the JSON-encoded ``data_dict`` entry:

    * an empty ``resource_file_<resource id>`` field is set on ``pkg_dict``;
    * the resource's distributor, protocol, layer, resource_format and
      content_model values are appended to the matching ``res_*`` lists;
    * when the ``ngds.full_text_indexing`` config value is ``'true'``, the
      resource is ``unstructured`` and its format is not ignored, the
      resource's file path is queued for document indexing.

    :param pkg_dict: the package dictionary about to be indexed; it is
        modified in place.
    :returns: the same ``pkg_dict``. CKAN's ``before_index`` hook expects
        the dict back; the original implementation returned nothing.
    """
    # Kept as a function-local import, as in the original block.
    import json

    full_text_enabled = (
        ckan_config.get('ngds.full_text_indexing', 'false') == 'true')
    # Must be a real one-element tuple. The original ``('csv')`` was a plain
    # string, which turned the ``not in`` test below into a substring match
    # and wrongly skipped formats such as '' or 'c'.
    file_formats_to_ignore = ('csv',)
    # (resource key, pkg_dict key) pairs copied for every resource.
    field_map = (
        ('distributor', 'res_distributor'),
        ('protocol', 'res_protocol'),
        ('layer', 'res_layer'),
        ('resource_format', 'res_resource_format'),
        ('content_model', 'res_content_model'),
    )

    raw_data_dict = pkg_dict.get('data_dict')
    if not raw_data_dict:
        return pkg_dict

    resources = json.loads(raw_data_dict).get('resources')
    if not resources:
        return pkg_dict

    document_index_list = []
    for resource in resources:
        pkg_dict['resource_file_%s' % resource.get("id")] = ''
        try:
            for okey, nkey in field_map:
                pkg_dict[nkey] = (
                    pkg_dict.get(nkey, []) + [resource.get(okey, u'')])

            resource_format = str(resource.get('format', u'')).lower()
            if (full_text_enabled
                    and resource.get('resource_format', '') == 'unstructured'
                    and resource_format not in file_formats_to_ignore):
                file_path = helpers.file_path_from_url(resource.get("url"))
                if file_path:
                    document_index_list.append({
                        'package_id': pkg_dict.get('id'),
                        'resource_id': resource.get("id"),
                        'file_path': file_path,
                    })
        except Exception as ex:
            # Deliberately best-effort: a problem with one resource is
            # reported and must not abort indexing of the package.
            print("Exception while getting some full text indexing values: %s" % ex)

    if full_text_enabled and document_index_list:
        helpers.create_package_resource_document_index(
            pkg_dict.get('id'), document_index_list)

    return pkg_dict
def before_index(self, pkg_dict):
    """Add per-resource fields to ``pkg_dict`` before the package is indexed.

    For every resource found in the JSON-encoded ``data_dict`` entry:

    * an empty ``resource_file_<resource id>`` field is set on ``pkg_dict``;
    * the resource's distributor, protocol, layer, resource_format and
      content_model values are appended to the matching ``res_*`` lists;
    * when the ``ngds.full_text_indexing`` config value is ``'true'``, the
      resource is ``unstructured`` and its format is not ignored, the
      resource's file path is queued for document indexing.

    :param pkg_dict: the package dictionary about to be indexed; it is
        modified in place.
    :returns: the same ``pkg_dict``. CKAN's ``before_index`` hook expects
        the dict back; the original implementation returned nothing.
    """
    # Kept as a function-local import, as in the original block.
    import json

    full_text_enabled = (
        ckan_config.get('ngds.full_text_indexing', 'false') == 'true')
    # Must be a real one-element tuple. The original ``('csv')`` was a plain
    # string, which turned the ``not in`` test below into a substring match
    # and wrongly skipped formats such as '' or 'c'.
    file_formats_to_ignore = ('csv',)
    # (resource key, pkg_dict key) pairs copied for every resource.
    field_map = (
        ('distributor', 'res_distributor'),
        ('protocol', 'res_protocol'),
        ('layer', 'res_layer'),
        ('resource_format', 'res_resource_format'),
        ('content_model', 'res_content_model'),
    )

    raw_data_dict = pkg_dict.get('data_dict')
    if not raw_data_dict:
        return pkg_dict

    resources = json.loads(raw_data_dict).get('resources')
    if not resources:
        return pkg_dict

    document_index_list = []
    for resource in resources:
        pkg_dict['resource_file_%s' % resource.get("id")] = ''
        try:
            for okey, nkey in field_map:
                pkg_dict[nkey] = (
                    pkg_dict.get(nkey, []) + [resource.get(okey, u'')])

            resource_format = str(resource.get('format', u'')).lower()
            if (full_text_enabled
                    and resource.get('resource_format', '') == 'unstructured'
                    and resource_format not in file_formats_to_ignore):
                file_path = helpers.file_path_from_url(resource.get("url"))
                if file_path:
                    document_index_list.append({
                        'package_id': pkg_dict.get('id'),
                        'resource_id': resource.get("id"),
                        'file_path': file_path,
                    })
        except Exception as ex:
            # Deliberately best-effort: a problem with one resource is
            # reported and must not abort indexing of the package.
            print("Exception while getting some full text indexing values: %s" % ex)

    if full_text_enabled and document_index_list:
        helpers.create_package_resource_document_index(
            pkg_dict.get('id'), document_index_list)

    return pkg_dict