def netcdf_pre_create_resource(sender, **kwargs):
    """Validate the uploaded NetCDF file and extract metadata before resource creation.

    Expected kwargs: 'files' (list of UploadedFile), 'metadata' (list to populate),
    'validate_files' (dict receiving 'are_files_valid'/'message'),
    'source_names' (list of federated-zone file paths).

    On a valid NetCDF file, populates ``metadata`` with extracted elements and
    appends a generated ncdump header text file to ``files``; otherwise flags
    the upload as invalid via ``validate_files``.
    """
    files = kwargs['files']
    metadata = kwargs['metadata']
    validate_files_dict = kwargs['validate_files']
    source_names = kwargs['source_names']

    if __debug__:
        assert(isinstance(source_names, list))

    file_selected = False
    in_file_name = ''
    nc_file_name = ''
    if files:
        file_selected = True
        in_file_name = files[0].file.name
        nc_file_name = os.path.splitext(files[0].name)[0]
    elif source_names:
        nc_file_name = os.path.splitext(os.path.basename(source_names[0]))[0]
        # file lives in a federated zone: fetch a local temp copy for parsing
        ref_tmpfiles = utils.get_fed_zone_files(source_names)
        if ref_tmpfiles:
            in_file_name = ref_tmpfiles[0]
            file_selected = True

    if file_selected and in_file_name:
        # file validation and metadata extraction
        nc_dataset = nc_utils.get_nc_dataset(in_file_name)
        if isinstance(nc_dataset, netCDF4.Dataset):
            # Extract the metadata from netcdf file
            res_dublin_core_meta, res_type_specific_meta = \
                nc_meta.get_nc_meta_dict(in_file_name)
            # populate metadata list with extracted metadata
            add_metadata_to_list(metadata, res_dublin_core_meta,
                                 res_type_specific_meta)

            # create the ncdump text file
            dump_file = create_header_info_txt_file(in_file_name, nc_file_name)
            dump_file_name = nc_file_name + '_header_info.txt'
            # open in binary mode so UploadedFile gets bytes, consistent with
            # the other receivers in this module (was text-mode open)
            uploaded_file = UploadedFile(file=open(dump_file, 'rb'),
                                         name=dump_file_name)
            files.append(uploaded_file)
        else:
            validate_files_dict['are_files_valid'] = False
            validate_files_dict['message'] = 'Please check if the uploaded file ' \
                                             'is in valid NetCDF format.'

        if source_names and in_file_name:
            # remove the temp directory created for the federated-zone copy
            shutil.rmtree(os.path.dirname(in_file_name))
def netcdf_post_create_resource(sender, **kwargs):
    """Extract metadata from the resource's NetCDF file after resource creation.

    Expected kwargs: 'resource' (the newly created resource) and
    'validate_files' (dict receiving 'are_files_valid'/'message').

    On a valid NetCDF file, updates/creates metadata elements on the resource
    and attaches a generated ncdump header text file. On an invalid file,
    removes the file from the resource and flags validation failure. Always
    resets the metadata is_dirty flag and, when files remain, marks the bag
    dirty so metadata XML is regenerated.
    """
    log = logging.getLogger()
    resource = kwargs['resource']
    validate_files_dict = kwargs['validate_files']
    res_file = resource.files.all().first()

    if res_file:
        temp_file = utils.get_file_from_irods(res_file)
        nc_dataset = nc_utils.get_nc_dataset(temp_file)
        nc_file_name = res_file.file_name
        if isinstance(nc_dataset, netCDF4.Dataset):
            # Extract the metadata from netcdf file
            res_dublin_core_meta, res_type_specific_meta = nc_meta.get_nc_meta_dict(
                temp_file)
            # populate metadata list with extracted metadata
            metadata = []
            add_metadata_to_list(metadata, res_dublin_core_meta,
                                 res_type_specific_meta)
            for element in metadata:
                # here k is the name of the element
                # v is a dict of all element attributes/field names and field values
                k, v = list(element.items())[0]
                if k == 'title':
                    # update title element
                    title_element = resource.metadata.title
                    resource.metadata.update_element('title', title_element.id, **v)
                elif k == 'rights':
                    rights_element = resource.metadata.rights
                    resource.metadata.update_element('rights', rights_element.id, **v)
                elif k == 'creator':
                    # replace any auto-created creators with the one from the file
                    resource.metadata.creators.all().delete()
                    resource.metadata.create_element('creator', **v)
                else:
                    resource.metadata.create_element(k, **v)

            # create the ncdump text file
            dump_file = create_header_info_txt_file(temp_file, nc_file_name)
            dump_file_name = nc_file_name + '_header_info.txt'
            uploaded_file = UploadedFile(file=open(dump_file, mode="rb"),
                                         name=dump_file_name)
            utils.add_file_to_resource(resource, uploaded_file)
        else:
            delete_resource_file_only(resource, res_file)
            validate_files_dict['are_files_valid'] = False
            err_msg = "Uploaded file was not added to the resource." \
                      " Please provide a valid NetCDF file. "
            validate_files_dict['message'] = err_msg
            log_msg = "File validation failed for netcdf resource (ID:{})."
            log_msg = log_msg.format(resource.short_id)
            log.error(log_msg)

        # cleanup the temp file directory; kept inside the res_file guard so
        # temp_file is always bound here (avoids a NameError when the
        # resource was created without a file)
        if os.path.exists(temp_file):
            shutil.rmtree(os.path.dirname(temp_file))

    # set metadata is dirty flag as false for resource creation
    metadata = resource.metadata
    metadata.is_dirty = False
    metadata.save()

    # since we are extracting metadata after resource creation
    # metadata xml files need to be regenerated - so need to set the
    # dirty bag flags
    if resource.files.all().count() > 0:
        utils.set_dirty_bag_flag(resource)
def netcdf_pre_add_files_to_resource(sender, **kwargs):
    """Validate a replacement NetCDF file and refresh resource metadata before adding it.

    Expected kwargs: 'resource', 'files', 'validate_files', 'source_names',
    and 'user' (read only when a valid file is being processed).

    A NetCDF resource holds a single dataset: on a valid upload all existing
    resource files are deleted, metadata elements are rebuilt from the new
    file, and a fresh ncdump header text file is appended to ``files``. An
    invalid file flags validation failure via ``validate_files``.
    """
    nc_res = kwargs['resource']
    files = kwargs['files']
    validate_files_dict = kwargs['validate_files']
    source_names = kwargs['source_names']

    if __debug__:
        assert (isinstance(source_names, list))

    if len(files) > 1:
        # file number validation
        validate_files_dict['are_files_valid'] = False
        validate_files_dict['message'] = 'Only one file can be uploaded.'

    file_selected = False
    in_file_name = ''
    nc_file_name = ''
    if files:
        file_selected = True
        in_file_name = files[0].file.name
        nc_file_name = os.path.splitext(files[0].name)[0]
    elif source_names:
        nc_file_name = os.path.splitext(os.path.basename(source_names[0]))[0]
        # federated-zone file: fetch a local temp copy for parsing
        ref_tmpfiles = utils.get_fed_zone_files(source_names)
        if ref_tmpfiles:
            in_file_name = ref_tmpfiles[0]
            file_selected = True

    if file_selected and in_file_name:
        # file type validation and existing metadata update and create new ncdump text file
        nc_dataset = nc_utils.get_nc_dataset(in_file_name)
        if isinstance(nc_dataset, netCDF4.Dataset):
            # delete all existing resource files and metadata related
            for f in ResourceFile.objects.filter(object_id=nc_res.id):
                delete_resource_file_only(nc_res, f)

            # update resource modification info
            user = kwargs['user']
            utils.resource_modified(nc_res, user, overwrite_bag=False)

            # extract metadata
            res_dublin_core_meta, res_type_specific_meta = nc_meta.get_nc_meta_dict(
                in_file_name)

            # update title info
            if res_dublin_core_meta.get('title'):
                if nc_res.metadata.title:
                    nc_res.metadata.title.delete()
                nc_res.metadata.create_element(
                    'title', value=res_dublin_core_meta['title'])

            # update description info
            if res_dublin_core_meta.get('description'):
                if nc_res.metadata.description:
                    nc_res.metadata.description.delete()
                nc_res.metadata.create_element(
                    'description', abstract=res_dublin_core_meta.get('description'))

            # update creator info
            if res_dublin_core_meta.get('creator_name'):
                name = res_dublin_core_meta.get('creator_name')
                email = res_dublin_core_meta.get('creator_email', '')
                url = res_dublin_core_meta.get('creator_url', '')
                arguments = dict(name=name, email=email, homepage=url)
                creator = nc_res.metadata.creators.all().filter(
                    name=name).first()
                if creator:
                    order = creator.order
                    # keep the first-listed creator intact; re-create any
                    # other matching creator at its original order
                    if order != 1:
                        creator.delete()
                        arguments['order'] = order
                        nc_res.metadata.create_element('creator', **arguments)
                else:
                    nc_res.metadata.create_element('creator', **arguments)

            # update contributor info
            if res_dublin_core_meta.get('contributor_name'):
                name_list = res_dublin_core_meta['contributor_name'].split(',')
                existing_contributor_names = [
                    contributor.name
                    for contributor in nc_res.metadata.contributors.all()
                ]
                for name in name_list:
                    if name not in existing_contributor_names:
                        nc_res.metadata.create_element('contributor', name=name)

            # update subject info
            if res_dublin_core_meta.get('subject'):
                keywords = res_dublin_core_meta['subject'].split(',')
                existing_keywords = [
                    subject.value for subject in nc_res.metadata.subjects.all()
                ]
                for keyword in keywords:
                    if keyword not in existing_keywords:
                        nc_res.metadata.create_element('subject', value=keyword)

            # update source
            if res_dublin_core_meta.get('source'):
                for source in nc_res.metadata.sources.all():
                    source.delete()
                nc_res.metadata.create_element(
                    'source', derived_from=res_dublin_core_meta.get('source'))

            # update license element:
            if res_dublin_core_meta.get('rights'):
                raw_info = res_dublin_core_meta.get('rights')
                # raw string: "\s" in a non-raw literal is an invalid escape
                b = re.search(r"(?P<url>https?://[^\s]+)", raw_info)
                url = b.group('url') if b else ''
                statement = raw_info.replace(url, '') if url else raw_info
                if nc_res.metadata.rights:
                    nc_res.metadata.rights.delete()
                nc_res.metadata.create_element('rights', statement=statement, url=url)

            # update relation
            if res_dublin_core_meta.get('references'):
                nc_res.metadata.relations.filter(type='cites').all().delete()
                nc_res.metadata.create_element(
                    'relation', type='cites',
                    value=res_dublin_core_meta['references'])

            # update box info
            nc_res.metadata.coverages.all().delete()
            if res_dublin_core_meta.get('box'):
                nc_res.metadata.create_element(
                    'coverage', type='box', value=res_dublin_core_meta['box'])

            # update period info
            if res_dublin_core_meta.get('period'):
                nc_res.metadata.create_element(
                    'coverage', type='period',
                    value=res_dublin_core_meta['period'])

            # update variable info
            nc_res.metadata.variables.all().delete()
            for var_info in list(res_type_specific_meta.values()):
                nc_res.metadata.create_element(
                    'variable',
                    name=var_info['name'],
                    unit=var_info['unit'],
                    type=var_info['type'],
                    shape=var_info['shape'],
                    missing_value=var_info['missing_value'],
                    descriptive_name=var_info['descriptive_name'],
                    method=var_info['method'])

            # update the original spatial coverage meta
            nc_res.metadata.ori_coverage.all().delete()
            if res_dublin_core_meta.get('original-box'):
                if res_dublin_core_meta.get('projection-info'):
                    nc_res.metadata.create_element(
                        'originalcoverage',
                        value=res_dublin_core_meta['original-box'],
                        projection_string_type=res_dublin_core_meta[
                            'projection-info']['type'],
                        projection_string_text=res_dublin_core_meta[
                            'projection-info']['text'],
                        datum=res_dublin_core_meta['projection-info']['datum'])
                else:
                    nc_res.metadata.create_element(
                        'originalcoverage',
                        value=res_dublin_core_meta['original-box'])

            # create the ncdump text file
            dump_file = create_header_info_txt_file(in_file_name, nc_file_name)
            dump_file_name = nc_file_name + '_header_info.txt'
            uploaded_file = UploadedFile(file=open(dump_file, 'rb'),
                                         name=dump_file_name)
            files.append(uploaded_file)
        else:
            validate_files_dict['are_files_valid'] = False
            # fixed wording ("is invalid" -> "is in valid"), matching the
            # pre-create receiver's message
            validate_files_dict['message'] = 'Please check if the uploaded file is ' \
                                             'in valid NetCDF format.'

        if source_names and in_file_name:
            # remove the temp directory created for the federated-zone copy
            shutil.rmtree(os.path.dirname(in_file_name))
def netcdf_post_create_resource(sender, **kwargs):
    """Extract metadata from the resource's NetCDF file after resource creation.

    Expected kwargs: 'resource' (the newly created resource) and
    'validate_files' (dict receiving 'are_files_valid'/'message').

    Duplicate of the earlier receiver in this file; this copy carried two
    Python 3 defects fixed here: ``element.items()[0]`` (dict_items is not
    subscriptable -> wrapped in ``list``) and a text-mode ``open`` of the
    ncdump file handed to ``UploadedFile`` (now binary).
    """
    log = logging.getLogger()
    resource = kwargs['resource']
    validate_files_dict = kwargs['validate_files']
    res_file = resource.files.all().first()

    if res_file:
        temp_file = utils.get_file_from_irods(res_file)
        nc_dataset = nc_utils.get_nc_dataset(temp_file)
        nc_file_name = res_file.file_name
        if isinstance(nc_dataset, netCDF4.Dataset):
            # Extract the metadata from netcdf file
            res_dublin_core_meta, res_type_specific_meta = \
                nc_meta.get_nc_meta_dict(temp_file)
            # populate metadata list with extracted metadata
            metadata = []
            add_metadata_to_list(metadata, res_dublin_core_meta,
                                 res_type_specific_meta)
            for element in metadata:
                # here k is the name of the element
                # v is a dict of all element attributes/field names and field values
                # list() wrapper: dict_items is not subscriptable on Python 3
                k, v = list(element.items())[0]
                if k == 'title':
                    # update title element
                    title_element = resource.metadata.title
                    resource.metadata.update_element('title', title_element.id, **v)
                elif k == 'rights':
                    rights_element = resource.metadata.rights
                    resource.metadata.update_element('rights', rights_element.id, **v)
                elif k == 'creator':
                    # replace any auto-created creators with the one from the file
                    resource.metadata.creators.all().delete()
                    resource.metadata.create_element('creator', **v)
                else:
                    resource.metadata.create_element(k, **v)

            # create the ncdump text file
            dump_file = create_header_info_txt_file(temp_file, nc_file_name)
            dump_file_name = nc_file_name + '_header_info.txt'
            # binary mode so UploadedFile gets bytes
            uploaded_file = UploadedFile(file=open(dump_file, 'rb'),
                                         name=dump_file_name)
            utils.add_file_to_resource(resource, uploaded_file)
        else:
            delete_resource_file_only(resource, res_file)
            validate_files_dict['are_files_valid'] = False
            err_msg = "Uploaded file was not added to the resource." \
                      " Please provide a valid NetCDF file. "
            validate_files_dict['message'] = err_msg
            log_msg = "File validation failed for netcdf resource (ID:{})."
            log_msg = log_msg.format(resource.short_id)
            log.error(log_msg)

        # cleanup the temp file directory; inside the res_file guard so
        # temp_file is always bound (avoids NameError on file-less resources)
        if os.path.exists(temp_file):
            shutil.rmtree(os.path.dirname(temp_file))

    # set metadata is dirty flag as false for resource creation
    metadata = resource.metadata
    metadata.is_dirty = False
    metadata.save()

    # since we are extracting metadata after resource creation
    # metadata xml files need to be regenerated - so need to set the
    # dirty bag flags
    if resource.files.all().count() > 0:
        utils.set_dirty_bag_flag(resource)
def netcdf_pre_add_files_to_resource(sender, **kwargs):
    """Validate a replacement NetCDF file and refresh resource metadata before adding it.

    Expected kwargs: 'resource', 'files', 'validate_files', 'source_names',
    and 'user' (read only when a valid file is being processed).

    Duplicate of the earlier receiver in this file; fixes carried over here:
    garbled error message ("is invalid" -> "is in valid"), raw-string regex
    (invalid "\\s" escape), and binary-mode open of the ncdump file.
    """
    nc_res = kwargs['resource']
    files = kwargs['files']
    validate_files_dict = kwargs['validate_files']
    source_names = kwargs['source_names']

    if __debug__:
        assert(isinstance(source_names, list))

    if len(files) > 1:
        # file number validation
        validate_files_dict['are_files_valid'] = False
        validate_files_dict['message'] = 'Only one file can be uploaded.'

    file_selected = False
    in_file_name = ''
    nc_file_name = ''
    if files:
        file_selected = True
        in_file_name = files[0].file.name
        nc_file_name = os.path.splitext(files[0].name)[0]
    elif source_names:
        nc_file_name = os.path.splitext(os.path.basename(source_names[0]))[0]
        # federated-zone file: fetch a local temp copy for parsing
        ref_tmpfiles = utils.get_fed_zone_files(source_names)
        if ref_tmpfiles:
            in_file_name = ref_tmpfiles[0]
            file_selected = True

    if file_selected and in_file_name:
        # file type validation and existing metadata update and create new ncdump text file
        nc_dataset = nc_utils.get_nc_dataset(in_file_name)
        if isinstance(nc_dataset, netCDF4.Dataset):
            # delete all existing resource files and metadata related
            for f in ResourceFile.objects.filter(object_id=nc_res.id):
                delete_resource_file_only(nc_res, f)

            # update resource modification info
            user = kwargs['user']
            utils.resource_modified(nc_res, user, overwrite_bag=False)

            # extract metadata
            res_dublin_core_meta, res_type_specific_meta = \
                nc_meta.get_nc_meta_dict(in_file_name)

            # update title info
            if res_dublin_core_meta.get('title'):
                if nc_res.metadata.title:
                    nc_res.metadata.title.delete()
                nc_res.metadata.create_element(
                    'title', value=res_dublin_core_meta['title'])

            # update description info
            if res_dublin_core_meta.get('description'):
                if nc_res.metadata.description:
                    nc_res.metadata.description.delete()
                nc_res.metadata.create_element(
                    'description',
                    abstract=res_dublin_core_meta.get('description'))

            # update creator info
            if res_dublin_core_meta.get('creator_name'):
                name = res_dublin_core_meta.get('creator_name')
                email = res_dublin_core_meta.get('creator_email', '')
                url = res_dublin_core_meta.get('creator_url', '')
                arguments = dict(name=name, email=email, homepage=url)
                creator = nc_res.metadata.creators.all().filter(name=name).first()
                if creator:
                    order = creator.order
                    # keep the first-listed creator intact; re-create any
                    # other matching creator at its original order
                    if order != 1:
                        creator.delete()
                        arguments['order'] = order
                        nc_res.metadata.create_element('creator', **arguments)
                else:
                    nc_res.metadata.create_element('creator', **arguments)

            # update contributor info
            if res_dublin_core_meta.get('contributor_name'):
                name_list = res_dublin_core_meta['contributor_name'].split(',')
                existing_contributor_names = [
                    contributor.name
                    for contributor in nc_res.metadata.contributors.all()
                ]
                for name in name_list:
                    if name not in existing_contributor_names:
                        nc_res.metadata.create_element('contributor', name=name)

            # update subject info
            if res_dublin_core_meta.get('subject'):
                keywords = res_dublin_core_meta['subject'].split(',')
                existing_keywords = [
                    subject.value
                    for subject in nc_res.metadata.subjects.all()
                ]
                for keyword in keywords:
                    if keyword not in existing_keywords:
                        nc_res.metadata.create_element('subject', value=keyword)

            # update source
            if res_dublin_core_meta.get('source'):
                for source in nc_res.metadata.sources.all():
                    source.delete()
                nc_res.metadata.create_element(
                    'source', derived_from=res_dublin_core_meta.get('source'))

            # update license element:
            if res_dublin_core_meta.get('rights'):
                raw_info = res_dublin_core_meta.get('rights')
                # raw string: "\s" in a non-raw literal is an invalid escape
                b = re.search(r"(?P<url>https?://[^\s]+)", raw_info)
                url = b.group('url') if b else ''
                statement = raw_info.replace(url, '') if url else raw_info
                if nc_res.metadata.rights:
                    nc_res.metadata.rights.delete()
                nc_res.metadata.create_element('rights', statement=statement, url=url)

            # update relation
            if res_dublin_core_meta.get('references'):
                nc_res.metadata.relations.filter(type='cites').all().delete()
                nc_res.metadata.create_element(
                    'relation', type='cites',
                    value=res_dublin_core_meta['references'])

            # update box info
            nc_res.metadata.coverages.all().delete()
            if res_dublin_core_meta.get('box'):
                nc_res.metadata.create_element(
                    'coverage', type='box', value=res_dublin_core_meta['box'])

            # update period info
            if res_dublin_core_meta.get('period'):
                nc_res.metadata.create_element(
                    'coverage', type='period',
                    value=res_dublin_core_meta['period'])

            # update variable info
            nc_res.metadata.variables.all().delete()
            for var_info in res_type_specific_meta.values():
                nc_res.metadata.create_element(
                    'variable',
                    name=var_info['name'],
                    unit=var_info['unit'],
                    type=var_info['type'],
                    shape=var_info['shape'],
                    missing_value=var_info['missing_value'],
                    descriptive_name=var_info['descriptive_name'],
                    method=var_info['method'])

            # update the original spatial coverage meta
            nc_res.metadata.ori_coverage.all().delete()
            if res_dublin_core_meta.get('original-box'):
                if res_dublin_core_meta.get('projection-info'):
                    nc_res.metadata.create_element(
                        'originalcoverage',
                        value=res_dublin_core_meta['original-box'],
                        projection_string_type=res_dublin_core_meta[
                            'projection-info']['type'],
                        projection_string_text=res_dublin_core_meta[
                            'projection-info']['text'],
                        datum=res_dublin_core_meta['projection-info']['datum'])
                else:
                    nc_res.metadata.create_element(
                        'originalcoverage',
                        value=res_dublin_core_meta['original-box'])

            # create the ncdump text file
            dump_file = create_header_info_txt_file(in_file_name, nc_file_name)
            dump_file_name = nc_file_name + '_header_info.txt'
            # binary mode so UploadedFile gets bytes
            uploaded_file = UploadedFile(file=open(dump_file, 'rb'),
                                         name=dump_file_name)
            files.append(uploaded_file)
        else:
            validate_files_dict['are_files_valid'] = False
            # fixed wording ("is invalid" -> "is in valid"), matching the
            # pre-create receiver's message
            validate_files_dict['message'] = 'Please check if the uploaded file is ' \
                                             'in valid NetCDF format.'

        if source_names and in_file_name:
            # remove the temp directory created for the federated-zone copy
            shutil.rmtree(os.path.dirname(in_file_name))