def _process_uploaded_file(resource, validate_files_dict):
    log = logging.getLogger()

    # find a tif file or a zip file
    res_file = None
    for r_file in resource.files.all():
        if r_file.extension.lower() in ('.tif', '.tiff', '.zip'):
            res_file = r_file
            break

    if res_file:
        # get the file from irods to a temp dir
        temp_file = utils.get_file_from_irods(res_file)
        # validate the file
        validation_results = raster.raster_file_validation(raster_file=temp_file,
                                                           resource=resource)
        if not validation_results['error_info']:
            log.info("Geo raster file validation successful.")
            # extract metadata
            temp_dir = os.path.dirname(temp_file)
            temp_vrt_file_path = [os.path.join(temp_dir, f) for f in os.listdir(temp_dir)
                                  if '.vrt' == os.path.splitext(f)[1]].pop()
            metadata = raster.extract_metadata(temp_vrt_file_path)

            # delete the original resource file if it is a zip file
            if res_file.extension.lower() == '.zip':
                file_name = delete_resource_file_only(resource, res_file)
                delete_format_metadata_after_delete_file(resource, file_name)

            # add all extracted files (tif and vrt)
            for f in validation_results['new_resource_files_to_add']:
                uploaded_file = UploadedFile(file=open(f, 'rb'), name=os.path.basename(f))
                utils.add_file_to_resource(resource, uploaded_file)

            # use the extracted metadata to populate resource metadata
            for element in metadata:
                # here k is the name of the element
                # v is a dict of all element attributes/field names and field values
                k, v = list(element.items())[0]
                resource.metadata.create_element(k, **v)

            log_msg = "Geo raster resource (ID:{}) - extracted metadata was saved to DB"
            log_msg = log_msg.format(resource.short_id)
            log.info(log_msg)
        else:
            # validation failed - delete all the files in the resource
            for res_file in resource.files.all():
                delete_resource_file_only(resource, res_file)

            validate_files_dict['are_files_valid'] = False
            err_msg = "Uploaded file was not added to the resource. "
            err_msg += ", ".join(msg for msg in validation_results['error_info'])
            validate_files_dict['message'] = err_msg
            log_msg = "File validation failed for raster resource (ID:{})."
            log_msg = log_msg.format(resource.short_id)
            log.error(log_msg)

        # cleanup the temp file directory
        if os.path.exists(temp_file):
            shutil.rmtree(os.path.dirname(temp_file))
def _process_uploaded_csv_file(resource, res_file, validate_files_dict, user,
                               delete_existing_metadata=True):
    # get the csv file from iRODS to a temp directory
    fl_obj_name = utils.get_file_from_irods(res_file)
    validate_err_message = validate_csv_file(fl_obj_name)
    if not validate_err_message:
        # first delete relevant existing metadata elements
        if delete_existing_metadata:
            TimeSeriesMetaData.objects.filter(id=resource.metadata.id).update(is_dirty=False)
            _delete_extracted_metadata(resource)

        # delete the sqlite file if it exists
        _delete_resource_file(resource, ".sqlite")

        # add the blank sqlite file
        add_blank_sqlite_file(resource, upload_folder=None)
        resource_modified(resource, user, overwrite_bag=False)

        # populate CV metadata django models from the blank sqlite file
        extract_cv_metadata_from_blank_sqlite_file(resource)
    else:
        # file validation failed - delete the invalid file just uploaded
        delete_resource_file_only(resource, res_file)
        validate_files_dict['are_files_valid'] = False
        validate_err_message += "{}".format(FILE_UPLOAD_ERROR_MESSAGE)
        validate_files_dict['message'] = validate_err_message

    # cleanup the temp csv file
    if os.path.exists(fl_obj_name):
        shutil.rmtree(os.path.dirname(fl_obj_name))
def _process_uploaded_csv_file(resource, res_file, validate_files_dict, user,
                               delete_existing_metadata=True):
    # get the csv file from iRODS to a temp directory
    fl_obj_name = utils.get_file_from_irods(res_file)
    validate_err_message = validate_csv_file(fl_obj_name)
    if not validate_err_message:
        # first delete relevant existing metadata elements
        if delete_existing_metadata:
            TimeSeriesMetaData.objects.filter(id=resource.metadata.id).update(
                is_dirty=False)
            _delete_extracted_metadata(resource)

        # delete the sqlite file if it exists
        _delete_resource_file(resource, ".sqlite")

        # add the blank sqlite file
        add_blank_sqlite_file(resource, upload_folder='')
        resource_modified(resource, user, overwrite_bag=False)

        # populate CV metadata django models from the blank sqlite file
        extract_cv_metadata_from_blank_sqlite_file(resource)
    else:
        # file validation failed - delete the invalid file just uploaded
        delete_resource_file_only(resource, res_file)
        validate_files_dict['are_files_valid'] = False
        validate_err_message += "{}".format(FILE_UPLOAD_ERROR_MESSAGE)
        validate_files_dict['message'] = validate_err_message

    # cleanup the temp csv file
    if os.path.exists(fl_obj_name):
        shutil.rmtree(os.path.dirname(fl_obj_name))
def _process_uploaded_sqlite_file(user, resource, res_file, validate_files_dict,
                                  delete_existing_metadata=True):
    # check if it is a sqlite file
    fl_ext = utils.get_resource_file_name_and_extension(res_file)[2]

    if fl_ext == '.sqlite':
        # get the file from iRODS to a temp directory
        fl_obj_name = utils.get_file_from_irods(res_file)
        validate_err_message = _validate_odm2_db_file(fl_obj_name)
        if not validate_err_message:
            # first delete relevant existing metadata elements
            if delete_existing_metadata:
                TimeSeriesMetaData.objects.filter(id=resource.metadata.id).update(
                    is_dirty=False)
                _delete_extracted_metadata(resource)
            extract_err_message = _extract_metadata(resource, fl_obj_name)
            if extract_err_message:
                # delete the invalid file
                delete_resource_file_only(resource, res_file)
                # cleanup any extracted metadata
                _delete_extracted_metadata(resource)
                validate_files_dict['are_files_valid'] = False
                extract_err_message += "{}".format(FILE_UPLOAD_ERROR_MESSAGE)
                validate_files_dict['message'] = extract_err_message
            else:
                # set metadata is_dirty to False
                TimeSeriesMetaData.objects.filter(id=resource.metadata.id).update(
                    is_dirty=False)
                # delete the csv file if it exists
                _delete_resource_file(resource, ".csv")
                utils.resource_modified(resource, user, overwrite_bag=False)
        else:
            # file validation failed - delete the invalid file just uploaded
            delete_resource_file_only(resource, res_file)
            validate_files_dict['are_files_valid'] = False
            validate_err_message += "{}".format(FILE_UPLOAD_ERROR_MESSAGE)
            validate_files_dict['message'] = validate_err_message

        # cleanup the temp file
        if os.path.exists(fl_obj_name):
            shutil.rmtree(os.path.dirname(fl_obj_name))
    else:
        # delete the invalid file
        delete_resource_file_only(resource, res_file)
        validate_files_dict['are_files_valid'] = False
        err_message = "The uploaded file is not a sqlite file. {}"
        err_message = err_message.format(FILE_UPLOAD_ERROR_MESSAGE)
        validate_files_dict['message'] = err_message
def list_tif_files(vrt_file):
    """
    Lists the tif files named in a vrt file

    :param vrt_file: ResourceFile of a vrt for which the associated tif(f) files are listed
    :return: list of string filenames read from vrt_file, empty list if none found
    """
    temp_vrt_file = utils.get_file_from_irods(vrt_file)
    with open(temp_vrt_file, 'r') as opened_vrt_file:
        vrt_string = opened_vrt_file.read()
        root = ET.fromstring(vrt_string)
        file_names_in_vrt = [file_name.text for file_name in root.iter('SourceFilename')]
        return file_names_in_vrt
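# A minimal standalone sketch of the VRT parsing that list_tif_files() relies on.
# The sample_vrt string below is illustrative of a typical GDAL .vrt file, not a
# file from this codebase; only the <SourceFilename> elements matter here.
import xml.etree.ElementTree as ET

sample_vrt = """<VRTDataset rasterXSize="10" rasterYSize="10">
  <VRTRasterBand dataType="Float32" band="1">
    <SimpleSource>
      <SourceFilename relativeToVRT="1">small_logan.tif</SourceFilename>
    </SimpleSource>
  </VRTRasterBand>
</VRTDataset>"""

root = ET.fromstring(sample_vrt)
# root.iter() finds SourceFilename elements at any nesting depth
print([name.text for name in root.iter('SourceFilename')])  # ['small_logan.tif']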
def _process_uploaded_sqlite_file(user, resource, res_file, validate_files_dict,
                                  delete_existing_metadata=True):
    # check if it is a sqlite file
    fl_ext = utils.get_resource_file_name_and_extension(res_file)[2]

    if fl_ext == '.sqlite':
        # get the file from iRODS to a temp directory
        fl_obj_name = utils.get_file_from_irods(res_file)
        validate_err_message = validate_odm2_db_file(fl_obj_name)
        if not validate_err_message:
            # first delete relevant existing metadata elements
            if delete_existing_metadata:
                TimeSeriesMetaData.objects.filter(id=resource.metadata.id).update(
                    is_dirty=False)
                _delete_extracted_metadata(resource)
            extract_err_message = extract_metadata(resource, fl_obj_name)
            if extract_err_message:
                # delete the invalid file
                delete_resource_file_only(resource, res_file)
                # cleanup any extracted metadata
                _delete_extracted_metadata(resource)
                validate_files_dict['are_files_valid'] = False
                extract_err_message += "{}".format(FILE_UPLOAD_ERROR_MESSAGE)
                validate_files_dict['message'] = extract_err_message
            else:
                # set metadata is_dirty to False
                TimeSeriesMetaData.objects.filter(id=resource.metadata.id).update(
                    is_dirty=False)
                # delete the csv file if it exists
                _delete_resource_file(resource, ".csv")
                utils.resource_modified(resource, user, overwrite_bag=False)
        else:
            # file validation failed - delete the invalid file just uploaded
            delete_resource_file_only(resource, res_file)
            validate_files_dict['are_files_valid'] = False
            validate_err_message += "{}".format(FILE_UPLOAD_ERROR_MESSAGE)
            validate_files_dict['message'] = validate_err_message

        # cleanup the temp file
        if os.path.exists(fl_obj_name):
            shutil.rmtree(os.path.dirname(fl_obj_name))
    else:
        # delete the invalid file
        delete_resource_file_only(resource, res_file)
        validate_files_dict['are_files_valid'] = False
        err_message = "The uploaded file is not a sqlite file. {}"
        err_message = err_message.format(FILE_UPLOAD_ERROR_MESSAGE)
        validate_files_dict['message'] = err_message
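# A hypothetical sketch of the kind of check validate_odm2_db_file() could perform;
# the required table names below are assumptions based on the ODM2 schema, not code
# from this module. It follows the same convention as above: an empty string means
# the file validated, a non-empty string is the error message.
import sqlite3

def check_odm2_tables(sqlite_file_path):
    con = sqlite3.connect(sqlite_file_path)
    with con:
        cur = con.cursor()
        cur.execute("SELECT name FROM sqlite_master WHERE type='table'")
        tables = {row[0] for row in cur.fetchall()}
    required = {'Results', 'TimeSeriesResults', 'Units', 'Variables'}  # assumed subset
    missing = required - tables
    if missing:
        return "Missing required ODM2 tables: {}. ".format(', '.join(sorted(missing)))
    return ''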
def netcdf_post_create_resource(sender, **kwargs):
    log = logging.getLogger()
    resource = kwargs['resource']
    validate_files_dict = kwargs['validate_files']
    res_file = resource.files.all().first()

    if res_file:
        temp_file = utils.get_file_from_irods(res_file)
        nc_dataset = nc_utils.get_nc_dataset(temp_file)
        nc_file_name = res_file.file_name
        if isinstance(nc_dataset, netCDF4.Dataset):
            # extract the metadata from the netcdf file
            res_dublin_core_meta, res_type_specific_meta = nc_meta.get_nc_meta_dict(temp_file)

            # populate metadata list with extracted metadata
            metadata = []
            add_metadata_to_list(metadata, res_dublin_core_meta, res_type_specific_meta)
            for element in metadata:
                # here k is the name of the element
                # v is a dict of all element attributes/field names and field values
                k, v = list(element.items())[0]
                if k == 'title':
                    # update title element
                    title_element = resource.metadata.title
                    resource.metadata.update_element('title', title_element.id, **v)
                elif k == 'rights':
                    rights_element = resource.metadata.rights
                    resource.metadata.update_element('rights', rights_element.id, **v)
                elif k == 'creator':
                    resource.metadata.creators.all().delete()
                    resource.metadata.create_element('creator', **v)
                else:
                    resource.metadata.create_element(k, **v)

            # create the ncdump text file
            dump_file = create_header_info_txt_file(temp_file, nc_file_name)
            dump_file_name = nc_file_name + '_header_info.txt'
            uploaded_file = UploadedFile(file=open(dump_file, mode="rb"),
                                         name=dump_file_name)
            utils.add_file_to_resource(resource, uploaded_file)
        else:
            delete_resource_file_only(resource, res_file)
            validate_files_dict['are_files_valid'] = False
            err_msg = "Uploaded file was not added to the resource." \
                      " Please provide a valid NetCDF file. "
            validate_files_dict['message'] = err_msg
            log_msg = "File validation failed for netcdf resource (ID:{})."
            log_msg = log_msg.format(resource.short_id)
            log.error(log_msg)

        # cleanup the temp file directory
        if os.path.exists(temp_file):
            shutil.rmtree(os.path.dirname(temp_file))

    # set metadata is_dirty flag as false for resource creation
    metadata = resource.metadata
    metadata.is_dirty = False
    metadata.save()

    # since we are extracting metadata after resource creation
    # metadata xml files need to be regenerated - so need to set the
    # dirty bag flags
    if resource.files.all().count() > 0:
        utils.set_dirty_bag_flag(resource)
def get_all_related_shp_files(resource, selected_resource_file, file_type):
    """
    This helper function copies all the related shape files to a temp directory
    and returns a list of those temp file paths as well as a list of existing
    related resource file objects
    :param resource: an instance of BaseResource to which the *selected_resource_file*
    belongs
    :param selected_resource_file: an instance of ResourceFile selected by the user to set
    GeoFeatureFile type (the file must be a .shp or a .zip file)
    :param file_type: a flag (True/False) to control resource VS file type actions
    :return: a list of temp file paths for all related shape files, and a list of
    corresponding resource file objects
    """

    def collect_shape_resource_files(res_file):
        # compare without the file extension (-4, or -8 for .shp.xml sidecars)
        if res_file.short_path.lower().endswith('.shp.xml'):
            if selected_resource_file.short_path[:-4] == res_file.short_path[:-8]:
                shape_res_files.append(res_file)
        elif selected_resource_file.short_path[:-4] == res_file.short_path[:-4]:
            shape_res_files.append(res_file)

    shape_temp_files = []
    shape_res_files = []
    temp_dir = ''
    if selected_resource_file.extension.lower() == '.shp':
        for f in resource.files.all():
            if f.file_folder == selected_resource_file.file_folder:
                if f.extension.lower() == '.xml' and \
                        not f.file_name.lower().endswith('.shp.xml'):
                    continue
                if f.extension.lower() in \
                        GeoFeatureLogicalFile.get_allowed_storage_file_types():
                    collect_shape_resource_files(f)

        for f in shape_res_files:
            temp_file = utils.get_file_from_irods(f)
            if not temp_dir:
                temp_dir = os.path.dirname(temp_file)
            else:
                # move each fetched file into the one shared temp directory
                file_temp_dir = os.path.dirname(temp_file)
                dst_dir = os.path.join(temp_dir, os.path.basename(temp_file))
                shutil.copy(temp_file, dst_dir)
                shutil.rmtree(file_temp_dir)
                temp_file = dst_dir
            shape_temp_files.append(temp_file)

    elif selected_resource_file.extension.lower() == '.zip':
        temp_file = utils.get_file_from_irods(selected_resource_file)
        temp_dir = os.path.dirname(temp_file)
        if not zipfile.is_zipfile(temp_file):
            if os.path.isdir(temp_dir):
                shutil.rmtree(temp_dir)
            raise ValidationError('Selected file is not a zip file')
        zf = zipfile.ZipFile(temp_file, 'r')
        zf.extractall(temp_dir)
        zf.close()
        for dirpath, _, filenames in os.walk(temp_dir):
            for name in filenames:
                if name == selected_resource_file.file_name:
                    # skip the user selected zip file
                    continue
                file_path = os.path.abspath(os.path.join(dirpath, name))
                shape_temp_files.append(file_path)

        shape_res_files.append(selected_resource_file)

    return shape_temp_files, shape_res_files
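# A tiny standalone illustration of the suffix matching used above: sidecar files
# share the selected .shp file's base path, while .shp.xml needs 8 trailing
# characters stripped instead of 4. The file names are made up for the example.
selected = 'watersheds/logan.shp'
candidates = ['watersheds/logan.dbf', 'watersheds/logan.shp.xml', 'watersheds/other.prj']
related = [c for c in candidates
           if selected[:-4] == (c[:-8] if c.lower().endswith('.shp.xml') else c[:-4])]
print(related)  # ['watersheds/logan.dbf', 'watersheds/logan.shp.xml']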
def set_file_type(cls, resource, user, file_id=None, folder_path=None):
    """
    Creates a RefTimeseriesLogicalFile (aggregation) from a json resource file
    (.refts.json)
    """

    log = logging.getLogger()
    if file_id is None:
        raise ValueError("Must specify id of the file to be set as an aggregation type")

    # get the selected resource file object
    res_file = utils.get_resource_file_by_id(resource, file_id)
    if res_file is None:
        raise ValidationError("File not found.")

    if not res_file.file_name.lower().endswith('.refts.json'):
        raise ValidationError("Selected file '{}' is not a Ref Time Series file.".format(
            res_file.file_name))

    if res_file.has_logical_file and not res_file.logical_file.is_fileset:
        raise ValidationError("Selected file '{}' is already part of an aggregation".format(
            res_file.file_name))

    try:
        json_file_content = _validate_json_file(res_file)
    except Exception as ex:
        log.exception("failed json validation")
        raise ValidationError(str(ex))

    # get the file from irods to temp dir
    temp_file = utils.get_file_from_irods(res_file)
    temp_dir = os.path.dirname(temp_file)

    with transaction.atomic():
        # create a ref timeseries logical file object to be associated with
        # resource files
        logical_file = cls.create(resource)
        # create logical file record in DB
        logical_file.save()
        logical_file.metadata.json_file_content = json_file_content
        logical_file.metadata.save()

        try:
            # make the json file part of the aggregation
            logical_file.add_resource_file(res_file)
            logical_file.dataset_name = logical_file.metadata.get_title_from_json()
            logical_file.save()
            # extract metadata
            _extract_metadata(resource, logical_file)
            log.info("RefTimeseries aggregation type - json file was added to the resource.")
            logical_file._finalize(user, resource, folder_created=False,
                                   res_files_to_delete=[])
            log.info("RefTimeseries aggregation type was created.")
        except Exception as ex:
            msg = "RefTimeseries aggregation type. Error when setting aggregation " \
                  "type. Error:{}"
            msg = msg.format(str(ex))
            log.exception(msg)
            raise ValidationError(msg)
        finally:
            # remove temp dir
            if os.path.isdir(temp_dir):
                shutil.rmtree(temp_dir)
def raster_file_validation(raster_file, resource, raster_folder=None):
    """
    Validates if the relevant files are valid for raster aggregation or raster
    resource type

    :param raster_file: a temp file (extension tif or zip) retrieved from irods and
    stored on a temp dir in django
    :param raster_folder: (optional) folder in which the raster file exists on irods
    :param resource: an instance of CompositeResource or GeoRasterResource in which
    raster_file exists
    :return: a dict containing a list of error messages and a list of file paths for
    all files that belong to the raster
    """

    error_info = []
    new_resource_files_to_add = []
    raster_resource_files = []
    create_vrt = True
    validation_results = {'error_info': error_info,
                          'new_resource_files_to_add': new_resource_files_to_add,
                          'raster_resource_files': raster_resource_files,
                          'vrt_created': create_vrt}
    file_name_part, ext = os.path.splitext(os.path.basename(raster_file))
    ext = ext.lower()

    if ext == '.tif' or ext == '.tiff':
        res_files = ResourceFile.list_folder(resource=resource, folder=raster_folder,
                                             sub_folders=False)

        # check if there is already a vrt file in that folder
        vrt_files = [f for f in res_files if f.extension.lower() == ".vrt"]
        tif_files = [f for f in res_files if f.extension.lower() == ".tif" or
                     f.extension.lower() == ".tiff"]
        if vrt_files:
            if len(vrt_files) > 1:
                error_info.append("More than one vrt file was found.")
                return validation_results
            create_vrt = False
            validation_results['vrt_created'] = False
        elif len(tif_files) != 1:
            # if there is more than one tif file and no vrt file, then we just use the
            # selected tif file to create the aggregation in case of composite resource
            if resource.resource_type == "CompositeResource":
                tif_files = [tif_file for tif_file in tif_files
                             if raster_file.endswith(tif_file.file_name)]
            else:
                # if there is more than one tif file, there needs to be one vrt file
                error_info.append("A vrt file is missing.")
                return validation_results

        raster_resource_files.extend(vrt_files)
        raster_resource_files.extend(tif_files)

        if vrt_files:
            temp_dir = os.path.dirname(raster_file)
            temp_vrt_file = utils.get_file_from_irods(vrt_files[0], temp_dir)
        else:
            # create the .vrt file
            try:
                temp_vrt_file = create_vrt_file(raster_file)
            except Exception as ex:
                error_info.append(str(ex))
            else:
                if os.path.isfile(temp_vrt_file):
                    new_resource_files_to_add.append(temp_vrt_file)

    elif ext == '.zip':
        try:
            extract_file_paths = _explode_raster_zip_file(raster_file)
        except Exception as ex:
            error_info.append(str(ex))
        else:
            if extract_file_paths:
                new_resource_files_to_add.extend(extract_file_paths)
    else:
        error_info.append("Invalid file mime type found.")

    if not error_info:
        if ext == ".zip":
            # in case of zip, there needs to be more than one file extracted out of
            # the zip file
            if len(new_resource_files_to_add) < 2:
                error_info.append("Invalid zip file. Seems to contain only one file. "
                                  "Multiple tif files are expected.")
                return validation_results

            files_ext = [os.path.splitext(path)[1].lower()
                         for path in new_resource_files_to_add]
            if files_ext.count('.vrt') > 1:
                error_info.append("Invalid zip file. Seems to contain multiple vrt files.")
                return validation_results
            elif files_ext.count('.vrt') == 0:
                error_info.append("Invalid zip file. No vrt file was found.")
                return validation_results
            elif files_ext.count('.tif') + files_ext.count('.tiff') < 1:
                error_info.append("Invalid zip file. No tif/tiff file was found.")
                return validation_results

            # check if there are files that are not raster related
            non_raster_files = [f_ext for f_ext in files_ext
                                if f_ext not in ('.tif', '.tiff', '.vrt')]
            if non_raster_files:
                error_info.append("Invalid zip file. Contains files that are not "
                                  "raster related.")
                return validation_results

            temp_vrt_file = new_resource_files_to_add[files_ext.index('.vrt')]

        # validate the vrt file if we didn't create it
        if ext == '.zip' or not create_vrt:
            raster_dataset = gdal.Open(temp_vrt_file, GA_ReadOnly)
            if raster_dataset is None:
                error_info.append('Failed to open the vrt file.')
                return validation_results

            # check if the vrt file is valid
            try:
                raster_dataset.RasterXSize
                raster_dataset.RasterYSize
                raster_dataset.RasterCount
            except AttributeError:
                error_info.append('Raster size and band information are missing.')
                return validation_results

            # check if the raster file numbers and names are valid in the vrt file
            with open(temp_vrt_file, 'r') as vrt_file:
                vrt_string = vrt_file.read()
                root = ET.fromstring(vrt_string)
                file_names_in_vrt = [file_name.text for file_name in
                                     root.iter('SourceFilename')]

            if ext == '.zip':
                file_names = [os.path.basename(path) for path in new_resource_files_to_add]
            else:
                file_names = [f.file_name for f in raster_resource_files]

            file_names = [f_name for f_name in file_names if not f_name.endswith('.vrt')]

            if len(file_names) > len(file_names_in_vrt):
                msg = 'One or more additional tif files were found which are not listed ' \
                      'in the provided {} file.'
                msg = msg.format(os.path.basename(temp_vrt_file))
                error_info.append(msg)
            else:
                for vrt_ref_raster_name in file_names_in_vrt:
                    if vrt_ref_raster_name in file_names \
                            or (os.path.split(vrt_ref_raster_name)[0] == '.' and
                                os.path.split(vrt_ref_raster_name)[1] in file_names):
                        continue
                    elif os.path.basename(vrt_ref_raster_name) in file_names:
                        msg = "Please specify {} as {} in the .vrt file, because it will " \
                              "be saved in the same folder with the .vrt file in HydroShare."
                        msg = msg.format(vrt_ref_raster_name,
                                         os.path.basename(vrt_ref_raster_name))
                        error_info.append(msg)
                        break
                    else:
                        msg = "The file {tif} which is listed in the {vrt} file is missing."
                        msg = msg.format(tif=os.path.basename(vrt_ref_raster_name),
                                         vrt=os.path.basename(temp_vrt_file))
                        error_info.append(msg)
                        break

    return validation_results
def set_file_type(cls, resource, user, file_id=None, folder_path=None):
    """
    Creates a GeoRasterLogicalFile (aggregation) from a tif or a zip resource file,
    or a folder
    """

    log = logging.getLogger()
    res_file, folder_path = cls._validate_set_file_type_inputs(resource, file_id,
                                                               folder_path)
    file_name = res_file.file_name
    # get file name without the extension - needed for naming the aggregation folder
    base_file_name = file_name[:-len(res_file.extension)]
    file_folder = res_file.file_folder
    aggregation_folder_created = False
    # determine if we need to create a new folder for the aggregation
    create_new_folder = cls._check_create_aggregation_folder(
        selected_res_file=res_file, selected_folder=folder_path,
        aggregation_file_count=1)

    upload_folder = ''
    # get the file from irods to temp dir
    temp_file = utils.get_file_from_irods(res_file)
    temp_dir = os.path.dirname(temp_file)
    res_files_to_delete = []

    raster_folder = folder_path if folder_path is not None else file_folder
    # validate the file
    validation_results = raster_file_validation(raster_file=temp_file, resource=resource,
                                                raster_folder=raster_folder)

    if not validation_results['error_info']:
        msg = "Geographic raster aggregation. Error when creating aggregation. Error:{}"
        file_type_success = False
        log.info("Geographic raster aggregation validation successful.")
        # extract metadata
        temp_vrt_file_path = [os.path.join(temp_dir, f) for f in os.listdir(temp_dir)
                              if '.vrt' == os.path.splitext(f)[1]].pop()
        metadata = extract_metadata(temp_vrt_file_path)
        log.info("Geographic raster metadata extraction was successful.")

        with transaction.atomic():
            # create a geo raster logical file object to be associated with resource files
            logical_file = cls.initialize(base_file_name, resource)
            try:
                if not folder_path:
                    # we are here means aggregation is being created by selecting a file
                    if create_new_folder:
                        # create a folder for the raster file type using the base file
                        # name as the name for the new folder
                        upload_folder = cls._create_aggregation_folder(resource,
                                                                       file_folder,
                                                                       base_file_name)
                        log.info("Folder created:{}".format(upload_folder))
                        aggregation_folder_created = True
                    else:
                        upload_folder = file_folder

                    # create logical file record in DB
                    logical_file.save()

                    if res_file.extension.lower() in [".tiff", ".tif"]:
                        if aggregation_folder_created:
                            tgt_folder = upload_folder
                            # copy any existing raster specific files to the new
                            # aggregation folder and make them part of the logical file
                            files_to_copy = validation_results['raster_resource_files']
                            logical_file.copy_resource_files(resource, files_to_copy,
                                                             tgt_folder)
                            res_files_to_delete.extend(files_to_copy)
                        else:
                            # make the existing raster specific files part of the
                            # aggregation/file type
                            for raster_res_file in \
                                    validation_results['raster_resource_files']:
                                logical_file.add_resource_file(raster_res_file)
                    else:
                        # selected file must be a zip file
                        res_files_to_delete.append(res_file)
                else:
                    # create logical file record in DB
                    logical_file.save()
                    # user selected a folder to create aggregation
                    upload_folder = folder_path
                    # make all the files in the selected folder part of the aggregation
                    logical_file.add_resource_files_in_folder(resource, folder_path)

                # add all new files to the resource and make those part of the logical file
                if validation_results['new_resource_files_to_add']:
                    files_to_add_to_resource = \
                        validation_results['new_resource_files_to_add']
                    logical_file.add_files_to_resource(
                        resource=resource, files_to_add=files_to_add_to_resource,
                        upload_folder=upload_folder)
                log.info("Geographic raster aggregation type - new files were added "
                         "to the resource.")

                # use the extracted metadata to populate file metadata
                for element in metadata:
                    # here k is the name of the element
                    # v is a dict of all element attributes/field names and field values
                    k, v = list(element.items())[0]
                    logical_file.metadata.create_element(k, **v)
                log.info("Geographic raster aggregation type - metadata was saved to DB")
                logical_file._finalize(user, resource,
                                       folder_created=aggregation_folder_created,
                                       res_files_to_delete=res_files_to_delete,
                                       reset_title=True)
                file_type_success = True
                post_add_raster_aggregation.send(
                    sender=AbstractLogicalFile,
                    resource=resource,
                    file=logical_file
                )
            except Exception as ex:
                msg = msg.format(str(ex))
                log.exception(msg)
            finally:
                # remove temp dir
                if os.path.isdir(temp_dir):
                    shutil.rmtree(temp_dir)

        if not file_type_success:
            aggregation_from_folder = folder_path is not None
            cls._cleanup_on_fail_to_create_aggregation(user, resource, upload_folder,
                                                       file_folder,
                                                       aggregation_from_folder)
            raise ValidationError(msg)
    else:
        # remove temp dir
        if os.path.isdir(temp_dir):
            shutil.rmtree(temp_dir)
        err_msg = "Geographic raster aggregation type validation failed. {}".format(
            ' '.join(validation_results['error_info']))
        log.error(err_msg)
        raise ValidationError(err_msg)
def set_file_type(cls, resource, user, file_id=None, folder_path=None):
    """
    Creates a RefTimeseriesLogicalFile (aggregation) from a json resource file
    (.refts.json)
    """

    log = logging.getLogger()
    if file_id is None:
        raise ValueError("Must specify id of the file to be set as an aggregation type")

    # get the selected resource file object
    res_file = utils.get_resource_file_by_id(resource, file_id)
    if res_file is None:
        raise ValidationError("File not found.")

    if not res_file.file_name.lower().endswith('.refts.json'):
        raise ValidationError("Selected file '{}' is not a Ref Time Series file.".format(
            res_file.file_name))

    if res_file.has_logical_file and not res_file.logical_file.is_fileset:
        raise ValidationError("Selected file '{}' is already part of an aggregation".format(
            res_file.file_name))

    try:
        json_file_content = _validate_json_file(res_file)
    except Exception as ex:
        log.exception("failed json validation")
        raise ValidationError(str(ex))

    # get the file from irods to temp dir
    temp_file = utils.get_file_from_irods(res_file)
    temp_dir = os.path.dirname(temp_file)

    with transaction.atomic():
        # create a ref timeseries logical file object to be associated with
        # resource files
        logical_file = cls.create(resource)
        # create logical file record in DB
        logical_file.save()
        logical_file.metadata.json_file_content = json_file_content
        logical_file.metadata.save()

        try:
            # make the json file part of the aggregation
            logical_file.add_resource_file(res_file)
            logical_file.dataset_name = logical_file.metadata.get_title_from_json()
            logical_file.save()
            # extract metadata
            _extract_metadata(resource, logical_file)
            log.info("RefTimeseries aggregation type - json file was added to the resource.")
            logical_file._finalize(user, resource, folder_created=False,
                                   res_files_to_delete=[])
            log.info("RefTimeseries aggregation type was created.")
            post_add_reftimeseries_aggregation.send(
                sender=AbstractLogicalFile,
                resource=resource,
                file=logical_file
            )
        except Exception as ex:
            msg = "RefTimeseries aggregation type. Error when setting aggregation " \
                  "type. Error:{}"
            msg = msg.format(str(ex))
            log.exception(msg)
            raise ValidationError(msg)
        finally:
            # remove temp dir
            if os.path.isdir(temp_dir):
                shutil.rmtree(temp_dir)
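# A hypothetical sketch of the kind of check _validate_json_file() could perform on a
# .refts.json file; the key names ('timeSeriesReferenceFile', 'title',
# 'referencedTimeSeries') are assumptions about the file layout, not taken from this
# module.
import json

def validate_refts_json(file_path):
    with open(file_path) as f:
        content = json.load(f)  # raises ValueError/JSONDecodeError on malformed JSON
    ts = content.get('timeSeriesReferenceFile', {})
    for key in ('title', 'referencedTimeSeries'):
        if key not in ts:
            raise ValueError("missing required key: {}".format(key))
    return content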
def set_file_type(cls, resource, file_id, user):
    """
    Sets a .nc netcdf resource file to NetCDFFile type
    :param resource: an instance of resource type CompositeResource
    :param file_id: id of the resource file to be set as NetCDFFile type
    :param user: user who is setting the file type
    :return:
    """

    # had to import it here to avoid import loop
    from hs_core.views.utils import create_folder

    log = logging.getLogger()

    # get the file from irods
    res_file = utils.get_resource_file_by_id(resource, file_id)

    if res_file is None:
        raise ValidationError("File not found.")

    if res_file.extension != '.nc':
        raise ValidationError("Not a NetCDF file.")

    # base file name (no path included)
    file_name = res_file.file_name
    # file name without the extension
    nc_file_name = file_name.split(".")[0]

    resource_metadata = []
    file_type_metadata = []
    files_to_add_to_resource = []

    if res_file.has_generic_logical_file:
        # get the file from irods to temp dir
        temp_file = utils.get_file_from_irods(res_file)
        temp_dir = os.path.dirname(temp_file)
        files_to_add_to_resource.append(temp_file)
        # file validation and metadata extraction
        nc_dataset = nc_utils.get_nc_dataset(temp_file)
        if isinstance(nc_dataset, netCDF4.Dataset):
            # extract the metadata from the netcdf file
            res_dublin_core_meta, res_type_specific_meta = \
                nc_meta.get_nc_meta_dict(temp_file)
            # populate resource_metadata and file_type_metadata lists with
            # extracted metadata
            add_metadata_to_list(resource_metadata, res_dublin_core_meta,
                                 res_type_specific_meta, file_type_metadata, resource)

            # create the ncdump text file
            dump_file = create_header_info_txt_file(temp_file, nc_file_name)
            files_to_add_to_resource.append(dump_file)
            file_folder = res_file.file_folder
            with transaction.atomic():
                # first delete the netcdf file that we retrieved from irods
                # for setting it to netcdf file type
                delete_resource_file(resource.short_id, res_file.id, user)

                # create a netcdf logical file object to be associated with
                # resource files
                logical_file = cls.create()
                # by default set the dataset_name attribute of the logical file to the
                # name of the file selected to set file type unless the extracted
                # metadata has a value for title
                dataset_title = res_dublin_core_meta.get('title', None)
                if dataset_title is not None:
                    logical_file.dataset_name = dataset_title
                else:
                    logical_file.dataset_name = nc_file_name
                logical_file.save()

                try:
                    # create a folder for the netcdf file type using the base file
                    # name as the name for the new folder
                    new_folder_path = cls.compute_file_type_folder(resource, file_folder,
                                                                   nc_file_name)
                    fed_file_full_path = ''
                    if resource.resource_federation_path:
                        fed_file_full_path = os.path.join(resource.root_path,
                                                          new_folder_path)

                    create_folder(resource.short_id, new_folder_path)
                    log.info("Folder created:{}".format(new_folder_path))

                    new_folder_name = new_folder_path.split('/')[-1]
                    if file_folder is None:
                        upload_folder = new_folder_name
                    else:
                        upload_folder = os.path.join(file_folder, new_folder_name)

                    # add all new files to the resource
                    for f in files_to_add_to_resource:
                        uploaded_file = UploadedFile(file=open(f, 'rb'),
                                                     name=os.path.basename(f))
                        new_res_file = utils.add_file_to_resource(
                            resource, uploaded_file, folder=upload_folder,
                            fed_res_file_name_or_path=fed_file_full_path)
                        # make each resource file we added part of the logical file
                        logical_file.add_resource_file(new_res_file)

                    log.info("NetCDF file type - new files were added to the resource.")
                except Exception as ex:
                    msg = "NetCDF file type. Error when setting file type. Error:{}"
                    msg = msg.format(str(ex))
                    log.exception(msg)
                    # TODO: in case of any error put the original file back and
                    # delete the folder that was created
                    raise ValidationError(msg)
                finally:
                    # remove temp dir
                    if os.path.isdir(temp_dir):
                        shutil.rmtree(temp_dir)

                log.info("NetCDF file type was created.")

                # use the extracted metadata to populate resource metadata
                for element in resource_metadata:
                    # here k is the name of the element
                    # v is a dict of all element attributes/field names and field values
                    k, v = list(element.items())[0]
                    if k == 'title':
                        # update title element
                        title_element = resource.metadata.title
                        resource.metadata.update_element('title', title_element.id, **v)
                    else:
                        resource.metadata.create_element(k, **v)

                log.info("Resource - metadata was saved to DB")

                # use the extracted metadata to populate file metadata
                for element in file_type_metadata:
                    # here k is the name of the element
                    # v is a dict of all element attributes/field names and field values
                    k, v = list(element.items())[0]
                    if k == 'subject':
                        logical_file.metadata.keywords = v
                        logical_file.metadata.save()
                    else:
                        logical_file.metadata.create_element(k, **v)
                log.info("NetCDF file type - metadata was saved to DB")
        else:
            err_msg = "Not a valid NetCDF file. File type file validation failed."
            log.error(err_msg)
            # remove temp dir
            if os.path.isdir(temp_dir):
                shutil.rmtree(temp_dir)
            raise ValidationError(err_msg)
import os
import shutil
import tempfile

from hs_core.hydroshare.utils import resource_modified, get_file_from_irods
from hs_file_types import raster_meta_extract
from hs_geo_raster_resource.models import RasterResource

copy_res_fail = []
meta_update_fail = []
meta_update_success = []

# start migration for each raster resource that has raster files
for res in RasterResource.objects.all():
    # copy all the resource files to a temp dir
    temp_dir = ''
    res_file_tmp_path = ''
    try:
        temp_dir = tempfile.mkdtemp()
        for res_file in res.files.all():
            res_file_tmp_path = get_file_from_irods(res_file)
            shutil.copy(res_file_tmp_path,
                        os.path.join(temp_dir, os.path.basename(res_file_tmp_path)))
            shutil.rmtree(os.path.dirname(res_file_tmp_path))

        vrt_file_path = [os.path.join(temp_dir, f) for f in os.listdir(temp_dir)
                         if '.vrt' == f[-4:]].pop()
    except Exception as e:
        if os.path.isdir(temp_dir):
            shutil.rmtree(temp_dir)
        if os.path.isfile(res_file_tmp_path):
            shutil.rmtree(os.path.dirname(res_file_tmp_path))
        copy_res_fail.append('{}:{}'.format(res.short_id,
def set_file_type(cls, resource, user, file_id=None, folder_path=None):
    """
    Creates a NetCDFLogicalFile (aggregation) from a netcdf file (.nc) resource file
    or a folder
    """

    log = logging.getLogger()
    res_file, folder_path = cls._validate_set_file_type_inputs(resource, file_id,
                                                               folder_path)

    # base file name (no path included)
    file_name = res_file.file_name
    # file name without the extension - needed for naming the new aggregation folder
    nc_file_name = file_name[:-len(res_file.extension)]

    resource_metadata = []
    file_type_metadata = []
    upload_folder = ''
    res_files_to_delete = []

    # get the file from irods to temp dir
    temp_file = utils.get_file_from_irods(res_file)
    temp_dir = os.path.dirname(temp_file)

    # file validation and metadata extraction
    nc_dataset = nc_utils.get_nc_dataset(temp_file)
    if isinstance(nc_dataset, netCDF4.Dataset):
        msg = "NetCDF aggregation. Error when creating aggregation. Error:{}"
        file_type_success = False
        # extract the metadata from the netcdf file
        res_dublin_core_meta, res_type_specific_meta = nc_meta.get_nc_meta_dict(temp_file)
        # populate resource_metadata and file_type_metadata lists with extracted metadata
        add_metadata_to_list(resource_metadata, res_dublin_core_meta,
                             res_type_specific_meta, file_type_metadata, resource)

        # create the ncdump text file
        dump_file = create_header_info_txt_file(temp_file, nc_file_name)
        file_folder = res_file.file_folder
        aggregation_folder_created = False
        create_new_folder = cls._check_create_aggregation_folder(
            selected_res_file=res_file, selected_folder=folder_path,
            aggregation_file_count=1)

        with transaction.atomic():
            # create a netcdf logical file object to be associated with resource files
            dataset_title = res_dublin_core_meta.get('title', nc_file_name)
            logical_file = cls.initialize(dataset_title, resource)

            try:
                if folder_path is None:
                    # we are here means aggregation is being created by selecting a file
                    # create a folder for the netcdf file type using the base file name
                    # as the name for the new folder if the file is not already in a folder
                    if create_new_folder:
                        upload_folder = cls._create_aggregation_folder(resource,
                                                                       file_folder,
                                                                       nc_file_name)
                        aggregation_folder_created = True
                        log.info("NetCDF Aggregation creation - folder created:{}".format(
                            upload_folder))
                    else:
                        # selected nc file is already in a folder
                        upload_folder = file_folder

                    # create logical file record in DB
                    logical_file.save()
                    if aggregation_folder_created:
                        # copy the nc file to the new aggregation folder and make it
                        # part of the logical file
                        tgt_folder = upload_folder
                        files_to_copy = [res_file]
                        logical_file.copy_resource_files(resource, files_to_copy,
                                                         tgt_folder)
                        res_files_to_delete.append(res_file)
                    else:
                        # make the selected nc file part of the aggregation/file type
                        logical_file.add_resource_file(res_file)
                else:
                    # logical file record gets created in DB
                    logical_file.save()
                    # folder has been selected to create the aggregation
                    upload_folder = folder_path
                    # make the .nc file part of the aggregation
                    logical_file.add_resource_file(res_file)

                # add the new ncdump txt file to the resource
                uploaded_file = UploadedFile(file=open(dump_file, 'rb'),
                                             name=os.path.basename(dump_file))
                new_res_file = utils.add_file_to_resource(
                    resource, uploaded_file, folder=upload_folder,
                    add_to_aggregation=False
                )
                # make this new resource file we added part of the logical file
                logical_file.add_resource_file(new_res_file)
                log.info("NetCDF aggregation creation - a new file was added to the "
                         "resource.")

                # use the extracted metadata to populate resource metadata
                for element in resource_metadata:
                    # here k is the name of the element
                    # v is a dict of all element attributes/field names and field values
                    k, v = list(element.items())[0]
                    if k == 'title':
                        # update title element
                        title_element = resource.metadata.title
                        resource.metadata.update_element('title', title_element.id, **v)
                    else:
                        resource.metadata.create_element(k, **v)

                log.info("NetCDF Aggregation creation - Resource metadata was saved to DB")

                # use the extracted metadata to populate file metadata
                for element in file_type_metadata:
                    # here k is the name of the element
                    # v is a dict of all element attributes/field names and field values
                    k, v = list(element.items())[0]
                    if k == 'subject':
                        logical_file.metadata.keywords = v
                        logical_file.metadata.save()
                        # update resource level keywords
                        resource_keywords = [subject.value.lower() for subject in
                                             resource.metadata.subjects.all()]
                        for kw in logical_file.metadata.keywords:
                            if kw.lower() not in resource_keywords:
                                resource.metadata.create_element('subject', value=kw)
                    else:
                        logical_file.metadata.create_element(k, **v)
                log.info("NetCDF aggregation - metadata was saved in aggregation")
                logical_file._finalize(user, resource,
                                       folder_created=aggregation_folder_created,
                                       res_files_to_delete=res_files_to_delete)
                file_type_success = True
                post_add_netcdf_aggregation.send(
                    sender=AbstractLogicalFile,
                    resource=resource,
                    file=logical_file
                )
            except Exception as ex:
                msg = msg.format(str(ex))
                log.exception(msg)
            finally:
                # remove temp dir
                if os.path.isdir(temp_dir):
                    shutil.rmtree(temp_dir)

        if not file_type_success:
            aggregation_from_folder = folder_path is not None
            cls._cleanup_on_fail_to_create_aggregation(user, resource, upload_folder,
                                                       file_folder,
                                                       aggregation_from_folder)
            raise ValidationError(msg)
    else:
        err_msg = "Not a valid NetCDF file. NetCDF aggregation validation failed."
        log.error(err_msg)
        # remove temp dir
        if os.path.isdir(temp_dir):
            shutil.rmtree(temp_dir)
        raise ValidationError(err_msg)
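# The metadata lists built above hold single-key dicts, one per metadata element,
# which is what the `k, v = list(element.items())[0]` unpacking in the loops relies
# on. The element names and values below are illustrative only.
metadata = [
    {'coverage': {'type': 'period',
                  'value': {'start': '2017-01-01', 'end': '2017-12-31'}}},
    {'subject': {'value': 'temperature'}},
]
for element in metadata:
    k, v = list(element.items())[0]
    print(k, v)  # e.g. coverage {'type': 'period', 'value': {...}}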
def netcdf_file_update(instance, nc_res_file, txt_res_file, user):
    log = logging.getLogger()
    # check the instance type
    file_type = isinstance(instance, NetCDFLogicalFile)

    # get the file from irods to temp dir
    temp_nc_file = utils.get_file_from_irods(nc_res_file)
    nc_dataset = netCDF4.Dataset(temp_nc_file, 'a')

    try:
        # update title
        title = instance.dataset_name if file_type else instance.metadata.title.value

        if title.lower() != 'untitled resource':
            if hasattr(nc_dataset, 'title'):
                delattr(nc_dataset, 'title')
            nc_dataset.title = title

        # update keywords
        keywords = instance.metadata.keywords if file_type \
            else [item.value for item in instance.metadata.subjects.all()]

        if hasattr(nc_dataset, 'keywords'):
            delattr(nc_dataset, 'keywords')

        if keywords:
            nc_dataset.keywords = ', '.join(keywords)

        # update key/value metadata
        extra_metadata_dict = instance.metadata.extra_metadata if file_type \
            else instance.extra_metadata

        if hasattr(nc_dataset, 'hs_extra_metadata'):
            delattr(nc_dataset, 'hs_extra_metadata')

        if extra_metadata_dict:
            extra_metadata = []
            for k, v in extra_metadata_dict.items():
                extra_metadata.append("{}:{}".format(k, v))
            nc_dataset.hs_extra_metadata = ', '.join(extra_metadata)

        # update temporal coverage
        temporal_coverage = instance.metadata.temporal_coverage if file_type \
            else instance.metadata.coverages.all().filter(type='period').first()

        for attr_name in ['time_coverage_start', 'time_coverage_end']:
            if hasattr(nc_dataset, attr_name):
                delattr(nc_dataset, attr_name)

        if temporal_coverage:
            nc_dataset.time_coverage_start = temporal_coverage.value['start']
            nc_dataset.time_coverage_end = temporal_coverage.value['end']

        # update spatial coverage
        spatial_coverage = instance.metadata.spatial_coverage if file_type \
            else instance.metadata.coverages.all().filter(type='box').first()

        for attr_name in ['geospatial_lat_min', 'geospatial_lat_max',
                          'geospatial_lon_min', 'geospatial_lon_max']:
            if hasattr(nc_dataset, attr_name):
                delattr(nc_dataset, attr_name)

        if spatial_coverage:
            nc_dataset.geospatial_lat_min = spatial_coverage.value['southlimit']
            nc_dataset.geospatial_lat_max = spatial_coverage.value['northlimit']
            nc_dataset.geospatial_lon_min = spatial_coverage.value['westlimit']
            nc_dataset.geospatial_lon_max = spatial_coverage.value['eastlimit']

        # update variables
        if instance.metadata.variables.all():
            dataset_variables = nc_dataset.variables
            for variable in instance.metadata.variables.all():
                if variable.name in dataset_variables.keys():
                    dataset_variable = dataset_variables[variable.name]

                    # update units
                    if hasattr(dataset_variable, 'units'):
                        delattr(dataset_variable, 'units')
                    if variable.unit != 'Unknown':
                        dataset_variable.setncattr('units', variable.unit)

                    # update long_name
                    if hasattr(dataset_variable, 'long_name'):
                        delattr(dataset_variable, 'long_name')
                    if variable.descriptive_name:
                        dataset_variable.setncattr('long_name', variable.descriptive_name)

                    # update method
                    if hasattr(dataset_variable, 'comment'):
                        delattr(dataset_variable, 'comment')
                    if variable.method:
                        dataset_variable.setncattr('comment', variable.method)

                    # update missing value
                    if variable.missing_value:
                        if hasattr(dataset_variable, 'missing_value'):
                            missing_value = dataset_variable.missing_value
                            delattr(dataset_variable, 'missing_value')
                        else:
                            missing_value = ''
                        try:
                            dt = np.dtype(dataset_variable.datatype.name)
                            missing_value = np.fromstring(variable.missing_value + ' ',
                                                          dtype=dt.type, sep=" ")
                        except Exception:
                            # leave the previously stored value if parsing fails
                            pass

                        if missing_value:
                            dataset_variable.setncattr('missing_value', missing_value)

        # update metadata elements that only apply to a netCDF resource
        if not file_type:
            # update summary
            if hasattr(nc_dataset, 'summary'):
                delattr(nc_dataset, 'summary')
            if instance.metadata.description:
                nc_dataset.summary = instance.metadata.description.abstract

            # update contributor
            if hasattr(nc_dataset, 'contributor_name'):
                delattr(nc_dataset, 'contributor_name')

            contributor_list = instance.metadata.contributors.all()
            if contributor_list:
                res_contri_name = []
                for contributor in contributor_list:
                    res_contri_name.append(contributor.name)
                nc_dataset.contributor_name = ', '.join(res_contri_name)

            # update creator
            for attr_name in ['creator_name', 'creator_email', 'creator_url']:
                if hasattr(nc_dataset, attr_name):
                    delattr(nc_dataset, attr_name)

            creator = instance.metadata.creators.all().filter(order=1).first()
            if creator:
                nc_dataset.creator_name = creator.name if creator.name \
                    else creator.organization

                if creator.email:
                    nc_dataset.creator_email = creator.email
                if creator.description or creator.homepage:
                    nc_dataset.creator_url = creator.homepage if creator.homepage \
                        else 'https://www.hydroshare.org' + creator.description

            # update license
            if hasattr(nc_dataset, 'license'):
                delattr(nc_dataset, 'license')
            if instance.metadata.rights:
                nc_dataset.license = "{0} {1}".format(instance.metadata.rights.statement,
                                                      instance.metadata.rights.url)

            # update references
            if hasattr(nc_dataset, 'references'):
                delattr(nc_dataset, 'references')

            reference_list = instance.metadata.relations.all().filter(type='cites')
            if reference_list:
                res_meta_ref = []
                for reference in reference_list:
                    res_meta_ref.append(reference.value)
                nc_dataset.references = ' \n'.join(res_meta_ref)

            # update source
            if hasattr(nc_dataset, 'source'):
                delattr(nc_dataset, 'source')

            source_list = instance.metadata.sources.all()
            if source_list:
                res_meta_source = []
                for source in source_list:
                    res_meta_source.append(source.derived_from)
                nc_dataset.source = ' \n'.join(res_meta_source)

        # close nc dataset
        nc_dataset.close()
    except Exception as ex:
        log.exception(str(ex))
        if os.path.exists(temp_nc_file):
            shutil.rmtree(os.path.dirname(temp_nc_file))
        raise ex

    # create the ncdump text file
    nc_file_name = os.path.basename(temp_nc_file).split(".")[0]
    temp_text_file = create_header_info_txt_file(temp_nc_file, nc_file_name)

    # push the updated nc file and the txt file to iRODS
    utils.replace_resource_file_on_irods(temp_nc_file, nc_res_file, user)
    utils.replace_resource_file_on_irods(temp_text_file, txt_res_file, user)

    metadata = instance.metadata
    if file_type:
        instance.create_aggregation_xml_documents(create_map_xml=False)
    metadata.is_dirty = False
    metadata.save()

    # cleanup the temp dir
    if os.path.exists(temp_nc_file):
        shutil.rmtree(os.path.dirname(temp_nc_file))
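# A minimal standalone sketch of the attribute-replacement pattern used throughout
# netcdf_file_update(): delete a global attribute if present, then set it again.
# The file name and attribute value are made up for the example.
import netCDF4

ds = netCDF4.Dataset('example.nc', 'w')  # create a scratch file for the demo
ds.close()

ds = netCDF4.Dataset('example.nc', 'a')
if hasattr(ds, 'title'):
    delattr(ds, 'title')  # netCDF4 routes delattr to deleting the global attribute
ds.title = 'Logan watershed temperature'  # attribute assignment writes a global attr
ds.close()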
def set_file_type(cls, resource, file_id, user):
    """
    Sets a tif or zip raster resource file to GeoRasterFile type
    :param resource: an instance of resource type CompositeResource
    :param file_id: id of the resource file to be set as GeoRasterFile type
    :param user: user who is setting the file type
    :return:
    """

    # had to import it here to avoid import loop
    from hs_core.views.utils import create_folder, remove_folder

    log = logging.getLogger()

    # get the file from irods
    res_file = utils.get_resource_file_by_id(resource, file_id)

    # base file name (no path included)
    file_name = utils.get_resource_file_name_and_extension(res_file)[1]
    # file name without the extension
    file_name = file_name[:-len(res_file.extension)]
    file_folder = res_file.file_folder
    upload_folder = ''

    if res_file is not None and res_file.has_generic_logical_file:
        # get the file from irods to temp dir
        temp_file = utils.get_file_from_irods(res_file)
        # validate the file
        error_info, files_to_add_to_resource = raster_file_validation(
            raster_file=temp_file)
        if not error_info:
            log.info("Geo raster file type file validation successful.")
            # extract metadata
            temp_dir = os.path.dirname(temp_file)
            temp_vrt_file_path = [os.path.join(temp_dir, f) for f in os.listdir(temp_dir)
                                  if '.vrt' == os.path.splitext(f)[1]].pop()
            metadata = extract_metadata(temp_vrt_file_path)
            log.info("Geo raster file type metadata extraction was successful.")
            with transaction.atomic():
                # create a geo raster logical file object to be associated with
                # resource files
                logical_file = cls.create()
                # by default set the dataset_name attribute of the logical file to
                # the name of the file selected to set file type
                logical_file.dataset_name = file_name
                logical_file.save()

                try:
                    # create a folder for the raster file type using the base file
                    # name as the name for the new folder
                    new_folder_path = cls.compute_file_type_folder(resource, file_folder,
                                                                   file_name)
                    log.info("Folder created:{}".format(new_folder_path))
                    create_folder(resource.short_id, new_folder_path)

                    new_folder_name = new_folder_path.split('/')[-1]
                    if file_folder is None:
                        upload_folder = new_folder_name
                    else:
                        upload_folder = os.path.join(file_folder, new_folder_name)

                    # add all new files to the resource
                    for f in files_to_add_to_resource:
                        uploaded_file = UploadedFile(file=open(f, 'rb'),
                                                     name=os.path.basename(f))
                        # the added resource file will be part of a new generic
                        # logical file by default
                        new_res_file = utils.add_file_to_resource(resource, uploaded_file,
                                                                  folder=upload_folder)

                        # delete the generic logical file object
                        if new_res_file.logical_file is not None:
                            # deleting the file level metadata object will delete the
                            # associated logical file object
                            new_res_file.logical_file.metadata.delete()

                        # make each resource file we added part of the logical file
                        logical_file.add_resource_file(new_res_file)

                    log.info("Geo raster file type - new files were added to the "
                             "resource.")

                    # use the extracted metadata to populate file metadata
                    for element in metadata:
                        # here k is the name of the element
                        # v is a dict of all element attributes/field names and
                        # field values
                        k, v = list(element.items())[0]
                        logical_file.metadata.create_element(k, **v)
                    log.info("Geo raster file type - metadata was saved to DB")

                    # set resource to private if logical file is missing required
                    # metadata
                    resource.update_public_and_discoverable()

                    # delete the original resource file
                    delete_resource_file(resource.short_id, res_file.id, user)
                    log.info("Deleted original resource file.")
                except Exception as ex:
                    msg = "Geo raster file type. Error when setting file type. Error:{}"
                    msg = msg.format(str(ex))
                    log.exception(msg)
                    if upload_folder:
                        # delete any new files uploaded as part of setting file type
                        folder_to_remove = os.path.join('data', 'contents', upload_folder)
                        remove_folder(user, resource.short_id, folder_to_remove)
                        log.info("Deleted newly created file type folder")
                    raise ValidationError(msg)
                finally:
                    # remove temp dir
                    if os.path.isdir(temp_dir):
                        shutil.rmtree(temp_dir)
        else:
            err_msg = "Geo raster file type file validation failed. {}".format(
                ' '.join(error_info))
            log.info(err_msg)
            raise ValidationError(err_msg)
    else:
        if res_file is None:
            err_msg = "Failed to set Geo raster file type. " \
                      "Resource doesn't have the specified file."
            log.error(err_msg)
            raise ValidationError(err_msg)
        else:
            err_msg = "Failed to set Geo raster file type. " \
                      "The specified file doesn't have a generic logical file type."
            log.error(err_msg)
            raise ValidationError(err_msg)
def _process_uploaded_csv_file(resource, res_file, validate_files_dict, user,
                               delete_existing_metadata=True):
    # get the csv file from iRODS to a temp directory
    fl_obj_name = utils.get_file_from_irods(res_file)
    validate_err_message = _validate_csv_file(resource, fl_obj_name)
    if not validate_err_message:
        # first delete relevant existing metadata elements
        if delete_existing_metadata:
            TimeSeriesMetaData.objects.filter(id=resource.metadata.id).update(
                is_dirty=False)
            _delete_extracted_metadata(resource)

        # delete the sqlite file if it exists
        _delete_resource_file(resource, ".sqlite")

        # add the blank sqlite file
        resource.add_blank_sqlite_file(user)

        # populate CV metadata django models from the blank sqlite file

        # copy the blank sqlite file to a temp directory
        temp_dir = tempfile.mkdtemp()
        odm2_sqlite_file_name = 'ODM2.sqlite'
        odm2_sqlite_file = 'hs_app_timeseries/files/{}'.format(odm2_sqlite_file_name)
        target_temp_sqlite_file = os.path.join(temp_dir, odm2_sqlite_file_name)
        shutil.copy(odm2_sqlite_file, target_temp_sqlite_file)

        con = sqlite3.connect(target_temp_sqlite_file)
        with con:
            # get the records in python dictionary format
            con.row_factory = sqlite3.Row
            cur = con.cursor()

            # populate the lookup CV tables that are needed later for metadata editing
            _create_cv_lookup_models(cur, resource.metadata, 'CV_VariableType',
                                     CVVariableType)
            _create_cv_lookup_models(cur, resource.metadata, 'CV_VariableName',
                                     CVVariableName)
            _create_cv_lookup_models(cur, resource.metadata, 'CV_Speciation', CVSpeciation)
            _create_cv_lookup_models(cur, resource.metadata, 'CV_SiteType', CVSiteType)
            _create_cv_lookup_models(cur, resource.metadata, 'CV_ElevationDatum',
                                     CVElevationDatum)
            _create_cv_lookup_models(cur, resource.metadata, 'CV_MethodType', CVMethodType)
            _create_cv_lookup_models(cur, resource.metadata, 'CV_UnitsType', CVUnitsType)
            _create_cv_lookup_models(cur, resource.metadata, 'CV_Status', CVStatus)
            _create_cv_lookup_models(cur, resource.metadata, 'CV_Medium', CVMedium)
            _create_cv_lookup_models(cur, resource.metadata, 'CV_AggregationStatistic',
                                     CVAggregationStatistic)

        # save some data from the csv file
        with open(fl_obj_name, 'r') as fl_obj:
            csv_reader = csv.reader(fl_obj, delimiter=',')
            # read the first row - header
            header = next(csv_reader)
            # read the 1st data row
            start_date_str = next(csv_reader)[0]
            last_row = None
            data_row_count = 1
            for row in csv_reader:
                last_row = row
                data_row_count += 1
            end_date_str = last_row[0]

            # save the series names along with the number of data points for each
            # series - columns starting with the 2nd column are data series names
            value_counts = {}
            for data_col_name in header[1:]:
                value_counts[data_col_name] = str(data_row_count)

            TimeSeriesMetaData.objects.filter(id=resource.metadata.id).update(
                value_counts=value_counts)

            # create the temporal coverage element
            resource.metadata.create_element('coverage', type='period',
                                             value={'start': start_date_str,
                                                    'end': end_date_str})

        # cleanup the temp sqlite file directory
        if os.path.exists(temp_dir):
            shutil.rmtree(temp_dir)
    else:
        # file validation failed - delete the invalid file just uploaded
        delete_resource_file_only(resource, res_file)
        validate_files_dict['are_files_valid'] = False
        validate_err_message += "{}".format(FILE_UPLOAD_ERROR_MESSAGE)
        validate_files_dict['message'] = validate_err_message

    # cleanup the temp csv file
    if os.path.exists(fl_obj_name):
        shutil.rmtree(os.path.dirname(fl_obj_name))
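# A standalone sketch of the csv scan performed above: read the header, take the
# first data row's timestamp as the coverage start, then count rows to find the last
# timestamp. The sample rows are made up for the example.
import csv
import io

sample = io.StringIO(
    "timestamp,site_a,site_b\n"
    "2017-01-01,1.2,3.4\n"
    "2017-01-02,1.3,3.1\n"
)
reader = csv.reader(sample, delimiter=',')
header = next(reader)                   # ['timestamp', 'site_a', 'site_b']
first = next(reader)                    # first data row
start, end, count = first[0], first[0], 1
for row in reader:
    end = row[0]
    count += 1
print(header[1:], start, end, count)    # ['site_a', 'site_b'] 2017-01-01 2017-01-02 2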
def get_all_related_shp_files(resource, selected_resource_file, file_type):
    """
    This helper function copies all the related shape files to a temp directory
    and returns a list of those temp file paths as well as a list of existing related
    resource file objects
    :param resource: an instance of BaseResource to which the *selected_resource_file* belongs
    :param selected_resource_file: an instance of ResourceFile selected by the user to set
    GeoFeatureFile type (the file must be a .shp or a .zip file)
    :param file_type: a flag (True/False) to control resource VS file type actions
    :return: a list of temp file paths for all related shape files, and a list of
    corresponding resource file objects
    """

    def collect_shape_resource_files(res_file):
        # compare without the file extension (-4)
        if res_file.short_path.lower().endswith('.shp.xml'):
            if selected_resource_file.short_path[:-4] == res_file.short_path[:-8]:
                shape_res_files.append(res_file)
        elif selected_resource_file.short_path[:-4] == res_file.short_path[:-4]:
            shape_res_files.append(res_file)

    shape_temp_files = []
    shape_res_files = []
    temp_dir = ''
    if selected_resource_file.extension.lower() == '.shp':
        for f in resource.files.all():
            if f.file_folder == selected_resource_file.file_folder:
                if f.extension.lower() == '.xml' and \
                        not f.file_name.lower().endswith('.shp.xml'):
                    continue
                if f.extension.lower() in \
                        GeoFeatureLogicalFile.get_allowed_storage_file_types():
                    collect_shape_resource_files(f)

        for f in shape_res_files:
            temp_file = utils.get_file_from_irods(f)
            if not temp_dir:
                temp_dir = os.path.dirname(temp_file)
            else:
                file_temp_dir = os.path.dirname(temp_file)
                dst_dir = os.path.join(temp_dir, os.path.basename(temp_file))
                shutil.copy(temp_file, dst_dir)
                shutil.rmtree(file_temp_dir)
                temp_file = dst_dir
            shape_temp_files.append(temp_file)

    elif selected_resource_file.extension.lower() == '.zip':
        temp_file = utils.get_file_from_irods(selected_resource_file)
        temp_dir = os.path.dirname(temp_file)
        if not zipfile.is_zipfile(temp_file):
            if os.path.isdir(temp_dir):
                shutil.rmtree(temp_dir)
            raise ValidationError('Selected file is not a zip file')
        zf = zipfile.ZipFile(temp_file, 'r')
        zf.extractall(temp_dir)
        zf.close()
        for dirpath, _, filenames in os.walk(temp_dir):
            for name in filenames:
                if name == selected_resource_file.file_name:
                    # skip the user selected zip file
                    continue
                file_path = os.path.abspath(os.path.join(dirpath, name))
                shape_temp_files.append(file_path)

        shape_res_files.append(selected_resource_file)

    return shape_temp_files, shape_res_files
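# --- Illustrative sketch (not part of the source): the base-name matching rule that
# collect_shape_resource_files() applies above. A sidecar belongs to "roads.shp" when it is
# "roads.<ext>" or "roads.shp.xml". The paths and the helper name _belongs_to_shapefile are
# hypothetical, introduced only for this example.
import os

def _belongs_to_shapefile(selected_short_path, candidate_short_path):
    base = os.path.splitext(selected_short_path)[0]  # e.g. 'folder/roads'
    if candidate_short_path.lower().endswith('.shp.xml'):
        # .shp.xml sidecars need the double extension stripped before comparing
        return candidate_short_path[:-len('.shp.xml')] == base
    return os.path.splitext(candidate_short_path)[0] == base

assert _belongs_to_shapefile('folder/roads.shp', 'folder/roads.dbf')
assert _belongs_to_shapefile('folder/roads.shp', 'folder/roads.shp.xml')
assert not _belongs_to_shapefile('folder/roads.shp', 'folder/rivers.dbf')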
def set_file_type(cls, resource, user, file_id=None, folder_path=None):
    """ Creates a GeoRasterLogicalFile (aggregation) from a tif or a zip resource file,
    or a folder """

    log = logging.getLogger()
    res_file, folder_path = cls._validate_set_file_type_inputs(resource, file_id, folder_path)
    file_name = res_file.file_name
    # get file name without the extension - needed for naming the aggregation folder
    base_file_name = file_name[:-len(res_file.extension)]
    file_folder = res_file.file_folder
    aggregation_folder_created = False
    # determine if we need to create a new folder for the aggregation
    create_new_folder = cls._check_create_aggregation_folder(
        selected_res_file=res_file, selected_folder=folder_path,
        aggregation_file_count=1)

    upload_folder = ''
    # get the file from irods to temp dir
    temp_file = utils.get_file_from_irods(res_file)
    temp_dir = os.path.dirname(temp_file)
    res_files_to_delete = []
    raster_folder = folder_path if folder_path is not None else file_folder
    # validate the file
    validation_results = raster_file_validation(raster_file=temp_file, resource=resource,
                                                raster_folder=raster_folder)
    if not validation_results['error_info']:
        msg = "Geographic raster aggregation. Error when creating aggregation. Error:{}"
        file_type_success = False
        log.info("Geographic raster aggregation validation successful.")
        # extract metadata
        temp_vrt_file_path = [os.path.join(temp_dir, f) for f in os.listdir(temp_dir)
                              if '.vrt' == os.path.splitext(f)[1]].pop()
        metadata = extract_metadata(temp_vrt_file_path)
        log.info("Geographic raster metadata extraction was successful.")
        with transaction.atomic():
            # create a geo raster logical file object to be associated with resource files
            logical_file = cls.initialize(base_file_name)
            try:
                if not folder_path:
                    # we are here means aggregation is being created by selecting a file
                    if create_new_folder:
                        # create a folder for the raster file type using the base file name
                        # as the name for the new folder
                        upload_folder = cls._create_aggregation_folder(resource, file_folder,
                                                                       base_file_name)
                        log.info("Folder created:{}".format(upload_folder))
                        aggregation_folder_created = True
                    else:
                        upload_folder = file_folder

                    if res_file.extension.lower() in [".tiff", ".tif"]:
                        if aggregation_folder_created:
                            tgt_folder = upload_folder
                            # copy any existing raster specific files to the new aggregation
                            # folder and make them part of the logical file
                            files_to_copy = validation_results['raster_resource_files']
                            logical_file.copy_resource_files(resource, files_to_copy,
                                                             tgt_folder)
                            res_files_to_delete.extend(files_to_copy)
                        else:
                            # make the existing raster specific files part of the
                            # aggregation/file type
                            for raster_res_file in validation_results['raster_resource_files']:
                                logical_file.add_resource_file(raster_res_file)
                    else:
                        # selected file must be a zip file
                        res_files_to_delete.append(res_file)
                else:
                    # user selected a folder to create aggregation
                    upload_folder = folder_path
                    # make all the files in the selected folder part of the aggregation
                    logical_file.add_resource_files_in_folder(resource, folder_path)

                # add all new files to resource and make those part of the logical file
                if validation_results['new_resource_files_to_add']:
                    files_to_add_to_resource = validation_results['new_resource_files_to_add']
                    logical_file.add_files_to_resource(
                        resource=resource, files_to_add=files_to_add_to_resource,
                        upload_folder=upload_folder)
                log.info("Geographic raster aggregation type - new files were added "
                         "to the resource.")

                # use the extracted metadata to populate file metadata
                for element in metadata:
                    # here k is the name of the element
                    # v is a dict of all element attributes/field names and field values
                    k, v = element.items()[0]
                    logical_file.metadata.create_element(k, **v)
                log.info("Geographic raster aggregation type - metadata was saved to DB")

                logical_file._finalize(user, resource,
                                       folder_created=aggregation_folder_created,
                                       res_files_to_delete=res_files_to_delete,
                                       reset_title=True)

                file_type_success = True
            except Exception as ex:
                msg = msg.format(ex.message)
                log.exception(msg)
            finally:
                # remove temp dir
                if os.path.isdir(temp_dir):
                    shutil.rmtree(temp_dir)

        if not file_type_success:
            aggregation_from_folder = folder_path is not None
            cls._cleanup_on_fail_to_create_aggregation(user, resource, upload_folder,
                                                       file_folder, aggregation_from_folder)
            raise ValidationError(msg)
    else:
        # remove temp dir
        if os.path.isdir(temp_dir):
            shutil.rmtree(temp_dir)
        err_msg = "Geographic raster aggregation type validation failed. {}".format(
            ' '.join(validation_results['error_info']))
        log.error(err_msg)
        raise ValidationError(err_msg)
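# --- Illustrative sketch (not part of the source): the shape of the extracted `metadata`
# list consumed by the loop above. Each entry is a single-key dict: the key names the
# metadata element, the value maps its field names to field values. The element/field names
# and numbers below are hypothetical examples, not output of extract_metadata().
example_metadata = [
    {'coverage': {'type': 'box',
                  'value': {'northlimit': 42.05, 'southlimit': 41.98,
                            'eastlimit': -111.69, 'westlimit': -111.81,
                            'units': 'Decimal degrees'}}},
    {'cellInformation': {'rows': 1200, 'columns': 800,
                         'cellSizeXValue': 30.0, 'cellSizeYValue': 30.0,
                         'cellDataType': 'Float32'}},
]
# unpack each entry the way the loop above does; the source uses the Python 2 idiom
# element.items()[0], which in Python 3 would be list(element.items())[0]
parsed = [list(element.items())[0] for element in example_metadata]
# each entry is now ('elementName', {field: value, ...}), ready to be passed to
# logical_file.metadata.create_element(name, **fields)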
from hs_core.hydroshare.utils import resource_modified, get_file_from_irods
from hs_file_types import raster_meta_extract
from hs_geo_raster_resource.models import RasterResource

copy_res_fail = []
meta_update_fail = []
meta_update_success = []

# start migration for each raster resource that has raster files
for res in RasterResource.objects.all():
    # copy all the resource files to temp dir
    temp_dir = ''
    res_file_tmp_path = ''
    try:
        temp_dir = tempfile.mkdtemp()
        for res_file in res.files.all():
            res_file_tmp_path = get_file_from_irods(res_file)
            shutil.copy(res_file_tmp_path,
                        os.path.join(temp_dir, os.path.basename(res_file_tmp_path)))
            shutil.rmtree(os.path.dirname(res_file_tmp_path))

        vrt_file_path = [os.path.join(temp_dir, f) for f in os.listdir(temp_dir)
                         if '.vrt' == f[-4:]].pop()
    except Exception as e:
        if os.path.isdir(temp_dir):
            shutil.rmtree(temp_dir)
        if os.path.isfile(res_file_tmp_path):
            shutil.rmtree(os.path.dirname(res_file_tmp_path))
        copy_res_fail.append('{}:{}'.format(res.short_id, res.metadata.title.value))
        continue

    # update the metadata for the original coverage information of all the raster resources
    try:
def raster_file_validation(raster_file, resource, raster_folder=None):
    """ Validates if the relevant files are valid for raster aggregation or raster
    resource type

    :param raster_file: a temp file (extension tif or zip) retrieved from irods and stored
    on temp dir in django
    :param raster_folder: (optional) folder in which raster file exists on irods
    :param resource: an instance of CompositeResource or GeoRasterResource in which
    raster_file exists

    :return A list of error messages and a list of file paths for all files that belong
    to raster
    """

    error_info = []
    new_resource_files_to_add = []
    raster_resource_files = []
    create_vrt = True
    validation_results = {'error_info': error_info,
                          'new_resource_files_to_add': new_resource_files_to_add,
                          'raster_resource_files': raster_resource_files,
                          'vrt_created': create_vrt}
    file_name_part, ext = os.path.splitext(os.path.basename(raster_file))
    ext = ext.lower()

    if ext == '.tif' or ext == '.tiff':
        res_files = ResourceFile.list_folder(resource=resource, folder=raster_folder,
                                             sub_folders=False)

        # check if there is already a vrt file in that folder
        vrt_files = [f for f in res_files if f.extension.lower() == ".vrt"]
        tif_files = [f for f in res_files if f.extension.lower() == ".tif" or
                     f.extension.lower() == ".tiff"]
        if vrt_files:
            if len(vrt_files) > 1:
                error_info.append("More than one vrt file was found.")
                return validation_results
            create_vrt = False
            # keep the result dict in sync with the local flag, since the dict captured the
            # initial value of create_vrt at construction time
            validation_results['vrt_created'] = False
        elif len(tif_files) != 1:
            # if there is more than one tif file and no vrt file, then we just use the
            # selected tif file to create the aggregation in case of composite resource
            if resource.resource_type == "CompositeResource":
                tif_files = [tif_file for tif_file in tif_files
                             if raster_file.endswith(tif_file.file_name)]
            else:
                # if there is more than one tif file, there needs to be one vrt file
                error_info.append("A vrt file is missing.")
                return validation_results

        raster_resource_files.extend(vrt_files)
        raster_resource_files.extend(tif_files)

        if vrt_files:
            temp_dir = os.path.dirname(raster_file)
            temp_vrt_file = utils.get_file_from_irods(vrt_files[0], temp_dir)
        else:
            # create the .vrt file
            try:
                temp_vrt_file = create_vrt_file(raster_file)
            except Exception as ex:
                error_info.append(str(ex))
            else:
                if os.path.isfile(temp_vrt_file):
                    new_resource_files_to_add.append(temp_vrt_file)
    elif ext == '.zip':
        try:
            extract_file_paths = _explode_raster_zip_file(raster_file)
        except Exception as ex:
            error_info.append(str(ex))
        else:
            if extract_file_paths:
                new_resource_files_to_add.extend(extract_file_paths)
    else:
        error_info.append("Invalid file mime type found.")

    if not error_info:
        if ext == ".zip":
            # in case of zip, there needs to be more than one file extracted out of the
            # zip file
            if len(new_resource_files_to_add) < 2:
                error_info.append("Invalid zip file. Seems to contain only one file. "
                                  "Multiple tif files are expected.")
                return validation_results

            files_ext = [os.path.splitext(path)[1].lower()
                         for path in new_resource_files_to_add]
            if files_ext.count('.vrt') > 1:
                error_info.append("Invalid zip file. Seems to contain multiple vrt files.")
                return validation_results
            elif files_ext.count('.vrt') == 0:
                error_info.append("Invalid zip file. No vrt file was found.")
                return validation_results
            elif files_ext.count('.tif') + files_ext.count('.tiff') < 1:
                error_info.append("Invalid zip file. No tif/tiff file was found.")
                return validation_results

            # check if there are files that are not raster related
            non_raster_files = [f_ext for f_ext in files_ext
                                if f_ext not in ('.tif', '.tiff', '.vrt')]
            if non_raster_files:
                error_info.append("Invalid zip file. Contains files that are not "
                                  "raster related.")
                return validation_results

            temp_vrt_file = new_resource_files_to_add[files_ext.index('.vrt')]

        # validate vrt file if we didn't create it
        if ext == '.zip' or not create_vrt:
            raster_dataset = gdal.Open(temp_vrt_file, GA_ReadOnly)
            if raster_dataset is None:
                error_info.append('Failed to open the vrt file.')
                return validation_results

            # check if the vrt file is valid
            try:
                raster_dataset.RasterXSize
                raster_dataset.RasterYSize
                raster_dataset.RasterCount
            except AttributeError:
                error_info.append('Raster size and band information are missing.')
                return validation_results

            # check if the raster file numbers and names are valid in vrt file
            with open(temp_vrt_file, 'r') as vrt_file:
                vrt_string = vrt_file.read()
                root = ET.fromstring(vrt_string)
                file_names_in_vrt = [file_name.text
                                     for file_name in root.iter('SourceFilename')]

            if ext == '.zip':
                file_names = [os.path.basename(path) for path in new_resource_files_to_add]
            else:
                file_names = [f.file_name for f in raster_resource_files]

            file_names = [f_name for f_name in file_names if not f_name.endswith('.vrt')]

            if len(file_names) > len(file_names_in_vrt):
                msg = 'One or more additional tif files were found which are not listed in ' \
                      'the provided {} file.'
                msg = msg.format(os.path.basename(temp_vrt_file))
                error_info.append(msg)
            else:
                for vrt_ref_raster_name in file_names_in_vrt:
                    if vrt_ref_raster_name in file_names \
                            or (os.path.split(vrt_ref_raster_name)[0] == '.' and
                                os.path.split(vrt_ref_raster_name)[1] in file_names):
                        continue
                    elif os.path.basename(vrt_ref_raster_name) in file_names:
                        msg = "Please specify {} as {} in the .vrt file, because it will " \
                              "be saved in the same folder with .vrt file in HydroShare."
                        msg = msg.format(vrt_ref_raster_name,
                                         os.path.basename(vrt_ref_raster_name))
                        error_info.append(msg)
                        break
                    else:
                        msg = "The file {tif} which is listed in the {vrt} file is missing."
                        msg = msg.format(tif=os.path.basename(vrt_ref_raster_name),
                                         vrt=os.path.basename(temp_vrt_file))
                        error_info.append(msg)
                        break

    return validation_results
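# --- Illustrative sketch (not part of the source): the minimal GDAL checks used above to
# decide whether a .vrt is readable, factored into a standalone helper. The helper name
# _vrt_is_readable and the input path are hypothetical; the GDAL calls are the same ones
# the validation function relies on.
from osgeo import gdal
from osgeo.gdalconst import GA_ReadOnly

def _vrt_is_readable(vrt_path):
    dataset = gdal.Open(vrt_path, GA_ReadOnly)
    if dataset is None:
        return False  # GDAL could not open/parse the file at all
    try:
        # size and band info must be present for downstream metadata extraction
        return bool(dataset.RasterXSize and dataset.RasterYSize and dataset.RasterCount)
    except AttributeError:
        return False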
def set_file_type(cls, resource, user, file_id=None, folder_path=None):
    """ Creates a NetCDFLogicalFile (aggregation) from a netcdf file (.nc) resource file
    or a folder """

    log = logging.getLogger()
    res_file, folder_path = cls._validate_set_file_type_inputs(resource, file_id, folder_path)

    # base file name (no path included)
    file_name = res_file.file_name
    # file name without the extension - needed for naming the new aggregation folder
    nc_file_name = file_name[:-len(res_file.extension)]

    resource_metadata = []
    file_type_metadata = []
    upload_folder = ''
    res_files_to_delete = []

    # get the file from irods to temp dir
    temp_file = utils.get_file_from_irods(res_file)
    temp_dir = os.path.dirname(temp_file)

    # file validation and metadata extraction
    nc_dataset = nc_utils.get_nc_dataset(temp_file)
    if isinstance(nc_dataset, netCDF4.Dataset):
        msg = "NetCDF aggregation. Error when creating aggregation. Error:{}"
        file_type_success = False
        # extract the metadata from netcdf file
        res_dublin_core_meta, res_type_specific_meta = nc_meta.get_nc_meta_dict(temp_file)
        # populate resource_metadata and file_type_metadata lists with extracted metadata
        add_metadata_to_list(resource_metadata, res_dublin_core_meta,
                             res_type_specific_meta, file_type_metadata, resource)

        # create the ncdump text file
        dump_file = create_header_info_txt_file(temp_file, nc_file_name)

        file_folder = res_file.file_folder
        aggregation_folder_created = False
        create_new_folder = cls._check_create_aggregation_folder(
            selected_res_file=res_file, selected_folder=folder_path,
            aggregation_file_count=1)

        with transaction.atomic():
            # create a netcdf logical file object to be associated with resource files
            dataset_title = res_dublin_core_meta.get('title', nc_file_name)
            logical_file = cls.initialize(dataset_title, resource)
            try:
                if folder_path is None:
                    # we are here means aggregation is being created by selecting a file

                    # create a folder for the netcdf file type using the base file name as
                    # the name for the new folder if the file is not already in a folder
                    if create_new_folder:
                        upload_folder = cls._create_aggregation_folder(resource, file_folder,
                                                                       nc_file_name)
                        aggregation_folder_created = True
                        log.info("NetCDF Aggregation creation - folder created:{}".format(
                            upload_folder))
                    else:
                        # selected nc file is already in a folder
                        upload_folder = file_folder

                    # create logical file record in DB
                    logical_file.save()
                    if aggregation_folder_created:
                        # copy the nc file to the new aggregation folder and make it part
                        # of the logical file
                        tgt_folder = upload_folder
                        files_to_copy = [res_file]
                        logical_file.copy_resource_files(resource, files_to_copy, tgt_folder)
                        res_files_to_delete.append(res_file)
                    else:
                        # make the selected nc file part of the aggregation/file type
                        logical_file.add_resource_file(res_file)
                else:
                    # logical file record gets created in DB
                    logical_file.save()
                    # folder has been selected to create aggregation
                    upload_folder = folder_path
                    # make the .nc file part of the aggregation
                    logical_file.add_resource_file(res_file)

                # add the new dump txt file to the resource
                uploaded_file = UploadedFile(file=open(dump_file, 'rb'),
                                             name=os.path.basename(dump_file))

                new_res_file = utils.add_file_to_resource(
                    resource, uploaded_file, folder=upload_folder, add_to_aggregation=False)

                # make this new resource file we added part of the logical file
                logical_file.add_resource_file(new_res_file)
                log.info("NetCDF aggregation creation - a new file was added to the "
                         "resource.")

                # use the extracted metadata to populate resource metadata
                for element in resource_metadata:
                    # here k is the name of the element
                    # v is a dict of all element attributes/field names and field values
                    k, v = element.items()[0]
                    if k == 'title':
                        # update title element
                        title_element = resource.metadata.title
                        resource.metadata.update_element('title', title_element.id, **v)
                    else:
                        resource.metadata.create_element(k, **v)

                log.info("NetCDF Aggregation creation - Resource metadata was saved to DB")

                # use the extracted metadata to populate file metadata
                for element in file_type_metadata:
                    # here k is the name of the element
                    # v is a dict of all element attributes/field names and field values
                    k, v = element.items()[0]
                    if k == 'subject':
                        logical_file.metadata.keywords = v
                        logical_file.metadata.save()
                        # update resource level keywords
                        resource_keywords = [subject.value.lower() for subject in
                                             resource.metadata.subjects.all()]
                        for kw in logical_file.metadata.keywords:
                            if kw.lower() not in resource_keywords:
                                resource.metadata.create_element('subject', value=kw)
                    else:
                        logical_file.metadata.create_element(k, **v)
                log.info("NetCDF aggregation - metadata was saved in aggregation")

                logical_file._finalize(user, resource,
                                       folder_created=aggregation_folder_created,
                                       res_files_to_delete=res_files_to_delete)

                file_type_success = True
            except Exception as ex:
                msg = msg.format(ex.message)
                log.exception(msg)
            finally:
                # remove temp dir
                if os.path.isdir(temp_dir):
                    shutil.rmtree(temp_dir)

        if not file_type_success:
            aggregation_from_folder = folder_path is not None
            cls._cleanup_on_fail_to_create_aggregation(user, resource, upload_folder,
                                                       file_folder, aggregation_from_folder)
            raise ValidationError(msg)
    else:
        err_msg = "Not a valid NetCDF file. NetCDF aggregation validation failed."
        log.error(err_msg)
        # remove temp dir
        if os.path.isdir(temp_dir):
            shutil.rmtree(temp_dir)
        raise ValidationError(err_msg)
def update_netcdf_file(self, user):
    """
    writes metadata to the netcdf file associated with this instance of the logical file
    :return:
    """

    log = logging.getLogger()

    nc_res_file = ''
    txt_res_file = ''
    for f in self.files.all():
        if f.extension == '.nc':
            nc_res_file = f
            break

    for f in self.files.all():
        if f.extension == '.txt':
            txt_res_file = f
            break

    if not nc_res_file:
        msg = "No netcdf file exists for this logical file."
        log.exception(msg)
        raise ValidationError(msg)

    # get the file from irods to temp dir
    temp_nc_file = utils.get_file_from_irods(nc_res_file)
    nc_dataset = netCDF4.Dataset(temp_nc_file, 'a')

    try:
        # update title
        if hasattr(nc_dataset, 'title'):
            if nc_dataset.title != self.dataset_name:
                delattr(nc_dataset, 'title')
                nc_dataset.title = self.dataset_name
        else:
            nc_dataset.title = self.dataset_name

        # update keywords
        if self.metadata.keywords:
            if hasattr(nc_dataset, 'keywords'):
                delattr(nc_dataset, 'keywords')
            nc_dataset.keywords = ', '.join(self.metadata.keywords)

        # update key/value metadata
        if self.metadata.extra_metadata:
            if hasattr(nc_dataset, 'hs_extra_metadata'):
                delattr(nc_dataset, 'hs_extra_metadata')
            extra_metadata = []
            for k, v in self.metadata.extra_metadata.items():
                extra_metadata.append("{}:{}".format(k, v))
            nc_dataset.hs_extra_metadata = ', '.join(extra_metadata)

        # update temporal coverage
        if self.metadata.temporal_coverage:
            for attr_name in ['time_coverage_start', 'time_coverage_end']:
                if hasattr(nc_dataset, attr_name):
                    delattr(nc_dataset, attr_name)
            nc_dataset.time_coverage_start = self.metadata.temporal_coverage.value['start']
            nc_dataset.time_coverage_end = self.metadata.temporal_coverage.value['end']

        # update spatial coverage
        if self.metadata.spatial_coverage:
            for attr_name in ['geospatial_lat_min', 'geospatial_lat_max',
                              'geospatial_lon_min', 'geospatial_lon_max']:
                # clean up old info
                if hasattr(nc_dataset, attr_name):
                    delattr(nc_dataset, attr_name)

            spatial_coverage = self.metadata.spatial_coverage
            nc_dataset.geospatial_lat_min = spatial_coverage.value['southlimit']
            nc_dataset.geospatial_lat_max = spatial_coverage.value['northlimit']
            nc_dataset.geospatial_lon_min = spatial_coverage.value['westlimit']
            nc_dataset.geospatial_lon_max = spatial_coverage.value['eastlimit']

        # update variables
        if self.metadata.variables.all():
            dataset_variables = nc_dataset.variables
            for variable in self.metadata.variables.all():
                if variable.name in dataset_variables.keys():
                    dataset_variable = dataset_variables[variable.name]
                    if variable.unit != 'Unknown':
                        # clean up old info
                        if hasattr(dataset_variable, 'units'):
                            delattr(dataset_variable, 'units')
                        dataset_variable.setncattr('units', variable.unit)
                    if variable.descriptive_name:
                        # clean up old info
                        if hasattr(dataset_variable, 'long_name'):
                            delattr(dataset_variable, 'long_name')
                        dataset_variable.setncattr('long_name', variable.descriptive_name)
                    if variable.method:
                        # clean up old info
                        if hasattr(dataset_variable, 'comment'):
                            delattr(dataset_variable, 'comment')
                        dataset_variable.setncattr('comment', variable.method)
                    if variable.missing_value:
                        if hasattr(dataset_variable, 'missing_value'):
                            missing_value = dataset_variable.missing_value
                            delattr(dataset_variable, 'missing_value')
                        else:
                            missing_value = ''
                        try:
                            dt = np.dtype(dataset_variable.datatype.name)
                            missing_value = np.fromstring(variable.missing_value + ' ',
                                                          dtype=dt.type, sep=" ")
                        except:
                            pass

                        if missing_value:
                            dataset_variable.setncattr('missing_value', missing_value)

        # close nc dataset
        nc_dataset.close()
    except Exception as ex:
        log.exception(ex.message)
        if os.path.exists(temp_nc_file):
            shutil.rmtree(os.path.dirname(temp_nc_file))
        raise ex

    # create the ncdump text file
    nc_file_name = os.path.basename(temp_nc_file).split(".")[0]
    temp_text_file = create_header_info_txt_file(temp_nc_file, nc_file_name)

    # push the updated nc file and the txt file to iRODS
    utils.replace_resource_file_on_irods(temp_nc_file, nc_res_file, user)
    utils.replace_resource_file_on_irods(temp_text_file, txt_res_file, user)
    self.metadata.is_dirty = False
    self.metadata.save()

    # cleanup the temp dir
    if os.path.exists(temp_nc_file):
        shutil.rmtree(os.path.dirname(temp_nc_file))
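# --- Illustrative sketch (not part of the source): the delete-then-set pattern the code
# above applies to netCDF global attributes, shown on a throwaway diskless dataset so it
# runs without touching disk. 'demo.nc' and the attribute values are hypothetical.
import netCDF4

ds = netCDF4.Dataset('demo.nc', 'w', diskless=True)
ds.title = 'old title'
# mirror the source's clean-up pattern: drop the stale attribute, then set the new value
if hasattr(ds, 'title'):
    delattr(ds, 'title')
ds.title = 'new title'
ds.close()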
def netcdf_file_update(instance, nc_res_file, txt_res_file, user):
    log = logging.getLogger()
    # check the instance type
    file_type = isinstance(instance, NetCDFLogicalFile)

    # get the file from irods to temp dir
    temp_nc_file = utils.get_file_from_irods(nc_res_file)
    nc_dataset = netCDF4.Dataset(temp_nc_file, 'a')

    try:
        # update title
        title = instance.dataset_name if file_type else instance.metadata.title.value

        if title.lower() != 'untitled resource':
            if hasattr(nc_dataset, 'title'):
                delattr(nc_dataset, 'title')
            nc_dataset.title = title

        # update keywords
        keywords = instance.metadata.keywords if file_type \
            else [item.value for item in instance.metadata.subjects.all()]

        if hasattr(nc_dataset, 'keywords'):
            delattr(nc_dataset, 'keywords')

        if keywords:
            nc_dataset.keywords = ', '.join(keywords)

        # update key/value metadata
        extra_metadata_dict = instance.metadata.extra_metadata if file_type \
            else instance.extra_metadata

        if hasattr(nc_dataset, 'hs_extra_metadata'):
            delattr(nc_dataset, 'hs_extra_metadata')

        if extra_metadata_dict:
            extra_metadata = []
            for k, v in extra_metadata_dict.items():
                extra_metadata.append("{}:{}".format(k, v))
            nc_dataset.hs_extra_metadata = ', '.join(extra_metadata)

        # update temporal coverage
        temporal_coverage = instance.metadata.temporal_coverage if file_type \
            else instance.metadata.coverages.all().filter(type='period').first()

        for attr_name in ['time_coverage_start', 'time_coverage_end']:
            if hasattr(nc_dataset, attr_name):
                delattr(nc_dataset, attr_name)

        if temporal_coverage:
            nc_dataset.time_coverage_start = temporal_coverage.value['start']
            nc_dataset.time_coverage_end = temporal_coverage.value['end']

        # update spatial coverage
        spatial_coverage = instance.metadata.spatial_coverage if file_type \
            else instance.metadata.coverages.all().filter(type='box').first()

        for attr_name in ['geospatial_lat_min', 'geospatial_lat_max',
                          'geospatial_lon_min', 'geospatial_lon_max']:
            if hasattr(nc_dataset, attr_name):
                delattr(nc_dataset, attr_name)

        if spatial_coverage:
            nc_dataset.geospatial_lat_min = spatial_coverage.value['southlimit']
            nc_dataset.geospatial_lat_max = spatial_coverage.value['northlimit']
            nc_dataset.geospatial_lon_min = spatial_coverage.value['westlimit']
            nc_dataset.geospatial_lon_max = spatial_coverage.value['eastlimit']

        # update variables
        if instance.metadata.variables.all():
            dataset_variables = nc_dataset.variables
            for variable in instance.metadata.variables.all():
                if variable.name in dataset_variables.keys():
                    dataset_variable = dataset_variables[variable.name]

                    # update units
                    if hasattr(dataset_variable, 'units'):
                        delattr(dataset_variable, 'units')
                    if variable.unit != 'Unknown':
                        dataset_variable.setncattr('units', variable.unit)

                    # update long_name
                    if hasattr(dataset_variable, 'long_name'):
                        delattr(dataset_variable, 'long_name')
                    if variable.descriptive_name:
                        dataset_variable.setncattr('long_name', variable.descriptive_name)

                    # update method
                    if hasattr(dataset_variable, 'comment'):
                        delattr(dataset_variable, 'comment')
                    if variable.method:
                        dataset_variable.setncattr('comment', variable.method)

                    # update missing value
                    if variable.missing_value:
                        if hasattr(dataset_variable, 'missing_value'):
                            missing_value = dataset_variable.missing_value
                            delattr(dataset_variable, 'missing_value')
                        else:
                            missing_value = ''
                        try:
                            dt = np.dtype(dataset_variable.datatype.name)
                            missing_value = np.fromstring(variable.missing_value + ' ',
                                                          dtype=dt.type, sep=" ")
                        except:
                            pass

                        if missing_value:
                            dataset_variable.setncattr('missing_value', missing_value)

        # update metadata elements that only apply to the netCDF resource type
        if not file_type:
            # update summary
            if hasattr(nc_dataset, 'summary'):
                delattr(nc_dataset, 'summary')
            if instance.metadata.description:
                nc_dataset.summary = instance.metadata.description.abstract

            # update contributor
            if hasattr(nc_dataset, 'contributor_name'):
                delattr(nc_dataset, 'contributor_name')

            contributor_list = instance.metadata.contributors.all()
            if contributor_list:
                res_contri_name = []
                for contributor in contributor_list:
                    res_contri_name.append(contributor.name)
                nc_dataset.contributor_name = ', '.join(res_contri_name)

            # update creator
            for attr_name in ['creator_name', 'creator_email', 'creator_url']:
                if hasattr(nc_dataset, attr_name):
                    delattr(nc_dataset, attr_name)

            creator = instance.metadata.creators.all().filter(order=1).first()
            if creator:
                nc_dataset.creator_name = creator.name if creator.name \
                    else creator.organization

                if creator.email:
                    nc_dataset.creator_email = creator.email
                if creator.description or creator.homepage:
                    nc_dataset.creator_url = creator.homepage if creator.homepage \
                        else 'https://www.hydroshare.org' + creator.description

            # update license
            if hasattr(nc_dataset, 'license'):
                delattr(nc_dataset, 'license')
            if instance.metadata.rights:
                nc_dataset.license = "{0} {1}".format(instance.metadata.rights.statement,
                                                      instance.metadata.rights.url)

            # update references
            if hasattr(nc_dataset, 'references'):
                delattr(nc_dataset, 'references')

            reference_list = instance.metadata.relations.all().filter(type='cites')
            if reference_list:
                res_meta_ref = []
                for reference in reference_list:
                    res_meta_ref.append(reference.value)
                nc_dataset.references = ' \n'.join(res_meta_ref)

            # update source
            if hasattr(nc_dataset, 'source'):
                delattr(nc_dataset, 'source')

            source_list = instance.metadata.sources.all()
            if source_list:
                res_meta_source = []
                for source in source_list:
                    res_meta_source.append(source.derived_from)
                nc_dataset.source = ' \n'.join(res_meta_source)

        # close nc dataset
        nc_dataset.close()
    except Exception as ex:
        log.exception(ex.message)
        if os.path.exists(temp_nc_file):
            shutil.rmtree(os.path.dirname(temp_nc_file))
        raise ex

    # create the ncdump text file
    nc_file_name = os.path.basename(temp_nc_file).split(".")[0]
    temp_text_file = create_header_info_txt_file(temp_nc_file, nc_file_name)

    # push the updated nc file and the txt file to iRODS
    utils.replace_resource_file_on_irods(temp_nc_file, nc_res_file, user)
    utils.replace_resource_file_on_irods(temp_text_file, txt_res_file, user)

    metadata = instance.metadata
    metadata.is_dirty = False
    metadata.save()

    # cleanup the temp dir
    if os.path.exists(temp_nc_file):
        shutil.rmtree(os.path.dirname(temp_nc_file))
def set_file_type(cls, resource, file_id, user):
    """ Sets a json resource file to RefTimeseriesFile type
    :param resource: an instance of resource type CompositeResource
    :param file_id: id of the resource file to be set as RefTimeSeriesFile type
    :param user: user who is setting the file type
    :return:
    """

    log = logging.getLogger()

    # get the selected resource file object
    res_file = utils.get_resource_file_by_id(resource, file_id)

    if res_file is None:
        raise ValidationError("File not found.")

    if res_file.extension != '.refts':
        raise ValidationError("Not a Ref Time Series file.")

    files_to_add_to_resource = []
    if res_file.has_generic_logical_file:
        try:
            json_file_content = _validate_json_file(res_file)
        except Exception as ex:
            raise ValidationError(ex.message)

        # get the file from irods to temp dir
        temp_file = utils.get_file_from_irods(res_file)
        temp_dir = os.path.dirname(temp_file)
        files_to_add_to_resource.append(temp_file)
        file_folder = res_file.file_folder
        with transaction.atomic():
            # first delete the json file that we retrieved from irods
            # for setting it to reftimeseries file type
            delete_resource_file(resource.short_id, res_file.id, user)

            # create a reftimeseries logical file object to be associated with
            # resource files
            logical_file = cls.create()
            logical_file.metadata.json_file_content = json_file_content
            logical_file.metadata.save()

            try:
                # add the json file back to the resource
                uploaded_file = UploadedFile(file=open(temp_file, 'rb'),
                                             name=os.path.basename(temp_file))
                # the added resource file will be part of a new generic logical file
                # by default
                new_res_file = utils.add_file_to_resource(resource, uploaded_file,
                                                          folder=file_folder)

                # delete the generic logical file object
                if new_res_file.logical_file is not None:
                    # deleting the file level metadata object will delete the associated
                    # logical file object
                    new_res_file.logical_file.metadata.delete()

                # make the resource file we added part of the logical file
                logical_file.add_resource_file(new_res_file)
                logical_file.metadata.save()
                logical_file.dataset_name = logical_file.metadata.get_title_from_json()
                logical_file.save()
                # extract metadata
                _extract_metadata(resource, logical_file)
                log.info("RefTimeseries file type - json file was added to the resource.")
            except Exception as ex:
                msg = "RefTimeseries file type. Error when setting file type. Error:{}"
                msg = msg.format(ex.message)
                log.exception(msg)
                raise ValidationError(msg)
            finally:
                # remove temp dir
                if os.path.isdir(temp_dir):
                    shutil.rmtree(temp_dir)

        log.info("RefTimeseries file type was created.")
    else:
        err_msg = "Selected file is not part of a GenericLogical file."
        log.error(err_msg)
        raise ValidationError(err_msg)
def create_scidas_virtual_app(request, res_id, cluster):
    user = get_user(request)
    if not user.is_authenticated() or not user.is_active:
        messages.error(request, "Only an authorized user can make an appliance "
                                "provision request.")
        return HttpResponseRedirect(request.META['HTTP_REFERER'])

    res, _, _ = authorize(request, res_id,
                          needed_permission=ACTION_TO_AUTHORIZE.VIEW_RESOURCE)
    cluster_name = cluster
    if cluster_name != 'chameleon' and cluster_name != 'aws' and cluster_name != 'azure':
        cluster_name = ''

    file_data_list = []
    p_data = {}
    file_path = '/' + ds.IRODS_ZONE + '/home/' + ds.IRODS_USERNAME
    for rf in ResourceFile.objects.filter(object_id=res.id):
        fname = ''
        if rf.resource_file.name:
            fname = os.path.join(file_path, rf.resource_file.name)
        elif rf.fed_resource_file.name:
            fname = rf.fed_resource_file.name
        if fname:
            file_data_list.append(fname)
            if fname.endswith('.json') and not p_data:
                temp_json_file = get_file_from_irods(rf)
                with open(temp_json_file, 'r') as fp:
                    jdata = load(fp)
                if 'id' in jdata and 'containers' in jdata:
                    p_data = jdata

    url = settings.PIVOT_URL
    app_id = user.username + '_cs_app_id'
    preset_url = ''
    if not p_data:
        p_data = {"id": app_id,
                  "containers": [
                      {"id": app_id,
                       "image": "scidas/irods-jupyter-hydroshare",
                       "resources": {"cpus": 2, "mem": 2048},
                       "port_mappings": [{"container_port": 8888,
                                          "host_port": 0,
                                          "protocol": "tcp"}],
                       "args": ["--ip=0.0.0.0", "--NotebookApp.token=\"\""],
                       "data": file_data_list}
                  ]}
    else:
        app_id = p_data['id']
        p_data['containers'][0]['data'] = file_data_list
        if cluster_name:
            p_data['containers'][0]['cluster'] = cluster_name
        if 'endpoints' in p_data['containers'][0]:
            if p_data['containers'][0]['endpoints']:
                preset_ep_data = p_data['containers'][0]['endpoints'][0]
                preset_url = 'http://' + preset_ep_data['host'] + ':' + \
                             str(preset_ep_data['host_port'])

    # delete the appliance before posting to create a new one in case it already exists
    app_url = url + '/' + app_id
    response = requests.delete(app_url)
    is_deleted = False
    if response.status_code != status.HTTP_404_NOT_FOUND and \
            response.status_code != status.HTTP_200_OK:
        idx = 0
        while idx < 2:
            get_response = requests.get(app_url)
            idx += 1
            if get_response.status_code == status.HTTP_404_NOT_FOUND:
                is_deleted = True
                break
            else:
                # appliance is not deleted successfully yet, wait and poll
                # again one more time
                time.sleep(2)
    else:
        is_deleted = True

    if not is_deleted:
        errmsg = 'The old appliance ' + app_id + ' cannot be deleted successfully'
        messages.error(request, errmsg)
        return HttpResponseRedirect(request.META['HTTP_REFERER'])

    response = requests.post(url, data=dumps(p_data))
    if response.status_code != status.HTTP_200_OK and \
            response.status_code != status.HTTP_201_CREATED:
        return HttpResponseBadRequest(content=response.text)

    while True:
        response = requests.get(app_url)
        if not response.status_code == status.HTTP_200_OK:
            return HttpResponseBadRequest(content=response.text)
        return_data = loads(response.content)
        con_ret_data_list = return_data['containers']
        con_ret_data = con_ret_data_list[0]
        con_state = con_ret_data['state']
        ep_data_list = con_ret_data['endpoints']
        if con_state == 'running' and (ep_data_list or preset_url):
            break
        else:
            # the jupyter appliance is not ready yet, need to wait and poll again
            time.sleep(2)

    if preset_url:
        app_url = preset_url
    else:
        ep_data = ep_data_list[0]
        app_url = 'http://' + ep_data['host'] + ':' + str(ep_data['host_port'])

    # Make sure the newly directed url is loaded and working before redirecting.
    # Since scidas will install dependencies included in requirements.txt, it will take
    # some time before the app_url is ready to go after the appliance is provisioned,
    # hence wait for up to 30 seconds before erroring out if connection to the url keeps
    # being refused.
    idx = 0
    while True:
        try:
            ret = urlopen(app_url, timeout=10)
            break
        except URLError as ex:
            errmsg = ex.reason if hasattr(ex, 'reason') else 'URLError'
            idx += 1
            time.sleep(5)
            if idx > 6:
                messages.error(request, errmsg)
                return HttpResponseRedirect(request.META['HTTP_REFERER'])

    if ret.code == 200:
        return HttpResponseRedirect(app_url)
    else:
        messages.error(request, 'time out error')
        return HttpResponseRedirect(request.META['HTTP_REFERER'])
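# --- Illustrative sketch (not part of the source): the bounded retry loop used above for
# waiting on a freshly provisioned endpoint, factored into a standalone helper. The helper
# name wait_until_reachable and its defaults are hypothetical; they mirror the hard-coded
# attempt count, sleep, and timeout in the view.
import time
from urllib2 import urlopen, URLError  # Python 2, matching the source; urllib.request in Py3

def wait_until_reachable(app_url, max_attempts=7, delay=5, timeout=10):
    for attempt in range(max_attempts):
        try:
            return urlopen(app_url, timeout=timeout)  # success: hand back the response
        except URLError:
            time.sleep(delay)  # endpoint not up yet; back off and retry
    return None  # caller treats None as a provisioning timeout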