def netcdf_pre_delete_file_from_resource(sender, **kwargs):
    """Signal handler run before a file is deleted from a NetCDF resource.

    Clears the metadata dirty flag, deletes the companion file (the .nc data
    file or the .txt header file, whichever was NOT the one being deleted),
    and wipes all coverage and extended (variable / original-coverage)
    metadata, since it was extracted from the deleted file.
    """
    nc_res = kwargs['resource']
    # reset the dirty flag: the files and metadata are about to be out of sync anyway
    metadata = nc_res.metadata
    metadata.is_dirty = False
    metadata.save()
    del_file = kwargs['file']
    del_file_ext = utils.get_resource_file_name_and_extension(del_file)[2]

    # update resource modification info
    user = nc_res.creator
    utils.resource_modified(nc_res, user, overwrite_bag=False)

    # delete the netcdf header file or .nc file
    # maps the two known extensions to the Format element value to remove
    file_ext = {'.nc': 'application/x-netcdf', '.txt': 'text/plain'}
    if del_file_ext in file_ext:
        # remove the extension being deleted, leaving only the companion's entry
        del file_ext[del_file_ext]
        for f in ResourceFile.objects.filter(object_id=nc_res.id):
            ext = utils.get_resource_file_name_and_extension(f)[2]
            if ext in file_ext:
                # delete the companion file and its Format metadata element
                delete_resource_file_only(nc_res, f)
                nc_res.metadata.formats.filter(value=file_ext[ext]).delete()
                break

        # delete all the coverage info
        nc_res.metadata.coverages.all().delete()
        # delete all the extended meta info
        nc_res.metadata.variables.all().delete()
        nc_res.metadata.ori_coverage.all().delete()
def post_add_files_to_resource_handler(sender, **kwargs):
    """Signal handler: after files are added to a timeseries resource, find the
    just-uploaded file and extract metadata from it if it is a sqlite or csv file.

    BUG FIX: if neither a local upload (``files``) nor a federated file name
    list (``fed_res_file_names``) was supplied, ``file_name`` was left unbound
    and the comparison below raised UnboundLocalError; we now return early.
    """
    resource = kwargs['resource']
    files = kwargs['files']
    validate_files_dict = kwargs['validate_files']
    user = kwargs['user']
    fed_res_fnames = kwargs['fed_res_file_names']
    if files:
        file_name = files[0].name
    elif fed_res_fnames:
        file_name = os.path.basename(fed_res_fnames[0])
    else:
        # nothing was uploaded - nothing to process
        return

    # extract metadata from the just uploaded file
    uploaded_file_to_process = None
    uploaded_file_ext = ''
    for res_file in resource.files.all():
        _, res_file_name, uploaded_file_ext = utils.get_resource_file_name_and_extension(
            res_file)
        if res_file_name == file_name:
            uploaded_file_to_process = res_file
            break

    if uploaded_file_to_process:
        if uploaded_file_ext == ".sqlite":
            _process_uploaded_sqlite_file(user, resource, uploaded_file_to_process,
                                          validate_files_dict,
                                          delete_existing_metadata=True)
        elif uploaded_file_ext == ".csv":
            _process_uploaded_csv_file(resource, uploaded_file_to_process,
                                       validate_files_dict, user,
                                       delete_existing_metadata=True)
def post_add_files_to_resource_handler(sender, **kwargs):
    """Signal handler: after files are added to a timeseries resource, find the
    just-uploaded file and extract metadata from it if it is a sqlite or csv file.

    BUG FIX: if neither a local upload (``files``) nor ``source_names`` was
    supplied, ``file_name`` was left unbound and the comparison below raised
    UnboundLocalError; we now return early in that case.
    """
    resource = kwargs['resource']
    files = kwargs['files']
    validate_files_dict = kwargs['validate_files']
    user = kwargs['user']
    source_names = kwargs['source_names']

    if __debug__:
        assert(isinstance(source_names, list))

    if files:
        file_name = files[0].name
    elif source_names:
        file_name = os.path.basename(source_names[0])
    else:
        # nothing was uploaded - nothing to process
        return

    # extract metadata from the just uploaded file
    uploaded_file_to_process = None
    uploaded_file_ext = ''
    for res_file in resource.files.all():
        _, res_file_name, uploaded_file_ext = utils.get_resource_file_name_and_extension(res_file)
        if res_file_name == file_name:
            uploaded_file_to_process = res_file
            break

    if uploaded_file_to_process:
        if uploaded_file_ext == ".sqlite":
            _process_uploaded_sqlite_file(user, resource, uploaded_file_to_process,
                                          validate_files_dict,
                                          delete_existing_metadata=True)
        elif uploaded_file_ext == ".csv":
            _process_uploaded_csv_file(resource, uploaded_file_to_process,
                                       validate_files_dict, user,
                                       delete_existing_metadata=True)
def post_create_resource_handler(sender, **kwargs):
    """Signal handler: on resource creation, extract metadata from the first
    uploaded file (sqlite or csv) and mark the bag as dirty so metadata xml
    files get regenerated.
    """
    resource = kwargs['resource']
    validate_files_dict = kwargs['validate_files']
    user = kwargs['user']

    # the file uploaded at creation time, if any
    first_file = resource.files.all().first()
    if first_file:
        # dispatch on the uploaded file's extension
        extension = utils.get_resource_file_name_and_extension(first_file)[2]
        if extension == '.sqlite':
            # metadata can exist at this point if a timeseries resource is created
            # using REST API since the API caller can pass metadata information.
            # Before metadata can be extracted from the sqlite file and populated
            # to database, existing metadata needs to be deleted.
            _process_uploaded_sqlite_file(user, resource, first_file,
                                          validate_files_dict,
                                          delete_existing_metadata=True)
        elif extension == '.csv':
            _process_uploaded_csv_file(resource, first_file, validate_files_dict,
                                       user, delete_existing_metadata=False)

    # since we are extracting metadata after resource creation the metadata xml
    # files need to be regenerated - so set the dirty bag flags
    utils.set_dirty_bag_flag(resource)
def get_hs_term_dict(self):
    """Return the HS term dict from the base class, extended with
    HS_FILE_NAME: the full name of the first .nc file in this resource
    (empty string when no .nc file exists).
    """
    # start from the terms provided by the base resource class
    term_dict = super(NetcdfResource, self).get_hs_term_dict()
    term_dict["HS_FILE_NAME"] = ""
    for a_file in self.files.all():
        _, full_name, extension = get_resource_file_name_and_extension(a_file)
        if extension.lower() != '.nc':
            continue
        # first .nc file wins
        term_dict["HS_FILE_NAME"] = full_name
        break
    return term_dict
def assert_geofeature_file_type_metadata(self, expected_folder_name):
    """Shared assertions: verify GeoFeature aggregation state and extracted
    metadata for a composite resource holding the 3 required shapefile files.

    :param expected_folder_name: folder the aggregation's files are expected in
    """
    # test files in the file type
    self.assertEqual(self.composite_resource.files.count(), 3)
    # check that there is no GenericLogicalFile object
    self.assertEqual(GenericLogicalFile.objects.count(), 0)
    # check that there is one GeoFeatureLogicalFile object
    self.assertEqual(GeoFeatureLogicalFile.objects.count(), 1)
    # check that there is one GeoFeatureFileMetaData object
    self.assertEqual(GeoFeatureFileMetaData.objects.count(), 1)
    logical_file = GeoFeatureLogicalFile.objects.first()
    self.assertEqual(logical_file.files.count(), 3)
    # check that the 3 resource files are now associated with GeoFeatureLogicalFile
    for res_file in self.composite_resource.files.all():
        self.assertEqual(res_file.logical_file_type_name, "GeoFeatureLogicalFile")
        self.assertEqual(res_file.has_logical_file, True)
        self.assertTrue(
            isinstance(res_file.logical_file, GeoFeatureLogicalFile))
    # check that we put the 3 files in a new folder
    for res_file in self.composite_resource.files.all():
        file_path, base_file_name, _ = get_resource_file_name_and_extension(
            res_file)
        expected_file_path = "{}/data/contents/{}/{}"
        # NOTE(review): this ASSIGNS file_folder instead of asserting it -
        # looks like it may have been meant as assertEqual; confirm intent
        res_file.file_folder = expected_folder_name
        expected_file_path = expected_file_path.format(
            self.composite_resource.root_path, expected_folder_name, base_file_name)
        self.assertEqual(file_path, expected_file_path)
    # test extracted raster file type metadata
    # there should not be any resource level coverage
    self.assertEqual(self.composite_resource.metadata.coverages.count(), 0)
    self.assertNotEqual(logical_file.metadata.geometryinformation, None)
    self.assertEqual(logical_file.metadata.geometryinformation.featureCount, 51)
    self.assertEqual(logical_file.metadata.geometryinformation.geometryType,
                     "MULTIPOLYGON")
    self.assertNotEqual(logical_file.metadata.originalcoverage, None)
    self.assertEqual(logical_file.metadata.originalcoverage.datum, 'unknown')
    self.assertEqual(logical_file.metadata.originalcoverage.projection_name,
                     'unknown')
    self.assertGreater(
        len(logical_file.metadata.originalcoverage.projection_string), 0)
    self.assertEqual(logical_file.metadata.originalcoverage.unit, 'unknown')
    # bounding box extracted from the states shapefile (exact float compare)
    self.assertEqual(logical_file.metadata.originalcoverage.eastlimit,
                     -66.9692712587578)
    self.assertEqual(logical_file.metadata.originalcoverage.northlimit,
                     71.406235393967)
    self.assertEqual(logical_file.metadata.originalcoverage.southlimit,
                     18.921786345087)
    self.assertEqual(logical_file.metadata.originalcoverage.westlimit,
                     -178.217598362366)
def _process_uploaded_sqlite_file(user, resource, res_file, validate_files_dict,
                                  delete_existing_metadata=True):
    """Validate an uploaded sqlite file, extract metadata from it and populate
    the resource's metadata; on any failure, delete the offending file and
    record an error message in *validate_files_dict*.

    :param user: user who uploaded the file
    :param resource: timeseries resource the file belongs to
    :param res_file: the ResourceFile to process
    :param validate_files_dict: dict updated with 'are_files_valid' / 'message'
    :param delete_existing_metadata: when True, wipe previously extracted metadata
        before extraction

    BUG FIX: the invalid-extension branch used ``err_message += err_message.format(...)``,
    which produced the message template concatenated with the formatted copy;
    it now formats the template once.
    """
    # check if it is a sqlite file
    fl_ext = utils.get_resource_file_name_and_extension(res_file)[2]
    if fl_ext == '.sqlite':
        # get the file from iRODS to a temp directory
        fl_obj_name = utils.get_file_from_irods(res_file)
        validate_err_message = _validate_odm2_db_file(fl_obj_name)
        if not validate_err_message:
            # first delete relevant existing metadata elements
            if delete_existing_metadata:
                TimeSeriesMetaData.objects.filter(
                    id=resource.metadata.id).update(is_dirty=False)
                _delete_extracted_metadata(resource)
            extract_err_message = _extract_metadata(resource, fl_obj_name)
            if extract_err_message:
                # delete the invalid file
                delete_resource_file_only(resource, res_file)
                # cleanup any extracted metadata
                _delete_extracted_metadata(resource)
                validate_files_dict['are_files_valid'] = False
                extract_err_message += "{}".format(FILE_UPLOAD_ERROR_MESSAGE)
                validate_files_dict['message'] = extract_err_message
            else:
                # set metadata is_dirty to False
                TimeSeriesMetaData.objects.filter(
                    id=resource.metadata.id).update(is_dirty=False)
                # delete the csv file if it exists
                _delete_resource_file(resource, ".csv")
                utils.resource_modified(resource, user, overwrite_bag=False)
        else:
            # file validation failed
            # delete the invalid file just uploaded
            delete_resource_file_only(resource, res_file)
            validate_files_dict['are_files_valid'] = False
            validate_err_message += "{}".format(FILE_UPLOAD_ERROR_MESSAGE)
            validate_files_dict['message'] = validate_err_message

        # cleanup the temp file
        if os.path.exists(fl_obj_name):
            shutil.rmtree(os.path.dirname(fl_obj_name))
    else:
        # delete the invalid file
        delete_resource_file_only(resource, res_file)
        validate_files_dict['are_files_valid'] = False
        # BUG FIX: was `err_message += err_message.format(...)` which doubled
        # the message (template + formatted copy); also fixed the grammar
        err_message = "The uploaded file is not a sqlite file. {}"
        err_message = err_message.format(FILE_UPLOAD_ERROR_MESSAGE)
        validate_files_dict['message'] = err_message
def add_to_xml_container(self, container):
    """Generates xml+rdf representation of all the metadata elements associated with this
    logical file type instance. Subclass must override this if it has additional metadata
    elements.

    :param container: lxml element the Dataset node is appended to
    :return: the rdf:Description element so subclasses can append more nodes
    """
    NAMESPACES = CoreMetaData.NAMESPACES
    # hsterms:Dataset > rdf:Description is the root of this aggregation's metadata
    dataset_container = etree.SubElement(
        container, '{%s}Dataset' % NAMESPACES['hsterms'])
    rdf_Description = etree.SubElement(
        dataset_container, '{%s}Description' % NAMESPACES['rdf'])
    # dc:type points at the site-relative term URI for this aggregation type
    dc_datatype = etree.SubElement(rdf_Description, '{%s}type' % NAMESPACES['dc'])
    data_type = current_site_url() + "/terms/" + self.logical_file.data_type
    dc_datatype.set('{%s}resource' % NAMESPACES['rdf'], data_type)
    if self.logical_file.dataset_name:
        dc_datatitle = etree.SubElement(rdf_Description, '{%s}title' % NAMESPACES['dc'])
        dc_datatitle.text = self.logical_file.dataset_name
    # add fileType node: one hsterms:dataFile per file in the aggregation,
    # each carrying its resource URI, title (file name) and mime type
    for res_file in self.logical_file.files.all():
        hsterms_datafile = etree.SubElement(
            rdf_Description, '{%s}dataFile' % NAMESPACES['hsterms'])
        rdf_dataFile_Description = etree.SubElement(
            hsterms_datafile, '{%s}Description' % NAMESPACES['rdf'])
        file_uri = u'{hs_url}/resource/{res_id}/data/contents/{file_name}'.format(
            hs_url=current_site_url(),
            res_id=self.logical_file.resource.short_id,
            file_name=res_file.short_path)
        rdf_dataFile_Description.set('{%s}about' % NAMESPACES['rdf'], file_uri)
        dc_title = etree.SubElement(rdf_dataFile_Description,
                                    '{%s}title' % NAMESPACES['dc'])
        file_name = get_resource_file_name_and_extension(res_file)[1]
        dc_title.text = file_name
        dc_format = etree.SubElement(rdf_dataFile_Description,
                                     '{%s}format' % NAMESPACES['dc'])
        dc_format.text = res_file.mime_type
    # keywords, extra key/value metadata and coverages hang off the Description
    self.add_keywords_to_xml_container(rdf_Description)
    self.add_extra_metadata_to_xml_container(rdf_Description)
    for coverage in self.coverages.all():
        coverage.add_to_xml_container(rdf_Description)
    return rdf_Description
def _process_uploaded_sqlite_file(user, resource, res_file, validate_files_dict,
                                  delete_existing_metadata=True):
    """Validate an uploaded sqlite file, extract metadata from it and populate
    the resource's metadata; on any failure, delete the offending file and
    record an error message in *validate_files_dict*.

    :param user: user who uploaded the file
    :param resource: timeseries resource the file belongs to
    :param res_file: the ResourceFile to process
    :param validate_files_dict: dict updated with 'are_files_valid' / 'message'
    :param delete_existing_metadata: when True, wipe previously extracted metadata
        before extraction

    BUG FIX: the invalid-extension branch used ``err_message += err_message.format(...)``,
    which produced the message template concatenated with the formatted copy;
    it now formats the template once.
    """
    # check if it is a sqlite file
    fl_ext = utils.get_resource_file_name_and_extension(res_file)[2]
    if fl_ext == '.sqlite':
        # get the file from iRODS to a temp directory
        fl_obj_name = utils.get_file_from_irods(res_file)
        validate_err_message = validate_odm2_db_file(fl_obj_name)
        if not validate_err_message:
            # first delete relevant existing metadata elements
            if delete_existing_metadata:
                TimeSeriesMetaData.objects.filter(
                    id=resource.metadata.id).update(is_dirty=False)
                _delete_extracted_metadata(resource)
            extract_err_message = extract_metadata(resource, fl_obj_name)
            if extract_err_message:
                # delete the invalid file
                delete_resource_file_only(resource, res_file)
                # cleanup any extracted metadata
                _delete_extracted_metadata(resource)
                validate_files_dict['are_files_valid'] = False
                extract_err_message += "{}".format(FILE_UPLOAD_ERROR_MESSAGE)
                validate_files_dict['message'] = extract_err_message
            else:
                # set metadata is_dirty to False
                TimeSeriesMetaData.objects.filter(
                    id=resource.metadata.id).update(is_dirty=False)
                # delete the csv file if it exists
                _delete_resource_file(resource, ".csv")
                utils.resource_modified(resource, user, overwrite_bag=False)
        else:
            # file validation failed
            # delete the invalid file just uploaded
            delete_resource_file_only(resource, res_file)
            validate_files_dict['are_files_valid'] = False
            validate_err_message += "{}".format(FILE_UPLOAD_ERROR_MESSAGE)
            validate_files_dict['message'] = validate_err_message

        # cleanup the temp file
        if os.path.exists(fl_obj_name):
            shutil.rmtree(os.path.dirname(fl_obj_name))
    else:
        # delete the invalid file
        delete_resource_file_only(resource, res_file)
        validate_files_dict['are_files_valid'] = False
        # BUG FIX: was `err_message += err_message.format(...)` which doubled
        # the message (template + formatted copy); also fixed the grammar
        err_message = "The uploaded file is not a sqlite file. {}"
        err_message = err_message.format(FILE_UPLOAD_ERROR_MESSAGE)
        validate_files_dict['message'] = err_message
def assert_geofeature_file_type_metadata(self, expected_folder_name):
    """Shared assertions: verify GeoFeature aggregation state and extracted
    metadata for a composite resource holding the 3 required shapefile files.

    :param expected_folder_name: folder the aggregation's files are expected in
    """
    # test files in the file type
    self.assertEqual(self.composite_resource.files.count(), 3)
    # check that there is no GenericLogicalFile object
    self.assertEqual(GenericLogicalFile.objects.count(), 0)
    # check that there is one GeoFeatureLogicalFile object
    self.assertEqual(GeoFeatureLogicalFile.objects.count(), 1)
    # check that there is one GeoFeatureFileMetaData object
    self.assertEqual(GeoFeatureFileMetaData.objects.count(), 1)
    logical_file = GeoFeatureLogicalFile.objects.first()
    self.assertEqual(logical_file.files.count(), 3)
    # check that the 3 resource files are now associated with GeoFeatureLogicalFile
    for res_file in self.composite_resource.files.all():
        self.assertEqual(res_file.logical_file_type_name, "GeoFeatureLogicalFile")
        self.assertEqual(res_file.has_logical_file, True)
        self.assertTrue(isinstance(res_file.logical_file, GeoFeatureLogicalFile))
    # check that we put the 3 files in a new folder
    for res_file in self.composite_resource.files.all():
        file_path, base_file_name, _ = get_resource_file_name_and_extension(res_file)
        expected_file_path = "{}/data/contents/{}/{}"
        # NOTE(review): this ASSIGNS file_folder instead of asserting it -
        # looks like it may have been meant as assertEqual; confirm intent
        res_file.file_folder = expected_folder_name
        expected_file_path = expected_file_path.format(self.composite_resource.root_path,
                                                       expected_folder_name, base_file_name)
        self.assertEqual(file_path, expected_file_path)
    # test extracted raster file type metadata
    # there should not be any resource level coverage
    self.assertEqual(self.composite_resource.metadata.coverages.count(), 0)
    self.assertNotEqual(logical_file.metadata.geometryinformation, None)
    self.assertEqual(logical_file.metadata.geometryinformation.featureCount, 51)
    self.assertEqual(logical_file.metadata.geometryinformation.geometryType, "MULTIPOLYGON")
    self.assertNotEqual(logical_file.metadata.originalcoverage, None)
    self.assertEqual(logical_file.metadata.originalcoverage.datum, 'unknown')
    self.assertEqual(logical_file.metadata.originalcoverage.projection_name, 'unknown')
    self.assertGreater(len(logical_file.metadata.originalcoverage.projection_string), 0)
    self.assertEqual(logical_file.metadata.originalcoverage.unit, 'unknown')
    # limits stored as strings/decimals here, hence the float() coercion
    self.assertEqual(float(logical_file.metadata.originalcoverage.eastlimit),
                     -66.9692712587578)
    self.assertEqual(float(logical_file.metadata.originalcoverage.northlimit),
                     71.406235393967)
    self.assertEqual(float(logical_file.metadata.originalcoverage.southlimit),
                     18.921786345087)
    self.assertEqual(float(logical_file.metadata.originalcoverage.westlimit),
                     -178.217598362366)
def add_to_xml_container(self, container):
    """Generates xml+rdf representation of all the metadata elements associated with this
    logical file type instance. Subclass must override this if it has additional metadata
    elements.

    :param container: lxml element the Dataset node is appended to
    :return: the rdf:Description element so subclasses can append more nodes
    """
    NAMESPACES = CoreMetaData.NAMESPACES
    # hsterms:Dataset > rdf:Description is the root of this aggregation's metadata
    dataset_container = etree.SubElement(
        container, '{%s}Dataset' % NAMESPACES['hsterms'])
    rdf_Description = etree.SubElement(
        dataset_container, '{%s}Description' % NAMESPACES['rdf'])
    # aggregation type is written as plain text in hsterms:dataType
    hsterms_datatype = etree.SubElement(
        rdf_Description, '{%s}dataType' % NAMESPACES['hsterms'])
    hsterms_datatype.text = self.logical_file.data_type
    if self.logical_file.dataset_name:
        hsterms_datatitle = etree.SubElement(
            rdf_Description, '{%s}dataTitle' % NAMESPACES['hsterms'])
        hsterms_datatitle.text = self.logical_file.dataset_name
    # add fileType node: one hsterms:dataFile per file in the aggregation,
    # carrying the file name (dc:title) and mime type (dc:format)
    for res_file in self.logical_file.files.all():
        hsterms_datafile = etree.SubElement(
            rdf_Description, '{%s}dataFile' % NAMESPACES['hsterms'])
        rdf_dataFile_Description = etree.SubElement(
            hsterms_datafile, '{%s}Description' % NAMESPACES['rdf'])
        dc_title = etree.SubElement(rdf_dataFile_Description,
                                    '{%s}title' % NAMESPACES['dc'])
        file_name = get_resource_file_name_and_extension(res_file)[1]
        dc_title.text = file_name
        dc_format = etree.SubElement(rdf_dataFile_Description,
                                     '{%s}format' % NAMESPACES['dc'])
        dc_format.text = res_file.mime_type
    # keywords, extra key/value metadata and coverages hang off the Description
    self.add_keywords_to_xml_container(rdf_Description)
    self.add_extra_metadata_to_xml_container(rdf_Description)
    for coverage in self.coverages.all():
        coverage.add_to_xml_container(rdf_Description)
    return rdf_Description
def post_create_resource_handler(sender, **kwargs):
    """Signal handler: after resource creation, extract metadata from the first
    uploaded file when it is a sqlite or csv file.
    """
    resource = kwargs['resource']
    validate_files_dict = kwargs['validate_files']
    user = kwargs['user']

    # the file uploaded at creation time, if any
    first_file = resource.files.all().first()
    if not first_file:
        return

    # dispatch on the uploaded file's extension
    extension = utils.get_resource_file_name_and_extension(first_file)[2]
    if extension == '.sqlite':
        _process_uploaded_sqlite_file(user, resource, first_file,
                                      validate_files_dict,
                                      delete_existing_metadata=False)
    elif extension == '.csv':
        _process_uploaded_csv_file(resource, first_file, validate_files_dict,
                                   user, delete_existing_metadata=False)
def _delete_resource_file(resource, file_ext):
    """Delete every file of *resource* whose extension equals *file_ext*."""
    # collect matches first, then delete, so we never mutate while iterating
    matching = [
        f for f in resource.files.all()
        if utils.get_resource_file_name_and_extension(f)[2] == file_ext
    ]
    for f in matching:
        delete_resource_file_only(resource, f)
def test_zip_set_file_type_to_geo_feature_all(self):
    """Set a zip holding all 15 geo-feature files to GeoFeature file type and
    verify the resulting files, logical-file objects and extracted metadata."""
    # here we are using a zip file that has all the 15 files for setting it
    # to Geo Feature file type which includes metadata extraction
    self._create_composite_resource(self.osm_all_files_zip_file)
    self.assertEqual(self.composite_resource.files.all().count(), 1)
    res_file = self.composite_resource.files.first()
    # new folder is named after the zip file minus its 4-char ".zip" suffix
    expected_folder_name = res_file.file_name[:-4]
    # check that the resource file is associated with GenericLogicalFile
    self.assertEqual(res_file.has_logical_file, True)
    self.assertEqual(res_file.logical_file_type_name, "GenericLogicalFile")
    # check that there is one GenericLogicalFile object
    self.assertEqual(GenericLogicalFile.objects.count(), 1)
    # set the zip file to GeoFeatureFile type
    GeoFeatureLogicalFile.set_file_type(self.composite_resource, res_file.id,
                                        self.user)
    # test files in the file type
    self.assertEqual(self.composite_resource.files.count(), 15)
    # check that there is no GenericLogicalFile object
    self.assertEqual(GenericLogicalFile.objects.count(), 0)
    # check that there is no GenericFileMetaData object
    self.assertEqual(GenericFileMetaData.objects.count(), 0)
    # check that there is one GeoFeatureLogicalFile object
    self.assertEqual(GeoFeatureLogicalFile.objects.count(), 1)
    logical_file = GeoFeatureLogicalFile.objects.first()
    self.assertEqual(logical_file.files.count(), 15)
    # check that the 3 resource files are now associated with GeoFeatureLogicalFile
    for res_file in self.composite_resource.files.all():
        self.assertEqual(res_file.logical_file_type_name, "GeoFeatureLogicalFile")
        self.assertEqual(res_file.has_logical_file, True)
        self.assertTrue(
            isinstance(res_file.logical_file, GeoFeatureLogicalFile))
    # check that we put the 3 files in a new folder
    for res_file in self.composite_resource.files.all():
        file_path, base_file_name, _ = get_resource_file_name_and_extension(
            res_file)
        expected_file_path = "{}/data/contents/{}/{}"
        # NOTE(review): this ASSIGNS file_folder instead of asserting it -
        # looks like it may have been meant as assertEqual; confirm intent
        res_file.file_folder = expected_folder_name
        expected_file_path = expected_file_path.format(
            self.composite_resource.root_path, expected_folder_name, base_file_name)
        self.assertEqual(file_path, expected_file_path)
    # test extracted raster file type metadata
    # there should one resource level coverage
    self.assertEqual(self.composite_resource.metadata.coverages.count(), 1)
    self.assertEqual(logical_file.metadata.fieldinformations.all().count(), 7)
    self.assertEqual(
        logical_file.metadata.geometryinformation.featureCount, 87)
    self.assertEqual(
        logical_file.metadata.geometryinformation.geometryType, "POLYGON")
    self.assertEqual(logical_file.metadata.originalcoverage.datum, 'WGS_1984')
    # bounding-box limits compared within the self.allowance tolerance
    self.assertTrue(
        abs(logical_file.metadata.originalcoverage.eastlimit - 3.4520493) <
        self.allowance)
    self.assertTrue(
        abs(logical_file.metadata.originalcoverage.northlimit - 45.0466382) <
        self.allowance)
    self.assertTrue(
        abs(logical_file.metadata.originalcoverage.southlimit - 42.5732416) <
        self.allowance)
    self.assertTrue(
        abs(logical_file.metadata.originalcoverage.westlimit - (-0.3263017)) <
        self.allowance)
    self.assertEqual(logical_file.metadata.originalcoverage.unit, 'Degree')
    self.assertEqual(
        logical_file.metadata.originalcoverage.projection_name, 'GCS_WGS_1984')
    # there should be file level keywords
    for key in ('Logan River', 'TauDEM'):
        self.assertIn(key, logical_file.metadata.keywords)
    self.assertEqual(len(logical_file.metadata.keywords), 2)
    self.composite_resource.delete()
    # there should be no GeoFeatureLogicalFile object at this point
    self.assertEqual(GeoFeatureLogicalFile.objects.count(), 0)
    # there should be no GenericFileMetaData object at this point
    self.assertEqual(GeoFeatureFileMetaData.objects.count(), 0)
def test_shp_set_file_type_to_geo_feature_required(self):
    """Upload the 3 required shapefile files, set the .shp to GeoFeature file
    type, and verify the resulting logical-file state and extracted metadata."""
    # here we are using a shp file for setting it
    # to Geo Feature file type which includes metadata extraction
    self._create_composite_resource()
    # add the 3 required files to the resource
    files = []
    shp_temp_file = os.path.join(self.temp_dir, self.states_shp_file_name)
    shutil.copy(self.states_shp_file, shp_temp_file)
    shx_temp_file = os.path.join(self.temp_dir, self.states_shx_file_name)
    shutil.copy(self.states_shx_file, shx_temp_file)
    dbf_temp_file = os.path.join(self.temp_dir, self.states_dbf_file_name)
    shutil.copy(self.states_dbf_file, dbf_temp_file)
    # NOTE(review): files are opened in text mode and never explicitly
    # closed - consider binary mode + context managers
    files.append(
        UploadedFile(file=open(shp_temp_file, 'r'),
                     name=self.states_shp_file_name))
    files.append(
        UploadedFile(file=open(shx_temp_file, 'r'),
                     name=self.states_shx_file_name))
    files.append(
        UploadedFile(file=open(dbf_temp_file, 'r'),
                     name=self.states_dbf_file_name))
    hydroshare.utils.resource_file_add_process(self.composite_resource, files,
                                               self.user)
    self.assertEqual(self.composite_resource.files.all().count(), 3)
    res_file = self.composite_resource.files.first()
    # new folder is named after the file minus its 4-char extension
    expected_folder_name = res_file.file_name[:-4]
    # check that the resource file is associated with GenericLogicalFile
    self.assertEqual(res_file.has_logical_file, True)
    self.assertEqual(res_file.logical_file_type_name, "GenericLogicalFile")
    # check that there is 3 GenericLogicalFile object
    self.assertEqual(GenericLogicalFile.objects.count(), 3)
    # set the shp file to GeoFeatureFile type
    shp_res_file = [
        f for f in self.composite_resource.files.all() if f.extension == '.shp'
    ][0]
    GeoFeatureLogicalFile.set_file_type(self.composite_resource,
                                        shp_res_file.id, self.user)
    # test files in the file type
    self.assertEqual(self.composite_resource.files.count(), 3)
    # check that there is no GenericLogicalFile object
    self.assertEqual(GenericLogicalFile.objects.count(), 0)
    # check that there is one GeoFeatureLogicalFile object
    self.assertEqual(GeoFeatureLogicalFile.objects.count(), 1)
    logical_file = GeoFeatureLogicalFile.objects.first()
    self.assertEqual(logical_file.files.count(), 3)
    # check that the 3 resource files are now associated with GeoFeatureLogicalFile
    for res_file in self.composite_resource.files.all():
        self.assertEqual(res_file.logical_file_type_name, "GeoFeatureLogicalFile")
        self.assertEqual(res_file.has_logical_file, True)
        self.assertTrue(
            isinstance(res_file.logical_file, GeoFeatureLogicalFile))
    # check that we put the 3 files in a new folder
    for res_file in self.composite_resource.files.all():
        file_path, base_file_name, _ = get_resource_file_name_and_extension(
            res_file)
        expected_file_path = "{}/data/contents/{}/{}"
        # NOTE(review): this ASSIGNS file_folder instead of asserting it -
        # looks like it may have been meant as assertEqual; confirm intent
        res_file.file_folder = expected_folder_name
        expected_file_path = expected_file_path.format(
            self.composite_resource.root_path, expected_folder_name, base_file_name)
        self.assertEqual(file_path, expected_file_path)
    # test extracted raster file type metadata
    # there should not be any resource level coverage
    self.assertEqual(self.composite_resource.metadata.coverages.count(), 0)
    self.assertNotEqual(logical_file.metadata.geometryinformation, None)
    self.assertEqual(
        logical_file.metadata.geometryinformation.featureCount, 51)
    self.assertEqual(
        logical_file.metadata.geometryinformation.geometryType, "MULTIPOLYGON")
    self.assertNotEqual(logical_file.metadata.originalcoverage, None)
    self.assertEqual(logical_file.metadata.originalcoverage.datum, 'unknown')
    self.assertEqual(
        logical_file.metadata.originalcoverage.projection_name, 'unknown')
    self.assertGreater(
        len(logical_file.metadata.originalcoverage.projection_string), 0)
    self.assertEqual(logical_file.metadata.originalcoverage.unit, 'unknown')
    self.assertEqual(logical_file.metadata.originalcoverage.eastlimit,
                     -66.9692712587578)
    self.assertEqual(logical_file.metadata.originalcoverage.northlimit,
                     71.406235393967)
    self.assertEqual(logical_file.metadata.originalcoverage.southlimit,
                     18.921786345087)
    self.assertEqual(logical_file.metadata.originalcoverage.westlimit,
                     -178.217598362366)
    # there should not be any file level keywords
    self.assertEqual(logical_file.metadata.keywords, [])
    self.composite_resource.delete()
    # there should be no GeoFeatureLogicalFile object at this point
    self.assertEqual(GeoFeatureLogicalFile.objects.count(), 0)
    # there should be no GenericFileMetaData object at this point
    self.assertEqual(GeoFeatureFileMetaData.objects.count(), 0)
def _delete_resource_file(resource, file_ext):
    """Remove all of *resource*'s files that carry the given file extension."""
    for candidate in resource.files.all():
        ext = utils.get_resource_file_name_and_extension(candidate)[2]
        if ext != file_ext:
            continue
        delete_resource_file_only(resource, candidate)
def assert_raster_file_type_metadata(self):
    """Shared assertions: verify GeoRaster aggregation state and extracted
    metadata for a composite resource created from the small_logan raster."""
    # test the resource now has 2 files (vrt file added as part of metadata extraction)
    self.assertEqual(self.composite_resource.files.all().count(), 2)
    # check that the 2 resource files are now associated with GeoRasterLogicalFile
    for res_file in self.composite_resource.files.all():
        self.assertEqual(res_file.logical_file_type_name, "GeoRasterLogicalFile")
        self.assertEqual(res_file.has_logical_file, True)
        self.assertTrue(isinstance(res_file.logical_file, GeoRasterLogicalFile))
    # check that we put the 2 files in a new folder (small_logan)
    for res_file in self.composite_resource.files.all():
        file_path, base_file_name, _ = get_resource_file_name_and_extension(
            res_file)
        expected_file_path = "{}/data/contents/small_logan/{}"
        expected_file_path = expected_file_path.format(
            self.composite_resource.root_path, base_file_name)
        self.assertEqual(file_path, expected_file_path)
    # check that there is no GenericLogicalFile object
    self.assertEqual(GenericLogicalFile.objects.count(), 0)
    # check that there is one GeoRasterLogicalFile object
    self.assertEqual(GeoRasterLogicalFile.objects.count(), 1)
    res_file = self.composite_resource.files.first()
    # check that the logicalfile is associated with 2 files
    logical_file = res_file.logical_file
    self.assertEqual(logical_file.dataset_name, 'small_logan')
    self.assertEqual(logical_file.has_metadata, True)
    self.assertEqual(logical_file.files.all().count(), 2)
    self.assertEqual(set(self.composite_resource.files.all()),
                     set(logical_file.files.all()))
    # test that size property of the logical file is equal to sum of size of all files
    # that are part of the logical file
    self.assertEqual(logical_file.size,
                     sum([f.size for f in logical_file.files.all()]))
    # test that there should be 1 object of type GeoRasterFileMetaData
    self.assertEqual(GeoRasterFileMetaData.objects.count(), 1)
    # test that the metadata associated with logical file id of type GeoRasterFileMetaData
    self.assertTrue(isinstance(logical_file.metadata, GeoRasterFileMetaData))
    # there should be 2 format elements associated with resource
    self.assertEqual(self.composite_resource.metadata.formats.all().count(), 2)
    self.assertEqual(
        self.composite_resource.metadata.formats.all().filter(
            value='application/vrt').count(), 1)
    self.assertEqual(
        self.composite_resource.metadata.formats.all().filter(
            value='image/tiff').count(), 1)
    # test extracted metadata for the file type
    # geo raster file type should have all the metadata elements
    self.assertEqual(logical_file.metadata.has_all_required_elements(), True)
    # there should be 1 coverage element - box type
    self.assertNotEqual(logical_file.metadata.spatial_coverage, None)
    self.assertEqual(logical_file.metadata.spatial_coverage.type, 'box')
    box_coverage = logical_file.metadata.spatial_coverage
    self.assertEqual(box_coverage.value['projection'], 'WGS 84 EPSG:4326')
    self.assertEqual(box_coverage.value['units'], 'Decimal degrees')
    self.assertEqual(box_coverage.value['northlimit'], 42.0500269597691)
    self.assertEqual(box_coverage.value['eastlimit'], -111.57773718106195)
    self.assertEqual(box_coverage.value['southlimit'], 41.98722286029891)
    self.assertEqual(box_coverage.value['westlimit'], -111.69756293084055)
    # testing extended metadata element: original coverage
    ori_coverage = logical_file.metadata.originalCoverage
    self.assertNotEqual(ori_coverage, None)
    self.assertEqual(ori_coverage.value['northlimit'], 4655492.446916306)
    self.assertEqual(ori_coverage.value['eastlimit'], 452144.01909127034)
    self.assertEqual(ori_coverage.value['southlimit'], 4648592.446916306)
    self.assertEqual(ori_coverage.value['westlimit'], 442274.01909127034)
    self.assertEqual(ori_coverage.value['units'], 'meter')
    self.assertEqual(ori_coverage.value['projection'], 'NAD83 / UTM zone 12N')
    # testing extended metadata element: cell information
    cell_info = logical_file.metadata.cellInformation
    self.assertEqual(cell_info.rows, 230)
    self.assertEqual(cell_info.columns, 329)
    self.assertEqual(cell_info.cellSizeXValue, 30.0)
    self.assertEqual(cell_info.cellSizeYValue, 30.0)
    self.assertEqual(cell_info.cellDataType, 'Float32')
    # testing extended metadata element: band information
    self.assertEqual(logical_file.metadata.bandInformations.count(), 1)
    band_info = logical_file.metadata.bandInformations.first()
    # band statistics are stored as strings
    self.assertEqual(band_info.noDataValue, '-3.40282346639e+38')
    self.assertEqual(band_info.maximumValue, '2880.00708008')
    self.assertEqual(band_info.minimumValue, '1870.63659668')
def set_file_type(cls, resource, file_id, user):
    """
    Sets a tif or zip raster resource file to GeoRasterFile type.

    :param resource: an instance of resource type CompositeResource
    :param file_id: id of the resource file to be set as GeoRasterFile type
    :param user: user who is setting the file type
    :return: None
    :raises ValidationError: if the specified file doesn't exist, doesn't have
        a generic logical file type, fails raster validation, or an error
        occurs while creating the file-type folder / adding the new files
    """
    # had to import it here to avoid import loop
    from hs_core.views.utils import create_folder, remove_folder

    log = logging.getLogger()

    # get the file from irods
    res_file = utils.get_resource_file_by_id(resource, file_id)

    # Guard clauses first. NOTE: the previous version dereferenced res_file
    # (res_file.extension etc.) before checking it for None, which raised
    # AttributeError instead of the intended ValidationError below.
    if res_file is None:
        err_msg = "Failed to set Geo raster file type. " \
                  "Resource doesn't have the specified file."
        log.error(err_msg)
        raise ValidationError(err_msg)
    if not res_file.has_generic_logical_file:
        err_msg = "Failed to set Geo raster file type." \
                  "The specified file doesn't have a generic logical file type."
        log.error(err_msg)
        raise ValidationError(err_msg)

    # base file name (no path included)
    file_name = utils.get_resource_file_name_and_extension(res_file)[1]
    # file name without the extension
    file_name = file_name[:-len(res_file.extension)]
    file_folder = res_file.file_folder
    upload_folder = ''

    # get the file from irods to temp dir
    temp_file = utils.get_file_from_irods(res_file)
    # validate the file
    error_info, files_to_add_to_resource = raster_file_validation(raster_file=temp_file)
    if error_info:
        err_msg = "Geo raster file type file validation failed.{}".format(' '.join(error_info))
        log.info(err_msg)
        raise ValidationError(err_msg)

    log.info("Geo raster file type file validation successful.")

    # extract metadata from the .vrt file produced by validation
    temp_dir = os.path.dirname(temp_file)
    temp_vrt_file_path = [os.path.join(temp_dir, f) for f in os.listdir(temp_dir)
                          if '.vrt' == os.path.splitext(f)[1]].pop()
    metadata = extract_metadata(temp_vrt_file_path)
    log.info("Geo raster file type metadata extraction was successful.")

    with transaction.atomic():
        # create a geo raster logical file object to be associated with resource files
        logical_file = cls.create()
        # by default set the dataset_name attribute of the logical file to the
        # name of the file selected to set file type
        logical_file.dataset_name = file_name
        logical_file.save()
        try:
            # create a folder for the raster file type using the base file name as the
            # name for the new folder
            new_folder_path = cls.compute_file_type_folder(resource, file_folder, file_name)
            log.info("Folder created:{}".format(new_folder_path))
            create_folder(resource.short_id, new_folder_path)
            new_folder_name = new_folder_path.split('/')[-1]
            if file_folder is None:
                upload_folder = new_folder_name
            else:
                upload_folder = os.path.join(file_folder, new_folder_name)

            # add all new files to the resource
            for f in files_to_add_to_resource:
                # use a context manager so the file handle is closed after
                # the upload (the previous version leaked the open handles)
                with open(f, 'rb') as source_file:
                    uploaded_file = UploadedFile(file=source_file,
                                                 name=os.path.basename(f))
                    # the added resource file will be part of a new generic
                    # logical file by default
                    new_res_file = utils.add_file_to_resource(
                        resource, uploaded_file, folder=upload_folder)
                # delete the generic logical file object
                if new_res_file.logical_file is not None:
                    # deleting the file level metadata object will delete the
                    # associated logical file object
                    new_res_file.logical_file.metadata.delete()
                # make each resource file we added as part of the logical file
                logical_file.add_resource_file(new_res_file)

            log.info("Geo raster file type - new files were added to the resource.")

            # use the extracted metadata to populate file metadata
            for element in metadata:
                # here k is the name of the element
                # v is a dict of all element attributes/field names and field values
                # list(...) is needed for Python 3, where dict.items() returns
                # a non-indexable view (items()[0] is Python-2 only)
                k, v = list(element.items())[0]
                logical_file.metadata.create_element(k, **v)
            log.info("Geo raster file type - metadata was saved to DB")

            # set resource to private if logical file is missing required metadata
            resource.update_public_and_discoverable()

            # delete the original resource file
            delete_resource_file(resource.short_id, res_file.id, user)
            log.info("Deleted original resource file.")
        except Exception as ex:
            # str(ex) instead of ex.message: BaseException.message was
            # removed in Python 3
            msg = "Geo raster file type. Error when setting file type. Error:{}"
            msg = msg.format(str(ex))
            log.exception(msg)
            if upload_folder:
                # delete any new files uploaded as part of setting file type
                folder_to_remove = os.path.join('data', 'contents', upload_folder)
                remove_folder(user, resource.short_id, folder_to_remove)
                log.info("Deleted newly created file type folder")
            raise ValidationError(msg)
        finally:
            # remove temp dir
            if os.path.isdir(temp_dir):
                shutil.rmtree(temp_dir)
def test_zip_set_file_type_to_geo_raster(self):
    """Setting a valid raster zip file to GeoRasterFile type should extract
    the archive contents (1 vrt + 2 tif files) into a new folder named after
    the vrt file, delete the original zip, and extract file-level metadata.
    """
    # here we are using a valid raster zip file for setting it
    # to Geo Raster file type which includes metadata extraction
    # NOTE: open in binary mode ('rb') - a zip archive is binary data and
    # text mode can corrupt it on platforms that translate line endings
    self.raster_file_obj = open(self.raster_zip_file, 'rb')
    self._create_composite_resource()

    self.assertEqual(self.composite_resource.files.all().count(), 1)
    res_file = self.composite_resource.files.first()

    # check that the resource file is associated with GenericLogicalFile
    self.assertEqual(res_file.has_logical_file, True)
    self.assertEqual(res_file.logical_file_type_name, "GenericLogicalFile")
    # check that the resource file is not associated with any logical file
    # self.assertEqual(res_file.has_logical_file, False)

    # set the zip file to GeoRasterFile type
    GeoRasterLogicalFile.set_file_type(self.composite_resource, res_file.id, self.user)

    # test the resource now has 3 files (one vrt file and 2 tif files)
    self.assertEqual(self.composite_resource.files.all().count(), 3)
    tif_files = hydroshare.utils.get_resource_files_by_extension(
        self.composite_resource, '.tif')
    self.assertEqual(len(tif_files), 2)
    vrt_files = hydroshare.utils.get_resource_files_by_extension(
        self.composite_resource, '.vrt')
    self.assertEqual(len(vrt_files), 1)

    # check that the logicalfile is associated with 3 files
    self.assertEqual(GeoRasterLogicalFile.objects.count(), 1)
    res_file = self.composite_resource.files.first()
    logical_file = res_file.logical_file
    self.assertEqual(logical_file.dataset_name, 'logan_vrt_small')
    self.assertEqual(logical_file.has_metadata, True)
    self.assertEqual(logical_file.files.all().count(), 3)
    self.assertEqual(set(self.composite_resource.files.all()),
                     set(logical_file.files.all()))

    # check that we put the 3 files in a new folder (small_logan)
    for res_file in self.composite_resource.files.all():
        file_path, base_file_name, _ = get_resource_file_name_and_extension(res_file)
        expected_file_path = "{}/data/contents/logan_vrt_small/{}"
        expected_file_path = expected_file_path.format(
            self.composite_resource.short_id, base_file_name)
        self.assertEqual(file_path, expected_file_path)

    # check that there is no GenericLogicalFile object
    self.assertEqual(GenericLogicalFile.objects.count(), 0)

    # test that size property of the logical file is equal to sum of size of all files
    # that are part of the logical file
    self.assertEqual(logical_file.size, sum([f.size for f in logical_file.files.all()]))

    # test extracted metadata for the file type
    # geo raster file type should have all the metadata elements
    self.assertEqual(logical_file.metadata.has_all_required_elements(), True)

    # there should be 1 coverage element - box type
    self.assertNotEqual(logical_file.metadata.spatial_coverage, None)
    self.assertEqual(logical_file.metadata.spatial_coverage.type, 'box')
    box_coverage = logical_file.metadata.spatial_coverage
    self.assertEqual(box_coverage.value['projection'], 'WGS 84 EPSG:4326')
    self.assertEqual(box_coverage.value['units'], 'Decimal degrees')
    self.assertEqual(box_coverage.value['northlimit'], 42.050028785767275)
    self.assertEqual(box_coverage.value['eastlimit'], -111.5773750264389)
    self.assertEqual(box_coverage.value['southlimit'], 41.98745777902698)
    self.assertEqual(box_coverage.value['westlimit'], -111.65768822411239)

    # testing extended metadata element: original coverage
    ori_coverage = logical_file.metadata.originalCoverage
    self.assertNotEqual(ori_coverage, None)
    self.assertEqual(ori_coverage.value['northlimit'], 4655492.446916306)
    self.assertEqual(ori_coverage.value['eastlimit'], 452174.01909127034)
    self.assertEqual(ori_coverage.value['southlimit'], 4648592.446916306)
    self.assertEqual(ori_coverage.value['westlimit'], 445574.01909127034)
    self.assertEqual(ori_coverage.value['units'], 'meter')
    self.assertEqual(ori_coverage.value['projection'], 'NAD83 / UTM zone 12N')

    # testing extended metadata element: cell information
    cell_info = logical_file.metadata.cellInformation
    self.assertEqual(cell_info.rows, 230)
    self.assertEqual(cell_info.columns, 220)
    self.assertEqual(cell_info.cellSizeXValue, 30.0)
    self.assertEqual(cell_info.cellSizeYValue, 30.0)
    self.assertEqual(cell_info.cellDataType, 'Float32')

    # testing extended metadata element: band information
    self.assertEqual(logical_file.metadata.bandInformations.count(), 1)
    band_info = logical_file.metadata.bandInformations.first()
    self.assertEqual(band_info.noDataValue, '-3.40282346639e+38')
    self.assertEqual(band_info.maximumValue, '2880.00708008')
    self.assertEqual(band_info.minimumValue, '2274.95898438')

    self.composite_resource.delete()
def data_store_structure(request):
    """
    Get file hierarchy (collection of subcollections and data objects) for
    the requested directory in hydroshareZone or any federated zone used for
    HydroShare resource backend store.

    It is invoked by an AJAX call and returns a json object that holds
    content for files and folders under the requested
    directory/collection/subcollection. The AJAX request must be a POST
    request with input data passed in for res_id and store_path where
    store_path is the relative path under the res_id collection/directory.

    :param request: Django HttpRequest (POST with 'res_id' and 'store_path')
    :return: HttpResponse with a json body containing 'files', 'folders',
        'can_be_public' and, for composite resources, coverage data; or an
        error response (400/401/500) on bad input / auth failure / irods error
    """
    res_id = request.POST.get('res_id', None)
    if res_id is None:
        # a missing parameter is a client error: return 400 to be consistent
        # with the other bad-request responses in this view (the previous
        # version returned 500 here)
        return HttpResponse('Bad request - resource id is not included',
                            status=status.HTTP_400_BAD_REQUEST)
    res_id = str(res_id).strip()
    try:
        resource, _, _ = authorize(
            request, res_id, needed_permission=ACTION_TO_AUTHORIZE.VIEW_RESOURCE)
    except NotFound:
        return HttpResponse('Bad request - resource not found',
                            status=status.HTTP_400_BAD_REQUEST)
    except PermissionDenied:
        return HttpResponse('Permission denied',
                            status=status.HTTP_401_UNAUTHORIZED)

    store_path = request.POST.get('store_path', None)
    if store_path is None:
        return HttpResponse('Bad request - store_path is not included',
                            status=status.HTTP_400_BAD_REQUEST)
    store_path = str(store_path).strip()
    if not store_path:
        return HttpResponse('Bad request - store_path cannot be empty',
                            status=status.HTTP_400_BAD_REQUEST)

    # this is federated if warranted, automatically, by choosing an
    # appropriate session.
    istorage = resource.get_irods_storage()

    # compute the absolute irods collection path and the path relative to
    # the resource root (federated resources prefix the federation path)
    if resource.resource_federation_path:
        res_coll = os.path.join(resource.resource_federation_path, res_id, store_path)
        rel_path = store_path
    else:
        res_coll = os.path.join(res_id, store_path)
        rel_path = res_coll

    try:
        # istorage.listdir returns (folders, files) for the collection
        store = istorage.listdir(res_coll)
        files = []
        for fname in store[1]:
            name_with_full_path = os.path.join(res_coll, fname)
            name_with_rel_path = os.path.join(rel_path, fname)
            size = istorage.size(name_with_full_path)
            mtype = get_file_mime_type(fname)
            # keep only the mime subtype, e.g. 'application/pdf' -> 'pdf'
            idx = mtype.find('/')
            if idx >= 0:
                mtype = mtype[idx + 1:]
            f_pk = ''
            f_url = ''
            logical_file_type = ''
            logical_file_id = ''
            # match the irods data object back to its ResourceFile record to
            # recover the pk, download url and (for composite resources)
            # logical file info
            for f in ResourceFile.objects.filter(object_id=resource.id):
                if name_with_rel_path == get_resource_file_name_and_extension(f)[0]:
                    f_pk = f.pk
                    f_url = get_resource_file_url(f)
                    if resource.resource_type == "CompositeResource":
                        # NOTE(review): assumes every file of a composite
                        # resource has a logical file - verify, otherwise
                        # f.logical_file.id raises AttributeError
                        logical_file_type = f.logical_file_type_name
                        logical_file_id = f.logical_file.id
                    break
            files.append({
                'name': fname,
                'size': size,
                'type': mtype,
                'pk': f_pk,
                'url': f_url,
                'logical_type': logical_file_type,
                'logical_file_id': logical_file_id
            })
    except SessionException as ex:
        # irods session failure - surface the stderr text to the client
        return HttpResponse(ex.stderr,
                            status=status.HTTP_500_INTERNAL_SERVER_ERROR)

    return_object = {
        'files': files,
        'folders': store[0],
        'can_be_public': resource.can_be_public_or_discoverable
    }

    if resource.resource_type == "CompositeResource":
        spatial_coverage_dict = get_coverage_data_dict(resource)
        temporal_coverage_dict = get_coverage_data_dict(resource, coverage_type='temporal')
        return_object['spatial_coverage'] = spatial_coverage_dict
        return_object['temporal_coverage'] = temporal_coverage_dict

    return HttpResponse(json.dumps(return_object),
                        content_type="application/json")