def test_file_rename_or_move(self):
    # test that file can't be moved or renamed for any resource file
    # that's part of the TimeSeries logical file object (LFO)
    self.sqlite_file_obj = open(self.sqlite_file, 'r')
    self._create_composite_resource()
    res_file = self.composite_resource.files.first()

    # extract metadata from the sqlite file
    TimeSeriesLogicalFile.set_file_type(self.composite_resource, res_file.id, self.user)

    # test renaming of files that are associated with timeseries LFO - should raise exception
    self.assertEqual(self.composite_resource.files.count(), 1)

    base_path = "data/contents/ODM2_Multi_Site_One_Variable/{}"
    src_path = base_path.format('ODM2_Multi_Site_One_Variable.sqlite')
    tgt_path = base_path.format('ODM2_Multi_Site_One_Variable_1.sqlite')
    with self.assertRaises(DRF_ValidationError):
        move_or_rename_file_or_folder(self.user, self.composite_resource.short_id,
                                      src_path, tgt_path)
    # TODO: test for renaming csv file when we implement csv file

    # test moving the files associated with timeseries LFO
    tgt_path = 'data/contents/new_folder/ODM2_Multi_Site_One_Variable.sqlite'
    with self.assertRaises(DRF_ValidationError):
        move_or_rename_file_or_folder(self.user, self.composite_resource.short_id,
                                      src_path, tgt_path)
    # TODO: test for moving csv file when we implement csv file

    self.composite_resource.delete()
def _test_invalid_csv_file(self, invalid_csv_file_name):
    invalid_csv_file_obj = self._get_invalid_csv_file_obj(invalid_csv_file_name)

    file_to_upload = UploadedFile(file=invalid_csv_file_obj,
                                  name=os.path.basename(invalid_csv_file_obj.name))
    self._create_composite_resource(title='Untitled Resource', file_to_upload=file_to_upload)

    self.assertEqual(self.composite_resource.files.all().count(), 1)
    res_file = self.composite_resource.files.first()

    # check that the resource file is associated with GenericLogicalFile
    self.assertEqual(res_file.has_logical_file, True)
    self.assertEqual(res_file.logical_file_type_name, "GenericLogicalFile")
    # check that there is one GenericLogicalFile object
    self.assertEqual(GenericLogicalFile.objects.count(), 1)
    # check that there is no TimeSeriesLogicalFile object
    self.assertEqual(TimeSeriesLogicalFile.objects.count(), 0)

    # trying to set this invalid csv file to timeseries file type should raise
    # ValidationError
    with self.assertRaises(ValidationError):
        TimeSeriesLogicalFile.set_file_type(self.composite_resource, res_file.id, self.user)

    # test that the invalid file did not get deleted
    self.assertEqual(self.composite_resource.files.all().count(), 1)
    res_file = self.composite_resource.files.first()
    # check that the resource file is still associated with generic logical file
    self.assertEqual(res_file.has_logical_file, True)
    self.assertEqual(res_file.logical_file_type_name, "GenericLogicalFile")

    self.composite_resource.delete()
def test_sqlite_set_file_type_to_timeseries(self):
    # here we are using a valid sqlite file for setting it
    # to TimeSeries file type which includes metadata extraction
    self.sqlite_file_obj = open(self.sqlite_file, 'r')
    self._create_composite_resource(title='Untitled Resource')

    self.assertEqual(self.composite_resource.files.all().count(), 1)
    res_file = self.composite_resource.files.first()

    # check that the resource file is associated with GenericLogicalFile
    self.assertEqual(res_file.has_logical_file, True)
    self.assertEqual(res_file.logical_file_type_name, "GenericLogicalFile")
    # check that there is one GenericLogicalFile object
    self.assertEqual(GenericLogicalFile.objects.count(), 1)
    # check that there is no TimeSeriesLogicalFile object
    self.assertEqual(TimeSeriesLogicalFile.objects.count(), 0)

    # set the sqlite file to TimeSeries file type
    TimeSeriesLogicalFile.set_file_type(self.composite_resource, res_file.id, self.user)

    # test extracted metadata
    assert_time_series_file_type_metadata(self)

    # test file level keywords
    # res_file = self.composite_resource.files.first()
    # logical_file = res_file.logical_file
    # self.assertEqual(len(logical_file.metadata.keywords), 1)
    # self.assertEqual(logical_file.metadata.keywords[0], 'Snow water equivalent')

    self.composite_resource.delete()
def test_timeseries_file_type_folder_delete(self):
    # when a file is set to TimeSeriesLogicalFile type, the system automatically
    # creates a folder using the name of the file that was used to set the file type.
    # Here we need to test that when that folder gets deleted, all files
    # in that folder get deleted, the logical file object gets deleted and
    # the associated metadata objects get deleted
    self.sqlite_file_obj = open(self.sqlite_file, 'r')
    self._create_composite_resource(title='Untitled Resource')

    self.assertEqual(self.composite_resource.files.all().count(), 1)
    res_file = self.composite_resource.files.first()

    # set the sqlite file to TimeSeries file type
    TimeSeriesLogicalFile.set_file_type(self.composite_resource, res_file.id, self.user)
    res_file = self.composite_resource.files.first()

    # test that we have one logical file of type TimeSeries
    self.assertEqual(TimeSeriesLogicalFile.objects.count(), 1)
    self.assertEqual(TimeSeriesFileMetaData.objects.count(), 1)

    # delete the folder for the logical file
    folder_path = "data/contents/ODM2_Multi_Site_One_Variable"
    remove_folder(self.user, self.composite_resource.short_id, folder_path)

    # there should be no content files
    self.assertEqual(self.composite_resource.files.count(), 0)
    # there should not be any timeseries logical file or metadata file
    self.assertEqual(TimeSeriesLogicalFile.objects.count(), 0)
    self.assertEqual(TimeSeriesFileMetaData.objects.count(), 0)

    # test that all file level metadata got deleted
    # there should be no Site metadata objects
    self.assertTrue(Site.objects.count() == 0)
    # there should be no Variable metadata objects
    self.assertTrue(Variable.objects.count() == 0)
    # there should be no Method metadata objects
    self.assertTrue(Method.objects.count() == 0)
    # there should be no ProcessingLevel metadata objects
    self.assertTrue(ProcessingLevel.objects.count() == 0)
    # there should be no TimeSeriesResult metadata objects
    self.assertTrue(TimeSeriesResult.objects.count() == 0)

    # there should not be any CV type records
    self.assertEqual(CVVariableType.objects.all().count(), 0)
    self.assertEqual(CVVariableName.objects.all().count(), 0)
    self.assertEqual(CVSpeciation.objects.all().count(), 0)
    self.assertEqual(CVElevationDatum.objects.all().count(), 0)
    self.assertEqual(CVSiteType.objects.all().count(), 0)
    self.assertEqual(CVMethodType.objects.all().count(), 0)
    self.assertEqual(CVUnitsType.objects.all().count(), 0)
    self.assertEqual(CVStatus.objects.all().count(), 0)
    self.assertEqual(CVMedium.objects.all().count(), 0)
    self.assertEqual(CVAggregationStatistic.objects.all().count(), 0)

    self.composite_resource.delete()
def _test_file_metadata_on_file_delete(self, ext):
    self.sqlite_file_obj = open(self.sqlite_file, 'r')
    self._create_composite_resource()
    res_file = self.composite_resource.files.first()

    # set the sqlite file to TimeSeries file type
    TimeSeriesLogicalFile.set_file_type(self.composite_resource, res_file.id, self.user)

    # test that we have one logical file of type TimeSeries
    self.assertEqual(TimeSeriesLogicalFile.objects.count(), 1)
    self.assertEqual(TimeSeriesFileMetaData.objects.count(), 1)

    # delete the content file specified by extension (ext parameter)
    res_file = hydroshare.utils.get_resource_files_by_extension(
        self.composite_resource, ext)[0]
    hydroshare.delete_resource_file(self.composite_resource.short_id,
                                    res_file.id,
                                    self.user)

    # test that we don't have any logical file of type TimeSeries
    self.assertEqual(TimeSeriesLogicalFile.objects.count(), 0)
    self.assertEqual(TimeSeriesFileMetaData.objects.count(), 0)

    # test that all file level metadata got deleted
    # there should be no Site metadata objects
    self.assertTrue(Site.objects.count() == 0)
    # there should be no Variable metadata objects
    self.assertTrue(Variable.objects.count() == 0)
    # there should be no Method metadata objects
    self.assertTrue(Method.objects.count() == 0)
    # there should be no ProcessingLevel metadata objects
    self.assertTrue(ProcessingLevel.objects.count() == 0)
    # there should be no TimeSeriesResult metadata objects
    self.assertTrue(TimeSeriesResult.objects.count() == 0)

    # there should not be any CV type records
    self.assertEqual(CVVariableType.objects.all().count(), 0)
    self.assertEqual(CVVariableName.objects.all().count(), 0)
    self.assertEqual(CVSpeciation.objects.all().count(), 0)
    self.assertEqual(CVElevationDatum.objects.all().count(), 0)
    self.assertEqual(CVSiteType.objects.all().count(), 0)
    self.assertEqual(CVMethodType.objects.all().count(), 0)
    self.assertEqual(CVUnitsType.objects.all().count(), 0)
    self.assertEqual(CVStatus.objects.all().count(), 0)
    self.assertEqual(CVMedium.objects.all().count(), 0)
    self.assertEqual(CVAggregationStatistic.objects.all().count(), 0)

    self.composite_resource.delete()
def test_bag_ingestion(self):
    from hs_core.views.utils import unzip_file

    def normalize_metadata(metadata_str):
        """Prepares metadata string to match resource id and hydroshare url of original"""
        return metadata_str\
            .replace(current_site_url(), "http://www.hydroshare.org")\
            .replace(res.short_id, "97523bdb7b174901b3fc2d89813458f1")

    # create empty resource
    res = resource.create_resource(
        'CompositeResource',
        self.user,
        'My Test Resource'
    )
    full_paths = {}

    files_to_upload = [UploadedFile(
        file=open('hs_core/tests/data/test_resource_metadata_files.zip', 'rb'),
        name="test_resource_metadata_files.zip")]
    add_resource_files(res.short_id, *files_to_upload, full_paths=full_paths)

    unzip_file(self.user, res.short_id, "data/contents/test_resource_metadata_files.zip",
               True, overwrite=True, auto_aggregate=True, ingest_metadata=True)

    def compare_metadatas(new_metadata_str, original_metadata_file):
        original_graph = Graph()
        with open(os.path.join(self.extracted_directory, original_metadata_file), "r") as f:
            original_graph = original_graph.parse(data=f.read())
        new_graph = Graph()
        new_graph = new_graph.parse(data=normalize_metadata(new_metadata_str))

        # remove modified date, they'll never match
        subject = new_graph.value(predicate=RDF.type, object=DCTERMS.modified)
        new_graph.remove((subject, None, None))
        subject = original_graph.value(predicate=RDF.type, object=DCTERMS.modified)
        original_graph.remove((subject, None, None))

        for (new_triple, original_triple) in _squashed_graphs_triples(new_graph, original_graph):
            self.assertEqual(new_triple, original_triple,
                             "Ingested resource metadata does not match original")

    res.refresh_from_db()
    compare_metadatas(res.metadata.get_xml(), "resourcemetadata.xml")

    compare_metadatas(res.get_logical_files(GenericLogicalFile.type_name())[0].metadata.get_xml(),
                      "test_meta.xml")
    compare_metadatas(res.get_logical_files(FileSetLogicalFile.type_name())[0].metadata.get_xml(),
                      "asdf/asdf_meta.xml")
    compare_metadatas(res.get_logical_files(GeoFeatureLogicalFile.type_name())[0].metadata.get_xml(),
                      "watersheds_meta.xml")
    compare_metadatas(res.get_logical_files(GeoRasterLogicalFile.type_name())[0].metadata.get_xml(),
                      "logan_meta.xml")
    compare_metadatas(res.get_logical_files(NetCDFLogicalFile.type_name())[0].metadata.get_xml(),
                      "SWE_time_meta.xml")
    compare_metadatas(res.get_logical_files(RefTimeseriesLogicalFile.type_name())[0].metadata.get_xml(),
                      "msf_version.refts_meta.xml")
    compare_metadatas(res.get_logical_files(TimeSeriesLogicalFile.type_name())[0].metadata.get_xml(),
                      "ODM2_Multi_Site_One_Variable_meta.xml")
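# Illustrative sketch (not part of the test above, and an assumption rather than the
# project's own helper): before comparing metadata graphs triple-by-triple as
# compare_metadatas() does, rdflib's compare module can be used as a coarse sanity
# check that two RDF/XML metadata documents describe isomorphic graphs. The function
# name graphs_match and the RDF/XML format choice are hypothetical.
from rdflib import Graph
from rdflib.compare import isomorphic


def graphs_match(xml_a, xml_b):
    # parse both RDF/XML strings and test whether the resulting graphs are isomorphic
    graph_a = Graph().parse(data=xml_a, format="xml")
    graph_b = Graph().parse(data=xml_b, format="xml")
    return isomorphic(graph_a, graph_b)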
def test_sqlite_set_file_type_to_timeseries_federated(self):
    # only do federation testing when REMOTE_USE_IRODS is True and irods docker containers
    # are set up properly
    super(TimeSeriesFileTypeTest, self).assert_federated_irods_available()

    # here we are using a valid sqlite file for setting it
    # to TimeSeries file type which includes metadata extraction
    fed_test_file_full_path = '/{zone}/home/{username}/{fname}'.format(
        zone=settings.HS_USER_IRODS_ZONE, username=self.user.username,
        fname=self.sqlite_file_name)
    res_upload_files = []
    fed_res_path = hydroshare.utils.get_federated_zone_home_path(fed_test_file_full_path)
    res_title = 'Untitled resource'
    self.composite_resource = hydroshare.create_resource(
        resource_type='CompositeResource',
        owner=self.user,
        title=res_title,
        files=res_upload_files,
        source_names=[fed_test_file_full_path],
        fed_res_path=fed_res_path,
        move=False,
        metadata=[],
        auto_aggregate=False
    )

    # test resource is created on federated zone
    self.assertNotEqual(self.composite_resource.resource_federation_path, '')

    self.assertEqual(self.composite_resource.files.all().count(), 1)
    res_file = self.composite_resource.files.first()

    # check that the resource file is not associated with any logical file
    self.assertEqual(res_file.has_logical_file, False)
    fed_file_path = "{}/{}".format(self.composite_resource.file_path, self.sqlite_file_name)
    self.assertEqual(res_file.storage_path, fed_file_path)

    # set the sqlite file to TimeSeries file type
    TimeSeriesLogicalFile.set_file_type(self.composite_resource, self.user, res_file.id)

    # test extracted metadata
    res_file = self.composite_resource.files.first()
    base_file_name, _ = os.path.splitext(res_file.file_name)
    expected_file_folder = base_file_name
    assert_time_series_file_type_metadata(self, expected_file_folder=expected_file_folder)
def _test_invalid_file(self):
    self.assertEqual(self.composite_resource.files.all().count(), 1)
    res_file = self.composite_resource.files.first()

    # check that the resource file is associated with the generic logical file
    self.assertEqual(res_file.has_logical_file, True)
    self.assertEqual(res_file.logical_file_type_name, "GenericLogicalFile")

    # trying to set this invalid sqlite file to timeseries file type should raise
    # ValidationError
    with self.assertRaises(ValidationError):
        TimeSeriesLogicalFile.set_file_type(self.composite_resource, res_file.id, self.user)

    # test that the invalid file did not get deleted
    self.assertEqual(self.composite_resource.files.all().count(), 1)

    # check that the resource file is still associated with the generic logical file
    self.assertEqual(res_file.has_logical_file, True)
    self.assertEqual(res_file.logical_file_type_name, "GenericLogicalFile")
def test_file_metadata_on_logical_file_delete(self):
    # test that when the TimeSeriesLogicalFile instance is deleted
    # all metadata associated with it also gets deleted
    self.sqlite_file_obj = open(self.sqlite_file, 'r')
    self._create_composite_resource(title='Untitled Resource')

    self.assertEqual(self.composite_resource.files.all().count(), 1)
    res_file = self.composite_resource.files.first()

    # set the sqlite file to TimeSeries file type
    TimeSeriesLogicalFile.set_file_type(self.composite_resource, res_file.id, self.user)
    res_file = self.composite_resource.files.first()
    logical_file = res_file.logical_file

    # file level metadata
    # there should be Site metadata objects
    self.assertTrue(Site.objects.count() > 0)
    # there should be Variable metadata objects
    self.assertTrue(Variable.objects.count() > 0)
    # there should be Method metadata objects
    self.assertTrue(Method.objects.count() > 0)
    # there should be ProcessingLevel metadata objects
    self.assertTrue(ProcessingLevel.objects.count() > 0)
    # there should be TimeSeriesResult metadata objects
    self.assertTrue(TimeSeriesResult.objects.count() > 0)

    # CV lookup data
    self.assertEqual(logical_file.metadata.cv_variable_types.all().count(), 23)
    self.assertEqual(CVVariableType.objects.all().count(), 23)
    self.assertEqual(logical_file.metadata.cv_variable_names.all().count(), 805)
    self.assertEqual(CVVariableName.objects.all().count(), 805)
    self.assertEqual(logical_file.metadata.cv_speciations.all().count(), 145)
    self.assertEqual(CVSpeciation.objects.all().count(), 145)
    self.assertEqual(logical_file.metadata.cv_elevation_datums.all().count(), 5)
    self.assertEqual(CVElevationDatum.objects.all().count(), 5)
    self.assertEqual(logical_file.metadata.cv_site_types.all().count(), 51)
    self.assertEqual(CVSiteType.objects.all().count(), 51)
    self.assertEqual(logical_file.metadata.cv_method_types.all().count(), 25)
    self.assertEqual(CVMethodType.objects.all().count(), 25)
    self.assertEqual(logical_file.metadata.cv_units_types.all().count(), 179)
    self.assertEqual(CVUnitsType.objects.all().count(), 179)
    self.assertEqual(logical_file.metadata.cv_statuses.all().count(), 4)
    self.assertEqual(CVStatus.objects.all().count(), 4)
    self.assertEqual(logical_file.metadata.cv_mediums.all().count(), 18)
    self.assertEqual(CVMedium.objects.all().count(), 18)
    self.assertEqual(logical_file.metadata.cv_aggregation_statistics.all().count(), 17)
    self.assertEqual(CVAggregationStatistic.objects.all().count(), 17)

    # delete the logical file
    logical_file.logical_delete(self.user)
    # test that we have no logical file of type TimeSeries
    self.assertEqual(TimeSeriesLogicalFile.objects.count(), 0)
    self.assertEqual(TimeSeriesFileMetaData.objects.count(), 0)

    # test that all file level metadata got deleted
    # there should be no Site metadata objects
    self.assertTrue(Site.objects.count() == 0)
    # there should be no Variable metadata objects
    self.assertTrue(Variable.objects.count() == 0)
    # there should be no Method metadata objects
    self.assertTrue(Method.objects.count() == 0)
    # there should be no ProcessingLevel metadata objects
    self.assertTrue(ProcessingLevel.objects.count() == 0)
    # there should be no TimeSeriesResult metadata objects
    self.assertTrue(TimeSeriesResult.objects.count() == 0)

    # there should not be any CV type records
    self.assertEqual(CVVariableType.objects.all().count(), 0)
    self.assertEqual(CVVariableName.objects.all().count(), 0)
    self.assertEqual(CVSpeciation.objects.all().count(), 0)
    self.assertEqual(CVElevationDatum.objects.all().count(), 0)
    self.assertEqual(CVSiteType.objects.all().count(), 0)
    self.assertEqual(CVMethodType.objects.all().count(), 0)
    self.assertEqual(CVUnitsType.objects.all().count(), 0)
    self.assertEqual(CVStatus.objects.all().count(), 0)
    self.assertEqual(CVMedium.objects.all().count(), 0)
    self.assertEqual(CVAggregationStatistic.objects.all().count(), 0)

    self.composite_resource.delete()
def test_sqlite_metadata_update(self):
    # here we are using a valid sqlite file for setting it
    # to TimeSeries file type which includes metadata extraction
    # then we are testing update of the file level metadata elements
    self.sqlite_file_obj = open(self.sqlite_file, 'r')
    self._create_composite_resource(title='Untitled Resource')

    self.assertEqual(self.composite_resource.files.all().count(), 1)
    res_file = self.composite_resource.files.first()

    # check that the resource file is associated with GenericLogicalFile
    self.assertEqual(res_file.has_logical_file, True)
    self.assertEqual(res_file.logical_file_type_name, "GenericLogicalFile")
    # check that there is one GenericLogicalFile object
    self.assertEqual(GenericLogicalFile.objects.count(), 1)
    # check that there is no TimeSeriesLogicalFile object
    self.assertEqual(TimeSeriesLogicalFile.objects.count(), 0)

    # set the sqlite file to TimeSeries file type
    TimeSeriesLogicalFile.set_file_type(self.composite_resource, res_file.id, self.user)
    res_file = self.composite_resource.files.first()
    logical_file = res_file.logical_file

    # test updating site element
    site = logical_file.metadata.sites.filter(site_code='USU-LBR-Paradise').first()
    self.assertNotEqual(site, None)
    site_name = 'Little Bear River at McMurdy Hollow near Paradise, Utah'
    self.assertEqual(site.site_name, site_name)
    self.assertEqual(site.elevation_m, 1445)
    self.assertEqual(site.elevation_datum, 'NGVD29')
    self.assertEqual(site.site_type, 'Stream')
    self.assertFalse(logical_file.metadata.is_dirty)

    site_name = 'Little Bear River at Logan, Utah'
    site_data = {'site_name': site_name, 'elevation_m': site.elevation_m,
                 'elevation_datum': site.elevation_datum, 'site_type': site.site_type}
    logical_file.metadata.update_element('Site', site.id, **site_data)
    site = logical_file.metadata.sites.filter(site_code='USU-LBR-Paradise').first()
    self.assertEqual(site.site_name, site_name)
    self.assertTrue(logical_file.metadata.is_dirty)

    # updating site lat/long should update the resource coverage as well as file level coverage
    box_coverage = self.composite_resource.metadata.coverages.all().filter(type='box').first()
    self.assertEqual(box_coverage.value['projection'], 'WGS 84 EPSG:4326')
    self.assertEqual(box_coverage.value['units'], 'Decimal degrees')
    self.assertEqual(box_coverage.value['northlimit'], 41.718473)
    self.assertEqual(box_coverage.value['eastlimit'], -111.799324)
    self.assertEqual(box_coverage.value['southlimit'], 41.495409)
    self.assertEqual(box_coverage.value['westlimit'], -111.946402)

    box_coverage = logical_file.metadata.spatial_coverage
    self.assertEqual(box_coverage.value['projection'], 'WGS 84 EPSG:4326')
    self.assertEqual(box_coverage.value['units'], 'Decimal degrees')
    self.assertEqual(box_coverage.value['northlimit'], 41.718473)
    self.assertEqual(box_coverage.value['eastlimit'], -111.799324)
    self.assertEqual(box_coverage.value['southlimit'], 41.495409)
    self.assertEqual(box_coverage.value['westlimit'], -111.946402)

    site_data['latitude'] = 40.7896
    logical_file.metadata.update_element('Site', site.id, **site_data)
    site = logical_file.metadata.sites.filter(site_code='USU-LBR-Paradise').first()
    self.assertEqual(site.latitude, 40.7896)

    # test that resource level coverage got updated
    box_coverage = self.composite_resource.metadata.coverages.all().filter(type='box').first()
    self.assertEqual(box_coverage.value['projection'], 'WGS 84 EPSG:4326')
    self.assertEqual(box_coverage.value['units'], 'Decimal degrees')
    self.assertEqual(box_coverage.value['northlimit'], 41.718473)
    self.assertEqual(box_coverage.value['eastlimit'], -111.799324)
    # this is the changed value for the southlimit as a result of changing the site latitude
    self.assertEqual(box_coverage.value['southlimit'], 40.7896)
    self.assertEqual(box_coverage.value['westlimit'], -111.946402)

    # test that file level coverage got updated
    box_coverage = logical_file.metadata.spatial_coverage
    self.assertEqual(box_coverage.value['projection'], 'WGS 84 EPSG:4326')
    self.assertEqual(box_coverage.value['units'], 'Decimal degrees')
    self.assertEqual(box_coverage.value['northlimit'], 41.718473)
    self.assertEqual(box_coverage.value['eastlimit'], -111.799324)
    # this is the changed value for the southlimit as a result of changing the site latitude
    self.assertEqual(box_coverage.value['southlimit'], 40.7896)
    self.assertEqual(box_coverage.value['westlimit'], -111.946402)

    logical_file.metadata.is_dirty = False
    logical_file.metadata.save()

    # test updating variable element
    variable = logical_file.metadata.variables.filter(variable_code='USU36').first()
    self.assertNotEqual(variable, None)
    self.assertEqual(variable.variable_name, 'Temperature')
    self.assertEqual(variable.variable_type, 'Water Quality')
    self.assertEqual(variable.no_data_value, -9999)
    self.assertEqual(variable.speciation, 'Not Applicable')
    self.assertEqual(variable.variable_definition, None)

    var_def = 'Concentration of oxygen dissolved in water.'
    variable_data = {'variable_definition': var_def}
    logical_file.metadata.update_element('Variable', variable.id, **variable_data)
    variable = logical_file.metadata.variables.filter(variable_code='USU36').first()
    self.assertEqual(variable.variable_definition, var_def)
    self.assertEqual(variable.variable_name, 'Temperature')
    self.assertEqual(variable.variable_type, 'Water Quality')
    self.assertEqual(variable.no_data_value, -9999)
    self.assertEqual(variable.speciation, 'Not Applicable')

    self.assertTrue(logical_file.metadata.is_dirty)
    logical_file.metadata.is_dirty = False
    logical_file.metadata.save()

    # test updating method element
    method = logical_file.metadata.methods.filter(method_code=28).first()
    self.assertNotEqual(method, None)
    self.assertEqual(method.method_name, 'Quality Control Level 1 Data Series created from raw '
                                         'QC Level 0 data using ODM Tools.')
    self.assertEqual(method.method_type, 'Instrument deployment')
    self.assertEqual(method.method_description, 'Quality Control Level 1 Data Series created '
                                                'from raw QC Level 0 data using ODM Tools.')
    self.assertEqual(method.method_link, None)

    method_link = "http://somesite.com"
    method_data = {'method_link': method_link}
    logical_file.metadata.update_element('Method', method.id, **method_data)
    method = logical_file.metadata.methods.filter(method_code=28).first()
    self.assertNotEqual(method, None)
    self.assertEqual(method.method_name, 'Quality Control Level 1 Data Series created from raw '
                                         'QC Level 0 data using ODM Tools.')
    self.assertEqual(method.method_type, 'Instrument deployment')
    self.assertEqual(method.method_description, 'Quality Control Level 1 Data Series created '
                                                'from raw QC Level 0 data using ODM Tools.')
    self.assertEqual(method.method_link, method_link)

    self.assertTrue(logical_file.metadata.is_dirty)
    logical_file.metadata.is_dirty = False
    logical_file.metadata.save()

    # test updating processing level element
    pro_level = logical_file.metadata.processing_levels.filter(processing_level_code=1).first()
    self.assertNotEqual(pro_level, None)
    self.assertEqual(pro_level.definition, 'Quality controlled data')
    explanation = 'Quality controlled data that have passed quality assurance procedures ' \
                  'such as routine estimation of timing and sensor calibration or visual ' \
                  'inspection and removal of obvious errors. An example is USGS published ' \
                  'streamflow records following parsing through USGS quality ' \
                  'control procedures.'
    self.assertEqual(pro_level.explanation, explanation)

    definition = "Uncontrolled data"
    pro_level_data = {'definition': definition}
    logical_file.metadata.update_element('ProcessingLevel', pro_level.id, **pro_level_data)
    pro_level = logical_file.metadata.processing_levels.filter(processing_level_code=1).first()
    self.assertNotEqual(pro_level, None)
    self.assertEqual(pro_level.definition, definition)
    explanation = 'Quality controlled data that have passed quality assurance procedures ' \
                  'such as routine estimation of timing and sensor calibration or visual ' \
                  'inspection and removal of obvious errors. An example is USGS published ' \
                  'streamflow records following parsing through USGS quality ' \
                  'control procedures.'
    self.assertEqual(pro_level.explanation, explanation)

    self.assertTrue(logical_file.metadata.is_dirty)
    logical_file.metadata.is_dirty = False
    logical_file.metadata.save()

    # test updating time series result element
    ts_result = logical_file.metadata.time_series_results.all().first()
    self.assertNotEqual(ts_result, None)
    self.assertEqual(ts_result.units_type, 'Temperature')
    self.assertEqual(ts_result.units_name, 'degree celsius')
    self.assertEqual(ts_result.units_abbreviation, 'degC')
    self.assertEqual(ts_result.status, 'Unknown')
    self.assertEqual(ts_result.sample_medium, 'Surface Water')
    self.assertEqual(ts_result.value_count, 1441)
    self.assertEqual(ts_result.aggregation_statistics, 'Average')

    ts_data = {'status': 'Complete'}
    logical_file.metadata.update_element('timeseriesresult', ts_result.id, **ts_data)
    ts_result = logical_file.metadata.time_series_results.all().first()
    self.assertNotEqual(ts_result, None)
    self.assertEqual(ts_result.units_type, 'Temperature')
    self.assertEqual(ts_result.units_name, 'degree celsius')
    self.assertEqual(ts_result.units_abbreviation, 'degC')
    self.assertEqual(ts_result.status, 'Complete')
    self.assertEqual(ts_result.sample_medium, 'Surface Water')
    self.assertEqual(ts_result.value_count, 1441)
    self.assertEqual(ts_result.aggregation_statistics, 'Average')
    self.assertTrue(logical_file.metadata.is_dirty)

    self.composite_resource.delete()
def test_CSV_set_file_type_to_timeseries(self):
    # here we are using a valid CSV file for setting it
    # to TimeSeries file type which includes metadata extraction
    self.odm2_csv_file_obj = open(self.odm2_csv_file, 'r')
    file_to_upload = UploadedFile(file=self.odm2_csv_file_obj,
                                  name=os.path.basename(self.odm2_csv_file_obj.name))
    self._create_composite_resource(title='Untitled Resource', file_to_upload=file_to_upload)

    self.assertEqual(self.composite_resource.files.all().count(), 1)
    res_file = self.composite_resource.files.first()

    # check that the resource file is associated with GenericLogicalFile
    self.assertEqual(res_file.has_logical_file, True)
    self.assertEqual(res_file.logical_file_type_name, "GenericLogicalFile")
    # check that there is one GenericLogicalFile object
    self.assertEqual(GenericLogicalFile.objects.count(), 1)
    # check that there is no TimeSeriesLogicalFile object
    self.assertEqual(TimeSeriesLogicalFile.objects.count(), 0)

    # set the CSV file to TimeSeries file type
    TimeSeriesLogicalFile.set_file_type(self.composite_resource, res_file.id, self.user)

    # test that the ODM2.sqlite blank file got added to the resource
    self.assertEqual(self.composite_resource.files.all().count(), 2)
    csv_res_file = None
    sqlite_res_file = None
    for res_file in self.composite_resource.files.all():
        if res_file.extension == '.sqlite':
            sqlite_res_file = res_file
        elif res_file.extension == '.csv':
            csv_res_file = res_file
    self.assertNotEqual(csv_res_file, None)
    self.assertNotEqual(sqlite_res_file, None)

    self.assertEqual(csv_res_file.logical_file_type_name, "TimeSeriesLogicalFile")
    self.assertEqual(sqlite_res_file.logical_file_type_name, "TimeSeriesLogicalFile")
    self.assertEqual(TimeSeriesLogicalFile.objects.count(), 1)
    logical_file = csv_res_file.logical_file

    # test that both csv and sqlite files of the logical file are in a folder
    csv_file_name = os.path.basename(self.odm2_csv_file_obj.name)
    for res_file in logical_file.files.all():
        self.assertEqual(res_file.file_folder, csv_file_name[:-4])

    # since the uploaded csv file has 2 data columns, the metadata should have 2 series names
    self.assertEqual(len(logical_file.metadata.series_names), 2)
    csv_data_column_names = set(['Temp_DegC_Mendon', 'Temp_DegC_Paradise'])
    self.assertEqual(set(logical_file.metadata.series_names), csv_data_column_names)

    # since the uploaded csv file has 2 data columns, the metadata should have
    # 2 elements in the value_counts attribute (dict)
    self.assertEqual(len(logical_file.metadata.value_counts), 2)
    self.assertEqual(set(logical_file.metadata.value_counts.keys()), csv_data_column_names)

    # there should be 20 data values for each series
    self.assertEqual(logical_file.metadata.value_counts['Temp_DegC_Mendon'], '20')
    self.assertEqual(logical_file.metadata.value_counts['Temp_DegC_Paradise'], '20')

    # the dataset name (title) must be set to the name of the CSV file
    self.assertEqual(logical_file.dataset_name, csv_file_name[:-4])

    # there should not be any file level abstract
    self.assertEqual(logical_file.metadata.abstract, None)

    # there should not be any file level keywords
    self.assertEqual(logical_file.metadata.keywords, [])

    # there should be 1 coverage element of type period at the file level
    self.assertEqual(logical_file.metadata.coverages.all().count(), 1)
    self.assertEqual(logical_file.metadata.coverages.filter(type='period').count(), 1)
    self.assertEqual(logical_file.has_csv_file, True)

    # at file level there should not be any site element
    self.assertEqual(logical_file.metadata.sites.all().count(), 0)

    # at file level there should not be any method element
    self.assertEqual(logical_file.metadata.methods.all().count(), 0)

    # at file level there should not be any variable element
    self.assertEqual(logical_file.metadata.variables.all().count(), 0)

    # at file level there should not be any processing level element
    self.assertEqual(logical_file.metadata.processing_levels.all().count(), 0)

    # at file level there should not be any result element
    self.assertEqual(logical_file.metadata.time_series_results.all().count(), 0)

    # resource title does not get updated when csv is set to file type
    self.assertEqual(self.composite_resource.metadata.title.value, 'Untitled Resource')
    # self._test_no_change_in_metadata()

    # there should be 2 format elements - since the resource has a csv file and a sqlite file
    self.assertEqual(self.composite_resource.metadata.formats.all().count(), 2)

    # there should be 1 coverage element of type period
    self.assertEqual(self.composite_resource.metadata.coverages.all().count(), 1)
    self.assertEqual(self.composite_resource.metadata.coverages.filter(type='period').count(), 1)

    self.composite_resource.delete()
def get_folder_aggregation_type_to_set(self, dir_path):
    """Returns an aggregation (file type) type that the specified folder *dir_path* can
    possibly be set to.

    :param dir_path: Resource file directory path (full folder path starting with resource id)
    for which the possible aggregation type that can be set needs to be determined

    :return If the specified folder already represents an aggregation or does not contain
    suitable file(s), returns None. If the specified folder contains only files that meet
    the requirements of a supported aggregation and does not contain other folders, returns
    the class name of that matching aggregation type.
    """
    aggregation_type_to_set = ""
    if self.get_folder_aggregation_object(dir_path) is not None:
        # target folder is already an aggregation
        return None

    istorage = self.get_irods_storage()
    irods_path = dir_path
    if self.is_federated:
        irods_path = os.path.join(self.resource_federation_path, irods_path)
    store = istorage.listdir(irods_path)
    if store[0]:
        # seems there are folders under dir_path - no aggregation type can be set if the
        # target folder contains other folders
        return None

    files_in_folder = [res_file for res_file in self.files.all()
                       if res_file.dir_path == dir_path]
    if not files_in_folder:
        # folder is empty
        return None

    if len(files_in_folder) > 1:
        # check for geo feature
        aggregation_type_to_set = GeoFeatureLogicalFile.check_files_for_aggregation_type(
            files_in_folder)
        if aggregation_type_to_set:
            return aggregation_type_to_set

        # check for raster
        aggregation_type_to_set = GeoRasterLogicalFile.check_files_for_aggregation_type(
            files_in_folder)
        if aggregation_type_to_set:
            return aggregation_type_to_set
    else:
        # check for raster
        aggregation_type_to_set = GeoRasterLogicalFile.check_files_for_aggregation_type(
            files_in_folder)
        if aggregation_type_to_set:
            return aggregation_type_to_set

        # check for NetCDF aggregation type
        aggregation_type_to_set = NetCDFLogicalFile.check_files_for_aggregation_type(
            files_in_folder)
        if aggregation_type_to_set:
            return aggregation_type_to_set

        # check for TimeSeries aggregation type
        aggregation_type_to_set = TimeSeriesLogicalFile.check_files_for_aggregation_type(
            files_in_folder)
        if aggregation_type_to_set:
            return aggregation_type_to_set

    return None
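# Illustrative sketch (not part of the class above): a minimal example of how a caller,
# such as a view deciding whether to show a "set folder to aggregation" option, might
# use get_folder_aggregation_type_to_set(). The helper name and its use are assumptions,
# not the project's own API.
def can_set_folder_aggregation(resource, folder_path):
    """Return True if the folder could be promoted to a supported aggregation type."""
    # get_folder_aggregation_type_to_set() returns a matching aggregation class name
    # or None when the folder is already an aggregation or has no suitable files
    return resource.get_folder_aggregation_type_to_set(folder_path) is not None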
def handle(self, *args, **options):
    logger = logging.getLogger(__name__)
    resource_counter = 0
    to_resource_type = 'CompositeResource'
    msg = "THERE ARE CURRENTLY {} TIMESERIES RESOURCES PRIOR TO CONVERSION.".format(
        TimeSeriesResource.objects.all().count())
    logger.info(msg)
    print(">> {}".format(msg))

    for ts_res in TimeSeriesResource.objects.all():
        # check resource exists on irods
        istorage = ts_res.get_irods_storage()
        if not istorage.exists(ts_res.root_path):
            err_msg = "Timeseries resource not found in irods (ID: {})".format(ts_res.short_id)
            logger.error(err_msg)
            print("Error:>> {}".format(err_msg))
            # skip this timeseries resource for migration
            continue

        sqlite_file = None
        res_file_count = ts_res.files.count()
        if res_file_count == 1 or res_file_count == 2:
            for res_file in ts_res.files.all():
                if res_file.extension.lower() == '.sqlite':
                    sqlite_file = res_file

        create_ts_aggregation = sqlite_file is not None
        if create_ts_aggregation:
            # check resource files exist on irods
            file_missing = False
            for res_file in ts_res.files.all():
                file_path = res_file.public_path
                if not istorage.exists(file_path):
                    err_msg = "File path not found in irods:{}".format(file_path)
                    logger.error(err_msg)
                    err_msg = "Failed to convert timeseries resource (ID: {}). " \
                              "Resource file is missing on irods".format(ts_res.short_id)
                    print("Error:>> {}".format(err_msg))
                    file_missing = True
                    break
            if file_missing:
                # skip this corrupt timeseries resource for migration
                continue

        # change the resource_type
        ts_metadata_obj = ts_res.metadata
        ts_res.resource_type = to_resource_type
        ts_res.content_model = to_resource_type.lower()
        ts_res.save()

        # get the converted resource object - CompositeResource
        comp_res = ts_res.get_content_model()

        # set CoreMetaData object for the composite resource
        core_meta_obj = CoreMetaData.objects.create()
        comp_res.content_object = core_meta_obj

        # migrate timeseries resource core metadata elements to composite resource
        migrate_core_meta_elements(ts_metadata_obj, comp_res)

        # update url attribute of the metadata 'type' element
        type_element = comp_res.metadata.type
        type_element.url = '{0}/terms/{1}'.format(current_site_url(), to_resource_type)
        type_element.save()

        if create_ts_aggregation:
            # create a Timeseries aggregation
            ts_aggr = None
            try:
                ts_aggr = TimeSeriesLogicalFile.create(resource=comp_res)
            except Exception as ex:
                err_msg = 'Failed to create Timeseries aggregation for resource (ID: {})'
                err_msg = err_msg.format(ts_res.short_id)
                err_msg = err_msg + '\n' + ex.message
                logger.error(err_msg)
                print("Error:>> {}".format(err_msg))

            if ts_aggr is not None:
                # set aggregation dataset title
                ts_aggr.dataset_name = comp_res.metadata.title.value
                ts_aggr.save()

                # make the res files part of the aggregation
                for res_file in comp_res.files.all():
                    ts_aggr.add_resource_file(res_file)

                # migrate timeseries specific metadata to aggregation
                for site in ts_metadata_obj.sites:
                    site.content_object = ts_aggr.metadata
                    site.save()
                for variable in ts_metadata_obj.variables:
                    variable.content_object = ts_aggr.metadata
                    variable.save()
                for method in ts_metadata_obj.methods:
                    method.content_object = ts_aggr.metadata
                    method.save()
                for proc_level in ts_metadata_obj.processing_levels:
                    proc_level.content_object = ts_aggr.metadata
                    proc_level.save()
                for ts_result in ts_metadata_obj.time_series_results:
                    ts_result.content_object = ts_aggr.metadata
                    ts_result.save()

                # create aggregation level coverage elements
                for coverage in comp_res.metadata.coverages.all():
                    aggr_coverage = Coverage()
                    aggr_coverage.type = coverage.type
                    aggr_coverage._value = coverage._value
                    aggr_coverage.content_object = ts_aggr.metadata
                    aggr_coverage.save()

                utc_offset = ts_metadata_obj.utc_offset
                if utc_offset:
                    utc_offset.content_object = ts_aggr.metadata
                    utc_offset.save()

                ts_aggr.metadata.value_counts = ts_metadata_obj.value_counts
                ts_aggr.metadata.save()

                # create aggregation level keywords
                keywords = [sub.value for sub in comp_res.metadata.subjects.all()]
                ts_aggr.metadata.keywords = keywords
                # set aggregation metadata dirty status to that of the timeseries resource
                # metadata dirty status - this would trigger sqlite file update for the
                # new aggregation if metadata is dirty
                ts_aggr.metadata.is_dirty = ts_metadata_obj.is_dirty
                ts_aggr.metadata.save()
                # create aggregation level xml files
                ts_aggr.create_aggregation_xml_documents()
                msg = 'One Timeseries aggregation was created in resource (ID: {})'
                msg = msg.format(comp_res.short_id)
                logger.info(msg)

        comp_res.save()
        # set resource to dirty so that resource level xml files (resource map and
        # metadata xml files) will be re-generated as part of next bag download
        try:
            set_dirty_bag_flag(comp_res)
        except Exception as ex:
            err_msg = 'Failed to set bag flag dirty for the converted resource (ID: {})'
            err_msg = err_msg.format(ts_res.short_id)
            err_msg = err_msg + '\n' + ex.message
            logger.error(err_msg)
            print("Error:>> {}".format(err_msg))

        resource_counter += 1
        # delete the instance of TimeSeriesMetaData that was part of the original
        # timeseries resource
        ts_metadata_obj.delete()
        msg = 'Timeseries resource (ID: {}) was converted to Composite Resource type'
        msg = msg.format(comp_res.short_id)
        logger.info(msg)

    msg = "{} TIMESERIES RESOURCES WERE CONVERTED TO COMPOSITE RESOURCE.".format(
        resource_counter)
    logger.info(msg)
    print(">> {}".format(msg))
    msg = "THERE ARE CURRENTLY {} TIMESERIES RESOURCES AFTER CONVERSION.".format(
        TimeSeriesResource.objects.all().count())
    logger.info(msg)
    if TimeSeriesResource.objects.all().count() > 0:
        msg = "NOT ALL TIMESERIES RESOURCES WERE CONVERTED TO COMPOSITE RESOURCE TYPE"
        logger.error(msg)
    print(">> {}".format(msg))
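# Usage note (assumption, not confirmed by the source): handle() above belongs to a
# Django management command, so it would be run through manage.py under the name of the
# module that defines the Command class, for example:
#
#   python manage.py convert_ts_to_composite
#
# where "convert_ts_to_composite" is a hypothetical module name used here only for
# illustration.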