def test_unfederated_folder_path_setting(self):
    """Check folder and storage path of a file in subfolder 'foo' across path updates."""

    def assert_location_unchanged(stored_file, expected_path):
        # the file must still report folder 'foo' and the fully qualified path
        self.assertEqual(stored_file.file_folder, "foo")
        self.assertEqual(stored_file.storage_path, expected_path)

    # the resource starts out empty
    self.assertEqual(self.res.files.all().count(), 0, msg="resource file count didn't match")

    ResourceFile.create_folder(self.res, 'foo')
    hydroshare.add_resource_files(self.res.short_id, self.test_file_1, folder='foo')

    # exactly one file after the upload
    self.assertEqual(self.res.files.all().count(), 1, msg="resource file count didn't match")

    stored_file = self.res.files.all()[0]

    # fully qualified path where the uploaded file is expected
    expected_path = os.path.join(self.res.short_id, "data", "contents", "foo", "file1.txt")
    assert_location_unchanged(stored_file, expected_path)
    self.assertTrue(stored_file.path_is_acceptable(expected_path))

    # a well-formed path to a file that was never uploaded is rejected
    missing_path = os.path.join(self.res.short_id, "data", "contents", "foo", "file2.txt")
    with self.assertRaises(ValidationError):
        stored_file.path_is_acceptable(missing_path)

    # an unqualified name ends up as the qualified path
    stored_file.set_storage_path("foo/file1.txt")
    assert_location_unchanged(stored_file, expected_path)

    # setting the path to its current value changes nothing
    stored_file.set_storage_path(expected_path)
    assert_location_unchanged(stored_file, expected_path)

    # pointing at the non-existent object raises and leaves the state alone
    with self.assertRaises(ValidationError):
        stored_file.set_storage_path(missing_path)
    assert_location_unchanged(stored_file, expected_path)

    # TODO: how to eliminate this particular error.
    # (open cases: a path such as 'x' + expected_path, or short_id + "file1.txt")
    # NOTE: folder 'foo' is not removed explicitly; only the resource is deleted.
    hydroshare.delete_resource(self.res.short_id)
def _test_create_aggregation_from_folder(self, foldet_to_test):
    """Helper: create a NetCDF aggregation from a folder and check files stay in it.

    :param foldet_to_test: folder path the nc file is uploaded to and from which the
        aggregation is created
    """
    self.create_composite_resource()
    # a freshly created resource holds no files
    self.assertEqual(self.composite_resource.files.count(), 0)

    target_folder = foldet_to_test
    ResourceFile.create_folder(self.composite_resource, target_folder)

    # upload the nc file into that folder
    uploaded = self.add_file_to_resource(file_to_add=self.netcdf_file,
                                         upload_folder=target_folder)
    self.assertEqual(uploaded.file_folder, target_folder)
    self.assertEqual(self.composite_resource.files.count(), 1)

    # nothing is part of an aggregation yet
    for content_file in self.composite_resource.files.all():
        self.assertFalse(content_file.has_logical_file)
    self.assertEqual(NetCDFLogicalFile.objects.count(), 0)

    # build the aggregation from the folder rather than from a file id
    NetCDFLogicalFile.set_file_type(self.composite_resource, self.user,
                                    folder_path=target_folder)
    self.assertEqual(NetCDFLogicalFile.objects.count(), 1)

    for content_file in self.composite_resource.files.all():
        # every file now belongs to a logical file ...
        self.assertTrue(content_file.has_logical_file)
        # ... and is still in the original folder (no new folder was created)
        self.assertEqual(content_file.file_folder, target_folder)

    self.composite_resource.delete()
def test_aggregation_file_move(self):
    """Files belonging to a GeoFeature aggregation cannot be moved to another folder."""
    self.create_composite_resource(self.states_required_zip_file)
    zip_file = self.composite_resource.files.first()
    base_file_name, _ = os.path.splitext(zip_file.file_name)

    # create the GeoFeature aggregation from the first (uploaded) resource file
    GeoFeatureLogicalFile.set_file_type(self.composite_resource, self.user, zip_file.id)

    # the resource is expected to hold three files once the aggregation exists
    self.assertEqual(self.composite_resource.files.count(), 3)

    destination_folder = 'geofeature_aggr'
    ResourceFile.create_folder(self.composite_resource, destination_folder)

    # each attempted move into the new folder must be rejected
    tgt_path = 'data/contents/geofeature_aggr'
    for aggr_file in self.composite_resource.files.all():
        with self.assertRaises(DRF_ValidationError):
            src_path = os.path.join('data', 'contents', aggr_file.short_path)
            move_or_rename_file_or_folder(self.user, self.composite_resource.short_id,
                                          src_path, tgt_path)

    self.composite_resource.delete()
def test_create_aggregation_from_nc_file_2(self):
    """Create a NetCDF aggregation from a valid nc file that lives in a folder.

    Checks the extracted metadata and the single file-level keyword.
    """
    self.create_composite_resource()

    folder_name = 'netcdf_aggr'
    ResourceFile.create_folder(self.composite_resource, folder_name)

    # upload the nc file into the folder (not the root of the hierarchy)
    self.add_file_to_resource(file_to_add=self.netcdf_file, upload_folder=folder_name)
    self.assertEqual(self.composite_resource.files.all().count(), 1)

    nc_file = self.composite_resource.files.first()
    # before setting the file type: no logical file on the file, no NetCDFLogicalFile rows
    self.assertEqual(nc_file.has_logical_file, False)
    self.assertEqual(NetCDFLogicalFile.objects.count(), 0)

    # set the nc file to NetCDF file type
    NetCDFLogicalFile.set_file_type(self.composite_resource, self.user, nc_file.id)

    # shared helper verifies the extracted metadata
    assert_netcdf_file_type_metadata(self, self.res_title, aggr_folder=folder_name)

    # file-level keywords
    nc_file = self.composite_resource.files.first()
    aggregation = nc_file.logical_file
    self.assertEqual(len(aggregation.metadata.keywords), 1)
    self.assertEqual(aggregation.metadata.keywords[0], 'Snow water equivalent')

    self.composite_resource.delete()
def test_create_aggregation_2(self):
    """A generic aggregation can be created from a resource file located in a folder."""
    self.create_composite_resource()

    folder_name = 'generic_folder'
    ResourceFile.create_folder(self.composite_resource, folder_name)

    # upload the txt file into the folder
    self.add_file_to_resource(file_to_add=self.generic_file, upload_folder=folder_name)
    self.assertEqual(self.composite_resource.files.all().count(), 1)

    txt_file = self.composite_resource.files.first()
    # the file sits in the folder and is not yet part of any aggregation
    self.assertEqual(txt_file.file_folder, folder_name)
    self.assertEqual(txt_file.has_logical_file, False)
    self.assertEqual(GenericLogicalFile.objects.count(), 0)

    # turn the file into a generic logical file (aggregation)
    GenericLogicalFile.set_file_type(self.composite_resource, self.user, txt_file.id)

    txt_file = self.composite_resource.files.first()
    # the folder is unchanged and the aggregation type matches the expected name
    self.assertEqual(txt_file.file_folder, folder_name)
    self.assertEqual(txt_file.logical_file_type_name, self.logical_file_type_name)
    self.assertEqual(GenericLogicalFile.objects.count(), 1)

    self.composite_resource.delete()
def test_aggregation_file_move(self):
    """Files belonging to a NetCDF aggregation cannot be moved to another folder."""
    self.create_composite_resource()
    self.add_file_to_resource(file_to_add=self.netcdf_file)
    nc_file = self.composite_resource.files.first()

    # create the aggregation from the nc file
    NetCDFLogicalFile.set_file_type(self.composite_resource, self.user, nc_file.id)

    # two files are expected once the aggregation exists
    self.assertEqual(self.composite_resource.files.count(), 2)

    first_file = self.composite_resource.files.first()
    # the aggregation folder is named after the file (without its extension)
    base_file_name, ext = os.path.splitext(first_file.file_name)
    expected_folder_name = base_file_name
    self.assertEqual(first_file.file_folder, expected_folder_name)

    destination_folder = 'netcdf_aggr'
    ResourceFile.create_folder(self.composite_resource, destination_folder)

    # each attempted move into the new folder must be rejected
    tgt_path = 'data/contents/{}'.format(destination_folder)
    for aggr_file in self.composite_resource.files.all():
        with self.assertRaises(DRF_ValidationError):
            src_path = os.path.join('data', 'contents', aggr_file.short_path)
            move_or_rename_file_or_folder(self.user, self.composite_resource.short_id,
                                          src_path, tgt_path)

    self.composite_resource.delete()
def setUp(self):
    """Create a user and a GenericResource holding test1.txt inside folder 'foo'.

    The text file is written to the current working directory. The handle used for
    the upload is closed through ``addCleanup`` because it is not stored on ``self``
    and so nothing else could close it.
    """
    super(TestFolderDownloadZip, self).setUp()
    self.output_path = "zips/rand/foo.zip"
    self.group, _ = Group.objects.get_or_create(name='Hydroshare Author')
    self.user = create_account(
        '*****@*****.**',
        username='******',
        first_name='Shaun',
        last_name='Livingston',
        superuser=False,
        groups=[]
    )

    # create files; the context manager closes the handle even if write() fails
    self.n1 = "test1.txt"
    with open(self.n1, 'w') as out_file:
        out_file.write("Test text file in test1.txt")

    # keep the read handle open for the upload, and close it when the test finishes
    test_file = open(self.n1, "r")
    self.addCleanup(test_file.close)

    self.res = create_resource(resource_type='GenericResource',
                               owner=self.user,
                               title='Test Resource',
                               metadata=[], )

    ResourceFile.create_folder(self.res, 'foo')

    # add one file to the resource
    add_resource_files(self.res.short_id, test_file, folder='foo')
def link_irods_file_to_django(resource, filepath):
    """
    Register an already-existing iRODS file with the Django resource model.

    Nothing happens when ``resource`` is falsy or when a ResourceFile record for the
    path already exists. Otherwise a record is created, the file's mime type is added
    as a 'format' metadata element if the resource does not list it yet, and
    ``resource.set_default_logical_file()`` is called.

    :param resource: resource object the file belongs to
    :param filepath: full path to file
    """
    # TODO: folder is an abstract concept... utilize short_path for whole API
    if not resource:
        return

    folder, base = ResourceFile.resource_path_is_acceptable(resource, filepath,
                                                            test_exists=False)
    try:
        ResourceFile.get(resource=resource, file=base, folder=folder)
    except ObjectDoesNotExist:
        # this does not copy the file from anywhere; it must exist already
        ResourceFile.create(resource=resource, file=base, folder=folder)
    else:
        # the file is already linked - nothing more to do
        return

    mime_type = get_file_mime_type(filepath)
    known_formats = [fmt.value for fmt in resource.metadata.formats.all()]
    if mime_type not in known_formats:
        resource.metadata.create_element('format', value=mime_type)

    # this should assign a logical file object to this new file
    # if this resource supports logical file
    resource.set_default_logical_file()
def rename_irods_file_or_folder_in_django(resource, src_name, tgt_name):
    """
    Update the Django DB records for a file or folder that has been renamed.

    If ``src_name`` resolves to a single ResourceFile, that record's storage path is
    set to ``tgt_name``. Otherwise ``src_name`` is treated as a folder and every file
    listed under it has the first occurrence of ``src_name`` in its storage path
    replaced by ``tgt_name``.

    :param resource: the BaseResource object representing a HydroShare resource
    :param src_name: the file or folder full path name to be renamed
    :param tgt_name: the file or folder full path name to be renamed to
    :return: None

    Note: records are updated in place via ResourceFile.set_storage_path, so no
    copy/recreate of the file object is needed and logical file relationships are
    preserved.
    """
    # validating src_name happens as a side effect of splitting it
    src_folder, src_base = ResourceFile.resource_path_is_acceptable(resource, src_name,
                                                                    test_exists=False)
    try:
        file_record = ResourceFile.get(resource=resource, file=src_base, folder=src_folder)
        # validating tgt_name happens as a side effect of this call
        ResourceFile.resource_path_is_acceptable(resource, tgt_name, test_exists=True)
        file_record.set_storage_path(tgt_name)
    except ObjectDoesNotExist:
        # src_name and tgt_name are folder names
        for folder_file in ResourceFile.list_folder(resource, src_name):
            # naively replace src_name with tgt_name (first occurrence only)
            renamed_path = folder_file.storage_path.replace(src_name, tgt_name, 1)
            folder_file.set_storage_path(renamed_path)
def test_create_aggregation_from_shp_file_required_3(self):
    """Create a GeoFeature aggregation from a shp file whose folder holds an unrelated file.

    The folder 'my_folder' holds the three required files (shp, shx, dbf) plus an
    unrelated zip file. After the aggregation is created from the shp file, the
    aggregation must contain three files, all still in 'my_folder', and the resource
    must still hold four files.
    """
    self.create_composite_resource()

    folder_name = 'my_folder'
    ResourceFile.create_folder(self.composite_resource, folder_name)

    # upload the 3 required files; each must land in the folder
    required_files = (self.states_shp_file, self.states_shx_file, self.states_dbf_file)
    for required_file in required_files:
        uploaded = self.add_file_to_resource(file_to_add=required_file,
                                             upload_folder=folder_name)
        self.assertEqual(uploaded.file_folder, folder_name)
    self.assertEqual(self.composite_resource.files.all().count(), 3)

    # upload a file that is unrelated to the aggregation
    uploaded = self.add_file_to_resource(file_to_add=self.states_zip_invalid_file,
                                         upload_folder=folder_name)
    self.assertEqual(uploaded.file_folder, folder_name)
    self.assertEqual(self.composite_resource.files.all().count(), 4)

    # the last uploaded file has no logical file and no aggregation exists yet
    self.assertEqual(uploaded.has_logical_file, False)
    self.assertEqual(GeoFeatureLogicalFile.objects.count(), 0)

    # set the shp file to GeoFeatureLogicalFile type
    shp_res_file = [f for f in self.composite_resource.files.all()
                    if f.extension == '.shp'][0]
    GeoFeatureLogicalFile.set_file_type(self.composite_resource, self.user, shp_res_file.id)
    self.assertEqual(GeoFeatureLogicalFile.objects.count(), 1)

    # NOTE: this base name is computed but not used by any assertion below
    base_shp_file_base_name, _ = os.path.splitext(shp_res_file.file_name)

    # re-fetch the shp file to reach its aggregation
    shp_res_file = [f for f in self.composite_resource.files.all()
                    if f.extension == '.shp'][0]
    aggregation = shp_res_file.logical_file
    self.assertEqual(aggregation.files.count(), 3)
    for aggr_file in aggregation.files.all():
        # each aggregation file is still in the same folder - no new folder created
        self.assertEqual(aggr_file.file_folder, folder_name)

    self.assertEqual(self.composite_resource.files.all().count(), 4)
    self.composite_resource.delete()
def test_create_aggregation_from_nc_file_3(self):
    """Create a NetCDF aggregation from an nc file whose folder holds an unrelated file.

    Layout exercised by the assertions:
      before: my_folder/<valid nc file> and my_folder/<other nc file>
      after:  the two aggregation files live in my_folder/<nc base name>/ while
              the unrelated file stays in my_folder
    """
    self.create_composite_resource()
    new_folder = 'my_folder'
    ResourceFile.create_folder(self.composite_resource, new_folder)

    # add the nc file to the resource at the above folder
    self.add_file_to_resource(file_to_add=self.netcdf_file, upload_folder=new_folder)
    self.assertEqual(self.composite_resource.files.all().count(), 1)
    nc_res_file = self.composite_resource.files.first()

    # the resource file has no logical file and no NetCDFLogicalFile exists yet
    self.assertEqual(nc_res_file.has_logical_file, False)
    self.assertEqual(NetCDFLogicalFile.objects.count(), 0)

    # add another file to the same folder
    self.add_file_to_resource(file_to_add=self.netcdf_invalid_file,
                              upload_folder=new_folder)
    self.assertEqual(self.composite_resource.files.all().count(), 2)

    # set the nc file to NetCDF file type; the resource then holds three files
    NetCDFLogicalFile.set_file_type(self.composite_resource, self.user, nc_res_file.id)
    self.assertEqual(self.composite_resource.files.all().count(), 3)

    # test logical file/aggregation
    self.assertEqual(len(self.composite_resource.logical_files), 1)
    logical_file = self.composite_resource.logical_files[0]
    self.assertEqual(logical_file.files.count(), 2)
    base_nc_file_name, _ = os.path.splitext(self.netcdf_file_name)
    expected_file_folder = '{0}/{1}'.format(new_folder, base_nc_file_name)
    for aggr_file in logical_file.files.all():
        self.assertEqual(aggr_file.file_folder, expected_file_folder)

    self.assertIsInstance(logical_file, NetCDFLogicalFile)
    # The original call was assertTrue(logical_file.metadata, NetCDFLogicalFile):
    # the second positional argument of assertTrue is the failure message, so no
    # type check was ever made. Only the presence of metadata is asserted here.
    # NOTE(review): add an assertIsInstance against the NetCDF metadata class if a
    # type check was intended.
    self.assertIsNotNone(logical_file.metadata)

    # test the location of the file that's not part of the netcdf aggregation
    other_res_file = next(
        (f for f in self.composite_resource.files.all() if not f.has_logical_file),
        None)
    # fail with a clear message rather than an AttributeError on None
    self.assertIsNotNone(other_res_file,
                         msg="expected one file outside of the aggregation")
    self.assertEqual(other_res_file.file_folder, new_folder)

    self.composite_resource.delete()
def test_aggregation_folder_move(self):
    """Moving an aggregation folder updates its name and its meta/resmap xml paths."""
    self.create_composite_resource()
    self.add_file_to_resource(file_to_add=self.netcdf_file)
    nc_file = self.composite_resource.files.first()

    # the aggregation folder is named after the nc file without its extension
    base_file_name, ext = os.path.splitext(nc_file.file_name)
    aggr_folder = base_file_name

    # create aggregation from the nc file
    NetCDFLogicalFile.set_file_type(self.composite_resource, self.user, nc_file.id)
    self.assertEqual(self.composite_resource.files.count(), 2)
    for content_file in self.composite_resource.files.all():
        self.assertEqual(content_file.file_folder, aggr_folder)

    # destination that will contain the aggregation folder
    parent_folder = 'parent_folder'
    ResourceFile.create_folder(self.composite_resource, parent_folder)

    # move the aggregation folder into the parent folder
    src_path = 'data/contents/{}'.format(aggr_folder)
    tgt_path = 'data/contents/{0}/{1}'.format(parent_folder, aggr_folder)
    move_or_rename_file_or_folder(self.user, self.composite_resource.short_id,
                                  src_path, tgt_path)

    moved_folder = '{0}/{1}'.format(parent_folder, aggr_folder)
    for content_file in self.composite_resource.files.all():
        self.assertEqual(content_file.file_folder, moved_folder)

    # aggregation name follows the new folder
    first_file = self.composite_resource.files.first()
    aggregation = first_file.logical_file
    self.assertEqual(aggregation.aggregation_name, first_file.file_folder)

    # aggregation xml file paths follow the new folder as well
    expected_meta_file_path = '{0}/{1}/{2}_meta.xml'.format(parent_folder, aggr_folder,
                                                            aggr_folder)
    self.assertEqual(aggregation.metadata_short_file_path, expected_meta_file_path)
    expected_map_file_path = '{0}/{1}/{2}_resmap.xml'.format(parent_folder, aggr_folder,
                                                             aggr_folder)
    self.assertEqual(aggregation.map_short_file_path, expected_map_file_path)

    self.composite_resource.delete()
def test_create_aggregation_from_zip_file_required_2(self):
    """Create a GeoFeature aggregation from a zip file that lives in a folder.

    The zip file contains only the 3 required files. No new folder should be created
    for the aggregation, and deleting the resource must remove the logical file and
    its metadata objects.
    """
    self.create_composite_resource()

    folder_name = 'geofeature_aggr'
    ResourceFile.create_folder(self.composite_resource, folder_name)

    # upload the zip file into the folder
    self.add_file_to_resource(file_to_add=self.states_required_zip_file,
                              upload_folder=folder_name)
    self.assertEqual(self.composite_resource.files.all().count(), 1)

    zip_file = self.composite_resource.files.first()
    self.assertEqual(zip_file.file_folder, folder_name)
    # no logical file on the file, no GeoFeatureLogicalFile rows yet
    self.assertEqual(zip_file.has_logical_file, False)
    self.assertEqual(GeoFeatureLogicalFile.objects.count(), 0)

    zip_file = self.composite_resource.files.first()
    self.assertEqual(zip_file.extension, '.zip')

    # set the zip file to GeoFeatureLogicalFile type
    GeoFeatureLogicalFile.set_file_type(self.composite_resource, self.user, zip_file.id)

    # shared helper verifies file type and file type metadata
    assert_geofeature_file_type_metadata(self, folder_name)

    for content_file in self.composite_resource.files.all():
        # every file is part of an aggregation (logical file) ...
        self.assertTrue(content_file.has_logical_file)
        # ... and still in the same folder - no new folder was created
        self.assertEqual(content_file.file_folder, folder_name)

    # there should not be any file level keywords
    first_file = self.composite_resource.files.first()
    aggregation = first_file.logical_file
    self.assertEqual(aggregation.metadata.keywords, [])

    self.composite_resource.delete()

    # resource deletion leaves no logical file or file metadata object behind
    self.assertEqual(GeoFeatureLogicalFile.objects.count(), 0)
    self.assertEqual(GeoFeatureFileMetaData.objects.count(), 0)
def test_create_aggregation_2(self):
    """Create a RefTimeseries aggregation from a valid json file that lives in a folder."""
    self.res_title = "Untitled resource"
    self.create_composite_resource()

    folder_name = 'refts_folder'
    ResourceFile.create_folder(self.composite_resource, folder_name)

    # upload the json file into the folder
    self.add_file_to_resource(file_to_add=self.refts_file, upload_folder=folder_name)
    self.assertEqual(self.composite_resource.files.all().count(), 1)

    json_file = self.composite_resource.files.first()
    # the file is in the folder, has no logical file, and no aggregation exists yet
    self.assertEqual(json_file.file_folder, folder_name)
    self.assertEqual(json_file.has_logical_file, False)
    self.assertEqual(RefTimeseriesLogicalFile.objects.count(), 0)

    # set the json file to RefTimeseries file type
    RefTimeseriesLogicalFile.set_file_type(self.composite_resource, self.user,
                                           json_file.id)

    json_file = self.composite_resource.files.first()
    # exactly one aggregation now; the file did not change folder
    self.assertEqual(RefTimeseriesLogicalFile.objects.count(), 1)
    self.assertEqual(json_file.file_folder, folder_name)
    self.assertEqual(json_file.logical_file_type_name, self.logical_file_type_name)

    # shared helper verifies the extracted ref time series metadata
    assert_ref_time_series_file_type_metadata(self)

    # the stored json_file_content must equal the content of the json file itself
    json_file = self.composite_resource.files.first()
    aggregation = json_file.logical_file
    self.assertEqual(aggregation.metadata.json_file_content,
                     json_file.resource_file.read())

    # the file is still in the folder
    self.assertEqual(json_file.file_folder, folder_name)

    self.composite_resource.delete()
def test_aggregation_folder_sub_folder_not_allowed(self):
    """A folder cannot be created inside a folder that represents an aggregation."""
    self.create_composite_resource()
    self.add_file_to_resource(file_to_add=self.netcdf_file)

    nc_file = self.composite_resource.files.first()
    # the uploaded file is not in any folder yet
    self.assertEqual(nc_file.file_folder, None)

    # create aggregation from the nc file; the file then has a folder
    NetCDFLogicalFile.set_file_type(self.composite_resource, self.user, nc_file.id)
    nc_file = self.composite_resource.files.first()
    self.assertNotEqual(nc_file.file_folder, None)

    # creating a sub folder below the aggregation folder must be rejected
    nested_folder = '{}/sub_folder'.format(nc_file.file_folder)
    with self.assertRaises(DRF_ValidationError):
        ResourceFile.create_folder(self.composite_resource, nested_folder)

    self.composite_resource.delete()
def test_create_aggregation_from_shp_file_required_2(self):
    """Create a GeoFeature aggregation from a shp file that lives in a folder.

    The folder holds exactly the 3 required files; no new folder should be created
    as part of creating the aggregation.
    """
    self.create_composite_resource()

    folder_name = 'geofeature_aggr'
    ResourceFile.create_folder(self.composite_resource, folder_name)

    # upload the 3 required files; each must land in the folder
    required_files = (self.states_shp_file, self.states_shx_file, self.states_dbf_file)
    for required_file in required_files:
        uploaded = self.add_file_to_resource(file_to_add=required_file,
                                             upload_folder=folder_name)
        self.assertEqual(uploaded.file_folder, folder_name)
    self.assertEqual(self.composite_resource.files.all().count(), 3)

    first_file = self.composite_resource.files.first()
    # no logical file on the file, no GeoFeatureLogicalFile rows yet
    self.assertEqual(first_file.has_logical_file, False)
    self.assertEqual(GeoFeatureLogicalFile.objects.count(), 0)

    # set the shp file to GeoFeatureLogicalFile type
    shp_res_file = [f for f in self.composite_resource.files.all()
                    if f.extension == '.shp'][0]
    GeoFeatureLogicalFile.set_file_type(self.composite_resource, self.user, shp_res_file.id)
    self.assertEqual(GeoFeatureLogicalFile.objects.count(), 1)

    for content_file in self.composite_resource.files.all():
        # every file is part of an aggregation (logical file) ...
        self.assertTrue(content_file.has_logical_file)
        # ... and still in the same folder - no new folder created
        self.assertEqual(content_file.file_folder, folder_name)

    self.composite_resource.delete()
def test_aggregation_file_rename(self):
    """Files that are part of a GeoFeature aggregation cannot be renamed."""
    self.create_composite_resource()

    folder_name = 'my_folder'
    ResourceFile.create_folder(self.composite_resource, folder_name)

    # upload the zip file (holding the required files) into the folder
    self.add_file_to_resource(file_to_add=self.states_required_zip_file,
                              upload_folder=folder_name)
    zip_file = self.composite_resource.files.first()
    base_file_name, _ = os.path.splitext(zip_file.file_name)

    # create aggregation from the zip file
    GeoFeatureLogicalFile.set_file_type(self.composite_resource, self.user, zip_file.id)

    # three files are expected once the aggregation exists
    self.assertEqual(self.composite_resource.files.count(), 3)

    # (source template, target template) for each aggregation file, in test order
    rename_attempts = (
        ('data/contents/{}/states.shp', 'data/contents/{}/states-1.shp'),
        ('data/contents/{}/states.dbf', 'data/contents/{}/states-1.dbf'),
        ('data/contents/{}/states.shx', 'data/contents/{}/states-1.shx'),
    )
    for src_template, tgt_template in rename_attempts:
        src_path = src_template.format(folder_name)
        tgt_path = tgt_template.format(folder_name)
        # renaming a file that belongs to the aggregation must be rejected
        with self.assertRaises(DRF_ValidationError):
            move_or_rename_file_or_folder(self.user, self.composite_resource.short_id,
                                          src_path, tgt_path)

    self.composite_resource.delete()
def test_aggregation_folder_move_not_allowed(self):
    """A folder cannot be moved into a folder that represents an aggregation."""
    self.create_composite_resource()
    self.add_file_to_resource(file_to_add=self.netcdf_file)
    nc_file = self.composite_resource.files.first()

    # the aggregation folder is named after the nc file without its extension
    base_file_name, ext = os.path.splitext(nc_file.file_name)
    aggr_folder = base_file_name

    # create aggregation from the nc file
    NetCDFLogicalFile.set_file_type(self.composite_resource, self.user, nc_file.id)

    # an ordinary folder that we will try to move into the aggregation folder
    folder_to_move = 'folder_to_move'
    ResourceFile.create_folder(self.composite_resource, folder_to_move)

    src_path = 'data/contents/{}'.format(folder_to_move)
    tgt_path = 'data/contents/{}'.format(aggr_folder)
    # the move must be rejected
    with self.assertRaises(DRF_ValidationError):
        move_or_rename_file_or_folder(self.user, self.composite_resource.short_id,
                                      src_path, tgt_path)

    self.composite_resource.delete()
def setUp(self):
    """Create a user and a CompositeResource with test1.txt in 'foo' and a refts file.

    Both files are written to the current working directory. The handles stored on
    ``self`` stay open for the test; they are also registered with ``addCleanup`` so
    they get closed even if ``setUp`` fails part-way (closing twice is harmless).
    """
    super(TestFolderDownloadZip, self).setUp()
    self.group, _ = Group.objects.get_or_create(name='Hydroshare Author')
    self.user = create_account('*****@*****.**',
                               username='******',
                               first_name='Shaun',
                               last_name='Livingston',
                               superuser=False,
                               groups=[])

    self.res = create_resource(resource_type='CompositeResource',
                               owner=self.user,
                               title='Test Resource',
                               metadata=[])

    ResourceFile.create_folder(self.res, 'foo')

    # create files; the context manager closes the handle even if write() fails
    self.n1 = "test1.txt"
    with open(self.n1, 'w') as out_file:
        out_file.write("Test text file in test1.txt")

    self.test_file = open(self.n1, "rb")
    self.addCleanup(self.test_file.close)
    add_resource_files(self.res.short_id, self.test_file, folder='foo')

    # copy refts file into new file to be added to the resource as an aggregation;
    # both handles are closed on exit (the source handle used to be leaked)
    with open('hs_core/tests/data/multi_sites_formatted_version1.0.refts.json',
              'rb') as reft_data_file, \
            open('multi_sites_formatted_version1.0.refts.json', 'wb') as refts_file:
        refts_file.writelines(reft_data_file.readlines())

    self.refts_file = open('multi_sites_formatted_version1.0.refts.json', 'rb')
    self.addCleanup(self.refts_file.close)

    add_resource_files(self.res.short_id, self.refts_file)
    self.res.create_aggregation_xml_documents()
    self.istorage = IrodsStorage()
def test_fileset_create_delete(self):
    """A fileset aggregation can be created from a one-file folder and deleted via the api."""
    self.create_composite_resource()

    folder_name = 'fileset_folder'
    ResourceFile.create_folder(self.composite_resource, folder_name)

    # upload the txt file into the folder
    self.add_file_to_resource(file_to_add=self.generic_file, upload_folder=folder_name)
    self.assertEqual(self.composite_resource.files.all().count(), 1)

    txt_file = self.composite_resource.files.first()
    # the file sits in the folder and is not yet part of any aggregation
    self.assertEqual(txt_file.file_folder, folder_name)
    self.assertEqual(txt_file.has_logical_file, False)
    self.assertEqual(FileSetLogicalFile.objects.count(), 0)

    # set folder to fileset logical file type (aggregation) through the public endpoint
    set_type_kwargs = {"pk": self.composite_resource.short_id,
                       "file_path": "",
                       "hs_file_type": "FileSet"}
    set_type_url = reverse('set_file_type_public', kwargs=set_type_kwargs)
    self.client.post(set_type_url, data={"folder_path": folder_name})

    txt_file = self.composite_resource.files.first()
    # same folder, expected aggregation type, exactly one fileset aggregation
    self.assertEqual(txt_file.file_folder, folder_name)
    self.assertEqual(txt_file.logical_file_type_name, self.logical_file_type_name)
    self.assertEqual(FileSetLogicalFile.objects.count(), 1)
    # aggregation dataset name should be same as the folder name
    self.assertEqual(txt_file.logical_file.dataset_name, folder_name)

    # delete the aggregation through the public endpoint
    delete_kwargs = {"resource_id": self.composite_resource.short_id,
                     "file_path": folder_name,
                     "hs_file_type": "FileSetLogicalFile"}
    delete_agg_url = reverse('delete_aggregation_public', kwargs=delete_kwargs)
    self.client.delete(delete_agg_url)

    # neither the aggregation nor any resource file remains
    self.assertEqual(FileSetLogicalFile.objects.count(), 0)
    self.assertEqual(self.composite_resource.files.all().count(), 0)

    self.composite_resource.delete()
def test_geofeature_file_type_folder_delete(self):
    """Deleting the folder of a GeoFeature aggregation removes files, aggregation, metadata.

    A zip file is uploaded into 'my_folder' and set to GeoFeature file type. Removing
    that folder must leave no content files, no GeoFeatureLogicalFile object and no
    GeoFeatureFileMetaData object.
    """
    self.create_composite_resource()
    new_folder = 'my_folder'
    ResourceFile.create_folder(self.composite_resource, new_folder)

    # add the zip file (holding the required files) to the resource at the above folder
    self.add_file_to_resource(file_to_add=self.states_required_zip_file,
                              upload_folder=new_folder)
    res_file = self.composite_resource.files.first()

    # extract metadata from the zip file
    GeoFeatureLogicalFile.set_file_type(self.composite_resource, self.user, res_file.id)

    # one logical file of type GeoFeatureLogicalFile as a result of metadata
    # extraction (this used to assert the metadata count twice instead)
    self.assertEqual(GeoFeatureLogicalFile.objects.count(), 1)
    # should have one GeoFeatureFileMetadata object
    self.assertEqual(GeoFeatureFileMetaData.objects.count(), 1)

    # there should be 3 content files
    self.assertEqual(self.composite_resource.files.count(), 3)

    # delete the folder for the logical file
    folder_path = "data/contents/{}".format(new_folder)
    remove_folder(self.user, self.composite_resource.short_id, folder_path)

    # there should be no content files
    self.assertEqual(self.composite_resource.files.count(), 0)

    # neither the logical file nor its metadata object survives the folder deletion
    self.assertEqual(GeoFeatureLogicalFile.objects.count(), 0)
    self.assertEqual(GeoFeatureFileMetaData.objects.count(), 0)

    self.composite_resource.delete()
def setUp(self):
    """Create a user and a composite resource holding one file in folder 'foo' and
    one refts json file at the resource root, then generate the aggregation xml
    documents for the resource."""
    super(TestFolderDownloadZip, self).setUp()
    self.group, _ = Group.objects.get_or_create(name='Hydroshare Author')
    self.user = create_account(
        '*****@*****.**',
        username='******',
        first_name='Shaun',
        last_name='Livingston',
        superuser=False,
        groups=[]
    )

    self.res = create_resource(resource_type='CompositeResource',
                               owner=self.user,
                               title='Test Resource',
                               metadata=[])

    ResourceFile.create_folder(self.res, 'foo')

    # create files
    self.n1 = "test1.txt"
    # the write handle is closed by the context manager even if write() fails
    with open(self.n1, 'w') as test_file:
        test_file.write("Test text file in test1.txt")

    # this read handle is stored on the test case and handed to add_resource_files,
    # so it must stay open here (presumably closed in tearDown -- TODO confirm)
    self.test_file = open(self.n1, "r")
    add_resource_files(self.res.short_id, self.test_file, folder='foo')

    # copy refts file into new file to be added to the resource as an aggregation;
    # both the source fixture and the copy are closed once the copy is written
    with open('hs_core/tests/data/multi_sites_formatted_version1.0.refts.json', 'r') \
            as reft_data_file, \
            open('multi_sites_formatted_version1.0.refts.json', 'w') as refts_file:
        refts_file.writelines(reft_data_file.readlines())

    # kept open for the same reason as self.test_file above
    self.refts_file = open('multi_sites_formatted_version1.0.refts.json', 'r')
    add_resource_files(self.res.short_id, self.refts_file)
    self.res.create_aggregation_xml_documents()
def add_file_to_resource(request, *args, **kwargs):
    """Attach every file uploaded under the 'files' form field to the resource
    identified by the 'shortkey' keyword argument, then redirect to the referring page.

    :raises TypeError: if 'shortkey' is not passed as a keyword argument
    """
    if 'shortkey' not in kwargs:
        raise TypeError('shortkey must be specified...')
    shortkey = kwargs['shortkey']

    # only the resource object is needed from the authorization result
    res, _, _ = authorize(request, shortkey, edit=True, full=True, superuser=True)

    uploaded_files = request.FILES.getlist('files')
    for uploaded in uploaded_files:
        new_file = ResourceFile(content_object=res, resource_file=uploaded)
        res.files.add(new_file)
    resource_modified(res, request.user)

    referer = request.META['HTTP_REFERER']
    return HttpResponseRedirect(referer)
def check_aggregations(resource, folders, res_files):
    """
    A helper to support creating aggregations for a given composite resource when new
    folders or files are added to the resource.
    Checks for aggregations in each folder first, then checks for aggregations in each file.
    Does nothing for resources that are not of type CompositeResource.

    :param  resource: resource object
    :param  folders: list of folders as strings to check for aggregations creation
    :param  res_files: list of ResourceFile objects to check for aggregations creation
    :return: None
    """
    if resource.resource_type != "CompositeResource":
        return

    from hs_file_types.utils import set_logical_file_type

    # check folders for aggregations
    for fol in folders:
        folder = fol
        if not fol.startswith(resource.file_path):
            # need absolute folder path to check if folder can be set to aggregation
            folder = os.path.join(resource.file_path, fol)
        else:
            # need relative folder path for creating aggregation from folder
            fol = fol[len(resource.file_path) + 1:]

        agg_type = resource.get_folder_aggregation_type_to_set(folder)
        if agg_type == 'TimeSeriesLogicalFile':
            # check if the folder (fol) contains a csv file.
            # A separate name is used for the folder listing so that the *res_files*
            # argument is still intact for the per-file pass below.
            folder_files = ResourceFile.list_folder(resource=resource, folder=fol,
                                                    sub_folders=False)
            # there can be only one file in the folder
            # if that file is a csv file - don't use the folder to create aggregation
            if folder_files and folder_files[0].extension.lower() == '.csv':
                continue
        if agg_type and agg_type != "FileSetLogicalFile":
            agg_type = agg_type.replace('LogicalFile', '')
            set_logical_file_type(res=resource, user=None, file_id=None,
                                  hs_file_type=agg_type, folder_path=fol,
                                  fail_feedback=False)

    # check files for aggregation
    for res_file in res_files:
        if not res_file.has_logical_file or res_file.logical_file.is_fileset:
            set_logical_file_type(res=resource, user=None, file_id=res_file.pk,
                                  fail_feedback=False)
def supports_zip(self, folder_to_zip):
    """Tell whether the given folder can be zipped.

    The folder can be zipped unless a file in it (or in its sub-folders) belongs
    to a logical file that does not support zip.
    """
    # callers pass both qualified and unqualified paths - always work with the qualified one
    if folder_to_zip.startswith(self.file_path):
        full_path = folder_to_zip
    else:
        full_path = os.path.join(self.file_path, folder_to_zip)

    # all resource files at full_path and its sub-folders
    folder_files = ResourceFile.list_folder(self, full_path)

    # files without a logical file never block zipping
    return all(f.logical_file.supports_zip for f in folder_files if f.has_logical_file)
def supports_delete_folder_on_zip(self, original_folder):
    """Tell whether the specified folder can be deleted once it has been zipped.

    Deletion is allowed unless a file in the folder (or in its sub-folders) belongs
    to a logical file that does not support deleting the folder on zip.
    """
    # callers pass both qualified and unqualified paths - always work with the qualified one
    if original_folder.startswith(self.file_path):
        full_path = original_folder
    else:
        full_path = os.path.join(self.file_path, original_folder)

    # all resource files at full_path and its sub-folders
    folder_files = ResourceFile.list_folder(self, full_path)

    # files without a logical file never block the deletion
    return all(f.logical_file.supports_delete_folder_on_zip
               for f in folder_files if f.has_logical_file)
def _recreate_xml_docs_for_folder(self, folder, check_metadata_dirty=False):
    """Regenerate the map and metadata xml documents tied to *folder*.

    When *folder* itself is an aggregation only that aggregation's documents are
    regenerated. Otherwise the documents of every aggregation found in *folder* and
    its sub-folders are regenerated.

    :param  folder: folder for which xml documents need to be re-created
    :param  check_metadata_dirty: if true, xml documents are created only for
    aggregations whose metadata is dirty
    """
    try:
        # does the folder itself represent an aggregation?
        aggregation = self.get_aggregation_by_name(folder)
        if not check_metadata_dirty or aggregation.metadata.is_dirty:
            aggregation.create_aggregation_xml_documents()
        # finding an aggregation by the folder name means the folder has no sub folders,
        # as a multi-file aggregation folder can't have sub folders
    except ObjectDoesNotExist:
        # collect the distinct aggregations living in *folder* and its sub-folders
        if not folder.startswith(self.file_path):
            folder = os.path.join(self.file_path, folder)

        found = []
        for res_file in ResourceFile.list_folder(self, folder):
            if not res_file.has_logical_file:
                continue
            logical_file = res_file.logical_file
            if logical_file not in found:
                found.append(logical_file)

        if check_metadata_dirty:
            found = [logical_file for logical_file in found
                     if logical_file.metadata.is_dirty]

        for logical_file in found:
            logical_file.create_aggregation_xml_documents()
def add_resource_files_in_folder(self, resource, folder):
    """
    Aggregation creation helper: makes the resource files found in *folder* and its
    sub folders part of this aggregation/logical file.

    :param  resource: an instance of CompositeResource
    :param  folder: folder from which all files need to be made part of this aggregation
    :return: all resource files found in *folder* and its sub folders
    """
    res_files = ResourceFile.list_folder(resource=resource, folder=folder,
                                         sub_folders=True)
    for res_file in res_files:
        if res_file.has_logical_file:
            owner = res_file.logical_file
            # a file already owned by a fileset aggregation is taken over only when that
            # fileset is not a sub folder of *folder*; any other owned file is left alone
            take_over = owner.is_fileset and not owner.aggregation_name.startswith(folder)
            if not take_over:
                continue
        self.add_resource_file(res_file)

    return res_files
def raster_file_validation(raster_file, resource, raster_folder=None):
    """
    Validates if the relevant files are valid for raster aggregation or raster resource type

    :param  raster_file: a temp file (extension tif or zip) retrieved from irods and stored on
    temp dir in django
    :param  resource: an instance of CompositeResource or GeoRasterResource in which
    raster_file exits.
    :param  raster_folder: (optional) folder in which raster file exists on irods.
    :return: a dict with keys 'error_info' (list of error messages),
    'new_resource_files_to_add' (list of temp file paths to be added to the resource),
    'raster_resource_files' (list of existing resource files that belong to the raster) and
    'vrt_created'
    """
    error_info = []
    new_resource_files_to_add = []
    raster_resource_files = []
    create_vrt = True
    # NOTE(review): 'vrt_created' captures the value of create_vrt at this point (True) and
    # is not updated when create_vrt is later set to False - confirm what callers expect
    validation_results = {'error_info': error_info,
                          'new_resource_files_to_add': new_resource_files_to_add,
                          'raster_resource_files': raster_resource_files,
                          'vrt_created': create_vrt}
    _, ext = os.path.splitext(os.path.basename(raster_file))
    ext = ext.lower()

    if ext == '.tif' or ext == '.tiff':
        res_files = ResourceFile.list_folder(resource=resource, folder=raster_folder,
                                             sub_folders=False)

        # check if there is already a vrt file in that folder
        vrt_files = [f for f in res_files if f.extension.lower() == ".vrt"]
        tif_files = [f for f in res_files if f.extension.lower() == ".tif" or
                     f.extension.lower() == ".tiff"]
        if vrt_files:
            if len(vrt_files) > 1:
                error_info.append("More than one vrt file was found.")
                return validation_results
            create_vrt = False
        elif len(tif_files) != 1:
            # if there are more than one tif file and no vrt file, then we just use the
            # selected tif file to create the aggregation in case of composite resource
            if resource.resource_type == "CompositeResource":
                tif_files = [tif_file for tif_file in tif_files if
                             raster_file.endswith(tif_file.file_name)]
            else:
                # if there are more than one tif file, there needs to be one vrt file
                error_info.append("A vrt file is missing.")
                return validation_results

        raster_resource_files.extend(vrt_files)
        raster_resource_files.extend(tif_files)

        if vrt_files:
            temp_dir = os.path.dirname(raster_file)
            temp_vrt_file = utils.get_file_from_irods(vrt_files[0], temp_dir)
        else:
            # create the .vrt file
            try:
                temp_vrt_file = create_vrt_file(raster_file)
            except Exception as ex:
                # str(ex) works for any exception; generic exceptions have no
                # 'message' attribute on Python 3
                error_info.append(str(ex))
            else:
                if os.path.isfile(temp_vrt_file):
                    new_resource_files_to_add.append(temp_vrt_file)

    elif ext == '.zip':
        try:
            extract_file_paths = _explode_raster_zip_file(raster_file)
        except Exception as ex:
            error_info.append(str(ex))
        else:
            if extract_file_paths:
                new_resource_files_to_add.extend(extract_file_paths)
    else:
        error_info.append("Invalid file mime type found.")

    if not error_info:
        if ext == ".zip":
            # in case of zip, there needs to be more than one file extracted out of the zip file
            if len(new_resource_files_to_add) < 2:
                error_info.append("Invalid zip file. Seems to contain only one file. "
                                  "Multiple tif files are expected.")
                return validation_results

            files_ext = [os.path.splitext(path)[1].lower() for path in
                         new_resource_files_to_add]
            if files_ext.count('.vrt') > 1:
                error_info.append("Invalid zip file. Seems to contain multiple vrt files.")
                return validation_results
            elif files_ext.count('.vrt') == 0:
                error_info.append("Invalid zip file. No vrt file was found.")
                return validation_results
            elif files_ext.count('.tif') + files_ext.count('.tiff') < 1:
                error_info.append("Invalid zip file. No tif/tiff file was found.")
                return validation_results

            # check if there are files that are not raster related
            non_raster_files = [f_ext for f_ext in files_ext if f_ext
                                not in ('.tif', '.tiff', '.vrt')]
            if non_raster_files:
                error_info.append("Invalid zip file. Contains files that are not raster related.")
                return validation_results

            temp_vrt_file = new_resource_files_to_add[files_ext.index('.vrt')]

        # validate vrt file if we didn't create it
        if ext == '.zip' or not create_vrt:
            raster_dataset = gdal.Open(temp_vrt_file, GA_ReadOnly)
            if raster_dataset is None:
                error_info.append('Failed to open the vrt file.')
                return validation_results

            # check if the vrt file is valid
            try:
                raster_dataset.RasterXSize
                raster_dataset.RasterYSize
                raster_dataset.RasterCount
            except AttributeError:
                error_info.append('Raster size and band information are missing.')
                return validation_results

            # check if the raster file numbers and names are valid in vrt file
            with open(temp_vrt_file, 'r') as vrt_file:
                vrt_string = vrt_file.read()
                root = ET.fromstring(vrt_string)
                file_names_in_vrt = [file_name.text for file_name in
                                     root.iter('SourceFilename')]

            if ext == '.zip':
                file_names = [os.path.basename(path) for path in new_resource_files_to_add]
            else:
                file_names = [f.file_name for f in raster_resource_files]

            file_names = [f_name for f_name in file_names if not f_name.endswith('.vrt')]

            if len(file_names) > len(file_names_in_vrt):
                msg = 'One or more additional tif files were found which are not listed in ' \
                      'the provided {} file.'
                msg = msg.format(os.path.basename(temp_vrt_file))
                error_info.append(msg)
            else:
                # only the first offending reference is reported
                for vrt_ref_raster_name in file_names_in_vrt:
                    if vrt_ref_raster_name in file_names \
                            or (os.path.split(vrt_ref_raster_name)[0] == '.' and
                                os.path.split(vrt_ref_raster_name)[1] in file_names):
                        continue
                    elif os.path.basename(vrt_ref_raster_name) in file_names:
                        msg = "Please specify {} as {} in the .vrt file, because it will " \
                              "be saved in the same folder with .vrt file in HydroShare."
                        msg = msg.format(vrt_ref_raster_name,
                                         os.path.basename(vrt_ref_raster_name))
                        error_info.append(msg)
                        break
                    else:
                        msg = "The file {tif} which is listed in the {vrt} file is missing."
                        msg = msg.format(tif=os.path.basename(vrt_ref_raster_name),
                                         vrt=os.path.basename(temp_vrt_file))
                        error_info.append(msg)
                        break

    return validation_results
def test_federated_folder_path_logic(self):
    """ storage path handling for a file in a subfolder of a (fake) federated resource """
    # start from a resource without any files
    self.assertEqual(self.res.files.all().count(), 0,
                     msg="resource file count didn't match")

    ResourceFile.create_folder(self.res, 'foo')

    # upload a single file into folder 'foo'
    hydroshare.add_resource_files(self.res.short_id, self.test_file_1, folder='foo')

    # the resource now holds exactly that one file
    self.assertEqual(self.res.files.all().count(), 1,
                     msg="resource file count didn't match")

    # grab the file just created and check where it was stored
    res_file = self.res.files.all()[0]
    self.assertEqual(res_file.resource_file.name,
                     os.path.join(self.res.short_id, "data", "contents", "foo", "file1.txt"))
    self.assertEqual(res_file.file_folder, "foo")

    # cheat: point the resource at a fake federation path to exercise the path logic
    federation_root = "/myzone/home/myuser"
    self.res.resource_federation_path = federation_root
    self.res.save()
    res_file.content_object.refresh_from_db()
    res_file.set_storage_path('foo/file1.txt', test_exists=False)

    # the fully qualified federated location of the file
    federated_path = os.path.join(federation_root, self.res.short_id,
                                  "data", "contents", "foo", "file1.txt")
    self.assertEqual(federated_path, res_file.storage_path)

    # setting the fully qualified path should result in an exact path
    res_file.set_storage_path(federated_path, test_exists=False)
    self.assertEqual(res_file.file_folder, "foo")
    self.assertEqual(res_file.storage_path, federated_path)

    self.assertTrue(res_file.path_is_acceptable(federated_path, test_exists=False))

    # a path to a non-existent file; it can only be checked with test_exists=False here.
    # Checking with test_exists=True (expecting a ValidationError) won't work because
    # the federation path is fake.
    missing_path = os.path.join(federation_root, self.res.short_id,
                                "data", "contents", "foo", "file2.txt")
    res_file.path_is_acceptable(missing_path, test_exists=False)

    # an unqualified name gets qualified and matches the computed path
    res_file.set_storage_path("foo/file1.txt", test_exists=False)
    self.assertEqual(res_file.file_folder, "foo")
    self.assertEqual(res_file.storage_path, federated_path)

    # setting the path to what it already is leaves it matching the computed path
    res_file.set_storage_path(federated_path, test_exists=False)
    self.assertEqual(res_file.file_folder, "foo")
    self.assertEqual(res_file.storage_path, federated_path)

    # now change the path to a good path to a non-existent object
    res_file.set_storage_path(missing_path, test_exists=False)

    # conclusion: unfederate the resource
    self.res.resource_federation_path = ""
    self.res.save()
    res_file.content_object.refresh_from_db()
    res_file.set_storage_path("foo/file1.txt", test_exists=False)

    # delete resources to clean up
    hydroshare.delete_resource(self.res.short_id)
def data_store_rename_file_or_folder(request, pk=None):
    """
    Rename one file or folder in a resource file hierarchy.  It is invoked by an AJAX call

    :param request: a REST request
    :param pk: the short_id of a resource to modify, from REST URL.
    :return: an HttpResponse; on success its body is a json object with key
    'target_rel_path', otherwise it carries an error message and an error status

    This is invoked by an AJAX call in the UI. It returns a json object that has the
    relative path of the target file or folder that has been renamed. The AJAX request
    must be a POST request with input data for source_path and target_path, where source_path
    and target_path are the relative paths (relative to path res_id/data/contents) for the
    source and target file or folder.

    This routine is **specifically** targeted at validating requests from the UI.
    Thus it is much more limiting than a general purpose REST responder.
    """
    pk = request.POST.get('res_id', pk)
    if pk is None:
        return HttpResponse('Bad request - resource id is not included',
                            status=status.HTTP_400_BAD_REQUEST)
    pk = str(pk).strip()
    try:
        resource, _, user = authorize(request, pk,
                                      needed_permission=ACTION_TO_AUTHORIZE.EDIT_RESOURCE)
    except NotFound:
        return HttpResponse('Bad request - resource not found',
                            status=status.HTTP_400_BAD_REQUEST)
    except PermissionDenied:
        return HttpResponse('Permission denied', status=status.HTTP_401_UNAUTHORIZED)

    request_data = resolve_request(request)
    src_path = request_data.get('source_path', None)
    tgt_path = request_data.get('target_path', None)
    try:
        src_path = _validate_path(src_path, 'src_path')
        tgt_path = _validate_path(tgt_path, 'tgt_path')
    except ValidationError as ex:
        return HttpResponse(ex.message, status=status.HTTP_400_BAD_REQUEST)

    # a rename must not move the object to another folder
    src_folder, src_base = os.path.split(src_path)
    tgt_folder, tgt_base = os.path.split(tgt_path)
    if src_folder != tgt_folder:
        return HttpResponse('Rename: Source and target names must be in same folder',
                            status=status.HTTP_400_BAD_REQUEST)

    istorage = resource.get_irods_storage()

    # protect against stale data botches: source files should exist
    src_storage_path = os.path.join(resource.root_path, src_path)
    try:
        folder, base = ResourceFile.resource_path_is_acceptable(resource,
                                                                src_storage_path,
                                                                test_exists=True)
    except ValidationError:
        return HttpResponse('Object to be renamed does not exist',
                            status=status.HTTP_400_BAD_REQUEST)

    if not irods_path_is_directory(istorage, src_storage_path):
        try:  # Django record should exist for each file
            ResourceFile.get(resource, base, folder=folder)
        except ResourceFile.DoesNotExist:
            return HttpResponse('Object to be renamed does not exist',
                                status=status.HTTP_400_BAD_REQUEST)

    # check that the target doesn't exist
    tgt_storage_path = os.path.join(resource.root_path, tgt_path)
    tgt_short_path = tgt_path[len('data/contents/'):]
    if istorage.exists(tgt_storage_path):
        return HttpResponse('Desired name is already in use',
                            status=status.HTTP_400_BAD_REQUEST)
    try:
        folder, base = ResourceFile.resource_path_is_acceptable(resource,
                                                                tgt_storage_path,
                                                                test_exists=False)
    except ValidationError:
        return HttpResponse('Poorly structured desired name {}'
                            .format(tgt_short_path),
                            status=status.HTTP_400_BAD_REQUEST)
    try:
        # look the target up by its folder and base name (tgt_short_path already
        # includes the base name, so it cannot be used as the folder here)
        ResourceFile.get(resource, base, folder=folder)
        return HttpResponse('Desired name {} is already in use'
                            .format(tgt_short_path),
                            status=status.HTTP_400_BAD_REQUEST)
    except ResourceFile.DoesNotExist:
        pass  # correct response

    try:
        rename_file_or_folder(user, pk, src_path, tgt_path)
    except SessionException as ex:
        return HttpResponse(ex.stderr, status=status.HTTP_500_INTERNAL_SERVER_ERROR)
    except DRF_ValidationError as ex:
        return HttpResponse(ex.detail, status=status.HTTP_400_BAD_REQUEST)

    return_object = {'target_rel_path': tgt_path}

    return HttpResponse(
        json.dumps(return_object),
        content_type='application/json'
    )
def add_file_to_resource(resource, f, folder=None, source_name='',
                         move=False, check_target_folder=False, add_to_aggregation=True):
    """
    Add a ResourceFile to a Resource.  Adds the 'format' metadata element to the resource.

    :param  resource: Resource to which file should be added
    :param  f: File-like object to add to a resource
    :param  folder: folder at which the file will live
    :param  source_name: the logical file name of the resource content file for
                        federated iRODS resource or the federated zone name;
                        By default, it is empty. A non-empty value indicates
                        the file needs to be added into the federated zone, either
                        from local disk where f holds the uploaded file from local
                        disk, or from the federated zone directly where f is empty
                        but source_name has the whole data object
                        iRODS path in the federated zone
    :param  move: indicate whether the file should be copied or moved from private user
                 account to proxy user account in federated zone; A value of False
                 indicates copy is needed, a value of True indicates no copy, but
                 the file will be moved from private user account to proxy user account.
                 The default value is False. It is only used when the file is created
                 from source_name.
    :param  check_target_folder: if true and the resource is a composite resource then uploading
    a file to the specified folder will be validated before adding the file to the resource
    :param  add_to_aggregation: if true and the resource is a composite resource then the file
    being added to the resource also will be added to a fileset aggregation if such an
    aggregation exists in the file path
    :return: The ResourceFile added.
    :raises ValidationError: if target folder validation is requested for a resource that is
    not a composite resource, or the target folder does not accept files
    :raises SessionException: if creating the file from source_name fails
    :raises ValueError: if neither f nor source_name is provided
    """

    # validate parameters
    if check_target_folder and resource.resource_type != 'CompositeResource':
        raise ValidationError("Resource must be a CompositeResource for validating target folder")

    if f:
        if check_target_folder and folder is not None:
            tgt_full_upload_path = os.path.join(resource.file_path, folder)
            if not resource.can_add_files(target_full_path=tgt_full_upload_path):
                err_msg = "File can't be added to this folder which represents an aggregation"
                raise ValidationError(err_msg)
        openfile = File(f) if not isinstance(f, UploadedFile) else f
        ret = ResourceFile.create(resource, openfile, folder=folder, source=None, move=False)
        if add_to_aggregation:
            if folder is not None and resource.resource_type == 'CompositeResource':
                aggregation = resource.get_fileset_aggregation_in_path(folder)
                if aggregation is not None:
                    # make the added file part of the fileset aggregation
                    aggregation.add_resource_file(ret)

        # add format metadata element if necessary
        file_format_type = get_file_mime_type(f.name)

    elif source_name:
        try:
            # create from existing iRODS file
            ret = ResourceFile.create(resource, None, folder=folder, source=source_name,
                                      move=move)
        except SessionException as ex:
            # there is nothing to clean up here: no ResourceFile was bound to a local
            # name when create() failed.
            # raise the exception for the calling function to inform the error on the page
            # interface
            raise SessionException(ex.exitcode, ex.stdout, ex.stderr)

        # add format metadata element if necessary
        file_format_type = get_file_mime_type(source_name)

    else:
        raise ValueError('Invalid input parameter is passed into this add_file_to_resource() '
                         'function')

    # TODO: generate this from data in ResourceFile rather than extension
    if file_format_type not in [mime.value for mime in resource.metadata.formats.all()]:
        resource.metadata.create_element('format', value=file_format_type)
    ret.calculate_size()

    return ret
def add_file_to_resource(resource, f, folder=None, source_name='',
                         check_target_folder=False, add_to_aggregation=True):
    """
    Add a ResourceFile to a Resource.  Adds the 'format' metadata element to the resource.

    :param  resource: Resource to which file should be added
    :param  f: File-like object to add to a resource
    :param  folder: folder at which the file will live
    :param  source_name: the logical file name of the resource content file for
                        federated iRODS resource or the federated zone name;
                        By default, it is empty. A non-empty value indicates
                        the file needs to be added into the federated zone, either
                        from local disk where f holds the uploaded file from local
                        disk, or from the federated zone directly where f is empty
                        but source_name has the whole data object
                        iRODS path in the federated zone
    :param  check_target_folder: if true and the resource is a composite resource then uploading
    a file to the specified folder will be validated before adding the file to the resource
    :param  add_to_aggregation: if true and the resource is a composite resource then the file
    being added to the resource also will be added to a fileset aggregation if such an
    aggregation exists in the file path
    :return: The ResourceFile added.
    :raises ValidationError: if target folder validation is requested for a resource that is
    not a composite resource, or the target folder does not accept files
    :raises SessionException: if creating the file from source_name fails
    :raises ValueError: if neither f nor source_name is provided
    """

    # validate parameters
    if check_target_folder and resource.resource_type != 'CompositeResource':
        raise ValidationError(
            "Resource must be a CompositeResource for validating target folder"
        )

    if f:
        if check_target_folder and folder is not None:
            tgt_full_upload_path = os.path.join(resource.file_path, folder)
            if not resource.can_add_files(
                    target_full_path=tgt_full_upload_path):
                err_msg = "File can't be added to this folder which represents an aggregation"
                raise ValidationError(err_msg)
        openfile = File(f) if not isinstance(f, UploadedFile) else f
        ret = ResourceFile.create(resource,
                                  openfile,
                                  folder=folder,
                                  source=None)
        if add_to_aggregation:
            if folder is not None and resource.resource_type == 'CompositeResource':
                aggregation = resource.get_fileset_aggregation_in_path(folder)
                if aggregation is not None:
                    # make the added file part of the fileset aggregation
                    aggregation.add_resource_file(ret)

        # add format metadata element if necessary
        file_format_type = get_file_mime_type(f.name)

    elif source_name:
        try:
            # create from existing iRODS file
            ret = ResourceFile.create(resource,
                                      None,
                                      folder=folder,
                                      source=source_name)
        except SessionException as ex:
            # there is nothing to clean up here: no ResourceFile was bound to a local
            # name when create() failed.
            # raise the exception for the calling function to inform the error on the page
            # interface
            raise SessionException(ex.exitcode, ex.stdout, ex.stderr)

        # add format metadata element if necessary
        file_format_type = get_file_mime_type(source_name)

    else:
        raise ValueError(
            'Invalid input parameter is passed into this add_file_to_resource() '
            'function')

    # TODO: generate this from data in ResourceFile rather than extension
    if file_format_type not in [
            mime.value for mime in resource.metadata.formats.all()
    ]:
        resource.metadata.create_element('format', value=file_format_type)
    ret.calculate_size()

    return ret
def copy_resource_files_and_AVUs(src_res_id, dest_res_id):
    """
    Duplicate the files and AVUs of one resource onto another resource, both on iRODS
    storage and in the Django database.

    :param  src_res_id: source resource uuid
    :param  dest_res_id: target resource uuid
    :return: None
    """
    # AVUs whose value is forced on the copy instead of being taken from the source:
    # formerly public things become private and the bag of the copy counts as modified
    forced_avu_values = {'isPublic': 'false', 'bag_modified': 'true'}
    avu_names = ['bag_modified', 'metadata_dirty', 'isPublic', 'resourceType']

    src_res = get_resource_by_shortkey(src_res_id)
    tgt_res = get_resource_by_shortkey(dest_res_id)

    # This makes the assumption that the destination is in the same exact zone.
    # Also, bags and similar attached files are not copied.
    istorage = src_res.get_irods_storage()

    # Exact copy of all physical files. The destination is one path segment shorter
    # than the source because it is the target directory of the copy.
    istorage.copyFiles(os.path.join(src_res.root_path, 'data'), tgt_res.root_path)

    src_coll = src_res.root_path
    tgt_coll = tgt_res.root_path
    for avu_name in avu_names:
        source_value = istorage.getAVU(src_coll, avu_name)
        # everything without a forced value gets copied literally
        istorage.setAVU(tgt_coll, avu_name, forced_avu_values.get(avu_name, source_value))

    # link copied resource files to Django resource model
    files = src_res.files.all()

    # logical files owning any of the source files need copying as well;
    # maps each source logical file to its copy on the target resource
    map_logical_files = {
        logical_file: logical_file.get_copy(tgt_res)
        for logical_file in list({f.logical_file for f in files if f.has_logical_file})
    }

    for src_file in files:
        # strips object information.
        folder, base = os.path.split(src_file.short_path)
        new_resource_file = ResourceFile.create(tgt_res, base, folder=folder)

        if not src_file.has_logical_file:
            continue
        # the new file joins the copy of the logical file its original belongs to
        tgt_logical_file = map_logical_files[src_file.logical_file]
        if src_file.logical_file.extra_data:
            tgt_logical_file.extra_data = copy.deepcopy(src_file.logical_file.extra_data)
            tgt_logical_file.save()
        tgt_logical_file.add_resource_file(new_resource_file)

    if src_res.resource_type.lower() == "collectionresource":
        # clone contained_res list of original collection and add to new collection
        # note that new collection resource will not contain "deleted resources"
        tgt_res.resources = src_res.resources.all()
def raster_file_validation(raster_file, resource, raster_folder=None):
    """
    Checks whether the files related to *raster_file* are valid for a raster aggregation
    or raster resource type

    :param  raster_file: a temp file (extension tif or zip) retrieved from irods and stored on
    temp dir in django
    :param  raster_folder: (optional) folder in which raster file exists on irods.
    :param  resource: an instance of CompositeResource or GeoRasterResource in which
    raster_file exits.
    :return A dict with keys 'error_info' (list of error messages),
    'new_resource_files_to_add' (list of temp file paths), 'raster_resource_files'
    (list of existing resource files that belong to the raster) and 'vrt_created'
    """
    errors = []
    files_to_add = []
    existing_raster_files = []
    create_vrt = True
    # the lists are shared with the result dict, so appending to them below is
    # visible to the caller
    results = {
        'error_info': errors,
        'new_resource_files_to_add': files_to_add,
        'raster_resource_files': existing_raster_files,
        'vrt_created': create_vrt
    }
    ext = os.path.splitext(os.path.basename(raster_file))[1].lower()
    is_zip = ext == '.zip'

    if ext in ('.tif', '.tiff'):
        folder_files = ResourceFile.list_folder(resource=resource,
                                                folder=raster_folder,
                                                sub_folders=False)

        # a vrt file may already exist next to the tif file
        vrt_files = [f for f in folder_files if f.extension.lower() == ".vrt"]
        tif_files = [f for f in folder_files if f.extension.lower() in (".tif", ".tiff")]
        if len(vrt_files) > 1:
            errors.append("More than one vrt file was found.")
            return results
        if vrt_files:
            create_vrt = False
        elif len(tif_files) != 1:
            if resource.resource_type != "CompositeResource":
                # more than one tif file requires a vrt file
                errors.append("A vrt file is missing.")
                return results
            # composite resource without a vrt file: only the selected tif file is
            # used to create the aggregation
            tif_files = [tif for tif in tif_files if raster_file.endswith(tif.file_name)]

        existing_raster_files.extend(vrt_files)
        existing_raster_files.extend(tif_files)

        if vrt_files:
            temp_vrt_file = utils.get_file_from_irods(vrt_files[0],
                                                      os.path.dirname(raster_file))
        else:
            # no vrt file yet - create it
            try:
                temp_vrt_file = create_vrt_file(raster_file)
            except Exception as ex:
                errors.append(str(ex))
            else:
                if os.path.isfile(temp_vrt_file):
                    files_to_add.append(temp_vrt_file)
    elif is_zip:
        try:
            extracted_paths = _explode_raster_zip_file(raster_file)
        except Exception as ex:
            errors.append(str(ex))
        else:
            if extracted_paths:
                files_to_add.extend(extracted_paths)
    else:
        errors.append("Invalid file mime type found.")

    if errors:
        return results

    if is_zip:
        # the zip file must have contained more than one file
        if len(files_to_add) < 2:
            errors.append(
                "Invalid zip file. Seems to contain only one file. "
                "Multiple tif files are expected.")
            return results

        files_ext = [os.path.splitext(path)[1].lower() for path in files_to_add]
        vrt_count = files_ext.count('.vrt')
        zip_problem = None
        if vrt_count > 1:
            zip_problem = "Invalid zip file. Seems to contain multiple vrt files."
        elif vrt_count == 0:
            zip_problem = "Invalid zip file. No vrt file was found."
        elif files_ext.count('.tif') + files_ext.count('.tiff') < 1:
            zip_problem = "Invalid zip file. No tif/tiff file was found."
        elif [f_ext for f_ext in files_ext if f_ext not in ('.tif', '.tiff', '.vrt')]:
            # files that are not raster related
            zip_problem = "Invalid zip file. Contains files that are not raster related."
        if zip_problem is not None:
            errors.append(zip_problem)
            return results

        temp_vrt_file = files_to_add[files_ext.index('.vrt')]

    # a vrt file created above needs no validation
    if not is_zip and create_vrt:
        return results

    raster_dataset = gdal.Open(temp_vrt_file, GA_ReadOnly)
    if raster_dataset is None:
        errors.append('Failed to open the vrt file.')
        return results

    # a valid vrt file exposes size and band information
    try:
        raster_dataset.RasterXSize
        raster_dataset.RasterYSize
        raster_dataset.RasterCount
    except AttributeError:
        errors.append('Raster size and band information are missing.')
        return results

    # compare the raster files at hand with the ones listed in the vrt file
    with open(temp_vrt_file, 'r') as vrt_file:
        root = ET.fromstring(vrt_file.read())
        file_names_in_vrt = [node.text for node in root.iter('SourceFilename')]

    if is_zip:
        candidate_names = [os.path.basename(path) for path in files_to_add]
    else:
        candidate_names = [f.file_name for f in existing_raster_files]
    file_names = [name for name in candidate_names if not name.endswith('.vrt')]

    if len(file_names) > len(file_names_in_vrt):
        msg = 'One or more additional tif files were found which are not listed in ' \
              'the provided {} file.'
        errors.append(msg.format(os.path.basename(temp_vrt_file)))
        return results

    for ref_name in file_names_in_vrt:
        if ref_name in file_names:
            continue
        ref_dir, ref_base = os.path.split(ref_name)
        if ref_dir == '.' and ref_base in file_names:
            continue
        # only the first offending reference is reported
        if ref_base in file_names:
            msg = "Please specify {} as {} in the .vrt file, because it will " \
                  "be saved in the same folder with .vrt file in HydroShare."
            msg = msg.format(ref_name, ref_base)
        else:
            msg = "The file {tif} which is listed in the {vrt} file is missing."
            msg = msg.format(tif=ref_base, vrt=os.path.basename(temp_vrt_file))
        errors.append(msg)
        break

    return results
def data_store_move_to_folder(request, pk=None):
    """
    Move a list of files and/or folders to another folder in a resource file hierarchy.

    :param request: a REST request
    :param pk: the short_id of a resource to modify, from REST URL. A 'res_id' value in the
    POST data takes precedence over this argument.
    :return: an HttpResponse. On success the body is a json object with key 'target_rel_path'
    (the target folder) and, when some moves were skipped because the target already existed,
    key 'additional_status' describing the skipped paths. Validation failures return status
    400, a denied permission returns 401 and a SessionException from the move returns 500.

    It is invoked by an AJAX call. The AJAX request must be a POST request with input data
    passed in for source_paths and target_path, where source_paths is a JSON-encoded list of
    paths and target_path is a single path. Every path must start with 'data/contents/' and
    must not contain '/../'. The target must be an existing folder.

    If the POST value 'atomic' is 'true', nothing is moved when any move would overwrite an
    existing object; otherwise conflicting sources are skipped and the rest are moved.

    This routine is **specifically** targeted at validating requests from the UI.
    Thus it is much more limiting than a general purpose REST responder.
    """
    pk = request.POST.get('res_id', pk)
    if pk is None:
        return HttpResponse('Bad request - resource id is not included',
                            status=status.HTTP_400_BAD_REQUEST)

    # whether to treat request as atomic: skip overwrites for valid request
    atomic = request.POST.get('atomic', 'false') == 'true'  # False by default

    pk = str(pk).strip()
    try:
        resource, _, user = authorize(request, pk,
                                      needed_permission=ACTION_TO_AUTHORIZE.EDIT_RESOURCE)
    except NotFound:
        return HttpResponse('Bad request - resource not found',
                            status=status.HTTP_400_BAD_REQUEST)
    except PermissionDenied:
        return HttpResponse('Permission denied', status=status.HTTP_401_UNAUTHORIZED)

    request_data = resolve_request(request)
    tgt_path = request_data.get('target_path', None)
    src_paths = request_data.get('source_paths', None)
    if src_paths is None or tgt_path is None:
        return HttpResponse('Bad request - src_paths or tgt_path is not included',
                            status=status.HTTP_400_BAD_REQUEST)

    tgt_path = str(tgt_path).strip()
    if not tgt_path:
        return HttpResponse('Target directory not specified',
                            status=status.HTTP_400_BAD_REQUEST)

    # protect against common hacking attacks
    if not tgt_path.startswith('data/contents/'):
        return HttpResponse('Target directory path must start with data/contents/',
                            status=status.HTTP_400_BAD_REQUEST)
    if tgt_path.find('/../') >= 0 or tgt_path.endswith('/..'):
        return HttpResponse('Bad request - tgt_path cannot contain /../',
                            status=status.HTTP_400_BAD_REQUEST)

    istorage = resource.get_irods_storage()

    # strip trailing slashes (if any)
    tgt_path = tgt_path.rstrip('/')
    tgt_short_path = tgt_path[len('data/contents/'):]
    tgt_storage_path = os.path.join(resource.root_path, tgt_path)

    if not irods_path_is_directory(istorage, tgt_storage_path):
        return HttpResponse('Target of move is not an existing folder',
                            status=status.HTTP_400_BAD_REQUEST)

    # source_paths comes straight from the client: malformed JSON (ValueError) or a value
    # that is not a string (TypeError) is a bad request, not a server error
    try:
        src_paths = json.loads(src_paths)
    except (TypeError, ValueError):
        return HttpResponse('Bad request - src_paths is not valid JSON',
                            status=status.HTTP_400_BAD_REQUEST)
    if not isinstance(src_paths, list):
        return HttpResponse('Bad request - src_paths must be a list of paths',
                            status=status.HTTP_400_BAD_REQUEST)

    src_paths = [str(src_path).strip().rstrip('/') for src_path in src_paths]

    # protect against common hacking attacks
    for src_path in src_paths:
        if not src_path.startswith('data/contents/'):
            return HttpResponse('Paths to be moved must start with data/contents/',
                                status=status.HTTP_400_BAD_REQUEST)
        if src_path.find('/../') >= 0 or src_path.endswith('/..'):
            return HttpResponse('Paths to be moved cannot contain /../',
                                status=status.HTTP_400_BAD_REQUEST)

    valid_src_paths = []
    skipped_tgt_paths = []

    for src_path in src_paths:
        src_storage_path = os.path.join(resource.root_path, src_path)
        src_short_path = src_path[len('data/contents/'):]

        # protect against stale data botches: source files should exist
        try:
            folder, file_name = ResourceFile.resource_path_is_acceptable(resource,
                                                                         src_storage_path,
                                                                         test_exists=True)
        except ValidationError:
            return HttpResponse('Source file {} does not exist'.format(src_short_path),
                                status=status.HTTP_400_BAD_REQUEST)

        # anything that is not a folder must also have a Django record
        if not irods_path_is_directory(istorage, src_storage_path):
            try:
                ResourceFile.get(resource, file_name, folder=folder)
            except ResourceFile.DoesNotExist:
                return HttpResponse('Source file {} does not exist'.format(src_short_path),
                                    status=status.HTTP_400_BAD_REQUEST)

        # protect against inadvertent overwrite
        base = os.path.basename(src_storage_path)
        tgt_overwrite = os.path.join(tgt_storage_path, base)
        if not istorage.exists(tgt_overwrite):
            valid_src_paths.append(src_path)  # partly qualified path for operation
        else:  # skip pre-existing objects
            skipped_tgt_paths.append(os.path.join(tgt_short_path, base))

    if skipped_tgt_paths and atomic:
        message = 'move would overwrite {}'.format(', '.join(skipped_tgt_paths))
        return HttpResponse(message, status=status.HTTP_400_BAD_REQUEST)

    # if not atomic, then try to move the files that don't have conflicts
    # stop immediately on error.
    try:
        move_to_folder(user, pk, valid_src_paths, tgt_path)
    except SessionException as ex:
        return HttpResponse(ex.stderr, status=status.HTTP_500_INTERNAL_SERVER_ERROR)
    except DRF_ValidationError as ex:
        return HttpResponse(ex.detail, status=status.HTTP_400_BAD_REQUEST)

    return_object = {'target_rel_path': tgt_path}
    if skipped_tgt_paths:  # add information on skipped steps
        message = '[Warn] skipped move to existing {}'.format(', '.join(skipped_tgt_paths))
        return_object['additional_status'] = message

    return HttpResponse(json.dumps(return_object), content_type='application/json')
def data_store_move_to_folder(request, pk=None):
    """
    Move a list of files and/or folders to another folder in a resource file hierarchy.

    :param request: a REST request
    :param pk: the short_id of a resource to modify, from REST URL. A 'res_id' value in the
    POST data takes precedence over this argument.
    :return: an HttpResponse. On success the body is a json object with key 'target_rel_path'
    (the target folder) and, when some moves were skipped because the target already existed,
    key 'additional_status' describing the skipped paths. Validation failures return status
    400, a denied permission returns 401 and a SessionException from the move returns 500.

    It is invoked by an AJAX call. The AJAX request must be a POST request with input data
    passed in for source_paths and target_path, where source_paths is a JSON-encoded list of
    paths and target_path is a single path. Paths are validated by _validate_path; this
    function then strips a leading 'data/contents/' from them to build the short paths used
    in messages, so paths are expected to start with that prefix.

    If the POST value 'atomic' is 'true', nothing is moved when any move would overwrite an
    existing object; otherwise conflicting sources are skipped and the rest are moved.

    This routine is **specifically** targeted at validating requests from the UI.
    Thus it is much more limiting than a general purpose REST responder.
    """
    pk = request.POST.get('res_id', pk)
    if pk is None:
        return HttpResponse('Bad request - resource id is not included',
                            status=status.HTTP_400_BAD_REQUEST)

    # whether to treat request as atomic: skip overwrites for valid request
    atomic = request.POST.get('atomic', 'false') == 'true'  # False by default

    pk = str(pk).strip()
    try:
        resource, _, user = authorize(request, pk,
                                      needed_permission=ACTION_TO_AUTHORIZE.EDIT_RESOURCE)
    except NotFound:
        return HttpResponse('Bad request - resource not found',
                            status=status.HTTP_400_BAD_REQUEST)
    except PermissionDenied:
        return HttpResponse('Permission denied', status=status.HTTP_401_UNAUTHORIZED)

    request_data = resolve_request(request)
    tgt_path = request_data.get('target_path', None)
    src_paths = request_data.get('source_paths', None)

    # NOTE(review): ex.message is only set when the ValidationError was built from a single
    # message - confirm that _validate_path always raises it that way
    try:
        tgt_path = _validate_path(tgt_path, 'tgt_path', check_path_empty=False)
    except ValidationError as ex:
        return HttpResponse(ex.message, status=status.HTTP_400_BAD_REQUEST)

    istorage = resource.get_irods_storage()

    tgt_short_path = tgt_path[len('data/contents/'):]
    tgt_storage_path = os.path.join(resource.root_path, tgt_path)

    if not irods_path_is_directory(istorage, tgt_storage_path):
        return HttpResponse('Target of move is not an existing folder',
                            status=status.HTTP_400_BAD_REQUEST)

    # source_paths comes straight from the client: a missing value, malformed JSON
    # (ValueError) or a non-string value (TypeError) is a bad request, not a server error
    if src_paths is None:
        return HttpResponse('Bad request - src_paths is not included',
                            status=status.HTTP_400_BAD_REQUEST)
    try:
        src_paths = json.loads(src_paths)
    except (TypeError, ValueError):
        return HttpResponse('Bad request - src_paths is not valid JSON',
                            status=status.HTTP_400_BAD_REQUEST)
    if not isinstance(src_paths, list):
        return HttpResponse('Bad request - src_paths must be a list of paths',
                            status=status.HTTP_400_BAD_REQUEST)

    # protect against common hacking attacks
    validated_src_paths = []
    for src_path in src_paths:
        try:
            validated_src_paths.append(_validate_path(src_path, 'src_paths'))
        except ValidationError as ex:
            return HttpResponse(ex.message, status=status.HTTP_400_BAD_REQUEST)
    src_paths = validated_src_paths

    valid_src_paths = []
    skipped_tgt_paths = []

    for src_path in src_paths:
        src_storage_path = os.path.join(resource.root_path, src_path)
        src_short_path = src_path[len('data/contents/'):]

        # protect against stale data botches: source files should exist
        try:
            folder, file_name = ResourceFile.resource_path_is_acceptable(resource,
                                                                         src_storage_path,
                                                                         test_exists=True)
        except ValidationError:
            return HttpResponse('Source file {} does not exist'.format(src_short_path),
                                status=status.HTTP_400_BAD_REQUEST)

        # anything that is not a folder must also have a Django record
        if not irods_path_is_directory(istorage, src_storage_path):
            try:
                ResourceFile.get(resource, file_name, folder=folder)
            except ResourceFile.DoesNotExist:
                return HttpResponse('Source file {} does not exist'.format(src_short_path),
                                    status=status.HTTP_400_BAD_REQUEST)

        # protect against inadvertent overwrite
        base = os.path.basename(src_storage_path)
        tgt_overwrite = os.path.join(tgt_storage_path, base)
        if not istorage.exists(tgt_overwrite):
            valid_src_paths.append(src_path)  # partly qualified path for operation
        else:  # skip pre-existing objects
            skipped_tgt_paths.append(os.path.join(tgt_short_path, base))

    if skipped_tgt_paths and atomic:
        message = 'move would overwrite {}'.format(', '.join(skipped_tgt_paths))
        return HttpResponse(message, status=status.HTTP_400_BAD_REQUEST)

    # if not atomic, then try to move the files that don't have conflicts
    # stop immediately on error.
    try:
        move_to_folder(user, pk, valid_src_paths, tgt_path)
    except SessionException as ex:
        return HttpResponse(ex.stderr, status=status.HTTP_500_INTERNAL_SERVER_ERROR)
    except DRF_ValidationError as ex:
        return HttpResponse(ex.detail, status=status.HTTP_400_BAD_REQUEST)

    return_object = {'target_rel_path': tgt_path}
    if skipped_tgt_paths:  # add information on skipped steps
        message = '[Warn] skipped move to existing {}'.format(', '.join(skipped_tgt_paths))
        return_object['additional_status'] = message

    return HttpResponse(
        json.dumps(return_object),
        content_type='application/json'
    )
def add_file_to_resource(resource, f, folder=None, source_name='', source_size=0,
                         move=False, is_file_reference=False):
    """
    Add a ResourceFile to a Resource.  Adds the 'format' metadata element to the resource.

    :param resource: Resource to which file should be added
    :param f: File-like object to add to a resource. When it is truthy it takes precedence
    over source_name, and source_size, move and is_file_reference are not used.
    :param folder: passed through unchanged to ResourceFile.create as its folder argument
    :param source_name: the logical file name of the resource content file for
                        federated iRODS resource or the federated zone name;
                        By default, it is empty. A non-empty value indicates
                        the file needs to be added into the federated zone, either
                        from local disk where f holds the uploaded file from local
                        disk, or from the federated zone directly where f is empty
                        but source_name has the whole data object
                        iRODS path in the federated zone
    :param source_size: the size of the reference file in source_name if is_file_reference is
                        True; otherwise, it is set to 0 and useless.
    :param move: indicate whether the file should be copied or moved from private user
                 account to proxy user account in federated zone; A value of False
                 indicates copy is needed, a value of True indicates no copy, but
                 the file will be moved from private user account to proxy user account.
                 The default value is False.
    :param is_file_reference: indicate whether the file being added is a reference to an
                              external file stored in an external zone or URL. source_name
                              will hold the reference file path or url
    :return: the object returned by ResourceFile.create for the added file.
    :raises ValueError: if both f and source_name are empty.
    :raises SessionException: if creating the file from source_name fails in iRODS.
    """
    # importing here to avoid circular import
    from hs_file_types.models import GenericLogicalFile

    if f:
        openfile = File(f) if not isinstance(f, UploadedFile) else f
        ret = ResourceFile.create(resource, openfile, folder=folder, source=None, move=False)

        # add format metadata element if necessary
        file_format_type = get_file_mime_type(f.name)

    elif source_name:
        try:
            # create from existing iRODS file
            ret = ResourceFile.create(resource, None, folder=folder, source=source_name,
                                      source_size=source_size,
                                      is_file_reference=is_file_reference, move=move)
        except SessionException as ex:
            # Nothing to clean up here: when create() raises, no ResourceFile object was
            # handed back to this function, so there is nothing it could delete.
            # raise the exception for the calling function to inform the error on the page
            # interface
            raise SessionException(ex.exitcode, ex.stdout, ex.stderr)

        # add format metadata element if necessary
        file_format_type = get_file_mime_type(source_name)

    else:
        raise ValueError('Invalid input parameter is passed into this add_file_to_resource() '
                         'function')

    # TODO: generate this from data in ResourceFile rather than extension
    if file_format_type not in [mime.value for mime in resource.metadata.formats.all()]:
        resource.metadata.create_element('format', value=file_format_type)

    # if a file gets added successfully to composite resource, then better to set the generic
    # logical file here
    if resource.resource_type == "CompositeResource":
        logical_file = GenericLogicalFile.create()
        ret.logical_file_content_object = logical_file
        ret.save()

    return ret
def test_aggregation_folder_move(self):
    # Moving the folder that holds an aggregation must be reflected in the aggregation
    # name and in the short paths of the aggregation metadata and resource map xml files.

    def assert_all_files_in(expected_folder):
        # every file of the resource must report the given folder
        for resource_file in self.composite_resource.files.all():
            self.assertEqual(resource_file.file_folder, expected_folder)

    def assert_xml_paths_follow_aggregation_name():
        # both xml paths are derived from the aggregation name without its extension
        aggregation = self.composite_resource.files.first().logical_file
        path_stem = get_path_with_no_file_extension(aggregation.aggregation_name)
        self.assertEqual(aggregation.metadata_short_file_path,
                         '{0}{1}'.format(path_stem, METADATA_FILE_ENDSWITH))
        self.assertEqual(aggregation.map_short_file_path,
                         '{0}{1}'.format(path_stem, RESMAP_FILE_ENDSWITH))

    self.create_composite_resource()
    aggregation_folder = 'my_folder'
    ResourceFile.create_folder(self.composite_resource, aggregation_folder)

    # upload the zip holding the 3 required files into that folder
    self.add_file_to_resource(file_to_add=self.states_required_zip_file,
                              upload_folder=aggregation_folder)
    zip_res_file = self.composite_resource.files.first()

    # turn the uploaded zip file into a geo feature aggregation
    GeoFeatureLogicalFile.set_file_type(self.composite_resource, self.user, zip_res_file.id)
    self.assertEqual(self.composite_resource.files.count(), 3)
    _stem, _extension = os.path.splitext(zip_res_file.file_name)

    # state before the move
    assert_all_files_in(aggregation_folder)
    assert_xml_paths_follow_aggregation_name()

    # create the folder that will receive the aggregation folder
    destination_parent = 'parent_folder'
    ResourceFile.create_folder(self.composite_resource, destination_parent)

    # move the aggregation folder into the new parent folder
    move_or_rename_file_or_folder(
        self.user,
        self.composite_resource.short_id,
        'data/contents/{}'.format(aggregation_folder),
        'data/contents/{}/{}'.format(destination_parent, aggregation_folder))

    # state after the move
    assert_all_files_in('{}/{}'.format(destination_parent, aggregation_folder))
    assert_xml_paths_follow_aggregation_name()

    self.composite_resource.delete()
def copy_resource_files_and_AVUs(src_res_id, dest_res_id):
    """
    Duplicate the content of one resource into another, both in iRODS storage and in the
    Django database: physical files, a fixed set of AVUs, resource file records and the
    logical files they belong to.

    :param src_res_id: source resource uuid
    :param dest_res_id: target resource uuid
    :return:
    """
    source = get_resource_by_shortkey(src_res_id)
    target = get_resource_by_shortkey(dest_res_id)

    # The destination is assumed to live in the same exact zone as the source.
    # Bags and similar attached files are not copied.
    istorage = source.get_irods_storage()

    source_coll = source.root_path
    target_coll = target.root_path

    # Exact copy of all physical files. The destination is the target root (one segment
    # short of the source path) because it names the directory to copy into.
    istorage.copyFiles(os.path.join(source_coll, 'data'), target_coll)

    # AVUs whose value is forced on the copy instead of being taken from the source:
    # formerly public things become private, and the bag of a copy is always stale
    forced_avu_values = {'isPublic': 'false', 'bag_modified': 'true'}
    for avu_name in ('bag_modified', 'metadata_dirty', 'isPublic', 'resourceType'):
        source_value = istorage.getAVU(source_coll, avu_name)
        # everything without a forced value gets copied literally
        istorage.setAVU(target_coll, avu_name, forced_avu_values.get(avu_name, source_value))

    # link copied resource files to Django resource model
    source_files = source.files.all()

    # resource files that are part of logical files need those logical files copied too;
    # map every distinct source logical file to its copy
    copies_by_logical_file = {
        logical_file: logical_file.get_copy()
        for logical_file in {rf.logical_file for rf in source_files if rf.has_logical_file}
    }

    for source_file in source_files:
        # split the short path into containing folder and object name
        folder, base = os.path.split(source_file.short_path)
        copied_file = ResourceFile.create(target, base, folder=folder)

        # attach the new file to the copy of the logical file its original belongs to
        if source_file.has_logical_file:
            copies_by_logical_file[source_file.logical_file].add_resource_file(copied_file)

    if source.resource_type.lower() == "collectionresource":
        # clone contained_res list of original collection and add to new collection
        # note that new collection resource will not contain "deleted resources"
        target.resources = source.resources.all()
def test_aggregation_metadata_CRUD(self):
    # Exercise create/read/update of NetCDF aggregation metadata: keywords,
    # OriginalCoverage, spatial Coverage and Variable elements.
    # here we are using a valid nc file for creating a NetCDF file type (aggregation)
    # then testing with metadata CRUD actions for the aggregation

    self.create_composite_resource()
    new_folder = 'nc_folder'
    ResourceFile.create_folder(self.composite_resource, new_folder)
    # add the nc file to the resource at the above folder
    self.add_file_to_resource(file_to_add=self.netcdf_file, upload_folder=new_folder)
    # make the netcdf file part of the NetCDFLogicalFile
    res_file = self.composite_resource.files.first()
    # no aggregation metadata record exists until the logical file is created
    self.assertEqual(NetCDFFileMetaData.objects.count(), 0)
    netcdf_logical_file = NetCDFLogicalFile.create(self.composite_resource)
    netcdf_logical_file.save()
    self.assertEqual(NetCDFFileMetaData.objects.count(), 1)
    netcdf_logical_file.add_resource_file(res_file)

    # re-read the resource file so that it reflects the logical file association
    res_file = self.composite_resource.files.first()
    self.assertEqual(res_file.logical_file_type_name, 'NetCDFLogicalFile')
    self.assertEqual(netcdf_logical_file.files.count(), 1)

    # create keywords - note it is possible to have duplicate keywords
    # appropriate view functions need to disallow duplicate keywords
    keywords = ['key-1', 'key-1', 'key-2']
    netcdf_logical_file.metadata.keywords = keywords
    netcdf_logical_file.metadata.save()
    self.assertEqual(len(keywords), len(netcdf_logical_file.metadata.keywords))
    for keyword in keywords:
        self.assertIn(keyword, netcdf_logical_file.metadata.keywords)

    # create OriginalCoverage element
    self.assertEqual(netcdf_logical_file.metadata.original_coverage, None)
    coverage_data = {'northlimit': 121.345, 'southlimit': 42.678, 'eastlimit': 123.789,
                     'westlimit': 40.789, 'units': 'meters'}
    netcdf_logical_file.metadata.create_element('OriginalCoverage', value=coverage_data)

    self.assertNotEqual(netcdf_logical_file.metadata.original_coverage, None)
    self.assertEqual(
        float(netcdf_logical_file.metadata.original_coverage.value['northlimit']),
        121.345)

    # test updating OriginalCoverage element
    orig_coverage = netcdf_logical_file.metadata.original_coverage
    coverage_data = {'northlimit': 111.333, 'southlimit': 42.678, 'eastlimit': 123.789,
                     'westlimit': 40.789, 'units': 'meters'}
    netcdf_logical_file.metadata.update_element('OriginalCoverage', orig_coverage.id,
                                                value=coverage_data)
    self.assertEqual(
        float(netcdf_logical_file.metadata.original_coverage.value['northlimit']),
        111.333)

    # trying to create a 2nd OriginalCoverage element should raise exception
    with self.assertRaises(Exception):
        netcdf_logical_file.metadata.create_element('OriginalCoverage',
                                                    value=coverage_data)

    # trying to update bounding box values with non-numeric values
    # (e.g., 'northlimit' key with a non-numeric value) should raise exception
    coverage_data = {'northlimit': '121.345a', 'southlimit': 42.678, 'eastlimit': 123.789,
                     'westlimit': 40.789, 'units': 'meters'}
    with self.assertRaises(ValidationError):
        netcdf_logical_file.metadata.update_element('OriginalCoverage', orig_coverage.id,
                                                    value=coverage_data)

    # test creating spatial coverage
    # there should not be any spatial coverage for the netcdf file type
    self.assertEqual(netcdf_logical_file.metadata.spatial_coverage, None)
    coverage_data = {'projection': 'WGS 84 EPSG:4326', 'northlimit': 41.87,
                     'southlimit': 41.863, 'eastlimit': -111.505, 'westlimit': -111.511,
                     'units': 'meters'}
    # create spatial coverage
    netcdf_logical_file.metadata.create_element('Coverage', type="box", value=coverage_data)
    spatial_coverage = netcdf_logical_file.metadata.spatial_coverage
    self.assertEqual(float(spatial_coverage.value['northlimit']), 41.87)

    # test updating spatial coverage
    coverage_data = {'projection': 'WGS 84 EPSG:4326', 'northlimit': 41.87706,
                     'southlimit': 41.863, 'eastlimit': -111.505, 'westlimit': -111.511,
                     'units': 'meters'}
    netcdf_logical_file.metadata.update_element('Coverage', element_id=spatial_coverage.id,
                                                type="box", value=coverage_data)
    # read the coverage again: the update is checked on a freshly fetched value
    spatial_coverage = netcdf_logical_file.metadata.spatial_coverage
    self.assertEqual(float(spatial_coverage.value['northlimit']), 41.87706)

    # create Variable element
    self.assertEqual(netcdf_logical_file.metadata.variables.count(), 0)
    variable_data = {'name': 'variable_name', 'type': 'Int', 'unit': 'deg F',
                     'shape': 'variable_shape'}
    netcdf_logical_file.metadata.create_element('Variable', **variable_data)
    self.assertEqual(netcdf_logical_file.metadata.variables.count(), 1)
    self.assertEqual(netcdf_logical_file.metadata.variables.first().name, 'variable_name')

    # test that multiple Variable elements can be created
    variable_data = {'name': 'variable_name_2', 'type': 'Int', 'unit': 'deg F',
                     'shape': 'variable_shape_2'}
    netcdf_logical_file.metadata.create_element('Variable', **variable_data)
    self.assertEqual(netcdf_logical_file.metadata.variables.count(), 2)

    # test update Variable element
    variable = netcdf_logical_file.metadata.variables.first()
    variable_data = {'name': 'variable_name_updated', 'type': 'Int', 'unit': 'deg F',
                     'shape': 'variable_shape'}
    netcdf_logical_file.metadata.update_element('Variable', variable.id, **variable_data)
    # fetch the same record by id to verify that the update was persisted
    variable = netcdf_logical_file.metadata.variables.get(id=variable.id)
    self.assertEqual(variable.name, 'variable_name_updated')

    # remove the resource created by this test
    self.composite_resource.delete()
def test_unfederated_folder_path_checks(self):
    """ check_irods_files reports and repairs a mismatch between Django and iRODS """
    count_msg = "resource file count didn't match"
    summary_suffix = "type is GenericResource, title is 'My Test Resource'"

    def file_count():
        # number of Django file records currently attached to the resource
        return self.res.files.all().count()

    def assert_error_suffixes(messages, suffixes):
        # indexing (not zip) so that a too-short message list still fails loudly
        for position, suffix in enumerate(suffixes):
            self.assertTrue(messages[position].endswith(suffix))

    # a fresh resource has no files
    self.assertEqual(file_count(), 0, msg=count_msg)

    ResourceFile.create_folder(self.res, 'foo')

    # an empty folder is consistent: no exception expected
    check_irods_files(self.res, stop_on_error=True)

    # put one file into the folder
    hydroshare.add_resource_files(self.res.short_id, self.test_file_1, folder='foo')

    # still consistent: no exception expected
    check_irods_files(self.res, stop_on_error=True)

    # exactly one file is attached now
    self.assertEqual(file_count(), 1, msg=count_msg)

    # grab the file that was just added
    stored_file = self.res.files.all()[0]

    # the file must live inside the folder, below data/contents of the resource
    expected_path = os.path.join(self.res.short_id, "data", "contents", "foo", "file1.txt")
    self.assertEqual(stored_file.file_folder, "foo")
    self.assertEqual(stored_file.storage_path, expected_path)

    # corrupt the Django record on purpose so that it no longer matches iRODS
    stored_file.set_short_path("fuzz.txt")

    # with stop_on_error the mismatch must raise
    with self.assertRaises(ValidationError):
        check_irods_files(self.res, stop_on_error=True)

    # without stop_on_error the problems are returned instead
    messages, error_count = check_irods_files(self.res, return_errors=True,
                                              log_errors=False)
    assert_error_suffixes(messages, (
        'data/contents/fuzz.txt does not exist in iRODS',
        'data/contents/foo/file1.txt in iRODs does not exist in Django',
        summary_suffix,
    ))

    # ask for both sides to be cleaned up
    messages, error_count = check_irods_files(self.res, return_errors=True,
                                              log_errors=False,
                                              clean_irods=True, clean_django=True)
    assert_error_suffixes(messages, (
        'data/contents/fuzz.txt does not exist in iRODS (DELETED FROM DJANGO)',
        'data/contents/foo/file1.txt in iRODs does not exist in Django (DELETED FROM IRODS)',
        summary_suffix,
    ))

    # the cleanup removed the only file
    self.assertEqual(file_count(), 0, msg=count_msg)

    # a check after the cleanup finds nothing
    messages, error_count = check_irods_files(self.res, stop_on_error=True,
                                              log_errors=False)
    self.assertEqual(error_count, 0)

    # delete resources to clean up
    hydroshare.delete_resource(self.res.short_id)
def data_store_rename_file_or_folder(request, pk=None):
    """
    Rename one file or folder in a resource file hierarchy.  It is invoked by an AJAX call

    :param request: a REST request
    :param pk: the short_id of a resource to modify, from REST URL. A 'res_id' value in the
    POST data takes precedence over this argument.
    :return: an HttpResponse. On success the body is a json object with key 'target_rel_path'
    holding the target path. Validation failures return status 400, a denied permission
    returns 401 and a SessionException from the rename returns 500.

    This is invoked by an AJAX call in the UI. The AJAX request must be a POST request with
    input data for source_path and target_path. Both paths must start with 'data/contents/',
    must not contain '/../' and must name objects in the same folder. The source must exist
    and the target name must not be in use, neither in iRODS nor as a Django file record.

    This routine is **specifically** targeted at validating requests from the UI.
    Thus it is much more limiting than a general purpose REST responder.
    """
    pk = request.POST.get('res_id', pk)
    if pk is None:
        return HttpResponse('Bad request - resource id is not included',
                            status=status.HTTP_400_BAD_REQUEST)
    pk = str(pk).strip()
    try:
        resource, _, user = authorize(request, pk,
                                      needed_permission=ACTION_TO_AUTHORIZE.EDIT_RESOURCE)
    except NotFound:
        return HttpResponse('Bad request - resource not found',
                            status=status.HTTP_400_BAD_REQUEST)
    except PermissionDenied:
        return HttpResponse('Permission denied', status=status.HTTP_401_UNAUTHORIZED)

    request_data = resolve_request(request)
    src_path = request_data.get('source_path', None)
    tgt_path = request_data.get('target_path', None)
    if src_path is None or tgt_path is None:
        return HttpResponse('Source or target name is not specified',
                            status=status.HTTP_400_BAD_REQUEST)

    if not src_path or not tgt_path:
        return HttpResponse('Source or target name is empty',
                            status=status.HTTP_400_BAD_REQUEST)

    src_path = str(src_path).strip()
    tgt_path = str(tgt_path).strip()

    # a rename never changes the containing folder
    src_folder = os.path.split(src_path)[0]
    tgt_folder = os.path.split(tgt_path)[0]
    if src_folder != tgt_folder:
        return HttpResponse('Rename: Source and target names must be in same folder',
                            status=status.HTTP_400_BAD_REQUEST)

    # protect against common hacking attacks
    if not src_path.startswith('data/contents/'):
        return HttpResponse('Rename: Source path must start with data/contents/',
                            status=status.HTTP_400_BAD_REQUEST)
    if src_path.find('/../') >= 0 or src_path.endswith('/..'):
        return HttpResponse('Rename: Source path cannot contain /../',
                            status=status.HTTP_400_BAD_REQUEST)

    if not tgt_path.startswith('data/contents/'):
        return HttpResponse('Rename: Target path must start with data/contents/',
                            status=status.HTTP_400_BAD_REQUEST)
    if tgt_path.find('/../') >= 0 or tgt_path.endswith('/..'):
        return HttpResponse('Rename: Target path cannot contain /../',
                            status=status.HTTP_400_BAD_REQUEST)

    istorage = resource.get_irods_storage()

    # protect against stale data botches: source files should exist
    src_storage_path = os.path.join(resource.root_path, src_path)
    try:
        folder, base = ResourceFile.resource_path_is_acceptable(resource,
                                                                src_storage_path,
                                                                test_exists=True)
    except ValidationError:
        return HttpResponse('Object to be renamed does not exist',
                            status=status.HTTP_400_BAD_REQUEST)

    if not irods_path_is_directory(istorage, src_storage_path):
        try:  # Django record should exist for each file
            ResourceFile.get(resource, base, folder=folder)
        except ResourceFile.DoesNotExist:
            return HttpResponse('Object to be renamed does not exist',
                                status=status.HTTP_400_BAD_REQUEST)

    # check that the target doesn't exist
    tgt_storage_path = os.path.join(resource.root_path, tgt_path)
    tgt_short_path = tgt_path[len('data/contents/'):]
    if istorage.exists(tgt_storage_path):
        return HttpResponse('Desired name is already in use',
                            status=status.HTTP_400_BAD_REQUEST)
    try:
        folder, base = ResourceFile.resource_path_is_acceptable(resource,
                                                                tgt_storage_path,
                                                                test_exists=False)
    except ValidationError:
        return HttpResponse('Poorly structured desired name {}'.format(tgt_short_path),
                            status=status.HTTP_400_BAD_REQUEST)

    # A stale Django record may exist for the target even though iRODS has no such object.
    # The record has to be looked up in the folder that contains the target (as is done
    # for the source above), not in the full target path which includes the new name.
    try:
        ResourceFile.get(resource, base, folder=folder)
    except ResourceFile.DoesNotExist:
        pass  # correct response
    else:
        return HttpResponse('Desired name {} is already in use'.format(tgt_short_path),
                            status=status.HTTP_400_BAD_REQUEST)

    try:
        rename_file_or_folder(user, pk, src_path, tgt_path)
    except SessionException as ex:
        return HttpResponse(ex.stderr, status=status.HTTP_500_INTERNAL_SERVER_ERROR)
    except DRF_ValidationError as ex:
        return HttpResponse(ex.detail, status=status.HTTP_400_BAD_REQUEST)

    return_object = {'target_rel_path': tgt_path}

    return HttpResponse(json.dumps(return_object), content_type='application/json')
def test_unfederated_folder_path_checks(self):
    """ the resource's check_irods_files reports and repairs a Django/iRODS mismatch """
    mismatch_msg = "resource file count didn't match"
    resource_summary = "type is GenericResource, title is 'My Test Resource'"
    resource = self.res

    def expect_file_count(expected):
        # compare the number of Django file records with the expected value
        self.assertEqual(resource.files.all().count(), expected, msg=mismatch_msg)

    def expect_endings(reported, endings):
        # look the messages up by index so that a missing message is an error
        for idx, ending in enumerate(endings):
            self.assertTrue(reported[idx].endswith(ending))

    # nothing has been added to the resource yet
    expect_file_count(0)

    ResourceFile.create_folder(resource, 'foo')

    # the empty folder must pass the consistency check
    resource.check_irods_files(stop_on_error=True)

    # upload a single file into the folder
    hydroshare.add_resource_files(resource.short_id, self.test_file_1, folder='foo')

    # the uploaded file must pass the consistency check as well
    resource.check_irods_files(stop_on_error=True)

    # the resource holds exactly that one file
    expect_file_count(1)

    # fetch the record of the uploaded file
    uploaded = resource.files.all()[0]

    # it is expected in folder foo below data/contents of the resource
    path_parts = (resource.short_id, "data", "contents", "foo", "file1.txt")
    self.assertEqual(uploaded.file_folder, "foo")
    self.assertEqual(uploaded.storage_path, os.path.join(*path_parts))

    # point the Django record at a path that does not exist in iRODS
    uploaded.set_short_path("fuzz.txt")

    # the check has to raise when asked to stop on the first error
    with self.assertRaises(ValidationError):
        resource.check_irods_files(stop_on_error=True)

    # otherwise the errors are handed back for inspection
    reported, n_errors = resource.check_irods_files(return_errors=True, log_errors=False)
    expect_endings(reported, [
        'data/contents/fuzz.txt does not exist in iRODS',
        'data/contents/foo/file1.txt in iRODs does not exist in Django',
        resource_summary,
    ])

    # repeat the check and let it clean up both iRODS and Django
    reported, n_errors = resource.check_irods_files(return_errors=True, log_errors=False,
                                                    clean_irods=True, clean_django=True)
    expect_endings(reported, [
        'data/contents/fuzz.txt does not exist in iRODS (DELETED FROM DJANGO)',
        'data/contents/foo/file1.txt in iRODs does not exist in Django (DELETED FROM IRODS)',
        resource_summary,
    ])

    # the cleanup leaves the resource without files
    expect_file_count(0)

    # and a final check reports no errors
    reported, n_errors = resource.check_irods_files(stop_on_error=True, log_errors=False)
    self.assertEqual(n_errors, 0)

    # delete resources to clean up
    hydroshare.delete_resource(resource.short_id)
def add_file_to_resource(resource, f, folder=None, source_name='', move=False):
    """
    Add a ResourceFile to a Resource.  Adds the 'format' metadata element to the resource.

    :param resource: Resource to which file should be added
    :param f: File-like object to add to a resource
    :param folder: folder argument handed unchanged to ResourceFile.create(); defaults to None
    :param source_name: the logical file name of the resource content file for
                        federated iRODS resource or the federated zone name;
                        By default, it is empty. A non-empty value indicates
                        the file needs to be added into the federated zone, either
                        from local disk where f holds the uploaded file from local
                        disk, or from the federated zone directly where f is empty
                        but source_name has the whole data object
                        iRODS path in the federated zone
    :param move: indicate whether the file should be copied or moved from private user
                 account to proxy user account in federated zone; A value of False
                 indicates copy is needed, a value of True indicates no copy, but
                 the file will be moved from private user account to proxy user account.
                 The default value is False. Only used when the file is created from
                 source_name; when f is given the file is always created with move=False.
    :return: The value returned by ResourceFile.create() for the added file.
    :raises ValueError: if both f and source_name are empty
    :raises SessionException: any SessionException raised by ResourceFile.create() while
                 creating the file from source_name propagates to the caller unchanged
    """
    if f:
        # wrap plain file objects; uploaded files are passed through as they are
        openfile = f if isinstance(f, UploadedFile) else File(f)
        ret = ResourceFile.create(resource, openfile, folder=folder, source=None, move=False)

        # the mime type is derived from the name of the supplied file
        file_format_type = get_file_mime_type(f.name)

    elif source_name:
        # create from existing iRODS file. If this raises there is no ResourceFile to clean
        # up (nothing was returned), so the exception is left to propagate, traceback intact,
        # for the calling function to inform the error on the page interface.
        ret = ResourceFile.create(resource, None, folder=folder, source=source_name, move=move)

        # the mime type is derived from the source name
        file_format_type = get_file_mime_type(source_name)

    else:
        raise ValueError('Invalid input parameter is passed into this add_file_to_resource() '
                         'function')

    # TODO: generate this from data in ResourceFile rather than extension
    # add format metadata element if necessary; the generator stops at the first match
    known_formats = (mime.value for mime in resource.metadata.formats.all())
    if file_format_type not in known_formats:
        resource.metadata.create_element('format', value=file_format_type)

    return ret