def test_get_mime_type(self):
    test_file = 'my_file.txt'
    self.assertEqual(utils.get_file_mime_type(test_file), 'text/plain')

    test_file = 'my_file.tif'
    self.assertEqual(utils.get_file_mime_type(test_file), 'image/tiff')

    test_file = 'my_file.abc'
    self.assertEqual(utils.get_file_mime_type(test_file), 'application/abc')

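# The assertions above pin down the behavior expected of utils.get_file_mime_type:
# known extensions resolve through the standard MIME table, and unknown extensions
# fall back to 'application/<extension>'. Below is a minimal sketch of an
# implementation consistent with those assertions (hypothetical; the actual
# hs_core helper may differ in details):
import mimetypes
import os


def get_file_mime_type(file_name):
    # guess_type returns a (type, encoding) tuple; type is None when the
    # extension is not in the MIME table
    file_format_type = mimetypes.guess_type(file_name)[0]
    if not file_format_type:
        # fall back to 'application/<extension>',
        # e.g. 'my_file.abc' -> 'application/abc'
        file_format_type = 'application/{ext}'.format(
            ext=os.path.splitext(file_name)[1][1:])
    return file_format_type
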
def link_irods_file_to_django(resource, filepath):
    """
    Link a newly created irods file to Django resource model

    :param filepath: full path to file
    """
    # link the newly created file (**filepath**) to Django resource model
    b_add_file = False
    # TODO: folder is an abstract concept... utilize short_path for whole API
    if resource:
        folder, base = ResourceFile.resource_path_is_acceptable(resource, filepath,
                                                                test_exists=False)
        try:
            ResourceFile.get(resource=resource, file=base, folder=folder)
        except ObjectDoesNotExist:
            # this does not copy the file from anywhere; it must exist already
            ResourceFile.create(resource=resource, file=base, folder=folder)
            b_add_file = True

        if b_add_file:
            file_format_type = get_file_mime_type(filepath)
            if file_format_type not in [mime.value for mime in
                                        resource.metadata.formats.all()]:
                resource.metadata.create_element('format', value=file_format_type)
            # this should assign a logical file object to this new file
            # if this resource supports logical file
            resource.set_default_logical_file()

def add_resource_files(pk, *files):
    """
    Called by clients to update a resource in HydroShare by adding one or more files.

    REST URL: PUT /resource/{pid}/files/{file}

    Parameters:
    pid - Unique HydroShare identifier for the resource that is to be updated.
    file - The data bytes of the file that will be added to the existing resource
           identified by pid

    Returns: The pid assigned to the updated resource

    Return Type: pid

    Raises:
    Exceptions.NotAuthorized - The user is not authorized
    Exceptions.InvalidContent - The content of the file is invalid
    Exception.ServiceFailure - The service is unable to process the request

    Notes:
    For mutable resources (resources not formally published), the update adds the
    file that is passed to this method to the resource. For immutable resources
    (formally published resources), this method creates a new resource that is a
    new version of the formally published resource. HydroShare will record the
    update by storing the SystemMetadata.obsoletes and SystemMetadata.obsoletedBy
    fields for the respective resources in their system metadata. HydroShare MUST
    check or set the values of SystemMetadata.obsoletes and
    SystemMetadata.obsoletedBy so that they accurately represent the relationship
    between the new and old objects. HydroShare MUST also set
    SystemMetadata.dateSysMetadataModified. The modified system metadata entries
    must then be available in HydroShare.listObjects() to ensure that any
    cataloging systems pick up the changes when filtering on
    SystemMetadata.dateSysMetadataModified. A formally published resource can only
    be obsoleted by one newer version. Once a resource is obsoleted, no other
    resources can obsolete it.
    """
    resource = utils.get_resource_by_shortkey(pk)
    ret = []
    for file in files:
        ret.append(ResourceFile.objects.create(
            content_object=resource,
            resource_file=File(file) if not isinstance(file, UploadedFile) else file
        ))

        # add format metadata element if necessary
        file_format_type = utils.get_file_mime_type(file.name)
        if file_format_type not in [mime.value for mime in
                                    resource.metadata.formats.all()]:
            resource.metadata.create_element('format', value=file_format_type)

    return ret

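# A minimal usage sketch for add_resource_files, assuming an existing resource
# 'res' in a Django context (the file name and variable names are hypothetical):
from django.core.files.uploadedfile import UploadedFile

with open('observations.csv', 'rb') as fp:
    added = add_resource_files(res.short_id,
                               UploadedFile(file=fp, name='observations.csv'))
# each returned item is a ResourceFile; a 'text/csv' format metadata element is
# created for the resource unless one is already present
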
def delete_format_metadata_after_delete_file(resource, file_name):
    """
    Delete format metadata as appropriate after a file is deleted.

    :param resource: BaseResource object representing a HydroShare resource
    :param file_name: name of the file that was deleted
    :return:
    """
    delete_file_mime_type = utils.get_file_mime_type(file_name)
    delete_file_extension = os.path.splitext(file_name)[1]

    # if there is no other resource file with the same extension as the file just
    # deleted, then delete the matching format metadata element for the resource
    resource_file_extensions = [os.path.splitext(get_resource_file_name(f))[1]
                                for f in resource.files.all()]
    if delete_file_extension not in resource_file_extensions:
        format_element = resource.metadata.formats.filter(
            value=delete_file_mime_type).first()
        if format_element:
            resource.metadata.delete_element(format_element.term, format_element.id)

def raster_pre_delete_file_from_resource_trigger(sender, **kwargs):
    res = kwargs['resource']
    del_file = kwargs['file']
    del_res_fname = get_resource_file_name(del_file)

    # delete core metadata coverage now that the only file is deleted
    res.metadata.coverages.all().delete()

    # delete all other resource-specific metadata
    res.metadata.originalCoverage.delete()
    res.metadata.cellInformation.delete()
    res.metadata.bandInformations.all().delete()

    # delete all the files that are not the user-selected file
    for f in ResourceFile.objects.filter(object_id=res.id):
        fname = get_resource_file_name(f)
        if fname != del_res_fname:
            delete_resource_file_only(res, f)

    # delete the format elements of the files that are not the user-selected
    # delete file
    del_file_format = utils.get_file_mime_type(del_res_fname)
    for format_element in res.metadata.formats.all():
        if format_element.value != del_file_format:
            res.metadata.delete_element(format_element.term, format_element.id)

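# The trigger above is written as a Django signal receiver. A sketch of how it
# might be wired up, assuming the pre_delete_file_from_resource signal lives in
# hs_core.signals (it is sent with sender=res_cls, file, and resource keyword
# arguments elsewhere in this listing); the RasterResource sender class is an
# assumption, and the actual registration may use the @receiver decorator instead:
from hs_core.signals import pre_delete_file_from_resource

pre_delete_file_from_resource.connect(raster_pre_delete_file_from_resource_trigger,
                                      sender=RasterResource)
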
def create_bag_files(resource):
    """
    Create and update the files needed by the bagit operation that is conducted on
    the iRODS server; no bagit operation is performed here, only the files that will
    be included in the bag are created or updated.

    Parameters:
    :param resource: A resource whose files will be created or updated to be
                     included in the resource bag.
    :return: istorage, an IrodsStorage object that will be used by subsequent
             operations to create a bag on demand as needed.
    """
    from hs_core.hydroshare.utils import current_site_url, get_file_mime_type

    istorage = resource.get_irods_storage()

    # the temp_path is a temporary holding path to make the files available to iRODS;
    # we have to make temp_path unique even for the same resource with the same update
    # time to accommodate asynchronous multiple file move operations for the same resource

    # TODO: This is always in /tmp; otherwise code breaks because open() is called on the result!
    temp_path = istorage.getUniqueTmpPath

    try:
        os.makedirs(temp_path)
    except OSError as ex:
        # TODO: there might be concurrent operations.
        if ex.errno == errno.EEXIST:
            shutil.rmtree(temp_path)
            os.makedirs(temp_path)
        else:
            raise Exception(ex.message)

    # an empty visualization directory will not be put into the zipped bag file by the ibun
    # command, so creating an empty visualization directory to be put into the zip file as
    # done by the two statements below does not work. However, if the visualization directory
    # has content to be uploaded, it will work. This is to be implemented as part of the
    # resource model in the future. The following two statements are placeholders serving
    # as a reminder:
    # to_file_name = '{res_id}/data/visualization/'.format(res_id=resource.short_id)
    # istorage.saveFile('', to_file_name, create_directory=True)

    # create resourcemetadata.xml in a local directory and upload it to iRODS
    from_file_name = os.path.join(temp_path, 'resourcemetadata.xml')
    with open(from_file_name, 'w') as out:
        # write resource level metadata
        out.write(resource.get_metadata_xml())
    to_file_name = os.path.join(resource.root_path, 'data', 'resourcemetadata.xml')
    istorage.saveFile(from_file_name, to_file_name, True)

    # URLs are found in the /data/ subdirectory to comply with bagit format assumptions
    current_site_url = current_site_url()
    # This is the qualified resource url.
    hs_res_url = os.path.join(current_site_url, 'resource', resource.short_id, 'data')
    # this is the path to the resourcemetadata file for download
    metadata_url = os.path.join(hs_res_url, 'resourcemetadata.xml')
    # this is the path to the resourcemap file for download
    res_map_url = os.path.join(hs_res_url, 'resourcemap.xml')

    # make the resource map:
    utils.namespaces['citoterms'] = Namespace('http://purl.org/spar/cito/')
    utils.namespaceSearchOrder.append('citoterms')

    ag_url = os.path.join(hs_res_url, 'resourcemap.xml#aggregation')
    a = Aggregation(ag_url)

    # Set properties of the aggregation
    a._dc.title = resource.metadata.title.value
    a._dcterms.type = URIRef(resource.metadata.type.url)
    a._citoterms.isDocumentedBy = metadata_url
    a._ore.isDescribedBy = res_map_url

    res_type_aggregation = AggregatedResource(resource.metadata.type.url)
    res_type_aggregation._rdfs.label = resource._meta.verbose_name
    res_type_aggregation._rdfs.isDefinedBy = current_site_url + "/terms"

    a.add_resource(res_type_aggregation)

    # Create a description of the metadata document that describes the whole resource
    # and add it to the aggregation
    resMetaFile = AggregatedResource(metadata_url)
    resMetaFile._dc.title = "Dublin Core science metadata document describing the HydroShare " \
                            "resource"
    resMetaFile._citoterms.documents = ag_url
    resMetaFile._ore.isAggregatedBy = ag_url
    resMetaFile._dc.format = "application/rdf+xml"
    a.add_resource(resMetaFile)

    # Add the resource files to the aggregation
    files = ResourceFile.objects.filter(object_id=resource.id)
    for f in files:
        # only the files that are not part of a file type aggregation (logical file)
        # should be added to the resource level map xml file
        if f.logical_file is None:
            res_uri = u'{hs_url}/resource/{res_id}/data/contents/{file_name}'.format(
                hs_url=current_site_url,
                res_id=resource.short_id,
                file_name=f.short_path)
            ar = AggregatedResource(res_uri)
            ar._ore.isAggregatedBy = ag_url
            ar._dc.format = get_file_mime_type(os.path.basename(f.short_path))
            a.add_resource(ar)

    # handle collection resource type
    # save contained resource urls into resourcemap.xml
    if resource.resource_type == "CollectionResource" and resource.resources:
        for contained_res in resource.resources.all():
            contained_res_id = contained_res.short_id
            resource_map_url = '{hs_url}/resource/{res_id}/data/resourcemap.xml'.format(
                hs_url=current_site_url,
                res_id=contained_res_id)
            ar = AggregatedResource(resource_map_url)
            ar._ore.isAggregatedBy = ag_url
            ar._dc.format = "application/rdf+xml"
            a.add_resource(ar)
    elif resource.resource_type == "CompositeResource":
        # add file type aggregations to resource aggregation
        for logical_file in resource.logical_files:
            if logical_file.has_parent:
                # skip nested aggregations
                continue
            aggr_uri = u'{hs_url}/resource/{res_id}/data/contents/{map_file_path}#aggregation'
            aggr_uri = aggr_uri.format(
                hs_url=current_site_url,
                res_id=resource.short_id,
                map_file_path=logical_file.map_short_file_path)
            agg = Aggregation(aggr_uri)
            agg._ore.isAggregatedBy = ag_url
            agg_type_url = "{site}/terms/{aggr_type}"
            agg_type_url = agg_type_url.format(
                site=current_site_url,
                aggr_type=logical_file.get_aggregation_type_name())
            agg._dcterms.type = URIRef(agg_type_url)
            a.add_resource(agg)

    # Register a serializer with the aggregation, which creates a new ResourceMap
    # that needs a URI
    serializer = RdfLibSerializer('xml')
    resMap = a.register_serialization(serializer, res_map_url)
    resMap._dc.identifier = resource.short_id

    # Fetch the serialization
    remdoc = a.get_serialization()

    # change the namespace for the 'creator' element from 'dcterms' to 'dc'
    xml_string = remdoc.data.replace('dcterms:creator', 'dc:creator')

    # delete this extra element
    # <ore:aggregates rdf:resource="[hydroshare domain]/terms/[Resource class name]"/>
    xml_string = xml_string.replace(
        '<ore:aggregates rdf:resource="%s"/>\n' % str(resource.metadata.type.url), '')

    # create resourcemap.xml and upload it to iRODS
    from_file_name = os.path.join(temp_path, 'resourcemap.xml')
    with open(from_file_name, 'w') as out:
        out.write(xml_string)
    to_file_name = os.path.join(resource.root_path, 'data', 'resourcemap.xml')
    istorage.saveFile(from_file_name, to_file_name, False)

    # if the resource is a composite resource, generate aggregation metadata
    # and map xml documents
    if resource.resource_type == "CompositeResource":
        resource.create_aggregation_xml_documents()

    res_coll = resource.root_path
    istorage.setAVU(res_coll, 'metadata_dirty', "false")

    shutil.rmtree(temp_path)

    return istorage

def create_bag_files(resource):
    """
    Create and update the files needed by the bagit operation that is conducted on
    the iRODS server; no bagit operation is performed here, only the files that will
    be included in the bag are created or updated.

    Parameters:
    :param resource: A resource whose files will be created or updated to be
                     included in the resource bag.
    :return: istorage, an IrodsStorage object that will be used by subsequent
             operations to create a bag on demand as needed.
    """
    from hs_core.hydroshare.utils import current_site_url, get_file_mime_type

    istorage = resource.get_irods_storage()

    # the temp_path is a temporary holding path to make the files available to iRODS;
    # we have to make temp_path unique even for the same resource with the same update
    # time to accommodate asynchronous multiple file move operations for the same resource

    # TODO: This is always in /tmp; otherwise code breaks because open() is called on the result!
    temp_path = os.path.join(getattr(settings, 'IRODS_ROOT', '/tmp'), uuid4().hex)

    try:
        os.makedirs(temp_path)
    except OSError as ex:
        # TODO: there might be concurrent operations.
        if ex.errno == errno.EEXIST:
            shutil.rmtree(temp_path)
            os.makedirs(temp_path)
        else:
            raise Exception(ex.message)

    # an empty visualization directory will not be put into the zipped bag file by the ibun
    # command, so creating an empty visualization directory to be put into the zip file as
    # done by the two statements below does not work. However, if the visualization directory
    # has content to be uploaded, it will work. This is to be implemented as part of the
    # resource model in the future. The following two statements are placeholders serving
    # as a reminder:
    # to_file_name = '{res_id}/data/visualization/'.format(res_id=resource.short_id)
    # istorage.saveFile('', to_file_name, create_directory=True)

    # create resourcemetadata.xml in a local directory and upload it to iRODS
    from_file_name = os.path.join(temp_path, 'resourcemetadata.xml')
    with open(from_file_name, 'w') as out:
        # for resources that don't support file types this writes only resource level
        # metadata; for resource types that support file types this writes resource
        # level metadata as well as file type metadata
        out.write(resource.get_metadata_xml())
    to_file_name = os.path.join(resource.root_path, 'data', 'resourcemetadata.xml')
    istorage.saveFile(from_file_name, to_file_name, True)

    # URLs are found in the /data/ subdirectory to comply with bagit format assumptions
    current_site_url = current_site_url()
    # This is the qualified resource url.
    hs_res_url = os.path.join(current_site_url, 'resource', resource.short_id, 'data')
    # this is the path to the resourcemetadata file for download
    metadata_url = os.path.join(hs_res_url, 'resourcemetadata.xml')
    # this is the path to the resourcemap file for download
    res_map_url = os.path.join(hs_res_url, 'resourcemap.xml')

    # make the resource map:
    utils.namespaces['citoterms'] = Namespace('http://purl.org/spar/cito/')
    utils.namespaceSearchOrder.append('citoterms')

    ag_url = os.path.join(hs_res_url, 'resourcemap.xml#aggregation')
    a = Aggregation(ag_url)

    # Set properties of the aggregation
    a._dc.title = resource.metadata.title.value
    a._dcterms.type = URIRef(resource.metadata.type.url)
    a._citoterms.isDocumentedBy = metadata_url
    a._ore.isDescribedBy = res_map_url

    res_type_aggregation = AggregatedResource(resource.metadata.type.url)
    res_type_aggregation._rdfs.label = resource._meta.verbose_name
    res_type_aggregation._rdfs.isDefinedBy = current_site_url + "/terms"

    a.add_resource(res_type_aggregation)

    # Create a description of the metadata document that describes the whole resource
    # and add it to the aggregation
    resMetaFile = AggregatedResource(metadata_url)
    resMetaFile._dc.title = "Dublin Core science metadata document describing the HydroShare " \
                            "resource"
    resMetaFile._citoterms.documents = ag_url
    resMetaFile._ore.isAggregatedBy = ag_url
    resMetaFile._dc.format = "application/rdf+xml"

    # Create a description of the content file and add it to the aggregation
    files = ResourceFile.objects.filter(object_id=resource.id)
    resFiles = []
    for n, f in enumerate(files):
        res_uri = '{hs_url}/resource/{res_id}/data/contents/{file_name}'.format(
            hs_url=current_site_url,
            res_id=resource.short_id,
            file_name=f.short_path)
        resFiles.append(AggregatedResource(res_uri))
        resFiles[n]._ore.isAggregatedBy = ag_url
        resFiles[n]._dc.format = get_file_mime_type(os.path.basename(f.short_path))

    # Add the resource files to the aggregation
    a.add_resource(resMetaFile)
    for f in resFiles:
        a.add_resource(f)

    # handle collection resource type
    # save contained resource urls into resourcemap.xml
    if resource.resource_type == "CollectionResource" and resource.resources:
        for contained_res in resource.resources.all():
            contained_res_id = contained_res.short_id
            resource_map_url = '{hs_url}/resource/{res_id}/data/resourcemap.xml'.format(
                hs_url=current_site_url,
                res_id=contained_res_id)
            ar = AggregatedResource(resource_map_url)
            ar._ore.isAggregatedBy = ag_url
            ar._dc.format = "application/rdf+xml"
            a.add_resource(ar)

    # Register a serializer with the aggregation, which creates a new ResourceMap
    # that needs a URI
    serializer = RdfLibSerializer('xml')
    resMap = a.register_serialization(serializer, res_map_url)
    resMap._dc.identifier = resource.short_id

    # Fetch the serialization
    remdoc = a.get_serialization()

    # change the namespace for the 'creator' element from 'dcterms' to 'dc'
    xml_string = remdoc.data.replace('dcterms:creator', 'dc:creator')

    # delete this extra element
    # <ore:aggregates rdf:resource="[hydroshare domain]/terms/[Resource class name]"/>
    xml_string = xml_string.replace(
        '<ore:aggregates rdf:resource="%s"/>\n' % str(resource.metadata.type.url), '')

    # create resourcemap.xml and upload it to iRODS
    from_file_name = os.path.join(temp_path, 'resourcemap.xml')
    with open(from_file_name, 'w') as out:
        out.write(xml_string)
    to_file_name = os.path.join(resource.root_path, 'data', 'resourcemap.xml')
    istorage.saveFile(from_file_name, to_file_name, False)

    res_coll = resource.root_path
    istorage.setAVU(res_coll, 'metadata_dirty', "false")

    shutil.rmtree(temp_path)

    return istorage

def data_store_structure(request):
    """
    Get file hierarchy (collection of subcollections and data objects) for the requested
    directory in hydroshareZone or any federated zone used for CommonsShare resource backend
    store. It is invoked by an AJAX call and returns a json object that holds content for
    files and folders under the requested directory/collection/subcollection. The AJAX
    request must be a POST request with input data passed in for res_id and store_path
    where store_path is the relative path under the res_id collection/directory.
    """
    res_id = request.POST.get('res_id', None)
    if res_id is None:
        logger.error("no resource id in request")
        return HttpResponse('Bad request - resource id is not included',
                            status=status.HTTP_500_INTERNAL_SERVER_ERROR)
    res_id = str(res_id).strip()
    try:
        resource, _, _ = authorize(
            request, res_id,
            needed_permission=ACTION_TO_AUTHORIZE.VIEW_RESOURCE)
    except NotFound:
        logger.error("resource {} not found".format(res_id))
        return HttpResponse('Bad request - resource not found',
                            status=status.HTTP_400_BAD_REQUEST)
    except PermissionDenied:
        logger.error("permission denied for resource {}".format(res_id))
        return HttpResponse('Permission denied', status=status.HTTP_401_UNAUTHORIZED)

    store_path = request.POST.get('store_path', None)
    if store_path is None:
        logger.error("store_path not included for resource {}".format(res_id))
        return HttpResponse('Bad request - store_path is not included',
                            status=status.HTTP_400_BAD_REQUEST)
    store_path = str(store_path).strip()
    if not store_path:
        logger.error("store_path empty for resource {}".format(res_id))
        return HttpResponse('Bad request - store_path cannot be empty',
                            status=status.HTTP_400_BAD_REQUEST)
    if not store_path.startswith('data/contents'):
        logger.error("store_path doesn't start with data/contents for resource {}".format(
            res_id))
        return HttpResponse('Bad request - store_path must start with data/contents/',
                            status=status.HTTP_400_BAD_REQUEST)
    if store_path.find('/../') >= 0 or store_path.endswith('/..'):
        logger.error("store_path cannot contain .. for resource {}".format(res_id))
        return HttpResponse('Bad request - store_path cannot contain /../',
                            status=status.HTTP_400_BAD_REQUEST)

    istorage = resource.get_irods_storage()
    res_coll = os.path.join(resource.root_path, store_path)
    try:
        store = istorage.listdir(res_coll)
        files = []
        for fname in store[1]:  # files
            fname = fname.decode('utf-8')
            name_with_full_path = os.path.join(res_coll, fname)
            size = istorage.size(name_with_full_path)
            mtype = get_file_mime_type(fname)
            idx = mtype.find('/')
            if idx >= 0:
                mtype = mtype[idx + 1:]
            f_pk = ''
            f_url = ''
            logical_file_type = ''
            logical_file_id = ''
            for f in ResourceFile.objects.filter(object_id=resource.id):
                if name_with_full_path == f.storage_path:
                    f_pk = f.pk
                    f_url = get_resource_file_url(f)
                    if resource.resource_type == "CompositeResource":
                        f_logical = f.get_or_create_logical_file
                        logical_file_type = f.logical_file_type_name
                        logical_file_id = f_logical.id
                    break
            if f_pk:  # file is found in Django
                files.append({'name': fname,
                              'size': size,
                              'type': mtype,
                              'pk': f_pk,
                              'url': f_url,
                              'logical_type': logical_file_type,
                              'logical_file_id': logical_file_id})
            else:  # file is not found in Django
                logger.error("data_store_structure: filename {} in iRODs has no analogue in "
                             "Django".format(name_with_full_path))

        # show reference file links if any which don't have physical presence in iRODS
        for f in ResourceFile.objects.filter(object_id=resource.id):
            if not f.resource_file and not f.fed_resource_file and f.reference_file_path:
                files.append({'name': f.reference_file_path,
                              'size': f.reference_file_size,
                              'type': 'Reference',
                              'pk': f.pk,
                              'url': '',
                              'logical_type': '',
                              'logical_file_id': ''})
    except SessionException as ex:
        logger.error("session exception querying store_path {} for {}".format(
            store_path, res_id))
        return HttpResponse(ex.stderr,
                            status=status.HTTP_500_INTERNAL_SERVER_ERROR)

    return_object = {'files': files,
                     'folders': store[0],
                     'can_be_public': resource.can_be_public_or_discoverable}

    if resource.resource_type == "CompositeResource":
        spatial_coverage_dict = get_coverage_data_dict(resource)
        temporal_coverage_dict = get_coverage_data_dict(resource, coverage_type='temporal')
        return_object['spatial_coverage'] = spatial_coverage_dict
        return_object['temporal_coverage'] = temporal_coverage_dict

    return HttpResponse(json.dumps(return_object),
                        content_type="application/json")

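# A hedged example of exercising the view above with Django's test client; the
# URL route and the 'res' variable are hypothetical and must match the project's
# urlconf and test fixtures for this view:
import json

response = client.post('/hsapi/_internal/data-store-structure/',
                       data={'res_id': res.short_id, 'store_path': 'data/contents'})
listing = json.loads(response.content)
# listing['files'] and listing['folders'] hold the directory contents;
# listing['can_be_public'] reflects whether the resource can be made public
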
def data_store_structure(request):
    """
    Get file hierarchy (collection of subcollections and data objects) for the requested
    directory in hydroshareZone or any federated zone used for HydroShare resource backend
    store. It is invoked by an AJAX call and returns a json object that holds content for
    files and folders under the requested directory/collection/subcollection. The AJAX
    request must be a POST request with input data passed in for res_id and store_path
    where store_path is the relative path to res_id/data/contents.
    """
    res_id = request.POST.get('res_id', None)
    if res_id is None:
        logger.error("no resource id in request")
        return HttpResponse('Bad request - resource id is not included',
                            status=status.HTTP_500_INTERNAL_SERVER_ERROR)
    res_id = str(res_id).strip()
    try:
        resource, _, _ = authorize(
            request, res_id,
            needed_permission=ACTION_TO_AUTHORIZE.VIEW_RESOURCE)
    except NotFound:
        logger.error("resource {} not found".format(res_id))
        return HttpResponse('Bad request - resource not found',
                            status=status.HTTP_400_BAD_REQUEST)
    except PermissionDenied:
        logger.error("permission denied for resource {}".format(res_id))
        return HttpResponse('Permission denied', status=status.HTTP_401_UNAUTHORIZED)

    store_path = request.POST.get('store_path', None)
    try:
        store_path = _validate_path(store_path, 'store_path', check_path_empty=False)
    except ValidationError as ex:
        return HttpResponse(str(ex), status=status.HTTP_400_BAD_REQUEST)

    istorage = resource.get_irods_storage()
    directory_in_irods = resource.get_irods_path(store_path)

    try:
        store = istorage.listdir(directory_in_irods)
    except SessionException as ex:
        logger.error("session exception querying store_path {} for {}".format(
            store_path, res_id))
        return HttpResponse(ex.stderr,
                            status=status.HTTP_500_INTERNAL_SERVER_ERROR)

    files = []
    dirs = []
    aggregations = []
    # folder path relative to 'data/contents/' needed for the UI
    folder_path = store_path[len("data/contents/"):]
    for dname in store[0]:  # directories
        d_pk = dname
        d_store_path = os.path.join(store_path, d_pk)
        d_url = resource.get_url_of_path(d_store_path)
        main_file = ''
        folder_aggregation_type = ''
        folder_aggregation_name = ''
        folder_aggregation_id = ''
        folder_aggregation_type_to_set = ''
        if resource.resource_type == "CompositeResource":
            dir_path = resource.get_irods_path(d_store_path)
            # find if this folder *dir_path* represents (contains) an aggregation object
            aggregation_object = resource.get_folder_aggregation_object(dir_path)
            # folder aggregation type is not relevant for single file aggregation types -
            # which are: GenericLogicalFile, and RefTimeseriesLogicalFile
            if aggregation_object is not None:
                folder_aggregation_type = aggregation_object.get_aggregation_class_name()
                folder_aggregation_name = aggregation_object.get_aggregation_display_name()
                folder_aggregation_id = aggregation_object.id
                if not aggregation_object.is_fileset:
                    main_file = aggregation_object.get_main_file.file_name
            else:
                # find if a FileSet aggregation type can be created from this folder
                if resource.can_set_folder_to_fileset(dir_path):
                    folder_aggregation_type_to_set = FileSetLogicalFile.__name__
                else:
                    folder_aggregation_type_to_set = ""
        dirs.append({'name': d_pk,
                     'url': d_url,
                     'main_file': main_file,
                     'folder_aggregation_type': folder_aggregation_type,
                     'folder_aggregation_name': folder_aggregation_name,
                     'folder_aggregation_id': folder_aggregation_id,
                     'folder_aggregation_type_to_set': folder_aggregation_type_to_set,
                     'folder_short_path': os.path.join(folder_path, d_pk)})

    is_federated = resource.is_federated
    for index, fname in enumerate(store[1]):  # files
        f_store_path = os.path.join(store_path, fname)
        file_in_irods = resource.get_irods_path(f_store_path)
        size = store[2][index]
        mtype = get_file_mime_type(fname)
        idx = mtype.find('/')
        if idx >= 0:
            mtype = mtype[idx + 1:]
        if is_federated:
            f = ResourceFile.objects.filter(object_id=resource.id,
                                            fed_resource_file=file_in_irods).first()
        else:
            f = ResourceFile.objects.filter(object_id=resource.id,
                                            resource_file=file_in_irods).first()
        if not f:
            # skip metadata files
            continue
        f_ref_url = ''
        logical_file_type = ''
        logical_file_id = ''
        aggregation_name = ''
        if f.has_logical_file:
            main_extension = f.logical_file.get_main_file_type()
            if not main_extension:
                # accept any extension
                main_extension = ""
            if main_extension.endswith(f.extension):
                aggregations.append(
                    {'logical_file_id': f.logical_file.id,
                     'name': f.logical_file.dataset_name,
                     'logical_type': f.logical_file.get_aggregation_class_name(),
                     'aggregation_name': f.logical_file.get_aggregation_display_name(),
                     'main_file': f.logical_file.get_main_file.file_name,
                     'url': f.logical_file.url})
            logical_file_type = f.logical_file_type_name
            logical_file_id = f.logical_file.id
            aggregation_name = f.aggregation_display_name
            if 'url' in f.logical_file.extra_data:
                f_ref_url = f.logical_file.extra_data['url']
        files.append({'name': fname,
                      'size': size,
                      'type': mtype,
                      'pk': f.pk,
                      'url': f.url,
                      'reference_url': f_ref_url,
                      'aggregation_name': aggregation_name,
                      'logical_type': logical_file_type,
                      'logical_file_id': logical_file_id})

    return_object = {'files': files,
                     'folders': dirs,
                     'aggregations': aggregations,
                     'can_be_public': resource.can_be_public_or_discoverable}

    if resource.resource_type == "CompositeResource":
        return_object['spatial_coverage'] = get_coverage_data_dict(resource)
        return_object['temporal_coverage'] = get_coverage_data_dict(
            resource, coverage_type='temporal')

    return HttpResponse(json.dumps(return_object),
                        content_type="application/json")

def data_store_structure(request):
    """
    Get file hierarchy (collection of subcollections and data objects) for the requested
    directory in hydroshareZone or any federated zone used for HydroShare resource backend
    store. It is invoked by an AJAX call and returns a json object that holds content for
    files and folders under the requested directory/collection/subcollection. The AJAX
    request must be a POST request with input data passed in for res_id and store_path
    where store_path is the relative path to res_id/data/contents.
    """
    res_id = request.POST.get('res_id', None)
    if res_id is None:
        logger.error("no resource id in request")
        return HttpResponse('Bad request - resource id is not included',
                            status=status.HTTP_500_INTERNAL_SERVER_ERROR)
    res_id = str(res_id).strip()
    try:
        resource, _, _ = authorize(request, res_id,
                                   needed_permission=ACTION_TO_AUTHORIZE.VIEW_RESOURCE)
    except NotFound:
        logger.error("resource {} not found".format(res_id))
        return HttpResponse('Bad request - resource not found',
                            status=status.HTTP_400_BAD_REQUEST)
    except PermissionDenied:
        logger.error("permission denied for resource {}".format(res_id))
        return HttpResponse('Permission denied', status=status.HTTP_401_UNAUTHORIZED)

    store_path = request.POST.get('store_path', None)
    try:
        store_path = _validate_path(store_path, 'store_path', check_path_empty=False)
    except ValidationError as ex:
        return HttpResponse(ex.message, status=status.HTTP_400_BAD_REQUEST)

    istorage = resource.get_irods_storage()
    directory_in_irods = resource.get_irods_path(store_path)

    try:
        store = istorage.listdir(directory_in_irods)
    except SessionException as ex:
        logger.error("session exception querying store_path {} for {}".format(
            store_path, res_id))
        return HttpResponse(ex.stderr, status=status.HTTP_500_INTERNAL_SERVER_ERROR)

    files = []
    dirs = []
    # folder path relative to 'data/contents/' needed for the UI
    folder_path = store_path[len("data/contents/"):]
    for dname in store[0]:  # directories
        d_pk = dname.decode('utf-8')
        d_store_path = os.path.join(store_path, d_pk)
        d_url = resource.get_url_of_path(d_store_path)
        main_file = ''
        folder_aggregation_type = ''
        folder_aggregation_name = ''
        folder_aggregation_id = ''
        folder_aggregation_type_to_set = ''
        if resource.resource_type == "CompositeResource":
            dir_path = resource.get_public_path(d_store_path)
            # find if this folder *dir_path* represents (contains) an aggregation object
            aggregation_object = resource.get_folder_aggregation_object(dir_path)
            # folder aggregation type is not relevant for single file aggregation types -
            # which are: GenericLogicalFile, and RefTimeseriesLogicalFile
            if aggregation_object is not None and not \
                    aggregation_object.is_single_file_aggregation:
                folder_aggregation_type = aggregation_object.get_aggregation_class_name()
                folder_aggregation_name = aggregation_object.get_aggregation_display_name()
                folder_aggregation_id = aggregation_object.id
                main_file = ''
                if not aggregation_object.is_fileset:
                    main_file = aggregation_object.get_main_file.file_name
            else:
                # find if any aggregation type can be created from this folder
                folder_aggregation_type_to_set = \
                    resource.get_folder_aggregation_type_to_set(dir_path)
                if folder_aggregation_type_to_set is None:
                    folder_aggregation_type_to_set = ""
        dirs.append({'name': d_pk,
                     'url': d_url,
                     'main_file': main_file,
                     'folder_aggregation_type': folder_aggregation_type,
                     'folder_aggregation_name': folder_aggregation_name,
                     'folder_aggregation_id': folder_aggregation_id,
                     'folder_aggregation_type_to_set': folder_aggregation_type_to_set,
                     'folder_short_path': os.path.join(folder_path, d_pk)})

    is_federated = resource.is_federated
    for index, fname in enumerate(store[1]):  # files
        fname = fname.decode('utf-8')
        f_store_path = os.path.join(store_path, fname)
        file_in_irods = resource.get_irods_path(f_store_path)
        size = store[2][index]
        mtype = get_file_mime_type(fname)
        idx = mtype.find('/')
        if idx >= 0:
            mtype = mtype[idx + 1:]
        if is_federated:
            f = ResourceFile.objects.filter(object_id=resource.id,
                                            fed_resource_file=file_in_irods).first()
        else:
            f = ResourceFile.objects.filter(object_id=resource.id,
                                            resource_file=file_in_irods).first()
        if not f:
            # skip metadata files
            continue
        f_ref_url = ''
        logical_file_type = ''
        logical_file_id = ''
        aggregation_name = ''
        is_single_file_aggregation = ''
        if resource.resource_type == "CompositeResource":
            if f.has_logical_file:
                logical_file_type = f.logical_file_type_name
                logical_file_id = f.logical_file.id
                aggregation_name = f.aggregation_display_name
                is_single_file_aggregation = f.logical_file.is_single_file_aggregation
                if 'url' in f.logical_file.extra_data:
                    f_ref_url = f.logical_file.extra_data['url']
        files.append({'name': fname,
                      'size': size,
                      'type': mtype,
                      'pk': f.pk,
                      'url': f.url,
                      'reference_url': f_ref_url,
                      'aggregation_name': aggregation_name,
                      'logical_type': logical_file_type,
                      'logical_file_id': logical_file_id,
                      'is_single_file_aggregation': is_single_file_aggregation})

    return_object = {'files': files,
                     'folders': dirs,
                     'can_be_public': resource.can_be_public_or_discoverable}

    if resource.resource_type == "CompositeResource":
        return_object['spatial_coverage'] = get_coverage_data_dict(resource)
        return_object['temporal_coverage'] = get_coverage_data_dict(
            resource, coverage_type='temporal')

    return HttpResponse(json.dumps(return_object),
                        content_type="application/json")

def delete_resource_file(pk, filename_or_id, user):
    """
    Deletes an individual file from a HydroShare resource. If the file does not exist,
    the Exceptions.NotFound exception is raised.

    REST URL: DELETE /resource/{pid}/files/{filename}

    Parameters:
    pid - The unique HydroShare identifier for the resource from which the file will
          be deleted
    filename - Name of the file to be deleted from the resource

    Returns: The pid of the resource from which the file was deleted

    Return Type: pid

    Raises:
    Exceptions.NotAuthorized - The user is not authorized
    Exceptions.NotFound - The resource identified by pid does not exist or the file
                          identified by file does not exist
    Exception.ServiceFailure - The service is unable to process the request

    Note:
    For mutable resources (resources that have not been formally published), this method
    modifies the resource by deleting the file. For immutable resources (formally
    published resources), this method creates a new resource that is a new version of
    the formally published resource. HydroShare will record the update by storing the
    SystemMetadata.obsoletes and SystemMetadata.obsoletedBy fields for the respective
    resources in their system metadata. HydroShare MUST check or set the values of
    SystemMetadata.obsoletes and SystemMetadata.obsoletedBy so that they accurately
    represent the relationship between the new and old objects. HydroShare MUST also
    set SystemMetadata.dateSysMetadataModified. The modified system metadata entries
    must then be available in HydroShare.listObjects() to ensure that any cataloging
    systems pick up the changes when filtering on SystemMetadata.dateSysMetadataModified.
    A formally published resource can only be obsoleted by one newer version. Once a
    resource is obsoleted, no other resources can obsolete it.
    """
    resource = utils.get_resource_by_shortkey(pk)
    res_cls = resource.__class__

    try:
        file_id = int(filename_or_id)
        filter_condition = lambda fl: fl.id == file_id
    except ValueError:
        filter_condition = lambda fl: os.path.basename(fl.resource_file.name) == filename_or_id

    for f in ResourceFile.objects.filter(object_id=resource.id):
        if filter_condition(f):
            # send signal
            signals.pre_delete_file_from_resource.send(sender=res_cls, file=f,
                                                       resource=resource)

            file_name = f.resource_file.name
            f.resource_file.delete()
            f.delete()
            delete_file_mime_type = utils.get_file_mime_type(file_name)
            delete_file_extension = os.path.splitext(file_name)[1]

            # if there is no other resource file with the same extension as the file
            # just deleted, then delete the matching format metadata element for the
            # resource
            resource_file_extensions = [os.path.splitext(f.resource_file.name)[1]
                                        for f in resource.files.all()]
            if delete_file_extension not in resource_file_extensions:
                format_element = resource.metadata.formats.filter(
                    value=delete_file_mime_type).first()
                if format_element:
                    resource.metadata.delete_element(format_element.term,
                                                     format_element.id)
            break
    else:
        raise ObjectDoesNotExist(filename_or_id)

    if resource.public:
        if not resource.can_be_public:
            resource.public = False
            resource.save()

    # generate bag
    utils.resource_modified(resource, user)

    return filename_or_id

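# A minimal usage sketch for delete_resource_file; 'res', 'res_file', and 'user'
# are assumed to exist, and the file can be identified either by name or by
# ResourceFile id:
delete_resource_file(res.short_id, 'observations.csv', user)  # by file name
delete_resource_file(res.short_id, str(res_file.id), user)    # by numeric id
# ObjectDoesNotExist is raised when no matching file is found
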
def data_store_structure(request):
    """
    Get file hierarchy (collection of subcollections and data objects) for the requested
    directory in hydroshareZone or any federated zone used for HydroShare resource backend
    store. It is invoked by an AJAX call and returns a json object that holds content for
    files and folders under the requested directory/collection/subcollection. The AJAX
    request must be a POST request with input data passed in for res_id and store_path
    where store_path is the relative path under the res_id collection/directory.
    """
    res_id = request.POST.get('res_id', None)
    if res_id is None:
        return HttpResponse('Bad request - resource id is not included',
                            status=status.HTTP_500_INTERNAL_SERVER_ERROR)
    res_id = str(res_id).strip()
    try:
        resource, _, _ = authorize(
            request, res_id,
            needed_permission=ACTION_TO_AUTHORIZE.VIEW_RESOURCE)
    except NotFound:
        return HttpResponse('Bad request - resource not found',
                            status=status.HTTP_400_BAD_REQUEST)
    except PermissionDenied:
        return HttpResponse('Permission denied', status=status.HTTP_401_UNAUTHORIZED)

    store_path = request.POST.get('store_path', None)
    if store_path is None:
        return HttpResponse('Bad request - store_path is not included',
                            status=status.HTTP_400_BAD_REQUEST)
    store_path = str(store_path).strip()
    if not store_path:
        return HttpResponse('Bad request - store_path cannot be empty',
                            status=status.HTTP_400_BAD_REQUEST)

    # this is federated if warranted, automatically, by choosing an appropriate session.
    istorage = resource.get_irods_storage()
    if resource.resource_federation_path:
        res_coll = os.path.join(resource.resource_federation_path, res_id, store_path)
        rel_path = store_path
    else:
        res_coll = os.path.join(res_id, store_path)
        rel_path = res_coll

    try:
        store = istorage.listdir(res_coll)
        files = []
        for fname in store[1]:
            name_with_full_path = os.path.join(res_coll, fname)
            name_with_rel_path = os.path.join(rel_path, fname)
            size = istorage.size(name_with_full_path)
            mtype = get_file_mime_type(fname)
            idx = mtype.find('/')
            if idx >= 0:
                mtype = mtype[idx + 1:]
            f_pk = ''
            f_url = ''
            logical_file_type = ''
            logical_file_id = ''
            for f in ResourceFile.objects.filter(object_id=resource.id):
                if name_with_rel_path == get_resource_file_name_and_extension(f)[0]:
                    f_pk = f.pk
                    f_url = get_resource_file_url(f)
                    if resource.resource_type == "CompositeResource":
                        logical_file_type = f.logical_file_type_name
                        logical_file_id = f.logical_file.id
                    break
            files.append({'name': fname,
                          'size': size,
                          'type': mtype,
                          'pk': f_pk,
                          'url': f_url,
                          'logical_type': logical_file_type,
                          'logical_file_id': logical_file_id})
    except SessionException as ex:
        return HttpResponse(ex.stderr,
                            status=status.HTTP_500_INTERNAL_SERVER_ERROR)

    return_object = {'files': files,
                     'folders': store[0],
                     'can_be_public': resource.can_be_public_or_discoverable}

    if resource.resource_type == "CompositeResource":
        spatial_coverage_dict = get_coverage_data_dict(resource)
        temporal_coverage_dict = get_coverage_data_dict(resource, coverage_type='temporal')
        return_object['spatial_coverage'] = spatial_coverage_dict
        return_object['temporal_coverage'] = temporal_coverage_dict

    return HttpResponse(json.dumps(return_object),
                        content_type="application/json")