def get_quota_usage_from_irods(username):
    """
    Read the quota usage that the iRODS quota micro-services recorded for a user.

    The usage is looked up as the AVU ``<username>-usage`` on the bagit path collection
    in the iRODS data zone and on the corresponding collection in the iRODS user zone.

    :param username: the user name to get quota usage for.
    :return: the usage of the only zone that reports one, or the sum of both zones when
    both report a usage.
    :raises ValidationError: if neither zone yields a usage value.
    """
    attname = username + '-usage'
    istorage = IrodsStorage()

    def _zone_usage(coll_path):
        # -1 marks "no usage recorded": the user may have no resources in the zone, in
        # which case the AVU is absent (None) or the iRODS call fails
        try:
            raw_value = istorage.getAVU(coll_path, attname)
        except SessionException:
            return -1
        return -1 if raw_value is None else float(raw_value)

    # data zone first, then user zone
    data_zone_usage = _zone_usage(settings.IRODS_BAGIT_PATH)
    user_zone_bagit_path = os.path.join('/', settings.HS_USER_IRODS_ZONE, 'home',
                                        settings.HS_IRODS_PROXY_USER_IN_USER_ZONE,
                                        settings.IRODS_BAGIT_PATH)
    user_zone_usage = _zone_usage(user_zone_bagit_path)

    data_zone_missing = data_zone_usage < 0
    user_zone_missing = user_zone_usage < 0
    if data_zone_missing and user_zone_missing:
        err_msg = 'no quota size AVU in data zone and user zone for user {}'.format(username)
        logger.error(err_msg)
        raise ValidationError(err_msg)
    if user_zone_missing:
        return data_zone_usage
    if data_zone_missing:
        return user_zone_usage
    return data_zone_usage + user_zone_usage
def download(request, path, *args, **kwargs):
    """
    Stream a file or bag stored in iRODS back to the client as an attachment.

    :param request: the HTTP request, passed to authorize() for the view permission check
    :param path: iRODS path of the object to download; either 'bags/<res_id>.<ext>' or a
    path whose first segment is the resource id
    :param kwargs: may carry 'environment', the primary key of a RodsEnvironment to run
    the icommands under
    :return: an HttpResponse with a permission message when the requester is not
    authorized, otherwise a FileResponse fed from the stdout of 'iget'
    :raises KeyError: if no iRODS session can be determined
    """
    path_parts = path.split('/')
    if path_parts[0] == 'bags':
        res_id = os.path.splitext(path_parts[1])[0]
    else:
        res_id = path_parts[0]

    _, authorized, _ = authorize(request, res_id,
                                 needed_permission=Action_To_Authorize.VIEW_RESOURCE,
                                 raises_exception=False)
    if not authorized:
        denied = HttpResponse()
        denied.content = "<h1>You do not have permission to download this resource!</h1>"
        return denied

    # pick the iRODS session: explicit environment, then global session, then active session
    if 'environment' in kwargs:
        env_pk = int(kwargs['environment'])
        environment = m.RodsEnvironment.objects.get(pk=env_pk)
        session = Session("/tmp/django_irods", settings.IRODS_ICOMMANDS_PATH,
                          session_id=uuid4())
        session.create_environment(environment)
        session.run('iinit', None, environment.auth)
    elif getattr(settings, 'IRODS_GLOBAL_SESSION', False):
        session = GLOBAL_SESSION
    elif icommands.ACTIVE_SESSION:
        session = icommands.ACTIVE_SESSION
    else:
        raise KeyError('settings must have IRODS_GLOBAL_SESSION set if there is no environment object')

    # on-demand bag creation; the collection is re-checked before each AVU access because
    # the resource may be deleted by another request while it is being downloaded
    istorage = IrodsStorage()
    if istorage.exists(res_id):
        bag_modified = istorage.getAVU(res_id, 'bag_modified')
    else:
        bag_modified = "false"
    if bag_modified == "true":
        create_bag_by_irods(res_id, istorage)
        if istorage.exists(res_id):
            istorage.setAVU(res_id, 'bag_modified', "false")

    # content type falls back to a generic binary type when it cannot be guessed
    guessed_type = mimetypes.guess_type(path)[0]
    mtype = guessed_type if guessed_type is not None else 'application-x/octet-stream'

    # the file size for the Content-Length header is parsed out of the 'ils -l' output
    ils_fields = session.run("ils", None, "-l", path)[0].split()
    flen = int(ils_fields[3])

    # '-' makes iget write the object to stdout, which is streamed to the client
    proc = session.run_safe('iget', None, path, '-')
    response = FileResponse(proc.stdout, content_type=mtype)
    response['Content-Disposition'] = 'attachment; filename="{name}"'.format(
        name=path.split('/')[-1])
    response['Content-Length'] = flen
    return response
def verify_user_quota_usage_avu_in_user_zone(self, attname, qsize):
    '''
    Assert that the quota usage AVU stored in the iRODS user zone has the expected value.
    The lookup logs in as settings.HS_USER_ZONE_PROXY_USER (expected to hold the rodsadmin
    role) so that the AVU in the user zone can be read.
    :param attname: quota usage attribute name set on iRODS proxy user in user zone
    :param qsize: quota size (type string) expected to equal the value set for attname.
    '''
    storage = IrodsStorage()
    session_kwargs = dict(username=settings.HS_USER_ZONE_PROXY_USER,
                          password=settings.HS_USER_ZONE_PROXY_USER_PWD,
                          host=settings.HS_USER_ZONE_HOST,
                          port=settings.IRODS_PORT,
                          zone=settings.HS_USER_IRODS_ZONE,
                          sess_id='user_proxy_session')
    storage.set_user_session(**session_kwargs)

    # bagit collection under the home collection of the local proxy user in the user zone
    path_segments = ['/', settings.HS_USER_IRODS_ZONE, 'home',
                     settings.HS_LOCAL_PROXY_USER_IN_FED_ZONE, settings.IRODS_BAGIT_PATH]
    bagit_coll = os.path.join(*path_segments)
    stored_qsize = storage.getAVU(bagit_coll, attname)
    self.assertEqual(qsize, stored_qsize)
def verify_user_quota_usage_avu_in_user_zone(self, attname, qsize):
    '''
    Assert that the quota usage AVU stored in the iRODS user zone has the expected value.
    The lookup logs in as settings.LINUX_ADMIN_USER_FOR_HS_USER_ZONE (expected to hold the
    rodsadmin role) so that the AVU in the user zone can be read.
    :param attname: quota usage attribute name set on iRODS proxy user in user zone
    :param qsize: quota size (type string) expected to equal the value set for attname.
    '''
    storage = IrodsStorage()
    session_kwargs = dict(username=settings.LINUX_ADMIN_USER_FOR_HS_USER_ZONE,
                          password=settings.LINUX_ADMIN_USER_PWD_FOR_HS_USER_ZONE,
                          host=settings.HS_USER_ZONE_HOST,
                          port=settings.IRODS_PORT,
                          zone=settings.HS_USER_IRODS_ZONE,
                          sess_id='user_proxy_session')
    storage.set_user_session(**session_kwargs)

    # bagit collection under the home collection of the iRODS proxy user in the user zone
    path_segments = ['/', settings.HS_USER_IRODS_ZONE, 'home',
                     settings.HS_IRODS_PROXY_USER_IN_USER_ZONE, settings.IRODS_BAGIT_PATH]
    bagit_coll = os.path.join(*path_segments)
    stored_qsize = storage.getAVU(bagit_coll, attname)
    self.assertEqual(qsize, stored_qsize)
def get_size_and_avu_for_irods_ref_files(username, password, host, port, zone, irods_fnames):
    """
    Collect the size and the AVU metadata of iRODS data objects referenced by name.

    A user session is opened with the given credentials, every name in irods_fnames is
    queried for its size and its data-object AVUs, and the session is deleted again, even
    when one of the iRODS calls fails.

    :param username: iRODS login account username used to query the data objects
    :param password: iRODS login account password used to query the data objects
    :param host: iRODS login host
    :param port: iRODS login port
    :param zone: iRODS login zone
    :param irods_fnames: comma-separated string of data object names to query
    :raises SessionException(proc.returncode, stdout, stderr) defined in
    django_irods/icommands.py to capture iRODS exceptions raised from iRODS icommand
    subprocess run triggered from any method calls from IrodsStorage() if an error or
    exception ever occurs
    :return: list of file sizes in the order of the names in irods_fnames, and a dict of
    extra metadata holding the AVUs of every data object, with each attribute name
    prefixed by '<file name>_'
    """
    irods_storage = IrodsStorage()
    irods_storage.set_user_session(username=username, password=password, host=host, port=port,
                                   zone=zone)
    ifsizes = []
    ifextra_mds = {}
    try:
        # str.split / dict.items behave the same on Python 2 and 3, unlike the
        # Python 2-only string.split / dict.iteritems
        for ifname in irods_fnames.split(','):
            ifsizes.append(irods_storage.size(ifname))
            extra_md_dict = irods_storage.getAVU(ifname, type='-d')
            for key, val in extra_md_dict.items():
                ifextra_mds[ifname + '_' + key] = val
    finally:
        # always delete the user session once iRODS file operations are done, so a failed
        # query does not leave the session behind
        irods_storage.delete_user_session()
    return ifsizes, ifextra_mds
def update_quota_usage_task(username):
    """update quota usage.
    This function runs as a celery task, invoked asynchronously with 1 minute delay to give
    the iRODS real time quota update micro-services enough time to refresh the quota usage
    AVU of the user before it is read here. The micro-services update the HydroShare iRODS
    data zone and user zone independently, so the usage of both zones is aggregated here
    before the Django DB is updated with the usage for the hydroshare internal zone.
    :param username: the name of the user that needs to update quota usage for.
    :return: True if quota usage update succeeds; False if the quota row does not exist in
    Django or no usage AVU can be read from either zone. See log for details.
    """
    hs_internal_zone = "hydroshare"
    uq = UserQuota.objects.filter(user__username=username, zone=hs_internal_zone).first()
    if uq is None:
        # the quota row does not exist in Django
        logger.error('quota row does not exist in Django for hydroshare zone for user ' + username)
        return False

    attname = username + '-usage'
    istorage = IrodsStorage()

    def _zone_usage(coll_path):
        # -1 marks "no usage recorded": the user may have no resources in the zone, in
        # which case the AVU is absent (None) or the iRODS call fails
        try:
            raw_value = istorage.getAVU(coll_path, attname)
        except SessionException:
            return -1
        return -1 if raw_value is None else float(raw_value)

    # data zone usage is stored on the bagit path collection; user zone usage on the bagit
    # path collection under the proxy user's home in the user zone
    data_zone_usage = _zone_usage(settings.IRODS_BAGIT_PATH)
    user_zone_bagit_path = os.path.join('/', settings.HS_USER_IRODS_ZONE, 'home',
                                        settings.HS_IRODS_PROXY_USER_IN_USER_ZONE,
                                        settings.IRODS_BAGIT_PATH)
    user_zone_usage = _zone_usage(user_zone_bagit_path)

    data_zone_missing = data_zone_usage < 0
    user_zone_missing = user_zone_usage < 0
    if data_zone_missing and user_zone_missing:
        logger.error('no quota size AVU in data zone and user zone for the user ' + username)
        return False

    if user_zone_missing:
        used_val = data_zone_usage
    elif data_zone_missing:
        used_val = user_zone_usage
    else:
        used_val = data_zone_usage + user_zone_usage
    uq.update_used_value(used_val)
    return True
def download(request, path, rest_call=False, use_async=True, *args, **kwargs):
    """
    Stream a file or bag stored in iRODS back to the client as an attachment.

    :param request: the HTTP request, passed to authorize() and to the pre_download_file
    signal
    :param path: iRODS path of the object to download; 'bags/<res_id>.<ext>' marks a bag
    download, otherwise the first segment is the resource id
    :param rest_call: when True an authorization failure raises PermissionDenied instead
    of returning an HTML response
    :param use_async: not used by this implementation
    :param kwargs: may carry 'environment', the primary key of a RodsEnvironment to run
    the icommands under
    :return: a 401 HttpResponse with a sign-in link when the requester is not authorized,
    otherwise a FileResponse fed from the stdout of 'iget'
    :raises PermissionDenied: on authorization failure when rest_call is True
    :raises KeyError: if no iRODS session can be determined
    """
    split_path_strs = path.split('/')
    is_bag_download = split_path_strs[0] == 'bags'
    if is_bag_download:
        res_id = os.path.splitext(split_path_strs[1])[0]
    else:
        res_id = split_path_strs[0]

    res, authorized, _ = authorize(request, res_id,
                                   needed_permission=ACTION_TO_AUTHORIZE.VIEW_RESOURCE,
                                   raises_exception=False)
    if not authorized:
        response = HttpResponse(status=401)
        content_msg = "You do not have permission to download this resource!"
        if rest_call:
            raise PermissionDenied(content_msg)
        signin_html = ('</h1><div class="col-xs-12"><h2 class="page-title">'
                       '<a href="/oauth_request/"><span class ="glyphicon glyphicon-log-in"></span>'
                       'Sign In</a></h2>')
        response.content = '<h1>' + content_msg + signin_html
        return response

    # for non-bag paths without '/data', drop everything before the first '/'
    if not (is_bag_download or "/data" in path):
        path = path[path.find('/'):]

    istorage = IrodsStorage()
    # pick the iRODS session: explicit environment, then global session, then active session
    if 'environment' in kwargs:
        env_pk = int(kwargs['environment'])
        environment = m.RodsEnvironment.objects.get(pk=env_pk)
        session = Session("/tmp/django_irods", settings.IRODS_ICOMMANDS_PATH,
                          session_id=uuid4())
        session.create_environment(environment)
        session.run('iinit', None, environment.auth)
    elif getattr(settings, 'IRODS_GLOBAL_SESSION', False):
        session = GLOBAL_SESSION
    elif icommands.ACTIVE_SESSION:
        session = icommands.ACTIVE_SESSION
    else:
        raise KeyError('settings must have IRODS_GLOBAL_SESSION set '
                       'if there is no environment object')

    if istorage.exists(res_id) and is_bag_download:
        bag_modified = istorage.getAVU(res_id, 'bag_modified')
        # the flag can be out-of-sync with the real bag status: when it is unset or false
        # but the bag file is missing, force the bag to be recreated
        flag_unset_or_false = bag_modified is None or bag_modified.lower() == "false"
        if flag_unset_or_false and not istorage.exists(os.path.join('bags', res_id + '.zip')):
            bag_modified = 'true'
        if bag_modified is None or bag_modified.lower() == "true":
            create_bag(res)

    # notify listeners that a file is about to be downloaded
    resource_cls = check_resource_type(res.resource_type)
    pre_download_file.send(sender=resource_cls, resource=res,
                           download_file_name=split_path_strs[-1],
                           request=request)

    # content type falls back to a generic binary type when it cannot be guessed
    guessed_type = mimetypes.guess_type(path)[0]
    mtype = guessed_type if guessed_type is not None else 'application-x/octet-stream'

    # the file size for the Content-Length header is parsed out of the 'ils -l' output
    ils_fields = session.run("ils", None, "-l", path)[0].split()
    flen = int(ils_fields[3])

    # '-' makes iget write the object to stdout, which is streamed to the client
    proc = session.run_safe('iget', None, path, '-')
    response = FileResponse(proc.stdout, content_type=mtype)
    response['Content-Disposition'] = 'attachment; filename="{name}"'.format(
        name=path.split('/')[-1])
    response['Content-Length'] = flen
    return response
def handle(self, *args, **options):
    """
    Migrate every resource stored in the iRODS user zone over to the data zone.

    For each resource whose storage_type is 'user' the collection and a fixed set of AVUs
    are copied to the data zone, the file records are switched from fed_resource_file to
    resource_file, and the resource federation path is cleared. Progress and problems are
    reported with print(); an iRODS SessionException only aborts the resource it occurs on.
    """
    migrated_count = 0
    storage = IrodsStorage()
    copied_avus = ['bag_modified', 'metadata_dirty', 'isPublic', 'resourceType']
    for resource in BaseResource.objects.all():
        if resource.storage_type != 'user':
            # only resources in the user zone need to be migrated
            continue
        try:
            src_coll = resource.root_path
            tgt_coll = resource.short_id

            # copy files from iRODS user zone to data zone, replacing any existing target
            if storage.exists(tgt_coll):
                storage.delete(tgt_coll)
            storage.copyFiles(src_coll, tgt_coll)

            # copy AVUs over for the resource collection; bag_modified is forced to 'true'
            # so the bag can be regenerated in the data zone, the rest is copied literally
            for avu_name in copied_avus:
                source_value = storage.getAVU(src_coll, avu_name)
                target_value = 'true' if avu_name == 'bag_modified' else source_value
                storage.setAVU(tgt_coll, avu_name, target_value)

            # The source collection is deliberately NOT deleted from the user zone here in
            # case there are issues with the migration. The collections can be removed with
            # a simple irm iRODS command once the migration is confirmed to be successful.

            path_migrated = False
            for res_file in resource.files.all():
                if res_file.resource_file.name:
                    print('The resource_file field should be empty for resource {} but '
                          'have the value of {}'.format(resource.short_id,
                                                        res_file.resource_file.name))
                    break
                file_path = res_file.fed_resource_file.name
                if not file_path:
                    print('The fed_resource_file field should not be empty for '
                          'resource {}'.format(resource.short_id))
                    break

                # strip the federation path (and the separator after it) when present
                fed_prefix = resource.resource_federation_path
                has_fed_prefix = file_path.startswith(fed_prefix)
                if has_fed_prefix:
                    file_path = file_path[len(fed_prefix)+1:]
                res_file.resource_file.name = file_path
                res_file.fed_resource_file.name = ''
                res_file.save()
                path_migrated = True
                if not has_fed_prefix:
                    print('fed_resource_file field does not contain absolute federation '
                          'path which is an exception but can work after migration. '
                          'file_path is {}'.format(file_path))

            if not path_migrated and resource.files.count() != 0:
                # files exist but none could be migrated; leave the resource untouched
                continue

            # update resource federation path to point resource to data zone
            resource.resource_federation_path = ''
            resource.save()
            print("Resource {} has been moved from user zone to data zone "
                  "successfully".format(resource.short_id))
            migrated_count += 1
        except SessionException as ex:
            print("Resource {} failed to move: {}".format(resource.short_id, ex.stderr))

    print("{} resources have been moved from user zone to data zone successfully".format(
        migrated_count))
def download(request, path, rest_call=False, use_async=True, use_reverse_proxy=True,
             *args, **kwargs):
    """
    Serve a resource file, a bag, or a temporary zip stored in iRODS as an HTTP download.

    The first segment of path selects the mode: 'bags' is a bag download, 'zips' is a zip
    download, anything else is taken to be the resource id followed by the file path.

    :param request: the HTTP request; its session, path and META are read here, and it is
    passed to authorize() and to the pre_download_file signal
    :param path: iRODS path of the object to download
    :param rest_call: when True an authorization failure raises PermissionDenied, error
    messages are returned as plain text, and a pending zip/bag task is reported as a JSON
    response; when False messages are wrapped in <h1> and a pending task redirects to the
    resource page after storing 'task_id' and 'download_path' in the session
    :param use_async: when True zip and bag creation are queued with apply_async();
    when False they are run inline before the download continues
    :param use_reverse_proxy: when True, and settings.SENDFILE_ON is truthy and
    'HTTP_X_DJANGO_REVERSE_PROXY' is in request.META, the file is handed off through an
    X-Accel-Redirect header where a matching branch exists
    :param kwargs: may carry 'environment', the primary key of a RodsEnvironment to run
    the icommands under (only used for non-federated resources)
    :return: an HttpResponse (error message, JSON task status, or X-Accel-Redirect
    hand-off), an HttpResponseRedirect to the resource page, or a FileResponse fed from
    the stdout of 'iget'
    :raises PermissionDenied: on authorization failure when rest_call is True
    :raises KeyError: if no iRODS session can be determined for a non-federated resource
    """
    split_path_strs = path.split('/')
    is_bag_download = False
    is_zip_download = False
    is_sf_agg_file = False
    if split_path_strs[0] == 'bags':
        res_id = os.path.splitext(split_path_strs[1])[0]
        is_bag_download = True
    elif split_path_strs[0] == 'zips':
        # an already-created zip carries the resource id one segment further in than a
        # request for a folder that still has to be zipped
        if path.endswith('.zip'):
            res_id = os.path.splitext(split_path_strs[2])[0]
        else:
            res_id = os.path.splitext(split_path_strs[1])[0]
        is_zip_download = True
    else:
        res_id = split_path_strs[0]

    # if the resource does not exist in django, authorized will be false
    res, authorized, _ = authorize(request, res_id,
                                   needed_permission=ACTION_TO_AUTHORIZE.VIEW_RESOURCE,
                                   raises_exception=False)
    if not authorized:
        response = HttpResponse(status=401)
        content_msg = "You do not have permission to download this resource!"
        if rest_call:
            raise PermissionDenied(content_msg)
        else:
            response.content = "<h1>" + content_msg + "</h1>"
            return response

    # a single-file aggregation of a composite resource is downloaded as a zip
    if res.resource_type == "CompositeResource" and not path.endswith(".zip"):
        for f in ResourceFile.objects.filter(object_id=res.id):
            if path == f.storage_path:
                if f.has_logical_file and f.logical_file.is_single_file_aggregation:
                    is_sf_agg_file = True

    if res.resource_federation_path:
        # the resource is stored in federated zone
        istorage = IrodsStorage('federated')
        federated_path = res.resource_federation_path
        path = os.path.join(federated_path, path)
        session = icommands.ACTIVE_SESSION
    else:
        # TODO: From Alva: I do not understand the use case for changing the environment.
        # TODO: This seems an enormous potential vulnerability, as arguments are
        # TODO: passed from the URI directly to IRODS without verification.
        istorage = IrodsStorage()
        federated_path = ''
        if 'environment' in kwargs:
            environment = int(kwargs['environment'])
            environment = m.RodsEnvironment.objects.get(pk=environment)
            session = Session("/tmp/django_irods", settings.IRODS_ICOMMANDS_PATH,
                              session_id=uuid4())
            session.create_environment(environment)
            session.run('iinit', None, environment.auth)
        elif getattr(settings, 'IRODS_GLOBAL_SESSION', False):
            session = GLOBAL_SESSION
        elif icommands.ACTIVE_SESSION:
            session = icommands.ACTIVE_SESSION
        else:
            raise KeyError('settings must have IRODS_GLOBAL_SESSION set '
                           'if there is no environment object')

    resource_cls = check_resource_type(res.resource_type)

    # root collection of the resource, prefixed with the federation path when federated
    if federated_path:
        res_root = os.path.join(federated_path, res_id)
    else:
        res_root = res_id

    if is_zip_download or is_sf_agg_file:
        if not path.endswith(".zip"):  # requesting folder that needs to be zipped
            input_path = path.split(res_id)[1]
            # the zip is written under a per-day, per-resource folder with a random name
            random_hash = random.getrandbits(32)
            daily_date = datetime.datetime.today().strftime('%Y-%m-%d')
            random_hash_path = 'zips/{daily_date}/{res_id}/{rand_folder}'.format(
                daily_date=daily_date, res_id=res_id,
                rand_folder=random_hash)
            output_path = '{random_hash_path}{path}.zip'.format(random_hash_path=random_hash_path,
                                                                path=input_path)

            if res.resource_type == "CompositeResource":
                aggregation_name = input_path[len('/data/contents/'):]
                res.create_aggregation_xml_documents(aggregation_name=aggregation_name)

            if use_async:
                task = create_temp_zip.apply_async((res_id, input_path, output_path,
                                                    is_sf_agg_file), countdown=3)
                delete_zip.apply_async((random_hash_path, ),
                                       countdown=(20 * 60))  # delete after 20 minutes
                if is_sf_agg_file:
                    download_path = request.path.split(res_id)[0] + output_path
                else:
                    download_path = request.path.split("zips")[0] + output_path
                if rest_call:
                    return HttpResponse(json.dumps({'zip_status': 'Not ready',
                                                    'task_id': task.task_id,
                                                    'download_path': download_path}),
                                        content_type="application/json")
                # non-REST callers are sent back to the resource page with the task id and
                # the eventual download path stored in the session
                request.session['task_id'] = task.task_id
                request.session['download_path'] = download_path
                return HttpResponseRedirect(res.get_absolute_url())

            # synchronous path: create the zip inline, still scheduling its deletion
            ret_status = create_temp_zip(res_id, input_path, output_path, is_sf_agg_file)
            delete_zip.apply_async((random_hash_path, ),
                                   countdown=(20 * 60))  # delete after 20 minutes
            if not ret_status:
                content_msg = "Zip cannot be created successfully. Check log for details."
                response = HttpResponse()
                if rest_call:
                    response.content = content_msg
                else:
                    response.content = "<h1>" + content_msg + "</h1>"
                return response

            # from here on the freshly created zip is the object being downloaded
            path = output_path

    bag_modified = istorage.getAVU(res_root, 'bag_modified')
    # make sure if bag_modified is not set to true, we still recreate the bag if the
    # bag file does not exist for some reason to resolve the error to download a nonexistent
    # bag when bag_modified is false due to the flag being out-of-sync with the real bag status
    if bag_modified is None or bag_modified.lower() == "false":
        # check whether the bag file exists
        bag_file_name = res_id + '.zip'
        if res_root.startswith(res_id):
            bag_full_path = os.path.join('bags', bag_file_name)
        else:
            bag_full_path = os.path.join(federated_path, 'bags', bag_file_name)
        # set bag_modified to 'true' if the bag does not exist so that it can be recreated
        # and the bag_modified AVU will be set correctly as well subsequently
        if not istorage.exists(bag_full_path):
            bag_modified = 'true'

    metadata_dirty = istorage.getAVU(res_root, 'metadata_dirty')
    # do on-demand bag creation
    # needs to check whether res_id collection exists before getting/setting AVU on it
    # to accommodate the case where the very same resource gets deleted by another request
    # when it is getting downloaded
    if is_bag_download:
        # send signal for pre_check_bag_flag
        pre_check_bag_flag.send(sender=resource_cls, resource=res)
        if bag_modified is None or bag_modified.lower() == "true":
            if metadata_dirty is None or metadata_dirty.lower() == 'true':
                create_bag_files(res)
            if use_async:
                # task parameter has to be passed in as a tuple or list, hence (res_id,) is needed
                # Note that since we are using JSON for task parameter serialization, no complex
                # object can be passed as parameters to a celery task
                task = create_bag_by_irods.apply_async((res_id,), countdown=3)
                if rest_call:
                    return HttpResponse(json.dumps({'bag_status': 'Not ready',
                                                    'task_id': task.task_id}),
                                        content_type="application/json")

                request.session['task_id'] = task.task_id
                request.session['download_path'] = request.path
                return HttpResponseRedirect(res.get_absolute_url())
            else:
                ret_status = create_bag_by_irods(res_id)
                if not ret_status:
                    content_msg = "Bag cannot be created successfully. Check log for details."
                    response = HttpResponse()
                    if rest_call:
                        response.content = content_msg
                    else:
                        response.content = "<h1>" + content_msg + "</h1>"
                    return response

    elif metadata_dirty is None or metadata_dirty.lower() == 'true':
        if path.endswith("resourcemap.xml") or path.endswith('resourcemetadata.xml'):
            # we need to regenerate the metadata xml files
            create_bag_files(res)

    # send signal for pre download file
    download_file_name = split_path_strs[-1]
    pre_download_file.send(sender=resource_cls, resource=res,
                           download_file_name=download_file_name,
                           request=request)

    # obtain mime_type to set content_type
    mtype = 'application-x/octet-stream'
    mime_type = mimetypes.guess_type(path)
    if mime_type[0] is not None:
        mtype = mime_type[0]
    # retrieve file size to set up Content-Length header
    # NOTE(review): presumably the 4th whitespace-separated field of the 'ils -l' output
    # is the object size - confirm against the iRODS version in use
    stdout = session.run("ils", None, "-l", path)[0].split()
    flen = int(stdout[3])

    # If this path is resource_federation_path, then the file is a local user file
    userpath = '/' + os.path.join(
        getattr(settings, 'HS_USER_IRODS_ZONE', 'hydroshareuserZone'),
        'home',
        getattr(settings, 'HS_LOCAL_PROXY_USER_IN_FED_ZONE', 'localHydroProxy'))

    # Allow reverse proxy if request was forwarded by nginx
    # (HTTP_X_DJANGO_REVERSE_PROXY is 'true')
    # and reverse proxy is possible according to configuration.

    if use_reverse_proxy and getattr(settings, 'SENDFILE_ON', False) and \
       'HTTP_X_DJANGO_REVERSE_PROXY' in request.META:

        # The NGINX sendfile abstraction is invoked as follows:
        # 1. The request to download a file enters this routine via the /rest_download or /download
        #    url in ./urls.py. It is redirected here from Django. The URI contains either the
        #    unqualified resource path or the federated resource path, depending upon whether
        #    the request is local or federated.
        # 2. This deals with unfederated resources by redirecting them to the uri
        #    /irods-data/{resource-id}/... on nginx. This URI is configured to read the file
        #    directly from the iRODS vault via NFS, and does not work for direct access to the
        #    vault due to the 'internal;' declaration in NGINX.
        # 3. This deals with federated resources by reading their path, matching local vaults, and
        #    redirecting to URIs that are in turn mapped to read from appropriate iRODS vaults. At
        #    present, the only one of these is /irods-user, which handles files whose federation
        #    path is stored in the variable 'userpath'.
        # 4. If there is no vault available for the resource, the file is transferred without
        #    NGINX, exactly as it was transferred previously.

        # stop NGINX targets that are non-existent from hanging forever.
        if not istorage.exists(path):
            content_msg = "file path {} does not exist in iRODS".format(path)
            response = HttpResponse(status=404)
            if rest_call:
                response.content = content_msg
            else:
                response.content = "<h1>" + content_msg + "</h1>"
            return response

        if not res.is_federated:
            # invoke X-Accel-Redirect on physical vault file in nginx
            response = HttpResponse(content_type=mtype)
            response['Content-Disposition'] = 'attachment; filename="{name}"'.format(
                name=path.split('/')[-1])
            response['Content-Length'] = flen
            response['X-Accel-Redirect'] = '/'.join([
                getattr(settings, 'IRODS_DATA_URI', '/irods-data'), path])
            return response

        elif res.resource_federation_path == userpath:  # this guarantees a "user" resource
            # invoke X-Accel-Redirect on physical vault file in nginx
            # if path is full user path; strip federation prefix
            if path.startswith(userpath):
                path = path[len(userpath)+1:]
            # invoke X-Accel-Redirect on physical vault file in nginx
            response = HttpResponse(content_type=mtype)
            response['Content-Disposition'] = 'attachment; filename="{name}"'.format(
                name=path.split('/')[-1])
            response['Content-Length'] = flen
            response['X-Accel-Redirect'] = os.path.join(
                getattr(settings, 'IRODS_USER_URI', '/irods-user'), path)
            return response

    # if we get here, none of the above conditions are true
    if flen <= FILE_SIZE_LIMIT:
        options = ('-',)  # we're redirecting to stdout.
        # this unusual way of calling works for federated or local resources
        proc = session.run_safe('iget', None, path, *options)
        response = FileResponse(proc.stdout, content_type=mtype)
        response['Content-Disposition'] = 'attachment; filename="{name}"'.format(
            name=path.split('/')[-1])
        response['Content-Length'] = flen
        return response

    else:
        # files above FILE_SIZE_LIMIT are refused rather than streamed through Django
        content_msg = "File larger than 1GB cannot be downloaded directly via HTTP. " \
                      "Please download the large file via iRODS clients."
        response = HttpResponse(status=403)
        if rest_call:
            response.content = content_msg
        else:
            response.content = "<h1>" + content_msg + "</h1>"
        return response
def update_quota_usage_task(username):
    """update quota usage.
    This function runs as a celery task, invoked asynchronously with 1 minute delay to give
    the iRODS real time quota update micro-services enough time to refresh the quota usage
    AVU of the user before it is read here. The micro-services update the HydroShare iRODS
    data zone and user zone independently, so the usage of both zones is aggregated here
    before the Django DB is updated with the usage for the hydroshare internal zone.
    :param username: the name of the user that needs to update quota usage for.
    :return: True if quota usage update succeeds; False if the quota row does not exist in
    Django or no usage AVU can be read from either zone. See log for details.
    """
    hs_internal_zone = "hydroshare"
    uq = UserQuota.objects.filter(user__username=username, zone=hs_internal_zone).first()
    if uq is None:
        # the quota row does not exist in Django
        logger.error('quota row does not exist in Django for hydroshare zone for user ' + username)
        return False

    attname = username + '-usage'
    istorage = IrodsStorage()

    def _zone_usage(coll_path):
        # -1 marks "no usage recorded": the user may have no resources in the zone, in
        # which case the AVU is absent (None) or the iRODS call fails
        try:
            raw_value = istorage.getAVU(coll_path, attname)
        except SessionException:
            return -1
        return -1 if raw_value is None else float(raw_value)

    # data zone usage is stored on the bagit path collection; user zone usage on the bagit
    # path collection under the local proxy user's home in the user zone
    data_zone_usage = _zone_usage(settings.IRODS_BAGIT_PATH)
    user_zone_bagit_path = os.path.join('/', settings.HS_USER_IRODS_ZONE, 'home',
                                        settings.HS_LOCAL_PROXY_USER_IN_FED_ZONE,
                                        settings.IRODS_BAGIT_PATH)
    user_zone_usage = _zone_usage(user_zone_bagit_path)

    data_zone_missing = data_zone_usage < 0
    user_zone_missing = user_zone_usage < 0
    if data_zone_missing and user_zone_missing:
        logger.error('no quota size AVU in data zone and user zone for the user ' + username)
        return False

    if user_zone_missing:
        used_val = data_zone_usage
    elif data_zone_missing:
        used_val = user_zone_usage
    else:
        used_val = data_zone_usage + user_zone_usage
    uq.update_used_value(used_val)
    return True
def _message_response(content_msg, rest_call, status=None):
    """
    Build an HttpResponse carrying a short message.

    :param content_msg: the message text to put in the response body.
    :param rest_call: if True the message is used as is; otherwise it is wrapped in <h1> tags.
    :param status: optional HTTP status code; None keeps the HttpResponse default.
    :return: the populated HttpResponse.
    """
    response = HttpResponse(status=status)
    if rest_call:
        response.content = content_msg
    else:
        response.content = "<h1>" + content_msg + "</h1>"
    return response


def _set_attachment_headers(response, path, flen):
    """
    Set Content-Disposition (attachment named after the last segment of path) and
    Content-Length (flen) on response.
    """
    response['Content-Disposition'] = 'attachment; filename="{name}"'.format(
        name=path.split('/')[-1])
    response['Content-Length'] = flen
    return response


def download(request, path, rest_call=False, use_async=True, use_reverse_proxy=True,
             *args, **kwargs):
    """
    Serve a resource file, a resource bag, or an on-demand zip of a resource folder or
    single file aggregation from iRODS.

    :param request: the incoming request; request.path, request.session and request.META
    are used.
    :param path: the requested path. 'bags/<res_id>.zip' requests the resource bag,
    'zips/...' requests a zipped folder (or an already created zip when it ends with '.zip'),
    anything else is '<res_id>/<path to file>'.
    :param rest_call: if True, a permission failure raises PermissionDenied, pending
    bag/zip creation is reported as JSON, and messages are returned without <h1> markup.
    :param use_async: if True, bag/zip creation is dispatched with apply_async and the
    caller is redirected to the resource page (or sent the task id as JSON for REST calls);
    otherwise bag/zip creation is done within this request.
    :param use_reverse_proxy: if True, settings.SENDFILE_ON is set and the request carries
    HTTP_X_DJANGO_REVERSE_PROXY, the file is handed to nginx via X-Accel-Redirect.
    :param kwargs: may contain 'environment', the pk of a RodsEnvironment to create an
    iRODS session from for resources that are not federated.
    :return: an HttpResponse, HttpResponseRedirect or FileResponse.
    :raises PermissionDenied: if rest_call is True and the requester is not authorized.
    :raises KeyError: if no iRODS session can be determined for a non-federated resource.
    """
    split_path_strs = path.split('/')
    is_bag_download = False
    is_zip_download = False
    is_sf_agg_file = False
    if split_path_strs[0] == 'bags':
        res_id = os.path.splitext(split_path_strs[1])[0]
        is_bag_download = True
    elif split_path_strs[0] == 'zips':
        if path.endswith('.zip'):
            res_id = os.path.splitext(split_path_strs[2])[0]
        else:
            res_id = os.path.splitext(split_path_strs[1])[0]
        is_zip_download = True
    else:
        res_id = split_path_strs[0]

    # if the resource does not exist in django, authorized will be false
    res, authorized, _ = authorize(
        request, res_id, needed_permission=ACTION_TO_AUTHORIZE.VIEW_RESOURCE,
        raises_exception=False)
    if not authorized:
        content_msg = "You do not have permission to download this resource!"
        if rest_call:
            raise PermissionDenied(content_msg)
        return _message_response(content_msg, rest_call=False, status=401)

    if res.resource_type == "CompositeResource" and not path.endswith(".zip"):
        # a requested file that makes up a single file aggregation is delivered as a zip;
        # stop scanning as soon as such a file is found
        is_sf_agg_file = any(
            path == f.storage_path and f.has_logical_file and
            f.logical_file.is_single_file_aggregation
            for f in ResourceFile.objects.filter(object_id=res.id))

    if res.resource_federation_path:
        # the resource is stored in federated zone
        istorage = IrodsStorage('federated')
        federated_path = res.resource_federation_path
        path = os.path.join(federated_path, path)
        session = icommands.ACTIVE_SESSION
    else:
        # TODO: From Alva: I do not understand the use case for changing the environment.
        # TODO: This seems an enormous potential vulnerability, as arguments are
        # TODO: passed from the URI directly to IRODS without verification.
        istorage = IrodsStorage()
        federated_path = ''
        if 'environment' in kwargs:
            environment = int(kwargs['environment'])
            environment = m.RodsEnvironment.objects.get(pk=environment)
            session = Session("/tmp/django_irods", settings.IRODS_ICOMMANDS_PATH,
                              session_id=uuid4())
            session.create_environment(environment)
            session.run('iinit', None, environment.auth)
        elif getattr(settings, 'IRODS_GLOBAL_SESSION', False):
            session = GLOBAL_SESSION
        elif icommands.ACTIVE_SESSION:
            session = icommands.ACTIVE_SESSION
        else:
            raise KeyError('settings must have IRODS_GLOBAL_SESSION set '
                           'if there is no environment object')

    resource_cls = check_resource_type(res.resource_type)

    if federated_path:
        res_root = os.path.join(federated_path, res_id)
    else:
        res_root = res_id

    if is_zip_download or is_sf_agg_file:
        if not path.endswith(".zip"):  # requesting folder that needs to be zipped
            # split only on the first occurrence of res_id so that a folder or file name
            # that itself contains the resource id does not truncate the input path
            input_path = path.split(res_id, 1)[1]
            random_hash = random.getrandbits(32)
            daily_date = datetime.datetime.today().strftime('%Y-%m-%d')
            random_hash_path = 'zips/{daily_date}/{res_id}/{rand_folder}'.format(
                daily_date=daily_date, res_id=res_id, rand_folder=random_hash)
            output_path = '{random_hash_path}{path}.zip'.format(
                random_hash_path=random_hash_path, path=input_path)

            if res.resource_type == "CompositeResource":
                aggregation_name = input_path[len('/data/contents/'):]
                res.create_aggregation_xml_documents(aggregation_name=aggregation_name)

            if use_async:
                task = create_temp_zip.apply_async(
                    (res_id, input_path, output_path, is_sf_agg_file), countdown=3)
                delete_zip.apply_async((random_hash_path, ),
                                       countdown=(20 * 60))  # delete after 20 minutes
                if is_sf_agg_file:
                    download_path = request.path.split(res_id)[0] + output_path
                else:
                    download_path = request.path.split("zips")[0] + output_path
                if rest_call:
                    return HttpResponse(
                        json.dumps({
                            'zip_status': 'Not ready',
                            'task_id': task.task_id,
                            'download_path': download_path
                        }),
                        content_type="application/json")
                request.session['task_id'] = task.task_id
                request.session['download_path'] = download_path
                return HttpResponseRedirect(res.get_absolute_url())

            ret_status = create_temp_zip(res_id, input_path, output_path, is_sf_agg_file)
            delete_zip.apply_async((random_hash_path, ),
                                   countdown=(20 * 60))  # delete after 20 minutes
            if not ret_status:
                content_msg = "Zip cannot be created successfully. Check log for details."
                return _message_response(content_msg, rest_call)

            # from here on serve the zip that was just created
            path = output_path

    bag_modified = istorage.getAVU(res_root, 'bag_modified')
    # make sure if bag_modified is not set to true, we still recreate the bag if the
    # bag file does not exist for some reason to resolve the error to download a nonexistent
    # bag when bag_modified is false due to the flag being out-of-sync with the real bag status
    if bag_modified is None or bag_modified.lower() == "false":
        # check whether the bag file exists
        bag_file_name = res_id + '.zip'
        if res_root.startswith(res_id):
            bag_full_path = os.path.join('bags', bag_file_name)
        else:
            bag_full_path = os.path.join(federated_path, 'bags', bag_file_name)
        # set bag_modified to 'true' if the bag does not exist so that it can be recreated
        # and the bag_modified AVU will be set correctly as well subsequently
        if not istorage.exists(bag_full_path):
            bag_modified = 'true'

    metadata_dirty = istorage.getAVU(res_root, 'metadata_dirty')
    # do on-demand bag creation
    # needs to check whether res_id collection exists before getting/setting AVU on it
    # to accommodate the case where the very same resource gets deleted by another request
    # when it is getting downloaded
    if is_bag_download:
        # send signal for pre_check_bag_flag
        pre_check_bag_flag.send(sender=resource_cls, resource=res)
        if bag_modified is None or bag_modified.lower() == "true":
            if metadata_dirty is None or metadata_dirty.lower() == 'true':
                create_bag_files(res)
            if use_async:
                # task parameter has to be passed in as a tuple or list, hence (res_id,) is
                # needed. Note that since we are using JSON for task parameter serialization,
                # no complex object can be passed as parameters to a celery task
                task = create_bag_by_irods.apply_async((res_id, ), countdown=3)
                if rest_call:
                    return HttpResponse(
                        json.dumps({
                            'bag_status': 'Not ready',
                            'task_id': task.task_id
                        }),
                        content_type="application/json")

                request.session['task_id'] = task.task_id
                request.session['download_path'] = request.path
                return HttpResponseRedirect(res.get_absolute_url())
            else:
                ret_status = create_bag_by_irods(res_id)
                if not ret_status:
                    content_msg = "Bag cannot be created successfully. Check log for details."
                    return _message_response(content_msg, rest_call)
    elif metadata_dirty is None or metadata_dirty.lower() == 'true':
        if path.endswith(("resourcemap.xml", 'resourcemetadata.xml')):
            # we need to regenerate the metadata xml files
            create_bag_files(res)

    # send signal for pre download file
    download_file_name = split_path_strs[-1]
    pre_download_file.send(sender=resource_cls, resource=res,
                           download_file_name=download_file_name,
                           request=request)

    # obtain mime_type to set content_type; fall back to the registered generic binary
    # media type when the type cannot be guessed from the file name
    mtype = mimetypes.guess_type(path)[0]
    if mtype is None:
        mtype = 'application/octet-stream'

    # retrieve file size to set up Content-Length header; the size is read from the 4th
    # whitespace-separated field of the "ils -l" output
    # NOTE(review): this runs before the existence check below, so a missing file
    # presumably fails here rather than producing the 404 response - confirm.
    stdout = session.run("ils", None, "-l", path)[0].split()
    flen = int(stdout[3])

    # If this path is resource_federation_path, then the file is a local user file
    userpath = '/' + os.path.join(
        getattr(settings, 'HS_USER_IRODS_ZONE', 'hydroshareuserZone'),
        'home',
        getattr(settings, 'HS_LOCAL_PROXY_USER_IN_FED_ZONE', 'localHydroProxy'))

    # Allow reverse proxy if request was forwarded by nginx
    # (HTTP_X_DJANGO_REVERSE_PROXY is 'true')
    # and reverse proxy is possible according to configuration.
    if use_reverse_proxy and getattr(settings, 'SENDFILE_ON', False) and \
            'HTTP_X_DJANGO_REVERSE_PROXY' in request.META:

        # The NGINX sendfile abstraction is invoked as follows:
        # 1. The request to download a file enters this routine via the /rest_download or
        #    /download url in ./urls.py. It is redirected here from Django. The URI contains
        #    either the unqualified resource path or the federated resource path, depending
        #    upon whether the request is local or federated.
        # 2. This deals with unfederated resources by redirecting them to the uri
        #    /irods-data/{resource-id}/... on nginx. This URI is configured to read the file
        #    directly from the iRODS vault via NFS, and does not work for direct access to the
        #    vault due to the 'internal;' declaration in NGINX.
        # 3. This deals with federated resources by reading their path, matching local vaults,
        #    and redirecting to URIs that are in turn mapped to read from appropriate iRODS
        #    vaults. At present, the only one of these is /irods-user, which handles files
        #    whose federation path is stored in the variable 'userpath'.
        # 4. If there is no vault available for the resource, the file is transferred without
        #    NGINX, exactly as it was transferred previously.

        # stop NGINX targets that are non-existent from hanging forever.
        if not istorage.exists(path):
            # path originates from the request URL and the response is rendered as HTML,
            # so it has to be escaped before being echoed back
            from django.utils.html import escape
            content_msg = "file path {} does not exist in iRODS".format(escape(path))
            return _message_response(content_msg, rest_call, status=404)

        if not res.is_federated:
            # invoke X-Accel-Redirect on physical vault file in nginx
            response = HttpResponse(content_type=mtype)
            _set_attachment_headers(response, path, flen)
            response['X-Accel-Redirect'] = '/'.join(
                [getattr(settings, 'IRODS_DATA_URI', '/irods-data'), path])
            return response
        elif res.resource_federation_path == userpath:  # this guarantees a "user" resource
            # invoke X-Accel-Redirect on physical vault file in nginx
            # if path is full user path; strip federation prefix
            if path.startswith(userpath):
                path = path[len(userpath) + 1:]
            response = HttpResponse(content_type=mtype)
            _set_attachment_headers(response, path, flen)
            response['X-Accel-Redirect'] = os.path.join(
                getattr(settings, 'IRODS_USER_URI', '/irods-user'), path)
            return response

    # if we get here, none of the above conditions are true
    if flen <= FILE_SIZE_LIMIT:
        options = ('-', )  # we're redirecting to stdout.
        # this unusual way of calling works for federated or local resources
        proc = session.run_safe('iget', None, path, *options)
        response = FileResponse(proc.stdout, content_type=mtype)
        _set_attachment_headers(response, path, flen)
        return response

    content_msg = "File larger than 1GB cannot be downloaded directly via HTTP. " \
                  "Please download the large file via iRODS clients."
    return _message_response(content_msg, rest_call, status=403)