def nightly_zips_cleanup():
    """Delete the daily ``zips/<YYYY-MM-DD>`` collection dated two days ago.

    Nothing is deleted when that collection does not exist in iRODS.
    """
    two_days_ago = date.today() - timedelta(2)
    stale_zips = "zips/{daily_date}".format(daily_date=two_days_ago.strftime('%Y-%m-%d'))
    storage = IrodsStorage()
    if not storage.exists(stale_zips):
        return
    storage.delete(stale_zips)
def check_for_dangling_irods(echo_errors=True, log_errors=False, return_errors=False):
    """ Report top-level iRODS directories that have no corresponding Django resource

    :param echo_errors: whether to print errors on stdout
    :param log_errors: whether to log errors to Django log
    :param return_errors: whether to collect errors in the returned list
    :return: list of error messages; it stays empty unless return_errors is set
    """
    storage = IrodsStorage()  # local only
    listing = storage.listdir('.')  # list the resources themselves
    log = logging.getLogger(__name__)

    problems = []
    directories = listing[0]
    for short_key in directories:
        try:
            get_resource_by_shortkey(short_key, or_404=False)
        except BaseResource.DoesNotExist:
            complaint = "resource {} does not exist in Django".format(short_key)
            if echo_errors:
                print(complaint)
            if log_errors:
                log.error(complaint)
            if return_errors:
                problems.append(complaint)
    return problems
def is_federated(homepath):
    """
    Check if the file selected via the iRODS browser is from a federated zone or not

    Args:
        homepath: the logical iRODS file name with full logical path, in the format
            /irods_zone/home/irods_account_username/collection_relative_path

    Returns:
        True if both homepath and the proxy user's home collection in the same zone exist
        in iRODS; False otherwise, including when no zone name can be read from homepath
    """
    homepath = homepath.strip()
    path_parts = homepath.split('/')
    # For '/zone/home/...' the split yields ['', 'zone', 'home', ...], so index 1 is the zone.
    # A path without any '/' yields a single element: treat it as invalid input instead of
    # letting the index lookup raise IndexError.
    if len(path_parts) < 2 or not path_parts[1]:
        return False

    fed_proxy_path = '/' + os.path.join(path_parts[1], 'home',
                                        settings.HS_LOCAL_PROXY_USER_IN_FED_ZONE)

    if settings.REMOTE_USE_IRODS:
        irods_storage = IrodsStorage('federated')
    else:
        irods_storage = IrodsStorage()

    # if the iRODS proxy user in hydroshare zone can list homepath and the federation zone
    # proxy user path, it is federated; otherwise, it is not federated
    return irods_storage.exists(homepath) and irods_storage.exists(fed_proxy_path)
def get_fed_zone_files(irods_fnames):
    """
    Get the files from iRODS federated zone to Django server for metadata extraction on-demand
    for specific resource types

    Args:
        irods_fnames: the logical iRODS file names with full logical path, either as one
            comma-separated string or as a list

    Returns:
        a list of local temp file paths, one per requested file, in input order

    Raises:
        ValueError: if irods_fnames is neither a string nor a list
        OSError: if a temp directory cannot be created
        Any exception raised by the iRODS getFile operation propagates unchanged.
    """
    if isinstance(irods_fnames, basestring):
        ifnames = irods_fnames.split(',')
    elif isinstance(irods_fnames, list):
        ifnames = irods_fnames
    else:
        raise ValueError(
            "Input parameter to get_fed_zone_files() must be String or List")

    irods_storage = IrodsStorage('federated')
    ret_file_list = []
    for ifname in ifnames:
        fname = os.path.basename(ifname.rstrip(os.sep))
        tmpdir = os.path.join(settings.TEMP_FILE_DIR, uuid4().hex)
        tmpfile = os.path.join(tmpdir, fname)
        try:
            os.makedirs(tmpdir)
        except OSError as ex:
            if ex.errno != errno.EEXIST:
                # Re-raise as is: wrapping in Exception(ex.message) dropped the errno and
                # the error text (message is empty for multi-argument exceptions).
                raise
            # the directory already exists: start from an empty one
            shutil.rmtree(tmpdir)
            os.makedirs(tmpdir)
        irods_storage.getFile(ifname, tmpfile)
        ret_file_list.append(tmpfile)
    return ret_file_list
def migrate_tif_file(apps, schema_editor):
    """Create a vrt file from the tif file of each single-file Raster Resource.

    Each resource is handled best-effort: a failure is logged with its traceback and the
    loop moves on to the next resource.

    :param apps: migration app registry (unused here)
    :param schema_editor: migration schema editor (unused here)
    """
    log = logging.getLogger()
    istorage = IrodsStorage()
    for res in RasterResource.objects.all():
        try:
            if len(res.files.all()) != 1:
                continue
            res_file = res.files.all().first()
            vrt_file_path = create_vrt_file(res_file.resource_file)
            try:
                if os.path.isfile(vrt_file_path):
                    # keep the handle open for all the processing, then close it before the
                    # temp directory is removed
                    with open(vrt_file_path, 'r') as vrt_handle:
                        files = UploadedFile(file=vrt_handle,
                                             name=os.path.basename(vrt_file_path))
                        hydroshare.add_resource_files(res.short_id, files)

                        bag_name = 'bags/{res_id}.zip'.format(res_id=res.short_id)
                        if istorage.exists(bag_name):
                            # delete the resource bag as the old bag is not valid
                            istorage.delete(bag_name)
                            print("Deleted bag for resource ID:" + str(res.short_id))

                        resource_modified(res, res.creator)
                        log.info('Tif file conversion to VRT successful for resource:ID:{} '
                                 'Title:{}'.format(res.short_id, res.metadata.title.value))
                else:
                    log.error('Tif file conversion to VRT unsuccessful for resource:ID:{} '
                              'Title:{}'.format(res.short_id, res.metadata.title.value))
            finally:
                # always remove the temp directory, even when the upload failed
                if os.path.exists(vrt_file_path):
                    shutil.rmtree(os.path.dirname(vrt_file_path))
        except Exception:
            # best-effort migration: record the failure instead of silently dropping it
            log.exception('Tif file conversion to VRT failed for resource:ID:{}'.format(
                res.short_id))
def assert_federated_irods_available(self):
    """Assert the federated iRODS settings are in place, then create both storage objects.

    On success ``self.irods_fed_storage`` (federated) and ``self.irods_storage`` (default)
    are set for federation-related tests.
    """
    federation_ready = (settings.REMOTE_USE_IRODS and
                        settings.HS_USER_ZONE_HOST == 'users.local.org' and
                        settings.IRODS_HOST == 'data.local.org')
    self.assertTrue(federation_ready,
                    "irods docker containers are not set up properly for federation testing")
    self.irods_fed_storage = IrodsStorage('federated')
    self.irods_storage = IrodsStorage()
def tearDown(self):
    """Delete the test resource, all generic resources and the zip left at output_path."""
    super(TestFolderDownloadZip, self).tearDown()
    if self.res:
        self.res.delete()
    GenericResource.objects.all().delete()

    storage = IrodsStorage()
    zip_exists = storage.exists(self.output_path)
    if zip_exists:
        storage.delete(self.output_path)
def file_download_url_mapper(request, shortkey, filename):
    """ redirects a file URI from the resourcemap document to its django_irods download URL"""
    authorize(request, shortkey, view=True, edit=True, full=True, superuser=True)
    # the iRODS path is the '/'-split request path from index 2 up to, but excluding,
    # the last element
    path_segments = request.path.split('/')
    irods_file_path = '/'.join(path_segments[2:-1])
    return HttpResponseRedirect(IrodsStorage().url(irods_file_path))
def upload_from_irods(username, password, host, port, zone, irods_fname, res_files):
    """
    Download one iRODS data object with the given login and append it to res_files

    :param username: iRODS login account username
    :param password: iRODS login account password
    :param host: iRODS login host
    :param port: iRODS login port
    :param zone: iRODS login zone
    :param irods_fname: the data object file name to download
    :param res_files: list the downloaded file is appended to as an UploadedFile
    :raises iRODSException: wraps any exception raised while opening the session or
        downloading, carrying that exception's message
    :return: None
    """
    try:
        irods_storage = IrodsStorage()
        irods_storage.set_user_session(username=username, password=password, host=host,
                                       port=port, zone=zone)
        tmpFile = irods_storage.download(irods_fname)
        fname = os.path.basename(irods_fname.rstrip(os.sep))
        res_files.append(UploadedFile(file=tmpFile, name=fname))
    except Exception as ex:
        # ex.message is empty for exceptions built with several arguments, so fall back
        # to str(ex) to keep the error detail
        raise iRODSException(getattr(ex, 'message', None) or str(ex))
def delete_fed_zone_file(file_name_with_full_path):
    '''
    Delete a file through the federated iRODS storage
    Args:
        file_name_with_full_path: the absolute full logical path in a federated iRODS zone
    Returns:
        None, but exceptions will be raised if there is an issue with iRODS delete operation
    '''
    IrodsStorage('federated').delete(file_name_with_full_path)
def save_files_to_user_zone(self, file_name_to_target_name_dict):
    """Save several files to the iRODS user zone through one federated IrodsStorage object.

    The storage object is also kept on ``self.irods_storage``.

    :param file_name_to_target_name_dict: a dictionary mapping each file to be saved to the
    full path file name in iRODS user zone it is saved to
    :return:
    """
    fed_storage = IrodsStorage('federated')
    self.irods_storage = fed_storage
    for source_name, destination_name in file_name_to_target_name_dict.iteritems():
        fed_storage.saveFile(source_name, destination_name)
def create_empty_contents_directory(resource):
    """Create the ``data/contents`` collection of resource in iRODS when it is missing.

    A resource with a federation path uses the federated storage and a path prefixed with
    that federation path; otherwise the default storage is used.
    """
    res_id = resource.short_id
    if resource.resource_federation_path:
        istorage = IrodsStorage('federated')
        contents_path = '{}/{}/data/contents'.format(resource.resource_federation_path,
                                                     res_id)
    else:
        istorage = IrodsStorage()
        contents_path = '{}/data/contents'.format(res_id)

    if istorage.exists(contents_path):
        return
    istorage.session.run("imkdir", None, '-p', contents_path)
def tearDown(self):
    """Delete the test resource, the local fixture files and the ``zips`` collection."""
    super(TestFolderDownloadZip, self).tearDown()
    if self.res:
        self.res.delete()
    for fixture in (self.test_file, self.refts_file):
        if fixture:
            # close the handle before removing the file so it is not leaked
            fixture.close()
            os.remove(fixture.name)
    GenericResource.objects.all().delete()
    istorage = IrodsStorage()
    if istorage.exists("zips"):
        istorage.delete("zips")
def get_fed_zone_file_size(fname):
    """
    Return the size of a data object as reported by the federated iRODS storage
    Args:
        fname: the logical iRODS file name with full logical path

    Returns:
        the size of the file
    """
    return IrodsStorage('federated').size(fname)
def get_quota_usage_from_irods(username):
    """
    Read the '<username>-usage' AVU from the bagit path of the data zone and of the user zone
    :param username: the user name to get quota usage for.
    :return: the usage found in one zone, or the sum when both zones have one; raise
    ValidationError if neither zone provides a usage value
    """
    attname = username + '-usage'
    istorage = IrodsStorage()
    not_found = -1

    def read_usage(bagit_path):
        # The user may have no resources in a zone, in which case the AVU is absent
        # (None) or the query raises SessionException; both count as "not found".
        try:
            raw_value = istorage.getAVU(bagit_path, attname)
        except SessionException:
            return not_found
        if raw_value is None:
            return not_found
        return float(raw_value)

    # quota size for the user in iRODS data zone
    data_zone_usage = read_usage(settings.IRODS_BAGIT_PATH)

    # quota size for the user in iRODS user zone
    uz_bagit_path = os.path.join('/', settings.HS_USER_IRODS_ZONE, 'home',
                                 settings.HS_IRODS_PROXY_USER_IN_USER_ZONE,
                                 settings.IRODS_BAGIT_PATH)
    user_zone_usage = read_usage(uz_bagit_path)

    data_missing = data_zone_usage < 0
    user_missing = user_zone_usage < 0
    if data_missing and user_missing:
        err_msg = 'no quota size AVU in data zone and user zone for user {}'.format(
            username)
        logger.error(err_msg)
        raise ValidationError(err_msg)
    if user_missing:
        return data_zone_usage
    if data_missing:
        return user_zone_usage
    return data_zone_usage + user_zone_usage
def upload_from_irods(username, password, host, port, zone, irods_fnames, res_files):
    """
    Download the selected data objects from an iRODS zone and append them to res_files
    :param username: iRODS login account username used to download irods data object
    :param password: iRODS login account password used to download irods data object
    :param host: iRODS login host used to download irods data object
    :param port: iRODS login port used to download irods data object
    :param zone: iRODS login zone used to download irods data object
    :param irods_fnames: comma-separated data object file names to download
    :param res_files: list of files for uploading to create resources
    :raises SessionException(proc.returncode, stdout, stderr) defined in
    django_irods/icommands.py to capture iRODS exceptions raised from iRODS icommand
    subprocess run triggered from any method calls from IrodsStorage()
    :return: None, but each downloaded file is appended to res_files with its size set
    """
    irods_storage = IrodsStorage()
    irods_storage.set_user_session(username=username, password=password, host=host,
                                   port=port, zone=zone)
    try:
        for ifname in irods_fnames.split(','):
            size = irods_storage.size(ifname)
            tmpFile = irods_storage.download(ifname)
            fname = os.path.basename(ifname.rstrip(os.sep))
            fileobj = File(file=tmpFile, name=fname)
            fileobj.size = size
            res_files.append(fileobj)
    finally:
        # delete the user session after iRODS file operations are done, including when
        # one of them raised
        irods_storage.delete_user_session()
def tearDown(self):
    """Delete the test resource, close and remove the fixture files, drop ``zips``."""
    super(TestFolderDownloadZip, self).tearDown()
    if self.res:
        self.res.delete()

    text_fixture = self.test_file
    if text_fixture:
        text_fixture.close()
        os.remove(text_fixture.name)

    refts_fixture = self.refts_file
    if refts_fixture:
        refts_fixture.close()
        os.remove(refts_fixture.name)

    GenericResource.objects.all().delete()
    storage = IrodsStorage()
    if storage.exists("zips"):
        storage.delete("zips")
def resource_modified(resource, by_user=None, overwrite_bag=True):
    """Record a modification of resource and flag its bag as modified in iRODS.

    :param resource: the resource that was modified
    :param by_user: stored as the resource's last_changed_by
    :param overwrite_bag: when True, create_bag_files() is called for the resource
    """
    resource.last_changed_by = by_user
    resource.updated = now().isoformat()
    resource.save()

    date_elements = resource.metadata.dates
    if date_elements.all().filter(type='modified'):
        modified_element = date_elements.all().filter(type='modified')[0]
        resource.metadata.update_element('date', modified_element.id)

    if overwrite_bag:
        create_bag_files(resource)

    # set bag_modified-true AVU pair for the modified resource in iRODS to indicate
    # the resource is modified for on-demand bagging.
    IrodsStorage().setAVU(resource.short_id, "bag_modified", "true")
def resource_modified(resource, by_user=None, overwrite_bag=True):
    """Record a modification of resource and flag its bag as modified in iRODS.

    :param resource: the resource that was modified
    :param by_user: stored as the resource's last_changed_by
    :param overwrite_bag: when True, create_bag_files() is called for the resource
    """
    resource.last_changed_by = by_user
    resource.updated = now().isoformat()
    resource.save()

    date_elements = resource.metadata.dates
    if date_elements.all().filter(type='modified'):
        modified_element = date_elements.all().filter(type='modified')[0]
        resource.metadata.update_element('date', modified_element.id)

    if overwrite_bag:
        create_bag_files(resource)

    # set bag_modified-true AVU pair for the modified resource in iRODS to indicate
    # the resource is modified for on-demand bagging.
    IrodsStorage().setAVU(resource.short_id, "bag_modified", "true")
def test_create_temp_zip(self):
    """create_temp_zip() must return a true value and leave the zip at output_path."""
    input_path = "/data/contents/foo"
    try:
        zipped = create_temp_zip(self.res.short_id, input_path, self.output_path)
        self.assertTrue(zipped)
        zip_present = IrodsStorage().exists(self.output_path)
        self.assertTrue(zip_present)
    except Exception as ex:
        self.fail("create_temp_zip() raised exception.{}".format(ex.message))
def test_create_temp_zip(self):
    """Zip the foo folder, then the refts aggregation, and check both zips exist in iRODS."""
    folder_input = "{}/data/contents/foo".format(self.res.short_id)
    folder_zip = "zips/rand/foo.zip"
    folder_zipped = create_temp_zip(self.res.short_id, folder_input, folder_zip, False)
    self.assertTrue(folder_zipped)
    self.assertTrue(IrodsStorage().exists(folder_zip))

    # test aggregation
    aggregation_input = "{}/data/contents/multi_sites_formatted_version1.0.refts.json".format(
        self.res.short_id)
    aggregation_zip = "zips/rand/multi_sites_formatted_version1.0.refts.json.zip"
    aggregation_zipped = create_temp_zip(self.res.short_id, aggregation_input,
                                         aggregation_zip, True, sf_zip=True)
    self.assertTrue(aggregation_zipped)
    self.assertTrue(IrodsStorage().exists(aggregation_zip))
def verify_user_quota_usage_avu_in_user_zone(self, attname, qsize):
    '''
    Open a user-zone session as HS_USER_ZONE_PROXY_USER (needs the rodsadmin role to get
    the user type AVU in user zone) and verify the quota usage AVU is set correctly
    :param attname: quota usage attribute name set on iRODS proxy user in user zone
    :param qsize: quota size (type string) expected as the value of attname
    '''
    istorage = IrodsStorage()
    istorage.set_user_session(
        username=settings.HS_USER_ZONE_PROXY_USER,
        password=settings.HS_USER_ZONE_PROXY_USER_PWD,
        host=settings.HS_USER_ZONE_HOST,
        port=settings.IRODS_PORT,
        zone=settings.HS_USER_IRODS_ZONE,
        sess_id='user_proxy_session')

    path_parts = ('/', settings.HS_USER_IRODS_ZONE, 'home',
                  settings.HS_LOCAL_PROXY_USER_IN_FED_ZONE, settings.IRODS_BAGIT_PATH)
    uz_bagit_path = os.path.join(*path_parts)
    self.assertEqual(qsize, istorage.getAVU(uz_bagit_path, attname))
def verify_user_quota_usage_avu_in_user_zone(self, attname, qsize):
    '''
    Open a user-zone session as LINUX_ADMIN_USER_FOR_HS_USER_ZONE (needs the rodsadmin role
    to get the user type AVU in user zone) and verify the quota usage AVU is set correctly
    :param attname: quota usage attribute name set on iRODS proxy user in user zone
    :param qsize: quota size (type string) expected as the value of attname
    '''
    istorage = IrodsStorage()
    istorage.set_user_session(
        username=settings.LINUX_ADMIN_USER_FOR_HS_USER_ZONE,
        password=settings.LINUX_ADMIN_USER_PWD_FOR_HS_USER_ZONE,
        host=settings.HS_USER_ZONE_HOST,
        port=settings.IRODS_PORT,
        zone=settings.HS_USER_IRODS_ZONE,
        sess_id='user_proxy_session')

    path_parts = ('/', settings.HS_USER_IRODS_ZONE, 'home',
                  settings.HS_IRODS_PROXY_USER_IN_USER_ZONE, settings.IRODS_BAGIT_PATH)
    uz_bagit_path = os.path.join(*path_parts)
    self.assertEqual(qsize, istorage.getAVU(uz_bagit_path, attname))
def create_temp_zip(resource_id, input_path, output_path):
    """Zip input_path, taken relative to the resource's root path, to output_path in iRODS.

    :param resource_id: short key used to look up the resource
    :param input_path: path below the resource's root_path to zip up
    :param output_path: iRODS path of the zip to create
    :return: True on success; False when zipup raises SessionException (its stderr is logged)
    """
    from hs_core.hydroshare.utils import get_resource_by_shortkey
    resource = get_resource_by_shortkey(resource_id)
    source_path = '{root_path}/{path}'.format(root_path=resource.root_path, path=input_path)
    try:
        IrodsStorage().zipup(source_path, output_path)
    except SessionException as ex:
        logger.error(ex.stderr)
        return False
    else:
        return True
def is_federated(homepath):
    """
    Check if the file selected via the iRODS browser is from a federated zone or not

    Args:
        homepath: the logical iRODS file name with full logical path, in the format
            /irods_zone/home/irods_account_username/collection_relative_path

    Returns:
        True if both homepath and the proxy user's home collection in the same zone exist
        in iRODS; False otherwise, including when no zone name can be read from homepath
    """
    homepath = homepath.strip()
    path_parts = homepath.split('/')
    # For '/zone/home/...' the split yields ['', 'zone', 'home', ...], so index 1 is the zone.
    # A path without any '/' yields a single element: treat it as invalid input instead of
    # letting the index lookup raise IndexError.
    if len(path_parts) < 2 or not path_parts[1]:
        return False

    fed_proxy_path = '/' + os.path.join(path_parts[1], 'home',
                                        settings.HS_LOCAL_PROXY_USER_IN_FED_ZONE)

    if settings.REMOTE_USE_IRODS:
        irods_storage = IrodsStorage('federated')
    else:
        irods_storage = IrodsStorage()

    # if the iRODS proxy user in hydroshare zone can list homepath and the federation zone
    # proxy user path, it is federated; otherwise, it is not federated
    return irods_storage.exists(homepath) and irods_storage.exists(fed_proxy_path)
def create_bag_by_irods(resource_id, istorage=None):
    """
    Create a resource bag on iRODS side by running the bagit rule followed by the zipping
    operation.

    Parameters:
    :param resource_id: the resource uuid that is used to look for the resource to create
    the bag for.
    :param istorage: IrodsStorage object used for the bagit rule and zipping operations;
    a new one is created when it is not given
    :return: none
    """
    if not istorage:
        istorage = IrodsStorage()

    # only proceed when the resource is not deleted potentially by another request when
    # being downloaded
    if not istorage.exists(resource_id):
        return

    # inputs for the iRODS bagit rule
    home_collection = "/" + settings.IRODS_ZONE + "/home/" + settings.IRODS_USERNAME
    resource_collection = os.path.join(home_collection, resource_id)
    rule_data_arg = "*BAGITDATA='{path}'".format(path=resource_collection)
    rule_resc_arg = "*DESTRESC='{def_res}'".format(def_res=settings.IRODS_DEFAULT_RESOURCE)
    rule_file = getattr(settings, 'IRODS_BAGIT_RULE', 'hydroshare/irods/ruleGenerateBagIt_HS.r')
    bag_zip = 'bags/{res_id}.zip'.format(res_id=resource_id)

    try:
        # SessionException is ignored for now as a workaround: it could be raised from
        # potential race conditions when multiple ibun commands try to create the same zip
        # file or the very same resource gets deleted by another request when being
        # downloaded
        istorage.runBagitRule(rule_file, rule_data_arg, rule_resc_arg)
        istorage.zipup(resource_collection, bag_zip)
    except SessionException:
        pass
def replace_resource_file_on_irods(new_file, original_resource_file, user):
    """
    Overwrite the given resource file in iRODS (local or federated zone) with new_file
    :param new_file: file path for the file to be copied to iRODS
    :param original_resource_file: an instance of ResourceFile that is to be replaced
    :param user: user who is replacing the resource file.
    :return:
    """
    ori_res = original_resource_file.resource
    if original_resource_file.resource_file:
        istorage = IrodsStorage()
        target_path = original_resource_file.resource_file.name
    elif original_resource_file.fed_resource_file:
        istorage = IrodsStorage('federated')
        target_path = original_resource_file.fed_resource_file.name
    else:
        istorage = IrodsStorage('federated')
        target_path = os.path.join(ori_res.resource_federation_path, ori_res.short_id,
                                   original_resource_file.fed_resource_file_name_or_path)

    istorage.saveFile(new_file, target_path, True)

    # need to do this so that the bag will be regenerated prior to download of the bag
    resource_modified(ori_res, by_user=user, overwrite_bag=False)
def setUp(self):
    """Create a user, a composite resource with folder 'foo', and two fixture files.

    ``self.test_file`` and ``self.refts_file`` are left open for the duration of the test
    and have to be closed and removed during teardown.
    """
    super(TestFolderDownloadZip, self).setUp()
    self.group, _ = Group.objects.get_or_create(name='Hydroshare Author')
    self.user = create_account('*****@*****.**', username='******', first_name='Shaun',
                               last_name='Livingston', superuser=False, groups=[])
    self.res = create_resource(resource_type='CompositeResource', owner=self.user,
                               title='Test Resource', metadata=[])
    ResourceFile.create_folder(self.res, 'foo')

    # create files
    self.n1 = "test1.txt"
    with open(self.n1, 'w') as test_file:
        test_file.write("Test text file in test1.txt")

    self.test_file = open(self.n1, "rb")
    add_resource_files(self.res.short_id, self.test_file, folder='foo')

    # copy refts file into new file to be added to the resource as an aggregation;
    # both temporary handles are closed by the with statement
    with open('hs_core/tests/data/multi_sites_formatted_version1.0.refts.json',
              'rb') as reft_data_file:
        with open('multi_sites_formatted_version1.0.refts.json', 'wb') as refts_file:
            refts_file.writelines(reft_data_file.readlines())

    self.refts_file = open('multi_sites_formatted_version1.0.refts.json', 'rb')
    add_resource_files(self.res.short_id, self.refts_file)
    self.res.create_aggregation_xml_documents()
    self.istorage = IrodsStorage()
def nightly_zips_cleanup():
    """Delete the zips collection dated two days ago, locally and under federated prefixes.

    The federated prefixes are the distinct non-empty resource_federation_path values of
    all resources.
    """
    # delete 2 days ago
    date_folder = (date.today() - timedelta(2)).strftime('%Y-%m-%d')

    local_zips = "zips/{daily_date}".format(daily_date=date_folder)
    if __debug__:
        logger.debug("cleaning up {}".format(local_zips))
    local_storage = IrodsStorage()
    if local_storage.exists(local_zips):
        local_storage.delete(local_zips)

    prefix_rows = BaseResource.objects.all().values_list('resource_federation_path').distinct()
    for row in prefix_rows:
        prefix = row[0]  # strip tuple
        if prefix == "":
            continue
        federated_zips = "{prefix}/zips/{daily_date}".format(prefix=prefix,
                                                             daily_date=date_folder)
        if __debug__:
            logger.debug("cleaning up {}".format(federated_zips))
        federated_storage = IrodsStorage("federated")
        if federated_storage.exists(federated_zips):
            federated_storage.delete(federated_zips)
def migrate_tif_file(apps, schema_editor):
    """Create a vrt file from the tif file of each single-file Raster Resource.

    Each resource is handled best-effort: a failure is logged with its traceback and the
    loop moves on to the next resource.

    :param apps: migration app registry (unused here)
    :param schema_editor: migration schema editor (unused here)
    """
    log = logging.getLogger()
    istorage = IrodsStorage()
    for res in RasterResource.objects.all():
        try:
            if len(res.files.all()) != 1:
                continue
            res_file = res.files.all().first()
            vrt_file_path = create_vrt_file(res_file.resource_file)
            try:
                if os.path.isfile(vrt_file_path):
                    # keep the handle open for all the processing, then close it before the
                    # temp directory is removed
                    with open(vrt_file_path, 'r') as vrt_handle:
                        files = UploadedFile(file=vrt_handle,
                                             name=os.path.basename(vrt_file_path))
                        hydroshare.add_resource_files(res.short_id, files)

                        bag_name = 'bags/{res_id}.zip'.format(res_id=res.short_id)
                        if istorage.exists(bag_name):
                            # delete the resource bag as the old bag is not valid
                            istorage.delete(bag_name)
                            print("Deleted bag for resource ID:" + str(res.short_id))

                        resource_modified(res, res.creator)
                        log.info(
                            'Tif file conversion to VRT successful for resource:ID:{} '
                            'Title:{}'.format(res.short_id, res.metadata.title.value))
                else:
                    log.error(
                        'Tif file conversion to VRT unsuccessful for resource:ID:{} '
                        'Title:{}'.format(res.short_id, res.metadata.title.value))
            finally:
                # always remove the temp directory, even when the upload failed
                if os.path.exists(vrt_file_path):
                    shutil.rmtree(os.path.dirname(vrt_file_path))
        except Exception:
            # best-effort migration: record the failure instead of silently dropping it
            log.exception('Tif file conversion to VRT failed for resource:ID:{}'.format(
                res.short_id))
def handle(self, *args, **options):
    """Set the ``resetQuotaDir`` AVU to 1 on the data zone and user zone home collections."""
    istorage = IrodsStorage()
    home_template = '/{}/home/{}'

    # reset quota for data zone
    root_path = home_template.format(settings.IRODS_ZONE, settings.IRODS_USERNAME)
    istorage.setAVU(root_path, 'resetQuotaDir', 1)

    # reset quota for user zone
    user_root_path = home_template.format(settings.HS_USER_IRODS_ZONE,
                                          settings.HS_IRODS_PROXY_USER_IN_USER_ZONE)
    istorage.setAVU(user_root_path, 'resetQuotaDir', 1)
def get_fed_zone_files(irods_fnames):
    """
    Get the files from iRODS federated zone to Django server for metadata extraction on-demand
    for specific resource types

    Args:
        irods_fnames: the logical iRODS file names with full logical path, either as one
            comma-separated string or as a list

    Returns:
        a list of local temp file paths, one per requested file, in input order

    Raises:
        ValueError: if irods_fnames is neither a string nor a list
        OSError: if a temp directory cannot be created
        Any exception raised by the iRODS getFile operation propagates unchanged.

    Note: application must delete these files after use.
    """
    if isinstance(irods_fnames, basestring):
        ifnames = irods_fnames.split(',')
    elif isinstance(irods_fnames, list):
        ifnames = irods_fnames
    else:
        raise ValueError("Input parameter to get_fed_zone_files() must be String or List")

    irods_storage = IrodsStorage('federated')
    ret_file_list = []
    for ifname in ifnames:
        fname = os.path.basename(ifname.rstrip(os.sep))
        # TODO: this is statistically unique but not guaranteed to be unique.
        tmpdir = os.path.join(settings.TEMP_FILE_DIR, uuid4().hex)
        tmpfile = os.path.join(tmpdir, fname)
        try:
            os.makedirs(tmpdir)
        except OSError as ex:
            if ex.errno != errno.EEXIST:
                # Re-raise as is: wrapping in Exception(ex.message) dropped the errno and
                # the error text (message is empty for multi-argument exceptions).
                raise
            # the directory already exists: start from an empty one
            shutil.rmtree(tmpdir)
            os.makedirs(tmpdir)
        irods_storage.getFile(ifname, tmpfile)
        ret_file_list.append(tmpfile)
    return ret_file_list
def download(request, path, *args, **kwargs):
    """Stream the iRODS object at path to the client as an attachment.

    The resource id is taken from the path: for 'bags/<res_id>.<ext>' it is the second
    segment without its extension, otherwise it is the first segment. The bag is
    regenerated first when the resource's 'bag_modified' AVU is "true".

    :param request: the incoming request, used for the view-permission check
    :param path: iRODS path of the object to download
    :param kwargs: may carry 'environment', the pk of a RodsEnvironment to run with
    :raises KeyError: when no environment is given and neither a global nor an active
        session is available
    :return: a FileResponse for the file, or an HttpResponse holding a permission message
    """
    segments = path.split('/')
    if segments[0] == 'bags':
        res_id = os.path.splitext(segments[1])[0]
    else:
        res_id = segments[0]

    _, authorized, _ = authorize(request, res_id,
                                 needed_permission=Action_To_Authorize.VIEW_RESOURCE,
                                 raises_exception=False)
    if not authorized:
        denied = HttpResponse()
        denied.content = "<h1>You do not have permission to download this resource!</h1>"
        return denied

    # pick the icommands session to run with
    if 'environment' in kwargs:
        environment = int(kwargs['environment'])
        environment = m.RodsEnvironment.objects.get(pk=environment)
        session = Session("/tmp/django_irods", settings.IRODS_ICOMMANDS_PATH,
                          session_id=uuid4())
        session.create_environment(environment)
        session.run('iinit', None, environment.auth)
    elif getattr(settings, 'IRODS_GLOBAL_SESSION', False):
        session = GLOBAL_SESSION
    elif icommands.ACTIVE_SESSION:
        session = icommands.ACTIVE_SESSION
    else:
        raise KeyError('settings must have IRODS_GLOBAL_SESSION set if there is no environment object')

    # do on-demand bag creation
    istorage = IrodsStorage()
    bag_modified = "false"
    # check that the res_id collection exists before getting/setting an AVU on it, to
    # accommodate the case where the very same resource gets deleted by another request
    # while it is being downloaded
    if istorage.exists(res_id):
        bag_modified = istorage.getAVU(res_id, 'bag_modified')
    if bag_modified == "true":
        create_bag_by_irods(res_id, istorage)
        if istorage.exists(res_id):
            istorage.setAVU(res_id, 'bag_modified', "false")

    # obtain mime_type to set content_type
    guessed_type = mimetypes.guess_type(path)[0]
    if guessed_type is not None:
        mtype = guessed_type
    else:
        mtype = 'application-x/octet-stream'

    # retrieve file size to set up Content-Length header
    ils_fields = session.run("ils", None, "-l", path)[0].split()
    flen = int(ils_fields[3])

    # '-' makes iget write the object to stdout
    proc = session.run_safe('iget', None, path, '-')
    response = FileResponse(proc.stdout, content_type=mtype)
    response['Content-Disposition'] = 'attachment; filename="{name}"'.format(
        name=path.split('/')[-1])
    response['Content-Length'] = flen
    return response
def get_size_and_avu_for_irods_ref_files(username, password, host, port, zone, irods_fnames):
    """
    Collect the size and the AVUs of the selected iRODS data objects
    :param username: iRODS login account username used to query the data objects
    :param password: iRODS login account password used to query the data objects
    :param host: iRODS login host used to query the data objects
    :param port: iRODS login port used to query the data objects
    :param zone: iRODS login zone used to query the data objects
    :param irods_fnames: comma-separated data object file names to query
    :raises SessionException(proc.returncode, stdout, stderr) defined in
    django_irods/icommands.py to capture iRODS exceptions raised from iRODS icommand
    subprocess run triggered from any method calls from IrodsStorage()
    :return: list of file sizes corresponding to irods_fnames, and one dict of extra metadata
    holding the AVUs of every file, each key being '<file name>_<attribute>'
    """
    irods_storage = IrodsStorage()
    irods_storage.set_user_session(username=username, password=password, host=host,
                                   port=port, zone=zone)
    ifsizes = []
    ifextra_mds = {}
    try:
        for ifname in irods_fnames.split(','):
            ifsizes.append(irods_storage.size(ifname))
            extra_md_dict = irods_storage.getAVU(ifname, type='-d')
            for key, val in extra_md_dict.items():
                ifextra_mds[ifname + '_' + key] = val
    finally:
        # delete the user session after iRODS file operations are done, including when
        # one of them raised
        irods_storage.delete_user_session()
    return ifsizes, ifextra_mds
def delete_bag(resource):
    """
    Delete the resource collection, its zipped bag and its bag records

    Parameters:
    :param resource: the resource to delete the bag for.
    :return: none
    """
    short_id = resource.short_id
    storage = IrodsStorage()

    # delete resource directory first to remove all generated bag-related files
    storage.delete(short_id)

    # the resource bag may not exist due to on-demand bagging
    bag_zip = 'bags/{res_id}.zip'.format(res_id=short_id)
    bag_zip_exists = storage.exists(bag_zip)
    if bag_zip_exists:
        storage.delete(bag_zip)

    # delete the bags table
    for bag_record in resource.bags.all():
        bag_record.delete()
def get_file_from_irods(res_file):
    """
    Copy the file (res_file) from iRODS (local or federated zone) over to django (temp
    directory) which is necessary for manipulating the file (e.g. metadata extraction).
    Note: The caller is responsible for cleaning the temp directory

    :param res_file: an instance of ResourceFile
    :return: location of the copied file
    :raises OSError: if the temp directory cannot be created
    """
    res = res_file.resource
    if res_file.fed_resource_file or res_file.fed_resource_file_name_or_path:
        istorage = IrodsStorage('federated')
    else:
        istorage = IrodsStorage()

    if res_file.resource_file:
        res_file_path = res_file.resource_file.name
        file_name = os.path.basename(res_file.resource_file.name)
    elif res_file.fed_resource_file:
        res_file_path = res_file.fed_resource_file.name
        file_name = os.path.basename(res_file.fed_resource_file.name)
    else:
        res_file_path = os.path.join(res.resource_federation_path, res.short_id,
                                     res_file.fed_resource_file_name_or_path)
        file_name = os.path.basename(res_file.fed_resource_file_name_or_path)

    tmpdir = os.path.join(settings.TEMP_FILE_DIR, uuid4().hex)
    tmpfile = os.path.join(tmpdir, file_name)

    try:
        os.makedirs(tmpdir)
    except OSError as ex:
        if ex.errno != errno.EEXIST:
            # Re-raise as is: wrapping in Exception(ex.message) dropped the errno and the
            # error text (message is empty for multi-argument exceptions).
            raise
        # the directory already exists: start from an empty one
        shutil.rmtree(tmpdir)
        os.makedirs(tmpdir)

    istorage.getFile(res_file_path, tmpfile)
    return tmpfile
def create_bag_by_irods(resource_id, istorage=None):
    """
    Create a resource bag on iRODS side by running the bagit rule followed by the zipping
    operation.

    Parameters:
    :param resource_id: the resource uuid that is used to look for the resource to create
    the bag for.
    :param istorage: IrodsStorage object used for the bagit rule and zipping operations;
    a new one is created when it is not given
    :return: none
    """
    if not istorage:
        istorage = IrodsStorage()

    # only proceed when the resource is not deleted potentially by another request when
    # being downloaded
    if not istorage.exists(resource_id):
        return

    # inputs for the iRODS bagit rule
    home_collection = "/" + settings.IRODS_ZONE + "/home/" + settings.IRODS_USERNAME
    resource_collection = os.path.join(home_collection, resource_id)
    rule_data_arg = "*BAGITDATA='{path}'".format(path=resource_collection)
    rule_resc_arg = "*DESTRESC='{def_res}'".format(def_res=settings.IRODS_DEFAULT_RESOURCE)
    rule_file = getattr(settings, 'IRODS_BAGIT_RULE', 'hydroshare/irods/ruleGenerateBagIt_HS.r')
    bag_zip = 'bags/{res_id}.zip'.format(res_id=resource_id)

    try:
        # SessionException is ignored for now as a workaround: it could be raised from
        # potential race conditions when multiple ibun commands try to create the same zip
        # file or the very same resource gets deleted by another request when being
        # downloaded
        istorage.runBagitRule(rule_file, rule_data_arg, rule_resc_arg)
        istorage.zipup(resource_collection, bag_zip)
    except SessionException:
        pass
def create_bag_files(resource):
    """
    Create and upload the files needed by the bagit operation conducted on the iRODS server.

    No bagit operation is performed here; only the files that will be included in the bag
    (resourcemetadata.xml and resourcemap.xml) are created or updated and saved to iRODS.
    The local working directory is removed before returning, also when an error occurs.

    :param resource: A resource whose files will be created or updated to be included in
    the resource bag.
    :return: istorage, an IrodsStorage object, that will be used by subsequent operation to
    create a bag on demand as needed.
    :raises OSError: if the local working directories cannot be created.
    """
    from . import utils as hs_core_utils

    istorage = IrodsStorage()

    dest_prefix = getattr(settings, 'BAGIT_TEMP_LOCATION', '/tmp/hydroshare/')
    bagit_path = os.path.join(dest_prefix, resource.short_id,
                              arrow.get(resource.updated).format("YYYY.MM.DD.HH.mm.ss"))

    # NOTE(review): when dest_prefix already exists it is wiped and recreated, which also
    # removes working directories of any other bag creation in progress - confirm this is
    # intended.
    for d in (dest_prefix, bagit_path):
        try:
            os.makedirs(d)
        except OSError as ex:
            if ex.errno != errno.EEXIST:
                # re-raise the original error so its type, errno and traceback are kept
                raise
            shutil.rmtree(d)
            os.makedirs(d)

    try:
        # an empty visualization directory will not be put into the zipped bag file by ibun
        # command, so creating an empty visualization directory to be put into the zip file
        # as done by the two statements below does not work. However, if visualization
        # directory has content to be uploaded, it will work. This is to be implemented as
        # part of the resource model in the future. The following two statements are
        # placeholders serving as reminder
        # to_file_name = '{res_id}/data/visualization/'.format(res_id=resource.short_id)
        # istorage.saveFile('', to_file_name, create_directory=True)

        # create resourcemetadata.xml and upload it to iRODS
        from_file_name = '{path}/resourcemetadata.xml'.format(path=bagit_path)
        with open(from_file_name, 'w') as out:
            out.write(resource.metadata.get_xml())

        to_file_name = '{res_id}/data/resourcemetadata.xml'.format(res_id=resource.short_id)
        istorage.saveFile(from_file_name, to_file_name, True)

        # make the resource map
        current_site_url = hs_core_utils.current_site_url()
        hs_res_url = '{hs_url}/resource/{res_id}/data'.format(hs_url=current_site_url,
                                                              res_id=resource.short_id)
        metadata_url = os.path.join(hs_res_url, 'resourcemetadata.xml')
        res_map_url = os.path.join(hs_res_url, 'resourcemap.xml')

        # utils.namespaces['hsterms'] = \
        #     Namespace('{hs_url}/hsterms/'.format(hs_url=current_site_url))
        # utils.namespaceSearchOrder.append('hsterms')
        utils.namespaces['citoterms'] = Namespace('http://purl.org/spar/cito/')
        # register the prefix only once so the search order does not grow on every call
        if 'citoterms' not in utils.namespaceSearchOrder:
            utils.namespaceSearchOrder.append('citoterms')

        ag_url = os.path.join(hs_res_url, 'resourcemap.xml#aggregation')
        a = Aggregation(ag_url)

        # Set properties of the aggregation
        a._dc.title = resource.title
        a._dcterms.type = resource._meta.object_name
        a._citoterms.isDocumentedBy = metadata_url
        a._ore.isDescribedBy = res_map_url

        # Create a description of the metadata document that describes the whole resource
        # and add it to the aggregation
        resMetaFile = AggregatedResource(metadata_url)
        resMetaFile._dc.title = "Dublin Core science metadata document describing the " \
                                "HydroShare resource"
        resMetaFile._citoterms.documents = ag_url
        resMetaFile._ore.isAggregatedBy = ag_url
        resMetaFile._dc.format = "application/rdf+xml"

        # Create a description of each content file
        resFiles = []
        for f in ResourceFile.objects.filter(object_id=resource.id):
            filename = os.path.basename(f.resource_file.name)
            res_file = AggregatedResource(
                '{hs_url}/resource/{res_id}/data/contents/{file_name}'.format(
                    hs_url=current_site_url, res_id=resource.short_id, file_name=filename))
            res_file._ore.isAggregatedBy = ag_url
            res_file._dc.format = hs_core_utils.get_file_mime_type(filename)
            resFiles.append(res_file)

        # Add the metadata document and the resource files to the aggregation
        a.add_resource(resMetaFile)
        for res_file in resFiles:
            a.add_resource(res_file)

        # Register a serializer with the aggregation. The registration creates a new
        # ResourceMap, which needs a URI
        serializer = RdfLibSerializer('xml')
        resMap = a.register_serialization(serializer, res_map_url)
        resMap._dc.identifier = resource.short_id  # "resource_identifier"

        # Fetch the serialization
        remdoc = a.get_serialization()

        # change the namespace for the 'creator' element from 'dcterms' to 'dc'
        xml_string = remdoc.data.replace('dcterms:creator', 'dc:creator')

        # create resourcemap.xml and upload it to iRODS
        from_file_name = os.path.join(bagit_path, 'resourcemap.xml')
        with open(from_file_name, 'w') as out:
            out.write(xml_string)
        to_file_name = os.path.join(resource.short_id, 'data', 'resourcemap.xml')
        istorage.saveFile(from_file_name, to_file_name, False)
    finally:
        # always remove the local working directory, even when a step above failed
        shutil.rmtree(bagit_path)

    return istorage
def delete_zip(zip_path):
    """
    Delete a zip file (or collection) from iRODS if it exists.

    :param zip_path: the iRODS path to delete; nothing is done when it does not exist.
    :return: none
    """
    storage = IrodsStorage()
    if not storage.exists(zip_path):
        return
    storage.delete(zip_path)
class TestCaseCommonUtilities(object):
    """Enable common utilities for iRODS testing.

    This is a mixin: the methods use the assert* methods and attributes (e.g. self.user,
    self.res) of the test case object they are mixed into.
    """

    def assert_federated_irods_available(self):
        """assert federated iRODS is available before proceeding with federation-related tests."""
        self.assertTrue(settings.REMOTE_USE_IRODS and
                        settings.HS_USER_ZONE_HOST == 'users.local.org' and
                        settings.IRODS_HOST == 'data.local.org',
                        "irods docker containers are not set up properly for federation testing")
        self.irods_fed_storage = IrodsStorage('federated')
        self.irods_storage = IrodsStorage()

    def create_irods_user_in_user_zone(self):
        """Create corresponding irods account in user zone.

        Fails the test when the remote command cannot be run or reports an error.
        """
        try:
            exec_cmd = "{0} {1} {2}".format(settings.HS_USER_ZONE_PROXY_USER_CREATE_USER_CMD,
                                            self.user.username, self.user.username)
            output = run_ssh_command(host=settings.HS_USER_ZONE_HOST,
                                     uname=settings.HS_USER_ZONE_PROXY_USER,
                                     pwd=settings.HS_USER_ZONE_PROXY_USER_PWD,
                                     exec_cmd=exec_cmd)
        except Exception as ex:
            self.fail("failed to run the create user command in iRODS user zone: "
                      "{0}".format(ex))

        if output and 'ERROR:' in output.upper():
            # irods account failed to create
            self.fail("failed to create iRODS user in user zone: {0}".format(output))

        user_profile = UserProfile.objects.filter(user=self.user).first()
        user_profile.create_irods_user_account = True
        user_profile.save()

    def delete_irods_user_in_user_zone(self):
        """Delete irods test user in user zone.

        Fails the test when the remote command cannot be run or reports an error.
        """
        try:
            exec_cmd = "{0} {1}".format(settings.HS_USER_ZONE_PROXY_USER_DELETE_USER_CMD,
                                        self.user.username)
            output = run_ssh_command(host=settings.HS_USER_ZONE_HOST,
                                     uname=settings.HS_USER_ZONE_PROXY_USER,
                                     pwd=settings.HS_USER_ZONE_PROXY_USER_PWD,
                                     exec_cmd=exec_cmd)
        except Exception as ex:
            # there is an error from icommand run, report the error
            self.fail("failed to run the delete user command in iRODS user zone: "
                      "{0}".format(ex))

        if output and 'ERROR:' in output.upper():
            # there is an error from icommand run, report the error
            self.fail("failed to delete iRODS user in user zone: {0}".format(output))

        user_profile = UserProfile.objects.filter(user=self.user).first()
        user_profile.create_irods_user_account = False
        user_profile.save()

    def save_files_to_user_zone(self, file_name_to_target_name_dict):
        """Save a list of files to iRODS user zone.

        :param file_name_to_target_name_dict: a dictionary in the form of
        {ori_file: target_file} where ori_file is the file to be saved, and target_file is
        the full path file name in iRODS user zone to save ori_file to
        :return:
        """
        for file_name, target_name in file_name_to_target_name_dict.items():
            self.irods_fed_storage.saveFile(file_name, target_name)

    def check_file_exist(self, irods_path):
        """Check whether the input irods_path exist in iRODS.

        :param irods_path: the iRODS path to check whether it exists or not
        :return: True if exist, False otherwise.
        """
        return self.irods_storage.exists(irods_path)

    def delete_directory(self, irods_path):
        """delete the input irods_path.

        :param irods_path: the iRODS path to be deleted
        :return:
        """
        self.irods_fed_storage.delete(irods_path)

    def verify_user_quota_usage_avu_in_user_zone(self, attname, qsize):
        """Verify the quota usage AVU set in user zone.

        Have to use HS_USER_ZONE_PROXY_USER with rodsadmin role to get user type AVU
        in user zone and verify its quota usage is set correctly

        :param attname: quota usage attribute name set on iRODS proxy user in user zone
        :param qsize: quota size (type string) to be verified to equal to the value set
        for attname.
        """
        istorage = IrodsStorage()
        istorage.set_user_session(username=settings.HS_USER_ZONE_PROXY_USER,
                                  password=settings.HS_USER_ZONE_PROXY_USER_PWD,
                                  host=settings.HS_USER_ZONE_HOST,
                                  port=settings.IRODS_PORT,
                                  zone=settings.HS_USER_IRODS_ZONE,
                                  sess_id='user_proxy_session')
        uz_bagit_path = os.path.join('/', settings.HS_USER_IRODS_ZONE, 'home',
                                     settings.HS_LOCAL_PROXY_USER_IN_FED_ZONE,
                                     settings.IRODS_BAGIT_PATH)
        get_qsize = istorage.getAVU(uz_bagit_path, attname)
        self.assertEqual(qsize, get_qsize)

    def resource_file_oprs(self):
        """Test common iRODS file operations.

        This is a common test utility function to be called by both regular folder operation
        testing and federated zone folder operation testing.
        Make sure the calling TestCase object has the following attributes defined before
        calling this method:
        self.res: resource that has been created that contains files listed in file_name_list
        self.user: owner of the resource
        self.file_name_list: a list of three file names that have been added to the res object
        self.test_file_1 needs to be present for the calling object for doing regular folder
        operations without involving federated zone so that the same opened file can be
        re-added to the resource for testing the case where zipping cannot overwrite
        existing file
        """
        user = self.user
        res = self.res
        file_name_list = self.file_name_list
        # create a folder, if folder is created successfully, no exception is raised, otherwise,
        # an iRODS exception will be raised which will be caught by the test runner and mark as
        # a test failure
        create_folder(res.short_id, 'data/contents/sub_test_dir')
        istorage = res.get_irods_storage()
        res_path = res.file_path
        store = istorage.listdir(res_path)
        self.assertIn('sub_test_dir', store[0], msg='resource does not contain created sub-folder')

        # rename the third file in file_name_list
        move_or_rename_file_or_folder(user, res.short_id,
                                      'data/contents/' + file_name_list[2],
                                      'data/contents/new_' + file_name_list[2])
        # move the first two files in file_name_list to the new folder
        move_or_rename_file_or_folder(user, res.short_id,
                                      'data/contents/' + file_name_list[0],
                                      'data/contents/sub_test_dir/' + file_name_list[0])
        move_or_rename_file_or_folder(user, res.short_id,
                                      'data/contents/' + file_name_list[1],
                                      'data/contents/sub_test_dir/' + file_name_list[1])

        updated_res_file_names = [rf.short_path
                                  for rf in ResourceFile.objects.filter(object_id=res.id)]

        self.assertIn('new_' + file_name_list[2], updated_res_file_names,
                      msg="resource does not contain the updated file new_" + file_name_list[2])
        self.assertNotIn(file_name_list[2], updated_res_file_names,
                         msg='resource still contains the old file ' + file_name_list[2] +
                             ' after renaming')
        self.assertIn('sub_test_dir/' + file_name_list[0], updated_res_file_names,
                      msg='resource does not contain ' + file_name_list[0] +
                          ' moved to a folder')
        self.assertNotIn(file_name_list[0], updated_res_file_names,
                         msg='resource still contains the old ' + file_name_list[0] +
                             'after moving to a folder')
        self.assertIn('sub_test_dir/' + file_name_list[1], updated_res_file_names,
                      msg='resource does not contain ' + file_name_list[1] +
                          'moved to a new folder')
        self.assertNotIn(file_name_list[1], updated_res_file_names,
                         msg='resource still contains the old ' + file_name_list[1] +
                             ' after moving to a folder')

        # zip the folder
        output_zip_fname, size = \
            zip_folder(user, res.short_id, 'data/contents/sub_test_dir',
                       'sub_test_dir.zip', True)
        self.assertGreater(size, 0, msg='zipped file has a size of 0')
        # Now resource should contain only two files: new_file3.txt and sub_test_dir.zip
        # since the folder is zipped into sub_test_dir.zip with the folder deleted
        self.assertEqual(res.files.all().count(), 2,
                         msg="resource file count didn't match-")

        # test unzip does not allow override of existing files
        # add an existing file in the zip to the resource
        if res.resource_federation_path:
            fed_test_file1_full_path = '/{zone}/home/{uname}/{fname}'.format(
                zone=settings.HS_USER_IRODS_ZONE, uname=user.username, fname=file_name_list[0])
            # TODO: why isn't this a method of resource?
            # TODO: Why do we repeat the resource_federation_path?
            add_resource_files(res.short_id, source_names=[fed_test_file1_full_path],
                               move=False)
        else:
            # TODO: Why isn't this a method of resource?
            add_resource_files(res.short_id, self.test_file_1)

        # TODO: use ResourceFile.create_folder, which doesn't require data/contents prefix
        create_folder(res.short_id, 'data/contents/sub_test_dir')

        # TODO: use ResourceFile.rename, which doesn't require data/contents prefix
        move_or_rename_file_or_folder(user, res.short_id,
                                      'data/contents/' + file_name_list[0],
                                      'data/contents/sub_test_dir/' + file_name_list[0])
        # Now resource should contain three files: file3_new.txt, sub_test_dir.zip, and file1.txt
        self.assertEqual(res.files.all().count(), 3, msg="resource file count didn't match")
        unzip_file(user, res.short_id, 'data/contents/sub_test_dir.zip', False)

        # Resource should still contain 5 files: file3_new.txt (2), sub_test_dir.zip,
        # and file1.txt (2)
        file_cnt = res.files.all().count()
        self.assertEqual(file_cnt, 5, msg="resource file count didn't match - " +
                                          str(file_cnt) + " != 5")

        # remove all files except the zipped file
        remove_folder(user, res.short_id, 'data/contents/sub_test_dir')
        remove_folder(user, res.short_id, 'data/contents/sub_test_dir-1')

        # Now resource should contain two files: file3_new.txt sub_test_dir.zip
        file_cnt = res.files.all().count()
        self.assertEqual(file_cnt, 2, msg="resource file count didn't match - " +
                                          str(file_cnt) + " != 2")
        unzip_file(user, res.short_id, 'data/contents/sub_test_dir.zip', True)
        # Now resource should contain three files: file1.txt, file2.txt, and file3_new.txt
        self.assertEqual(res.files.all().count(), 3, msg="resource file count didn't match")
        updated_res_file_names = [rf.short_path
                                  for rf in ResourceFile.objects.filter(object_id=res.id)]
        self.assertNotIn('sub_test_dir.zip', updated_res_file_names,
                         msg="resource still contains the zip file after unzipping")
        self.assertIn('sub_test_dir/sub_test_dir/' + file_name_list[0], updated_res_file_names,
                      msg='resource does not contain unzipped file ' + file_name_list[0])
        self.assertIn('sub_test_dir/sub_test_dir/' + file_name_list[1], updated_res_file_names,
                      msg='resource does not contain unzipped file ' + file_name_list[1])
        self.assertIn('new_' + file_name_list[2], updated_res_file_names,
                      msg='resource does not contain unzipped file new_' + file_name_list[2])

        # rename a folder
        move_or_rename_file_or_folder(user, res.short_id,
                                      'data/contents/sub_test_dir/sub_test_dir',
                                      'data/contents/sub_dir')
        updated_res_file_names = [rf.short_path
                                  for rf in ResourceFile.objects.filter(object_id=res.id)]

        self.assertNotIn('sub_test_dir/sub_test_dir/' + file_name_list[0],
                         updated_res_file_names,
                         msg='resource still contains ' + file_name_list[0] +
                             ' in the old folder after renaming')
        self.assertIn('sub_dir/' + file_name_list[0], updated_res_file_names,
                      msg='resource does not contain ' + file_name_list[0] +
                          ' in the new folder after renaming')
        self.assertNotIn('sub_test_dir/sub_test_dir/' + file_name_list[1],
                         updated_res_file_names,
                         msg='resource still contains ' + file_name_list[1] +
                             ' in the old folder after renaming')
        self.assertIn('sub_dir/' + file_name_list[1], updated_res_file_names,
                      msg='resource does not contain ' + file_name_list[1] +
                          ' in the new folder after renaming')

        # remove a folder
        # TODO: utilize ResourceFile.remove_folder instead. Takes a short path.
        remove_folder(user, res.short_id, 'data/contents/sub_dir')
        # Now resource only contains one file
        self.assertEqual(res.files.all().count(), 1, msg="resource file count didn't match")
        updated_res_file_names = [rf.short_path
                                  for rf in ResourceFile.objects.filter(object_id=res.id)]

        self.assertEqual(len(updated_res_file_names), 1)
        self.assertEqual(updated_res_file_names[0], 'new_' + file_name_list[2])

    def raster_metadata_extraction(self):
        """Test raster metadata extraction.

        This is a common test utility function to be called by both regular raster metadata
        extraction testing and federated zone raster metadata extraction testing.
        Make sure the calling TestCase object has self.resRaster attribute defined before
        calling this method which is the raster resource that has been created containing
        valid raster files.
        """
        # there should be 2 content files
        self.assertEqual(self.resRaster.files.all().count(), 2)

        # test core metadata after metadata extraction
        extracted_title = "My Test Raster Resource"
        self.assertEqual(self.resRaster.metadata.title.value, extracted_title)

        # there should be 1 creator
        self.assertEqual(self.resRaster.metadata.creators.all().count(), 1)

        # there should be 1 coverage element - box type
        self.assertEqual(self.resRaster.metadata.coverages.all().count(), 1)
        self.assertEqual(self.resRaster.metadata.coverages.all().filter(type='box').count(), 1)

        box_coverage = self.resRaster.metadata.coverages.all().filter(type='box').first()
        self.assertEqual(box_coverage.value['projection'], 'WGS 84 EPSG:4326')
        self.assertEqual(box_coverage.value['units'], 'Decimal degrees')
        self.assertEqual(float(box_coverage.value['northlimit']), 42.11270614966863)
        self.assertEqual(float(box_coverage.value['eastlimit']), -111.45699925047542)
        self.assertEqual(float(box_coverage.value['southlimit']), 41.66222054591102)
        self.assertEqual(float(box_coverage.value['westlimit']), -111.81761887121905)

        # there should be 2 format elements
        self.assertEqual(self.resRaster.metadata.formats.all().count(), 2)
        self.assertEqual(self.resRaster.metadata.formats.all().filter(
            value='application/vrt').count(), 1)
        self.assertEqual(self.resRaster.metadata.formats.all().filter(
            value='image/tiff').count(), 1)

        # testing extended metadata element: original coverage
        ori_coverage = self.resRaster.metadata.originalCoverage
        self.assertNotEqual(ori_coverage, None)
        self.assertEqual(float(ori_coverage.value['northlimit']), 4662392.446916306)
        self.assertEqual(float(ori_coverage.value['eastlimit']), 461954.01909127034)
        self.assertEqual(float(ori_coverage.value['southlimit']), 4612592.446916306)
        self.assertEqual(float(ori_coverage.value['westlimit']), 432404.01909127034)
        self.assertEqual(ori_coverage.value['units'], 'meter')
        self.assertEqual(ori_coverage.value['projection'], "NAD83 / UTM zone 12N")
        self.assertEqual(ori_coverage.value['datum'], "North_American_Datum_1983")
        projection_string = u'PROJCS["NAD83 / UTM zone 12N",GEOGCS["NAD83",' \
                            u'DATUM["North_American_Datum_1983",' \
                            u'SPHEROID["GRS 1980",6378137,298.257222101,' \
                            u'AUTHORITY["EPSG","7019"]],' \
                            u'TOWGS84[0,0,0,0,0,0,0],AUTHORITY["EPSG","6269"]],' \
                            u'PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],' \
                            u'UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],' \
                            u'AUTHORITY["EPSG","4269"]],PROJECTION["Transverse_Mercator"],' \
                            u'PARAMETER["latitude_of_origin",0],' \
                            u'PARAMETER["central_meridian",-111],' \
                            u'PARAMETER["scale_factor",0.9996],PARAMETER["false_easting",500000],' \
                            u'PARAMETER["false_northing",0],' \
                            u'UNIT["metre",1,AUTHORITY["EPSG","9001"]],' \
                            u'AXIS["Easting",EAST],AXIS["Northing",' \
                            u'NORTH],AUTHORITY["EPSG","26912"]]'
        self.assertEqual(ori_coverage.value['projection_string'], projection_string)

        # testing extended metadata element: cell information
        cell_info = self.resRaster.metadata.cellInformation
        self.assertEqual(cell_info.rows, 1660)
        self.assertEqual(cell_info.columns, 985)
        self.assertEqual(cell_info.cellSizeXValue, 30.0)
        self.assertEqual(cell_info.cellSizeYValue, 30.0)
        self.assertEqual(cell_info.cellDataType, 'Float32')

        # testing extended metadata element: band information
        self.assertEqual(self.resRaster.metadata.bandInformations.count(), 1)
        band_info = self.resRaster.metadata.bandInformations.first()
        self.assertEqual(band_info.noDataValue, '-3.40282346639e+38')
        self.assertEqual(band_info.maximumValue, '3031.44311523')
        self.assertEqual(band_info.minimumValue, '1358.33459473')

    def netcdf_metadata_extraction(self, expected_creators_count=1):
        """Test NetCDF metadata extraction.

        This is a common test utility function to be called by both regular netcdf metadata
        extraction testing and federated zone netCDF metadata extraction testing.
        Make sure the calling TestCase object has self.resNetcdf attribute defined before
        calling this method which is the netCDF resource that has been created containing
        valid netCDF files.

        :param expected_creators_count: number of creator elements the resource metadata is
        expected to contain.
        """
        # there should be 2 content files
        self.assertEqual(self.resNetcdf.files.all().count(), 2)

        # test core metadata after metadata extraction
        extracted_title = "Snow water equivalent estimation at TWDEF site from " \
                          "Oct 2009 to June 2010"
        self.assertEqual(self.resNetcdf.metadata.title.value, extracted_title)

        # there should be an abstract element
        self.assertNotEqual(self.resNetcdf.metadata.description, None)
        extracted_abstract = "This netCDF data is the simulation output from Utah Energy " \
                             "Balance (UEB) model.It includes the simulation result " \
                             "of snow water equivalent during the period " \
                             "Oct. 2009 to June 2010 for TWDEF site in Utah."
        self.assertEqual(self.resNetcdf.metadata.description.abstract, extracted_abstract)

        # there should be one source element
        self.assertEqual(self.resNetcdf.metadata.sources.all().count(), 1)

        # there should be one license element
        # NOTE(review): this only checks that the rights statement is not equal to 1 -
        # confirm the intended assertion
        self.assertNotEqual(self.resNetcdf.metadata.rights.statement, 1)

        # there should be one relation element
        self.assertEqual(self.resNetcdf.metadata.relations.all().filter(type='cites').count(), 1)

        # there should be creators equal to expected_creators_count
        self.assertEqual(self.resNetcdf.metadata.creators.all().count(), expected_creators_count)

        # there should be one contributor
        self.assertEqual(self.resNetcdf.metadata.contributors.all().count(), 1)

        # there should be 2 coverage element - box type and period type
        self.assertEqual(self.resNetcdf.metadata.coverages.all().count(), 2)
        self.assertEqual(self.resNetcdf.metadata.coverages.all().filter(type='box').count(), 1)
        self.assertEqual(self.resNetcdf.metadata.coverages.all().filter(type='period').count(), 1)

        box_coverage = self.resNetcdf.metadata.coverages.all().filter(type='box').first()
        self.assertEqual(box_coverage.value['projection'], 'WGS 84 EPSG:4326')
        self.assertEqual(box_coverage.value['units'], 'Decimal degrees')
        self.assertEqual(float(box_coverage.value['northlimit']), 41.867126409)
        self.assertEqual(float(box_coverage.value['eastlimit']), -111.505940368)
        self.assertEqual(float(box_coverage.value['southlimit']), 41.8639080745)
        self.assertEqual(float(box_coverage.value['westlimit']), -111.51138808)

        temporal_coverage = self.resNetcdf.metadata.coverages.all().filter(type='period').first()
        self.assertEqual(parser.parse(temporal_coverage.value['start']).date(),
                         parser.parse('10/01/2009').date())
        self.assertEqual(parser.parse(temporal_coverage.value['end']).date(),
                         parser.parse('05/30/2010').date())

        # there should be 2 format elements
        self.assertEqual(self.resNetcdf.metadata.formats.all().count(), 2)
        self.assertEqual(self.resNetcdf.metadata.formats.all().
                         filter(value='text/plain').count(), 1)
        self.assertEqual(self.resNetcdf.metadata.formats.all().
                         filter(value='application/x-netcdf').count(), 1)

        # there should be one subject element
        self.assertEqual(self.resNetcdf.metadata.subjects.all().count(), 1)
        subj_element = self.resNetcdf.metadata.subjects.all().first()
        self.assertEqual(subj_element.value, 'Snow water equivalent')

        # testing extended metadata element: original coverage
        ori_coverage = self.resNetcdf.metadata.ori_coverage.all().first()
        self.assertNotEqual(ori_coverage, None)
        self.assertEqual(ori_coverage.projection_string_type, 'Proj4 String')
        proj_text = u'+proj=tmerc +y_0=0.0 +k_0=0.9996 +x_0=500000.0 +lat_0=0.0 +lon_0=-111.0'
        self.assertEqual(ori_coverage.projection_string_text, proj_text)
        self.assertEqual(float(ori_coverage.value['northlimit']), 4.63515e+06)
        self.assertEqual(float(ori_coverage.value['eastlimit']), 458010.0)
        self.assertEqual(float(ori_coverage.value['southlimit']), 4.63479e+06)
        self.assertEqual(float(ori_coverage.value['westlimit']), 457560.0)
        self.assertEqual(ori_coverage.value['units'], 'Meter')
        self.assertEqual(ori_coverage.value['projection'], 'transverse_mercator')

        # testing extended metadata element: variables
        self.assertEqual(self.resNetcdf.metadata.variables.all().count(), 5)

        # test time variable
        var_time = self.resNetcdf.metadata.variables.all().filter(name='time').first()
        self.assertNotEqual(var_time, None)
        self.assertEqual(var_time.unit, 'hours since 2009-10-1 0:0:00 UTC')
        self.assertEqual(var_time.type, 'Float')
        self.assertEqual(var_time.shape, 'time')
        self.assertEqual(var_time.descriptive_name, 'time')

        # test x variable
        var_x = self.resNetcdf.metadata.variables.all().filter(name='x').first()
        self.assertNotEqual(var_x, None)
        self.assertEqual(var_x.unit, 'Meter')
        self.assertEqual(var_x.type, 'Float')
        self.assertEqual(var_x.shape, 'x')
        self.assertEqual(var_x.descriptive_name, 'x coordinate of projection')

        # test y variable
        var_y = self.resNetcdf.metadata.variables.all().filter(name='y').first()
        self.assertNotEqual(var_y, None)
        self.assertEqual(var_y.unit, 'Meter')
        self.assertEqual(var_y.type, 'Float')
        self.assertEqual(var_y.shape, 'y')
        self.assertEqual(var_y.descriptive_name, 'y coordinate of projection')

        # test SWE variable
        var_swe = self.resNetcdf.metadata.variables.all().filter(name='SWE').first()
        self.assertNotEqual(var_swe, None)
        self.assertEqual(var_swe.unit, 'm')
        self.assertEqual(var_swe.type, 'Float')
        self.assertEqual(var_swe.shape, 'y,x,time')
        self.assertEqual(var_swe.descriptive_name, 'Snow water equivalent')
        self.assertEqual(var_swe.method, 'model simulation of UEB model')
        self.assertEqual(var_swe.missing_value, '-9999')

        # test grid mapping variable
        var_grid = self.resNetcdf.metadata.variables.all().\
            filter(name='transverse_mercator').first()
        self.assertNotEqual(var_grid, None)
        self.assertEqual(var_grid.unit, 'Unknown')
        self.assertEqual(var_grid.type, 'Unknown')
        self.assertEqual(var_grid.shape, 'Not defined')

    def timeseries_metadata_extraction(self):
        """Test timeseries metadata extraction.

        This is a common test utility function to be called by both regular timeseries
        metadata extraction testing and federated zone timeseries metadata extraction testing.
        Make sure the calling TestCase object has self.resTimeSeries attribute defined before
        calling this method which is the timeseries resource that has been created containing
        valid timeseries file.
        """
        # there should be one content file
        self.assertEqual(self.resTimeSeries.files.all().count(), 1)

        # there should be one contributor element
        self.assertEqual(self.resTimeSeries.metadata.contributors.all().count(), 1)

        # test core metadata after metadata extraction
        extracted_title = "Water temperature data from the Little Bear River, UT"
        self.assertEqual(self.resTimeSeries.metadata.title.value, extracted_title)

        # there should be an abstract element
        self.assertNotEqual(self.resTimeSeries.metadata.description, None)
        extracted_abstract = "This dataset contains time series of observations of water " \
                             "temperature in the Little Bear River, UT. Data were recorded every " \
                             "30 minutes. The values were recorded using a HydroLab MS5 " \
                             "multi-parameter water quality sonde connected to a Campbell " \
                             "Scientific datalogger."

        self.assertEqual(self.resTimeSeries.metadata.description.abstract.strip(),
                         extracted_abstract)

        # there should be 2 coverage element - box type and period type
        self.assertEqual(self.resTimeSeries.metadata.coverages.all().count(), 2)
        self.assertEqual(self.resTimeSeries.metadata.coverages.all().filter(type='box').count(), 1)
        self.assertEqual(self.resTimeSeries.metadata.coverages.all().filter(
            type='period').count(), 1)

        box_coverage = self.resTimeSeries.metadata.coverages.all().filter(type='box').first()
        self.assertEqual(box_coverage.value['projection'], 'WGS 84 EPSG:4326')
        self.assertEqual(box_coverage.value['units'], 'Decimal degrees')
        self.assertEqual(float(box_coverage.value['northlimit']), 41.718473)
        self.assertEqual(float(box_coverage.value['eastlimit']), -111.799324)
        self.assertEqual(float(box_coverage.value['southlimit']), 41.495409)
        self.assertEqual(float(box_coverage.value['westlimit']), -111.946402)

        temporal_coverage = self.resTimeSeries.metadata.coverages.all().filter(
            type='period').first()
        self.assertEqual(parser.parse(temporal_coverage.value['start']).date(),
                         parser.parse('01/01/2008').date())
        self.assertEqual(parser.parse(temporal_coverage.value['end']).date(),
                         parser.parse('01/30/2008').date())

        # there should be one format element
        self.assertEqual(self.resTimeSeries.metadata.formats.all().count(), 1)
        format_element = self.resTimeSeries.metadata.formats.all().first()
        self.assertEqual(format_element.value, 'application/sqlite')

        # there should be one subject element
        self.assertEqual(self.resTimeSeries.metadata.subjects.all().count(), 1)
        subj_element = self.resTimeSeries.metadata.subjects.all().first()
        self.assertEqual(subj_element.value, 'Temperature')

        # there should be a total of 7 timeseries
        self.assertEqual(self.resTimeSeries.metadata.time_series_results.all().count(), 7)

        # testing extended metadata elements

        # test 'site' - there should be 7 sites
        self.assertEqual(self.resTimeSeries.metadata.sites.all().count(), 7)
        # each site be associated with one series id
        for site in self.resTimeSeries.metadata.sites.all():
            self.assertEqual(len(site.series_ids), 1)

        # test the data for a specific site
        site = self.resTimeSeries.metadata.sites.filter(site_code='USU-LBR-Paradise').first()
        self.assertNotEqual(site, None)
        site_name = 'Little Bear River at McMurdy Hollow near Paradise, Utah'
        self.assertEqual(site.site_name, site_name)
        self.assertEqual(site.elevation_m, 1445)
        self.assertEqual(site.elevation_datum, 'NGVD29')
        self.assertEqual(site.site_type, 'Stream')

        # test 'variable' - there should be 1 variable element
        self.assertEqual(self.resTimeSeries.metadata.variables.all().count(), 1)
        variable = self.resTimeSeries.metadata.variables.all().first()
        # there should be 7 series ids associated with this one variable
        self.assertEqual(len(variable.series_ids), 7)
        # test the data for a variable
        self.assertEqual(variable.variable_code, 'USU36')
        self.assertEqual(variable.variable_name, 'Temperature')
        self.assertEqual(variable.variable_type, 'Water Quality')
        self.assertEqual(variable.no_data_value, -9999)
        self.assertEqual(variable.variable_definition, None)
        self.assertEqual(variable.speciation, 'Not Applicable')

        # test 'method' - there should be 1 method element
        self.assertEqual(self.resTimeSeries.metadata.methods.all().count(), 1)
        method = self.resTimeSeries.metadata.methods.all().first()
        # there should be 7 series ids associated with this one method element
        self.assertEqual(len(method.series_ids), 7)
        self.assertEqual(method.method_code, '28')
        method_name = 'Quality Control Level 1 Data Series created from raw QC Level 0 data ' \
                      'using ODM Tools.'
        self.assertEqual(method.method_name, method_name)
        self.assertEqual(method.method_type, 'Instrument deployment')
        method_des = 'Quality Control Level 1 Data Series created from raw QC Level 0 data ' \
                     'using ODM Tools.'
        self.assertEqual(method.method_description, method_des)
        self.assertEqual(method.method_link, None)

        # test 'processing_level' - there should be 1 processing_level element
        self.assertEqual(self.resTimeSeries.metadata.processing_levels.all().count(), 1)
        proc_level = self.resTimeSeries.metadata.processing_levels.all().first()
        # there should be 7 series ids associated with this one element
        self.assertEqual(len(proc_level.series_ids), 7)
        self.assertEqual(proc_level.processing_level_code, '1')
        self.assertEqual(proc_level.definition, 'Quality controlled data')
        explanation = 'Quality controlled data that have passed quality assurance procedures ' \
                      'such as routine estimation of timing and sensor calibration or visual ' \
                      'inspection and removal of obvious errors. An example is USGS published ' \
                      'streamflow records following parsing through USGS quality control ' \
                      'procedures.'
        self.assertEqual(proc_level.explanation, explanation)

        # test 'timeseries_result' - there should be 7 timeseries_result element
        self.assertEqual(self.resTimeSeries.metadata.time_series_results.all().count(), 7)
        ts_result = self.resTimeSeries.metadata.time_series_results.filter(
            series_ids__contains=['182d8fa3-1ebc-11e6-ad49-f45c8999816f']).first()
        self.assertNotEqual(ts_result, None)
        # there should be only 1 series id associated with this element
        self.assertEqual(len(ts_result.series_ids), 1)
        self.assertEqual(ts_result.units_type, 'Temperature')
        self.assertEqual(ts_result.units_name, 'degree celsius')
        self.assertEqual(ts_result.units_abbreviation, 'degC')
        self.assertEqual(ts_result.status, 'Unknown')
        self.assertEqual(ts_result.sample_medium, 'Surface Water')
        self.assertEqual(ts_result.value_count, 1441)
        self.assertEqual(ts_result.aggregation_statistics, 'Average')

        # test for CV lookup tables
        # there should be 23 CV_VariableType records
        self.assertEqual(self.resTimeSeries.metadata.cv_variable_types.all().count(), 23)
        # there should be 805 CV_VariableName records
        self.assertEqual(self.resTimeSeries.metadata.cv_variable_names.all().count(), 805)
        # there should be 145 CV_Speciation records
        self.assertEqual(self.resTimeSeries.metadata.cv_speciations.all().count(), 145)
        # there should be 51 CV_SiteType records
        self.assertEqual(self.resTimeSeries.metadata.cv_site_types.all().count(), 51)
        # there should be 5 CV_ElevationDatum records
        self.assertEqual(self.resTimeSeries.metadata.cv_elevation_datums.all().count(), 5)
        # there should be 25 CV_MethodType records
        self.assertEqual(self.resTimeSeries.metadata.cv_method_types.all().count(), 25)
        # there should be 179 CV_UnitsType records
        self.assertEqual(self.resTimeSeries.metadata.cv_units_types.all().count(), 179)
        # there should be 4 CV_Status records
        self.assertEqual(self.resTimeSeries.metadata.cv_statuses.all().count(), 4)
        # there should be 18 CV_Medium records
        self.assertEqual(self.resTimeSeries.metadata.cv_mediums.all().count(), 18)
        # there should be 17 CV_aggregationStatistics records
        self.assertEqual(self.resTimeSeries.metadata.cv_aggregation_statistics.all().count(), 17)

        # there should not be any UTCOffset element
        self.assertEqual(self.resTimeSeries.metadata.utc_offset, None)
def update_quota_usage_task(username):
    """Refresh a user's aggregated quota usage in Django from iRODS AVUs.

    Intended to run as a celery task, invoked asynchronously with a delay of about one
    minute so that the iRODS real-time quota micro-services have time to update the usage
    AVU before it is read here. The micro-services update usage independently in the
    HydroShare iRODS data zone and in the user zone, so the two values are added together
    and stored as one aggregated usage for the hydroshare internal zone.

    :param username: the name of the user whose quota usage needs to be updated.
    :return: True if the quota usage was updated; False if the quota row does not exist in
        Django or no usage AVU could be read from either zone. See log for details.
    """
    quota_row = UserQuota.objects.filter(user__username=username, zone="hydroshare").first()
    if quota_row is None:
        # the quota row does not exist in Django
        logger.error('quota row does not exist in Django for hydroshare zone for '
                     'user ' + username)
        return False

    usage_attr = username + '-usage'
    storage = IrodsStorage()

    def _zone_usage(bagit_path):
        # Return the usage AVU set on the bagit collection as a float, or -1 when it is not
        # available (the user may not have any resources in that zone).
        try:
            raw_size = storage.getAVU(bagit_path, usage_attr)
        except SessionException:
            return -1
        if raw_size is None:
            return -1
        return float(raw_size)

    # usage recorded in the iRODS data zone
    data_zone_size = _zone_usage(settings.IRODS_BAGIT_PATH)

    # usage recorded in the iRODS user zone
    user_zone_bagit_path = os.path.join('/', settings.HS_USER_IRODS_ZONE, 'home',
                                        settings.HS_LOCAL_PROXY_USER_IN_FED_ZONE,
                                        settings.IRODS_BAGIT_PATH)
    user_zone_size = _zone_usage(user_zone_bagit_path)

    data_missing = data_zone_size < 0
    user_missing = user_zone_size < 0
    if data_missing and user_missing:
        logger.error('no quota size AVU in data zone and user zone for the user ' + username)
        return False

    if user_missing:
        total_used = data_zone_size
    elif data_missing:
        total_used = user_zone_size
    else:
        total_used = data_zone_size + user_zone_size

    quota_row.update_used_value(total_used)
    return True
def get_file_storage():
    """Return a new storage backend chosen by the ``USE_IRODS`` setting.

    :return: an ``IrodsStorage`` instance when ``settings.USE_IRODS`` is truthy; otherwise
        (including when the setting is absent) a ``DefaultStorage`` instance.
    """
    use_irods = getattr(settings, 'USE_IRODS', False)
    if use_irods:
        return IrodsStorage()
    return DefaultStorage()
class TestFolderDownloadZip(TestCase):
    """Tests for ``create_temp_zip`` on a folder and on a refts aggregation file."""

    def setUp(self):
        """Create a composite resource holding a folder file and a refts aggregation file."""
        super(TestFolderDownloadZip, self).setUp()
        self.group, _ = Group.objects.get_or_create(name='Hydroshare Author')
        self.user = create_account('*****@*****.**',
                                   username='******',
                                   first_name='Shaun',
                                   last_name='Livingston',
                                   superuser=False,
                                   groups=[])

        self.res = create_resource(resource_type='CompositeResource',
                                   owner=self.user,
                                   title='Test Resource',
                                   metadata=[])

        ResourceFile.create_folder(self.res, 'foo')

        # create files
        self.n1 = "test1.txt"
        with open(self.n1, 'w') as test_file:
            test_file.write("Test text file in test1.txt")

        # kept open on purpose: tearDown closes and removes it
        self.test_file = open(self.n1, "rb")
        add_resource_files(self.res.short_id, self.test_file, folder='foo')

        # copy refts file into new file to be added to the resource as an aggregation;
        # both handles are closed as soon as the copy is done
        with open('hs_core/tests/data/multi_sites_formatted_version1.0.refts.json',
                  'rb') as reft_data_file:
            with open('multi_sites_formatted_version1.0.refts.json', 'wb') as refts_file:
                refts_file.write(reft_data_file.read())

        # kept open on purpose: tearDown closes and removes it
        self.refts_file = open('multi_sites_formatted_version1.0.refts.json', 'rb')
        add_resource_files(self.res.short_id, self.refts_file)
        self.res.create_aggregation_xml_documents()
        self.istorage = IrodsStorage()

    def tearDown(self):
        """Delete the resource, the local fixture files and any leftover "zips" collection."""
        super(TestFolderDownloadZip, self).tearDown()
        if self.res:
            self.res.delete()
        if self.test_file:
            self.test_file.close()
            os.remove(self.test_file.name)
        if self.refts_file:
            self.refts_file.close()
            os.remove(self.refts_file.name)
        GenericResource.objects.all().delete()
        if self.istorage.exists("zips"):
            self.istorage.delete("zips")

    def test_create_temp_zip(self):
        """Zipping the folder and then the refts file must each produce the output zip."""
        input_path = "{}/data/contents/foo".format(self.res.short_id)
        output_path = "zips/rand/foo.zip"

        self.assertTrue(
            create_temp_zip(self.res.short_id, input_path, output_path, None, False))
        self.assertTrue(self.istorage.exists(output_path))

        # test aggregation
        input_path = "{}/data/contents/multi_sites_formatted_version1.0.refts.json"\
            .format(self.res.short_id)
        output_path = "zips/rand/multi_sites_formatted_version1.0.refts.json.zip"

        self.assertTrue(
            create_temp_zip(self.res.short_id, input_path, output_path, None, sf_zip=True))
        self.assertTrue(self.istorage.exists(output_path))

    def test_create_temp_zip_aggregation(self):
        """Zipping the refts file while passing its name must produce the output zip."""
        input_path = "{}/data/contents/" \
                     "multi_sites_formatted_version1.0.refts.json".format(self.res.short_id)
        output_path = "zips/rand/aggregation.zip"

        self.assertTrue(
            create_temp_zip(self.res.short_id, input_path, output_path,
                            "multi_sites_formatted_version1.0.refts.json", False))
        self.assertTrue(self.istorage.exists(output_path))
class TestCaseCommonUtilities(object):
    """Enable common utilities for iRODS testing.

    The methods rely on ``self.assert*`` helpers and on attributes such as ``self.user``
    and ``self.res``, so this class is meant to be mixed into a test case that provides
    them.
    """

    def is_federated_irods_available(self):
        """Check if federated iRODS is available.

        :return: True only when ``settings.REMOTE_USE_IRODS`` is set and both the user zone
            host and the iRODS host are the expected local test hosts; False otherwise.
        """
        return bool(settings.REMOTE_USE_IRODS) \
            and settings.HS_USER_ZONE_HOST == 'users.local.org' \
            and settings.IRODS_HOST == 'data.local.org'

    def create_irods_user_in_user_zone(self):
        """Create corresponding irods account in user zone.

        Runs the configured create-user command over SSH for ``self.user.username`` and then
        sets ``create_irods_user_account`` to True on the user's profile.

        :raises SessionException: if the command output contains 'ERROR:' or if any other
            exception is raised while creating the account or updating the profile.
        """
        try:
            exec_cmd = "{0} {1} {2}".format(
                settings.HS_USER_ZONE_PROXY_USER_CREATE_USER_CMD,
                self.user.username, self.user.username)
            output = run_ssh_command(host=settings.HS_USER_ZONE_HOST,
                                     uname=settings.HS_USER_ZONE_PROXY_USER,
                                     pwd=settings.HS_USER_ZONE_PROXY_USER_PWD,
                                     exec_cmd=exec_cmd)
            if output and 'ERROR:' in output.upper():
                # irods account failed to create; assertRaises(<instance>) never raised
                # anything, so raise explicitly to actually report the failure
                raise SessionException(-1, output, output)

            user_profile = UserProfile.objects.filter(user=self.user).first()
            user_profile.create_irods_user_account = True
            user_profile.save()
        except SessionException:
            raise
        except Exception as ex:
            raise SessionException(-1, str(ex), str(ex))

    def delete_irods_user_in_user_zone(self):
        """Delete irods test user in user zone.

        Runs the configured delete-user command over SSH for ``self.user.username`` and then
        sets ``create_irods_user_account`` to False on the user's profile.

        :raises SessionException: if the command output contains 'ERROR:' or if any other
            exception is raised while deleting the account or updating the profile.
        """
        try:
            exec_cmd = "{0} {1}".format(
                settings.HS_USER_ZONE_PROXY_USER_DELETE_USER_CMD,
                self.user.username)
            output = run_ssh_command(host=settings.HS_USER_ZONE_HOST,
                                     uname=settings.HS_USER_ZONE_PROXY_USER,
                                     pwd=settings.HS_USER_ZONE_PROXY_USER_PWD,
                                     exec_cmd=exec_cmd)
            if output and 'ERROR:' in output.upper():
                # there is an error from icommand run, report the error
                raise SessionException(-1, output, output)

            user_profile = UserProfile.objects.filter(user=self.user).first()
            user_profile.create_irods_user_account = False
            user_profile.save()
        except SessionException:
            raise
        except Exception as ex:
            # there is an error from icommand run, report the error
            raise SessionException(-1, str(ex), str(ex))

    def save_files_to_user_zone(self, file_name_to_target_name_dict):
        """Save a list of files to iRODS user zone using the same IrodsStorage() object.

        The storage object is kept on ``self.irods_storage``.

        :param file_name_to_target_name_dict: a dictionary in the form of
            {ori_file: target_file} where ori_file is the file to be saved, and target_file
            is the full path file name in iRODS user zone to save ori_file to
        :return:
        """
        self.irods_storage = IrodsStorage('federated')
        # items() instead of the Python-2-only iteritems()
        for file_name, target_name in file_name_to_target_name_dict.items():
            self.irods_storage.saveFile(file_name, target_name)

    def resource_file_oprs(self):
        """Test common iRODS file operations.

        This is a common test utility function to be called by both regular folder operation
        testing and federated zone folder operation testing.
        Make sure the calling TestCase object has the following attributes defined before
        calling this method:
        self.res: resource that has been created that contains files listed in file_name_list
        self.user: owner of the resource
        self.file_name_list: a list of three file names that have been added to the res object
        self.test_file_1 needs to be present for the calling object for doing regular folder
        operations without involving federated zone so that the same opened file can be
        re-added to the resource for testing the case where zipping cannot overwrite existing
        file
        """
        user = self.user
        res = self.res
        file_name_list = self.file_name_list

        # create a folder, if folder is created successfully, no exception is raised,
        # otherwise, an iRODS exception will be raised which will be caught by the test
        # runner and mark as a test failure
        create_folder(res.short_id, 'data/contents/sub_test_dir')
        istorage = res.get_irods_storage()
        res_path = res.file_path
        store = istorage.listdir(res_path)
        self.assertIn('sub_test_dir', store[0],
                      msg='resource does not contain created sub-folder')

        # rename the third file in file_name_list
        move_or_rename_file_or_folder(user, res.short_id,
                                      'data/contents/' + file_name_list[2],
                                      'data/contents/new_' + file_name_list[2])
        # move the first two files in file_name_list to the new folder
        move_or_rename_file_or_folder(user, res.short_id,
                                      'data/contents/' + file_name_list[0],
                                      'data/contents/sub_test_dir/' + file_name_list[0])
        move_or_rename_file_or_folder(user, res.short_id,
                                      'data/contents/' + file_name_list[1],
                                      'data/contents/sub_test_dir/' + file_name_list[1])

        updated_res_file_names = [
            rf.short_path for rf in ResourceFile.objects.filter(object_id=res.id)]

        self.assertIn('new_' + file_name_list[2], updated_res_file_names,
                      msg="resource does not contain the updated file new_" +
                          file_name_list[2])
        self.assertNotIn(file_name_list[2], updated_res_file_names,
                         msg='resource still contains the old file ' + file_name_list[2] +
                             ' after renaming')
        self.assertIn('sub_test_dir/' + file_name_list[0], updated_res_file_names,
                      msg='resource does not contain ' + file_name_list[0] +
                          ' moved to a folder')
        # the two messages below previously lacked the space after the file name
        self.assertNotIn(file_name_list[0], updated_res_file_names,
                         msg='resource still contains the old ' + file_name_list[0] +
                             ' after moving to a folder')
        self.assertIn('sub_test_dir/' + file_name_list[1], updated_res_file_names,
                      msg='resource does not contain ' + file_name_list[1] +
                          ' moved to a new folder')
        self.assertNotIn(file_name_list[1], updated_res_file_names,
                         msg='resource still contains the old ' + file_name_list[1] +
                             ' after moving to a folder')

        # zip the folder
        output_zip_fname, size = \
            zip_folder(user, res.short_id, 'data/contents/sub_test_dir',
                       'sub_test_dir.zip', True)
        self.assertGreater(size, 0, msg='zipped file has a size of 0')
        # Now resource should contain only two files: new_file3.txt and sub_test_dir.zip
        # since the folder is zipped into sub_test_dir.zip with the folder deleted
        self.assertEqual(res.files.all().count(), 2,
                         msg="resource file count didn't match-")

        # test unzip does not allow override of existing files
        # add an existing file in the zip to the resource
        if res.resource_federation_path:
            fed_test_file1_full_path = '/{zone}/home/{uname}/{fname}'.format(
                zone=settings.HS_USER_IRODS_ZONE, uname=user.username,
                fname=file_name_list[0])
            # TODO: why isn't this a method of resource?
            # TODO: Why do we repeat the resource_federation_path?
            add_resource_files(res.short_id, source_names=[fed_test_file1_full_path],
                               move=False)
        else:
            # TODO: Why isn't this a method of resource?
            add_resource_files(res.short_id, self.test_file_1)

        # TODO: use ResourceFile.create_folder, which doesn't require data/contents prefix
        create_folder(res.short_id, 'data/contents/sub_test_dir')

        # TODO: use ResourceFile.rename, which doesn't require data/contents prefix
        move_or_rename_file_or_folder(user, res.short_id,
                                      'data/contents/' + file_name_list[0],
                                      'data/contents/sub_test_dir/' + file_name_list[0])

        # Now resource should contain three files: file3_new.txt, sub_test_dir.zip, and
        # file1.txt
        self.assertEqual(res.files.all().count(), 3, msg="resource file count didn't match")
        with self.assertRaises(SessionException):
            unzip_file(user, res.short_id, 'data/contents/sub_test_dir.zip', False)

        # Resource should still contain three files: file3_new.txt, sub_test_dir.zip, and
        # file1.txt
        file_cnt = res.files.all().count()
        self.assertEqual(file_cnt, 3, msg="resource file count didn't match - " +
                                          str(file_cnt) + " != 3")

        # test unzipping the file succeeds now after deleting the existing folder
        # TODO: this causes a multiple delete because the paths are valid now.
        istorage = res.get_irods_storage()

        remove_folder(user, res.short_id, 'data/contents/sub_test_dir')

        # Now resource should contain two files: file3_new.txt and sub_test_dir.zip
        file_cnt = res.files.all().count()
        self.assertEqual(file_cnt, 2, msg="resource file count didn't match - " +
                                          str(file_cnt) + " != 2")
        unzip_file(user, res.short_id, 'data/contents/sub_test_dir.zip', True)
        # Now resource should contain three files: file1.txt, file2.txt, and file3_new.txt
        self.assertEqual(res.files.all().count(), 3, msg="resource file count didn't match")
        updated_res_file_names = [
            rf.short_path for rf in ResourceFile.objects.filter(object_id=res.id)]
        self.assertNotIn('sub_test_dir.zip', updated_res_file_names,
                         msg="resource still contains the zip file after unzipping")
        self.assertIn('sub_test_dir/' + file_name_list[0], updated_res_file_names,
                      msg='resource does not contain unzipped file ' + file_name_list[0])
        self.assertIn('sub_test_dir/' + file_name_list[1], updated_res_file_names,
                      msg='resource does not contain unzipped file ' + file_name_list[1])
        self.assertIn('new_' + file_name_list[2], updated_res_file_names,
                      msg='resource does not contain unzipped file new_' + file_name_list[2])

        # rename a folder
        move_or_rename_file_or_folder(user, res.short_id,
                                      'data/contents/sub_test_dir', 'data/contents/sub_dir')
        updated_res_file_names = [
            rf.short_path for rf in ResourceFile.objects.filter(object_id=res.id)]

        self.assertNotIn('sub_test_dir/' + file_name_list[0], updated_res_file_names,
                         msg='resource still contains ' + file_name_list[0] +
                             ' in the old folder after renaming')
        self.assertIn('sub_dir/' + file_name_list[0], updated_res_file_names,
                      msg='resource does not contain ' + file_name_list[0] +
                          ' in the new folder after renaming')
        self.assertNotIn('sub_test_dir/' + file_name_list[1], updated_res_file_names,
                         msg='resource still contains ' + file_name_list[1] +
                             ' in the old folder after renaming')
        self.assertIn('sub_dir/' + file_name_list[1], updated_res_file_names,
                      msg='resource does not contain ' + file_name_list[1] +
                          ' in the new folder after renaming')

        # remove a folder
        # TODO: utilize ResourceFile.remove_folder instead. Takes a short path.
        remove_folder(user, res.short_id, 'data/contents/sub_dir')
        # Now resource only contains one file
        self.assertEqual(res.files.all().count(), 1, msg="resource file count didn't match")
        updated_res_file_names = [
            rf.short_path for rf in ResourceFile.objects.filter(object_id=res.id)]

        self.assertEqual(len(updated_res_file_names), 1)
        self.assertEqual(updated_res_file_names[0], 'new_' + file_name_list[2])

    def raster_metadata_extraction(self):
        """Test raster metadata extraction.

        This is a common test utility function to be called by both regular raster metadata
        extraction testing and federated zone raster metadata extraction testing.
        Make sure the calling TestCase object has self.resRaster attribute defined before
        calling this method which is the raster resource that has been created containing
        valid raster files.
        """
        # there should be 2 content files
        self.assertEqual(self.resRaster.files.all().count(), 2)

        # test core metadata after metadata extraction
        extracted_title = "My Test Raster Resource"
        self.assertEqual(self.resRaster.metadata.title.value, extracted_title)

        # there should be 1 creator
        self.assertEqual(self.resRaster.metadata.creators.all().count(), 1)

        # there should be 1 coverage element - box type
        self.assertEqual(self.resRaster.metadata.coverages.all().count(), 1)
        self.assertEqual(
            self.resRaster.metadata.coverages.all().filter(type='box').count(), 1)

        box_coverage = self.resRaster.metadata.coverages.all().filter(type='box').first()
        self.assertEqual(box_coverage.value['projection'], 'WGS 84 EPSG:4326')
        self.assertEqual(box_coverage.value['units'], 'Decimal degrees')
        self.assertEqual(box_coverage.value['northlimit'], 42.11270614966863)
        self.assertEqual(box_coverage.value['eastlimit'], -111.45699925047542)
        self.assertEqual(box_coverage.value['southlimit'], 41.66222054591102)
        self.assertEqual(box_coverage.value['westlimit'], -111.81761887121905)

        # there should be 2 format elements
        self.assertEqual(self.resRaster.metadata.formats.all().count(), 2)
        self.assertEqual(
            self.resRaster.metadata.formats.all().filter(value='application/vrt').count(), 1)
        self.assertEqual(
            self.resRaster.metadata.formats.all().filter(value='image/tiff').count(), 1)

        # testing extended metadata element: original coverage
        ori_coverage = self.resRaster.metadata.originalCoverage
        self.assertNotEqual(ori_coverage, None)
        self.assertEqual(ori_coverage.value['northlimit'], 4662392.446916306)
        self.assertEqual(ori_coverage.value['eastlimit'], 461954.01909127034)
        self.assertEqual(ori_coverage.value['southlimit'], 4612592.446916306)
        self.assertEqual(ori_coverage.value['westlimit'], 432404.01909127034)
        self.assertEqual(ori_coverage.value['units'], 'meter')
        self.assertEqual(ori_coverage.value['projection'], "NAD83 / UTM zone 12N")
        self.assertEqual(ori_coverage.value['datum'], "North_American_Datum_1983")
        projection_string = u'PROJCS["NAD83 / UTM zone 12N",GEOGCS["NAD83",' \
                            u'DATUM["North_American_Datum_1983",' \
                            u'SPHEROID["GRS 1980",6378137,298.257222101,' \
                            u'AUTHORITY["EPSG","7019"]],' \
                            u'TOWGS84[0,0,0,0,0,0,0],AUTHORITY["EPSG","6269"]],' \
                            u'PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],' \
                            u'UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],' \
                            u'AUTHORITY["EPSG","4269"]],PROJECTION["Transverse_Mercator"],' \
                            u'PARAMETER["latitude_of_origin",0],' \
                            u'PARAMETER["central_meridian",-111],' \
                            u'PARAMETER["scale_factor",0.9996],PARAMETER["false_easting",500000],' \
                            u'PARAMETER["false_northing",0],' \
                            u'UNIT["metre",1,AUTHORITY["EPSG","9001"]],' \
                            u'AXIS["Easting",EAST],AXIS["Northing",' \
                            u'NORTH],AUTHORITY["EPSG","26912"]]'
        self.assertEqual(ori_coverage.value['projection_string'], projection_string)

        # testing extended metadata element: cell information
        cell_info = self.resRaster.metadata.cellInformation
        self.assertEqual(cell_info.rows, 1660)
        self.assertEqual(cell_info.columns, 985)
        self.assertEqual(cell_info.cellSizeXValue, 30.0)
        self.assertEqual(cell_info.cellSizeYValue, 30.0)
        self.assertEqual(cell_info.cellDataType, 'Float32')

        # testing extended metadata element: band information
        self.assertEqual(self.resRaster.metadata.bandInformations.count(), 1)
        band_info = self.resRaster.metadata.bandInformations.first()
        self.assertEqual(band_info.noDataValue, '-3.40282346639e+38')
        self.assertEqual(band_info.maximumValue, '3031.44311523')
        self.assertEqual(band_info.minimumValue, '1358.33459473')

    def netcdf_metadata_extraction(self, expected_creators_count=1):
        """Test NetCDF metadata extraction.

        This is a common test utility function to be called by both regular netcdf metadata
        extraction testing and federated zone netCDF metadata extraction testing.
        Make sure the calling TestCase object has self.resNetcdf attribute defined before
        calling this method which is the netCDF resource that has been created containing
        valid netCDF files.

        :param expected_creators_count: number of creator elements the resource metadata is
            expected to have (defaults to 1).
        """
        # there should 2 content file
        self.assertEqual(self.resNetcdf.files.all().count(), 2)

        # test core metadata after metadata extraction
        extracted_title = "Snow water equivalent estimation at TWDEF site from " \
                          "Oct 2009 to June 2010"
        self.assertEqual(self.resNetcdf.metadata.title.value, extracted_title)

        # there should be an abstract element
        self.assertNotEqual(self.resNetcdf.metadata.description, None)
        extracted_abstract = "This netCDF data is the simulation output from Utah Energy " \
                             "Balance (UEB) model.It includes the simulation result " \
                             "of snow water equivalent during the period " \
                             "Oct. 2009 to June 2010 for TWDEF site in Utah."
        self.assertEqual(self.resNetcdf.metadata.description.abstract, extracted_abstract)

        # there should be one source element
        self.assertEqual(self.resNetcdf.metadata.sources.all().count(), 1)

        # there should be one license element:
        # NOTE(review): this compares the rights statement against the integer 1, which
        # looks unintended (perhaps meant to be a not-None check) - confirm before changing
        self.assertNotEqual(self.resNetcdf.metadata.rights.statement, 1)

        # there should be one relation element
        self.assertEqual(
            self.resNetcdf.metadata.relations.all().filter(type='cites').count(), 1)

        # there should be creators equal to expected_creators_count
        self.assertEqual(self.resNetcdf.metadata.creators.all().count(),
                         expected_creators_count)

        # there should be one contributor
        self.assertEqual(self.resNetcdf.metadata.contributors.all().count(), 1)

        # there should be 2 coverage element - box type and period type
        self.assertEqual(self.resNetcdf.metadata.coverages.all().count(), 2)
        self.assertEqual(
            self.resNetcdf.metadata.coverages.all().filter(type='box').count(), 1)
        self.assertEqual(
            self.resNetcdf.metadata.coverages.all().filter(type='period').count(), 1)

        box_coverage = self.resNetcdf.metadata.coverages.all().filter(type='box').first()
        self.assertEqual(box_coverage.value['projection'], 'WGS 84 EPSG:4326')
        self.assertEqual(box_coverage.value['units'], 'Decimal degrees')
        self.assertEqual(box_coverage.value['northlimit'], 41.867126409)
        self.assertEqual(box_coverage.value['eastlimit'], -111.505940368)
        self.assertEqual(box_coverage.value['southlimit'], 41.8639080745)
        self.assertEqual(box_coverage.value['westlimit'], -111.51138808)

        temporal_coverage = self.resNetcdf.metadata.coverages.all().filter(
            type='period').first()
        self.assertEqual(parser.parse(temporal_coverage.value['start']).date(),
                         parser.parse('10/01/2009').date())
        self.assertEqual(parser.parse(temporal_coverage.value['end']).date(),
                         parser.parse('05/30/2010').date())

        # there should be 2 format elements
        self.assertEqual(self.resNetcdf.metadata.formats.all().count(), 2)
        self.assertEqual(
            self.resNetcdf.metadata.formats.all().filter(value='text/plain').count(), 1)
        self.assertEqual(
            self.resNetcdf.metadata.formats.all().filter(
                value='application/x-netcdf').count(), 1)

        # there should be one subject element
        self.assertEqual(self.resNetcdf.metadata.subjects.all().count(), 1)
        subj_element = self.resNetcdf.metadata.subjects.all().first()
        self.assertEqual(subj_element.value, 'Snow water equivalent')

        # testing extended metadata element: original coverage
        ori_coverage = self.resNetcdf.metadata.ori_coverage.all().first()
        self.assertNotEqual(ori_coverage, None)
        self.assertEqual(ori_coverage.projection_string_type, 'Proj4 String')
        proj_text = u'+proj=tmerc +y_0=0.0 +k_0=0.9996 +x_0=500000.0 +lat_0=0.0 +lon_0=-111.0'
        self.assertEqual(ori_coverage.projection_string_text, proj_text)
        self.assertEqual(ori_coverage.value['northlimit'], '4.63515e+06')
        self.assertEqual(ori_coverage.value['eastlimit'], '458010.0')
        self.assertEqual(ori_coverage.value['southlimit'], '4.63479e+06')
        self.assertEqual(ori_coverage.value['westlimit'], '457560.0')
        self.assertEqual(ori_coverage.value['units'], 'Meter')
        self.assertEqual(ori_coverage.value['projection'], 'transverse_mercator')

        # testing extended metadata element: variables
        self.assertEqual(self.resNetcdf.metadata.variables.all().count(), 5)

        # test time variable
        var_time = self.resNetcdf.metadata.variables.all().filter(name='time').first()
        self.assertNotEqual(var_time, None)
        self.assertEqual(var_time.unit, 'hours since 2009-10-1 0:0:00 UTC')
        self.assertEqual(var_time.type, 'Float')
        self.assertEqual(var_time.shape, 'time')
        self.assertEqual(var_time.descriptive_name, 'time')

        # test x variable
        var_x = self.resNetcdf.metadata.variables.all().filter(name='x').first()
        self.assertNotEqual(var_x, None)
        self.assertEqual(var_x.unit, 'Meter')
        self.assertEqual(var_x.type, 'Float')
        self.assertEqual(var_x.shape, 'x')
        self.assertEqual(var_x.descriptive_name, 'x coordinate of projection')

        # test y variable
        var_y = self.resNetcdf.metadata.variables.all().filter(name='y').first()
        self.assertNotEqual(var_y, None)
        self.assertEqual(var_y.unit, 'Meter')
        self.assertEqual(var_y.type, 'Float')
        self.assertEqual(var_y.shape, 'y')
        self.assertEqual(var_y.descriptive_name, 'y coordinate of projection')

        # test SWE variable
        var_swe = self.resNetcdf.metadata.variables.all().filter(name='SWE').first()
        self.assertNotEqual(var_swe, None)
        self.assertEqual(var_swe.unit, 'm')
        self.assertEqual(var_swe.type, 'Float')
        self.assertEqual(var_swe.shape, 'y,x,time')
        self.assertEqual(var_swe.descriptive_name, 'Snow water equivalent')
        self.assertEqual(var_swe.method, 'model simulation of UEB model')
        self.assertEqual(var_swe.missing_value, '-9999')

        # test grid mapping variable
        var_grid = self.resNetcdf.metadata.variables.all().\
            filter(name='transverse_mercator').first()
        self.assertNotEqual(var_grid, None)
        self.assertEqual(var_grid.unit, 'Unknown')
        self.assertEqual(var_grid.type, 'Unknown')
        self.assertEqual(var_grid.shape, 'Not defined')

    def timeseries_metadata_extraction(self):
        """Test timeseries metadata extraction.

        This is a common test utility function to be called by both regular timeseries
        metadata extraction testing and federated zone timeseries metadata extraction
        testing.
        Make sure the calling TestCase object has self.resTimeSeries attribute defined before
        calling this method which is the timeseries resource that has been created containing
        valid timeseries file.
        """
        # there should one content file
        self.assertEqual(self.resTimeSeries.files.all().count(), 1)

        # there should be one contributor element
        self.assertEqual(self.resTimeSeries.metadata.contributors.all().count(), 1)

        # test core metadata after metadata extraction
        extracted_title = "Water temperature data from the Little Bear River, UT"
        self.assertEqual(self.resTimeSeries.metadata.title.value, extracted_title)

        # there should be an abstract element
        self.assertNotEqual(self.resTimeSeries.metadata.description, None)
        extracted_abstract = "This dataset contains time series of observations of water " \
                             "temperature in the Little Bear River, UT. Data were recorded every " \
                             "30 minutes. The values were recorded using a HydroLab MS5 " \
                             "multi-parameter water quality sonde connected to a Campbell " \
                             "Scientific datalogger."
        self.assertEqual(self.resTimeSeries.metadata.description.abstract.strip(),
                         extracted_abstract)

        # there should be 2 coverage element - box type and period type
        self.assertEqual(self.resTimeSeries.metadata.coverages.all().count(), 2)
        self.assertEqual(
            self.resTimeSeries.metadata.coverages.all().filter(type='box').count(), 1)
        self.assertEqual(
            self.resTimeSeries.metadata.coverages.all().filter(type='period').count(), 1)

        box_coverage = self.resTimeSeries.metadata.coverages.all().filter(type='box').first()
        self.assertEqual(box_coverage.value['projection'], 'WGS 84 EPSG:4326')
        self.assertEqual(box_coverage.value['units'], 'Decimal degrees')
        self.assertEqual(box_coverage.value['northlimit'], 41.718473)
        self.assertEqual(box_coverage.value['eastlimit'], -111.799324)
        self.assertEqual(box_coverage.value['southlimit'], 41.495409)
        self.assertEqual(box_coverage.value['westlimit'], -111.946402)

        temporal_coverage = self.resTimeSeries.metadata.coverages.all().filter(
            type='period').first()
        self.assertEqual(parser.parse(temporal_coverage.value['start']).date(),
                         parser.parse('01/01/2008').date())
        self.assertEqual(parser.parse(temporal_coverage.value['end']).date(),
                         parser.parse('01/31/2008').date())

        # there should be one format element
        self.assertEqual(self.resTimeSeries.metadata.formats.all().count(), 1)
        format_element = self.resTimeSeries.metadata.formats.all().first()
        self.assertEqual(format_element.value, 'application/sqlite')

        # there should be one subject element
        self.assertEqual(self.resTimeSeries.metadata.subjects.all().count(), 1)
        subj_element = self.resTimeSeries.metadata.subjects.all().first()
        self.assertEqual(subj_element.value, 'Temperature')

        # there should be a total of 7 timeseries
        self.assertEqual(self.resTimeSeries.metadata.time_series_results.all().count(), 7)

        # testing extended metadata elements

        # test 'site' - there should be 7 sites
        self.assertEqual(self.resTimeSeries.metadata.sites.all().count(), 7)
        # each site be associated with one series id
        for site in self.resTimeSeries.metadata.sites.all():
            self.assertEqual(len(site.series_ids), 1)

        # test the data for a specific site
        site = self.resTimeSeries.metadata.sites.filter(site_code='USU-LBR-Paradise').first()
        self.assertNotEqual(site, None)
        site_name = 'Little Bear River at McMurdy Hollow near Paradise, Utah'
        self.assertEqual(site.site_name, site_name)
        self.assertEqual(site.elevation_m, 1445)
        self.assertEqual(site.elevation_datum, 'NGVD29')
        self.assertEqual(site.site_type, 'Stream')

        # test 'variable' - there should be 1 variable element
        self.assertEqual(self.resTimeSeries.metadata.variables.all().count(), 1)
        variable = self.resTimeSeries.metadata.variables.all().first()
        # there should be 7 series ids associated with this one variable
        self.assertEqual(len(variable.series_ids), 7)
        # test the data for a variable
        self.assertEqual(variable.variable_code, 'USU36')
        self.assertEqual(variable.variable_name, 'Temperature')
        self.assertEqual(variable.variable_type, 'Water Quality')
        self.assertEqual(variable.no_data_value, -9999)
        self.assertEqual(variable.variable_definition, None)
        self.assertEqual(variable.speciation, 'Not Applicable')

        # test 'method' - there should be 1 method element
        self.assertEqual(self.resTimeSeries.metadata.methods.all().count(), 1)
        method = self.resTimeSeries.metadata.methods.all().first()
        # there should be 7 series ids associated with this one method element
        self.assertEqual(len(method.series_ids), 7)
        self.assertEqual(method.method_code, '28')
        method_name = 'Quality Control Level 1 Data Series created from raw QC Level 0 data ' \
                      'using ODM Tools.'
        self.assertEqual(method.method_name, method_name)
        self.assertEqual(method.method_type, 'Instrument deployment')
        method_des = 'Quality Control Level 1 Data Series created from raw QC Level 0 data ' \
                     'using ODM Tools.'
        self.assertEqual(method.method_description, method_des)
        self.assertEqual(method.method_link, None)

        # test 'processing_level' - there should be 1 processing_level element
        self.assertEqual(self.resTimeSeries.metadata.processing_levels.all().count(), 1)
        proc_level = self.resTimeSeries.metadata.processing_levels.all().first()
        # there should be 7 series ids associated with this one element
        self.assertEqual(len(proc_level.series_ids), 7)
        self.assertEqual(proc_level.processing_level_code, 1)
        self.assertEqual(proc_level.definition, 'Quality controlled data')
        explanation = 'Quality controlled data that have passed quality assurance procedures ' \
                      'such as routine estimation of timing and sensor calibration or visual ' \
                      'inspection and removal of obvious errors. An example is USGS published ' \
                      'streamflow records following parsing through USGS quality control ' \
                      'procedures.'
        self.assertEqual(proc_level.explanation, explanation)

        # test 'timeseries_result' - there should be 7 timeseries_result element
        self.assertEqual(self.resTimeSeries.metadata.time_series_results.all().count(), 7)
        ts_result = self.resTimeSeries.metadata.time_series_results.filter(
            series_ids__contains=['182d8fa3-1ebc-11e6-ad49-f45c8999816f']).first()
        self.assertNotEqual(ts_result, None)
        # there should be only 1 series id associated with this element
        self.assertEqual(len(ts_result.series_ids), 1)
        self.assertEqual(ts_result.units_type, 'Temperature')
        self.assertEqual(ts_result.units_name, 'degree celsius')
        self.assertEqual(ts_result.units_abbreviation, 'degC')
        self.assertEqual(ts_result.status, 'Unknown')
        self.assertEqual(ts_result.sample_medium, 'Surface Water')
        self.assertEqual(ts_result.value_count, 1441)
        self.assertEqual(ts_result.aggregation_statistics, 'Average')

        # test for CV lookup tables
        # there should be 23 CV_VariableType records
        self.assertEqual(self.resTimeSeries.metadata.cv_variable_types.all().count(), 23)
        # there should be 805 CV_VariableName records
        self.assertEqual(self.resTimeSeries.metadata.cv_variable_names.all().count(), 805)
        # there should be 145 CV_Speciation records
        self.assertEqual(self.resTimeSeries.metadata.cv_speciations.all().count(), 145)
        # there should be 51 CV_SiteType records
        self.assertEqual(self.resTimeSeries.metadata.cv_site_types.all().count(), 51)
        # there should be 5 CV_ElevationDatum records
        self.assertEqual(self.resTimeSeries.metadata.cv_elevation_datums.all().count(), 5)
        # there should be 25 CV_MethodType records
        self.assertEqual(self.resTimeSeries.metadata.cv_method_types.all().count(), 25)
        # there should be 179 CV_UnitsType records
        self.assertEqual(self.resTimeSeries.metadata.cv_units_types.all().count(), 179)
        # there should be 4 CV_Status records
        self.assertEqual(self.resTimeSeries.metadata.cv_statuses.all().count(), 4)
        # there should be 18 CV_Medium records
        self.assertEqual(self.resTimeSeries.metadata.cv_mediums.all().count(), 18)
        # there should be 17 CV_aggregationStatistics records
        self.assertEqual(
            self.resTimeSeries.metadata.cv_aggregation_statistics.all().count(), 17)

        # there should not be any UTCOffset element
        self.assertEqual(self.resTimeSeries.metadata.utc_offset, None)
def download(request, path, rest_call=False, use_async=True, use_reverse_proxy=True,
             *args, **kwargs):
    """
    Serve a resource file, a resource bag, or an on-demand zip out of iRODS.

    The first segment of ``path`` selects the mode: ``bags/...`` is a bag download,
    ``zips/...`` is a zip download, and anything else is treated as ``<res_id>/...``.
    The requesting user must hold VIEW_RESOURCE permission on the resource.

    :param request: the HTTP request; its ``session``, ``path`` and ``META`` are used here
    :param path: the requested path, relative to the download url
    :param rest_call: when True, messages are returned as plain text / JSON instead of
        ``<h1>`` HTML or a redirect, and an authorization failure raises PermissionDenied
    :param use_async: when True, zip and bag creation are queued with ``apply_async`` and a
        "Not ready" JSON response (rest_call) or a redirect to the resource page is returned;
        when False they are created inline before the file is served
    :param use_reverse_proxy: when True, the file may be handed to NGINX via
        X-Accel-Redirect, provided settings.SENDFILE_ON is set and the request carries
        HTTP_X_DJANGO_REVERSE_PROXY
    :param kwargs: may contain 'environment', the primary key of a RodsEnvironment used to
        build a dedicated iRODS session (only consulted for unfederated resources)
    :return: an HttpResponse, HttpResponseRedirect or FileResponse
    :raises PermissionDenied: if rest_call is True and the user is not authorized
    :raises KeyError: if the resource is unfederated and no iRODS session can be selected
    """
    split_path_strs = path.split('/')
    is_bag_download = False
    is_zip_download = False
    is_sf_agg_file = False
    if split_path_strs[0] == 'bags':
        # bags/<res_id>.<ext>
        res_id = os.path.splitext(split_path_strs[1])[0]
        is_bag_download = True
    elif split_path_strs[0] == 'zips':
        if path.endswith('.zip'):
            # an already generated zip: the resource id is the third path segment,
            # matching the zips/<date>/<res_id>/... layout of output_path built below
            res_id = os.path.splitext(split_path_strs[2])[0]
        else:
            # a request to zip something: the resource id is the second path segment
            res_id = os.path.splitext(split_path_strs[1])[0]
        is_zip_download = True
    else:
        res_id = split_path_strs[0]

    # if the resource does not exist in django, authorized will be false
    res, authorized, _ = authorize(request, res_id,
                                   needed_permission=ACTION_TO_AUTHORIZE.VIEW_RESOURCE,
                                   raises_exception=False)
    if not authorized:
        response = HttpResponse(status=401)
        content_msg = "You do not have permission to download this resource!"
        if rest_call:
            raise PermissionDenied(content_msg)
        else:
            response.content = "<h1>" + content_msg + "</h1>"
            return response

    # a requested file that is a single file aggregation is routed through the
    # zip branch below (is_sf_agg_file is checked together with is_zip_download)
    if res.resource_type == "CompositeResource" and not path.endswith(".zip"):
        for f in ResourceFile.objects.filter(object_id=res.id):
            if path == f.storage_path:
                if f.has_logical_file and f.logical_file.is_single_file_aggregation:
                    is_sf_agg_file = True

    if res.resource_federation_path:
        # the resource is stored in federated zone
        istorage = IrodsStorage('federated')
        federated_path = res.resource_federation_path
        # from here on path is the full federated path of the requested file
        path = os.path.join(federated_path, path)
        session = icommands.ACTIVE_SESSION
    else:
        # TODO: From Alva: I do not understand the use case for changing the environment.
        # TODO: This seems an enormous potential vulnerability, as arguments are
        # TODO: passed from the URI directly to IRODS without verification.
        istorage = IrodsStorage()
        federated_path = ''
        # session precedence: explicit environment, then the global session,
        # then the active icommands session
        if 'environment' in kwargs:
            environment = int(kwargs['environment'])
            environment = m.RodsEnvironment.objects.get(pk=environment)
            session = Session("/tmp/django_irods", settings.IRODS_ICOMMANDS_PATH,
                              session_id=uuid4())
            session.create_environment(environment)
            session.run('iinit', None, environment.auth)
        elif getattr(settings, 'IRODS_GLOBAL_SESSION', False):
            session = GLOBAL_SESSION
        elif icommands.ACTIVE_SESSION:
            session = icommands.ACTIVE_SESSION
        else:
            raise KeyError('settings must have IRODS_GLOBAL_SESSION set '
                           'if there is no environment object')

    resource_cls = check_resource_type(res.resource_type)

    # res_root is the collection on which the bag_modified / metadata_dirty AVUs are read
    if federated_path:
        res_root = os.path.join(federated_path, res_id)
    else:
        res_root = res_id

    if is_zip_download or is_sf_agg_file:
        if not path.endswith(".zip"):  # requesting folder that needs to be zipped
            # the part of path that follows the resource id
            input_path = path.split(res_id)[1]
            random_hash = random.getrandbits(32)
            daily_date = datetime.datetime.today().strftime('%Y-%m-%d')
            random_hash_path = 'zips/{daily_date}/{res_id}/{rand_folder}'.format(
                daily_date=daily_date, res_id=res_id,
                rand_folder=random_hash)
            output_path = '{random_hash_path}{path}.zip'.format(random_hash_path=random_hash_path,
                                                                path=input_path)

            if res.resource_type == "CompositeResource":
                # strip the leading '/data/contents/' to get the aggregation name
                aggregation_name = input_path[len('/data/contents/'):]
                res.create_aggregation_xml_documents(aggregation_name=aggregation_name)

            if use_async:
                task = create_temp_zip.apply_async((res_id, input_path, output_path,
                                                    is_sf_agg_file), countdown=3)
                delete_zip.apply_async((random_hash_path, ),
                                       countdown=(20 * 60))  # delete after 20 minutes
                # rebuild the url under which the finished zip can be requested
                if is_sf_agg_file:
                    download_path = request.path.split(res_id)[0] + output_path
                else:
                    download_path = request.path.split("zips")[0] + output_path
                if rest_call:
                    return HttpResponse(json.dumps({'zip_status': 'Not ready',
                                                    'task_id': task.task_id,
                                                    'download_path': download_path}),
                                        content_type="application/json")
                # stash the task id and download path in the session, then send the
                # browser back to the resource page
                request.session['task_id'] = task.task_id
                request.session['download_path'] = download_path
                return HttpResponseRedirect(res.get_absolute_url())

            # synchronous zip creation
            ret_status = create_temp_zip(res_id, input_path, output_path, is_sf_agg_file)
            delete_zip.apply_async((random_hash_path, ),
                                   countdown=(20 * 60))  # delete after 20 minutes
            if not ret_status:
                content_msg = "Zip cannot be created successfully. Check log for details."
                # NOTE(review): no status code is set explicitly on this error response
                response = HttpResponse()
                if rest_call:
                    response.content = content_msg
                else:
                    response.content = "<h1>" + content_msg + "</h1>"
                return response

            # serve the zip that was just created
            path = output_path

    bag_modified = istorage.getAVU(res_root, 'bag_modified')
    # make sure if bag_modified is not set to true, we still recreate the bag if the
    # bag file does not exist for some reason to resolve the error to download a nonexistent
    # bag when bag_modified is false due to the flag being out-of-sync with the real bag status
    if bag_modified is None or bag_modified.lower() == "false":
        # check whether the bag file exists
        bag_file_name = res_id + '.zip'
        if res_root.startswith(res_id):
            bag_full_path = os.path.join('bags', bag_file_name)
        else:
            bag_full_path = os.path.join(federated_path, 'bags', bag_file_name)
        # set bag_modified to 'true' if the bag does not exist so that it can be recreated
        # and the bag_modified AVU will be set correctly as well subsequently
        if not istorage.exists(bag_full_path):
            bag_modified = 'true'

    metadata_dirty = istorage.getAVU(res_root, 'metadata_dirty')
    # do on-demand bag creation
    # needs to check whether res_id collection exists before getting/setting AVU on it
    # to accommodate the case where the very same resource gets deleted by another request
    # when it is getting downloaded

    if is_bag_download:
        # send signal for pre_check_bag_flag
        pre_check_bag_flag.send(sender=resource_cls, resource=res)
        if bag_modified is None or bag_modified.lower() == "true":
            if metadata_dirty is None or metadata_dirty.lower() == 'true':
                create_bag_files(res)
            if use_async:
                # task parameter has to be passed in as a tuple or list, hence (res_id,) is needed
                # Note that since we are using JSON for task parameter serialization, no complex
                # object can be passed as parameters to a celery task
                task = create_bag_by_irods.apply_async((res_id,), countdown=3)
                if rest_call:
                    return HttpResponse(json.dumps({'bag_status': 'Not ready',
                                                    'task_id': task.task_id}),
                                        content_type="application/json")

                request.session['task_id'] = task.task_id
                request.session['download_path'] = request.path
                return HttpResponseRedirect(res.get_absolute_url())
            else:
                ret_status = create_bag_by_irods(res_id)
                if not ret_status:
                    content_msg = "Bag cannot be created successfully. Check log for details."
                    # NOTE(review): no status code is set explicitly on this error response
                    response = HttpResponse()
                    if rest_call:
                        response.content = content_msg
                    else:
                        response.content = "<h1>" + content_msg + "</h1>"
                    return response

    elif metadata_dirty is None or metadata_dirty.lower() == 'true':
        if path.endswith("resourcemap.xml") or path.endswith('resourcemetadata.xml'):
            # we need to regenerate the metadata xml files
            create_bag_files(res)

    # send signal for pre download file
    download_file_name = split_path_strs[-1]
    pre_download_file.send(sender=resource_cls, resource=res,
                           download_file_name=download_file_name,
                           request=request)

    # obtain mime_type to set content_type
    mtype = 'application-x/octet-stream'
    mime_type = mimetypes.guess_type(path)
    if mime_type[0] is not None:
        mtype = mime_type[0]
    # retrieve file size to set up Content-Length header
    # presumably the fourth whitespace-separated field of `ils -l` output is the
    # size in bytes -- TODO confirm against the iRODS version in use
    stdout = session.run("ils", None, "-l", path)[0].split()
    flen = int(stdout[3])

    # If this path is resource_federation_path, then the file is a local user file
    userpath = '/' + os.path.join(
        getattr(settings, 'HS_USER_IRODS_ZONE', 'hydroshareuserZone'),
        'home',
        getattr(settings, 'HS_LOCAL_PROXY_USER_IN_FED_ZONE', 'localHydroProxy'))

    # Allow reverse proxy if request was forwarded by nginx
    # (HTTP_X_DJANGO_REVERSE_PROXY is 'true')
    # and reverse proxy is possible according to configuration.

    if use_reverse_proxy and getattr(settings, 'SENDFILE_ON', False) and \
       'HTTP_X_DJANGO_REVERSE_PROXY' in request.META:

        # The NGINX sendfile abstraction is invoked as follows:
        # 1. The request to download a file enters this routine via the /rest_download or /download
        #    url in ./urls.py. It is redirected here from Django. The URI contains either the
        #    unqualified resource path or the federated resource path, depending upon whether
        #    the request is local or federated.
        # 2. This deals with unfederated resources by redirecting them to the uri
        #    /irods-data/{resource-id}/... on nginx. This URI is configured to read the file
        #    directly from the iRODS vault via NFS, and does not work for direct access to the
        #    vault due to the 'internal;' declaration in NGINX.
        # 3. This deals with federated resources by reading their path, matching local vaults, and
        #    redirecting to URIs that are in turn mapped to read from appropriate iRODS vaults. At
        #    present, the only one of these is /irods-user, which handles files whose federation
        #    path is stored in the variable 'userpath'.
        # 4. If there is no vault available for the resource, the file is transferred without
        #    NGINX, exactly as it was transferred previously.

        # stop NGINX targets that are non-existent from hanging forever.
        if not istorage.exists(path):
            content_msg = "file path {} does not exist in iRODS".format(path)
            response = HttpResponse(status=404)
            if rest_call:
                response.content = content_msg
            else:
                response.content = "<h1>" + content_msg + "</h1>"
            return response

        if not res.is_federated:
            # invoke X-Accel-Redirect on physical vault file in nginx
            response = HttpResponse(content_type=mtype)
            response['Content-Disposition'] = 'attachment; filename="{name}"'.format(
                name=path.split('/')[-1])
            response['Content-Length'] = flen
            response['X-Accel-Redirect'] = '/'.join([
                getattr(settings, 'IRODS_DATA_URI', '/irods-data'), path])
            return response

        elif res.resource_federation_path == userpath:  # this guarantees a "user" resource
            # invoke X-Accel-Redirect on physical vault file in nginx
            # if path is full user path; strip federation prefix
            if path.startswith(userpath):
                path = path[len(userpath)+1:]
            # invoke X-Accel-Redirect on physical vault file in nginx
            response = HttpResponse(content_type=mtype)
            response['Content-Disposition'] = 'attachment; filename="{name}"'.format(
                name=path.split('/')[-1])
            response['Content-Length'] = flen
            response['X-Accel-Redirect'] = os.path.join(
                getattr(settings, 'IRODS_USER_URI', '/irods-user'), path)
            return response

        # a federated resource whose federation path is not userpath falls through
        # to the direct transfer below

    # if we get here, none of the above conditions are true
    if flen <= FILE_SIZE_LIMIT:
        options = ('-',)  # we're redirecting to stdout.
        # this unusual way of calling works for federated or local resources
        proc = session.run_safe('iget', None, path, *options)
        response = FileResponse(proc.stdout, content_type=mtype)
        response['Content-Disposition'] = 'attachment; filename="{name}"'.format(
            name=path.split('/')[-1])
        response['Content-Length'] = flen
        return response

    else:
        # NOTE(review): the message states 1GB; presumably FILE_SIZE_LIMIT holds that
        # value -- confirm where the constant is defined
        content_msg = "File larger than 1GB cannot be downloaded directly via HTTP. " \
                      "Please download the large file via iRODS clients."
        response = HttpResponse(status=403)
        if rest_call:
            response.content = content_msg
        else:
            response.content = "<h1>" + content_msg + "</h1>"
        return response
def _res_label(res):
    """Return the '<short_id>:<title>' label used in the migration report."""
    return '{}:{}'.format(res.short_id, res.metadata.title.value)


def _last_file_with_suffix(directory, suffix):
    """Return the full path of the last listed file in ``directory`` ending with ``suffix``.

    Raises IndexError when no file matches.
    """
    return [os.path.join(directory, f) for f in os.listdir(directory)
            if f.endswith(suffix)].pop()


def _copy_files_to_temp(res, temp_dir):
    """Copy every file of ``res`` into ``temp_dir`` and return the path of its .vrt copy."""
    for res_file in res.files.all():
        shutil.copy(res_file.resource_file.file.name,
                    os.path.join(temp_dir, os.path.basename(res_file.resource_file.name)))
    return _last_file_with_suffix(temp_dir, '.vrt')


def _invalidate_bag(istorage, res):
    """Delete the stored bag of ``res`` if there is one and flag the resource as modified."""
    bag_name = 'bags/{res_id}.zip'.format(res_id=res.short_id)
    if istorage.exists(bag_name):
        # delete the resource bag as the old bag is not valid
        istorage.delete(bag_name)
    resource_modified(res, res.creator)


def _rebuild_vrt(res, istorage, temp_dir, vrt_file_path, log):
    """Regenerate the .vrt of a single-tif resource and replace the stored copy.

    Returns True when the VRT was rebuilt, False when the resource was skipped because
    ``temp_dir`` does not hold exactly two files (one .tif plus one .vrt).
    """
    if len(os.listdir(temp_dir)) != 2:
        return False

    # create new vrt file
    tif_file_path = _last_file_with_suffix(temp_dir, '.tif')
    with open(os.devnull, 'w') as fp:
        # .wait() is required so that the vrt file is complete before it is parsed
        return_code = subprocess.Popen(['gdal_translate', '-of', 'VRT',
                                        tif_file_path, vrt_file_path],
                                       stdout=fp, stderr=fp).wait()
    if return_code:
        # processing continues exactly as before; the warning only makes the
        # condition visible in the log
        log.warning('gdal_translate exited with status %s for resource %s',
                    return_code, res.short_id)

    # modify the vrt file contents
    tree = ET.parse(vrt_file_path)
    for element in tree.getroot().iter('SourceFilename'):
        element.attrib['relativeToVRT'] = '1'
    tree.write(vrt_file_path)

    # delete vrt res file
    for f in res.files.all():
        if f.resource_file.name.endswith('vrt'):
            f.resource_file.delete()
            f.delete()

    # add new vrt file to resource; the handle is closed once the upload call returns
    with open(vrt_file_path, 'r') as vrt_fp:
        new_file = UploadedFile(file=vrt_fp, name=os.path.basename(vrt_file_path))
        hydroshare.add_resource_files(res.short_id, new_file)

    # update the bag
    _invalidate_bag(istorage, res)
    return True


def _update_band_metadata(res, istorage, temp_dir, vrt_file_path):
    """Refresh max/min/no-data values of the band elements of ``res`` from its vrt file.

    Returns True when at least one band element was updated.
    """
    # extract meta; the extraction runs with temp_dir as the working directory and the
    # previous working directory is restored even when extraction fails
    ori_dir = os.getcwd()
    os.chdir(temp_dir)
    try:
        res_md_dict = raster_meta_extract.get_raster_meta_dict(vrt_file_path)
    finally:
        os.chdir(ori_dir)

    # update band information metadata in django
    meta_updated = False
    if res_md_dict['band_info']:
        for i, band_meta in res_md_dict['band_info'].items():
            band_obj = res.metadata.bandInformation.filter(name='Band_{}'.format(i)).first()
            if band_obj:
                res.metadata.update_element('bandInformation',
                                            band_obj.id,
                                            maximumValue=band_meta['maximumValue'],
                                            minimumValue=band_meta['minimumValue'],
                                            noDataValue=band_meta['noDataValue'],
                                            )
                meta_updated = True

    # update the bag if meta is updated
    if meta_updated:
        _invalidate_bag(istorage, res)
    return meta_updated


def migrate_tif_file(apps, schema_editor):
    """
    Data migration: regenerate the .vrt file and band metadata of raster resources.

    For every RasterResource that has files, the files are copied to a temporary
    directory. When the resource consists of one .tif plus its .vrt, the .vrt is
    regenerated with ``gdal_translate`` and re-uploaded. The band information metadata
    is then refreshed from the .vrt for every resource whose copy succeeded. A failure
    in one step is logged and recorded, and processing moves on. A summary is printed
    at the end.

    The temporary directory is always removed and the working directory is always
    restored, including when a step raises.

    :param apps: unused; part of the signature of a migration callable
    :param schema_editor: unused; part of the signature of a migration callable
    """
    log = logging.getLogger()
    istorage = IrodsStorage()

    copy_res_fail = []
    vrt_update_fail = []
    vrt_update_success = []
    meta_update_fail = []
    meta_update_success = []

    # start migration for each raster resource that has raster files
    for res in RasterResource.objects.all():
        if not res.files.all():
            continue

        temp_dir = None
        try:
            # copy all the resource files to temp dir
            try:
                temp_dir = tempfile.mkdtemp()
                vrt_file_path = _copy_files_to_temp(res, temp_dir)
            except Exception as e:
                # the exception object is logged rather than e.message, which is
                # deprecated and empty for exceptions built from several arguments
                log.exception(e)
                copy_res_fail.append(_res_label(res))
                continue

            # update vrt file if the raster resource that has a single tif file
            try:
                if _rebuild_vrt(res, istorage, temp_dir, vrt_file_path, log):
                    vrt_update_success.append(_res_label(res))
            except Exception as e:
                log.exception(e)
                vrt_update_fail.append(_res_label(res))

            # update the metadata for the band information of all the raster resources
            try:
                if _update_band_metadata(res, istorage, temp_dir, vrt_file_path):
                    meta_update_success.append(_res_label(res))
            except Exception as e:
                log.exception(e)
                meta_update_fail.append(_res_label(res))
        finally:
            # remove the temp copy whatever happened above
            if temp_dir is not None:
                shutil.rmtree(temp_dir, ignore_errors=True)

    # Print migration results (single-argument form prints the same text on
    # Python 2 and Python 3)
    print('Copy resource to temp folder failure: Number: {} List: {}'.format(
        len(copy_res_fail), copy_res_fail))
    print('VRT file update success: Number: {} List{}'.format(
        len(vrt_update_success), vrt_update_success))
    print('VRT file update fail: Number: {} List{}'.format(
        len(vrt_update_fail), vrt_update_fail))
    print('Meta update success: Number: {} List {}'.format(
        len(meta_update_success), meta_update_success))
    print('Meta update fail: Number: {} List {}'.format(
        len(meta_update_fail), meta_update_fail))