Python IrodsStorage.exists Examples

Programming Language: Python

Namespace/Package Name: django_irods.storage

Class/Type: IrodsStorage

Method/Function: exists

Examples at hotexamples.com: 27

Python IrodsStorage.exists - 27 examples found. These are the top rated real world Python examples of django_irods.storage.IrodsStorage.exists extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

IrodsStorage(30)

exists(16)

delete(12)

getAVU(7)

saveFile(3)

setAVU(3)

set_user_session(3)

size(3)

delete_user_session(2)

getFile(2)

copyFiles(1)

download(1)

listdir(1)

runBagitRule(1)

url(1)

zipup(1)

Example #1

Show file

File: utils.py Project: hydroshare/hydroshare

def is_federated(homepath):
    """
    Check if the selected file via the iRODS browser is from a federated zone or not
    Args:
        homepath: the logical iRODS file name with full logical path, e.g., selected from
                  iRODS browser

    Returns:
    True is the selected file indicated by homepath is from a federated zone, False if otherwise
    """
    homepath = homepath.strip()
    homepath_list = homepath.split('/')
    # homepath is an iRODS logical path in the format of
    # /irods_zone/home/irods_account_username/collection_relative_path, so homepath_list[1]
    # is the irods_zone which we can use to form the fed_proxy_path to check whether
    # fed_proxy_path exists to hold hydroshare resources in a federated zone
    if homepath_list[1]:
        fed_proxy_path = os.path.join(homepath_list[1], 'home',
                                      settings.HS_LOCAL_PROXY_USER_IN_FED_ZONE)
        fed_proxy_path = '/' + fed_proxy_path
    else:
        # the test path input is invalid, return False meaning it is not federated
        return False
    if settings.REMOTE_USE_IRODS:
        irods_storage = IrodsStorage('federated')
    else:
        irods_storage = IrodsStorage()

    # if the iRODS proxy user in hydroshare zone can list homepath and the federation zone proxy
    # user path, it is federated; otherwise, it is not federated
    return irods_storage.exists(homepath) and irods_storage.exists(fed_proxy_path)

Example #2

Show file

def is_federated(homepath):
    """
    Check if the selected file via the iRODS browser is from a federated zone or not
    Args:
        homepath: the logical iRODS file name with full logical path, e.g., selected from
                  iRODS browser

    Returns:
    True is the selected file indicated by homepath is from a federated zone, False if otherwise
    """
    homepath = homepath.strip()
    homepath_list = homepath.split('/')
    # homepath is an iRODS logical path in the format of
    # /irods_zone/home/irods_account_username/collection_relative_path, so homepath_list[1]
    # is the irods_zone which we can use to form the fed_proxy_path to check whether
    # fed_proxy_path exists to hold hydroshare resources in a federated zone
    if homepath_list[1]:
        fed_proxy_path = os.path.join(homepath_list[1], 'home',
                                      settings.HS_LOCAL_PROXY_USER_IN_FED_ZONE)
        fed_proxy_path = '/' + fed_proxy_path
    else:
        # the test path input is invalid, return False meaning it is not federated
        return False
    if settings.REMOTE_USE_IRODS:
        irods_storage = IrodsStorage('federated')
    else:
        irods_storage = IrodsStorage()

    # if the iRODS proxy user in hydroshare zone can list homepath and the federation zone proxy
    # user path, it is federated; otherwise, it is not federated
    return irods_storage.exists(homepath) and irods_storage.exists(
        fed_proxy_path)

Example #3

Show file

File: views.py Project: JeffHeard/django_irods

def download(request, path, *args, **kwargs):
    
    split_path_strs = path.split('/')
    if split_path_strs[0] == 'bags':
        res_id = os.path.splitext(split_path_strs[1])[0]
    else:
        res_id = split_path_strs[0]
    _, authorized, _ = authorize(request, res_id, needed_permission=Action_To_Authorize.VIEW_RESOURCE,
                                 raises_exception=False)
    if not authorized:
        response = HttpResponse()
        response.content = "<h1>You do not have permission to download this resource!</h1>"
        return response

    if 'environment' in kwargs:
        environment = int(kwargs['environment'])
        environment = m.RodsEnvironment.objects.get(pk=environment)
        session = Session("/tmp/django_irods", settings.IRODS_ICOMMANDS_PATH, session_id=uuid4())
        session.create_environment(environment)
        session.run('iinit', None, environment.auth)
    elif getattr(settings, 'IRODS_GLOBAL_SESSION', False):
        session = GLOBAL_SESSION
    elif icommands.ACTIVE_SESSION:
        session = icommands.ACTIVE_SESSION
    else:
        raise KeyError('settings must have IRODS_GLOBAL_SESSION set if there is no environment object')

    # do on-demand bag creation
    istorage = IrodsStorage()
    bag_modified = "false"
    # needs to check whether res_id collection exists before getting/setting AVU on it to accommodate the case
    # where the very same resource gets deleted by another request when it is getting downloaded
    if istorage.exists(res_id):
        bag_modified = istorage.getAVU(res_id, 'bag_modified')
    if bag_modified == "true":
        create_bag_by_irods(res_id, istorage)
        if istorage.exists(res_id):
            istorage.setAVU(res_id, 'bag_modified', "false")

    # obtain mime_type to set content_type
    mtype = 'application-x/octet-stream'
    mime_type = mimetypes.guess_type(path)
    if mime_type[0] is not None:
        mtype = mime_type[0]

    # retrieve file size to set up Content-Length header
    stdout = session.run("ils", None, "-l", path)[0].split()
    flen = int(stdout[3])

    options = ('-',) # we're redirecting to stdout.
    proc = session.run_safe('iget', None, path, *options)
    response = FileResponse(proc.stdout, content_type=mtype)
    response['Content-Disposition'] = 'attachment; filename="{name}"'.format(name=path.split('/')[-1])
    response['Content-Length'] = flen
    return response

Example #4

Show file

File: custom_migration_for_tif_to_vrt_20160223.py Project: hydroshare/hydroshare

def migrate_tif_file(apps, schema_editor):
    # create a vrt file from tif file for each of the Raster Resources
    log = logging.getLogger()
    istorage = IrodsStorage()
    for res in RasterResource.objects.all():
        try:
            if len(res.files.all()) == 1:
                res_file = res.files.all().first()
                vrt_file_path = create_vrt_file(res_file.resource_file)
                if os.path.isfile(vrt_file_path):
                    files = (UploadedFile(file=open(vrt_file_path, 'r'),
                                          name=os.path.basename(vrt_file_path)))
                    hydroshare.add_resource_files(res.short_id, files)

                    bag_name = 'bags/{res_id}.zip'.format(res_id=res.short_id)
                    if istorage.exists(bag_name):
                        # delete the resource bag as the old bag is not valid
                        istorage.delete(bag_name)
                        print("Deleted bag for resource ID:" + str(res.short_id))

                    resource_modified(res, res.creator)

                    log.info('Tif file conversion to VRT successful for resource:ID:{} '
                             'Title:{}'.format(res.short_id, res.metadata.title.value))
                else:
                    log.error('Tif file conversion to VRT unsuccessful for resource:ID:{} '
                              'Title:{}'.format(res.short_id, res.metadata.title.value))

                if os.path.exists(vrt_file_path):
                    shutil.rmtree(os.path.dirname(vrt_file_path))

        except:
            pass

Example #5

Show file

File: tasks.py Project: xuezhaokun/hydroshare

def nightly_zips_cleanup():
    # delete 2 days ago
    date_folder = (date.today() - timedelta(2)).strftime('%Y-%m-%d')
    zips_daily_date = "zips/{daily_date}".format(daily_date=date_folder)
    istorage = IrodsStorage()
    if istorage.exists(zips_daily_date):
        istorage.delete(zips_daily_date)

Example #6

Show file

File: hs_bagit.py Project: kob-aha/hydroshare

def create_bag_by_irods(resource_id, istorage = None):
    """
    create a resource bag on iRODS side by running the bagit rule followed by ibun zipping operation

    Parameters:
    :param resource_id: the resource uuid that is used to look for the resource to create the bag for.
           istorage: IrodsStorage object that is used to call irods bagit rule operation and zipping up operation

    :return: none
    """
    if not istorage:
        istorage = IrodsStorage()

    # only proceed when the resource is not deleted potentially by another request when being downloaded
    if istorage.exists(resource_id):
        # call iRODS bagit rule here
        irods_dest_prefix = "/" + settings.IRODS_ZONE + "/home/" + settings.IRODS_USERNAME
        irods_bagit_input_path = os.path.join(irods_dest_prefix, resource_id)
        bagit_input_path = "*BAGITDATA='{path}'".format(path=irods_bagit_input_path)
        bagit_input_resource = "*DESTRESC='{def_res}'".format(def_res=settings.IRODS_DEFAULT_RESOURCE)
        bagit_rule_file = getattr(settings, 'IRODS_BAGIT_RULE', 'hydroshare/irods/ruleGenerateBagIt_HS.r')

        try:
            # call iRODS run and ibun command to create and zip the bag,
            # ignore SessionException for now as a workaround which could be raised
            # from potential race conditions when multiple ibun commands try to create the same zip file or
            # the very same resource gets deleted by another request when being downloaded
            istorage.runBagitRule(bagit_rule_file, bagit_input_path, bagit_input_resource)
            istorage.zipup(irods_bagit_input_path, 'bags/{res_id}.zip'.format(res_id=resource_id))
        except SessionException:
            pass

Example #7

Show file

File: test_folder_download_zip.py Project: xuezhaokun/hydroshare

 def tearDown(self):
     super(TestFolderDownloadZip, self).tearDown()
     if self.res:
         self.res.delete()
     GenericResource.objects.all().delete()
     istorage = IrodsStorage()
     if istorage.exists(self.output_path):
         istorage.delete(self.output_path)

Example #8

Show file

def create_empty_contents_directory(resource):
    res_id = resource.short_id
    if resource.resource_federation_path:
        istorage = IrodsStorage('federated')
        res_contents_dir = '{}/{}/data/contents'.format(
            resource.resource_federation_path, res_id)
    else:
        istorage = IrodsStorage()
        res_contents_dir = '{}/data/contents'.format(res_id)
    if not istorage.exists(res_contents_dir):
        istorage.session.run("imkdir", None, '-p', res_contents_dir)

Example #9

Show file

File: test_folder_download_zip.py Project: kjlippold/hydroshare

 def tearDown(self):
     super(TestFolderDownloadZip, self).tearDown()
     if self.res:
         self.res.delete()
     if self.test_file:
         os.remove(self.test_file.name)
     if self.refts_file:
         os.remove(self.refts_file.name)
     GenericResource.objects.all().delete()
     istorage = IrodsStorage()
     if istorage.exists("zips"):
         istorage.delete("zips")

Example #10

Show file

File: tasks.py Project: hydroshare/hydroshare

def nightly_zips_cleanup():
    # delete 2 days ago
    date_folder = (date.today() - timedelta(2)).strftime('%Y-%m-%d')
    zips_daily_date = "zips/{daily_date}".format(daily_date=date_folder)
    if __debug__:
        logger.debug("cleaning up {}".format(zips_daily_date))
    istorage = IrodsStorage()
    if istorage.exists(zips_daily_date):
        istorage.delete(zips_daily_date)
    federated_prefixes = BaseResource.objects.all().values_list('resource_federation_path')\
        .distinct()

    for p in federated_prefixes:
        prefix = p[0]  # strip tuple
        if prefix != "":
            zips_daily_date = "{prefix}/zips/{daily_date}"\
                .format(prefix=prefix, daily_date=date_folder)
            if __debug__:
                logger.debug("cleaning up {}".format(zips_daily_date))
            istorage = IrodsStorage("federated")
            if istorage.exists(zips_daily_date):
                istorage.delete(zips_daily_date)

Example #11

Show file

def nightly_zips_cleanup():
    # delete 2 days ago
    date_folder = (date.today() - timedelta(2)).strftime('%Y-%m-%d')
    zips_daily_date = "zips/{daily_date}".format(daily_date=date_folder)
    if __debug__:
        logger.debug("cleaning up {}".format(zips_daily_date))
    istorage = IrodsStorage()
    if istorage.exists(zips_daily_date):
        istorage.delete(zips_daily_date)
    federated_prefixes = BaseResource.objects.all().values_list('resource_federation_path')\
        .distinct()

    for p in federated_prefixes:
        prefix = p[0]  # strip tuple
        if prefix != "":
            zips_daily_date = "{prefix}/zips/{daily_date}"\
                .format(prefix=prefix, daily_date=date_folder)
            if __debug__:
                logger.debug("cleaning up {}".format(zips_daily_date))
            istorage = IrodsStorage("federated")
            if istorage.exists(zips_daily_date):
                istorage.delete(zips_daily_date)

Example #12

Show file

File: test_folder_download_zip.py Project: hydroshare/hydroshare

 def tearDown(self):
     super(TestFolderDownloadZip, self).tearDown()
     if self.res:
         self.res.delete()
     if self.test_file:
         self.test_file.close()
         os.remove(self.test_file.name)
     if self.refts_file:
         self.refts_file.close()
         os.remove(self.refts_file.name)
     GenericResource.objects.all().delete()
     istorage = IrodsStorage()
     if istorage.exists("zips"):
         istorage.delete("zips")

Example #13

Show file

def migrate_tif_file(apps, schema_editor):
    # create a vrt file from tif file for each of the Raster Resources
    log = logging.getLogger()
    istorage = IrodsStorage()
    for res in RasterResource.objects.all():
        try:
            if len(res.files.all()) == 1:
                res_file = res.files.all().first()
                vrt_file_path = create_vrt_file(res_file.resource_file)
                if os.path.isfile(vrt_file_path):
                    files = (UploadedFile(
                        file=open(vrt_file_path, 'r'),
                        name=os.path.basename(vrt_file_path)))
                    hydroshare.add_resource_files(res.short_id, files)

                    bag_name = 'bags/{res_id}.zip'.format(res_id=res.short_id)
                    if istorage.exists(bag_name):
                        # delete the resource bag as the old bag is not valid
                        istorage.delete(bag_name)
                        print("Deleted bag for resource ID:" +
                              str(res.short_id))

                    resource_modified(res, res.creator)

                    log.info(
                        'Tif file conversion to VRT successful for resource:ID:{} '
                        'Title:{}'.format(res.short_id,
                                          res.metadata.title.value))
                else:
                    log.error(
                        'Tif file conversion to VRT unsuccessful for resource:ID:{} '
                        'Title:{}'.format(res.short_id,
                                          res.metadata.title.value))

                if os.path.exists(vrt_file_path):
                    shutil.rmtree(os.path.dirname(vrt_file_path))

        except:
            pass

Example #14

Show file

def delete_bag(resource):
    """
    delete the resource bag

    Parameters:
    :param resource: the resource to delete the bag for.
    :return: none
    """
    res_id = resource.short_id
    istorage = IrodsStorage()

    # delete resource directory first to remove all generated bag-related files for the resource
    istorage.delete(res_id)

    # the resource bag may not exist due to on-demand bagging
    bagname = 'bags/{res_id}.zip'.format(res_id=res_id)
    if istorage.exists(bagname):
        # delete the resource bag
        istorage.delete(bagname)

    # delete the bags table
    for bag in resource.bags.all():
        bag.delete()

Example #15

Show file

File: hs_bagit.py Project: kob-aha/hydroshare

def delete_bag(resource):
    """
    delete the resource bag

    Parameters:
    :param resource: the resource to delete the bag for.
    :return: none
    """
    res_id = resource.short_id
    istorage = IrodsStorage()

    # delete resource directory first to remove all generated bag-related files for the resource
    istorage.delete(res_id)

    # the resource bag may not exist due to on-demand bagging
    bagname = 'bags/{res_id}.zip'.format(res_id=res_id)
    if istorage.exists(bagname):
        # delete the resource bag
        istorage.delete(bagname)

    # delete the bags table
    for bag in resource.bags.all():
        bag.delete()

Example #16

Show file

def create_bag_by_irods(resource_id, istorage=None):
    """
    create a resource bag on iRODS side by running the bagit rule followed by ibun zipping operation

    Parameters:
    :param resource_id: the resource uuid that is used to look for the resource to create the bag for.
           istorage: IrodsStorage object that is used to call irods bagit rule operation and zipping up operation

    :return: none
    """
    if not istorage:
        istorage = IrodsStorage()

    # only proceed when the resource is not deleted potentially by another request when being downloaded
    if istorage.exists(resource_id):
        # call iRODS bagit rule here
        irods_dest_prefix = "/" + settings.IRODS_ZONE + "/home/" + settings.IRODS_USERNAME
        irods_bagit_input_path = os.path.join(irods_dest_prefix, resource_id)
        bagit_input_path = "*BAGITDATA='{path}'".format(
            path=irods_bagit_input_path)
        bagit_input_resource = "*DESTRESC='{def_res}'".format(
            def_res=settings.IRODS_DEFAULT_RESOURCE)
        bagit_rule_file = getattr(settings, 'IRODS_BAGIT_RULE',
                                  'hydroshare/irods/ruleGenerateBagIt_HS.r')

        try:
            # call iRODS run and ibun command to create and zip the bag,
            # ignore SessionException for now as a workaround which could be raised
            # from potential race conditions when multiple ibun commands try to create the same zip file or
            # the very same resource gets deleted by another request when being downloaded
            istorage.runBagitRule(bagit_rule_file, bagit_input_path,
                                  bagit_input_resource)
            istorage.zipup(irods_bagit_input_path,
                           'bags/{res_id}.zip'.format(res_id=resource_id))
        except SessionException:
            pass

Example #17

Show file

def download(request, path, rest_call=False, use_async=True, *args, **kwargs):

    split_path_strs = path.split('/')
    is_bag_download = False
    if split_path_strs[0] == 'bags':
        res_id = os.path.splitext(split_path_strs[1])[0]
        is_bag_download = True
    else:
        res_id = split_path_strs[0]
    res, authorized, _ = authorize(
        request,
        res_id,
        needed_permission=ACTION_TO_AUTHORIZE.VIEW_RESOURCE,
        raises_exception=False)
    if not authorized:
        response = HttpResponse(status=401)
        content_msg = "You do not have permission to download this resource!"
        if rest_call:
            raise PermissionDenied(content_msg)
        else:
            signin_html = '</h1><div class="col-xs-12"><h2 class="page-title">' \
                          '<a href="/oauth_request/"><span class ="glyphicon glyphicon-log-in"></span>' \
                          'Sign In</a></h2>'
            response.content = '<h1>' + content_msg + signin_html
            return response

    if not is_bag_download and "/data" not in path:
        idx_sep = path.find('/')
        path = path[idx_sep:]

    istorage = IrodsStorage()

    if 'environment' in kwargs:
        environment = int(kwargs['environment'])
        environment = m.RodsEnvironment.objects.get(pk=environment)
        session = Session("/tmp/django_irods",
                          settings.IRODS_ICOMMANDS_PATH,
                          session_id=uuid4())
        session.create_environment(environment)
        session.run('iinit', None, environment.auth)
    elif getattr(settings, 'IRODS_GLOBAL_SESSION', False):
        session = GLOBAL_SESSION
    elif icommands.ACTIVE_SESSION:
        session = icommands.ACTIVE_SESSION
    else:
        raise KeyError('settings must have IRODS_GLOBAL_SESSION set '
                       'if there is no environment object')

    if istorage.exists(res_id) and is_bag_download:
        bag_modified = istorage.getAVU(res_id, 'bag_modified')
        # make sure if bag_modified is not set to true, we still recreate the bag if the
        # bag file does not exist for some reason to resolve the error to download a nonexistent
        # bag when bag_modified is false due to the flag being out-of-sync with the real bag status

        if bag_modified is None or bag_modified.lower() == "false":
            # check whether the bag file exists
            bag_file_name = res_id + '.zip'
            bag_full_path = os.path.join('bags', bag_file_name)

            if not istorage.exists(bag_full_path):
                bag_modified = 'true'

        if bag_modified is None or bag_modified.lower() == "true":
            create_bag(res)

    resource_cls = check_resource_type(res.resource_type)

    # send signal for pre download file
    download_file_name = split_path_strs[-1]
    pre_download_file.send(sender=resource_cls,
                           resource=res,
                           download_file_name=download_file_name,
                           request=request)

    # obtain mime_type to set content_type
    mtype = 'application-x/octet-stream'
    mime_type = mimetypes.guess_type(path)
    if mime_type[0] is not None:
        mtype = mime_type[0]
    # retrieve file size to set up Content-Length header
    stdout = session.run("ils", None, "-l", path)[0].split()
    flen = int(stdout[3])
    options = ('-', )  # we're redirecting to stdout.
    proc = session.run_safe('iget', None, path, *options)
    response = FileResponse(proc.stdout, content_type=mtype)
    response['Content-Disposition'] = 'attachment; filename="{name}"'.format(
        name=path.split('/')[-1])
    response['Content-Length'] = flen
    return response

Example #18

Show file

class TestCaseCommonUtilities(object):
    """Enable common utilities for iRODS testing."""
    def assert_federated_irods_available(self):
        """assert federated iRODS is available before proceeding with federation-related tests."""
        self.assertTrue(
            settings.REMOTE_USE_IRODS
            and settings.HS_USER_ZONE_HOST == 'users.local.org'
            and settings.IRODS_HOST == 'data.local.org',
            "irods docker containers are not set up properly for federation testing"
        )
        self.irods_fed_storage = IrodsStorage('federated')
        self.irods_storage = IrodsStorage()

    def create_irods_user_in_user_zone(self):
        """Create corresponding irods account in user zone."""
        try:
            exec_cmd = "{0} {1} {2}".format(
                settings.LINUX_ADMIN_USER_CREATE_USER_IN_USER_ZONE_CMD,
                self.user.username, self.user.username)
            output = run_ssh_command(
                host=settings.HS_USER_ZONE_HOST,
                uname=settings.LINUX_ADMIN_USER_FOR_HS_USER_ZONE,
                pwd=settings.LINUX_ADMIN_USER_PWD_FOR_HS_USER_ZONE,
                exec_cmd=exec_cmd)
            for out_str in output:
                if 'ERROR:' in out_str.upper():
                    # irods account failed to create
                    self.assertRaises(SessionException(-1, out_str, out_str))

            user_profile = UserProfile.objects.filter(user=self.user).first()
            user_profile.create_irods_user_account = True
            user_profile.save()
        except Exception as ex:
            self.assertRaises(SessionException(-1, str(ex), str(ex)))

    def delete_irods_user_in_user_zone(self):
        """Delete irods test user in user zone."""
        try:
            exec_cmd = "{0} {1}".format(
                settings.LINUX_ADMIN_USER_DELETE_USER_IN_USER_ZONE_CMD,
                self.user.username)
            output = run_ssh_command(
                host=settings.HS_USER_ZONE_HOST,
                uname=settings.LINUX_ADMIN_USER_FOR_HS_USER_ZONE,
                pwd=settings.LINUX_ADMIN_USER_PWD_FOR_HS_USER_ZONE,
                exec_cmd=exec_cmd)
            if output:
                for out_str in output:
                    if 'ERROR:' in out_str.upper():
                        # there is an error from icommand run, report the error
                        self.assertRaises(
                            SessionException(-1, out_str, out_str))

            user_profile = UserProfile.objects.filter(user=self.user).first()
            user_profile.create_irods_user_account = False
            user_profile.save()
        except Exception as ex:
            # there is an error from icommand run, report the error
            self.assertRaises(SessionException(-1, str(ex), str(ex)))

    def save_files_to_user_zone(self, file_name_to_target_name_dict):
        """Save a list of files to iRODS user zone.

        :param file_name_to_target_name_dict: a dictionary in the form of {ori_file, target_file}
        where ori_file is the file to be save to, and the target_file is the full path file name
        in iRODS user zone to save ori_file to
        :return:
        """
        for file_name, target_name in list(
                file_name_to_target_name_dict.items()):
            self.irods_fed_storage.saveFile(file_name, target_name)

    def check_file_exist(self, irods_path):
        """Check whether the input irods_path exist in iRODS.

        :param irods_path: the iRODS path to check whether it exists or not
        :return: True if exist, False otherwise.
        """
        return self.irods_storage.exists(irods_path)

    def delete_directory(self, irods_path):
        """delete the input irods_path.
        :param irods_path: the iRODS path to be deleted
        :return:
        """
        self.irods_fed_storage.delete(irods_path)

    def verify_user_quota_usage_avu_in_user_zone(self, attname, qsize):
        '''
        Have to use LINUX_ADMIN_USER_FOR_HS_USER_ZONE with rodsadmin role to get user type AVU
        in user zone and verify its quota usage is set correctly
        :param attname: quota usage attribute name set on iRODS proxy user in user zone
        :param qsize: quota size (type string) to be verified to equal to the value set for attname.
        '''
        istorage = IrodsStorage()
        istorage.set_user_session(
            username=settings.LINUX_ADMIN_USER_FOR_HS_USER_ZONE,
            password=settings.LINUX_ADMIN_USER_PWD_FOR_HS_USER_ZONE,
            host=settings.HS_USER_ZONE_HOST,
            port=settings.IRODS_PORT,
            zone=settings.HS_USER_IRODS_ZONE,
            sess_id='user_proxy_session')

        uz_bagit_path = os.path.join('/', settings.HS_USER_IRODS_ZONE, 'home',
                                     settings.HS_IRODS_PROXY_USER_IN_USER_ZONE,
                                     settings.IRODS_BAGIT_PATH)
        get_qsize = istorage.getAVU(uz_bagit_path, attname)
        self.assertEqual(qsize, get_qsize)

    def resource_file_oprs(self):
        """Test common iRODS file operations.

        This is a common test utility function to be called by both regular folder operation
        testing and federated zone folder operation testing.
        Make sure the calling TestCase object has the following attributes defined before calling
        this method:
        self.res: resource that has been created that contains files listed in file_name_list
        self.user: owner of the resource
        self.file_name_list: a list of three file names that have been added to the res object
        self.test_file_1 needs to be present for the calling object for doing regular folder
        operations without involving federated zone so that the same opened file can be re-added
        to the resource for testing the case where zipping cannot overwrite existing file
        """
        user = self.user
        res = self.res
        file_name_list = self.file_name_list
        # create a folder, if folder is created successfully, no exception is raised, otherwise,
        # an iRODS exception will be raised which will be caught by the test runner and mark as
        # a test failure
        create_folder(res.short_id, 'data/contents/sub_test_dir')
        istorage = res.get_irods_storage()
        res_path = res.file_path
        store = istorage.listdir(res_path)
        self.assertIn('sub_test_dir',
                      store[0],
                      msg='resource does not contain created sub-folder')

        # create a temporary zips folder to make sure no duplicate folders are returned from listdir()
        zip_res_coll_path = os.path.join('zips', '2020-02-03', res.short_id,
                                         'data', 'contents', 'sub_test_dir')
        istorage.session.run("imkdir", None, '-p', zip_res_coll_path)
        store = istorage.listdir(res_path)
        self.assertEqual(store[0].count('sub_test_dir'),
                         1,
                         msg='duplicate folder: sub_test_dir occurred more '
                         'than once')

        # rename the third file in file_name_list
        move_or_rename_file_or_folder(user, res.short_id,
                                      'data/contents/' + file_name_list[2],
                                      'data/contents/new_' + file_name_list[2])
        # move the first two files in file_name_list to the new folder
        move_or_rename_file_or_folder(
            user, res.short_id, 'data/contents/' + file_name_list[0],
            'data/contents/sub_test_dir/' + file_name_list[0])
        move_or_rename_file_or_folder(
            user, res.short_id, 'data/contents/' + file_name_list[1],
            'data/contents/sub_test_dir/' + file_name_list[1])
        updated_res_file_names = []
        for rf in ResourceFile.objects.filter(object_id=res.id):
            updated_res_file_names.append(rf.short_path)

        self.assertIn('new_' + file_name_list[2],
                      updated_res_file_names,
                      msg="resource does not contain the updated file new_" +
                      file_name_list[2])
        self.assertNotIn(file_name_list[2],
                         updated_res_file_names,
                         msg='resource still contains the old file ' +
                         file_name_list[2] + ' after renaming')
        self.assertIn('sub_test_dir/' + file_name_list[0],
                      updated_res_file_names,
                      msg='resource does not contain ' + file_name_list[0] +
                      ' moved to a folder')
        self.assertNotIn(file_name_list[0],
                         updated_res_file_names,
                         msg='resource still contains the old ' +
                         file_name_list[0] + 'after moving to a folder')
        self.assertIn('sub_test_dir/' + file_name_list[1],
                      updated_res_file_names,
                      msg='resource does not contain ' + file_name_list[1] +
                      'moved to a new folder')
        self.assertNotIn(file_name_list[1],
                         updated_res_file_names,
                         msg='resource still contains the old ' +
                         file_name_list[1] + ' after moving to a folder')

        # zip the folder
        output_zip_fname, size = \
            zip_folder(user, res.short_id, 'data/contents/sub_test_dir',
                       'sub_test_dir.zip', True)
        self.assertGreater(size, 0, msg='zipped file has a size of 0')
        # Now resource should contain only two files: new_file3.txt and sub_test_dir.zip
        # since the folder is zipped into sub_test_dir.zip with the folder deleted
        self.assertEqual(res.files.all().count(),
                         2,
                         msg="resource file count didn't match-")

        # test unzip does not allow override of existing files
        # add an existing file in the zip to the resource
        if res.resource_federation_path:
            fed_test_file1_full_path = '/{zone}/home/{uname}/{fname}'.format(
                zone=settings.HS_USER_IRODS_ZONE,
                uname=user.username,
                fname=file_name_list[0])
            # TODO: why isn't this a method of resource?
            # TODO: Why do we repeat the resource_federation_path?
            add_resource_files(res.short_id,
                               source_names=[fed_test_file1_full_path],
                               move=False)

        else:
            # TODO: Why isn't this a method of resource?
            add_resource_files(res.short_id, self.test_file_1)

        # TODO: use ResourceFile.create_folder, which doesn't require data/contents prefix
        create_folder(res.short_id, 'data/contents/sub_test_dir')

        # TODO: use ResourceFile.rename, which doesn't require data/contents prefix
        move_or_rename_file_or_folder(
            user, res.short_id, 'data/contents/' + file_name_list[0],
            'data/contents/sub_test_dir/' + file_name_list[0])
        # Now resource should contain three files: file3_new.txt, sub_test_dir.zip, and file1.txt
        self.assertEqual(res.files.all().count(),
                         3,
                         msg="resource file count didn't match")
        unzip_file(user, res.short_id, 'data/contents/sub_test_dir.zip', False)

        # Resource should still contain 5 files: file3_new.txt (2), sub_test_dir.zip,
        # and file1.txt (2)
        file_cnt = res.files.all().count()
        self.assertEqual(file_cnt,
                         5,
                         msg="resource file count didn't match - " +
                         str(file_cnt) + " != 5")

        # remove all files except the zippped file
        remove_folder(user, res.short_id, 'data/contents/sub_test_dir')
        remove_folder(user, res.short_id, 'data/contents/sub_test_dir-1')

        # Now resource should contain two files: file3_new.txt sub_test_dir.zip
        file_cnt = res.files.all().count()
        self.assertEqual(file_cnt,
                         2,
                         msg="resource file count didn't match - " +
                         str(file_cnt) + " != 2")
        unzip_file(user, res.short_id, 'data/contents/sub_test_dir.zip', True)
        # Now resource should contain three files: file1.txt, file2.txt, and file3_new.txt
        self.assertEqual(res.files.all().count(),
                         3,
                         msg="resource file count didn't match")
        updated_res_file_names = []
        for rf in ResourceFile.objects.filter(object_id=res.id):
            updated_res_file_names.append(rf.short_path)
        self.assertNotIn(
            'sub_test_dir.zip',
            updated_res_file_names,
            msg="resource still contains the zip file after unzipping")
        self.assertIn('sub_test_dir/sub_test_dir/' + file_name_list[0],
                      updated_res_file_names,
                      msg='resource does not contain unzipped file ' +
                      file_name_list[0])
        self.assertIn('sub_test_dir/sub_test_dir/' + file_name_list[1],
                      updated_res_file_names,
                      msg='resource does not contain unzipped file ' +
                      file_name_list[1])
        self.assertIn('new_' + file_name_list[2],
                      updated_res_file_names,
                      msg='resource does not contain unzipped file new_' +
                      file_name_list[2])

        # rename a folder
        move_or_rename_file_or_folder(
            user, res.short_id, 'data/contents/sub_test_dir/sub_test_dir',
            'data/contents/sub_dir')
        updated_res_file_names = []
        for rf in ResourceFile.objects.filter(object_id=res.id):
            updated_res_file_names.append(rf.short_path)

        self.assertNotIn('sub_test_dir/sub_test_dir/' + file_name_list[0],
                         updated_res_file_names,
                         msg='resource still contains ' + file_name_list[0] +
                         ' in the old folder after renaming')
        self.assertIn('sub_dir/' + file_name_list[0],
                      updated_res_file_names,
                      msg='resource does not contain ' + file_name_list[0] +
                      ' in the new folder after renaming')
        self.assertNotIn('sub_test_dir/sub_test_dir/' + file_name_list[1],
                         updated_res_file_names,
                         msg='resource still contains ' + file_name_list[1] +
                         ' in the old folder after renaming')
        self.assertIn('sub_dir/' + file_name_list[1],
                      updated_res_file_names,
                      msg='resource does not contain ' + file_name_list[1] +
                      ' in the new folder after renaming')

        # remove a folder
        # TODO: utilize ResourceFile.remove_folder instead. Takes a short path.
        remove_folder(user, res.short_id, 'data/contents/sub_dir')
        # Now resource only contains one file
        self.assertEqual(res.files.all().count(),
                         1,
                         msg="resource file count didn't match")
        updated_res_file_names = []
        for rf in ResourceFile.objects.filter(object_id=res.id):
            updated_res_file_names.append(rf.short_path)

        self.assertEqual(len(updated_res_file_names), 1)
        self.assertEqual(updated_res_file_names[0], 'new_' + file_name_list[2])

    def raster_metadata_extraction(self):
        """Test raster metadata extraction.

        This is a common test utility function to be called by both regular raster metadata
        extraction testing and federated zone raster metadata extraction testing.
        Make sure the calling TestCase object has self.resRaster attribute defined before calling
        this method which is the raster resource that has been created containing valid raster
        files.
        """
        # there should be 2 content files
        self.assertEqual(self.resRaster.files.all().count(), 2)

        # test core metadata after metadata extraction
        extracted_title = "My Test Raster Resource"
        self.assertEqual(self.resRaster.metadata.title.value, extracted_title)

        # there should be 1 creator
        self.assertEqual(self.resRaster.metadata.creators.all().count(), 1)

        # there should be 1 coverage element - box type
        self.assertEqual(self.resRaster.metadata.coverages.all().count(), 1)
        self.assertEqual(
            self.resRaster.metadata.coverages.all().filter(type='box').count(),
            1)

        box_coverage = self.resRaster.metadata.coverages.all().filter(
            type='box').first()
        self.assertEqual(box_coverage.value['projection'], 'WGS 84 EPSG:4326')
        self.assertEqual(box_coverage.value['units'], 'Decimal degrees')
        self.assertEqual(float(box_coverage.value['northlimit']),
                         42.11270614966863)
        self.assertEqual(float(box_coverage.value['eastlimit']),
                         -111.45699925047542)
        self.assertEqual(float(box_coverage.value['southlimit']),
                         41.66222054591102)
        self.assertEqual(float(box_coverage.value['westlimit']),
                         -111.81761887121905)

        # there should be 2 format elements
        self.assertEqual(self.resRaster.metadata.formats.all().count(), 2)
        self.assertEqual(
            self.resRaster.metadata.formats.all().filter(
                value='application/vrt').count(), 1)
        self.assertEqual(
            self.resRaster.metadata.formats.all().filter(
                value='image/tiff').count(), 1)

        # testing extended metadata element: original coverage
        ori_coverage = self.resRaster.metadata.originalCoverage
        self.assertNotEqual(ori_coverage, None)
        self.assertEqual(float(ori_coverage.value['northlimit']),
                         4662392.446916306)
        self.assertEqual(float(ori_coverage.value['eastlimit']),
                         461954.01909127034)
        self.assertEqual(float(ori_coverage.value['southlimit']),
                         4612592.446916306)
        self.assertEqual(float(ori_coverage.value['westlimit']),
                         432404.01909127034)
        self.assertEqual(ori_coverage.value['units'], 'meter')
        self.assertEqual(ori_coverage.value['projection'],
                         "NAD83 / UTM zone 12N")
        self.assertEqual(ori_coverage.value['datum'],
                         "North_American_Datum_1983")
        projection_string = 'PROJCS["NAD83 / UTM zone 12N",GEOGCS["NAD83",' \
                            'DATUM["North_American_Datum_1983",' \
                            'SPHEROID["GRS 1980",6378137,298.257222101,' \
                            'AUTHORITY["EPSG","7019"]],' \
                            'TOWGS84[0,0,0,0,0,0,0],AUTHORITY["EPSG","6269"]],' \
                            'PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],' \
                            'UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],' \
                            'AUTHORITY["EPSG","4269"]],PROJECTION["Transverse_Mercator"],' \
                            'PARAMETER["latitude_of_origin",0],' \
                            'PARAMETER["central_meridian",-111],' \
                            'PARAMETER["scale_factor",0.9996],PARAMETER["false_easting",500000],' \
                            'PARAMETER["false_northing",0],' \
                            'UNIT["metre",1,AUTHORITY["EPSG","9001"]],' \
                            'AXIS["Easting",EAST],AXIS["Northing",' \
                            'NORTH],AUTHORITY["EPSG","26912"]]'
        self.assertEqual(ori_coverage.value['projection_string'],
                         projection_string)

        # testing extended metadata element: cell information
        cell_info = self.resRaster.metadata.cellInformation
        self.assertEqual(cell_info.rows, 1660)
        self.assertEqual(cell_info.columns, 985)
        self.assertEqual(cell_info.cellSizeXValue, 30.0)
        self.assertEqual(cell_info.cellSizeYValue, 30.0)
        self.assertEqual(cell_info.cellDataType, 'Float32')

        # testing extended metadata element: band information
        self.assertEqual(self.resRaster.metadata.bandInformations.count(), 1)
        band_info = self.resRaster.metadata.bandInformations.first()
        self.assertEqual(band_info.noDataValue, '-3.4028234663852886e+38')
        self.assertEqual(band_info.maximumValue, '3031.443115234375')
        self.assertEqual(band_info.minimumValue, '1358.3345947265625')

    def netcdf_metadata_extraction(self, expected_creators_count=1):
        """Test NetCDF metadata extraction.

        This is a common test utility function to be called by both regular netcdf metadata
        extraction testing and federated zone netCDF metadata extraction testing.
        Make sure the calling TestCase object has self.resNetcdf attribute defined before calling
        this method which is the netCDF resource that has been created containing valid netCDF
        files.
        """
        # there should 2 content file
        self.assertEqual(self.resNetcdf.files.all().count(), 2)

        # test core metadata after metadata extraction
        extracted_title = "Snow water equivalent estimation at TWDEF site from " \
                          "Oct 2009 to June 2010"
        self.assertEqual(self.resNetcdf.metadata.title.value, extracted_title)

        # there should be an abstract element
        self.assertNotEqual(self.resNetcdf.metadata.description, None)
        extracted_abstract = "This netCDF data is the simulation output from Utah Energy " \
                             "Balance (UEB) model.It includes the simulation result " \
                             "of snow water equivalent during the period " \
                             "Oct. 2009 to June 2010 for TWDEF site in Utah."
        self.assertEqual(self.resNetcdf.metadata.description.abstract,
                         extracted_abstract)

        # there should be one source element
        self.assertEqual(self.resNetcdf.metadata.sources.all().count(), 1)

        # there should be one license element:
        self.assertNotEqual(self.resNetcdf.metadata.rights.statement, 1)

        # there should be one relation element
        self.assertEqual(
            self.resNetcdf.metadata.relations.all().filter(
                type='cites').count(), 1)

        # there should be creators equal to expected_creators_count
        self.assertEqual(self.resNetcdf.metadata.creators.all().count(),
                         expected_creators_count)

        # there should be one contributor
        self.assertEqual(self.resNetcdf.metadata.contributors.all().count(), 1)

        # there should be 2 coverage element - box type and period type
        self.assertEqual(self.resNetcdf.metadata.coverages.all().count(), 2)
        self.assertEqual(
            self.resNetcdf.metadata.coverages.all().filter(type='box').count(),
            1)
        self.assertEqual(
            self.resNetcdf.metadata.coverages.all().filter(
                type='period').count(), 1)

        box_coverage = self.resNetcdf.metadata.coverages.all().filter(
            type='box').first()
        self.assertEqual(box_coverage.value['projection'], 'WGS 84 EPSG:4326')
        self.assertEqual(box_coverage.value['units'], 'Decimal degrees')
        self.assertEqual(float(box_coverage.value['northlimit']),
                         41.86712640899591)
        self.assertEqual(float(box_coverage.value['eastlimit']),
                         -111.50594036845686)
        self.assertEqual(float(box_coverage.value['southlimit']),
                         41.8639080745171)
        self.assertEqual(float(box_coverage.value['westlimit']),
                         -111.51138807956221)

        temporal_coverage = self.resNetcdf.metadata.coverages.all().filter(
            type='period').first()
        self.assertEqual(
            parser.parse(temporal_coverage.value['start']).date(),
            parser.parse('10/01/2009').date())
        self.assertEqual(
            parser.parse(temporal_coverage.value['end']).date(),
            parser.parse('05/30/2010').date())

        # there should be 2 format elements
        self.assertEqual(self.resNetcdf.metadata.formats.all().count(), 2)
        self.assertEqual(
            self.resNetcdf.metadata.formats.all().filter(
                value='text/plain').count(), 1)
        self.assertEqual(
            self.resNetcdf.metadata.formats.all().filter(
                value='application/x-netcdf').count(), 1)

        # there should be one subject element
        self.assertEqual(self.resNetcdf.metadata.subjects.all().count(), 1)
        subj_element = self.resNetcdf.metadata.subjects.all().first()
        self.assertEqual(subj_element.value, 'Snow water equivalent')

        # testing extended metadata element: original coverage
        ori_coverage = self.resNetcdf.metadata.ori_coverage.all().first()
        self.assertNotEqual(ori_coverage, None)
        self.assertEqual(ori_coverage.projection_string_type, 'Proj4 String')
        proj_text = '+proj=tmerc +y_0=0.0 +x_0=500000.0 +k_0=0.9996 +lat_0=0.0 +lon_0=-111.0'
        self.assertEqual(ori_coverage.projection_string_text, proj_text)
        self.assertEqual(float(ori_coverage.value['northlimit']), 4.63515e+06)
        self.assertEqual(float(ori_coverage.value['eastlimit']), 458010.0)
        self.assertEqual(float(ori_coverage.value['southlimit']), 4.63479e+06)
        self.assertEqual(float(ori_coverage.value['westlimit']), 457560.0)
        self.assertEqual(ori_coverage.value['units'], 'Meter')
        self.assertEqual(ori_coverage.value['projection'],
                         'transverse_mercator')

        # testing extended metadata element: variables
        self.assertEqual(self.resNetcdf.metadata.variables.all().count(), 5)

        # test time variable
        var_time = self.resNetcdf.metadata.variables.all().filter(
            name='time').first()
        self.assertNotEqual(var_time, None)
        self.assertEqual(var_time.unit, 'hours since 2009-10-1 0:0:00 UTC')
        self.assertEqual(var_time.type, 'Float')
        self.assertEqual(var_time.shape, 'time')
        self.assertEqual(var_time.descriptive_name, 'time')

        # test x variable
        var_x = self.resNetcdf.metadata.variables.all().filter(
            name='x').first()
        self.assertNotEqual(var_x, None)
        self.assertEqual(var_x.unit, 'Meter')
        self.assertEqual(var_x.type, 'Float')
        self.assertEqual(var_x.shape, 'x')
        self.assertEqual(var_x.descriptive_name, 'x coordinate of projection')

        # test y variable
        var_y = self.resNetcdf.metadata.variables.all().filter(
            name='y').first()
        self.assertNotEqual(var_y, None)
        self.assertEqual(var_y.unit, 'Meter')
        self.assertEqual(var_y.type, 'Float')
        self.assertEqual(var_y.shape, 'y')
        self.assertEqual(var_y.descriptive_name, 'y coordinate of projection')

        # test SWE variable
        var_swe = self.resNetcdf.metadata.variables.all().filter(
            name='SWE').first()
        self.assertNotEqual(var_swe, None)
        self.assertEqual(var_swe.unit, 'm')
        self.assertEqual(var_swe.type, 'Float')
        self.assertEqual(var_swe.shape, 'y,x,time')
        self.assertEqual(var_swe.descriptive_name, 'Snow water equivalent')
        self.assertEqual(var_swe.method, 'model simulation of UEB model')
        self.assertEqual(var_swe.missing_value, '-9999')

        # test grid mapping variable
        var_grid = self.resNetcdf.metadata.variables.all().\
            filter(name='transverse_mercator').first()
        self.assertNotEqual(var_grid, None)
        self.assertEqual(var_grid.unit, 'Unknown')
        self.assertEqual(var_grid.type, 'Unknown')
        self.assertEqual(var_grid.shape, 'Not defined')

    def timeseries_metadata_extraction(self):
        """Test timeseries metadata extraction.

        This is a common test utility function to be called by both regular timeseries metadata
        extraction testing and federated zone timeseries metadata extraction testing.
        Make sure the calling TestCase object has self.resTimeSeries attribute defined before
        calling this method which is the timeseries resource that has been created containing
        valid timeseries file.
        """
        # there should one content file
        self.assertEqual(self.resTimeSeries.files.all().count(), 1)

        # there should be one contributor element
        self.assertEqual(
            self.resTimeSeries.metadata.contributors.all().count(), 1)

        # test core metadata after metadata extraction
        extracted_title = "Water temperature data from the Little Bear River, UT"
        self.assertEqual(self.resTimeSeries.metadata.title.value,
                         extracted_title)

        # there should be an abstract element
        self.assertNotEqual(self.resTimeSeries.metadata.description, None)
        extracted_abstract = "This dataset contains time series of observations of water " \
                             "temperature in the Little Bear River, UT. Data were recorded every " \
                             "30 minutes. The values were recorded using a HydroLab MS5 " \
                             "multi-parameter water quality sonde connected to a Campbell " \
                             "Scientific datalogger."

        self.assertEqual(
            self.resTimeSeries.metadata.description.abstract.strip(),
            extracted_abstract)

        # there should be 2 coverage element -  box type and period type
        self.assertEqual(self.resTimeSeries.metadata.coverages.all().count(),
                         2)
        self.assertEqual(
            self.resTimeSeries.metadata.coverages.all().filter(
                type='box').count(), 1)
        self.assertEqual(
            self.resTimeSeries.metadata.coverages.all().filter(
                type='period').count(), 1)

        box_coverage = self.resTimeSeries.metadata.coverages.all().filter(
            type='box').first()
        self.assertEqual(box_coverage.value['projection'], 'WGS 84 EPSG:4326')
        self.assertEqual(box_coverage.value['units'], 'Decimal degrees')
        self.assertEqual(float(box_coverage.value['northlimit']), 41.718473)
        self.assertEqual(float(box_coverage.value['eastlimit']), -111.799324)
        self.assertEqual(float(box_coverage.value['southlimit']), 41.495409)
        self.assertEqual(float(box_coverage.value['westlimit']), -111.946402)

        temporal_coverage = self.resTimeSeries.metadata.coverages.all().filter(
            type='period').first()
        self.assertEqual(
            parser.parse(temporal_coverage.value['start']).date(),
            parser.parse('01/01/2008').date())
        self.assertEqual(
            parser.parse(temporal_coverage.value['end']).date(),
            parser.parse('01/30/2008').date())

        # there should be one format element
        self.assertEqual(self.resTimeSeries.metadata.formats.all().count(), 1)
        format_element = self.resTimeSeries.metadata.formats.all().first()
        self.assertEqual(format_element.value, 'application/sqlite')

        # there should be one subject element
        self.assertEqual(self.resTimeSeries.metadata.subjects.all().count(), 1)
        subj_element = self.resTimeSeries.metadata.subjects.all().first()
        self.assertEqual(subj_element.value, 'Temperature')

        # there should be a total of 7 timeseries
        self.assertEqual(
            self.resTimeSeries.metadata.time_series_results.all().count(), 7)

        # testing extended metadata elements

        # test 'site' - there should be 7 sites
        self.assertEqual(self.resTimeSeries.metadata.sites.all().count(), 7)
        # each site be associated with one series id
        for site in self.resTimeSeries.metadata.sites.all():
            self.assertEqual(len(site.series_ids), 1)

        # test the data for a specific site
        site = self.resTimeSeries.metadata.sites.filter(
            site_code='USU-LBR-Paradise').first()
        self.assertNotEqual(site, None)
        site_name = 'Little Bear River at McMurdy Hollow near Paradise, Utah'
        self.assertEqual(site.site_name, site_name)
        self.assertEqual(site.elevation_m, 1445)
        self.assertEqual(site.elevation_datum, 'NGVD29')
        self.assertEqual(site.site_type, 'Stream')

        # test 'variable' - there should be 1 variable element
        self.assertEqual(self.resTimeSeries.metadata.variables.all().count(),
                         1)
        variable = self.resTimeSeries.metadata.variables.all().first()
        # there should be 7 series ids associated with this one variable
        self.assertEqual(len(variable.series_ids), 7)
        # test the data for a variable
        self.assertEqual(variable.variable_code, 'USU36')
        self.assertEqual(variable.variable_name, 'Temperature')
        self.assertEqual(variable.variable_type, 'Water Quality')
        self.assertEqual(variable.no_data_value, -9999)
        self.assertEqual(variable.variable_definition, None)
        self.assertEqual(variable.speciation, 'Not Applicable')

        # test 'method' - there should be 1 method element
        self.assertEqual(self.resTimeSeries.metadata.methods.all().count(), 1)
        method = self.resTimeSeries.metadata.methods.all().first()
        # there should be 7 series ids associated with this one method element
        self.assertEqual(len(method.series_ids), 7)
        self.assertEqual(method.method_code, '28')
        method_name = 'Quality Control Level 1 Data Series created from raw QC Level 0 data ' \
                      'using ODM Tools.'
        self.assertEqual(method.method_name, method_name)
        self.assertEqual(method.method_type, 'Instrument deployment')
        method_des = 'Quality Control Level 1 Data Series created from raw QC Level 0 data ' \
                     'using ODM Tools.'
        self.assertEqual(method.method_description, method_des)
        self.assertEqual(method.method_link, None)

        # test 'processing_level' - there should be 1 processing_level element
        self.assertEqual(
            self.resTimeSeries.metadata.processing_levels.all().count(), 1)
        proc_level = self.resTimeSeries.metadata.processing_levels.all().first(
        )
        # there should be 7 series ids associated with this one element
        self.assertEqual(len(proc_level.series_ids), 7)
        self.assertEqual(proc_level.processing_level_code, '1')
        self.assertEqual(proc_level.definition, 'Quality controlled data')
        explanation = 'Quality controlled data that have passed quality assurance procedures ' \
                      'such as routine estimation of timing and sensor calibration or visual ' \
                      'inspection and removal of obvious errors. An example is USGS published ' \
                      'streamflow records following parsing through USGS quality control ' \
                      'procedures.'
        self.assertEqual(proc_level.explanation, explanation)

        # test 'timeseries_result' - there should be 7 timeseries_result element
        self.assertEqual(
            self.resTimeSeries.metadata.time_series_results.all().count(), 7)
        ts_result = self.resTimeSeries.metadata.time_series_results.filter(
            series_ids__contains=['182d8fa3-1ebc-11e6-ad49-f45c8999816f'
                                  ]).first()
        self.assertNotEqual(ts_result, None)
        # there should be only 1 series id associated with this element
        self.assertEqual(len(ts_result.series_ids), 1)
        self.assertEqual(ts_result.units_type, 'Temperature')
        self.assertEqual(ts_result.units_name, 'degree celsius')
        self.assertEqual(ts_result.units_abbreviation, 'degC')
        self.assertEqual(ts_result.status, 'Unknown')
        self.assertEqual(ts_result.sample_medium, 'Surface Water')
        self.assertEqual(ts_result.value_count, 1441)
        self.assertEqual(ts_result.aggregation_statistics, 'Average')

        # test for CV lookup tables
        # there should be 23 CV_VariableType records
        self.assertEqual(
            self.resTimeSeries.metadata.cv_variable_types.all().count(), 23)
        # there should be 805 CV_VariableName records
        self.assertEqual(
            self.resTimeSeries.metadata.cv_variable_names.all().count(), 805)
        # there should be 145 CV_Speciation records
        self.assertEqual(
            self.resTimeSeries.metadata.cv_speciations.all().count(), 145)
        # there should be 51 CV_SiteType records
        self.assertEqual(
            self.resTimeSeries.metadata.cv_site_types.all().count(), 51)
        # there should be 5 CV_ElevationDatum records
        self.assertEqual(
            self.resTimeSeries.metadata.cv_elevation_datums.all().count(), 5)
        # there should be 25 CV_MethodType records
        self.assertEqual(
            self.resTimeSeries.metadata.cv_method_types.all().count(), 25)
        # there should be 179 CV_UnitsType records
        self.assertEqual(
            self.resTimeSeries.metadata.cv_units_types.all().count(), 179)
        # there should be 4 CV_Status records
        self.assertEqual(self.resTimeSeries.metadata.cv_statuses.all().count(),
                         4)
        # there should be 17 CV_Medium records
        self.assertEqual(self.resTimeSeries.metadata.cv_mediums.all().count(),
                         18)
        # there should be 17 CV_aggregationStatistics records
        self.assertEqual(
            self.resTimeSeries.metadata.cv_aggregation_statistics.all().count(
            ), 17)
        # there should not be any UTCOffset element
        self.assertEqual(self.resTimeSeries.metadata.utc_offset, None)

Example #19

Show file

File: testing.py Project: hydroshare/hydroshare

class TestCaseCommonUtilities(object):
    """Enable common utilities for iRODS testing."""
    def assert_federated_irods_available(self):
        """assert federated iRODS is available before proceeding with federation-related tests."""
        self.assertTrue(settings.REMOTE_USE_IRODS and
                        settings.HS_USER_ZONE_HOST == 'users.local.org' and
                        settings.IRODS_HOST == 'data.local.org',
                        "irods docker containers are not set up properly for federation testing")
        self.irods_fed_storage = IrodsStorage('federated')
        self.irods_storage = IrodsStorage()

    def create_irods_user_in_user_zone(self):
        """Create corresponding irods account in user zone."""
        try:
            exec_cmd = "{0} {1} {2}".format(settings.HS_USER_ZONE_PROXY_USER_CREATE_USER_CMD,
                                            self.user.username, self.user.username)
            output = run_ssh_command(host=settings.HS_USER_ZONE_HOST,
                                     uname=settings.HS_USER_ZONE_PROXY_USER,
                                     pwd=settings.HS_USER_ZONE_PROXY_USER_PWD,
                                     exec_cmd=exec_cmd)
            if output:
                if 'ERROR:' in output.upper():
                    # irods account failed to create
                    self.assertRaises(SessionException(-1, output, output))

            user_profile = UserProfile.objects.filter(user=self.user).first()
            user_profile.create_irods_user_account = True
            user_profile.save()
        except Exception as ex:
            self.assertRaises(SessionException(-1, ex.message, ex.message))

    def delete_irods_user_in_user_zone(self):
        """Delete irods test user in user zone."""
        try:
            exec_cmd = "{0} {1}".format(settings.HS_USER_ZONE_PROXY_USER_DELETE_USER_CMD,
                                        self.user.username)
            output = run_ssh_command(host=settings.HS_USER_ZONE_HOST,
                                     uname=settings.HS_USER_ZONE_PROXY_USER,
                                     pwd=settings.HS_USER_ZONE_PROXY_USER_PWD,
                                     exec_cmd=exec_cmd)
            if output:
                if 'ERROR:' in output.upper():
                    # there is an error from icommand run, report the error
                    self.assertRaises(SessionException(-1, output, output))

            user_profile = UserProfile.objects.filter(user=self.user).first()
            user_profile.create_irods_user_account = False
            user_profile.save()
        except Exception as ex:
            # there is an error from icommand run, report the error
            self.assertRaises(SessionException(-1, ex.message, ex.message))

    def save_files_to_user_zone(self, file_name_to_target_name_dict):
        """Save a list of files to iRODS user zone.

        :param file_name_to_target_name_dict: a dictionary in the form of {ori_file, target_file}
        where ori_file is the file to be save to, and the target_file is the full path file name
        in iRODS user zone to save ori_file to
        :return:
        """
        for file_name, target_name in file_name_to_target_name_dict.iteritems():
            self.irods_fed_storage.saveFile(file_name, target_name)

    def check_file_exist(self, irods_path):
        """Check whether the input irods_path exist in iRODS.

        :param irods_path: the iRODS path to check whether it exists or not
        :return: True if exist, False otherwise.
        """
        return self.irods_storage.exists(irods_path)

    def delete_directory(self, irods_path):
        """delete the input irods_path.
        :param irods_path: the iRODS path to be deleted
        :return:
        """
        self.irods_fed_storage.delete(irods_path)

    def verify_user_quota_usage_avu_in_user_zone(self, attname, qsize):
        '''
        Have to use HS_USER_ZONE_PROXY_USER with rodsadmin role to get user type AVU in user zone
        and verify its quota usage is set correctly
        :param attname: quota usage attribute name set on iRODS proxy user in user zone
        :param qsize: quota size (type string) to be verified to equal to the value set for attname.
        '''
        istorage = IrodsStorage()
        istorage.set_user_session(username=settings.HS_USER_ZONE_PROXY_USER,
                                  password=settings.HS_USER_ZONE_PROXY_USER_PWD,
                                  host=settings.HS_USER_ZONE_HOST,
                                  port=settings.IRODS_PORT,
                                  zone=settings.HS_USER_IRODS_ZONE,
                                  sess_id='user_proxy_session')

        uz_bagit_path = os.path.join('/', settings.HS_USER_IRODS_ZONE, 'home',
                                     settings.HS_LOCAL_PROXY_USER_IN_FED_ZONE,
                                     settings.IRODS_BAGIT_PATH)
        get_qsize = istorage.getAVU(uz_bagit_path, attname)
        self.assertEqual(qsize, get_qsize)

    def resource_file_oprs(self):
        """Test common iRODS file operations.

        This is a common test utility function to be called by both regular folder operation
        testing and federated zone folder operation testing.
        Make sure the calling TestCase object has the following attributes defined before calling
        this method:
        self.res: resource that has been created that contains files listed in file_name_list
        self.user: owner of the resource
        self.file_name_list: a list of three file names that have been added to the res object
        self.test_file_1 needs to be present for the calling object for doing regular folder
        operations without involving federated zone so that the same opened file can be re-added
        to the resource for testing the case where zipping cannot overwrite existing file
        """
        user = self.user
        res = self.res
        file_name_list = self.file_name_list
        # create a folder, if folder is created successfully, no exception is raised, otherwise,
        # an iRODS exception will be raised which will be caught by the test runner and mark as
        # a test failure
        create_folder(res.short_id, 'data/contents/sub_test_dir')
        istorage = res.get_irods_storage()
        res_path = res.file_path
        store = istorage.listdir(res_path)
        self.assertIn('sub_test_dir', store[0], msg='resource does not contain created sub-folder')

        # rename the third file in file_name_list
        move_or_rename_file_or_folder(user, res.short_id,
                                      'data/contents/' + file_name_list[2],
                                      'data/contents/new_' + file_name_list[2])
        # move the first two files in file_name_list to the new folder
        move_or_rename_file_or_folder(user, res.short_id,
                                      'data/contents/' + file_name_list[0],
                                      'data/contents/sub_test_dir/' + file_name_list[0])
        move_or_rename_file_or_folder(user, res.short_id,
                                      'data/contents/' + file_name_list[1],
                                      'data/contents/sub_test_dir/' + file_name_list[1])
        updated_res_file_names = []
        for rf in ResourceFile.objects.filter(object_id=res.id):
            updated_res_file_names.append(rf.short_path)

        self.assertIn('new_' + file_name_list[2], updated_res_file_names,
                      msg="resource does not contain the updated file new_" + file_name_list[2])
        self.assertNotIn(file_name_list[2], updated_res_file_names,
                         msg='resource still contains the old file ' + file_name_list[2] +
                             ' after renaming')
        self.assertIn('sub_test_dir/' + file_name_list[0], updated_res_file_names,
                      msg='resource does not contain ' + file_name_list[0] + ' moved to a folder')
        self.assertNotIn(file_name_list[0], updated_res_file_names,
                         msg='resource still contains the old ' + file_name_list[0] +
                             'after moving to a folder')
        self.assertIn('sub_test_dir/' + file_name_list[1], updated_res_file_names,
                      msg='resource does not contain ' + file_name_list[1] +
                          'moved to a new folder')
        self.assertNotIn(file_name_list[1], updated_res_file_names,
                         msg='resource still contains the old ' + file_name_list[1] +
                             ' after moving to a folder')

        # zip the folder
        output_zip_fname, size = \
            zip_folder(user, res.short_id, 'data/contents/sub_test_dir',
                       'sub_test_dir.zip', True)
        self.assertGreater(size, 0, msg='zipped file has a size of 0')
        # Now resource should contain only two files: new_file3.txt and sub_test_dir.zip
        # since the folder is zipped into sub_test_dir.zip with the folder deleted
        self.assertEqual(res.files.all().count(), 2,
                         msg="resource file count didn't match-")

        # test unzip does not allow override of existing files
        # add an existing file in the zip to the resource
        if res.resource_federation_path:
            fed_test_file1_full_path = '/{zone}/home/{uname}/{fname}'.format(
                zone=settings.HS_USER_IRODS_ZONE, uname=user.username, fname=file_name_list[0])
            # TODO: why isn't this a method of resource?
            # TODO: Why do we repeat the resource_federation_path?
            add_resource_files(res.short_id, source_names=[fed_test_file1_full_path],
                               move=False)

        else:
            # TODO: Why isn't this a method of resource?
            add_resource_files(res.short_id, self.test_file_1)

        # TODO: use ResourceFile.create_folder, which doesn't require data/contents prefix
        create_folder(res.short_id, 'data/contents/sub_test_dir')

        # TODO: use ResourceFile.rename, which doesn't require data/contents prefix
        move_or_rename_file_or_folder(user, res.short_id,
                                      'data/contents/' + file_name_list[0],
                                      'data/contents/sub_test_dir/' + file_name_list[0])
        # Now resource should contain three files: file3_new.txt, sub_test_dir.zip, and file1.txt
        self.assertEqual(res.files.all().count(), 3, msg="resource file count didn't match")
        unzip_file(user, res.short_id, 'data/contents/sub_test_dir.zip', False)

        # Resource should still contain 5 files: file3_new.txt (2), sub_test_dir.zip,
        # and file1.txt (2)
        file_cnt = res.files.all().count()
        self.assertEqual(file_cnt, 5, msg="resource file count didn't match - " +
                                          str(file_cnt) + " != 5")

        # remove all files except the zippped file
        remove_folder(user, res.short_id, 'data/contents/sub_test_dir')
        remove_folder(user, res.short_id, 'data/contents/sub_test_dir-1')

        # Now resource should contain two files: file3_new.txt sub_test_dir.zip
        file_cnt = res.files.all().count()
        self.assertEqual(file_cnt, 2, msg="resource file count didn't match - " +
                                          str(file_cnt) + " != 2")
        unzip_file(user, res.short_id, 'data/contents/sub_test_dir.zip', True)
        # Now resource should contain three files: file1.txt, file2.txt, and file3_new.txt
        self.assertEqual(res.files.all().count(), 3, msg="resource file count didn't match")
        updated_res_file_names = []
        for rf in ResourceFile.objects.filter(object_id=res.id):
            updated_res_file_names.append(rf.short_path)
        self.assertNotIn('sub_test_dir.zip', updated_res_file_names,
                         msg="resource still contains the zip file after unzipping")
        self.assertIn('sub_test_dir/sub_test_dir/' + file_name_list[0], updated_res_file_names,
                      msg='resource does not contain unzipped file ' + file_name_list[0])
        self.assertIn('sub_test_dir/sub_test_dir/' + file_name_list[1], updated_res_file_names,
                      msg='resource does not contain unzipped file ' + file_name_list[1])
        self.assertIn('new_' + file_name_list[2], updated_res_file_names,
                      msg='resource does not contain unzipped file new_' + file_name_list[2])

        # rename a folder
        move_or_rename_file_or_folder(user, res.short_id,
                                      'data/contents/sub_test_dir/sub_test_dir',
                                      'data/contents/sub_dir')
        updated_res_file_names = []
        for rf in ResourceFile.objects.filter(object_id=res.id):
            updated_res_file_names.append(rf.short_path)

        self.assertNotIn('sub_test_dir/sub_test_dir/' + file_name_list[0], updated_res_file_names,
                         msg='resource still contains ' + file_name_list[0] +
                             ' in the old folder after renaming')
        self.assertIn('sub_dir/' + file_name_list[0], updated_res_file_names,
                      msg='resource does not contain ' + file_name_list[0] +
                          ' in the new folder after renaming')
        self.assertNotIn('sub_test_dir/sub_test_dir/' + file_name_list[1], updated_res_file_names,
                         msg='resource still contains ' + file_name_list[1] +
                             ' in the old folder after renaming')
        self.assertIn('sub_dir/' + file_name_list[1], updated_res_file_names,
                      msg='resource does not contain ' + file_name_list[1] +
                          ' in the new folder after renaming')

        # remove a folder
        # TODO: utilize ResourceFile.remove_folder instead. Takes a short path.
        remove_folder(user, res.short_id, 'data/contents/sub_dir')
        # Now resource only contains one file
        self.assertEqual(res.files.all().count(), 1, msg="resource file count didn't match")
        updated_res_file_names = []
        for rf in ResourceFile.objects.filter(object_id=res.id):
            updated_res_file_names.append(rf.short_path)

        self.assertEqual(len(updated_res_file_names), 1)
        self.assertEqual(updated_res_file_names[0], 'new_' + file_name_list[2])

    def raster_metadata_extraction(self):
        """Test raster metadata extraction.

        This is a common test utility function to be called by both regular raster metadata
        extraction testing and federated zone raster metadata extraction testing.
        Make sure the calling TestCase object has self.resRaster attribute defined before calling
        this method which is the raster resource that has been created containing valid raster
        files.
        """
        # there should be 2 content files
        self.assertEqual(self.resRaster.files.all().count(), 2)

        # test core metadata after metadata extraction
        extracted_title = "My Test Raster Resource"
        self.assertEqual(self.resRaster.metadata.title.value, extracted_title)

        # there should be 1 creator
        self.assertEqual(self.resRaster.metadata.creators.all().count(), 1)

        # there should be 1 coverage element - box type
        self.assertEqual(self.resRaster.metadata.coverages.all().count(), 1)
        self.assertEqual(self.resRaster.metadata.coverages.all().filter(type='box').count(), 1)

        box_coverage = self.resRaster.metadata.coverages.all().filter(type='box').first()
        self.assertEqual(box_coverage.value['projection'], 'WGS 84 EPSG:4326')
        self.assertEqual(box_coverage.value['units'], 'Decimal degrees')
        self.assertEqual(float(box_coverage.value['northlimit']), 42.11270614966863)
        self.assertEqual(float(box_coverage.value['eastlimit']), -111.45699925047542)
        self.assertEqual(float(box_coverage.value['southlimit']), 41.66222054591102)
        self.assertEqual(float(box_coverage.value['westlimit']), -111.81761887121905)

        # there should be 2 format elements
        self.assertEqual(self.resRaster.metadata.formats.all().count(), 2)
        self.assertEqual(self.resRaster.metadata.formats.all().filter(
            value='application/vrt').count(), 1)
        self.assertEqual(self.resRaster.metadata.formats.all().filter(
            value='image/tiff').count(), 1)

        # testing extended metadata element: original coverage
        ori_coverage = self.resRaster.metadata.originalCoverage
        self.assertNotEquals(ori_coverage, None)
        self.assertEqual(float(ori_coverage.value['northlimit']), 4662392.446916306)
        self.assertEqual(float(ori_coverage.value['eastlimit']), 461954.01909127034)
        self.assertEqual(float(ori_coverage.value['southlimit']), 4612592.446916306)
        self.assertEqual(float(ori_coverage.value['westlimit']), 432404.01909127034)
        self.assertEqual(ori_coverage.value['units'], 'meter')
        self.assertEqual(ori_coverage.value['projection'], "NAD83 / UTM zone 12N")
        self.assertEqual(ori_coverage.value['datum'], "North_American_Datum_1983")
        projection_string = u'PROJCS["NAD83 / UTM zone 12N",GEOGCS["NAD83",' \
                            u'DATUM["North_American_Datum_1983",' \
                            u'SPHEROID["GRS 1980",6378137,298.257222101,' \
                            u'AUTHORITY["EPSG","7019"]],' \
                            u'TOWGS84[0,0,0,0,0,0,0],AUTHORITY["EPSG","6269"]],' \
                            u'PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],' \
                            u'UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],' \
                            u'AUTHORITY["EPSG","4269"]],PROJECTION["Transverse_Mercator"],' \
                            u'PARAMETER["latitude_of_origin",0],' \
                            u'PARAMETER["central_meridian",-111],' \
                            u'PARAMETER["scale_factor",0.9996],PARAMETER["false_easting",500000],' \
                            u'PARAMETER["false_northing",0],' \
                            u'UNIT["metre",1,AUTHORITY["EPSG","9001"]],' \
                            u'AXIS["Easting",EAST],AXIS["Northing",' \
                            u'NORTH],AUTHORITY["EPSG","26912"]]'
        self.assertEqual(ori_coverage.value['projection_string'], projection_string)

        # testing extended metadata element: cell information
        cell_info = self.resRaster.metadata.cellInformation
        self.assertEqual(cell_info.rows, 1660)
        self.assertEqual(cell_info.columns, 985)
        self.assertEqual(cell_info.cellSizeXValue, 30.0)
        self.assertEqual(cell_info.cellSizeYValue, 30.0)
        self.assertEqual(cell_info.cellDataType, 'Float32')

        # testing extended metadata element: band information
        self.assertEqual(self.resRaster.metadata.bandInformations.count(), 1)
        band_info = self.resRaster.metadata.bandInformations.first()
        self.assertEqual(band_info.noDataValue, '-3.40282346639e+38')
        self.assertEqual(band_info.maximumValue, '3031.44311523')
        self.assertEqual(band_info.minimumValue, '1358.33459473')

    def netcdf_metadata_extraction(self, expected_creators_count=1):
        """Test NetCDF metadata extraction.

        This is a common test utility function to be called by both regular netcdf metadata
        extraction testing and federated zone netCDF metadata extraction testing.
        Make sure the calling TestCase object has self.resNetcdf attribute defined before calling
        this method which is the netCDF resource that has been created containing valid netCDF
        files.
        """
        # there should 2 content file
        self.assertEqual(self.resNetcdf.files.all().count(), 2)

        # test core metadata after metadata extraction
        extracted_title = "Snow water equivalent estimation at TWDEF site from " \
                          "Oct 2009 to June 2010"
        self.assertEqual(self.resNetcdf.metadata.title.value, extracted_title)

        # there should be an abstract element
        self.assertNotEqual(self.resNetcdf.metadata.description, None)
        extracted_abstract = "This netCDF data is the simulation output from Utah Energy " \
                             "Balance (UEB) model.It includes the simulation result " \
                             "of snow water equivalent during the period " \
                             "Oct. 2009 to June 2010 for TWDEF site in Utah."
        self.assertEqual(self.resNetcdf.metadata.description.abstract, extracted_abstract)

        # there should be one source element
        self.assertEqual(self.resNetcdf.metadata.sources.all().count(), 1)

        # there should be one license element:
        self.assertNotEquals(self.resNetcdf.metadata.rights.statement, 1)

        # there should be one relation element
        self.assertEqual(self.resNetcdf.metadata.relations.all().filter(type='cites').count(), 1)

        # there should be creators equal to expected_creators_count
        self.assertEqual(self.resNetcdf.metadata.creators.all().count(), expected_creators_count)

        # there should be one contributor
        self.assertEqual(self.resNetcdf.metadata.contributors.all().count(), 1)

        # there should be 2 coverage element - box type and period type
        self.assertEqual(self.resNetcdf.metadata.coverages.all().count(), 2)
        self.assertEqual(self.resNetcdf.metadata.coverages.all().filter(type='box').count(), 1)
        self.assertEqual(self.resNetcdf.metadata.coverages.all().filter(type='period').count(), 1)

        box_coverage = self.resNetcdf.metadata.coverages.all().filter(type='box').first()
        self.assertEqual(box_coverage.value['projection'], 'WGS 84 EPSG:4326')
        self.assertEqual(box_coverage.value['units'], 'Decimal degrees')
        self.assertEqual(float(box_coverage.value['northlimit']), 41.867126409)
        self.assertEqual(float(box_coverage.value['eastlimit']), -111.505940368)
        self.assertEqual(float(box_coverage.value['southlimit']), 41.8639080745)
        self.assertEqual(float(box_coverage.value['westlimit']), -111.51138808)

        temporal_coverage = self.resNetcdf.metadata.coverages.all().filter(type='period').first()
        self.assertEqual(parser.parse(temporal_coverage.value['start']).date(),
                         parser.parse('10/01/2009').date())
        self.assertEqual(parser.parse(temporal_coverage.value['end']).date(),
                         parser.parse('05/30/2010').date())

        # there should be 2 format elements
        self.assertEqual(self.resNetcdf.metadata.formats.all().count(), 2)
        self.assertEqual(self.resNetcdf.metadata.formats.all().
                         filter(value='text/plain').count(), 1)
        self.assertEqual(self.resNetcdf.metadata.formats.all().
                         filter(value='application/x-netcdf').count(), 1)

        # there should be one subject element
        self.assertEqual(self.resNetcdf.metadata.subjects.all().count(), 1)
        subj_element = self.resNetcdf.metadata.subjects.all().first()
        self.assertEqual(subj_element.value, 'Snow water equivalent')

        # testing extended metadata element: original coverage
        ori_coverage = self.resNetcdf.metadata.ori_coverage.all().first()
        self.assertNotEquals(ori_coverage, None)
        self.assertEqual(ori_coverage.projection_string_type, 'Proj4 String')
        proj_text = u'+proj=tmerc +y_0=0.0 +k_0=0.9996 +x_0=500000.0 +lat_0=0.0 +lon_0=-111.0'
        self.assertEqual(ori_coverage.projection_string_text, proj_text)
        self.assertEqual(float(ori_coverage.value['northlimit']), 4.63515e+06)
        self.assertEqual(float(ori_coverage.value['eastlimit']), 458010.0)
        self.assertEqual(float(ori_coverage.value['southlimit']), 4.63479e+06)
        self.assertEqual(float(ori_coverage.value['westlimit']), 457560.0)
        self.assertEqual(ori_coverage.value['units'], 'Meter')
        self.assertEqual(ori_coverage.value['projection'], 'transverse_mercator')

        # testing extended metadata element: variables
        self.assertEqual(self.resNetcdf.metadata.variables.all().count(), 5)

        # test time variable
        var_time = self.resNetcdf.metadata.variables.all().filter(name='time').first()
        self.assertNotEquals(var_time, None)
        self.assertEqual(var_time.unit, 'hours since 2009-10-1 0:0:00 UTC')
        self.assertEqual(var_time.type, 'Float')
        self.assertEqual(var_time.shape, 'time')
        self.assertEqual(var_time.descriptive_name, 'time')

        # test x variable
        var_x = self.resNetcdf.metadata.variables.all().filter(name='x').first()
        self.assertNotEquals(var_x, None)
        self.assertEqual(var_x.unit, 'Meter')
        self.assertEqual(var_x.type, 'Float')
        self.assertEqual(var_x.shape, 'x')
        self.assertEqual(var_x.descriptive_name, 'x coordinate of projection')

        # test y variable
        var_y = self.resNetcdf.metadata.variables.all().filter(name='y').first()
        self.assertNotEquals(var_y, None)
        self.assertEqual(var_y.unit, 'Meter')
        self.assertEqual(var_y.type, 'Float')
        self.assertEqual(var_y.shape, 'y')
        self.assertEqual(var_y.descriptive_name, 'y coordinate of projection')

        # test SWE variable
        var_swe = self.resNetcdf.metadata.variables.all().filter(name='SWE').first()
        self.assertNotEquals(var_swe, None)
        self.assertEqual(var_swe.unit, 'm')
        self.assertEqual(var_swe.type, 'Float')
        self.assertEqual(var_swe.shape, 'y,x,time')
        self.assertEqual(var_swe.descriptive_name, 'Snow water equivalent')
        self.assertEqual(var_swe.method, 'model simulation of UEB model')
        self.assertEqual(var_swe.missing_value, '-9999')

        # test grid mapping variable
        var_grid = self.resNetcdf.metadata.variables.all().\
            filter(name='transverse_mercator').first()
        self.assertNotEquals(var_grid, None)
        self.assertEqual(var_grid.unit, 'Unknown')
        self.assertEqual(var_grid.type, 'Unknown')
        self.assertEqual(var_grid.shape, 'Not defined')

    def timeseries_metadata_extraction(self):
        """Test timeseries metadata extraction.

        This is a common test utility function to be called by both regular timeseries metadata
        extraction testing and federated zone timeseries metadata extraction testing.
        Make sure the calling TestCase object has self.resTimeSeries attribute defined before
        calling this method which is the timeseries resource that has been created containing
        valid timeseries file.
        """
        # there should one content file
        self.assertEqual(self.resTimeSeries.files.all().count(), 1)

        # there should be one contributor element
        self.assertEqual(self.resTimeSeries.metadata.contributors.all().count(), 1)

        # test core metadata after metadata extraction
        extracted_title = "Water temperature data from the Little Bear River, UT"
        self.assertEqual(self.resTimeSeries.metadata.title.value, extracted_title)

        # there should be an abstract element
        self.assertNotEqual(self.resTimeSeries.metadata.description, None)
        extracted_abstract = "This dataset contains time series of observations of water " \
                             "temperature in the Little Bear River, UT. Data were recorded every " \
                             "30 minutes. The values were recorded using a HydroLab MS5 " \
                             "multi-parameter water quality sonde connected to a Campbell " \
                             "Scientific datalogger."

        self.assertEqual(self.resTimeSeries.metadata.description.abstract.strip(),
                         extracted_abstract)

        # there should be 2 coverage element -  box type and period type
        self.assertEqual(self.resTimeSeries.metadata.coverages.all().count(), 2)
        self.assertEqual(self.resTimeSeries.metadata.coverages.all().filter(type='box').count(), 1)
        self.assertEqual(self.resTimeSeries.metadata.coverages.all().filter(
            type='period').count(), 1)

        box_coverage = self.resTimeSeries.metadata.coverages.all().filter(type='box').first()
        self.assertEqual(box_coverage.value['projection'], 'WGS 84 EPSG:4326')
        self.assertEqual(box_coverage.value['units'], 'Decimal degrees')
        self.assertEqual(float(box_coverage.value['northlimit']), 41.718473)
        self.assertEqual(float(box_coverage.value['eastlimit']), -111.799324)
        self.assertEqual(float(box_coverage.value['southlimit']), 41.495409)
        self.assertEqual(float(box_coverage.value['westlimit']), -111.946402)

        temporal_coverage = self.resTimeSeries.metadata.coverages.all().filter(
            type='period').first()
        self.assertEqual(parser.parse(temporal_coverage.value['start']).date(),
                         parser.parse('01/01/2008').date())
        self.assertEqual(parser.parse(temporal_coverage.value['end']).date(),
                         parser.parse('01/30/2008').date())

        # there should be one format element
        self.assertEqual(self.resTimeSeries.metadata.formats.all().count(), 1)
        format_element = self.resTimeSeries.metadata.formats.all().first()
        self.assertEqual(format_element.value, 'application/sqlite')

        # there should be one subject element
        self.assertEqual(self.resTimeSeries.metadata.subjects.all().count(), 1)
        subj_element = self.resTimeSeries.metadata.subjects.all().first()
        self.assertEqual(subj_element.value, 'Temperature')

        # there should be a total of 7 timeseries
        self.assertEqual(self.resTimeSeries.metadata.time_series_results.all().count(), 7)

        # testing extended metadata elements

        # test 'site' - there should be 7 sites
        self.assertEqual(self.resTimeSeries.metadata.sites.all().count(), 7)
        # each site be associated with one series id
        for site in self.resTimeSeries.metadata.sites.all():
            self.assertEqual(len(site.series_ids), 1)

        # test the data for a specific site
        site = self.resTimeSeries.metadata.sites.filter(site_code='USU-LBR-Paradise').first()
        self.assertNotEqual(site, None)
        site_name = 'Little Bear River at McMurdy Hollow near Paradise, Utah'
        self.assertEqual(site.site_name, site_name)
        self.assertEqual(site.elevation_m, 1445)
        self.assertEqual(site.elevation_datum, 'NGVD29')
        self.assertEqual(site.site_type, 'Stream')

        # test 'variable' - there should be 1 variable element
        self.assertEqual(self.resTimeSeries.metadata.variables.all().count(), 1)
        variable = self.resTimeSeries.metadata.variables.all().first()
        # there should be 7 series ids associated with this one variable
        self.assertEqual(len(variable.series_ids), 7)
        # test the data for a variable
        self.assertEqual(variable.variable_code, 'USU36')
        self.assertEqual(variable.variable_name, 'Temperature')
        self.assertEqual(variable.variable_type, 'Water Quality')
        self.assertEqual(variable.no_data_value, -9999)
        self.assertEqual(variable.variable_definition, None)
        self.assertEqual(variable.speciation, 'Not Applicable')

        # test 'method' - there should be 1 method element
        self.assertEqual(self.resTimeSeries.metadata.methods.all().count(), 1)
        method = self.resTimeSeries.metadata.methods.all().first()
        # there should be 7 series ids associated with this one method element
        self.assertEqual(len(method.series_ids), 7)
        self.assertEqual(method.method_code, '28')
        method_name = 'Quality Control Level 1 Data Series created from raw QC Level 0 data ' \
                      'using ODM Tools.'
        self.assertEqual(method.method_name, method_name)
        self.assertEqual(method.method_type, 'Instrument deployment')
        method_des = 'Quality Control Level 1 Data Series created from raw QC Level 0 data ' \
                     'using ODM Tools.'
        self.assertEqual(method.method_description, method_des)
        self.assertEqual(method.method_link, None)

        # test 'processing_level' - there should be 1 processing_level element
        self.assertEqual(self.resTimeSeries.metadata.processing_levels.all().count(), 1)
        proc_level = self.resTimeSeries.metadata.processing_levels.all().first()
        # there should be 7 series ids associated with this one element
        self.assertEqual(len(proc_level.series_ids), 7)
        self.assertEqual(proc_level.processing_level_code, '1')
        self.assertEqual(proc_level.definition, 'Quality controlled data')
        explanation = 'Quality controlled data that have passed quality assurance procedures ' \
                      'such as routine estimation of timing and sensor calibration or visual ' \
                      'inspection and removal of obvious errors. An example is USGS published ' \
                      'streamflow records following parsing through USGS quality control ' \
                      'procedures.'
        self.assertEqual(proc_level.explanation, explanation)

        # test 'timeseries_result' - there should be 7 timeseries_result element
        self.assertEqual(self.resTimeSeries.metadata.time_series_results.all().count(), 7)
        ts_result = self.resTimeSeries.metadata.time_series_results.filter(
            series_ids__contains=['182d8fa3-1ebc-11e6-ad49-f45c8999816f']).first()
        self.assertNotEqual(ts_result, None)
        # there should be only 1 series id associated with this element
        self.assertEqual(len(ts_result.series_ids), 1)
        self.assertEqual(ts_result.units_type, 'Temperature')
        self.assertEqual(ts_result.units_name, 'degree celsius')
        self.assertEqual(ts_result.units_abbreviation, 'degC')
        self.assertEqual(ts_result.status, 'Unknown')
        self.assertEqual(ts_result.sample_medium, 'Surface Water')
        self.assertEqual(ts_result.value_count, 1441)
        self.assertEqual(ts_result.aggregation_statistics, 'Average')

        # test for CV lookup tables
        # there should be 23 CV_VariableType records
        self.assertEqual(self.resTimeSeries.metadata.cv_variable_types.all().count(), 23)
        # there should be 805 CV_VariableName records
        self.assertEqual(self.resTimeSeries.metadata.cv_variable_names.all().count(), 805)
        # there should be 145 CV_Speciation records
        self.assertEqual(self.resTimeSeries.metadata.cv_speciations.all().count(), 145)
        # there should be 51 CV_SiteType records
        self.assertEqual(self.resTimeSeries.metadata.cv_site_types.all().count(), 51)
        # there should be 5 CV_ElevationDatum records
        self.assertEqual(self.resTimeSeries.metadata.cv_elevation_datums.all().count(), 5)
        # there should be 25 CV_MethodType records
        self.assertEqual(self.resTimeSeries.metadata.cv_method_types.all().count(), 25)
        # there should be 179 CV_UnitsType records
        self.assertEqual(self.resTimeSeries.metadata.cv_units_types.all().count(), 179)
        # there should be 4 CV_Status records
        self.assertEqual(self.resTimeSeries.metadata.cv_statuses.all().count(), 4)
        # there should be 17 CV_Medium records
        self.assertEqual(self.resTimeSeries.metadata.cv_mediums.all().count(), 18)
        # there should be 17 CV_aggregationStatistics records
        self.assertEqual(self.resTimeSeries.metadata.cv_aggregation_statistics.all().count(), 17)
        # there should not be any UTCOffset element
        self.assertEqual(self.resTimeSeries.metadata.utc_offset, None)

Example #20

Show file

File: tasks.py Project: hydroshare/hydroshare

def delete_zip(zip_path):
    istorage = IrodsStorage()
    if istorage.exists(zip_path):
        istorage.delete(zip_path)

Example #21

Show file

File: views.py Project: hydroshare/django_irods

def download(request, path, rest_call=False, use_async=True, use_reverse_proxy=True,
             *args, **kwargs):
    split_path_strs = path.split('/')
    is_bag_download = False
    is_zip_download = False
    is_sf_agg_file = False
    if split_path_strs[0] == 'bags':
        res_id = os.path.splitext(split_path_strs[1])[0]
        is_bag_download = True
    elif split_path_strs[0] == 'zips':
        if path.endswith('.zip'):
            res_id = os.path.splitext(split_path_strs[2])[0]
        else:
            res_id = os.path.splitext(split_path_strs[1])[0]
        is_zip_download = True
    else:
        res_id = split_path_strs[0]

    # if the resource does not exist in django, authorized will be false
    res, authorized, _ = authorize(request, res_id,
                                   needed_permission=ACTION_TO_AUTHORIZE.VIEW_RESOURCE,
                                   raises_exception=False)
    if not authorized:
        response = HttpResponse(status=401)
        content_msg = "You do not have permission to download this resource!"
        if rest_call:
            raise PermissionDenied(content_msg)
        else:
            response.content = "<h1>" + content_msg + "</h1>"
            return response

    if res.resource_type == "CompositeResource" and not path.endswith(".zip"):
        for f in ResourceFile.objects.filter(object_id=res.id):
            if path == f.storage_path:
                if f.has_logical_file and f.logical_file.is_single_file_aggregation:
                    is_sf_agg_file = True

    if res.resource_federation_path:
        # the resource is stored in federated zone
        istorage = IrodsStorage('federated')
        federated_path = res.resource_federation_path
        path = os.path.join(federated_path, path)
        session = icommands.ACTIVE_SESSION
    else:
        # TODO: From Alva: I do not understand the use case for changing the environment.
        # TODO: This seems an enormous potential vulnerability, as arguments are
        # TODO: passed from the URI directly to IRODS without verification.
        istorage = IrodsStorage()
        federated_path = ''
        if 'environment' in kwargs:
            environment = int(kwargs['environment'])
            environment = m.RodsEnvironment.objects.get(pk=environment)
            session = Session("/tmp/django_irods", settings.IRODS_ICOMMANDS_PATH,
                              session_id=uuid4())
            session.create_environment(environment)
            session.run('iinit', None, environment.auth)
        elif getattr(settings, 'IRODS_GLOBAL_SESSION', False):
            session = GLOBAL_SESSION
        elif icommands.ACTIVE_SESSION:
            session = icommands.ACTIVE_SESSION
        else:
            raise KeyError('settings must have IRODS_GLOBAL_SESSION set '
                           'if there is no environment object')

    resource_cls = check_resource_type(res.resource_type)

    if federated_path:
        res_root = os.path.join(federated_path, res_id)
    else:
        res_root = res_id

    if is_zip_download or is_sf_agg_file:
        if not path.endswith(".zip"):  # requesting folder that needs to be zipped
            input_path = path.split(res_id)[1]
            random_hash = random.getrandbits(32)
            daily_date = datetime.datetime.today().strftime('%Y-%m-%d')
            random_hash_path = 'zips/{daily_date}/{res_id}/{rand_folder}'.format(
                daily_date=daily_date, res_id=res_id,
                rand_folder=random_hash)
            output_path = '{random_hash_path}{path}.zip'.format(random_hash_path=random_hash_path,
                                                                path=input_path)

            if res.resource_type == "CompositeResource":
                aggregation_name = input_path[len('/data/contents/'):]
                res.create_aggregation_xml_documents(aggregation_name=aggregation_name)

            if use_async:
                task = create_temp_zip.apply_async((res_id, input_path, output_path,
                                                    is_sf_agg_file), countdown=3)
                delete_zip.apply_async((random_hash_path, ),
                                       countdown=(20 * 60))  # delete after 20 minutes
                if is_sf_agg_file:
                    download_path = request.path.split(res_id)[0] + output_path
                else:
                    download_path = request.path.split("zips")[0] + output_path
                if rest_call:
                    return HttpResponse(json.dumps({'zip_status': 'Not ready',
                                                    'task_id': task.task_id,
                                                    'download_path': download_path}),
                                        content_type="application/json")
                request.session['task_id'] = task.task_id
                request.session['download_path'] = download_path
                return HttpResponseRedirect(res.get_absolute_url())

            ret_status = create_temp_zip(res_id, input_path, output_path, is_sf_agg_file)
            delete_zip.apply_async((random_hash_path, ),
                                   countdown=(20 * 60))  # delete after 20 minutes
            if not ret_status:
                content_msg = "Zip cannot be created successfully. Check log for details."
                response = HttpResponse()
                if rest_call:
                    response.content = content_msg
                else:
                    response.content = "<h1>" + content_msg + "</h1>"
                return response

            path = output_path

    bag_modified = istorage.getAVU(res_root, 'bag_modified')
    # make sure if bag_modified is not set to true, we still recreate the bag if the
    # bag file does not exist for some reason to resolve the error to download a nonexistent
    # bag when bag_modified is false due to the flag being out-of-sync with the real bag status
    if bag_modified is None or bag_modified.lower() == "false":
        # check whether the bag file exists
        bag_file_name = res_id + '.zip'
        if res_root.startswith(res_id):
            bag_full_path = os.path.join('bags', bag_file_name)
        else:
            bag_full_path = os.path.join(federated_path, 'bags', bag_file_name)
        # set bag_modified to 'true' if the bag does not exist so that it can be recreated
        # and the bag_modified AVU will be set correctly as well subsequently
        if not istorage.exists(bag_full_path):
            bag_modified = 'true'

    metadata_dirty = istorage.getAVU(res_root, 'metadata_dirty')
    # do on-demand bag creation
    # needs to check whether res_id collection exists before getting/setting AVU on it
    # to accommodate the case where the very same resource gets deleted by another request
    # when it is getting downloaded

    if is_bag_download:
        # send signal for pre_check_bag_flag
        pre_check_bag_flag.send(sender=resource_cls, resource=res)
        if bag_modified is None or bag_modified.lower() == "true":
            if metadata_dirty is None or metadata_dirty.lower() == 'true':
                create_bag_files(res)
            if use_async:
                # task parameter has to be passed in as a tuple or list, hence (res_id,) is needed
                # Note that since we are using JSON for task parameter serialization, no complex
                # object can be passed as parameters to a celery task
                task = create_bag_by_irods.apply_async((res_id,), countdown=3)
                if rest_call:
                    return HttpResponse(json.dumps({'bag_status': 'Not ready',
                                                    'task_id': task.task_id}),
                                        content_type="application/json")

                request.session['task_id'] = task.task_id
                request.session['download_path'] = request.path
                return HttpResponseRedirect(res.get_absolute_url())
            else:
                ret_status = create_bag_by_irods(res_id)
                if not ret_status:
                    content_msg = "Bag cannot be created successfully. Check log for details."
                    response = HttpResponse()
                    if rest_call:
                        response.content = content_msg
                    else:
                        response.content = "<h1>" + content_msg + "</h1>"
                    return response

    elif metadata_dirty is None or metadata_dirty.lower() == 'true':
        if path.endswith("resourcemap.xml") or path.endswith('resourcemetadata.xml'):
            # we need to regenerate the metadata xml files
            create_bag_files(res)

    # send signal for pre download file
    download_file_name = split_path_strs[-1]
    pre_download_file.send(sender=resource_cls, resource=res,
                           download_file_name=download_file_name,
                           request=request)

    # obtain mime_type to set content_type
    mtype = 'application-x/octet-stream'
    mime_type = mimetypes.guess_type(path)
    if mime_type[0] is not None:
        mtype = mime_type[0]
    # retrieve file size to set up Content-Length header
    stdout = session.run("ils", None, "-l", path)[0].split()
    flen = int(stdout[3])

    # If this path is resource_federation_path, then the file is a local user file
    userpath = '/' + os.path.join(
        getattr(settings, 'HS_USER_IRODS_ZONE', 'hydroshareuserZone'),
        'home',
        getattr(settings, 'HS_LOCAL_PROXY_USER_IN_FED_ZONE', 'localHydroProxy'))

    # Allow reverse proxy if request was forwarded by nginx
    # (HTTP_X_DJANGO_REVERSE_PROXY is 'true')
    # and reverse proxy is possible according to configuration.

    if use_reverse_proxy and getattr(settings, 'SENDFILE_ON', False) and \
       'HTTP_X_DJANGO_REVERSE_PROXY' in request.META:

        # The NGINX sendfile abstraction is invoked as follows:
        # 1. The request to download a file enters this routine via the /rest_download or /download
        #    url in ./urls.py. It is redirected here from Django. The URI contains either the
        #    unqualified resource path or the federated resource path, depending upon whether
        #    the request is local or federated.
        # 2. This deals with unfederated resources by redirecting them to the uri
        #    /irods-data/{resource-id}/... on nginx. This URI is configured to read the file
        #    directly from the iRODS vault via NFS, and does not work for direct access to the
        #    vault due to the 'internal;' declaration in NGINX.
        # 3. This deals with federated resources by reading their path, matching local vaults, and
        #    redirecting to URIs that are in turn mapped to read from appropriate iRODS vaults. At
        #    present, the only one of these is /irods-user, which handles files whose federation
        #    path is stored in the variable 'userpath'.
        # 4. If there is no vault available for the resource, the file is transferred without
        #    NGINX, exactly as it was transferred previously.

        # stop NGINX targets that are non-existent from hanging forever.
        if not istorage.exists(path):
            content_msg = "file path {} does not exist in iRODS".format(path)
            response = HttpResponse(status=404)
            if rest_call:
                response.content = content_msg
            else:
                response.content = "<h1>" + content_msg + "</h1>"
            return response

        if not res.is_federated:
            # invoke X-Accel-Redirect on physical vault file in nginx
            response = HttpResponse(content_type=mtype)
            response['Content-Disposition'] = 'attachment; filename="{name}"'.format(
                name=path.split('/')[-1])
            response['Content-Length'] = flen
            response['X-Accel-Redirect'] = '/'.join([
                getattr(settings, 'IRODS_DATA_URI', '/irods-data'), path])
            return response

        elif res.resource_federation_path == userpath:  # this guarantees a "user" resource
            # invoke X-Accel-Redirect on physical vault file in nginx
            # if path is full user path; strip federation prefix
            if path.startswith(userpath):
                path = path[len(userpath)+1:]
            # invoke X-Accel-Redirect on physical vault file in nginx
            response = HttpResponse(content_type=mtype)
            response['Content-Disposition'] = 'attachment; filename="{name}"'.format(
                name=path.split('/')[-1])
            response['Content-Length'] = flen
            response['X-Accel-Redirect'] = os.path.join(
                getattr(settings, 'IRODS_USER_URI', '/irods-user'), path)
            return response

    # if we get here, none of the above conditions are true
    if flen <= FILE_SIZE_LIMIT:
        options = ('-',)  # we're redirecting to stdout.
        # this unusual way of calling works for federated or local resources
        proc = session.run_safe('iget', None, path, *options)
        response = FileResponse(proc.stdout, content_type=mtype)
        response['Content-Disposition'] = 'attachment; filename="{name}"'.format(
            name=path.split('/')[-1])
        response['Content-Length'] = flen
        return response

    else:
        content_msg = "File larger than 1GB cannot be downloaded directly via HTTP. " \
                      "Please download the large file via iRODS clients."
        response = HttpResponse(status=403)
        if rest_call:
            response.content = content_msg
        else:
            response.content = "<h1>" + content_msg + "</h1>"
        return response

Example #22

Show file

File: custom_migration_for_raster_meta_update_20160512.py Project: hydroshare/hydroshare

def migrate_tif_file(apps, schema_editor):
    log = logging.getLogger()
    istorage = IrodsStorage()

    copy_res_fail = []
    vrt_update_fail = []
    vrt_update_success = []
    meta_update_fail = []
    meta_update_success = []

    # start migration for each raster resource that has raster files
    for res in RasterResource.objects.all():
        if res.files.all():
            # copy all the resource files to temp dir
            try:
                temp_dir = tempfile.mkdtemp()
                for res_file in res.files.all():
                    shutil.copy(res_file.resource_file.file.name,
                                os.path.join(temp_dir, os.path.basename(res_file.resource_file.name)))

                vrt_file_path = [os.path.join(temp_dir, f) for f in os.listdir(temp_dir) if '.vrt' == f[-4:]].pop()

            except Exception as e:
                log.exception(e.message)
                copy_res_fail.append('{}:{}'.format(res.short_id, res.metadata.title.value))
                continue

            # update vrt file if the raster resource that has a single tif file
            try:
                if len(os.listdir(temp_dir)) == 2:
                    # create new vrt file
                    tif_file_path = [os.path.join(temp_dir, f) for f in os.listdir(temp_dir) if '.tif' == f[-4:]].pop()
                    with open(os.devnull, 'w') as fp:
                        subprocess.Popen(['gdal_translate', '-of', 'VRT', tif_file_path, vrt_file_path], stdout=fp, stderr=fp).wait()   # remember to add .wait()

                    # modify the vrt file contents
                    tree = ET.parse(vrt_file_path)
                    root = tree.getroot()
                    for element in root.iter('SourceFilename'):
                        element.attrib['relativeToVRT'] = '1'
                    tree.write(vrt_file_path)

                    # delete vrt res file
                    for f in res.files.all():
                        if 'vrt' == f.resource_file.name[-3:]:
                            f.resource_file.delete()
                            f.delete()

                    # add new vrt file to resource
                    new_file = UploadedFile(file=open(vrt_file_path, 'r'), name=os.path.basename(vrt_file_path))
                    hydroshare.add_resource_files(res.short_id, new_file)

                    # update the bag
                    bag_name = 'bags/{res_id}.zip'.format(res_id=res.short_id)
                    if istorage.exists(bag_name):
                        # delete the resource bag as the old bag is not valid
                        istorage.delete(bag_name)
                    resource_modified(res, res.creator)
                    vrt_update_success.append('{}:{}'.format(res.short_id,res.metadata.title.value))

            except Exception as e:
                log.exception(e.message)
                vrt_update_fail.append('{}:{}'.format(res.short_id,res.metadata.title.value))

            # update the metadata for the band information of all the raster resources
            try:
                meta_updated = False

                # extract meta
                ori_dir = os.getcwd()
                os.chdir(temp_dir)
                res_md_dict = raster_meta_extract.get_raster_meta_dict(vrt_file_path)
                os.chdir(ori_dir)
                shutil.rmtree(temp_dir)

                # update band information metadata in django
                if res_md_dict['band_info']:
                    for i, band_meta in res_md_dict['band_info'].items():
                        band_obj = res.metadata.bandInformation.filter(name='Band_{}'.format(i)).first()
                        if band_obj:
                            res.metadata.update_element('bandInformation',
                                                        band_obj.id,
                                                        maximumValue=band_meta['maximumValue'],
                                                        minimumValue=band_meta['minimumValue'],
                                                        noDataValue=band_meta['noDataValue'],
                                                        )
                            meta_updated = True

                # update the bag if meta is updated
                if meta_updated:
                    bag_name = 'bags/{res_id}.zip'.format(res_id=res.short_id)
                    if istorage.exists(bag_name):
                        # delete the resource bag as the old bag is not valid
                        istorage.delete(bag_name)
                    resource_modified(res, res.creator)
                    meta_update_success.append('{}:{}'.format(res.short_id, res.metadata.title.value))

            except Exception as e:
                log.exception(e.message)
                meta_update_fail.append('{}:{}'.format(res.short_id, res.metadata.title.value))

    # Print migration results
    print 'Copy resource to temp folder failure: Number: {} List: {}'.format(len(copy_res_fail), copy_res_fail)
    print 'VRT file update success: Number: {} List{}'.format(len(vrt_update_success), vrt_update_success)
    print 'VRT file update fail: Number: {} List{}'.format(len(vrt_update_fail), vrt_update_fail)
    print 'Meta update success: Number: {} List {}'.format(len(meta_update_success), meta_update_success)
    print 'Meta update fail: Number: {} List {}'.format(len(meta_update_fail), meta_update_fail)

Example #23

Show file

def migrate_tif_file(apps, schema_editor):
    log = logging.getLogger()
    istorage = IrodsStorage()

    copy_res_fail = []
    vrt_update_fail = []
    vrt_update_success = []
    meta_update_fail = []
    meta_update_success = []

    # start migration for each raster resource that has raster files
    for res in RasterResource.objects.all():
        if res.files.all():
            # copy all the resource files to temp dir
            try:
                temp_dir = tempfile.mkdtemp()
                for res_file in res.files.all():
                    shutil.copy(
                        res_file.resource_file.file.name,
                        os.path.join(
                            temp_dir,
                            os.path.basename(res_file.resource_file.name)))

                vrt_file_path = [
                    os.path.join(temp_dir, f) for f in os.listdir(temp_dir)
                    if '.vrt' == f[-4:]
                ].pop()

            except Exception as e:
                log.exception(e.message)
                copy_res_fail.append('{}:{}'.format(res.short_id,
                                                    res.metadata.title.value))
                continue

            # update vrt file if the raster resource that has a single tif file
            try:
                if len(os.listdir(temp_dir)) == 2:
                    # create new vrt file
                    tif_file_path = [
                        os.path.join(temp_dir, f) for f in os.listdir(temp_dir)
                        if '.tif' == f[-4:]
                    ].pop()
                    with open(os.devnull, 'w') as fp:
                        subprocess.Popen(
                            [
                                'gdal_translate', '-of', 'VRT', tif_file_path,
                                vrt_file_path
                            ],
                            stdout=fp,
                            stderr=fp).wait()  # remember to add .wait()

                    # modify the vrt file contents
                    tree = ET.parse(vrt_file_path)
                    root = tree.getroot()
                    for element in root.iter('SourceFilename'):
                        element.attrib['relativeToVRT'] = '1'
                    tree.write(vrt_file_path)

                    # delete vrt res file
                    for f in res.files.all():
                        if 'vrt' == f.resource_file.name[-3:]:
                            f.resource_file.delete()
                            f.delete()

                    # add new vrt file to resource
                    new_file = UploadedFile(
                        file=open(vrt_file_path, 'r'),
                        name=os.path.basename(vrt_file_path))
                    hydroshare.add_resource_files(res.short_id, new_file)

                    # update the bag
                    bag_name = 'bags/{res_id}.zip'.format(res_id=res.short_id)
                    if istorage.exists(bag_name):
                        # delete the resource bag as the old bag is not valid
                        istorage.delete(bag_name)
                    resource_modified(res, res.creator)
                    vrt_update_success.append('{}:{}'.format(
                        res.short_id, res.metadata.title.value))

            except Exception as e:
                log.exception(e.message)
                vrt_update_fail.append('{}:{}'.format(
                    res.short_id, res.metadata.title.value))

            # update the metadata for the band information of all the raster resources
            try:
                meta_updated = False

                # extract meta
                ori_dir = os.getcwd()
                os.chdir(temp_dir)
                res_md_dict = raster_meta_extract.get_raster_meta_dict(
                    vrt_file_path)
                os.chdir(ori_dir)
                shutil.rmtree(temp_dir)

                # update band information metadata in django
                if res_md_dict['band_info']:
                    for i, band_meta in res_md_dict['band_info'].items():
                        band_obj = res.metadata.bandInformation.filter(
                            name='Band_{}'.format(i)).first()
                        if band_obj:
                            res.metadata.update_element(
                                'bandInformation',
                                band_obj.id,
                                maximumValue=band_meta['maximumValue'],
                                minimumValue=band_meta['minimumValue'],
                                noDataValue=band_meta['noDataValue'],
                            )
                            meta_updated = True

                # update the bag if meta is updated
                if meta_updated:
                    bag_name = 'bags/{res_id}.zip'.format(res_id=res.short_id)
                    if istorage.exists(bag_name):
                        # delete the resource bag as the old bag is not valid
                        istorage.delete(bag_name)
                    resource_modified(res, res.creator)
                    meta_update_success.append('{}:{}'.format(
                        res.short_id, res.metadata.title.value))

            except Exception as e:
                log.exception(e.message)
                meta_update_fail.append('{}:{}'.format(
                    res.short_id, res.metadata.title.value))

    # Print migration results
    print 'Copy resource to temp folder failure: Number: {} List: {}'.format(
        len(copy_res_fail), copy_res_fail)
    print 'VRT file update success: Number: {} List{}'.format(
        len(vrt_update_success), vrt_update_success)
    print 'VRT file update fail: Number: {} List{}'.format(
        len(vrt_update_fail), vrt_update_fail)
    print 'Meta update success: Number: {} List {}'.format(
        len(meta_update_success), meta_update_success)
    print 'Meta update fail: Number: {} List {}'.format(
        len(meta_update_fail), meta_update_fail)

Example #24

Show file

class TestFolderDownloadZip(TestCase):
    def setUp(self):
        super(TestFolderDownloadZip, self).setUp()
        self.group, _ = Group.objects.get_or_create(name='Hydroshare Author')
        self.user = create_account('*****@*****.**',
                                   username='******',
                                   first_name='Shaun',
                                   last_name='Livingston',
                                   superuser=False,
                                   groups=[])

        self.res = create_resource(resource_type='CompositeResource',
                                   owner=self.user,
                                   title='Test Resource',
                                   metadata=[])

        ResourceFile.create_folder(self.res, 'foo')

        # create files
        self.n1 = "test1.txt"
        test_file = open(self.n1, 'w')
        test_file.write("Test text file in test1.txt")
        test_file.close()

        self.test_file = open(self.n1, "rb")
        add_resource_files(self.res.short_id, self.test_file, folder='foo')

        # copy refts file into new file to be added to the resource as an aggregation
        reft_data_file = open(
            'hs_core/tests/data/multi_sites_formatted_version1.0.refts.json',
            'rb')
        refts_file = open('multi_sites_formatted_version1.0.refts.json', 'wb')
        refts_file.writelines(reft_data_file.readlines())
        refts_file.close()
        self.refts_file = open('multi_sites_formatted_version1.0.refts.json',
                               'rb')

        add_resource_files(self.res.short_id, self.refts_file)
        self.res.create_aggregation_xml_documents()
        self.istorage = IrodsStorage()

    def tearDown(self):
        super(TestFolderDownloadZip, self).tearDown()
        if self.res:
            self.res.delete()
        if self.test_file:
            self.test_file.close()
            os.remove(self.test_file.name)
        if self.refts_file:
            self.refts_file.close()
            os.remove(self.refts_file.name)
        GenericResource.objects.all().delete()
        if self.istorage.exists("zips"):
            self.istorage.delete("zips")

    def test_create_temp_zip(self):
        input_path = "{}/data/contents/foo".format(self.res.short_id)
        output_path = "zips/rand/foo.zip"

        self.assertTrue(
            create_temp_zip(self.res.short_id, input_path, output_path, None,
                            False))
        self.assertTrue(self.istorage.exists(output_path))

        # test aggregation
        input_path = "{}/data/contents/multi_sites_formatted_version1.0.refts.json"\
                     .format(self.res.short_id)
        output_path = "zips/rand/multi_sites_formatted_version1.0.refts.json.zip"

        self.assertTrue(
            create_temp_zip(self.res.short_id,
                            input_path,
                            output_path,
                            None,
                            sf_zip=True))
        self.assertTrue(self.istorage.exists(output_path))

    def test_create_temp_zip_aggregation(self):
        input_path = "{}/data/contents/" \
                     "multi_sites_formatted_version1.0.refts.json".format(self.res.short_id)
        output_path = "zips/rand/aggregation.zip"

        self.assertTrue(
            create_temp_zip(self.res.short_id, input_path, output_path,
                            "multi_sites_formatted_version1.0.refts.json",
                            False))
        self.assertTrue(self.istorage.exists(output_path))

Example #25

Show file

def delete_zip(zip_path):
    istorage = IrodsStorage()
    if istorage.exists(zip_path):
        istorage.delete(zip_path)

Example #26

Show file

def download(request,
             path,
             rest_call=False,
             use_async=True,
             use_reverse_proxy=True,
             *args,
             **kwargs):
    split_path_strs = path.split('/')
    is_bag_download = False
    is_zip_download = False
    is_sf_agg_file = False
    if split_path_strs[0] == 'bags':
        res_id = os.path.splitext(split_path_strs[1])[0]
        is_bag_download = True
    elif split_path_strs[0] == 'zips':
        if path.endswith('.zip'):
            res_id = os.path.splitext(split_path_strs[2])[0]
        else:
            res_id = os.path.splitext(split_path_strs[1])[0]
        is_zip_download = True
    else:
        res_id = split_path_strs[0]

    # if the resource does not exist in django, authorized will be false
    res, authorized, _ = authorize(
        request,
        res_id,
        needed_permission=ACTION_TO_AUTHORIZE.VIEW_RESOURCE,
        raises_exception=False)
    if not authorized:
        response = HttpResponse(status=401)
        content_msg = "You do not have permission to download this resource!"
        if rest_call:
            raise PermissionDenied(content_msg)
        else:
            response.content = "<h1>" + content_msg + "</h1>"
            return response

    if res.resource_type == "CompositeResource" and not path.endswith(".zip"):
        for f in ResourceFile.objects.filter(object_id=res.id):
            if path == f.storage_path:
                if f.has_logical_file and f.logical_file.is_single_file_aggregation:
                    is_sf_agg_file = True

    if res.resource_federation_path:
        # the resource is stored in federated zone
        istorage = IrodsStorage('federated')
        federated_path = res.resource_federation_path
        path = os.path.join(federated_path, path)
        session = icommands.ACTIVE_SESSION
    else:
        # TODO: From Alva: I do not understand the use case for changing the environment.
        # TODO: This seems an enormous potential vulnerability, as arguments are
        # TODO: passed from the URI directly to IRODS without verification.
        istorage = IrodsStorage()
        federated_path = ''
        if 'environment' in kwargs:
            environment = int(kwargs['environment'])
            environment = m.RodsEnvironment.objects.get(pk=environment)
            session = Session("/tmp/django_irods",
                              settings.IRODS_ICOMMANDS_PATH,
                              session_id=uuid4())
            session.create_environment(environment)
            session.run('iinit', None, environment.auth)
        elif getattr(settings, 'IRODS_GLOBAL_SESSION', False):
            session = GLOBAL_SESSION
        elif icommands.ACTIVE_SESSION:
            session = icommands.ACTIVE_SESSION
        else:
            raise KeyError('settings must have IRODS_GLOBAL_SESSION set '
                           'if there is no environment object')

    resource_cls = check_resource_type(res.resource_type)

    if federated_path:
        res_root = os.path.join(federated_path, res_id)
    else:
        res_root = res_id

    if is_zip_download or is_sf_agg_file:
        if not path.endswith(
                ".zip"):  # requesting folder that needs to be zipped
            input_path = path.split(res_id)[1]
            random_hash = random.getrandbits(32)
            daily_date = datetime.datetime.today().strftime('%Y-%m-%d')
            random_hash_path = 'zips/{daily_date}/{res_id}/{rand_folder}'.format(
                daily_date=daily_date, res_id=res_id, rand_folder=random_hash)
            output_path = '{random_hash_path}{path}.zip'.format(
                random_hash_path=random_hash_path, path=input_path)

            if res.resource_type == "CompositeResource":
                aggregation_name = input_path[len('/data/contents/'):]
                res.create_aggregation_xml_documents(
                    aggregation_name=aggregation_name)

            if use_async:
                task = create_temp_zip.apply_async(
                    (res_id, input_path, output_path, is_sf_agg_file),
                    countdown=3)
                delete_zip.apply_async(
                    (random_hash_path, ),
                    countdown=(20 * 60))  # delete after 20 minutes
                if is_sf_agg_file:
                    download_path = request.path.split(res_id)[0] + output_path
                else:
                    download_path = request.path.split("zips")[0] + output_path
                if rest_call:
                    return HttpResponse(json.dumps({
                        'zip_status':
                        'Not ready',
                        'task_id':
                        task.task_id,
                        'download_path':
                        download_path
                    }),
                                        content_type="application/json")
                request.session['task_id'] = task.task_id
                request.session['download_path'] = download_path
                return HttpResponseRedirect(res.get_absolute_url())

            ret_status = create_temp_zip(res_id, input_path, output_path,
                                         is_sf_agg_file)
            delete_zip.apply_async(
                (random_hash_path, ),
                countdown=(20 * 60))  # delete after 20 minutes
            if not ret_status:
                content_msg = "Zip cannot be created successfully. Check log for details."
                response = HttpResponse()
                if rest_call:
                    response.content = content_msg
                else:
                    response.content = "<h1>" + content_msg + "</h1>"
                return response

            path = output_path

    bag_modified = istorage.getAVU(res_root, 'bag_modified')
    # make sure if bag_modified is not set to true, we still recreate the bag if the
    # bag file does not exist for some reason to resolve the error to download a nonexistent
    # bag when bag_modified is false due to the flag being out-of-sync with the real bag status
    if bag_modified is None or bag_modified.lower() == "false":
        # check whether the bag file exists
        bag_file_name = res_id + '.zip'
        if res_root.startswith(res_id):
            bag_full_path = os.path.join('bags', bag_file_name)
        else:
            bag_full_path = os.path.join(federated_path, 'bags', bag_file_name)
        # set bag_modified to 'true' if the bag does not exist so that it can be recreated
        # and the bag_modified AVU will be set correctly as well subsequently
        if not istorage.exists(bag_full_path):
            bag_modified = 'true'

    metadata_dirty = istorage.getAVU(res_root, 'metadata_dirty')
    # do on-demand bag creation
    # needs to check whether res_id collection exists before getting/setting AVU on it
    # to accommodate the case where the very same resource gets deleted by another request
    # when it is getting downloaded

    if is_bag_download:
        # send signal for pre_check_bag_flag
        pre_check_bag_flag.send(sender=resource_cls, resource=res)
        if bag_modified is None or bag_modified.lower() == "true":
            if metadata_dirty is None or metadata_dirty.lower() == 'true':
                create_bag_files(res)
            if use_async:
                # task parameter has to be passed in as a tuple or list, hence (res_id,) is needed
                # Note that since we are using JSON for task parameter serialization, no complex
                # object can be passed as parameters to a celery task
                task = create_bag_by_irods.apply_async((res_id, ), countdown=3)
                if rest_call:
                    return HttpResponse(json.dumps({
                        'bag_status': 'Not ready',
                        'task_id': task.task_id
                    }),
                                        content_type="application/json")

                request.session['task_id'] = task.task_id
                request.session['download_path'] = request.path
                return HttpResponseRedirect(res.get_absolute_url())
            else:
                ret_status = create_bag_by_irods(res_id)
                if not ret_status:
                    content_msg = "Bag cannot be created successfully. Check log for details."
                    response = HttpResponse()
                    if rest_call:
                        response.content = content_msg
                    else:
                        response.content = "<h1>" + content_msg + "</h1>"
                    return response

    elif metadata_dirty is None or metadata_dirty.lower() == 'true':
        if path.endswith("resourcemap.xml") or path.endswith(
                'resourcemetadata.xml'):
            # we need to regenerate the metadata xml files
            create_bag_files(res)

    # send signal for pre download file
    download_file_name = split_path_strs[-1]
    pre_download_file.send(sender=resource_cls,
                           resource=res,
                           download_file_name=download_file_name,
                           request=request)

    # obtain mime_type to set content_type
    mtype = 'application-x/octet-stream'
    mime_type = mimetypes.guess_type(path)
    if mime_type[0] is not None:
        mtype = mime_type[0]
    # retrieve file size to set up Content-Length header
    stdout = session.run("ils", None, "-l", path)[0].split()
    flen = int(stdout[3])

    # If this path is resource_federation_path, then the file is a local user file
    userpath = '/' + os.path.join(
        getattr(settings, 'HS_USER_IRODS_ZONE', 'hydroshareuserZone'), 'home',
        getattr(settings, 'HS_LOCAL_PROXY_USER_IN_FED_ZONE',
                'localHydroProxy'))

    # Allow reverse proxy if request was forwarded by nginx
    # (HTTP_X_DJANGO_REVERSE_PROXY is 'true')
    # and reverse proxy is possible according to configuration.

    if use_reverse_proxy and getattr(settings, 'SENDFILE_ON', False) and \
       'HTTP_X_DJANGO_REVERSE_PROXY' in request.META:

        # The NGINX sendfile abstraction is invoked as follows:
        # 1. The request to download a file enters this routine via the /rest_download or /download
        #    url in ./urls.py. It is redirected here from Django. The URI contains either the
        #    unqualified resource path or the federated resource path, depending upon whether
        #    the request is local or federated.
        # 2. This deals with unfederated resources by redirecting them to the uri
        #    /irods-data/{resource-id}/... on nginx. This URI is configured to read the file
        #    directly from the iRODS vault via NFS, and does not work for direct access to the
        #    vault due to the 'internal;' declaration in NGINX.
        # 3. This deals with federated resources by reading their path, matching local vaults, and
        #    redirecting to URIs that are in turn mapped to read from appropriate iRODS vaults. At
        #    present, the only one of these is /irods-user, which handles files whose federation
        #    path is stored in the variable 'userpath'.
        # 4. If there is no vault available for the resource, the file is transferred without
        #    NGINX, exactly as it was transferred previously.

        # stop NGINX targets that are non-existent from hanging forever.
        if not istorage.exists(path):
            content_msg = "file path {} does not exist in iRODS".format(path)
            response = HttpResponse(status=404)
            if rest_call:
                response.content = content_msg
            else:
                response.content = "<h1>" + content_msg + "</h1>"
            return response

        if not res.is_federated:
            # invoke X-Accel-Redirect on physical vault file in nginx
            response = HttpResponse(content_type=mtype)
            response[
                'Content-Disposition'] = 'attachment; filename="{name}"'.format(
                    name=path.split('/')[-1])
            response['Content-Length'] = flen
            response['X-Accel-Redirect'] = '/'.join(
                [getattr(settings, 'IRODS_DATA_URI', '/irods-data'), path])
            return response

        elif res.resource_federation_path == userpath:  # this guarantees a "user" resource
            # invoke X-Accel-Redirect on physical vault file in nginx
            # if path is full user path; strip federation prefix
            if path.startswith(userpath):
                path = path[len(userpath) + 1:]
            # invoke X-Accel-Redirect on physical vault file in nginx
            response = HttpResponse(content_type=mtype)
            response[
                'Content-Disposition'] = 'attachment; filename="{name}"'.format(
                    name=path.split('/')[-1])
            response['Content-Length'] = flen
            response['X-Accel-Redirect'] = os.path.join(
                getattr(settings, 'IRODS_USER_URI', '/irods-user'), path)
            return response

    # if we get here, none of the above conditions are true
    if flen <= FILE_SIZE_LIMIT:
        options = ('-', )  # we're redirecting to stdout.
        # this unusual way of calling works for federated or local resources
        proc = session.run_safe('iget', None, path, *options)
        response = FileResponse(proc.stdout, content_type=mtype)
        response[
            'Content-Disposition'] = 'attachment; filename="{name}"'.format(
                name=path.split('/')[-1])
        response['Content-Length'] = flen
        return response

    else:
        content_msg = "File larger than 1GB cannot be downloaded directly via HTTP. " \
                      "Please download the large file via iRODS clients."
        response = HttpResponse(status=403)
        if rest_call:
            response.content = content_msg
        else:
            response.content = "<h1>" + content_msg + "</h1>"
        return response

Example #27

Show file

File: migrate_resources_from_userzone_to_datazone.py Project: Lihao-CAU/hydroshare

    def handle(self, *args, **options):
        resource_counter = 0
        storage = IrodsStorage()
        avu_list = ['bag_modified', 'metadata_dirty', 'isPublic', 'resourceType']
        for resource in BaseResource.objects.all():
            if resource.storage_type == 'user':
                # resource is in user zone, so migrate it to data zone
                # copy files from iRODS user zone to data zone
                try:
                    src_coll = resource.root_path
                    tgt_coll = resource.short_id

                    if storage.exists(tgt_coll):
                        storage.delete(tgt_coll)
                    storage.copyFiles(src_coll, tgt_coll)
                    # copy AVU over for the resource collection from iRODS user zone to data zone

                    for avu_name in avu_list:
                        value = storage.getAVU(src_coll, avu_name)
                        # bag_modified AVU needs to be set to true for the new resource so the bag
                        # can be regenerated in the data zone
                        if avu_name == 'bag_modified':
                            storage.setAVU(tgt_coll, avu_name, 'true')
                        # everything else gets copied literally
                        else:
                            storage.setAVU(tgt_coll, avu_name, value)

                    # Just to be on the safe side, it is better not to delete resources from user
                    # zone after it is migrated over to data zone in case there are issues with
                    # migration. A simple irm iRODS command can be issued to delete all resource
                    # collections afterwards if all works well after some time. Commenting the
                    # deletion statement below rather than deleting it to serve as a reminder
                    # that additional cleanup to delete all resource collections in user zone
                    # is needed after we can confirm migration is successfully.
                    # delete the original resource from user zone
                    # storage.delete(src_coll)

                    path_migrated = False
                    for res_file in resource.files.all():
                        if res_file.resource_file.name:
                            print('The resource_file field should be empty for resource {} but '
                                  'have the value of {}'.format(resource.short_id,
                                                                res_file.resource_file.name))
                            break
                        file_path = res_file.fed_resource_file.name
                        if not file_path:
                            print('The fed_resource_file field should not be empty for '
                                  'resource {}'.format(resource.short_id))
                            break
                        elif file_path.startswith(resource.resource_federation_path):
                            file_path = file_path[len(resource.resource_federation_path)+1:]
                            res_file.resource_file.name = file_path
                            res_file.fed_resource_file.name = ''
                            res_file.save()
                            path_migrated = True
                        else:
                            res_file.resource_file.name = file_path
                            res_file.fed_resource_file.name = ''
                            res_file.save()
                            path_migrated = True
                            print('fed_resource_file field does not contain absolute federation '
                                  'path which is an exception but can work after migration. '
                                  'file_path is {}'.format(file_path))
                    if path_migrated or resource.files.count() == 0:
                        # update resource federation path to point resource to data zone
                        resource.resource_federation_path = ''
                        resource.save()
                        print("Resource {} has been moved from user zone to data zone "
                              "successfully".format(resource.short_id))
                        resource_counter += 1
                    else:
                        continue
                except SessionException as ex:
                    print("Resource {} failed to move: {}".format(resource.short_id, ex.stderr))

        print("{} resources have been moved from user zone to data zone successfully".format(
            resource_counter))