def _process_uploaded_file(resource, validate_files_dict):
    log = logging.getLogger()

    # find a tif file or a zip file
    res_file = None
    for r_file in resource.files.all():
        if r_file.extension.lower() in ('.tif', '.tiff', '.zip'):
            res_file = r_file
            break

    if res_file:
        # get the file from irods to temp dir
        temp_file = utils.get_file_from_irods(res_file)
        # validate the file
        validation_results = raster.raster_file_validation(
            raster_file=temp_file, resource=resource)
        if not validation_results['error_info']:
            log.info("Geo raster file validation successful.")
            # extract metadata
            temp_dir = os.path.dirname(temp_file)
            temp_vrt_file_path = [
                os.path.join(temp_dir, f) for f in os.listdir(temp_dir)
                if '.vrt' == os.path.splitext(f)[1]
            ].pop()
            metadata = raster.extract_metadata(temp_vrt_file_path)
            # delete the original resource file if it is a zip file
            if res_file.extension.lower() == '.zip':
                file_name = delete_resource_file_only(resource, res_file)
                delete_format_metadata_after_delete_file(resource, file_name)
            # add all extracted files (tif and vrt)
            for f in validation_results['new_resource_files_to_add']:
                uploaded_file = UploadedFile(file=open(f, 'rb'),
                                             name=os.path.basename(f))
                utils.add_file_to_resource(resource, uploaded_file)

            # use the extracted metadata to populate resource metadata
            for element in metadata:
                # here k is the name of the element
                # v is a dict of all element attributes/field names and field values
                k, v = element.items()[0]
                resource.metadata.create_element(k, **v)
            log_msg = "Geo raster resource (ID:{}) - extracted metadata was saved to DB"
            log_msg = log_msg.format(resource.short_id)
            log.info(log_msg)
        else:
            # delete all the files in the resource
            for res_file in resource.files.all():
                delete_resource_file_only(resource, res_file)
            validate_files_dict['are_files_valid'] = False
            err_msg = "Uploaded file was not added to the resource. "
            err_msg += ", ".join(msg
                                 for msg in validation_results['error_info'])
            validate_files_dict['message'] = err_msg
            log_msg = "File validation failed for raster resource (ID:{})."
            log_msg = log_msg.format(resource.short_id)
            log.error(log_msg)

        # cleanup the temp file directory
        if os.path.exists(temp_file):
            shutil.rmtree(os.path.dirname(temp_file))
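# --- Added illustrative sketch (not from the original listing) ---
# The loop above calls resource.metadata.create_element(k, **v) on each element of the
# `metadata` list returned by raster.extract_metadata(). The element names and values below
# are hypothetical; they only show the assumed "list of single-key dicts" shape.
sample_metadata = [
    {'CellInformation': {'rows': 230, 'columns': 220, 'cellSizeXValue': 30.0,
                         'cellSizeYValue': 30.0, 'cellDataType': 'Float32'}},
    {'BandInformation': {'name': 'Band_1', 'variableName': 'elevation', 'variableUnit': 'meter'}},
]
for element in sample_metadata:
    name, attributes = list(element.items())[0]
    print(name, attributes)  # the real code passes these to metadata.create_element(name, **attributes)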
Example #2
def _process_uploaded_csv_file(resource, res_file, validate_files_dict, user,
                               delete_existing_metadata=True):
    # get the csv file from iRODS to a temp directory
    fl_obj_name = utils.get_file_from_irods(res_file)
    validate_err_message = validate_csv_file(fl_obj_name)
    if not validate_err_message:
        # first delete relevant existing metadata elements
        if delete_existing_metadata:
            TimeSeriesMetaData.objects.filter(id=resource.metadata.id).update(is_dirty=False)
            _delete_extracted_metadata(resource)

        # delete the sqlite file if it exists
        _delete_resource_file(resource, ".sqlite")

        # add the blank sqlite file
        add_blank_sqlite_file(resource, upload_folder=None)
        resource_modified(resource, user, overwrite_bag=False)

        # populate CV metadata django models from the blank sqlite file
        extract_cv_metadata_from_blank_sqlite_file(resource)

    else:  # file validation failed
        # delete the invalid file just uploaded
        delete_resource_file_only(resource, res_file)
        validate_files_dict['are_files_valid'] = False
        validate_err_message += "{}".format(FILE_UPLOAD_ERROR_MESSAGE)
        validate_files_dict['message'] = validate_err_message

    # cleanup the temp csv file
    if os.path.exists(fl_obj_name):
        shutil.rmtree(os.path.dirname(fl_obj_name))
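# --- Added illustrative sketch (not from the original listing) ---
# All of these upload handlers report success or failure through a mutable dict supplied by
# the caller. A minimal sketch of that contract, with the actual call left hypothetical:
validate_files_dict = {'are_files_valid': True, 'message': None}
# _process_uploaded_csv_file(resource, res_file, validate_files_dict, user)  # hypothetical call
if not validate_files_dict['are_files_valid']:
    # the caller is expected to surface the error message to the user
    print(validate_files_dict['message'])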
Example #3
def _process_uploaded_csv_file(resource,
                               res_file,
                               validate_files_dict,
                               user,
                               delete_existing_metadata=True):
    # get the csv file from iRODS to a temp directory
    fl_obj_name = utils.get_file_from_irods(res_file)
    validate_err_message = validate_csv_file(fl_obj_name)
    if not validate_err_message:
        # first delete relevant existing metadata elements
        if delete_existing_metadata:
            TimeSeriesMetaData.objects.filter(id=resource.metadata.id).update(
                is_dirty=False)
            _delete_extracted_metadata(resource)

        # delete the sqlite file if it exists
        _delete_resource_file(resource, ".sqlite")

        # add the blank sqlite file
        add_blank_sqlite_file(resource, upload_folder='')
        resource_modified(resource, user, overwrite_bag=False)

        # populate CV metadata django models from the blank sqlite file
        extract_cv_metadata_from_blank_sqlite_file(resource)

    else:  # file validation failed
        # delete the invalid file just uploaded
        delete_resource_file_only(resource, res_file)
        validate_files_dict['are_files_valid'] = False
        validate_err_message += "{}".format(FILE_UPLOAD_ERROR_MESSAGE)
        validate_files_dict['message'] = validate_err_message

    # cleanup the temp csv file
    if os.path.exists(fl_obj_name):
        shutil.rmtree(os.path.dirname(fl_obj_name))
Example #4
def _process_uploaded_file(resource, validate_files_dict):
    log = logging.getLogger()

    # find a tif file or a zip file
    res_file = None
    for r_file in resource.files.all():
        if r_file.extension.lower() in ('.tif', '.tiff', '.zip'):
            res_file = r_file
            break

    if res_file:
        # get the file from irods to temp dir
        temp_file = utils.get_file_from_irods(res_file)
        # validate the file
        validation_results = raster.raster_file_validation(raster_file=temp_file,
                                                           resource=resource)
        if not validation_results['error_info']:
            log.info("Geo raster file validation successful.")
            # extract metadata
            temp_dir = os.path.dirname(temp_file)
            temp_vrt_file_path = [os.path.join(temp_dir, f) for f in os.listdir(temp_dir) if
                                  '.vrt' == os.path.splitext(f)[1]].pop()
            metadata = raster.extract_metadata(temp_vrt_file_path)
            # delete the original resource file if it is a zip file
            if res_file.extension.lower() == '.zip':
                file_name = delete_resource_file_only(resource, res_file)
                delete_format_metadata_after_delete_file(resource, file_name)
            # add all extracted files (tif and vrt)
            for f in validation_results['new_resource_files_to_add']:
                uploaded_file = UploadedFile(file=open(f, 'rb'),
                                             name=os.path.basename(f))
                utils.add_file_to_resource(resource, uploaded_file)

            # use the extracted metadata to populate resource metadata
            for element in metadata:
                # here k is the name of the element
                # v is a dict of all element attributes/field names and field values
                k, v = element.items()[0]
                resource.metadata.create_element(k, **v)
            log_msg = "Geo raster resource (ID:{}) - extracted metadata was saved to DB"
            log_msg = log_msg.format(resource.short_id)
            log.info(log_msg)
        else:
            # delete all the files in the resource
            for res_file in resource.files.all():
                delete_resource_file_only(resource, res_file)
            validate_files_dict['are_files_valid'] = False
            err_msg = "Uploaded file was not added to the resource. "
            err_msg += ", ".join(msg for msg in validation_results['error_info'])
            validate_files_dict['message'] = err_msg
            log_msg = "File validation failed for raster resource (ID:{})."
            log_msg = log_msg.format(resource.short_id)
            log.error(log_msg)

        # cleanup the temp file directory
        if os.path.exists(temp_file):
            shutil.rmtree(os.path.dirname(temp_file))
Example #5
def _process_uploaded_sqlite_file(user,
                                  resource,
                                  res_file,
                                  validate_files_dict,
                                  delete_existing_metadata=True):
    # check if it is a sqlite file
    fl_ext = utils.get_resource_file_name_and_extension(res_file)[2]

    if fl_ext == '.sqlite':
        # get the file from iRODS to a temp directory
        fl_obj_name = utils.get_file_from_irods(res_file)
        validate_err_message = _validate_odm2_db_file(fl_obj_name)
        if not validate_err_message:
            # first delete relevant existing metadata elements
            if delete_existing_metadata:
                TimeSeriesMetaData.objects.filter(
                    id=resource.metadata.id).update(is_dirty=False)
                _delete_extracted_metadata(resource)
            extract_err_message = _extract_metadata(resource, fl_obj_name)
            if extract_err_message:
                # delete the invalid file
                delete_resource_file_only(resource, res_file)
                # cleanup any extracted metadata
                _delete_extracted_metadata(resource)
                validate_files_dict['are_files_valid'] = False
                extract_err_message += "{}".format(FILE_UPLOAD_ERROR_MESSAGE)
                validate_files_dict['message'] = extract_err_message
            else:
                # set metadata is_dirty to False
                TimeSeriesMetaData.objects.filter(
                    id=resource.metadata.id).update(is_dirty=False)
                # delete the csv file if it exists
                _delete_resource_file(resource, ".csv")
                utils.resource_modified(resource, user, overwrite_bag=False)

        else:  # file validation failed
            # delete the invalid file just uploaded
            delete_resource_file_only(resource, res_file)
            validate_files_dict['are_files_valid'] = False
            validate_err_message += "{}".format(FILE_UPLOAD_ERROR_MESSAGE)
            validate_files_dict['message'] = validate_err_message

        # cleanup the temp file
        if os.path.exists(fl_obj_name):
            shutil.rmtree(os.path.dirname(fl_obj_name))
    else:
        # delete the invalid file
        delete_resource_file_only(resource, res_file)
        validate_files_dict['are_files_valid'] = False
        err_message = "The uploaded file is not a sqlite file. {}"
        err_message = err_message.format(FILE_UPLOAD_ERROR_MESSAGE)
        validate_files_dict['message'] = err_message
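# --- Added illustrative sketch (not from the original listing) ---
# The handler above only uses index [2] of the tuple returned by
# utils.get_resource_file_name_and_extension(). The full return shape is assumed here to be
# (full_path, base_name, extension) and is reproduced with os.path purely for illustration.
import os

def name_and_extension_sketch(path):
    base = os.path.basename(path)
    return path, base, os.path.splitext(base)[1]

print(name_and_extension_sketch('data/contents/ODM2.sqlite')[2])  # '.sqlite'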
Example #6
def list_tif_files(vrt_file):
    """
    Lists the tif files named in a vrt file
    :param vrt_file: ResourceFile of a vrt whose associated tif(f) files are to be listed
    :return: list of filename strings read from vrt_file; empty list if none are found
    """
    temp_vrt_file = utils.get_file_from_irods(vrt_file)
    with open(temp_vrt_file, 'r') as opened_vrt_file:
        vrt_string = opened_vrt_file.read()
        root = ET.fromstring(vrt_string)
        file_names_in_vrt = [file_name.text for file_name in root.iter('SourceFilename')]
        return file_names_in_vrt
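# --- Added illustrative sketch (not from the original listing) ---
# Self-contained version of the VRT parsing the function above relies on; the sample VRT
# content is made up for illustration.
import xml.etree.ElementTree as ET

sample_vrt = """<VRTDataset rasterXSize="10" rasterYSize="10">
  <VRTRasterBand dataType="Byte" band="1">
    <SimpleSource>
      <SourceFilename relativeToVRT="1">logan1.tif</SourceFilename>
    </SimpleSource>
    <SimpleSource>
      <SourceFilename relativeToVRT="1">logan2.tif</SourceFilename>
    </SimpleSource>
  </VRTRasterBand>
</VRTDataset>"""

root = ET.fromstring(sample_vrt)
print([name.text for name in root.iter('SourceFilename')])  # ['logan1.tif', 'logan2.tif']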
Example #7
def _process_uploaded_sqlite_file(user, resource, res_file, validate_files_dict,
                                  delete_existing_metadata=True):
    # check if it is a sqlite file
    fl_ext = utils.get_resource_file_name_and_extension(res_file)[2]

    if fl_ext == '.sqlite':
        # get the file from iRODS to a temp directory
        fl_obj_name = utils.get_file_from_irods(res_file)
        validate_err_message = validate_odm2_db_file(fl_obj_name)
        if not validate_err_message:
            # first delete relevant existing metadata elements
            if delete_existing_metadata:
                TimeSeriesMetaData.objects.filter(id=resource.metadata.id).update(is_dirty=False)
                _delete_extracted_metadata(resource)
            extract_err_message = extract_metadata(resource, fl_obj_name)
            if extract_err_message:
                # delete the invalid file
                delete_resource_file_only(resource, res_file)
                # cleanup any extracted metadata
                _delete_extracted_metadata(resource)
                validate_files_dict['are_files_valid'] = False
                extract_err_message += "{}".format(FILE_UPLOAD_ERROR_MESSAGE)
                validate_files_dict['message'] = extract_err_message
            else:
                # set metadata is_dirty to False
                TimeSeriesMetaData.objects.filter(id=resource.metadata.id).update(is_dirty=False)
                # delete the csv file if it exists
                _delete_resource_file(resource, ".csv")
                utils.resource_modified(resource, user, overwrite_bag=False)

        else:   # file validation failed
            # delete the invalid file just uploaded
            delete_resource_file_only(resource, res_file)
            validate_files_dict['are_files_valid'] = False
            validate_err_message += "{}".format(FILE_UPLOAD_ERROR_MESSAGE)
            validate_files_dict['message'] = validate_err_message

        # cleanup the temp file
        if os.path.exists(fl_obj_name):
            shutil.rmtree(os.path.dirname(fl_obj_name))
    else:
        # delete the invalid file
        delete_resource_file_only(resource, res_file)
        validate_files_dict['are_files_valid'] = False
        err_message = "The uploaded file is not a sqlite file. {}"
        err_message = err_message.format(FILE_UPLOAD_ERROR_MESSAGE)
        validate_files_dict['message'] = err_message
Example #8
def netcdf_post_create_resource(sender, **kwargs):
    log = logging.getLogger()
    resource = kwargs['resource']
    validate_files_dict = kwargs['validate_files']
    res_file = resource.files.all().first()

    if res_file:
        temp_file = utils.get_file_from_irods(res_file)
        nc_dataset = nc_utils.get_nc_dataset(temp_file)
        nc_file_name = res_file.file_name

        if isinstance(nc_dataset, netCDF4.Dataset):
            # Extract the metadata from netcdf file
            res_dublin_core_meta, res_type_specific_meta = nc_meta.get_nc_meta_dict(
                temp_file)
            # populate metadata list with extracted metadata
            metadata = []
            add_metadata_to_list(metadata, res_dublin_core_meta,
                                 res_type_specific_meta)
            for element in metadata:
                # here k is the name of the element
                # v is a dict of all element attributes/field names and field values
                k, v = list(element.items())[0]
                if k == 'title':
                    # update title element
                    title_element = resource.metadata.title
                    resource.metadata.update_element('title', title_element.id,
                                                     **v)
                elif k == 'rights':
                    rights_element = resource.metadata.rights
                    resource.metadata.update_element('rights',
                                                     rights_element.id, **v)
                elif k == 'creator':
                    resource.metadata.creators.all().delete()
                    resource.metadata.create_element('creator', **v)
                else:
                    resource.metadata.create_element(k, **v)

            # create the ncdump text file
            dump_file = create_header_info_txt_file(temp_file, nc_file_name)
            dump_file_name = nc_file_name + '_header_info.txt'
            uploaded_file = UploadedFile(file=open(dump_file, mode="rb"),
                                         name=dump_file_name)
            utils.add_file_to_resource(resource, uploaded_file)
        else:
            delete_resource_file_only(resource, res_file)
            validate_files_dict['are_files_valid'] = False
            err_msg = "Uploaded file was not added to the resource." \
                      " Please provide a valid NetCDF file. "
            validate_files_dict['message'] = err_msg
            log_msg = "File validation failed for netcdf resource (ID:{})."
            log_msg = log_msg.format(resource.short_id)
            log.error(log_msg)

        # cleanup the temp file directory
        if os.path.exists(temp_file):
            shutil.rmtree(os.path.dirname(temp_file))

    # set metadata is dirty flag as false for resource creation
    metadata = resource.metadata
    metadata.is_dirty = False
    metadata.save()

    # since we are extracting metadata after resource creation
    # metadata xml files need to be regenerated - so need to set the
    # dirty bag flags
    if resource.files.all().count() > 0:
        utils.set_dirty_bag_flag(resource)
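# --- Added illustrative sketch (not from the original listing) ---
# The handler above has the signature of a Django signal receiver. The real project wires
# its own post-create signal; the Signal below is a stand-in used only to show the wiring.
import django.dispatch

post_create_resource_demo = django.dispatch.Signal()

def demo_receiver(sender, **kwargs):
    resource = kwargs['resource']
    validate_files_dict = kwargs['validate_files']
    # same keyword-argument contract as netcdf_post_create_resource above
    return resource, validate_files_dict

post_create_resource_demo.connect(demo_receiver)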
Example #9
def get_all_related_shp_files(resource, selected_resource_file, file_type):
    """
    This helper function copies all the related shape files to a temp directory
    and returns a list of those temp file paths as well as a list of the existing related
    resource file objects
    :param resource: an instance of BaseResource to which the *selected_resource_file* belongs
    :param selected_resource_file: an instance of ResourceFile selected by the user to set
    GeoFeatureFile type (the file must be a .shp or a .zip file)
    :param file_type: a flag (True/False) to control resource VS file type actions
    :return: a list of temp file paths for all related shape files, and a list of corresponding
     resource file objects
    """
    def collect_shape_resource_files(res_file):
        # compare without the file extension (-4 for .shp, -8 for .shp.xml)
        if res_file.short_path.lower().endswith('.shp.xml'):
            if selected_resource_file.short_path[:-4] == res_file.short_path[:-8]:
                shape_res_files.append(res_file)
        elif selected_resource_file.short_path[:-4] == res_file.short_path[:-4]:
            shape_res_files.append(res_file)

    shape_temp_files = []
    shape_res_files = []
    temp_dir = ''
    if selected_resource_file.extension.lower() == '.shp':
        for f in resource.files.all():
            if f.file_folder == selected_resource_file.file_folder:
                if f.extension.lower() == '.xml' and \
                        not f.file_name.lower().endswith('.shp.xml'):
                    continue
                if f.extension.lower() in GeoFeatureLogicalFile.get_allowed_storage_file_types():
                    collect_shape_resource_files(f)

        for f in shape_res_files:
            temp_file = utils.get_file_from_irods(f)
            if not temp_dir:
                temp_dir = os.path.dirname(temp_file)
            else:
                file_temp_dir = os.path.dirname(temp_file)
                dst_dir = os.path.join(temp_dir, os.path.basename(temp_file))
                shutil.copy(temp_file, dst_dir)
                shutil.rmtree(file_temp_dir)
                temp_file = dst_dir
            shape_temp_files.append(temp_file)

    elif selected_resource_file.extension.lower() == '.zip':
        temp_file = utils.get_file_from_irods(selected_resource_file)
        temp_dir = os.path.dirname(temp_file)
        if not zipfile.is_zipfile(temp_file):
            if os.path.isdir(temp_dir):
                shutil.rmtree(temp_dir)
            raise ValidationError('Selected file is not a zip file')
        zf = zipfile.ZipFile(temp_file, 'r')
        zf.extractall(temp_dir)
        zf.close()
        for dirpath, _, filenames in os.walk(temp_dir):
            for name in filenames:
                if name == selected_resource_file.file_name:
                    # skip the user selected zip file
                    continue
                file_path = os.path.abspath(os.path.join(dirpath, name))
                shape_temp_files.append(file_path)

        shape_res_files.append(selected_resource_file)

    return shape_temp_files, shape_res_files
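# --- Added illustrative sketch (not from the original listing) ---
# Basename-matching idea used by collect_shape_resource_files() above. The candidate file
# names here are made up; in the real code the allowed extensions come from
# GeoFeatureLogicalFile.get_allowed_storage_file_types().
import os

def related_shp_parts_sketch(selected_shp, candidate_files):
    base = os.path.splitext(selected_shp)[0]
    related = []
    for name in candidate_files:
        if name.lower().endswith('.shp.xml'):
            if name[:-8] == base:
                related.append(name)
        elif os.path.splitext(name)[0] == base:
            related.append(name)
    return related

print(related_shp_parts_sketch('roads.shp', ['roads.dbf', 'roads.shp.xml', 'rivers.shp']))
# ['roads.dbf', 'roads.shp.xml']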
Example #10
    def set_file_type(cls, resource, user, file_id=None, folder_path=None):
        """ Creates a RefTimeseriesLogicalFile (aggregation) from a json resource file (.refts.json)
        """

        log = logging.getLogger()
        if file_id is None:
            raise ValueError("Must specify id of the file to be set as an aggregation type")

        # get the selected resource file object
        res_file = utils.get_resource_file_by_id(resource, file_id)

        if res_file is None:
            raise ValidationError("File not found.")

        if not res_file.file_name.lower().endswith('.refts.json'):
            raise ValidationError("Selected file '{}' is not a Ref Time Series file.".format(
                res_file.file_name))

        if res_file.has_logical_file and not res_file.logical_file.is_fileset:
            raise ValidationError("Selected file '{}' is already part of an aggregation".format(
                res_file.file_name))

        try:
            json_file_content = _validate_json_file(res_file)
        except Exception as ex:
            log.exception("failed json validation")
            raise ValidationError(ex.message)

        # get the file from irods to temp dir
        temp_file = utils.get_file_from_irods(res_file)
        temp_dir = os.path.dirname(temp_file)

        with transaction.atomic():
            # create a reftimeseries logical file object to be associated with
            # resource files
            logical_file = cls.create(resource)
            # create logical file record in DB
            logical_file.save()
            logical_file.metadata.json_file_content = json_file_content
            logical_file.metadata.save()

            try:
                # make the json file part of the aggregation
                logical_file.add_resource_file(res_file)
                logical_file.dataset_name = logical_file.metadata.get_title_from_json()
                logical_file.save()
                # extract metadata
                _extract_metadata(resource, logical_file)
                log.info("RefTimeseries aggregation type - json file was added to the resource.")
                logical_file._finalize(user, resource, folder_created=False,
                                       res_files_to_delete=[])

                log.info("RefTimeseries aggregation type was created.")
            except Exception as ex:
                msg = "RefTimeseries aggregation type. Error when setting aggregation " \
                      "type. Error:{}"
                msg = msg.format(ex.message)
                log.exception(msg)
                raise ValidationError(msg)
            finally:
                # remove temp dir
                if os.path.isdir(temp_dir):
                    shutil.rmtree(temp_dir)
Example #11
def raster_file_validation(raster_file, resource, raster_folder=None):
    """ Validates if the relevant files are valid for raster aggregation or raster resource type

    :param  raster_file: a temp file (extension tif or zip) retrieved from irods and stored in a
    temp dir in django
    :param  raster_folder: (optional) folder in which the raster file exists on irods.
    :param  resource: an instance of CompositeResource or GeoRasterResource in which
    raster_file exists.

    :return a dict with a list of error messages and lists of the files that belong to the raster
    """

    error_info = []
    new_resource_files_to_add = []
    raster_resource_files = []
    create_vrt = True
    validation_results = {'error_info': error_info,
                          'new_resource_files_to_add': new_resource_files_to_add,
                          'raster_resource_files': raster_resource_files,
                          'vrt_created': create_vrt}
    file_name_part, ext = os.path.splitext(os.path.basename(raster_file))
    ext = ext.lower()

    if ext == '.tif' or ext == '.tiff':
        res_files = ResourceFile.list_folder(resource=resource, folder=raster_folder,
                                             sub_folders=False)

        # check if there is already a vrt file in that folder
        vrt_files = [f for f in res_files if f.extension.lower() == ".vrt"]
        tif_files = [f for f in res_files if f.extension.lower() == ".tif" or
                     f.extension.lower() == ".tiff"]
        if vrt_files:
            if len(vrt_files) > 1:
                error_info.append("More than one vrt file was found.")
                return validation_results
            create_vrt = False
        elif len(tif_files) != 1:
            # if there are more than one tif file and no vrt file, then we just use the
            # selected tif file to create the aggregation in case of composite resource
            if resource.resource_type == "CompositeResource":
                tif_files = [tif_file for tif_file in tif_files if
                             raster_file.endswith(tif_file.file_name)]
            else:
                # if there are more than one tif file, there needs to be one vrt file
                error_info.append("A vrt file is missing.")
                return validation_results

        raster_resource_files.extend(vrt_files)
        raster_resource_files.extend(tif_files)

        if vrt_files:
            temp_dir = os.path.dirname(raster_file)
            temp_vrt_file = utils.get_file_from_irods(vrt_files[0], temp_dir)
        else:
            # create the .vrt file
            try:
                temp_vrt_file = create_vrt_file(raster_file)
            except Exception as ex:
                error_info.append(ex.message)
            else:
                if os.path.isfile(temp_vrt_file):
                    new_resource_files_to_add.append(temp_vrt_file)

    elif ext == '.zip':
        try:
            extract_file_paths = _explode_raster_zip_file(raster_file)
        except Exception as ex:
            error_info.append(ex.message)
        else:
            if extract_file_paths:
                new_resource_files_to_add.extend(extract_file_paths)
    else:
        error_info.append("Invalid file mime type found.")

    if not error_info:
        if ext == ".zip":
            # in case of zip, there needs to be more than one file extracted out of the zip file
            if len(new_resource_files_to_add) < 2:
                error_info.append("Invalid zip file. Seems to contain only one file. "
                                  "Multiple tif files are expected.")
                return validation_results

            files_ext = [os.path.splitext(path)[1].lower() for path in new_resource_files_to_add]
            if files_ext.count('.vrt') > 1:
                error_info.append("Invalid zip file. Seems to contain multiple vrt files.")
                return validation_results
            elif files_ext.count('.vrt') == 0:
                error_info.append("Invalid zip file. No vrt file was found.")
                return validation_results
            elif files_ext.count('.tif') + files_ext.count('.tiff') < 1:
                error_info.append("Invalid zip file. No tif/tiff file was found.")
                return validation_results

            # check if there are files that are not raster related
            non_raster_files = [f_ext for f_ext in files_ext if f_ext
                                not in ('.tif', '.tiff', '.vrt')]
            if non_raster_files:
                error_info.append("Invalid zip file. Contains files that are not raster related.")
                return validation_results

            temp_vrt_file = new_resource_files_to_add[files_ext.index('.vrt')]

        # validate vrt file if we didn't create it
        if ext == '.zip' or not create_vrt:
            raster_dataset = gdal.Open(temp_vrt_file, GA_ReadOnly)
            if raster_dataset is None:
                error_info.append('Failed to open the vrt file.')
                return validation_results

            # check if the vrt file is valid
            try:
                raster_dataset.RasterXSize
                raster_dataset.RasterYSize
                raster_dataset.RasterCount
            except AttributeError:
                error_info.append('Raster size and band information are missing.')
                return validation_results

            # check if the raster file numbers and names are valid in vrt file
            with open(temp_vrt_file, 'r') as vrt_file:
                vrt_string = vrt_file.read()
                root = ET.fromstring(vrt_string)
                file_names_in_vrt = [file_name.text for file_name in root.iter('SourceFilename')]

            if ext == '.zip':
                file_names = [os.path.basename(path) for path in new_resource_files_to_add]
            else:
                file_names = [f.file_name for f in raster_resource_files]

            file_names = [f_name for f_name in file_names if not f_name.endswith('.vrt')]

            if len(file_names) > len(file_names_in_vrt):
                msg = 'One or more additional tif files were found which are not listed in ' \
                      'the provided {} file.'
                msg = msg.format(os.path.basename(temp_vrt_file))
                error_info.append(msg)
            else:
                for vrt_ref_raster_name in file_names_in_vrt:
                    if vrt_ref_raster_name in file_names \
                            or (os.path.split(vrt_ref_raster_name)[0] == '.' and
                                os.path.split(vrt_ref_raster_name)[1] in file_names):
                        continue
                    elif os.path.basename(vrt_ref_raster_name) in file_names:
                        msg = "Please specify {} as {} in the .vrt file, because it will " \
                              "be saved in the same folder with .vrt file in HydroShare."
                        msg = msg.format(vrt_ref_raster_name, os.path.basename(vrt_ref_raster_name))
                        error_info.append(msg)
                        break
                    else:
                        msg = "The file {tif} which is listed in the {vrt} file is missing."
                        msg = msg.format(tif=os.path.basename(vrt_ref_raster_name),
                                         vrt=os.path.basename(temp_vrt_file))
                        error_info.append(msg)
                        break

    return validation_results
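# --- Added illustrative sketch (not from the original listing) ---
# How callers elsewhere in this listing consume the dict returned by raster_file_validation();
# the function below is a hedged stand-alone summary of that pattern, not library code.
def handle_raster_validation_results(validation_results):
    if validation_results['error_info']:
        # validation failed: report all error messages
        return "Validation failed: " + "; ".join(validation_results['error_info'])
    # validation passed: these temp files (e.g. a generated .vrt) still need to be added
    # to the resource/aggregation by the caller
    return validation_results['new_resource_files_to_add']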
Example #12
    def set_file_type(cls, resource, user, file_id=None, folder_path=None):
        """ Creates a GeoRasterLogicalFile (aggregation) from a tif or a zip resource file, or a
        folder """

        log = logging.getLogger()
        res_file, folder_path = cls._validate_set_file_type_inputs(resource, file_id, folder_path)
        file_name = res_file.file_name
        # get file name without the extension - needed for naming the aggregation folder
        base_file_name = file_name[:-len(res_file.extension)]
        file_folder = res_file.file_folder
        aggregation_folder_created = False
        # determine if we need to create a new folder for the aggregation
        create_new_folder = cls._check_create_aggregation_folder(
            selected_res_file=res_file, selected_folder=folder_path,
            aggregation_file_count=1)

        upload_folder = ''
        # get the file from irods to temp dir
        temp_file = utils.get_file_from_irods(res_file)
        temp_dir = os.path.dirname(temp_file)
        res_files_to_delete = []
        raster_folder = folder_path if folder_path is not None else file_folder
        # validate the file
        validation_results = raster_file_validation(raster_file=temp_file, resource=resource,
                                                    raster_folder=raster_folder)

        if not validation_results['error_info']:
            msg = "Geographic raster aggregation. Error when creating aggregation. Error:{}"
            file_type_success = False
            log.info("Geographic raster aggregation validation successful.")
            # extract metadata
            temp_vrt_file_path = [os.path.join(temp_dir, f) for f in os.listdir(temp_dir) if
                                  '.vrt' == os.path.splitext(f)[1]].pop()
            metadata = extract_metadata(temp_vrt_file_path)
            log.info("Geographic raster metadata extraction was successful.")

            with transaction.atomic():
                # create a geo raster logical file object to be associated with resource files
                logical_file = cls.initialize(base_file_name, resource)

                try:
                    if not folder_path:
                        # we are here means aggregation is being created by selecting a file
                        if create_new_folder:
                            # create a folder for the raster file type using the base file name
                            # as the name for the new folder
                            upload_folder = cls._create_aggregation_folder(resource, file_folder,
                                                                           base_file_name)
                            log.info("Folder created:{}".format(upload_folder))
                            aggregation_folder_created = True
                        else:
                            upload_folder = file_folder

                        # create logical file record in DB
                        logical_file.save()
                        if res_file.extension.lower() in [".tiff", ".tif"]:
                            if aggregation_folder_created:
                                tgt_folder = upload_folder

                                # copy any existing raster specific files to the new aggregation
                                # folder and make them part of the logical file
                                files_to_copy = validation_results['raster_resource_files']
                                logical_file.copy_resource_files(resource, files_to_copy,
                                                                 tgt_folder)
                                res_files_to_delete.extend(files_to_copy)
                            else:
                                # make the existing raster specific files part of the
                                # aggregation/file type
                                for raster_res_file in validation_results['raster_resource_files']:
                                    logical_file.add_resource_file(raster_res_file)

                        else:
                            # selected file must be a zip file
                            res_files_to_delete.append(res_file)
                    else:
                        # create logical file record in DB
                        logical_file.save()
                        # user selected a folder to create aggregation
                        upload_folder = folder_path

                        # make all the files in the selected folder as part of the aggregation
                        logical_file.add_resource_files_in_folder(resource, folder_path)

                    # add all new files to resource and make those part of the logical file
                    if validation_results['new_resource_files_to_add']:
                        files_to_add_to_resource = validation_results['new_resource_files_to_add']
                        logical_file.add_files_to_resource(
                            resource=resource, files_to_add=files_to_add_to_resource,
                            upload_folder=upload_folder)
                    log.info("Geographic raster aggregation type - new files were added "
                             "to the resource.")

                    # use the extracted metadata to populate file metadata
                    for element in metadata:
                        # here k is the name of the element
                        # v is a dict of all element attributes/field names and field values
                        k, v = element.items()[0]
                        logical_file.metadata.create_element(k, **v)
                    log.info("Geographic raster aggregation type - metadata was saved to DB")
                    logical_file._finalize(user, resource,
                                           folder_created=aggregation_folder_created,
                                           res_files_to_delete=res_files_to_delete,
                                           reset_title=True)

                    file_type_success = True
                    post_add_raster_aggregation.send(
                        sender=AbstractLogicalFile,
                        resource=resource,
                        file=logical_file
                    )
                except Exception as ex:
                    msg = msg.format(ex.message)
                    log.exception(msg)
                finally:
                    # remove temp dir
                    if os.path.isdir(temp_dir):
                        shutil.rmtree(temp_dir)

            if not file_type_success:
                aggregation_from_folder = folder_path is not None
                cls._cleanup_on_fail_to_create_aggregation(user, resource, upload_folder,
                                                           file_folder, aggregation_from_folder)
                raise ValidationError(msg)
        else:
            # remove temp dir
            if os.path.isdir(temp_dir):
                shutil.rmtree(temp_dir)
            err_msg = "Geographic raster aggregation type validation failed. {}".format(
                ' '.join(validation_results['error_info']))
            log.error(err_msg)
            raise ValidationError(err_msg)
Example #13
def netcdf_post_create_resource(sender, **kwargs):
    log = logging.getLogger()
    resource = kwargs['resource']
    validate_files_dict = kwargs['validate_files']
    res_file = resource.files.all().first()

    if res_file:
        temp_file = utils.get_file_from_irods(res_file)
        nc_dataset = nc_utils.get_nc_dataset(temp_file)
        nc_file_name = res_file.file_name

        if isinstance(nc_dataset, netCDF4.Dataset):
            # Extract the metadata from netcdf file
            res_dublin_core_meta, res_type_specific_meta = nc_meta.get_nc_meta_dict(temp_file)
            # populate metadata list with extracted metadata
            metadata = []
            add_metadata_to_list(metadata, res_dublin_core_meta, res_type_specific_meta)
            for element in metadata:
                # here k is the name of the element
                # v is a dict of all element attributes/field names and field values
                k, v = element.items()[0]
                if k == 'title':
                    # update title element
                    title_element = resource.metadata.title
                    resource.metadata.update_element('title', title_element.id, **v)
                elif k == 'rights':
                    rights_element = resource.metadata.rights
                    resource.metadata.update_element('rights', rights_element.id, **v)
                elif k == 'creator':
                    resource.metadata.creators.all().delete()
                    resource.metadata.create_element('creator', **v)
                else:
                    resource.metadata.create_element(k, **v)

            # create the ncdump text file
            dump_file = create_header_info_txt_file(temp_file, nc_file_name)
            dump_file_name = nc_file_name + '_header_info.txt'
            uploaded_file = UploadedFile(file=open(dump_file), name=dump_file_name)
            utils.add_file_to_resource(resource, uploaded_file)
        else:
            delete_resource_file_only(resource, res_file)
            validate_files_dict['are_files_valid'] = False
            err_msg = "Uploaded file was not added to the resource." \
                      " Please provide a valid NetCDF file. "
            validate_files_dict['message'] = err_msg
            log_msg = "File validation failed for netcdf resource (ID:{})."
            log_msg = log_msg.format(resource.short_id)
            log.error(log_msg)

        # cleanup the temp file directory
        if os.path.exists(temp_file):
            shutil.rmtree(os.path.dirname(temp_file))

    # set metadata is dirty flag as false for resource creation
    metadata = resource.metadata
    metadata.is_dirty = False
    metadata.save()

    # since we are extracting metadata after resource creation
    # metadata xml files need to be regenerated - so need to set the
    # dirty bag flags
    if resource.files.all().count() > 0:
        utils.set_dirty_bag_flag(resource)
Example #14
    def set_file_type(cls, resource, user, file_id=None, folder_path=None):
        """ Creates a RefTimeseriesLogicalFile (aggregation) from a json resource file (.refts.json)
        """

        log = logging.getLogger()
        if file_id is None:
            raise ValueError("Must specify id of the file to be set as an aggregation type")

        # get the selected resource file object
        res_file = utils.get_resource_file_by_id(resource, file_id)

        if res_file is None:
            raise ValidationError("File not found.")

        if not res_file.file_name.lower().endswith('.refts.json'):
            raise ValidationError("Selected file '{}' is not a Ref Time Series file.".format(
                res_file.file_name))

        if res_file.has_logical_file and not res_file.logical_file.is_fileset:
            raise ValidationError("Selected file '{}' is already part of an aggregation".format(
                res_file.file_name))

        try:
            json_file_content = _validate_json_file(res_file)
        except Exception as ex:
            log.exception("failed json validation")
            raise ValidationError(ex.message)

        # get the file from irods to temp dir
        temp_file = utils.get_file_from_irods(res_file)
        temp_dir = os.path.dirname(temp_file)

        with transaction.atomic():
            # create a reftimeseries logical file object to be associated with
            # resource files
            logical_file = cls.create(resource)
            # create logical file record in DB
            logical_file.save()
            logical_file.metadata.json_file_content = json_file_content
            logical_file.metadata.save()

            try:
                # make the json file part of the aggregation
                logical_file.add_resource_file(res_file)
                logical_file.dataset_name = logical_file.metadata.get_title_from_json()
                logical_file.save()
                # extract metadata
                _extract_metadata(resource, logical_file)
                log.info("RefTimeseries aggregation type - json file was added to the resource.")
                logical_file._finalize(user, resource, folder_created=False,
                                       res_files_to_delete=[])

                log.info("RefTimeseries aggregation type was created.")
                post_add_reftimeseries_aggregation.send(
                    sender=AbstractLogicalFile,
                    resource=resource,
                    file=logical_file
                )
            except Exception as ex:
                msg = "RefTimeseries aggregation type. Error when setting aggregation " \
                      "type. Error:{}"
                msg = msg.format(ex.message)
                log.exception(msg)
                raise ValidationError(msg)
            finally:
                # remove temp dir
                if os.path.isdir(temp_dir):
                    shutil.rmtree(temp_dir)
Example #15
    def set_file_type(cls, resource, file_id, user):
        """
            Sets a netcdf (.nc) resource file to NetCDFFile type
            :param resource: an instance of resource type CompositeResource
            :param file_id: id of the resource file to be set as NetCDFFile type
            :param user: user who is setting the file type
            :return:
            """

        # had to import it here to avoid import loop
        from hs_core.views.utils import create_folder

        log = logging.getLogger()

        # get the file from irods
        res_file = utils.get_resource_file_by_id(resource, file_id)

        if res_file is None:
            raise ValidationError("File not found.")

        if res_file.extension != '.nc':
            raise ValidationError("Not a NetCDF file.")

        # base file name (no path included)
        file_name = res_file.file_name
        # file name without the extension
        nc_file_name = file_name.split(".")[0]

        resource_metadata = []
        file_type_metadata = []
        files_to_add_to_resource = []
        if res_file.has_generic_logical_file:
            # get the file from irods to temp dir
            temp_file = utils.get_file_from_irods(res_file)
            temp_dir = os.path.dirname(temp_file)
            files_to_add_to_resource.append(temp_file)
            # file validation and metadata extraction
            nc_dataset = nc_utils.get_nc_dataset(temp_file)
            if isinstance(nc_dataset, netCDF4.Dataset):
                # Extract the metadata from netcdf file
                res_dublin_core_meta, res_type_specific_meta = nc_meta.get_nc_meta_dict(
                    temp_file)
                # populate resource_metadata and file_type_metadata lists with extracted metadata
                add_metadata_to_list(resource_metadata, res_dublin_core_meta,
                                     res_type_specific_meta,
                                     file_type_metadata, resource)

                # create the ncdump text file
                dump_file = create_header_info_txt_file(
                    temp_file, nc_file_name)
                files_to_add_to_resource.append(dump_file)
                file_folder = res_file.file_folder
                with transaction.atomic():
                    # first delete the netcdf file that we retrieved from irods
                    # for setting it to netcdf file type
                    delete_resource_file(resource.short_id, res_file.id, user)

                    # create a netcdf logical file object to be associated with
                    # resource files
                    logical_file = cls.create()

                    # by default set the dataset_name attribute of the logical file to the
                    # name of the file selected to set file type unless the extracted metadata
                    # has a value for title
                    dataset_title = res_dublin_core_meta.get('title', None)
                    if dataset_title is not None:
                        logical_file.dataset_name = dataset_title
                    else:
                        logical_file.dataset_name = nc_file_name
                    logical_file.save()

                    try:
                        # create a folder for the netcdf file type using the base file
                        # name as the name for the new folder
                        new_folder_path = cls.compute_file_type_folder(
                            resource, file_folder, nc_file_name)
                        fed_file_full_path = ''
                        if resource.resource_federation_path:
                            fed_file_full_path = os.path.join(
                                resource.root_path, new_folder_path)

                        create_folder(resource.short_id, new_folder_path)
                        log.info("Folder created:{}".format(new_folder_path))

                        new_folder_name = new_folder_path.split('/')[-1]
                        if file_folder is None:
                            upload_folder = new_folder_name
                        else:
                            upload_folder = os.path.join(
                                file_folder, new_folder_name)
                        # add all new files to the resource
                        for f in files_to_add_to_resource:
                            uploaded_file = UploadedFile(
                                file=open(f, 'rb'), name=os.path.basename(f))
                            new_res_file = utils.add_file_to_resource(
                                resource,
                                uploaded_file,
                                folder=upload_folder,
                                fed_res_file_name_or_path=fed_file_full_path)
                            # make each resource file we added as part of the logical file
                            logical_file.add_resource_file(new_res_file)

                        log.info(
                            "NetCDF file type - new files were added to the resource."
                        )
                    except Exception as ex:
                        msg = "NetCDF file type. Error when setting file type. Error:{}"
                        msg = msg.format(ex.message)
                        log.exception(msg)
                        # TODO: in case of any error put the original file back and
                        # delete the folder that was created
                        raise ValidationError(msg)
                    finally:
                        # remove temp dir
                        if os.path.isdir(temp_dir):
                            shutil.rmtree(temp_dir)

                    log.info("NetCDF file type was created.")

                    # use the extracted metadata to populate resource metadata
                    for element in resource_metadata:
                        # here k is the name of the element
                        # v is a dict of all element attributes/field names and field values
                        k, v = element.items()[0]
                        if k == 'title':
                            # update title element
                            title_element = resource.metadata.title
                            resource.metadata.update_element(
                                'title', title_element.id, **v)
                        else:
                            resource.metadata.create_element(k, **v)

                    log.info("Resource - metadata was saved to DB")

                    # use the extracted metadata to populate file metadata
                    for element in file_type_metadata:
                        # here k is the name of the element
                        # v is a dict of all element attributes/field names and field values
                        k, v = element.items()[0]
                        if k == 'subject':
                            logical_file.metadata.keywords = v
                            logical_file.metadata.save()
                        else:
                            logical_file.metadata.create_element(k, **v)
                    log.info("NetCDF file type - metadata was saved to DB")
            else:
                err_msg = "Not a valid NetCDF file. File type file validation failed."
                log.error(err_msg)
                # remove temp dir
                if os.path.isdir(temp_dir):
                    shutil.rmtree(temp_dir)
                raise ValidationError(err_msg)
Example #16
from hs_core.hydroshare.utils import resource_modified, get_file_from_irods
from hs_file_types import raster_meta_extract
from hs_geo_raster_resource.models import RasterResource

copy_res_fail = []
meta_update_fail = []
meta_update_success = []
# start migration for each raster resource that has raster files
for res in RasterResource.objects.all():
    # copy all the resource files to temp dir
    temp_dir = ''
    res_file_tmp_path = ''
    try:
        temp_dir = tempfile.mkdtemp()
        for res_file in res.files.all():
            res_file_tmp_path = get_file_from_irods(res_file)
            shutil.copy(
                res_file_tmp_path,
                os.path.join(temp_dir, os.path.basename(res_file_tmp_path)))
            shutil.rmtree(os.path.dirname(res_file_tmp_path))

        vrt_file_path = [
            os.path.join(temp_dir, f) for f in os.listdir(temp_dir)
            if '.vrt' == f[-4:]
        ].pop()
    except Exception as e:
        if os.path.isdir(temp_dir):
            shutil.rmtree(temp_dir)
        if os.path.isfile(res_file_tmp_path):
            shutil.rmtree(os.path.dirname(res_file_tmp_path))
        copy_res_fail.append('{}:{}'.format(res.short_id,
Example #17
    def set_file_type(cls, resource, user, file_id=None, folder_path=None):
        """ Creates a NetCDFLogicalFile (aggregation) from a netcdf file (.nc) resource file
        or a folder """

        log = logging.getLogger()
        res_file, folder_path = cls._validate_set_file_type_inputs(resource, file_id, folder_path)

        # base file name (no path included)
        file_name = res_file.file_name
        # file name without the extension - needed for naming the new aggregation folder
        nc_file_name = file_name[:-len(res_file.extension)]

        resource_metadata = []
        file_type_metadata = []
        upload_folder = ''
        res_files_to_delete = []
        # get the file from irods to temp dir
        temp_file = utils.get_file_from_irods(res_file)
        temp_dir = os.path.dirname(temp_file)

        # file validation and metadata extraction
        nc_dataset = nc_utils.get_nc_dataset(temp_file)
        if isinstance(nc_dataset, netCDF4.Dataset):
            msg = "NetCDF aggregation. Error when creating aggregation. Error:{}"
            file_type_success = False
            # extract the metadata from netcdf file
            res_dublin_core_meta, res_type_specific_meta = nc_meta.get_nc_meta_dict(temp_file)
            # populate resource_metadata and file_type_metadata lists with extracted metadata
            add_metadata_to_list(resource_metadata, res_dublin_core_meta,
                                 res_type_specific_meta, file_type_metadata, resource)

            # create the ncdump text file
            dump_file = create_header_info_txt_file(temp_file, nc_file_name)
            file_folder = res_file.file_folder
            aggregation_folder_created = False
            create_new_folder = cls._check_create_aggregation_folder(
                selected_res_file=res_file, selected_folder=folder_path,
                aggregation_file_count=1)

            with transaction.atomic():
                # create a netcdf logical file object to be associated with
                # resource files
                dataset_title = res_dublin_core_meta.get('title', nc_file_name)
                logical_file = cls.initialize(dataset_title, resource)

                try:
                    if folder_path is None:
                        # we are here means aggregation is being created by selecting a file

                        # create a folder for the netcdf file type using the base file
                        # name as the name for the new folder if the file is not already in a folder
                        if create_new_folder:
                            upload_folder = cls._create_aggregation_folder(resource, file_folder,
                                                                           nc_file_name)
                            aggregation_folder_created = True
                            log.info("NetCDF Aggregation creation - folder created:{}".format(
                                upload_folder))
                        else:
                            # selected nc file is already in a folder
                            upload_folder = file_folder

                        # create logical file record in DB
                        logical_file.save()
                        if aggregation_folder_created:
                            # copy the nc file to the new aggregation folder and make it part
                            # of the logical file
                            tgt_folder = upload_folder
                            files_to_copy = [res_file]
                            logical_file.copy_resource_files(resource, files_to_copy,
                                                             tgt_folder)
                            res_files_to_delete.append(res_file)
                        else:
                            # make the selected nc file part of the aggregation/file type
                            logical_file.add_resource_file(res_file)

                    else:
                        # logical file record gets created in DB
                        logical_file.save()
                        # folder has been selected to create aggregation
                        upload_folder = folder_path
                        # make the .nc file part of the aggregation
                        logical_file.add_resource_file(res_file)

                    # add the new dump txt file to the resource
                    uploaded_file = UploadedFile(file=open(dump_file, 'rb'),
                                                 name=os.path.basename(dump_file))

                    new_res_file = utils.add_file_to_resource(
                        resource, uploaded_file, folder=upload_folder, add_to_aggregation=False
                    )

                    # make this new resource file we added part of the logical file
                    logical_file.add_resource_file(new_res_file)
                    log.info("NetCDF aggregation creation - a new file was added to the resource.")

                    # use the extracted metadata to populate resource metadata
                    for element in resource_metadata:
                        # here k is the name of the element
                        # v is a dict of all element attributes/field names and field values
                        k, v = element.items()[0]
                        if k == 'title':
                            # update title element
                            title_element = resource.metadata.title
                            resource.metadata.update_element('title', title_element.id, **v)
                        else:
                            resource.metadata.create_element(k, **v)

                    log.info("NetCDF Aggregation creation - Resource metadata was saved to DB")

                    # use the extracted metadata to populate file metadata
                    for element in file_type_metadata:
                        # here k is the name of the element
                        # v is a dict of all element attributes/field names and field values
                        k, v = element.items()[0]
                        if k == 'subject':
                            logical_file.metadata.keywords = v
                            logical_file.metadata.save()
                            # update resource level keywords
                            resource_keywords = [subject.value.lower() for subject in
                                                 resource.metadata.subjects.all()]
                            for kw in logical_file.metadata.keywords:
                                if kw.lower() not in resource_keywords:
                                    resource.metadata.create_element('subject', value=kw)
                        else:
                            logical_file.metadata.create_element(k, **v)
                    log.info("NetCDF aggregation - metadata was saved in aggregation")
                    logical_file._finalize(user, resource,
                                           folder_created=aggregation_folder_created,
                                           res_files_to_delete=res_files_to_delete)
                    file_type_success = True
                    post_add_netcdf_aggregation.send(
                        sender=AbstractLogicalFile,
                        resource=resource,
                        file=logical_file
                    )
                except Exception as ex:
                    msg = msg.format(ex.message)
                    log.exception(msg)
                finally:
                    # remove temp dir
                    if os.path.isdir(temp_dir):
                        shutil.rmtree(temp_dir)

            if not file_type_success:
                aggregation_from_folder = folder_path is not None
                cls._cleanup_on_fail_to_create_aggregation(user, resource, upload_folder,
                                                           file_folder, aggregation_from_folder)
                raise ValidationError(msg)

        else:
            err_msg = "Not a valid NetCDF file. NetCDF aggregation validation failed."
            log.error(err_msg)
            # remove temp dir
            if os.path.isdir(temp_dir):
                shutil.rmtree(temp_dir)
            raise ValidationError(err_msg)
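The two metadata loops above expect `resource_metadata` and `file_type_metadata` to be lists of single-key dicts, one per metadata element, which is why `element.items()[0]` (Python 2 `dict.items()` returns a list) yields the element name and its field values. A minimal sketch of that assumed shape, with illustrative values only:

# Hypothetical shape of the lists built by add_metadata_to_list (values are
# illustrative assumptions, not output of the real extractor).
resource_metadata = [
    {'title': {'value': 'Example NetCDF dataset'}},
    {'coverage': {'type': 'period',
                  'value': {'start': '01/01/2010', 'end': '12/31/2010'}}},
]

for element in resource_metadata:
    # each dict holds exactly one element-name -> attribute-dict pair
    k, v = element.items()[0]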
Example #18
def netcdf_file_update(instance, nc_res_file, txt_res_file, user):
    log = logging.getLogger()
    # check the instance type
    file_type = isinstance(instance, NetCDFLogicalFile)

    # get the file from irods to temp dir
    temp_nc_file = utils.get_file_from_irods(nc_res_file)
    nc_dataset = netCDF4.Dataset(temp_nc_file, 'a')

    try:
        # update title
        title = instance.dataset_name if file_type else instance.metadata.title.value

        if title.lower() != 'untitled resource':
            if hasattr(nc_dataset, 'title'):
                delattr(nc_dataset, 'title')
            nc_dataset.title = title

        # update keywords
        keywords = instance.metadata.keywords if file_type \
            else [item.value for item in instance.metadata.subjects.all()]

        if hasattr(nc_dataset, 'keywords'):
            delattr(nc_dataset, 'keywords')

        if keywords:
            nc_dataset.keywords = ', '.join(keywords)

        # update key/value metadata
        extra_metadata_dict = instance.metadata.extra_metadata if file_type \
            else instance.extra_metadata

        if hasattr(nc_dataset, 'hs_extra_metadata'):
            delattr(nc_dataset, 'hs_extra_metadata')

        if extra_metadata_dict:
            extra_metadata = []
            for k, v in extra_metadata_dict.items():
                extra_metadata.append("{}:{}".format(k, v))
            nc_dataset.hs_extra_metadata = ', '.join(extra_metadata)

        # update temporal coverage
        temporal_coverage = instance.metadata.temporal_coverage if file_type \
            else instance.metadata.coverages.all().filter(type='period').first()

        for attr_name in ['time_coverage_start', 'time_coverage_end']:
            if hasattr(nc_dataset, attr_name):
                delattr(nc_dataset, attr_name)

        if temporal_coverage:
            nc_dataset.time_coverage_start = temporal_coverage.value['start']
            nc_dataset.time_coverage_end = temporal_coverage.value['end']

        # update spatial coverage
        spatial_coverage = instance.metadata.spatial_coverage if file_type \
            else instance.metadata.coverages.all().filter(type='box').first()

        for attr_name in ['geospatial_lat_min', 'geospatial_lat_max', 'geospatial_lon_min',
                          'geospatial_lon_max']:
            if hasattr(nc_dataset, attr_name):
                delattr(nc_dataset, attr_name)

        if spatial_coverage:
            nc_dataset.geospatial_lat_min = spatial_coverage.value['southlimit']
            nc_dataset.geospatial_lat_max = spatial_coverage.value['northlimit']
            nc_dataset.geospatial_lon_min = spatial_coverage.value['westlimit']
            nc_dataset.geospatial_lon_max = spatial_coverage.value['eastlimit']

        # update variables
        if instance.metadata.variables.all():
            dataset_variables = nc_dataset.variables
            for variable in instance.metadata.variables.all():
                if variable.name in dataset_variables.keys():
                    dataset_variable = dataset_variables[variable.name]

                    # update units
                    if hasattr(dataset_variable, 'units'):
                        delattr(dataset_variable, 'units')
                    if variable.unit != 'Unknown':
                        dataset_variable.setncattr('units', variable.unit)

                    # update long_name
                    if hasattr(dataset_variable, 'long_name'):
                        delattr(dataset_variable, 'long_name')
                    if variable.descriptive_name:
                        dataset_variable.setncattr('long_name', variable.descriptive_name)

                    # update method
                    if hasattr(dataset_variable, 'comment'):
                        delattr(dataset_variable, 'comment')
                    if variable.method:
                        dataset_variable.setncattr('comment', variable.method)

                    # update missing value
                    if variable.missing_value:
                        if hasattr(dataset_variable, 'missing_value'):
                            missing_value = dataset_variable.missing_value
                            delattr(dataset_variable, 'missing_value')
                        else:
                            missing_value = ''
                        try:
                            dt = np.dtype(dataset_variable.datatype.name)
                            missing_value = np.fromstring(variable.missing_value + ' ',
                                                          dtype=dt.type, sep=" ")
                        except:
                            pass

                        if missing_value:
                            dataset_variable.setncattr('missing_value', missing_value)

        # update metadata elements that only apply to the netCDF resource
        if not file_type:

            # update summary
            if hasattr(nc_dataset, 'summary'):
                delattr(nc_dataset, 'summary')
            if instance.metadata.description:
                nc_dataset.summary = instance.metadata.description.abstract

            # update contributor
            if hasattr(nc_dataset, 'contributor_name'):
                delattr(nc_dataset, 'contributor_name')

            contributor_list = instance.metadata.contributors.all()
            if contributor_list:
                res_contri_name = []
                for contributor in contributor_list:
                    res_contri_name.append(contributor.name)

                nc_dataset.contributor_name = ', '.join(res_contri_name)

            # update creator
            for attr_name in ['creator_name', 'creator_email', 'creator_url']:
                if hasattr(nc_dataset, attr_name):
                    delattr(nc_dataset, attr_name)

            creator = instance.metadata.creators.all().filter(order=1).first()
            if creator:
                nc_dataset.creator_name = creator.name if creator.name else creator.organization

                if creator.email:
                    nc_dataset.creator_email = creator.email
                if creator.description or creator.homepage:
                    nc_dataset.creator_url = creator.homepage if creator.homepage \
                        else 'https://www.hydroshare.org' + creator.description

            # update license
            if hasattr(nc_dataset, 'license'):
                delattr(nc_dataset, 'license')
            if instance.metadata.rights:
                nc_dataset.license = "{0} {1}".format(instance.metadata.rights.statement,
                                                      instance.metadata.rights.url)

            # update reference
            if hasattr(nc_dataset, 'references'):
                delattr(nc_dataset, 'references')

            reference_list = instance.metadata.relations.all().filter(type='cites')
            if reference_list:
                res_meta_ref = []
                for reference in reference_list:
                    res_meta_ref.append(reference.value)
                nc_dataset.references = ' \n'.join(res_meta_ref)

            # update source
            if hasattr(nc_dataset, 'source'):
                delattr(nc_dataset, 'source')

            source_list = instance.metadata.sources.all()
            if source_list:
                res_meta_source = []
                for source in source_list:
                    res_meta_source.append(source.derived_from)
                nc_dataset.source = ' \n'.join(res_meta_source)

        # close nc dataset
        nc_dataset.close()

    except Exception as ex:
        log.exception(ex.message)
        if os.path.exists(temp_nc_file):
            shutil.rmtree(os.path.dirname(temp_nc_file))
        raise ex

    # create the ncdump text file
    nc_file_name = os.path.basename(temp_nc_file).split(".")[0]
    temp_text_file = create_header_info_txt_file(temp_nc_file, nc_file_name)

    # push the updated nc file and the txt file to iRODS
    utils.replace_resource_file_on_irods(temp_nc_file, nc_res_file,
                                         user)
    utils.replace_resource_file_on_irods(temp_text_file, txt_res_file,
                                         user)

    metadata = instance.metadata
    if file_type:
        instance.create_aggregation_xml_documents(create_map_xml=False)
    metadata.is_dirty = False
    metadata.save()

    # cleanup the temp dir
    if os.path.exists(temp_nc_file):
        shutil.rmtree(os.path.dirname(temp_nc_file))
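The missing_value block above converts the string stored in the metadata model into the netCDF variable's own numeric dtype before writing it back. A standalone sketch of that conversion, using assumed example values:

import numpy as np

# Assumed example values: the string kept in the Django Variable model and the
# dtype reported by the netCDF4 variable (dataset_variable.datatype.name).
stored_missing_value = "-9999"
dt = np.dtype('float32')

# np.fromstring in text mode (sep=" ") parses the string into a typed array,
# e.g. array([-9999.], dtype=float32), which is then written via setncattr.
missing_value = np.fromstring(stored_missing_value + ' ', dtype=dt.type, sep=" ")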
Example #19
    def set_file_type(cls, resource, file_id, user):
        """
            Sets a tif or zip raster resource file to GeoRasterFile type
            :param resource: an instance of resource type CompositeResource
            :param file_id: id of the resource file to be set as GeoRasterFile type
            :param user: user who is setting the file type
            :return:
            """

        # had to import it here to avoid import loop
        from hs_core.views.utils import create_folder, remove_folder

        log = logging.getLogger()

        # get the file from irods
        res_file = utils.get_resource_file_by_id(resource, file_id)

        # base file name (no path included)
        file_name = utils.get_resource_file_name_and_extension(res_file)[1]
        # file name without the extension
        file_name = file_name[:-len(res_file.extension)]
        file_folder = res_file.file_folder
        upload_folder = ''
        if res_file is not None and res_file.has_generic_logical_file:
            # get the file from irods to temp dir
            temp_file = utils.get_file_from_irods(res_file)
            # validate the file
            error_info, files_to_add_to_resource = raster_file_validation(
                raster_file=temp_file)
            if not error_info:
                log.info("Geo raster file type file validation successful.")
                # extract metadata
                temp_dir = os.path.dirname(temp_file)
                temp_vrt_file_path = [
                    os.path.join(temp_dir, f) for f in os.listdir(temp_dir)
                    if '.vrt' == os.path.splitext(f)[1]
                ].pop()
                metadata = extract_metadata(temp_vrt_file_path)
                log.info(
                    "Geo raster file type metadata extraction was successful.")
                with transaction.atomic():
                    # create a geo raster logical file object to be associated with resource files
                    logical_file = cls.create()
                    # by default set the dataset_name attribute of the logical file to the
                    # name of the file selected to set file type
                    logical_file.dataset_name = file_name
                    logical_file.save()

                    try:
                        # create a folder for the raster file type using the base file name as the
                        # name for the new folder
                        new_folder_path = cls.compute_file_type_folder(
                            resource, file_folder, file_name)

                        log.info("Folder created:{}".format(new_folder_path))
                        create_folder(resource.short_id, new_folder_path)

                        new_folder_name = new_folder_path.split('/')[-1]
                        if file_folder is None:
                            upload_folder = new_folder_name
                        else:
                            upload_folder = os.path.join(
                                file_folder, new_folder_name)

                        # add all new files to the resource
                        for f in files_to_add_to_resource:
                            uploaded_file = UploadedFile(
                                file=open(f, 'rb'), name=os.path.basename(f))
                            # the added resource file will be part of a new generic logical file
                            # by default
                            new_res_file = utils.add_file_to_resource(
                                resource, uploaded_file, folder=upload_folder)

                            # delete the generic logical file object
                            if new_res_file.logical_file is not None:
                                # deleting the file level metadata object will delete the associated
                                # logical file object
                                new_res_file.logical_file.metadata.delete()

                            # make each resource file we added part of the logical file
                            logical_file.add_resource_file(new_res_file)

                        log.info(
                            "Geo raster file type - new files were added to the resource."
                        )

                        # use the extracted metadata to populate file metadata
                        for element in metadata:
                            # here k is the name of the element
                            # v is a dict of all element attributes/field names and field values
                            k, v = element.items()[0]
                            logical_file.metadata.create_element(k, **v)
                        log.info(
                            "Geo raster file type - metadata was saved to DB")
                        # set resource to private if logical file is missing required metadata
                        resource.update_public_and_discoverable()
                        # delete the original resource file
                        delete_resource_file(resource.short_id, res_file.id,
                                             user)
                        log.info("Deleted original resource file.")
                    except Exception as ex:
                        msg = "Geo raster file type. Error when setting file type. Error:{}"
                        msg = msg.format(ex.message)
                        log.exception(msg)
                        if upload_folder:
                            # delete any new files uploaded as part of setting file type
                            folder_to_remove = os.path.join(
                                'data', 'contents', upload_folder)
                            remove_folder(user, resource.short_id,
                                          folder_to_remove)
                            log.info("Deleted newly created file type folder")
                        raise ValidationError(msg)
                    finally:
                        # remove temp dir
                        if os.path.isdir(temp_dir):
                            shutil.rmtree(temp_dir)
            else:
                err_msg = "Geo raster file type file validation failed.{}".format(
                    ' '.join(error_info))
                log.info(err_msg)
                raise ValidationError(err_msg)
        else:
            if res_file is None:
                err_msg = "Failed to set Geo raster file type. " \
                          "Resource doesn't have the specified file."
                log.error(err_msg)
                raise ValidationError(err_msg)
            else:
                err_msg = "Failed to set Geo raster file type." \
                          "The specified file doesn't have a generic logical file type."
                log.error(err_msg)
                raise ValidationError(err_msg)
Example #20
def _process_uploaded_csv_file(resource,
                               res_file,
                               validate_files_dict,
                               user,
                               delete_existing_metadata=True):
    # get the csv file from iRODS to a temp directory
    fl_obj_name = utils.get_file_from_irods(res_file)
    validate_err_message = _validate_csv_file(resource, fl_obj_name)
    if not validate_err_message:
        # first delete relevant existing metadata elements
        if delete_existing_metadata:
            TimeSeriesMetaData.objects.filter(id=resource.metadata.id).update(
                is_dirty=False)
            _delete_extracted_metadata(resource)

        # delete the sqlite file if it exists
        _delete_resource_file(resource, ".sqlite")

        # add the blank sqlite file
        resource.add_blank_sqlite_file(user)

        # populate CV metadata django models from the blank sqlite file

        # copy the blank sqlite file to a temp directory
        temp_dir = tempfile.mkdtemp()
        odm2_sqlite_file_name = 'ODM2.sqlite'
        odm2_sqlite_file = 'hs_app_timeseries/files/{}'.format(
            odm2_sqlite_file_name)
        target_temp_sqlite_file = os.path.join(temp_dir, odm2_sqlite_file_name)
        shutil.copy(odm2_sqlite_file, target_temp_sqlite_file)

        con = sqlite3.connect(target_temp_sqlite_file)
        with con:
            # get the records in python dictionary format
            con.row_factory = sqlite3.Row
            cur = con.cursor()

            # populate the lookup CV tables that are needed later for metadata editing
            _create_cv_lookup_models(cur, resource.metadata, 'CV_VariableType',
                                     CVVariableType)
            _create_cv_lookup_models(cur, resource.metadata, 'CV_VariableName',
                                     CVVariableName)
            _create_cv_lookup_models(cur, resource.metadata, 'CV_Speciation',
                                     CVSpeciation)
            _create_cv_lookup_models(cur, resource.metadata, 'CV_SiteType',
                                     CVSiteType)
            _create_cv_lookup_models(cur, resource.metadata,
                                     'CV_ElevationDatum', CVElevationDatum)
            _create_cv_lookup_models(cur, resource.metadata, 'CV_MethodType',
                                     CVMethodType)
            _create_cv_lookup_models(cur, resource.metadata, 'CV_UnitsType',
                                     CVUnitsType)
            _create_cv_lookup_models(cur, resource.metadata, 'CV_Status',
                                     CVStatus)
            _create_cv_lookup_models(cur, resource.metadata, 'CV_Medium',
                                     CVMedium)
            _create_cv_lookup_models(cur, resource.metadata,
                                     'CV_AggregationStatistic',
                                     CVAggregationStatistic)

        # save some data from the csv file
        with open(fl_obj_name, 'r') as fl_obj:
            csv_reader = csv.reader(fl_obj, delimiter=',')
            # read the first row - header
            header = csv_reader.next()
            # read the 1st data row
            start_date_str = csv_reader.next()[0]
            last_row = None
            data_row_count = 1
            for row in csv_reader:
                last_row = row
                data_row_count += 1
            end_date_str = last_row[0]

            # save the series names along with number of data points for each series
            # columns starting with the 2nd column are data series names
            value_counts = {}
            for data_col_name in header[1:]:
                value_counts[data_col_name] = str(data_row_count)

            TimeSeriesMetaData.objects.filter(id=resource.metadata.id).update(
                value_counts=value_counts)

            # create the temporal coverage element
            resource.metadata.create_element('coverage',
                                             type='period',
                                             value={
                                                 'start': start_date_str,
                                                 'end': end_date_str
                                             })

        # cleanup the temp sqlite file directory
        if os.path.exists(temp_dir):
            shutil.rmtree(temp_dir)

    else:  # file validation failed
        # delete the invalid file just uploaded
        delete_resource_file_only(resource, res_file)
        validate_files_dict['are_files_valid'] = False
        validate_err_message += "{}".format(FILE_UPLOAD_ERROR_MESSAGE)
        validate_files_dict['message'] = validate_err_message

    # cleanup the temp csv file
    if os.path.exists(fl_obj_name):
        shutil.rmtree(os.path.dirname(fl_obj_name))
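`_create_cv_lookup_models` is defined elsewhere in the time series app; a plausible sketch of what it does is shown below, assuming the ODM2 CV tables expose Term and Name columns (an assumption for illustration, not the actual implementation):

def _create_cv_lookup_models(cur, metadata_obj, table_name, model_class):
    # Assumed behavior: copy every row of the sqlite CV lookup table into the
    # corresponding Django CV model, linked to the resource metadata object.
    cur.execute("SELECT Term, Name FROM {}".format(table_name))
    for row in cur.fetchall():
        model_class.objects.create(metadata=metadata_obj,
                                   term=row['Term'], name=row['Name'])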
Example #21
def get_all_related_shp_files(resource, selected_resource_file, file_type):
    """
    This helper function copies all the related shape files to a temp directory
    and returns a list of those temp file paths as well as a list of the existing related
    resource file objects.
    :param resource: an instance of BaseResource to which the *selected_resource_file* belongs
    :param selected_resource_file: an instance of ResourceFile selected by the user to set
    GeoFeatureFile type (the file must be a .shp or a .zip file)
    :param file_type: a flag (True/False) to control resource vs. file type actions
    :return: a list of temp file paths for all related shape files, and a list of the
     corresponding resource file objects
    """

    def collect_shape_resource_files(res_file):
        # compare paths without the file extension: strip 8 characters for
        # '.shp.xml' files and 4 characters for all other shape files
        if res_file.short_path.lower().endswith('.shp.xml'):
            if selected_resource_file.short_path[:-4] == res_file.short_path[:-8]:
                shape_res_files.append(res_file)
        elif selected_resource_file.short_path[:-4] == res_file.short_path[:-4]:
            shape_res_files.append(res_file)

    shape_temp_files = []
    shape_res_files = []
    temp_dir = ''
    if selected_resource_file.extension.lower() == '.shp':
        for f in resource.files.all():
            if f.file_folder == selected_resource_file.file_folder:
                if f.extension.lower() == '.xml' and not f.file_name.lower().endswith('.shp.xml'):
                    continue
                if f.extension.lower() in GeoFeatureLogicalFile.get_allowed_storage_file_types():
                    collect_shape_resource_files(f)

        for f in shape_res_files:
            temp_file = utils.get_file_from_irods(f)
            if not temp_dir:
                temp_dir = os.path.dirname(temp_file)
            else:
                file_temp_dir = os.path.dirname(temp_file)
                dst_dir = os.path.join(temp_dir, os.path.basename(temp_file))
                shutil.copy(temp_file, dst_dir)
                shutil.rmtree(file_temp_dir)
                temp_file = dst_dir
            shape_temp_files.append(temp_file)

    elif selected_resource_file.extension.lower() == '.zip':
        temp_file = utils.get_file_from_irods(selected_resource_file)
        temp_dir = os.path.dirname(temp_file)
        if not zipfile.is_zipfile(temp_file):
            if os.path.isdir(temp_dir):
                shutil.rmtree(temp_dir)
            raise ValidationError('Selected file is not a zip file')
        zf = zipfile.ZipFile(temp_file, 'r')
        zf.extractall(temp_dir)
        zf.close()
        for dirpath, _, filenames in os.walk(temp_dir):
            for name in filenames:
                if name == selected_resource_file.file_name:
                    # skip the user selected zip file
                    continue
                file_path = os.path.abspath(os.path.join(dirpath, name))
                shape_temp_files.append(file_path)

        shape_res_files.append(selected_resource_file)

    return shape_temp_files, shape_res_files
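A brief usage sketch (object names are hypothetical), showing how a caller would gather the sidecar files for a user-selected .shp before validating them as a GeoFeature type:

# Hypothetical caller: selected_file is the .shp (or .zip) ResourceFile the user picked.
shape_temp_paths, shape_res_files = get_all_related_shp_files(
    resource, selected_file, file_type=True)
# shape_temp_paths: local copies (.shp, .shx, .dbf, .prj, ...) ready for validation
# shape_res_files:  the matching ResourceFile objects already stored in the resource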
Example #22
    def set_file_type(cls, resource, user, file_id=None, folder_path=None):
        """ Creates a GeoRasterLogicalFile (aggregation) from a tif or a zip resource file, or a
        folder """

        log = logging.getLogger()
        res_file, folder_path = cls._validate_set_file_type_inputs(resource, file_id, folder_path)
        file_name = res_file.file_name
        # get file name without the extension - needed for naming the aggregation folder
        base_file_name = file_name[:-len(res_file.extension)]
        file_folder = res_file.file_folder
        aggregation_folder_created = False
        # determine if we need to create a new folder for the aggregation
        create_new_folder = cls._check_create_aggregation_folder(
            selected_res_file=res_file, selected_folder=folder_path,
            aggregation_file_count=1)

        upload_folder = ''
        # get the file from irods to temp dir
        temp_file = utils.get_file_from_irods(res_file)
        temp_dir = os.path.dirname(temp_file)
        res_files_to_delete = []
        raster_folder = folder_path if folder_path is not None else file_folder
        # validate the file
        validation_results = raster_file_validation(raster_file=temp_file, resource=resource,
                                                    raster_folder=raster_folder)

        if not validation_results['error_info']:
            msg = "Geographic raster aggregation. Error when creating aggregation. Error:{}"
            file_type_success = False
            log.info("Geographic raster aggregation validation successful.")
            # extract metadata
            temp_vrt_file_path = [os.path.join(temp_dir, f) for f in os.listdir(temp_dir) if
                                  '.vrt' == os.path.splitext(f)[1]].pop()
            metadata = extract_metadata(temp_vrt_file_path)
            log.info("Geographic raster metadata extraction was successful.")

            with transaction.atomic():
                # create a geo raster logical file object to be associated with resource files
                logical_file = cls.initialize(base_file_name)

                try:
                    if not folder_path:
                        # being here means the aggregation is being created by selecting a file
                        if create_new_folder:
                            # create a folder for the raster file type using the base file name
                            # as the name for the new folder
                            upload_folder = cls._create_aggregation_folder(resource, file_folder,
                                                                           base_file_name)
                            log.info("Folder created:{}".format(upload_folder))
                            aggregation_folder_created = True
                        else:
                            upload_folder = file_folder

                        if res_file.extension.lower() in [".tiff", ".tif"]:
                            if aggregation_folder_created:
                                tgt_folder = upload_folder

                                # copy any existing raster specific files to the new aggregation
                                # folder and make them part of the logical file
                                files_to_copy = validation_results['raster_resource_files']
                                logical_file.copy_resource_files(resource, files_to_copy,
                                                                 tgt_folder)
                                res_files_to_delete.extend(files_to_copy)
                            else:
                                # make the existing raster specific files part of the
                                # aggregation/file type
                                for raster_res_file in validation_results['raster_resource_files']:
                                    logical_file.add_resource_file(raster_res_file)

                        else:
                            # selected file must be a zip file
                            res_files_to_delete.append(res_file)
                    else:
                        # user selected a folder to create aggregation
                        upload_folder = folder_path

                        # make all the files in the selected folder as part of the aggregation
                        logical_file.add_resource_files_in_folder(resource, folder_path)

                    # add all new files to resource and make those part of the logical file
                    if validation_results['new_resource_files_to_add']:
                        files_to_add_to_resource = validation_results['new_resource_files_to_add']
                        logical_file.add_files_to_resource(
                            resource=resource, files_to_add=files_to_add_to_resource,
                            upload_folder=upload_folder)
                    log.info("Geographic raster aggregation type - new files were added "
                             "to the resource.")

                    # use the extracted metadata to populate file metadata
                    for element in metadata:
                        # here k is the name of the element
                        # v is a dict of all element attributes/field names and field values
                        k, v = element.items()[0]
                        logical_file.metadata.create_element(k, **v)
                    log.info("Geographic raster aggregation type - metadata was saved to DB")
                    logical_file._finalize(user, resource,
                                           folder_created=aggregation_folder_created,
                                           res_files_to_delete=res_files_to_delete,
                                           reset_title=True)

                    file_type_success = True
                except Exception as ex:
                    msg = msg.format(ex.message)
                    log.exception(msg)
                finally:
                    # remove temp dir
                    if os.path.isdir(temp_dir):
                        shutil.rmtree(temp_dir)

            if not file_type_success:
                aggregation_from_folder = folder_path is not None
                cls._cleanup_on_fail_to_create_aggregation(user, resource, upload_folder,
                                                           file_folder, aggregation_from_folder)
                raise ValidationError(msg)
        else:
            # remove temp dir
            if os.path.isdir(temp_dir):
                shutil.rmtree(temp_dir)
            err_msg = "Geographic raster aggregation type validation failed. {}".format(
                ' '.join(validation_results['error_info']))
            log.error(err_msg)
            raise ValidationError(err_msg)
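A short usage sketch for this aggregation API, assuming a composite resource that already holds the uploaded raster content (identifiers are hypothetical):

# Hypothetical calls: create a Geo Raster aggregation either from a single tif file ...
GeoRasterLogicalFile.set_file_type(resource=composite_res, user=requesting_user,
                                   file_id=tif_res_file.id)

# ... or from a folder that already contains the tif/vrt files
GeoRasterLogicalFile.set_file_type(resource=composite_res, user=requesting_user,
                                   folder_path='raster_data')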
Example #23
from hs_core.hydroshare.utils import resource_modified, get_file_from_irods
from hs_file_types import raster_meta_extract
from hs_geo_raster_resource.models import RasterResource

copy_res_fail = []
meta_update_fail = []
meta_update_success = []
# start migration for each raster resource that has raster files
for res in RasterResource.objects.all():
    # copy all the resource files to temp dir
    temp_dir = ''
    res_file_tmp_path = ''
    try:
        temp_dir = tempfile.mkdtemp()
        for res_file in res.files.all():
            res_file_tmp_path = get_file_from_irods(res_file)
            shutil.copy(res_file_tmp_path,
                        os.path.join(temp_dir, os.path.basename(res_file_tmp_path)))
            shutil.rmtree(os.path.dirname(res_file_tmp_path))

        vrt_file_path = [os.path.join(temp_dir, f)
                         for f in os.listdir(temp_dir) if '.vrt' == f[-4:]].pop()
    except Exception as e:
        if os.path.isdir(temp_dir):
            shutil.rmtree(temp_dir)
        if os.path.isfile(res_file_tmp_path):
            shutil.rmtree(os.path.dirname(res_file_tmp_path))
        copy_res_fail.append('{}:{}'.format(res.short_id, res.metadata.title.value))
        continue
    # update the metadata for the original coverage information of all the raster resources
    try:
Example #24
def raster_file_validation(raster_file, resource, raster_folder=None):
    """ Validates if the relevant files are valid for raster aggregation or raster resource type

    :param  raster_file: a temp file (extension tif or zip) retrieved from irods and stored on temp
    dir in django
    :param  raster_folder: (optional) folder in which raster file exists on irods.
    :param  resource: an instance of CompositeResource or GeoRasterResource in which
    raster_file exits.

    :return A list of error messages and a list of file paths for all files that belong to raster
    """

    error_info = []
    new_resource_files_to_add = []
    raster_resource_files = []
    create_vrt = True
    validation_results = {
        'error_info': error_info,
        'new_resource_files_to_add': new_resource_files_to_add,
        'raster_resource_files': raster_resource_files,
        'vrt_created': create_vrt
    }
    file_name_part, ext = os.path.splitext(os.path.basename(raster_file))
    ext = ext.lower()

    if ext == '.tif' or ext == '.tiff':
        res_files = ResourceFile.list_folder(resource=resource,
                                             folder=raster_folder,
                                             sub_folders=False)

        # check if there is already a vrt file in that folder
        vrt_files = [f for f in res_files if f.extension.lower() == ".vrt"]
        tif_files = [
            f for f in res_files
            if f.extension.lower() == ".tif" or f.extension.lower() == ".tiff"
        ]
        if vrt_files:
            if len(vrt_files) > 1:
                error_info.append("More than one vrt file was found.")
                return validation_results
            create_vrt = False
            # a vrt file already exists, so no new vrt will be generated
            validation_results['vrt_created'] = False
        elif len(tif_files) != 1:
            # if there is more than one tif file and no vrt file, then we just use the
            # selected tif file to create the aggregation in the case of a composite resource
            if resource.resource_type == "CompositeResource":
                tif_files = [
                    tif_file for tif_file in tif_files
                    if raster_file.endswith(tif_file.file_name)
                ]
            else:
                # if there is more than one tif file, there needs to be a vrt file
                error_info.append("A vrt file is missing.")
                return validation_results

        raster_resource_files.extend(vrt_files)
        raster_resource_files.extend(tif_files)

        if vrt_files:
            temp_dir = os.path.dirname(raster_file)
            temp_vrt_file = utils.get_file_from_irods(vrt_files[0], temp_dir)
        else:
            # create the .vrt file
            try:
                temp_vrt_file = create_vrt_file(raster_file)
            except Exception as ex:
                error_info.append(str(ex))
            else:
                if os.path.isfile(temp_vrt_file):
                    new_resource_files_to_add.append(temp_vrt_file)

    elif ext == '.zip':
        try:
            extract_file_paths = _explode_raster_zip_file(raster_file)
        except Exception as ex:
            error_info.append(str(ex))
        else:
            if extract_file_paths:
                new_resource_files_to_add.extend(extract_file_paths)
    else:
        error_info.append("Invalid file mime type found.")

    if not error_info:
        if ext == ".zip":
            # in case of zip, there needs to be more than one file extracted out of the zip file
            if len(new_resource_files_to_add) < 2:
                error_info.append(
                    "Invalid zip file. Seems to contain only one file. "
                    "Multiple tif files are expected.")
                return validation_results

            files_ext = [
                os.path.splitext(path)[1].lower()
                for path in new_resource_files_to_add
            ]
            if files_ext.count('.vrt') > 1:
                error_info.append(
                    "Invalid zip file. Seems to contain multiple vrt files.")
                return validation_results
            elif files_ext.count('.vrt') == 0:
                error_info.append("Invalid zip file. No vrt file was found.")
                return validation_results
            elif files_ext.count('.tif') + files_ext.count('.tiff') < 1:
                error_info.append(
                    "Invalid zip file. No tif/tiff file was found.")
                return validation_results

            # check if there are files that are not raster related
            non_raster_files = [
                f_ext for f_ext in files_ext
                if f_ext not in ('.tif', '.tiff', '.vrt')
            ]
            if non_raster_files:
                error_info.append(
                    "Invalid zip file. Contains files that are not raster related."
                )
                return validation_results

            temp_vrt_file = new_resource_files_to_add[files_ext.index('.vrt')]

        # validate vrt file if we didn't create it
        if ext == '.zip' or not create_vrt:
            raster_dataset = gdal.Open(temp_vrt_file, GA_ReadOnly)
            if raster_dataset is None:
                error_info.append('Failed to open the vrt file.')
                return validation_results

            # check if the vrt file is valid
            try:
                raster_dataset.RasterXSize
                raster_dataset.RasterYSize
                raster_dataset.RasterCount
            except AttributeError:
                error_info.append(
                    'Raster size and band information are missing.')
                return validation_results

            # check if the raster file numbers and names are valid in vrt file
            with open(temp_vrt_file, 'r') as vrt_file:
                vrt_string = vrt_file.read()
                root = ET.fromstring(vrt_string)
                file_names_in_vrt = [
                    file_name.text for file_name in root.iter('SourceFilename')
                ]

            if ext == '.zip':
                file_names = [
                    os.path.basename(path)
                    for path in new_resource_files_to_add
                ]
            else:
                file_names = [f.file_name for f in raster_resource_files]

            file_names = [
                f_name for f_name in file_names if not f_name.endswith('.vrt')
            ]

            if len(file_names) > len(file_names_in_vrt):
                msg = 'One or more additional tif files were found which are not listed in ' \
                      'the provided {} file.'
                msg = msg.format(os.path.basename(temp_vrt_file))
                error_info.append(msg)
            else:
                for vrt_ref_raster_name in file_names_in_vrt:
                    if vrt_ref_raster_name in file_names \
                            or (os.path.split(vrt_ref_raster_name)[0] == '.' and
                                os.path.split(vrt_ref_raster_name)[1] in file_names):
                        continue
                    elif os.path.basename(vrt_ref_raster_name) in file_names:
                        msg = "Please specify {} as {} in the .vrt file, because it will " \
                              "be saved in the same folder with .vrt file in HydroShare."
                        msg = msg.format(vrt_ref_raster_name,
                                         os.path.basename(vrt_ref_raster_name))
                        error_info.append(msg)
                        break
                    else:
                        msg = "The file {tif} which is listed in the {vrt} file is missing."
                        msg = msg.format(
                            tif=os.path.basename(vrt_ref_raster_name),
                            vrt=os.path.basename(temp_vrt_file))
                        error_info.append(msg)
                        break

    return validation_results
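Callers such as the set_file_type methods above consume the returned dict roughly as sketched below (paths and object names are assumptions for illustration):

# Hypothetical consumption of the validation results
results = raster_file_validation(raster_file='/tmp/xyz/logan.tif', resource=composite_res)
if results['error_info']:
    raise ValidationError(' '.join(results['error_info']))

new_temp_files = results['new_resource_files_to_add']   # e.g. a generated .vrt to upload
existing_files = results['raster_resource_files']       # related ResourceFile objects
vrt_was_generated = results['vrt_created']              # whether a new .vrt had to be created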
Example #25
    def set_file_type(cls, resource, user, file_id=None, folder_path=None):
        """ Creates a NetCDFLogicalFile (aggregation) from a netcdf file (.nc) resource file
        or a folder """

        log = logging.getLogger()
        res_file, folder_path = cls._validate_set_file_type_inputs(
            resource, file_id, folder_path)

        # base file name (no path included)
        file_name = res_file.file_name
        # file name without the extension - needed for naming the new aggregation folder
        nc_file_name = file_name[:-len(res_file.extension)]

        resource_metadata = []
        file_type_metadata = []
        upload_folder = ''
        res_files_to_delete = []
        # get the file from irods to temp dir
        temp_file = utils.get_file_from_irods(res_file)
        temp_dir = os.path.dirname(temp_file)

        # file validation and metadata extraction
        nc_dataset = nc_utils.get_nc_dataset(temp_file)
        if isinstance(nc_dataset, netCDF4.Dataset):
            msg = "NetCDF aggregation. Error when creating aggregation. Error:{}"
            file_type_success = False
            # extract the metadata from netcdf file
            res_dublin_core_meta, res_type_specific_meta = nc_meta.get_nc_meta_dict(
                temp_file)
            # populate resource_metadata and file_type_metadata lists with extracted metadata
            add_metadata_to_list(resource_metadata, res_dublin_core_meta,
                                 res_type_specific_meta, file_type_metadata,
                                 resource)

            # create the ncdump text file
            dump_file = create_header_info_txt_file(temp_file, nc_file_name)
            file_folder = res_file.file_folder
            aggregation_folder_created = False
            create_new_folder = cls._check_create_aggregation_folder(
                selected_res_file=res_file,
                selected_folder=folder_path,
                aggregation_file_count=1)

            with transaction.atomic():
                # create a netcdf logical file object to be associated with
                # resource files
                dataset_title = res_dublin_core_meta.get('title', nc_file_name)
                logical_file = cls.initialize(dataset_title, resource)

                try:
                    if folder_path is None:
                        # being here means the aggregation is being created by selecting a file

                        # create a folder for the netcdf file type using the base file
                        # name as the name for the new folder if the file is not already in a folder
                        if create_new_folder:
                            upload_folder = cls._create_aggregation_folder(
                                resource, file_folder, nc_file_name)
                            aggregation_folder_created = True
                            log.info(
                                "NetCDF Aggregation creation - folder created:{}"
                                .format(upload_folder))
                        else:
                            # selected nc file is already in a folder
                            upload_folder = file_folder

                        # create logical file record in DB
                        logical_file.save()
                        if aggregation_folder_created:
                            # copy the nc file to the new aggregation folder and make it part
                            # of the logical file
                            tgt_folder = upload_folder
                            files_to_copy = [res_file]
                            logical_file.copy_resource_files(
                                resource, files_to_copy, tgt_folder)
                            res_files_to_delete.append(res_file)
                        else:
                            # make the selected nc file part of the aggregation/file type
                            logical_file.add_resource_file(res_file)

                    else:
                        # logical file record gets created in DB
                        logical_file.save()
                        # folder has been selected to create aggregation
                        upload_folder = folder_path
                        # make the .nc file part of the aggregation
                        logical_file.add_resource_file(res_file)

                    # add the new dump txt file to the resource
                    uploaded_file = UploadedFile(
                        file=open(dump_file, 'rb'),
                        name=os.path.basename(dump_file))

                    new_res_file = utils.add_file_to_resource(
                        resource,
                        uploaded_file,
                        folder=upload_folder,
                        add_to_aggregation=False)

                    # make this new resource file we added part of the logical file
                    logical_file.add_resource_file(new_res_file)
                    log.info(
                        "NetCDF aggregation creation - a new file was added to the resource."
                    )

                    # use the extracted metadata to populate resource metadata
                    for element in resource_metadata:
                        # here k is the name of the element
                        # v is a dict of all element attributes/field names and field values
                        k, v = element.items()[0]
                        if k == 'title':
                            # update title element
                            title_element = resource.metadata.title
                            resource.metadata.update_element(
                                'title', title_element.id, **v)
                        else:
                            resource.metadata.create_element(k, **v)

                    log.info(
                        "NetCDF Aggregation creation - Resource metadata was saved to DB"
                    )

                    # use the extracted metadata to populate file metadata
                    for element in file_type_metadata:
                        # here k is the name of the element
                        # v is a dict of all element attributes/field names and field values
                        k, v = element.items()[0]
                        if k == 'subject':
                            logical_file.metadata.keywords = v
                            logical_file.metadata.save()
                            # update resource level keywords
                            resource_keywords = [
                                subject.value.lower() for subject in
                                resource.metadata.subjects.all()
                            ]
                            for kw in logical_file.metadata.keywords:
                                if kw.lower() not in resource_keywords:
                                    resource.metadata.create_element('subject',
                                                                     value=kw)
                        else:
                            logical_file.metadata.create_element(k, **v)
                    log.info(
                        "NetCDF aggregation - metadata was saved in aggregation"
                    )
                    logical_file._finalize(
                        user,
                        resource,
                        folder_created=aggregation_folder_created,
                        res_files_to_delete=res_files_to_delete)
                    file_type_success = True
                except Exception as ex:
                    msg = msg.format(ex.message)
                    log.exception(msg)
                finally:
                    # remove temp dir
                    if os.path.isdir(temp_dir):
                        shutil.rmtree(temp_dir)

            if not file_type_success:
                aggregation_from_folder = folder_path is not None
                cls._cleanup_on_fail_to_create_aggregation(
                    user, resource, upload_folder, file_folder,
                    aggregation_from_folder)
                raise ValidationError(msg)

        else:
            err_msg = "Not a valid NetCDF file. NetCDF aggregation validation failed."
            log.error(err_msg)
            # remove temp dir
            if os.path.isdir(temp_dir):
                shutil.rmtree(temp_dir)
            raise ValidationError(err_msg)
Example #26
    def update_netcdf_file(self, user):
        """
        writes metadata to the netcdf file associated with this instance of the logical file
        :return:
        """

        log = logging.getLogger()

        nc_res_file = ''
        txt_res_file = ''
        for f in self.files.all():
            if f.extension == '.nc':
                nc_res_file = f
                break

        for f in self.files.all():
            if f.extension == '.txt':
                txt_res_file = f
                break
        if not nc_res_file:
            msg = "No netcdf file exists for this logical file."
            log.exception(msg)
            raise ValidationError(msg)

        # get the file from irods to temp dir
        temp_nc_file = utils.get_file_from_irods(nc_res_file)
        nc_dataset = netCDF4.Dataset(temp_nc_file, 'a')
        try:
            # update title
            if hasattr(nc_dataset, 'title'):
                if nc_dataset.title != self.dataset_name:
                    delattr(nc_dataset, 'title')
                    nc_dataset.title = self.dataset_name
            else:
                nc_dataset.title = self.dataset_name

            # update keywords
            if self.metadata.keywords:
                if hasattr(nc_dataset, 'keywords'):
                    delattr(nc_dataset, 'keywords')
                nc_dataset.keywords = ', '.join(self.metadata.keywords)

            # update key/value metadata
            if self.metadata.extra_metadata:
                if hasattr(nc_dataset, 'hs_extra_metadata'):
                    delattr(nc_dataset, 'hs_extra_metadata')
                extra_metadata = []
                for k, v in self.metadata.extra_metadata.items():
                    extra_metadata.append("{}:{}".format(k, v))
                nc_dataset.hs_extra_metadata = ', '.join(extra_metadata)

            # update temporal coverage
            if self.metadata.temporal_coverage:
                for attr_name in ['time_coverage_start', 'time_coverage_end']:
                    if hasattr(nc_dataset, attr_name):
                        delattr(nc_dataset, attr_name)
                nc_dataset.time_coverage_start = self.metadata.temporal_coverage.value[
                    'start']
                nc_dataset.time_coverage_end = self.metadata.temporal_coverage.value[
                    'end']

            # update spatial coverage
            if self.metadata.spatial_coverage:
                for attr_name in [
                        'geospatial_lat_min', 'geospatial_lat_max',
                        'geospatial_lon_min', 'geospatial_lon_max'
                ]:
                    # clean up old info
                    if hasattr(nc_dataset, attr_name):
                        delattr(nc_dataset, attr_name)

                spatial_coverage = self.metadata.spatial_coverage
                nc_dataset.geospatial_lat_min = spatial_coverage.value[
                    'southlimit']
                nc_dataset.geospatial_lat_max = spatial_coverage.value[
                    'northlimit']
                nc_dataset.geospatial_lon_min = spatial_coverage.value[
                    'westlimit']
                nc_dataset.geospatial_lon_max = spatial_coverage.value[
                    'eastlimit']

            # update variables
            if self.metadata.variables.all():
                dataset_variables = nc_dataset.variables
                for variable in self.metadata.variables.all():
                    if variable.name in dataset_variables.keys():
                        dataset_variable = dataset_variables[variable.name]
                        if variable.unit != 'Unknown':
                            # clean up old info
                            if hasattr(dataset_variable, 'units'):
                                delattr(dataset_variable, 'units')
                            dataset_variable.setncattr('units', variable.unit)
                        if variable.descriptive_name:
                            # clean up old info
                            if hasattr(dataset_variable, 'long_name'):
                                delattr(dataset_variable, 'long_name')
                            dataset_variable.setncattr(
                                'long_name', variable.descriptive_name)
                        if variable.method:
                            # clean up old info
                            if hasattr(dataset_variable, 'comment'):
                                delattr(dataset_variable, 'comment')
                            dataset_variable.setncattr('comment',
                                                       variable.method)
                        if variable.missing_value:
                            if hasattr(dataset_variable, 'missing_value'):
                                missing_value = dataset_variable.missing_value
                                delattr(dataset_variable, 'missing_value')
                            else:
                                missing_value = ''
                            try:
                                dt = np.dtype(dataset_variable.datatype.name)
                                missing_value = np.fromstring(
                                    variable.missing_value + ' ',
                                    dtype=dt.type,
                                    sep=" ")
                            except Exception:
                                # keep the previously stored value (or skip the
                                # attribute) if the string cannot be parsed into
                                # the variable's dtype
                                pass

                            if missing_value:
                                dataset_variable.setncattr(
                                    'missing_value', missing_value)

            # close nc dataset
            nc_dataset.close()
        except Exception as ex:
            log.exception(ex.message)
            if os.path.exists(temp_nc_file):
                shutil.rmtree(os.path.dirname(temp_nc_file))
            raise ex

        # create the ncdump text file
        nc_file_name = os.path.basename(temp_nc_file).split(".")[0]
        temp_text_file = create_header_info_txt_file(temp_nc_file,
                                                     nc_file_name)

        # push the updated nc file and the txt file to iRODS
        utils.replace_resource_file_on_irods(temp_nc_file, nc_res_file, user)
        utils.replace_resource_file_on_irods(temp_text_file, txt_res_file,
                                             user)
        self.metadata.is_dirty = False
        self.metadata.save()
        # cleanup the temp dir
        if os.path.exists(temp_nc_file):
            shutil.rmtree(os.path.dirname(temp_nc_file))
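
A quick aside on the attribute-rewrite pattern used above: netCDF4-python exposes global
attributes as plain Python attributes, so the code deletes any stale attribute first and then
assigns (or setncattr's) the new value. A minimal standalone sketch of the same idea, using a
placeholder local file 'example.nc' rather than a HydroShare resource file:

import netCDF4

# open an existing file in append ('a') mode so attributes can be modified in place
nc = netCDF4.Dataset('example.nc', 'a')
if hasattr(nc, 'title'):
    delattr(nc, 'title')                  # drop the stale global attribute first
nc.title = 'Updated dataset title'        # then write the new value
nc.sync()                                 # flush the change to disk
nc.close()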
Example #27
def netcdf_file_update(instance, nc_res_file, txt_res_file, user):
    log = logging.getLogger()
    # file_type is True when instance is a NetCDFLogicalFile (file type);
    # False when instance is a netCDF resource
    file_type = isinstance(instance, NetCDFLogicalFile)

    # get the file from irods to temp dir
    temp_nc_file = utils.get_file_from_irods(nc_res_file)
    nc_dataset = netCDF4.Dataset(temp_nc_file, 'a')

    try:
        # update title
        title = instance.dataset_name if file_type else instance.metadata.title.value

        if title.lower() != 'untitled resource':
            if hasattr(nc_dataset, 'title'):
                delattr(nc_dataset, 'title')
            nc_dataset.title = title

        # update keywords
        keywords = instance.metadata.keywords if file_type \
            else [item.value for item in instance.metadata.subjects.all()]

        if hasattr(nc_dataset, 'keywords'):
            delattr(nc_dataset, 'keywords')

        if keywords:
            nc_dataset.keywords = ', '.join(keywords)

        # update key/value metadata
        extra_metadata_dict = instance.metadata.extra_metadata if file_type \
            else instance.extra_metadata

        if hasattr(nc_dataset, 'hs_extra_metadata'):
            delattr(nc_dataset, 'hs_extra_metadata')

        if extra_metadata_dict:
            extra_metadata = []
            for k, v in extra_metadata_dict.items():
                extra_metadata.append("{}:{}".format(k, v))
            nc_dataset.hs_extra_metadata = ', '.join(extra_metadata)

        # update temporal coverage
        temporal_coverage = instance.metadata.temporal_coverage if file_type \
            else instance.metadata.coverages.all().filter(type='period').first()

        for attr_name in ['time_coverage_start', 'time_coverage_end']:
            if hasattr(nc_dataset, attr_name):
                delattr(nc_dataset, attr_name)

        if temporal_coverage:
            nc_dataset.time_coverage_start = temporal_coverage.value['start']
            nc_dataset.time_coverage_end = temporal_coverage.value['end']

        # update spatial coverage
        spatial_coverage = instance.metadata.spatial_coverage if file_type \
            else instance.metadata.coverages.all().filter(type='box').first()

        for attr_name in [
                'geospatial_lat_min', 'geospatial_lat_max',
                'geospatial_lon_min', 'geospatial_lon_max'
        ]:
            if hasattr(nc_dataset, attr_name):
                delattr(nc_dataset, attr_name)

        if spatial_coverage:
        nc_dataset.geospatial_lat_min = spatial_coverage.value['southlimit']
        nc_dataset.geospatial_lat_max = spatial_coverage.value['northlimit']
            nc_dataset.geospatial_lon_min = spatial_coverage.value['westlimit']
            nc_dataset.geospatial_lon_max = spatial_coverage.value['eastlimit']

        # update variables
        if instance.metadata.variables.all():
            dataset_variables = nc_dataset.variables
            for variable in instance.metadata.variables.all():
                if variable.name in dataset_variables.keys():
                    dataset_variable = dataset_variables[variable.name]

                    # update units
                    if hasattr(dataset_variable, 'units'):
                        delattr(dataset_variable, 'units')
                    if variable.unit != 'Unknown':
                        dataset_variable.setncattr('units', variable.unit)

                    # update long_name
                    if hasattr(dataset_variable, 'long_name'):
                        delattr(dataset_variable, 'long_name')
                    if variable.descriptive_name:
                        dataset_variable.setncattr('long_name',
                                                   variable.descriptive_name)

                    # update method
                    if hasattr(dataset_variable, 'comment'):
                        delattr(dataset_variable, 'comment')
                    if variable.method:
                        dataset_variable.setncattr('comment', variable.method)

                    # update missing value
                    if variable.missing_value:
                        if hasattr(dataset_variable, 'missing_value'):
                            missing_value = dataset_variable.missing_value
                            delattr(dataset_variable, 'missing_value')
                        else:
                            missing_value = ''
                        try:
                            dt = np.dtype(dataset_variable.datatype.name)
                            missing_value = np.fromstring(
                                variable.missing_value + ' ',
                                dtype=dt.type,
                                sep=" ")
                        except Exception:
                            # keep the previously stored value (or skip the attribute)
                            # if the string cannot be parsed into the variable's dtype
                            pass

                        if missing_value:
                            dataset_variable.setncattr('missing_value',
                                                       missing_value)

        # update metadata elements that only apply to a netCDF resource
        if not file_type:

            # update summary
            if hasattr(nc_dataset, 'summary'):
                delattr(nc_dataset, 'summary')
            if instance.metadata.description:
                nc_dataset.summary = instance.metadata.description.abstract

            # update contributor
            if hasattr(nc_dataset, 'contributor_name'):
                delattr(nc_dataset, 'contributor_name')

            contributor_list = instance.metadata.contributors.all()
            if contributor_list:
                res_contri_name = []
                for contributor in contributor_list:
                    res_contri_name.append(contributor.name)

                nc_dataset.contributor_name = ', '.join(res_contri_name)

            # update creator
            for attr_name in ['creator_name', 'creator_email', 'creator_url']:
                if hasattr(nc_dataset, attr_name):
                    delattr(nc_dataset, attr_name)

            creator = instance.metadata.creators.all().filter(order=1).first()
            if creator:
                nc_dataset.creator_name = creator.name if creator.name else creator.organization

                if creator.email:
                    nc_dataset.creator_email = creator.email
                if creator.description or creator.homepage:
                    nc_dataset.creator_url = creator.homepage if creator.homepage \
                        else 'https://www.hydroshare.org' + creator.description

            # update license
            if hasattr(nc_dataset, 'license'):
                delattr(nc_dataset, 'license')
            if instance.metadata.rights:
                nc_dataset.license = "{0} {1}".format(
                    instance.metadata.rights.statement,
                    instance.metadata.rights.url)

            # update reference
            if hasattr(nc_dataset, 'references'):
                delattr(nc_dataset, 'references')

            reference_list = instance.metadata.relations.all().filter(
                type='cites')
            if reference_list:
                res_meta_ref = []
                for reference in reference_list:
                    res_meta_ref.append(reference.value)
                nc_dataset.references = ' \n'.join(res_meta_ref)

            # update source
            if hasattr(nc_dataset, 'source'):
                delattr(nc_dataset, 'source')

            source_list = instance.metadata.sources.all()
            if source_list:
                res_meta_source = []
                for source in source_list:
                    res_meta_source.append(source.derived_from)
                nc_dataset.source = ' \n'.join(res_meta_source)

        # close nc dataset
        nc_dataset.close()

    except Exception as ex:
        log.exception(ex.message)
        if os.path.exists(temp_nc_file):
            shutil.rmtree(os.path.dirname(temp_nc_file))
        raise ex

    # create the ncdump text file
    nc_file_name = os.path.basename(temp_nc_file).split(".")[0]
    temp_text_file = create_header_info_txt_file(temp_nc_file, nc_file_name)

    # push the updated nc file and the txt file to iRODS
    utils.replace_resource_file_on_irods(temp_nc_file, nc_res_file, user)
    utils.replace_resource_file_on_irods(temp_text_file, txt_res_file, user)

    metadata = instance.metadata
    metadata.is_dirty = False
    metadata.save()

    # cleanup the temp dir
    if os.path.exists(temp_nc_file):
        shutil.rmtree(os.path.dirname(temp_nc_file))
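
Both update paths above coerce the stored missing_value string into the variable's own dtype
before writing it back, via np.fromstring with a space separator. A standalone sketch of that
conversion (the dtype name 'f4' and the value '-9999' are placeholders, not taken from a real
dataset):

import numpy as np

dt = np.dtype('f4')                        # stands in for dataset_variable.datatype
missing_value = np.fromstring('-9999 ', dtype=dt.type, sep=' ')
# an equivalent parse without np.fromstring:
missing_value_alt = np.array('-9999'.split(), dtype=dt.type)
assert missing_value[0] == missing_value_alt[0] == np.float32(-9999)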
Example #28
    def set_file_type(cls, resource, file_id, user):
        """
            Sets a json resource file to RefTimeseriesFile type
            :param resource: an instance of resource type CompositeResource
            :param file_id: id of the resource file to be set as RefTimeSeriesFile type
            :param user: user who is setting the file type
            :return:
            """

        log = logging.getLogger()

        # get the selected resource file object
        res_file = utils.get_resource_file_by_id(resource, file_id)

        if res_file is None:
            raise ValidationError("File not found.")

        if res_file.extension != '.refts':
            raise ValidationError("Not a Ref Time Series file.")

        files_to_add_to_resource = []
        if res_file.has_generic_logical_file:
            try:
                json_file_content = _validate_json_file(res_file)
            except Exception as ex:
                raise ValidationError(ex.message)

            # get the file from irods to temp dir
            temp_file = utils.get_file_from_irods(res_file)
            temp_dir = os.path.dirname(temp_file)
            files_to_add_to_resource.append(temp_file)
            file_folder = res_file.file_folder
            with transaction.atomic():
                # first delete the json file that we retrieved from irods
                # for setting it to reftimeseries file type
                delete_resource_file(resource.short_id, res_file.id, user)

                # create a reftimeseries logical file object to be associated
                # with resource files
                logical_file = cls.create()

                logical_file.metadata.json_file_content = json_file_content
                logical_file.metadata.save()

                try:
                    # add the json file back to the resource
                    uploaded_file = UploadedFile(
                        file=open(temp_file, 'rb'),
                        name=os.path.basename(temp_file))
                    # the added resource file will be part of a new generic logical file by default
                    new_res_file = utils.add_file_to_resource(
                        resource, uploaded_file, folder=file_folder)

                    # delete the generic logical file object
                    if new_res_file.logical_file is not None:
                        # deleting the file level metadata object will delete the associated
                        # logical file object
                        new_res_file.logical_file.metadata.delete()

                    # make the resource file we added as part of the logical file
                    logical_file.add_resource_file(new_res_file)
                    logical_file.metadata.save()
                    logical_file.dataset_name = logical_file.metadata.get_title_from_json()
                    logical_file.save()
                    # extract metadata
                    _extract_metadata(resource, logical_file)
                    log.info(
                        "RefTimeseries file type - json file was added to the resource."
                    )
                except Exception as ex:
                    msg = "RefTimeseries file type. Error when setting file type. Error:{}"
                    msg = msg.format(ex.message)
                    log.exception(msg)
                    raise ValidationError(msg)
                finally:
                    # remove temp dir
                    if os.path.isdir(temp_dir):
                        shutil.rmtree(temp_dir)

                log.info("RefTimeseries file type was created.")

        else:
            err_msg = "Selected file is not part of a GenericLogical file."
            log.error(err_msg)
            raise ValidationError(err_msg)
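
The _validate_json_file helper called near the top of set_file_type is not shown in this
excerpt. As a rough, hypothetical sketch of the minimum it has to guarantee before the file
type is switched (the real HydroShare helper presumably also validates the referenced time
series schema), given a path to the downloaded json file:

import json


def validate_json_file_sketch(json_file_path):
    # hypothetical stand-in: only checks that the file parses as JSON and returns the raw text
    with open(json_file_path) as fp:
        json_string = fp.read()
    try:
        json.loads(json_string)  # raises ValueError on malformed JSON
    except ValueError as ex:
        raise ValueError("Not a valid JSON file: {}".format(ex))
    return json_string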
Example #29
def create_scidas_virtual_app(request, res_id, cluster):
    user = get_user(request)
    if not user.is_authenticated() or not user.is_active:
        messages.error(
            request,
            "Only an authorized user can make an appliance provision request.")
        return HttpResponseRedirect(request.META['HTTP_REFERER'])

    res, _, _ = authorize(request,
                          res_id,
                          needed_permission=ACTION_TO_AUTHORIZE.VIEW_RESOURCE)
    cluster_name = cluster
    if cluster_name not in ('chameleon', 'aws', 'azure'):
        cluster_name = ''
    file_data_list = []
    p_data = {}
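    # p_data will hold the appliance provisioning payload; it is pre-populated from a JSON
    # file found in the resource (if one exists), otherwise a default payload is built below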
    file_path = '/' + ds.IRODS_ZONE + '/home/' + ds.IRODS_USERNAME
    for rf in ResourceFile.objects.filter(object_id=res.id):
        fname = ''
        if rf.resource_file.name:
            fname = os.path.join(file_path, rf.resource_file.name)
        elif rf.fed_resource_file.name:
            fname = rf.fed_resource_file.name
        if fname:
            file_data_list.append(fname)
            if fname.endswith('.json') and not p_data:
                temp_json_file = get_file_from_irods(rf)
                with open(temp_json_file, 'r') as fp:
                    jdata = load(fp)
                    if 'id' in jdata and 'containers' in jdata:
                        p_data = jdata

    url = settings.PIVOT_URL
    app_id = user.username + '_cs_app_id'
    preset_url = ''
    if not p_data:
        p_data = {
            "id": app_id,
            "containers": [{
                "id": app_id,
                "image": "scidas/irods-jupyter-hydroshare",
                "resources": {
                    "cpus": 2,
                    "mem": 2048
                },
                "port_mappings": [{
                    "container_port": 8888,
                    "host_port": 0,
                    "protocol": "tcp"
                }],
                "args": ["--ip=0.0.0.0", "--NotebookApp.token=\"\""],
                "data": file_data_list
            }]
        }
    else:
        app_id = p_data['id']
        p_data['containers'][0]['data'] = file_data_list

    if cluster_name:
        p_data['containers'][0]['cluster'] = cluster_name

    if 'endpoints' in p_data['containers'][0]:
        if p_data['containers'][0]['endpoints']:
            preset_ep_data = p_data['containers'][0]['endpoints'][0]
            preset_url = 'http://' + preset_ep_data['host'] + ':' + str(
                preset_ep_data['host_port'])

    # delete the appliance before posting to create a new one in case it already exists
    app_url = url + '/' + app_id
    response = requests.delete(app_url)
    is_deleted = False
    if response.status_code not in (status.HTTP_404_NOT_FOUND,
                                    status.HTTP_200_OK):
        idx = 0
        while idx < 2:
            get_response = requests.get(app_url)
            idx += 1
            if get_response.status_code == status.HTTP_404_NOT_FOUND:
                is_deleted = True
                break
            else:
                # the appliance has not been deleted yet; wait and poll once more
                time.sleep(2)
    else:
        is_deleted = True
    if not is_deleted:
        errmsg = 'The old appliance ' + app_id + ' cannot be deleted successfully'
        messages.error(request, errmsg)
        return HttpResponseRedirect(request.META['HTTP_REFERER'])

    response = requests.post(url, data=dumps(p_data))
    if response.status_code not in (status.HTTP_200_OK,
                                    status.HTTP_201_CREATED):
        return HttpResponseBadRequest(content=response.text)
    while True:
        response = requests.get(app_url)
        if response.status_code != status.HTTP_200_OK:
            return HttpResponseBadRequest(content=response.text)
        return_data = loads(response.content)
        con_ret_data_list = return_data['containers']
        con_ret_data = con_ret_data_list[0]
        con_state = con_ret_data['state']
        ep_data_list = con_ret_data['endpoints']
        if con_state == 'running' and (ep_data_list or preset_url):
            break
        else:
            # the jupyter appliance is not ready yet, need to wait and poll again
            time.sleep(2)

    if preset_url:
        app_url = preset_url
    else:
        ep_data = ep_data_list[0]
        app_url = 'http://' + ep_data['host'] + ':' + str(ep_data['host_port'])

    # make sure the target url is up and responding before redirecting.
    # Since SciDAS installs the dependencies listed in requirements.txt, it can take a while
    # for app_url to become reachable after the appliance is provisioned, so keep retrying
    # for roughly half a minute before giving up if the connection keeps being refused.
    idx = 0
    while True:
        try:
            ret = urlopen(app_url, timeout=10)
            break
        except URLError as ex:
            errmsg = ex.reason if hasattr(ex, 'reason') else 'URLError'
            idx += 1
            time.sleep(5)

        if idx > 6:
            messages.error(request, errmsg)
            return HttpResponseRedirect(request.META['HTTP_REFERER'])

    if ret.code == 200:
        return HttpResponseRedirect(app_url)
    else:
        messages.error(request, 'time out error')
        return HttpResponseRedirect(request.META['HTTP_REFERER'])
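
The view above polls the PIVOT API in three places (appliance deletion, container state, and
the final Jupyter URL), each with its own ad-hoc loop. A reusable bounded-retry helper along
these lines would capture the pattern; it is illustrative only and not part of the original
code (the URL and predicate are whatever the caller supplies):

import time

import requests


def wait_for(url, predicate, attempts=15, delay=2):
    # poll `url` with GET until predicate(response) is truthy or the attempts run out
    for _ in range(attempts):
        response = requests.get(url)
        if predicate(response):
            return response
        time.sleep(delay)
    raise RuntimeError("Timed out waiting for {}".format(url))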