Esempio n. 1
0
def netcdf_pre_create_resource(sender, **kwargs):
    """Validate the selected NetCDF file and extract its metadata before
    the resource is created.

    Expects in kwargs: 'files' (list of uploaded files), 'metadata' (list to
    be populated with extracted metadata elements), 'validate_files' (dict
    updated with the validation outcome), and 'source_names' (list of
    federated-zone file paths).
    """
    files = kwargs['files']
    metadata = kwargs['metadata']
    validate_files_dict = kwargs['validate_files']
    source_names = kwargs['source_names']

    if __debug__:
        assert(isinstance(source_names, list))

    file_selected = False
    in_file_name = ''
    nc_file_name = ''
    if files:
        file_selected = True
        in_file_name = files[0].file.name
        nc_file_name = os.path.splitext(files[0].name)[0]
    elif source_names:
        # file lives in a federated zone - copy it down to a local temp file
        nc_file_name = os.path.splitext(os.path.basename(source_names[0]))[0]
        ref_tmpfiles = utils.get_fed_zone_files(source_names)
        if ref_tmpfiles:
            in_file_name = ref_tmpfiles[0]
            file_selected = True

    if file_selected and in_file_name:
        # file validation and metadata extraction
        nc_dataset = nc_utils.get_nc_dataset(in_file_name)

        if isinstance(nc_dataset, netCDF4.Dataset):
            # Extract the metadata from netcdf file
            res_dublin_core_meta, res_type_specific_meta = nc_meta.get_nc_meta_dict(in_file_name)
            # populate metadata list with extracted metadata
            add_metadata_to_list(metadata, res_dublin_core_meta, res_type_specific_meta)

            # create the ncdump text file
            dump_file = create_header_info_txt_file(in_file_name, nc_file_name)
            dump_file_name = nc_file_name + '_header_info.txt'
            # open in binary mode for consistency with the other handlers;
            # text-mode file handles break uploads under Python 3
            uploaded_file = UploadedFile(file=open(dump_file, 'rb'), name=dump_file_name)
            files.append(uploaded_file)
        else:
            validate_files_dict['are_files_valid'] = False
            validate_files_dict['message'] = 'Please check if the uploaded file ' \
                                             'is in valid NetCDF format.'

        if source_names and in_file_name:
            # clean up the temp copy made for the federated-zone file
            shutil.rmtree(os.path.dirname(in_file_name))
Esempio n. 2
0
    def set_file_type(cls, resource, file_id, user):
        """
        Sets a .nc resource file to NetCDF file type.

        :param resource: an instance of resource type CompositeResource
        :param file_id: id of the resource file to be set as NetCDF file type
        :param user: user who is setting the file type
        :return:
        """

        # had to import it here to avoid import loop
        from hs_core.views.utils import create_folder

        log = logging.getLogger()

        # get the file from irods
        res_file = utils.get_resource_file_by_id(resource, file_id)

        if res_file is None:
            raise ValidationError("File not found.")

        if res_file.extension != '.nc':
            raise ValidationError("Not a NetCDF file.")

        # base file name (no path included)
        file_name = res_file.file_name
        # file name without the extension
        nc_file_name = file_name.split(".")[0]

        resource_metadata = []
        file_type_metadata = []
        files_to_add_to_resource = []
        if res_file.has_generic_logical_file:
            # get the file from irods to temp dir
            temp_file = utils.get_file_from_irods(res_file)
            temp_dir = os.path.dirname(temp_file)
            files_to_add_to_resource.append(temp_file)
            # file validation and metadata extraction
            nc_dataset = nc_utils.get_nc_dataset(temp_file)
            if isinstance(nc_dataset, netCDF4.Dataset):
                # Extract the metadata from netcdf file
                res_dublin_core_meta, res_type_specific_meta = nc_meta.get_nc_meta_dict(
                    temp_file)
                # populate resource_metadata and file_type_metadata lists with extracted metadata
                add_metadata_to_list(resource_metadata, res_dublin_core_meta,
                                     res_type_specific_meta,
                                     file_type_metadata, resource)

                # create the ncdump text file
                dump_file = create_header_info_txt_file(
                    temp_file, nc_file_name)
                files_to_add_to_resource.append(dump_file)
                file_folder = res_file.file_folder
                with transaction.atomic():
                    # first delete the netcdf file that we retrieved from irods
                    # for setting it to netcdf file type
                    delete_resource_file(resource.short_id, res_file.id, user)

                    # create a netcdf logical file object to be associated with
                    # resource files
                    logical_file = cls.create()

                    # by default set the dataset_name attribute of the logical file to the
                    # name of the file selected to set file type unless the extracted metadata
                    # has a value for title
                    dataset_title = res_dublin_core_meta.get('title', None)
                    if dataset_title is not None:
                        logical_file.dataset_name = dataset_title
                    else:
                        logical_file.dataset_name = nc_file_name
                    logical_file.save()

                    try:
                        # create a folder for the netcdf file type using the base file
                        # name as the name for the new folder
                        new_folder_path = cls.compute_file_type_folder(
                            resource, file_folder, nc_file_name)
                        fed_file_full_path = ''
                        if resource.resource_federation_path:
                            fed_file_full_path = os.path.join(
                                resource.root_path, new_folder_path)

                        create_folder(resource.short_id, new_folder_path)
                        log.info("Folder created:{}".format(new_folder_path))

                        new_folder_name = new_folder_path.split('/')[-1]
                        if file_folder is None:
                            upload_folder = new_folder_name
                        else:
                            upload_folder = os.path.join(
                                file_folder, new_folder_name)
                        # add all new files to the resource
                        for f in files_to_add_to_resource:
                            uploaded_file = UploadedFile(
                                file=open(f, 'rb'), name=os.path.basename(f))
                            new_res_file = utils.add_file_to_resource(
                                resource,
                                uploaded_file,
                                folder=upload_folder,
                                fed_res_file_name_or_path=fed_file_full_path)
                            # make each resource file we added as part of the logical file
                            logical_file.add_resource_file(new_res_file)

                        log.info(
                            "NetCDF file type - new files were added to the resource."
                        )
                    except Exception as ex:
                        msg = "NetCDF file type. Error when setting file type. Error:{}"
                        # str(ex) instead of ex.message: Exception.message does
                        # not exist on Python 3
                        msg = msg.format(str(ex))
                        log.exception(msg)
                        # TODO: in case of any error put the original file back and
                        # delete the folder that was created
                        raise ValidationError(msg)
                    finally:
                        # remove temp dir
                        if os.path.isdir(temp_dir):
                            shutil.rmtree(temp_dir)

                    log.info("NetCDF file type was created.")

                    # use the extracted metadata to populate resource metadata
                    for element in resource_metadata:
                        # here k is the name of the element
                        # v is a dict of all element attributes/field names and field values
                        # list() is required: dict views are not indexable in Python 3
                        k, v = list(element.items())[0]
                        if k == 'title':
                            # update title element
                            title_element = resource.metadata.title
                            resource.metadata.update_element(
                                'title', title_element.id, **v)
                        else:
                            resource.metadata.create_element(k, **v)

                    log.info("Resource - metadata was saved to DB")

                    # use the extracted metadata to populate file metadata
                    for element in file_type_metadata:
                        # here k is the name of the element
                        # v is a dict of all element attributes/field names and field values
                        k, v = list(element.items())[0]
                        if k == 'subject':
                            logical_file.metadata.keywords = v
                            logical_file.metadata.save()
                        else:
                            logical_file.metadata.create_element(k, **v)
                    log.info("NetCDF file type - metadata was saved to DB")
            else:
                err_msg = "Not a valid NetCDF file. File type file validation failed."
                log.error(err_msg)
                # remove temp dir
                if os.path.isdir(temp_dir):
                    shutil.rmtree(temp_dir)
                raise ValidationError(err_msg)
Esempio n. 3
0
def netcdf_post_create_resource(sender, **kwargs):
    """Extract NetCDF metadata into the newly created resource and attach an
    ncdump header text file; an unreadable file is removed and flagged via
    the 'validate_files' dict."""
    logger = logging.getLogger()
    resource = kwargs['resource']
    validate_files_dict = kwargs['validate_files']
    primary_file = resource.files.all().first()

    if primary_file:
        local_copy = utils.get_file_from_irods(primary_file)
        dataset = nc_utils.get_nc_dataset(local_copy)
        base_name = primary_file.file_name

        if not isinstance(dataset, netCDF4.Dataset):
            # not a readable NetCDF file: drop it and report the failure
            delete_resource_file_only(resource, primary_file)
            validate_files_dict['are_files_valid'] = False
            err_msg = "Uploaded file was not added to the resource." \
                      " Please provide a valid NetCDF file. "
            validate_files_dict['message'] = err_msg
            logger.error(
                "File validation failed for netcdf resource (ID:{}).".format(
                    resource.short_id))
        else:
            # Extract the metadata from the netcdf file and apply it to the
            # resource-level metadata elements
            dublin_core_meta, type_specific_meta = nc_meta.get_nc_meta_dict(
                local_copy)
            extracted = []
            add_metadata_to_list(extracted, dublin_core_meta,
                                 type_specific_meta)
            for element in extracted:
                # each entry is {element_name: {field_name: field_value, ...}}
                name, attrs = list(element.items())[0]
                if name == 'title':
                    # title always exists - update it in place
                    resource.metadata.update_element(
                        'title', resource.metadata.title.id, **attrs)
                elif name == 'rights':
                    resource.metadata.update_element(
                        'rights', resource.metadata.rights.id, **attrs)
                elif name == 'creator':
                    # replace any existing creators with the extracted one
                    resource.metadata.creators.all().delete()
                    resource.metadata.create_element('creator', **attrs)
                else:
                    resource.metadata.create_element(name, **attrs)

            # attach the ncdump header text file to the resource
            header_file = create_header_info_txt_file(local_copy, base_name)
            header_upload = UploadedFile(
                file=open(header_file, mode="rb"),
                name=base_name + '_header_info.txt')
            utils.add_file_to_resource(resource, header_upload)

        # cleanup the temp file directory
        if os.path.exists(local_copy):
            shutil.rmtree(os.path.dirname(local_copy))

    # metadata was just (re)generated, so it is not dirty at creation time
    meta_obj = resource.metadata
    meta_obj.is_dirty = False
    meta_obj.save()

    # extraction happened after resource creation, so the metadata xml
    # files must be regenerated - set the dirty bag flags
    if resource.files.all().count() > 0:
        utils.set_dirty_bag_flag(resource)
Esempio n. 4
0
def netcdf_pre_add_files_to_resource(sender, **kwargs):
    """Validate a NetCDF file being added to an existing netcdf resource,
    replace the resource's files, and refresh all extracted metadata.

    Expects in kwargs: 'resource', 'files', 'validate_files',
    'source_names' and 'user'.
    """
    nc_res = kwargs['resource']
    files = kwargs['files']
    validate_files_dict = kwargs['validate_files']
    source_names = kwargs['source_names']

    if __debug__:
        assert (isinstance(source_names, list))

    if len(files) > 1:
        # file number validation
        validate_files_dict['are_files_valid'] = False
        validate_files_dict['message'] = 'Only one file can be uploaded.'

    file_selected = False
    in_file_name = ''
    nc_file_name = ''
    if files:
        file_selected = True
        in_file_name = files[0].file.name
        nc_file_name = os.path.splitext(files[0].name)[0]
    elif source_names:
        # file lives in a federated zone - copy it down to a local temp file
        nc_file_name = os.path.splitext(os.path.basename(source_names[0]))[0]
        ref_tmpfiles = utils.get_fed_zone_files(source_names)
        if ref_tmpfiles:
            in_file_name = ref_tmpfiles[0]
            file_selected = True

    if file_selected and in_file_name:
        # file type validation and existing metadata update and create new ncdump text file
        nc_dataset = nc_utils.get_nc_dataset(in_file_name)
        if isinstance(nc_dataset, netCDF4.Dataset):
            # delete all existing resource files and metadata related
            for f in ResourceFile.objects.filter(object_id=nc_res.id):
                delete_resource_file_only(nc_res, f)

            # update resource modification info
            user = kwargs['user']
            utils.resource_modified(nc_res, user, overwrite_bag=False)

            # extract metadata
            res_dublin_core_meta, res_type_specific_meta = nc_meta.get_nc_meta_dict(
                in_file_name)

            # update title info
            if res_dublin_core_meta.get('title'):
                if nc_res.metadata.title:
                    nc_res.metadata.title.delete()
                nc_res.metadata.create_element(
                    'title', value=res_dublin_core_meta['title'])

            # update description info
            if res_dublin_core_meta.get('description'):
                if nc_res.metadata.description:
                    nc_res.metadata.description.delete()
                nc_res.metadata.create_element(
                    'description',
                    abstract=res_dublin_core_meta.get('description'))

            # update creator info
            if res_dublin_core_meta.get('creator_name'):
                name = res_dublin_core_meta.get('creator_name')
                email = res_dublin_core_meta.get('creator_email', '')
                url = res_dublin_core_meta.get('creator_url', '')
                arguments = dict(name=name, email=email, homepage=url)
                creator = nc_res.metadata.creators.all().filter(
                    name=name).first()
                if creator:
                    # preserve the creator's position unless it is the first
                    order = creator.order
                    if order != 1:
                        creator.delete()
                        arguments['order'] = order
                        nc_res.metadata.create_element('creator', **arguments)
                else:
                    nc_res.metadata.create_element('creator', **arguments)

            # update contributor info
            if res_dublin_core_meta.get('contributor_name'):
                name_list = res_dublin_core_meta['contributor_name'].split(',')
                existing_contributor_names = [
                    contributor.name
                    for contributor in nc_res.metadata.contributors.all()
                ]
                for name in name_list:
                    if name not in existing_contributor_names:
                        nc_res.metadata.create_element('contributor',
                                                       name=name)

            # update subject info
            if res_dublin_core_meta.get('subject'):
                keywords = res_dublin_core_meta['subject'].split(',')
                existing_keywords = [
                    subject.value
                    for subject in nc_res.metadata.subjects.all()
                ]
                for keyword in keywords:
                    if keyword not in existing_keywords:
                        nc_res.metadata.create_element('subject',
                                                       value=keyword)

            # update source
            if res_dublin_core_meta.get('source'):
                for source in nc_res.metadata.sources.all():
                    source.delete()
                nc_res.metadata.create_element(
                    'source', derived_from=res_dublin_core_meta.get('source'))

            # update license element:
            if res_dublin_core_meta.get('rights'):
                raw_info = res_dublin_core_meta.get('rights')
                # raw string avoids the invalid escape sequence \s warning
                b = re.search(r"(?P<url>https?://[^\s]+)", raw_info)
                url = b.group('url') if b else ''
                statement = raw_info.replace(url, '') if url else raw_info
                if nc_res.metadata.rights:
                    nc_res.metadata.rights.delete()
                nc_res.metadata.create_element('rights',
                                               statement=statement,
                                               url=url)

            # update relation
            if res_dublin_core_meta.get('references'):
                nc_res.metadata.relations.filter(type='cites').all().delete()
                nc_res.metadata.create_element(
                    'relation',
                    type='cites',
                    value=res_dublin_core_meta['references'])

            # update box info
            nc_res.metadata.coverages.all().delete()
            if res_dublin_core_meta.get('box'):
                nc_res.metadata.create_element(
                    'coverage', type='box', value=res_dublin_core_meta['box'])

            # update period info
            if res_dublin_core_meta.get('period'):
                nc_res.metadata.create_element(
                    'coverage',
                    type='period',
                    value=res_dublin_core_meta['period'])

            # update variable info
            nc_res.metadata.variables.all().delete()
            for var_info in list(res_type_specific_meta.values()):
                nc_res.metadata.create_element(
                    'variable',
                    name=var_info['name'],
                    unit=var_info['unit'],
                    type=var_info['type'],
                    shape=var_info['shape'],
                    missing_value=var_info['missing_value'],
                    descriptive_name=var_info['descriptive_name'],
                    method=var_info['method'])

            # update the original spatial coverage meta
            nc_res.metadata.ori_coverage.all().delete()
            if res_dublin_core_meta.get('original-box'):
                if res_dublin_core_meta.get('projection-info'):
                    nc_res.metadata.create_element(
                        'originalcoverage',
                        value=res_dublin_core_meta['original-box'],
                        projection_string_type=res_dublin_core_meta[
                            'projection-info']['type'],
                        projection_string_text=res_dublin_core_meta[
                            'projection-info']['text'],
                        datum=res_dublin_core_meta['projection-info']['datum'])
                else:
                    nc_res.metadata.create_element(
                        'originalcoverage',
                        value=res_dublin_core_meta['original-box'])

            # create the ncdump text file
            dump_file = create_header_info_txt_file(in_file_name, nc_file_name)
            dump_file_name = nc_file_name + '_header_info.txt'
            uploaded_file = UploadedFile(file=open(dump_file, 'rb'),
                                         name=dump_file_name)
            files.append(uploaded_file)

        else:
            validate_files_dict['are_files_valid'] = False
            # message previously read "is invalid NetCDF format", which says
            # the opposite of what is meant
            validate_files_dict['message'] = 'Please check if the uploaded file is ' \
                                             'in valid NetCDF format.'

        if source_names and in_file_name:
            # clean up the temp copy made for the federated-zone file
            shutil.rmtree(os.path.dirname(in_file_name))
Esempio n. 5
0
def netcdf_post_create_resource(sender, **kwargs):
    """Extract NetCDF metadata into the newly created resource and attach an
    ncdump header text file; an unreadable file is removed and flagged via
    the 'validate_files' dict."""
    log = logging.getLogger()
    resource = kwargs['resource']
    validate_files_dict = kwargs['validate_files']
    res_file = resource.files.all().first()

    if res_file:
        temp_file = utils.get_file_from_irods(res_file)
        nc_dataset = nc_utils.get_nc_dataset(temp_file)
        nc_file_name = res_file.file_name

        if isinstance(nc_dataset, netCDF4.Dataset):
            # Extract the metadata from netcdf file
            res_dublin_core_meta, res_type_specific_meta = nc_meta.get_nc_meta_dict(temp_file)
            # populate metadata list with extracted metadata
            metadata = []
            add_metadata_to_list(metadata, res_dublin_core_meta, res_type_specific_meta)
            for element in metadata:
                # here k is the name of the element
                # v is a dict of all element attributes/field names and field values
                # list() is required: dict views are not indexable in Python 3
                k, v = list(element.items())[0]
                if k == 'title':
                    # update title element
                    title_element = resource.metadata.title
                    resource.metadata.update_element('title', title_element.id, **v)
                elif k == 'rights':
                    rights_element = resource.metadata.rights
                    resource.metadata.update_element('rights', rights_element.id, **v)
                elif k == 'creator':
                    resource.metadata.creators.all().delete()
                    resource.metadata.create_element('creator', **v)
                else:
                    resource.metadata.create_element(k, **v)

            # create the ncdump text file
            dump_file = create_header_info_txt_file(temp_file, nc_file_name)
            dump_file_name = nc_file_name + '_header_info.txt'
            # binary mode: text-mode file handles break uploads under Python 3
            uploaded_file = UploadedFile(file=open(dump_file, mode="rb"), name=dump_file_name)
            utils.add_file_to_resource(resource, uploaded_file)
        else:
            delete_resource_file_only(resource, res_file)
            validate_files_dict['are_files_valid'] = False
            err_msg = "Uploaded file was not added to the resource." \
                      " Please provide a valid NetCDF file. "
            validate_files_dict['message'] = err_msg
            log_msg = "File validation failed for netcdf resource (ID:{})."
            log_msg = log_msg.format(resource.short_id)
            log.error(log_msg)

        # cleanup the temp file directory
        if os.path.exists(temp_file):
            shutil.rmtree(os.path.dirname(temp_file))

    # set metadata is dirty flag as false for resource creation
    metadata = resource.metadata
    metadata.is_dirty = False
    metadata.save()

    # since we are extracting metadata after resource creation
    # metadata xml files need to be regenerated - so need to set the
    # dirty bag flags
    if resource.files.all().count() > 0:
        utils.set_dirty_bag_flag(resource)
Esempio n. 6
0
def netcdf_pre_add_files_to_resource(sender, **kwargs):
    """Validate a NetCDF file being added to an existing netcdf resource,
    replace the resource's files, and refresh all extracted metadata.

    Expects in kwargs: 'resource', 'files', 'validate_files',
    'source_names' and 'user'.
    """
    nc_res = kwargs['resource']
    files = kwargs['files']
    validate_files_dict = kwargs['validate_files']
    source_names = kwargs['source_names']

    if __debug__:
        assert(isinstance(source_names, list))

    if len(files) > 1:
        # file number validation
        validate_files_dict['are_files_valid'] = False
        validate_files_dict['message'] = 'Only one file can be uploaded.'

    file_selected = False
    in_file_name = ''
    nc_file_name = ''
    if files:
        file_selected = True
        in_file_name = files[0].file.name
        nc_file_name = os.path.splitext(files[0].name)[0]
    elif source_names:
        # file lives in a federated zone - copy it down to a local temp file
        nc_file_name = os.path.splitext(os.path.basename(source_names[0]))[0]
        ref_tmpfiles = utils.get_fed_zone_files(source_names)
        if ref_tmpfiles:
            in_file_name = ref_tmpfiles[0]
            file_selected = True

    if file_selected and in_file_name:
        # file type validation and existing metadata update and create new ncdump text file
        nc_dataset = nc_utils.get_nc_dataset(in_file_name)
        if isinstance(nc_dataset, netCDF4.Dataset):
            # delete all existing resource files and metadata related
            for f in ResourceFile.objects.filter(object_id=nc_res.id):
                delete_resource_file_only(nc_res, f)

            # update resource modification info
            user = kwargs['user']
            utils.resource_modified(nc_res, user, overwrite_bag=False)

            # extract metadata
            res_dublin_core_meta, res_type_specific_meta = nc_meta.get_nc_meta_dict(in_file_name)

            # update title info
            if res_dublin_core_meta.get('title'):
                if nc_res.metadata.title:
                    nc_res.metadata.title.delete()
                nc_res.metadata.create_element('title', value=res_dublin_core_meta['title'])

            # update description info
            if res_dublin_core_meta.get('description'):
                if nc_res.metadata.description:
                    nc_res.metadata.description.delete()
                nc_res.metadata.create_element('description',
                                               abstract=res_dublin_core_meta.get('description'))

            # update creator info
            if res_dublin_core_meta.get('creator_name'):
                name = res_dublin_core_meta.get('creator_name')
                email = res_dublin_core_meta.get('creator_email', '')
                url = res_dublin_core_meta.get('creator_url', '')
                arguments = dict(name=name, email=email, homepage=url)
                creator = nc_res.metadata.creators.all().filter(name=name).first()
                if creator:
                    # preserve the creator's position unless it is the first
                    order = creator.order
                    if order != 1:
                        creator.delete()
                        arguments['order'] = order
                        nc_res.metadata.create_element('creator', **arguments)
                else:
                    nc_res.metadata.create_element('creator', **arguments)

            # update contributor info
            if res_dublin_core_meta.get('contributor_name'):
                name_list = res_dublin_core_meta['contributor_name'].split(',')
                existing_contributor_names = [contributor.name
                                              for contributor in nc_res.metadata.contributors.all()]
                for name in name_list:
                    if name not in existing_contributor_names:
                        nc_res.metadata.create_element('contributor', name=name)

            # update subject info
            if res_dublin_core_meta.get('subject'):
                keywords = res_dublin_core_meta['subject'].split(',')
                existing_keywords = [subject.value for subject in nc_res.metadata.subjects.all()]
                for keyword in keywords:
                    if keyword not in existing_keywords:
                        nc_res.metadata.create_element('subject', value=keyword)

            # update source
            if res_dublin_core_meta.get('source'):
                for source in nc_res.metadata.sources.all():
                    source.delete()
                nc_res.metadata.create_element('source',
                                               derived_from=res_dublin_core_meta.get('source'))

            # update license element:
            if res_dublin_core_meta.get('rights'):
                raw_info = res_dublin_core_meta.get('rights')
                # raw string avoids the invalid escape sequence \s warning
                b = re.search(r"(?P<url>https?://[^\s]+)", raw_info)
                url = b.group('url') if b else ''
                statement = raw_info.replace(url, '') if url else raw_info
                if nc_res.metadata.rights:
                    nc_res.metadata.rights.delete()
                nc_res.metadata.create_element('rights', statement=statement, url=url)

            # update relation
            if res_dublin_core_meta.get('references'):
                nc_res.metadata.relations.filter(type='cites').all().delete()
                nc_res.metadata.create_element('relation', type='cites',
                                               value=res_dublin_core_meta['references'])

            # update box info
            nc_res.metadata.coverages.all().delete()
            if res_dublin_core_meta.get('box'):
                nc_res.metadata.create_element('coverage', type='box',
                                               value=res_dublin_core_meta['box'])

            # update period info
            if res_dublin_core_meta.get('period'):
                nc_res.metadata.create_element('coverage', type='period',
                                               value=res_dublin_core_meta['period'])

            # update variable info
            nc_res.metadata.variables.all().delete()
            for var_info in res_type_specific_meta.values():
                nc_res.metadata.create_element('variable',
                                               name=var_info['name'],
                                               unit=var_info['unit'],
                                               type=var_info['type'],
                                               shape=var_info['shape'],
                                               missing_value=var_info['missing_value'],
                                               descriptive_name=var_info['descriptive_name'],
                                               method=var_info['method'])

            # update the original spatial coverage meta
            nc_res.metadata.ori_coverage.all().delete()
            if res_dublin_core_meta.get('original-box'):
                if res_dublin_core_meta.get('projection-info'):
                    nc_res.metadata.create_element(
                        'originalcoverage',
                        value=res_dublin_core_meta['original-box'],
                        projection_string_type=res_dublin_core_meta['projection-info']['type'],
                        projection_string_text=res_dublin_core_meta['projection-info']['text'],
                        datum=res_dublin_core_meta['projection-info']['datum'])
                else:
                    nc_res.metadata.create_element('originalcoverage',
                                                   value=res_dublin_core_meta['original-box'])

            # create the ncdump text file
            dump_file = create_header_info_txt_file(in_file_name, nc_file_name)
            dump_file_name = nc_file_name + '_header_info.txt'
            # binary mode: text-mode file handles break uploads under Python 3
            uploaded_file = UploadedFile(file=open(dump_file, 'rb'), name=dump_file_name)
            files.append(uploaded_file)

        else:
            validate_files_dict['are_files_valid'] = False
            # message previously read "is invalid NetCDF format", which says
            # the opposite of what is meant
            validate_files_dict['message'] = 'Please check if the uploaded file is ' \
                                             'in valid NetCDF format.'

        if source_names and in_file_name:
            # clean up the temp copy made for the federated-zone file
            shutil.rmtree(os.path.dirname(in_file_name))
    def set_file_type(cls, resource, user, file_id=None, folder_path=None):
        """Create a NetCDFLogicalFile (aggregation) from a netcdf file (.nc)
        resource file or a folder.

        :param resource: resource that contains the selected file/folder
        :param user: user requesting the aggregation creation
        :param file_id: id of the selected .nc resource file (used when the
            aggregation is created from a single file)
        :param folder_path: path of the selected folder (used when the
            aggregation is created from a folder)
        :raises ValidationError: if the selected file is not a valid NetCDF
            file, or if any step of the aggregation creation fails
        """

        log = logging.getLogger()
        res_file, folder_path = cls._validate_set_file_type_inputs(
            resource, file_id, folder_path)

        # base file name (no path included)
        file_name = res_file.file_name
        # file name without the extension - needed for naming the new aggregation folder
        nc_file_name = file_name[:-len(res_file.extension)]

        resource_metadata = []
        file_type_metadata = []
        upload_folder = ''
        res_files_to_delete = []
        # get the file from irods to temp dir
        temp_file = utils.get_file_from_irods(res_file)
        temp_dir = os.path.dirname(temp_file)

        # file validation and metadata extraction
        nc_dataset = nc_utils.get_nc_dataset(temp_file)
        if isinstance(nc_dataset, netCDF4.Dataset):
            msg = "NetCDF aggregation. Error when creating aggregation. Error:{}"
            file_type_success = False
            # extract the metadata from netcdf file
            res_dublin_core_meta, res_type_specific_meta = nc_meta.get_nc_meta_dict(
                temp_file)
            # populate resource_metadata and file_type_metadata lists with extracted metadata
            add_metadata_to_list(resource_metadata, res_dublin_core_meta,
                                 res_type_specific_meta, file_type_metadata,
                                 resource)

            # create the ncdump text file
            dump_file = create_header_info_txt_file(temp_file, nc_file_name)
            file_folder = res_file.file_folder
            aggregation_folder_created = False
            create_new_folder = cls._check_create_aggregation_folder(
                selected_res_file=res_file,
                selected_folder=folder_path,
                aggregation_file_count=1)

            with transaction.atomic():
                # create a netcdf logical file object to be associated with
                # resource files
                dataset_title = res_dublin_core_meta.get('title', nc_file_name)
                logical_file = cls.initialize(dataset_title, resource)

                try:
                    if folder_path is None:
                        # we are here means aggregation is being created by selecting a file

                        # create a folder for the netcdf file type using the base file
                        # name as the name for the new folder if the file is not already in a folder
                        if create_new_folder:
                            upload_folder = cls._create_aggregation_folder(
                                resource, file_folder, nc_file_name)
                            aggregation_folder_created = True
                            log.info(
                                "NetCDF Aggregation creation - folder created:{}"
                                .format(upload_folder))
                        else:
                            # selected nc file is already in a folder
                            upload_folder = file_folder

                        # create logical file record in DB
                        logical_file.save()
                        if aggregation_folder_created:
                            # copy the nc file to the new aggregation folder and make it part
                            # of the logical file
                            tgt_folder = upload_folder
                            files_to_copy = [res_file]
                            logical_file.copy_resource_files(
                                resource, files_to_copy, tgt_folder)
                            res_files_to_delete.append(res_file)
                        else:
                            # make the selected nc file as part of the aggregation/file type
                            logical_file.add_resource_file(res_file)

                    else:
                        # logical file record gets created in DB
                        logical_file.save()
                        # folder has been selected to create aggregation
                        upload_folder = folder_path
                        # make the .nc file part of the aggregation
                        logical_file.add_resource_file(res_file)

                    # add the new dump txt file to the resource
                    uploaded_file = UploadedFile(
                        file=open(dump_file, 'rb'),
                        name=os.path.basename(dump_file))

                    new_res_file = utils.add_file_to_resource(
                        resource,
                        uploaded_file,
                        folder=upload_folder,
                        add_to_aggregation=False)

                    # make this new resource file we added part of the logical file
                    logical_file.add_resource_file(new_res_file)
                    log.info(
                        "NetCDF aggregation creation - a new file was added to the resource."
                    )

                    # use the extracted metadata to populate resource metadata
                    for element in resource_metadata:
                        # here k is the name of the element
                        # v is a dict of all element attributes/field names and field values
                        # NOTE: list() wrapper is required on Python 3 because
                        # dict views are not subscriptable
                        k, v = list(element.items())[0]
                        if k == 'title':
                            # update title element
                            title_element = resource.metadata.title
                            resource.metadata.update_element(
                                'title', title_element.id, **v)
                        else:
                            resource.metadata.create_element(k, **v)

                    log.info(
                        "NetCDF Aggregation creation - Resource metadata was saved to DB"
                    )

                    # use the extracted metadata to populate file metadata
                    for element in file_type_metadata:
                        # here k is the name of the element
                        # v is a dict of all element attributes/field names and field values
                        k, v = list(element.items())[0]
                        if k == 'subject':
                            logical_file.metadata.keywords = v
                            logical_file.metadata.save()
                            # update resource level keywords
                            resource_keywords = [
                                subject.value.lower() for subject in
                                resource.metadata.subjects.all()
                            ]
                            for kw in logical_file.metadata.keywords:
                                if kw.lower() not in resource_keywords:
                                    resource.metadata.create_element('subject',
                                                                     value=kw)
                        else:
                            logical_file.metadata.create_element(k, **v)
                    log.info(
                        "NetCDF aggregation - metadata was saved in aggregation"
                    )
                    logical_file._finalize(
                        user,
                        resource,
                        folder_created=aggregation_folder_created,
                        res_files_to_delete=res_files_to_delete)
                    file_type_success = True
                except Exception as ex:
                    # str(ex) instead of ex.message: BaseException.message was
                    # removed in Python 3 (PEP 352)
                    msg = msg.format(str(ex))
                    log.exception(msg)
                finally:
                    # remove temp dir
                    if os.path.isdir(temp_dir):
                        shutil.rmtree(temp_dir)

            if not file_type_success:
                aggregation_from_folder = folder_path is not None
                cls._cleanup_on_fail_to_create_aggregation(
                    user, resource, upload_folder, file_folder,
                    aggregation_from_folder)
                raise ValidationError(msg)

        else:
            err_msg = "Not a valid NetCDF file. NetCDF aggregation validation failed."
            log.error(err_msg)
            # remove temp dir
            if os.path.isdir(temp_dir):
                shutil.rmtree(temp_dir)
            raise ValidationError(err_msg)
# Example 8
    def set_file_type(cls, resource, user, file_id=None, folder_path=None):
        """Create a NetCDFLogicalFile (aggregation) from a netcdf file (.nc)
        resource file or a folder.

        On success the ``post_add_netcdf_aggregation`` signal is sent.

        :param resource: resource that contains the selected file/folder
        :param user: user requesting the aggregation creation
        :param file_id: id of the selected .nc resource file (used when the
            aggregation is created from a single file)
        :param folder_path: path of the selected folder (used when the
            aggregation is created from a folder)
        :raises ValidationError: if the selected file is not a valid NetCDF
            file, or if any step of the aggregation creation fails
        """

        log = logging.getLogger()
        res_file, folder_path = cls._validate_set_file_type_inputs(resource, file_id, folder_path)

        # base file name (no path included)
        file_name = res_file.file_name
        # file name without the extension - needed for naming the new aggregation folder
        nc_file_name = file_name[:-len(res_file.extension)]

        resource_metadata = []
        file_type_metadata = []
        upload_folder = ''
        res_files_to_delete = []
        # get the file from irods to temp dir
        temp_file = utils.get_file_from_irods(res_file)
        temp_dir = os.path.dirname(temp_file)

        # file validation and metadata extraction
        nc_dataset = nc_utils.get_nc_dataset(temp_file)
        if isinstance(nc_dataset, netCDF4.Dataset):
            msg = "NetCDF aggregation. Error when creating aggregation. Error:{}"
            file_type_success = False
            # extract the metadata from netcdf file
            res_dublin_core_meta, res_type_specific_meta = nc_meta.get_nc_meta_dict(temp_file)
            # populate resource_metadata and file_type_metadata lists with extracted metadata
            add_metadata_to_list(resource_metadata, res_dublin_core_meta,
                                 res_type_specific_meta, file_type_metadata, resource)

            # create the ncdump text file
            dump_file = create_header_info_txt_file(temp_file, nc_file_name)
            file_folder = res_file.file_folder
            aggregation_folder_created = False
            create_new_folder = cls._check_create_aggregation_folder(
                selected_res_file=res_file, selected_folder=folder_path,
                aggregation_file_count=1)

            with transaction.atomic():
                # create a netcdf logical file object to be associated with
                # resource files
                dataset_title = res_dublin_core_meta.get('title', nc_file_name)
                logical_file = cls.initialize(dataset_title, resource)

                try:
                    if folder_path is None:
                        # we are here means aggregation is being created by selecting a file

                        # create a folder for the netcdf file type using the base file
                        # name as the name for the new folder if the file is not already in a folder
                        if create_new_folder:
                            upload_folder = cls._create_aggregation_folder(resource, file_folder,
                                                                           nc_file_name)
                            aggregation_folder_created = True
                            log.info("NetCDF Aggregation creation - folder created:{}".format(
                                upload_folder))
                        else:
                            # selected nc file is already in a folder
                            upload_folder = file_folder

                        # create logical file record in DB
                        logical_file.save()
                        if aggregation_folder_created:
                            # copy the nc file to the new aggregation folder and make it part
                            # of the logical file
                            tgt_folder = upload_folder
                            files_to_copy = [res_file]
                            logical_file.copy_resource_files(resource, files_to_copy,
                                                             tgt_folder)
                            res_files_to_delete.append(res_file)
                        else:
                            # make the selected nc file as part of the aggregation/file type
                            logical_file.add_resource_file(res_file)

                    else:
                        # logical file record gets created in DB
                        logical_file.save()
                        # folder has been selected to create aggregation
                        upload_folder = folder_path
                        # make the .nc file part of the aggregation
                        logical_file.add_resource_file(res_file)

                    # add the new dump txt file to the resource
                    uploaded_file = UploadedFile(file=open(dump_file, 'rb'),
                                                 name=os.path.basename(dump_file))

                    new_res_file = utils.add_file_to_resource(
                        resource, uploaded_file, folder=upload_folder, add_to_aggregation=False
                    )

                    # make this new resource file we added part of the logical file
                    logical_file.add_resource_file(new_res_file)
                    log.info("NetCDF aggregation creation - a new file was added to the resource.")

                    # use the extracted metadata to populate resource metadata
                    for element in resource_metadata:
                        # here k is the name of the element
                        # v is a dict of all element attributes/field names and field values
                        # NOTE: list() wrapper is required on Python 3 because
                        # dict views are not subscriptable
                        k, v = list(element.items())[0]
                        if k == 'title':
                            # update title element
                            title_element = resource.metadata.title
                            resource.metadata.update_element('title', title_element.id, **v)
                        else:
                            resource.metadata.create_element(k, **v)

                    log.info("NetCDF Aggregation creation - Resource metadata was saved to DB")

                    # use the extracted metadata to populate file metadata
                    for element in file_type_metadata:
                        # here k is the name of the element
                        # v is a dict of all element attributes/field names and field values
                        k, v = list(element.items())[0]
                        if k == 'subject':
                            logical_file.metadata.keywords = v
                            logical_file.metadata.save()
                            # update resource level keywords
                            resource_keywords = [subject.value.lower() for subject in
                                                 resource.metadata.subjects.all()]
                            for kw in logical_file.metadata.keywords:
                                if kw.lower() not in resource_keywords:
                                    resource.metadata.create_element('subject', value=kw)
                        else:
                            logical_file.metadata.create_element(k, **v)
                    log.info("NetCDF aggregation - metadata was saved in aggregation")
                    logical_file._finalize(user, resource,
                                           folder_created=aggregation_folder_created,
                                           res_files_to_delete=res_files_to_delete)
                    file_type_success = True
                    post_add_netcdf_aggregation.send(
                        sender=AbstractLogicalFile,
                        resource=resource,
                        file=logical_file
                    )
                except Exception as ex:
                    # str(ex) instead of ex.message: BaseException.message was
                    # removed in Python 3 (PEP 352)
                    msg = msg.format(str(ex))
                    log.exception(msg)
                finally:
                    # remove temp dir
                    if os.path.isdir(temp_dir):
                        shutil.rmtree(temp_dir)

            if not file_type_success:
                aggregation_from_folder = folder_path is not None
                cls._cleanup_on_fail_to_create_aggregation(user, resource, upload_folder,
                                                           file_folder, aggregation_from_folder)
                raise ValidationError(msg)

        else:
            err_msg = "Not a valid NetCDF file. NetCDF aggregation validation failed."
            log.error(err_msg)
            # remove temp dir
            if os.path.isdir(temp_dir):
                shutil.rmtree(temp_dir)
            raise ValidationError(err_msg)
# Example 9
    def set_file_type(cls, resource, user, file_id=None, folder_path=None):
        """Create a NetCDFLogicalFile (aggregation) from a netcdf file (.nc)
        resource file.

        :param resource: resource that contains the selected .nc file
        :param user: user requesting the aggregation creation
        :param file_id: id of the selected .nc resource file
        :param folder_path: path of the selected folder
        :raises ValidationError: if the file is not valid NetCDF or if
            aggregation creation fails
        """

        log = logging.getLogger()
        with FileTypeContext(aggr_cls=cls, user=user, resource=resource, file_id=file_id,
                             folder_path=folder_path,
                             post_aggr_signal=post_add_netcdf_aggregation,
                             is_temp_file=True) as ft_ctx:

            selected_file = ft_ctx.res_file
            # base file name (no path); drop the extension so it can be used
            # for naming the aggregation
            base_name = selected_file.file_name
            nc_file_name = base_name[:-len(selected_file.extension)]

            res_level_meta = []
            aggr_level_meta = []

            # validate the file before doing anything else
            scratch_file = ft_ctx.temp_file
            dataset = nc_utils.get_nc_dataset(scratch_file)
            if not isinstance(dataset, netCDF4.Dataset):
                # guard clause: bail out early on an invalid file
                err_msg = "Not a valid NetCDF file. NetCDF aggregation validation failed."
                log.error(err_msg)
                raise ValidationError(err_msg)

            msg = "NetCDF aggregation. Error when creating aggregation. Error:{}"
            aggr_created = False
            # extract metadata and split it into resource-level and
            # aggregation-level lists
            dublin_meta, type_meta = nc_meta.get_nc_meta_dict(scratch_file)
            add_metadata_to_list(res_level_meta, dublin_meta,
                                 type_meta, aggr_level_meta, resource)

            # generate the ncdump header text file to be uploaded alongside
            dump_file = create_header_info_txt_file(scratch_file, nc_file_name)
            target_folder = selected_file.file_folder
            aggr_title = dublin_meta.get('title', nc_file_name)

            with transaction.atomic():
                try:
                    # create the netcdf logical file object
                    logical_file = cls.create_aggregation(dataset_name=aggr_title,
                                                          resource=resource,
                                                          res_files=[selected_file],
                                                          new_files_to_upload=[dump_file],
                                                          folder_path=target_folder)

                    log.info("NetCDF aggregation creation - a new file was added to the "
                             "resource.")

                    # each item is a single-entry dict: element name -> dict of
                    # element field names/values
                    for meta_item in res_level_meta:
                        name, attrs = next(iter(meta_item.items()))
                        if name == 'title':
                            # title already exists - update it in place
                            resource.metadata.update_element(
                                'title', resource.metadata.title.id, **attrs)
                        else:
                            resource.metadata.create_element(name, **attrs)

                    log.info("NetCDF Aggregation creation - Resource metadata was saved to DB")

                    # same single-entry dict shape for aggregation metadata
                    for meta_item in aggr_level_meta:
                        name, attrs = next(iter(meta_item.items()))
                        if name != 'subject':
                            logical_file.metadata.create_element(name, **attrs)
                        else:
                            logical_file.metadata.keywords = attrs
                            logical_file.metadata.save()
                            # push any new keywords up to the resource level
                            existing_kws = {subj.value.lower() for subj in
                                            resource.metadata.subjects.all()}
                            for kw in logical_file.metadata.keywords:
                                if kw.lower() not in existing_kws:
                                    resource.metadata.create_element('subject', value=kw)
                    log.info("NetCDF aggregation - metadata was saved in aggregation")

                    aggr_created = True
                    ft_ctx.logical_file = logical_file
                except Exception as ex:
                    msg = msg.format(str(ex))
                    log.exception(msg)

            if not aggr_created:
                raise ValidationError(msg)