Example #1
def netcdf_pre_create_resource(sender, **kwargs):
    files = kwargs['files']
    metadata = kwargs['metadata']
    validate_files_dict = kwargs['validate_files']
    source_names = kwargs['source_names']

    if __debug__:
        assert(isinstance(source_names, list))

    file_selected = False
    in_file_name = ''
    nc_file_name = ''
    if files:
        file_selected = True
        in_file_name = files[0].file.name
        nc_file_name = os.path.splitext(files[0].name)[0]
    elif source_names:
        nc_file_name = os.path.splitext(os.path.basename(source_names[0]))[0]
        ref_tmpfiles = utils.get_fed_zone_files(source_names)
        if ref_tmpfiles:
            in_file_name = ref_tmpfiles[0]
            file_selected = True

    if file_selected and in_file_name:
        # file validation and metadata extraction
        nc_dataset = nc_utils.get_nc_dataset(in_file_name)

        if isinstance(nc_dataset, netCDF4.Dataset):
            # Extract the metadata from netcdf file
            res_dublin_core_meta, res_type_specific_meta = nc_meta.get_nc_meta_dict(in_file_name)
            # populate metadata list with extracted metadata
            add_metadata_to_list(metadata, res_dublin_core_meta, res_type_specific_meta)

            # create the ncdump text file
            dump_file = create_header_info_txt_file(in_file_name, nc_file_name)
            dump_file_name = nc_file_name + '_header_info.txt'
            uploaded_file = UploadedFile(file=open(dump_file, mode="rb"), name=dump_file_name)
            files.append(uploaded_file)
        else:
            validate_files_dict['are_files_valid'] = False
            validate_files_dict['message'] = 'Please check if the uploaded file ' \
                                             'is in valid NetCDF format.'

        if source_names and in_file_name:
            shutil.rmtree(os.path.dirname(in_file_name))
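
The receivers in this listing are plain Django signal handlers. A minimal wiring sketch is shown below; the signal name pre_create_resource, its import path, and the NetcdfResource sender class do not appear in the listing and are assumptions, not the project's confirmed API.

# Minimal wiring sketch (signal name, import paths, and sender class are assumptions).
from django.dispatch import receiver

from hs_core.signals import pre_create_resource       # assumed import path
from hs_app_netCDF.models import NetcdfResource        # assumed sender class


@receiver(pre_create_resource, sender=NetcdfResource)
def netcdf_pre_create_resource(sender, **kwargs):
    ...  # body as in Example #1 above
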
Example #2
def netcdf_post_create_resource(sender, **kwargs):
    log = logging.getLogger()
    resource = kwargs['resource']
    validate_files_dict = kwargs['validate_files']
    res_file = resource.files.all().first()

    if res_file:
        temp_file = utils.get_file_from_irods(res_file)
        nc_dataset = nc_utils.get_nc_dataset(temp_file)
        nc_file_name = res_file.file_name

        if isinstance(nc_dataset, netCDF4.Dataset):
            # Extract the metadata from netcdf file
            res_dublin_core_meta, res_type_specific_meta = nc_meta.get_nc_meta_dict(
                temp_file)
            # populate metadata list with extracted metadata
            metadata = []
            add_metadata_to_list(metadata, res_dublin_core_meta,
                                 res_type_specific_meta)
            for element in metadata:
                # here k is the name of the element
                # v is a dict of all element attributes/field names and field values
                k, v = list(element.items())[0]
                if k == 'title':
                    # update title element
                    title_element = resource.metadata.title
                    resource.metadata.update_element('title', title_element.id,
                                                     **v)
                elif k == 'rights':
                    rights_element = resource.metadata.rights
                    resource.metadata.update_element('rights',
                                                     rights_element.id, **v)
                elif k == 'creator':
                    resource.metadata.creators.all().delete()
                    resource.metadata.create_element('creator', **v)
                else:
                    resource.metadata.create_element(k, **v)

            # create the ncdump text file
            dump_file = create_header_info_txt_file(temp_file, nc_file_name)
            dump_file_name = nc_file_name + '_header_info.txt'
            uploaded_file = UploadedFile(file=open(dump_file, mode="rb"),
                                         name=dump_file_name)
            utils.add_file_to_resource(resource, uploaded_file)
        else:
            delete_resource_file_only(resource, res_file)
            validate_files_dict['are_files_valid'] = False
            err_msg = "Uploaded file was not added to the resource." \
                      " Please provide a valid NetCDF file. "
            validate_files_dict['message'] = err_msg
            log_msg = "File validation failed for netcdf resource (ID:{})."
            log_msg = log_msg.format(resource.short_id)
            log.error(log_msg)

        # cleanup the temp file directory
        if os.path.exists(temp_file):
            shutil.rmtree(os.path.dirname(temp_file))

    # set metadata is dirty flag as false for resource creation
    metadata = resource.metadata
    metadata.is_dirty = False
    metadata.save()

    # since we are extracting metadata after resource creation
    # metadata xml files need to be regenerated - so need to set the
    # dirty bag flags
    if resource.files.all().count() > 0:
        utils.set_dirty_bag_flag(resource)
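
The loop over metadata in the example above unpacks each entry with list(element.items())[0], which implies that add_metadata_to_list builds a list of single-key dicts mapping an element name to its field values. A small sketch of that assumed shape, with made-up values:

# Assumed shape of the extracted-metadata list (values are illustrative only).
metadata = [
    {'title': {'value': 'Example NetCDF dataset'}},
    {'creator': {'name': 'A. Author', 'email': 'author@example.org'}},
    {'subject': {'value': 'air temperature'}},
]
for element in metadata:
    k, v = list(element.items())[0]  # k: element name, v: dict of field names/values
    print(k, v)
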
Example #3
def netcdf_pre_add_files_to_resource(sender, **kwargs):
    nc_res = kwargs['resource']
    files = kwargs['files']
    validate_files_dict = kwargs['validate_files']
    source_names = kwargs['source_names']

    if __debug__:
        assert (isinstance(source_names, list))

    if len(files) > 1:
        # file number validation
        validate_files_dict['are_files_valid'] = False
        validate_files_dict['message'] = 'Only one file can be uploaded.'

    file_selected = False
    in_file_name = ''
    nc_file_name = ''
    if files:
        file_selected = True
        in_file_name = files[0].file.name
        nc_file_name = os.path.splitext(files[0].name)[0]
    elif source_names:
        nc_file_name = os.path.splitext(os.path.basename(source_names[0]))[0]
        ref_tmpfiles = utils.get_fed_zone_files(source_names)
        if ref_tmpfiles:
            in_file_name = ref_tmpfiles[0]
            file_selected = True

    if file_selected and in_file_name:
        # validate the file type, update existing metadata, and create a new ncdump text file
        nc_dataset = nc_utils.get_nc_dataset(in_file_name)
        if isinstance(nc_dataset, netCDF4.Dataset):
            # delete all existing resource files and related metadata
            for f in ResourceFile.objects.filter(object_id=nc_res.id):
                delete_resource_file_only(nc_res, f)

            # update resource modification info
            user = kwargs['user']
            utils.resource_modified(nc_res, user, overwrite_bag=False)

            # extract metadata
            res_dublin_core_meta, res_type_specific_meta = nc_meta.get_nc_meta_dict(
                in_file_name)

            # update title info
            if res_dublin_core_meta.get('title'):
                if nc_res.metadata.title:
                    nc_res.metadata.title.delete()
                nc_res.metadata.create_element(
                    'title', value=res_dublin_core_meta['title'])

            # update description info
            if res_dublin_core_meta.get('description'):
                if nc_res.metadata.description:
                    nc_res.metadata.description.delete()
                nc_res.metadata.create_element(
                    'description',
                    abstract=res_dublin_core_meta.get('description'))

            # update creator info
            if res_dublin_core_meta.get('creator_name'):
                name = res_dublin_core_meta.get('creator_name')
                email = res_dublin_core_meta.get('creator_email', '')
                url = res_dublin_core_meta.get('creator_url', '')
                arguments = dict(name=name, email=email, homepage=url)
                creator = nc_res.metadata.creators.all().filter(
                    name=name).first()
                if creator:
                    order = creator.order
                    if order != 1:
                        creator.delete()
                        arguments['order'] = order
                        nc_res.metadata.create_element('creator', **arguments)
                else:
                    nc_res.metadata.create_element('creator', **arguments)

            # update contributor info
            if res_dublin_core_meta.get('contributor_name'):
                name_list = res_dublin_core_meta['contributor_name'].split(',')
                existing_contributor_names = [
                    contributor.name
                    for contributor in nc_res.metadata.contributors.all()
                ]
                for name in name_list:
                    if name not in existing_contributor_names:
                        nc_res.metadata.create_element('contributor',
                                                       name=name)

            # update subject info
            if res_dublin_core_meta.get('subject'):
                keywords = res_dublin_core_meta['subject'].split(',')
                existing_keywords = [
                    subject.value
                    for subject in nc_res.metadata.subjects.all()
                ]
                for keyword in keywords:
                    if keyword not in existing_keywords:
                        nc_res.metadata.create_element('subject',
                                                       value=keyword)

            # update source
            if res_dublin_core_meta.get('source'):
                for source in nc_res.metadata.sources.all():
                    source.delete()
                nc_res.metadata.create_element(
                    'source', derived_from=res_dublin_core_meta.get('source'))

            # update license element:
            if res_dublin_core_meta.get('rights'):
                raw_info = res_dublin_core_meta.get('rights')
                b = re.search(r"(?P<url>https?://[^\s]+)", raw_info)
                url = b.group('url') if b else ''
                statement = raw_info.replace(url, '') if url else raw_info
                if nc_res.metadata.rights:
                    nc_res.metadata.rights.delete()
                nc_res.metadata.create_element('rights',
                                               statement=statement,
                                               url=url)

            # update relation
            if res_dublin_core_meta.get('references'):
                nc_res.metadata.relations.filter(type='cites').all().delete()
                nc_res.metadata.create_element(
                    'relation',
                    type='cites',
                    value=res_dublin_core_meta['references'])

            # update box info
            nc_res.metadata.coverages.all().delete()
            if res_dublin_core_meta.get('box'):
                nc_res.metadata.create_element(
                    'coverage', type='box', value=res_dublin_core_meta['box'])

            # update period info
            if res_dublin_core_meta.get('period'):
                nc_res.metadata.create_element(
                    'coverage',
                    type='period',
                    value=res_dublin_core_meta['period'])

            # update variable info
            nc_res.metadata.variables.all().delete()
            for var_info in list(res_type_specific_meta.values()):
                nc_res.metadata.create_element(
                    'variable',
                    name=var_info['name'],
                    unit=var_info['unit'],
                    type=var_info['type'],
                    shape=var_info['shape'],
                    missing_value=var_info['missing_value'],
                    descriptive_name=var_info['descriptive_name'],
                    method=var_info['method'])

            # update the original spatial coverage meta
            nc_res.metadata.ori_coverage.all().delete()
            if res_dublin_core_meta.get('original-box'):
                if res_dublin_core_meta.get('projection-info'):
                    nc_res.metadata.create_element(
                        'originalcoverage',
                        value=res_dublin_core_meta['original-box'],
                        projection_string_type=res_dublin_core_meta[
                            'projection-info']['type'],
                        projection_string_text=res_dublin_core_meta[
                            'projection-info']['text'],
                        datum=res_dublin_core_meta['projection-info']['datum'])
                else:
                    nc_res.metadata.create_element(
                        'originalcoverage',
                        value=res_dublin_core_meta['original-box'])

            # create the ncdump text file
            dump_file = create_header_info_txt_file(in_file_name, nc_file_name)
            dump_file_name = nc_file_name + '_header_info.txt'
            uploaded_file = UploadedFile(file=open(dump_file, 'rb'),
                                         name=dump_file_name)
            files.append(uploaded_file)

        else:
            validate_files_dict['are_files_valid'] = False
            validate_files_dict['message'] = 'Please check if the uploaded file ' \
                                             'is in valid NetCDF format.'

        if source_names and in_file_name:
            shutil.rmtree(os.path.dirname(in_file_name))
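
The license update step above extracts a URL from the free-text rights string with a regular expression and keeps the remainder as the rights statement. A self-contained sketch of just that parsing step, using an illustrative input value:

# Rights string parsing, mirroring the license update step above (input is illustrative).
import re

raw_info = 'This work is licensed under CC BY 4.0 https://creativecommons.org/licenses/by/4.0/'
match = re.search(r"(?P<url>https?://[^\s]+)", raw_info)
url = match.group('url') if match else ''
statement = raw_info.replace(url, '').strip() if url else raw_info
# statement -> 'This work is licensed under CC BY 4.0'
# url       -> 'https://creativecommons.org/licenses/by/4.0/'
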
Example #4
def netcdf_post_create_resource(sender, **kwargs):
    log = logging.getLogger()
    resource = kwargs['resource']
    validate_files_dict = kwargs['validate_files']
    res_file = resource.files.all().first()

    if res_file:
        temp_file = utils.get_file_from_irods(res_file)
        nc_dataset = nc_utils.get_nc_dataset(temp_file)
        nc_file_name = res_file.file_name

        if isinstance(nc_dataset, netCDF4.Dataset):
            # Extract the metadata from netcdf file
            res_dublin_core_meta, res_type_specific_meta = nc_meta.get_nc_meta_dict(temp_file)
            # populate metadata list with extracted metadata
            metadata = []
            add_metadata_to_list(metadata, res_dublin_core_meta, res_type_specific_meta)
            for element in metadata:
                # here k is the name of the element
                # v is a dict of all element attributes/field names and field values
                k, v = list(element.items())[0]  # dict views are not indexable on Python 3
                if k == 'title':
                    # update title element
                    title_element = resource.metadata.title
                    resource.metadata.update_element('title', title_element.id, **v)
                elif k == 'rights':
                    rights_element = resource.metadata.rights
                    resource.metadata.update_element('rights', rights_element.id, **v)
                elif k == 'creator':
                    resource.metadata.creators.all().delete()
                    resource.metadata.create_element('creator', **v)
                else:
                    resource.metadata.create_element(k, **v)

            # create the ncdump text file
            dump_file = create_header_info_txt_file(temp_file, nc_file_name)
            dump_file_name = nc_file_name + '_header_info.txt'
            uploaded_file = UploadedFile(file=open(dump_file, mode="rb"), name=dump_file_name)
            utils.add_file_to_resource(resource, uploaded_file)
        else:
            delete_resource_file_only(resource, res_file)
            validate_files_dict['are_files_valid'] = False
            err_msg = "Uploaded file was not added to the resource." \
                      " Please provide a valid NetCDF file. "
            validate_files_dict['message'] = err_msg
            log_msg = "File validation failed for netcdf resource (ID:{})."
            log_msg = log_msg.format(resource.short_id)
            log.error(log_msg)

        # cleanup the temp file directory
        if os.path.exists(temp_file):
            shutil.rmtree(os.path.dirname(temp_file))

    # set metadata is dirty flag as false for resource creation
    metadata = resource.metadata
    metadata.is_dirty = False
    metadata.save()

    # since we are extracting metadata after resource creation
    # metadata xml files need to be regenerated - so need to set the
    # dirty bag flags
    if resource.files.all().count() > 0:
        utils.set_dirty_bag_flag(resource)
Example #5
def netcdf_pre_add_files_to_resource(sender, **kwargs):
    nc_res = kwargs['resource']
    files = kwargs['files']
    validate_files_dict = kwargs['validate_files']
    source_names = kwargs['source_names']

    if __debug__:
        assert(isinstance(source_names, list))

    if len(files) > 1:
        # file number validation
        validate_files_dict['are_files_valid'] = False
        validate_files_dict['message'] = 'Only one file can be uploaded.'

    file_selected = False
    in_file_name = ''
    nc_file_name = ''
    if files:
        file_selected = True
        in_file_name = files[0].file.name
        nc_file_name = os.path.splitext(files[0].name)[0]
    elif source_names:
        nc_file_name = os.path.splitext(os.path.basename(source_names[0]))[0]
        ref_tmpfiles = utils.get_fed_zone_files(source_names)
        if ref_tmpfiles:
            in_file_name = ref_tmpfiles[0]
            file_selected = True

    if file_selected and in_file_name:
        # validate the file type, update existing metadata, and create a new ncdump text file
        nc_dataset = nc_utils.get_nc_dataset(in_file_name)
        if isinstance(nc_dataset, netCDF4.Dataset):
            # delete all existing resource files and related metadata
            for f in ResourceFile.objects.filter(object_id=nc_res.id):
                delete_resource_file_only(nc_res, f)

            # update resource modification info
            user = kwargs['user']
            utils.resource_modified(nc_res, user, overwrite_bag=False)

            # extract metadata
            res_dublin_core_meta, res_type_specific_meta = nc_meta.get_nc_meta_dict(in_file_name)

            # update title info
            if res_dublin_core_meta.get('title'):
                if nc_res.metadata.title:
                    nc_res.metadata.title.delete()
                nc_res.metadata.create_element('title', value=res_dublin_core_meta['title'])

            # update description info
            if res_dublin_core_meta.get('description'):
                if nc_res.metadata.description:
                    nc_res.metadata.description.delete()
                nc_res.metadata.create_element('description',
                                               abstract=res_dublin_core_meta.get('description'))

            # update creator info
            if res_dublin_core_meta.get('creator_name'):
                name = res_dublin_core_meta.get('creator_name')
                email = res_dublin_core_meta.get('creator_email', '')
                url = res_dublin_core_meta.get('creator_url', '')
                arguments = dict(name=name, email=email, homepage=url)
                creator = nc_res.metadata.creators.all().filter(name=name).first()
                if creator:
                    order = creator.order
                    if order != 1:
                        creator.delete()
                        arguments['order'] = order
                        nc_res.metadata.create_element('creator', **arguments)
                else:
                    nc_res.metadata.create_element('creator', **arguments)

            # update contributor info
            if res_dublin_core_meta.get('contributor_name'):
                name_list = res_dublin_core_meta['contributor_name'].split(',')
                existing_contributor_names = [contributor.name
                                              for contributor in nc_res.metadata.contributors.all()]
                for name in name_list:
                    if name not in existing_contributor_names:
                        nc_res.metadata.create_element('contributor', name=name)

            # update subject info
            if res_dublin_core_meta.get('subject'):
                keywords = res_dublin_core_meta['subject'].split(',')
                existing_keywords = [subject.value for subject in nc_res.metadata.subjects.all()]
                for keyword in keywords:
                    if keyword not in existing_keywords:
                        nc_res.metadata.create_element('subject', value=keyword)

            # update source
            if res_dublin_core_meta.get('source'):
                for source in nc_res.metadata.sources.all():
                    source.delete()
                nc_res.metadata.create_element('source',
                                               derived_from=res_dublin_core_meta.get('source'))

            # update license element:
            if res_dublin_core_meta.get('rights'):
                raw_info = res_dublin_core_meta.get('rights')
                b = re.search(r"(?P<url>https?://[^\s]+)", raw_info)
                url = b.group('url') if b else ''
                statement = raw_info.replace(url, '') if url else raw_info
                if nc_res.metadata.rights:
                    nc_res.metadata.rights.delete()
                nc_res.metadata.create_element('rights', statement=statement, url=url)

            # update relation
            if res_dublin_core_meta.get('references'):
                nc_res.metadata.relations.filter(type='cites').all().delete()
                nc_res.metadata.create_element('relation', type='cites',
                                               value=res_dublin_core_meta['references'])

            # update box info
            nc_res.metadata.coverages.all().delete()
            if res_dublin_core_meta.get('box'):
                nc_res.metadata.create_element('coverage', type='box',
                                               value=res_dublin_core_meta['box'])

            # update period info
            if res_dublin_core_meta.get('period'):
                nc_res.metadata.create_element('coverage', type='period',
                                               value=res_dublin_core_meta['period'])

            # update variable info
            nc_res.metadata.variables.all().delete()
            for var_info in res_type_specific_meta.values():
                nc_res.metadata.create_element('variable',
                                               name=var_info['name'],
                                               unit=var_info['unit'],
                                               type=var_info['type'],
                                               shape=var_info['shape'],
                                               missing_value=var_info['missing_value'],
                                               descriptive_name=var_info['descriptive_name'],
                                               method=var_info['method'])

            # update the original spatial coverage meta
            nc_res.metadata.ori_coverage.all().delete()
            if res_dublin_core_meta.get('original-box'):
                if res_dublin_core_meta.get('projection-info'):
                    nc_res.metadata.create_element(
                        'originalcoverage',
                        value=res_dublin_core_meta['original-box'],
                        projection_string_type=res_dublin_core_meta['projection-info']['type'],
                        projection_string_text=res_dublin_core_meta['projection-info']['text'],
                        datum=res_dublin_core_meta['projection-info']['datum'])
                else:
                    nc_res.metadata.create_element('originalcoverage',
                                                   value=res_dublin_core_meta['original-box'])

            # create the ncdump text file
            dump_file = create_header_info_txt_file(in_file_name, nc_file_name)
            dump_file_name = nc_file_name + '_header_info.txt'
            uploaded_file = UploadedFile(file=open(dump_file, 'rb'), name=dump_file_name)
            files.append(uploaded_file)

        else:
            validate_files_dict['are_files_valid'] = False
            validate_files_dict['message'] = 'Please check if the uploaded file ' \
                                             'is in valid NetCDF format.'

        if source_names and in_file_name:
            shutil.rmtree(os.path.dirname(in_file_name))
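
Every example above attaches the output of create_header_info_txt_file to the resource as <name>_header_info.txt, but the helper itself is not part of this listing. A hypothetical sketch of what such a header-dump helper could look like, using netCDF4's string representation of a dataset; this is an assumption, not the project's actual implementation:

# Hypothetical header-dump helper (assumption; not the implementation used above).
import netCDF4

def create_header_info_txt_file(nc_path, base_name):
    dump_path = base_name + '_header_info.txt'
    with netCDF4.Dataset(nc_path) as ds, open(dump_path, 'w') as f:
        f.write(str(ds))  # netCDF4's str() gives an 'ncdump -h'-style summary
    return dump_path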