Esempio n. 1
0
def netcdf_pre_delete_file_from_resource(sender, **kwargs):
    """Pre-delete hook for a NetCDF resource file.

    When either the .nc data file or its .txt header dump is deleted, the
    companion file is deleted as well, and all metadata extracted from the
    file (coverages, variables, original coverage) is removed.
    """
    resource = kwargs['resource']

    # clear the metadata dirty flag before the file set changes
    meta = resource.metadata
    meta.is_dirty = False
    meta.save()

    deleted_ext = utils.get_resource_file_name_and_extension(kwargs['file'])[2]

    # record the modification against the resource creator
    utils.resource_modified(resource, resource.creator, overwrite_bag=False)

    # the paired files: the .nc data file and the .txt header file
    mime_by_ext = {'.nc': 'application/x-netcdf', '.txt': 'text/plain'}

    if deleted_ext in mime_by_ext:
        # after removing the deleted file's entry, the remaining entry
        # identifies the companion file that must also be removed
        mime_by_ext.pop(deleted_ext)
        for res_file in ResourceFile.objects.filter(object_id=resource.id):
            file_ext = utils.get_resource_file_name_and_extension(res_file)[2]
            if file_ext in mime_by_ext:
                delete_resource_file_only(resource, res_file)
                resource.metadata.formats.filter(
                    value=mime_by_ext[file_ext]).delete()
                break

    # drop all coverage and extended metadata extracted from the file
    resource.metadata.coverages.all().delete()
    resource.metadata.variables.all().delete()
    resource.metadata.ori_coverage.all().delete()
Esempio n. 2
0
def netcdf_pre_delete_file_from_resource(sender, **kwargs):
    """Signal handler fired before a file is deleted from a NetCDF resource.

    Deleting the .nc file also deletes the .txt header file (and vice
    versa), and every piece of metadata extracted from the file is wiped.
    """
    nc_res = kwargs['resource']

    # reset the dirty flag on the resource metadata
    res_meta = nc_res.metadata
    res_meta.is_dirty = False
    res_meta.save()

    ext_of_deleted = utils.get_resource_file_name_and_extension(
        kwargs['file'])[2]

    # update resource modification info
    utils.resource_modified(nc_res, nc_res.creator, overwrite_bag=False)

    paired = {'.nc': 'application/x-netcdf', '.txt': 'text/plain'}
    if ext_of_deleted in paired:
        # the companion extension(s)/mime type(s) that must be removed too
        companions = {e: m for e, m in paired.items() if e != ext_of_deleted}
        for candidate in ResourceFile.objects.filter(object_id=nc_res.id):
            cand_ext = utils.get_resource_file_name_and_extension(candidate)[2]
            if cand_ext in companions:
                delete_resource_file_only(nc_res, candidate)
                nc_res.metadata.formats.filter(
                    value=companions[cand_ext]).delete()
                break

    # delete all the coverage info
    nc_res.metadata.coverages.all().delete()
    # delete all the extended meta info
    nc_res.metadata.variables.all().delete()
    nc_res.metadata.ori_coverage.all().delete()
Esempio n. 3
0
def post_add_files_to_resource_handler(sender, **kwargs):
    """Post-add-files hook for a timeseries resource.

    Extracts metadata from the file that was just uploaded when it is a
    .sqlite or .csv file. Validation problems are reported back through the
    mutable ``validate_files`` dict supplied by the caller.

    Bug fix: the original left ``file_name`` unbound (raising
    UnboundLocalError below) when both ``files`` and ``fed_res_file_names``
    were empty; the handler now simply returns in that case.
    """
    resource = kwargs['resource']
    files = kwargs['files']
    validate_files_dict = kwargs['validate_files']
    user = kwargs['user']
    fed_res_fnames = kwargs['fed_res_file_names']

    # determine the name of the just-uploaded file
    file_name = None
    if files:
        file_name = files[0].name
    elif fed_res_fnames:
        file_name = os.path.basename(fed_res_fnames[0])
    if file_name is None:
        # nothing was uploaded - nothing to extract metadata from
        return

    # locate the matching ResourceFile and capture its extension
    uploaded_file_to_process = None
    uploaded_file_ext = ''
    for res_file in resource.files.all():
        _, res_file_name, uploaded_file_ext = utils.get_resource_file_name_and_extension(
            res_file)
        if res_file_name == file_name:
            uploaded_file_to_process = res_file
            break

    if uploaded_file_to_process:
        if uploaded_file_ext == ".sqlite":
            _process_uploaded_sqlite_file(user,
                                          resource,
                                          uploaded_file_to_process,
                                          validate_files_dict,
                                          delete_existing_metadata=True)
        elif uploaded_file_ext == ".csv":
            _process_uploaded_csv_file(resource,
                                       uploaded_file_to_process,
                                       validate_files_dict,
                                       user,
                                       delete_existing_metadata=True)
Esempio n. 4
0
def post_add_files_to_resource_handler(sender, **kwargs):
    """Post-add-files hook for a timeseries resource.

    Extracts metadata from the just-uploaded file when it is a .sqlite or
    .csv file; validation results are reported through the mutable
    ``validate_files`` dict.

    Bug fix: when both ``files`` and ``source_names`` are empty the original
    raised UnboundLocalError on ``file_name``; the handler now returns
    without doing anything in that case.
    """
    resource = kwargs['resource']
    files = kwargs['files']
    validate_files_dict = kwargs['validate_files']
    user = kwargs['user']
    source_names = kwargs['source_names']

    if __debug__:
        assert(isinstance(source_names, list))

    # determine the name of the just-uploaded file
    file_name = None
    if files:
        file_name = files[0].name
    elif source_names:
        file_name = os.path.basename(source_names[0])
    if file_name is None:
        # nothing was uploaded - nothing to extract metadata from
        return

    # extract metadata from the just uploaded file
    uploaded_file_to_process = None
    uploaded_file_ext = ''
    for res_file in resource.files.all():
        _, res_file_name, uploaded_file_ext = utils.get_resource_file_name_and_extension(res_file)
        if res_file_name == file_name:
            uploaded_file_to_process = res_file
            break

    if uploaded_file_to_process:
        if uploaded_file_ext == ".sqlite":
            _process_uploaded_sqlite_file(user, resource, uploaded_file_to_process,
                                          validate_files_dict,
                                          delete_existing_metadata=True)
        elif uploaded_file_ext == ".csv":
            _process_uploaded_csv_file(resource, uploaded_file_to_process, validate_files_dict,
                                       user, delete_existing_metadata=True)
Esempio n. 5
0
def post_create_resource_handler(sender, **kwargs):
    """Post-create hook for a timeseries resource.

    Extracts metadata from the first uploaded file when it is a .sqlite or
    .csv file, then flags the bag dirty so metadata xml files are
    regenerated.
    """
    resource = kwargs['resource']
    validate_files_dict = kwargs['validate_files']
    user = kwargs['user']

    first_file = resource.files.all().first()
    if not first_file:
        return

    extension = utils.get_resource_file_name_and_extension(first_file)[2]
    if extension == '.sqlite':
        # metadata can already exist here: a REST API caller may have passed
        # metadata at creation time, so it is deleted before extraction from
        # the sqlite file repopulates the database
        _process_uploaded_sqlite_file(user, resource, first_file,
                                      validate_files_dict,
                                      delete_existing_metadata=True)
    elif extension == '.csv':
        _process_uploaded_csv_file(resource, first_file, validate_files_dict,
                                   user, delete_existing_metadata=False)
    # metadata was extracted after resource creation, so the generated xml
    # files are stale - mark the bag dirty for regeneration
    utils.set_dirty_bag_flag(resource)
Esempio n. 6
0
 def get_hs_term_dict(self):
     """Extend the base term dict with HS_FILE_NAME: the full name of the
     first .nc file in this resource, or empty string when none exists."""
     terms = super(NetcdfResource, self).get_hs_term_dict()
     terms["HS_FILE_NAME"] = ""
     for rf in self.files.all():
         _, full_name, extension = get_resource_file_name_and_extension(rf)
         if extension.lower() == '.nc':
             terms["HS_FILE_NAME"] = full_name
             break
     return terms
Esempio n. 7
0
 def get_hs_term_dict(self):
     """Return the HS term dict, with HS_FILE_NAME set to the full name of
     the first .nc file found (empty string when the resource has none)."""
     hs_terms = super(NetcdfResource, self).get_hs_term_dict()
     # (full_name, extension) pairs for every file in the resource
     names_and_exts = (get_resource_file_name_and_extension(f)[1:]
                       for f in self.files.all())
     hs_terms["HS_FILE_NAME"] = next(
         (name for name, ext in names_and_exts if ext.lower() == '.nc'), "")
     return hs_terms
Esempio n. 8
0
def assert_geofeature_file_type_metadata(self, expected_folder_name):
    """Shared assertions for a composite resource whose 3 shapefile parts
    were set to the GeoFeature file type: verifies object counts, the
    expected folder layout, and the metadata extracted from the states
    shapefile (geometry info and original coverage).

    :param expected_folder_name: folder the 3 files are expected to live in
    """
    # test files in the file type
    self.assertEqual(self.composite_resource.files.count(), 3)
    # check that there is no GenericLogicalFile object
    self.assertEqual(GenericLogicalFile.objects.count(), 0)
    # check that there is one GeoFeatureLogicalFile object
    self.assertEqual(GeoFeatureLogicalFile.objects.count(), 1)
    # check that there is one GeoFeatureFileMetaData object
    self.assertEqual(GeoFeatureFileMetaData.objects.count(), 1)

    logical_file = GeoFeatureLogicalFile.objects.first()
    self.assertEqual(logical_file.files.count(), 3)
    # check that the 3 resource files are now associated with GeoFeatureLogicalFile
    for res_file in self.composite_resource.files.all():
        self.assertEqual(res_file.logical_file_type_name,
                         "GeoFeatureLogicalFile")
        self.assertEqual(res_file.has_logical_file, True)
        self.assertTrue(
            isinstance(res_file.logical_file, GeoFeatureLogicalFile))
    # check that we put the 3 files in a new folder
    for res_file in self.composite_resource.files.all():
        file_path, base_file_name, _ = get_resource_file_name_and_extension(
            res_file)
        expected_file_path = "{}/data/contents/{}/{}"
        # NOTE(review): this assigns file_folder instead of asserting it -
        # looks like it should be an assertEqual; confirm intent
        res_file.file_folder = expected_folder_name
        expected_file_path = expected_file_path.format(
            self.composite_resource.root_path, expected_folder_name,
            base_file_name)
        self.assertEqual(file_path, expected_file_path)
    # test extracted raster file type metadata
    # there should not be any resource level coverage
    self.assertEqual(self.composite_resource.metadata.coverages.count(), 0)
    self.assertNotEqual(logical_file.metadata.geometryinformation, None)
    self.assertEqual(logical_file.metadata.geometryinformation.featureCount,
                     51)
    self.assertEqual(logical_file.metadata.geometryinformation.geometryType,
                     "MULTIPOLYGON")

    self.assertNotEqual(logical_file.metadata.originalcoverage, None)
    self.assertEqual(logical_file.metadata.originalcoverage.datum, 'unknown')
    self.assertEqual(logical_file.metadata.originalcoverage.projection_name,
                     'unknown')
    self.assertGreater(
        len(logical_file.metadata.originalcoverage.projection_string), 0)
    self.assertEqual(logical_file.metadata.originalcoverage.unit, 'unknown')
    # bounding box of the states dataset extracted from the shapefile
    self.assertEqual(logical_file.metadata.originalcoverage.eastlimit,
                     -66.9692712587578)
    self.assertEqual(logical_file.metadata.originalcoverage.northlimit,
                     71.406235393967)
    self.assertEqual(logical_file.metadata.originalcoverage.southlimit,
                     18.921786345087)
    self.assertEqual(logical_file.metadata.originalcoverage.westlimit,
                     -178.217598362366)
Esempio n. 9
0
def _process_uploaded_sqlite_file(user,
                                  resource,
                                  res_file,
                                  validate_files_dict,
                                  delete_existing_metadata=True):
    """Validate an uploaded sqlite file and extract timeseries metadata.

    On any validation or extraction failure the uploaded file is deleted and
    the failure is reported through ``validate_files_dict`` (keys
    ``are_files_valid`` and ``message``), which the caller inspects.

    :param user: user who uploaded the file (for the modification record)
    :param resource: the timeseries resource the file was added to
    :param res_file: the just-uploaded resource file
    :param validate_files_dict: mutable dict used to report validation results
    :param delete_existing_metadata: when True, previously extracted metadata
        is wiped before extracting from the new file
    """
    # anything other than a .sqlite file is rejected outright
    fl_ext = utils.get_resource_file_name_and_extension(res_file)[2]

    if fl_ext == '.sqlite':
        # copy the file from iRODS to a local temp directory
        fl_obj_name = utils.get_file_from_irods(res_file)
        validate_err_message = _validate_odm2_db_file(fl_obj_name)
        if not validate_err_message:
            # first delete relevant existing metadata elements
            if delete_existing_metadata:
                TimeSeriesMetaData.objects.filter(
                    id=resource.metadata.id).update(is_dirty=False)
                _delete_extracted_metadata(resource)
            extract_err_message = _extract_metadata(resource, fl_obj_name)
            if extract_err_message:
                # extraction failed: remove the file and any partial metadata
                delete_resource_file_only(resource, res_file)
                _delete_extracted_metadata(resource)
                validate_files_dict['are_files_valid'] = False
                extract_err_message += "{}".format(FILE_UPLOAD_ERROR_MESSAGE)
                validate_files_dict['message'] = extract_err_message
            else:
                # success: mark metadata clean and drop any stale csv file
                TimeSeriesMetaData.objects.filter(
                    id=resource.metadata.id).update(is_dirty=False)
                _delete_resource_file(resource, ".csv")
                utils.resource_modified(resource, user, overwrite_bag=False)

        else:  # file validation failed
            # delete the invalid file just uploaded
            delete_resource_file_only(resource, res_file)
            validate_files_dict['are_files_valid'] = False
            validate_err_message += "{}".format(FILE_UPLOAD_ERROR_MESSAGE)
            validate_files_dict['message'] = validate_err_message

        # cleanup the temp copy and its containing directory
        if os.path.exists(fl_obj_name):
            shutil.rmtree(os.path.dirname(fl_obj_name))
    else:
        # not a sqlite file at all - delete it and report the error.
        # Bug fix: the original did ``err_message += err_message.format(...)``
        # which left the unformatted template duplicated in the reported
        # message; the grammar of the message is also corrected.
        delete_resource_file_only(resource, res_file)
        validate_files_dict['are_files_valid'] = False
        err_message = "The uploaded file is not a sqlite file. {}".format(
            FILE_UPLOAD_ERROR_MESSAGE)
        validate_files_dict['message'] = err_message
Esempio n. 10
0
    def add_to_xml_container(self, container):
        """Generates xml+rdf representation of all the metadata elements associated with this
        logical file type instance. Subclass must override this if it has additional metadata
        elements.

        :param container: parent element the hsterms:Dataset node is appended to
        :return: the rdf:Description element wrapping this dataset's metadata
        """

        NAMESPACES = CoreMetaData.NAMESPACES
        # hsterms:Dataset > rdf:Description is the root node for this
        # aggregation's metadata
        dataset_container = etree.SubElement(
            container, '{%s}Dataset' % NAMESPACES['hsterms'])
        rdf_Description = etree.SubElement(
            dataset_container, '{%s}Description' % NAMESPACES['rdf'])
        # dc:type references the site-local term URL for this aggregation type
        dc_datatype = etree.SubElement(rdf_Description,
                                       '{%s}type' % NAMESPACES['dc'])
        data_type = current_site_url(
        ) + "/terms/" + self.logical_file.data_type
        dc_datatype.set('{%s}resource' % NAMESPACES['rdf'], data_type)

        # dataset title is optional
        if self.logical_file.dataset_name:
            dc_datatitle = etree.SubElement(rdf_Description,
                                            '{%s}title' % NAMESPACES['dc'])
            dc_datatitle.text = self.logical_file.dataset_name

        # add fileType node
        # one hsterms:dataFile node per file, carrying the file URI (as
        # rdf:about), its name (dc:title) and its mime type (dc:format)
        for res_file in self.logical_file.files.all():
            hsterms_datafile = etree.SubElement(
                rdf_Description, '{%s}dataFile' % NAMESPACES['hsterms'])
            rdf_dataFile_Description = etree.SubElement(
                hsterms_datafile, '{%s}Description' % NAMESPACES['rdf'])
            file_uri = u'{hs_url}/resource/{res_id}/data/contents/{file_name}'.format(
                hs_url=current_site_url(),
                res_id=self.logical_file.resource.short_id,
                file_name=res_file.short_path)
            rdf_dataFile_Description.set('{%s}about' % NAMESPACES['rdf'],
                                         file_uri)
            dc_title = etree.SubElement(rdf_dataFile_Description,
                                        '{%s}title' % NAMESPACES['dc'])

            file_name = get_resource_file_name_and_extension(res_file)[1]
            dc_title.text = file_name

            dc_format = etree.SubElement(rdf_dataFile_Description,
                                         '{%s}format' % NAMESPACES['dc'])
            dc_format.text = res_file.mime_type

        # keywords, extra metadata and coverages are appended by helpers
        self.add_keywords_to_xml_container(rdf_Description)
        self.add_extra_metadata_to_xml_container(rdf_Description)
        for coverage in self.coverages.all():
            coverage.add_to_xml_container(rdf_Description)
        return rdf_Description
Esempio n. 11
0
def _process_uploaded_sqlite_file(user, resource, res_file, validate_files_dict,
                                  delete_existing_metadata=True):
    """Validate an uploaded sqlite file and extract timeseries metadata.

    Any validation or extraction failure deletes the uploaded file and is
    reported through ``validate_files_dict`` (keys ``are_files_valid`` and
    ``message``) for the caller to inspect.

    :param user: user who uploaded the file (for the modification record)
    :param resource: the timeseries resource the file was added to
    :param res_file: the just-uploaded resource file
    :param validate_files_dict: mutable dict used to report validation results
    :param delete_existing_metadata: when True, previously extracted metadata
        is wiped before extracting from the new file
    """
    # anything other than a .sqlite file is rejected outright
    fl_ext = utils.get_resource_file_name_and_extension(res_file)[2]

    if fl_ext == '.sqlite':
        # copy the file from iRODS to a local temp directory
        fl_obj_name = utils.get_file_from_irods(res_file)
        validate_err_message = validate_odm2_db_file(fl_obj_name)
        if not validate_err_message:
            # first delete relevant existing metadata elements
            if delete_existing_metadata:
                TimeSeriesMetaData.objects.filter(id=resource.metadata.id).update(is_dirty=False)
                _delete_extracted_metadata(resource)
            extract_err_message = extract_metadata(resource, fl_obj_name)
            if extract_err_message:
                # extraction failed: remove the file and any partial metadata
                delete_resource_file_only(resource, res_file)
                _delete_extracted_metadata(resource)
                validate_files_dict['are_files_valid'] = False
                extract_err_message += "{}".format(FILE_UPLOAD_ERROR_MESSAGE)
                validate_files_dict['message'] = extract_err_message
            else:
                # success: mark metadata clean and drop any stale csv file
                TimeSeriesMetaData.objects.filter(id=resource.metadata.id).update(is_dirty=False)
                _delete_resource_file(resource, ".csv")
                utils.resource_modified(resource, user, overwrite_bag=False)

        else:   # file validation failed
            # delete the invalid file just uploaded
            delete_resource_file_only(resource, res_file)
            validate_files_dict['are_files_valid'] = False
            validate_err_message += "{}".format(FILE_UPLOAD_ERROR_MESSAGE)
            validate_files_dict['message'] = validate_err_message

        # cleanup the temp copy and its containing directory
        if os.path.exists(fl_obj_name):
            shutil.rmtree(os.path.dirname(fl_obj_name))
    else:
        # not a sqlite file at all - delete it and report the error.
        # Bug fix: the original did ``err_message += err_message.format(...)``
        # which left the unformatted template duplicated in the reported
        # message; the grammar of the message is also corrected.
        delete_resource_file_only(resource, res_file)
        validate_files_dict['are_files_valid'] = False
        err_message = "The uploaded file is not a sqlite file. {}".format(
            FILE_UPLOAD_ERROR_MESSAGE)
        validate_files_dict['message'] = err_message
Esempio n. 12
0
def assert_geofeature_file_type_metadata(self, expected_folder_name):
    """Shared assertions for a composite resource whose 3 shapefile parts
    were set to the GeoFeature file type: verifies object counts, folder
    layout, and the metadata extracted from the states shapefile.

    :param expected_folder_name: folder the 3 files are expected to live in
    """
    resource = self.composite_resource

    # the aggregation should own exactly the 3 shapefile parts
    self.assertEqual(resource.files.count(), 3)
    self.assertEqual(GenericLogicalFile.objects.count(), 0)
    self.assertEqual(GeoFeatureLogicalFile.objects.count(), 1)
    self.assertEqual(GeoFeatureFileMetaData.objects.count(), 1)

    logical_file = GeoFeatureLogicalFile.objects.first()
    self.assertEqual(logical_file.files.count(), 3)

    # every resource file must now belong to the GeoFeature logical file
    for res_file in resource.files.all():
        self.assertEqual(res_file.logical_file_type_name, "GeoFeatureLogicalFile")
        self.assertEqual(res_file.has_logical_file, True)
        self.assertTrue(isinstance(res_file.logical_file, GeoFeatureLogicalFile))

    # the files should live under the expected aggregation folder
    for res_file in resource.files.all():
        actual_path, base_name, _ = get_resource_file_name_and_extension(res_file)
        # NOTE(review): this assigns file_folder instead of asserting it -
        # looks like it should be an assertEqual; confirm intent
        res_file.file_folder = expected_folder_name
        expected_path = "{}/data/contents/{}/{}".format(
            resource.root_path, expected_folder_name, base_name)
        self.assertEqual(actual_path, expected_path)

    # no resource-level coverage; extracted metadata lives on the file type
    self.assertEqual(resource.metadata.coverages.count(), 0)

    geom_info = logical_file.metadata.geometryinformation
    self.assertNotEqual(geom_info, None)
    self.assertEqual(geom_info.featureCount, 51)
    self.assertEqual(geom_info.geometryType, "MULTIPOLYGON")

    orig_cov = logical_file.metadata.originalcoverage
    self.assertNotEqual(orig_cov, None)
    self.assertEqual(orig_cov.datum, 'unknown')
    self.assertEqual(orig_cov.projection_name, 'unknown')
    self.assertGreater(len(orig_cov.projection_string), 0)
    self.assertEqual(orig_cov.unit, 'unknown')
    # bounding box of the states dataset extracted from the shapefile
    self.assertEqual(float(orig_cov.eastlimit), -66.9692712587578)
    self.assertEqual(float(orig_cov.northlimit), 71.406235393967)
    self.assertEqual(float(orig_cov.southlimit), 18.921786345087)
    self.assertEqual(float(orig_cov.westlimit), -178.217598362366)
Esempio n. 13
0
    def add_to_xml_container(self, container):
        """Generates xml+rdf representation of all the metadata elements associated with this
        logical file type instance. Subclass must override this if it has additional metadata
        elements.

        :param container: parent element the hsterms:Dataset node is appended to
        :return: the rdf:Description element wrapping this dataset's metadata
        """

        NAMESPACES = CoreMetaData.NAMESPACES
        # hsterms:Dataset > rdf:Description is the root node for this
        # aggregation's metadata
        dataset_container = etree.SubElement(
            container, '{%s}Dataset' % NAMESPACES['hsterms'])
        rdf_Description = etree.SubElement(
            dataset_container, '{%s}Description' % NAMESPACES['rdf'])
        # hsterms:dataType holds the aggregation's data type string
        hsterms_datatype = etree.SubElement(
            rdf_Description, '{%s}dataType' % NAMESPACES['hsterms'])
        hsterms_datatype.text = self.logical_file.data_type
        # dataset title is optional
        if self.logical_file.dataset_name:
            hsterms_datatitle = etree.SubElement(
                rdf_Description, '{%s}dataTitle' % NAMESPACES['hsterms'])
            hsterms_datatitle.text = self.logical_file.dataset_name

        # add fileType node
        # one hsterms:dataFile node per file, carrying the file name
        # (dc:title) and its mime type (dc:format)
        for res_file in self.logical_file.files.all():
            hsterms_datafile = etree.SubElement(
                rdf_Description, '{%s}dataFile' % NAMESPACES['hsterms'])
            rdf_dataFile_Description = etree.SubElement(
                hsterms_datafile, '{%s}Description' % NAMESPACES['rdf'])
            dc_title = etree.SubElement(rdf_dataFile_Description,
                                        '{%s}title' % NAMESPACES['dc'])

            file_name = get_resource_file_name_and_extension(res_file)[1]
            dc_title.text = file_name

            dc_format = etree.SubElement(rdf_dataFile_Description,
                                         '{%s}format' % NAMESPACES['dc'])
            dc_format.text = res_file.mime_type

        # keywords, extra metadata and coverages are appended by helpers
        self.add_keywords_to_xml_container(rdf_Description)
        self.add_extra_metadata_to_xml_container(rdf_Description)
        for coverage in self.coverages.all():
            coverage.add_to_xml_container(rdf_Description)
        return rdf_Description
Esempio n. 14
0
def post_create_resource_handler(sender, **kwargs):
    """Post-create hook for a timeseries resource: extracts metadata from the
    first uploaded file when it is a .sqlite or .csv file."""
    resource = kwargs['resource']
    validate_files_dict = kwargs['validate_files']
    user = kwargs['user']

    # only the first uploaded file is considered for extraction
    first_file = resource.files.all().first()
    if first_file is None:
        return

    extension = utils.get_resource_file_name_and_extension(first_file)[2]
    if extension == '.sqlite':
        _process_uploaded_sqlite_file(user, resource, first_file,
                                      validate_files_dict,
                                      delete_existing_metadata=False)
    elif extension == '.csv':
        _process_uploaded_csv_file(resource, first_file, validate_files_dict,
                                   user, delete_existing_metadata=False)
Esempio n. 15
0
def post_create_resource_handler(sender, **kwargs):
    """Post-create hook for a timeseries resource.

    Extracts metadata from the first uploaded .sqlite or .csv file, then
    flags the bag dirty so the metadata xml files are regenerated.
    """
    res = kwargs['resource']
    validation = kwargs['validate_files']
    creator = kwargs['user']

    uploaded = res.files.all().first()
    if uploaded:
        _, _, ext = utils.get_resource_file_name_and_extension(uploaded)
        if ext == '.sqlite':
            # metadata can exist at this point: a REST API caller may have
            # passed metadata at creation time, so existing metadata is
            # deleted before extraction from the sqlite file repopulates it
            _process_uploaded_sqlite_file(creator, res, uploaded, validation,
                                          delete_existing_metadata=True)
        elif ext == '.csv':
            _process_uploaded_csv_file(res, uploaded, validation, creator,
                                       delete_existing_metadata=False)
        # metadata was extracted after resource creation, so the generated
        # xml files are stale - mark the bag dirty for regeneration
        utils.set_dirty_bag_flag(res)
Esempio n. 16
0
def _delete_resource_file(resource, file_ext):
    """Delete every file of *resource* whose extension equals *file_ext*."""
    # materialize the matches first, then delete, so the queryset is not
    # mutated while being iterated
    matches = [rf for rf in resource.files.all()
               if utils.get_resource_file_name_and_extension(rf)[2] == file_ext]
    for rf in matches:
        delete_resource_file_only(resource, rf)
    def test_zip_set_file_type_to_geo_feature_all(self):
        """Set a zip containing all 15 shapefile parts to the GeoFeature file
        type, then verify file expansion, logical-file object counts, folder
        layout, extracted metadata and cleanup on resource delete."""
        # here we are using a zip file that has all the 15 files for setting it
        # to Geo Feature file type which includes metadata extraction

        self._create_composite_resource(self.osm_all_files_zip_file)

        self.assertEqual(self.composite_resource.files.all().count(), 1)
        res_file = self.composite_resource.files.first()
        # folder name is the zip file name without its ".zip" suffix
        expected_folder_name = res_file.file_name[:-4]
        # check that the resource file is associated with GenericLogicalFile
        self.assertEqual(res_file.has_logical_file, True)
        self.assertEqual(res_file.logical_file_type_name, "GenericLogicalFile")
        # check that there is one GenericLogicalFile object
        self.assertEqual(GenericLogicalFile.objects.count(), 1)

        # set the zip file to GeoFeatureFile type
        GeoFeatureLogicalFile.set_file_type(self.composite_resource,
                                            res_file.id, self.user)

        # test files in the file type
        self.assertEqual(self.composite_resource.files.count(), 15)
        # check that there is no GenericLogicalFile object
        self.assertEqual(GenericLogicalFile.objects.count(), 0)
        # check that there is no GenericFileMetaData object
        self.assertEqual(GenericFileMetaData.objects.count(), 0)
        # check that there is one GeoFeatureLogicalFile object
        self.assertEqual(GeoFeatureLogicalFile.objects.count(), 1)
        logical_file = GeoFeatureLogicalFile.objects.first()
        self.assertEqual(logical_file.files.count(), 15)
        # check that the 3 resource files are now associated with GeoFeatureLogicalFile
        # NOTE(review): comment says 3 but the loop covers all 15 files
        for res_file in self.composite_resource.files.all():
            self.assertEqual(res_file.logical_file_type_name,
                             "GeoFeatureLogicalFile")
            self.assertEqual(res_file.has_logical_file, True)
            self.assertTrue(
                isinstance(res_file.logical_file, GeoFeatureLogicalFile))
        # check that we put the 3 files in a new folder
        for res_file in self.composite_resource.files.all():
            file_path, base_file_name, _ = get_resource_file_name_and_extension(
                res_file)
            expected_file_path = "{}/data/contents/{}/{}"
            # NOTE(review): this assigns file_folder instead of asserting it -
            # looks like it should be an assertEqual; confirm intent
            res_file.file_folder = expected_folder_name
            expected_file_path = expected_file_path.format(
                self.composite_resource.root_path, expected_folder_name,
                base_file_name)
            self.assertEqual(file_path, expected_file_path)
        # test extracted raster file type metadata
        # there should one resource level coverage
        self.assertEqual(self.composite_resource.metadata.coverages.count(), 1)
        self.assertEqual(logical_file.metadata.fieldinformations.all().count(),
                         7)
        self.assertEqual(
            logical_file.metadata.geometryinformation.featureCount, 87)
        self.assertEqual(
            logical_file.metadata.geometryinformation.geometryType, "POLYGON")
        self.assertEqual(logical_file.metadata.originalcoverage.datum,
                         'WGS_1984')
        # bounding box values are compared within self.allowance tolerance
        self.assertTrue(
            abs(logical_file.metadata.originalcoverage.eastlimit -
                3.4520493) < self.allowance)
        self.assertTrue(
            abs(logical_file.metadata.originalcoverage.northlimit -
                45.0466382) < self.allowance)
        self.assertTrue(
            abs(logical_file.metadata.originalcoverage.southlimit -
                42.5732416) < self.allowance)
        self.assertTrue(
            abs(logical_file.metadata.originalcoverage.westlimit -
                (-0.3263017)) < self.allowance)
        self.assertEqual(logical_file.metadata.originalcoverage.unit, 'Degree')
        self.assertEqual(
            logical_file.metadata.originalcoverage.projection_name,
            'GCS_WGS_1984')

        # there should be file level keywords
        for key in ('Logan River', 'TauDEM'):
            self.assertIn(key, logical_file.metadata.keywords)
        self.assertEqual(len(logical_file.metadata.keywords), 2)

        # deleting the resource must also delete the file-type objects
        self.composite_resource.delete()
        # there should be no GeoFeatureLogicalFile object at this point
        self.assertEqual(GeoFeatureLogicalFile.objects.count(), 0)
        # there should be no GenericFileMetaData object at this point
        self.assertEqual(GeoFeatureFileMetaData.objects.count(), 0)
    def test_shp_set_file_type_to_geo_feature_required(self):
        """Upload the 3 required shapefile parts (.shp/.shx/.dbf), set the
        .shp file to the GeoFeature file type, and verify object counts,
        folder layout, extracted metadata and cleanup on resource delete."""
        # here we are using a shp file for setting it
        # to Geo Feature file type which includes metadata extraction

        self._create_composite_resource()

        # add the 3 required files to the resource
        files = []
        shp_temp_file = os.path.join(self.temp_dir, self.states_shp_file_name)
        shutil.copy(self.states_shp_file, shp_temp_file)

        shx_temp_file = os.path.join(self.temp_dir, self.states_shx_file_name)
        shutil.copy(self.states_shx_file, shx_temp_file)

        dbf_temp_file = os.path.join(self.temp_dir, self.states_dbf_file_name)
        shutil.copy(self.states_dbf_file, dbf_temp_file)

        # NOTE(review): these file handles are never explicitly closed;
        # presumably released on GC / test teardown - confirm
        files.append(
            UploadedFile(file=open(shp_temp_file, 'r'),
                         name=self.states_shp_file_name))
        files.append(
            UploadedFile(file=open(shx_temp_file, 'r'),
                         name=self.states_shx_file_name))
        files.append(
            UploadedFile(file=open(dbf_temp_file, 'r'),
                         name=self.states_dbf_file_name))
        hydroshare.utils.resource_file_add_process(self.composite_resource,
                                                   files, self.user)

        self.assertEqual(self.composite_resource.files.all().count(), 3)
        res_file = self.composite_resource.files.first()
        # folder name is the file name without its 4-character extension
        expected_folder_name = res_file.file_name[:-4]
        # check that the resource file is associated with GenericLogicalFile
        self.assertEqual(res_file.has_logical_file, True)
        self.assertEqual(res_file.logical_file_type_name, "GenericLogicalFile")
        # check that there is 3 GenericLogicalFile object
        self.assertEqual(GenericLogicalFile.objects.count(), 3)

        # set the shp file to GeoFeatureFile type
        shp_res_file = [
            f for f in self.composite_resource.files.all()
            if f.extension == '.shp'
        ][0]
        GeoFeatureLogicalFile.set_file_type(self.composite_resource,
                                            shp_res_file.id, self.user)

        # test files in the file type
        self.assertEqual(self.composite_resource.files.count(), 3)
        # check that there is no GenericLogicalFile object
        self.assertEqual(GenericLogicalFile.objects.count(), 0)
        # check that there is one GeoFeatureLogicalFile object
        self.assertEqual(GeoFeatureLogicalFile.objects.count(), 1)
        logical_file = GeoFeatureLogicalFile.objects.first()
        self.assertEqual(logical_file.files.count(), 3)
        # check that the 3 resource files are now associated with GeoFeatureLogicalFile
        for res_file in self.composite_resource.files.all():
            self.assertEqual(res_file.logical_file_type_name,
                             "GeoFeatureLogicalFile")
            self.assertEqual(res_file.has_logical_file, True)
            self.assertTrue(
                isinstance(res_file.logical_file, GeoFeatureLogicalFile))
        # check that we put the 3 files in a new folder
        for res_file in self.composite_resource.files.all():
            file_path, base_file_name, _ = get_resource_file_name_and_extension(
                res_file)
            expected_file_path = "{}/data/contents/{}/{}"
            # NOTE(review): this assigns file_folder instead of asserting it -
            # looks like it should be an assertEqual; confirm intent
            res_file.file_folder = expected_folder_name
            expected_file_path = expected_file_path.format(
                self.composite_resource.root_path, expected_folder_name,
                base_file_name)
            self.assertEqual(file_path, expected_file_path)
        # test extracted raster file type metadata
        # there should not be any resource level coverage
        self.assertEqual(self.composite_resource.metadata.coverages.count(), 0)
        self.assertNotEqual(logical_file.metadata.geometryinformation, None)
        self.assertEqual(
            logical_file.metadata.geometryinformation.featureCount, 51)
        self.assertEqual(
            logical_file.metadata.geometryinformation.geometryType,
            "MULTIPOLYGON")

        self.assertNotEqual(logical_file.metadata.originalcoverage, None)
        self.assertEqual(logical_file.metadata.originalcoverage.datum,
                         'unknown')
        self.assertEqual(
            logical_file.metadata.originalcoverage.projection_name, 'unknown')
        self.assertGreater(
            len(logical_file.metadata.originalcoverage.projection_string), 0)
        self.assertEqual(logical_file.metadata.originalcoverage.unit,
                         'unknown')
        # bounding box of the states dataset extracted from the shapefile
        self.assertEqual(logical_file.metadata.originalcoverage.eastlimit,
                         -66.9692712587578)
        self.assertEqual(logical_file.metadata.originalcoverage.northlimit,
                         71.406235393967)
        self.assertEqual(logical_file.metadata.originalcoverage.southlimit,
                         18.921786345087)
        self.assertEqual(logical_file.metadata.originalcoverage.westlimit,
                         -178.217598362366)

        # there should not be any file level keywords
        self.assertEqual(logical_file.metadata.keywords, [])

        # deleting the resource must also delete the file-type objects
        self.composite_resource.delete()
        # there should be no GeoFeatureLogicalFile object at this point
        self.assertEqual(GeoFeatureLogicalFile.objects.count(), 0)
        # there should be no GenericFileMetaData object at this point
        self.assertEqual(GeoFeatureFileMetaData.objects.count(), 0)
Esempio n. 19
0
def _delete_resource_file(resource, file_ext):
    """Delete every file of *resource* whose extension equals *file_ext*.

    Matching files are removed via delete_resource_file_only, i.e. the
    bare file delete without any additional resource-level bookkeeping.
    """
    matching = (
        f for f in resource.files.all()
        if utils.get_resource_file_name_and_extension(f)[2] == file_ext
    )
    for target in matching:
        delete_resource_file_only(resource, target)
Esempio n. 20
0
def assert_raster_file_type_metadata(self):
    """Shared assertions for GeoRaster file-type metadata extraction.

    Verifies file/folder layout, logical-file wiring, resource-level
    format elements, and the extracted file-level metadata (spatial and
    original coverage, cell information, band information) for the
    'small_logan' raster.
    """
    # the vrt file added during metadata extraction brings the count to 2
    self.assertEqual(self.composite_resource.files.all().count(), 2)

    # both resource files must now belong to a GeoRasterLogicalFile
    for res_file in self.composite_resource.files.all():
        self.assertEqual(res_file.logical_file_type_name,
                         "GeoRasterLogicalFile")
        self.assertEqual(res_file.has_logical_file, True)
        self.assertTrue(isinstance(res_file.logical_file,
                                   GeoRasterLogicalFile))

    # both files must live in the new 'small_logan' folder
    for res_file in self.composite_resource.files.all():
        file_path, base_file_name, _ = get_resource_file_name_and_extension(
            res_file)
        self.assertEqual(
            file_path,
            "{}/data/contents/small_logan/{}".format(
                self.composite_resource.root_path, base_file_name))

    # the generic logical file is gone; exactly one raster logical file exists
    self.assertEqual(GenericLogicalFile.objects.count(), 0)
    self.assertEqual(GeoRasterLogicalFile.objects.count(), 1)

    res_file = self.composite_resource.files.first()
    logical_file = res_file.logical_file
    self.assertEqual(logical_file.dataset_name, 'small_logan')
    self.assertEqual(logical_file.has_metadata, True)
    # the logical file owns both resource files
    self.assertEqual(logical_file.files.all().count(), 2)
    self.assertEqual(set(self.composite_resource.files.all()),
                     set(logical_file.files.all()))

    # logical file size equals the sum of its member file sizes
    self.assertEqual(logical_file.size,
                     sum([f.size for f in logical_file.files.all()]))

    # exactly one GeoRasterFileMetaData, attached to this logical file
    self.assertEqual(GeoRasterFileMetaData.objects.count(), 1)
    self.assertTrue(isinstance(logical_file.metadata, GeoRasterFileMetaData))

    # two format elements at resource level: one vrt, one tiff
    self.assertEqual(self.composite_resource.metadata.formats.all().count(), 2)
    for mime_type in ('application/vrt', 'image/tiff'):
        self.assertEqual(
            self.composite_resource.metadata.formats.all().filter(
                value=mime_type).count(), 1)

    # geo raster file type should have all the required metadata elements
    self.assertEqual(logical_file.metadata.has_all_required_elements(), True)

    # there should be 1 coverage element - box type
    box_coverage = logical_file.metadata.spatial_coverage
    self.assertNotEqual(box_coverage, None)
    self.assertEqual(box_coverage.type, 'box')
    expected_box = {'projection': 'WGS 84 EPSG:4326',
                    'units': 'Decimal degrees',
                    'northlimit': 42.0500269597691,
                    'eastlimit': -111.57773718106195,
                    'southlimit': 41.98722286029891,
                    'westlimit': -111.69756293084055}
    for key, expected in expected_box.items():
        self.assertEqual(box_coverage.value[key], expected)

    # extended metadata element: original (projected) coverage
    ori_coverage = logical_file.metadata.originalCoverage
    self.assertNotEqual(ori_coverage, None)
    expected_ori = {'northlimit': 4655492.446916306,
                    'eastlimit': 452144.01909127034,
                    'southlimit': 4648592.446916306,
                    'westlimit': 442274.01909127034,
                    'units': 'meter',
                    'projection': 'NAD83 / UTM zone 12N'}
    for key, expected in expected_ori.items():
        self.assertEqual(ori_coverage.value[key], expected)

    # extended metadata element: cell information
    cell_info = logical_file.metadata.cellInformation
    self.assertEqual(cell_info.rows, 230)
    self.assertEqual(cell_info.columns, 329)
    self.assertEqual(cell_info.cellSizeXValue, 30.0)
    self.assertEqual(cell_info.cellSizeYValue, 30.0)
    self.assertEqual(cell_info.cellDataType, 'Float32')

    # extended metadata element: exactly one band, with expected statistics
    self.assertEqual(logical_file.metadata.bandInformations.count(), 1)
    band_info = logical_file.metadata.bandInformations.first()
    self.assertEqual(band_info.noDataValue, '-3.40282346639e+38')
    self.assertEqual(band_info.maximumValue, '2880.00708008')
    self.assertEqual(band_info.minimumValue, '1870.63659668')
Esempio n. 21
0
    def set_file_type(cls, resource, file_id, user):
        """Set a tif or zip raster resource file to GeoRasterFile type.

        The selected file must currently belong to a generic logical file.
        The file is validated as a raster, metadata is extracted from the
        .vrt file produced during validation, the resulting files are
        uploaded into a new folder named after the base file name, and the
        original resource file is deleted. On failure the newly created
        folder/files are removed and a ValidationError is raised.

        :param resource: an instance of resource type CompositeResource
        :param file_id: id of the resource file to be set as GeoRasterFile type
        :param user: user who is setting the file type
        :return: None
        :raises ValidationError: if the file does not exist, has no generic
            logical file, fails raster validation, or if setting the file
            type fails partway through (after cleanup)
        """

        # had to import it here to avoid import loop
        from hs_core.views.utils import create_folder, remove_folder

        log = logging.getLogger()

        # get the file from irods
        res_file = utils.get_resource_file_by_id(resource, file_id)

        # base file name (no path included)
        # NOTE(review): res_file is used here before the None check further
        # below - a missing file would fail on this line first; confirm intent
        file_name = utils.get_resource_file_name_and_extension(res_file)[1]
        # file name without the extension
        file_name = file_name[:-len(res_file.extension)]
        file_folder = res_file.file_folder
        # remembered so the except branch can clean up whatever was created
        upload_folder = ''
        if res_file is not None and res_file.has_generic_logical_file:
            # get the file from irods to temp dir
            temp_file = utils.get_file_from_irods(res_file)
            # validate the file
            error_info, files_to_add_to_resource = raster_file_validation(
                raster_file=temp_file)
            if not error_info:
                log.info("Geo raster file type file validation successful.")
                # extract metadata from the .vrt file that validation left in
                # the temp directory
                temp_dir = os.path.dirname(temp_file)
                temp_vrt_file_path = [
                    os.path.join(temp_dir, f) for f in os.listdir(temp_dir)
                    if '.vrt' == os.path.splitext(f)[1]
                ].pop()
                metadata = extract_metadata(temp_vrt_file_path)
                log.info(
                    "Geo raster file type metadata extraction was successful.")
                with transaction.atomic():
                    # create a geo raster logical file object to be associated with resource files
                    logical_file = cls.create()
                    # by default set the dataset_name attribute of the logical file to the
                    # name of the file selected to set file type
                    logical_file.dataset_name = file_name
                    logical_file.save()

                    try:
                        # create a folder for the raster file type using the base file name as the
                        # name for the new folder
                        new_folder_path = cls.compute_file_type_folder(
                            resource, file_folder, file_name)

                        # NOTE(review): logged before the folder is actually created
                        log.info("Folder created:{}".format(new_folder_path))
                        create_folder(resource.short_id, new_folder_path)

                        new_folder_name = new_folder_path.split('/')[-1]
                        if file_folder is None:
                            upload_folder = new_folder_name
                        else:
                            upload_folder = os.path.join(
                                file_folder, new_folder_name)

                        # add all new files to the resource
                        for f in files_to_add_to_resource:
                            # NOTE(review): the opened file handle is never
                            # explicitly closed - relies on GC; confirm acceptable
                            uploaded_file = UploadedFile(
                                file=open(f, 'rb'), name=os.path.basename(f))
                            # the added resource file will be part of a new generic logical file
                            # by default
                            new_res_file = utils.add_file_to_resource(
                                resource, uploaded_file, folder=upload_folder)

                            # delete the generic logical file object
                            if new_res_file.logical_file is not None:
                                # deleting the file level metadata object will delete the associated
                                # logical file object
                                new_res_file.logical_file.metadata.delete()

                            # make each resource file we added as part of the logical file
                            logical_file.add_resource_file(new_res_file)

                        log.info(
                            "Geo raster file type - new files were added to the resource."
                        )

                        # use the extracted metadata to populate file metadata
                        for element in metadata:
                            # here k is the name of the element
                            # v is a dict of all element attributes/field names and field values
                            # NOTE(review): dict.items()[0] is Python 2 only -
                            # raises TypeError on Python 3; confirm target runtime
                            k, v = element.items()[0]
                            logical_file.metadata.create_element(k, **v)
                        log.info(
                            "Geo raster file type - metadata was saved to DB")
                        # set resource to private if logical file is missing required metadata
                        resource.update_public_and_discoverable()
                        # delete the original resource file
                        delete_resource_file(resource.short_id, res_file.id,
                                             user)
                        log.info("Deleted original resource file.")
                    except Exception as ex:
                        # NOTE(review): ex.message is Python 2 only - would be
                        # AttributeError on Python 3; confirm target runtime
                        msg = "Geo raster file type. Error when setting file type. Error:{}"
                        msg = msg.format(ex.message)
                        log.exception(msg)
                        if upload_folder:
                            # delete any new files uploaded as part of setting file type
                            folder_to_remove = os.path.join(
                                'data', 'contents', upload_folder)
                            remove_folder(user, resource.short_id,
                                          folder_to_remove)
                            log.info("Deleted newly created file type folder")
                        raise ValidationError(msg)
                    finally:
                        # remove temp dir
                        if os.path.isdir(temp_dir):
                            shutil.rmtree(temp_dir)
            else:
                err_msg = "Geo raster file type file validation failed.{}".format(
                    ' '.join(error_info))
                log.info(err_msg)
                raise ValidationError(err_msg)
        else:
            if res_file is None:
                err_msg = "Failed to set Geo raster file type. " \
                          "Resource doesn't have the specified file."
                log.error(err_msg)
                raise ValidationError(err_msg)
            else:
                err_msg = "Failed to set Geo raster file type." \
                          "The specified file doesn't have a generic logical file type."
                log.error(err_msg)
                raise ValidationError(err_msg)
    def test_zip_set_file_type_to_geo_raster(self):
        """Set a valid raster zip file to GeoRasterFile type.

        Verifies that setting the zip to GeoRasterFile type extracts the
        archive (2 tif files + 1 vrt file), moves the files into a new
        folder, associates them with one GeoRasterLogicalFile, and extracts
        all file-level metadata (coverages, cell and band information).
        """
        # zip archives are binary: open with 'rb' so the upload content is
        # not corrupted on Windows and the code also works on Python 3
        # (the previous text-mode 'r' was a latent bug)
        self.raster_file_obj = open(self.raster_zip_file, 'rb')
        self._create_composite_resource()

        self.assertEqual(self.composite_resource.files.all().count(), 1)
        res_file = self.composite_resource.files.first()

        # check that the resource file is associated with GenericLogicalFile
        self.assertEqual(res_file.has_logical_file, True)
        self.assertEqual(res_file.logical_file_type_name, "GenericLogicalFile")

        # set the zip file to GeoRasterFile type
        GeoRasterLogicalFile.set_file_type(self.composite_resource,
                                           res_file.id, self.user)

        # test the resource now has 3 files (one vrt file and 2 tif files)
        self.assertEqual(self.composite_resource.files.all().count(), 3)
        tif_files = hydroshare.utils.get_resource_files_by_extension(
            self.composite_resource, '.tif')
        self.assertEqual(len(tif_files), 2)
        vrt_files = hydroshare.utils.get_resource_files_by_extension(
            self.composite_resource, '.vrt')
        self.assertEqual(len(vrt_files), 1)

        # check that the logicalfile is associated with 3 files
        self.assertEqual(GeoRasterLogicalFile.objects.count(), 1)
        res_file = self.composite_resource.files.first()
        logical_file = res_file.logical_file
        self.assertEqual(logical_file.dataset_name, 'logan_vrt_small')
        self.assertEqual(logical_file.has_metadata, True)
        self.assertEqual(logical_file.files.all().count(), 3)
        self.assertEqual(set(self.composite_resource.files.all()),
                         set(logical_file.files.all()))

        # check that we put the 3 files in a new folder (logan_vrt_small)
        for res_file in self.composite_resource.files.all():
            file_path, base_file_name, _ = get_resource_file_name_and_extension(
                res_file)
            expected_file_path = "{}/data/contents/logan_vrt_small/{}"
            expected_file_path = expected_file_path.format(
                self.composite_resource.short_id, base_file_name)
            self.assertEqual(file_path, expected_file_path)

        # check that there is no GenericLogicalFile object
        self.assertEqual(GenericLogicalFile.objects.count(), 0)

        # test that size property of the logical file is equal to sum of size of all files
        # that are part of the logical file
        self.assertEqual(logical_file.size,
                         sum([f.size for f in logical_file.files.all()]))

        # test extracted metadata for the file type
        # geo raster file type should have all the metadata elements
        self.assertEqual(logical_file.metadata.has_all_required_elements(),
                         True)

        # there should be 1 coverage element - box type
        self.assertNotEqual(logical_file.metadata.spatial_coverage, None)
        self.assertEqual(logical_file.metadata.spatial_coverage.type, 'box')

        box_coverage = logical_file.metadata.spatial_coverage
        self.assertEqual(box_coverage.value['projection'], 'WGS 84 EPSG:4326')
        self.assertEqual(box_coverage.value['units'], 'Decimal degrees')
        self.assertEqual(box_coverage.value['northlimit'], 42.050028785767275)
        self.assertEqual(box_coverage.value['eastlimit'], -111.5773750264389)
        self.assertEqual(box_coverage.value['southlimit'], 41.98745777902698)
        self.assertEqual(box_coverage.value['westlimit'], -111.65768822411239)

        # testing extended metadata element: original coverage
        ori_coverage = logical_file.metadata.originalCoverage
        self.assertNotEqual(ori_coverage, None)
        self.assertEqual(ori_coverage.value['northlimit'], 4655492.446916306)
        self.assertEqual(ori_coverage.value['eastlimit'], 452174.01909127034)
        self.assertEqual(ori_coverage.value['southlimit'], 4648592.446916306)
        self.assertEqual(ori_coverage.value['westlimit'], 445574.01909127034)
        self.assertEqual(ori_coverage.value['units'], 'meter')
        self.assertEqual(ori_coverage.value['projection'],
                         'NAD83 / UTM zone 12N')

        # testing extended metadata element: cell information
        cell_info = logical_file.metadata.cellInformation
        self.assertEqual(cell_info.rows, 230)
        self.assertEqual(cell_info.columns, 220)
        self.assertEqual(cell_info.cellSizeXValue, 30.0)
        self.assertEqual(cell_info.cellSizeYValue, 30.0)
        self.assertEqual(cell_info.cellDataType, 'Float32')

        # testing extended metadata element: band information
        self.assertEqual(logical_file.metadata.bandInformations.count(), 1)
        band_info = logical_file.metadata.bandInformations.first()
        self.assertEqual(band_info.noDataValue, '-3.40282346639e+38')
        self.assertEqual(band_info.maximumValue, '2880.00708008')
        self.assertEqual(band_info.minimumValue, '2274.95898438')

        self.composite_resource.delete()
def data_store_structure(request):
    """
    Get file hierarchy (collection of subcollections and data objects) for the requested directory
    in hydroshareZone or any federated zone used for HydroShare resource backend store.
    It is invoked by an AJAX call and returns json object that holds content for files
    and folders under the requested directory/collection/subcollection.
    The AJAX request must be a POST request with input data passed in for res_id and store_path
    where store_path is the relative path under res_id collection/directory
    """
    res_id = request.POST.get('res_id', None)
    if res_id is None:
        # a missing parameter is a client error - use 400 for consistency
        # with the other bad-request responses below (was HTTP 500)
        return HttpResponse('Bad request - resource id is not included',
                            status=status.HTTP_400_BAD_REQUEST)
    res_id = str(res_id).strip()
    try:
        resource, _, _ = authorize(
            request,
            res_id,
            needed_permission=ACTION_TO_AUTHORIZE.VIEW_RESOURCE)
    except NotFound:
        return HttpResponse('Bad request - resource not found',
                            status=status.HTTP_400_BAD_REQUEST)
    except PermissionDenied:
        return HttpResponse('Permission denied',
                            status=status.HTTP_401_UNAUTHORIZED)

    store_path = request.POST.get('store_path', None)
    if store_path is None:
        return HttpResponse('Bad request - store_path is not included',
                            status=status.HTTP_400_BAD_REQUEST)
    store_path = str(store_path).strip()
    if not store_path:
        return HttpResponse('Bad request - store_path cannot be empty',
                            status=status.HTTP_400_BAD_REQUEST)
    # this is federated if warranted, automatically, by choosing an appropriate session.
    istorage = resource.get_irods_storage()
    if resource.resource_federation_path:
        res_coll = os.path.join(resource.resource_federation_path, res_id,
                                store_path)
        rel_path = store_path
    else:
        res_coll = os.path.join(res_id, store_path)
        rel_path = res_coll
    try:
        store = istorage.listdir(res_coll)
        # build the path -> ResourceFile lookup once, so each listed file is
        # resolved with a dict hit instead of re-scanning (and re-querying)
        # the whole ResourceFile queryset per file (was O(n*m))
        path_to_res_file = {
            get_resource_file_name_and_extension(f)[0]: f
            for f in ResourceFile.objects.filter(object_id=resource.id)
        }
        files = []
        for fname in store[1]:
            name_with_full_path = os.path.join(res_coll, fname)
            name_with_rel_path = os.path.join(rel_path, fname)
            size = istorage.size(name_with_full_path)
            # keep only the subtype part of the mime type (e.g. 'tiff')
            mtype = get_file_mime_type(fname)
            idx = mtype.find('/')
            if idx >= 0:
                mtype = mtype[idx + 1:]
            # defaults when the listed file has no ResourceFile record
            f_pk = ''
            f_url = ''
            logical_file_type = ''
            logical_file_id = ''
            res_file = path_to_res_file.get(name_with_rel_path)
            if res_file is not None:
                f_pk = res_file.pk
                f_url = get_resource_file_url(res_file)
                if resource.resource_type == "CompositeResource":
                    # NOTE(review): assumes every file of a composite resource
                    # has a logical file - confirm, else .id raises here
                    logical_file_type = res_file.logical_file_type_name
                    logical_file_id = res_file.logical_file.id

            files.append({
                'name': fname,
                'size': size,
                'type': mtype,
                'pk': f_pk,
                'url': f_url,
                'logical_type': logical_file_type,
                'logical_file_id': logical_file_id
            })
    except SessionException as ex:
        return HttpResponse(ex.stderr,
                            status=status.HTTP_500_INTERNAL_SERVER_ERROR)

    return_object = {
        'files': files,
        'folders': store[0],
        'can_be_public': resource.can_be_public_or_discoverable
    }

    if resource.resource_type == "CompositeResource":
        # composite resources also report their aggregated coverages
        spatial_coverage_dict = get_coverage_data_dict(resource)
        temporal_coverage_dict = get_coverage_data_dict(
            resource, coverage_type='temporal')
        return_object['spatial_coverage'] = spatial_coverage_dict
        return_object['temporal_coverage'] = temporal_coverage_dict
    return HttpResponse(json.dumps(return_object),
                        content_type="application/json")