def test_setup_upload_location_push_newer_file_bad_prefix(self):
    """
    Test case: Check creation date of incoming *.nc.gz is newer than the one already on storage

    HARVEST_UPLOAD the content of the *.nc.gz, BUT check that we don't delete files
    not starting with a valid value of GSLA_PREFIX_PATH. In this case we patch this
    global variable to an empty value to check this.
    """
    # create some PipelineFiles to represent the existing files on 'S3'
    preexisting_files = PipelineFileCollection()
    existing_file = PipelineFile(PREV_NC_GZ_STORAGE,
                                 dest_path=os.path.join('IMOS/OceanCurrent/GSLA/DM00/2018/',
                                                        os.path.basename(PREV_NC_GZ_STORAGE)))
    preexisting_files.update([existing_file])

    # set the files to UPLOAD_ONLY
    preexisting_files.set_publish_types(PipelineFilePublishType.UPLOAD_ONLY)

    # upload the 'preexisting_files' collection to the unit test's temporary upload location
    broker = get_storage_broker(self.config.pipeline_config['global']['upload_uri'])
    broker.upload(preexisting_files)

    # run the handler
    self.run_handler_with_exception(AttributeValidationError, NEWER_CREATION_DATE_NC_GZ,
                                    allowed_dest_path_regexes=["IMOS/OceanCurrent/GSLA"])
def test_setup_upload_location_push_older_file(self):
    """
    Test case: Check creation date of incoming *.nc.gz is older than the one already on storage

    NO_ACTION on the *.nc.gz
    """
    # create some PipelineFiles to represent the existing files on 'S3'
    preexisting_files = PipelineFileCollection()
    existing_file = PipelineFile(PREV_NC_GZ_STORAGE,
                                 dest_path=os.path.join('IMOS/OceanCurrent/GSLA/DM00/2018/',
                                                        os.path.basename(PREV_NC_GZ_STORAGE)))
    preexisting_files.update([existing_file])

    # set the files to UPLOAD_ONLY
    preexisting_files.set_publish_types(PipelineFilePublishType.UPLOAD_ONLY)

    # upload the 'preexisting_files' collection to the unit test's temporary upload location
    broker = get_storage_broker(self.config.pipeline_config['global']['upload_uri'])
    broker.upload(preexisting_files)

    # run the handler on the new file with an older creation date
    self.run_handler_with_exception(InvalidFileNameError, OLDER_CREATION_DATE_NC_GZ)
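# NOTE: the "create preexisting files on 'S3'" preamble above recurs throughout these
# tests. A minimal sketch of a shared helper that factors it out, assuming the same
# aodncore API already used in this module (the name '_upload_preexisting' and the
# free-function form are hypothetical, not part of the existing suite):
def _upload_preexisting(config, local_paths, dest_dir):
    """Upload local files to the test broker as if they already existed on storage."""
    collection = PipelineFileCollection()
    for path in local_paths:
        # mirror the PipelineFile construction used in the tests above
        collection.update([PipelineFile(path,
                                        dest_path=os.path.join(dest_dir, os.path.basename(path)))])
    collection.set_publish_types(PipelineFilePublishType.UPLOAD_ONLY)
    broker = get_storage_broker(config.pipeline_config['global']['upload_uri'])
    broker.upload(collection)
    return collection
# e.g. _upload_preexisting(self.config, [PREV_NC_GZ_STORAGE], 'IMOS/OceanCurrent/GSLA/DM00/2018/')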
def test_good_dm_file_with_compliance_check(self):
    # this is tested as an update, to avoid raising an invalid input file error caused by missing ancillary material
    preexisting_file = PipelineFileCollection()
    existing_file = PipelineFile(GOOD_NC,
                                 dest_path=os.path.join('IMOS/ANFOG/slocum_glider/TwoRocks20180503a/',
                                                        os.path.basename(GOOD_NC)))
    preexisting_file.update([existing_file])

    # set the files to UPLOAD_ONLY
    preexisting_file.set_publish_types(PipelineFilePublishType.UPLOAD_ONLY)

    # upload the 'preexisting_files' collection to the unit test's temporary upload location
    broker = get_storage_broker(self.config.pipeline_config['global']['upload_uri'])
    broker.upload(preexisting_file)

    handler = self.run_handler(GOOD_NC, check_params={'checks': ['cf', 'imos:1.4']})

    f = handler.file_collection.filter_by_attribute_id('file_type', FileType.NETCDF)
    # self.assertEqual(f[0].check_type, PipelineFileCheckType.NC_COMPLIANCE_CHECK)
    self.assertEqual(f[0].publish_type, PipelineFilePublishType.HARVEST_UPLOAD)
    self.assertEqual(f[0].dest_path,
                     'IMOS/ANFOG/slocum_glider/TwoRocks20180503a/'
                     'IMOS_ANFOG_BCEOPSTUV_20180503T080042Z_SL210_FV01_timeseries_END-20180505T054942Z.nc')
    self.assertTrue(f[0].is_checked)
    self.assertTrue(f[0].is_stored)
    self.assertTrue(f[0].is_harvested)
def test_clear_rt_deployment(self):
    # test 'clear-files' status: process and results identical to status 'renamed'
    preexisting_files = PipelineFileCollection()
    existing_file1 = PipelineFile(PREV_NC_RT,
                                  dest_path=os.path.join('IMOS/ANFOG/REALTIME/slocum_glider/TwoRocks20180503a/',
                                                         os.path.basename(PREV_NC_RT)))
    existing_file2 = PipelineFile(PREV_PNG_TRANSECT,
                                  dest_path=os.path.join('IMOS/ANFOG/REALTIME/slocum_glider/TwoRocks20180503a/',
                                                         os.path.basename(PREV_PNG_TRANSECT)))
    preexisting_files.update([existing_file1, existing_file2])

    # set the files to UPLOAD_ONLY
    preexisting_files.set_publish_types(PipelineFilePublishType.UPLOAD_ONLY)

    # upload the 'preexisting_files' collection to the unit test's temporary upload location
    broker = get_storage_broker(self.config.pipeline_config['global']['upload_uri'])
    broker.upload(preexisting_files)

    handler = self.run_handler(MISSION_STATUS_CLR)

    # process should result in: input file unhandled, preexisting files deleted
    nc = handler.file_collection.filter_by_attribute_id('file_type', FileType.NETCDF)
    self.assertEqual(nc[0].publish_type, PipelineFilePublishType.DELETE_UNHARVEST)
    self.assertTrue(nc[0].is_deleted)

    png = handler.file_collection.filter_by_attribute_id('file_type', FileType.PNG)
    self.assertEqual(png[0].publish_type, PipelineFilePublishType.DELETE_ONLY)
    self.assertTrue(png[0].is_deleted)
def setUp(self, mock_webfeatureservice):
    self.storage_broker = get_storage_broker(self.config.pipeline_config['global']['upload_uri'])
    self.wfs_broker = WfsBroker(self.config.pipeline_config['global']['wfs_url'])

    with open(os.path.join(TESTDATA_DIR, 'wfs/GetFeature.json')) as f:
        self.wfs_broker.wfs.getfeature().getvalue.return_value = f.read()

    with open(os.path.join(TESTDATA_DIR, 'wfs/get_schema.json')) as f:
        self.wfs_broker.wfs.get_schema.return_value = json.load(f)
def test_rt_update(self):
    """
    Test the update of a realtime mission. The update consists of:
        - deletion of the previous netCDF
        - deletion of the transect png files
        - harvest of the new netCDF
        - overwriting of the other files
    """
    # create some PipelineFiles to represent the existing files on 'S3'
    preexisting_files = PipelineFileCollection()
    existing_file1 = PipelineFile(PREV_NC_RT,
                                  dest_path=os.path.join('IMOS/ANFOG/REALTIME/slocum_glider/TwoRocks20180503a/',
                                                         os.path.basename(PREV_NC_RT)))
    existing_file2 = PipelineFile(PREV_PNG_TRANSECT,
                                  dest_path=os.path.join('IMOS/ANFOG/REALTIME/slocum_glider/TwoRocks20180503a/',
                                                         os.path.basename(PREV_PNG_TRANSECT)))
    existing_file3 = PipelineFile(PREV_PNG_MISSION,
                                  dest_path=os.path.join('IMOS/ANFOG/REALTIME/slocum_glider/TwoRocks20180503a/',
                                                         os.path.basename(PREV_PNG_MISSION)))
    preexisting_files.update([existing_file1, existing_file2, existing_file3])

    # set the files to UPLOAD_ONLY
    preexisting_files.set_publish_types(PipelineFilePublishType.UPLOAD_ONLY)

    # upload the 'preexisting_files' collection to the unit test's temporary upload location
    broker = get_storage_broker(self.config.pipeline_config['global']['upload_uri'])
    broker.upload(preexisting_files)

    # run the handler
    handler = self.run_handler(GOOD_ZIP_RT)

    nc = handler.file_collection.filter_by_attribute_id('file_type', FileType.NETCDF)
    for n in nc:
        if n.name == os.path.basename(PREV_NC_RT):
            self.assertEqual(n.publish_type, PipelineFilePublishType.DELETE_UNHARVEST)
            self.assertTrue(n.is_deleted)
        else:
            self.assertEqual(n.publish_type, PipelineFilePublishType.HARVEST_UPLOAD)
            self.assertTrue(n.is_harvested)
            self.assertTrue(n.is_stored)

    pngs = handler.file_collection.filter_by_attribute_id('file_type', FileType.PNG)
    for png in pngs:
        if png.name == os.path.basename(PREV_PNG_MISSION):
            self.assertTrue(png.is_overwrite)
        else:
            self.assertTrue(png.is_uploaded)

    # the harvest mission list is not updated in this case
    csv = handler.file_collection.filter_by_attribute_id('file_type', FileType.CSV)
    self.assertEqual(len(csv), 0)
def test_setup_upload_location_push_newer_yearly_file(self):
    """
    Test case: Check creation date of incoming yearly *.nc.gz is newer than the one already on storage

    UPLOAD_ONLY the new incoming *.nc.gz
    DELETE_ONLY the previous *.nc.gz
    NO_ACTION on the nc inside the *.nc.gz
    """
    # create some PipelineFiles to represent the existing files on 'S3'
    preexisting_files = PipelineFileCollection()
    existing_file = PipelineFile(PREV_YEARLY_NC_GZ_STORAGE,
                                 dest_path=os.path.join('IMOS/OceanCurrent/GSLA/DM00/yearfiles',
                                                        os.path.basename(PREV_YEARLY_NC_GZ_STORAGE)))
    preexisting_files.update([existing_file])

    # set the files to UPLOAD_ONLY
    preexisting_files.set_publish_types(PipelineFilePublishType.UPLOAD_ONLY)

    # upload the 'preexisting_files' collection to the unit test's temporary upload location
    broker = get_storage_broker(self.config.pipeline_config['global']['upload_uri'])
    broker.upload(preexisting_files)

    # run the handler
    handler = self.run_handler(GOOD_YEARLY_FILE_DM00)

    nc_file = handler.file_collection.filter_by_attribute_id('file_type', FileType.NETCDF)[0]
    self.assertEqual(nc_file.publish_type, PipelineFilePublishType.NO_ACTION)

    nc_gz_file = handler.file_collection.filter_by_attribute_id('file_type', FileType.GZIP)[0]
    self.assertEqual(nc_gz_file.publish_type, PipelineFilePublishType.UPLOAD_ONLY)

    nc_gz_delete = handler.file_collection.filter_by_attribute_value(
        'name', os.path.basename(PREV_YEARLY_NC_GZ_STORAGE))[0]
    self.assertEqual(nc_gz_delete.publish_type, PipelineFilePublishType.DELETE_ONLY)
def test_setup_upload_location_push_file_newer_creation_date(self):
    """
    Test case: Check creation date of new *.nc is newer than the one already on storage
    """
    # create some PipelineFiles to represent the existing files on 'S3'
    preexisting_files = PipelineFileCollection()
    existing_file = PipelineFile(GOOD_NC_FV01, dest_path=AcornHandler.dest_path(GOOD_NC_FV01))
    preexisting_files.update([existing_file])

    # set the files to UPLOAD_ONLY
    preexisting_files.set_publish_types(PipelineFilePublishType.UPLOAD_ONLY)

    # upload the 'preexisting_files' collection to the unit test's temporary upload location
    broker = get_storage_broker(self.config.pipeline_config['global']['upload_uri'])
    broker.upload(preexisting_files)

    # create a new file based on GOOD_NC_FV01 and modify it with a newer creation date.
    # we patch the handler's opendap_root below so it resolves files via the temporary broker
    nc_file_new_creation_date_path = os.path.join(self.temp_dir, os.path.basename(GOOD_NC_FV01))
    shutil.copyfile(GOOD_NC_FV01, nc_file_new_creation_date_path)
    with Dataset(nc_file_new_creation_date_path, mode='r+') as nc_obj:
        delta_time = timedelta(1, 1, 1)
        new_time = datetime.strptime(nc_obj.date_created, '%Y-%m-%dT%H:%M:%SZ') + delta_time
        nc_obj.date_created = datetime.strftime(new_time, '%Y-%m-%dT%H:%M:%SZ')

    # run the handler on the new file with a newer creation date
    handler = self.handler_class(nc_file_new_creation_date_path, include_regexes=[r'IMOS_ACORN_.*\.nc'])
    handler.opendap_root = broker.prefix
    handler.run()

    nc_file = handler.file_collection.filter_by_attribute_id('file_type', FileType.NETCDF)[0]
    self.assertEqual(nc_file.publish_type, PipelineFilePublishType.HARVEST_UPLOAD)
def test_deletion_rt_after_dm_upload(self):
    """Test deletion of an RT mission at upload of the related DM version."""
    preexisting_files = PipelineFileCollection()
    existing_file1 = PipelineFile(PREV_NC_RT,
                                  dest_path=os.path.join('IMOS/ANFOG/REALTIME/slocum_glider/TwoRocks20180503a/',
                                                         os.path.basename(PREV_NC_RT)))
    existing_file2 = PipelineFile(PREV_PNG_TRANSECT,
                                  dest_path=os.path.join('IMOS/ANFOG/REALTIME/slocum_glider/TwoRocks20180503a/',
                                                         os.path.basename(PREV_PNG_TRANSECT)))
    existing_file3 = PipelineFile(PREV_PNG_MISSION,
                                  dest_path=os.path.join('IMOS/ANFOG/REALTIME/slocum_glider/TwoRocks20180503a/',
                                                         os.path.basename(PREV_PNG_MISSION)))
    preexisting_files.update([existing_file1, existing_file2, existing_file3])

    # set the files to UPLOAD_ONLY
    preexisting_files.set_publish_types(PipelineFilePublishType.UPLOAD_ONLY)

    # upload the 'preexisting_files' collection to the unit test's temporary upload location
    broker = get_storage_broker(self.config.pipeline_config['global']['upload_uri'])
    broker.upload(preexisting_files)

    # run the handler
    handler = self.run_handler(GOOD_ZIP_DM, check_params={'checks': ['cf', 'imos:1.4']})

    nc = handler.file_collection.filter_by_attribute_id('file_type', FileType.NETCDF)
    for n in nc:
        if n.name == os.path.basename(PREV_NC_RT):
            self.assertEqual(n.publish_type, PipelineFilePublishType.DELETE_UNHARVEST)
            self.assertTrue(n.is_deleted)
        elif re.match(AnfogFileClassifier.DM_REGEX, n.name):
            self.assertEqual(n.publish_type, PipelineFilePublishType.HARVEST_UPLOAD)
            self.assertTrue(n.is_harvested)
            self.assertTrue(n.is_stored)
        else:
            self.assertEqual(n.publish_type, PipelineFilePublishType.ARCHIVE_ONLY)
            self.assertTrue(n.is_archived)

    pngs = handler.file_collection.filter_by_attribute_id('file_type', FileType.PNG)
    for png in pngs:
        self.assertTrue(png.is_deleted)
def test_setup_upload_location_push_file_older_creation_date(self):
    """
    Test case: Check creation date of new *.nc is older than the one already on storage
    """
    # create some PipelineFiles to represent the existing files on 'S3'
    preexisting_files = PipelineFileCollection()
    existing_file = PipelineFile(GOOD_NC_FV01, dest_path=AcornHandler.dest_path(GOOD_NC_FV01))
    preexisting_files.update([existing_file])

    # set the files to UPLOAD_ONLY
    preexisting_files.set_publish_types(PipelineFilePublishType.UPLOAD_ONLY)

    # upload the 'preexisting_files' collection to the unit test's temporary upload location
    broker = get_storage_broker(self.config.pipeline_config['global']['upload_uri'])
    broker.upload(preexisting_files)

    # create a new file based on GOOD_NC_FV01 and modify it with an older creation date
    nc_file_old_creation_date_path = os.path.join(self.temp_dir, os.path.basename(GOOD_NC_FV01))
    shutil.copyfile(GOOD_NC_FV01, nc_file_old_creation_date_path)
    with Dataset(nc_file_old_creation_date_path, mode='r+') as nc_obj:
        delta_time = timedelta(1, 1, 1)
        new_time = datetime.strptime(nc_obj.date_created, '%Y-%m-%dT%H:%M:%SZ') - delta_time
        nc_obj.date_created = datetime.strftime(new_time, '%Y-%m-%dT%H:%M:%SZ')

    # run the handler on the new file with an older creation date
    handler = self.handler_class(nc_file_old_creation_date_path, include_regexes=[r'IMOS_ACORN_.*\.nc'])
    handler.opendap_root = broker.prefix
    handler.run()

    self.assertIsInstance(handler.error, InvalidFileContentError)
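# NOTE: both creation-date tests above copy GOOD_NC_FV01 and shift its 'date_created'
# global attribute before running the handler. A sketch of that step factored into a
# helper (the name 'copy_with_shifted_date_created' is hypothetical; it relies only on
# the shutil/netCDF4/datetime calls already used above):
def copy_with_shifted_date_created(src_path, dest_dir, delta):
    """Copy a netCDF file and shift its date_created attribute by a timedelta."""
    dest_path = os.path.join(dest_dir, os.path.basename(src_path))
    shutil.copyfile(src_path, dest_path)
    with Dataset(dest_path, mode='r+') as nc_obj:
        # parse, shift and rewrite the ISO 8601 timestamp, as in the tests above
        created = datetime.strptime(nc_obj.date_created, '%Y-%m-%dT%H:%M:%SZ')
        nc_obj.date_created = datetime.strftime(created + delta, '%Y-%m-%dT%H:%M:%SZ')
    return dest_path
# e.g. copy_with_shifted_date_created(GOOD_NC_FV01, self.temp_dir, timedelta(1, 1, 1))
# for the newer-date case, or -timedelta(1, 1, 1) for the older-date case.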
def test_dstg(self):
    preexisting_file = PipelineFileCollection()
    existing_file = PipelineFile(DSTG,
                                 dest_path=os.path.join('Department_of_Defence/DSTG/slocum_glider/TalismanSaberB20130706/',
                                                        os.path.basename(DSTG)))
    preexisting_file.update([existing_file])

    # set the files to UPLOAD_ONLY
    preexisting_file.set_publish_types(PipelineFilePublishType.UPLOAD_ONLY)

    # upload the 'preexisting_files' collection to the unit test's temporary upload location
    broker = get_storage_broker(self.config.pipeline_config['global']['upload_uri'])
    broker.upload(preexisting_file)

    # test processing of DSTG and NRL NetCDF files
    handler = self.run_handler(DSTG)

    f = handler.file_collection[0]
    self.assertEqual(f.publish_type, PipelineFilePublishType.HARVEST_UPLOAD)
    self.assertEqual(f.dest_path, 'Department_of_Defence/DSTG/slocum_glider/TalismanSaberB20130706/' + f.name)
    self.assertTrue(f.is_stored)
    self.assertTrue(f.is_harvested)
def test_setup_upload_location_push_same_file(self):
    """
    Test case: Push the same file twice to $INCOMING_DIR

    HARVEST_UPLOAD the incoming *.nc.gz
    NO_ACTION on the nc inside the *.nc.gz
    """
    # create some PipelineFiles to represent the existing files on 'S3'
    preexisting_files = PipelineFileCollection()
    existing_file = PipelineFile(PREV_NC_GZ_STORAGE,
                                 dest_path=os.path.join('IMOS/OceanCurrent/GSLA/DM00/2018/',
                                                        os.path.basename(PREV_NC_GZ_STORAGE)))
    preexisting_files.update([existing_file])

    # set the files to UPLOAD_ONLY
    preexisting_files.set_publish_types(PipelineFilePublishType.UPLOAD_ONLY)

    # upload the 'preexisting_files' collection to the unit test's temporary upload location
    broker = get_storage_broker(self.config.pipeline_config['global']['upload_uri'])
    broker.upload(preexisting_files)

    # run the handler again on the same file
    handler = self.run_handler(PREV_NC_GZ_STORAGE)

    nc_file = handler.file_collection.filter_by_attribute_id('file_type', FileType.NETCDF)[0]
    self.assertEqual(nc_file.publish_type, PipelineFilePublishType.NO_ACTION)

    nc_gz_file = handler.file_collection.filter_by_attribute_id('file_type', FileType.GZIP)[0]
    self.assertEqual(nc_gz_file.publish_type, PipelineFilePublishType.HARVEST_UPLOAD)
def test_overwrite_same_file(self, mock_callsign):
    # check that files with the same name are overwritten
    preexisting_files = PipelineFileCollection()
    existing_file1 = PipelineFile(
        GOOD_NC,
        dest_path=os.path.join('IMOS/SOOP/SOOP-BA/VKAD_Antarctic-Discovery/Antarctic-Discovery_20160116-20160129/',
                               os.path.basename(GOOD_NC)))
    existing_file2 = PipelineFile(
        CSV,
        dest_path=os.path.join('IMOS/SOOP/SOOP-BA/VKAD_Antarctic-Discovery/Antarctic-Discovery_20160116-20160129/',
                               os.path.basename(CSV)))
    preexisting_files.update([existing_file1, existing_file2])

    # upload the 'preexisting_files' collection to the unit test's temporary upload location
    broker = get_storage_broker(self.config.pipeline_config['global']['upload_uri'])
    broker.upload(preexisting_files)

    # run the handler
    handler = self.run_handler(GOOD_ZIP)

    nc = handler.file_collection.filter_by_attribute_id('file_type', FileType.NETCDF)
    self.assertEqual(nc[0].publish_type, PipelineFilePublishType.HARVEST_UPLOAD)
    self.assertFalse(nc[0].is_deleted)

    csvs = handler.file_collection.filter_by_attribute_id('file_type', FileType.CSV)
    for csv in csvs:
        if csv.name == os.path.basename(CSV):
            self.assertEqual(csv.publish_type, PipelineFilePublishType.UPLOAD_ONLY)
            self.assertFalse(csv.is_deleted)
def test_renamed_rt_deployment(self):
    # test deletion of RT files when a deployment is renamed, or when cleaning files on S3
    preexisting_files = PipelineFileCollection()
    existing_file1 = PipelineFile(PREV_NC_RT,
                                  dest_path=os.path.join('IMOS/ANFOG/REALTIME/slocum_glider/TwoRocks20180503a/',
                                                         os.path.basename(PREV_NC_RT)))
    existing_file2 = PipelineFile(PREV_PNG_TRANSECT,
                                  dest_path=os.path.join('IMOS/ANFOG/REALTIME/slocum_glider/TwoRocks20180503a/',
                                                         os.path.basename(PREV_PNG_TRANSECT)))
    existing_file3 = PipelineFile(PREV_PNG_MISSION,
                                  dest_path=os.path.join('IMOS/ANFOG/REALTIME/slocum_glider/TwoRocks20180503a/',
                                                         os.path.basename(PREV_PNG_MISSION)))
    preexisting_files.update([existing_file1, existing_file2, existing_file3])

    # set the files to UPLOAD_ONLY
    preexisting_files.set_publish_types(PipelineFilePublishType.UPLOAD_ONLY)

    # upload the 'preexisting_files' collection to the unit test's temporary upload location
    broker = get_storage_broker(self.config.pipeline_config['global']['upload_uri'])
    broker.upload(preexisting_files)

    handler = self.run_handler(MISSION_STATUS_RENAMED)

    # process should result in: input file unhandled, preexisting files deleted, csv file harvested
    csv = handler.file_collection.filter_by_attribute_id('file_type', FileType.CSV)
    self.assertEqual(csv[0].publish_type, PipelineFilePublishType.HARVEST_ONLY)
    self.assertTrue(csv[0].is_harvested)

    nc = handler.file_collection.filter_by_attribute_id('file_type', FileType.NETCDF)
    self.assertEqual(nc[0].publish_type, PipelineFilePublishType.DELETE_UNHARVEST)
    self.assertTrue(nc[0].is_deleted)

    pngs = handler.file_collection.filter_by_attribute_id('file_type', FileType.PNG)
    for png in pngs:
        self.assertEqual(png.publish_type, PipelineFilePublishType.DELETE_ONLY)
        self.assertTrue(png.is_deleted)
def setUp(self):
    self.handler_class = MooringsProductsHandler
    upload_broker = get_storage_broker(self.config.pipeline_config['global']['upload_uri'])
    upload_broker.upload(INPUT_FILE_COLLECTION)
    super().setUp()
def test_delete_previous_file(self, mock_callsign):
    # create some PipelineFiles to represent the existing files on 'S3'
    preexisting_files = PipelineFileCollection()
    existing_file1 = PipelineFile(
        PREV_NC,
        dest_path=os.path.join('IMOS/SOOP/SOOP-BA/VKAD_Antarctic-Discovery/Antarctic-Discovery_20160116-20160129/',
                               os.path.basename(PREV_NC)))
    existing_file2 = PipelineFile(
        CSV,
        dest_path=os.path.join('IMOS/SOOP/SOOP-BA/VKAD_Antarctic-Discovery/Antarctic-Discovery_20160116-20160129/',
                               os.path.basename(CSV)))
    existing_file3 = PipelineFile(
        PNG,
        dest_path=os.path.join('IMOS/SOOP/SOOP-BA/VKAD_Antarctic-Discovery/Antarctic-Discovery_20160116-20160129/',
                               os.path.basename(PNG)))
    preexisting_files.update([existing_file1, existing_file2, existing_file3])

    # set the files to UPLOAD_ONLY
    preexisting_files.set_publish_types(PipelineFilePublishType.UPLOAD_ONLY)

    # upload the 'preexisting_files' collection to the unit test's temporary upload location
    broker = get_storage_broker(self.config.pipeline_config['global']['upload_uri'])
    broker.upload(preexisting_files)

    # run the handler
    handler = self.run_handler(GOOD_ZIP)

    # make sure the previous files were handled appropriately, i.e.
    # - they were added as deletions
    # - they were successfully deleted
    # - they were the *only* ones deleted
    nc_files = handler.file_collection.filter_by_attribute_id('file_type', FileType.NETCDF)
    for nc in nc_files:
        if nc.name == os.path.basename(PREV_NC):
            self.assertEqual(nc.publish_type, PipelineFilePublishType.DELETE_UNHARVEST)
            self.assertTrue(nc.is_deleted)
        else:
            self.assertFalse(nc.is_deleted)

    csvs = handler.file_collection.filter_by_attribute_id('file_type', FileType.CSV)
    for csv in csvs:
        if csv.name == os.path.basename(CSV):
            self.assertEqual(csv.publish_type, PipelineFilePublishType.UPLOAD_ONLY)
            self.assertFalse(csv.is_deleted)

    pngs = handler.file_collection.filter_by_attribute_id('file_type', FileType.PNG)
    for png in pngs:
        if png.name == os.path.basename(PNG):
            self.assertTrue(png.is_deleted)
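# NOTE: the per-type loops in test_delete_previous_file verify that the previous files
# were the *only* ones deleted. A stricter single-pass variant (a sketch assuming
# PipelineFileCollection iteration as used above; not part of the existing suite) could
# be appended to that test to compare the full set of deleted names at once:
#
#     deleted_names = {f.name for f in handler.file_collection if f.is_deleted}
#     self.assertEqual(deleted_names, {os.path.basename(PREV_NC), os.path.basename(PNG)})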