def process(self):
    """Handle a zip file containing images and no NetCDF files. In this case we just want to publish the zip
    file itself, not the individual images. If we encounter a "mixed" zip file with images and netCDF files,
    we're just going to give up, for now.
    """
    images = PipelineFileCollection(f for f in self.file_collection if f.file_type.is_image_type)
    netcdfs = self.file_collection.filter_by_attribute_id('file_type', FileType.NETCDF)

    is_zip = self.file_type is FileType.ZIP
    have_images = len(images) > 0
    have_netcdfs = len(netcdfs) > 0

    if is_zip and have_images:
        if have_netcdfs:
            raise InvalidFileContentError(
                "Zip file contains both images and netCDFs. Don't know what to do!"
                " They are handled differently, so please upload only one at a time."
            )
        if not DwmFileClassifier.SOTS_IMAGES_ZIP_PATTERN.match(self.file_basename):
            raise InvalidFileNameError(
                "Zip file contains images, but its name does not match pattern for images zip file "
                "(regular expression '{p}')".format(p=DwmFileClassifier.SOTS_IMAGES_ZIP_PATTERN.pattern)
            )

        self.logger.info("Zip file contains images and no netCDF files. "
                         "Publishing original zip file instead of its contents.")
        self.file_collection.set_publish_types(PipelineFilePublishType.NO_ACTION)
        self.input_file_object.publish_type = PipelineFilePublishType.HARVEST_UPLOAD
        self.file_collection.add(self.input_file_object)
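
# Hedged test sketch (not in the original suite): exercising the routing in process() above from a
# HandlerTestCase subclass, in the style of the other tests in this listing. MIXED_ZIP and IMAGES_ZIP
# are hypothetical fixtures; run_handler/run_handler_with_exception are the helpers used elsewhere here.
def _sketch_mixed_zip_rejected(self):
    # images + netCDFs in one zip should trip the InvalidFileContentError guard
    self.run_handler_with_exception(InvalidFileContentError, MIXED_ZIP)

def _sketch_images_zip_published_whole(self):
    # an images-only zip should be published as-is: contents NO_ACTION, zip HARVEST_UPLOAD
    handler = self.run_handler(IMAGES_ZIP)
    zips = handler.file_collection.filter_by_attribute_id('file_type', FileType.ZIP)
    self.assertEqual(zips[0].publish_type, PipelineFilePublishType.HARVEST_UPLOAD)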
def get_harvest_collection(delete=False, late_deletion=False, with_store=False, already_stored=False):
    pf_bad = PipelineFile(BAD_NC, is_deletion=delete, late_deletion=late_deletion)
    pf_empty = PipelineFile(EMPTY_NC, is_deletion=delete, late_deletion=late_deletion)
    pf_good = PipelineFile(GOOD_NC, is_deletion=delete, late_deletion=late_deletion)

    collection = PipelineFileCollection([pf_bad, pf_empty, pf_good])

    if with_store:
        publish_type = PipelineFilePublishType.DELETE_UNHARVEST if delete else PipelineFilePublishType.HARVEST_UPLOAD
    else:
        publish_type = PipelineFilePublishType.UNHARVEST_ONLY if delete else PipelineFilePublishType.HARVEST_ONLY

    for pipeline_file in collection:
        pipeline_file.is_stored = already_stored
        pipeline_file.dest_path = os.path.join('DUMMY', os.path.basename(pipeline_file.src_path))
        pipeline_file.publish_type = publish_type

    return collection
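
# Hedged usage sketch (not part of the original suite): how the fixture factory above maps its flags
# to publish types. The assertions follow directly from the if/else in get_harvest_collection().
def _sketch_harvest_collection_flags():
    stored_delete = get_harvest_collection(delete=True, with_store=True)
    assert all(pf.publish_type is PipelineFilePublishType.DELETE_UNHARVEST for pf in stored_delete)

    harvest_only = get_harvest_collection()
    assert all(pf.publish_type is PipelineFilePublishType.HARVEST_ONLY for pf in harvest_only)
    assert all(pf.dest_path.startswith('DUMMY') for pf in harvest_only)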
def test_cleanup(self):
    nc = PipelineFile(GOOD_NC, dest_path=os.path.basename(GOOD_NC))
    png = PipelineFile(INVALID_PNG, dest_path=os.path.basename(INVALID_PNG))
    ico = PipelineFile(TEST_ICO, dest_path=os.path.basename(TEST_ICO))
    unknown = PipelineFile(UNKNOWN_FILE_TYPE, dest_path=os.path.basename(UNKNOWN_FILE_TYPE))
    existing_collection = PipelineFileCollection([nc, png, ico, unknown])
    self.state_manager.error_broker.upload(existing_collection)

    self.state_manager.move_to_processing()

    actual_error_files_before_cleanup = [v.dest_path for v in self.state_manager.error_broker.query()]
    expected_error_files_before_cleanup = ['good.nc', 'test.unknown_file_extension', 'test.ico', 'invalid.png']
    self.assertCountEqual(expected_error_files_before_cleanup, actual_error_files_before_cleanup)

    self.state_manager.success_exit_policies.append(ExitPolicy.DELETE_CUSTOM_REGEXES_FROM_ERROR_STORE)
    self.state_manager.move_to_success()

    actual_error_files_after_cleanup = [v.dest_path for v in self.state_manager.error_broker.query()]
    expected_error_files_after_cleanup = ['good.nc', 'invalid.png']
    self.assertCountEqual(expected_error_files_after_cleanup, actual_error_files_after_cleanup)
def setUp(self):
    self.logger = get_pipeline_logger('unittest')
    self.dummy_input_file = 'dummy.input_file'

    incoming_file_path = os.path.join(self.config.pipeline_config['watch']['incoming_dir'],
                                      os.path.basename(self.temp_nc_file))
    safe_copy_file(self.temp_nc_file, incoming_file_path)

    celery_request = type('DummyRequest', (object,), {'id': 'NO_REQUEST_ID'})()

    self.state_manager = IncomingFileStateManager(incoming_file_path,
                                                  pipeline_name='UNITTEST',
                                                  config=self.config,
                                                  logger=self.logger,
                                                  celery_request=celery_request)
    self.state_manager.handler = MagicMock(file_basename=self.dummy_input_file,
                                           error_cleanup_regexes=[r'test.*'])

    previous_file_same_name = PipelineFile(self.temp_nc_file,
                                           dest_path='dummy.input_file.40c4ec0d-c9db-498d-84f9-01011330086e')
    nc = PipelineFile(GOOD_NC, dest_path=os.path.basename(GOOD_NC))
    png = PipelineFile(INVALID_PNG, dest_path=os.path.basename(INVALID_PNG))
    ico = PipelineFile(TEST_ICO, dest_path=os.path.basename(TEST_ICO))
    unknown = PipelineFile(UNKNOWN_FILE_TYPE, dest_path=os.path.basename(UNKNOWN_FILE_TYPE))
    existing_collection = PipelineFileCollection([previous_file_same_name, nc, png, ico, unknown])
    self.state_manager.error_broker.upload(existing_collection)
def get_notification_data():
    collection = PipelineFileCollection(PipelineFile(GOOD_NC))
    collection_headers, collection_data = collection.get_table_data()

    data = {
        'input_file': 'good.nc',
        'processing_result': 'HANDLER_SUCCESS',
        'handler_start_time': '2017-10-23 16:05',
        'checks': None,
        'collection_headers': collection_headers,
        'collection_data': collection_data,
        'error_details': '',
        'upload_dir': None
    }
    return data
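
# Hedged usage sketch (not in the original module): get_notification_data() builds the context dict a
# notification template consumes; a caller might sanity-check the static fields like this.
def _sketch_notification_data_shape():
    data = get_notification_data()
    assert data['input_file'] == 'good.nc'
    assert data['processing_result'] == 'HANDLER_SUCCESS'
    assert data['error_details'] == ''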
def preprocess(self): """Here you can run code that needs to run before the compliance checker step. This might be where you specify which files in the "eligible_files" list are "UPLOAD_ONLY", or not published at all :return: None """ self.logger.info("Running preprocess from child class") (PipelineFileCollection( f for f in self.file_collection if f.publish_type.is_addition_type)[1:].filter_by_attribute_id( 'check_type', PipelineFileCheckType.UNSET).set_check_types( PipelineFileCheckType.NO_ACTION))
def test_good_dm_file_with_compliance_check(self):
    # this is tested as an update, to avoid raising an invalid input file error because of missing ancillary material
    preexisting_file = PipelineFileCollection()
    existing_file = PipelineFile(GOOD_NC,
                                 dest_path=os.path.join('IMOS/ANFOG/slocum_glider/TwoRocks20180503a/',
                                                        os.path.basename(GOOD_NC)))
    preexisting_file.update([existing_file])

    # set the files to UPLOAD_ONLY
    preexisting_file.set_publish_types(PipelineFilePublishType.UPLOAD_ONLY)

    # upload the 'preexisting_files' collection to the unit test's temporary upload location
    broker = get_storage_broker(self.config.pipeline_config['global']['upload_uri'])
    broker.upload(preexisting_file)

    handler = self.run_handler(GOOD_NC, check_params={'checks': ['cf', 'imos:1.4']})

    f = handler.file_collection.filter_by_attribute_id('file_type', FileType.NETCDF)
    # self.assertEqual(f[0].check_type, PipelineFileCheckType.NC_COMPLIANCE_CHECK)
    self.assertEqual(f[0].publish_type, PipelineFilePublishType.HARVEST_UPLOAD)
    self.assertEqual(f[0].dest_path,
                     'IMOS/ANFOG/slocum_glider/TwoRocks20180503a/'
                     'IMOS_ANFOG_BCEOPSTUV_20180503T080042Z_SL210_FV01_timeseries_END-20180505T054942Z.nc')
    self.assertTrue(f[0].is_checked)
    self.assertTrue(f[0].is_stored)
    self.assertTrue(f[0].is_harvested)
def test_clear_rt_deployment(self):
    # test 'clear-files' status: process and results identical to status 'renamed'
    preexisting_files = PipelineFileCollection()
    existing_file1 = PipelineFile(PREV_NC_RT,
                                  dest_path=os.path.join('IMOS/ANFOG/REALTIME/slocum_glider/TwoRocks20180503a/',
                                                         os.path.basename(PREV_NC_RT)))
    existing_file2 = PipelineFile(PREV_PNG_TRANSECT,
                                  dest_path=os.path.join('IMOS/ANFOG/REALTIME/slocum_glider/TwoRocks20180503a/',
                                                         os.path.basename(PREV_PNG_TRANSECT)))
    preexisting_files.update([existing_file1, existing_file2])

    # set the files to UPLOAD_ONLY
    preexisting_files.set_publish_types(PipelineFilePublishType.UPLOAD_ONLY)

    # upload the 'preexisting_files' collection to the unit test's temporary upload location
    broker = get_storage_broker(self.config.pipeline_config['global']['upload_uri'])
    broker.upload(preexisting_files)

    handler = self.run_handler(MISSION_STATUS_CLR)

    # the process should result in: input file unhandled, preexisting files deleted
    nc = handler.file_collection.filter_by_attribute_id('file_type', FileType.NETCDF)
    self.assertEqual(nc[0].publish_type, PipelineFilePublishType.DELETE_UNHARVEST)
    self.assertTrue(nc[0].is_deleted)

    png = handler.file_collection.filter_by_attribute_id('file_type', FileType.PNG)
    self.assertEqual(png[0].publish_type, PipelineFilePublishType.DELETE_ONLY)
    self.assertTrue(png[0].is_deleted)
def test_setup_upload_location_push_newer_file_bad_prefix(self):
    """
    Test case: the creation date of the incoming *.nc.gz is newer than the one already on storage

    HARVEST_UPLOAD the content of the *.nc.gz, BUT check that we don't delete files not starting with a
    valid value of GSLA_PREFIX_PATH. In our case we patch this global variable to empty to check this.
    """
    # create some PipelineFiles to represent the existing files on 'S3'
    preexisting_files = PipelineFileCollection()
    existing_file = PipelineFile(PREV_NC_GZ_STORAGE,
                                 dest_path=os.path.join('IMOS/OceanCurrent/GSLA/DM00/2018/',
                                                        os.path.basename(PREV_NC_GZ_STORAGE)))
    preexisting_files.update([existing_file])

    # set the files to UPLOAD_ONLY
    preexisting_files.set_publish_types(PipelineFilePublishType.UPLOAD_ONLY)

    # upload the 'preexisting_files' collection to the unit test's temporary upload location
    broker = get_storage_broker(self.config.pipeline_config['global']['upload_uri'])
    broker.upload(preexisting_files)

    # run the handler
    self.run_handler_with_exception(AttributeValidationError, NEWER_CREATION_DATE_NC_GZ,
                                    allowed_dest_path_regexes=["IMOS/OceanCurrent/GSLA"])
def test_setup_upload_location_push_older_file(self):
    """
    Test case: the creation date of the incoming *.nc.gz is older than the one already on storage

    NO_ACTION on the *.nc.gz
    """
    # create some PipelineFiles to represent the existing files on 'S3'
    preexisting_files = PipelineFileCollection()
    existing_file = PipelineFile(PREV_NC_GZ_STORAGE,
                                 dest_path=os.path.join('IMOS/OceanCurrent/GSLA/DM00/2018/',
                                                        os.path.basename(PREV_NC_GZ_STORAGE)))
    preexisting_files.update([existing_file])

    # set the files to UPLOAD_ONLY
    preexisting_files.set_publish_types(PipelineFilePublishType.UPLOAD_ONLY)

    # upload the 'preexisting_files' collection to the unit test's temporary upload location
    broker = get_storage_broker(self.config.pipeline_config['global']['upload_uri'])
    broker.upload(preexisting_files)

    # run the handler on the new file with an older creation date
    self.run_handler_with_exception(InvalidFileNameError, OLDER_CREATION_DATE_NC_GZ)
def test_overwrite_same_file(self, mock_callsign):
    # check that files with the same name are overwritten
    preexisting_files = PipelineFileCollection()
    existing_file1 = PipelineFile(
        GOOD_NC,
        dest_path=os.path.join(
            'IMOS/SOOP/SOOP-BA/VKAD_Antarctic-Discovery/Antarctic-Discovery_20160116-20160129/',
            os.path.basename(GOOD_NC)))
    existing_file2 = PipelineFile(
        CSV,
        dest_path=os.path.join(
            'IMOS/SOOP/SOOP-BA/VKAD_Antarctic-Discovery/Antarctic-Discovery_20160116-20160129/',
            os.path.basename(CSV)))
    preexisting_files.update([existing_file1, existing_file2])

    # upload the 'preexisting_files' collection to the unit test's temporary upload location
    broker = get_storage_broker(self.config.pipeline_config['global']['upload_uri'])
    broker.upload(preexisting_files)

    # run the handler
    handler = self.run_handler(GOOD_ZIP)

    nc = handler.file_collection.filter_by_attribute_id('file_type', FileType.NETCDF)
    self.assertEqual(nc[0].publish_type, PipelineFilePublishType.HARVEST_UPLOAD)
    self.assertEqual(nc[0].is_deleted, False)

    csvs = handler.file_collection.filter_by_attribute_id('file_type', FileType.CSV)
    for csv in csvs:
        if csv.name == os.path.basename(CSV):
            self.assertEqual(csv.publish_type, PipelineFilePublishType.UPLOAD_ONLY)
            self.assertEqual(csv.is_deleted, False)
def process_zip_rt(self):
    """
    Set the realtime file destination path based on the ANFOG_RT FV00 file attributes.
    The check that the zip contains an FV00 file has already been done.

    A ZIP typically contains:
        - one FV00 file (compulsory)
        - images (PNGs)
    All files have to be uploaded to S3.
    """
    self.process_zip_common('RT')

    # publish type of ancillary files set to UPLOAD_ONLY
    non_nc_files = PipelineFileCollection(f for f in self.file_collection
                                          if f.file_type is not FileType.NETCDF)
    non_nc_files.set_publish_types(PipelineFilePublishType.UPLOAD_ONLY)

    # Check whether the deployment exists on S3:
    # - if yes: the previous netCDF file needs to be deleted
    # - if not: the deployment is new, so add a new entry in Harvestmissionfile.csv
    results = self.state_query.query_storage(self.upload_destination)
    if results:
        # directory exists and contains files that need to be deleted
        self.delete_previous_version('RT', 'in_progress')
    else:
        # path doesn't exist, deployment is new
        self.set_deployment_status(self.primary_nc.src_path, 'in_progress')
def preprocess(self): """ Preprocessing for NRT and DM files - NRT: generate a NetCDF files based on input text file. Set the input file publish_type property to 'archive' - DM file collection: update the check_type and publish_type properties for non-NetCDF files. These files are not checked or harvested, but uploaded to S3 """ if self.custom_params is not None and self.custom_params.get( 'ship_callsign_ls'): self.ship_callsign_ls = self.custom_params['ship_callsign_ls'] else: self.ship_callsign_ls = ship_callsign_list() # Delayed mode file submitted as a zip archive if self.file_extension == '.zip': nc_file = self.file_collection.filter_by_attribute_id( 'file_type', FileType.NETCDF) if len(nc_file) != 1: raise InvalidInputFileError( "Expecting one netCDF file in ZIP archive '{zip}'".format( zip=os.path.basename(self.input_file))) # first process the NetCDF file to set the destination path for the file collection nc = nc_file[0] nc.dest_path = self.dest_path(nc.src_path) nc_dir_path = os.path.dirname(nc.dest_path) # SOOP-CO2 DM and FRMAP .txt,.pdf or/and .xml files. # Set check type to NONEMPTY and publish type to UPLOAD_ONLY non_nc_files = PipelineFileCollection( f for f in self.file_collection if f.file_type is not FileType.NETCDF) for non_nc in non_nc_files: non_nc.check_type = PipelineFileCheckType.FORMAT_CHECK non_nc.publish_type = PipelineFilePublishType.UPLOAD_ONLY non_nc.dest_path = os.path.join(nc_dir_path, non_nc.name) elif self.input_file.endswith('dat.txt'): # Single text file Realtime files (*dat.txt) rt_file = self.file_collection[0] rt_file.publish_type = PipelineFilePublishType.ARCHIVE_ONLY nrt_nc_file_path = soop_co2_nrt_nc_generator.process_co2_rt( rt_file, self.products_dir, self.ship_callsign_ls) nrt_nc_file = PipelineFile(nrt_nc_file_path) self.file_collection.add(nrt_nc_file) nrt_nc_file.publish_type = PipelineFilePublishType.HARVEST_UPLOAD
def preprocess(self): """ Preprocessing of Zip archive and NetCDF files Preprocessing consist in setting the destination path AND deleting previous version files - Zip contains netcdf , images ,text, doc, or xml file and raw file to archive dest_path is generated based on info stored in FV01 NetCDF file. update check_type and publish_type according to destination : raw files : move to archive =>publish_type property to 'archive' - text, doc, xml, images: basic checks uploaded to S3 => set check_type and publish_type attributesge accordingly """ netcdf = self.file_collection.filter_by_attribute_id( 'file_type', FileType.NETCDF) if len(netcdf) != 1: raise InvalidInputFileError( "Expecting one netCDF file from input file '{infile}'".format( infile=os.path.basename(self.input_file))) nc = netcdf[0] destination = dest_path_soop_ba(nc) nc.dest_path = os.path.join(destination, nc.name) results = self.state_query.query_storage(destination).keys() files_to_delete = self.get_previous_version(results, destination, nc.name) if files_to_delete: self.file_collection.update(files_to_delete) if self.file_type is FileType.ZIP: non_nc_files = PipelineFileCollection( f for f in self.file_collection if f.file_type is not FileType.NETCDF) for non_nc in non_nc_files: non_nc.check_type = PipelineFileCheckType.FORMAT_CHECK if non_nc.extension in ['.ek5', '.out', '.raw']: non_nc.publish_type = PipelineFilePublishType.ARCHIVE_ONLY dest_archive = archive_path_soop_ba(nc) non_nc.archive_path = os.path.join(dest_archive, non_nc.name) else: non_nc.publish_type = PipelineFilePublishType.UPLOAD_ONLY non_nc.dest_path = os.path.join(destination, non_nc.name) files_to_delete = self.get_previous_version( results, destination, non_nc.name) if files_to_delete: self.file_collection.update(files_to_delete)
def test_rt_update(self):
    """
    test the update of a realtime mission; the update consists of:
        - deletion of the previous netCDF
        - deletion of the transect png files
        - harvest of the new netCDF
        - overwriting of the other files
    """
    # create some PipelineFiles to represent the existing files on 'S3'
    preexisting_files = PipelineFileCollection()
    existing_file1 = PipelineFile(PREV_NC_RT,
                                  dest_path=os.path.join('IMOS/ANFOG/REALTIME/slocum_glider/TwoRocks20180503a/',
                                                         os.path.basename(PREV_NC_RT)))
    existing_file2 = PipelineFile(PREV_PNG_TRANSECT,
                                  dest_path=os.path.join('IMOS/ANFOG/REALTIME/slocum_glider/TwoRocks20180503a/',
                                                         os.path.basename(PREV_PNG_TRANSECT)))
    existing_file3 = PipelineFile(PREV_PNG_MISSION,
                                  dest_path=os.path.join('IMOS/ANFOG/REALTIME/slocum_glider/TwoRocks20180503a/',
                                                         os.path.basename(PREV_PNG_MISSION)))
    preexisting_files.update([existing_file1, existing_file2, existing_file3])

    # set the files to UPLOAD_ONLY
    preexisting_files.set_publish_types(PipelineFilePublishType.UPLOAD_ONLY)

    # upload the 'preexisting_files' collection to the unit test's temporary upload location
    broker = get_storage_broker(self.config.pipeline_config['global']['upload_uri'])
    broker.upload(preexisting_files)

    # run the handler
    handler = self.run_handler(GOOD_ZIP_RT)

    nc = handler.file_collection.filter_by_attribute_id('file_type', FileType.NETCDF)
    for n in nc:
        if n.name == os.path.basename(PREV_NC_RT):
            self.assertEqual(n.publish_type, PipelineFilePublishType.DELETE_UNHARVEST)
            self.assertTrue(n.is_deleted)
        else:
            self.assertEqual(n.publish_type, PipelineFilePublishType.HARVEST_UPLOAD)
            self.assertTrue(n.is_harvested)
            self.assertTrue(n.is_stored)

    pngs = handler.file_collection.filter_by_attribute_id('file_type', FileType.PNG)
    for png in pngs:
        if png.name == os.path.basename(PREV_PNG_MISSION):
            self.assertTrue(png.is_overwrite)
        else:
            self.assertTrue(png.is_uploaded)

    # no update of the HarvestMission list in this case
    csv = handler.file_collection.filter_by_attribute_id('file_type', FileType.CSV)
    self.assertEqual(len(csv), 0)
def test_setup_upload_location_push_newer_yearly_file(self):
    """
    Test case: the creation date of the incoming yearly *.nc.gz is newer than the one already on storage

    UPLOAD_ONLY the new incoming *.nc.gz
    DELETE_ONLY the previous *.nc.gz
    NO_ACTION on the nc inside the *.nc.gz
    """
    # create some PipelineFiles to represent the existing files on 'S3'
    preexisting_files = PipelineFileCollection()
    existing_file = PipelineFile(PREV_YEARLY_NC_GZ_STORAGE,
                                 dest_path=os.path.join('IMOS/OceanCurrent/GSLA/DM00/yearfiles',
                                                        os.path.basename(PREV_YEARLY_NC_GZ_STORAGE)))
    preexisting_files.update([existing_file])

    # set the files to UPLOAD_ONLY
    preexisting_files.set_publish_types(PipelineFilePublishType.UPLOAD_ONLY)

    # upload the 'preexisting_files' collection to the unit test's temporary upload location
    broker = get_storage_broker(self.config.pipeline_config['global']['upload_uri'])
    broker.upload(preexisting_files)

    # run the handler
    handler = self.run_handler(GOOD_YEARLY_FILE_DM00)

    nc_file = handler.file_collection.filter_by_attribute_id('file_type', FileType.NETCDF)[0]
    self.assertEqual(nc_file.publish_type, PipelineFilePublishType.NO_ACTION)

    nc_gz_file = handler.file_collection.filter_by_attribute_id('file_type', FileType.GZIP)[0]
    self.assertEqual(nc_gz_file.publish_type, PipelineFilePublishType.UPLOAD_ONLY)

    nc_gz_delete = handler.file_collection.filter_by_attribute_value(
        'name', os.path.basename(PREV_YEARLY_NC_GZ_STORAGE))[0]
    self.assertEqual(nc_gz_delete.publish_type, PipelineFilePublishType.DELETE_ONLY)
def test_setup_upload_location_push_file_newer_creation_date(self):
    """
    Test case: the creation date of the new *.nc is newer than the one already on storage
    """
    # create some PipelineFiles to represent the existing files on 'S3'
    preexisting_files = PipelineFileCollection()
    existing_file = PipelineFile(GOOD_NC_FV01, dest_path=AcornHandler.dest_path(GOOD_NC_FV01))
    preexisting_files.update([existing_file])

    # set the files to UPLOAD_ONLY
    preexisting_files.set_publish_types(PipelineFilePublishType.UPLOAD_ONLY)

    # upload the 'preexisting_files' collection to the unit test's temporary upload location
    broker = get_storage_broker(self.config.pipeline_config['global']['upload_uri'])
    broker.upload(preexisting_files)

    # create a new file based on GOOD_NC_FV01 and modify it with a newer creation date.
    # we patch the global variable from the handler in order to use the temporary broker file
    nc_file_new_creation_date_path = os.path.join(self.temp_dir, os.path.basename(GOOD_NC_FV01))
    shutil.copyfile(GOOD_NC_FV01, nc_file_new_creation_date_path)
    with Dataset(nc_file_new_creation_date_path, mode='r+') as nc_obj:
        delta_time = timedelta(1, 1, 1)
        new_time = datetime.strptime(nc_obj.date_created, '%Y-%m-%dT%H:%M:%SZ') + delta_time
        nc_obj.date_created = datetime.strftime(new_time, '%Y-%m-%dT%H:%M:%SZ')

    # run the handler on the new file with a newer creation date
    handler = self.handler_class(nc_file_new_creation_date_path, include_regexes=[r'IMOS_ACORN_.*\.nc'])
    handler.opendap_root = broker.prefix
    handler.run()

    nc_file = handler.file_collection.filter_by_attribute_id('file_type', FileType.NETCDF)[0]
    self.assertEqual(nc_file.publish_type, PipelineFilePublishType.HARVEST_UPLOAD)
def test_deletion_rt_after_dm_upload(self):
    """test deletion of an RT mission at upload of the related DM version"""
    preexisting_files = PipelineFileCollection()
    existing_file1 = PipelineFile(PREV_NC_RT,
                                  dest_path=os.path.join('IMOS/ANFOG/REALTIME/slocum_glider/TwoRocks20180503a/',
                                                         os.path.basename(PREV_NC_RT)))
    existing_file2 = PipelineFile(PREV_PNG_TRANSECT,
                                  dest_path=os.path.join('IMOS/ANFOG/REALTIME/slocum_glider/TwoRocks20180503a/',
                                                         os.path.basename(PREV_PNG_TRANSECT)))
    existing_file3 = PipelineFile(PREV_PNG_MISSION,
                                  dest_path=os.path.join('IMOS/ANFOG/REALTIME/slocum_glider/TwoRocks20180503a/',
                                                         os.path.basename(PREV_PNG_MISSION)))
    preexisting_files.update([existing_file1, existing_file2, existing_file3])

    # set the files to UPLOAD_ONLY
    preexisting_files.set_publish_types(PipelineFilePublishType.UPLOAD_ONLY)

    # upload the 'preexisting_files' collection to the unit test's temporary upload location
    broker = get_storage_broker(self.config.pipeline_config['global']['upload_uri'])
    broker.upload(preexisting_files)

    # run the handler
    handler = self.run_handler(GOOD_ZIP_DM, check_params={'checks': ['cf', 'imos:1.4']})

    nc = handler.file_collection.filter_by_attribute_id('file_type', FileType.NETCDF)
    for n in nc:
        if n.name == os.path.basename(PREV_NC_RT):
            self.assertEqual(n.publish_type, PipelineFilePublishType.DELETE_UNHARVEST)
            self.assertTrue(n.is_deleted)
        elif re.match(AnfogFileClassifier.DM_REGEX, n.name):
            self.assertEqual(n.publish_type, PipelineFilePublishType.HARVEST_UPLOAD)
            self.assertTrue(n.is_harvested)
            self.assertTrue(n.is_stored)
        else:
            self.assertEqual(n.publish_type, PipelineFilePublishType.ARCHIVE_ONLY)
            self.assertTrue(n.is_archived)

    pngs = handler.file_collection.filter_by_attribute_id('file_type', FileType.PNG)
    for png in pngs:
        self.assertTrue(png.is_deleted)
def test_setup_upload_location_push_file_older_creation_date(self):
    """
    Test case: the creation date of the new *.nc is older than the one already on storage
    """
    # create some PipelineFiles to represent the existing files on 'S3'
    preexisting_files = PipelineFileCollection()
    existing_file = PipelineFile(GOOD_NC_FV01, dest_path=AcornHandler.dest_path(GOOD_NC_FV01))
    preexisting_files.update([existing_file])

    # set the files to UPLOAD_ONLY
    preexisting_files.set_publish_types(PipelineFilePublishType.UPLOAD_ONLY)

    # upload the 'preexisting_files' collection to the unit test's temporary upload location
    broker = get_storage_broker(self.config.pipeline_config['global']['upload_uri'])
    broker.upload(preexisting_files)

    # create a new file based on GOOD_NC_FV01 and modify it with an older creation date
    nc_file_old_creation_date_path = os.path.join(self.temp_dir, os.path.basename(GOOD_NC_FV01))
    shutil.copyfile(GOOD_NC_FV01, nc_file_old_creation_date_path)
    with Dataset(nc_file_old_creation_date_path, mode='r+') as nc_obj:
        delta_time = timedelta(1, 1, 1)
        new_time = datetime.strptime(nc_obj.date_created, '%Y-%m-%dT%H:%M:%SZ') - delta_time
        nc_obj.date_created = datetime.strftime(new_time, '%Y-%m-%dT%H:%M:%SZ')

    # run the handler on the new file with an older creation date
    handler = self.handler_class(nc_file_old_creation_date_path, include_regexes=[r'IMOS_ACORN_.*\.nc'])
    handler.opendap_root = broker.prefix
    handler.run()

    self.assertIsInstance(handler.error, InvalidFileContentError)
def test_dstg(self):
    preexisting_file = PipelineFileCollection()
    existing_file = PipelineFile(DSTG,
                                 dest_path=os.path.join(
                                     'Department_of_Defence/DSTG/slocum_glider/TalismanSaberB20130706/',
                                     os.path.basename(DSTG)))
    preexisting_file.update([existing_file])

    # set the files to UPLOAD_ONLY
    preexisting_file.set_publish_types(PipelineFilePublishType.UPLOAD_ONLY)

    # upload the 'preexisting_files' collection to the unit test's temporary upload location
    broker = get_storage_broker(self.config.pipeline_config['global']['upload_uri'])
    broker.upload(preexisting_file)

    # test processing of DSTG and NRL NetCDF files
    handler = self.run_handler(DSTG)

    f = handler.file_collection[0]
    self.assertEqual(f.publish_type, PipelineFilePublishType.HARVEST_UPLOAD)
    self.assertEqual(f.dest_path,
                     'Department_of_Defence/DSTG/slocum_glider/TalismanSaberB20130706/' + f.name)
    self.assertTrue(f.is_stored)
    self.assertTrue(f.is_harvested)
def test_setup_upload_location_push_same_file(self):
    """
    Test case: push the same file twice to $INCOMING_DIR

    HARVEST_UPLOAD the incoming *.nc.gz
    NO_ACTION on the nc inside the *.nc.gz
    """
    # create some PipelineFiles to represent the existing files on 'S3'
    preexisting_files = PipelineFileCollection()
    existing_file = PipelineFile(PREV_NC_GZ_STORAGE,
                                 dest_path=os.path.join('IMOS/OceanCurrent/GSLA/DM00/2018/',
                                                        os.path.basename(PREV_NC_GZ_STORAGE)))
    preexisting_files.update([existing_file])

    # set the files to UPLOAD_ONLY
    preexisting_files.set_publish_types(PipelineFilePublishType.UPLOAD_ONLY)

    # upload the 'preexisting_files' collection to the unit test's temporary upload location
    broker = get_storage_broker(self.config.pipeline_config['global']['upload_uri'])
    broker.upload(preexisting_files)

    # run the handler by uploading the same file again
    handler = self.run_handler(PREV_NC_GZ_STORAGE)

    nc_file = handler.file_collection.filter_by_attribute_id('file_type', FileType.NETCDF)[0]
    self.assertEqual(nc_file.publish_type, PipelineFilePublishType.NO_ACTION)

    nc_gz_file = handler.file_collection.filter_by_attribute_id('file_type', FileType.GZIP)[0]
    self.assertEqual(nc_gz_file.publish_type, PipelineFilePublishType.HARVEST_UPLOAD)
def test_renamed_rt_deployment(self):
    # test deletion of RT files when a deployment is renamed, or when cleaning files on S3
    preexisting_files = PipelineFileCollection()
    existing_file1 = PipelineFile(PREV_NC_RT,
                                  dest_path=os.path.join('IMOS/ANFOG/REALTIME/slocum_glider/TwoRocks20180503a/',
                                                         os.path.basename(PREV_NC_RT)))
    existing_file2 = PipelineFile(PREV_PNG_TRANSECT,
                                  dest_path=os.path.join('IMOS/ANFOG/REALTIME/slocum_glider/TwoRocks20180503a/',
                                                         os.path.basename(PREV_PNG_TRANSECT)))
    existing_file3 = PipelineFile(PREV_PNG_MISSION,
                                  dest_path=os.path.join('IMOS/ANFOG/REALTIME/slocum_glider/TwoRocks20180503a/',
                                                         os.path.basename(PREV_PNG_MISSION)))
    preexisting_files.update([existing_file1, existing_file2, existing_file3])

    # set the files to UPLOAD_ONLY
    preexisting_files.set_publish_types(PipelineFilePublishType.UPLOAD_ONLY)

    # upload the 'preexisting_files' collection to the unit test's temporary upload location
    broker = get_storage_broker(self.config.pipeline_config['global']['upload_uri'])
    broker.upload(preexisting_files)

    handler = self.run_handler(MISSION_STATUS_RENAMED)

    # the process should result in: input file unhandled, preexisting files deleted, csv file harvested
    csv = handler.file_collection.filter_by_attribute_id('file_type', FileType.CSV)
    self.assertEqual(csv[0].publish_type, PipelineFilePublishType.HARVEST_ONLY)
    self.assertTrue(csv[0].is_harvested)

    nc = handler.file_collection.filter_by_attribute_id('file_type', FileType.NETCDF)
    self.assertEqual(nc[0].publish_type, PipelineFilePublishType.DELETE_UNHARVEST)
    self.assertTrue(nc[0].is_deleted)

    pngs = handler.file_collection.filter_by_attribute_id('file_type', FileType.PNG)
    for png in pngs:
        self.assertEqual(png.publish_type, PipelineFilePublishType.DELETE_ONLY)
        self.assertTrue(png.is_deleted)
GETFEATURE_OLD_PRODUCTS_FILE = os.path.join(TEST_ROOT, 'getFeature_old_products.json')
GETFEATURE_EMPTY_FILE = os.path.join(TEST_ROOT, 'getFeature_empty.json')

with open(GETFEATURE_FILE) as f:
    TEST_GETFEATURE_JSON = f.read()

with open(GETFEATURE_OLD_PRODUCTS_FILE) as f:
    TEST_GETFEATURE_OLD_PRODUCTS_JSON = f.read()

with open(GETFEATURE_EMPTY_FILE) as f:
    TEST_GETFEATURE_EMPTY_JSON = f.read()

# Create a collection of input files for the products.
# These will be uploaded to the mocked equivalent of S3 (where the real input files will be)
features = json.loads(TEST_GETFEATURE_JSON)['features']
INPUT_FILE_COLLECTION = PipelineFileCollection()
for f in features:
    pf = PipelineFile(
        os.path.join(TEST_ROOT, os.path.basename(f['properties']['url'])),
        dest_path=f['properties']['url']
    )
    pf.publish_type = PipelineFilePublishType.UPLOAD_ONLY
    INPUT_FILE_COLLECTION.add(pf)


class TestMooringsProductsHandler(HandlerTestCase):
    def setUp(self):
        self.handler_class = MooringsProductsHandler
        upload_broker = get_storage_broker(self.config.pipeline_config['global']['upload_uri'])
        upload_broker.upload(INPUT_FILE_COLLECTION)
        super().setUp()
def test_delete_previous_file(self, mock_callsign):
    # create some PipelineFiles to represent the existing files on 'S3'
    preexisting_files = PipelineFileCollection()
    existing_file1 = PipelineFile(
        PREV_NC,
        dest_path=os.path.join(
            'IMOS/SOOP/SOOP-BA/VKAD_Antarctic-Discovery/Antarctic-Discovery_20160116-20160129/',
            os.path.basename(PREV_NC)))
    existing_file2 = PipelineFile(
        CSV,
        dest_path=os.path.join(
            'IMOS/SOOP/SOOP-BA/VKAD_Antarctic-Discovery/Antarctic-Discovery_20160116-20160129/',
            os.path.basename(CSV)))
    existing_file3 = PipelineFile(
        PNG,
        dest_path=os.path.join(
            'IMOS/SOOP/SOOP-BA/VKAD_Antarctic-Discovery/Antarctic-Discovery_20160116-20160129/',
            os.path.basename(PNG)))
    preexisting_files.update([existing_file1, existing_file2, existing_file3])

    # set the files to UPLOAD_ONLY
    preexisting_files.set_publish_types(PipelineFilePublishType.UPLOAD_ONLY)

    # upload the 'preexisting_files' collection to the unit test's temporary upload location
    broker = get_storage_broker(self.config.pipeline_config['global']['upload_uri'])
    broker.upload(preexisting_files)

    # run the handler
    handler = self.run_handler(GOOD_ZIP)

    # add some tests to make sure the previous files were handled appropriately, e.g.
    # - they were added as deletions
    # - they were successfully deleted
    # - they were the *only* ones deleted
    nc_files = handler.file_collection.filter_by_attribute_id('file_type', FileType.NETCDF)
    for nc in nc_files:
        if nc.name == os.path.basename(PREV_NC):
            self.assertEqual(nc.publish_type, PipelineFilePublishType.DELETE_UNHARVEST)
            self.assertEqual(nc.is_deleted, True)
        else:
            self.assertEqual(nc.is_deleted, False)

    csvs = handler.file_collection.filter_by_attribute_id('file_type', FileType.CSV)
    for csv in csvs:
        if csv.name == os.path.basename(CSV):
            self.assertEqual(csv.publish_type, PipelineFilePublishType.UPLOAD_ONLY)
            self.assertEqual(csv.is_deleted, False)

    pngs = handler.file_collection.filter_by_attribute_id('file_type', FileType.PNG)
    for png in pngs:
        if png.name == os.path.basename(PNG):
            self.assertEqual(png.is_deleted, True)
def get_previous_version(self, previous_file_list, path, input_file_name):
    """
    Find the previous version of each incoming file based on its type/extension, and add it to the file
    collection with the correct publish type.
    The extension can be: '.inf', '.nc.png', '.pitch.csv', '.roll.csv', '.gps.csv'

    inputs:
        previous_file_list: dictionary containing the file listing (full path) and metadata from the destination
        input_file_name: file basename
        path: full destination path
    """
    if not previous_file_list:
        return

    files_to_delete = PipelineFileCollection()

    try:
        extension = ALLOWED_CONTENT_EXTENSIONS.match(input_file_name).groupdict()['extension']
    except (AttributeError, KeyError):
        # AttributeError: the pattern didn't match at all; KeyError: no 'extension' group in the match
        raise ValueError(
            "unable to determine extension from file name {infile}".format(infile=input_file_name))

    # get the set of previous files' basenames to search through
    basenames = {os.path.basename(f) for f in previous_file_list}

    this_extension_pattern = re.compile(r".*\.{ext}$".format(ext=extension))
    if input_file_name not in basenames:
        previous_file = [f for f in previous_file_list if this_extension_pattern.match(f)]

        if extension == 'nc':
            if len(previous_file) != 1:
                raise ValueError(
                    "Expected exactly 1 previous version of the netcdf file, found {n}. Aborting"
                    .format(n=len(previous_file)))
        else:
            # if the uploaded file has the same name as the published file => no action, the file will
            # be overwritten; otherwise sort files per wildcard and work out which one to delete
            # (the previous file wildcard can be '.inf', '.nc.png', '.pitch.csv', '.roll.csv', '.gps.csv')
            if len(previous_file) > 1:
                raise ValueError(
                    "Found more than one previous version of the extension '{ext}'. Aborting"
                    .format(ext=extension))
            elif len(previous_file) == 0:
                return

        prev_file = previous_file[0]
        dest_path = os.path.join(path, os.path.basename(prev_file))
        self.logger.info("adding deletion of previous file '{dest_path}'".format(dest_path=dest_path))

        file_to_delete = PipelineFile(prev_file, is_deletion=True, dest_path=dest_path)

        if extension == 'nc':
            file_to_delete.publish_type = PipelineFilePublishType.DELETE_UNHARVEST
        else:
            file_to_delete.publish_type = PipelineFilePublishType.DELETE_ONLY

        files_to_delete.add(file_to_delete)

    return files_to_delete
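
# Hedged usage sketch (illustrative, not in the original module): how a caller such as the SOOP-BA
# preprocess() earlier in this listing consumes get_previous_version(). The 'handler' and 'destination'
# names are assumptions for the example; the publish types follow from the method above.
results = handler.state_query.query_storage(destination).keys()
files_to_delete = handler.get_previous_version(results, destination, 'IMOS_example.nc')
if files_to_delete:
    # a previous .nc comes back flagged DELETE_UNHARVEST, ancillary files DELETE_ONLY,
    # so merging the collection schedules the deletions alongside the new additions
    handler.file_collection.update(files_to_delete)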