Example #1
    def process(self):
        """Handle a zip file containing images and no NetCDF files. In this case we just want to publish the zip file
        itself, not the individual images. If we encounter a "mixed" zip file with images and netCDF files,
        we're just going to give up, for now.
        """
        images = PipelineFileCollection(f for f in self.file_collection
                                        if f.file_type.is_image_type)
        netcdfs = self.file_collection.filter_by_attribute_id(
            'file_type', FileType.NETCDF)
        is_zip = self.file_type is FileType.ZIP
        have_images = len(images) > 0
        have_netcdfs = len(netcdfs) > 0
        if is_zip and have_images:
            if have_netcdfs:
                raise InvalidFileContentError(
                    "Zip file contains both images and netCDFs. Don't know what to do!"
                    " They are handled differently, so please upload only one at a time."
                )
            if not DwmFileClassifier.SOTS_IMAGES_ZIP_PATTERN.match(
                    self.file_basename):
                raise InvalidFileNameError(
                    "Zip file contains images, but its name does not match pattern for images zip file "
                    "(regular expression '{p}')".format(
                        p=DwmFileClassifier.SOTS_IMAGES_ZIP_PATTERN.pattern))

            self.logger.info(
                "Zip file contains images and no netCDF files. "
                "Publishing original zip file instead of its contents.")

            self.file_collection.set_publish_types(
                PipelineFilePublishType.NO_ACTION)
            self.input_file_object.publish_type = PipelineFilePublishType.HARVEST_UPLOAD
            self.file_collection.add(self.input_file_object)
Example #2
def get_harvest_collection(delete=False,
                           late_deletion=False,
                           with_store=False,
                           already_stored=False):
    pf_bad = PipelineFile(BAD_NC,
                          is_deletion=delete,
                          late_deletion=late_deletion)
    pf_empty = PipelineFile(EMPTY_NC,
                            is_deletion=delete,
                            late_deletion=late_deletion)
    pf_good = PipelineFile(GOOD_NC,
                           is_deletion=delete,
                           late_deletion=late_deletion)

    collection = PipelineFileCollection([pf_bad, pf_empty, pf_good])

    if with_store:
        publish_type = PipelineFilePublishType.DELETE_UNHARVEST if delete else PipelineFilePublishType.HARVEST_UPLOAD
    else:
        publish_type = PipelineFilePublishType.UNHARVEST_ONLY if delete else PipelineFilePublishType.HARVEST_ONLY

    for pipeline_file in collection:
        pipeline_file.is_stored = already_stored
        pipeline_file.dest_path = os.path.join(
            'DUMMY', os.path.basename(pipeline_file.src_path))
        pipeline_file.publish_type = publish_type

    return collection
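
A minimal usage sketch for the helper above (the test method name below is hypothetical; it only relies on the attributes and publish types that get_harvest_collection itself sets):

def test_get_harvest_collection_publish_types(self):
    # deletions without a store broker should be flagged UNHARVEST_ONLY
    collection = get_harvest_collection(delete=True)
    for pipeline_file in collection:
        self.assertTrue(pipeline_file.is_deletion)
        self.assertEqual(pipeline_file.publish_type,
                         PipelineFilePublishType.UNHARVEST_ONLY)

    # additions with a store configured should be flagged HARVEST_UPLOAD
    collection = get_harvest_collection(with_store=True)
    for pipeline_file in collection:
        self.assertEqual(pipeline_file.publish_type,
                         PipelineFilePublishType.HARVEST_UPLOAD)
        self.assertTrue(pipeline_file.dest_path.startswith('DUMMY'))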
Example #3
    def test_cleanup(self):
        nc = PipelineFile(GOOD_NC, dest_path=os.path.basename(GOOD_NC))
        png = PipelineFile(INVALID_PNG,
                           dest_path=os.path.basename(INVALID_PNG))
        ico = PipelineFile(TEST_ICO, dest_path=os.path.basename(TEST_ICO))
        unknown = PipelineFile(UNKNOWN_FILE_TYPE,
                               dest_path=os.path.basename(UNKNOWN_FILE_TYPE))
        existing_collection = PipelineFileCollection([nc, png, ico, unknown])
        self.state_manager.error_broker.upload(existing_collection)

        self.state_manager.move_to_processing()

        actual_error_files_before_cleanup = [
            v.dest_path for v in self.state_manager.error_broker.query()
        ]
        expected_error_files_before_cleanup = [
            'good.nc', 'test.unknown_file_extension', 'test.ico', 'invalid.png'
        ]
        self.assertCountEqual(expected_error_files_before_cleanup,
                              actual_error_files_before_cleanup)

        self.state_manager.success_exit_policies.append(
            ExitPolicy.DELETE_CUSTOM_REGEXES_FROM_ERROR_STORE)
        self.state_manager.move_to_success()

        actual_error_files_after_cleanup = [
            v.dest_path for v in self.state_manager.error_broker.query()
        ]
        expected_error_files_after_cleanup = ['good.nc', 'invalid.png']
        self.assertCountEqual(expected_error_files_after_cleanup,
                              actual_error_files_after_cleanup)
Example #4
    def setUp(self):
        self.logger = get_pipeline_logger('unittest')

        self.dummy_input_file = 'dummy.input_file'
        incoming_file_path = os.path.join(
            self.config.pipeline_config['watch']['incoming_dir'],
            os.path.basename(self.temp_nc_file))
        safe_copy_file(self.temp_nc_file, incoming_file_path)

        celery_request = type('DummyRequest', (object, ),
                              {'id': 'NO_REQUEST_ID'})()
        self.state_manager = IncomingFileStateManager(
            incoming_file_path,
            pipeline_name='UNITTEST',
            config=self.config,
            logger=self.logger,
            celery_request=celery_request)
        self.state_manager.handler = MagicMock(
            file_basename=self.dummy_input_file,
            error_cleanup_regexes=[r'test.*'])

        previous_file_same_name = PipelineFile(
            self.temp_nc_file,
            dest_path='dummy.input_file.40c4ec0d-c9db-498d-84f9-01011330086e')
        nc = PipelineFile(GOOD_NC, dest_path=os.path.basename(GOOD_NC))
        png = PipelineFile(INVALID_PNG,
                           dest_path=os.path.basename(INVALID_PNG))
        ico = PipelineFile(TEST_ICO, dest_path=os.path.basename(TEST_ICO))
        unknown = PipelineFile(UNKNOWN_FILE_TYPE,
                               dest_path=os.path.basename(UNKNOWN_FILE_TYPE))
        existing_collection = PipelineFileCollection(
            [previous_file_same_name, nc, png, ico, unknown])
        self.state_manager.error_broker.upload(existing_collection)
Example #5
def get_notification_data():
    collection = PipelineFileCollection(PipelineFile(GOOD_NC))
    collection_headers, collection_data = collection.get_table_data()

    data = {
        'input_file': 'good.nc',
        'processing_result': 'HANDLER_SUCCESS',
        'handler_start_time': '2017-10-23 16:05',
        'checks': None,
        'collection_headers': collection_headers,
        'collection_data': collection_data,
        'error_details': '',
        'upload_dir': None
    }

    return data
Example #6
    def preprocess(self):
        """Here you can run code that needs to run before the compliance checker step. This might be where you
        specify which files in the "eligible_files" list are "UPLOAD_ONLY", or not published at all.

        :return: None
        """
        self.logger.info("Running preprocess from child class")
        (PipelineFileCollection(
            f for f in self.file_collection
            if f.publish_type.is_addition_type)[1:].filter_by_attribute_id(
                'check_type', PipelineFileCheckType.UNSET).set_check_types(
                    PipelineFileCheckType.NO_ACTION))
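
For readability, the chained expression above can be unrolled into the equivalent step-by-step calls below (a sketch that only uses the collection methods already shown in this example):

# unrolled equivalent of the chained expression, inside the same preprocess method

# files that are being added (rather than deleted) in this run
addition_files = PipelineFileCollection(
    f for f in self.file_collection if f.publish_type.is_addition_type)

# skip the first addition and keep the rest
remaining_files = addition_files[1:]

# of the remaining files, select those whose check type has not been set yet ...
unset_files = remaining_files.filter_by_attribute_id(
    'check_type', PipelineFileCheckType.UNSET)

# ... and exclude them from compliance checking
unset_files.set_check_types(PipelineFileCheckType.NO_ACTION)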
Example #7
    def test_good_dm_file_with_compliance_check(self):
        # this is tested as an update to avoid raising an invalid input file error because of missing ancillary material
        preexisting_file = PipelineFileCollection()

        existing_file = PipelineFile(GOOD_NC, dest_path=os.path.join(
            'IMOS/ANFOG/slocum_glider/TwoRocks20180503a/', os.path.basename(GOOD_NC)))

        preexisting_file.update([existing_file])

        # set the files to UPLOAD_ONLY
        preexisting_file.set_publish_types(PipelineFilePublishType.UPLOAD_ONLY)

        # upload the 'preexisting_files' collection to the unit test's temporary upload location
        broker = get_storage_broker(self.config.pipeline_config['global']['upload_uri'])
        broker.upload(preexisting_file)

        handler = self.run_handler(GOOD_NC, check_params={'checks': ['cf', 'imos:1.4']})

        f = handler.file_collection.filter_by_attribute_id('file_type', FileType.NETCDF)
        # self.assertEqual(f[0].check_type, PipelineFileCheckType.NC_COMPLIANCE_CHECK)
        self.assertEqual(f[0].publish_type, PipelineFilePublishType.HARVEST_UPLOAD)

        self.assertEqual(f[0].dest_path,
                         'IMOS/ANFOG/slocum_glider/TwoRocks20180503a/'
                         'IMOS_ANFOG_BCEOPSTUV_20180503T080042Z_SL210_FV01_timeseries_END-20180505T054942Z.nc')
        self.assertTrue(f[0].is_checked)
        self.assertTrue(f[0].is_stored)
        self.assertTrue(f[0].is_harvested)
Example #8
    def test_clear_rt_deployment(self):

        # TEST 'clear-files' status. Process and results are identical to the 'renamed' status
        preexisting_files = PipelineFileCollection()

        existing_file1 = PipelineFile(PREV_NC_RT, dest_path=os.path.join(
            'IMOS/ANFOG/REALTIME/slocum_glider/TwoRocks20180503a/', os.path.basename(PREV_NC_RT)))

        existing_file2 = PipelineFile(PREV_PNG_TRANSECT, dest_path=os.path.join(
            'IMOS/ANFOG/REALTIME/slocum_glider/TwoRocks20180503a/', os.path.basename(PREV_PNG_TRANSECT)))

        preexisting_files.update([existing_file1, existing_file2])

        # set the files to UPLOAD_ONLY
        preexisting_files.set_publish_types(PipelineFilePublishType.UPLOAD_ONLY)

        # upload the 'preexisting_files' collection to the unit test's temporary upload location
        broker = get_storage_broker(self.config.pipeline_config['global']['upload_uri'])
        broker.upload(preexisting_files)


        handler = self.run_handler(MISSION_STATUS_CLR)

        # Processing should result in: input file unhandled, preexisting files deleted
        nc = handler.file_collection.filter_by_attribute_id('file_type', FileType.NETCDF)
        self.assertEqual(nc[0].publish_type, PipelineFilePublishType.DELETE_UNHARVEST)
        self.assertTrue(nc[0].is_deleted)

        png = handler.file_collection.filter_by_attribute_id('file_type', FileType.PNG)
        self.assertEqual(png[0].publish_type, PipelineFilePublishType.DELETE_ONLY)
        self.assertTrue(png[0].is_deleted)
Example #9
    def test_setup_upload_location_push_newer_file_bad_prefix(self):
        """
        Test case: Check creation date of incoming *.nc.gz is newer that one already on storage
                   HARVEST_UPLOAD the content of the *nc.gz

                   BUT check THAT
                   We don't delete files not starting with a good value of GSLA_PREFIX_PATH.
                   In our case we patch this global variable to empty to check this
        """
        # create some PipelineFiles to represent the existing files on 'S3'
        preexisting_files = PipelineFileCollection()

        existing_file = PipelineFile(PREV_NC_GZ_STORAGE,
                                     dest_path=os.path.join(
                                         'IMOS/OceanCurrent/GSLA/DM00/2018/',
                                         os.path.basename(PREV_NC_GZ_STORAGE)))

        preexisting_files.update([existing_file])

        # set the files to UPLOAD_ONLY
        preexisting_files.set_publish_types(
            PipelineFilePublishType.UPLOAD_ONLY)

        # upload the 'preexisting_files' collection to the unit test's temporary upload location
        broker = get_storage_broker(
            self.config.pipeline_config['global']['upload_uri'])
        broker.upload(preexisting_files)

        # run the handler
        self.run_handler_with_exception(
            AttributeValidationError,
            NEWER_CREATION_DATE_NC_GZ,
            allowed_dest_path_regexes=["IMOS/OceanCurrent/GSLA"])
Example #10
    def test_setup_upload_location_push_older_file(self):
        """
        Test case: Check creation date of incoming *.nc.gz is older that one already on storage
                   NO_ACTION  *nc.gz
        """
        # create some PipelineFiles to represent the existing files on 'S3'
        preexisting_files = PipelineFileCollection()

        existing_file = PipelineFile(PREV_NC_GZ_STORAGE,
                                     dest_path=os.path.join(
                                         'IMOS/OceanCurrent/GSLA/DM00/2018/',
                                         os.path.basename(PREV_NC_GZ_STORAGE)))

        preexisting_files.update([existing_file])

        # set the files to UPLOAD_ONLY
        preexisting_files.set_publish_types(
            PipelineFilePublishType.UPLOAD_ONLY)

        # upload the 'preexisting_files' collection to the unit test's temporary upload location
        broker = get_storage_broker(
            self.config.pipeline_config['global']['upload_uri'])
        broker.upload(preexisting_files)

        # run the handler on the new file with an older creation date
        self.run_handler_with_exception(InvalidFileNameError,
                                        OLDER_CREATION_DATE_NC_GZ)
Example #11
    def test_overwrite_same_file(self, mock_callsign):
        # check that files with the same name are overwritten
        preexisting_files = PipelineFileCollection()

        existing_file1 = PipelineFile(
            GOOD_NC,
            dest_path=os.path.join(
                'IMOS/SOOP/SOOP-BA/VKAD_Antarctic-Discovery/Antarctic-Discovery_20160116-20160129/',
                os.path.basename(GOOD_NC)))

        existing_file2 = PipelineFile(
            CSV,
            dest_path=os.path.join(
                'IMOS/SOOP/SOOP-BA/VKAD_Antarctic-Discovery/Antarctic-Discovery_20160116-20160129/',
                os.path.basename(CSV)))

        preexisting_files.update([existing_file1, existing_file2])
        # upload the 'preexisting_files' collection to the unit test's temporary upload location
        broker = get_storage_broker(
            self.config.pipeline_config['global']['upload_uri'])
        broker.upload(preexisting_files)

        # run the handler
        handler = self.run_handler(GOOD_ZIP)
        nc = handler.file_collection.filter_by_attribute_id(
            'file_type', FileType.NETCDF)
        self.assertEqual(nc[0].publish_type,
                         PipelineFilePublishType.HARVEST_UPLOAD)
        self.assertEqual(nc[0].is_deleted, False)

        csvs = handler.file_collection.filter_by_attribute_id(
            'file_type', FileType.CSV)
        for csv in csvs:
            if csv.name == os.path.basename(CSV):
                self.assertEqual(csv.publish_type,
                                 PipelineFilePublishType.UPLOAD_ONLY)
                self.assertEqual(csv.is_deleted, False)
Example #12
    def process_zip_rt(self):
        """
        Set realtime file destination path based on ANFOG_RT FV00 file attributes
        Check that zip contains a fv00 has already been done
        ZIP typically contains :
            - one FV00 (compulsory)
            - IMAGES (PNGs)
        all files have to be uploaded to S3
        """
        self.process_zip_common('RT')
        # publish type of ancillary files set to UPLOAD_ONLY
        non_nc_files = PipelineFileCollection(
            f for f in self.file_collection
            if (f.file_type is not FileType.NETCDF))
        non_nc_files.set_publish_types(PipelineFilePublishType.UPLOAD_ONLY)

        # Check whether the deployment exists on S3:
        # - if yes: the previous netCDF file needs to be deleted
        # - if not: add a new entry in Harvestmissionfile.csv
        results = self.state_query.query_storage(self.upload_destination)
        if results:  # directory exists, contains files that need to be deleted
            self.delete_previous_version('RT', 'in_progress')
        else:  # path doesn't exist, deployment is new
            self.set_deployment_status(self.primary_nc.src_path, 'in_progress')
Example #13
    def preprocess(self):
        """ Preprocessing for NRT and DM files
           - NRT: generate a NetCDF files based on input text file.
             Set the input file publish_type property to 'archive'
           - DM file collection: update the check_type and publish_type properties for non-NetCDF files.
             These files are not checked or harvested, but uploaded to S3

        """
        if self.custom_params is not None and self.custom_params.get(
                'ship_callsign_ls'):
            self.ship_callsign_ls = self.custom_params['ship_callsign_ls']
        else:
            self.ship_callsign_ls = ship_callsign_list()

        # Delayed mode file submitted as a zip archive
        if self.file_extension == '.zip':
            nc_file = self.file_collection.filter_by_attribute_id(
                'file_type', FileType.NETCDF)
            if len(nc_file) != 1:
                raise InvalidInputFileError(
                    "Expecting one netCDF file in ZIP archive '{zip}'".format(
                        zip=os.path.basename(self.input_file)))

            # first process the NetCDF file to set the destination path for the file collection
            nc = nc_file[0]
            nc.dest_path = self.dest_path(nc.src_path)
            nc_dir_path = os.path.dirname(nc.dest_path)

            # SOOP-CO2 DM and FRMAP .txt, .pdf and/or .xml files.
            # Set check type to FORMAT_CHECK and publish type to UPLOAD_ONLY
            non_nc_files = PipelineFileCollection(
                f for f in self.file_collection
                if f.file_type is not FileType.NETCDF)
            for non_nc in non_nc_files:
                non_nc.check_type = PipelineFileCheckType.FORMAT_CHECK
                non_nc.publish_type = PipelineFilePublishType.UPLOAD_ONLY
                non_nc.dest_path = os.path.join(nc_dir_path, non_nc.name)

        elif self.input_file.endswith('dat.txt'):
            # Single realtime text file (*dat.txt)
            rt_file = self.file_collection[0]
            rt_file.publish_type = PipelineFilePublishType.ARCHIVE_ONLY

            nrt_nc_file_path = soop_co2_nrt_nc_generator.process_co2_rt(
                rt_file, self.products_dir, self.ship_callsign_ls)
            nrt_nc_file = PipelineFile(nrt_nc_file_path)
            self.file_collection.add(nrt_nc_file)
            nrt_nc_file.publish_type = PipelineFilePublishType.HARVEST_UPLOAD
Example #14
    def preprocess(self):
        """ Preprocessing of Zip archive and NetCDF files
            Preprocessing consist in setting the destination path AND deleting previous version files
            - Zip contains netcdf , images ,text, doc, or xml file and raw file to archive
             dest_path is generated based on info stored in FV01 NetCDF file.
             update check_type and publish_type according to destination :
             raw files :  move to archive =>publish_type property to 'archive'
            - text, doc, xml, images: basic checks
              uploaded to S3 => set check_type and publish_type attributesge accordingly
        """

        netcdf = self.file_collection.filter_by_attribute_id(
            'file_type', FileType.NETCDF)
        if len(netcdf) != 1:
            raise InvalidInputFileError(
                "Expecting one netCDF file from input file '{infile}'".format(
                    infile=os.path.basename(self.input_file)))

        nc = netcdf[0]
        destination = dest_path_soop_ba(nc)
        nc.dest_path = os.path.join(destination, nc.name)

        results = self.state_query.query_storage(destination).keys()
        files_to_delete = self.get_previous_version(results, destination,
                                                    nc.name)
        if files_to_delete:
            self.file_collection.update(files_to_delete)

        if self.file_type is FileType.ZIP:
            non_nc_files = PipelineFileCollection(
                f for f in self.file_collection
                if f.file_type is not FileType.NETCDF)
            for non_nc in non_nc_files:
                non_nc.check_type = PipelineFileCheckType.FORMAT_CHECK
                if non_nc.extension in ['.ek5', '.out', '.raw']:
                    non_nc.publish_type = PipelineFilePublishType.ARCHIVE_ONLY
                    dest_archive = archive_path_soop_ba(nc)
                    non_nc.archive_path = os.path.join(dest_archive,
                                                       non_nc.name)
                else:
                    non_nc.publish_type = PipelineFilePublishType.UPLOAD_ONLY
                    non_nc.dest_path = os.path.join(destination, non_nc.name)

                    files_to_delete = self.get_previous_version(
                        results, destination, non_nc.name)
                    if files_to_delete:
                        self.file_collection.update(files_to_delete)
Example #15
    def test_rt_update(self):
        """ test the update of realtime mission:
         update consits in :
         - deletion of previous netCDF
         - deletion of transect png files
         - harvest of new netCDF
         - overwriting of other files
        """
        # create some PipelineFiles to represent the existing files on 'S3'
        preexisting_files = PipelineFileCollection()

        existing_file1 = PipelineFile(PREV_NC_RT, dest_path=os.path.join(
            'IMOS/ANFOG/REALTIME/slocum_glider/TwoRocks20180503a/', os.path.basename(PREV_NC_RT)))

        existing_file2 = PipelineFile(PREV_PNG_TRANSECT, dest_path=os.path.join(
            'IMOS/ANFOG/REALTIME/slocum_glider/TwoRocks20180503a/', os.path.basename(PREV_PNG_TRANSECT)))
        existing_file3 = PipelineFile(PREV_PNG_MISSION, dest_path=os.path.join(
            'IMOS/ANFOG/REALTIME/slocum_glider/TwoRocks20180503a/', os.path.basename(PREV_PNG_MISSION)))

        preexisting_files.update([existing_file1, existing_file2, existing_file3])

        # set the files to UPLOAD_ONLY
        preexisting_files.set_publish_types(PipelineFilePublishType.UPLOAD_ONLY)

        # upload the 'preexisting_files' collection to the unit test's temporary upload location
        broker = get_storage_broker(self.config.pipeline_config['global']['upload_uri'])
        broker.upload(preexisting_files)

        # run the handler
        handler = self.run_handler(GOOD_ZIP_RT)

        nc = handler.file_collection.filter_by_attribute_id('file_type', FileType.NETCDF)
        for n in nc:
            if n.name == os.path.basename(PREV_NC_RT):
                self.assertEqual(n.publish_type, PipelineFilePublishType.DELETE_UNHARVEST)
                self.assertTrue(n.is_deleted)
            else:
                self.assertEqual(n.publish_type, PipelineFilePublishType.HARVEST_UPLOAD)
                self.assertTrue(n.is_harvested)
                self.assertTrue(n.is_stored)

        pngs = handler.file_collection.filter_by_attribute_id('file_type', FileType.PNG)
        for png in pngs:
            if png.name == os.path.basename(PREV_PNG_MISSION):
                self.assertTrue(png.is_overwrite)
            else:
                self.assertTrue(png.is_uploaded)

        # no update of the harvestMission list in this case
        csv = handler.file_collection.filter_by_attribute_id('file_type', FileType.CSV)
        self.assertEqual(len(csv), 0)
Example #16
    def test_setup_upload_location_push_newer_yearly_file(self):
        """
        Test case: Check creation date of incoming  yearly *.nc.gz is newer that one already on storage
                   UPLOAD_ONLY the new incoming *.nc.gz
                   DELETE_ONLY the previous *.nc.gz
                   NO_ACTION on the nc inside the *.nc.gz
        """
        # create some PipelineFiles to represent the existing files on 'S3'
        preexisting_files = PipelineFileCollection()

        existing_file = PipelineFile(
            PREV_YEARLY_NC_GZ_STORAGE,
            dest_path=os.path.join(
                'IMOS/OceanCurrent/GSLA/DM00/yearfiles',
                os.path.basename(PREV_YEARLY_NC_GZ_STORAGE)))

        preexisting_files.update([existing_file])

        # set the files to UPLOAD_ONLY
        preexisting_files.set_publish_types(
            PipelineFilePublishType.UPLOAD_ONLY)

        # upload the 'preexisting_files' collection to the unit test's temporary upload location
        broker = get_storage_broker(
            self.config.pipeline_config['global']['upload_uri'])
        broker.upload(preexisting_files)

        # run the handler
        handler = self.run_handler(GOOD_YEARLY_FILE_DM00)

        nc_file = handler.file_collection.filter_by_attribute_id(
            'file_type', FileType.NETCDF)[0]
        self.assertEqual(nc_file.publish_type,
                         PipelineFilePublishType.NO_ACTION)

        nc_gz_file = handler.file_collection.filter_by_attribute_id(
            'file_type', FileType.GZIP)[0]
        self.assertEqual(nc_gz_file.publish_type,
                         PipelineFilePublishType.UPLOAD_ONLY)

        nc_gz_delete = handler.file_collection.filter_by_attribute_value(
            'name', os.path.basename(PREV_YEARLY_NC_GZ_STORAGE))[0]
        self.assertEqual(nc_gz_delete.publish_type,
                         PipelineFilePublishType.DELETE_ONLY)
Example #17
    def test_setup_upload_location_push_file_newer_creation_date(self):
        """
        Test case: Check creation date of new *.nc is newer that one already on storage
        """
        # create some PipelineFiles to represent the existing files on 'S3'
        preexisting_files = PipelineFileCollection()

        existing_file = PipelineFile(
            GOOD_NC_FV01, dest_path=AcornHandler.dest_path(GOOD_NC_FV01))
        preexisting_files.update([existing_file])

        # set the files to UPLOAD_ONLY
        preexisting_files.set_publish_types(
            PipelineFilePublishType.UPLOAD_ONLY)

        # upload the 'preexisting_files' collection to the unit test's temporary upload location
        broker = get_storage_broker(
            self.config.pipeline_config['global']['upload_uri'])
        broker.upload(preexisting_files)

        # create a new file based on GOOD_NC_FV01 and modify it with a newer creation date;
        # we patch the handler's global variable in order to use the temporary broker file
        nc_file_new_creation_date_path = os.path.join(
            self.temp_dir, os.path.basename(GOOD_NC_FV01))
        shutil.copyfile(GOOD_NC_FV01, nc_file_new_creation_date_path)
        with Dataset(nc_file_new_creation_date_path, mode='r+') as nc_obj:
            delta_time = timedelta(1, 1, 1)
            new_time = datetime.strptime(nc_obj.date_created,
                                         '%Y-%m-%dT%H:%M:%SZ') + delta_time
            nc_obj.date_created = datetime.strftime(new_time,
                                                    '%Y-%m-%dT%H:%M:%SZ')

        # run the handler on the new file with a newer creation date
        handler = self.handler_class(nc_file_new_creation_date_path,
                                     include_regexes=[r'IMOS_ACORN_.*\.nc'])
        handler.opendap_root = broker.prefix
        handler.run()

        nc_file = handler.file_collection.filter_by_attribute_id(
            'file_type', FileType.NETCDF)[0]
        self.assertEqual(nc_file.publish_type,
                         PipelineFilePublishType.HARVEST_UPLOAD)
Example #18
    def test_deletion_rt_after_dm_upload(self):
        """test deletion of RT mission at upload of related DM version"""
        preexisting_files = PipelineFileCollection()

        existing_file1 = PipelineFile(PREV_NC_RT, dest_path=os.path.join(
            'IMOS/ANFOG/REALTIME/slocum_glider/TwoRocks20180503a/', os.path.basename(PREV_NC_RT)))

        existing_file2 = PipelineFile(PREV_PNG_TRANSECT, dest_path=os.path.join(
            'IMOS/ANFOG/REALTIME/slocum_glider/TwoRocks20180503a/', os.path.basename(PREV_PNG_TRANSECT)))
        existing_file3 = PipelineFile(PREV_PNG_MISSION, dest_path=os.path.join(
            'IMOS/ANFOG/REALTIME/slocum_glider/TwoRocks20180503a/', os.path.basename(PREV_PNG_MISSION)))

        preexisting_files.update([existing_file1, existing_file2, existing_file3])

        # set the files to UPLOAD_ONLY
        preexisting_files.set_publish_types(PipelineFilePublishType.UPLOAD_ONLY)

        # upload the 'preexisting_files' collection to the unit test's temporary upload location
        broker = get_storage_broker(self.config.pipeline_config['global']['upload_uri'])
        broker.upload(preexisting_files)

        # run the handler
        handler = self.run_handler(GOOD_ZIP_DM, check_params={'checks': ['cf', 'imos:1.4']})

        nc = handler.file_collection.filter_by_attribute_id('file_type', FileType.NETCDF)
        for n in nc:
            if n.name == os.path.basename(PREV_NC_RT):
                self.assertEqual(n.publish_type, PipelineFilePublishType.DELETE_UNHARVEST)
                self.assertTrue(n.is_deleted)
            elif re.match(AnfogFileClassifier.DM_REGEX, n.name):
                self.assertEqual(n.publish_type, PipelineFilePublishType.HARVEST_UPLOAD)
                self.assertTrue(n.is_harvested)
                self.assertTrue(n.is_stored)
            else:
                self.assertEqual(n.publish_type, PipelineFilePublishType.ARCHIVE_ONLY)
                self.assertTrue(n.is_archived)

        pngs = handler.file_collection.filter_by_attribute_id('file_type', FileType.PNG)
        for png in pngs:
            self.assertTrue(png.is_deleted)
Example #19
    def test_setup_upload_location_push_file_older_creation_date(self):
        """
        Test case: Check creation date of new *.nc is older that one already on storage
        """
        # create some PipelineFiles to represent the existing files on 'S3'
        preexisting_files = PipelineFileCollection()

        existing_file = PipelineFile(
            GOOD_NC_FV01, dest_path=AcornHandler.dest_path(GOOD_NC_FV01))
        preexisting_files.update([existing_file])

        # set the files to UPLOAD_ONLY
        preexisting_files.set_publish_types(
            PipelineFilePublishType.UPLOAD_ONLY)

        # upload the 'preexisting_files' collection to the unit test's temporary upload location
        broker = get_storage_broker(
            self.config.pipeline_config['global']['upload_uri'])
        broker.upload(preexisting_files)

        # create a new file based on GOOD_NC_FV01. Modify it with an older creation date
        nc_file_old_creation_date_path = os.path.join(
            self.temp_dir, os.path.basename(GOOD_NC_FV01))
        shutil.copyfile(GOOD_NC_FV01, nc_file_old_creation_date_path)
        with Dataset(nc_file_old_creation_date_path, mode='r+') as nc_obj:
            delta_time = timedelta(1, 1, 1)
            new_time = datetime.strptime(nc_obj.date_created,
                                         '%Y-%m-%dT%H:%M:%SZ') - delta_time
            nc_obj.date_created = datetime.strftime(new_time,
                                                    '%Y-%m-%dT%H:%M:%SZ')

        # run the handler on the new file with an older creation date
        handler = self.handler_class(nc_file_old_creation_date_path,
                                     include_regexes=[r'IMOS_ACORN_.*\.nc'])
        handler.opendap_root = broker.prefix
        handler.run()

        self.assertIsInstance(handler.error, InvalidFileContentError)
Example #20
    def test_dstg(self):
        preexisting_file = PipelineFileCollection()
        existing_file = PipelineFile(DSTG, dest_path=os.path.join(
            'Department_of_Defence/DSTG/slocum_glider/TalismanSaberB20130706/', os.path.basename(DSTG)))

        preexisting_file.update([existing_file])

        # set the files to UPLOAD_ONLY
        preexisting_file.set_publish_types(PipelineFilePublishType.UPLOAD_ONLY)

        # upload the 'preexisting_files' collection to the unit test's temporary upload location
        broker = get_storage_broker(self.config.pipeline_config['global']['upload_uri'])
        broker.upload(preexisting_file)

        # test processing of DSTG and NRL NetCDF files
        handler = self.run_handler(DSTG)

        f = handler.file_collection[0]
        self.assertEqual(f.publish_type, PipelineFilePublishType.HARVEST_UPLOAD)
        self.assertEqual(f.dest_path,
                         'Department_of_Defence/DSTG/slocum_glider/TalismanSaberB20130706/' + f.name)
        self.assertTrue(f.is_stored)
        self.assertTrue(f.is_harvested)
Example #21
    def test_setup_upload_location_push_same_file(self):
        """
        Test case: Push same file twice to $INCOMING_DIR
                   HARVEST_UPLOAD the incoming *.nc.gz
                   NO_ACTION on the nc inside the *.nc.gz
        """
        # create some PipelineFiles to represent the existing files on 'S3'
        preexisting_files = PipelineFileCollection()

        existing_file = PipelineFile(PREV_NC_GZ_STORAGE,
                                     dest_path=os.path.join(
                                         'IMOS/OceanCurrent/GSLA/DM00/2018/',
                                         os.path.basename(PREV_NC_GZ_STORAGE)))

        preexisting_files.update([existing_file])

        # set the files to UPLOAD_ONLY
        preexisting_files.set_publish_types(
            PipelineFilePublishType.UPLOAD_ONLY)

        # upload the 'preexisting_files' collection to the unit test's temporary upload location
        broker = get_storage_broker(
            self.config.pipeline_config['global']['upload_uri'])
        broker.upload(preexisting_files)

        # run the handler by uploading the same file again
        handler = self.run_handler(PREV_NC_GZ_STORAGE)

        nc_file = handler.file_collection.filter_by_attribute_id(
            'file_type', FileType.NETCDF)[0]
        self.assertEqual(nc_file.publish_type,
                         PipelineFilePublishType.NO_ACTION)

        nc_gz_file = handler.file_collection.filter_by_attribute_id(
            'file_type', FileType.GZIP)[0]
        self.assertEqual(nc_gz_file.publish_type,
                         PipelineFilePublishType.HARVEST_UPLOAD)
Example #22
    def test_renamed_rt_deployment(self):
        # test deletion of RT files when a deployment is renamed or when cleaning up files on S3
        preexisting_files = PipelineFileCollection()

        existing_file1 = PipelineFile(PREV_NC_RT, dest_path=os.path.join(
            'IMOS/ANFOG/REALTIME/slocum_glider/TwoRocks20180503a/', os.path.basename(PREV_NC_RT)))

        existing_file2 = PipelineFile(PREV_PNG_TRANSECT, dest_path=os.path.join(
            'IMOS/ANFOG/REALTIME/slocum_glider/TwoRocks20180503a/', os.path.basename(PREV_PNG_TRANSECT)))
        existing_file3 = PipelineFile(PREV_PNG_MISSION, dest_path=os.path.join(
            'IMOS/ANFOG/REALTIME/slocum_glider/TwoRocks20180503a/', os.path.basename(PREV_PNG_MISSION)))

        preexisting_files.update([existing_file1, existing_file2, existing_file3])

        # set the files to UPLOAD_ONLY
        preexisting_files.set_publish_types(PipelineFilePublishType.UPLOAD_ONLY)

        # upload the 'preexisting_files' collection to the unit test's temporary upload location
        broker = get_storage_broker(self.config.pipeline_config['global']['upload_uri'])
        broker.upload(preexisting_files)

        handler = self.run_handler(MISSION_STATUS_RENAMED)

        # Processing should result in: input file unhandled, preexisting file deleted, csv file harvested
        csv = handler.file_collection.filter_by_attribute_id('file_type', FileType.CSV)
        self.assertEqual(csv[0].publish_type, PipelineFilePublishType.HARVEST_ONLY)
        self.assertTrue(csv[0].is_harvested)

        nc = handler.file_collection.filter_by_attribute_id('file_type', FileType.NETCDF)
        self.assertEqual(nc[0].publish_type, PipelineFilePublishType.DELETE_UNHARVEST)
        self.assertTrue(nc[0].is_deleted)

        pngs = handler.file_collection.filter_by_attribute_id('file_type', FileType.PNG)
        for png in pngs:
            self.assertEqual(png.publish_type, PipelineFilePublishType.DELETE_ONLY)
            self.assertTrue(png.is_deleted)
Example #23
GETFEATURE_OLD_PRODUCTS_FILE = os.path.join(TEST_ROOT, 'getFeature_old_products.json')
GETFEATURE_EMPTY_FILE = os.path.join(TEST_ROOT, 'getFeature_empty.json')

with open(GETFEATURE_FILE) as f:
    TEST_GETFEATURE_JSON = f.read()

with open(GETFEATURE_OLD_PRODUCTS_FILE) as f:
    TEST_GETFEATURE_OLD_PRODUCTS_JSON = f.read()

with open(GETFEATURE_EMPTY_FILE) as f:
    TEST_GETFEATURE_EMPTY_JSON = f.read()

# Create collection of input files for the products
# These will be uploaded to the mocked equivalent of S3 (where the real input files will be)
features = json.loads(TEST_GETFEATURE_JSON)['features']
INPUT_FILE_COLLECTION = PipelineFileCollection()
for f in features:
    pf = PipelineFile(
            os.path.join(TEST_ROOT, os.path.basename(f['properties']['url'])),
            dest_path=f['properties']['url']
    )
    pf.publish_type = PipelineFilePublishType.UPLOAD_ONLY
    INPUT_FILE_COLLECTION.add(pf)


class TestMooringsProductsHandler(HandlerTestCase):
    def setUp(self):
        self.handler_class = MooringsProductsHandler
        upload_broker = get_storage_broker(self.config.pipeline_config['global']['upload_uri'])
        upload_broker.upload(INPUT_FILE_COLLECTION)
        super().setUp()
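
The module-level loop above only relies on each feature carrying a properties.url entry; a minimal getFeature document it could parse might look like the following (illustrative paths only, not the project's real WFS response):

TEST_GETFEATURE_JSON_EXAMPLE = """
{
    "features": [
        {"properties": {"url": "IMOS/DUMMY/PRODUCTS/input_file_1.nc"}},
        {"properties": {"url": "IMOS/DUMMY/PRODUCTS/input_file_2.nc"}}
    ]
}
"""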
Example #24
    def test_delete_previous_file(self, mock_callsign):
        # create some PipelineFiles to represent the existing files on 'S3'
        preexisting_files = PipelineFileCollection()

        existing_file1 = PipelineFile(
            PREV_NC,
            dest_path=os.path.join(
                'IMOS/SOOP/SOOP-BA/VKAD_Antarctic-Discovery/Antarctic-Discovery_20160116-20160129/',
                os.path.basename(PREV_NC)))

        existing_file2 = PipelineFile(
            CSV,
            dest_path=os.path.join(
                'IMOS/SOOP/SOOP-BA/VKAD_Antarctic-Discovery/Antarctic-Discovery_20160116-20160129/',
                os.path.basename(CSV)))
        existing_file3 = PipelineFile(
            PNG,
            dest_path=os.path.join(
                'IMOS/SOOP/SOOP-BA/VKAD_Antarctic-Discovery/Antarctic-Discovery_20160116-20160129/',
                os.path.basename(PNG)))
        preexisting_files.update(
            [existing_file1, existing_file2, existing_file3])

        # set the files to UPLOAD_ONLY
        preexisting_files.set_publish_types(
            PipelineFilePublishType.UPLOAD_ONLY)

        # upload the 'preexisting_files' collection to the unit test's temporary upload location
        broker = get_storage_broker(
            self.config.pipeline_config['global']['upload_uri'])
        broker.upload(preexisting_files)

        # run the handler
        handler = self.run_handler(GOOD_ZIP)

        # add some tests to make sure the previous files were handled appropriately, e.g.
        # - they were added as deletions
        # - they were successfully deleted
        # - they were the *only* ones deleted
        nc_files = handler.file_collection.filter_by_attribute_id(
            'file_type', FileType.NETCDF)

        for nc in nc_files:
            if nc.name == os.path.basename(PREV_NC):
                self.assertEqual(nc.publish_type,
                                 PipelineFilePublishType.DELETE_UNHARVEST)
                self.assertEqual(nc.is_deleted, True)
            else:
                self.assertEqual(nc.is_deleted, False)
        csvs = handler.file_collection.filter_by_attribute_id(
            'file_type', FileType.CSV)
        for csv in csvs:
            if csv.name == os.path.basename(CSV):
                self.assertEqual(csv.publish_type,
                                 PipelineFilePublishType.UPLOAD_ONLY)
                self.assertEqual(csv.is_deleted, False)

        pngs = handler.file_collection.filter_by_attribute_id(
            'file_type', FileType.PNG)
        for png in pngs:
            if png.name == os.path.basename(PNG):
                self.assertEqual(png.is_deleted, True)
Example #25
    def get_previous_version(self, previous_file_list, path, input_file_name):
        """
            Find previous version of each incoming file based on its type/extension and
            add them to the filecollection with the correct publish type
            extension can be: .inf', '.nc.png','.pitch.csv','.roll.csv',.gps.csv'
            inputs: previous_file_list : dictionary containing file listing(full path) and metadata from destination
                  input_file :  file basename
                   path : full destination path
        """

        if not previous_file_list:
            return

        files_to_delete = PipelineFileCollection()

        try:
            extension = ALLOWED_CONTENT_EXTENSIONS.match(
                input_file_name).groupdict()['extension']
        except (KeyError, AttributeError):  # AttributeError when the name does not match the pattern at all
            raise ValueError(
                "unable to determine extension from file name {infile}".format(
                    infile=input_file_name))

        # get the set of previous file basenames to search through
        basenames = {os.path.basename(f) for f in previous_file_list}

        this_extension_pattern = re.compile(
            r".*\.{ext}$".format(ext=extension))
        if input_file_name not in basenames:
            previous_file = [
                f for f in previous_file_list
                if this_extension_pattern.match(f)
            ]

            if extension == 'nc':
                if len(previous_file) != 1:
                    raise ValueError(
                        "Expected exactly 1 previous versions of the netcdf file, found {n}. Aborting "
                        .format(n=len(previous_file)))
            else:
                # if the uploaded file has the same name as the published file => no action, the file will be
                # overwritten; otherwise sort files per wildcard and work out which one to delete.
                # The previous file wildcard can be:
                # '.inf', '.nc.png', '.pitch.csv', '.roll.csv', '.gps.csv'
                if len(previous_file) > 1:
                    raise ValueError(
                        "Found more than one previous versions of the extension '{ext}'. Aborting"
                        .format(ext=extension))
                elif len(previous_file) == 0:
                    return

            prev_file = previous_file[0]
            dest_path = os.path.join(path, os.path.basename(prev_file))
            self.logger.info(
                "adding deletion of previous file '{dest_path}'".format(
                    dest_path=dest_path))

            file_to_delete = PipelineFile(prev_file,
                                          is_deletion=True,
                                          dest_path=dest_path)

            if extension == 'nc':
                file_to_delete.publish_type = PipelineFilePublishType.DELETE_UNHARVEST
            else:
                file_to_delete.publish_type = PipelineFilePublishType.DELETE_ONLY

            files_to_delete.add(file_to_delete)

        return files_to_delete
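
get_previous_version assumes ALLOWED_CONTENT_EXTENSIONS is a compiled regular expression with a named 'extension' group; a plausible definition consistent with the extensions listed in the docstring (an assumption for illustration, not necessarily the project's actual pattern) could be:

import re

# hypothetical pattern covering the extensions mentioned in the docstring;
# the named 'extension' group is what groupdict()['extension'] reads above
ALLOWED_CONTENT_EXTENSIONS = re.compile(
    r".*\.(?P<extension>nc|inf|nc\.png|pitch\.csv|roll\.csv|gps\.csv)$")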