Example #1
0
    def _constraints_for_new_request(cls, config):
        """Build request constraints covering every file not seen previously.

        @param config dict of configuration parameters
        @retval dict with 'start_time', 'end_time', 'new_files',
                'bounding_box' and 'vars'
        """
        previously_seen = get_safe(config, 'new_data_check') or []
        # CBM: Fix this when the DotList crap is sorted out
        # NOTE that the internal tuples are also DotList objects
        previously_seen = list(previously_seen)

        base_url = get_safe(config, 'ds_params.base_url')
        list_pattern = get_safe(config, 'ds_params.list_pattern')
        date_pattern = get_safe(config, 'ds_params.date_pattern')
        date_extraction_pattern = get_safe(
            config, 'ds_params.date_extraction_pattern')

        available = list_file_info(base_url, list_pattern)

        # Anything in the current listing that was not seen before is "new"
        unseen = [entry for entry in available if entry not in previously_seen]

        # Time bounds come from the first and last new filenames
        return {
            'start_time': get_time_from_filename(
                unseen[0][0], date_extraction_pattern, date_pattern),
            'end_time': get_time_from_filename(
                unseen[-1][0], date_extraction_pattern, date_pattern),
            'new_files': unseen,
            'bounding_box': {},
            'vars': [],
        }
    def test__constraints_for_new_request(self):
        """Verify _constraints_for_new_request reports the full listing as new."""
        # Previously-seen file tuples: (name, mtime, size)
        old_list = [
            ('test_data/slocum/ru05-2012-021-0-0-sbd.dat', 1337261358.0, 521081),
            ('test_data/slocum/ru05-2012-022-0-0-sbd.dat', 1337261358.0, 521081),
        ]

        edres = ExternalDataset(
            name='test_ed_res',
            dataset_description=DatasetDescription(),
            update_description=UpdateDescription(),
            contact=ContactInformation())
        ds_params = {
            # These would be extracted from the dataset_description.parameters during _init_acquisition_cycle, but since that isn't called, just add them here
            'base_url': 'test_data/slocum/',
            # Appended to base to filter files; Either a shell style pattern (for filesystem) or regex (for http/ftp)
            'list_pattern': 'ru05-*-sbd.dat',
            'date_pattern': '%Y %j',
            'date_extraction_pattern': 'ru05-([\d]{4})-([\d]{3})-\d-\d-sbd.dat',
        }
        config = {
            'external_dataset_res': edres,
            'new_data_check': old_list,
            'ds_params': ds_params,
        }
        ret = SlocumDataHandler._constraints_for_new_request(config)
        log.debug('test__constraints_for_new_request: {0}'.format(ret['new_files']))
        self.assertEqual(
            ret['new_files'],
            list_file_info(config['ds_params']['base_url'],
                           config['ds_params']['list_pattern']))
Example #3
0
    def _constraints_for_historical_request(cls, config):
        """
        Returns a list of new file names in the given directory
        @param config dictionary of configuration parameters
        @retval list of new file names
        """
        base_url = get_safe(config, 'ds_params.base_url')
        list_pattern = get_safe(config, 'ds_params.list_pattern')
        date_pattern = get_safe(config, 'ds_params.date_pattern')
        date_extraction_pattern = get_safe(
            config, 'ds_params.date_extraction_pattern')

        start_time = get_safe(config, 'constraints.start_time')
        end_time = get_safe(config, 'constraints.end_time')

        listing = list_file_info(base_url, list_pattern)

        # Keep only files whose filename-embedded timestamp falls inside the
        # requested window (inclusive on both ends)
        matched = [
            entry for entry in listing
            if start_time <= get_time_from_filename(
                entry[0], date_extraction_pattern, date_pattern) <= end_time
        ]

        return {'new_files': matched}
    def _constraints_for_new_request(cls, config):
        """Queue files that are new, or have grown, since the last read.

        @param config dict of configuration parameters; 'set_new_data_check'
                      is written back with the current directory listing
        @retval dict with 'new_files', 'bounding_box' and 'vars'
        """
        old_list = get_safe(config, 'new_data_check') or []
        # CBM: Fix this when the DotList crap is sorted out
        # NOTE that the internal tuples are also DotList objects
        old_list = list(old_list)

        base_url = get_safe(config, 'ds_params.base_url')
        list_pattern = get_safe(config, 'ds_params.list_pattern')
        date_pattern = get_safe(config, 'ds_params.date_pattern')
        date_extraction_pattern = get_safe(config, 'ds_params.date_extraction_pattern')

        curr_list = list_file_info(base_url, list_pattern)

        # Compare the last read files (old_list) with the current directory
        # contents (curr_list).  If the names match (index 0), compare the
        # current file size (index 2) with the position reached when the file
        # was last read (index 3); if there's more data now, queue the file.
        new_list = []
        for curr_file in curr_list:
            previously_read = False
            for old_file in old_list:
                # Same filename => file is still present and was read before
                if curr_file[0] == old_file[0]:
                    previously_read = True
                    # curr_file[2]: current size; old_file[3]: last read size
                    if curr_file[2] > old_file[3]:
                        # File grew - re-add, retaining the prior read position
                        new_list.append((curr_file[0], curr_file[1],
                                         curr_file[2], old_file[-1]))
            if not previously_read:
                new_list.append(curr_file)

        # The current listing becomes the baseline for the next evaluation
        config['set_new_data_check'] = curr_list

        ret = {}
        ret['new_files'] = new_list
        ret['bounding_box'] = {}
        ret['vars'] = []

        return ret
Example #5
0
    def _constraints_for_new_request(cls, config):
        """Build constraints for files that have appeared since the last check.

        @param config dict of configuration parameters; 'set_new_data_check'
                      is written back with the current directory listing
        @retval dict with 'new_files', 'start_time', 'end_time',
                'bounding_box' and 'vars'
        @raise NoNewDataWarning when no new files are found
        """
        old_list = get_safe(config, 'new_data_check') or []
        # CBM: Fix this when the DotList crap is sorted out
        old_list = list(old_list)  # NOTE that the internal tuples are also DotList objects

        ret = {}
        base_url = get_safe(config, 'ds_params.base_url')
        list_pattern = get_safe(config, 'ds_params.list_pattern')
        date_pattern = get_safe(config, 'ds_params.date_pattern')
        date_extraction_pattern = get_safe(config, 'ds_params.date_extraction_pattern')

        curr_list = list_file_info(base_url, list_pattern)

        # Determine which files are new
        new_list = [x for x in curr_list if x not in old_list]

        # BUG FIX: was 'len(new_list) is 0' - an identity comparison on an int
        # that only happens to work via CPython small-int caching; use a
        # proper emptiness test instead.
        if not new_list:
            raise NoNewDataWarning()

        # The curr_list is the new new_data_check - used for the next "new data" evaluation
        config['set_new_data_check'] = curr_list

        # The new_list is the set of new files - these will be processed
        ret['new_files'] = new_list
        ret['start_time'] = get_time_from_filename(new_list[0][0], date_extraction_pattern, date_pattern)
        ret['end_time'] = get_time_from_filename(new_list[-1][0], date_extraction_pattern, date_pattern)
        ret['bounding_box'] = {}
        ret['vars'] = []

        return ret
 def test__constraints_for_historical_request(self):
     """Verify historical-request constraints return the full test listing."""
     config = {
         'ds_params': {
             # These would be extracted from the dataset_description.parameters during _init_acquisition_cycle, but since that isn't called, just add them here
             'base_url': 'test_data/slocum',
             # Appended to base to filter files; Either a shell style pattern (for filesystem) or regex (for http/ftp)
             'list_pattern': 'ru05-*-sbd.dat',
             'date_pattern': '%Y %j',
             'date_extraction_pattern': 'ru05-([\d]{4})-([\d]{3})-\d-\d-sbd.dat',
         },
         'constraints': {
             'start_time': 1327122000,
             'end_time': 1327294800,
         },
     }
     ret = SlocumDataHandler._constraints_for_historical_request(config)
     log.debug('test_constraints_for_historical_request: {0}'.format(config))
     self.assertEqual(
         ret['new_files'],
         list_file_info(config['ds_params']['base_url'],
                        config['ds_params']['list_pattern']))
Example #7
0
    def _constraints_for_new_request(cls, config):
        """Build constraints describing files newly available since the last check.

        (Docstring fixed: previous text about 'random integers' was copied
        from a simulator variant and did not describe this implementation.)

        @param config Dict of configuration parameters; 'set_new_data_check'
                      is written back with the current directory listing
        @retval constraints dictionary with 'new_files', 'start_time',
                'end_time', 'bounding_box' and 'vars'
        @raise NoNewDataWarning when no new files are found
        """
        old_list = get_safe(config, 'new_data_check') or []

        ret = {}
        base_url = get_safe(config, 'ds_params.base_url')
        list_pattern = get_safe(config, 'ds_params.list_pattern')
        date_pattern = get_safe(config, 'ds_params.date_pattern')
        date_extraction_pattern = get_safe(
            config, 'ds_params.date_extraction_pattern')

        curr_list = list_file_info(base_url, list_pattern)

        # Determine which files are new:
        # old_list comes in as a list of lists [[]], curr_list as a list of
        # tuples [()]; both must be sets of tuples for set.difference to work.
        # The difference is the set of entries in curr_list but not old_list.
        curr_set = set(tuple(x) for x in curr_list)
        old_set = set(tuple(x) for x in old_list)
        new_list = list(curr_set.difference(old_set))

        # BUG FIX: was 'len(new_list) is 0' - an identity comparison on an int
        # that only works via CPython small-int caching; test emptiness.
        if not new_list:
            raise NoNewDataWarning()

        # BUG FIX: a set difference yields arbitrary order, so the first/last
        # entries (and thus start/end times) were nondeterministic; sort so
        # the result is deterministic and filename-ordered.
        new_list.sort()

        # The curr_list is the new new_data_check - used for the next "new data" evaluation
        config['set_new_data_check'] = curr_list

        # The new_list is the set of new files - these will be processed
        ret['new_files'] = new_list
        ret['start_time'] = get_time_from_filename(new_list[0][0],
                                                   date_extraction_pattern,
                                                   date_pattern)
        ret['end_time'] = get_time_from_filename(new_list[-1][0],
                                                 date_extraction_pattern,
                                                 date_pattern)
        ret['bounding_box'] = {}
        ret['vars'] = []

        log.debug('constraints_for_new_request: {0}'.format(ret))

        return ret
    def _constraints_for_new_request(cls, config):
        """Build constraints describing files newly available since the last check.

        @param config Dict of configuration parameters; 'set_new_data_check'
                      is written back with the current directory listing
        @retval constraints dictionary with 'new_files', 'start_time',
                'end_time', 'bounding_box' and 'vars'
        @raise NoNewDataWarning when no new files are found
        """
        old_list = get_safe(config, 'new_data_check') or []

        ret = {}
        base_url = get_safe(config, 'ds_params.base_url')
        list_pattern = get_safe(config, 'ds_params.list_pattern')
        date_pattern = get_safe(config, 'ds_params.date_pattern')
        date_extraction_pattern = get_safe(config, 'ds_params.date_extraction_pattern')

        curr_list = list_file_info(base_url, list_pattern)

        # Determine which files are new:
        # old_list comes in as a list of lists [[]], curr_list as a list of
        # tuples [()]; both must be sets of tuples for set.difference to work.
        # The difference is the set of entries in curr_list but not old_list.
        curr_set = set(tuple(x) for x in curr_list)
        old_set = set(tuple(x) for x in old_list)
        new_list = list(curr_set.difference(old_set))

        # BUG FIX: was 'len(new_list) is 0' - an identity comparison on an int
        # that only works via CPython small-int caching; test emptiness.
        if not new_list:
            raise NoNewDataWarning()

        # BUG FIX: a set difference yields arbitrary order, so the first/last
        # entries (and thus start/end times) were nondeterministic; sort so
        # the result is deterministic and filename-ordered.
        new_list.sort()

        # The curr_list is the new new_data_check - used for the next "new data" evaluation
        config['set_new_data_check'] = curr_list

        # The new_list is the set of new files - these will be processed
        ret['new_files'] = new_list
        ret['start_time'] = get_time_from_filename(new_list[0][0], date_extraction_pattern, date_pattern)
        ret['end_time'] = get_time_from_filename(new_list[-1][0], date_extraction_pattern, date_pattern)
        ret['bounding_box'] = {}
        ret['vars'] = []

        log.debug('constraints_for_new_request: {0}'.format(ret))

        return ret
    def _constraints_for_historical_request(cls, config):
        """Return the full current file listing for a historical request.

        NOTE(review): start_time/end_time are read from config but never
        applied - no time-window filtering happens here, unlike sibling
        implementations that filter via get_time_from_filename.  Confirm this
        is intentional before relying on it.

        @param config dictionary of configuration parameters
        @retval dict with 'new_files' mapped to the current listing
        """
        base_url = get_safe(config, 'ds_params.base_url')
        list_pattern = get_safe(config, 'ds_params.list_pattern')
        date_pattern = get_safe(config, 'ds_params.date_pattern')
        date_extraction_pattern = get_safe(config, 'ds_params.date_extraction_pattern')

        start_time = get_safe(config, 'constraints.start_time')
        end_time = get_safe(config, 'constraints.end_time')

        # Dead 'new_list = []' assignment and commented-out code removed;
        # the listing is returned unfiltered, exactly as before.
        new_list = list_file_info(base_url, list_pattern)

        return {'new_files': new_list}
    def _constraints_for_new_request(cls, config):
        """Build constraints describing files newly available since the last check.

        (Docstring fixed: previous text about 'random integers' was copied
        from a simulator variant and did not describe this implementation.)

        @param config Dict of configuration parameters; "set_new_data_check"
                      is written back with the current directory listing
        @retval constraints dictionary with "new_files", "start_time",
                "end_time", "bounding_box" and "vars"
        @raise NoNewDataWarning when no new files are found
        """
        old_list = get_safe(config, "new_data_check") or []

        ret = {}
        base_url = get_safe(config, "ds_params.base_url")
        list_pattern = get_safe(config, "ds_params.list_pattern")
        date_pattern = get_safe(config, "ds_params.date_pattern")
        date_extraction_pattern = get_safe(config, "ds_params.date_extraction_pattern")

        curr_list = list_file_info(base_url, list_pattern)

        # Determine which files are new:
        # old_list comes in as a list of lists [[]], curr_list as a list of
        # tuples [()]; both must be sets of tuples for set.difference to work.
        curr_set = set(tuple(x) for x in curr_list)
        old_set = set(tuple(x) for x in old_list)
        new_list = list(curr_set.difference(old_set))

        # BUG FIX: was "len(new_list) is 0" - an identity comparison on an int
        # that only works via CPython small-int caching; test emptiness.
        if not new_list:
            raise NoNewDataWarning()

        # BUG FIX: a set difference yields arbitrary order, so the first/last
        # entries (and thus start/end times) were nondeterministic; sort so
        # the result is deterministic and filename-ordered.
        new_list.sort()

        # The curr_list is the new new_data_check - used for the next "new data" evaluation
        config["set_new_data_check"] = curr_list

        # The new_list is the set of new files - these will be processed
        ret["new_files"] = new_list
        ret["start_time"] = get_time_from_filename(new_list[0][0], date_extraction_pattern, date_pattern)
        ret["end_time"] = get_time_from_filename(new_list[-1][0], date_extraction_pattern, date_pattern)
        ret["bounding_box"] = {}
        ret["vars"] = []

        log.debug("constraints_for_new_request: {0}".format(ret))

        return ret
Example #11
0
    def _constraints_for_historical_request(cls, config):
        """Return the full current file listing for a historical request.

        NOTE(review): start_time/end_time are read from config but never
        applied - no time-window filtering happens here, unlike sibling
        implementations that filter via get_time_from_filename.  Confirm this
        is intentional before relying on it.

        @param config dictionary of configuration parameters
        @retval dict with 'new_files' mapped to the current listing
        """
        base_url = get_safe(config, 'ds_params.base_url')
        list_pattern = get_safe(config, 'ds_params.list_pattern')
        date_pattern = get_safe(config, 'ds_params.date_pattern')
        date_extraction_pattern = get_safe(
            config, 'ds_params.date_extraction_pattern')

        start_time = get_safe(config, 'constraints.start_time')
        end_time = get_safe(config, 'constraints.end_time')

        # Dead 'new_list = []' assignment and commented-out code removed;
        # the listing is returned unfiltered, exactly as before.
        new_list = list_file_info(base_url, list_pattern)

        return {'new_files': new_list}
Example #12
0
    def _constraints_for_historical_request(cls, config):
        """Return the files whose embedded timestamps fall inside the request window."""
        base_url = get_safe(config, 'ds_params.base_url')
        list_pattern = get_safe(config, 'ds_params.list_pattern')
        date_pattern = get_safe(config, 'ds_params.date_pattern')
        date_extraction_pattern = get_safe(config, 'ds_params.date_extraction_pattern')

        start_time = get_safe(config, 'constraints.start_time')
        end_time = get_safe(config, 'constraints.end_time')

        matched = []
        for entry in list_file_info(base_url, list_pattern):
            # The timestamp is parsed out of the filename itself
            file_time = get_time_from_filename(entry[0], date_extraction_pattern, date_pattern)
            # Window is inclusive at both ends
            if start_time <= file_time <= end_time:
                matched.append(entry)

        return {'new_files': matched}
 def test__constraints_for_historical_request(self):
     """Verify historical-request constraints match the full test listing."""
     ds_params = {
         # These would be extracted from the dataset_description.parameters during _init_acquisition_cycle, but since that isn't called, just add them here
         'base_url': 'test_data/slocum',
         # Appended to base to filter files; Either a shell style pattern (for filesystem) or regex (for http/ftp)
         'list_pattern': 'ru05-*-sbd.dat',
         'date_pattern': '%Y %j',
         'date_extraction_pattern': 'ru05-([\d]{4})-([\d]{3})-\d-\d-sbd.dat',
     }
     config = {
         'ds_params': ds_params,
         'constraints': {'start_time': 1327122000, 'end_time': 1327294800},
     }
     ret = SlocumDataHandler._constraints_for_historical_request(config)
     log.debug('test_constraints_for_historical_request: {0}'.format(config))
     expected = list_file_info(config['ds_params']['base_url'],
                               config['ds_params']['list_pattern'])
     self.assertEqual(ret['new_files'], expected)
Example #14
0
    def _constraints_for_new_request(cls, config):
        """Queue files that are new, or that have grown since the last read.

        @param config dict of configuration parameters; 'set_new_data_check'
                      is written back with the current directory listing
        @retval dict with 'new_files', 'bounding_box' and 'vars'
        """
        previous = get_safe(config, 'new_data_check') or []
        # CBM: Fix this when the DotList crap is sorted out
        # NOTE that the internal tuples are also DotList objects
        previous = list(previous)

        base_url = get_safe(config, 'ds_params.base_url')
        list_pattern = get_safe(config, 'ds_params.list_pattern')
        date_pattern = get_safe(config, 'ds_params.date_pattern')
        date_extraction_pattern = get_safe(
            config, 'ds_params.date_extraction_pattern')

        current = list_file_info(base_url, list_pattern)

        # Compare the previous read state with the current directory contents.
        # Matching names (index 0) mean the file was read before; if its
        # current size (index 2) exceeds the recorded read position (index 3)
        # there is unread data, so the file is queued again.
        pending = []
        for entry in current:
            matched = False
            for prior in previous:
                if entry[0] != prior[0]:
                    continue
                # Same filename: still present and previously read
                matched = True
                if entry[2] > prior[3]:
                    # More data than last time; keep the prior read position
                    pending.append((entry[0], entry[1], entry[2], prior[-1]))
            if not matched:
                # Never seen before - an entirely new file
                pending.append(entry)

        # The current listing is the baseline for the next evaluation
        config['set_new_data_check'] = current

        return {'new_files': pending, 'bounding_box': {}, 'vars': []}
Example #15
0
    def _constraints_for_historical_request(cls, config):
        """Select files whose filename timestamps lie within the requested window."""
        base_url = get_safe(config, 'ds_params.base_url')
        list_pattern = get_safe(config, 'ds_params.list_pattern')
        date_pattern = get_safe(config, 'ds_params.date_pattern')
        date_extraction_pattern = get_safe(
            config, 'ds_params.date_extraction_pattern')

        start_time = get_safe(config, 'constraints.start_time')
        end_time = get_safe(config, 'constraints.end_time')

        # Filter the listing by the timestamp embedded in each filename;
        # the window is inclusive at both ends
        selected = [
            item for item in list_file_info(base_url, list_pattern)
            if start_time <= get_time_from_filename(
                item[0], date_extraction_pattern, date_pattern) <= end_time
        ]

        return {'new_files': selected}
    def _constraints_for_new_request(cls, config):
        """Build constraints for files that have appeared since the last check.

        @param config Dict of configuration parameters; 'set_new_data_check'
                      is written back with the current directory listing
        @retval constraints dictionary with 'new_files', 'start_time',
                'end_time', 'bounding_box' and 'vars'
        @raise NoNewDataWarning when no new files are found
        """
        old_list = get_safe(config, 'new_data_check') or []

        ret = {}
        base_url = get_safe(config, 'ds_params.base_url')
        list_pattern = get_safe(config, 'ds_params.list_pattern')
        date_pattern = get_safe(config, 'ds_params.date_pattern')
        date_extraction_pattern = get_safe(config, 'ds_params.date_extraction_pattern')

        curr_list = list_file_info(base_url, list_pattern)
        # Determine which files are new
        log.warn('curr_list:{0}'.format(curr_list))
        log.warn('old_list:{0}'.format(old_list))

        # old_list entries may be DotLists; compare via plain list/tuple forms
        new_list = [tuple(x) for x in curr_list if list(x) not in old_list]

        # BUG FIX: was 'len(new_list) is 0' - an identity comparison on an int
        # that only works via CPython small-int caching; test emptiness.
        if not new_list:
            raise NoNewDataWarning()

        # The curr_list is the new new_data_check - used for the next "new data" evaluation
        config['set_new_data_check'] = curr_list

        # The new_list is the set of new files - these will be processed
        ret['new_files'] = new_list
        ret['start_time'] = get_time_from_filename(new_list[0][0], date_extraction_pattern, date_pattern)
        ret['end_time'] = get_time_from_filename(new_list[-1][0], date_extraction_pattern, date_pattern)
        ret['bounding_box'] = {}
        ret['vars'] = []

        log.warn('constraints_for_new_request: {0}'.format(ret))

        return ret
    def test__constraints_for_historical_request(self):
        """Verify the RUV historical request returns the full test listing."""
        config = {
            'ds_params':{
                # These would be extracted from the dataset_description.parameters during _init_acquisition_cycle, but since that isn't called, just add them here
                'base_url':'test_data/ruv',
                'list_pattern':'RDLm_SEAB_*.ruv',
                'date_pattern':'%Y %m %d %H %M',
                'date_extraction_pattern': 'RDLm_SEAB_([\d]{4})_([\d]{2})_([\d]{2})_([\d]{2})([\d]{2}).ruv'
            },
            'constraints' : {
                'start_time': 1338998400,
                'end_time': 1339012800
            }
        }
        ret = RuvDataHandler._constraints_for_historical_request(config)
        log.debug('test__constraints_for_historical_request: NEW_FILES == {0}'.format(ret['new_files']))
        files = list_file_info(config['ds_params']['base_url'], config['ds_params']['list_pattern'])
        # BUG FIX: leftover debugging logged non-error output at ERROR level;
        # demoted to DEBUG so routine test runs don't pollute the error log.
        log.debug(ret['new_files'])
        log.debug(files)
        self.assertEqual(ret['new_files'], files)
    def _constraints_for_historical_request(cls, config):
        """
        Returns a list of new file names in the given directory
        @param config dictionary of configuration parameters
        @retval list of new file names
        """
        base_url = get_safe(config, 'ds_params.base_url')
        list_pattern = get_safe(config, 'ds_params.list_pattern')
        date_pattern = get_safe(config, 'ds_params.date_pattern')
        date_extraction_pattern = get_safe(config, 'ds_params.date_extraction_pattern')

        start_time = get_safe(config, 'constraints.start_time')
        end_time = get_safe(config, 'constraints.end_time')

        in_window = []
        for item in list_file_info(base_url, list_pattern):
            # Extract the timestamp encoded in the filename
            stamp = get_time_from_filename(item[0], date_extraction_pattern, date_pattern)
            # Skip anything outside the inclusive [start_time, end_time] window
            if stamp < start_time or stamp > end_time:
                continue
            in_window.append(item)

        return {'new_files': in_window}
    def _constraints_for_new_request(cls, config):
        """Return constraints spanning every file absent from the previous check."""
        seen = get_safe(config, 'new_data_check') or []
        # CBM: Fix this when the DotList crap is sorted out
        seen = list(seen)  # NOTE that the internal tuples are also DotList objects

        base_url = get_safe(config, 'ds_params.base_url')
        list_pattern = get_safe(config, 'ds_params.list_pattern')
        date_pattern = get_safe(config, 'ds_params.date_pattern')
        date_extraction_pattern = get_safe(config, 'ds_params.date_extraction_pattern')

        current = list_file_info(base_url, list_pattern)
        fresh = [entry for entry in current if entry not in seen]

        def _file_time(entry):
            # Timestamps are encoded in the filenames themselves
            return get_time_from_filename(entry[0], date_extraction_pattern, date_pattern)

        return {
            'start_time': _file_time(fresh[0]),
            'end_time': _file_time(fresh[-1]),
            'new_files': fresh,
            'bounding_box': {},
            'vars': [],
        }
    def test__constraints_for_new_request(self):
        """Verify new-request constraints report the full listing as new files."""
        # Previously-seen file tuples: (name, mtime, size)
        old_list = [
            ('test_data/slocum/ru05-2012-021-0-0-sbd.dat', 1337261358.0,
             521081),
            ('test_data/slocum/ru05-2012-022-0-0-sbd.dat', 1337261358.0,
             521081),
        ]

        edres = ExternalDataset(name='test_ed_res',
                                dataset_description=DatasetDescription(),
                                update_description=UpdateDescription(),
                                contact=ContactInformation())
        ds_params = {
            # These would be extracted from the dataset_description.parameters during _init_acquisition_cycle, but since that isn't called, just add them here
            'base_url': 'test_data/slocum/',
            # Appended to base to filter files; Either a shell style pattern (for filesystem) or regex (for http/ftp)
            'list_pattern': 'ru05-*-sbd.dat',
            'date_pattern': '%Y %j',
            'date_extraction_pattern': 'ru05-([\d]{4})-([\d]{3})-\d-\d-sbd.dat',
        }
        config = {
            'external_dataset_res': edres,
            'new_data_check': old_list,
            'ds_params': ds_params,
        }
        ret = SlocumDataHandler._constraints_for_new_request(config)
        log.debug('test__constraints_for_new_request: {0}'.format(
            ret['new_files']))
        expected = list_file_info(config['ds_params']['base_url'],
                                  config['ds_params']['list_pattern'])
        self.assertEqual(ret['new_files'], expected)
Example #21
0
    def _constraints_for_new_request(cls, config):
        """Build constraints for files that have appeared since the last check.

        @param config dict of configuration parameters; 'set_new_data_check'
                      is written back with the current directory listing
        @retval dict with 'new_files', 'start_time', 'end_time',
                'bounding_box' and 'vars'
        @raise NoNewDataWarning when no new files are found
        """
        old_list = get_safe(config, 'new_data_check') or []
        # CBM: Fix this when the DotList crap is sorted out
        old_list = list(
            old_list)  # NOTE that the internal tuples are also DotList objects

        ret = {}
        base_url = get_safe(config, 'ds_params.base_url')
        list_pattern = get_safe(config, 'ds_params.list_pattern')
        date_pattern = get_safe(config, 'ds_params.date_pattern')
        date_extraction_pattern = get_safe(
            config, 'ds_params.date_extraction_pattern')

        curr_list = list_file_info(base_url, list_pattern)

        # Determine which files are new
        new_list = [x for x in curr_list if x not in old_list]

        # BUG FIX: was 'len(new_list) is 0' - an identity comparison on an int
        # that only works via CPython small-int caching; test emptiness.
        if not new_list:
            raise NoNewDataWarning()

        # The curr_list is the new new_data_check - used for the next "new data" evaluation
        config['set_new_data_check'] = curr_list

        # The new_list is the set of new files - these will be processed
        ret['new_files'] = new_list
        ret['start_time'] = get_time_from_filename(new_list[0][0],
                                                   date_extraction_pattern,
                                                   date_pattern)
        ret['end_time'] = get_time_from_filename(new_list[-1][0],
                                                 date_extraction_pattern,
                                                 date_pattern)
        ret['bounding_box'] = {}
        ret['vars'] = []

        return ret
Example #22
0
    def test__constraints_for_new_request(self):
        edres = ExternalDataset(name='test_ed_res',
                                dataset_description=DatasetDescription(),
                                update_description=UpdateDescription(),
                                contact=ContactInformation())

        #        old_list = [
        #            ('http://marine.rutgers.edu/cool/maracoos/codar/ooi/radials/BELM/RDLi_BELM_2012_06_05_0000.ruv',
        #             '04-Jun-2012 20:43',
        #             '136K'),
        #            ('http://marine.rutgers.edu/cool/maracoos/codar/ooi/radials/BELM/RDLi_BELM_2012_06_05_0100.ruv',
        #             '04-Jun-2012 21:43',
        #             '135K'),
        #            ('http://marine.rutgers.edu/cool/maracoos/codar/ooi/radials/BELM/RDLi_BELM_2012_06_05_0200.ruv',
        #             '04-Jun-2012 22:42',
        #             '137K'),
        #            ('http://marine.rutgers.edu/cool/maracoos/codar/ooi/radials/BELM/RDLi_BELM_2012_06_05_0300.ruv',
        #             '04-Jun-2012 23:41',
        #             '136K'),
        #            ('http://marine.rutgers.edu/cool/maracoos/codar/ooi/radials/BELM/RDLi_BELM_2012_06_05_0400.ruv',
        #             '05-Jun-2012 00:41',
        #             '150K'),
        #            ('http://marine.rutgers.edu/cool/maracoos/codar/ooi/radials/BELM/RDLi_BELM_2012_06_05_0500.ruv',
        #             '05-Jun-2012 01:41',
        #             '142K'),
        #            ('http://marine.rutgers.edu/cool/maracoos/codar/ooi/radials/BELM/RDLi_BELM_2012_06_05_0600.ruv',
        #             '05-Jun-2012 02:41',
        #             '138K'),
        #            ('http://marine.rutgers.edu/cool/maracoos/codar/ooi/radials/BELM/RDLi_BELM_2012_06_05_0700.ruv',
        #             '05-Jun-2012 03:41',
        #             '136K'),
        #            ('http://marine.rutgers.edu/cool/maracoos/codar/ooi/radials/BELM/RDLi_BELM_2012_06_05_0800.ruv',
        #             '05-Jun-2012 04:41',
        #             '138K'),
        #            ('http://marine.rutgers.edu/cool/maracoos/codar/ooi/radials/BELM/RDLi_BELM_2012_06_05_0900.ruv',
        #             '05-Jun-2012 05:40',
        #             '147K'),
        #            ('http://marine.rutgers.edu/cool/maracoos/codar/ooi/radials/BELM/RDLi_BELM_2012_06_05_1000.ruv',
        #             '05-Jun-2012 06:40',
        #             '143K'),
        #            ('http://marine.rutgers.edu/cool/maracoos/codar/ooi/radials/BELM/RDLi_BELM_2012_06_05_1100.ruv',
        #             '05-Jun-2012 07:40',
        #             '148K'),
        #            ('http://marine.rutgers.edu/cool/maracoos/codar/ooi/radials/BELM/RDLi_BELM_2012_06_05_1200.ruv',
        #             '05-Jun-2012 08:40',
        #             '147K'),
        #            ('http://marine.rutgers.edu/cool/maracoos/codar/ooi/radials/BELM/RDLi_BELM_2012_06_05_1300.ruv',
        #             '05-Jun-2012 09:39',
        #             '148K'),
        #            ('http://marine.rutgers.edu/cool/maracoos/codar/ooi/radials/BELM/RDLi_BELM_2012_06_05_1400.ruv',
        #             '05-Jun-2012 10:38',
        #             '143K'),
        #            ('http://marine.rutgers.edu/cool/maracoos/codar/ooi/radials/BELM/RDLi_BELM_2012_06_05_1500.ruv',
        #             '05-Jun-2012 11:43',
        #             '143K'),
        #            ('http://marine.rutgers.edu/cool/maracoos/codar/ooi/radials/BELM/RDLi_BELM_2012_06_05_1600.ruv',
        #             '05-Jun-2012 12:43',
        #             '146K'),
        #            ('http://marine.rutgers.edu/cool/maracoos/codar/ooi/radials/BELM/RDLi_BELM_2012_06_05_1700.ruv',
        #             '05-Jun-2012 13:42',
        #             '134K'),
        #            ('http://marine.rutgers.edu/cool/maracoos/codar/ooi/radials/BELM/RDLi_BELM_2012_06_05_1800.ruv',
        #             '05-Jun-2012 14:42',
        #             '143K'),
        #            ('http://marine.rutgers.edu/cool/maracoos/codar/ooi/radials/BELM/RDLi_BELM_2012_06_05_1900.ruv',
        #             '05-Jun-2012 15:42',
        #             '148K'),
        #            ('http://marine.rutgers.edu/cool/maracoos/codar/ooi/radials/BELM/RDLi_BELM_2012_06_05_2000.ruv',
        #             '05-Jun-2012 16:41',
        #             '157K'),
        #            ('http://marine.rutgers.edu/cool/maracoos/codar/ooi/radials/BELM/RDLi_BELM_2012_06_05_2100.ruv',
        #             '05-Jun-2012 17:41',
        #             '160K'),
        #            ('http://marine.rutgers.edu/cool/maracoos/codar/ooi/radials/BELM/RDLi_BELM_2012_06_05_2200.ruv',
        #             '05-Jun-2012 18:41',
        #             '158K'),
        #            ('http://marine.rutgers.edu/cool/maracoos/codar/ooi/radials/BELM/RDLi_BELM_2012_06_05_2300.ruv',
        #             '05-Jun-2012 19:41',
        #             '148K'),
        #            ('http://marine.rutgers.edu/cool/maracoos/codar/ooi/radials/BELM/RDLi_BELM_2012_06_06_0000.ruv',
        #             '05-Jun-2012 20:40',
        #             '140K'),
        #            ('http://marine.rutgers.edu/cool/maracoos/codar/ooi/radials/BELM/RDLi_BELM_2012_06_06_0100.ruv',
        #             '05-Jun-2012 21:40',
        #             '133K'),
        #            ('http://marine.rutgers.edu/cool/maracoos/codar/ooi/radials/BELM/RDLi_BELM_2012_06_06_0200.ruv',
        #             '05-Jun-2012 22:40',
        #             '143K'),
        #            ('http://marine.rutgers.edu/cool/maracoos/codar/ooi/radials/BELM/RDLi_BELM_2012_06_06_0300.ruv',
        #             '05-Jun-2012 23:39',
        #             '156K'),
        #            ('http://marine.rutgers.edu/cool/maracoos/codar/ooi/radials/BELM/RDLi_BELM_2012_06_06_0400.ruv',
        #             '06-Jun-2012 00:39',
        #             '146K'),
        #            ('http://marine.rutgers.edu/cool/maracoos/codar/ooi/radials/BELM/RDLi_BELM_2012_06_06_0500.ruv',
        #             '06-Jun-2012 01:39',
        #             '147K'),
        #            ('http://marine.rutgers.edu/cool/maracoos/codar/ooi/radials/BELM/RDLi_BELM_2012_06_06_0600.ruv',
        #             '06-Jun-2012 02:39',
        #             '147K'),
        #            ('http://marine.rutgers.edu/cool/maracoos/codar/ooi/radials/BELM/RDLi_BELM_2012_06_06_0700.ruv',
        #             '06-Jun-2012 03:43',
        #             '148K'),
        #            ('http://marine.rutgers.edu/cool/maracoos/codar/ooi/radials/BELM/RDLi_BELM_2012_06_06_0800.ruv',
        #             '06-Jun-2012 04:42',
        #             '137K'),
        #            ('http://marine.rutgers.edu/cool/maracoos/codar/ooi/radials/BELM/RDLi_BELM_2012_06_06_0900.ruv',
        #             '06-Jun-2012 05:42',
        #             '130K'),
        #            ('http://marine.rutgers.edu/cool/maracoos/codar/ooi/radials/BELM/RDLi_BELM_2012_06_06_1000.ruv',
        #             '06-Jun-2012 06:42',
        #             '129K'),
        #            ('http://marine.rutgers.edu/cool/maracoos/codar/ooi/radials/BELM/RDLi_BELM_2012_06_06_1100.ruv',
        #             '06-Jun-2012 07:42',
        #             '136K'),
        #            ('http://marine.rutgers.edu/cool/maracoos/codar/ooi/radials/BELM/RDLi_BELM_2012_06_06_1200.ruv',
        #             '06-Jun-2012 08:42',
        #             '137K'),
        #            ('http://marine.rutgers.edu/cool/maracoos/codar/ooi/radials/BELM/RDLi_BELM_2012_06_06_1300.ruv',
        #             '06-Jun-2012 09:41',
        #             '151K'),
        #            ('http://marine.rutgers.edu/cool/maracoos/codar/ooi/radials/BELM/RDLi_BELM_2012_06_06_1400.ruv',
        #             '06-Jun-2012 10:41',
        #             '153K'),
        #            ('http://marine.rutgers.edu/cool/maracoos/codar/ooi/radials/BELM/RDLi_BELM_2012_06_06_1500.ruv',
        #             '06-Jun-2012 11:41',
        #             '156K'),
        #            ('http://marine.rutgers.edu/cool/maracoos/codar/ooi/radials/BELM/RDLi_BELM_2012_06_06_1600.ruv',
        #             '06-Jun-2012 12:41',
        #             '157K'),
        #            ('http://marine.rutgers.edu/cool/maracoos/codar/ooi/radials/BELM/RDLi_BELM_2012_06_06_1700.ruv',
        #             '06-Jun-2012 13:40',
        #             '161K'),]

        old_list = [
            ('test_data/ruv/RDLm_SEAB_2012_06_06_1200.ruv', 1339006638.0,
             119066),
            ('test_data/ruv/RDLm_SEAB_2012_06_06_1300.ruv', 1339006629.0,
             109316),
            ('test_data/ruv/RDLm_SEAB_2012_06_06_1400.ruv', 1339006521.0,
             113411),
        ]

        #        old_list = None

        config = {
            'external_dataset_res': edres,
            #            'new_data_check':None,
            'new_data_check': old_list,
            'ds_params': {
                # These would be extracted from the dataset_description.parameters during _init_acquisition_cycle, but since that isn't called, just add them here
                # 'base_url':'http://marine.rutgers.edu/cool/maracoos/codar/ooi/radials/BELM/',
                # CBM: What's the difference between RDLi and RDLm???
                # 'pattern':'<a href="(RDLm.*\.ruv)">.*(\d{2}-[a-zA-Z]{3}-\d{4} \d{2}:\d{2})\s*(\d{1,5}\w)',# Appended to base to filter files; Either a shell style pattern (for filesystem) or regex (for http/ftp)
                'base_url':
                'test_data/ruv',
                'list_pattern':
                'RDLm_SEAB_*.ruv',
                'date_pattern':
                '%Y %m %d %H %M',
                'date_extraction_pattern':
                'RDLm_SEAB_([\d]{4})_([\d]{2})_([\d]{2})_([\d]{2})([\d]{2}).ruv'
            }
        }
        ret = RuvDataHandler._constraints_for_new_request(config)
        log.debug('test__new_data_constraints: {0}'.format(config))
        self.assertEqual(
            ret['new_files'],
            list_file_info(config['ds_params']['base_url'],
                           config['ds_params']['list_pattern']))
 def test_list_file_info_by_fs(self, list_file_info_fs_mock, _get_type_mock):
     _get_type_mock.return_value = 'fs'
     list_file_info_fs_mock.return_value = ['file1', 'file2']
     self.assertEqual(list_file_info('fs', 'pattern'), ['file1', 'file2'])
 def test_list_file_info_by_http(self, list_file_info_http_mock, _get_type_mock):
     _get_type_mock.return_value = 'http'
     list_file_info_http_mock.return_value = ['file1', 'file2']
     self.assertEqual(list_file_info('http', 'pattern'), ['file1', 'file2'])
    def test__constraints_for_new_request(self):
        """_constraints_for_new_request reports every listed file not in new_data_check.

        Builds a config pointing at the local test_data/ruv fixtures with a stale
        'new_data_check' list, then verifies that the handler's returned
        'new_files' matches a fresh directory listing.
        """
        edres = ExternalDataset(name='test_ed_res',
                                dataset_description=DatasetDescription(),
                                update_description=UpdateDescription(),
                                contact=ContactInformation())

        # Previously-seen files as (path, mtime, size) tuples, in the shape
        # produced by list_file_info.  These entries are stale relative to the
        # fixture directory, so the handler should flag newer files.
        old_list = [
            ('test_data/ruv/RDLm_SEAB_2012_06_06_1200.ruv', 1339006638.0, 119066),
            ('test_data/ruv/RDLm_SEAB_2012_06_06_1300.ruv', 1339006629.0, 109316),
            ('test_data/ruv/RDLm_SEAB_2012_06_06_1400.ruv', 1339006521.0, 113411),
        ]

        config = {
            'external_dataset_res': edres,
            'new_data_check': old_list,
            'ds_params': {
                # These would normally be extracted from
                # dataset_description.parameters during _init_acquisition_cycle;
                # since that isn't called here, supply them directly.
                'base_url': 'test_data/ruv',
                'list_pattern': 'RDLm_SEAB_*.ruv',
                'date_pattern': '%Y %m %d %H %M',
                # Raw string: the pattern contains regex escapes such as \d.
                'date_extraction_pattern': r'RDLm_SEAB_([\d]{4})_([\d]{2})_([\d]{2})_([\d]{2})([\d]{2}).ruv',
            }
        }
        ret = RuvDataHandler._constraints_for_new_request(config)
        log.debug('test__new_data_constraints: {0}'.format(config))
        # With a stale old_list, every currently-listed fixture file is "new".
        self.assertEqual(
            ret['new_files'],
            list_file_info(config['ds_params']['base_url'],
                           config['ds_params']['list_pattern']))