def _constraints_for_new_request(cls, config):
    """
    Build the constraints dict for a "new data" request.

    @param config Dict of configuration parameters; reads 'new_data_check'
           (the file listing from the previous check) and the
           'ds_params.*' data-source settings
    @retval constraints dict with 'start_time', 'end_time', 'new_files',
            'bounding_box' and 'vars'
    """
    old_list = get_safe(config, 'new_data_check') or []
    # CBM: Fix this when the DotList crap is sorted out
    old_list = list(old_list)  # NOTE that the internal tuples are also DotList objects

    ret = {}
    base_url = get_safe(config, 'ds_params.base_url')
    list_pattern = get_safe(config, 'ds_params.list_pattern')
    date_pattern = get_safe(config, 'ds_params.date_pattern')
    date_extraction_pattern = get_safe(config, 'ds_params.date_extraction_pattern')

    curr_list = list_file_info(base_url, list_pattern)
    # Files present now that were not seen on the previous check
    new_list = [x for x in curr_list if x not in old_list]

    # NOTE(review): raises IndexError when there are no new files — confirm
    # callers expect that (other variants raise NoNewDataWarning instead)
    ret['start_time'] = get_time_from_filename(new_list[0][0], date_extraction_pattern, date_pattern)
    ret['end_time'] = get_time_from_filename(new_list[-1][0], date_extraction_pattern, date_pattern)
    ret['new_files'] = new_list
    ret['bounding_box'] = {}
    ret['vars'] = []

    return ret
def _constraints_for_new_request(cls, config):
    """
    Build the constraints dict for a "new data" request.

    @param config Dict of configuration parameters; reads 'new_data_check'
           (previous file listing) and 'ds_params.*'; writes
           'set_new_data_check' with the current listing for the next check
    @retval constraints dict with 'new_files', 'start_time', 'end_time',
            'bounding_box' and 'vars'
    @raise NoNewDataWarning when no new files are available
    """
    old_list = get_safe(config, 'new_data_check') or []
    # CBM: Fix this when the DotList crap is sorted out
    old_list = list(old_list)  # NOTE that the internal tuples are also DotList objects

    ret = {}
    base_url = get_safe(config, 'ds_params.base_url')
    list_pattern = get_safe(config, 'ds_params.list_pattern')
    date_pattern = get_safe(config, 'ds_params.date_pattern')
    date_extraction_pattern = get_safe(config, 'ds_params.date_extraction_pattern')

    curr_list = list_file_info(base_url, list_pattern)

    # Determine which files are new
    new_list = [x for x in curr_list if x not in old_list]
    # BUG FIX: was "len(new_list) is 0" — identity comparison on an int
    if not new_list:
        raise NoNewDataWarning()

    # The curr_list is the new new_data_check - used for the next "new data" evaluation
    config['set_new_data_check'] = curr_list

    # The new_list is the set of new files - these will be processed
    ret['new_files'] = new_list
    ret['start_time'] = get_time_from_filename(new_list[0][0], date_extraction_pattern, date_pattern)
    ret['end_time'] = get_time_from_filename(new_list[-1][0], date_extraction_pattern, date_pattern)
    ret['bounding_box'] = {}
    ret['vars'] = []

    return ret
def _constraints_for_new_request(cls, config):
    """
    Compare the current file listing against the previous check and build
    the constraints dict for retrieving the newly-available files.

    (Docstring fixed: previously described an unrelated 'array_len'/'count'
    random-constraints stub.)

    @param config Dict of configuration parameters; reads 'new_data_check'
           (previous listing) and 'ds_params.*'; writes 'set_new_data_check'
    @retval constraints dict with 'new_files', 'start_time', 'end_time',
            'bounding_box' and 'vars'
    @raise NoNewDataWarning when no new files are found
    """
    old_list = get_safe(config, 'new_data_check') or []
    ret = {}
    base_url = get_safe(config, 'ds_params.base_url')
    list_pattern = get_safe(config, 'ds_params.list_pattern')
    date_pattern = get_safe(config, 'ds_params.date_pattern')
    date_extraction_pattern = get_safe(config, 'ds_params.date_extraction_pattern')

    curr_list = list_file_info(base_url, list_pattern)

    # Determine which files are new
    # old_list comes in as a list of lists: [[]]
    # curr_list comes in as a list of tuples: [()]
    # each needs to be a set of tuples for set.difference to work properly
    curr_set = set(tuple(x) for x in curr_list)
    old_set = set(tuple(x) for x in old_list)
    # BUG FIX: set.difference has arbitrary ordering, so start_time/end_time
    # were taken from arbitrary entries; sort for determinism.  Assumes
    # lexicographic filename order is chronological — TODO confirm.
    new_list = sorted(curr_set.difference(old_set))
    # BUG FIX: was "len(new_list) is 0" — identity comparison on an int
    if not new_list:
        raise NoNewDataWarning()

    # The curr_list is the new new_data_check - used for the next "new data" evaluation
    config['set_new_data_check'] = curr_list

    # The new_list is the set of new files - these will be processed
    ret['new_files'] = new_list
    ret['start_time'] = get_time_from_filename(new_list[0][0], date_extraction_pattern, date_pattern)
    ret['end_time'] = get_time_from_filename(new_list[-1][0], date_extraction_pattern, date_pattern)
    ret['bounding_box'] = {}
    ret['vars'] = []

    log.debug('constraints_for_new_request: {0}'.format(ret))
    return ret
def _constraints_for_new_request(cls, config):
    """
    Compare the current file listing against the previous check and build
    the constraints dict for retrieving the newly-available files.

    @param config Dict of configuration parameters; reads 'new_data_check'
           (previous listing) and 'ds_params.*'; writes 'set_new_data_check'
    @retval constraints dict with 'new_files', 'start_time', 'end_time',
            'bounding_box' and 'vars'
    @raise NoNewDataWarning when no new files are found
    """
    # (Removed: large block of commented-out random-constraints stub code.)
    old_list = get_safe(config, 'new_data_check') or []
    ret = {}
    base_url = get_safe(config, 'ds_params.base_url')
    list_pattern = get_safe(config, 'ds_params.list_pattern')
    date_pattern = get_safe(config, 'ds_params.date_pattern')
    date_extraction_pattern = get_safe(config, 'ds_params.date_extraction_pattern')

    curr_list = list_file_info(base_url, list_pattern)

    # Determine which files are new
    # old_list comes in as a list of lists: [[]]
    # curr_list comes in as a list of tuples: [()]
    # each needs to be a set of tuples for set.difference to work properly
    curr_set = set(tuple(x) for x in curr_list)
    old_set = set(tuple(x) for x in old_list)
    # BUG FIX: set.difference has arbitrary ordering, so start_time/end_time
    # were taken from arbitrary entries; sort for determinism.  Assumes
    # lexicographic filename order is chronological — TODO confirm.
    new_list = sorted(curr_set.difference(old_set))
    # BUG FIX: was "len(new_list) is 0" — identity comparison on an int
    if not new_list:
        raise NoNewDataWarning()

    # The curr_list is the new new_data_check - used for the next "new data" evaluation
    config['set_new_data_check'] = curr_list

    # The new_list is the set of new files - these will be processed
    ret['new_files'] = new_list
    ret['start_time'] = get_time_from_filename(new_list[0][0], date_extraction_pattern, date_pattern)
    ret['end_time'] = get_time_from_filename(new_list[-1][0], date_extraction_pattern, date_pattern)
    ret['bounding_box'] = {}
    ret['vars'] = []

    log.debug('constraints_for_new_request: {0}'.format(ret))
    return ret
def _constraints_for_historical_request(cls, config):
    """
    Select the files whose name-encoded timestamp falls within the
    requested historical time window.

    @param config dictionary of configuration parameters; reads
           'ds_params.*' and 'constraints.start_time'/'constraints.end_time'
    @retval dict with a single key 'new_files' holding the matching entries
    """
    base_url = get_safe(config, 'ds_params.base_url')
    list_pattern = get_safe(config, 'ds_params.list_pattern')
    date_pattern = get_safe(config, 'ds_params.date_pattern')
    date_extraction_pattern = get_safe(config, 'ds_params.date_extraction_pattern')
    start_time = get_safe(config, 'constraints.start_time')
    end_time = get_safe(config, 'constraints.end_time')

    available = list_file_info(base_url, list_pattern)
    # Keep only entries whose extracted timestamp lies inside [start, end]
    matching = [
        entry for entry in available
        if start_time <= get_time_from_filename(entry[0], date_extraction_pattern, date_pattern) <= end_time
    ]
    return {'new_files': matching}
def _constraints_for_new_request(cls, config):
    """
    Compare the current file listing against the previous check and build
    the constraints dict for retrieving the newly-available files.

    (Docstring fixed: previously described an unrelated 'array_len'/'count'
    random-constraints stub.)

    @param config Dict of configuration parameters; reads 'new_data_check'
           (previous listing) and 'ds_params.*'; writes 'set_new_data_check'
    @retval constraints dict with 'new_files', 'start_time', 'end_time',
            'bounding_box' and 'vars'
    @raise NoNewDataWarning when no new files are found
    """
    old_list = get_safe(config, "new_data_check") or []
    ret = {}
    base_url = get_safe(config, "ds_params.base_url")
    list_pattern = get_safe(config, "ds_params.list_pattern")
    date_pattern = get_safe(config, "ds_params.date_pattern")
    date_extraction_pattern = get_safe(config, "ds_params.date_extraction_pattern")

    curr_list = list_file_info(base_url, list_pattern)

    # Determine which files are new
    # old_list comes in as a list of lists: [[]]
    # curr_list comes in as a list of tuples: [()]
    # each needs to be a set of tuples for set.difference to work properly
    curr_set = set(tuple(x) for x in curr_list)
    old_set = set(tuple(x) for x in old_list)
    # BUG FIX: set.difference has arbitrary ordering, so start_time/end_time
    # were taken from arbitrary entries; sort for determinism.  Assumes
    # lexicographic filename order is chronological — TODO confirm.
    new_list = sorted(curr_set.difference(old_set))
    # BUG FIX: was "len(new_list) is 0" — identity comparison on an int
    if not new_list:
        raise NoNewDataWarning()

    # The curr_list is the new new_data_check - used for the next "new data" evaluation
    config["set_new_data_check"] = curr_list

    # The new_list is the set of new files - these will be processed
    ret["new_files"] = new_list
    ret["start_time"] = get_time_from_filename(new_list[0][0], date_extraction_pattern, date_pattern)
    ret["end_time"] = get_time_from_filename(new_list[-1][0], date_extraction_pattern, date_pattern)
    ret["bounding_box"] = {}
    ret["vars"] = []

    log.debug("constraints_for_new_request: {0}".format(ret))
    return ret
def test_get_time_from_filename(self, basename_mock, re_mock, mktime_mock):
    """
    get_time_from_filename should turn the date groups extracted from a
    file name into the corresponding epoch timestamp (collaborators mocked).
    """
    basename_mock.return_value = 'test_data/ruv'
    match_mock = MagicMock()
    match_mock.groups.return_value = ('2012', '06', '06', '12', '00')
    re_mock.return_value = match_mock
    mktime_mock.return_value = 1338998400.0
    self.assertEqual(
        get_time_from_filename(
            file_name='test_data/ruv/RDLm_SEAB_2012_06_06_1200.ruv',
            # FIX: raw string — "\d" in a non-raw literal is an invalid escape
            # (DeprecationWarning, future SyntaxError); value is unchanged.
            # NOTE(review): the trailing "." before "ruv" is an unescaped
            # any-char; harmless here since re is mocked.
            date_extraction_pattern=r'RDLm_SEAB_([\d]{4})_([\d]{2})_([\d]{2})_([\d]{2})([\d]{2}).ruv',
            date_pattern='%Y %m %d %H %M'),
        1338998400.0)
def _constraints_for_new_request(cls, config):
    """
    Compare the current file listing against the previous check and build
    the constraints dict for retrieving the newly-available files.

    @param config Dict of configuration parameters; reads 'new_data_check'
           (previous listing) and 'ds_params.*'; writes 'set_new_data_check'
    @retval constraints dict with 'new_files', 'start_time', 'end_time',
            'bounding_box' and 'vars'
    @raise NoNewDataWarning when no new files are found
    """
    # (Removed: large block of commented-out random-constraints stub code.)
    old_list = get_safe(config, 'new_data_check') or []
    ret = {}
    base_url = get_safe(config, 'ds_params.base_url')
    list_pattern = get_safe(config, 'ds_params.list_pattern')
    date_pattern = get_safe(config, 'ds_params.date_pattern')
    date_extraction_pattern = get_safe(config, 'ds_params.date_extraction_pattern')

    curr_list = list_file_info(base_url, list_pattern)

    # Determine which files are new
    # NOTE(review): these warn-level diagnostics look like leftover debugging —
    # consider demoting to debug; kept as-is to preserve behavior.
    log.warn('curr_list:{0}'.format(curr_list))
    log.warn('old_list:{0}'.format(old_list))
    # old_list entries are list-like while curr_list holds tuples, hence the
    # per-element conversions for the membership test
    new_list = [tuple(x) for x in curr_list if list(x) not in old_list]
    # BUG FIX: was "len(new_list) is 0" — identity comparison on an int
    if not new_list:
        raise NoNewDataWarning()

    # The curr_list is the new new_data_check - used for the next "new data" evaluation
    config['set_new_data_check'] = curr_list

    # The new_list is the set of new files - these will be processed
    ret['new_files'] = new_list
    ret['start_time'] = get_time_from_filename(new_list[0][0], date_extraction_pattern, date_pattern)
    ret['end_time'] = get_time_from_filename(new_list[-1][0], date_extraction_pattern, date_pattern)
    ret['bounding_box'] = {}
    ret['vars'] = []

    log.warn('constraints_for_new_request: {0}'.format(ret))
    return ret
def _constraints_for_new_request(cls, config):
    """
    Build the constraints dict for a "new data" request.

    @param config Dict of configuration parameters; reads 'new_data_check'
           (the file listing from the previous check) and the
           'ds_params.*' data-source settings
    @retval constraints dict with 'start_time', 'end_time', 'new_files',
            'bounding_box' and 'vars'
    """
    old_list = get_safe(config, 'new_data_check') or []
    # CBM: Fix this when the DotList crap is sorted out
    old_list = list(old_list)  # NOTE that the internal tuples are also DotList objects

    ret = {}
    base_url = get_safe(config, 'ds_params.base_url')
    list_pattern = get_safe(config, 'ds_params.list_pattern')
    date_pattern = get_safe(config, 'ds_params.date_pattern')
    date_extraction_pattern = get_safe(config, 'ds_params.date_extraction_pattern')

    curr_list = list_file_info(base_url, list_pattern)
    # Files present now that were not seen on the previous check
    new_list = [x for x in curr_list if x not in old_list]

    # NOTE(review): raises IndexError when there are no new files — confirm
    # callers expect that (other variants raise NoNewDataWarning instead)
    ret['start_time'] = get_time_from_filename(new_list[0][0], date_extraction_pattern, date_pattern)
    ret['end_time'] = get_time_from_filename(new_list[-1][0], date_extraction_pattern, date_pattern)
    ret['new_files'] = new_list
    ret['bounding_box'] = {}
    ret['vars'] = []

    return ret
def _constraints_for_new_request(cls, config):
    """
    Build the constraints dict for a "new data" request.

    @param config Dict of configuration parameters; reads 'new_data_check'
           (previous file listing) and 'ds_params.*'; writes
           'set_new_data_check' with the current listing for the next check
    @retval constraints dict with 'new_files', 'start_time', 'end_time',
            'bounding_box' and 'vars'
    @raise NoNewDataWarning when no new files are available
    """
    old_list = get_safe(config, 'new_data_check') or []
    # CBM: Fix this when the DotList crap is sorted out
    old_list = list(old_list)  # NOTE that the internal tuples are also DotList objects

    ret = {}
    base_url = get_safe(config, 'ds_params.base_url')
    list_pattern = get_safe(config, 'ds_params.list_pattern')
    date_pattern = get_safe(config, 'ds_params.date_pattern')
    date_extraction_pattern = get_safe(config, 'ds_params.date_extraction_pattern')

    curr_list = list_file_info(base_url, list_pattern)

    # Determine which files are new
    new_list = [x for x in curr_list if x not in old_list]
    # BUG FIX: was "len(new_list) is 0" — identity comparison on an int
    if not new_list:
        raise NoNewDataWarning()

    # The curr_list is the new new_data_check - used for the next "new data" evaluation
    config['set_new_data_check'] = curr_list

    # The new_list is the set of new files - these will be processed
    ret['new_files'] = new_list
    ret['start_time'] = get_time_from_filename(new_list[0][0], date_extraction_pattern, date_pattern)
    ret['end_time'] = get_time_from_filename(new_list[-1][0], date_extraction_pattern, date_pattern)
    ret['bounding_box'] = {}
    ret['vars'] = []

    return ret
def _constraints_for_historical_request(cls, config):
    """
    Select the files whose name-encoded timestamp falls within the
    requested historical time window.

    @param config dictionary of configuration parameters; reads
           'ds_params.*' and 'constraints.start_time'/'constraints.end_time'
    @retval dict with a single key 'new_files' holding the matching entries
    """
    base_url = get_safe(config, 'ds_params.base_url')
    list_pattern = get_safe(config, 'ds_params.list_pattern')
    date_pattern = get_safe(config, 'ds_params.date_pattern')
    date_extraction_pattern = get_safe(config, 'ds_params.date_extraction_pattern')
    start_time = get_safe(config, 'constraints.start_time')
    end_time = get_safe(config, 'constraints.end_time')

    listing = list_file_info(base_url, list_pattern)
    # Keep only entries whose extracted timestamp lies inside [start, end]
    selected = [
        item for item in listing
        if start_time <= get_time_from_filename(item[0], date_extraction_pattern, date_pattern) <= end_time
    ]
    return {'new_files': selected}
def _constraints_for_historical_request(cls, config):
    """
    Filter the data source's file listing down to the entries whose
    name-encoded timestamp lies inside the requested time window.

    @param config dictionary of configuration parameters; reads
           'ds_params.*' and 'constraints.start_time'/'constraints.end_time'
    @retval dict with a single key 'new_files' holding the matching entries
    """
    base_url = get_safe(config, 'ds_params.base_url')
    list_pattern = get_safe(config, 'ds_params.list_pattern')
    date_pattern = get_safe(config, 'ds_params.date_pattern')
    date_extraction_pattern = get_safe(config, 'ds_params.date_extraction_pattern')
    start_time = get_safe(config, 'constraints.start_time')
    end_time = get_safe(config, 'constraints.end_time')

    candidates = list_file_info(base_url, list_pattern)
    # A candidate is kept when its extracted timestamp is within [start, end]
    in_window = [
        c for c in candidates
        if start_time <= get_time_from_filename(c[0], date_extraction_pattern, date_pattern) <= end_time
    ]
    return {'new_files': in_window}
def _constraints_for_historical_request(cls, config):
    """
    Return the subset of the data source's files whose name-encoded
    timestamp falls inside the requested historical window.

    @param config dictionary of configuration parameters; reads
           'ds_params.*' and 'constraints.start_time'/'constraints.end_time'
    @retval dict with a single key 'new_files' holding the matching entries
    """
    base_url = get_safe(config, 'ds_params.base_url')
    list_pattern = get_safe(config, 'ds_params.list_pattern')
    date_pattern = get_safe(config, 'ds_params.date_pattern')
    date_extraction_pattern = get_safe(config, 'ds_params.date_extraction_pattern')
    start_time = get_safe(config, 'constraints.start_time')
    end_time = get_safe(config, 'constraints.end_time')

    files = list_file_info(base_url, list_pattern)
    # Keep a file when its extracted timestamp is within [start, end]
    hits = [
        f for f in files
        if start_time <= get_time_from_filename(f[0], date_extraction_pattern, date_pattern) <= end_time
    ]
    return {'new_files': hits}