def _constraints_for_new_request(cls, config):
    """Build a constraints dict describing files not seen in the previous scan.

    @param config dict of configuration parameters; reads 'new_data_check'
           (the previously-seen file list) and the 'ds_params.*' keys
    @retval dict with 'start_time', 'end_time', 'new_files',
            'bounding_box' and 'vars' keys
    """
    old_list = get_safe(config, 'new_data_check') or []
    # CBM: Fix this when the DotList crap is sorted out
    old_list = list(old_list)  # NOTE that the internal tuples are also DotList objects

    ret = {}
    base_url = get_safe(config, 'ds_params.base_url')
    list_pattern = get_safe(config, 'ds_params.list_pattern')
    date_pattern = get_safe(config, 'ds_params.date_pattern')
    date_extraction_pattern = get_safe(config, 'ds_params.date_extraction_pattern')

    curr_list = list_file_info(base_url, list_pattern)

    # Anything in the current listing that was not seen before is "new"
    new_list = [x for x in curr_list if x not in old_list]

    # NOTE(review): new_list[0] raises IndexError when there are no new files;
    # sibling variants raise NoNewDataWarning here - confirm intended behavior.
    ret['start_time'] = get_time_from_filename(new_list[0][0], date_extraction_pattern, date_pattern)
    # Idiomatic negative index instead of new_list[len(new_list) - 1]
    ret['end_time'] = get_time_from_filename(new_list[-1][0], date_extraction_pattern, date_pattern)
    ret['new_files'] = new_list
    ret['bounding_box'] = {}
    ret['vars'] = []

    return ret
def test__constraints_for_new_request(self):
    """Verify new-request constraints list every file in the test directory."""
    previously_seen = [
        ('test_data/slocum/ru05-2012-021-0-0-sbd.dat', 1337261358.0, 521081),
        ('test_data/slocum/ru05-2012-022-0-0-sbd.dat', 1337261358.0, 521081),
    ]
    dataset_res = ExternalDataset(name='test_ed_res',
                                  dataset_description=DatasetDescription(),
                                  update_description=UpdateDescription(),
                                  contact=ContactInformation())
    # ds_params would normally be extracted from dataset_description.parameters
    # during _init_acquisition_cycle; supplied directly since that isn't called.
    config = {
        'external_dataset_res': dataset_res,
        'new_data_check': previously_seen,
        'ds_params': {
            'base_url': 'test_data/slocum/',
            'list_pattern': 'ru05-*-sbd.dat',  # shell pattern (fs) or regex (http/ftp)
            'date_pattern': '%Y %j',
            'date_extraction_pattern': 'ru05-([\d]{4})-([\d]{3})-\d-\d-sbd.dat',
        },
    }

    ret = SlocumDataHandler._constraints_for_new_request(config)
    log.debug('test__constraints_for_new_request: {0}'.format(ret['new_files']))

    expected = list_file_info(config['ds_params']['base_url'],
                              config['ds_params']['list_pattern'])
    self.assertEqual(ret['new_files'], expected)
def _constraints_for_historical_request(cls, config):
    """
    Returns a list of new file names in the given directory
    @param config dictionary of configuration parameters
    @retval list of new file names
    """
    base_url = get_safe(config, 'ds_params.base_url')
    list_pattern = get_safe(config, 'ds_params.list_pattern')
    date_pattern = get_safe(config, 'ds_params.date_pattern')
    date_extraction_pattern = get_safe(config, 'ds_params.date_extraction_pattern')
    start_time = get_safe(config, 'constraints.start_time')
    end_time = get_safe(config, 'constraints.end_time')

    # Keep only files whose filename-encoded timestamp falls inside the window
    candidates = list_file_info(base_url, list_pattern)
    in_window = [
        entry for entry in candidates
        if start_time <= get_time_from_filename(entry[0], date_extraction_pattern, date_pattern) <= end_time
    ]
    return {'new_files': in_window}
def _constraints_for_new_request(cls, config):
    """Determine which files are new, or have grown, since the last read.

    Compares the previously-read file list ('new_data_check') against the
    current directory contents: a file is included when it was not seen
    before, or when its current size exceeds the read position recorded
    for it last time (i.e. there is unread data appended to it).

    @param config dict of configuration parameters; also writes the current
           listing back into config['set_new_data_check']
    @retval dict with 'new_files', 'bounding_box' and 'vars' keys
    """
    old_list = get_safe(config, 'new_data_check') or []
    # CBM: Fix this when the DotList crap is sorted out
    old_list = list(old_list)  # NOTE that the internal tuples are also DotList objects

    ret = {}
    base_url = get_safe(config, 'ds_params.base_url')
    list_pattern = get_safe(config, 'ds_params.list_pattern')
    date_pattern = get_safe(config, 'ds_params.date_pattern')
    date_extraction_pattern = get_safe(config, 'ds_params.date_extraction_pattern')

    curr_list = list_file_info(base_url, list_pattern)

    # Index the previous listing by filename: O(n+m) instead of the nested
    # O(n*m) scan (assumes filenames are unique within a directory listing).
    old_by_name = {old_file[0]: old_file for old_file in old_list}

    new_list = []
    for curr_file in curr_list:
        old_file = old_by_name.get(curr_file[0])
        if old_file is None:
            # Never seen before - process the whole file
            new_list.append(curr_file)
        elif curr_file[2] > old_file[3]:
            # curr_file[2] is the current size, old_file[3] the position at the
            # last read - data has been appended since then
            new_list.append((curr_file[0], curr_file[1], curr_file[2], old_file[-1]))

    # curr_list becomes the baseline for the next "new data" evaluation
    config['set_new_data_check'] = curr_list

    ret['new_files'] = new_list
    ret['bounding_box'] = {}
    ret['vars'] = []

    return ret
def _constraints_for_new_request(cls, config):
    """Build constraints for files that appeared since the last check.

    @param config dict of configuration parameters; reads 'new_data_check'
           and 'ds_params.*', and writes 'set_new_data_check'
    @retval dict with 'new_files', 'start_time', 'end_time',
            'bounding_box' and 'vars' keys
    @raise NoNewDataWarning when no new files are present
    """
    old_list = get_safe(config, 'new_data_check') or []
    # CBM: Fix this when the DotList crap is sorted out
    old_list = list(old_list)  # NOTE that the internal tuples are also DotList objects

    ret = {}
    base_url = get_safe(config, 'ds_params.base_url')
    list_pattern = get_safe(config, 'ds_params.list_pattern')
    date_pattern = get_safe(config, 'ds_params.date_pattern')
    date_extraction_pattern = get_safe(config, 'ds_params.date_extraction_pattern')

    curr_list = list_file_info(base_url, list_pattern)

    # Determine which files are new
    new_list = [x for x in curr_list if x not in old_list]

    # BUG FIX: 'len(new_list) is 0' relied on CPython small-int identity;
    # use truthiness for the empty check instead
    if not new_list:
        raise NoNewDataWarning()

    # The curr_list is the new new_data_check - used for the next "new data" evaluation
    config['set_new_data_check'] = curr_list

    # The new_list is the set of new files - these will be processed
    ret['new_files'] = new_list
    ret['start_time'] = get_time_from_filename(new_list[0][0], date_extraction_pattern, date_pattern)
    # Idiomatic negative index instead of new_list[len(new_list) - 1]
    ret['end_time'] = get_time_from_filename(new_list[-1][0], date_extraction_pattern, date_pattern)
    ret['bounding_box'] = {}
    ret['vars'] = []

    return ret
def test__constraints_for_historical_request(self):
    """Historical constraints over the slocum test data return all matching files."""
    ds_params = {
        # Normally extracted from dataset_description.parameters during
        # _init_acquisition_cycle; supplied directly since that isn't called.
        'base_url': 'test_data/slocum',
        'list_pattern': 'ru05-*-sbd.dat',  # shell pattern (fs) or regex (http/ftp)
        'date_pattern': '%Y %j',
        'date_extraction_pattern': 'ru05-([\d]{4})-([\d]{3})-\d-\d-sbd.dat',
    }
    config = {
        'ds_params': ds_params,
        'constraints': {
            'start_time': 1327122000,
            'end_time': 1327294800,
        },
    }

    ret = SlocumDataHandler._constraints_for_historical_request(config)
    log.debug('test_constraints_for_historical_request: {0}'.format(config))

    expected = list_file_info(config['ds_params']['base_url'],
                              config['ds_params']['list_pattern'])
    self.assertEqual(ret['new_files'], expected)
def _constraints_for_new_request(cls, config):
    """Build constraints for files that appeared since the last check.

    (Docstring corrected: the previous text about random 'array_len'/'count'
    integers described a different, older implementation.)

    @param config dict of configuration parameters; reads 'new_data_check'
           and 'ds_params.*', and writes 'set_new_data_check'
    @retval constraints dictionary with 'new_files', 'start_time',
            'end_time', 'bounding_box' and 'vars'
    @raise NoNewDataWarning when no new files are present
    """
    old_list = get_safe(config, 'new_data_check') or []
    ret = {}
    base_url = get_safe(config, 'ds_params.base_url')
    list_pattern = get_safe(config, 'ds_params.list_pattern')
    date_pattern = get_safe(config, 'ds_params.date_pattern')
    date_extraction_pattern = get_safe(config, 'ds_params.date_extraction_pattern')

    curr_list = list_file_info(base_url, list_pattern)

    # old_list may arrive as a list of lists and curr_list as a list of
    # tuples; normalize both to sets of tuples so set.difference works.
    curr_set = set(tuple(x) for x in curr_list)
    old_set = set(tuple(x) for x in old_list)

    # BUG FIX: set iteration order is arbitrary, which made new_list[0]/[-1]
    # (and therefore start_time/end_time) nondeterministic - sort the result.
    new_list = sorted(curr_set.difference(old_set))

    # BUG FIX: 'len(new_list) is 0' tested identity, not equality
    if not new_list:
        raise NoNewDataWarning()

    # The curr_list is the new new_data_check - used for the next "new data" evaluation
    config['set_new_data_check'] = curr_list

    # The new_list is the set of new files - these will be processed
    ret['new_files'] = new_list
    ret['start_time'] = get_time_from_filename(new_list[0][0], date_extraction_pattern, date_pattern)
    ret['end_time'] = get_time_from_filename(new_list[-1][0], date_extraction_pattern, date_pattern)
    ret['bounding_box'] = {}
    ret['vars'] = []
    log.debug('constraints_for_new_request: {0}'.format(ret))
    return ret
def _constraints_for_new_request(cls, config):
    """Build constraints for files that appeared since the last check.

    @param config dict of configuration parameters; reads 'new_data_check'
           and 'ds_params.*', and writes 'set_new_data_check'
    @retval constraints dictionary with 'new_files', 'start_time',
            'end_time', 'bounding_box' and 'vars'
    @raise NoNewDataWarning when no new files are present
    """
    old_list = get_safe(config, 'new_data_check') or []
    ret = {}
    base_url = get_safe(config, 'ds_params.base_url')
    list_pattern = get_safe(config, 'ds_params.list_pattern')
    date_pattern = get_safe(config, 'ds_params.date_pattern')
    date_extraction_pattern = get_safe(config, 'ds_params.date_extraction_pattern')

    curr_list = list_file_info(base_url, list_pattern)

    # old_list may arrive as a list of lists and curr_list as a list of
    # tuples; normalize both to sets of tuples so set.difference works.
    curr_set = set(tuple(x) for x in curr_list)
    old_set = set(tuple(x) for x in old_list)

    # BUG FIX: set iteration order is arbitrary, which made new_list[0]/[-1]
    # (and therefore start_time/end_time) nondeterministic - sort the result.
    new_list = sorted(curr_set.difference(old_set))

    # BUG FIX: 'len(new_list) is 0' tested identity, not equality
    if not new_list:
        raise NoNewDataWarning()

    # The curr_list is the new new_data_check - used for the next "new data" evaluation
    config['set_new_data_check'] = curr_list

    # The new_list is the set of new files - these will be processed
    ret['new_files'] = new_list
    ret['start_time'] = get_time_from_filename(new_list[0][0], date_extraction_pattern, date_pattern)
    ret['end_time'] = get_time_from_filename(new_list[-1][0], date_extraction_pattern, date_pattern)
    ret['bounding_box'] = {}
    ret['vars'] = []
    log.debug('constraints_for_new_request: {0}'.format(ret))
    return ret
def _constraints_for_historical_request(cls, config):
    """Return every file matching the configured list pattern as 'new_files'."""
    base_url = get_safe(config, 'ds_params.base_url')
    list_pattern = get_safe(config, 'ds_params.list_pattern')
    date_pattern = get_safe(config, 'ds_params.date_pattern')
    date_extraction_pattern = get_safe(config, 'ds_params.date_extraction_pattern')
    start_time = get_safe(config, 'constraints.start_time')
    end_time = get_safe(config, 'constraints.end_time')

    # NOTE(review): start_time/end_time and the date patterns are read but
    # never applied - every listed file is returned; confirm this is intended.
    new_list = list_file_info(base_url, list_pattern)
    # config['constraints']['new_files'] = new_list
    return {'new_files': new_list}
def _constraints_for_new_request(cls, config):
    """Build constraints for files that appeared since the last check.

    (Docstring corrected: the previous text about random 'array_len'/'count'
    integers described a different, older implementation.)

    @param config dict of configuration parameters; reads 'new_data_check'
           and 'ds_params.*', and writes 'set_new_data_check'
    @retval constraints dictionary with 'new_files', 'start_time',
            'end_time', 'bounding_box' and 'vars'
    @raise NoNewDataWarning when no new files are present
    """
    old_list = get_safe(config, "new_data_check") or []
    ret = {}
    base_url = get_safe(config, "ds_params.base_url")
    list_pattern = get_safe(config, "ds_params.list_pattern")
    date_pattern = get_safe(config, "ds_params.date_pattern")
    date_extraction_pattern = get_safe(config, "ds_params.date_extraction_pattern")

    curr_list = list_file_info(base_url, list_pattern)

    # old_list may arrive as a list of lists and curr_list as a list of
    # tuples; normalize both to sets of tuples so set.difference works.
    curr_set = set(tuple(x) for x in curr_list)
    old_set = set(tuple(x) for x in old_list)

    # BUG FIX: set iteration order is arbitrary, which made new_list[0]/[-1]
    # (and therefore start_time/end_time) nondeterministic - sort the result.
    new_list = sorted(curr_set.difference(old_set))

    # BUG FIX: 'len(new_list) is 0' tested identity, not equality
    if not new_list:
        raise NoNewDataWarning()

    # The curr_list is the new new_data_check - used for the next "new data" evaluation
    config["set_new_data_check"] = curr_list

    # The new_list is the set of new files - these will be processed
    ret["new_files"] = new_list
    ret["start_time"] = get_time_from_filename(new_list[0][0], date_extraction_pattern, date_pattern)
    ret["end_time"] = get_time_from_filename(new_list[-1][0], date_extraction_pattern, date_pattern)
    ret["bounding_box"] = {}
    ret["vars"] = []
    log.debug("constraints_for_new_request: {0}".format(ret))
    return ret
def _constraints_for_historical_request(cls, config):
    """Return every file matching the configured list pattern as 'new_files'."""
    base_url = get_safe(config, 'ds_params.base_url')
    list_pattern = get_safe(config, 'ds_params.list_pattern')
    date_pattern = get_safe(config, 'ds_params.date_pattern')
    date_extraction_pattern = get_safe(config, 'ds_params.date_extraction_pattern')
    start_time = get_safe(config, 'constraints.start_time')
    end_time = get_safe(config, 'constraints.end_time')

    # NOTE(review): start_time/end_time and the date patterns are read but
    # never applied - every listed file is returned; confirm this is intended.
    new_list = list_file_info(base_url, list_pattern)
    # config['constraints']['new_files'] = new_list
    return {'new_files': new_list}
def _constraints_for_historical_request(cls, config):
    """Return files whose filename-encoded time lies within the requested window.

    @param config dict of configuration parameters ('ds_params.*' and
           'constraints.start_time'/'constraints.end_time')
    @retval dict with a single 'new_files' key
    """
    base_url = get_safe(config, 'ds_params.base_url')
    list_pattern = get_safe(config, 'ds_params.list_pattern')
    date_pattern = get_safe(config, 'ds_params.date_pattern')
    date_extraction_pattern = get_safe(config, 'ds_params.date_extraction_pattern')
    start_time = get_safe(config, 'constraints.start_time')
    end_time = get_safe(config, 'constraints.end_time')

    listing = list_file_info(base_url, list_pattern)
    selected = [
        info for info in listing
        if start_time <= get_time_from_filename(info[0], date_extraction_pattern, date_pattern) <= end_time
    ]
    return {'new_files': selected}
def test__constraints_for_historical_request(self):
    """Historical constraints over the slocum test data return all matching files."""
    config = {
        'ds_params': {
            # Normally extracted from dataset_description.parameters during
            # _init_acquisition_cycle; supplied directly since that isn't called.
            'base_url': 'test_data/slocum',
            'list_pattern': 'ru05-*-sbd.dat',  # shell pattern (fs) or regex (http/ftp)
            'date_pattern': '%Y %j',
            'date_extraction_pattern': 'ru05-([\d]{4})-([\d]{3})-\d-\d-sbd.dat',
        },
        'constraints': {
            'start_time': 1327122000,
            'end_time': 1327294800,
        },
    }

    ret = SlocumDataHandler._constraints_for_historical_request(config)
    log.debug('test_constraints_for_historical_request: {0}'.format(config))

    all_files = list_file_info(config['ds_params']['base_url'],
                               config['ds_params']['list_pattern'])
    self.assertEqual(ret['new_files'], all_files)
def _constraints_for_new_request(cls, config):
    """Determine which files are new, or have grown, since the last read.

    A current file is emitted when it was never read before, or when its
    size now exceeds the read position recorded for it last time.
    """
    old_list = get_safe(config, 'new_data_check') or []
    # CBM: Fix this when the DotList crap is sorted out
    old_list = list(old_list)  # NOTE that the internal tuples are also DotList objects

    ret = {}
    base_url = get_safe(config, 'ds_params.base_url')
    list_pattern = get_safe(config, 'ds_params.list_pattern')
    date_pattern = get_safe(config, 'ds_params.date_pattern')
    date_extraction_pattern = get_safe(config, 'ds_params.date_extraction_pattern')

    curr_list = list_file_info(base_url, list_pattern)

    new_list = []
    for cur in curr_list:
        previously_read = [old for old in old_list if old[0] == cur[0]]
        if not previously_read:
            # Never seen before - process the whole file
            new_list.append(cur)
            continue
        for old in previously_read:
            # cur[2] is the current file size, old[3] the position at the
            # last read - include the file when data has been appended
            if cur[2] > old[3]:
                new_list.append((cur[0], cur[1], cur[2], old[-1]))

    # The current listing becomes the baseline for the next evaluation
    config['set_new_data_check'] = curr_list

    ret['new_files'] = new_list
    ret['bounding_box'] = {}
    ret['vars'] = []
    return ret
def _constraints_for_historical_request(cls, config):
    """Return files whose filename-encoded time lies within the requested window."""
    base_url = get_safe(config, 'ds_params.base_url')
    list_pattern = get_safe(config, 'ds_params.list_pattern')
    date_pattern = get_safe(config, 'ds_params.date_pattern')
    date_extraction_pattern = get_safe(config, 'ds_params.date_extraction_pattern')
    start_time = get_safe(config, 'constraints.start_time')
    end_time = get_safe(config, 'constraints.end_time')

    matching = []
    for entry in list_file_info(base_url, list_pattern):
        stamp = get_time_from_filename(entry[0], date_extraction_pattern, date_pattern)
        # Skip anything outside the [start_time, end_time] window
        if stamp < start_time or stamp > end_time:
            continue
        matching.append(entry)

    return {'new_files': matching}
def _constraints_for_new_request(cls, config):
    """Build constraints for files that appeared since the last check.

    @param config dict of configuration parameters; reads 'new_data_check'
           and 'ds_params.*', and writes 'set_new_data_check'
    @retval constraints dictionary with 'new_files', 'start_time',
            'end_time', 'bounding_box' and 'vars'
    @raise NoNewDataWarning when no new files are present
    """
    old_list = get_safe(config, 'new_data_check') or []
    ret = {}
    base_url = get_safe(config, 'ds_params.base_url')
    list_pattern = get_safe(config, 'ds_params.list_pattern')
    date_pattern = get_safe(config, 'ds_params.date_pattern')
    date_extraction_pattern = get_safe(config, 'ds_params.date_extraction_pattern')

    curr_list = list_file_info(base_url, list_pattern)

    # Determine which files are new; old_list entries may be lists rather
    # than tuples, hence the list(x) comparison
    log.warn('curr_list:{0}'.format(curr_list))
    log.warn('old_list:{0}'.format(old_list))
    new_list = [tuple(x) for x in curr_list if list(x) not in old_list]

    # BUG FIX: 'len(new_list) is 0' tested identity, not equality;
    # use truthiness for the empty check instead
    if not new_list:
        raise NoNewDataWarning()

    # The curr_list is the new new_data_check - used for the next "new data" evaluation
    config['set_new_data_check'] = curr_list

    # The new_list is the set of new files - these will be processed
    ret['new_files'] = new_list
    ret['start_time'] = get_time_from_filename(new_list[0][0], date_extraction_pattern, date_pattern)
    ret['end_time'] = get_time_from_filename(new_list[-1][0], date_extraction_pattern, date_pattern)
    ret['bounding_box'] = {}
    ret['vars'] = []
    log.warn('constraints_for_new_request: {0}'.format(ret))
    return ret
def test__constraints_for_historical_request(self):
    """Historical constraints over test_data/ruv should return all matching files."""
    ruv_params = {
        # Normally pulled from dataset_description.parameters during
        # _init_acquisition_cycle; supplied directly since that isn't called.
        'base_url': 'test_data/ruv',
        'list_pattern': 'RDLm_SEAB_*.ruv',
        'date_pattern': '%Y %m %d %H %M',
        'date_extraction_pattern': 'RDLm_SEAB_([\d]{4})_([\d]{2})_([\d]{2})_([\d]{2})([\d]{2}).ruv',
    }
    config = {
        'ds_params': ruv_params,
        'constraints': {
            'start_time': 1338998400,
            'end_time': 1339012800,
        },
    }

    ret = RuvDataHandler._constraints_for_historical_request(config)
    log.debug('test__constraints_for_historical_request: NEW_FILES == {0}'.format(ret['new_files']))

    files = list_file_info(config['ds_params']['base_url'],
                           config['ds_params']['list_pattern'])
    log.error(ret['new_files'])
    log.error(files)
    self.assertEqual(ret['new_files'], files)
def _constraints_for_historical_request(cls, config):
    """
    Returns a list of new file names in the given directory
    @param config dictionary of configuration parameters
    @retval list of new file names
    """
    base_url = get_safe(config, 'ds_params.base_url')
    list_pattern = get_safe(config, 'ds_params.list_pattern')
    date_pattern = get_safe(config, 'ds_params.date_pattern')
    date_extraction_pattern = get_safe(config, 'ds_params.date_extraction_pattern')
    start_time = get_safe(config, 'constraints.start_time')
    end_time = get_safe(config, 'constraints.end_time')

    def _within_window(info):
        # Timestamp is parsed out of the filename itself
        t = get_time_from_filename(info[0], date_extraction_pattern, date_pattern)
        return start_time <= t <= end_time

    listing = list_file_info(base_url, list_pattern)
    return {'new_files': [info for info in listing if _within_window(info)]}
def _constraints_for_new_request(cls, config):
    """Build a constraints dict describing files not seen in the previous scan.

    @param config dict of configuration parameters; reads 'new_data_check'
           (the previously-seen file list) and the 'ds_params.*' keys
    @retval dict with 'start_time', 'end_time', 'new_files',
            'bounding_box' and 'vars' keys
    """
    old_list = get_safe(config, 'new_data_check') or []
    # CBM: Fix this when the DotList crap is sorted out
    old_list = list(old_list)  # NOTE that the internal tuples are also DotList objects

    ret = {}
    base_url = get_safe(config, 'ds_params.base_url')
    list_pattern = get_safe(config, 'ds_params.list_pattern')
    date_pattern = get_safe(config, 'ds_params.date_pattern')
    date_extraction_pattern = get_safe(config, 'ds_params.date_extraction_pattern')

    curr_list = list_file_info(base_url, list_pattern)

    # Anything in the current listing that was not seen before is "new"
    new_list = [x for x in curr_list if x not in old_list]

    # NOTE(review): new_list[0] raises IndexError when there are no new files;
    # sibling variants raise NoNewDataWarning here - confirm intended behavior.
    ret['start_time'] = get_time_from_filename(new_list[0][0], date_extraction_pattern, date_pattern)
    # Idiomatic negative index instead of new_list[len(new_list) - 1]
    ret['end_time'] = get_time_from_filename(new_list[-1][0], date_extraction_pattern, date_pattern)
    ret['new_files'] = new_list
    ret['bounding_box'] = {}
    ret['vars'] = []

    return ret
def test__constraints_for_new_request(self):
    """New-request constraints over test_data/slocum list every matching file."""
    seen_files = [
        ('test_data/slocum/ru05-2012-021-0-0-sbd.dat', 1337261358.0, 521081),
        ('test_data/slocum/ru05-2012-022-0-0-sbd.dat', 1337261358.0, 521081),
    ]
    edres = ExternalDataset(name='test_ed_res',
                            dataset_description=DatasetDescription(),
                            update_description=UpdateDescription(),
                            contact=ContactInformation())
    config = {
        'external_dataset_res': edres,
        'new_data_check': seen_files,
        'ds_params': {
            # Normally extracted from dataset_description.parameters during
            # _init_acquisition_cycle; supplied directly since that isn't called.
            'base_url': 'test_data/slocum/',
            'list_pattern': 'ru05-*-sbd.dat',  # shell pattern (fs) or regex (http/ftp)
            'date_pattern': '%Y %j',
            'date_extraction_pattern': 'ru05-([\d]{4})-([\d]{3})-\d-\d-sbd.dat',
        },
    }

    ret = SlocumDataHandler._constraints_for_new_request(config)
    log.debug('test__constraints_for_new_request: {0}'.format(ret['new_files']))

    listed = list_file_info(config['ds_params']['base_url'],
                            config['ds_params']['list_pattern'])
    self.assertEqual(ret['new_files'], listed)
def _constraints_for_new_request(cls, config):
    """Build constraints for files that appeared since the last check.

    @param config dict of configuration parameters; reads 'new_data_check'
           and 'ds_params.*', and writes 'set_new_data_check'
    @retval dict with 'new_files', 'start_time', 'end_time',
            'bounding_box' and 'vars' keys
    @raise NoNewDataWarning when no new files are present
    """
    old_list = get_safe(config, 'new_data_check') or []
    # CBM: Fix this when the DotList crap is sorted out
    old_list = list(old_list)  # NOTE that the internal tuples are also DotList objects

    ret = {}
    base_url = get_safe(config, 'ds_params.base_url')
    list_pattern = get_safe(config, 'ds_params.list_pattern')
    date_pattern = get_safe(config, 'ds_params.date_pattern')
    date_extraction_pattern = get_safe(config, 'ds_params.date_extraction_pattern')

    curr_list = list_file_info(base_url, list_pattern)

    # Determine which files are new
    new_list = [x for x in curr_list if x not in old_list]

    # BUG FIX: 'len(new_list) is 0' relied on CPython small-int identity;
    # use truthiness for the empty check instead
    if not new_list:
        raise NoNewDataWarning()

    # The curr_list is the new new_data_check - used for the next "new data" evaluation
    config['set_new_data_check'] = curr_list

    # The new_list is the set of new files - these will be processed
    ret['new_files'] = new_list
    ret['start_time'] = get_time_from_filename(new_list[0][0], date_extraction_pattern, date_pattern)
    # Idiomatic negative index instead of new_list[len(new_list) - 1]
    ret['end_time'] = get_time_from_filename(new_list[-1][0], date_extraction_pattern, date_pattern)
    ret['bounding_box'] = {}
    ret['vars'] = []

    return ret
def test__constraints_for_new_request(self):
    """New-request constraints over test_data/ruv should list every matching file.

    (~100 lines of dead, commented-out HTTP fixture data removed; the active
    test logic is unchanged.)
    """
    edres = ExternalDataset(name='test_ed_res',
                            dataset_description=DatasetDescription(),
                            update_description=UpdateDescription(),
                            contact=ContactInformation())
    # Previously-seen files: the "last read" baseline for the new-data check
    old_list = [
        ('test_data/ruv/RDLm_SEAB_2012_06_06_1200.ruv', 1339006638.0, 119066),
        ('test_data/ruv/RDLm_SEAB_2012_06_06_1300.ruv', 1339006629.0, 109316),
        ('test_data/ruv/RDLm_SEAB_2012_06_06_1400.ruv', 1339006521.0, 113411),
    ]
    config = {
        'external_dataset_res': edres,
        'new_data_check': old_list,
        'ds_params': {
            # These would be extracted from the dataset_description.parameters
            # during _init_acquisition_cycle, but since that isn't called,
            # just add them here
            'base_url': 'test_data/ruv',
            'list_pattern': 'RDLm_SEAB_*.ruv',
            'date_pattern': '%Y %m %d %H %M',
            'date_extraction_pattern': 'RDLm_SEAB_([\d]{4})_([\d]{2})_([\d]{2})_([\d]{2})([\d]{2}).ruv'
        }
    }
    ret = RuvDataHandler._constraints_for_new_request(config)
    log.debug('test__new_data_constraints: {0}'.format(config))
    self.assertEqual(
        ret['new_files'],
        list_file_info(config['ds_params']['base_url'],
                       config['ds_params']['list_pattern']))
def test_list_file_info_by_fs(self, list_file_info_fs_mock, _get_type_mock):
    """list_file_info delegates to the filesystem lister for 'fs' sources."""
    _get_type_mock.return_value = 'fs'
    expected = ['file1', 'file2']
    list_file_info_fs_mock.return_value = expected
    self.assertEqual(list_file_info('fs', 'pattern'), expected)
def test_list_file_info_by_http(self, list_file_info_http_mock, _get_type_mock):
    """list_file_info delegates to the HTTP lister for 'http' sources."""
    _get_type_mock.return_value = 'http'
    expected = ['file1', 'file2']
    list_file_info_http_mock.return_value = expected
    self.assertEqual(list_file_info('http', 'pattern'), expected)
def test__constraints_for_new_request(self):
    """Exercise RuvDataHandler._constraints_for_new_request with local test data.

    Previously-acquired files are supplied via 'new_data_check'; the
    handler's reported 'new_files' is compared against the current
    directory listing produced by list_file_info.
    """
    edres = ExternalDataset(name='test_ed_res',
                            dataset_description=DatasetDescription(),
                            update_description=UpdateDescription(),
                            contact=ContactInformation())

    # Files already seen on a prior acquisition cycle: (path, mtime, size).
    old_list = [
        ('test_data/ruv/RDLm_SEAB_2012_06_06_1200.ruv', 1339006638.0, 119066),
        ('test_data/ruv/RDLm_SEAB_2012_06_06_1300.ruv', 1339006629.0, 109316),
        ('test_data/ruv/RDLm_SEAB_2012_06_06_1400.ruv', 1339006521.0, 113411),
    ]

    # The ds_params would normally be extracted from
    # dataset_description.parameters during _init_acquisition_cycle, but
    # that isn't called here, so they are supplied directly.
    ds_params = {
        'base_url': 'test_data/ruv',
        # Shell-style pattern (filesystem) or regex (http/ftp), appended
        # to base_url to filter files.
        'list_pattern': 'RDLm_SEAB_*.ruv',
        'date_pattern': '%Y %m %d %H %M',
        'date_extraction_pattern': 'RDLm_SEAB_([\d]{4})_([\d]{2})_([\d]{2})_([\d]{2})([\d]{2}).ruv'
    }
    config = {
        'external_dataset_res': edres,
        'new_data_check': old_list,
        'ds_params': ds_params,
    }

    ret = RuvDataHandler._constraints_for_new_request(config)
    log.debug('test__new_data_constraints: {0}'.format(config))

    expected = list_file_info(config['ds_params']['base_url'],
                              config['ds_params']['list_pattern'])
    self.assertEqual(ret['new_files'], expected)