def test__init__(self):
    attributes = dict()
    attributes['job_name'] = self.job_name
    self.expected = {'_name': self.job_name,
                     '_attributes': attributes,
                     '_num_jobs': 1,
                     '_cluster_id': 0,
                     '_job_file': '',
                     '_remote': None,
                     '_remote_id': None,
                     '_remote_input_files': None,
                     '_cwd': '.'}
    self.actual = self.job.__dict__
    self.msg = 'testing initialization with default values'
    self.assertDictEqual(*self.assert_args)

    exe = 'exe'
    args = 'args'
    num_jobs = '5'
    self.job = Job(self.job_name, OrderedDict(), num_jobs, executable=exe, arguments=args)
    attributes.update({'executable': exe, 'arguments': args})
    self.expected.update({'_name': self.job_name,
                          '_attributes': attributes,
                          '_num_jobs': int(num_jobs)})
    self.actual = self.job.__dict__
    self.actual['_attributes'] = dict(self.actual['_attributes'])
    self.msg = 'testing initialization with all values supplied'
    self.assertDictEqual(*self.assert_args)

    num_jobs = 'five'
    self.assertRaises(ValueError, Job, self.job_name, num_jobs=num_jobs)
def test_log_file(self):
    self.job = Job(self.job_name, Templates.base)
    log_file = '%s/%s/%s.%s.log' % (self.job.initial_dir, self.job.logdir,
                                    self.job_name, self.job.cluster_id)
    expected = log_file
    actual = self.job.log_file
    msg = 'checking resolving attribute function for log file'
    self.assertEqual(expected, actual,
                     '%s\nExpected: %s\nActual: %s\n' % (msg, expected, actual))
def test_submit(self):
    working_dir = os.path.join(self.base_dir, 'test_files', 'working_dir')
    self.job = Job('remote_test',
                   Templates.vanilla_transfer_files,
                   host='localhost',
                   username=os.environ['USER'],
                   private_key='~/.ssh/id_rsa',
                   remote_input_files=['../copy_test.py', 'input.txt'],
                   transfer_input_files='../input.txt',
                   executable=os.path.join(self.base_dir, 'test_files', 'copy_test.py'),
                   working_directory=working_dir)
    remote_base_path = os.path.expanduser('~/' + self.job._remote_id)
    if os.path.exists(remote_base_path):
        self.fail('remote base path already exists: %s' % remote_base_path)
    self.job.submit()
    self.assertTrue(os.path.exists(remote_base_path))
    self.job.wait()
    self.job.sync_remote_output()
    local_output = os.path.join(working_dir, self.job.name)
    self.assertTrue(os.path.exists(local_output))
    output = os.path.join(local_output, 'output.txt')
    self.assertTrue(os.path.exists(output))
    shutil.rmtree(remote_base_path)
    shutil.rmtree(local_output)
def test_resolve_attribute(self):
    job = Job(self.job_name, Templates.vanilla_base)
    expected = self.job_name
    actual = job._resolve_attribute('initialdir')
    msg = 'checking resolving attribute function'
    self.assertEqual(expected, actual,
                     '%s\nExpected: %s\nActual: %s\n' % (msg, expected, actual))
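# --- Added sketch ---
# test_resolve_attribute above checks that the template's 'initialdir' value
# resolves to the job's name, presumably through '$(job_name)'-style
# references. This is a minimal, hypothetical sketch of that expansion, not
# the actual condorpy implementation; the regex and the fallback to an empty
# string are assumptions.
import re

def resolve_attribute_value(value, attributes):
    """Expand $(name) references in `value` using the attributes dict."""
    def lookup(match):
        return str(attributes.get(match.group(1), ''))
    return re.sub(r'\$\((\w+)\)', lookup, value)

# Example: resolve_attribute_value('$(job_name)', {'job_name': 'job_name'})
# returns 'job_name', matching the assertion above.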
def test__getattr__(self):
    exe = 'exe'
    self.job = Job(self.job_name, executable=exe)
    expected = exe
    actual = self.job.executable
    msg = 'testing that existing value is returned'
    self.assertEqual(expected, actual,
                     '%s\nExpected: %s\nActual: %s\n' % (msg, expected, actual))
def test_get(self):
    non_existent_attr = 'not-there'
    expected = None
    actual = self.job.get(non_existent_attr)
    msg = 'testing that None is returned when attribute does not exist'
    self.assertIsNone(actual,
                      '%s\nExpected: %s\nActual: %s\n' % (msg, expected, actual))

    expected = 'expected'
    actual = self.job.get(non_existent_attr, expected)
    msg = 'testing that supplied value is returned when attribute does not exist'
    self.assertEqual(expected, actual,
                     '%s\nExpected: %s\nActual: %s\n' % (msg, expected, actual))

    exe = 'exe'
    self.job = Job(self.job_name, executable=exe)
    expected = exe
    actual = self.job.get('executable')
    msg = 'testing that existing value is returned'
    self.assertEqual(expected, actual,
                     '%s\nExpected: %s\nActual: %s\n' % (msg, expected, actual))
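# --- Added sketch ---
# test_get above pins down the accessor's contract: a missing attribute
# returns None (or a supplied default), while a set attribute returns its
# value. A minimal dict-backed sketch consistent with those assertions (an
# assumption, not the actual condorpy source):
def get(self, attr, value=None):
    """Return the job attribute if it is set, otherwise the supplied default."""
    return self.attributes.get(attr, value)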
def setUp(self):
    """Build a diamond-shaped DAG: node a is the parent of b and c, and
    node d is the child of both b and c."""
    self.job_a = Job('a', Templates.base)
    self.job_b = Job('b', Templates.base)
    self.job_c = Job('c', Templates.base)
    self.job_d = Job('d', Templates.base)

    self.node_a = Node(self.job_a)
    self.node_b = Node(self.job_b)
    self.node_c = Node(self.job_c)
    self.node_d = Node(self.job_d)

    self.node_a.add_child(self.node_b)
    self.node_a.add_child(self.node_c)
    self.node_d.add_parent(self.node_b)
    self.node_d.add_parent(self.node_c)
def condorpy_job(self):
    if not hasattr(self, '_condorpy_job'):
        if 'executable' in self.attributes.keys():
            del self.attributes['executable']

        if self.scheduler:
            host = self.scheduler.host
            username = self.scheduler.username
            password = self.scheduler.password
            private_key = self.scheduler.private_key_path
            private_key_pass = self.scheduler.private_key_pass
        else:
            host = None
            username = None
            password = None
            private_key = None
            private_key_pass = None

        attributes = dict()
        attributes.update(self.attributes)
        attributes.pop('remote_input_files', None)

        job = Job(name=self.name.replace(' ', '_'),
                  attributes=self.condorpy_template,
                  executable=self.executable,
                  host=host,
                  username=username,
                  password=password,
                  private_key=private_key,
                  private_key_pass=private_key_pass,
                  remote_input_files=self.remote_input_files,
                  working_directory=self.workspace,
                  **attributes)

        job._cluster_id = self.cluster_id
        job._num_jobs = self.num_jobs
        if self.remote_id:
            job._remote_id = self.remote_id
        else:
            self.remote_id = job._remote_id

        self._condorpy_job = job

    return self._condorpy_job
def condorpy_job(self):
    if not hasattr(self, '_condorpy_job'):
        job = Job(name=self.name.replace(' ', '_'),
                  attributes=self.attributes,
                  num_jobs=self.num_jobs,
                  remote_input_files=self.remote_input_files,
                  working_directory=self.workspace)
        self._condorpy_job = job
    return self._condorpy_job
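# --- Added sketch ---
# Both condorpy_job variants above share one idiom: build the condorpy Job on
# first access, cache it on the instance, and return the cached object on
# later calls. A generic sketch of that lazy-property pattern; the builder
# method name is hypothetical:
class CondorpyJobCacheMixin(object):

    @property
    def condorpy_job(self):
        if not hasattr(self, '_condorpy_job'):
            self._condorpy_job = self._build_condorpy_job()  # hypothetical builder
        return self._condorpy_job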
def test__init__(self):
    attributes = OrderedDict()
    attributes['job_name'] = self.job_name
    attributes['executable'] = None
    attributes['arguments'] = None
    expected = {'_name': self.job_name,
                '_attributes': attributes,
                '_num_jobs': 1,
                '_cluster_id': 0,
                '_job_file': ''}
    actual = self.job.__dict__
    msg = 'testing initialization with default values'
    self.assertDictEqual(expected, actual,
                         '%s\nExpected: %s\nActual: %s\n' % (msg, expected, actual))

    exe = 'exe'
    args = '-args'
    num_jobs = '5'
    self.job = Job(self.job_name, OrderedDict(), exe, args, num_jobs)
    attributes['executable'] = exe
    attributes['arguments'] = args
    expected = {'_name': self.job_name,
                '_attributes': attributes,
                '_num_jobs': int(num_jobs),
                '_cluster_id': 0,
                '_job_file': ''}
    actual = self.job.__dict__
    msg = 'testing initialization with all values supplied'
    self.assertDictEqual(expected, actual,
                         '%s\nExpected: %s\nActual: %s\n' % (msg, expected, actual))

    num_jobs = 'five'
    self.assertRaises(ValueError, Job, self.job_name, num_jobs=num_jobs)
def test_condorpy_node(self, mock_job):
    mock_job_return = Job(name='test_job',
                          attributes={'foo': 'bar'},
                          num_jobs=1,
                          remote_input_files=['test_file.txt'],
                          working_directory=self.workspace_dir)
    mock_job.return_value = mock_job_return
    self.condorworkflownode.job = mock_job_return

    ret = self.condorworkflownode.condorpy_node

    # Check result
    self.assertEqual('<Node: test_job parents() children()>', repr(ret))
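# --- Added sketch ---
# test_condorpy_node above takes a `mock_job` argument, which implies a
# mock.patch decorator that the snippet does not show. A minimal sketch of the
# usual wiring; the dotted patch target is a placeholder assumption and should
# point at the module where the workflow-node class actually imports Job:
from unittest import mock

@mock.patch('tethys_compute.models.Job')  # hypothetical patch target
def test_condorpy_node(self, mock_job):
    mock_job.return_value = mock.MagicMock()
    # body as in the test above
    pass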
class TestIntegration(unittest.TestCase):
    expected = None
    actual = None
    msg = None
    base_dir = os.path.join(os.path.dirname(__file__))

    @property
    def output(self):
        return '%s\nExpected: %s\nActual: %s\n' % (self.msg, self.expected, self.actual)

    @property
    def assert_args(self):
        return (self.expected, self.actual, self.output)

    def setUp(self):
        self.job_name = 'job_name'
        self.job = Job(self.job_name)

    def tearDown(self):
        pass

    def test_submit(self):
        working_dir = os.path.join(self.base_dir, 'test_files', 'working_dir')
        self.job = Job('remote_test',
                       Templates.vanilla_transfer_files,
                       host='localhost',
                       username=os.environ['USER'],
                       private_key='~/.ssh/id_rsa',
                       remote_input_files=['../copy_test.py', 'input.txt'],
                       transfer_input_files='../input.txt',
                       executable=os.path.join(self.base_dir, 'test_files', 'copy_test.py'),
                       working_directory=working_dir)
        remote_base_path = os.path.expanduser('~/' + self.job._remote_id)
        if os.path.exists(remote_base_path):
            self.fail('remote base path already exists: %s' % remote_base_path)
        self.job.submit()
        self.assertTrue(os.path.exists(remote_base_path))
        self.job.wait()
        self.job.sync_remote_output()
        local_output = os.path.join(working_dir, self.job.name)
        self.assertTrue(os.path.exists(local_output))
        output = os.path.join(local_output, 'output.txt')
        self.assertTrue(os.path.exists(output))
        shutil.rmtree(remote_base_path)
        shutil.rmtree(local_output)
def run_autoroute_multiprocess(autoroute_input_directory, #path to AutoRoute input directory autoroute_output_directory, #path to AutoRoute output directory log_directory, #path to HTCondor/multiprocessing logs autoroute_executable_location="", #location of AutoRoute executable autoroute_manager=None, #AutoRoute manager with default parameters rapid_output_directory="", #path to ECMWF RAPID input/output directory return_period="", # return period name in return period file return_period_file="", # return period file generated from RAPID historical run rapid_output_file="", #path to RAPID output file to be used date_peak_search_start=None, #datetime of start of search for peakflow date_peak_search_end=None, #datetime of end of search for peakflow river_id="", #field with unique identifier of river streamflow_id="", #field with streamflow stream_network_shapefile="", #stream network shapefile mode="multiprocess", #multiprocess or htcondor generate_flood_map_raster=True, #generate flood raster generate_flood_depth_raster=False, #generate flood raster generate_flood_map_shapefile=False, #generate a flood map shapefile wait_for_all_processes_to_finish=True, #waits for all processes to finish before ending script num_cpus=-17 #number of processes to use on computer ): """ This it the main AutoRoute-RAPID process """ time_start_all = datetime.utcnow() if not generate_flood_depth_raster and not generate_flood_map_raster and not generate_flood_map_shapefile: raise Exception("ERROR: Must set generate_flood_depth_raster, generate_flood_map_raster, or generate_flood_map_shapefile to True to proceed ...") #-------------------------------------------------------------------------- #Validate Inputs #-------------------------------------------------------------------------- valid_mode_list = ['multiprocess','htcondor'] if mode not in valid_mode_list: raise Exception("ERROR: Invalid multiprocess mode {}. Only multiprocess or htcondor allowed ...".format(mode)) if mode == "htcondor" and not HTCONDOR_ENABLED: raise Exception("ERROR: HTCondor mode not allowed. 
Must have condorpy and HTCondor installed to work ...".format(mode)) #DETERMINE MODE TO PREPARE STREAMFLOW PREPARE_MODE = get_valid_streamflow_prepare_mode(autoroute_input_directory, rapid_output_directory, return_period, return_period_file, rapid_output_file, river_id, streamflow_id, stream_network_shapefile, ) #-------------------------------------------------------------------------- #Initialize Run #-------------------------------------------------------------------------- try: os.makedirs(autoroute_output_directory) except OSError: pass local_scripts_location = os.path.dirname(os.path.realpath(__file__)) #initialize HTCondor/multiprocess log directories prepare_log_directory = os.path.join(log_directory, "prepare") try: os.makedirs(prepare_log_directory) except OSError: pass if PREPARE_MODE > 0: print("Streamflow preparation logs can be found here: {0}".format(prepare_log_directory)) run_log_directory = os.path.join(log_directory, "run") try: os.makedirs(run_log_directory) except OSError: pass print("AutoRoute simulation logs can be found here: {0}".format(run_log_directory)) #keep list of jobs autoroute_job_info = { 'multiprocess_job_list': [], 'htcondor_job_list': [], 'htcondor_job_info': [], 'output_folder': autoroute_output_directory, } if mode == "multiprocess": num_cpus = get_valid_num_cpus(num_cpus) #start pool pool_streamflow = multiprocessing.Pool(num_cpus) pool_main = multiprocessing.Pool(num_cpus) #-------------------------------------------------------------------------- #Run the model #-------------------------------------------------------------------------- #loop through sub-directories streamflow_job_list = [] for directory in os.listdir(autoroute_input_directory): master_watershed_autoroute_input_directory = os.path.join(autoroute_input_directory, directory) if os.path.isdir(master_watershed_autoroute_input_directory): autoroute_watershed_name = os.path.basename(autoroute_input_directory) autoroute_job_name = "{0}-{1}".format(autoroute_watershed_name, directory) try: case_insensitive_file_search(master_watershed_autoroute_input_directory, r'elevation\.(?!prj)') except Exception: try: case_insensitive_file_search(os.path.join(master_watershed_autoroute_input_directory, 'elevation'), r'hdr\.adf') except Exception: print("ERROR: Elevation raster not found. Skipping run ...") continue pass pass try: stream_info_file = case_insensitive_file_search(master_watershed_autoroute_input_directory, r'stream_info\.txt') except Exception: print("Stream info file not found. 
Skipping run ...") continue pass if PREPARE_MODE > 0: streamflow_job_list.append((PREPARE_MODE, master_watershed_autoroute_input_directory, stream_info_file, rapid_output_directory, return_period_file, return_period, rapid_output_file, date_peak_search_start, date_peak_search_end, river_id, streamflow_id, stream_network_shapefile, autoroute_job_name, prepare_log_directory, )) output_shapefile_base_name = '{0}_{1}'.format(autoroute_watershed_name, directory) #set up flood raster name output_flood_map_raster_name = 'flood_map_raster_{0}.tif'.format(output_shapefile_base_name) master_output_flood_map_raster_name = os.path.join(autoroute_output_directory, output_flood_map_raster_name) #set up flood raster name output_flood_depth_raster_name = 'flood_depth_raster_{0}.tif'.format(output_shapefile_base_name) master_output_flood_depth_raster_name = os.path.join(autoroute_output_directory, output_flood_depth_raster_name) #set up flood shapefile name output_shapefile_shp_name = '{0}.shp'.format(output_shapefile_base_name) master_output_shapefile_shp_name = os.path.join(autoroute_output_directory, output_shapefile_shp_name) delete_flood_map_raster = False if not generate_flood_map_shapefile: master_output_shapefile_shp_name = "" else: if not generate_flood_map_raster: generate_flood_map_raster = True delete_flood_map_raster = True if not generate_flood_map_raster: master_output_flood_map_raster_name = "" if not generate_flood_depth_raster: master_output_flood_depth_raster_name = "" if mode == "htcondor": #create job to run autoroute for each raster in watershed job = CJob('job_autoroute_{0}_{1}'.format(os.path.basename(autoroute_input_directory), directory), tmplt.vanilla_transfer_files) if generate_flood_map_shapefile: #setup additional floodmap shapfile names output_shapefile_shx_name = '{0}.shx'.format(output_shapefile_base_name) master_output_shapefile_shx_name = os.path.join(autoroute_output_directory, output_shapefile_shx_name) output_shapefile_prj_name = '{0}.prj'.format(output_shapefile_base_name) master_output_shapefile_prj_name = os.path.join(autoroute_output_directory, output_shapefile_prj_name) output_shapefile_dbf_name = '{0}.dbf'.format(output_shapefile_base_name) master_output_shapefile_dbf_name = os.path.join(autoroute_output_directory, output_shapefile_dbf_name) transfer_output_remaps = "{0} = {1}; {2} = {3}; {4} = {5};" \ " {6} = {7}; {8} = {9}".format(output_shapefile_shp_name, master_output_shapefile_shp_name, output_shapefile_shx_name, master_output_shapefile_shx_name, output_shapefile_prj_name, master_output_shapefile_prj_name, output_shapefile_dbf_name, master_output_shapefile_dbf_name, output_flood_map_raster_name, master_output_flood_map_raster_name) if generate_flood_depth_raster: transfer_output_remaps += "; {0} = {1}".format(output_flood_depth_raster_name, master_output_flood_depth_raster_name) else: output_shapefile_shp_name = "" transfer_output_remaps = "" if generate_flood_map_raster: transfer_output_remaps = "{0} = {1}".format(output_flood_map_raster_name, master_output_flood_map_raster_name) if generate_flood_depth_raster: if transfer_output_remaps: transfer_output_remaps += "; " transfer_output_remaps += "{0} = {1}".format(output_flood_depth_raster_name, master_output_flood_depth_raster_name) job.set('transfer_output_remaps',"\"{0}\"" .format(transfer_output_remaps)) job.set('executable', os.path.join(local_scripts_location,'multicore_worker_process.py')) job.set('transfer_input_files', "{0}".format(master_watershed_autoroute_input_directory)) job.set('initialdir', 
run_log_directory) job.set('arguments', '{0} {1} {2} {3} {4} {5} {6}' % (autoroute_executable_location, autoroute_manager, directory, output_flood_map_raster_name, output_flood_depth_raster_name, output_shapefile_shp_name, delete_flood_map_raster)) autoroute_job_info['htcondor_job_list'].append(job) autoroute_job_info['htcondor_job_info'].append({ 'output_shapefile_base_name': output_shapefile_base_name, 'autoroute_job_name': autoroute_job_name}) else: #mode == "multiprocess": autoroute_job_info['multiprocess_job_list'].append((autoroute_executable_location, autoroute_manager, master_watershed_autoroute_input_directory, master_output_flood_map_raster_name, master_output_flood_depth_raster_name, master_output_shapefile_shp_name, delete_flood_map_raster, autoroute_job_name, run_log_directory )) #For testing function serially """ run_autoroute_multiprocess_worker((autoroute_executable_location, autoroute_manager, master_watershed_autoroute_input_directory, master_output_flood_map_raster_name, master_output_flood_depth_raster_name, master_output_shapefile_shp_name, delete_flood_map_raster, autoroute_job_name, run_log_directory)) """ if PREPARE_MODE > 0: #generate streamflow streamflow_job_list = pool_streamflow.imap_unordered(prepare_autoroute_streamflow_multiprocess_worker, streamflow_job_list, chunksize=1) for streamflow_job_output in streamflow_job_list: print("STREAMFLOW READY: {0}".format(streamflow_job_output)) pool_streamflow.close() pool_streamflow.join() print("Running AutoRoute simulations ...") #submit jobs to run if mode == "multiprocess": autoroute_job_info['multiprocess_worker_list'] = pool_main.imap_unordered(run_autoroute_multiprocess_worker, autoroute_job_info['multiprocess_job_list'], chunksize=1) else: for htcondor_job in autoroute_job_info['htcondor_job_list']: htcondor_job.submit() if wait_for_all_processes_to_finish: #wait for all of the jobs to complete if mode == "multiprocess": for multi_job_output in autoroute_job_info['multiprocess_worker_list']: print("JOB FINISHED: {0}".format(multi_job_output[3])) #just in case ... pool_main.close() pool_main.join() else: for htcondor_job_index, htcondor_job in enumerate(autoroute_job_info['htcondor_job_list']): htcondor_job.wait() print("JOB FINISHED: {0}".format(autoroute_job_info['htcondor_job_info'][htcondor_job_index]['autoroute_job_name'])) print("Time to complete entire AutoRoute process: {0}".format(datetime.utcnow()-time_start_all)) else: return autoroute_job_info
def run_autoroute_multiprocess( autoroute_input_directory, #path to AutoRoute input directory autoroute_output_directory, #path to AutoRoute output directory log_directory, #path to HTCondor/multiprocessing logs autoroute_executable_location="", #location of AutoRoute executable autoroute_manager=None, #AutoRoute manager with default parameters rapid_output_directory="", #path to ECMWF RAPID input/output directory return_period="", # return period name in return period file return_period_file="", # return period file generated from RAPID historical run rapid_output_file="", #path to RAPID output file to be used date_peak_search_start=None, #datetime of start of search for peakflow date_peak_search_end=None, #datetime of end of search for peakflow river_id="", #field with unique identifier of river streamflow_id="", #field with streamflow stream_network_shapefile="", #stream network shapefile mode="multiprocess", #multiprocess or htcondor generate_flood_map_raster=True, #generate flood raster generate_flood_depth_raster=False, #generate flood raster generate_flood_map_shapefile=False, #generate a flood map shapefile wait_for_all_processes_to_finish=True, #waits for all processes to finish before ending script num_cpus=-17 #number of processes to use on computer ): """ This it the main AutoRoute-RAPID process """ time_start_all = datetime.utcnow() if not generate_flood_depth_raster and not generate_flood_map_raster and not generate_flood_map_shapefile: raise Exception( "ERROR: Must set generate_flood_depth_raster, generate_flood_map_raster, or generate_flood_map_shapefile to True to proceed ..." ) #-------------------------------------------------------------------------- #Validate Inputs #-------------------------------------------------------------------------- valid_mode_list = ['multiprocess', 'htcondor'] if mode not in valid_mode_list: raise Exception( "ERROR: Invalid multiprocess mode {}. Only multiprocess or htcondor allowed ..." .format(mode)) if mode == "htcondor" and not HTCONDOR_ENABLED: raise Exception( "ERROR: HTCondor mode not allowed. Must have condorpy and HTCondor installed to work ..." 
.format(mode)) #DETERMINE MODE TO PREPARE STREAMFLOW PREPARE_MODE = get_valid_streamflow_prepare_mode( autoroute_input_directory, rapid_output_directory, return_period, return_period_file, rapid_output_file, river_id, streamflow_id, stream_network_shapefile, ) #-------------------------------------------------------------------------- #Initialize Run #-------------------------------------------------------------------------- try: os.makedirs(autoroute_output_directory) except OSError: pass local_scripts_location = os.path.dirname(os.path.realpath(__file__)) #initialize HTCondor/multiprocess log directories prepare_log_directory = os.path.join(log_directory, "prepare") try: os.makedirs(prepare_log_directory) except OSError: pass if PREPARE_MODE > 0: print("Streamflow preparation logs can be found here: {0}".format( prepare_log_directory)) run_log_directory = os.path.join(log_directory, "run") try: os.makedirs(run_log_directory) except OSError: pass print("AutoRoute simulation logs can be found here: {0}".format( run_log_directory)) #keep list of jobs autoroute_job_info = { 'multiprocess_job_list': [], 'htcondor_job_list': [], 'htcondor_job_info': [], 'output_folder': autoroute_output_directory, } if mode == "multiprocess": num_cpus = get_valid_num_cpus(num_cpus) #start pool pool_streamflow = multiprocessing.Pool(num_cpus) pool_main = multiprocessing.Pool(num_cpus) #-------------------------------------------------------------------------- #Run the model #-------------------------------------------------------------------------- #loop through sub-directories streamflow_job_list = [] for directory in os.listdir(autoroute_input_directory): master_watershed_autoroute_input_directory = os.path.join( autoroute_input_directory, directory) if os.path.isdir(master_watershed_autoroute_input_directory): autoroute_watershed_name = os.path.basename( autoroute_input_directory) autoroute_job_name = "{0}-{1}".format(autoroute_watershed_name, directory) try: case_insensitive_file_search( master_watershed_autoroute_input_directory, r'elevation\.(?!prj)') except Exception: try: case_insensitive_file_search( os.path.join( master_watershed_autoroute_input_directory, 'elevation'), r'hdr\.adf') except Exception: print( "ERROR: Elevation raster not found. Skipping run ...") continue pass pass try: stream_info_file = case_insensitive_file_search( master_watershed_autoroute_input_directory, r'stream_info\.txt') except Exception: print("Stream info file not found. 
Skipping run ...") continue pass if PREPARE_MODE > 0: streamflow_job_list.append(( PREPARE_MODE, master_watershed_autoroute_input_directory, stream_info_file, rapid_output_directory, return_period_file, return_period, rapid_output_file, date_peak_search_start, date_peak_search_end, river_id, streamflow_id, stream_network_shapefile, autoroute_job_name, prepare_log_directory, )) output_shapefile_base_name = '{0}_{1}'.format( autoroute_watershed_name, directory) #set up flood raster name output_flood_map_raster_name = 'flood_map_raster_{0}.tif'.format( output_shapefile_base_name) master_output_flood_map_raster_name = os.path.join( autoroute_output_directory, output_flood_map_raster_name) #set up flood raster name output_flood_depth_raster_name = 'flood_depth_raster_{0}.tif'.format( output_shapefile_base_name) master_output_flood_depth_raster_name = os.path.join( autoroute_output_directory, output_flood_depth_raster_name) #set up flood shapefile name output_shapefile_shp_name = '{0}.shp'.format( output_shapefile_base_name) master_output_shapefile_shp_name = os.path.join( autoroute_output_directory, output_shapefile_shp_name) delete_flood_map_raster = False if not generate_flood_map_shapefile: master_output_shapefile_shp_name = "" else: if not generate_flood_map_raster: generate_flood_map_raster = True delete_flood_map_raster = True if not generate_flood_map_raster: master_output_flood_map_raster_name = "" if not generate_flood_depth_raster: master_output_flood_depth_raster_name = "" if mode == "htcondor": #create job to run autoroute for each raster in watershed job = CJob( 'job_autoroute_{0}_{1}'.format( os.path.basename(autoroute_input_directory), directory), tmplt.vanilla_transfer_files) if generate_flood_map_shapefile: #setup additional floodmap shapfile names output_shapefile_shx_name = '{0}.shx'.format( output_shapefile_base_name) master_output_shapefile_shx_name = os.path.join( autoroute_output_directory, output_shapefile_shx_name) output_shapefile_prj_name = '{0}.prj'.format( output_shapefile_base_name) master_output_shapefile_prj_name = os.path.join( autoroute_output_directory, output_shapefile_prj_name) output_shapefile_dbf_name = '{0}.dbf'.format( output_shapefile_base_name) master_output_shapefile_dbf_name = os.path.join( autoroute_output_directory, output_shapefile_dbf_name) transfer_output_remaps = "{0} = {1}; {2} = {3}; {4} = {5};" \ " {6} = {7}; {8} = {9}".format(output_shapefile_shp_name, master_output_shapefile_shp_name, output_shapefile_shx_name, master_output_shapefile_shx_name, output_shapefile_prj_name, master_output_shapefile_prj_name, output_shapefile_dbf_name, master_output_shapefile_dbf_name, output_flood_map_raster_name, master_output_flood_map_raster_name) if generate_flood_depth_raster: transfer_output_remaps += "; {0} = {1}".format( output_flood_depth_raster_name, master_output_flood_depth_raster_name) else: output_shapefile_shp_name = "" transfer_output_remaps = "" if generate_flood_map_raster: transfer_output_remaps = "{0} = {1}".format( output_flood_map_raster_name, master_output_flood_map_raster_name) if generate_flood_depth_raster: if transfer_output_remaps: transfer_output_remaps += "; " transfer_output_remaps += "{0} = {1}".format( output_flood_depth_raster_name, master_output_flood_depth_raster_name) job.set('transfer_output_remaps', "\"{0}\"".format(transfer_output_remaps)) job.set( 'executable', os.path.join(local_scripts_location, 'multicore_worker_process.py')) job.set( 'transfer_input_files', "{0}".format(master_watershed_autoroute_input_directory)) 
job.set('initialdir', run_log_directory) job.set( 'arguments', '{0} {1} {2} {3} {4} {5} {6}' % (autoroute_executable_location, autoroute_manager, directory, output_flood_map_raster_name, output_flood_depth_raster_name, output_shapefile_shp_name, delete_flood_map_raster)) autoroute_job_info['htcondor_job_list'].append(job) autoroute_job_info['htcondor_job_info'].append({ 'output_shapefile_base_name': output_shapefile_base_name, 'autoroute_job_name': autoroute_job_name }) else: #mode == "multiprocess": autoroute_job_info['multiprocess_job_list'].append( (autoroute_executable_location, autoroute_manager, master_watershed_autoroute_input_directory, master_output_flood_map_raster_name, master_output_flood_depth_raster_name, master_output_shapefile_shp_name, delete_flood_map_raster, autoroute_job_name, run_log_directory)) #For testing function serially """ run_autoroute_multiprocess_worker((autoroute_executable_location, autoroute_manager, master_watershed_autoroute_input_directory, master_output_flood_map_raster_name, master_output_flood_depth_raster_name, master_output_shapefile_shp_name, delete_flood_map_raster, autoroute_job_name, run_log_directory)) """ if PREPARE_MODE > 0: #generate streamflow streamflow_job_list = pool_streamflow.imap_unordered( prepare_autoroute_streamflow_multiprocess_worker, streamflow_job_list, chunksize=1) for streamflow_job_output in streamflow_job_list: print("STREAMFLOW READY: {0}".format(streamflow_job_output)) pool_streamflow.close() pool_streamflow.join() print("Running AutoRoute simulations ...") #submit jobs to run if mode == "multiprocess": autoroute_job_info[ 'multiprocess_worker_list'] = pool_main.imap_unordered( run_autoroute_multiprocess_worker, autoroute_job_info['multiprocess_job_list'], chunksize=1) else: for htcondor_job in autoroute_job_info['htcondor_job_list']: htcondor_job.submit() if wait_for_all_processes_to_finish: #wait for all of the jobs to complete if mode == "multiprocess": for multi_job_output in autoroute_job_info[ 'multiprocess_worker_list']: print("JOB FINISHED: {0}".format(multi_job_output[3])) #just in case ... pool_main.close() pool_main.join() else: for htcondor_job_index, htcondor_job in enumerate( autoroute_job_info['htcondor_job_list']): htcondor_job.wait() print("JOB FINISHED: {0}".format( autoroute_job_info['htcondor_job_info'][htcondor_job_index] ['autoroute_job_name'])) print("Time to complete entire AutoRoute process: {0}".format( datetime.utcnow() - time_start_all)) else: return autoroute_job_info
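# --- Added fix sketch ---
# Both copies of run_autoroute_multiprocess above build the HTCondor
# 'arguments' attribute with a str.format-style template combined with the %
# operator ('{0} {1} ... {6}' % (...)), which raises TypeError at runtime.
# A corrected sketch of that single call, keeping the same variables from the
# surrounding function:
job.set('arguments',
        '{0} {1} {2} {3} {4} {5} {6}'.format(autoroute_executable_location,
                                             autoroute_manager,
                                             directory,
                                             output_flood_map_raster_name,
                                             output_flood_depth_raster_name,
                                             output_shapefile_shp_name,
                                             delete_flood_map_raster))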
class TestJob(unittest.TestCase): expected = None actual = None msg = None base_dir = os.path.join(os.path.dirname(__file__)) @property def output(self): return '%s\nExpected: %s\nActual: %s\n' % (self.msg, self.expected, self.actual) @property def assert_args(self): return (self.expected, self.actual, self.output) def setUp(self): self.job_name = 'job_name' self.job = Job(self.job_name) def tearDown(self): pass def test__init__(self): attributes = dict() attributes['job_name'] = self.job_name self.expected = {'_name': self.job_name, '_attributes': attributes, '_num_jobs': 1, '_cluster_id': 0, '_job_file': '', '_remote': None, '_remote_id': None, '_remote_input_files': None, '_cwd': '.'} self.actual = self.job.__dict__ self.msg = 'testing initialization with default values' self.assertDictEqual(*self.assert_args) exe = 'exe' args = 'args' num_jobs = '5' self.job = Job(self.job_name, OrderedDict(), num_jobs, executable=exe, arguments=args) attributes.update({'executable': exe, 'arguments': args}) self.expected.update({'_name': self.job_name, '_attributes': attributes, '_num_jobs': int(num_jobs)}) self.actual = self.job.__dict__ self.actual['_attributes'] = dict(self.actual['_attributes']) self.msg = 'testing initialization with all values supplied' self.assertDictEqual(*self.assert_args) num_jobs = 'five' self.assertRaises(ValueError, Job, self.job_name, num_jobs=num_jobs) def test__str__(self): expected = 'job_name = %s\n\nqueue 1\n' % (self.job_name) actual = self.job.__str__() msg = 'testing to string with default initialization' self.assertEqual(expected, actual, '%s\nExpected: %s\nActual: %s\n' % (msg, expected, actual)) def test__repr__(self): expected = '<' \ 'Job: name=%s, num_jobs=%d, cluster_id=%s>' % (self.job_name, 1, 0) actual = self.job.__repr__() msg = 'testing repr with default initialization' self.assertEqual(expected, actual, '%s\nExpected: %s\nActual: %s\n' % (msg, expected, actual)) def test__copy__(self): original = self.job copy = self.job.__copy__() expected = original.name actual = copy.name msg = 'testing that name of copy is equal to original' self.assertEqual(expected, actual, '%s\nExpected: %s\nActual: %s\n' % (msg, expected, actual)) expected = original.attributes actual = copy.attributes msg = 'testing that attributes dictionary of copy is equal to original' self.assertDictEqual(expected, actual, '%s\nExpected: %s\nActual: %s\n' % (msg, expected, actual)) msg = "testing that attributes is the same instance as the original's" self.assertIs(expected, actual, '%s\nExpected: %s\nActual: %s\n' % (msg, expected, actual)) def test__deepcopy__(self): original = self.job memo = dict() copy = self.job.__deepcopy__(memo) expected = self.job.name actual = copy.name msg = 'testing that name of deepcopy is equal to original' self.assertEqual(expected, actual, '%s\nExpected: %s\nActual: %s\n' % (msg, expected, actual)) expected = original.attributes actual = copy.attributes msg = 'testing that attributes dictionary of copy is equal to original' self.assertDictEqual(expected, actual, '%s\nExpected: %s\nActual: %s\n' % (msg, expected, actual)) msg = "testing that attributes is not the same instance as the original's" self.assertIsNot(expected, actual, '%s\nExpected: %s\nActual: %s\n' % (msg, expected, actual)) def test__getattr__(self): exe = 'exe' self.job = Job(self.job_name, executable=exe) expected = exe actual = self.job.executable msg = 'testing that existing value is returned' self.assertEqual(expected, actual, '%s\nExpected: %s\nActual: %s\n' % (msg, expected, actual)) 
pass def test__setattr__(self): pass def test_name(self): expected = self.job_name actual = self.job.name msg = 'checking initialization of name' self.assertEqual(expected, actual, '%s\nExpected: %s\nActual: %s\n' % (msg, expected, actual)) new_name = 'new_name' self.job.name = new_name expected = new_name actual = self.job.name msg = 'checking assignment of name' self.assertEqual(expected, actual, '%s\nExpected: %s\nActual: %s\n' % (msg, expected, actual)) def test_attributes(self): pass def test_num_jobs(self): pass def test_cluster_id(self): pass def test_job_file(self): job_file_name = '%s.job' % (self.job_name) job_file = os.path.join(os.path.relpath(os.getcwd()), job_file_name) expected = job_file actual = self.job.job_file msg = 'checking resolving attribute function for job file' self.assertEqual(expected, actual, '%s\nExpected: %s\nActual: %s\n' % (msg, expected, actual)) init_dir = 'init_dir' self.job.initialdir = init_dir job_file = os.path.join(init_dir, job_file_name) self.assertEqual(job_file, self.job.job_file) def test_log_file(self): self.job = Job(self.job_name, Templates.base) log_file = '%s/%s/%s.%s.log' % (self.job.initial_dir, self.job.logdir, self.job_name, self.job.cluster_id) expected = log_file actual = self.job.log_file msg = 'checking resolving attribute function for log file' self.assertEqual(expected, actual, '%s\nExpected: %s\nActual: %s\n' % (msg, expected, actual)) def test_initial_dir(self): pass def test_submit(self): pass def test_remove(self): pass def test_edit(self): expected = NotImplementedError actual = self.job.edit self.assertRaises(expected, actual) def test_status(self): expected = NotImplementedError actual = self.job.edit self.assertRaises(expected, actual) def test_wait(self): pass def test_get(self): non_existent_attr = 'not-there' expected = None actual = self.job.get(non_existent_attr) msg = 'testing that None is returned when attribute does not exist' self.assertIsNone(actual, '%s\nExpected: %s\nActual: %s\n' % (msg, expected, actual)) expected = 'expected' actual = self.job.get(non_existent_attr, expected) msg = 'testing that supplied value is returned when attribute does not exist' self.assertEqual(expected, actual, '%s\nExpected: %s\nActual: %s\n' % (msg, expected, actual)) exe = 'exe' self.job = Job(self.job_name, executable=exe) expected = exe actual = self.job.get('executable') msg = 'testing that existing value is returned' self.assertEqual(expected, actual, '%s\nExpected: %s\nActual: %s\n' % (msg, expected, actual)) def test_set(self): key = 'was-not-there' value = 'now-it-is' self.job.set(key, value) expected = value actual = self.job.attributes[key] msg = 'testing that attribute that previously does not exist is set correctly' self.assertEqual(expected, actual, '%s\nExpected: %s\nActual: %s\n' % (msg, expected, actual)) key = 'was-already-there' value = 'used-to-be-this' new_value = 'now-it-is-this' self.job.set(key, value) self.job.set(key,new_value) expected = new_value actual = self.job.attributes[key] msg = 'testing that attribute that previously existed is re-set correctly' self.assertEqual(expected, actual, '%s\nExpected: %s\nActual: %s\n' % (msg, expected, actual)) key = 'python boolean' value = True self.job.set(key, value) expected = 'true' actual = self.job.attributes[key] msg = 'testing that an attribute can be set with the Python boolean value "True"' self.assertEqual(expected, actual, '%s\nExpected: %s\nActual: %s\n' % (msg, expected, actual)) key = 'python boolean' value = False self.job.set(key, value) expected = 
'false' actual = self.job.attributes[key] msg = 'testing that an attribute can be set with the Python boolean value "False"' self.assertEqual(expected, actual, '%s\nExpected: %s\nActual: %s\n' % (msg, expected, actual)) key = 'python list' value = ['file.txt', 1] self.job.set(key, value) expected = ', '.join([str(i) for i in value]) actual = self.job.attributes[key] msg = 'testing that an attribute can be set with a Python list' self.assertEqual(expected, actual, '%s\nExpected: %s\nActual: %s\n' % (msg, expected, actual)) def test_delete(self): key = 'was-not-there' value = 'now-it-is' self.job.set(key, value) self.job.delete(key) member = key container = self.job.attributes msg = 'testing that attribute is removed when deleted' self.assertNotIn(member, container, msg) def test_write_job_file(self): pass def test_list_attributes(self): pass def test_make_dir(self): pass def test_make_job_dirs(self): pass def test_resolve_attribute(self): job = Job(self.job_name, Templates.vanilla_base) expected = self.job_name actual = job._resolve_attribute('initialdir') msg = 'checking resolving attribute function' self.assertEqual(expected, actual, '%s\nExpected: %s\nActual: %s\n' % (msg, expected, actual)) def test_resolve_attribute_match(self): pass def test_remote(self): working_dir = os.path.join(self.base_dir, 'test_files', 'working_dir') self.job = Job('remote_test', Templates.vanilla_transfer_files, host='localhost', username=os.environ['USER'], private_key='~/.ssh/id_rsa', remote_input_files=['../copy_test.py', 'input.txt'], transfer_input_files='../input.txt', working_directory=working_dir) remote_base_path = os.path.expanduser('~/' + self.job._remote_id) if os.path.exists(remote_base_path): raise self.job._write_job_file() self.assertTrue(os.path.exists(remote_base_path)) self.job.sync_remote_output() local_output = os.path.join(working_dir, self.job.name) self.assertTrue(os.path.exists(local_output)) shutil.rmtree(remote_base_path) shutil.rmtree(local_output)
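# --- Added sketch ---
# test_set above pins down how Job.set serializes Python values into submit
# attributes: True/False become 'true'/'false' and lists become
# comma-separated strings. A minimal sketch consistent with those assertions
# (an assumption, not the actual condorpy source):
def set(self, attr, value):
    """Store a submit attribute, converting Python values to HTCondor syntax."""
    if isinstance(value, bool):
        value = str(value).lower()
    elif isinstance(value, (list, tuple)):
        value = ', '.join(str(item) for item in value)
    self.attributes[attr] = value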
def setUp(self):
    self.job_name = 'job_name'
    self.job = Job(self.job_name)
def run_ecmwf_forecast_process( rapid_executable_location, # path to RAPID executable rapid_io_files_location, # path ro RAPID input/output directory ecmwf_forecast_location, # path to ECMWF forecasts subprocess_log_directory, # path to store HTCondor/multiprocess logs main_log_directory, # path to store main logs region="", #1 of the 12 partitioned ECMWF files. Leave empty if using global, data_store_url="", # CKAN API url data_store_api_key="", # CKAN API Key, data_store_owner_org="", # CKAN owner organization app_instance_id="", # Streamflow Prediction tool instance ID sync_rapid_input_with_ckan=False, # match Streamflow Prediciton tool RAPID input download_ecmwf=True, # Download recent ECMWF forecast before running, date_string="", # string of date of interest ftp_host="", # ECMWF ftp site path ftp_login="", # ECMWF ftp login name ftp_passwd="", # ECMWF ftp password ftp_directory="", # ECMWF ftp directory delete_past_ecmwf_forecasts=True, # Deletes all past forecasts before next run upload_output_to_ckan=False, # upload data to CKAN and remove local copy delete_output_when_done=False, # delete all output data from this code initialize_flows=False, # use forecast to initialize next run warning_flow_threshold=100, # flows below this threshold will be ignored era_interim_data_location="", # path to ERA Interim return period data create_warning_points=False, # generate waring points for Streamflow Prediction Tool autoroute_executable_location="", # location of AutoRoute executable autoroute_io_files_location="", # path to AutoRoute input/outpuf directory geoserver_url="", # url to API endpoint ending in geoserver/rest geoserver_username="", # username for geoserver geoserver_password="", # password for geoserver mp_mode='htcondor', # valid options are htcondor and multiprocess, mp_execute_directory="", # required if using multiprocess mode initialization_time_step=12, # time step of ECMWF Forecast Process, in hours #doesn't appear to be used MJS 8/23/2020... watersheds_with_dams_list=[], # a list of all watersheds where dam outflows are being forced #doesn't appear to be used, MJS 8/23/2020... stream_ids_with_dams_dict={}, # a dictionary with the watershed key and a value of a list of stream IDs where dams are located #doesn't appear to be used, MJS 8/23/2020... dam_outflows={} # a dictionary with the key as a stream ID and a value of a list of outflows BS_opt_dam=False, IS_dam_tot=0, IS_dam_use=0, dam_tot_id_file="", dam_use_id_file="", dam_file=""): """ This it the main ECMWF RAPID forecast process """ time_begin_all = datetime.datetime.utcnow() LOCAL_SCRIPTS_DIRECTORY = os.path.dirname(os.path.realpath(__file__)) LOCK_INFO_FILE = os.path.join(main_log_directory, "spt_compute_ecmwf_run_info_lock.txt") log_file_path = os.path.join( main_log_directory, "spt_compute_ecmwf_{0}.log".format( time_begin_all.strftime("%y%m%d%H%M%S"))) with CaptureStdOutToLog(log_file_path): if not CONDOR_ENABLED and mp_mode == 'htcondor': raise ImportError( "condorpy is not installed. Please install condorpy to use the 'htcondor' option." ) if not AUTOROUTE_ENABLED and autoroute_executable_location and autoroute_io_files_location: raise ImportError( "AutoRoute is not enabled. Please install tethys_dataset_services" " and AutoRoutePy to use the AutoRoute option.") if mp_mode == "multiprocess": if not mp_execute_directory or not os.path.exists( mp_execute_directory): raise Exception( "If mode is multiprocess, mp_execute_directory is required ..." 
) if sync_rapid_input_with_ckan and app_instance_id and data_store_url and data_store_api_key: # sync with data store ri_manager = RAPIDInputDatasetManager(data_store_url, data_store_api_key, 'ecmwf', app_instance_id) ri_manager.sync_dataset( os.path.join(rapid_io_files_location, 'input')) # clean up old log files clean_logs(subprocess_log_directory, main_log_directory, log_file_path=log_file_path) data_manager = None if upload_output_to_ckan and data_store_url and data_store_api_key: if not SPT_DATASET_ENABLED: raise ImportError( "spt_dataset_manager is not installed. " "Please install spt_dataset_manager to use the 'ckan' options." ) # init data manager for CKAN data_manager = ECMWFRAPIDDatasetManager(data_store_url, data_store_api_key, data_store_owner_org) # get list of correclty formatted rapid input directories in rapid directory rapid_input_directories = get_valid_watershed_list( os.path.join(rapid_io_files_location, "input")) if download_ecmwf and ftp_host: # get list of folders to download ecmwf_folders = sorted( get_ftp_forecast_list( 'Runoff.%s*%s*.netcdf.tar*' % (date_string, region), ftp_host, ftp_login, ftp_passwd, ftp_directory)) else: # get list of folders to run ecmwf_folders = sorted( glob( os.path.join(ecmwf_forecast_location, 'Runoff.' + date_string + '*.netcdf'))) # LOAD LOCK INFO FILE last_forecast_date = datetime.datetime.utcfromtimestamp(0) if os.path.exists(LOCK_INFO_FILE): with open(LOCK_INFO_FILE) as fp_lock_info: previous_lock_info = json.load(fp_lock_info) if previous_lock_info['running']: print("Another SPT ECMWF forecast process is running.\n" "The lock file is located here: {0}\n" "If this is an error, you have two options:\n" "1) Delete the lock file.\n" "2) Edit the lock file and set \"running\" to false. \n" "Then, re-run this script. \n Exiting ...".format( LOCK_INFO_FILE)) return else: last_forecast_date = datetime.datetime.strptime( previous_lock_info['last_forecast_date'], '%Y%m%d%H') run_ecmwf_folders = [] for ecmwf_folder in ecmwf_folders: # get date forecast_date = get_datetime_from_forecast_folder( ecmwf_folder) # if more recent, add to list # check to determine if forecast time step is 12 or 24 hours if initialization_time_step == 24: if forecast_date > last_forecast_date and forecast_date.hour != 12: run_ecmwf_folders.append(ecmwf_folder) elif initialization_time_step == 12: if forecast_date > last_forecast_date: run_ecmwf_folders.append(ecmwf_folder) ecmwf_folders = run_ecmwf_folders if not ecmwf_folders: print("No new forecasts found to run. 
Exiting ...") return # GENERATE NEW LOCK INFO FILE update_lock_info_file(LOCK_INFO_FILE, True, last_forecast_date.strftime('%Y%m%d%H')) # rapid_input_directories_sub = [rapid_input_directory for rapid_input_directory in rapid_input_directories if hydroshed_index in rapid_input_directory] # Try/Except added for lock file try: # ADD SEASONAL INITIALIZATION WHERE APPLICABLE if initialize_flows: initial_forecast_date_timestep = get_date_timestep_from_forecast_folder( ecmwf_folders[0]) seasonal_init_job_list = [] for rapid_input_directory in rapid_input_directories: seasonal_master_watershed_input_directory = os.path.join( rapid_io_files_location, "input", rapid_input_directory) # add seasonal initialization if no initialization file and historical Qout file exists if era_interim_data_location and os.path.exists( era_interim_data_location): era_interim_watershed_directory = os.path.join( era_interim_data_location, rapid_input_directory) if os.path.exists(era_interim_watershed_directory): # INITIALIZE FROM SEASONAL AVERAGE FILE seasonal_streamflow_file = glob( os.path.join(era_interim_watershed_directory, "seasonal_average*.nc")) if seasonal_streamflow_file: seasonal_init_job_list.append( (seasonal_streamflow_file[0], seasonal_master_watershed_input_directory, initial_forecast_date_timestep, "seasonal_average_file")) else: # INITIALIZE FROM HISTORICAL STREAMFLOW FILE historical_qout_file = glob( os.path.join( era_interim_watershed_directory, "Qout*.nc")) if historical_qout_file: seasonal_init_job_list.append(( historical_qout_file[0], seasonal_master_watershed_input_directory, initial_forecast_date_timestep, "historical_streamflow_file")) if seasonal_init_job_list: # use multiprocessing instead of htcondor due to potential for huge file sizes if len(seasonal_init_job_list) > 1: seasonal_pool = mp_Pool() seasonal_pool.imap( compute_seasonal_initial_rapid_flows_multicore_worker, seasonal_init_job_list, chunksize=1) seasonal_pool.close() seasonal_pool.join() else: compute_seasonal_initial_rapid_flows_multicore_worker( seasonal_init_job_list[0]) # ---------------------------------------------------------------------- # BEGIN ECMWF-RAPID FORECAST LOOP # ---------------------------------------------------------------------- master_job_info_list = [] for ecmwf_folder in ecmwf_folders: if download_ecmwf: # download forecast ecmwf_folder = download_and_extract_ftp( ecmwf_forecast_location, ecmwf_folder, ftp_host, ftp_login, ftp_passwd, ftp_directory, delete_past_ecmwf_forecasts) # get list of forecast files ecmwf_forecasts = glob( os.path.join(ecmwf_folder, '*.runoff.%s*nc' % region)) # look for old version of forecasts if not ecmwf_forecasts: ecmwf_forecasts = glob(os.path.join(ecmwf_folder, 'full_*.runoff.netcdf')) + \ glob(os.path.join(ecmwf_folder, '*.52.205.*.runoff.netcdf')) if not ecmwf_forecasts: print("ERROR: Forecasts not found in folder. 
Exiting ...") update_lock_info_file( LOCK_INFO_FILE, False, last_forecast_date.strftime('%Y%m%d%H')) return # make the largest files first ecmwf_forecasts.sort(key=os.path.getsize, reverse=True) forecast_date_timestep = get_date_timestep_from_forecast_folder( ecmwf_folder) print("Running ECMWF Forecast: {0}".format( forecast_date_timestep)) # submit jobs to downsize ecmwf files to watershed rapid_watershed_jobs = {} for rapid_input_directory in rapid_input_directories: # keep list of jobs rapid_watershed_jobs[rapid_input_directory] = { 'jobs': [], 'jobs_info': [] } print("Running forecasts for: {0} {1}".format( rapid_input_directory, os.path.basename(ecmwf_folder))) watershed, subbasin = get_watershed_subbasin_from_folder( rapid_input_directory) master_watershed_input_directory = os.path.join( rapid_io_files_location, "input", rapid_input_directory) master_watershed_outflow_directory = os.path.join( rapid_io_files_location, 'output', rapid_input_directory, forecast_date_timestep) try: os.makedirs(master_watershed_outflow_directory) except OSError: pass # initialize HTCondor/multiprocess Logging Directory subprocess_forecast_log_dir = os.path.join( subprocess_log_directory, forecast_date_timestep) try: os.makedirs(subprocess_forecast_log_dir) except OSError: pass # add USGS gage data to initialization file if initialize_flows: # update intial flows with usgs data update_inital_flows_usgs( master_watershed_input_directory, forecast_date_timestep) # create jobs for HTCondor/multiprocess for watershed_job_index, forecast in enumerate( ecmwf_forecasts): ensemble_number = get_ensemble_number_from_forecast( forecast) # get basin names outflow_file_name = 'Qout_%s_%s_%s.nc' % ( watershed.lower(), subbasin.lower(), ensemble_number) node_rapid_outflow_file = outflow_file_name master_rapid_outflow_file = os.path.join( master_watershed_outflow_directory, outflow_file_name) job_name = 'job_%s_%s_%s_%s' % (forecast_date_timestep, watershed, subbasin, ensemble_number) rapid_watershed_jobs[rapid_input_directory][ 'jobs_info'].append({ 'watershed': watershed, 'subbasin': subbasin, 'outflow_file_name': master_rapid_outflow_file, 'forecast_date_timestep': forecast_date_timestep, 'ensemble_number': ensemble_number, 'master_watershed_outflow_directory': master_watershed_outflow_directory, 'data_manager': data_manager # added this to try to upload forecast in mp }) if mp_mode == "htcondor": # create job to downscale forecasts for watershed job = CJob(job_name, tmplt.vanilla_transfer_files) job.set( 'executable', os.path.join(LOCAL_SCRIPTS_DIRECTORY, 'htcondor_ecmwf_rapid.py')) job.set( 'transfer_input_files', "%s, %s, %s" % (forecast, master_watershed_input_directory, LOCAL_SCRIPTS_DIRECTORY)) job.set('initialdir', subprocess_forecast_log_dir) job.set( 'arguments', '%s %s %s %s %s %s' % (forecast, forecast_date_timestep, watershed.lower(), subbasin.lower(), rapid_executable_location, initialize_flows)) job.set( 'transfer_output_remaps', "\"%s = %s\"" % (node_rapid_outflow_file, master_rapid_outflow_file)) job.submit() rapid_watershed_jobs[rapid_input_directory][ 'jobs'].append(job) elif mp_mode == "multiprocess": rapid_watershed_jobs[rapid_input_directory][ 'jobs'].append(( forecast, forecast_date_timestep, watershed.lower(), subbasin.lower(), rapid_executable_location, initialize_flows, job_name, master_rapid_outflow_file, master_watershed_input_directory, mp_execute_directory, subprocess_forecast_log_dir, watershed_job_index, initialization_time_step, # dam arguments included, MJS 8/23/2020 ........ 
BS_opt_dam, IS_dam_tot, IS_dam_use, dam_tot_id_file, dam_use_id_file, dam_file)) # COMMENTED CODE FOR DEBUGGING SERIALLY ## run_ecmwf_rapid_multiprocess_worker((forecast, ## forecast_date_timestep, ## watershed.lower(), ## subbasin.lower(), ## rapid_executable_location, ## initialize_flows, ## job_name, ## master_rapid_outflow_file, ## master_watershed_input_directory, ## mp_execute_directory, ## subprocess_forecast_log_dir, ## watershed_job_index ## initialization_time_step)) else: raise Exception( "ERROR: Invalid mp_mode. Valid types are htcondor and multiprocess ..." ) for rapid_input_directory, watershed_job_info in rapid_watershed_jobs.items( ): # add sub job list to master job list master_job_info_list = master_job_info_list + watershed_job_info[ 'jobs_info'] if mp_mode == "htcondor": # wait for jobs to finish then upload files for job_index, job in enumerate( watershed_job_info['jobs']): job.wait() # upload file when done if data_manager: upload_single_forecast( watershed_job_info['jobs_info'][job_index], data_manager) elif mp_mode == "multiprocess": pool_main = mp_Pool() func = partial(run_ecmwf_rapid_multiprocess_worker, watershed_job_info['jobs_info']) multiprocess_worker_list = pool_main.imap_unordered( func, watershed_job_info['jobs'], # watershed_job_info['jobs'], chunksize=1) if data_manager: for multi_job_index in multiprocess_worker_list: # upload file when done upload_single_forecast( watershed_job_info['jobs_info'] [multi_job_index], data_manager) # just in case ... pool_main.close() pool_main.join() # when all jobs in watershed are done, generate warning points if create_warning_points: watershed, subbasin = get_watershed_subbasin_from_folder( rapid_input_directory) forecast_directory = os.path.join( rapid_io_files_location, 'output', rapid_input_directory, forecast_date_timestep) era_interim_watershed_directory = os.path.join( era_interim_data_location, rapid_input_directory) if os.path.exists(era_interim_watershed_directory): print( "Generating warning points for {0}-{1} from {2}" .format(watershed, subbasin, forecast_date_timestep)) era_interim_files = glob( os.path.join(era_interim_watershed_directory, "return_period*.nc")) if era_interim_files: try: generate_ecmwf_warning_points( forecast_directory, era_interim_files[0], forecast_directory, threshold=warning_flow_threshold) if upload_output_to_ckan and data_store_url and data_store_api_key: data_manager.initialize_run_ecmwf( watershed, subbasin, forecast_date_timestep) data_manager.zip_upload_warning_points_in_directory( forecast_directory) except Exception as ex: print(ex) pass else: print( "No ERA Interim file found. Skipping ...") else: print( "No ERA Interim directory found for {0}. " "Skipping warning point generation...".format( rapid_input_directory)) # initialize flows for next run if initialize_flows: # create new init flow files/generate warning point files for rapid_input_directory in rapid_input_directories: input_directory = os.path.join(rapid_io_files_location, 'input', rapid_input_directory) forecast_directory = os.path.join( rapid_io_files_location, 'output', rapid_input_directory, forecast_date_timestep) if os.path.exists(forecast_directory): # loop through all the rapid_namelist files in directory watershed, subbasin = get_watershed_subbasin_from_folder( rapid_input_directory) if initialize_flows: print( "Initializing flows for {0}-{1} from {2}". 
format(watershed, subbasin, forecast_date_timestep)) basin_files = find_current_rapid_output( forecast_directory, watershed, subbasin) try: compute_initial_rapid_flows( basin_files, input_directory, forecast_date_timestep, initialization_time_step) except Exception as ex: print(ex) pass # run autoroute process if added if autoroute_executable_location and autoroute_io_files_location: # run autoroute on all of the watersheds run_autorapid_process(autoroute_executable_location, autoroute_io_files_location, rapid_io_files_location, forecast_date_timestep, subprocess_forecast_log_dir, geoserver_url, geoserver_username, geoserver_password, app_instance_id) last_forecast_date = get_datetime_from_date_timestep( forecast_date_timestep) # update lock info file with next forecast update_lock_info_file(LOCK_INFO_FILE, True, last_forecast_date.strftime('%Y%m%d%H')) # ---------------------------------------------------------------------- # END FORECAST LOOP # ---------------------------------------------------------------------- except Exception as ex: print_exc() print(ex) pass # Release & update lock info file with all completed forecasts update_lock_info_file(LOCK_INFO_FILE, False, last_forecast_date.strftime('%Y%m%d%H')) if delete_output_when_done: # delete local datasets for job_info in master_job_info_list: try: rmtree(job_info['master_watershed_outflow_directory']) except OSError: pass # delete watershed folder if empty for item in os.listdir( os.path.join(rapid_io_files_location, 'output')): try: os.rmdir( os.path.join(rapid_io_files_location, 'output', item)) except OSError: pass # print info to user time_end = datetime.datetime.utcnow() print("Time Begin: {0}".format(time_begin_all)) print("Time Finish: {0}".format(time_end)) print("TOTAL TIME: {0}".format(time_end - time_begin_all))
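# --- Added sketch ---
# run_ecmwf_forecast_process above guards against concurrent runs by reading
# and writing a JSON lock file through update_lock_info_file, whose definition
# is not shown. A minimal sketch consistent with how it is called and read
# back (json.load with 'running' and 'last_forecast_date' keys); anything
# beyond those two fields is an assumption:
import json

def update_lock_info_file(lock_info_file, running, last_forecast_date):
    """Persist the run-lock state that guards against concurrent forecast runs."""
    with open(lock_info_file, 'w') as fp_lock_info:
        json.dump({'running': running, 'last_forecast_date': last_forecast_date},
                  fp_lock_info)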
def run_ecmwf_rapid_process(rapid_executable_location, #path to RAPID executable rapid_io_files_location, #path to RAPID input/output directory ecmwf_forecast_location, #path to ECMWF forecasts subprocess_log_directory, #path to store HTCondor/multiprocess logs main_log_directory, #path to store main logs data_store_url="", #CKAN API url data_store_api_key="", #CKAN API key data_store_owner_org="", #CKAN owner organization app_instance_id="", #Streamflow Prediction tool instance ID sync_rapid_input_with_ckan=False, #match Streamflow Prediction tool RAPID input download_ecmwf=True, #Download recent ECMWF forecast before running date_string=None, #string of date of interest ftp_host="", #ECMWF ftp site path ftp_login="", #ECMWF ftp login name ftp_passwd="", #ECMWF ftp password ftp_directory="", #ECMWF ftp directory upload_output_to_ckan=False, #upload data to CKAN and remove local copy delete_output_when_done=False, #delete all output data from this code initialize_flows=False, #use forecast to initialize next run era_interim_data_location="", #path to ERA Interim return period data create_warning_points=False, #generate warning points for Streamflow Prediction Tool autoroute_executable_location="", #location of AutoRoute executable autoroute_io_files_location="", #path to AutoRoute input/output directory geoserver_url='', #url to API endpoint ending in geoserver/rest geoserver_username='', #username for geoserver geoserver_password='', #password for geoserver mp_mode='htcondor', #valid options are htcondor and multiprocess mp_execute_directory='', #required if using multiprocess mode ): """ This is the main ECMWF RAPID process """ time_begin_all = datetime.datetime.utcnow() if date_string is None: date_string = time_begin_all.strftime('%Y%m%d') if mp_mode == "multiprocess": if not mp_execute_directory or not os.path.exists(mp_execute_directory): raise Exception("If mode is multiprocess, mp_execute_directory is required ...") #date_string = datetime.datetime(2016,2,12).strftime('%Y%m%d') local_scripts_location = os.path.dirname(os.path.realpath(__file__)) if sync_rapid_input_with_ckan and app_instance_id and data_store_url and data_store_api_key: #sync with data store ri_manager = RAPIDInputDatasetManager(data_store_url, data_store_api_key, 'ecmwf', app_instance_id) ri_manager.sync_dataset(os.path.join(rapid_io_files_location,'input')) #clean up old log files clean_logs(subprocess_log_directory, main_log_directory) #get list of correctly formatted rapid input directories in rapid directory rapid_input_directories = get_valid_watershed_list(os.path.join(rapid_io_files_location, "input")) if download_ecmwf and ftp_host: #download all files for today ecmwf_folders = sorted(download_all_ftp(ecmwf_forecast_location, 'Runoff.%s*.netcdf.tar*' % date_string, ftp_host, ftp_login, ftp_passwd, ftp_directory)) else: ecmwf_folders = sorted(glob(os.path.join(ecmwf_forecast_location, 'Runoff.'+date_string+'*.netcdf'))) data_manager = None if upload_output_to_ckan and data_store_url and data_store_api_key: #init data manager for CKAN data_manager = ECMWFRAPIDDatasetManager(data_store_url, data_store_api_key, data_store_owner_org) #ADD SEASONAL INITIALIZATION WHERE APPLICABLE if initialize_flows: initial_forecast_date_timestep = get_date_timestep_from_forecast_folder(ecmwf_folders[0]) seasonal_init_job_list = [] for rapid_input_directory in rapid_input_directories: seasonal_master_watershed_input_directory = os.path.join(rapid_io_files_location, "input", rapid_input_directory) #add seasonal initialization 
if no initialization file and historical Qout file exists if era_interim_data_location and os.path.exists(era_interim_data_location): era_interim_watershed_directory = os.path.join(era_interim_data_location, rapid_input_directory) if os.path.exists(era_interim_watershed_directory): historical_qout_file = glob(os.path.join(era_interim_watershed_directory, "Qout*.nc")) if historical_qout_file: seasonal_init_job_list.append((historical_qout_file[0], seasonal_master_watershed_input_directory, initial_forecast_date_timestep)) if seasonal_init_job_list: #use multiprocessing instead of htcondor due to potential for huge file sizes if len(seasonal_init_job_list) > 1: seasonal_pool = mp_Pool() seasonal_pool.imap(compute_seasonal_initial_rapid_flows_multicore_worker, seasonal_init_job_list, chunksize=1) seasonal_pool.close() seasonal_pool.join() else: compute_seasonal_initial_rapid_flows_multicore_worker(seasonal_init_job_list[0]) #prepare ECMWF files master_job_info_list = [] for ecmwf_folder in ecmwf_folders: ecmwf_forecasts = glob(os.path.join(ecmwf_folder,'full_*.runoff.netcdf')) + \ glob(os.path.join(ecmwf_folder,'*.52.205.*.runoff.netcdf')) #look for new version of forecasts if not ecmwf_forecasts: ecmwf_forecasts = glob(os.path.join(ecmwf_folder,'*.runoff.nc')) #make the largest files first ecmwf_forecasts.sort(key=os.path.getsize, reverse=True) forecast_date_timestep = get_date_timestep_from_forecast_folder(ecmwf_folder) print forecast_date_timestep #submit jobs to downsize ecmwf files to watershed iteration = 0 rapid_watershed_jobs = {} for rapid_input_directory in rapid_input_directories: #keep list of jobs rapid_watershed_jobs[rapid_input_directory] = { 'jobs': [], 'jobs_info': [] } print "Running forecasts for:", rapid_input_directory, os.path.basename(ecmwf_folder) watershed, subbasin = get_watershed_subbasin_from_folder(rapid_input_directory) master_watershed_input_directory = os.path.join(rapid_io_files_location, "input", rapid_input_directory) master_watershed_outflow_directory = os.path.join(rapid_io_files_location, 'output', rapid_input_directory, forecast_date_timestep) #add USGS gage data to initialization file if initialize_flows: #update intial flows with usgs data update_inital_flows_usgs(master_watershed_input_directory, forecast_date_timestep) #create jobs for HTCondor for watershed_job_index, forecast in enumerate(ecmwf_forecasts): ensemble_number = get_ensemble_number_from_forecast(forecast) try: os.makedirs(master_watershed_outflow_directory) except OSError: pass #initialize HTCondor Logging Directory subprocess_forecast_log_dir = os.path.join(subprocess_log_directory, forecast_date_timestep) try: os.makedirs(subprocess_forecast_log_dir) except OSError: pass #get basin names outflow_file_name = 'Qout_%s_%s_%s.nc' % (watershed.lower(), subbasin.lower(), ensemble_number) node_rapid_outflow_file = outflow_file_name master_rapid_outflow_file = os.path.join(master_watershed_outflow_directory, outflow_file_name) job_name = 'job_%s_%s_%s_%s_%s' % (forecast_date_timestep, watershed, subbasin, ensemble_number, iteration) if mp_mode == "htcondor": #create job to downscale forecasts for watershed job = CJob(job_name, tmplt.vanilla_transfer_files) job.set('executable',os.path.join(local_scripts_location,'htcondor_ecmwf_rapid.py')) job.set('transfer_input_files', "%s, %s, %s" % (forecast, master_watershed_input_directory, local_scripts_location)) job.set('initialdir', subprocess_forecast_log_dir) job.set('arguments', '%s %s %s %s %s %s' % (forecast, forecast_date_timestep, 
watershed.lower(), subbasin.lower(), rapid_executable_location, initialize_flows)) job.set('transfer_output_remaps',"\"%s = %s\"" % (node_rapid_outflow_file, master_rapid_outflow_file)) job.submit() rapid_watershed_jobs[rapid_input_directory]['jobs'].append(job) rapid_watershed_jobs[rapid_input_directory]['jobs_info'].append({'watershed' : watershed, 'subbasin' : subbasin, 'outflow_file_name' : master_rapid_outflow_file, 'forecast_date_timestep' : forecast_date_timestep, 'ensemble_number': ensemble_number, 'master_watershed_outflow_directory': master_watershed_outflow_directory, }) elif mp_mode == "multiprocess": rapid_watershed_jobs[rapid_input_directory]['jobs'].append((forecast, forecast_date_timestep, watershed.lower(), subbasin.lower(), rapid_executable_location, initialize_flows, job_name, master_rapid_outflow_file, master_watershed_input_directory, mp_execute_directory, subprocess_forecast_log_dir, watershed_job_index)) ## run_ecmwf_rapid_multiprocess_worker((forecast, ## forecast_date_timestep, ## watershed.lower(), ## subbasin.lower(), ## rapid_executable_location, ## initialize_flows, ## job_name, ## master_rapid_outflow_file, ## master_watershed_input_directory, ## mp_execute_directory, ## subprocess_forecast_log_dir, ## watershed_job_index)) else: raise Exception("ERROR: Invalid mp_mode. Valid types are htcondor and multiprocess ...") iteration += 1 for rapid_input_directory, watershed_job_info in rapid_watershed_jobs.iteritems(): #add sub job list to master job list master_job_info_list = master_job_info_list + watershed_job_info['jobs_info'] if mp_mode == "htcondor": #wait for jobs to finish then upload files for job_index, job in enumerate(watershed_job_info['jobs']): job.wait() #upload file when done if data_manager: upload_single_forecast(watershed_job_info['jobs_info'][job_index], data_manager) elif mp_mode == "multiprocess": pool_main = mp_Pool() multiprocess_worker_list = pool_main.imap_unordered(run_ecmwf_rapid_multiprocess_worker, watershed_job_info['jobs'], chunksize=1) if data_manager: for multi_job_output in multiprocess_worker_list: job_index = multi_job_output[0] #upload file when done upload_single_forecast(watershed_job_info['jobs_info'][job_index], data_manager) #just in case ... pool_main.close() pool_main.join() #when all jobs in watershed are done, generate warning points if create_warning_points: watershed, subbasin = get_watershed_subbasin_from_folder(rapid_input_directory) forecast_directory = os.path.join(rapid_io_files_location, 'output', rapid_input_directory, forecast_date_timestep) era_interim_watershed_directory = os.path.join(era_interim_data_location, rapid_input_directory) if os.path.exists(era_interim_watershed_directory): print "Generating Warning Points for", watershed, subbasin, "from", forecast_date_timestep era_interim_files = glob(os.path.join(era_interim_watershed_directory, "return_period*.nc")) if era_interim_files: try: generate_warning_points(forecast_directory, era_interim_files[0], forecast_directory, threshold=10) if upload_output_to_ckan and data_store_url and data_store_api_key: data_manager.initialize_run_ecmwf(watershed, subbasin, forecast_date_timestep) data_manager.zip_upload_warning_points_in_directory(forecast_directory) except Exception, ex: print ex pass else: print "No ERA Interim file found. Skipping ..." else: print "No ERA Interim directory found for", rapid_input_directory, ". Skipping warning point generation..." 
#initialize flows for next run if initialize_flows: #create new init flow files/generate warning point files for rapid_input_directory in rapid_input_directories: input_directory = os.path.join(rapid_io_files_location, 'input', rapid_input_directory) forecast_directory = os.path.join(rapid_io_files_location, 'output', rapid_input_directory, forecast_date_timestep) if os.path.exists(forecast_directory): #loop through all the rapid_namelist files in directory watershed, subbasin = get_watershed_subbasin_from_folder(rapid_input_directory) if initialize_flows: print "Initializing flows for", watershed, subbasin, "from", forecast_date_timestep basin_files = find_current_rapid_output(forecast_directory, watershed, subbasin) try: compute_initial_rapid_flows(basin_files, input_directory, forecast_date_timestep) except Exception, ex: print ex pass
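# Both forecast drivers above discover their watershed folders with get_valid_watershed_list()
# and get_watershed_subbasin_from_folder(). Those helpers live elsewhere in the package; the
# sketches below only reconstruct the 'watershed-subbasin' folder convention that
# run_era_interim_rapid_process further down checks inline, and are assumptions rather than
# the canonical implementations.
import os


def get_valid_watershed_list(input_directory):
    """Sketch: return subdirectories of input_directory named 'watershed-subbasin'."""
    valid_directories = []
    for directory in os.listdir(input_directory):
        if os.path.isdir(os.path.join(input_directory, directory)) \
                and len(directory.split("-")) == 2:
            valid_directories.append(directory)
        else:
            print("{0} is incorrectly formatted. Skipping ...".format(directory))
    return valid_directories


def get_watershed_subbasin_from_folder(folder_name):
    """Sketch: split a 'watershed-subbasin' folder name into its two parts."""
    watershed, subbasin = folder_name.split("-")
    return watershed, subbasin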
class TestJob(unittest.TestCase): def setUp(self): self.job_name = 'job_name' self.job = Job(self.job_name) def tearDown(self): pass def test__init__(self): attributes = OrderedDict() attributes['job_name'] = self.job_name attributes['executable'] = None attributes['arguments'] = None expected = { '_name': self.job_name, '_attributes': attributes, '_num_jobs': 1, '_cluster_id': 0, '_job_file': '' } actual = self.job.__dict__ msg = 'testing initialization with default values' self.assertDictEqual( expected, actual, '%s\nExpected: %s\nActual: %s\n' % (msg, expected, actual)) exe = 'exe' args = '-args' num_jobs = '5' self.job = Job(self.job_name, OrderedDict(), exe, args, num_jobs) attributes['executable'] = exe attributes['arguments'] = args expected = { '_name': self.job_name, '_attributes': attributes, '_num_jobs': int(num_jobs), '_cluster_id': 0, '_job_file': '' } actual = self.job.__dict__ msg = 'testing initialization with all values supplied' self.assertDictEqual( expected, actual, '%s\nExpected: %s\nActual: %s\n' % (msg, expected, actual)) num_jobs = 'five' self.assertRaises(ValueError, Job, self.job_name, num_jobs=num_jobs) def test__str__(self): expected = 'job_name = %s\n\nqueue 1\n' % (self.job_name) actual = self.job.__str__() msg = 'testing to string with default initialization' self.assertEqual( expected, actual, '%s\nExpected: %s\nActual: %s\n' % (msg, expected, actual)) def test__repr__(self): expected = '<' \ 'Job: name=%s, num_jobs=%d, cluster_id=%s>' % (self.job_name, 1, 0) actual = self.job.__repr__() msg = 'testing repr with default initialization' self.assertEqual( expected, actual, '%s\nExpected: %s\nActual: %s\n' % (msg, expected, actual)) def test__copy__(self): original = self.job copy = self.job.__copy__() expected = original.name actual = copy.name msg = 'testing that name of copy is equal to original' self.assertEqual( expected, actual, '%s\nExpected: %s\nActual: %s\n' % (msg, expected, actual)) expected = original.attributes actual = copy.attributes msg = 'testing that attributes dictionary of copy is equal to original' self.assertDictEqual( expected, actual, '%s\nExpected: %s\nActual: %s\n' % (msg, expected, actual)) msg = "testing that attributes is the same instance as the original's" self.assertIs( expected, actual, '%s\nExpected: %s\nActual: %s\n' % (msg, expected, actual)) def test__deepcopy__(self): original = self.job memo = dict() copy = self.job.__deepcopy__(memo) expected = self.job.name actual = copy.name msg = 'testing that name of deepcopy is equal to original' self.assertEqual( expected, actual, '%s\nExpected: %s\nActual: %s\n' % (msg, expected, actual)) expected = original.attributes actual = copy.attributes msg = 'testing that attributes dictionary of copy is equal to original' self.assertDictEqual( expected, actual, '%s\nExpected: %s\nActual: %s\n' % (msg, expected, actual)) msg = "testing that attributes is not the same instance as the original's" self.assertIsNot( expected, actual, '%s\nExpected: %s\nActual: %s\n' % (msg, expected, actual)) def test__getattr__(self): exe = 'exe' self.job = Job(self.job_name, executable=exe) expected = exe actual = self.job.executable msg = 'testing that existing value is returned' self.assertEqual( expected, actual, '%s\nExpected: %s\nActual: %s\n' % (msg, expected, actual)) pass def test__setattr__(self): pass def test_name(self): expected = self.job_name actual = self.job.name msg = 'checking initialization of name' self.assertEqual( expected, actual, '%s\nExpected: %s\nActual: %s\n' % (msg, expected, 
actual)) new_name = 'new_name' self.job.name = new_name expected = new_name actual = self.job.name msg = 'checking assignment of name' self.assertEqual( expected, actual, '%s\nExpected: %s\nActual: %s\n' % (msg, expected, actual)) def test_attributes(self): pass def test_num_jobs(self): pass def test_cluster_id(self): pass def test_job_file(self): job_file_name = '%s.job' % (self.job_name) job_file = os.path.join(os.getcwd(), job_file_name) expected = job_file actual = self.job.job_file msg = 'checking resolving attribute function for job file' self.assertEqual( expected, actual, '%s\nExpected: %s\nActual: %s\n' % (msg, expected, actual)) init_dir = 'init_dir' self.job.initialdir = init_dir job_file = os.path.join(init_dir, job_file_name) self.assertEqual(job_file, self.job.job_file) def test_log_file(self): self.job = Job(self.job_name, Templates.base) log_file = '%s/%s.%s.log' % (self.job.logdir, self.job_name, self.job.cluster_id) expected = log_file actual = self.job.log_file msg = 'checking resolving attribute function for log file' self.assertEqual( expected, actual, '%s\nExpected: %s\nActual: %s\n' % (msg, expected, actual)) def test_initial_dir(self): pass def test_submit(self): pass def test_remove(self): pass def test_edit(self): expected = NotImplementedError actual = self.job.edit self.assertRaises(expected, actual) def test_status(self): expected = NotImplementedError actual = self.job.edit self.assertRaises(expected, actual) def test_wait(self): self.job.wait() def test_get(self): non_existent_attr = 'not-there' expected = None actual = self.job.get(non_existent_attr) msg = 'testing that None is returned when attribute does not exist' self.assertIsNone( actual, '%s\nExpected: %s\nActual: %s\n' % (msg, expected, actual)) expected = 'expected' actual = self.job.get(non_existent_attr, expected) msg = 'testing that supplied value is returned when attribute does not exist' self.assertEqual( expected, actual, '%s\nExpected: %s\nActual: %s\n' % (msg, expected, actual)) exe = 'exe' self.job = Job(self.job_name, executable=exe) expected = exe actual = self.job.get('executable') msg = 'testing that existing value is returned' self.assertEqual( expected, actual, '%s\nExpected: %s\nActual: %s\n' % (msg, expected, actual)) def test_set(self): key = 'was-not-there' value = 'now-it-is' self.job.set(key, value) expected = value actual = self.job.attributes[key] msg = 'testing that attribute that previously does not exist is set correctly' self.assertEqual( expected, actual, '%s\nExpected: %s\nActual: %s\n' % (msg, expected, actual)) key = 'was-already-there' value = 'used-to-be-this' new_value = 'now-it-is-this' self.job.set(key, value) self.job.set(key, new_value) expected = new_value actual = self.job.attributes[key] msg = 'testing that attribute that previously existed is re-set correctly' self.assertEqual( expected, actual, '%s\nExpected: %s\nActual: %s\n' % (msg, expected, actual)) def test_delete(self): key = 'was-not-there' value = 'now-it-is' self.job.set(key, value) self.job.delete(key) member = key container = self.job.attributes msg = 'testing that attribute is removed when deleted' self.assertNotIn(member, container, msg) def test_write_job_file(self): pass def test_list_attributes(self): pass def test_make_dir(self): pass def test_make_job_dirs(self): pass def test_resolve_attribute(self): job = Job(self.job_name, Templates.vanilla_base) expected = self.job_name actual = job._resolve_attribute('initialdir') msg = 'checking resolving attribute function' self.assertEqual( 
expected, actual, '%s\nExpected: %s\nActual: %s\n' % (msg, expected, actual)) def test_resolve_attribute_match(self): pass
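# TestJob above exercises the same condorpy-style Job API that the forecast scripts in this
# document drive through CJob: construct a job from a template, set() attributes, submit(),
# and wait(). The snippet below is a hedged usage sketch based only on the calls visible
# here; it assumes condorpy exposes Job and Templates at the package level, and every path
# and argument string is an illustrative placeholder rather than a project value.
from condorpy import Job as CJob
from condorpy import Templates as tmplt


def submit_example_transfer_job():
    """Sketch: build, submit, and wait on a single file-transfer job."""
    job = CJob('job_example_watershed_subbasin_52_0', tmplt.vanilla_transfer_files)
    job.set('executable', '/path/to/htcondor_ecmwf_rapid.py')            # hypothetical path
    job.set('transfer_input_files', '/path/to/forecast.runoff.netcdf')   # hypothetical path
    job.set('initialdir', '/path/to/subprocess_logs')                    # hypothetical path
    job.set('arguments', 'forecast.runoff.netcdf 20160212.0')            # illustrative arguments
    job.submit()
    job.wait()
    return job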
def run_era_interim_rapid_process(rapid_executable_location, rapid_io_files_location, ecmwf_forecast_location, era_interim_data_location, condor_log_directory, main_log_directory, data_store_url, data_store_api_key, app_instance_id, sync_rapid_input_with_ckan, download_ecmwf, download_era_interim, upload_output_to_ckan, generate_return_periods_file): """ This it the main process """ time_begin_all = datetime.datetime.utcnow() date_string = time_begin_all.strftime('%Y%m%d') #date_string = datetime.datetime(2015,2,3).strftime('%Y%m%d') rapid_scripts_location = os.path.dirname(os.path.realpath(__file__)) if sync_rapid_input_with_ckan and app_instance_id and data_store_url and data_store_api_key: #sync with data store ri_manager = RAPIDInputDatasetManager(data_store_url, data_store_api_key, 'ecmwf', app_instance_id) ri_manager.sync_dataset(os.path.join(rapid_io_files_location,'input')) #clean up old log files clean_logs(condor_log_directory, main_log_directory) #initialize HTCondor Directory condor_init_dir = os.path.join(condor_log_directory, date_string) try: os.makedirs(condor_init_dir) except OSError: pass #get list of correclty formatted rapid input directories in rapid directory rapid_input_directories = [] for directory in os.listdir(os.path.join(rapid_io_files_location,'input')): if os.path.isdir(os.path.join(rapid_io_files_location,'input', directory)) \ and len(directory.split("-")) == 2: rapid_input_directories.append(directory) else: print directory, "incorrectly formatted. Skipping ..." era_interim_folder = era_interim_data_location if download_era_interim: #download historical ERA data era_interim_folders = download_all_ftp(era_interim_data_location, 'erai_runoff_1980to20*.tar.gz.tar') era_interim_folder = era_interim_folders[0] if upload_output_to_ckan and data_store_url and data_store_api_key: #init data manager for CKAN data_manager = ECMWFRAPIDDatasetManager(data_store_url, data_store_api_key) #run ERA Interim processes iteration = 0 job_list = [] job_info_list = [] for rapid_input_directory in rapid_input_directories: input_folder_split = rapid_input_directory.split("-") watershed = input_folder_split[0] subbasin = input_folder_split[1] master_watershed_input_directory = os.path.join(rapid_io_files_location, "input", rapid_input_directory) master_watershed_outflow_directory = os.path.join(rapid_io_files_location, 'output', rapid_input_directory) try: os.makedirs(master_watershed_outflow_directory) except OSError: pass #get basin names interim_folder_basename = os.path.basename(era_interim_folder) print era_interim_folder, interim_folder_basename outflow_file_name = 'Qout_%s.nc' % interim_folder_basename node_rapid_outflow_file = outflow_file_name master_rapid_outflow_file = os.path.join(master_watershed_outflow_directory, outflow_file_name) #create job to downscale forecasts for watershed job = CJob('job_%s_%s_%s' % (interim_folder_basename, watershed, iteration), tmplt.vanilla_transfer_files) job.set('executable',os.path.join(rapid_scripts_location,'compute_ecmwf_rapid.py')) job.set('transfer_input_files', "%s, %s" % (master_watershed_input_directory, rapid_scripts_location)) job.set('initialdir', condor_init_dir) job.set('arguments', '%s %s %s %s %s' % (watershed.lower(), subbasin.lower(), rapid_executable_location, era_interim_folder, ecmwf_forecast_location)) job.set('transfer_output_remaps', "\"%s = %s\"" % (node_rapid_outflow_file, master_rapid_outflow_file)) job.submit() job_list.append(job) job_info_list.append({'watershed' : watershed, 'subbasin' : subbasin, 
'outflow_file_name' : master_rapid_outflow_file, 'master_watershed_outflow_directory': master_watershed_outflow_directory, }) iteration += 1 #wait for jobs to finish then upload files for index, job in enumerate(job_list): job.wait() #generate return periods if generate_return_periods_file: job_info = job_info_list[index] watershed_output_dir = job_info['master_watershed_outflow_directory'] erai_output_file = job_info['outflow_file_name'] return_periods_file = os.path.join(watershed_output_dir, 'return_periods.nc') generate_return_periods(erai_output_file, return_periods_file) """ #upload file when done if upload_output_to_ckan and data_store_url and data_store_api_key: job_info = job_info_list[index] print "Uploading", job_info['watershed'], job_info['subbasin'], \ job_info['forecast_date_timestep'], job_info['ensemble_number'] #Upload to CKAN data_manager.initialize_run_ecmwf(job_info['watershed'], job_info['subbasin'], job_info['forecast_date_timestep']) data_manager.update_resource_ensemble_number(job_info['ensemble_number']) #upload file try: #tar.gz file output_tar_file = os.path.join(job_info['master_watershed_outflow_directory'], "%s.tar.gz" % data_manager.resource_name) if not os.path.exists(output_tar_file): with tarfile.open(output_tar_file, "w:gz") as tar: tar.add(job_info['outflow_file_name'], arcname=os.path.basename(job_info['outflow_file_name'])) return_data = data_manager.upload_resource(output_tar_file) if not return_data['success']: print return_data print "Attempting to upload again" return_data = data_manager.upload_resource(output_tar_file) if not return_data['success']: print return_data else: print "Upload success" else: print "Upload success" except Exception, e: print e pass #remove tar.gz file os.remove(output_tar_file) #initialize flows for next run if initialize_flows: #create new init flow files for rapid_input_directory in rapid_input_directories: input_directory = os.path.join(rapid_io_files_location, 'input', rapid_input_directory) path_to_watershed_files = os.path.join(rapid_io_files_location, 'output', rapid_input_directory) forecast_date_timestep = None #finds the current output from downscaled ECMWF forecasts if os.path.exists(path_to_watershed_files): forecast_date_timestep = sorted([d for d in os.listdir(path_to_watershed_files) \ if os.path.isdir(os.path.join(path_to_watershed_files, d))], reverse=True)[0] if forecast_date_timestep: #loop through all the rapid_namelist files in directory forecast_directory = os.path.join(path_to_watershed_files, forecast_date_timestep) input_folder_split = rapid_input_directory.split("-") watershed = input_folder_split[0] subbasin = input_folder_split[1] if initialize_flows: print "Initializing flows for", watershed, subbasin, "from", forecast_date_timestep basin_files = find_current_rapid_output(forecast_directory, watershed, subbasin) try: compute_initial_rapid_flows(basin_files, input_directory, forecast_date_timestep) except Exception, ex: print ex pass if upload_output_to_ckan and data_store_url and data_store_api_key: #delete local datasets for job_info in job_info_list: try: rmtree(job_info['master_watershed_outflow_directory']) except OSError: pass #delete watershed folder if empty for item in os.listdir(os.path.join(rapid_io_files_location, 'output')): try: os.rmdir(os.path.join(rapid_io_files_location, 'output', item)) except OSError: pass """ #print info to user time_end = datetime.datetime.utcnow() print "Time Begin All: " + str(time_begin_all) print "Time Finish All: " + str(time_end) print "TOTAL TIME: 
" + str(time_end-time_begin_all)