def test_filemanager_update_local(self):
    """
    Populate the file list, write a dummy file at the local path of the
    first DataFile record, then run update_local_status.

    After the update the record is expected to report local_status == 0
    and a local_size greater than zero.
    """
    # ---------------------------- SETUP ----------------------------
    print('\n')
    test_name = inspect.stack()[0][3]
    print_message('---- Starting Test: {} ----'.format(test_name), 'ok')
    sta = False
    types = ['atm', 'ice', 'ocn', 'rest', 'streams.cice', 'streams.ocean']
    database = '{}.db'.format(test_name)
    simstart = 51
    simend = 60
    remote_path = '/global/homes/r/renata/ACME_simulations/20170926.FCT2.A_WCYCL1850S.ne30_oECv3.anvil'
    experiment = '20170926.FCT2.A_WCYCL1850S.ne30_oECv3.anvil'
    mutex = threading.Lock()

    # ----------------------------- TEST ----------------------------
    try:
        filemanager = FileManager(
            event_list=EventList(),
            mutex=mutex,
            sta=sta,
            types=types,
            database=database,
            remote_endpoint=self.remote_endpoint,
            remote_path=remote_path,
            local_endpoint=self.local_endpoint,
            local_path=self.local_path,
            experiment=experiment)
        filemanager.populate_file_list(
            simstart=simstart,
            simend=simend,
            experiment=experiment)

        # Release the lock even if the query raises, so a failure here
        # cannot leave the mutex held.
        filemanager.mutex.acquire()
        try:
            df = DataFile.select().limit(1)
        finally:
            filemanager.mutex.release()

        name = df[0].name
        dummy_file_path = df[0].local_path
        parent_dir = os.path.dirname(dummy_file_path)
        if not os.path.exists(parent_dir):
            os.makedirs(parent_dir)
        print('----- writing out dummy file at {} -----'.format(
            dummy_file_path))
        with open(dummy_file_path, 'w') as fp:
            fp.write('this is a test file')

        filemanager.update_local_status()

        filemanager.mutex.acquire()
        try:
            df = DataFile.select().where(DataFile.name == name)[0]
        finally:
            filemanager.mutex.release()
        self.assertEqual(df.local_status, 0)
        self.assertTrue(df.local_size > 0)
    finally:
        # --------------------------- CLEANUP -----------------------
        # Always remove the test database so a failed assertion does not
        # leave a stale file behind for the next run.
        if os.path.exists(database):
            os.remove(database)
def test_filemanager_populate_no_sta(self):
    """
    Run filemanager setup and populate with sta turned off.

    For each of the 'atm', 'lnd', 'ocn' and 'ice' datatypes the database is
    expected to hold 12 files per simulated year, and every name generated
    from file_type_map for each year/month must be present.
    """
    # ---------------------------- SETUP ----------------------------
    print('\n')
    test_name = inspect.stack()[0][3]
    print_message('---- Starting Test: {} ----'.format(test_name), 'ok')
    sta = False
    database = '{}.db'.format(test_name)
    simstart = 1
    simend = 10
    experiment = '20180215.DECKv1b_1pctCO2.ne30_oEC.edison'
    mutex = threading.Lock()

    # ----------------------------- TEST ----------------------------
    try:
        filemanager = FileManager(
            event_list=EventList(),
            mutex=mutex,
            sta=sta,
            types=self.file_types,
            database=database,
            remote_endpoint=self.remote_endpoint,
            remote_path=self.remote_path,
            local_endpoint=self.local_endpoint,
            local_path=self.local_path,
            experiment=self.experiment)
        filemanager.populate_file_list(
            simstart=simstart,
            simend=simend,
            experiment=experiment)

        # One file per month for every simulated year (inclusive range).
        expected_count = (simend - simstart + 1) * 12

        filemanager.mutex.acquire()
        try:
            for _type in ['atm', 'lnd', 'ocn', 'ice']:
                file_names = [
                    x.name
                    for x in DataFile.select().where(
                        DataFile.datatype == _type)
                ]
                if len(file_names) != expected_count:
                    print(_type + ' does not have ' + str(expected_count)
                          + ' files')
                self.assertEqual(len(file_names), expected_count)

                # Set lookup keeps the year x month membership checks cheap.
                known_names = set(file_names)
                for year in range(simstart, simend + 1):
                    for month in range(1, 13):
                        name = (file_type_map[_type]
                                .replace('EXPERIMENT', experiment)
                                .replace('YEAR', '{:04d}'.format(year))
                                .replace('MONTH', '{:02}'.format(month)))
                        self.assertTrue(name in known_names)
        finally:
            # A failed assertion must not leave the mutex held.
            filemanager.mutex.release()
    finally:
        # --------------------------- CLEANUP -----------------------
        if os.path.exists(database):
            os.remove(database)
def test_filemanager_update_remote_yes_sta(self):
    """
    Run filemanager setup and populate, then run update_remote_status on a
    directory that has been short term archived.

    Every DataFile record is expected to end up with remote_status == 0 and
    all_data_remote() is expected to return a true value.
    """
    # ---------------------------- SETUP ----------------------------
    print('\n')
    test_name = inspect.stack()[0][3]
    print_message('---- Starting Test: {} ----'.format(test_name), 'ok')
    sta = True
    types = [
        'atm', 'ice', 'ocn', 'rest', 'streams.ocean', 'streams.cice',
        'mpas-o_in', 'mpas-cice_in', 'meridionalHeatTransport'
    ]
    database = '{}.db'.format(test_name)
    simstart = 51
    source_path = '/global/cscratch1/sd/golaz/ACME_simulations/20180215.DECKv1b_1pctCO2.ne30_oEC.edison'
    simend = 60
    experiment = '20180215.DECKv1b_1pctCO2.ne30_oEC.edison'
    mutex = threading.Lock()

    # ----------------------------- TEST ----------------------------
    try:
        filemanager = FileManager(
            event_list=EventList(),
            mutex=mutex,
            sta=sta,
            types=types,
            database=database,
            remote_endpoint=self.remote_endpoint,
            remote_path=source_path,
            local_endpoint=self.local_endpoint,
            local_path=self.local_path,
            experiment=self.experiment)
        filemanager.populate_file_list(
            simstart=simstart,
            simend=simend,
            experiment=experiment)
        client = get_client()
        filemanager.update_remote_status(client)

        filemanager.mutex.acquire()
        try:
            for datafile in DataFile.select():
                if datafile.remote_status != 0:
                    # Show which file is missing before the assertion fires.
                    print('{} {}'.format(
                        datafile.name, datafile.remote_path))
                self.assertEqual(datafile.remote_status, 0)
        finally:
            # Release on every path, including a failed assertion above.
            if filemanager.mutex.locked():
                filemanager.mutex.release()
        self.assertTrue(filemanager.all_data_remote())
    finally:
        # --------------------------- CLEANUP -----------------------
        if os.path.exists(database):
            os.remove(database)
def test_filemanager_update_remote_no_sta(self):
    """
    Run filemanager setup and populate, then run update_remote_status with
    10 years of atm output, and finally run all_data_remote to show that
    all the remote data has been recognized.
    """
    # ---------------------------- SETUP ----------------------------
    print('\n')
    test_name = inspect.stack()[0][3]
    print_message('---- Starting Test: {} ----'.format(test_name), 'ok')
    remote_path = '/global/homes/r/renata/ACME_simulations/20170926.FCT2.A_WCYCL1850S.ne30_oECv3.anvil'
    sta = False
    types = ['atm']
    database = '{}.db'.format(test_name)
    simstart = 51
    simend = 60
    experiment = '20170926.FCT2.A_WCYCL1850S.ne30_oECv3.anvil'
    mutex = threading.Lock()

    # ----------------------------- TEST ----------------------------
    try:
        filemanager = FileManager(
            event_list=EventList(),
            mutex=mutex,
            sta=sta,
            types=types,
            database=database,
            remote_endpoint=self.remote_endpoint,
            remote_path=remote_path,
            local_endpoint=self.local_endpoint,
            local_path=self.local_path,
            experiment=experiment)
        filemanager.populate_file_list(
            simstart=simstart,
            simend=simend,
            experiment=experiment)
        client = get_client()
        filemanager.update_remote_status(client)

        filemanager.mutex.acquire()
        try:
            for datafile in DataFile.select():
                if datafile.remote_status != 0:
                    # Show the offending record before the assertion fires.
                    print('{} {} {} {}'.format(
                        datafile.name,
                        datafile.remote_path,
                        datafile.remote_status,
                        datafile.datatype))
                self.assertEqual(datafile.remote_status, 0)
        finally:
            # Release on every path, including a failed assertion above.
            if filemanager.mutex.locked():
                filemanager.mutex.release()
        self.assertTrue(filemanager.all_data_remote())
    finally:
        # --------------------------- CLEANUP -----------------------
        if os.path.exists(database):
            os.remove(database)
def test_cmor_valid_completed(self):
    """
    Tests that a valid config on a completed case will mark itself as
    already being run and not start.

    A Timeseries job is set up, executed and completed first; a Cmor job is
    then created for the same case and is expected to pass postvalidate,
    return a true value from execute, report JobStatus.COMPLETED and return
    a true value from handle_completion.
    """
    print_message(
        '\n---- Starting Test: {} ----'.format(inspect.stack()[0][3]), 'ok')
    config = ConfigObj(self.valid_config_path)
    # Wrap the configured value in a list before validation.
    config['post-processing']['cmor']['variable_list'] = [
        config['post-processing']['cmor']['variable_list']
    ]
    case_name = '20180129.DECKv1b_piControl.ne30_oEC.edison'
    case = config['simulations'][case_name]
    messages = verify_config(config)
    self.assertEqual(len(messages), 0)
    config['global']['resource_path'] = 'resources/'

    filemanager = FileManager(config=config, event_list=EventList())
    filemanager.populate_file_list()
    filemanager.update_local_status()

    start_year = config['simulations']['start_year']
    end_year = config['simulations']['end_year']

    timeseries = Timeseries(
        short_name=case['short_name'],
        case=case_name,
        start=start_year,
        end=end_year,
        config=config,
        run_type='atm')
    timeseries.check_data_ready(filemanager=filemanager)
    timeseries.setup_data(
        config=config, filemanager=filemanager, case=case_name)
    timeseries.execute(config=config, event_list=EventList())
    timeseries.handle_completion(
        filemanager=filemanager, config=config, event_list=EventList())

    cmor = Cmor(
        short_name=case['short_name'],
        case=case_name,
        start=start_year,
        end=end_year,
        config=config)
    cmor.check_data_ready(filemanager=filemanager)
    cmor.setup_data(config=config, filemanager=filemanager, case=case_name)
    self.assertTrue(cmor.postvalidate(config=config))
    self.assertTrue(cmor.execute(config=config, event_list=EventList()))
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(cmor.status, JobStatus.COMPLETED)
    self.assertTrue(
        cmor.handle_completion(
            filemanager=filemanager,
            event_list=EventList(),
            config=config))
def test_filemanager_setup_valid_with_inplace_data(self):
    """
    Build a FileManager from the e3sm_diags_complete test config, populate
    it and update its local status, then check that the database file
    exists and that all_data_local() returns a true value.
    """
    print('\n')
    test_name = inspect.stack()[0][3]
    print_message('---- Starting Test: {} ----'.format(test_name), 'ok')

    config = ConfigObj('tests/test_configs/e3sm_diags_complete.cfg')
    db_path = '{}.db'.format(test_name)

    manager = FileManager(
        database=db_path,
        event_list=EventList(),
        config=config)
    manager.populate_file_list()
    manager.update_local_status()

    self.assertTrue(isinstance(manager, FileManager))
    self.assertTrue(os.path.exists(db_path))
    self.assertTrue(manager.all_data_local())

    # Remove the database created for this test.
    os.remove(db_path)
def test_filemanager_get_file_paths(self):
    """
    Build a FileManager from the filemanager_partial_data test config and
    exercise get_file_paths_by_year and check_data_ready against it.
    """
    print('\n')
    test_name = inspect.stack()[0][3]
    print_message('---- Starting Test: {} ----'.format(test_name), 'ok')

    case_id = '20180129.DECKv1b_piControl.ne30_oEC.edison'
    config = ConfigObj('tests/test_configs/filemanager_partial_data.cfg')
    db_path = '{}.db'.format(test_name)

    manager = FileManager(
        database=db_path,
        event_list=EventList(),
        config=config)
    manager.populate_file_list()
    self.assertTrue(isinstance(manager, FileManager))
    self.assertTrue(os.path.exists(db_path))

    manager.update_local_status()
    manager.write_database()
    self.assertFalse(manager.all_data_local())

    # paths returned for a year range must exist on disk
    atm_paths = manager.get_file_paths_by_year(
        datatype='atm',
        case=case_id,
        start_year=1,
        end_year=2)
    for atm_path in atm_paths:
        self.assertTrue(os.path.exists(atm_path))

    # paths returned when no years are given must exist on disk
    stream_paths = manager.get_file_paths_by_year(
        datatype='ocn_streams',
        case=case_id)
    for stream_path in stream_paths:
        self.assertTrue(os.path.exists(stream_path))

    # nothing is returned for incorrect years
    missing = manager.get_file_paths_by_year(
        datatype='ocn_streams',
        case=case_id,
        start_year=1,
        end_year=100)
    self.assertTrue(missing is None)

    # the filemanager knows when data is ready
    is_ready = manager.check_data_ready(
        data_required=['atm'],
        case=case_id,
        start_year=1,
        end_year=2)
    self.assertTrue(is_ready)

    # the filemanager knows when data is NOT ready
    is_ready = manager.check_data_ready(
        data_required=['atm'],
        case=case_id,
        start_year=1,
        end_year=3)
    self.assertFalse(is_ready)

    is_ready = manager.check_data_ready(
        data_required=['ocn_streams'],
        case=case_id)
    self.assertTrue(is_ready)

    # Remove the database created for this test.
    os.remove(db_path)
def test_filemanager_all_data_local(self):
    """
    Create a dummy project and populate it with small files to test that
    filemanager.all_data_local works correctly.

    all_data_local() is expected to be false before any file exists, and
    true once every DataFile has a file on disk and its remote_size and
    local_size both set to that file's size.
    """
    # ---------------------------- SETUP ----------------------------
    print('\n')
    test_name = inspect.stack()[0][3]
    print_message('---- Starting Test: {} ----'.format(test_name), 'ok')
    sta = True
    database = '{}.db'.format(test_name)
    simstart = 1
    simend = 10
    remote_path = '/dummy/remote/20180215.DECKv1b_1pctCO2.ne30_oEC.edison/run/something'
    local_path = '/p/user_pub/e3sm/baldwin32/E3SM_test_data/dummyproject'
    experiment = '20180215.DECKv1b_1pctCO2.ne30_oEC.edison'
    types = ['atm', 'ocn', 'lnd', 'ice']
    mutex = threading.Lock()
    # Start from an empty project directory.
    if os.path.exists(local_path):
        shutil.rmtree(local_path)

    # ----------------------------- TEST ----------------------------
    try:
        filemanager = FileManager(
            event_list=EventList(),
            mutex=mutex,
            sta=sta,
            types=types,
            database=database,
            remote_endpoint=self.remote_endpoint,
            remote_path=remote_path,
            local_endpoint=self.local_endpoint,
            local_path=local_path,
            experiment=self.experiment)
        self.assertEqual(
            filemanager.remote_path,
            '/dummy/remote/20180215.DECKv1b_1pctCO2.ne30_oEC.edison')
        filemanager.populate_file_list(
            simstart=simstart,
            simend=simend,
            experiment=experiment)
        filemanager.update_local_status()
        self.assertFalse(filemanager.all_data_local())

        filemanager.mutex.acquire()
        try:
            for df in DataFile.select():
                parent_dir = os.path.dirname(df.local_path)
                if not os.path.exists(parent_dir):
                    os.makedirs(parent_dir)
                with open(df.local_path, 'w') as fp:
                    fp.write('this is a test file')
                size = os.path.getsize(df.local_path)
                df.remote_size = size
                df.local_size = size
                df.save()
        finally:
            # An I/O or database error above must not leave the mutex held.
            if filemanager.mutex.locked():
                filemanager.mutex.release()

        filemanager.update_local_status()
        self.assertTrue(filemanager.all_data_local())
    finally:
        # --------------------------- CLEANUP -----------------------
        if os.path.exists(database):
            os.remove(database)
def setup(argv, display_event, **kwargs):
    """
    Parse the commandline arguments, and setup the master config dict,
    the FileManager and the RunManager.

    Parameters:
        argv: the command line arguments, forwarded to parse_args
        display_event: the event to turn the display on and off; passed to
            setup_globus when not running in no-ui mode
        **kwargs: must contain 'event_list', 'thread_list', 'mutex' and
            'kill_event'
    Returns:
        (config, filemanager, runmanager) on success, or
        (False, False, False) if any setup step fails
    """
    print("entering setup")

    # Setup the parser
    args = parse_args(argv=argv)
    if not args.config:
        parse_args(print_help=True)
        return False, False, False
    event_list = kwargs['event_list']
    thread_list = kwargs['thread_list']
    mutex = kwargs['mutex']

    # check if globus config is valid, else remove it
    globus_config = os.path.join(os.path.expanduser('~'), '.globus.cfg')
    if os.path.exists(globus_config):
        try:
            ConfigObj(globus_config)
        except Exception:
            # Only a genuine parse failure should delete the file; a bare
            # except would also fire on KeyboardInterrupt/SystemExit.
            os.remove(globus_config)

    if not os.path.exists(args.config):
        print("Invalid config, {} does not exist".format(args.config))
        return False, False, False

    # Check that there are no white space errors in the config file
    line_index = check_config_white_space(args.config)
    if line_index != 0:
        print('''
ERROR: line {num} does not have a space after the \'=\', white space is required.
Please add a space and run again.'''.format(num=line_index))
        return False, False, False

    # read the config file and setup the config dict
    try:
        config = ConfigObj(args.config)
    except Exception:
        print("Error parsing config file {}".format(args.config))
        parse_args(print_help=True)
        return False, False, False

    # run validator for config file
    if not config.get('global'):
        return False, False, False
    if args.resource_dir:
        config['global']['resource_dir'] = args.resource_dir
    else:
        config['global']['resource_dir'] = os.path.join(
            sys.prefix, 'share', 'processflow', 'resources')

    template_path = os.path.join(
        config['global']['resource_dir'], 'config_template.json')
    with open(template_path, 'r') as template_file:
        template = json.load(template_file)
    valid, messages = verify_config(config, template)
    if not valid:
        for message in messages:
            print(message)
        return False, False, False

    config['global']['input_path'] = os.path.join(
        config['global']['project_path'], 'input')
    config['global']['output_path'] = os.path.join(
        config['global']['project_path'], 'output')

    # setup output and cache directories
    if not os.path.exists(config['global']['input_path']):
        os.makedirs(config['global']['input_path'])
    if not os.path.exists(config['global']['output_path']):
        os.makedirs(config['global']['output_path'])

    # Copy the config into the input directory for safe keeping
    input_config_path = os.path.join(
        config['global']['input_path'], 'run.cfg')
    try:
        copy(args.config, input_config_path)
    except Exception:
        # Best effort only: the copy is a convenience, so a failure here
        # must not abort setup. Nothing is logged because logging has not
        # been configured yet at this point.
        pass

    # setup logging
    if args.log:
        log_path = args.log
    else:
        log_path = os.path.join(
            config['global']['output_path'], 'workflow.log')
    config['global']['log_path'] = log_path
    logging.basicConfig(
        format='%(asctime)s:%(levelname)s: %(message)s',
        datefmt='%m/%d/%Y %I:%M:%S %p',
        filename=log_path,
        filemode='w',
        level=logging.INFO)
    logging.getLogger('globus_sdk').setLevel(logging.WARNING)

    # Make sure the set_frequency is a list of ints
    set_frequency = config['global']['set_frequency']
    if not isinstance(set_frequency, list):
        set_frequency = [int(set_frequency)]
    else:
        # These are sometimes strings which break things later
        set_frequency = [int(freq) for freq in set_frequency]
    config['global']['set_frequency'] = set_frequency

    # setup config for file type directories
    if not isinstance(config['global']['file_types'], list):
        config['global']['file_types'] = [config['global']['file_types']]

    # setup run_scipts_path
    run_script_path = os.path.join(
        config['global']['output_path'], 'run_scripts')
    config['global']['run_scripts_path'] = run_script_path
    if not os.path.exists(run_script_path):
        os.makedirs(run_script_path)

    # setup tmp_path; any previous contents are discarded
    tmp_path = os.path.join(config['global']['output_path'], 'tmp')
    config['global']['tmp_path'] = tmp_path
    if os.path.exists(tmp_path):
        rmtree(tmp_path)
    os.makedirs(tmp_path)

    # setup the year_set list
    sim_start_year = int(config['global']['simulation_start_year'])
    sim_end_year = int(config['global']['simulation_end_year'])
    config['global']['simulation_start_year'] = sim_start_year
    config['global']['simulation_end_year'] = sim_end_year
    config['global']['short_term_archive'] = int(
        config['global']['short_term_archive'])

    # initialize the filemanager
    event_list.push(message='Initializing file manager')
    head, tail = os.path.split(config['global']['source_path'])
    if tail == 'run':
        # Drop a trailing 'run' directory from the source path.
        config['global']['source_path'] = head

    filemanager = FileManager(
        database=os.path.join(
            config['global']['project_path'], 'input', 'workflow.db'),
        types=config['global']['file_types'],
        sta=config['global']['short_term_archive'],
        remote_path=config['global']['source_path'],
        remote_endpoint=config['transfer']['source_endpoint'],
        local_path=os.path.join(config['global']['project_path'], 'input'),
        local_endpoint=config['transfer']['destination_endpoint'],
        mutex=mutex)
    filemanager.populate_file_list(
        simstart=config['global']['simulation_start_year'],
        simend=config['global']['simulation_end_year'],
        experiment=config['global']['experiment'])
    print('Updating local status')
    filemanager.update_local_status()
    print('Local status update complete')

    all_data = filemanager.all_data_local()
    if all_data:
        print('All data is local')
    else:
        print('Additional data needed')
    logging.info("FileManager setup complete")
    logging.info(str(filemanager))

    if all_data or args.no_monitor:
        print("skipping globus setup")
    else:
        endpoints = list(config['transfer'].values())
        if args.no_ui:
            print('Running in no-ui mode')
            addr = config['global'].get('email')
            if not addr:
                print('When running in no-ui mode, you must enter an email address.')
                return False, False, False
            setup_success = setup_globus(
                endpoints=endpoints,
                no_ui=True,
                src=addr,
                dst=addr,
                event_list=event_list)
        else:
            output_path = config['global']['output_path']
            error_output = os.path.join(output_path, 'workflow.error')
            config['global']['error_path'] = error_output
            if not os.path.exists(output_path):
                os.makedirs(output_path)
            # stderr is redirected to the error file from here on; the
            # handle is not closed in this function.
            sys.stderr = open(error_output, 'w')
            msg = 'Activating endpoints {}'.format(' '.join(endpoints))
            logging.info(msg)
            setup_success = setup_globus(
                endpoints=endpoints,
                display_event=display_event,
                no_ui=False)
        if not setup_success:
            print("Globus setup error")
            return False, False, False
        print('Globus authentication complete')

        print('Checking file access on globus transfer nodes')
        setup_success, endpoint = check_globus(
            source_endpoint=config['transfer']['source_endpoint'],
            source_path=config['global']['source_path'],
            destination_endpoint=config['transfer']['destination_endpoint'],
            destination_path=config['global']['input_path'])
        if not setup_success:
            print('ERROR! Unable to access {} globus node'.format(
                endpoint['type']))
            print('The node may be down, or you may not have access to the requested directory')
            return False, False, False

    # setup the runmanager
    runmanager = RunManager(
        event_list=event_list,
        output_path=config['global']['output_path'],
        caseID=config['global']['experiment'],
        scripts_path=run_script_path,
        thread_list=thread_list,
        event=kwargs['kill_event'])
    runmanager.setup_job_sets(
        set_frequency=config['global']['set_frequency'],
        sim_start_year=sim_start_year,
        sim_end_year=sim_end_year,
        config=config,
        filemanager=filemanager)

    # Turning off the GUI for the time being
    # config['global']['ui'] = False if args.no_ui else True
    config['global']['ui'] = False
    config['global']['no_cleanup'] = bool(args.no_cleanup)
    config['global']['no_monitor'] = bool(args.no_monitor)
    config['global']['print_file_list'] = bool(args.file_list)

    logging.info('Starting run with config')
    logging.info(pformat(config))
    return config, filemanager, runmanager