new_simulation.experiment_id = to_experiment.id tags = copy.copy(simulation.tags) tags[CopiedFromSimulation] = simulation.id new_simulation.set_tags(tags) job = simulation.hpc_jobs[-1] # override any fields here as necessary... if job and job.configuration new_simulation.configuration = Configuration( environment_name=job.configuration.environment_name, simulation_input_args=job.configuration.simulation_input_args, working_directory_root=job.configuration.working_directory_root, executable_path=job.configuration.executable_path, maximum_number_of_retries=SetupParser.get(parameter='num_retries'), priority=SetupParser.get(parameter='priority'), min_cores=job.configuration.min_cores, max_cores=job.configuration.max_cores, exclusive=job.configuration.exclusive, node_group_name=SetupParser.get(parameter='node_group'), asset_collection_id=job.configuration.asset_collection_id) with tempfile.TemporaryDirectory() as dir files_to_add_last = {} for f in simulation.files if f.file_name == 'config.json' dest_file = os.path.join(dir, 'config.json') with open(dest_file, 'wb') as fp fp.write(f.retrieve()) modify_config_json(dest_file)
def location(self):
    """Return the location stored on this object, or the setup ``type``.

    Only ``None`` counts as "not set"; any other stored value (including an
    empty string) is returned unchanged. When nothing is stored, the value
    comes from ``SetupParser.get('type')``.
    """
    if self._location is not None:
        return self._location
    return SetupParser.get('type')
def copy_simulation(simulation, to_experiment):
    """Create a copy of ``simulation`` inside ``to_experiment``.

    The copy receives the source simulation's name, description and tags
    (plus a ``CopiedFromSimulation`` tag holding the source id). When the
    source has at least one HPC job with a configuration, a new
    ``Configuration`` is built from that job, with retries, priority and
    node group taken from the current ``SetupParser`` settings instead.

    Every file except ``config.json`` is attached by its MD5 checksum.
    ``config.json`` is retrieved into a temporary directory, passed through
    ``modify_config_json`` and attached from that local path after a first
    save.

    Args:
        simulation: The simulation to copy; it is refreshed with its files,
            HPC jobs and tags before being read.
        to_experiment: The experiment whose ``id`` the copy is attached to.

    Returns:
        The newly saved simulation.
    """
    simulation.refresh(query_criteria=QueryCriteria().select_children(
        ['files', 'hpc_jobs', 'tags']))

    new_simulation = Simulation(simulation.name,
                                description=simulation.description)
    new_simulation.experiment_id = to_experiment.id

    tags = copy.copy(simulation.tags)
    tags["CopiedFromSimulation"] = simulation.id
    new_simulation.set_tags(tags)

    # Indexing [-1] on an empty job list raises IndexError; fall back to
    # None so the ``if job`` guard below can do its work.
    job = simulation.hpc_jobs[-1] if simulation.hpc_jobs else None

    # override any fields here as necessary...
    if job and job.configuration:
        source_config = job.configuration
        new_simulation.configuration = Configuration(
            environment_name=source_config.environment_name,
            simulation_input_args=source_config.simulation_input_args,
            working_directory_root=source_config.working_directory_root,
            executable_path=source_config.executable_path,
            maximum_number_of_retries=SetupParser.get(parameter='num_retries'),
            priority=SetupParser.get(parameter='priority'),
            min_cores=source_config.min_cores,
            max_cores=source_config.max_cores,
            exclusive=source_config.exclusive,
            node_group_name=SetupParser.get(parameter='node_group'),
            asset_collection_id=source_config.asset_collection_id)

    # Both saves stay inside the ``with`` block: the rewritten config.json
    # only exists while the temporary directory is alive.
    with tempfile.TemporaryDirectory() as tmp_dir:
        files_to_add_last = {}
        for f in simulation.files:
            if f.file_name == 'config.json':
                dest_file = os.path.join(tmp_dir, 'config.json')
                with open(dest_file, 'wb') as fp:
                    fp.write(f.retrieve())
                modify_config_json(dest_file)
                # NOTE(review): the file is registered under its full
                # temporary path rather than 'config.json' -- confirm this
                # is what the service expects.
                sf = SimulationFile(file_name=dest_file,
                                    file_type=f.file_type,
                                    description=f.description)
                files_to_add_last[dest_file] = sf
            else:
                sf = SimulationFile(file_name=f.file_name,
                                    file_type=f.file_type,
                                    description=f.description,
                                    md5_checksum=f.md5_checksum)
                new_simulation.add_file(sf)

        new_simulation.save(return_missing_files=False)

        if files_to_add_last:
            for file_path, sf in files_to_add_last.items():
                new_simulation.add_file(sf, file_path=file_path)
            new_simulation.save(return_missing_files=False)

    print('new sim = ' + str(new_simulation.id))
    return new_simulation
def generate_immune_overlays(self):
    """
    Generate immune overlays from immunity burn-in experiments.

    The JSON file at ``self.immunity_burnin_meta_file_path`` maps experiment
    ids to the path of each experiment's meta JSON file (the file created in
    the folder indicated by ``sim_root`` in dtk_setup.cfg)::

        { exp_id1: path to experiment meta json file,
          exp_id2: --||--, ... }

    For every simulation whose burn-in parameter values match a node group
    in ``self.nodes_by_params``, an overlay is built from the simulation's
    ``MalariaImmunityReport_AnnualAverage.json``, written into
    ``self.immune_overlays_path`` and compiled.

    As a side effect the node grouping is dumped to ``nodes_by_param.json``
    in the current working directory.

    Returns:
        list: File names (not full paths) of the overlays that were written.
    """
    # group nodes by parameter
    self.group_nodes_by_params()

    with open('nodes_by_param.json', 'w') as p_f:
        json.dump(self.nodes_by_params, p_f, indent=4)

    # maintain a list of overlays' file paths so that they can be used by
    # the spatial simulation later
    overlays_list = []

    # open file containing paths to immune initialization burnin sweep meta
    # files
    with open(self.immunity_burnin_meta_file_path, 'r') as imm_f:
        immun_meta_exps = json.load(imm_f)

    # The set of relevant parameters is the same across all nodes, so take
    # the keys of the first node (see group_nodes_by_params and
    # get_params_key for the ordering). next(iter(...)) replaces the
    # Python-2-only ``.values().next()`` and works on both major versions.
    # An empty mapping is only a problem if there are simulations to map.
    first_node_params = next(iter(self.nodes_params.values()), None)
    node_params = list(first_node_params) if first_node_params is not None else None

    for exp_id, exp_path in immun_meta_exps.items():
        with open(os.path.abspath(exp_path), 'r') as meta_f:
            exp_meta = json.load(meta_f)

        # find out if the experiment has been run locally or remotely on
        # hpc; that determines the simulation output folder location and
        # structure
        exp_location_type = exp_meta['location']
        exp_name = exp_meta['exp_name']

        sim_dir_map = None
        if exp_location_type == 'HPC':
            from simtools.SetupParser import SetupParser
            sp = SetupParser('HPC')
            om("Pulling immunization data from COMPs.")
            om("This requires a login.")
            COMPS_login(sp.get('server_endpoint'))
            om("Login success!")
            sim_dir_map = parser.createSimDirectoryMap(exp_id)

        # iterate through the experiment's simulations
        for sim_id, sim_record in exp_meta['sims'].items():
            if node_params is None:
                raise StopIteration('self.nodes_params is empty')

            # for each simulation get the values of the parameters relevant
            # to the immune initialization burnin (the keys of
            # self.nodes_params), in node-parameter order
            param_values = [sim_record[param] for param in node_params
                            if param in sim_record]

            # get nodes associated with this set of parameters; simulations
            # matching no node group produce no overlay
            params_key = self.get_params_key(param_values)
            if params_key not in self.nodes_by_params:
                continue
            nodes = self.nodes_by_params[params_key]

            # locate the simulation's output folder
            sim_output_path = ''
            if exp_location_type == 'LOCAL':
                sim_output_path = os.path.join(
                    exp_meta['sim_root'],
                    exp_name + '_' + exp_meta['exp_id'],
                    sim_id,
                    'output')
            elif exp_location_type == 'HPC':
                sim_output_path = os.path.join(sim_dir_map[sim_id], 'output')

            immunity_report_file_path = os.path.join(
                sim_output_path, 'MalariaImmunityReport_AnnualAverage.json')

            # generate immune overlay
            from dtk.tools.demographics.createimmunelayer import (
                immune_init_from_custom_output_for_spatial as immune_init)

            immune_overlay_json = immune_init(
                {
                    "Metadata": {
                        "Author": "dtk-tools",
                        "IdReference": "Gridded world grump30arcsec",
                        "NodeCount": len(nodes)
                    },
                    "Nodes": nodes
                },
                immunity_report_file_path,
            )

            # save compiled overlay
            overlay_file_name = params_key + exp_name + '.json'
            overlay_file_path = os.path.join(self.immune_overlays_path,
                                             overlay_file_name)
            with open(overlay_file_path, 'w') as imo_f:
                json.dump(immune_overlay_json, imo_f)

            CompileDemographics(overlay_file_path, forceoverwrite=True)

            # add overlay location to overlay list
            overlays_list.append(overlay_file_name)

    print(str(len(overlays_list)) +
          " immune initialization overlay files processed successfully!")

    return overlays_list
md = json.loads(metadata_file.read()) sim_map = CompsDTKOutputParser.createSimDirectoryMap(md['exp_id']) for simId, sim in md['sims'].items(): site = sim['_site_'] geography = geography_from_site(site) demog_name = get_geography_parameter(geography, 'Demographics_Filenames')[0].replace( 'compiled.', '') immunity_report_path = os.path.join( sim_map[simId], 'output', 'MalariaImmunityReport_FinalYearAverage.json') with open(os.path.join(SetupParser.get('input_root'), demog_name)) as f: j = json.loads(f.read()) metadata = j['Metadata'] metadata.update({ 'Author': LocalOS.username, 'DateCreated': datetime.datetime.now().strftime('%a %b %d %X %Y'), 'Tool': os.path.basename(__file__) }) imm.immune_init_from_custom_output( { "Metadata": metadata, "Nodes": [{
def create_simulations(self, exp_name='test', exp_builder=None, suite_id=None, verbose=True):
    """
    Create an experiment with simulations modified according to the specified
    experiment builder.

    The builder's modifier list is split into chunks, one creator process per
    chunk. Progress is printed while the processes run; when they are done
    the simulations collected in ``self.cache`` are inserted in the DB.

    Args:
        exp_name: Name of the experiment; a new experiment is created when
            none exists yet or the current one has a different name.
        exp_builder: Experiment builder; defaults to a
            ``SingleSimulationBuilder``.
        suite_id: Suite id passed on when a new experiment is created.
        verbose: When true, log progress and the last created simulations.

    Raises:
        SystemExit: When fewer simulations than expected were created.
    """
    self.exp_builder = exp_builder or SingleSimulationBuilder()
    self.cache.clear()

    # Create the experiment if not present already
    if not self.experiment or self.experiment.exp_name != exp_name:
        self.create_experiment(experiment_name=exp_name, suite_id=suite_id)
    else:
        # Refresh the experiment
        self.refresh_experiment()

    # Save the experiment in the DB
    DataStore.save_experiment(self.experiment, verbose=verbose)

    # Separate the experiment builder generator into batches
    sim_per_batch = int(SetupParser.get('sims_per_thread', default=50))
    # cpu_count() - 1 is 0 on a single-core host, which would make the
    # chunk-size division below fail; always allow at least one process.
    max_creator_processes = max(1, multiprocessing.cpu_count() - 1)
    mods = list(self.exp_builder.mod_generator)
    total_sims = len(mods)

    # float() keeps the division exact and int() keeps the chunk size an
    # integer on interpreters where math.ceil returns a float.
    chunk_size = max(
        sim_per_batch,
        int(math.ceil(total_sims / float(max_creator_processes))))

    # Create the simulation processes
    creator_processes = [
        self.get_simulation_creator(function_set=fn_batch,
                                    max_sims_per_batch=sim_per_batch)
        for fn_batch in chunks(mods, chunk_size)
    ]

    # Display some info
    if verbose:
        logger.info("Creating the simulations")
        logger.info(" | Creator processes: %s (max: %s)" %
                    (len(creator_processes), max_creator_processes))
        logger.info(" | Simulations per batch: %s" % sim_per_batch)
        logger.info(" | Simulations Count: %s" % total_sims)
        sys.stdout.write(" | Created simulations: 0/{}".format(total_sims))
        sys.stdout.flush()

    # Start all the processes
    for c in creator_processes:
        c.start()

    # While they are running, display the status
    while True:
        created_sims = len(self.cache)
        sys.stdout.write("\r {} Created simulations: {}/{}".format(
            next(animation), created_sims, total_sims))
        sys.stdout.flush()
        if created_sims == total_sims or all(not c.is_alive()
                                             for c in creator_processes):
            break
        time.sleep(0.3)

    for c in creator_processes:
        c.join()

    # The count read inside the loop may predate the last insertions made
    # just before the processes exited; read it again now that all of them
    # have been joined so the check below does not fail spuriously.
    created_sims = len(self.cache)

    # We exited make sure we had no issues
    print("\r | Created simulations: {}/{}".format(created_sims, total_sims))
    sys.stdout.flush()

    if created_sims != total_sims:
        logger.error(
            "Commission seems to have failed. Only {} simulations were created but {} were expected...\n"
            "Exiting...".format(created_sims, total_sims))
        # sys.exit() rather than the site-provided exit() helper, which is
        # not guaranteed to exist in every interpreter setup.
        sys.exit()

    # Insert all those newly created simulations to the DB
    DataStore.bulk_insert_simulations(self.cache)

    # Refresh the experiment
    self.refresh_experiment()

    # Display sims
    if verbose:
        sims_to_display = 2
        shown = min(total_sims, sims_to_display)
        logger.info(" ")
        logger.info("Simulation(s) created:\n"
                    "----------------------")
        # A slice of [-0:] would list every simulation of the experiment,
        # so skip the listing entirely when nothing was created.
        recent_sims = self.experiment.simulations[-shown:] if shown else []
        for sim in recent_sims:
            logger.info("- Simulation {}".format(sim.id))
            logger.info(
                json.dumps(sim.tags,
                           indent=2,
                           cls=GeneralEncoder,
                           sort_keys=True))
            logger.info(" ")

        if total_sims > sims_to_display:
            logger.info("... and %s more" % (total_sims - shown))
def file_writer(self, write_fn):
    """
    Write every file the simulation needs through ``write_fn``.

    The files are emitted in this order:

    * The campaign file
    * The custom reporters file (only when custom reports are set)
    * The demographic overlays
    * The other input files (``input_files`` dictionary)
    * The config file
    * The emodules_map file

    Args:
        write_fn: Callable taking a file name and a content; it is
            responsible for actually storing the file.

    Examples:
        A ``write_fn`` writing JSON files into a working directory could
        look like::

            def write_file(name, content):
                filename = os.path.join(working_directory, '%s.json' % name)
                with open(filename, 'w') as f:
                    f.write(content)

    Raises:
        Exception: When the setup ``type`` is neither ``LOCAL`` nor ``HPC``.
    """
    from simtools.SetupParser import SetupParser

    # Readable output is indented by 3; otherwise json's default layout.
    json_indent = 3 if self.human_readability else None

    def dump(content):
        serialized = json.dumps(content,
                                sort_keys=True,
                                indent=json_indent,
                                cls=NumpyEncoder)
        return serialized.strip('"')

    # Campaign file
    campaign_filename = self.config['parameters']['Campaign_Filename']
    campaign_json = self.campaign.to_json(self.campaign.Use_Defaults,
                                          self.human_readability)
    write_fn(campaign_filename, campaign_json)

    # Custom reporters file
    if self.custom_reports:
        self.set_param('Custom_Reports_Filename', 'custom_reports.json')
        write_fn('custom_reports.json',
                 dump(format_reports(self.custom_reports)))

    # Demographic overlays: register each one before writing it
    for name, content in self.demog_overlays.items():
        overlay_name = '%s' % name
        self.append_overlay(overlay_name)
        write_fn(overlay_name, dump(content))

    # Remaining input files
    for name, content in self.input_files.items():
        write_fn(name, dump(content))

    # Add missing item from campaign individual events into Listed_Events
    self.config['parameters']['Listed_Events'] = self.check_custom_events()

    write_fn('config.json', dump(self.config))

    # complete the path to each dll before writing emodules_map.json
    location = SetupParser.get('type')
    if location not in ('LOCAL', 'HPC'):
        raise Exception('Unknown location: %s' % location)
    root = self.assets.dll_root if location == 'LOCAL' else 'Assets'

    for module_type in self.emodules_map.keys():
        rooted_dlls = {
            os.path.join(root, dll)
            for dll in self.emodules_map[module_type]
        }
        self.emodules_map[module_type] = list(rooted_dlls)

    write_fn('emodules_map.json', dump(self.emodules_map))
import os

from dtk.generic.migration import single_roundtrip_params
from dtk.tools.spatialworkflow.SpatialManager import SpatialManager
from dtk.utils.builders.sweep import GenericSweepBuilder
from dtk.utils.core.DTKConfigBuilder import DTKConfigBuilder
from simtools.SetupParser import SetupParser

# Setup block to initialize; switch to 'LOCAL' to use the local block.
location = 'HPC'
setup = SetupParser.init(location)

# Geography folder and the study sites swept over.
geography = 'DRC/Bandundu'
sites = ['Bandundu']

dll_root = SetupParser.get('dll_root')

# Sweep definition. An 'x_Local_Migration': [1e-2] entry can be added here
# to sweep over local migration as well.
builder = GenericSweepBuilder.from_dict({
    '_site_': sites,  # study sites
    'Run_Number': range(1),  # random seeds
})

# Base configuration: single core (raise Num_Cores, e.g. to 24, for
# multi-core runs) and a simulation duration of five 365-day years.
cb = DTKConfigBuilder.from_defaults(
    'MALARIA_SIM',
    Num_Cores=1,
    Simulation_Duration=365 * 5,
)

# migration
cb.update_params(single_roundtrip_params)

# set demographics file name
cb.update_params({
    'Demographics_Filenames': [
        os.path.join(geography, "DRC_Bandundu_1_node_demographics.json"),
    ],
})
def _get_path(self, path_type):
    """Resolve the path registered for ``path_type``.

    A truthy entry in ``self.paths`` wins. Otherwise the value is read from
    the setup, using the option name ``self.SETUP_MAPPING[path_type]``.
    """
    explicit_path = self.paths.get(path_type, None)
    if explicit_path:
        return explicit_path
    return SetupParser.get(self.SETUP_MAPPING[path_type])
def init(cls):
    """Build an ExperimentManager for the location type named in the setup.

    The setup ``type`` value is handed to ``cls._factory`` to pick the
    manager class, which is then instantiated with neither an experiment nor
    a config builder attached.
    """
    manager_type = SetupParser.get('type')
    logger.debug('Factory - Initializing %s ExperimentManager' % manager_type)
    manager_class = cls._factory(manager_type)
    return manager_class(experiment=None, config_builder=None)
# Create a new AnalyzeManager and add experiment and analyzer am = AnalyzeManager(verbose=False) for em in experiments: am.add_experiment(em.experiment) analyzer = DownloadAnalyzerTPI( filenames=['output\\ReportHIVByAgeAndGender.csv'], TPI_tag="TPI", ignore_TPI=False, REP_tag="TPI", ignore_REP=True, output_path=output_directory) am.add_analyzer(analyzer) # Make sure we refresh our set of experiments for e in experiments: e.refresh_experiment() COMPS_login(SetupParser.get("server_endpoint")) am.analyze() # If we are not done we wait for 30 sec, if we are done we leave if not finished: print("Waiting 30 seconds") time.sleep(30) else: break
last_cleanup = DataStore.get_setting('last_log_cleanup') if not last_cleanup or (datetime.today() - datetime.strptime(last_cleanup.value.split(' ')[0],'%Y-%m-%d')).days > 1: # Do the cleanup from simtools.DataAccess.LoggingDataStore import LoggingDataStore LoggingDataStore.log_cleanup() DataStore.save_setting(DataStore.create_setting(key='last_log_cleanup', value=datetime.today())) if __name__ == "__main__": logger.debug('Start Overseer pid: %d' % os.getpid()) # we technically don't care about full consistency of SetupParser with the original dtk command, as experiments # have all been created. We can grab 'generic' max_local_sims / max_threads SetupParser.init() # default block max_local_sims = int(SetupParser.get('max_local_sims')) # Create the queue local_queue = multiprocessing.Queue(max_local_sims) managers = OrderedDict() # Queue to be shared among all runners in order to update the individual simulation states in the DB manager = Manager() # Take this opportunity to cleanup the logs lc = threading.Thread(target=LogCleaner) lc.start() count = 0
def test_block_inheritance(self):
    """
    Issue 1246
    Verify that multi-level block inheritance works properly and that 'type'
    percolates from the deepest-level (root) of an inheritance chain.

    Every ``SetupParser.init`` is paired with ``SetupParser._uninit`` in a
    ``finally`` clause, so a failing assertion cannot leave the parser
    initialized for whatever runs next.
    """
    # Template for further checks in this test:
    #     SetupParser.init(selected_block='LOCAL',
    #                      setup_file=os.path.join(self.input_path, '1246', somedir, 'simtools.ini'),
    #                      is_testing=True)
    #     try:
    #         self.assertTrue(something)
    #     finally:
    #         SetupParser._uninit()

    def ini_file(case_dir):
        # Each scenario lives in its own sub-directory of the 1246 inputs.
        return os.path.join(self.input_path, '1246', case_dir,
                            'simtools.ini')

    #
    # Using a 3 level-inheritance scheme in all of these cases
    #
    # descendant block values override parent block values
    # EXCEPT: 'type' is inherited from the inheritance chain root

    # verify that block order in the ini file does not matter for arbitrary
    # key/values OR 'type'
    values = {}
    block_types = {}
    for i in range(1, 4):
        testdir = 'ordering%d' % i
        try:
            SetupParser.init(selected_block='LOCALB',
                             setup_file=ini_file(testdir),
                             is_testing=True)
            values[testdir] = SetupParser.get('a')
            block_types[testdir] = SetupParser.get('type')
        finally:
            SetupParser._uninit()
    unique_values = sorted(set(values.values()))
    self.assertEqual(unique_values, ['3'])
    unique_types = sorted(set(block_types.values()))
    self.assertEqual(unique_types, ['LOCAL'])

    # verify that the proper values are inherited, regardless of the level
    # in the inheritance chain values are located.
    try:
        SetupParser.init(selected_block='LOCALB',
                         setup_file=ini_file('mixedLevelValues'),
                         is_testing=True)
        self.assertEqual(SetupParser.get('a'), '1')
        self.assertEqual(SetupParser.get('b'), '3')
        self.assertEqual(SetupParser.get('c'), '5')
        self.assertEqual(SetupParser.get('d'), '7')
        self.assertEqual(SetupParser.get('e'), '10')
    finally:
        SetupParser._uninit()

    # Initialization must be rejected when:
    #   - a block used as the 'type' is missing (missingReferencedBlock)
    #   - a block is missing 'type' altogether (missingType)
    for case_dir in ('missingReferencedBlock', 'missingType'):
        kwargs = {
            'selected_block': 'LOCALB',
            'setup_file': ini_file(case_dir),
            'is_testing': True
        }
        try:
            self.assertRaises(SetupParser.InvalidBlock, SetupParser.init,
                              **kwargs)
        finally:
            SetupParser._uninit()
def create_simulations(self, exp_name='test', exp_builder=None, suite_id=None, verbose=True):
    """
    Create an experiment with simulations modified according to the specified
    experiment builder.

    A filler thread batches the builder's modifiers into a bounded work queue
    that a pool of creator processes consumes; one ``None`` per process is
    queued at the end as a stop marker. Progress is printed until all
    expected simulations are in ``self.cache``, which is then inserted in the
    DB and cleared.

    Args:
        exp_name: Name of the experiment; a new experiment is created when
            none exists yet or the current one has a different name.
        exp_builder: Experiment builder; defaults to a
            ``SingleSimulationBuilder``.
        suite_id: Suite id passed on when a new experiment is created.
        verbose: When true, log progress and the last created simulations.
    """
    # NOTE: the expected count is shared with the filler thread through a
    # module-level global, so concurrent calls would interfere.
    global simulations_expected
    simulations_expected = 0
    self.exp_builder = exp_builder or SingleSimulationBuilder()
    self.cache.clear()

    # Create the experiment if not present already
    if not self.experiment or self.experiment.exp_name != exp_name:
        self.create_experiment(experiment_name=exp_name, suite_id=suite_id)
    else:
        # Refresh the experiment
        self.refresh_experiment()

    # Save the experiment in the DB
    DataStore.save_experiment(self.experiment, verbose=verbose)

    # Separate the experiment builder generator into batches
    sim_per_batch = int(SetupParser.get('sims_per_thread', default=50))
    mods = self.exp_builder.mod_generator
    spare_cores = multiprocessing.cpu_count() - 1
    # At least one creator process is required: with zero consumers the
    # bounded queue fills up and nothing is ever created.
    max_creator_processes = max(
        1,
        min(spare_cores,
            int(SetupParser.get('max_threads', default=spare_cores))))
    creator_processes = []
    work_queue = Queue(max_creator_processes * 5)
    simulations_created = 0

    def fill_queue(mods, sim_per_batch, max_creator_processes, work_queue):
        global simulations_expected
        # Add the work to be done
        for wbatch in batch(mods, sim_per_batch):
            work_queue.put(wbatch)
            simulations_expected += len(wbatch)

        # Poison
        for _ in range(max_creator_processes):
            work_queue.put(None)

    t = Thread(target=fill_queue,
               args=(mods, sim_per_batch, max_creator_processes, work_queue))
    t.start()

    for _ in range(max_creator_processes):
        creator_process = self.get_simulation_creator(work_queue=work_queue)
        creator_process.daemon = True
        creator_process.start()
        creator_processes.append(creator_process)

    # Display some info
    if verbose:
        logger.info("Creating the simulations")
        logger.info(
            " | Creator processes: {} ".format(max_creator_processes))
        logger.info(" | Simulations per batch: {}".format(sim_per_batch))

    # Status display
    while True:
        sys.stdout.write("\r {} Created simulations: {}/{}".format(
            next(animation), simulations_created, simulations_expected))
        sys.stdout.flush()

        # Refresh the number of sims created
        simulations_created = len(self.cache)

        # Thread.is_alive() replaces isAlive(), removed in Python 3.9.
        if not t.is_alive():
            if 0 < simulations_created == simulations_expected:
                break
            # Everything was queued and every creator has exited: the count
            # cannot grow any more (nothing to create, or a creator died),
            # so stop waiting instead of spinning forever.
            if not any(p.is_alive() for p in creator_processes):
                break

        time.sleep(0.3)

    for p in creator_processes:
        p.join()

    # Final count once every creator has finished
    simulations_created = len(self.cache)

    sys.stdout.write("\r | Created simulations: {}/{}\n".format(
        simulations_created, simulations_expected))
    sys.stdout.flush()

    if simulations_created != simulations_expected:
        logger.error(
            "Only {} simulations were created but {} were expected.".format(
                simulations_created, simulations_expected))

    # Insert simulations in the cache
    DataStore.bulk_insert_simulations(self.cache)
    self.cache.clear()

    # Refresh the experiment
    self.refresh_experiment()

    # Display sims
    if verbose:
        sims_to_display = 2
        shown = min(simulations_created, sims_to_display)
        logger.info(" ")
        logger.info("Simulation{} created:\n"
                    "----------------------".format(
                        "s" if simulations_created > 1 else ""))
        # A slice of [-0:] would list every simulation of the experiment,
        # so skip the listing entirely when nothing was created.
        recent_sims = self.experiment.simulations[-shown:] if shown else []
        for sim in recent_sims:
            logger.info("- Simulation {}".format(sim.id))
            logger.info(
                json.dumps(sim.tags,
                           indent=2,
                           cls=GeneralEncoder,
                           sort_keys=True))
            logger.info(" ")

        if simulations_created > sims_to_display:
            logger.info("... and %s more" % (simulations_created - shown))
def generate_climate_files(self):
    """
    Request climate files through a COMPS ``InputDataWorker`` work item and
    download the result.

    A work order is written with ``self.wo.wo_2_json()`` and uploaded together
    with the demographics file; the work item is then commissioned and polled
    every 5 seconds until it reaches a final state. On success the first
    related asset collection is downloaded to
    ``self.climate_files_output_path``.

    Returns:
        tuple: ``(rain_file_name, temperature_file_name, humidity_file_name)``
        base names of the downloaded ``.bin`` files, or ``None`` when the
        work item did not succeed or produced no output files.
    """
    # see InputDataWorker for other work options
    self.wo = InputDataWorker(
        demographics_file_path=self.demographics_file_path,
        wo_output_path=self.work_order_path,
        project_info=self.climate_project,
        start_year=self.start_year,
        num_years=self.num_years,
        resolution=self.resolution,
        idRef=self.idRef)

    # NOTE(review): the original comment says a COMPS login happens here (if
    # not already logged in); no login call is visible in this method --
    # confirm where it is performed.
    self.wo.wo_2_json()

    from COMPS.Data.WorkItem import WorkerOrPluginKey, WorkItemState
    from COMPS.Data import QueryCriteria
    from COMPS.Data import WorkItem, WorkItemFile
    from COMPS.Data import AssetCollection

    workerkey = WorkerOrPluginKey(name='InputDataWorker',
                                  version='1.0.0.0_RELEASE')
    wi = WorkItem('dtk-tools InputDataWorker WorkItem', workerkey,
                  SetupParser.get('environment'))
    wi.set_tags({
        'dtk-tools': None,
        'WorkItem type': 'InputDataWorker dtk-tools'
    })

    with open(self.work_order_path, 'rb') as workorder_file:
        wi.add_work_order(data=workorder_file.read())

    with open(self.demographics_file_path, 'rb') as demog_file:
        wi.add_file(WorkItemFile(
            os.path.basename(self.demographics_file_path), 'Demographics',
            ''),
                    data=demog_file.read())

    wi.save()

    print("Created request for climate files generation.")
    print("Commissioning...")

    wi.commission()

    final_states = (WorkItemState.Succeeded, WorkItemState.Failed,
                    WorkItemState.Canceled)
    while wi.state not in final_states:
        om('Waiting for climate generation to complete (current state: ' +
           str(wi.state) + ')',
           style='flushed')
        time.sleep(5)
        wi.refresh()

    # The loop also ends on Failed/Canceled; only claim success when the
    # work item actually succeeded.
    if wi.state != WorkItemState.Succeeded:
        print("Climate files generation did not succeed (final state: " +
              str(wi.state) + ")")
        return None

    print("Climate files SUCCESSFULLY generated")

    # Get the collection with our files
    collections = wi.get_related_asset_collections()
    if not collections:
        print('No output files found')
        return None

    collection_id = collections[0].id
    comps_collection = AssetCollection.get(
        collection_id,
        query_criteria=QueryCriteria().select_children('assets'))

    # Get the files
    if len(comps_collection.assets) > 0:
        print("Found output files:")
        for asset in comps_collection.assets:
            print("- %s (%s)" % (asset.file_name, file_size(asset.length)))

        print("\nDownloading to %s..." % self.climate_files_output_path)

        # Download the collection
        download_asset_collection(comps_collection,
                                  self.climate_files_output_path)

        # return filenames; the first match of each glob pattern is used
        rain_bin_re = os.path.abspath(self.climate_files_output_path +
                                      '/*rain*.bin')
        humidity_bin_re = os.path.abspath(self.climate_files_output_path +
                                          '/*humidity*.bin')
        temperature_bin_re = os.path.abspath(
            self.climate_files_output_path + '/*temperature*.bin')

        rain_file_name = os.path.basename(glob.glob(rain_bin_re)[0])
        humidity_file_name = os.path.basename(
            glob.glob(humidity_bin_re)[0])
        temperature_file_name = os.path.basename(
            glob.glob(temperature_bin_re)[0])

        print('Climate files SUCCESSFULLY stored.')

        return rain_file_name, temperature_file_name, humidity_file_name

    print('No output files found')
    return None