def tearDown(self):
    """Undo per-test setup.

    Removes this test's allocator from RAM, shuts down or invalidates it,
    restores the saved 'ssh'/'scp' configuration and deletes the
    _RJE_ROOT and _DMZ_ROOT directories if they exist.  The original
    working directory is restored even when cleanup raises.
    """
    try:
        logging.debug('remove')
        RAM.remove_allocator(self.allocator.name)

        if self.proc is None:
            # No process is attached; just invalidate the allocator.
            self.allocator.invalidate()
        else:
            logging.debug('shutdown')
            self.allocator.shutdown()
            self.proc.terminate()

        # Restore 'ssh' and 'scp' configuration.
        protocol.configure_ssh(self.orig_ssh)
        protocol.configure_scp(self.orig_scp)

        # NOTE(review): presumably gives terminated processes time to let
        # go of the directories before they are deleted -- confirm.
        time.sleep(2)
        for root in (_RJE_ROOT, _DMZ_ROOT):
            if not os.path.exists(root):
                continue
            shutil.rmtree(root)
    finally:
        os.chdir(self.orig_dir)
def tearDown(self):
    """Undo per-test setup.

    Removes this test's allocator from RAM, shuts down or invalidates it,
    restores the saved 'ssh'/'scp' configuration and deletes the
    _RJE_ROOT and _DMZ_ROOT directories if they exist.  The original
    working directory is restored even when cleanup raises.
    """
    try:
        logging.debug('remove')
        RAM.remove_allocator(self.allocator.name)

        if self.proc is None:
            # No process is attached; just invalidate the allocator.
            self.allocator.invalidate()
        else:
            logging.debug('shutdown')
            self.allocator.shutdown()
            self.proc.terminate()

        # Restore 'ssh' and 'scp' configuration.
        protocol.configure_ssh(self.orig_ssh)
        protocol.configure_scp(self.orig_scp)

        # NOTE(review): presumably gives terminated processes time to let
        # go of the directories before they are deleted -- confirm.
        time.sleep(2)
        for root in (_RJE_ROOT, _DMZ_ROOT):
            if not os.path.exists(root):
                continue
            shutil.rmtree(root)
    finally:
        os.chdir(self.orig_dir)
def test_configure(self):
    """Exercise RAM.configure() with valid and invalid configuration
    files, then the add/insert/get/remove allocator calls."""
    logging.debug('')
    logging.debug('test_configure')

    def write_cfg(text):
        # Every scenario (re)creates the same file in the current directory.
        with open('resources.cfg', 'w') as out:
            out.write(text)

    # Reconfigure.
    write_cfg("""
[LocalHost]
max_load: 100
""")
    local = RAM.get_allocator('LocalHost')
    saved_load = local.max_load
    try:
        self.assertTrue(saved_load < 100)
        RAM.configure('resources.cfg')
        self.assertEqual(local.max_load, 100)
        local.max_load = saved_load
    finally:
        os.remove('resources.cfg')

    # Add another local.
    write_cfg("""
[Local2]
classname: openmdao.main.resource.LocalAllocator
authkey: PublicKey
allow_shell: False
total_cpus: 42
max_load: 200
""")
    try:
        RAM.configure('resources.cfg')
        local2 = RAM.get_allocator('Local2')
        self.assertEqual(local2.factory._authkey, 'PublicKey')
        self.assertEqual(local2.factory._allow_shell, False)
        self.assertEqual(local2.total_cpus, 42)
        self.assertEqual(local2.max_load, 200)
        self.assertEqual(local2.host, socket.gethostname())
        self.assertTrue(local2.pid > 0)
        RAM.remove_allocator('Local2')
    finally:
        os.remove('resources.cfg')

    # Invalid configurations, in order: bad local total_cpus, bad local
    # max_load, bad module, bad class.  Each entry is
    # (file contents, expected exception type, expected message).
    bad_configs = (
        ("""
[Local2]
classname: openmdao.main.resource.LocalAllocator
total_cpus: 0
""", ValueError, 'Local2: total_cpus must be > 0, got 0'),
        ("""
[Local2]
classname: openmdao.main.resource.LocalAllocator
max_load: 0
""", ValueError, 'Local2: max_load must be > 0, got 0'),
        ("""
[BadModule]
classname: no-such-module.Allocator
max_load: 100
""", RuntimeError,
         "RAM configure BadModule: can't import 'no-such-module'"),
        ("""
[BadClass]
classname: openmdao.main.resource.NoSuchAllocator
max_load: 100
""", RuntimeError,
         "RAM configure BadClass: no class 'NoSuchAllocator'"
         " in openmdao.main.resource"),
    )
    for text, exc_type, message in bad_configs:
        write_cfg(text)
        try:
            assert_raises(self, "RAM.configure('resources.cfg')",
                          globals(), locals(), exc_type, message)
        finally:
            os.remove('resources.cfg')

    # Add, insert, get, remove.
    local3 = LocalAllocator('Local3')
    local4 = LocalAllocator('Local4', total_cpus=4)
    RAM.add_allocator(local3)
    try:
        names = [alloc.name for alloc in RAM.list_allocators()]
        self.assertEqual(names, ['LocalHost', 'Local3'])
        self.assertTrue(RAM.get_allocator('Local3') is local3)
        self.assertTrue(RAM.get_allocator(1) is local3)
        RAM.insert_allocator(0, local4)
        try:
            names = [alloc.name for alloc in RAM.list_allocators()]
            self.assertEqual(names, ['Local4', 'LocalHost', 'Local3'])
        finally:
            RAM.remove_allocator('Local4')
    finally:
        RAM.remove_allocator(1)

    # After removal, lookups by name must fail.
    for code in ("RAM.get_allocator('Local3')",
                 "RAM.remove_allocator('Local3')"):
        assert_raises(self, code, globals(), locals(),
                      ValueError, "allocator 'Local3' not found")

    assert_raises(self, "LocalAllocator('BadLoad', max_load=-2)",
                  globals(), locals(),
                  ValueError, "BadLoad: max_load must be > 0, got -2")
def test_remote(self):
    """Start a server in a scratch 'Factory' directory, add its allocators
    to RAM, exercise them, then remove them and clean up."""
    logging.debug('')
    logging.debug('test_remote')

    def current_names():
        # Names of all allocators currently registered with RAM.
        return [alloc.name for alloc in RAM.list_allocators()]

    # Start remote server.
    server_dir = 'Factory'
    if os.path.exists(server_dir):
        shutil.rmtree(server_dir, onerror=onerror)
    os.mkdir(server_dir)
    os.chdir(server_dir)
    try:
        server, server_cfg = start_server()
        cfg = read_server_config(server_cfg)
        factory = None
        try:
            factory = connect(cfg['address'], cfg['port'],
                              pubkey=cfg['key'])
            prefix = RAM._make_prefix(factory.host)
            remote = '%s_LocalHost' % prefix

            # Show no remotes currently in RAM.
            names = current_names()
            logging.debug('%s', names)
            self.assertFalse(remote in names)

            # Add remote server's allocator.
            RAM.add_remotes(factory)
            names = current_names()
            logging.debug('%s', names)
            self.assertTrue(remote in names)
            self.assertFalse(
                RAM.get_allocator(remote) is RAM.list_allocators()[0])
            self.assertTrue(
                RAM.get_allocator(remote) is RAM.list_allocators()[1])

            # Max servers.
            max_servers = RAM.max_servers(dict(allocator=remote))
            self.assertTrue(max_servers >= 0)  # Avoid host load issues.

            remote_alloc = RAM.get_allocator(remote)

            max_servers, info = remote_alloc.max_servers(
                dict(localhost=True))
            self.assertEqual(max_servers, 0)
            self.assertEqual(info, dict(localhost='requested local host'))

            max_servers, info = remote_alloc.max_servers(
                dict(allocator='LocalHost'))
            self.assertEqual(max_servers, 0)
            self.assertEqual(info, dict(allocator='wrong allocator'))

            estimate, info = remote_alloc.time_estimate(
                dict(allocator='LocalHost'))
            self.assertEqual(estimate, -2)
            self.assertEqual(info, dict(allocator='wrong allocator'))

            # Allocate, release.
            remote_server, info = RAM.allocate(dict(allocator=remote))
            RAM.release(remote_server)

            # Remove remote allocators.
            for name in current_names():
                if name.startswith(prefix):
                    RAM.remove_allocator(name)
            names = current_names()
            logging.debug('%s', names)
            self.assertFalse(remote in names)

        finally:
            if factory is not None:
                factory.cleanup()
            server.terminate(timeout=10)
    finally:
        os.chdir('..')
        shutil.rmtree(server_dir, onerror=onerror)

    # Access local RAM in manner it would be accessed in the server.
    instance = RAM._get_instance()
    self.assertEqual(instance.get_total_allocators(), 1)
    self.assertTrue(RAM._get_instance().get_allocator_proxy(0)
                    is RAM.list_allocators()[0])
def test_configure(self):
    """Exercise RAM.configure() with valid and invalid configuration
    files, then the add/insert/get/remove allocator calls."""
    logging.debug('')
    logging.debug('test_configure')

    def write_cfg(text):
        # Every scenario (re)creates the same file in the current directory.
        with open('resources.cfg', 'w') as out:
            out.write(text)

    # Reconfigure.
    write_cfg("""
[LocalHost]
max_load: 100
""")
    local = RAM.get_allocator('LocalHost')
    saved_load = local.max_load
    try:
        self.assertTrue(saved_load < 100)
        RAM.configure('resources.cfg')
        self.assertEqual(local.max_load, 100)
        local.max_load = saved_load
    finally:
        os.remove('resources.cfg')

    # Add another local.
    write_cfg("""
[Local2]
classname: openmdao.main.resource.LocalAllocator
authkey: PublicKey
allow_shell: False
total_cpus: 42
max_load: 200
""")
    try:
        RAM.configure('resources.cfg')
        local2 = RAM.get_allocator('Local2')
        self.assertEqual(local2.factory._authkey, 'PublicKey')
        self.assertEqual(local2.factory._allow_shell, False)
        self.assertEqual(local2.total_cpus, 42)
        self.assertEqual(local2.max_load, 200)
        self.assertEqual(local2.host, socket.gethostname())
        self.assertTrue(local2.pid > 0)
        RAM.remove_allocator('Local2')
    finally:
        os.remove('resources.cfg')

    # Invalid configurations, in order: bad local total_cpus, bad local
    # max_load, bad module, bad class.  Each entry is
    # (file contents, expected exception type, expected message).
    bad_configs = (
        ("""
[Local2]
classname: openmdao.main.resource.LocalAllocator
total_cpus: 0
""", ValueError, 'Local2: total_cpus must be > 0, got 0'),
        ("""
[Local2]
classname: openmdao.main.resource.LocalAllocator
max_load: 0
""", ValueError, 'Local2: max_load must be > 0, got 0'),
        ("""
[BadModule]
classname: no-such-module.Allocator
max_load: 100
""", RuntimeError,
         "RAM configure BadModule: can't import 'no-such-module'"),
        ("""
[BadClass]
classname: openmdao.main.resource.NoSuchAllocator
max_load: 100
""", RuntimeError,
         "RAM configure BadClass: no class 'NoSuchAllocator'"
         " in openmdao.main.resource"),
    )
    for text, exc_type, message in bad_configs:
        write_cfg(text)
        try:
            assert_raises(self, "RAM.configure('resources.cfg')",
                          globals(), locals(), exc_type, message)
        finally:
            os.remove('resources.cfg')

    # Add, insert, get, remove.
    local3 = LocalAllocator('Local3')
    local4 = LocalAllocator('Local4', total_cpus=4)
    RAM.add_allocator(local3)
    try:
        names = [alloc.name for alloc in RAM.list_allocators()]
        self.assertEqual(names, ['LocalHost', 'Local3'])
        self.assertTrue(RAM.get_allocator('Local3') is local3)
        self.assertTrue(RAM.get_allocator(1) is local3)
        RAM.insert_allocator(0, local4)
        try:
            names = [alloc.name for alloc in RAM.list_allocators()]
            self.assertEqual(names, ['Local4', 'LocalHost', 'Local3'])
        finally:
            RAM.remove_allocator('Local4')
    finally:
        RAM.remove_allocator(1)

    # After removal, lookups by name must fail.
    for code in ("RAM.get_allocator('Local3')",
                 "RAM.remove_allocator('Local3')"):
        assert_raises(self, code, globals(), locals(),
                      ValueError, "allocator 'Local3' not found")

    assert_raises(self, "LocalAllocator('BadLoad', max_load=-2)",
                  globals(), locals(),
                  ValueError, "BadLoad: max_load must be > 0, got -2")
def test_remote(self):
    """Start a server in a scratch 'Factory' directory, add its allocators
    to RAM, exercise them, then remove them and clean up."""
    logging.debug('')
    logging.debug('test_remote')

    def current_names():
        # Names of all allocators currently registered with RAM.
        return [alloc.name for alloc in RAM.list_allocators()]

    # Start remote server.
    server_dir = 'Factory'
    if os.path.exists(server_dir):
        shutil.rmtree(server_dir, onerror=onerror)
    os.mkdir(server_dir)
    os.chdir(server_dir)
    try:
        server, server_cfg = start_server()
        cfg = read_server_config(server_cfg)
        factory = None
        try:
            factory = connect(cfg['address'], cfg['port'],
                              pubkey=cfg['key'])
            prefix = RAM._make_prefix(factory.host)
            remote = '%s_LocalHost' % prefix

            # Show no remotes currently in RAM.
            names = current_names()
            logging.debug('%s', names)
            self.assertFalse(remote in names)

            # Add remote server's allocator.
            RAM.add_remotes(factory)
            names = current_names()
            logging.debug('%s', names)
            self.assertTrue(remote in names)
            self.assertFalse(
                RAM.get_allocator(remote) is RAM.list_allocators()[0])
            self.assertTrue(
                RAM.get_allocator(remote) is RAM.list_allocators()[1])

            # Max servers.
            max_servers = RAM.max_servers(dict(allocator=remote))
            self.assertTrue(max_servers >= 0)  # Avoid host load issues.

            remote_alloc = RAM.get_allocator(remote)

            max_servers, info = remote_alloc.max_servers(
                dict(localhost=True))
            self.assertEqual(max_servers, 0)
            self.assertEqual(info, dict(localhost='requested local host'))

            max_servers, info = remote_alloc.max_servers(
                dict(allocator='LocalHost'))
            self.assertEqual(max_servers, 0)
            self.assertEqual(info, dict(allocator='wrong allocator'))

            estimate, info = remote_alloc.time_estimate(
                dict(allocator='LocalHost'))
            self.assertEqual(estimate, -2)
            self.assertEqual(info, dict(allocator='wrong allocator'))

            # Allocate, release.
            remote_server, info = RAM.allocate(dict(allocator=remote))
            RAM.release(remote_server)

            # Remove remote allocators.
            for name in current_names():
                if name.startswith(prefix):
                    RAM.remove_allocator(name)
            names = current_names()
            logging.debug('%s', names)
            self.assertFalse(remote in names)

        finally:
            if factory is not None:
                factory.cleanup()
            server.terminate(timeout=10)
    finally:
        os.chdir('..')
        shutil.rmtree(server_dir, onerror=onerror)

    # Access local RAM in manner it would be accessed in the server.
    instance = RAM._get_instance()
    self.assertEqual(instance.get_total_allocators(), 1)
    self.assertTrue(RAM._get_instance().get_allocator_proxy(0)
                    is RAM.list_allocators()[0])
def rundlcs(envcmd=None, options=None, args=None, batch_size=5):
    """Run the whole process, including startup and shutdown.

    Steps: parse input, create load cases, create the aerocode wrapper,
    create the dispatcher, hand cases and aerocode to the dispatcher, run
    the cases, then collect and save output.

    envcmd: a text string cmd (e.g. 'source env.sh') to set up the
        environment for the cluster allocator; None uses the allocator's
        defaults.
    options: parsed options object; when None it is obtained (together
        with args) from get_options().
    args: positional arguments, forwarded to rundlcs_bybatch().
    batch_size: when not None and the case file holds more than
        batch_size lines beyond the first, the work is delegated to
        rundlcs_bybatch() and this function returns immediately.
    """
    # Resolve options before anything reads them: the batch check below
    # uses options.cases, which previously failed with AttributeError
    # whenever the default options=None was used.
    if options is None:
        options, args = get_options()

    # NOTE(review): the "- 1" presumably discounts a header line in the
    # case file -- confirm against line_count() and the file format.
    if batch_size is not None and line_count(options.cases) - 1 > batch_size:
        rundlcs_bybatch(envcmd, options, args, batch_size)
        return

    print(options)
    ctrl = parse_input(options)
    # ctrl will be just the input, but broken up into separate categories,
    # e.g. ctrl.cases, ctrl.app, ctrl.dispatch, ...

    # work in progress; running efficiently at NREL.
    if options.cluster_allocator:
        if envcmd is None:
            cluster = ClusterAllocator()
        else:
            cluster = ClusterAllocator(use_modules=False, beforestart=envcmd)
        # The cluster allocator replaces (rather than precedes) 'LocalHost'.
        RAM.remove_allocator('LocalHost')
        RAM.add_allocator(cluster)

    # Using "factory" functions to create specific subclasses (e.g.
    # distinguish between FAST and HAWC2); then we use these to create
    # the cases...
    case_params = ctrl.cases
    casetab = GenericRunCaseTable()
    casetab.initFromFile(case_params['source_file'], verbose=True,
                         start_at=options.start_at)

    # solver... (hard-coded; 'HAWC2' is the not-yet-supported alternative)
    solver = 'FAST'
    if solver == 'FAST':
        # TODO, changed when we have a real turbine
        # aero code stuff: for constructors
        aerocode = openFAST(ctrl.output)  # need better name than output_params
        aerocode.setOutput(ctrl.output)
    elif solver == 'HAWC2':
        aerocode = openHAWC2(None)
        raise NotImplementedError(
            "HAWC2 aeroecode wrapper not implemented in runBatch.py yet")
    else:
        raise ValueError("unknown aerocode: %s" % solver)

    # case iterator
    dispatcher = CaseAnalyzer(ctrl.dispatcher)

    # After this point everything should be generic, all appropriate
    # subclass objects created.

    # presetup_workflow just makes sure parts are there when configure()
    # is called.
    dispatcher.presetup_workflow(aerocode, casetab.cases)
    dispatcher.configure()
    # Now tell the dispatcher to (setup and) run the cases using the
    # aerocode on the turbine.  Calling configure() is done inside run(),
    # but now it is done already (above), too.

    # norun does not write directories, but it does set us up to process
    # them if they already exist.
    if not options.norun:
        dispatcher.run()

    # TODO: more complexity will be needed for difference between
    # "run now" and "run later" cases.
    dispatcher.collect_output(ctrl.output)
def rundlcs(envcmd=None, options=None, args=None, batch_size=100):
    """Run the whole process, including startup and shutdown.

    Steps: parse input, create load cases, create the aerocode wrapper,
    create the dispatcher, hand cases and aerocode to the dispatcher, run
    the cases, then collect and save output.

    envcmd: a text string cmd (e.g. 'source env.sh') to set up the
        environment for the cluster allocator; None uses the allocator's
        defaults.
    options: parsed options object; when None it is obtained (together
        with args) from get_options().
    args: positional arguments, forwarded to rundlcs_bybatch().
    batch_size: when not None and the case file holds more than
        batch_size lines beyond the first, the work is delegated to
        rundlcs_bybatch() and this function returns immediately.
    """
    # Resolve options before anything reads them: the batch check below
    # uses options.cases, which previously failed with AttributeError
    # whenever the default options=None was used.
    if options is None:
        options, args = get_options()

    # NOTE(review): the "- 1" presumably discounts a header line in the
    # case file -- confirm against line_count() and the file format.
    if batch_size is not None and line_count(options.cases) - 1 > batch_size:
        rundlcs_bybatch(envcmd, options, args, batch_size)
        return

    print(options)
    ctrl = parse_input(options)
    # ctrl will be just the input, but broken up into separate categories,
    # e.g. ctrl.cases, ctrl.app, ctrl.dispatch, ...

    # work in progress; running efficiently at NREL.
    if options.cluster_allocator:
        if envcmd is None:
            cluster = ClusterAllocator()
        else:
            cluster = ClusterAllocator(use_modules=False, beforestart=envcmd)
        # The cluster allocator replaces (rather than precedes) 'LocalHost'.
        RAM.remove_allocator('LocalHost')
        RAM.add_allocator(cluster)

    # Using "factory" functions to create specific subclasses (e.g.
    # distinguish between FAST and HAWC2); then we use these to create
    # the cases...
    case_params = ctrl.cases
    casetab = GenericRunCaseTable()
    casetab.initFromFile(case_params['source_file'], verbose=True,
                         start_at=options.start_at)

    # solver... (hard-coded; 'HAWC2' is the not-yet-supported alternative)
    solver = 'FAST'
    if solver == 'FAST':
        # TODO, changed when we have a real turbine
        # aero code stuff: for constructors
        aerocode = openFAST(ctrl.output)  # need better name than output_params
        aerocode.setOutput(ctrl.output)
    elif solver == 'HAWC2':
        aerocode = openHAWC2(None)
        raise NotImplementedError(
            "HAWC2 aeroecode wrapper not implemented in runBatch.py yet")
    else:
        raise ValueError("unknown aerocode: %s" % solver)

    # case iterator
    dispatcher = CaseAnalyzer(ctrl.dispatcher)

    # After this point everything should be generic, all appropriate
    # subclass objects created.

    # presetup_workflow just makes sure parts are there when configure()
    # is called.
    dispatcher.presetup_workflow(aerocode, casetab.cases)
    dispatcher.configure()
    # Now tell the dispatcher to (setup and) run the cases using the
    # aerocode on the turbine.  Calling configure() is done inside run(),
    # but now it is done already (above), too.

    # norun does not write directories, but it does set us up to process
    # them if they already exist.
    if not options.norun:
        dispatcher.run()

    # TODO: more complexity will be needed for difference between
    # "run now" and "run later" cases.
    dispatcher.collect_output(ctrl.output)