def __init__(self):
    """Start a DistributedAmuse instance and store it on ``self.instance``.

    The code is initialized and its parameters committed, the web-interface
    URL and the known resources are printed, and a single pilot on the
    ``local`` resource is requested. The call blocks in
    ``wait_for_pilots()`` before the instance is stored.
    """
    print("Setting up distributed code")
    distributed = DistributedAmuse()
    distributed.initialize_code()

    # Optional settings, left disabled:
    # distributed.parameters.debug = True
    # distributed.parameters.webinterface_port = 4556
    distributed.commit_parameters()

    print("url:", distributed.get_webinterface_url())

    print("Resources:")
    print(distributed.resources)

    # Claim nodes on the resources. In this example simply the "local" machine.
    local_pilot = Pilot()
    # label of the resource to be used
    local_pilot.resource_name = 'local'
    # desired number of nodes
    local_pilot.node_count = 1
    # wallclock that resource remains available (mainly for job queues)
    local_pilot.time = 99 | units.hour
    # slots is accounting measure for a job
    local_pilot.slots_per_node = 99
    # label for subgroups of the resource
    local_pilot.node_label = 'local'
    distributed.pilots.add_pilot(local_pilot)

    print("Reservations:")
    print(distributed.pilots)

    print("Waiting for reservations")
    distributed.wait_for_pilots()

    self.instance = distributed
def init_local_only():
    """Create a DistributedAmuse instance served by one pilot on ``local``.

    The web interface port is set to 4556 and the web-interface URL is
    printed before the pilot is requested.

    Returns:
        The DistributedAmuse instance, after ``wait_for_pilots()`` has
        returned. This function registers no shutdown hook and never calls
        ``stop()`` on the instance.
    """
    print("Setting up distributed code")
    distributed = DistributedAmuse()
    distributed.initialize_code()
    distributed.parameters.webinterface_port = 4556
    print("url:", distributed.get_webinterface_url())

    # Claim nodes on the resources. In this example simply the "local" machine.
    local_pilot = Pilot()
    pilot_settings = (
        ("resource_name", 'local'),
        ("node_count", 1),
        ("time", 99 | units.hour),
        ("slots_per_node", 99),
        ("node_label", 'local'),
    )
    for attribute, value in pilot_settings:
        setattr(local_pilot, attribute, value)
    distributed.pilots.add_pilot(local_pilot)

    print("Waiting for reservations")
    distributed.wait_for_pilots()

    # Disabled shutdown hook, kept for reference:
    # atexit.register(distributed.stop)
    return distributed
def start_distributed(lgm_node_names):
    """Start DistributedAmuse with the LGM nodes, Cartesius and a local pilot.

    Resources are registered in this order: the LGM gateway, one resource
    per LGM node, then Cartesius. Pilots are then requested for the local
    machine, a GPU pilot and a CPU pilot per LGM node, and Cartesius.

    Args:
        lgm_node_names: iterable of LGM node names; each name is passed to
            ``new_lgm_node`` to build the node's resource.

    Returns:
        The DistributedAmuse instance, after ``wait_for_pilots()`` has
        returned. Worker output is redirected to ``distributed_amuse.log``.
    """
    lgm_nodes = [new_lgm_node(name) for name in lgm_node_names]

    instance = DistributedAmuse(
        redirection="file",
        redirect_file="distributed_amuse.log",
    )
    instance.initialize_code()

    instance.resources.add_resource(new_lgm_gateway())
    for lgm_node in lgm_nodes:
        instance.resources.add_resource(lgm_node)
    instance.resources.add_resource(new_cartesius_resource())

    instance.pilots.add_pilot(new_local_pilot())
    for lgm_node in lgm_nodes:
        instance.pilots.add_pilot(new_gpu_node_pilot(lgm_node))
        instance.pilots.add_pilot(new_cpu_node_pilot(lgm_node))
    instance.pilots.add_pilot(new_cartesius_pilot())

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print("Pilots:")
    print(instance.pilots)

    print("Waiting for pilots")
    instance.wait_for_pilots()

    return instance
def start_distributed():
    """Start DistributedAmuse with a single local pilot.

    Returns:
        The DistributedAmuse instance, after ``wait_for_pilots()`` has
        returned. Worker output is redirected to ``distributed_amuse.log``.
    """
    instance = DistributedAmuse(
        redirection="file",
        redirect_file="distributed_amuse.log",
    )
    instance.initialize_code()

    instance.pilots.add_pilot(new_local_pilot())

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print("Pilots:")
    print(instance.pilots)

    print("Waiting for pilots")
    instance.wait_for_pilots()

    return instance
def start_distributed_local():
    """Start DistributedAmuse in debug mode with three local pilots.

    ``use_for_all_workers()`` is called on the instance before any pilot is
    added. The pilots requested are a default local pilot, one with
    ``slots=2`` labelled ``"GPU"`` and one with ``slots=24`` labelled
    ``"hydro"``.

    Returns:
        The DistributedAmuse instance, after ``wait_for_pilots()`` has
        returned. Worker output is redirected to ``distributed_amuse.log``.
    """
    instance = DistributedAmuse(
        redirection="file",
        redirect_file="distributed_amuse.log",
    )
    instance.initialize_code()
    instance.parameters.debug = True
    instance.use_for_all_workers()

    instance.pilots.add_pilot(new_local_pilot())
    instance.pilots.add_pilot(new_local_pilot(slots=2, label="GPU"))
    instance.pilots.add_pilot(new_local_pilot(slots=24, label="hydro"))

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print("Pilots:")
    print(instance.pilots)

    print("Waiting for pilots")
    instance.wait_for_pilots()

    return instance
def start_distributed_amuse():
    """Start DistributedAmuse on the local machine and open its web interface.

    Debug mode is enabled, the web interface port is set to 4556, and the
    web-interface URL is opened with ``webbrowser``. One pilot on the
    ``local`` resource is requested. ``use_for_all_workers()`` is called
    both before and after waiting for the pilot.

    Returns:
        The DistributedAmuse instance, after ``wait_for_pilots()`` has
        returned.
    """
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print("Creating distributed amuse")
    distributed_amuse = DistributedAmuse(redirection='none')
    distributed_amuse.parameters.debug = True
    distributed_amuse.parameters.webinterface_port = 4556
    distributed_amuse.use_for_all_workers()

    # Open the address of the web interface in a browser window.
    webbrowser.open(distributed_amuse.get_webinterface_url())

    # Example of adding a remote resource (disabled):
    # resource = Resource()
    # resource.name='some.machine'
    # resource.location="*****@*****.**"
    # resource.scheduler_type="sge"
    # resource.amuse_dir="/home/user/amuse"
    # distributed_amuse.resources.add_resource(resource)

    print("Resources:")
    print(distributed_amuse.resources)

    # Claim nodes on the resources. In this example simply the "local" machine.
    pilot = Pilot()
    pilot.resource_name = 'local'
    pilot.node_count = 1
    pilot.time = 2 | units.hour
    pilot.slots_per_node = 22
    pilot.label = 'local'
    distributed_amuse.pilots.add_pilot(pilot)

    print("Pilots:")
    print(distributed_amuse.pilots)

    print("Waiting for pilots")
    distributed_amuse.wait_for_pilots()

    print("setting distributed as default channel")
    distributed_amuse.use_for_all_workers()

    return distributed_amuse
def start_distributed_amuse():
    """Create a local DistributedAmuse instance and show its web interface.

    The instance runs with debug enabled and the web interface on port 4556;
    the web-interface URL is opened through ``webbrowser``. A single pilot on
    the ``local`` resource is requested, and ``use_for_all_workers()`` is
    called once before and once after the pilot wait.

    Returns:
        The DistributedAmuse instance, after ``wait_for_pilots()`` has
        returned.
    """
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print("Creating distributed amuse")
    distributed_amuse = DistributedAmuse(redirection='none')
    distributed_amuse.parameters.debug = True
    distributed_amuse.parameters.webinterface_port = 4556
    distributed_amuse.use_for_all_workers()

    # Open the address of the web interface in a browser window.
    webbrowser.open(distributed_amuse.get_webinterface_url())

    # Example of adding a remote resource (disabled):
    # resource = Resource()
    # resource.name='some.machine'
    # resource.location="*****@*****.**"
    # resource.scheduler_type="sge"
    # resource.amuse_dir="/home/user/amuse"
    # distributed_amuse.resources.add_resource(resource)

    print("Resources:")
    print(distributed_amuse.resources)

    # Claim nodes on the resources. In this example simply the "local" machine.
    pilot = Pilot()
    pilot.resource_name = 'local'
    pilot.node_count = 1
    pilot.time = 2 | units.hour
    pilot.slots_per_node = 22
    pilot.label = 'local'
    distributed_amuse.pilots.add_pilot(pilot)

    print("Pilots:")
    print(distributed_amuse.pilots)

    print("Waiting for pilots")
    distributed_amuse.wait_for_pilots()

    print("setting distributed as default channel")
    distributed_amuse.use_for_all_workers()

    return distributed_amuse
def init_das5_only(username, num_nodes, num_cores):
    """Start DistributedAmuse with a single pilot on the DAS-5 resource.

    Args:
        username: account name; used to build the login address
            ``<username>@fs0.das5.cs.vu.nl`` and the AMUSE and tmp
            directories under ``/home/<username>``.
        num_nodes: value assigned to the pilot's ``node_count``.
        num_cores: value assigned to the pilot's ``slots_per_node``.

    Returns:
        The DistributedAmuse instance, after ``wait_for_pilots()`` has
        returned.
    """
    print("Setting up distributed code")
    distributed = DistributedAmuse()
    distributed.parameters.debug = False
    distributed.parameters.worker_queue_timeout = 1 | units.hour
    distributed.parameters.webinterface_port = 4556
    print("url:", distributed.get_webinterface_url())
    distributed.commit_parameters()

    # Describe the cluster as a slurm-scheduled resource.
    das5 = Resource()
    das5.name = "DAS-5"
    das5.location = username + "@fs0.das5.cs.vu.nl"
    das5.scheduler_type = "slurm"
    home_dir = "/home/" + username
    das5.amuse_dir = home_dir + "/amuse/amuse"
    das5.tmp_dir = home_dir + "/tmp"
    distributed.resources.add_resource(das5)

    # Request the nodes on the "defq" queue for 24 hours.
    das5_pilot = Pilot()
    das5_pilot.resource_name = "DAS-5"
    das5_pilot.queue_name = "defq"
    das5_pilot.node_count = num_nodes
    das5_pilot.time = 24 | units.hour
    das5_pilot.slots_per_node = num_cores
    das5_pilot.label = "DAS-5-Pilot"
    distributed.pilots.add_pilot(das5_pilot)

    print("Waiting for reservations")
    distributed.wait_for_pilots()

    return distributed
def start_distributed(lgm_node_names):
    """Start DistributedAmuse with the LGM nodes (CPU pilots) and a local pilot.

    ``use_for_all_workers(True)`` is called right after initialization. The
    LGM gateway and one resource per LGM node are registered, then a local
    pilot and one CPU pilot per LGM node are requested.

    Args:
        lgm_node_names: iterable of LGM node names; each name is passed to
            ``new_lgm_node`` to build the node's resource.

    Returns:
        The DistributedAmuse instance, after ``wait_for_pilots()`` has
        returned. Worker output is redirected to ``distributed_amuse.log``.
    """
    lgm_nodes = [new_lgm_node(name) for name in lgm_node_names]

    # Alternative constructions, left disabled:
    # instance = DistributedAmuse()
    # instance = DistributedAmuse(redirection="none")
    instance = DistributedAmuse(
        redirection="file",
        redirect_file="distributed_amuse.log",
    )
    instance.initialize_code()
    instance.use_for_all_workers(True)

    instance.resources.add_resource(new_lgm_gateway())
    for lgm_node in lgm_nodes:
        instance.resources.add_resource(lgm_node)

    instance.pilots.add_pilot(new_local_pilot())
    for lgm_node in lgm_nodes:
        instance.pilots.add_pilot(new_cpu_node_pilot(lgm_node))

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(" ** distributed amuse ** ")
    print(" ** pilots:")
    print(instance.pilots)

    print("\t waiting for pilots")
    instance.wait_for_pilots()

    return instance
# resource.scheduler_type="sge" # resource.amuse_dir="/home/user/amuse" # instance.resources.add_resource(resource) print("Resources:") print(instance.resources) # Claim nodes on the resources. In this example simply the "local" machine pilot = Pilot() pilot.resource_name = 'local' pilot.node_count = 1 pilot.time = 2 | units.hour pilot.slots_per_node = 32 pilot.label = 'local' instance.pilots.add_pilot(pilot) print("Pilots:") print(instance.pilots) print("Waiting for pilots") instance.wait_for_pilots() print("setting distributed as default channel") instance.use_for_all_workers() print("Running tests") nose.run() print("all tests done, stopping distributed code") instance.stop()
# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(instance.resources)

# Claim nodes on the resources. In this example simply the "local" machine.
pilot = Pilot()
pilot.resource_name = "local"
pilot.node_count = 1
pilot.time = 2 | units.hour
pilot.slots_per_node = 22
pilot.label = "local"
instance.pilots.add_pilot(pilot)

print("Pilots:")
print(instance.pilots)

print("Waiting for pilots")
instance.wait_for_pilots()

print("setting distributed as default channel")
instance.use_for_all_workers()

print("Running script")
script = sys.argv[1]
# Drop the first argv entry so the target script finds its own path in
# sys.argv[0] and its arguments after it.
sys.argv = sys.argv[1:]

# execfile() does not exist on Python 3. Reading the file as bytes and
# compiling it with the script path keeps file names in tracebacks and
# honors any source-encoding declaration. Passing globals() runs the script
# in this module's namespace.
# NOTE(review): assumes this code runs at module level, as execfile(script)
# did -- confirm if it is ever moved into a function.
with open(script, "rb") as script_file:
    script_source = script_file.read()
exec(compile(script_source, script, "exec"), globals())

print("script done, stopping distributed code")
instance.stop()