def __get_launch_method(self, hostname, user=None):
    """ Returns the desired execution method: ssh or gsissh. """
    if user is None:
        user = self.__discover_ssh_user(hostname)
    if user is not None and user != "":
        logger.debug("discovered user: " + user)
        host = user + "@" + hostname
    else:
        host = hostname

    # Probe gsissh connectivity by running a trivial remote command.
    gsissh_available = False
    try:
        cmd = "gsissh " + host + " /bin/date"
        logger.debug("Execute: " + cmd)
        gsissh_available = (subprocess.call(cmd, shell=True,
                                            stdout=subprocess.PIPE,
                                            stderr=subprocess.PIPE) == 0)
    except:
        pass

    # Probe plain ssh connectivity the same way.
    ssh_available = False
    try:
        cmd = "ssh " + host + " /bin/date"
        logger.debug("Execute: " + cmd)
        ssh_available = (subprocess.call(cmd, shell=True,
                                         stdout=subprocess.PIPE,
                                         stderr=subprocess.PIPE) == 0)
    except:
        pass

    # Prefer ssh; fall back to gsissh only when ssh is unavailable.
    if not ssh_available and gsissh_available:
        launch_method = "gsissh"
    else:
        launch_method = "ssh"
    logger.info("SSH: %r GSISSH: %r Use: %s" % (ssh_available, gsissh_available, launch_method))
    return launch_method
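# The two connectivity probes above differ only in the client binary (ssh vs.
# gsissh), so they could be collapsed into a single helper. A minimal sketch,
# assuming the same "user@host" string format; the helper name
# __check_remote_login is hypothetical and not part of the original class.
def __check_remote_login(self, client, host):
    # Run a trivial remote command (/bin/date); a non-zero exit code or any
    # exception is treated as "client not usable for this host".
    try:
        cmd = "%s %s /bin/date" % (client, host)
        logger.debug("Execute: " + cmd)
        return subprocess.call(cmd, shell=True,
                               stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE) == 0
    except Exception:
        return False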
def _put_file(self, source, target):
    logger.debug("Put file: %s to %s" % (source, target))
    start = time.time()
    if self.is_local:
        command = "cp -r %s %s" % (source, target)
    else:
        command = "iput -f -R %s %s %s" % (self.resource_group, source, target)
    self.__run_command(command)
    put_time = time.time() - start

    # For remote (iRODS) targets, replicate the file within the resource group
    # and count how many replicas were created.
    number_replica = 0
    if self.is_local == False:
        home_directory = self.__run_command("ipwd")[0].strip()
        full_filename = os.path.join(home_directory, target)
        command = "irepl-osg -f %s -G %s" % (full_filename, self.resource_group)
        output = self.__run_command(command)
        for i in output:
            if i.find("copied") > 0 or i.find("replica") > 0:
                number_replica = number_replica + 1
    rep_time = time.time() - start - put_time
    logger.info("Upload;Replication;Total;File Size;Backend;Number Replica;Timestamp: %f;%f;%f;%d;%s;%d;%s"
                % (put_time, rep_time, time.time() - start, os.path.getsize(source),
                   self.resource_group, number_replica, datetime.datetime.today().isoformat()))
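# _put_file relies on a private __run_command helper that is not shown in this
# snippet. A minimal sketch of what such a helper might look like, assuming it
# executes a shell command and returns stdout as a list of lines; this is an
# assumption, not the original implementation.
def __run_command(self, command):
    logger.debug("Execute: " + command)
    process = subprocess.Popen(command, shell=True,
                               stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE)
    stdout, stderr = process.communicate()
    return stdout.splitlines()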
def test_pilotdata(job, run_id, size=1):
    start_time = time.time()
    # Logging information
    time_log = []
    number_jobs = job["number_subjobs"]
    lrms_compute = str(job["pilot_compute_url"])
    lrms_data = str(job["pilot_data_url"])
    result_tuple = (run_id, lrms_compute, lrms_data, number_jobs, size,
                    datetime.datetime.today().isoformat(), str(job["number_pilots"]))
    pilotjobs = []
    logger.debug("*******************************************************************************************")
    logger.debug("Start %d pilots." % (job["number_pilots"] * len(job["pilot_compute_url"])))
    for i in range(0, job["number_pilots"]):
        for pilot in range(0, len(job["pilot_compute_url"])):
            pj = start_pilot(job, pilot)
            pilotjobs.append(pj)
            submission_time = time.time() - start_time
            pilot_startup_begin = time.time()
            submission_time_tuple = result_tuple + ("Pilot Submission Time", str(submission_time))
            time_log.append("%s;%s;%s;%s;%s;%s;%s;%s;%s\n" % (submission_time_tuple))

            # Run a trivial compute unit to measure how long the pilot takes to become active.
            compute_unit_description = {
                "executable": "/bin/date",
                "arguments": [],
                "number_of_processes": 1,
                "output": "stdout.txt",
                "error": "stderr.txt"
            }
            compute_unit = pj.submit_compute_unit(compute_unit_description)
            compute_unit.wait()
            pj.wait()
            pilot_startup_time = time.time() - pilot_startup_begin
            pilot_startup_time_tuple = result_tuple + ("Pilot Startup Time", str(pilot_startup_time))
            time_log.append("%s;%s;%s;%s;%s;%s;%s;%s;%s\n" % (pilot_startup_time_tuple))

    logger.debug("Started %d pilots" % len(pilotjobs))
    logger.info("Terminate Pilot Compute/Data Service")
    # pilot_compute_service is assumed to be created elsewhere (e.g. in start_pilot).
    pilot_compute_service.cancel()
    return time_log
try:
    from .job_plugin.ec2ssh import Service as EC2Service
except:
    pass

# import other BigJob packages
# import API
import api.base

sys.path.append(os.path.dirname(__file__))

if SAGA_BLISS == False:
    try:
        import saga
        logger.info("Using SAGA C++/Python.")
        is_bliss = False
    except:
        logger.warn("SAGA C++ and Python bindings not found. Using Bliss.")
        try:
            import bliss.saga as saga
            is_bliss = True
        except:
            logger.warn("SAGA Bliss not found")
else:
    logger.info("Using SAGA Bliss.")
    try:
        import bliss.saga as saga
        is_bliss = True
    except:
        logger.warn("SAGA Bliss not found")
# Create Data Unit Description
#base_dir = "../data1"
#url_list = os.listdir(base_dir)
# make absolute paths
#absolute_url_list = [os.path.join(base_dir, i) for i in url_list]
data_unit_description = {
    "file_urls": [os.path.join(os.getcwd(), "test.txt")],
    "affinity_datacenter_label": "us-east-1",
    "affinity_machine_label": ""
}

# submit pilot data to a pilot store
input_data_unit = pd.submit_data_unit(data_unit_description)
input_data_unit.wait()
logger.info("Data Unit URL: " + input_data_unit.get_url())

pilot_compute_service = PilotComputeService(coordination_url=COORDINATION_URL)

pilot_compute_description_amazon_west = {
    "service_url": 'ec2+ssh://aws.amazon.com',
    "number_of_processes": 1,
    'affinity_datacenter_label': "us-google",
    'affinity_machine_label': "",
    # cloud specific attributes
    #"vm_id": "ami-d7f742be",
    "vm_id": "ami-5c3b1b19",
    "vm_ssh_username": "******",
    "vm_ssh_keyname": "MyKey",
    "vm_ssh_keyfile": "/Users/luckow/.ssh/id_rsa",
    "vm_type": "t1.micro",
    "region": "us-west-1",
output_data_unit.wait()

# create compute unit
compute_unit_description = {
    "executable": "/bin/cat",
    "arguments": ["test.txt"],
    "number_of_processes": 1,
    "output": "stdout.txt",
    "error": "stderr.txt",
    "input_data": [input_data_unit.get_url()],
    # Put files stdout.txt and stderr.txt into output data unit
    "output_data": [
        {
            output_data_unit.get_url(): ["std*"]
        }
    ]
}

compute_unit = compute_data_service.submit_compute_unit(compute_unit_description)
logger.info("Finished setup of ComputeDataService. Waiting for scheduling of PD")
compute_data_service.wait()

logger.debug("Output Data Unit: " + str(output_data_unit.list()))
output_data_unit.export(".")

logger.info("Terminate Pilot Compute/Data Service")
compute_data_service.cancel()
pilot_data_service.cancel()
pilot_compute_service.cancel()
"vm_id": "ami-00000047", "vm_ssh_username":"******", "vm_ssh_keyname":"OSDC", "vm_ssh_keyfile":"/glusterfs/users/aluckow/.ssh/osdc_rsa.pub", "vm_type":"m1.tiny", "access_key_id":"8002fb8a8572432c92d2e080ab1f326a", "secret_access_key":"db32d545bd8e44b3b22514622b9621c5" } pilotjob = pilot_compute_service.create_pilot(pilot_compute_description) compute_data_service = ComputeDataService() compute_data_service.add_pilot_compute_service(pilot_compute_service) # create compute unit compute_unit_description = { "executable": "/bin/date", "arguments": [""], "number_of_processes": 1, "output": "stdout.txt", "error": "stderr.txt", } compute_unit = compute_data_service.submit_compute_unit(compute_unit_description) logger.info("Finished setup of ComputeDataService. Waiting for scheduling of CU") compute_data_service.wait() logger.info("Terminate Pilot Compute/Data Service") compute_data_service.cancel() pilot_compute_service.cancel()
try:
    from job_plugin.slurmssh import Service as SlurmService
except:
    pass

# import other BigJob packages
# import API
import api.base

sys.path.append(os.path.dirname(__file__))

if SAGA_BLISS == False:
    try:
        import saga
        logger.info("Using SAGA C++/Python.")
        is_bliss = False
    except:
        logger.warn("SAGA C++ and Python bindings not found. Using Bliss.")
        try:
            import bliss.saga as saga
            is_bliss = True
        except:
            logger.warn("SAGA Bliss not found")
else:
    logger.info("Using SAGA Bliss.")
    try:
        import bliss.saga as saga
        is_bliss = True
    except:
        logger.warn("SAGA Bliss not found")