Example #1
 def __get_launch_method(self, hostname, user=None):
     """ returns desired execution method: ssh, gsissh """
     if user is None:
         user = self.__discover_ssh_user(hostname)
     if user is not None and user != "":
         logger.debug("discovered user: " + user)
         host = user + "@" + hostname
     else:
         host = hostname
     gsissh_available = False
     try:
         cmd = "gsissh " + host + " /bin/date"
         logger.debug("Execute: " + cmd)
         gsissh_available = (subprocess.call(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)==0)
     except Exception:
         pass
 
     ssh_available = False
     try:
         cmd = "ssh " + host + " /bin/date"
         logger.debug("Execute: " + cmd)
         ssh_available = (subprocess.call(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)==0)
     except Exception:
         pass
 
     if not ssh_available and gsissh_available:
         launch_method = "gsissh"
     else:
         launch_method = "ssh"
     logger.info("SSH: %r GSISSH: %r Use: %s"%(ssh_available, gsissh_available, launch_method))
     return launch_method
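For context, a minimal sketch of how the selected launch method might be used to run a command on the probed host; run_remote and the example host are illustrative assumptions, not part of the original class.

import subprocess

def run_remote(launch_method, host, command):
    # Illustrative helper: run `command` on `host` via the launch
    # method chosen above ("ssh" or "gsissh").
    argv = [launch_method, host] + command.split()
    return subprocess.call(argv,
                           stdout=subprocess.PIPE,
                           stderr=subprocess.PIPE)

# e.g. run_remote("ssh", "user@host.example.org", "/bin/date")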
Example #2
 def _put_file(self, source, target):
     logger.debug("Put file: %s to %s" % (source, target))
     start = time.time()
     if self.is_local:
         command = "cp -r %s %s" % (source, target)
     else:
         command = "iput -f -R %s %s %s" % (self.resource_group, source,
                                            target)
     self.__run_command(command)
     put_time = time.time() - start
     number_replica = 0
     if not self.is_local:
         home_directory = self.__run_command("ipwd")[0].strip()
         full_filename = os.path.join(home_directory, target)
         command = "irepl-osg -f %s -G %s" % (full_filename,
                                              self.resource_group)
         output = self.__run_command(command)
         for i in output:
             # Count replica confirmations reported by irepl-osg.
             if "copied" in i or "replica" in i:
                 number_replica += 1
     rep_time = time.time() - start - put_time
     logger.info(
         "Upload;Replication;Total;File Size;Backend;Number Replica;Timestamp: %f;%f;%f;%d;%s;%d;%s"
         % (put_time, rep_time, time.time() - start,
            os.path.getsize(source), self.resource_group, number_replica,
            datetime.datetime.today().isoformat()))
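self.__run_command is not shown in this snippet; a plausible stand-in, assuming it executes a shell command (the iput/ipwd/irepl-osg invocations above) and returns stdout as a list of lines.

import subprocess

def run_command(command):
    # Hypothetical stand-in for self.__run_command: execute a shell
    # command and return its stdout split into lines.
    process = subprocess.Popen(command, shell=True,
                               stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE,
                               universal_newlines=True)
    stdout, _ = process.communicate()
    return stdout.splitlines()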
Example #3
def test_pilotdata(job, run_id, size=1):
    start_time = time.time()

    # Logging information 
    time_log = []
    number_jobs = job["number_subjobs"]
    lrms_compute = str(job["pilot_compute_url"])
    lrms_data = str(job["pilot_data_url"])
    
    result_tuple = (run_id, lrms_compute, lrms_data, number_jobs, size, datetime.datetime.today().isoformat(),str(job["number_pilots"]))
            
    pilotjobs = []
    logger.debug("*******************************************************************************************")
    logger.debug("Start %d pilots."%(job["number_pilots"]*len(job["pilot_compute_url"])))
    for i in range(0, job["number_pilots"]):
        for pilot in range(0, len(job["pilot_compute_url"])):
            pj = start_pilot(job, pilot)
            pilotjobs.append(pj)  # track started pilots for the summary log below
            submission_time = time.time() - start_time
            pilot_startup_begin = time.time()
            submission_time_tuple = result_tuple + ("Pilot Submission Time", str(submission_time))
            time_log.append("%s;%s;%s;%s;%s;%s;%s;%s;%s\n"%(submission_time_tuple))
            compute_unit_description = {
                "executable": "/bin/date",
                "arguments": [],
                "number_of_processes": 1,
                "output": "stdout.txt",
                "error": "stderr.txt"
            }    
            compute_unit = pj.submit_compute_unit(compute_unit_description)
            compute_unit.wait()  
            pj.wait()
            pilot_startup_time = time.time() - pilot_startup_begin
            pilot_startup_time_tuple = result_tuple + ("Pilot Startup Time", str(pilot_startup_time))
            time_log.append("%s;%s;%s;%s;%s;%s;%s;%s;%s\n"%(pilot_startup_time_tuple))
    
    
    logger.debug("Started %d pilots"%len(pilotjobs))
    
#     pj_startup_threads=[]
#     for i in range(0, job["number_pilots"]):
#         for pilot in range(0, len(job["pilot_compute_url"])):
#             t=threading.Thread(target=start_pilot, args=(job, pilot))
#             t.start()
#             pj_startup_threads.append(t)
#                             
#     for t in pj_startup_threads:
#         t.join()

    logger.debug("Started %d pilots"%len(pilotjobs))
    #result_tuple = (run_id, lrms_compute, lrms_data, number_jobs, size, datetime.datetime.today().isoformat(),str(job["number_pilots"]))
    #all_pilots_active = time.time() - start_time
    #all_pilots_active_tuple = result_tuple+ ("Pilot Submission Time", str(all_pilots_active))
    #time_log.append("%s;%s;%s;%s;%s;%s;%s;%s;%s\n"%(all_pilots_active_tuple))       
        
    logger.info("Terminate Pilot Compute/Data Service")
    pilot_compute_service.cancel()
    return time_log
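start_pilot and the module-level pilot_compute_service are used but not defined in this snippet; a minimal sketch of what they might look like, modeled on the PilotComputeService.create_pilot usage in the later examples (the description fields are placeholders).

pilot_compute_service = PilotComputeService(coordination_url=COORDINATION_URL)

def start_pilot(job, pilot_index):
    # Hypothetical helper: start one pilot on the compute resource
    # selected by pilot_index; field values are illustrative.
    pilot_compute_description = {
        "service_url": job["pilot_compute_url"][pilot_index],
        "number_of_processes": 1,
    }
    return pilot_compute_service.create_pilot(pilot_compute_description)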
Example #6
try:
    from .job_plugin.ec2ssh import Service as EC2Service
except ImportError:
    pass

# import other BigJob packages
# import API
import api.base

sys.path.append(os.path.dirname(__file__))

if not SAGA_BLISS:
    try:
        import saga
        logger.info("Using SAGA C++/Python.")
        is_bliss = False
    except ImportError:
        logger.warn("SAGA C++ and Python bindings not found. Using Bliss.")
        try:
            import bliss.saga as saga
            is_bliss = True
        except ImportError:
            logger.warn("SAGA Bliss not found")
else:
    logger.info("Using SAGA Bliss.")
    try:
        import bliss.saga as saga
        is_bliss = True
    except ImportError:
        logger.warn("SAGA Bliss not found")
Example #7
 # Create Data Unit Description
 #base_dir = "../data1"
 #url_list = os.listdir(base_dir)
 # make absolute paths
 #absolute_url_list = [os.path.join(base_dir, i) for i in url_list]
 data_unit_description = {
                           "file_urls": [os.path.join(os.getcwd(), "test.txt")],
                           "affinity_datacenter_label": "us-east-1",              
                           "affinity_machine_label": ""
                          }    
   
 # submit pilot data to a pilot store 
 input_data_unit = pd.submit_data_unit(data_unit_description)
 input_data_unit.wait()
 
 logger.info("Data Unit URL: " + input_data_unit.get_url())
 pilot_compute_service = PilotComputeService(coordination_url=COORDINATION_URL)
 
 pilot_compute_description_amazon_west = {
                          "service_url": 'ec2+ssh://aws.amazon.com',
                          "number_of_processes": 1,                             
                          'affinity_datacenter_label': "us-google",              
                          'affinity_machine_label': "", 
                          # cloud specific attributes
                          #"vm_id":"ami-d7f742be",
                          "vm_id": "ami-5c3b1b19",
                          "vm_ssh_username":"******",
                          "vm_ssh_keyname":"MyKey",
                          "vm_ssh_keyfile":"/Users/luckow/.ssh/id_rsa",
                          "vm_type":"t1.micro",
                          "region" : "us-west-1",
Example #9
    output_data_unit.wait()
    
    # create compute unit
    compute_unit_description = {
        "executable": "/bin/cat",
        "arguments": ["test.txt"],
        "number_of_processes": 1,
        "output": "stdout.txt",
        "error": "stderr.txt",
        "input_data": [input_data_unit.get_url()],
        # Put files stdout.txt and stderr.txt into output data unit
        "output_data": [{output_data_unit.get_url(): ["std*"]}]
    }
    
    compute_unit = compute_data_service.submit_compute_unit(compute_unit_description)
    logger.info("Finished setup of ComputeDataService. Waiting for scheduling of PD")
    compute_data_service.wait()
    
    logger.debug("Output Data Unit: " + str(output_data_unit.list()))
    output_data_unit.export(".")
    
    logger.info("Terminate Pilot Compute/Data Service")
    compute_data_service.cancel()
    pilot_data_service.cancel()
    pilot_compute_service.cancel()
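After output_data_unit.export("."), the compute unit's output files should be in the current directory; a minimal check, using the file name from the "output" field above.

# Read back the exported stdout of the /bin/cat compute unit.
with open("stdout.txt") as f:
    print(f.read())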
                             "vm_id": "ami-00000047",
                             "vm_ssh_username":"******",
                             "vm_ssh_keyname":"OSDC",
                             "vm_ssh_keyfile":"/glusterfs/users/aluckow/.ssh/osdc_rsa.pub",
                             "vm_type":"m1.tiny",
                             "access_key_id":"8002fb8a8572432c92d2e080ab1f326a",
                             "secret_access_key":"db32d545bd8e44b3b22514622b9621c5"                                                       }
    pilotjob = pilot_compute_service.create_pilot(pilot_compute_description)
    
    compute_data_service = ComputeDataService()
    compute_data_service.add_pilot_compute_service(pilot_compute_service)
    
    
    # create compute unit
    compute_unit_description = {
        "executable": "/bin/date",
        "arguments": [],
        "number_of_processes": 1,
        "output": "stdout.txt",
        "error": "stderr.txt",
    }
    
    compute_unit = compute_data_service.submit_compute_unit(compute_unit_description)
    logger.info("Finished setup of ComputeDataService. Waiting for scheduling of CU")
    compute_data_service.wait()
    
    
    logger.info("Terminate Pilot Compute/Data Service")
    compute_data_service.cancel()
    pilot_compute_service.cancel()
Example #11
try:
    from job_plugin.slurmssh import Service as SlurmService
except ImportError:
    pass


# import other BigJob packages
# import API
import api.base
sys.path.append(os.path.dirname(__file__))


if not SAGA_BLISS:
    try:
        import saga
        logger.info("Using SAGA C++/Python.")
        is_bliss = False
    except ImportError:
        logger.warn("SAGA C++ and Python bindings not found. Using Bliss.")
        try:
            import bliss.saga as saga
            is_bliss = True
        except ImportError:
            logger.warn("SAGA Bliss not found")
else:
    logger.info("Using SAGA Bliss.")
    try:
        import bliss.saga as saga
        is_bliss = True
    except ImportError:
        logger.warn("SAGA Bliss not found")