Code example #1
0
    def __get_pilot_url(self, job_url):
        """Return *job_url* truncated at ':jobs', preserving any query string.

        The query component (e.g. the dbtype parameter) is re-appended so
        that the backend type survives the truncation.
        """
        base = job_url[:job_url.index(":jobs")]
        # Extract the query via SAGAUrl so the DB type can be re-attached
        query = SAGAUrl(job_url).query
        if query not in (None, ""):
            base = base + "?" + query
        return base
Code example #2
0
 def __exists_file(cls, url):
     """Return True if the file at *url* exists, otherwise False.

     A URL with an empty host or host "localhost" is checked on the local
     filesystem; any other (remote) host is optimistically assumed to
     exist, since no remote check is performed here.
     """
     file_url = SAGAUrl(url)
     if file_url.host == "":
         # Host-less URL: the whole URL string is treated as a local path
         return os.path.exists(str(file_url))
     if file_url.host == "localhost":
         return os.path.exists(file_url.path)
     # Remote file: existence is not actually verified
     return True
Code example #3
0
    def __parse_subjob_url(self, subjob_url):
        """Split *subjob_url* into (coordination URL, sub-job id).

        Everything before the literal 'bigjob' token is the coordination
        address; the SAGA URL path (minus its leading '/') is the sub-job
        id.  A query string (dbtype), if present, is re-attached to the
        coordination URL.
        """
        saga_url = SAGAUrl(subjob_url)
        dbtype = saga_url.query
        coordination = subjob_url[:subjob_url.index("bigjob")]
        if dbtype is not None:
            # Re-append the backend-type query to the coordination address
            coordination = os.path.join(coordination, "?" + dbtype)
        sj_url = saga_url.path[1:]
        logger.debug("Parsed URL - Coordination: %s Pilot: %s" %
                     (coordination, sj_url))
        return coordination, sj_url
Code example #4
0
    def __parse_url(self, url):
        """Parse *url* into its components.

        The primary path uses SAGAUrl; on any failure a fallback parser
        based on Python's urlparse library is used, with extra handling
        for Python 2.6's urlparse, which does not split host/port for
        unknown schemes.

        Returns:
            Tuple (scheme, username, password, host, port, query) where
            scheme includes the trailing '://' and query excludes the
            leading '?'.
        """
        try:
            surl = SAGAUrl(url)
            host = surl.host
            port = surl.port
            username = surl.username
            password = surl.password
            query = surl.query
            # Normalize: strip a trailing '/' some backends append to the query
            if query is not None and query.endswith("/"):
                query = query[:-1]
            scheme = "%s://" % surl.scheme
        except Exception:
            # Fallback URL parser based on Python urlparse library.
            # Narrowed from a bare except so SystemExit/KeyboardInterrupt
            # still propagate.
            logger.error("URL %s could not be parsed" % (url))
            traceback.print_exc(file=sys.stderr)
            result = urlparse.urlparse(url)
            logger.debug("Result: " + str(result))
            host = result.hostname
            port = result.port
            username = result.username
            password = result.password
            scheme = "%s://" % result.scheme
            if host is None:
                # Python 2.6 urlparse leaves hostname unset for unknown
                # schemes -- extract host (and optional port) manually.
                logger.debug("Python 2.6 fallback")
                if url.find("/", len(scheme)) > 0:
                    host = url[len(scheme):url.find("/", len(scheme))]
                else:
                    host = url[len(scheme):]
                if host.find(":") > 1:
                    logger.debug(host)
                    comp = host.split(":")
                    host = comp[0]
                    port = int(comp[1])

            if url.find("?") > 0:
                query = url[url.find("?") + 1:]
            else:
                query = None

        logger.debug("%s %s %s" % (scheme, host, port))
        return scheme, username, password, host, port, query
Code example #5
0
 def wait(self):
     """Block until every sub-job of this pilot has finished.

     Polls the coordination backend every 2 seconds, tallies the state of
     each sub-job, and returns once all sub-jobs report a finished state.
     """
     while True:
         jobs = self.coordination.get_jobs_of_pilot(self.pilot_url)
         finish_counter = 0
         result_map = {}
         for job_url in jobs:
             # The job id is the path component of the sub-job URL
             surl = SAGAUrl(job_url)
             state = self.coordination.get_job_state(surl.path)
             # Tally states; dict.get replaces has_key(), which was
             # removed in Python 3
             result_map[state] = result_map.get(state, 0) + 1
             if self.__has_finished(state):
                 finish_counter = finish_counter + 1
         logger.debug("Total Jobs: %s States: %s" %
                      (len(jobs), str(result_map)))
         if finish_counter == len(jobs):
             break
         time.sleep(2)
Code example #6
0
    def __parse_pilot_url(self, pilot_url):
        """Split *pilot_url* into (coordination URL, pilot id).

        Everything before the literal 'bigjob' token is the coordination
        address; the SAGA URL path (minus its leading '/') is the pilot
        id.  A query string (dbtype), if present, is re-attached to the
        coordination URL.
        """
        saga_url = SAGAUrl(pilot_url)
        dbtype = saga_url.query
        coordination = pilot_url[:pilot_url.index("bigjob")]
        if dbtype is not None:
            # Re-append the backend-type query to the coordination address
            coordination = os.path.join(coordination, "?" + dbtype)
        pilot_id = saga_url.path[1:]
        logger.debug("Parsed URL - Coordination: %s Pilot: %s" %
                     (coordination, pilot_id))
        return coordination, pilot_id
Code example #7
0
    def start_pilot_job(self,
                        lrms_url,
                        number_nodes=1,
                        queue=None,
                        project=None,
                        working_directory=None,
                        userproxy=None,
                        walltime=None,
                        processes_per_node=1,
                        filetransfers=None,
                        external_queue="",
                        pilot_compute_description=None):
        """Start a pilot (batch) job at the resource manager via the SAGA Job API.

        Supported resource-manager URL schemes include, e.g.:
            fork://localhost/ (Default Job Adaptor)
            gram://qb1.loni.org/jobmanager-pbs (Globus Adaptor)
            pbspro://localhost (PBS Pro Adaptor)
        plus ssh, condor, and the cloud schemes gce+ssh / ec2+ssh /
        euca+ssh / nova+ssh, which receive special handling below.

        Args:
            lrms_url: URL of the local resource management system.
            number_nodes: number of nodes to request.
            queue: batch queue name (optional).
            project: allocation/project name (optional).
            working_directory: working directory for the pilot (optional).
            userproxy: credential proxy (not used inside this method).
            walltime: wall-time limit for the pilot job.
            processes_per_node: processes to start per node.
            filetransfers: file-transfer specifications to stage in.
            external_queue: secondary queue used by the Pilot Compute Service.
            pilot_compute_description: backend-specific description (clouds).

        Returns:
            The pilot URL of the started BigJob.

        Raises:
            BigJobError: if a BigJob is already active on this instance.
        """
        if self.job != None:
            raise BigJobError(
                "One BigJob already active. Please stop BigJob first.")
            return  # NOTE(review): unreachable -- the raise above already exits

        ##############################################################################
        # initialization of coordination and communication subsystem
        # Communication & Coordination initialization
        lrms_saga_url = SAGAUrl(lrms_url)
        self.url = lrms_saga_url
        self.pilot_url = self.app_url + ":" + lrms_saga_url.host
        # Total process slots = nodes * processes per node
        self.number_nodes = int(number_nodes) * int(processes_per_node)

        # Store references to BJ in global dict
        _pilot_url_dict[self.pilot_url] = self
        _pilot_url_dict[external_queue] = self

        logger.debug("create pilot job entry on backend server: " +
                     self.pilot_url)
        self.coordination.set_pilot_state(self.pilot_url, str(Unknown), False)
        self.coordination.set_pilot_description(self.pilot_url, filetransfers)
        logger.debug("set pilot state to: " + str(Unknown))

        ##############################################################################
        # Create Job Service (Default: SAGA Job Service, alternative Job Services supported)
        # Cloud schemes get dedicated services; everything else goes via SAGA.
        self.js = None
        if lrms_saga_url.scheme == "gce+ssh":
            self.js = GCEService(lrms_saga_url, pilot_compute_description)
        elif lrms_saga_url.scheme=="ec2+ssh" or lrms_saga_url.scheme=="euca+ssh" \
            or lrms_saga_url.scheme=="nova+ssh":
            self.js = EC2Service(lrms_saga_url, pilot_compute_description)
        else:
            self.js = SAGAJobService(lrms_saga_url)

        ##############################################################################
        # create job description
        jd = SAGAJobDescription()

        #  Attempt to create working directory (e.g. in local scenario)
        if working_directory != None:
            # Only mkdir for local-ish schemes; "go:" URLs are not local paths
            if not os.path.isdir(working_directory) \
                and (lrms_saga_url.scheme.startswith("fork") or lrms_saga_url.scheme.startswith("condor")) \
                and working_directory.startswith("go:")==False:
                os.mkdir(working_directory)
            self.working_directory = working_directory
        else:
            # if no working dir is set assume use home directory
            # will fail if home directory is not the same on remote machine
            # but this is just a guess to avoid failing
            #self.working_directory = os.path.expanduser("~")
            self.working_directory = ""

        if queue != None:
            jd.queue = queue
        if project != None:
            jd.project = project
        if walltime != None:
            # Bliss expects an int wall-time; classic SAGA expects a string
            if is_bliss:
                jd.wall_time_limit = int(walltime)
            else:
                jd.wall_time_limit = str(walltime)

        ##############################################################################
        # File Management and Stage-In
        # Determine whether target machine use gsissh or ssh to logon.
        # logger.debug("Detect launch method for: " + lrms_saga_url.host)
        # self.launch_method = self.__get_launch_method(lrms_saga_url.host,lrms_saga_url.username)
        self.bigjob_working_directory_url = ""
        if lrms_saga_url.scheme.startswith("gce") or lrms_saga_url.scheme.startswith("ec2")\
            or lrms_saga_url.scheme.startswith("euca") or lrms_saga_url.scheme.startswith("nova"):
            logger.debug(
                "File Staging for Cloud Instances currently not supported.")
        elif lrms_saga_url.scheme.startswith("condor") == True:
            logger.debug("Using Condor file staging")
        else:
            # build target url for working directory
            # this will also create the remote directory for the BJ
            # Fallback if working directory is not a valid URL
            if not (self.working_directory.startswith("go:")
                    or self.working_directory.startswith("ssh://")):
                # Construct an ssh:// URL to the BigJob working dir on the target
                if lrms_saga_url.username != None and lrms_saga_url.username != "":
                    self.bigjob_working_directory_url = "ssh://" + lrms_saga_url.username + "@" + lrms_saga_url.host + self.__get_bigjob_working_dir(
                    )
                else:
                    self.bigjob_working_directory_url = "ssh://" + lrms_saga_url.host + self.__get_bigjob_working_dir(
                    )
            elif self.working_directory.startswith("go:"):
                self.bigjob_working_directory_url = os.path.join(
                    self.working_directory, self.uuid)
            else:
                # working directory is a valid file staging URL
                self.bigjob_working_directory_url = self.working_directory

            # initialize file manager that takes care of file movement and directory creation
            if self.__filemanager == None:
                self.__initialize_pilot_data(
                    self.bigjob_working_directory_url)  # determines the url

            # Resolve a relative working directory to an absolute local path
            if self.__filemanager != None and not self.working_directory.startswith(
                    "/"):
                self.working_directory = self.__filemanager.get_path(
                    self.bigjob_working_directory_url)

            # determine working directory of bigjob
            # if a remote sandbox can be created via ssh => create a own dir for each bj job id
            # otherwise use specified working directory
            logger.debug("BigJob working directory: %s" %
                         self.bigjob_working_directory_url)
            if self.__filemanager != None and self.__filemanager.create_remote_directory(
                    self.bigjob_working_directory_url) == True:
                self.working_directory = self.__get_bigjob_working_dir()
                self.__stage_files(filetransfers,
                                   self.bigjob_working_directory_url)
            else:
                logger.warn("No file staging adaptor found.")

            logger.debug("BJ Working Directory: %s", self.working_directory)

        # Condor manages its own sandbox, so no working dir in the description
        if lrms_saga_url.scheme.startswith("condor") == False:
            jd.working_directory = self.working_directory
        else:
            jd.working_directory = ""

        ##############################################################################
        # Create and process BJ bootstrap script
        bootstrap_script = self.__generate_bootstrap_script(
            self.coordination.get_address(),
            self.pilot_url,  # Queue 1 used by this BJ object 
            external_queue  # Queue 2 used by Pilot Compute Service 
            # or another external scheduler
        )
        logger.debug("Adaptor specific modifications: " +
                     str(lrms_saga_url.scheme))
        # Escape the inline Python script for the target submission system
        if is_bliss:
            bootstrap_script = self.__escape_bliss(bootstrap_script)
        else:
            if lrms_saga_url.scheme == "gram":
                bootstrap_script = self.__escape_rsl(bootstrap_script)
            elif lrms_saga_url.scheme == "pbspro" or lrms_saga_url.scheme == "xt5torque" or lrms_saga_url.scheme == "torque":
                bootstrap_script = self.__escape_pbs(bootstrap_script)
            elif lrms_saga_url.scheme == "ssh":
                bootstrap_script = self.__escape_ssh(bootstrap_script)
        logger.debug(bootstrap_script)

        # Define Agent Executable in Job description
        # in Condor case bootstrap script is staged
        # (Python app cannot be passed inline in Condor job description)
        if lrms_saga_url.scheme.startswith("condor") == True:

            condor_bootstrap_filename = os.path.join(
                "/tmp", "bootstrap-" + str(self.uuid))
            condor_bootstrap_file = open(condor_bootstrap_filename, "w")
            condor_bootstrap_file.write(bootstrap_script)
            condor_bootstrap_file.close()
            logger.debug("Using Condor - bootstrap file: " +
                         condor_bootstrap_filename)

            jd.executable = "/usr/bin/env"
            jd.arguments = [
                "python",
                os.path.basename(condor_bootstrap_filename)
            ]
            # Stage the bootstrap script in and the agent output archive out
            bj_file_transfers = []
            file_transfer_spec = condor_bootstrap_filename + " > " + os.path.basename(
                condor_bootstrap_filename)
            bj_file_transfers.append(file_transfer_spec)
            output_file_name = "output-" + str(self.uuid) + ".tar.gz"
            output_file_transfer_spec = os.path.join(
                self.working_directory,
                output_file_name) + " < " + output_file_name
            #output_file_transfer_spec = os.path.join(self.working_directory, "output.tar.gz") +" < output.tar.gz"
            logger.debug("Output transfer: " + output_file_transfer_spec)
            bj_file_transfers.append(output_file_transfer_spec)
            if filetransfers != None:
                for t in filetransfers:
                    bj_file_transfers.append(t)
            logger.debug("Condor file transfers: " + str(bj_file_transfers))
            jd.file_transfer = bj_file_transfers
        else:
            # Non-Condor: pass the bootstrap script inline via "python -c"
            if is_bliss:
                jd.total_cpu_count = int(number_nodes)
            else:
                jd.number_of_processes = str(number_nodes)
                jd.processes_per_host = str(processes_per_node)
            jd.spmd_variation = "single"
            jd.arguments = ["python", "-c", bootstrap_script]
            jd.executable = "/usr/bin/env"

        logger.debug("Working directory: " + jd.working_directory)

        # Agent stdout/stderr files, tagged with this BigJob's uuid
        jd.output = os.path.join(self.working_directory,
                                 "stdout-" + self.uuid + "-agent.txt")
        jd.error = os.path.join(self.working_directory,
                                "stderr-" + self.uuid + "-agent.txt")

        ##############################################################################
        # Create and submit pilot job to job service
        logger.debug("Creating pilot job with description: %s" % str(jd))
        self.job = self.js.create_job(jd)
        logger.debug("Submit pilot job to: " + str(lrms_saga_url))
        self.job.run()
        return self.pilot_url
Code example #8
0
File: nocoord_adaptor.py  Project: icheckmate/BigJob
 def get_base_url(cls, application_id):
     """Build the base URL '<scheme>://<host>/<application_id>/' from cls.BASE_URL."""
     surl = SAGAUrl(cls.BASE_URL)
     base_url = "%s://%s/%s/" % (surl.scheme, surl.host, application_id)
     logger.debug(base_url)
     return base_url
Code example #9
0
File: nocoord_adaptor.py  Project: icheckmate/BigJob
 def __remove_dbtype(cls, url):
     # Round-trips *url* through SAGAUrl and back to a string.
     # NOTE(review): presumably SAGAUrl's str() normalizes the URL by
     # dropping the dbtype query component (hence the method name) --
     # confirm against the SAGAUrl implementation.
     surl = SAGAUrl(url)
     return str(surl)