Example #1
0
 def GetNumSubmit(self, idleslots, idlejobs, idleuserjobs):
     """
     Calculate the number of glideins to submit.

     Nothing is submitted (0 is returned) when the idle slots or queued
     glideins already cover the idle user jobs, or when fewer than 90% of
     the running glidein jobs show up as running glideins.  Otherwise the
     result is the smallest of: the remaining room under "maxqueuedjobs",
     the remaining room under "MaxIdleGlideins", and the number of idle
     user jobs.

     @param idleslots: Number of idle startd's
     @param idlejobs: Number of glideins in queue, but not active
     @param idleuserjobs: Number of idle user jobs from FLOCK_FROM

     @return: int - Number of glideins to submit (never negative)
     """

     # If we have already submitted enough glideins to fulfill the request,
     # don't submit more.
     if max(idlejobs, idleslots) >= idleuserjobs:
         logging.debug("The number of idlejobs or idleslots fufills the requested idleuserjobs, not submitting any glideins")
         return 0

     status = ClusterStatus(status_constraint="IsUndefined(Offline)")

     # Check that running glideins are reporting to the collector
     running_glidein_jobs = status.GetRunningGlideinJobs()
     logging.debug("Number of running_glidein_jobs = %i", running_glidein_jobs)
     running_glideins = status.GetRunningGlideins()
     logging.debug("Number of running glideins = %i", running_glideins)

     if running_glidein_jobs * .9 > running_glideins:
         logging.error("I'm guessing glideins are not reporting to the collector, not submitting")
         return 0

     # Fall back to 5 when a limit is not configured, the same default
     # ClusterMeetPreferences applies to these options, instead of failing
     # on int(None).
     queue_room = int(get_option("maxqueuedjobs", "5")) - idlejobs
     idle_room = int(get_option("MaxIdleGlideins", "5")) - idleslots

     # Submit until we can't submit any more, or there are fewer user jobs.
     # A limit that is already exceeded yields negative room; clamp to 0 so
     # callers never receive a negative count.
     return max(0, min(queue_room, idleuserjobs, idle_room))
Example #2
0
 def _DropPriv(self):
     """
     Switch the process to the factory user and group.

     The target account comes from the "factory_user" option.  When that
     option is unset, the "uid.gid" pair in the "CONDOR_IDS" option is used
     instead.  HOME and USER are exported for the target account before
     the group id and user id of the process are changed.

     When neither option is set, the process exits with status 1 if it is
     running as root; otherwise there is no account to switch to and the
     method returns without changing anything.
     """
     factory_user = get_option("factory_user")
     current_uid = os.getuid()
     if factory_user is None:
         logging.warning("factory_user is not set in campus factory config file")
         condor_ids = get_option("CONDOR_IDS")
         if condor_ids:
             logging.info("CONDOR_IDS is set, will use for dropping privledge")
             (factory_uid, factory_gid) = condor_ids.split(".")
             factory_uid = int(factory_uid)
             factory_gid = int(factory_gid)
             factory_user = pwd.getpwuid(factory_uid).pw_name
         elif current_uid == 0:
             logging.error("We are running as root, which can not submit condor jobs.")
             logging.error("Don't know who to drop privledges to.")
             logging.error("I can't do my job!")
             logging.error("Exiting...")
             sys.exit(1)
         else:
             # No target account is known and we are not root.  Continuing
             # would reference an undefined uid/gid, so keep the current
             # identity.
             logging.debug("No factory user configured and not running as root, keeping current user")
             return
     else:
         # If factory user is set, look the account up once
         account = pwd.getpwnam(factory_user)
         factory_uid = account.pw_uid
         factory_gid = account.pw_gid
         logging.debug("Using %i:%i for user:group" % (factory_uid, factory_gid))

     # Some parts of bosco need the HOME directory and USER to be defined
     os.environ["HOME"] = pwd.getpwnam(factory_user).pw_dir
     os.environ["USER"] = factory_user
     # Group first: once the uid is no longer root the process may not be
     # allowed to change its gid.
     os.setgid(factory_gid)
     os.setuid(factory_uid)
Example #3
0
    def _SetLogging(self):
        """
        Configure the root logger from the factory configuration.

        Reads the "loglevel" and "logdirectory" options, attaches a rotating
        file handler for campus_factory.log in that directory (10,000,000
        bytes per file, 5 backups) and replaces sys.stdout and sys.stderr
        with StreamToLogger objects logging at INFO and ERROR.

        The level name is matched case-insensitively; a missing or unknown
        name falls back to WARNING.  If the method runs more than once on
        the same object, the file handler installed by the previous run is
        replaced instead of stacked, so log lines are not duplicated.
        """
        logging_levels = {'debug': logging.DEBUG,
                          'info': logging.INFO,
                          'warning': logging.WARNING,
                          'error': logging.ERROR,
                          'critical': logging.CRITICAL}

        # An unrecognised name used to produce setLevel(None)
        level_name = (get_option("loglevel") or "").strip().lower()
        level = logging_levels.get(level_name, logging.WARNING)

        logdirectory = get_option("logdirectory")
        # Build the new handler first so a failure here leaves any existing
        # logging setup untouched.
        handler = logging.handlers.RotatingFileHandler(
            os.path.join(logdirectory, "campus_factory.log"),
            maxBytes=10000000, backupCount=5)
        handler.setFormatter(
            logging.Formatter("%(asctime)s - %(levelname)s - %(message)s"))

        root_logger = logging.getLogger()
        root_logger.setLevel(level)

        # Drop the handler installed by an earlier call on this object
        previous_handler = getattr(self, "_log_handler", None)
        if previous_handler is not None:
            root_logger.removeHandler(previous_handler)
            previous_handler.close()
        root_logger.addHandler(handler)
        self._log_handler = handler

        # Send stdout and stderr to the log
        sys.stdout = StreamToLogger(root_logger, logging.INFO)
        sys.stderr = StreamToLogger(root_logger, logging.ERROR)
Example #4
0
 def GetNumSubmit(self, idleslots, idlejobs, idleuserjobs):
     """
     Calculate the number of glideins to submit.

     Nothing is submitted (0 is returned) when the idle slots or queued
     glideins already cover the idle user jobs, or when fewer than 90% of
     the running glidein jobs show up as running glideins.  Otherwise the
     result is the smallest of: the remaining room under "maxqueuedjobs",
     the remaining room under "MaxIdleGlideins", and the number of idle
     user jobs.

     @param idleslots: Number of idle startd's
     @param idlejobs: Number of glideins in queue, but not active
     @param idleuserjobs: Number of idle user jobs from FLOCK_FROM

     @return: int - Number of glideins to submit (never negative)
     """

     # If we have already submitted enough glideins to fulfill the request,
     # don't submit more.
     if max(idlejobs, idleslots) >= idleuserjobs:
         logging.debug("The number of idlejobs or idleslots fufills the requested idleuserjobs, not submitting any glideins")
         return 0

     status = ClusterStatus(status_constraint="IsUndefined(Offline)")

     # Check that running glideins are reporting to the collector
     running_glidein_jobs = status.GetRunningGlideinJobs()
     logging.debug("Number of running_glidein_jobs = %i", running_glidein_jobs)
     running_glideins = status.GetRunningGlideins()
     logging.debug("Number of running glideins = %i", running_glideins)

     if running_glidein_jobs * .9 > running_glideins:
         logging.error("I'm guessing glideins are not reporting to the collector, not submitting")
         return 0

     # Fall back to 5 when a limit is not configured, the same default
     # ClusterMeetPreferences applies to these options, instead of failing
     # on int(None).
     queue_room = int(get_option("maxqueuedjobs", "5")) - idlejobs
     idle_room = int(get_option("MaxIdleGlideins", "5")) - idleslots

     # Submit until we can't submit any more, or there are fewer user jobs.
     # A limit that is already exceeded yields negative room; clamp to 0 so
     # callers never receive a negative count.
     return max(0, min(queue_room, idleuserjobs, idle_room))
Example #5
0
    def GetIdleJobs(self, status):
        """
        Get the number of idle jobs from configured flock from hosts.

        @param status: object whose GetIdleJobs(schedds) returns a mapping
            of schedd to { user: idle job count }, or None on failure
        @return: { user, int } - Number of idle jobs by user (dictionary).
            None when the status query returned None, or when
            self.UseOffline is set (no query is made in that case).
        """
        # Idle jobs are only looked up when not running in offline mode
        if self.UseOffline:
            return None

        schedds = []
        # Get schedd's to query
        if get_option("FLOCK_FROM"):
            schedds = get_option("FLOCK_FROM").strip().split(",")

        logging.debug("Schedds to query: %s" % str(schedds))

        idleuserjobs = status.GetIdleJobs(schedds)
        if idleuserjobs is None:
            logging.info(
                "Received None from idle user jobs, going to try later")
            return None

        # Add all the idle jobs from all the schedds, unique on user (owner)
        user_idle = {}
        for schedd in idleuserjobs:
            for user in idleuserjobs[schedd]:
                # dict.get replaces has_key, which Python 3 removed
                user_idle[user] = user_idle.get(user, 0) + idleuserjobs[schedd][user]

        return user_idle
Example #6
0
    def _DropPriv(self):
        """
        Switch the process to the factory user and group.

        The target account comes from the "factory_user" option.  When that
        option is unset, the "uid.gid" pair in the "CONDOR_IDS" option is
        used instead.  HOME and USER are exported for the target account
        before the group id and user id of the process are changed.

        When neither option is set, the process exits with status 1 if it
        is running as root; otherwise there is no account to switch to and
        the method returns without changing anything.
        """
        factory_user = get_option("factory_user")
        current_uid = os.getuid()
        if factory_user is None:
            logging.warning(
                "factory_user is not set in campus factory config file")
            condor_ids = get_option("CONDOR_IDS")
            if condor_ids:
                logging.info(
                    "CONDOR_IDS is set, will use for dropping privledge")
                (factory_uid, factory_gid) = condor_ids.split(".")
                factory_uid = int(factory_uid)
                factory_gid = int(factory_gid)
                factory_user = pwd.getpwuid(factory_uid).pw_name
            elif current_uid == 0:
                logging.error(
                    "We are running as root, which can not submit condor jobs."
                )
                logging.error("Don't know who to drop privledges to.")
                logging.error("I can't do my job!")
                logging.error("Exiting...")
                sys.exit(1)
            else:
                # No target account is known and we are not root.
                # Continuing would reference an undefined uid/gid, so keep
                # the current identity.
                logging.debug(
                    "No factory user configured and not running as root, keeping current user")
                return
        else:
            # If factory user is set, look the account up once
            account = pwd.getpwnam(factory_user)
            factory_uid = account.pw_uid
            factory_gid = account.pw_gid
            logging.debug("Using %i:%i for user:group" %
                          (factory_uid, factory_gid))

        # Some parts of bosco need the HOME directory and USER to be defined
        os.environ["HOME"] = pwd.getpwnam(factory_user).pw_dir
        os.environ["USER"] = factory_user
        # Group first: once the uid is no longer root the process may not
        # be allowed to change its gid.
        os.setgid(factory_gid)
        os.setuid(factory_uid)
Example #7
0
    def _SetLogging(self):
        """
        Configure the root logger from the factory configuration.

        Reads the "loglevel" and "logdirectory" options, attaches a rotating
        file handler for campus_factory.log in that directory (10,000,000
        bytes per file, 5 backups) and replaces sys.stdout and sys.stderr
        with StreamToLogger objects logging at INFO and ERROR.

        The level name is matched case-insensitively; a missing or unknown
        name falls back to WARNING.  If the method runs more than once on
        the same object, the file handler installed by the previous run is
        replaced instead of stacked, so log lines are not duplicated.
        """
        logging_levels = {
            'debug': logging.DEBUG,
            'info': logging.INFO,
            'warning': logging.WARNING,
            'error': logging.ERROR,
            'critical': logging.CRITICAL
        }

        # An unrecognised name used to produce setLevel(None)
        level_name = (get_option("loglevel") or "").strip().lower()
        level = logging_levels.get(level_name, logging.WARNING)

        logdirectory = get_option("logdirectory")
        # Build the new handler first so a failure here leaves any existing
        # logging setup untouched.
        handler = logging.handlers.RotatingFileHandler(
            os.path.join(logdirectory, "campus_factory.log"),
            maxBytes=10000000,
            backupCount=5)
        handler.setFormatter(
            logging.Formatter("%(asctime)s - %(levelname)s - %(message)s"))

        root_logger = logging.getLogger()
        root_logger.setLevel(level)

        # Drop the handler installed by an earlier call on this object
        previous_handler = getattr(self, "_log_handler", None)
        if previous_handler is not None:
            root_logger.removeHandler(previous_handler)
            previous_handler.close()
        root_logger.addHandler(handler)
        self._log_handler = handler

        # Send stdout and stderr to the log
        sys.stdout = StreamToLogger(root_logger, logging.INFO)
        sys.stderr = StreamToLogger(root_logger, logging.ERROR)
Example #8
0
    def GetIdleJobs(self, status):
        """
        Get the number of idle jobs from configured flock from hosts.

        The schedds listed in the "FLOCK_FROM" option are queried together
        with the host named by the "CONDOR_HOST" option.

        @param status: object whose GetIdleJobs(schedds) returns a mapping
            of schedd to { user: idle job count }, or None on failure
        @return: { user, int } - Number of idle jobs by user (dictionary).
            None when the status query returned None, or when
            self.UseOffline is set (no query is made in that case).
        """
        # Idle jobs are only looked up when not running in offline mode
        if self.UseOffline:
            return None

        schedds = []
        # Get schedd's to query
        if get_option("FLOCK_FROM"):
            schedds = get_option("FLOCK_FROM").strip().split(",")

        # Add the local host to query
        schedds.append(get_option("CONDOR_HOST"))

        logging.debug("Schedds to query: %s" % str(schedds))

        idleuserjobs = status.GetIdleJobs(schedds)
        if idleuserjobs is None:
            logging.info("Received None from idle user jobs, going to try later")
            return None

        # Add all the idle jobs from all the schedds, unique on user (owner)
        user_idle = {}
        for schedd in idleuserjobs:
            for user in idleuserjobs[schedd]:
                # dict.get replaces has_key, which Python 3 removed
                user_idle[user] = user_idle.get(user, 0) + idleuserjobs[schedd][user]

        return user_idle
Example #9
0
 def _GetClusterSpecificConfig(self, option, default):
     """
     Look up an option, preferring the cluster specific setting.

     The value from get_option_section(self.cluster_unique, option) wins;
     next comes get_option(option); if both evaluate false (for example
     None or an empty string) the supplied default is returned.

     @param option: Name of the option to look up
     @param default: Value returned when neither lookup yields a value
     @return: The first value found, or default
     """
     # Each lookup is done once and reused instead of being repeated for
     # the return value.
     cluster_value = get_option_section(self.cluster_unique, option)
     if cluster_value:
         return cluster_value

     general_value = get_option(option)
     if general_value:
         return general_value

     return default
Example #10
0
    def Intialize(self, signum=None, frame=None):
        """
        Function to initialize the factory's variables such as configuration
        and logging

        The method registers itself as the SIGHUP handler, so it also has
        to accept the two arguments Python passes to signal handlers.

        The cluster list is taken from the "clusterlist" option when it is
        set, otherwise from the output of "bosco_cluster -l", otherwise a
        single cluster for the "CONDOR_HOST" option is used.

        @param signum: Signal number when called as a signal handler (unused)
        @param frame: Interrupted stack frame when called as a signal
            handler (unused)
        """
        # Set the sighup signal handler
        signal.signal(signal.SIGHUP, self.Intialize)

        # Read in the configuration file
        self.config_file = self.options.config
        files_read = set_config_file(self.config_file)

        # check if no files read in
        if len(files_read) < 1:
            sys.stderr.write("No configuration files found.  Location = %s\n" %
                             self.config_file)
            sys.exit(1)

        self._SetLogging()

        if os.getuid() == 0 or get_option("factory_user"):
            logging.info("Detected that factory should change user")
            self._DropPriv()

        self.UseOffline = get_option("useoffline", "false").lower() == "true"

        self.cluster_list = []
        # Get the cluster lists
        clusterlist = get_option("clusterlist", "")
        # Compare by value: "is not" on strings tests object identity
        if clusterlist:
            logging.debug(
                "Using the cluster list in the campus factory configuration.")
            for cluster_id in clusterlist.split(','):
                self.cluster_list.append(
                    Cluster(cluster_id, useOffline=self.UseOffline))
        else:
            # Check for the bosco cluster command
            (stdout, _) = RunExternal("bosco_cluster -l")
            # Strip before comparing so a trailing newline does not hide
            # the "no clusters" message.
            bosco_output = stdout.strip()
            if bosco_output and bosco_output != "No clusters configured":
                logging.debug("Using the cluster list installed with BOSCO")
                for cluster_id in stdout.split("\n"):
                    if cluster_id:
                        self.cluster_list.append(
                            Cluster(cluster_id, useOffline=self.UseOffline))
            else:
                # Initialize as empty, which infers to submit 'here'
                self.cluster_list = [
                    Cluster(get_option("CONDOR_HOST"),
                            useOffline=self.UseOffline)
                ]

        # Tar up the executables
        wrangler = DaemonWrangler()
        wrangler.Package()
Example #11
0
    def Intialize(self, signum=None, frame=None):
        """
        Function to initialize the factory's variables such as configuration
        and logging

        The method registers itself as the SIGHUP handler, so it also has
        to accept the two arguments Python passes to signal handlers.

        The cluster list is taken from the "clusterlist" option when it is
        set, otherwise from the output of "bosco_cluster -l", otherwise a
        single cluster for the "CONDOR_HOST" option is used.

        @param signum: Signal number when called as a signal handler (unused)
        @param frame: Interrupted stack frame when called as a signal
            handler (unused)
        """
        # Set the sighup signal handler
        signal.signal(signal.SIGHUP, self.Intialize)

        # Read in the configuration file
        self.config_file = self.options.config
        files_read = set_config_file(self.config_file)

        # check if no files read in
        if len(files_read) < 1:
            sys.stderr.write("No configuration files found.  Location = %s\n" % self.config_file)
            sys.exit(1)

        self._SetLogging()

        if os.getuid() == 0 or get_option("factory_user"):
            logging.info("Detected that factory should change user")
            self._DropPriv()

        self.UseOffline = get_option("useoffline", "false").lower() == "true"

        self.cluster_list = []
        # Get the cluster lists
        clusterlist = get_option("clusterlist", "")
        # Compare by value: "is not" on strings tests object identity
        if clusterlist:
            logging.debug("Using the cluster list in the campus factory configuration.")
            for cluster_id in clusterlist.split(','):
                self.cluster_list.append(Cluster(cluster_id, useOffline=self.UseOffline))
        else:
            # Check for the bosco cluster command
            (stdout, _) = RunExternal("bosco_cluster -l")
            # Strip before comparing so a trailing newline does not hide
            # the "no clusters" message.
            bosco_output = stdout.strip()
            if bosco_output and bosco_output != "No clusters configured":
                logging.debug("Using the cluster list installed with BOSCO")
                for cluster_id in stdout.split("\n"):
                    if cluster_id:
                        self.cluster_list.append(Cluster(cluster_id, useOffline=self.UseOffline))
            else:
                # Initialize as empty, which infers to submit 'here'
                self.cluster_list = [Cluster(get_option("CONDOR_HOST"), useOffline=self.UseOffline)]

        # Tar up the executables
        wrangler = DaemonWrangler()
        wrangler.Package()
Example #12
0
    def ClusterMeetPreferences(self):
        """
        Check that the cluster is below its idle glidein and queued job
        limits.

        The limits are the "MAXIDLEGLIDEINS" and "maxqueuedjobs" options,
        each defaulting to 5.

        @return: (idleslots, idlejobs) - the idle glidein count and the idle
            glidein job count reported by self.status
        @raise ClusterPreferenceException: when either count is None or has
            reached its limit
        """
        # Check for idle glideins
        idleslots = self.status.GetIdleGlideins()
        if idleslots is None:
            logging.info("Received None from idle glideins, going to try later")
            raise ClusterPreferenceException("Received None from idle glideins")
        logging.debug("Idle glideins = %i", idleslots)
        if idleslots >= int(get_option("MAXIDLEGLIDEINS", "5")):
            logging.info("Too many idle glideins")
            raise ClusterPreferenceException("Too many idle glideins")

        # Check for idle glidein jobs
        idlejobs = self.status.GetIdleGlideinJobs()
        if idlejobs is None:
            logging.info("Received None from idle glidein jobs, going to try later")
            raise ClusterPreferenceException("Received None from idle glidein jobs")
        logging.debug("Queued jobs = %i", idlejobs)
        if idlejobs >= int(get_option("maxqueuedjobs", "5")):
            logging.info("Too many queued jobs")
            raise ClusterPreferenceException("Too many queued jobs")

        return (idleslots, idlejobs)
Example #13
0
    def SubmitGlideins(self, numSubmit):
        """
        Submit the requested number of glidein jobs.

        Every job is submitted from the job.submit.template file located in
        the directory named by the "GLIDEIN_DIRECTORY" option.

        @param numSubmit: The number of glideins to submit.
        """
        template_dir = get_option("GLIDEIN_DIRECTORY")
        submit_template = os.path.join(template_dir, "job.submit.template")

        # One SingleSubmit call per requested glidein
        for _ in range(numSubmit):
            self.SingleSubmit(submit_template)
Example #14
0
 def SingleSubmit(self, filename):
     """
     Run condor_submit once for the given submit file.

     The cluster settings are handed to condor_submit as a series of
     -a KEY="VALUE" arguments, and the command's stdout and stderr are
     written to the debug log.

     @param filename: The file (string) to submit
     """
     # Cluster specific values, with their fallbacks
     worker_tmp = self._GetClusterSpecificConfig("worker_tmp", "/tmp")
     factory_location = self._GetClusterSpecificConfig("remote_factory", "~/bosco/campus_factory")

     # Submitting to ourselves needs no remote cluster
     is_local = get_option("CONDOR_HOST") == self.cluster_unique
     remote_cluster = "" if is_local else self.cluster_entry

     # TODO: These options should be moved to a better location
     submit_args = {
         "WN_TMP": worker_tmp,
         "GLIDEIN_HOST": get_option("COLLECTOR_HOST"),
         "GLIDEIN_Site": self.cluster_unique,
         "BOSCOCluster": self.cluster_unique,
         "REMOTE_FACTORY": factory_location,
         "REMOTE_CLUSTER": remote_cluster,
         "REMOTE_SCHEDULER": self.cluster_type,
         "GLIDEIN_DIR": get_option("GLIDEIN_DIRECTORY"),
         "PASSWDFILE_LOCATION": get_option("SEC_PASSWORD_FILE"),
     }

     # Render every entry as one -a argument, in dictionary order
     rendered = [' -a %s="%s"' % (name, value)
                 for name, value in submit_args.items()]
     command = "condor_submit %s %s" % (filename, "".join(rendered))

     (out_text, err_text) = RunExternal(command)
     logging.debug("stdout: %s" % out_text)
     logging.debug("stderr: %s" % err_text)
Example #15
0
 def __init__(self, daemons=None, base_condor_dir=None, dumb_package=False):
     """
     @param daemons: A list of daemons that will be included in the package;
         DEFAULT_GLIDEIN_DAEMONS is used when None
     @param base_condor_dir: Stored on the instance as base_condor_dir
     @param dumb_package: Stored on the instance as dumb_package
     """
     if daemons is None:
         self.daemons = DEFAULT_GLIDEIN_DAEMONS
     else:
         self.daemons = daemons

     # Best effort: a failed lookup leaves the glidein directory empty.
     # Catch Exception rather than everything so that SystemExit and
     # KeyboardInterrupt still propagate.
     try:
         self.glidein_dir = get_option("GLIDEIN_DIRECTORY")
     except Exception:
         self.glidein_dir = ""

     self.base_condor_dir = base_condor_dir
     self.dumb_package = dumb_package
Example #16
0
 def _CheckDaemons(self):
     """
     Collect the paths of the daemons to package that pass self._CheckFile.

     Every name in self.daemons is joined onto the directory given by the
     "SBIN" option and kept only when self._CheckFile accepts the path.

     @return: list of accepted daemon paths, in self.daemons order
     """
     sbin_dir = get_option("SBIN")
     logging.debug("Found SBIN directory = %s" % sbin_dir)

     # The inner generator keeps the join and the check interleaved per
     # daemon, one at a time.
     candidates = (os.path.join(sbin_dir, name) for name in self.daemons)
     return [path for path in candidates if self._CheckFile(path)]
Example #17
0
    def __init__(self, daemons=None, base_condor_dir=None, dumb_package=False):
        """
        @param daemons: A list of daemons that will be included in the
            package; DEFAULT_GLIDEIN_DAEMONS is used when None
        @param base_condor_dir: Stored on the instance as base_condor_dir
        @param dumb_package: Stored on the instance as dumb_package
        """
        if daemons is None:
            self.daemons = DEFAULT_GLIDEIN_DAEMONS
        else:
            self.daemons = daemons

        # Best effort: a failed lookup leaves the glidein directory empty.
        # Catch Exception rather than everything so that SystemExit and
        # KeyboardInterrupt still propagate.
        try:
            self.glidein_dir = get_option("GLIDEIN_DIRECTORY")
        except Exception:
            self.glidein_dir = ""

        self.base_condor_dir = base_condor_dir
        self.dumb_package = dumb_package
Example #18
0
    def _CheckDaemons(self):
        """
        Collect the paths of the daemons to package that pass
        self._CheckFile.

        Every name in self.daemons is joined onto the directory given by the
        "SBIN" option and kept only when self._CheckFile accepts the path.

        @return: list of accepted daemon paths, in self.daemons order
        """
        sbin_dir = get_option("SBIN")
        logging.debug("Found SBIN directory = %s" % sbin_dir)

        # The inner generator keeps the join and the check interleaved per
        # daemon, one at a time.
        candidates = (os.path.join(sbin_dir, name) for name in self.daemons)
        return [path for path in candidates if self._CheckFile(path)]
Example #19
0
    def _GetDynamicLibraries(self, files, libdirs=('lib', 'lib/condor')):
        """
        Get the dynamic libraries that the files are using
        (Adapted from get_condor_dlls in glideinwms)

        Only libraries whose path starts with the "RELEASE_DIR" option are
        returned.  A library outside that directory is swapped for a file of
        the same name under one of libdirs inside RELEASE_DIR when such a
        file exists; otherwise it is left out of the result.

        @param files: files to check for dynamic libraries
        @param libdirs: directories, relative to RELEASE_DIR, searched for
            condor provided copies of libraries
        @return: list of library paths under RELEASE_DIR, in the order they
            were processed
        """

        libstodo = set()
        libsdone = set()
        rlist = []

        condor_dir = get_option("RELEASE_DIR")

        # First, get the initial libraries
        for path in files:
            libstodo.update(self._ldd(path))

        while libstodo:
            lib = libstodo.pop()

            # Already did library?
            if lib in rlist:
                continue

            if not lib.startswith(condor_dir):
                # Check if the library is provided by condor
                # If so, add the condor provided lib to process
                # Overriding the system's library (condor knows best?)
                libname = os.path.basename(lib)
                for libdir in libdirs:
                    new_lib = os.path.join(condor_dir, libdir, libname)
                    if os.path.exists(new_lib) and new_lib not in rlist:
                        libstodo.add(new_lib)
                        libsdone.add(lib)
            else:
                # In the condor directory: queue its own dependencies,
                # skipping the ones already handled
                new_libstodo = set(self._ldd(lib))
                libsdone.add(lib)
                libstodo.update(new_libstodo - libsdone)
                rlist.append(lib)

        return rlist
Example #20
0
 def _GetDynamicLibraries(self, files, libdirs=('lib', 'lib/condor')):
     """
     Get the dynamic libraries that the files are using
     (Adapted from get_condor_dlls in glideinwms)

     Only libraries whose path starts with the "RELEASE_DIR" option are
     returned.  A library outside that directory is swapped for a file of
     the same name under one of libdirs inside RELEASE_DIR when such a file
     exists; otherwise it is left out of the result.

     @param files: files to check for dynamic libraries
     @param libdirs: directories, relative to RELEASE_DIR, searched for
         condor provided copies of libraries
     @return: list of library paths under RELEASE_DIR, in the order they
         were processed
     """

     libstodo = set()
     libsdone = set()
     rlist = []

     condor_dir = get_option("RELEASE_DIR")

     # First, get the initial libraries
     for path in files:
         libstodo.update(self._ldd(path))

     while libstodo:
         lib = libstodo.pop()

         # Already did library?
         if lib in rlist:
             continue

         if not lib.startswith(condor_dir):
             # Check if the library is provided by condor
             # If so, add the condor provided lib to process
             # Overriding the system's library (condor knows best?)
             libname = os.path.basename(lib)
             for libdir in libdirs:
                 new_lib = os.path.join(condor_dir, libdir, libname)
                 if os.path.exists(new_lib) and new_lib not in rlist:
                     libstodo.add(new_lib)
                     libsdone.add(lib)
         else:
             # In the condor directory: queue its own dependencies,
             # skipping the ones already handled
             new_libstodo = set(self._ldd(lib))
             libsdone.add(lib)
             libstodo.update(new_libstodo - libsdone)
             rlist.append(lib)

     return rlist
Example #21
0
 def SleepFactory(self):
     """
     Pause for the number of seconds given by the "iterationtime" option.
     """
     pause_seconds = int(get_option("iterationtime"))
     logging.info("Sleeping for %i seconds" % pause_seconds)
     time.sleep(pause_seconds)
Example #22
0
 def SleepFactory(self):
     """
     Pause for the number of seconds given by the "iterationtime" option.
     """
     pause_seconds = int(get_option("iterationtime"))
     logging.info("Sleeping for %i seconds" % pause_seconds)
     time.sleep(pause_seconds)