def get_downtime_fd(entry_name, cmdname):
    """
    Build the downtime file object named in the factory description.

    New-style configurations keep everything in the factory file, so the
    same 'DowntimesFile' value is used whatever entry_name is passed.

    @param entry_name: Name the caller asked for; only used in the error
        message
    @param cmdname: Not used by this function

    @return: glideFactoryDowntimeLib.DowntimeFile built from the
        'DowntimesFile' value of the glidein description
    @raise RuntimeError: if loading the glidein description raises IOError
    """
    try:
        descript = glideFactoryConfig.GlideinDescript()
    except IOError:
        raise RuntimeError("Failed to load config for %s" % entry_name)

    downtimes_path = descript.data['DowntimesFile']
    return glideFactoryDowntimeLib.DowntimeFile(downtimes_path)
def get_downtime_fd_dict(entry_or_id, cmdname, opt_dict):
    """
    Map entry names to their downtime file objects.

    @param entry_or_id: A single entry name, or one of the special values
        'entries' / 'All' to expand to every entry listed in the glidein
        description
    @param cmdname: Passed through to get_downtime_fd
    @param opt_dict: Options container; only membership of the key
        "entries" is checked

    @return: dict of name -> object returned by get_downtime_fd. With 'All'
        and no "entries" option, an extra 'factory' key is included.
    """
    # Single name: nothing to expand
    if entry_or_id not in ('entries', 'All'):
        return {entry_or_id: get_downtime_fd(entry_or_id, cmdname)}

    glideinDescript = glideFactoryConfig.GlideinDescript()
    out_fds = {}
    # Use the str method: string.split() does not exist on Python 3
    for entry in glideinDescript.data['Entries'].split(','):
        out_fds[entry] = get_downtime_fd(entry, cmdname)

    if (entry_or_id == 'All') and ("entries" not in opt_dict):
        out_fds['factory'] = get_downtime_fd('factory', cmdname)

    return out_fds
Exemple #3
0
def parse_args():
    """
    Parse sys.argv into a GlideinDescript carrying the requested date/time.

    Expected arguments: sys.argv[1] is the factory directory, sys.argv[2] a
    date (parsed by gWftArgsHelper.parse_date) and the optional sys.argv[3]
    a time (parsed by gWftArgsHelper.parse_time; (0, 0, 0) when absent).

    Side effect: glideFactoryConfig.factoryConfig.glidein_descript_file is
    rewritten to live under the factory directory.

    @return: GlideinDescript with factory_dir, date_arr and time_arr set
    @raise ValueError: if fewer than two arguments are given, or the
        glidein description cannot be loaded from the factory directory
    """
    if len(sys.argv) < 3:
        raise ValueError("Not enough arguments!")

    factory_dir = sys.argv[1]
    try:
        glideFactoryConfig.factoryConfig.glidein_descript_file = os.path.join(
            factory_dir,
            glideFactoryConfig.factoryConfig.glidein_descript_file)
        glideinDescript = glideFactoryConfig.GlideinDescript()
    except Exception:
        # Deliberately not a bare except: KeyboardInterrupt / SystemExit
        # must not be reported as "not a factory"
        raise ValueError("%s is not a factory!" % factory_dir)

    glideinDescript.factory_dir = factory_dir
    glideinDescript.date_arr = gWftArgsHelper.parse_date(sys.argv[2])
    if len(sys.argv) >= 4:
        glideinDescript.time_arr = gWftArgsHelper.parse_time(sys.argv[3])
    else:
        glideinDescript.time_arr = (0, 0, 0)

    return glideinDescript
def main(parent_pid, sleep_time, advertize_rate, startup_dir, entry_names,
         group_id):
    """
    GlideinFactoryEntryGroup main function

    Setup logging, monitoring, and configuration information. Starts the Entry
    group main loop and handles cleanup at shutdown.

    @type parent_pid: int
    @param parent_pid: The pid for the Factory daemon

    @type sleep_time: int
    @param sleep_time: The number of seconds to sleep between iterations

    @type advertize_rate: int
    @param advertize_rate: The rate at which advertising should occur

    @type startup_dir: string
    @param startup_dir: The "home" directory for the entry.

    @type entry_names: string
    @param entry_names: Colon-separated names of the entries this process
        should work on

    @type group_id: string
    @param group_id: Group id

    @raise RuntimeError: if a name in entry_names is not listed in the
        'Entries' value of the glidein description
    """

    # Assume name to be group_[0,1,2] etc. Only required to create log_dir
    # where tasks common to the group will be stored. There is no other
    # significance to the group_name and number of entries supported by a group
    # can change between factory reconfigs

    group_name = "group_%s" % group_id

    os.chdir(startup_dir)

    # Setup the lock_dir
    gfi.factoryConfig.lock_dir = os.path.join(startup_dir, "lock")

    # Read information about the glidein and frontends
    glideinDescript = gfc.GlideinDescript()
    frontendDescript = gfc.FrontendDescript()

    # set factory_collector at a global level, since we do not expect it to change
    gfi.factoryConfig.factory_collector = glideinDescript.data[
        'FactoryCollector']

    # Load factory keys
    glideinDescript.load_pub_key()
    glideinDescript.load_old_rsa_key()

    # Dictionary of Entry objects this group will process
    my_entries = {}
    glidein_entries = glideinDescript.data['Entries']

    # Initiate the logs
    logSupport.log_dir = os.path.join(glideinDescript.data['LogDir'],
                                      'factory')
    # NOTE(review): eval() executes whatever expression is stored in the
    # description file; acceptable only while that file is trusted
    process_logs = eval(glideinDescript.data['ProcessLogs'])
    init_logs(group_name, logSupport.log_dir, process_logs)

    logSupport.log.info("Starting up")
    logSupport.log.info("Entries processed by %s: %s " %
                        (group_name, entry_names))

    # Split once, outside the loop. The str method is used because
    # string.split() does not exist on Python 3.
    configured_entries = glidein_entries.split(',')

    # Check if all the entries in this group are valid
    for entry in entry_names.split(':'):
        if entry not in configured_entries:
            msg = "Entry '%s' not configured: %s" % (entry, glidein_entries)
            logSupport.log.warning(msg)
            raise RuntimeError(msg)

        # Create entry objects
        my_entries[entry] = glideFactoryEntry.Entry(entry, startup_dir,
                                                    glideinDescript,
                                                    frontendDescript)

    # Create lock file for this group and register its parent
    pid_obj = glideFactoryPidLib.EntryGroupPidSupport(startup_dir, group_name)
    pid_obj.register(parent_pid)

    # The outer try guarantees the lock is released even if the "Dying"
    # log call itself fails.
    try:
        try:
            iterate(parent_pid, sleep_time, advertize_rate,
                    glideinDescript, frontendDescript, group_name,
                    my_entries)
        except KeyboardInterrupt:
            logSupport.log.info("Received signal...exit")
        except:
            # Catch-all is intentional: log whatever happened, then re-raise
            logSupport.log.exception("Exception occurred in iterate: ")
            raise
        finally:
            # No need to cleanup. The parent should be doing it
            logSupport.log.info("Dying")
    finally:
        pid_obj.relinquish()
Exemple #5
0
def main():
    """
    Manually submit one glidein for a given entry on behalf of a frontend.

    Changes to the factory work directory, looks up the frontend's
    glideclient classad in the WMS collector, decrypts the security class
    and proxy id from it, builds the credentials, client web data and
    parameters, then calls submitGlideins for a single glidein.

    @return: 0 after the submission call returns; 1 if the work directory
        cannot be entered or no glideclient classad matches
    """
    # Move to the working directory
    work_dir = os.environ.get("GLIDEIN_FACTORY_DIR",
                              "/var/lib/gwms-factory/work-dir/")
    try:
        os.chdir(work_dir)
    except OSError as ose:
        # Report the directory actually tried, which may come from the
        # environment rather than being the default
        logging.error("Cannot chdir to %s: %s", work_dir, ose)
        return 1

    # Parse command line options
    options = parse_opts()
    entry_name = options.entry_name
    wms_collector = options.wms_collector

    # Set some variables needed later on
    params = {}
    status_sf = {}
    nr_glideins = 1
    idle_lifetime = 3600 * 24
    factory_config = FactoryConfig()
    glidein_descript = gfc.GlideinDescript()
    frontend_descript = gfc.FrontendDescript()
    collector = htcondor.Collector(wms_collector)

    req_name = get_reqname(collector, options.fe_name, entry_name)
    logging.debug("Using reques name %s", req_name)

    # NOTE(review): submit_dir stays hard-coded even when
    # GLIDEIN_FACTORY_DIR points elsewhere - confirm this is intended
    factory_config.submit_dir = '/var/lib/gwms-factory/work-dir'
    constraint_gc = '(MyType=="glideclient") && (Name=="%s")' % (req_name)

    ads_gc = collector.query(htcondor.AdTypes.Any, constraint_gc)
    if not ads_gc:
        logging.error("Cannot find glideclient classad using constraint %s",
                      constraint_gc)
        return 1

    # Only the first matching classad is used
    ad_gc = ads_gc[0]
    log_debug(ad_gc, header='glideclient classad')

    # Load factory config and get some info that will go in the pilot classad
    glidein_descript.load_pub_key()
    sym_key_obj, frontend_sec_name = validate_frontend(
        ad_gc, frontend_descript, glidein_descript.data['PubKeyObj'])
    security_class = sym_key_obj.decrypt_hex(
        ad_gc['GlideinEncParamSecurityClass'])  # GlideinSecurityClass
    proxyid = sym_key_obj.decrypt_hex(ad_gc['GlideinEncParamSubmitProxy'])
    user_name = frontend_descript.get_username(frontend_sec_name,
                                               security_class)

    # Prepare some values that ends up in the Arguments classad
    # of the pilot, i.e., the ClientWeb instance
    client_web_url = ad_gc['WebURL']  # -clientweb
    client_signtype = ad_gc['WebSignType']  # -signtype
    client_descript = ad_gc['WebDescriptFile']  # -clientdescript
    client_sign = ad_gc['WebDescriptSign']  # -clientsign
    client_group = ad_gc['GroupName']  # -clientgroup
    client_group_web_url = ad_gc['WebGroupURL']  # -clientwebgroup
    # -clientdescriptgroup
    client_group_descript = ad_gc['WebGroupDescriptFile']
    client_group_sign = ad_gc['WebGroupDescriptSign']  # -clientsigngroup
    client_web = ClientWeb(client_web_url, client_signtype,
                           client_descript, client_sign, client_group,
                           client_group_web_url, client_group_descript,
                           client_group_sign)

    # Create the submit_credentials object
    credentials = SubmitCredentials(user_name, security_class)
    credentials.id = proxyid
    credentials.cred_dir = '/var/lib/gwms-factory/client-proxies/user_%s/glidein_gfactory_instance' % user_name
    credfname = '%s_%s' % (ad_gc['ClientName'], proxyid)
    if not credentials.add_security_credential('SubmitProxy', credfname):
        # Best effort: the problem is only logged and submission continues
        fname = os.path.join(credentials.cred_dir,
                             'credential_%s' % credfname)
        logging.info((
            "Problems getting credential file using credentials.add_security_credential."
            " Check file %s permissions"), fname)

    # Set the arguments
    # I was using escapeParam for GLIDECLIENT_ReqNode and GLIDECLIENT_Collector but turned out it's not necessary
    params['CONDOR_VERSION'] = 'default'
    params['CONDOR_OS'] = 'default'
    params['CONDOR_ARCH'] = 'default'
    params['GLIDECLIENT_ReqNode'] = ad_gc[
        'GlideinParamGLIDECLIENT_ReqNode']
    params['GLIDECLIENT_Rank'] = ad_gc.get('GlideinParamGLIDECLIENT_Rank',
                                           "1")
    params['GLIDEIN_Collector'] = ad_gc['GlideinParamGLIDEIN_Collector']
    params['USE_MATCH_AUTH'] = ad_gc['GlideinParamUSE_MATCH_AUTH']
    params['Report_Failed'] = 'NEVER'

    # Now that we have everything submit the pilot!
    logging.getLogger().setLevel(logging.DEBUG)
    submitGlideins(entry_name,
                   "test.test",
                   int(nr_glideins),
                   idle_lifetime,
                   "test:test",
                   credentials,
                   client_web,
                   params,
                   status_sf,
                   log=logging.getLogger(),
                   factoryConfig=factory_config)

    return 0
Exemple #6
0
def main(startup_dir):
    """
    Reads in the configuration file and starts up the factory

    Steps, in order: configure global factory settings from the glidein
    description, set up process logging, exit if no entries are enabled,
    load (and possibly rotate) the factory RSA keys, take the factory pid
    lock, then call spawn(). On HUPException the lock is released and the
    process is replaced (os.execv) by reconfig_glidein.

    @type startup_dir: String
    @param startup_dir: Path to glideinsubmit directory

    @raise Exception: failures in chdir, key loading and pid registration
        are logged and re-raised. Exceptions out of spawn() other than
        KeyboardInterrupt and HUPException are logged and NOT re-raised.
    """
    # Force integrity checks on all condor operations
    glideFactoryLib.set_condor_integrity_checks()

    # Point the lock dir and the glidein description file at startup_dir
    # before any description object is created
    glideFactoryInterface.factoryConfig.lock_dir = os.path.join(startup_dir,
                                                                "lock")
    glideFactoryConfig.factoryConfig.glidein_descript_file = \
        os.path.join(startup_dir,
                     glideFactoryConfig.factoryConfig.glidein_descript_file)
    glideinDescript = glideFactoryConfig.GlideinDescript()
    frontendDescript = glideFactoryConfig.FrontendDescript()

    # set factory_collector at a global level, since we do not expect it to change
    glideFactoryInterface.factoryConfig.factory_collector = glideinDescript.data['FactoryCollector']

    # Setup the glideFactoryLib.factoryConfig so that we can process the
    # globals classads
    glideFactoryLib.factoryConfig.config_whoamI(
        glideinDescript.data['FactoryName'],
        glideinDescript.data['GlideinName'])
    glideFactoryLib.factoryConfig.config_dirs(
        startup_dir, glideinDescript.data['LogDir'],
        glideinDescript.data['ClientLogBaseDir'],
        glideinDescript.data['ClientProxiesBaseDir'])

    # Set the Log directory
    logSupport.log_dir = os.path.join(glideinDescript.data['LogDir'], "factory")

    # Configure factory process logging
    # NOTE(review): eval() executes whatever expression is stored in the
    # description file; acceptable only while that file is trusted
    process_logs = eval(glideinDescript.data['ProcessLogs'])
    for plog in process_logs:
        # Logs whose msg_types mention ADMIN go to a separate "factoryadmin"
        # handler with a fixed set of levels; the rest go to "factory"
        if 'ADMIN' in plog['msg_types'].upper():
            logSupport.add_processlog_handler("factoryadmin",
                                              logSupport.log_dir,
                                              "DEBUG,INFO,WARN,ERR",
                                              plog['extension'],
                                              int(float(plog['max_days'])),
                                              int(float(plog['min_days'])),
                                              int(float(plog['max_mbytes'])),
                                              int(float(plog['backup_count'])),
                                              plog['compression'])
        else:
            logSupport.add_processlog_handler("factory",
                                              logSupport.log_dir,
                                              plog['msg_types'],
                                              plog['extension'],
                                              int(float(plog['max_days'])),
                                              int(float(plog['min_days'])),
                                              int(float(plog['max_mbytes'])),
                                              int(float(plog['backup_count'])),
                                              plog['compression'])
    logSupport.log = logging.getLogger("factory")
    logSupport.log.info("Logging initialized")

    if (glideinDescript.data['Entries'].strip() in ('', ',')):
        # No entries are enabled. There is nothing to do. Just exit here.
        log_msg = "No Entries are enabled. Exiting."

        logSupport.log.error(log_msg)
        sys.exit(1)

    # Pass both descriptions and the monitor directory to write_descript
    write_descript(glideinDescript, frontendDescript, os.path.join(startup_dir, 'monitor/'))

    try:
        os.chdir(startup_dir)
    except:
        logSupport.log.exception("Failed starting Factory. Unable to change to startup_dir: ")
        raise

    # Load the factory keys; every failure path below logs and re-raises
    try:
        if (is_file_old(glideinDescript.default_rsakey_fname,
                        int(glideinDescript.data['OldPubKeyGraceTime']))):
            # First backup and load any existing key
            logSupport.log.info("Backing up and loading old key")
            glideinDescript.backup_and_load_old_key()
            # Create a new key for this run
            logSupport.log.info("Recreating and loading new key")
            glideinDescript.load_pub_key(recreate=True)
        else:
            # Key is recent enough. Just reuse it.
            logSupport.log.info("Key is recent enough, reusing for this run")
            glideinDescript.load_pub_key(recreate=False)
            logSupport.log.info("Loading old key")
            glideinDescript.load_old_rsa_key()
    except RSAError as e:
        logSupport.log.exception("Failed starting Factory. Exception occurred loading factory keys: ")
        # key_fname / cwd are optional attributes on the exception; the
        # extra hints are only logged when both are present
        key_fname = getattr(e, 'key_fname', None)
        cwd = getattr(e, 'cwd', None)
        if key_fname and cwd:
            logSupport.log.error("Failed to load RSA key %s with current working direcotry %s", key_fname, cwd)
            logSupport.log.error("If you think the rsa key might be corrupted, try to remove it, and then reconfigure the factory to recreate it")
        raise
    except IOError as ioe:
        logSupport.log.exception("Failed starting Factory. Exception occurred loading factory keys: ")
        # errno 2 is ENOENT (no such file or directory)
        if ioe.filename == 'rsa.key' and ioe.errno == 2:
             logSupport.log.error("Missing rsa.key file. Please, reconfigure the factory to recreate it")
        raise
    except:
        logSupport.log.exception("Failed starting Factory. Exception occurred loading factory keys: ")
        raise

    # Monitoring name is "<GlideinName>@<FactoryName>"
    glideFactoryMonitorAggregator.glideFactoryMonitoring.monitoringConfig.my_name = "%s@%s" % (glideinDescript.data['GlideinName'],
               glideinDescript.data['FactoryName'])

    # Boolean options are stored as strings; only 'True' and '1' enable them
    glideFactoryInterface.factoryConfig.advertise_use_tcp = (glideinDescript.data['AdvertiseWithTCP'] in ('True', '1'))
    glideFactoryInterface.factoryConfig.advertise_use_multi = (glideinDescript.data['AdvertiseWithMultiple'] in ('True', '1'))
    sleep_time = int(glideinDescript.data['LoopDelay'])
    advertize_rate = int(glideinDescript.data['AdvertiseDelay'])
    restart_attempts = int(glideinDescript.data['RestartAttempts'])
    restart_interval = int(glideinDescript.data['RestartInterval'])

    # Best effort: a failure to determine the version is logged and ignored
    try:
        glideinwms_dir = os.path.dirname(os.path.dirname(sys.argv[0]))
        glideFactoryInterface.factoryConfig.glideinwms_version = glideinWMSVersion.GlideinWMSDistro(glideinwms_dir, 'checksum.factory').version()
    except:
        logSupport.log.exception("Non critical Factory error. Exception occurred while trying to retrieve the glideinwms version: ")

    entries = sorted(glideinDescript.data['Entries'].split(','))

    glideFactoryMonitorAggregator.monitorAggregatorConfig.config_factory(
        os.path.join(startup_dir, "monitor"), entries,
        log=logSupport.log
    )

    # create lock file
    pid_obj = glideFactoryPidLib.FactoryPidSupport(startup_dir)

    increase_process_limit()

    # start
    try:
        pid_obj.register()
    except glideFactoryPidLib.pidSupport.AlreadyRunning as err:
        # Load the registered pid so it can be reported in the log message
        pid_obj.load_registered()
        logSupport.log.exception("Failed starting Factory. Instance with pid %s is aready running. Exception during pid registration: %s" %
                                 (pid_obj.mypid, err))
        raise
    # The outer try/finally releases the pid lock however spawn() ends
    try:
        try:
            spawn(sleep_time, advertize_rate, startup_dir, glideinDescript,
                  frontendDescript, entries, restart_attempts, restart_interval)
        except KeyboardInterrupt as e:
            # Propagate instead of falling into the catch-all below
            raise e
        except HUPException as e:
            # inside spawn(), outermost try will catch HUPException,
            # then the code within the finally will run
            # which will terminate glideFactoryEntryGroup children processes
            # and then the following 3 lines will be executed.
            logSupport.log.info("Received SIGHUP, reload config uid = %d" % os.getuid())
            # must empty the lock file so that when the thread returns from reconfig_glidein and
            # begins from the beginning, it will not error out which will happen
            # if the lock file is not empty
            pid_obj.relinquish()
            # os.execv replaces the current process; it only returns (by
            # raising) if the exec itself fails
            os.execv(os.path.join(FACTORY_DIR, "../creation/reconfig_glidein"),
                     ['reconfig_glidein', '-update_scripts', 'no', '-sighupreload', '-xml', '/etc/gwms-factory/glideinWMS.xml'])
        except:
            # Logged but not re-raised: the function returns normally after
            # the finally clause below
            logSupport.log.exception("Exception occurred spawning the factory: ")
    finally:
        pid_obj.relinquish()
def infosys_based(entry_name, opt_dict, infosys_types):
    """
    Bring entries up or down according to what the information systems say.

    Entries without an infosys reference of one of the requested types are
    ignored. Each remaining entry is passed to up() if it is reported as in
    production, and to down() otherwise.

    @param entry_name: A single entry name, 'entries' / 'all' for every
        entry in the glidein description, or 'factory' (no-op)
    @param opt_dict: Not used by this function
    @param infosys_types: Collection of infosys type names to consider;
        "RESS" and "BDII" are the types handled here

    @return: always 0
    @raise RuntimeError: if a selected infosys type is neither "RESS" nor
        "BDII"
    """
    if entry_name == 'factory':
        return 0  # nothing to do... the whole factory cannot be controlled by infosys

    # find out which entries I need to look at
    config_els = {}
    if entry_name in ('entries', 'all'):
        # all==entries in this case, since there is nothing to do for the factory
        glideinDescript = glideFactoryConfig.GlideinDescript()
        # Use the str method: string.split() does not exist on Python 3
        for entry in glideinDescript.data['Entries'].split(','):
            config_els[entry] = {}
    else:
        config_els[entry_name] = {}

    # load the infosys info
    # Iterate over a snapshot of the keys: entries are deleted inside the
    # loop, which is an error when iterating a live dict view on Python 3
    for entry in list(config_els):
        infosys_fd = cgWDictFile.InfoSysDictFile(
            cgWConsts.get_entry_submit_dir('.', entry), cgWConsts.INFOSYS_FILE)
        infosys_fd.load()

        if len(infosys_fd.keys) == 0:
            # entry not associated with any infosys, cannot be managed, ignore
            del config_els[entry]
            continue

        compatible_infosys = any(infosys_fd[k][0] in infosys_types
                                 for k in infosys_fd.keys)
        if not compatible_infosys:
            # entry not associated with a compatible infosys, cannot be managed, ignore
            del config_els[entry]
            continue

        config_els[entry]['infosys_fd'] = infosys_fd

    if len(config_els) == 0:
        return 0  # nothing to do
    # all the remaining entries are handled by one of the supported infosys

    # summarize: infosys type -> server -> list of {'ref', 'entry_name'}
    infosys_data = {}
    for entry in config_els:
        infosys_fd = config_els[entry]['infosys_fd']
        for k in infosys_fd.keys:
            infosys_type = infosys_fd[k][0]
            server = infosys_fd[k][1]
            ref = infosys_fd[k][2]
            by_server = infosys_data.setdefault(infosys_type, {})
            by_server.setdefault(server, []).append(
                {'ref': ref, 'entry_name': entry})

    # get production entries
    production_entries = []
    for infosys_type, infosys_data_type in infosys_data.items():
        if infosys_type not in infosys_types:
            continue
        for server, infosys_data_server in infosys_data_type.items():
            if infosys_type == "RESS":
                production_entries += get_production_ress_entries(
                    server, infosys_data_server)
            elif infosys_type == "BDII":
                production_entries += get_production_bdii_entries(
                    server, infosys_data_server)
            else:
                raise RuntimeError("Unknown infosys type '%s'" %
                                   infosys_type)  # should never get here

    # Use the info to bring each remaining entry up or down
    for entry in sorted(config_els):
        if entry in production_entries:
            print("%s up" % entry)
            up(entry, ['up'])
        else:
            print("%s down" % entry)
            down(entry, ['down'])

    return 0
def get_entries(factory_dir):
    """
    Return the entry names listed in the glidein description.

    @param factory_dir: Not used; the description is loaded from wherever
        glideFactoryConfig is currently configured to look

    @return: list of the comma-separated names in the 'Entries' value
    """
    glideinDescript = glideFactoryConfig.GlideinDescript()
    # Use the str method: string.split() does not exist on Python 3
    return glideinDescript.data['Entries'].split(',')