def test_sshcheckjob(self):
    """Check the ssh command built by SshCheckjob and its ``info`` attribute.

    An SshCheckjob is created for target master ``master1`` and user
    ``testuser`` with ``dry_run=True``; the test then verifies the
    command list produced by ``_command`` and that ``info`` is
    CheckjobInfo, which yields an empty mapping when called without
    arguments.
    """
    clusters = {
        'delcatty': {
            'path': '/opt/moab/bin/checkjob',
            'master': 'master15.delcatty.gent.vsc',
        },
        'phanpy': {
            'path': '/opt/moab/bin/checkjob',
            'master': 'master17.phanpy.gent.vsc',
        },
        'raichu': {
            'path': '/opt/moab/bin/checkjob',
            'master': 'master13.raichu.gent.vsc',
        },
        'golett': {
            'path': '/opt/moab/bin/checkjob',
            'master': 'master19.golett.gent.vsc',
        },
        'swalot': {
            'path': '/opt/moab/bin/checkjob',
            'master': 'master21.swalot.gent.vsc',
        },
    }

    checkjob = SshCheckjob(
        'master1',
        'testuser',
        clusters=clusters,
        cache_pickle=True,
        dry_run=True)

    # The requested binary is wrapped in "sudo ssh <user>@<master>".
    self.assertEqual(
        checkjob._command('/opt/moab/bin/showq'),
        ['sudo', 'ssh', 'testuser@master1', '/opt/moab/bin/showq'])

    # assertEqual is used throughout: assertEquals is a deprecated alias
    # that is no longer available in recent unittest versions.
    self.assertEqual(checkjob.info, CheckjobInfo)
    self.assertEqual(checkjob.info(), {})
def test_sshcheckjob(self):
    """Check the ssh command built by SshCheckjob and its ``info`` attribute.

    The fixture describes five clusters that all share the same checkjob
    binary path but have their own master node. The SshCheckjob under
    test is created with ``dry_run=True``.
    """
    # Every cluster in this fixture uses the same checkjob binary.
    checkjob_path = '/opt/moab/bin/checkjob'
    clusters = {
        'delcatty': {'path': checkjob_path, 'master': 'master15.delcatty.gent.vsc'},
        'phanpy': {'path': checkjob_path, 'master': 'master17.phanpy.gent.vsc'},
        'raichu': {'path': checkjob_path, 'master': 'master13.raichu.gent.vsc'},
        'golett': {'path': checkjob_path, 'master': 'master19.golett.gent.vsc'},
        'swalot': {'path': checkjob_path, 'master': 'master21.swalot.gent.vsc'},
    }

    checkjob = SshCheckjob('master1',
                           'testuser',
                           clusters=clusters,
                           cache_pickle=True,
                           dry_run=True)

    # The requested binary is wrapped in "sudo ssh <user>@<master>".
    expected_command = ['sudo', 'ssh', 'testuser@master1', '/opt/moab/bin/showq']
    self.assertEqual(checkjob._command('/opt/moab/bin/showq'), expected_command)

    # Use assertEqual consistently; assertEquals is a deprecated alias.
    self.assertEqual(checkjob.info, CheckjobInfo)
    self.assertEqual(checkjob.info(), {})
def main():
    """Gather checkjob information for all active users and cache it per user.

    For every host given on the command line the master and checkjob path
    are read from the configuration file. The combined job information is
    obtained through SshCheckjob and, for each user that appears in it, a
    CheckjobInfo holding only that user's jobs is handed to store_on_gpfs.

    A failure to store the data of a single user is logged and counted;
    any other exception is logged, reported through ``opts.critical`` and
    ends the script with NAGIOS_EXIT_CRITICAL.
    """
    # Collect all info

    # Note: debug option is provided by generaloption
    # Note: other settings, e.g., for each cluster will be obtained from the configuration file
    options = {
        'nagios-check-interval-threshold': NAGIOS_CHECK_INTERVAL_THRESHOLD,
        'hosts': ('the hosts/clusters that should be contacted for job information', None, 'extend', []),
        'location': ('the location for storing the pickle file: delcatty, muk', str, 'store', 'delcatty'),
        'access_token': ('the token that will allow authentication against the account page', None, 'store', None),
        'account_page_url': ('', None, 'store', None),
        'target_master': ('the master used to execute showq commands', None, 'store', None),
        'target_user': ('the user for ssh to the target master', None, 'store', None),
    }

    opts = ExtendedSimpleOption(options)

    try:
        rest_client = AccountpageClient(token=opts.options.access_token)

        gpfs = GpfsOperations()
        storage = VscStorage()
        storage_name = cluster_user_pickle_store_map[opts.options.location]
        login_mount_point = storage[storage_name].login_mount_point
        gpfs_mount_point = storage[storage_name].gpfs_mount_point

        # Per-cluster settings come from the section named after the host.
        clusters = {}
        for host in opts.options.hosts:
            master = opts.configfile_parser.get(host, "master")
            checkjob_path = opts.configfile_parser.get(host, "checkjob_path")
            clusters[host] = {
                'master': master,
                'path': checkjob_path,
            }

        checkjob = SshCheckjob(
            opts.options.target_master,
            opts.options.target_user,
            clusters,
            cache_pickle=True,
            dry_run=opts.options.dry_run)

        (job_information, _, _) = checkjob.get_moab_command_information()

        active_users = job_information.keys()

        # Arguments are passed to the logger instead of pre-formatting the
        # message, so the string is only built when the record is emitted.
        logger.debug("Active users: %s", active_users)
        logger.debug("Checkjob information: %s", job_information)

        nagios_user_count = 0
        nagios_no_store = 0

        # NOTE(review): stats is filled in below but not consumed in the part
        # of this function visible here -- presumably it is meant to be
        # reported to the monitoring side; confirm.
        stats = {}

        for user in active_users:
            path = get_pickle_path(opts.options.location, user, rest_client)
            try:
                user_queue_information = CheckjobInfo({user: job_information[user]})
                store_on_gpfs(user, path, "checkjob", user_queue_information, gpfs, login_mount_point,
                              gpfs_mount_point, ".checkjob.json.gz", opts.options.dry_run)
                nagios_user_count += 1
            except Exception:
                # One user failing must not stop the others; count it instead.
                logger.exception("Could not store cache file for user %s", user)
                nagios_no_store += 1

        stats["store_users"] = nagios_user_count
        stats["store_fail"] = nagios_no_store
        stats["store_fail_critical"] = STORE_LIMIT_CRITICAL
    except Exception as err:
        # "except X as err" replaces the Python 2-only "except X, err" form,
        # which is a syntax error on Python 3.
        logger.exception("critical exception caught: %s", err)
        opts.critical("Script failed in a horrible way")
        sys.exit(NAGIOS_EXIT_CRITICAL)
def main():
    """Gather checkjob information for all active users and cache it per user.

    The master and checkjob path of every requested host are read from the
    configuration file, the job information is fetched through SshCheckjob,
    and for each user present in that information a CheckjobInfo limited to
    that user is passed to store_on_gpfs.

    Storing failures for individual users are logged and counted. Any other
    exception is logged, reported through ``opts.critical`` and terminates
    the script with NAGIOS_EXIT_CRITICAL.
    """
    # Collect all info

    # Note: debug option is provided by generaloption
    # Note: other settings, e.g., for each cluster will be obtained from the configuration file
    options = {
        'nagios-check-interval-threshold': NAGIOS_CHECK_INTERVAL_THRESHOLD,
        'hosts': ('the hosts/clusters that should be contacted for job information', None, 'extend', []),
        'location': ('the location for storing the pickle file: delcatty, muk', str, 'store', 'delcatty'),
        'access_token': ('the token that will allow authentication against the account page', None, 'store', None),
        'account_page_url': ('', None, 'store', None),
        'target_master': ('the master used to execute showq commands', None, 'store', None),
        'target_user': ('the user for ssh to the target master', None, 'store', None),
    }

    opts = ExtendedSimpleOption(options)

    try:
        rest_client = AccountpageClient(token=opts.options.access_token)

        gpfs = GpfsOperations()
        storage = VscStorage()
        storage_name = cluster_user_pickle_store_map[opts.options.location]
        login_mount_point = storage[storage_name].login_mount_point
        gpfs_mount_point = storage[storage_name].gpfs_mount_point

        # Each host has its own configuration section with these two keys.
        clusters = {}
        for host in opts.options.hosts:
            master = opts.configfile_parser.get(host, "master")
            checkjob_path = opts.configfile_parser.get(host, "checkjob_path")
            clusters[host] = {'master': master, 'path': checkjob_path}

        checkjob = SshCheckjob(opts.options.target_master,
                               opts.options.target_user,
                               clusters,
                               cache_pickle=True,
                               dry_run=opts.options.dry_run)

        (job_information, _, _) = checkjob.get_moab_command_information()

        active_users = job_information.keys()

        # Let the logger do the formatting so it only happens when needed.
        logger.debug("Active users: %s", active_users)
        logger.debug("Checkjob information: %s", job_information)

        nagios_user_count = 0
        nagios_no_store = 0

        # NOTE(review): stats is populated but not read in the visible part of
        # this function -- presumably intended for a monitoring report; confirm.
        stats = {}

        for user in active_users:
            path = get_pickle_path(opts.options.location, user, rest_client)
            try:
                user_queue_information = CheckjobInfo(
                    {user: job_information[user]})
                store_on_gpfs(user, path, "checkjob", user_queue_information, gpfs, login_mount_point,
                              gpfs_mount_point, ".checkjob.json.gz", opts.options.dry_run)
                nagios_user_count += 1
            except Exception:
                # Keep going for the remaining users; just record the failure.
                logger.exception("Could not store cache file for user %s", user)
                nagios_no_store += 1

        stats["store_users"] = nagios_user_count
        stats["store_fail"] = nagios_no_store
        stats["store_fail_critical"] = STORE_LIMIT_CRITICAL
    except Exception as err:
        # The "as" form is valid on Python 2.6+ and Python 3, unlike the
        # comma form, which only parses on Python 2.
        logger.exception("critical exception caught: %s", err)
        opts.critical("Script failed in a horrible way")
        sys.exit(NAGIOS_EXIT_CRITICAL)