Exemple #1
0
def main():
    """Configure logging, wait for a Pub/Sub topic, then serve messages.

    The metadata-derived config is re-read (with ``sleep(300)`` between
    attempts) until it carries a ``pubsub_topic_id``.  After that a
    streaming pull is started on ``subscription_path`` and this function
    blocks on it.  If the pull fails, the error is logged and the pull is
    cancelled and waited on once more.
    """
    config_root_logger(
        filename,
        level="DEBUG",
        util_level="DEBUG",
        logfile=logfile,
    )
    sys.excepthook = handle_exception

    # Poll the metadata config until a topic id shows up.
    while True:
        util.cfg = util.config_from_metadata()
        if util.cfg.pubsub_topic_id:
            break
        sleep(300)

    pull_future = subscriber.subscribe(subscription_path, callback=callback)
    log.info(f"Listening for messages on '{subscription_path}'...")

    with subscriber:
        try:
            pull_future.result()
        except Exception as err:
            log.error(err)
            # Request shutdown of the pull, then wait on the future again.
            pull_future.cancel()
            pull_future.result()
Exemple #2
0
from time import sleep
import setup
import sys
import util
from collections import namedtuple
from pathlib import Path
from google.cloud import pubsub_v1
from util import project, lkp, cfg
from util import config_root_logger, handle_exception, run, publish_message

# Name of this script file; passed to config_root_logger and used as the
# logger name below.
filename = Path(__file__).name
# Log file path: cfg.slurm_log_dir when cfg is truthy, otherwise the current
# directory, with the script's suffix replaced by ".log".
logfile = (Path(cfg.slurm_log_dir if cfg else ".") /
           filename).with_suffix(".log")
# NOTE(review): presumably hands the log file to the slurm user with
# owner-only (0o600) permissions -- confirm against util.chown_slurm.
util.chown_slurm(logfile, mode=0o600)
config_root_logger(filename,
                   level="DEBUG",
                   util_level="DEBUG",
                   logfile=logfile)
# NOTE(review): `logging` is not among the imports visible in this excerpt --
# confirm it is imported at the top of the file.
log = logging.getLogger(filename)

# The subscription id is this host's name as reported by lkp.hostname.
project_id = project
subscription_id = lkp.hostname

# Client and fully-qualified subscription path built from the ids above.
subscriber = pubsub_v1.SubscriberClient()
subscription_path = subscriber.subscription_path(project_id, subscription_id)

# Two-field record with fields `base` and `flags`.
StateTuple = namedtuple("StateTuple", "base,flags")


def natural_keys(text):
    """String sorting heuristic function for numbers"""
    def atoi(text):
Exemple #3
0
if __name__ == '__main__':
    # Command line: one or more positionals ("nodes [jobid]") and --debug.
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument('args', nargs='+', help="nodes [jobid]")
    parser.add_argument(
        '--debug', '-d',
        dest='debug',
        action='store_true',
        help='Enable debugging output',
    )

    # When SLURM_JOB_NODELIST is set, the nodelist and job id from the
    # environment are appended after the command-line arguments.
    if "SLURM_JOB_NODELIST" in os.environ:
        args = parser.parse_args(
            sys.argv[1:] + [
                os.environ['SLURM_JOB_NODELIST'],
                os.environ['SLURM_JOB_ID'],
            ]
        )
    else:
        args = parser.parse_args()

    # First positional is the node list; the optional second is the job id
    # (0 when absent).
    nodes = args.args[0]
    job_id = args.args[1] if len(args.args) > 1 else 0

    # --debug selects DEBUG/DEBUG instead of INFO/ERROR for level/util_level.
    util.config_root_logger(
        level='DEBUG' if args.debug else 'INFO',
        util_level='DEBUG' if args.debug else 'ERROR',
        logfile=LOGFILE,
    )
    log = logging.getLogger(Path(__file__).name)
    sys.excepthook = util.handle_exception

    main(nodes, job_id)
Exemple #4
0
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    # Required cluster-name filter, optional partition filter, debug switch.
    parser.add_argument("slurm_cluster_name", help="Slurm cluster name filter")
    parser.add_argument(
        "--partition", "-p",
        dest="partition_name",
        help="Slurm partition name filter",
    )
    parser.add_argument(
        "--debug", "-d",
        dest="debug",
        action="store_true",
        help="Enable debugging output",
    )

    args = parser.parse_args()

    # The log file sits next to this script, named after logger_name with a
    # ".log" suffix.
    logfile = (Path(__file__).parent / logger_name).with_suffix(".log")
    # --debug selects DEBUG/DEBUG instead of INFO/ERROR for level/util_level.
    config_root_logger(
        logger_name,
        level="DEBUG" if args.debug else "INFO",
        util_level="DEBUG" if args.debug else "ERROR",
        logfile=logfile,
    )

    main(args)
Exemple #5
0
    help="Force attempted creation of the nodelist, whether nodes are exclusive or not.",
)
parser.add_argument(
    "--debug",
    "-d",
    dest="debug",
    action="store_true",
    help="Enable debugging output",
)


if __name__ == "__main__":
    # Start from the command-line arguments; when SLURM_JOB_NODELIST is set,
    # the nodelist and job id from the environment are appended to them.
    argv = sys.argv[1:]
    if "SLURM_JOB_NODELIST" in os.environ:
        argv += [os.environ["SLURM_JOB_NODELIST"], os.environ["SLURM_JOB_ID"]]
    args = parser.parse_args(argv)

    util.chown_slurm(LOGFILE, mode=0o600)
    # --debug selects DEBUG/DEBUG instead of INFO/ERROR for level/util_level.
    util.config_root_logger(
        filename,
        level="DEBUG" if args.debug else "INFO",
        util_level="DEBUG" if args.debug else "ERROR",
        logfile=LOGFILE,
    )
    sys.excepthook = util.handle_exception

    main(args.nodelist, args.job_id, args.force)
Exemple #6
0
        {
            "controller": setup_controller,
            "compute": setup_compute,
            "login": setup_login,
        },
        lkp.instance_role,
        lambda: log.fatal(f"Unknown node role: {lkp.instance_role}"),
    )
    setup()

    end_motd()


def captured_text(stream):
    """Return captured subprocess output as stripped text.

    ``subprocess.TimeoutExpired.stdout`` / ``.stderr`` are ``None`` when the
    stream was not captured (or nothing had been read before the timeout),
    and are ``bytes`` when something was captured.  Calling ``.strip()``
    directly on them would therefore raise ``AttributeError`` inside the
    error handler and hide the real timeout.

    Args:
        stream: ``None``, ``bytes`` or ``str`` taken from the exception.

    Returns:
        The stripped text, or ``""`` when ``stream`` is ``None``.
    """
    if stream is None:
        return ""
    if isinstance(stream, bytes):
        # Undecodable bytes must not break error reporting.
        stream = stream.decode(errors="replace")
    return stream.strip()


if __name__ == "__main__":
    util.chown_slurm(LOGFILE, mode=0o600)
    util.config_root_logger(filename, logfile=LOGFILE, util_level="DEBUG")
    sys.excepthook = util.handle_exception

    lkp = util.Lookup(cfg)  # noqa F811

    try:
        main()
    except subprocess.TimeoutExpired as e:
        # Report the command that timed out together with whatever output
        # was captured; missing streams are logged as empty text.
        log.error(f"""TimeoutExpired:
    command={e.cmd}
    timeout={e.timeout}
    stdout:
{captured_text(e.stdout)}
    stderr:
{captured_text(e.stderr)}
""")