def scheduler(args):
    db = whiplash.db(args.host, args.port, token=args.token)
    logging.info('%s batcher scheduler connected to db', args.user)

    while True:
        time_window = get_times(args, db)
        if time_window > 0:
            make_batches(db, time_window)
        time.sleep(1)
        if args.test:
            break
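# --- Sketch: stand-alone entry point for the batcher scheduler ---
# Not part of the original code. The flag names mirror the ones the admin
# scheduler passes to the batcher (--user, --token, --host, --port, --log_dir,
# --verbose); the argparse wiring itself is an assumption.
import argparse
import logging

def parse_args():
    parser = argparse.ArgumentParser(description='whiplash batcher scheduler (sketch)')
    parser.add_argument('--host', required=True)
    parser.add_argument('--port', type=int, required=True)
    parser.add_argument('--token', required=True)
    parser.add_argument('--user', required=True)
    parser.add_argument('--log_dir', default='.')
    parser.add_argument('--verbose', action='store_true')
    parser.add_argument('--test', action='store_true', help='run a single pass and exit')
    return parser.parse_args()

if __name__ == '__main__':
    args = parse_args()
    logging.basicConfig(level=logging.DEBUG if args.verbose else logging.INFO)
    scheduler(args)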
def __init__(self, server, port, username, password, use_pymongo=False):
    self.log_file = "benchmark_" + str(int(time.time())) + '.dat'
    self.log_handle = open(self.log_file, 'w')
    self.use_pymongo = use_pymongo
    if self.use_pymongo:
        client = pymongo.MongoClient(server, 27017)
        client.wdb.authenticate(username, password, mechanism='SCRAM-SHA-1')
        self.wdb = client.wdb
    else:
        self.wdb = whiplash.db(server, port, username=username, password=password, save_token=True)
def scheduler(args):
    end_time = time.time() + args.time_limit
    logging.info('local scheduler started')

    db = whiplash.db(args.host, args.port, token=args.token)
    logging.info('local scheduler connected to db')

    # never spawn more workers than available cores
    num_cpus = mp.cpu_count()
    if args.num_cpus is not None:
        num_cpus = min(args.num_cpus, num_cpus)
    assert num_cpus > 0

    is_work = check_for_work(db, end_time)

    logging.info('starting workers')
    context = mp.get_context('fork')
    procs = {}
    for pid in range(num_cpus):
        procs[pid] = context.Process(target=worker, args=(pid, db, args, end_time, is_work))
        procs[pid].start()

    while True:
        time.sleep(1)
        n_alive = 0
        for pid in procs:
            is_work = check_for_work(db, end_time)
            if is_work and not procs[pid].is_alive():
                # worker exited while there is still work: restart it
                logging.info('worker %i restarting', pid)
                procs[pid].join()
                procs[pid] = context.Process(target=worker, args=(pid, db, args, end_time, is_work))
                procs[pid].start()
                n_alive += 1
            elif procs[pid].is_alive():
                n_alive += 1
        if n_alive == 0:
            logging.info('stopping workers')
            for pid in procs:
                procs[pid].join()
            break

    logging.info('local scheduler shutting down')
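# --- Sketch: shape of the worker target spawned above ---
# Not part of the original code; only the signature (pid, db, args, end_time,
# is_work) is taken from the Process(...) call. The body is a placeholder for
# whatever the real worker does with a work batch.
def worker(pid, db, args, end_time, is_work):
    logging.info('worker %i started', pid)
    while is_work and time.time() < end_time:
        # the real implementation would pull a work batch from db and execute it here
        time.sleep(1)
        is_work = check_for_work(db, end_time)
    logging.info('worker %i finished', pid)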
def scheduler(args):
    db = whiplash.db(args.host, args.port, token=args.token)
    logging.info('slurm scheduler connected to db')

    while True:
        try:
            # count the user's pending whiplash jobs in the remote SLURM queue
            num_pending = int(sp.check_output(
                "ssh " + args.user + "@" + args.cluster +
                " 'squeue -u " + args.user + " | grep \" PD \" | grep \"whiplash\" | wc -l'",
                shell=True))
        except Exception:
            # if the ssh call or the parse fails, assume a job is pending
            # so that no additional job is submitted
            num_pending = 1
        if (db.collection('work_batches').count({}) > 0) and (num_pending == 0):
            submit_job(args)
        time.sleep(5)

    logging.info('slurm scheduler shutting down')
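# --- Sketch: pending-job count without shell=True ---
# Not part of the original code; an alternative way to run the same squeue
# pipeline where the remote command is passed to ssh as a single argument.
# The helper name is hypothetical.
import subprocess as sp

def count_pending_whiplash_jobs(user, cluster):
    remote_cmd = 'squeue -u {0} | grep " PD " | grep "whiplash" | wc -l'.format(user)
    try:
        out = sp.check_output(['ssh', '{0}@{1}'.format(user, cluster), remote_cmd])
        return int(out.strip())
    except (sp.CalledProcessError, ValueError):
        # on failure, report one pending job so no extra submission happens
        return 1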
#!/usr/bin/env python3
import sys, random

import whiplash

print("Login as test user")
db = whiplash.db("localhost", 1337, username="******", password="******")

print("Check the status of a query for the spin glass solver and instances")
filters = {
    'input_model': {"set": "test_set"},
    'executable': {"name": "an_ss_ge_fi_vdeg"},
    'params': {"n_sweeps": 100, "n_reps": 10, "seed": 0},
    'output_model': {}
}
print(db.status(filters))
#!/usr/bin/env python3
import sys, os, random

import whiplash
import whipbench as bench

print("Login")
db = whiplash.db(sys.argv[1], int(sys.argv[2]), username=sys.argv[3], password=sys.argv[4])

print("Reset database")
bench.reset_db(db)

print("Benchmarking collections")
sizes = [2]
numbers = [10, 100, 1000]
print("sizes:", sizes)
print("numbers:", numbers)

collections = [
    ['models', []],
    ['executables', ['name', 'description', 'algorithm', 'version', 'build', 'path', 'params']]
]

for collection, required_fields in collections:
    bench.commit(db, collection, sizes, numbers, required_fields=required_fields)
    bench.commit(db, collection, sizes, numbers, required_fields=required_fields)
    bench.count(db, collection, sizes, numbers)
    bench.query_collection(db, collection, sizes, numbers)
    bench.update(db, collection, sizes, numbers)
    if collection == 'models':
        bench.stats(db, collection, sizes, numbers)

print("Benchmarking submission")
bench.submit(db, sizes, numbers)
#!/usr/bin/env python3
import sys, os, json, random, copy

import whiplash

print("Read inputs")
host = sys.argv[1]
port = int(sys.argv[2])

print("Login as test")
db = whiplash.db(host, port, username="******", password="******")

print("Commit models")
n_models = 10
models = []
random.seed(0)
for k in range(n_models):
    # random fully connected spin-glass instance on N spins with couplings in [-1, 1)
    N = 5
    hamiltonian = []
    for i in range(N):
        for j in range(i + 1, N):
            value = 2.0 * random.random() - 1.0
            hamiltonian.append([[i, j], value])
    tags = {
        "n_spins": N,
        "name": "test_set_" + str(k),
        "description": "This is a test model",
        "set": "test_set",
        "format": "json"
    }
    model = {"content": {"edges": hamiltonian}}
    model.update(tags)
def scheduler(args):
    db = whiplash.db(args.host, args.port, username="******",
                     password=os.environ['WHIPLASH_ADMIN_PASSWORD'], save_token=True)
    logging.info('admin connected to db')

    time_limit = 24 * 3600
    schedulers = {}

    while True:
        all_users = get_users(args, db)
        if len(all_users) > 0:
            for db_user in all_users:
                username = db_user['username']
                # one batcher thread plus one local or slurm scheduler thread per user
                if username not in schedulers:
                    schedulers[username] = {'batcher': th.Thread()}
                    if args.cluster:
                        schedulers[username]['slurm'] = th.Thread()
                    else:
                        schedulers[username]['local'] = th.Thread()

                flags = " --user " + username + " --token " + db_user['token'] + \
                        " --host " + args.host + " --port " + str(args.port) + \
                        " --log_dir " + args.log_dir
                if args.verbose:
                    flags += " --verbose"

                if not schedulers[username]['batcher'].is_alive():
                    logging.info('starting batcher for user %s', username)
                    schedulers[username]['batcher'] = th.Thread(target=start_batcher, args=(args, flags))
                    schedulers[username]['batcher'].start()

                # the local/slurm scheduler takes additional flags on top of the batcher flags
                flags += " --num_cpus " + str(args.num_cpus) + \
                         " --work_dir " + db_user['work_dir'] + \
                         " --time_limit " + str(time_limit)
                if args.docker:
                    flags += " --docker"
                if args.dind:
                    flags += " --dind"

                if args.cluster:
                    if not schedulers[username]['slurm'].is_alive():
                        if check_access(db_user):
                            flags += " --cluster " + db_user['cluster']
                            logging.info('starting slurm scheduler for user %s', username)
                            schedulers[username]['slurm'] = th.Thread(target=start_slurm_scheduler, args=(args, flags))
                            schedulers[username]['slurm'].start()
                        else:
                            logging.info('access denied for user %s', username)
                else:
                    if not schedulers[username]['local'].is_alive():
                        logging.info('starting local scheduler for user %s', username)
                        schedulers[username]['local'] = th.Thread(target=start_local_scheduler, args=(args, flags))
                        schedulers[username]['local'].start()
                    else:
                        logging.info('local scheduler still alive')
            if args.test:
                break
        else:
            logging.info('no users found')
        time.sleep(5)

    logging.info('terminating user schedulers')
    for username in schedulers:
        schedulers[username]['batcher'].join()
        if args.cluster:
            schedulers[username]['slurm'].join()
        else:
            schedulers[username]['local'].join()

    logging.info('user scheduler exiting')
    sys.exit(0)
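# --- Sketch: shape of a start_* thread target used above ---
# Not part of the original code; only the (args, flags) signature is taken from
# the Thread(target=..., args=(args, flags)) calls. The script name 'batcher.py'
# and the use of subprocess are assumptions about how a per-user scheduler
# might be launched from the admin scheduler.
import shlex
import subprocess

def start_batcher(args, flags):
    # split the flags string built above back into argv tokens and launch the
    # batcher as a child process; blocks until the child exits so is_alive()
    # on the wrapping thread reflects the child's lifetime
    cmd = ['python3', 'batcher.py'] + shlex.split(flags)
    subprocess.call(cmd)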