# Launch stamp (month-day-hour-minute, not zero-padded) that is substituted
# into every per-dataset log directory below.
dt = datetime.datetime.now()
timestr = '-'.join(map(str, (dt.month, dt.day, dt.hour, dt.minute)))

# Build one task grid per dataset and append it to the shared `tasks` list.
# `sz` is unpacked from the dataset table but not used here.
for dat, (n_samples, sz) in dat_size_dict.items():
    root_cmd = 'python bnn_stein_f.py -dataset={} '.format(dat)

    # Per-dataset overrides are folded into the literal so the key order
    # handed to runner.list_tasks stays the same for every dataset.
    is_protein = dat == 'protein_data'
    param_specs = {
        # protein_data uses the same layer spec as every other dataset.
        'layers': ['10 10'],
        # protein_data runs 5 seeds starting at 1333; the rest run 10 from 1334.
        'seed': (list(range(1333, 1338)) if is_protein
                 else list(range(1334, 1344))),
        'lr': [4e-3],
        'n_particles': [20],
        # Datasets with more than 1000 samples get a larger batch and more
        # epochs.
        ('batch_size', 'n_epoch'): ([[1000, 3000]] if n_samples > 1000
                                    else [[100, 500]]),
        'ptb_scale': [1],
        'test_freq': [20],
    }

    log_dir = logdir_fmt.format(timestr, dat)
    tasks += runner.list_tasks(root_cmd, param_specs, source_dir, log_dir)

# Show every command line and the total count before handing off to the runner.
print('\n'.join(t.cmd for t in tasks))
print(len(tasks))

r = runner.Runner(n_max_gpus=4, n_multiplex=2, n_max_retry=1)
r.run_tasks(tasks)
# NOTE(review): the next two statements look like the tail of the
# `on_task_finish` callback whose `def` line lies above this chunk; if so they
# belong indented inside that function -- confirm against the full file.
# A task must be reported as CRASHED exactly when it ran with a negative `x`
# or with `sleep_long` enabled.
assert (status == runner.Status.CRASHED) == (
    task.option_dict['x'] < 0
    or task.option_dict['sleep_long'] is True
)
ctr[status] += 1  # tally how many tasks ended in each status

# The worker script is expected next to this file.
slave_working_dir = os.path.dirname(os.path.abspath(__file__))

param_specs = {
    ('x', 'x1'): [0.5, 0.6, -0.3],  # x and x1 will have same value
    'y': ['foo', 'bar!'],
    'sleep_long': runner.BooleanOpt(true_first=False, no_for_false=True),
}

# Start from an empty log directory on every run.
log_dir = '/tmp/slave_logs/'
if os.path.exists(log_dir):
    shutil.rmtree(log_dir)

tasks = runner.list_tasks(
    'python3 slave.py',
    param_specs,
    slave_working_dir,
    log_dir + 'prefix',
    max_cpu_time=2,
    post_kill_cmd='echo "KILLED"',
)

r = runner.Runner(
    n_max_gpus=1,
    n_multiplex=4,
    n_max_retry=-1,
    on_task_finish=on_task_finish,
)
r.run_tasks(tasks)

# Per-status counts gathered by the finish callback.
print(ctr)
import logging
import os
import shutil
import sys

# Send debug-level log records to stderr, tagged with file name and line.
logging.basicConfig(
    stream=sys.stderr,
    level=logging.DEBUG,
    format='%(filename)s:%(lineno)s %(levelname)s:%(message)s')

# Tasks are launched from the `src` directory that sits beside this script's
# own directory.
slave_working_dir = os.path.join(
    os.path.dirname(os.path.abspath(__file__)), '../src')

# Option grid passed to runner.list_tasks.
# NOTE(review): the tuple key presumably ties `z_dims` and `eps_dims` to the
# same value from the list -- confirm against runner.list_tasks.
param_specs = {
    'seed': list(range(1234, 1239)),
    ('z_dims', 'eps_dims'): [8, 32],
    'lr': [1e-3],
    'n_iter': [100000],
    'optimizer': ['rmsprop'],
    'model': ['vae'],
}

# Fixed output location for these runs. The path contains no replacement
# field, so it is used as a plain literal rather than being passed through
# str.format (which would silently discard its argument).
log_dir = os.path.expanduser('~/s-vae-tf/run/svae/vae-explicit/')

tasks = runner.list_tasks(
    'python mnist.py -explicit -test ',
    param_specs,
    slave_working_dir,
    log_dir + 'prefix')

# Show every command line before anything is launched.
print('\n'.join(t.cmd for t in tasks))

r = runner.Runner(n_max_gpus=6, n_multiplex=3, n_max_retry=-1)
r.run_tasks(tasks)
import os
import shutil
import sys

# Send debug-level log records to stderr, tagged with file name and line.
logging.basicConfig(
    stream=sys.stderr,
    level=logging.DEBUG,
    format='%(filename)s:%(lineno)s %(levelname)s:%(message)s')

# Tasks are launched from the `src` directory that sits beside this script's
# own directory.
slave_working_dir = os.path.join(
    os.path.dirname(os.path.abspath(__file__)), '../src')

# Option grid passed to runner.list_tasks.
param_specs = {
    'n_iter': [200000],
    'seed': list(range(1234, 1244)),
    'model': ['wae', 'wae-gan'],
    'z_dims': [8],
    'eps_dims': [0],
    'e_act': ['tanh'],
}

# Name the log directory after this script. Reduce `__file__` to its basename
# first: it may carry a directory part (it is an absolute path for the main
# module on Python 3.9+), and splitting the full path on '.' would leak that
# directory -- or yield an empty name for './script.py' -- into the log path.
script_name = os.path.basename(__file__).split('.')[0]
log_dir = os.path.expanduser('~/s-vae-tf/run/svae/{}/'.format(script_name))

tasks = runner.list_tasks(
    'python mnist.py -observation sigmoid -do_fid -test ',
    param_specs,
    slave_working_dir,
    log_dir + 'prefix')

# Show every command line before anything is launched.
print('\n'.join(t.cmd for t in tasks))

r = runner.Runner(n_max_gpus=6, n_multiplex=3, n_max_retry=-1)
r.run_tasks(tasks)
import shutil
import sys

# Send debug-level log records to stderr, tagged with file name and line.
logging.basicConfig(
    stream=sys.stderr,
    level=logging.DEBUG,
    format='%(filename)s:%(lineno)s %(levelname)s:%(message)s')

# Tasks are launched from the `src` directory that sits beside this script's
# own directory.
slave_working_dir = os.path.join(
    os.path.dirname(os.path.abspath(__file__)), '../src')

# Option grid passed to runner.list_tasks.
param_specs = {
    'z_dims': [16, 32, 64],
    'seed': list(range(1334, 1337)),
    'model': ['wae', 'wae-gan', 'wae-mmd'],
    'n_iter': [200001],
}

# NOTE CHANGE THIS
# The output location is a fixed literal with no replacement field, so it is
# not passed through str.format (which would silently discard its argument).
log_dir = os.path.expanduser('~/s-vae-tf/run/svae/cifar/')

tasks = runner.list_tasks(
    'python cifar.py -eps_dim 0 ',
    param_specs,
    slave_working_dir,
    log_dir + 'prefix')

# Show every command line and the total count before anything is launched.
print('\n'.join(t.cmd for t in tasks))
print(len(tasks))

# NOTE CHANGE THIS
r = runner.Runner(n_max_gpus=6, n_multiplex=1, n_max_retry=-1)
r.run_tasks(tasks)
format='%(filename)s:%(lineno)s %(levelname)s:%(message)s') slave_dir = os.path.abspath('.') root_cmd = 'python main.py --layers 100,100 --bandit {}'.format(args.bandit) param_specs = {'-num_context': [40000], '-seed': list(range(1334, 1344))} import datetime dt = datetime.datetime.now() timestr = '{}-{}-{}-{}'.format(dt.month, dt.day, dt.hour, dt.minute) log_dir = os.path.join(slave_dir, '../../run/bandit-{}-{}/'.format(args.bandit, timestr)) os.makedirs(os.path.join(log_dir, "test_{}".format( dt.second))) # make sure we can write tasks = runner.list_tasks(root_cmd, param_specs, slave_dir, log_dir) for i, t in enumerate(tasks): ncmd = t.cmd.replace('-dir', '--logdir').replace('-production ', '') tasks[i] = t._replace(cmd=ncmd) print('\n'.join([t.cmd for t in tasks])) print(args) # r = runner.Runner(n_max_gpus=[0,3], n_multiplex=5, n_max_retry=-1) # r = runner.Runner(n_max_gpus=7, n_multiplex=2, n_max_retry=-1) r = runner.Runner(n_max_gpus=args.max_gpus, n_multiplex=args.n_multiplex, n_max_retry=-1) r.run_tasks(tasks)