import itertools
import gpuscheduler
import argparse
import os
from itertools import product

# Command-line flags for the launcher itself (not the training job).
parser = argparse.ArgumentParser(description='Compute script.')
parser.add_argument('--dry', action='store_true')
parser.add_argument('--verbose', action='store_true')
args = parser.parse_args()

# Scheduler backend selection; the commented lines are alternative setups
# kept as toggles (local machine vs. Hyak cluster).
#s = gpuscheduler.Scheduler('/home/tim/data/git/sched/config/')
#log_base = '/home/tim/logs/'
s = gpuscheduler.HyakScheduler('/gscratch/scrubbed/dettmers/git/sched/config/', verbose=args.verbose)
log_base = '/usr/lusers/dettmers/logs/'

# Per-host GPU selection thresholds (MB free memory / % utilization).
s.update_host_config('home', mem_threshold=1700, util_threshold=30)
s.update_host_config('office', mem_threshold=1700, util_threshold=25)
#s.update_host_config('ari', mem_threshold=2500, util_threshold=25)

# Base training command; fixed hyperparameters live here, swept ones are
# appended later from the args2/args3 dicts.
cmd = 'OMP_NUM_THREADS=1 python train.py --cuda --data ../data/wikitext-2/ --dataset wt103 --adaptive --n_layer 12 --dropatt 0.0 --optim adam --tgt_len 150 --mem_len 150 --eval_tgt_len 150 --fp16 --dynamic-loss-scale --eval-interval 100 --work_dir=LM-TFM-wt103/ITER/ --log-interval 10'

# Fixed extra arguments appended to every job. An empty-string value means
# the flag is passed with no argument. Commented entries are kept as toggles.
args2 = {}
args2['conv'] = ''
#args2['dim2'] = ''
#args2['shape2'] = 2
args2['kernel-size'] = 3
#args2['downsample-identity'] = ''
args2['d_emb'] = 400
args2['d_model'] = 400
# Missing in the visible file header: `glob` and `os.path.join` are used
# below but were never imported. Imported here so this section is runnable.
import glob
from os.path import join

# NOTE(review): `name` is not defined anywhere in the visible file — this
# raises NameError as written. Presumably set in an earlier (removed)
# section; confirm before running.
ckp_name = name
num_jobs = 96

# Cluster account/partition toggles for this fairseq sweep.
#account = 'cse'
#account = 'stf'
#account = 'ark'
#partition = 'scavenge'
#partition = 'scavenge,learnfair'
partition = 'learnfair'
#partition = 'uninterrupted'
#partition = 'dev'
change_dir = 'fairseq_private/'
repo = 'fairseq_private'
# Nodes known to be bad; excluded from scheduling.
exclude = 'learnfair0285,learnfair0405'

# This section overrides the scheduler constructed earlier in the file.
s = gpuscheduler.HyakScheduler(verbose=args.verbose, account='', partition=partition, use_gres=False)
#s = gpuscheduler.SshScheduler(verbose=args.verbose)

# Append the fixed per-job arguments to the base command.
for key, value in args2.items():
    cmd = cmd + ' --{0} {1}'.format(key, value)

fp16 = True
args3 = {}
args4 = []
time_hours = 0
time_minutes = 15

# Count input files for this sweep.
# NOTE(review): args2 as defined in the visible header has no 'folder' key —
# this raises KeyError unless an earlier section sets it; verify.
path = join('/private/home/timdettmers/git/', change_dir, args2['folder'], '*')
files = list(glob.iglob(path))
n = len(files)
print('Total files: {0}'.format(n))
# Sweep-specific fixed hyperparameters for the multifilter wt103 baseline.
args2['lr'] = 0.0001
args2['warmup-updates'] = 16000

logfolder = 'multifilter/{0}/'.format('baseline_wiki103')
time_hours = 48
cores_per_job = 40
seed_offset = 1
num_seeds = 1
num_GPUs = 8

# Account toggle; partition is derived from the account name.
account = 'cse'
#account = 'stf'
change_dir = 'multifilter/'

# Rebuild the scheduler for the Hyak cse GPU partition.
s = gpuscheduler.HyakScheduler(
    '/gscratch/cse/dettmers/git/sched/config/',
    verbose=args.verbose,
    account=account,
    partition=account + '-gpu',
)

# Fold the fixed per-job arguments into the base command string.
cmd += ''.join(' --{0} {1}'.format(k, v) for k, v in args2.items())

# Swept hyperparameters (empty: no grid here); commented lines are
# previously tried grids kept as toggles.
args3 = {}
#args3['lr'] = [0.01, 0.03, 0.06, 0.1, 0.3, 0.6, 1.0]
#args3['batch-size'] = [64]
#args3['batch-size'] = [6, 12]
#args3['lr'] = [0.0004, 0.0003]
#args3['max-lr'] = [0.0008, 0.0005]
#args2['min-lr'] = [1e-08, 1e-07]

# Literal argument strings appended verbatim to individual jobs.
args4 = []
#args4.append('--model vgg-d --density 0.05')
#args4.append('--model alexnet-b --density 0.10')
# Resource requests for the sparse_learning mnist/cifar sweep.
time_hours = 4
cores_per_job = 4
mem = 16
num_seeds = 1
seed_offset = 0

# Account/partition toggles (checkpoint partition in use here).
account = 'cse-ckpt'
#account = 'cse'
#account = 'stf'
#account = 'ark'
#partition = account = '-gpu'
partition = 'ckpt-gpu'
change_dir = 'sparse_learning/mnist_cifar/'
repo = 'sparse_learning'

# Scheduler for this section; SSH backend kept as a toggle.
s = gpuscheduler.HyakScheduler(verbose=args.verbose, account=account, partition=partition)
#s = gpuscheduler.SshScheduler(verbose=args.verbose)

# Fold the fixed per-job arguments into the base command string.
cmd += ''.join(' --{0} {1}'.format(k, v) for k, v in args2.items())

# Hyperparameter grid: one job per element of the cartesian product.
# Commented entries are previously tried values kept as toggles.
args3 = {
    'model': ['wrn-16-8'],
    #'method': ['KLmin'],
    'method': ['simple'],
    #'method': ['KLtopk'],
    #'method': ['KLmin', 'KLtopk'],
    'beta': [2.0, 4.0, 6.0],
    #'history-size': [1000, 2000, 5000],
    'history-size': [500, 2000, 5000],
    'metric-history-size': [500],
}