# Example #1
# 0
import itertools
import gpuscheduler
import argparse
import os
from itertools import product

# CLI flags: --dry presumably skips actual submission and --verbose makes the
# scheduler print what it is doing — confirm against the gpuscheduler package.
parser = argparse.ArgumentParser(description='Compute script.')
for flag in ('--dry', '--verbose'):
    parser.add_argument(flag, action='store_true')
args = parser.parse_args()

# Earlier local-machine scheduler kept for reference:
#s = gpuscheduler.Scheduler('/home/tim/data/git/sched/config/')
#log_base = '/home/tim/logs/'
s = gpuscheduler.HyakScheduler('/gscratch/scrubbed/dettmers/git/sched/config/', verbose=args.verbose)
log_base = '/usr/lusers/dettmers/logs/'

# Per-host resource thresholds (units assumed to be MB and percent — verify
# against gpuscheduler.update_host_config).
for host, mem_thr, util_thr in (('home', 1700, 30), ('office', 1700, 25)):
    s.update_host_config(host, mem_threshold=mem_thr, util_threshold=util_thr)
#s.update_host_config('ari', mem_threshold=2500, util_threshold=25)

# Base training command; 'ITER' inside --work_dir is presumably substituted
# per-job later in the script — confirm downstream.
cmd = 'OMP_NUM_THREADS=1 python train.py --cuda --data ../data/wikitext-2/ --dataset wt103 --adaptive --n_layer 12 --dropatt 0.0 --optim adam --tgt_len 150 --mem_len 150 --eval_tgt_len 150 --fp16 --dynamic-loss-scale --eval-interval 100 --work_dir=LM-TFM-wt103/ITER/ --log-interval 10'

# Fixed hyperparameters; each entry is appended to cmd as '--key value'.
args2 = {
    'conv': '',
    #'dim2': '',
    #'shape2': 2,
    'kernel-size': 3,
    #'downsample-identity': '',
    'd_emb': 400,
    'd_model': 400,
}
# Example #2
# 0
# NOTE(review): `name` is not defined anywhere in the visible portion of this
# file — presumably assigned in an earlier section; confirm before running.
ckp_name = name
num_jobs = 96

# Slurm account/partition selection; alternatives kept for quick switching.
#account = 'cse'
#account = 'stf'
#account = 'ark'
#partition = 'scavenge'
#partition = 'scavenge,learnfair'
partition = 'learnfair'
#partition = 'uninterrupted'
#partition = 'dev'
change_dir = 'fairseq_private/'
repo = 'fairseq_private'
exclude = 'learnfair0285,learnfair0405'

s = gpuscheduler.HyakScheduler(verbose=args.verbose, account='', partition=partition, use_gres=False)
#s = gpuscheduler.SshScheduler(verbose=args.verbose)

# Append every fixed hyperparameter in args2 to the base command line.
for key, value in args2.items():
    cmd = cmd + ' --{0} {1}'.format(key, value)

fp16 = True
args3 = {}
args4 = []
time_hours = 0
time_minutes = 15

# Fix: `join` and `glob` were used below but never imported anywhere in this
# file, which raises NameError at runtime.
import glob
from os.path import join

# NOTE(review): expects args2 to contain a 'folder' key — the args2 visible
# earlier in this file does not set one; confirm against the full script.
path = join('/private/home/timdettmers/git/', change_dir, args2['folder'], '*')

files = list(glob.iglob(path))
n = len(files)
print('Total files: {0}'.format(n))
# Example #3
# 0
# Additional fixed hyperparameters for this sweep.
args2.update({
    'lr': 0.0001,
    'warmup-updates': 16000,
})


# Per-job resources and sweep bookkeeping for the baseline WikiText-103 run.
logfolder = 'multifilter/{0}/'.format('baseline_wiki103')
time_hours = 48
cores_per_job = 40
seed_offset = 1
num_seeds = 1
num_GPUs = 8

account = 'cse'
#account = 'stf'
change_dir = 'multifilter/'

s = gpuscheduler.HyakScheduler('/gscratch/cse/dettmers/git/sched/config/', verbose=args.verbose, account=account, partition=account + '-gpu')

# Fold the fixed hyperparameters into the base command line.
cmd += ''.join(' --{0} {1}'.format(k, v) for k, v in args2.items())

# Swept hyperparameters (empty here — this run is a single configuration).
args3 = {}
#args3['lr'] = [0.01, 0.03, 0.06, 0.1, 0.3, 0.6, 1.0]
#args3['batch-size'] = [64]
#args3['batch-size'] = [6, 12]
#args3['lr'] = [0.0004, 0.0003]
#args3['max-lr'] = [0.0008, 0.0005]
#args2['min-lr'] = [1e-08, 1e-07]

# Extra literal argument strings appended per job (none active).
args4 = []
#args4.append('--model vgg-d --density 0.05')
#args4.append('--model alexnet-b --density 0.10')
# Example #4
# 0
# Per-job resource requests for the checkpoint (preemptible) partition.
time_hours = 4
cores_per_job = 4
mem = 16
num_seeds = 1
seed_offset = 0

account = 'cse-ckpt'
#account = 'cse'
#account = 'stf'
#account = 'ark'
#partition = account = '-gpu'
partition = 'ckpt-gpu'
change_dir = 'sparse_learning/mnist_cifar/'
repo = 'sparse_learning'

s = gpuscheduler.HyakScheduler(verbose=args.verbose, account=account, partition=partition)
#s = gpuscheduler.SshScheduler(verbose=args.verbose)

# Fold the fixed hyperparameters into the base command line.
cmd += ''.join(' --{0} {1}'.format(k, v) for k, v in args2.items())

# Swept hyperparameters — presumably one job per combination of these lists;
# confirm against how args3 is consumed later in the script.
args3 = {
    'model': ['wrn-16-8'],
    #'method': ['KLmin'],
    'method': ['simple'],
    #'method': ['KLtopk'],
    #'method': ['KLmin', 'KLtopk'],
    'beta': [2.0, 4.0, 6.0],
    #'history-size': [1000, 2000, 5000],
    'history-size': [500, 2000, 5000],
    'metric-history-size': [500],
}