def insert2db(rcu, qcu, ic_obj, created, db):
    # A new row, "testlab", is recorded to the db regardless of the usage of
    # the other accounts, i.e. even when all of them are 0. This matters
    # because we need a datapoint at this snapshot!
    #
    # rcu, qcu could be {}, {}, meaning the cluster is down; otherwise they
    # may still be nothing but a bunch of {key: 0} items.
    #
    #### Design flaw: when the cluster is down, the empty core usages
    #### ({}, {}) cannot be distinguished from genuinely zero core usages
    #### ({u1: 0, u2: 0}, {u1: 0, u2: 0}).
    for dd in [rcu, qcu]:
        dd.update(dict(testlab=sum(dd.values())))

    # remove items whose value is 0
    rcu = util.prune(rcu, preserved_keys=['testlab'])
    qcu = util.prune(qcu, preserved_keys=['testlab'])

    # uccm: users consuming cores at the moment (realnames).
    # Using uccm instead of usermap.values() eliminates usage items where
    # both the user's running and not-running cores are zero.
    uccm = set(rcu.keys() + qcu.keys())

    # Must use rcu.get(realname, 0) and qcu.get(realname, 0) instead of
    # rcu[realname] or qcu[realname], since the realname could be in either
    # rcu or qcu, or both.
    for realname in sorted(uccm):
        usage = Usage(ic_obj.clustername, realname,
                      rcu.get(realname, 0), qcu.get(realname, 0), created)
        db.session.add(usage)
    db.session.commit()
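# util.prune is called throughout the usage-tracking code above but is not
# defined in this section. A minimal, hypothetical sketch of the dict form
# (drop zero-valued entries, keep anything listed in preserved_keys) is
# given below; the name prune_sketch is made up, and the util.prune(model,
# rate) calls in the training code further down appear to be a different,
# weight-pruning helper.
def prune_sketch(dd, preserved_keys=()):
    """Hypothetical stand-in for util.prune on usage dicts."""
    return dict((k, v) for k, v in dd.items()
                if v != 0 or k in preserved_keys)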
def gen_report(self, rcu, qcu, usermap, created=None):
    # first, title of the report
    title = "{0}|{1}|{2}".format(self.clustername, self.quota,
                                 self.cores_per_node)
    # second, created datetime
    created = created if created else datetime.datetime.now()

    report_content = []
    # datetime; the additional "\n" just makes the report prettier
    report_content.append("{0}\n".format(util.format_datetime(created)))
    if not rcu and not qcu:
        report_content.append("data not available at the moment")
    else:
        rcu, qcu = util.prune(rcu), util.prune(qcu)
        total_usage = {}
        for realname in set(usermap.values()):
            total_usage[realname] = sum(
                dd.get(realname, 0) for dd in [rcu, qcu])
        total_usage = util.prune(total_usage)

        # from this step on, it is basically about printing data from 3
        # dicts (rcu, qcu, total_usage) in a pretty way

        # 1. print headers
        report_content.append("{0:13s} {1:8s} {2:8s} {3:8s}".format(
            'USERNAME', 'Running', 'NotRunning', 'TOTAL'))
        # 2. sort the keys by total_usage, largest first
        sorted_keys = reversed(sorted(total_usage, key=total_usage.get))
        if len(total_usage) != 0:
            # not everyone's usage is zero
            report_content.append('=' * 44)
            # 3. print the table
            for k in sorted_keys:
                # the full name is too long, so the last name is used;
                # the first name alone would be confusing
                name = k.split()[0]
                report_content.append(
                    "{0:13s} {1:<8d} {2:<8d} {3:<8d}".format(
                        name, rcu.get(k, 0), qcu.get(k, 0),
                        total_usage.get(k, 0)))
        # 4. print the footer sum
        report_content.append('=' * 44)
        report_content.append("{0:13s} {1:<8d} {2:<8d} {3:<8d}".format(
            'SUM', sum(rcu.values()), sum(qcu.values()),
            sum(total_usage.values())))
        report_content.append('=' * 44)

    # 5. join the final result
    report_content = '\n'.join(report_content)
    # 6. since the report is displayed online, convert newlines and spaces
    # to their HTML equivalents
    report_content = report_content.replace(
        "\n", "<br>").replace(" ", "&nbsp;")
    return Report(self, report_content, created)
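# The final replacements in gen_report exist because the report is rendered
# in a browser, where raw newlines and runs of spaces would collapse. A tiny
# demonstration with made-up text:
sample = "SUM           120      0        120"
print(sample.replace("\n", "<br>").replace(" ", "&nbsp;"))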
def configure(save=False, machine=None):
    """
    Read configuration files.
    """
    cwd = os.getcwd()
    path = os.path.realpath(os.path.dirname(__file__))
    os.chdir(path)
    conf = {}
    exec open('conf/conf.py') in conf
    if not machine and os.path.isfile('machine'):
        machine = open('machine').read().strip()
    if machine:
        machine = os.path.basename(machine)
        path = os.path.join('conf', machine, 'conf.py')
        exec open(path) in conf
        conf['machine'] = machine
    if save:
        open('machine', 'w').write(machine)
    util.prune(conf, pattern='(^_)|(^.$)')
    os.chdir(cwd)
    return conf
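# configure() relies on exec'ing plain-Python config files into a dict, so
# conf/conf.py only needs module-level assignments. A minimal, runnable
# demonstration of the pattern with made-up keys and values:
conf_demo = {}
exec("rundir = '~/run'\nmaxcores = 8", conf_demo)
print(conf_demo['rundir'])  # '~/run'
# keys matching '(^_)|(^.$)' (leading underscore, or single-character names)
# are later stripped by util.prune(conf, pattern=...)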
def train_model(wrapped_model, model, train_loss_f, model_path, train_loader,
                test_loader, init_lr, epochs, args):
    val_loss_f = nn.CrossEntropyLoss()
    best_model_path = '.'.join(model_path.split('.')[:-1]) + '.best.pth'

    # tracking stats
    if not hasattr(model, 'stats'):
        model.stats = {
            'train_loss': [], 'test_acc': [], 'test_loss': [],
            'weight': [], 'lr': [], 'macs': [], 'efficiency': []
        }
        start_epoch = 1
    else:
        start_epoch = len(model.stats['test_loss'])

    curr_weights, _ = util.num_nonzeros(model)
    if hasattr(model, 'packed_layer_size'):
        macs = np.sum([x * y for x, y in model.packed_layer_size])
    else:
        macs = curr_weights

    # optimizer
    optimizer = optim.RMSprop(util.group_weight(model), lr=init_lr,
                              momentum=0.9, alpha=0.9, weight_decay=4e-5,
                              eps=1.0)
    print("Optimizer:")
    print(optimizer)

    best_acc = 0
    prune_epoch = 0
    max_prune_rate = 0.8
    final_prune_epoch = int(0.9 * args.epochs)
    num_prune_epochs = 10
    # cubic pruning schedule (note: not used below; pruning in this function
    # is driven by prune_cycle / prune_total instead)
    prune_rates = [max_prune_rate * (1 - (1 - (i / num_prune_epochs))**3)
                   for i in range(num_prune_epochs)]
    prune_rates[-1] = max_prune_rate
    prune_epochs = np.linspace(0, final_prune_epoch,
                               num_prune_epochs).astype('i').tolist()

    prune_rate = 0.1
    prune_total = 0.0
    prune_cycle = 8
    max_prune = 0.7

    # pruning stage
    for epoch in range(start_epoch, epochs + 1):
        print('[Epoch {}]'.format(epoch))
        for g in optimizer.param_groups:
            lr = g['lr']
            break

        # every prune_cycle epochs, prune another prune_rate of the weights
        # until max_prune total sparsity is reached
        if epoch % prune_cycle == 0 and prune_total < max_prune:
            prune_total += prune_rate
            print('Prune Total: {:2.2f}'.format(100. * prune_total))
            util.prune(model, prune_total)
            packing.pack_model(model, args.gamma)
            macs = np.sum([x * y for x, y in model.packed_layer_size])
            curr_weights, num_weights = util.num_nonzeros(model)

        train_loss = util.train(train_loader, wrapped_model, train_loss_f,
                                optimizer, epoch - 1, args)
        test_loss, test_acc = util.validate(test_loader, model, val_loss_f,
                                            epoch - 1, args)

        print('LR :: {}'.format(lr))
        print('Train Loss:: {}'.format(train_loss))
        print('Test Loss:: {}'.format(test_loss))
        print('Test Acc.:: {}'.format(test_acc))
        print('Nonzeros :: {}'.format(curr_weights))
        print('')
        print('')

        model.stats['lr'].append(lr)
        model.optimizer = optimizer.state_dict()

        model.cpu()
        torch.save(model, model_path)
        # only track "best" once the target sparsity has been reached
        if test_acc > best_acc and prune_total >= max_prune:
            print('New best model found')
            torch.save(model, best_model_path)
            best_acc = test_acc
        model.cuda()
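# util.prune(model, prune_total) above is not defined in this section.
# Assuming it performs global magnitude pruning (zeroing the smallest
# fraction of conv/linear weights in place), a self-contained sketch with
# that behavior might look like the following; the function name and the
# layer selection are assumptions, not the project's actual implementation.
import torch
import torch.nn as nn


def magnitude_prune(model, rate):
    """Zero the smallest-magnitude `rate` fraction of conv/linear weights."""
    weights = [m.weight.data for m in model.modules()
               if isinstance(m, (nn.Conv2d, nn.Linear))]
    if not weights:
        return
    all_w = torch.cat([w.abs().flatten() for w in weights])
    k = int(rate * all_w.numel())
    if k == 0:
        return
    threshold = all_w.kthvalue(k).values  # global magnitude threshold
    for w in weights:
        w.mul_((w.abs() > threshold).float())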
def stage(inputs):
    """
    Set up, and optionally launch, a SORD job.
    """
    import glob, time, getopt, shutil
    import setup

    # Save start time
    starttime = time.asctime()
    print('SORD setup')

    # Read defaults
    pm = {}
    f = os.path.join(os.path.dirname(__file__), 'parameters.py')
    exec open(f) in pm
    if 'machine' in inputs:
        cf = configure.configure(machine=inputs['machine'])
    else:
        cf = configure.configure()

    # Merge inputs
    inputs = inputs.copy()
    util.prune(inputs)
    util.prune(pm)
    util.prune(cf, pattern='(^_)|(^.$)')
    for k, v in inputs.iteritems():
        if k in cf:
            cf[k] = v
        elif k in pm:
            pm[k] = v
        else:
            sys.exit('Unknown parameter: %s = %r' % (k, v))
    cf = util.namespace(cf)
    cf.rundir = os.path.expanduser(cf.rundir)
    pm = prepare_param(util.namespace(pm), cf.itbuff)

    # Command line options
    opts = [
        'n', 'dryrun',
        's', 'serial',
        'm', 'mpi',
        'i', 'interactive',
        'q', 'queue',
        'd', 'debug',
        'g', 'debugging',
        't', 'testing',
        'p', 'profiling',
        'O', 'optimized',
        'f', 'force',
    ]
    options = ''.join(opts[::2])
    long_options = opts[1::2]
    opts = getopt.getopt(sys.argv[1:], options, long_options)[0]
    for o, v in opts:
        if o in ('-n', '--dryrun'):
            cf.prepare = False
        elif o in ('-s', '--serial'):
            cf.mode = 's'
        elif o in ('-m', '--mpi'):
            cf.mode = 'm'
        elif o in ('-i', '--interactive'):
            cf.run = 'i'
        elif o in ('-q', '--queue'):
            cf.run = 'q'
        elif o in ('-d', '--debug'):
            cf.optimize = 'g'
            cf.run = 'g'
        elif o in ('-g', '--debugging'):
            cf.optimize = 'g'
        elif o in ('-t', '--testing'):
            cf.optimize = 't'
        elif o in ('-p', '--profiling'):
            cf.optimize = 'p'
        elif o in ('-O', '--optimized'):
            cf.optimize = 'O'
        elif o in ('-f', '--force'):
            if os.path.isdir(cf.rundir):
                shutil.rmtree(cf.rundir)
        else:
            sys.exit('Error: unknown option: ' + o)
    if not cf.prepare:
        cf.run = False

    # Partition for parallelization
    pm.nn = tuple(int(i) for i in pm.nn)
    maxtotalcores = cf.maxnodes * cf.maxcores
    if not cf.mode and maxtotalcores == 1:
        cf.mode = 's'
    np3 = pm.np3[:]
    if cf.mode == 's':
        np3 = [1, 1, 1]
    nl = [(pm.nn[i] - 1) / np3[i] + 1 for i in range(3)]
    i = abs(pm.faultnormal) - 1
    if i >= 0:
        nl[i] = max(nl[i], 2)
    pm.np3 = tuple((pm.nn[i] - 1) / nl[i] + 1 for i in range(3))
    cf.np = pm.np3[0] * pm.np3[1] * pm.np3[2]
    if not cf.mode:
        cf.mode = 's'
        if cf.np > 1:
            cf.mode = 'm'

    # Resources
    if cf.maxcores:
        cf.nodes = min(cf.maxnodes, (cf.np - 1) / cf.maxcores + 1)
        cf.ppn = (cf.np - 1) / cf.nodes + 1
        cf.cores = min(cf.maxcores, cf.ppn)
        cf.totalcores = cf.nodes * cf.maxcores
    else:
        cf.nodes = 1
        cf.ppn = cf.np
        cf.cores = cf.np
        cf.totalcores = cf.np

    # RAM and wall time usage
    if pm.oplevel in (1, 2):
        nvars = 20
    elif pm.oplevel in (3, 4, 5):
        nvars = 23
    else:
        nvars = 44
    nm = (nl[0] + 2) * (nl[1] + 2) * (nl[2] + 2)
    cf.pmem = 32 + int(1.2 * nm * nvars * int(cf.dtype[-1]) / 1024 / 1024)
    cf.ram = cf.pmem * cf.ppn
    ss = (pm.nt + 10) * cf.ppn * nm / cf.cores / cf.rate
    sus = int(ss / 3600 * cf.totalcores + 1)
    mm = ss / 60 * 3.0 + 10
    if cf.maxtime:
        mm = min(mm, 60 * cf.maxtime[0] + cf.maxtime[1])
    mm = mm * 3
    hh = mm / 60
    mm = mm % 60
    cf.walltime = '%d:%02d:00' % (hh, mm)  # v1.1.01: 2 times the original hours
    cf.walltime = '1:00:00'  # used in v1.1
    print('Machine: ' + cf.machine)
    print('Cores: %s of %s' % (cf.np, maxtotalcores))
    print('Nodes: %s of %s' % (cf.nodes, cf.maxnodes))
    print('RAM: %sMb of %sMb per node' % (cf.ram, cf.maxram))
    print('Time limit: ' + cf.walltime)
    print('SUs: %s' % sus)
    if cf.maxcores and cf.ppn > cf.maxcores:
        print('Warning: exceeding available cores per node (%s)' % cf.maxcores)
    if cf.ram and cf.ram > cf.maxram:
        print('Warning: exceeding available RAM per node (%sMb)' % cf.maxram)

    # Compile code
    if not cf.prepare:
        return cf
    setup.build(cf.mode, cf.optimize)

    # Create run directory
    print('Run directory: ' + cf.rundir)
    try:
        os.makedirs(cf.rundir)
    except OSError:
        sys.exit('%r exists or cannot be created. Use --force to overwrite.'
                 % cf.rundir)
    for f in 'in', 'out', 'prof', 'stats', 'debug', 'checkpoint':
        os.mkdir(os.path.join(cf.rundir, f))

    # Copy files to run directory
    cwd = os.path.realpath(os.getcwd())
    cf.rundate = time.asctime()
    cf.name = os.path.basename(cf.rundir)
    cf.rundir = os.path.realpath(cf.rundir)
    os.chdir(os.path.realpath(os.path.dirname(__file__)))
    cf.bin = os.path.join('.', 'sord-' + cf.mode + cf.optimize)
    path = os.path.join('bin', 'sord-' + cf.mode + cf.optimize)
    shutil.copy(path, cf.rundir)
    if os.path.isfile('sord.tgz'):
        shutil.copy('sord.tgz', cf.rundir)
    if cf.optimize == 'g':
        for f in glob.glob(os.path.join('src', '*.f90')):
            shutil.copy(f, cf.rundir)
    f = os.path.join('conf', cf.machine, 'templates')
    if not os.path.isdir(f):
        f = os.path.join('conf', 'default', 'templates')
    for d in os.path.join('conf', 'common', 'templates'), f:
        for f in glob.glob(os.path.join(d, '*')):
            ff = os.path.join(cf.rundir, os.path.basename(f))
            out = open(f).read() % cf.__dict__
            open(ff, 'w').write(out)
            shutil.copymode(f, ff)

    # Combine metadata
    meta = util.namespace(pm.__dict__)
    for k in 'name', 'rundate', 'rundir', 'user', 'os_', 'dtype':
        setattr(meta, k, getattr(cf, k))
    meta.indices = {}
    meta.xi = {}
    for f in meta.fieldio:
        op, filename = f[0], f[8]
        if filename != '-':
            meta.indices[filename] = f[7]
            if 'wi' in op:
                meta.xi[filename] = f[4]
    meta.shape = {}
    for k in meta.indices:
        nn = [(i[1] - i[0]) / i[2] + 1 for i in meta.indices[k]]
        nn = [n for n in nn if n > 1]
        if nn == []:
            nn = [1]
        meta.shape[k] = nn

    # Write files
    os.chdir(cf.rundir)
    log = open('log', 'w')
    log.write(starttime + ': setup started\n')
    util.save('conf.py', cf, prune_pattern='(^_)|(^.$)')
    util.save('parameters.py', pm, expand=['fieldio'])
    util.save('meta.py', meta, expand=['shape', 'xi', 'indices', 'fieldio'])

    # Return to initial directory
    os.chdir(cwd)
    return cf
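# The opts list in stage() interleaves short flags with long option names,
# and slicing with a stride of 2 splits them apart for getopt. A standalone
# illustration with the first three pairs:
opts_demo = ['n', 'dryrun', 's', 'serial', 'm', 'mpi']
print(''.join(opts_demo[::2]))  # 'nsm'  -> getopt short-option string
print(opts_demo[1::2])          # ['dryrun', 'serial', 'mpi']  -> long options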
def train(model, train_loader, val_loader, args):
    criterion = nn.CrossEntropyLoss().cuda()
    optimizer = torch.optim.SGD(model.parameters(), args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)
    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, args.epochs)

    prune_epoch = 0
    max_prune_rate = 0.85
    max_prune_rate = 0.8
    final_prune_epoch = int(0.5 * args.epochs)
    num_prune_epochs = 10
    prune_rates = [max_prune_rate * (1 - (1 - (i / num_prune_epochs))**3)
                   for i in range(num_prune_epochs)]
    prune_rates[-1] = max_prune_rate
    prune_epochs = np.linspace(0, final_prune_epoch,
                               num_prune_epochs).astype('i').tolist()
    print("Pruning Epochs: {}".format(prune_epochs))
    print("Pruning Rates: {}".format(prune_rates))

    curr_weights, num_weights = util.num_nonzeros(model)
    macs = curr_weights
    model.stats = {
        'train_loss': [], 'test_acc': [], 'test_loss': [],
        'weight': [], 'lr': [], 'macs': [], 'efficiency': []
    }
    best_path = args.save_path.split('.pth')[0] + '.best.pth'
    best_test_acc = 0

    for epoch in range(1, args.epochs + 1):
        scheduler.step()
        for g in optimizer.param_groups:
            lr = g['lr']
            break

        # prune smallest weights up to a set prune_rate
        if epoch in prune_epochs:
            util.prune(model, prune_rates[prune_epoch])
            curr_weights, num_weights = util.num_nonzeros(model)
            packing.pack_model(model, args.gamma)
            macs = np.sum([x * y for x, y in model.packed_layer_size])
            curr_weights, num_weights = util.num_nonzeros(model)
            prune_epoch += 1

        if epoch == prune_epochs[-1]:
            # disable l1 penalty, as target sparsity is reached
            args.l1_penalty = 0

        print(' :: [{}]\tLR {:.4f}\tNonzeros ({}/{})'.format(
            epoch, lr, curr_weights, num_weights))
        train_loss = util.train(train_loader, model, criterion, optimizer,
                                epoch, args)
        test_loss, test_acc = util.validate(val_loader, model, criterion,
                                            epoch, args)

        is_best = test_acc > best_test_acc
        best_test_acc = max(test_acc, best_test_acc)
        model.stats['lr'].append(lr)
        model.stats['macs'].append(macs)
        model.stats['weight'].append(curr_weights)
        model.stats['efficiency'].append(100.0 * (curr_weights / macs))
        model.optimizer = optimizer.state_dict()
        model.epoch = epoch

        model.cpu()
        torch.save(model, args.save_path)
        if is_best:
            torch.save(model, best_path)
        model.cuda()
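# The cubic schedule in train() front-loads sparsity: most pruning happens
# in the first few pruning epochs, then the rate flattens out toward
# max_prune_rate. A standalone illustration using the same formula and
# constants as above (Python 3 division):
max_rate = 0.8
steps = 10
schedule = [max_rate * (1 - (1 - i / steps) ** 3) for i in range(steps)]
schedule[-1] = max_rate
print(['{:.2f}'.format(r) for r in schedule])
# ['0.00', '0.22', '0.39', '0.53', '0.63', '0.70', '0.75', '0.78', '0.79', '0.80']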