Example #1
0
def insert2db(rcu, qcu, ic_obj, created, db):
    """Store one usage snapshot of a cluster in the database.

    A ``Usage`` row is added for every key left in ``rcu`` or ``qcu`` after
    pruning, and the session is committed once at the end.

    Args:
        rcu: dict of core counts keyed by realname (presumably running cores;
            confirm against the caller).  ``{}`` means the cluster is down.
        qcu: dict of core counts keyed by realname (presumably not-running
            cores; confirm against the caller).  ``{}`` means the cluster is
            down.
        ic_obj: object providing ``clustername``.
        created: value passed as the last argument of every ``Usage`` row.
        db: object providing ``session.add`` and ``session.commit``.

    Note:
        ``rcu`` and ``qcu`` are modified in place: a ``testlab`` key holding
        the sum of the existing values is added to each of them.
    """
    # "testlab" is a synthetic account holding the total of each dict.  It is
    # always recorded, even when every other value is 0, because a datapoint
    # is needed for this snapshot.
    #
    # Known design flaw: when the cluster is down ({}, {}) the stored totals
    # are 0, which cannot be told apart from a genuine all-zero usage
    # ({u1: 0, u2: 0}, {u1: 0, u2: 0}).
    for counts in (rcu, qcu):
        counts['testlab'] = sum(counts.values())

    # Remove entries whose value is 0, but always keep "testlab".
    rcu = util.prune(rcu, preserved_keys=['testlab'])
    qcu = util.prune(qcu, preserved_keys=['testlab'])

    # uccm: users consuming cores at the moment (realnames).  Taking the keys
    # that survived pruning skips users whose counts are zero in both dicts.
    # A set union is used instead of ``keys() + keys()`` because dict views
    # cannot be concatenated on Python 3.
    uccm = set(rcu) | set(qcu)

    # A realname may be present in only one of the two dicts, hence .get().
    for realname in sorted(uccm):
        usage = Usage(ic_obj.clustername, realname, rcu.get(realname, 0),
                      qcu.get(realname, 0), created)
        db.session.add(usage)
    db.session.commit()
Example #2
0
def insert2db(rcu, qcu, ic_obj, created, db):
    """Store one usage snapshot of a cluster in the database.

    A ``Usage`` row is added for every key left in ``rcu`` or ``qcu`` after
    pruning, and the session is committed once at the end.

    Args:
        rcu: dict of core counts keyed by realname (presumably running cores;
            confirm against the caller).  ``{}`` means the cluster is down.
        qcu: dict of core counts keyed by realname (presumably not-running
            cores; confirm against the caller).  ``{}`` means the cluster is
            down.
        ic_obj: object providing ``clustername``.
        created: value passed as the last argument of every ``Usage`` row.
        db: object providing ``session.add`` and ``session.commit``.

    Note:
        ``rcu`` and ``qcu`` are modified in place: a ``testlab`` key holding
        the sum of the existing values is added to each of them.
    """
    # "testlab" is a synthetic account holding the total of each dict.  It is
    # always recorded, even when every other value is 0, because a datapoint
    # is needed for this snapshot.
    #
    # Known design flaw: when the cluster is down ({}, {}) the stored totals
    # are 0, which cannot be told apart from a genuine all-zero usage
    # ({u1: 0, u2: 0}, {u1: 0, u2: 0}).
    for counts in (rcu, qcu):
        counts['testlab'] = sum(counts.values())

    # Remove entries whose value is 0, but always keep "testlab".
    rcu = util.prune(rcu, preserved_keys=['testlab'])
    qcu = util.prune(qcu, preserved_keys=['testlab'])

    # uccm: users consuming cores at the moment (realnames).  Taking the keys
    # that survived pruning skips users whose counts are zero in both dicts.
    # A set union is used instead of ``keys() + keys()`` because dict views
    # cannot be concatenated on Python 3.
    uccm = set(rcu) | set(qcu)

    # A realname may be present in only one of the two dicts, hence .get().
    for realname in sorted(uccm):
        usage = Usage(
            ic_obj.clustername,
            realname,
            rcu.get(realname, 0),
            qcu.get(realname, 0),
            created)
        db.session.add(usage)
    db.session.commit()
Example #3
0
    def gen_report(self, rcu, qcu, usermap, created=None):
        """Render a usage table for this cluster and wrap it in a ``Report``.

        Args:
            rcu: dict of core counts keyed by realname (shown in the
                "Running" column).
            qcu: dict of core counts keyed by realname (shown in the
                "NotRunning" column).
            usermap: dict whose values are the realnames to report on.
            created: timestamp of the report; defaults to the current time
                when falsy.

        Returns:
            ``Report(self, content, created)`` where ``content`` is the table
            with newlines turned into ``<br>`` and spaces into ``&nbsp;``.
            When both ``rcu`` and ``qcu`` are empty the content only says that
            no data is available.
        """
        # NOTE(review): the title is assembled here but never used below.
        title = "{0}|{1}|{2}".format(self.clustername, self.quota,
                                     self.cores_per_node)

        if not created:
            created = datetime.datetime.now()

        # The timestamp goes first; its trailing "\n" leaves a blank line
        # between it and the table to make the report prettier.
        lines = ["{0}\n".format(util.format_datetime(created))]

        if rcu or qcu:
            rule = '=' * 44
            row_fmt = "{0:13s} {1:<8d} {2:<8d} {3:<8d}"

            # Drop the entries pruned by util.prune before tabulating.
            rcu = util.prune(rcu)
            qcu = util.prune(qcu)

            # Per-user total over both dicts, pruned the same way.
            total_usage = util.prune(dict(
                (realname,
                 sum((rcu.get(realname, 0), qcu.get(realname, 0))))
                for realname in set(usermap.values())))

            # 1. header row
            lines.append("{0:13s} {1:8s} {2:8s} {3:8s}".format(
                'USERNAME', 'Running', 'NotRunning', 'TOTAL'))

            # 2. users ordered by total usage, largest first
            ranked = sorted(total_usage, key=total_usage.get)
            ranked.reverse()

            # 3. one row per user, only when somebody has non-zero usage
            if len(total_usage) != 0:
                lines.append(rule)
                # Only the first word of the realname is shown because the
                # full name is too long for the column.
                lines.extend(
                    row_fmt.format(k.split()[0], rcu.get(k, 0),
                                   qcu.get(k, 0), total_usage.get(k, 0))
                    for k in ranked)

            # 4. footer with the column sums
            lines.append(rule)
            lines.append(row_fmt.format(
                'SUM', sum(rcu.values()), sum(qcu.values()),
                sum(total_usage.values())))
            lines.append(rule)
        else:
            lines.append("data not available at the moment")

        # 5. join the lines, then 6. make the text displayable online
        html = '\n'.join(lines)
        html = html.replace("\n", "<br>").replace(" ", "&nbsp;")

        return Report(self, html, created)
Example #4
0
    def gen_report(self, rcu, qcu, usermap, created=None):
        """Render a usage table for this cluster and wrap it in a ``Report``.

        Args:
            rcu: dict of core counts keyed by realname (shown in the
                "Running" column).
            qcu: dict of core counts keyed by realname (shown in the
                "NotRunning" column).
            usermap: dict whose values are the realnames to report on.
            created: timestamp of the report; defaults to the current time
                when falsy.

        Returns:
            ``Report(self, content, created)`` where ``content`` is the table
            with newlines turned into ``<br>`` and spaces into ``&nbsp;``.
            When both ``rcu`` and ``qcu`` are empty the content only says that
            no data is available.
        """
        # NOTE(review): the title is assembled here but never used below.
        title = "{0}|{1}|{2}".format(
            self.clustername, self.quota, self.cores_per_node)

        if not created:
            created = datetime.datetime.now()

        # Timestamp line; the extra "\n" is only there for a prettier report.
        pieces = []
        pieces.append("{0}\n".format(util.format_datetime(created)))

        have_data = bool(rcu) or bool(qcu)
        if not have_data:
            pieces.append("data not available at the moment")
        else:
            separator = '=' * 44
            table_row = "{0:13s} {1:<8d} {2:<8d} {3:<8d}"

            rcu = util.prune(rcu)
            qcu = util.prune(qcu)

            # Sum both dicts per realname, then prune the result as well.
            totals = {}
            for realname in set(usermap.values()):
                totals[realname] = sum(
                    (rcu.get(realname, 0), qcu.get(realname, 0)))
            total_usage = util.prune(totals)

            # 1. header
            pieces.append("{0:13s} {1:8s} {2:8s} {3:8s}".format(
                'USERNAME', 'Running', 'NotRunning', 'TOTAL'))

            # 2. realnames by total usage, largest first
            by_usage = sorted(total_usage, key=total_usage.get)
            by_usage.reverse()

            # 3. table body, skipped when nobody has non-zero usage
            if len(total_usage) > 0:
                pieces.append(separator)
                for realname in by_usage:
                    # The full name is too long, so only its first word is
                    # printed.
                    shown = realname.split()[0]
                    pieces.append(table_row.format(
                        shown,
                        rcu.get(realname, 0),
                        qcu.get(realname, 0),
                        total_usage.get(realname, 0)))

            # 4. footer with the column sums
            pieces.append(separator)
            pieces.append(table_row.format(
                'SUM',
                sum(rcu.values()),
                sum(qcu.values()),
                sum(total_usage.values())))
            pieces.append(separator)

        # 5. join everything, 6. convert for online display
        report_content = '\n'.join(pieces)
        report_content = report_content.replace("\n", "<br>")
        report_content = report_content.replace(" ", "&nbsp;")

        return Report(self, report_content, created)
Example #5
0
def configure(save=False, machine=None):
    """
    Read configuration files.

    Executes ``conf/conf.py`` and, when a machine is selected,
    ``conf/<machine>/conf.py`` on top of it.  Both paths are relative to the
    directory containing this module.  The names they define are collected in
    a dict.

    Args:
        save: when true and a machine is selected, write the machine name to
            the ``machine`` file in the module directory.
        machine: machine name or path; only its basename is used.  When
            omitted, the name is read from the ``machine`` file if it exists.

    Returns:
        The configuration dict, after ``util.prune`` has been applied with the
        pattern ``'(^_)|(^.$)'``.

    The working directory is changed to the module directory while reading
    and is always restored afterwards, even when a configuration file fails.
    """
    cwd = os.getcwd()
    os.chdir(os.path.realpath(os.path.dirname(__file__)))
    try:
        conf = {}

        # NOTE: configuration files are executed as Python code, so they must
        # come from a trusted location.  The call form of exec is valid on
        # both Python 2 and Python 3.
        with open('conf/conf.py') as f:
            source = f.read()
        exec(compile(source, 'conf/conf.py', 'exec'), conf)

        if not machine and os.path.isfile('machine'):
            with open('machine') as f:
                machine = f.read().strip()

        if machine:
            machine = os.path.basename(machine)
            path = os.path.join('conf', machine, 'conf.py')
            with open(path) as f:
                source = f.read()
            # Machine settings are executed into the same dict, so they
            # override the defaults loaded above.
            exec(compile(source, path, 'exec'), conf)
            conf['machine'] = machine
            if save:
                with open('machine', 'w') as f:
                    f.write(machine)

        util.prune(conf, pattern='(^_)|(^.$)')
    finally:
        # Restore the caller's working directory on every exit path.
        os.chdir(cwd)
    return conf
Example #6
0
def train_model(wrapped_model, model, train_loss_f, model_path, train_loader,
                test_loader, init_lr, epochs, args):
    """Train ``model`` while pruning it step by step, saving it every epoch.

    Every ``prune_cycle`` (8) epochs the cumulative prune fraction grows by
    ``prune_rate`` (0.1) until it reaches ``max_prune`` (0.7).  After each
    increase the model is pruned with ``util.prune`` and re-packed with
    ``packing.pack_model``.

    Args:
        wrapped_model: model object handed to ``util.train``.
        model: model object that is pruned, validated and saved.
        train_loss_f: loss function used for training.
        model_path: path the model is saved to after every epoch; the best
            model goes to the same path with ``.best.pth`` replacing the last
            extension.
        train_loader: data loader passed to ``util.train``.
        test_loader: data loader passed to ``util.validate``.
        init_lr: learning rate given to the RMSprop optimizer.
        epochs: number of the last epoch to run (inclusive).
        args: options object; ``args.gamma`` is read here and the whole object
            is forwarded to ``util.train`` / ``util.validate``.

    Side effects:
        Creates ``model.stats`` when missing, appends the learning rate to
        ``model.stats['lr']`` each epoch, stores the optimizer state in
        ``model.optimizer``, and moves the model to the CPU for saving and
        back to the GPU afterwards.
    """
    val_loss_f = nn.CrossEntropyLoss()
    best_model_path = '.'.join(model_path.split('.')[:-1]) + '.best.pth'

    # tracking stats
    if not hasattr(model, 'stats'):
        model.stats = {
            'train_loss': [],
            'test_acc': [],
            'test_loss': [],
            'weight': [],
            'lr': [],
            'macs': [],
            'efficiency': []
        }
        start_epoch = 1
    else:
        # Resume from the number of recorded test losses.
        # NOTE(review): 'test_loss' is not appended to in this function;
        # presumably util.validate does it - confirm.
        start_epoch = len(model.stats['test_loss'])

    curr_weights, _ = util.num_nonzeros(model)

    # optimizer
    optimizer = optim.RMSprop(util.group_weight(model),
                              lr=init_lr,
                              momentum=0.9,
                              alpha=0.9,
                              weight_decay=4e-5,
                              eps=1.0)
    print("Optimizer:")
    print(optimizer)
    best_acc = 0

    # Step-wise pruning schedule.
    prune_rate = 0.1    # increase of the prune fraction per pruning step
    prune_total = 0.0   # prune fraction applied so far
    prune_cycle = 8     # epochs between two pruning steps
    max_prune = 0.7     # no further pruning once this fraction is reached

    # pruning stage
    for epoch in range(start_epoch, epochs + 1):
        print('[Epoch {}]'.format(epoch))
        # Report the learning rate of the first parameter group.
        lr = optimizer.param_groups[0]['lr']

        if epoch % prune_cycle == 0 and prune_total < max_prune:
            prune_total += prune_rate
            print('Prune Total: {:2.2f}'.format(100. * prune_total))
            util.prune(model, prune_total)
            packing.pack_model(model, args.gamma)
            curr_weights, _ = util.num_nonzeros(model)

        # util.train / util.validate receive a zero-based epoch index.
        train_loss = util.train(train_loader, wrapped_model, train_loss_f,
                                optimizer, epoch - 1, args)
        test_loss, test_acc = util.validate(test_loader, model, val_loss_f,
                                            epoch - 1, args)

        print('LR        :: {}'.format(lr))
        print('Train Loss:: {}'.format(train_loss))
        print('Test  Loss:: {}'.format(test_loss))
        print('Test  Acc.:: {}'.format(test_acc))
        print('Nonzeros  :: {}'.format(curr_weights))
        print('')
        print('')
        model.stats['lr'].append(lr)
        model.optimizer = optimizer.state_dict()

        # Save from the CPU, then move the model back to the GPU.
        model.cpu()
        torch.save(model, model_path)
        # A "best" model is only kept once the full prune fraction has been
        # applied, so less-pruned models never win.
        if test_acc > best_acc and prune_total >= max_prune:
            print('New best model found')
            torch.save(model, best_model_path)
            best_acc = test_acc

        model.cuda()
Example #7
0
def stage(inputs):
    """
    Setup, and optionally launch, a SORD job.
    """
    import glob, time, getopt, shutil
    import setup

    # Save start time
    starttime = time.asctime()
    print('SORD setup')

    # Read defaults
    pm = {}
    f = os.path.join(os.path.dirname(__file__), 'parameters.py')
    exec open(f) in pm
    if 'machine' in inputs:
        cf = configure.configure(machine=inputs['machine'])
    else:
        cf = configure.configure()

    # Merge inputs
    inputs = inputs.copy()
    util.prune(inputs)
    util.prune(pm)
    util.prune(cf, pattern='(^_)|(^.$)')
    for k, v in inputs.iteritems():
        if k in cf:
            cf[k] = v
        elif k in pm:
            pm[k] = v
        else:
            sys.exit('Unknown parameter: %s = %r' % (k, v))
    cf = util.namespace(cf)
    cf.rundir = os.path.expanduser(cf.rundir)
    pm = prepare_param(util.namespace(pm), cf.itbuff)

    # Command line options
    opts = [
        'n',
        'dryrun',
        's',
        'serial',
        'm',
        'mpi',
        'i',
        'interactive',
        'q',
        'queue',
        'd',
        'debug',
        'g',
        'debugging',
        't',
        'testing',
        'p',
        'profiling',
        'O',
        'optimized',
        'f',
        'force',
    ]
    options = ''.join(opts[::2])
    long_options = opts[1::2]
    opts = getopt.getopt(sys.argv[1:], options, long_options)[0]
    for o, v in opts:
        if o in ('-n', '--dry-run'):
            cf.prepare = False
        elif o in ('-s', '--serial'):
            cf.mode = 's'
        elif o in ('-m', '--mpi'):
            cf.mode = 'm'
        elif o in ('-i', '--interactive'):
            cf.run = 'i'
        elif o in ('-q', '--queue'):
            cf.run = 'q'
        elif o in ('-d', '--debug'):
            cf.optimize = 'g'
            cf.run = 'g'
        elif o in ('-g', '--debugging'):
            cf.optimize = 'g'
        elif o in ('-t', '--testing'):
            cf.optimize = 't'
        elif o in ('-p', '--profiling'):
            cf.optimize = 'p'
        elif o in ('-O', '--optimized'):
            cf.optimize = 'O'
        elif o in ('-f', '--force'):
            if os.path.isdir(cf.rundir):
                shutil.rmtree(cf.rundir)
        else:
            sys.exit('Error: unknown option: ' + o)
    if not cf.prepare:
        cf.run = False

    # Partition for parallelization
    pm.nn = tuple(int(i) for i in pm.nn)
    maxtotalcores = cf.maxnodes * cf.maxcores
    if not cf.mode and maxtotalcores == 1:
        cf.mode = 's'
    np3 = pm.np3[:]
    if cf.mode == 's':
        np3 = [1, 1, 1]
    nl = [(pm.nn[i] - 1) / np3[i] + 1 for i in range(3)]
    i = abs(pm.faultnormal) - 1
    if i >= 0:
        nl[i] = max(nl[i], 2)
    pm.np3 = tuple((pm.nn[i] - 1) / nl[i] + 1 for i in range(3))
    cf.np = pm.np3[0] * pm.np3[1] * pm.np3[2]
    if not cf.mode:
        cf.mode = 's'
        if cf.np > 1:
            cf.mode = 'm'

    # Resources
    if cf.maxcores:
        cf.nodes = min(cf.maxnodes, (cf.np - 1) / cf.maxcores + 1)
        cf.ppn = (cf.np - 1) / cf.nodes + 1
        cf.cores = min(cf.maxcores, cf.ppn)
        cf.totalcores = cf.nodes * cf.maxcores
    else:
        cf.nodes = 1
        cf.ppn = cf.np
        cf.cores = cf.np
        cf.totalcores = cf.np

    # RAM and Wall time usage
    if pm.oplevel in (1, 2):
        nvars = 20
    elif pm.oplevel in (3, 4, 5):
        nvars = 23
    else:
        nvars = 44
    nm = (nl[0] + 2) * (nl[1] + 2) * (nl[2] + 2)
    cf.pmem = 32 + int(1.2 * nm * nvars * int(cf.dtype[-1]) / 1024 / 1024)
    cf.ram = cf.pmem * cf.ppn
    ss = (pm.nt + 10) * cf.ppn * nm / cf.cores / cf.rate
    sus = int(ss / 3600 * cf.totalcores + 1)
    mm = ss / 60 * 3.0 + 10
    if cf.maxtime:
        mm = min(mm, 60 * cf.maxtime[0] + cf.maxtime[1])
    mm = mm * 3
    hh = mm / 60
    mm = mm % 60
    cf.walltime = '%d:%02d:00' % (hh, mm)  #v1.1.01 2 times of orignal hours
    cf.walltime = '1:00:00'  # used in v1.1
    print('Machine: ' + cf.machine)
    print('Cores: %s of %s' % (cf.np, maxtotalcores))
    print('Nodes: %s of %s' % (cf.nodes, cf.maxnodes))
    print('RAM: %sMb of %sMb per node' % (cf.ram, cf.maxram))
    print('Time limit: ' + cf.walltime)
    print('SUs: %s' % sus)
    if cf.maxcores and cf.ppn > cf.maxcores:
        print('Warning: exceding available cores per node (%s)' % cf.maxcores)
    if cf.ram and cf.ram > cf.maxram:
        print('Warning: exceding available RAM per node (%sMb)' % cf.maxram)

    # Compile code
    if not cf.prepare:
        return cf
    setup.build(cf.mode, cf.optimize)

    # Create run directory
    print('Run directory: ' + cf.rundir)
    try:
        os.makedirs(cf.rundir)
    except (OSError):
        sys.exit('%r exists or cannot be created. Use --force to overwrite.' %
                 cf.rundir)
    for f in 'in', 'out', 'prof', 'stats', 'debug', 'checkpoint':
        os.mkdir(os.path.join(cf.rundir, f))

    # Copy files to run directory
    cwd = os.path.realpath(os.getcwd())
    cf.rundate = time.asctime()
    cf.name = os.path.basename(cf.rundir)
    cf.rundir = os.path.realpath(cf.rundir)
    os.chdir(os.path.realpath(os.path.dirname(__file__)))
    cf.bin = os.path.join('.', 'sord-' + cf.mode + cf.optimize)
    path = os.path.join('bin', 'sord-' + cf.mode + cf.optimize)
    shutil.copy(path, cf.rundir)
    if os.path.isfile('sord.tgz'):
        shutil.copy('sord.tgz', cf.rundir)
    if cf.optimize == 'g':
        for f in glob.glob(os.path.join('src', '*.f90')):
            shutil.copy(f, cf.rundir)
    f = os.path.join('conf', cf.machine, 'templates')
    if not os.path.isdir(f):
        f = os.path.join('conf', 'default', 'templates')
    for d in os.path.join('conf', 'common', 'templates'), f:
        for f in glob.glob(os.path.join(d, '*')):
            ff = os.path.join(cf.rundir, os.path.basename(f))
            out = open(f).read() % cf.__dict__
            open(ff, 'w').write(out)
            shutil.copymode(f, ff)

    # Combine metadata
    meta = util.namespace(pm.__dict__)
    for k in 'name', 'rundate', 'rundir', 'user', 'os_', 'dtype':
        setattr(meta, k, getattr(cf, k))
    meta.indices = {}
    meta.xi = {}
    for f in meta.fieldio:
        op, filename = f[0], f[8]
        if filename != '-':
            meta.indices[filename] = f[7]
            if 'wi' in op:
                meta.xi[filename] = f[4]
    meta.shape = {}
    for k in meta.indices:
        nn = [(i[1] - i[0]) / i[2] + 1 for i in meta.indices[k]]
        nn = [n for n in nn if n > 1]
        if nn == []:
            nn = [1]
        meta.shape[k] = nn

    # Write files
    os.chdir(cf.rundir)
    log = open('log', 'w')
    log.write(starttime + ': setup started\n')
    util.save('conf.py', cf, prune_pattern='(^_)|(^.$)')
    util.save('parameters.py', pm, expand=['fieldio'])
    util.save('meta.py', meta, expand=['shape', 'xi', 'indices', 'fieldio'])

    # Return to initial directory
    os.chdir(cwd)
    return cf
Example #8
0
def train(model, train_loader, val_loader, args):
    """Train ``model`` with SGD while pruning it along a cubic schedule.

    Pruning happens at the epochs listed in ``prune_epochs``, which are spread
    evenly over the first half of training.  Each time, the next rate from
    ``prune_rates`` is applied with ``util.prune`` and the model is re-packed
    with ``packing.pack_model``.  The model is saved to ``args.save_path``
    after every epoch and to the matching ``.best.pth`` path whenever the test
    accuracy improves.

    Args:
        model: model to train; receives ``stats``, ``optimizer`` and ``epoch``
            attributes.
        train_loader: data loader passed to ``util.train``.
        val_loader: data loader passed to ``util.validate``.
        args: options object; reads ``lr``, ``momentum``, ``weight_decay``,
            ``epochs``, ``gamma`` and ``save_path``, and sets ``l1_penalty``
            to 0 at the last pruning epoch.
    """
    criterion = nn.CrossEntropyLoss().cuda()
    optimizer = torch.optim.SGD(model.parameters(),
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)
    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, args.epochs)

    # Cubic sparsity schedule: the rate rises quickly at first and flattens
    # out towards max_prune_rate.
    max_prune_rate = 0.8
    num_prune_epochs = 10
    final_prune_epoch = int(0.5 * args.epochs)
    prune_rates = []
    for i in range(num_prune_epochs):
        prune_rates.append(
            max_prune_rate * (1 - (1 - (i / num_prune_epochs))**3))
    prune_rates[-1] = max_prune_rate
    prune_epochs = np.linspace(0, final_prune_epoch, num_prune_epochs)
    prune_epochs = prune_epochs.astype('i').tolist()
    print("Pruning Epochs: {}".format(prune_epochs))
    print("Pruning Rates: {}".format(prune_rates))

    # index of the next entry of prune_rates to apply
    prune_epoch = 0

    curr_weights, num_weights = util.num_nonzeros(model)
    # Until the model has been packed, MACs equal the nonzero weight count.
    macs = curr_weights

    model.stats = {
        'train_loss': [],
        'test_acc': [],
        'test_loss': [],
        'weight': [],
        'lr': [],
        'macs': [],
        'efficiency': []
    }
    best_path = args.save_path.split('.pth')[0] + '.best.pth'
    best_test_acc = 0

    epoch = 1
    while epoch <= args.epochs:
        scheduler.step()
        # learning rate of the first parameter group, for logging and stats
        lr = optimizer.param_groups[0]['lr']

        # prune smallest weights up to a set prune_rate
        if epoch in prune_epochs:
            rate = prune_rates[prune_epoch]
            util.prune(model, rate)
            curr_weights, num_weights = util.num_nonzeros(model)
            packing.pack_model(model, args.gamma)
            macs = np.sum([x * y for x, y in model.packed_layer_size])
            # counted again after packing
            curr_weights, num_weights = util.num_nonzeros(model)
            prune_epoch += 1

        if epoch == prune_epochs[-1]:
            # disable l1 penalty, as target sparsity is reached
            args.l1_penalty = 0

        print('     :: [{}]\tLR {:.4f}\tNonzeros ({}/{})'.format(
            epoch, lr, curr_weights, num_weights))
        train_loss = util.train(train_loader, model, criterion, optimizer,
                                epoch, args)
        test_loss, test_acc = util.validate(val_loader, model, criterion,
                                            epoch, args)

        is_best = test_acc > best_test_acc
        best_test_acc = max(test_acc, best_test_acc)

        # per-epoch bookkeeping stored on the model itself
        efficiency = 100.0 * (curr_weights / macs)
        model.stats['lr'].append(lr)
        model.stats['macs'].append(macs)
        model.stats['weight'].append(curr_weights)
        model.stats['efficiency'].append(efficiency)
        model.optimizer = optimizer.state_dict()
        model.epoch = epoch

        # Save from the CPU, then move the model back to the GPU.
        model.cpu()
        torch.save(model, args.save_path)
        if is_best:
            torch.save(model, best_path)
        model.cuda()

        epoch += 1