Ejemplo n.º 1
0
def run(trainset, trainloader, testloader, config):
    """Train a ``Classifier`` in rounds, splitting (growing) it after each round.

    Rounds run from ``config.load_round`` through 10 inclusive. Every round
    trains for ``config.n_epochs`` epochs, appending the mean training loss
    and the test accuracy of each epoch to ``stats``. After the epochs the
    network is grown with ``model.split`` (unless ``config.method`` is
    ``'none'``), the learning rate is reset to 0.1 and a new optimizer is
    created.

    When ``config.method == 'random'`` eight independently split copies of
    the model are each trained for 17 epochs and the copy with the highest
    test accuracy replaces the model.

    Args:
        trainset: Training dataset, forwarded unchanged to ``model.split``.
        trainloader: Iterable of ``(inputs, targets)`` batches; must support
            ``len()``.
        testloader: Loader passed to ``test`` to measure accuracy.
        config: Experiment settings. Reads ``seed``, ``dataset``, ``method``,
            ``dim_hidden``, ``grow_ratio``, ``granularity``, ``device``,
            ``verbose``, ``load_round`` and ``n_epochs``. ``config.resume``
            is overwritten with ``False``.

    Side effects:
        Writes ``checkpoint/<exp_name>.npy|.pt`` every 20 epochs and at the
        last epoch of a round, and ``checkpoint/roundfull_<round>_<exp_name>
        .npy|.pt`` at the end of every round.
    """
    check_path('./img')

    torch.manual_seed(config.seed)
    torch.cuda.manual_seed_all(config.seed)
    np.random.seed(config.seed)

    exp_name = "experiment_%s_%s_initdim%d_seed%d_grow%f_gra%d_alpha3_new" % (
        config.dataset, config.method, config.dim_hidden, config.seed,
        config.grow_ratio, config.granularity)

    # Fixed schedule constants of this experiment.
    last_round = 10
    n_candidates = 8      # random-split search: number of candidates tried
    search_epochs = 17    # random-split search: epochs each candidate trains
    search_lr = 0.05      # random-split search: initial learning rate

    # NOTE(review): resuming is force-disabled here, so every resume branch
    # below is currently unreachable. Confirm this override is intentional.
    config.resume = False
    load_round = config.load_round

    if config.resume:
        stats = np.load("stats/round_%d_%s.npy" % (load_round, exp_name),
                        allow_pickle=True)
        stats = stats.tolist()
        model = Classifier(config, stats['cfg']).to(config.device)
        ckpt = torch.load("stats/round_%d_%s.pt" % (load_round, exp_name))
        model.load_state_dict(ckpt)
    else:
        model = Classifier(config).to(config.device)
        if config.verbose:
            print('[INFO] initial model trainable parameter number: %d' %
                  model.get_num_params())

        stats = {
            'train_loss': [],
            'test_accuracy': [],
            'compression_rate': [],
            'widths': {},
            'cfg': None,
        }

        # One width entry per splittable layer; 'fireflyn' tracks two values.
        for idx, layer in enumerate(model.net):
            if isinstance(layer, sp.SpModule) and layer.can_split:
                stats['widths'][idx] = [0, 0] if config.method == 'fireflyn' else 0

    n_batches = len(trainloader)
    print("[INFO] Split method: ", config.method)

    if config.resume:
        ckpt = torch.load("checkpoint/roundfull_%d_%s.pt" %
                          (load_round, exp_name))
        model.load_state_dict(ckpt)
        print('load succ')

    device = config.device
    # Epochs (1-based) after which the learning rate is multiplied by 0.1.
    first_decay_epoch = config.n_epochs // 2 - 1
    second_decay_epoch = config.n_epochs // 4 * 3 - 1

    for round_idx in range(load_round, last_round + 1):
        # A round restored from a checkpoint is not trained again.
        skip_training = config.resume and round_idx <= load_round
        if not skip_training:
            for epoch in range(1, config.n_epochs + 1):
                loss = 0.
                for x, y in trainloader:
                    loss += model.update(x.to(device), y.to(device))
                loss /= n_batches
                test_acc = test(testloader, model)
                stats['train_loss'].append(loss)
                stats['test_accuracy'].append(test_acc)

                if epoch % 5 == 0:
                    print(
                        "[INFO] Round %d Epoch %05d | Training loss is %10.4f | Test accuracy is %10.4f"
                        % (round_idx, epoch, loss, test_acc))

                if epoch == first_decay_epoch:
                    model.decay_lr(0.1)

                if epoch == second_decay_epoch:
                    model.decay_lr(0.1)

                if epoch % 20 == 0 or epoch == config.n_epochs:
                    np.save("checkpoint/%s.npy" % exp_name, stats)
                    torch.save(model.state_dict(),
                               "checkpoint/%s.pt" % exp_name)

        np.save("checkpoint/roundfull_%d_%s.npy" % (round_idx, exp_name),
                stats)
        torch.save(model.state_dict(),
                   "checkpoint/roundfull_%d_%s.pt" % (round_idx, exp_name))

        if config.method != 'none':
            # Grow the network use NASH
            if config.method == 'random':
                search_start = time.time()
                best_model = None
                best_acc = None
                for _ in range(n_candidates):
                    candidate = copy.deepcopy(model)
                    candidate.set_lr(search_lr)
                    candidate.create_optimizer()
                    candidate.split(config.method, trainset)
                    candidate_acc = None
                    for e in range(search_epochs):
                        print(e)
                        for x, y in trainloader:
                            candidate.update(x.to(device), y.to(device))
                        adjust_learning_rate(candidate.opt, e, search_epochs,
                                             search_lr)
                        # Only the accuracy after the final search epoch is
                        # used for selection.
                        candidate_acc = test(testloader, candidate)
                    # Remember the running best so the most accurate candidate
                    # wins; the first candidate is always accepted so a
                    # winner exists even if every accuracy is 0.
                    if best_model is None or candidate_acc > best_acc:
                        best_acc = candidate_acc
                        best_model = candidate
                model = best_model
                print('Search Time', time.time() - search_start)
            else:
                model.split(config.method, trainset)

            print('Current parameter size', model.get_num_params())
            print('Current cfg', model.get_cfg())
            # Restart the next round from the initial learning rate.
            model.set_lr(0.1)
            model.create_optimizer()
Ejemplo n.º 2
0
def run(trainset, trainloader, testloader, config):
    """Run the round-based train-and-split loop, logging to ``config.log``.

    Rounds run over ``range(config.load_round, config.n_rounds)``. Unless
    ``config.onlys`` is set, every round trains for ``config.n_epochs``
    epochs and appends the mean training loss and the test accuracy of each
    epoch to ``stats``. After that the network is grown with ``model.split``
    (unless ``config.method`` is ``'none'``), the learning rate is reset to
    0.1 and a new optimizer is created.

    When ``config.method == 'random'`` eight independently split copies of
    the model are each trained for 17 epochs and the copy with the highest
    test accuracy replaces the model.

    Args:
        trainset: Training dataset, forwarded unchanged to ``model.split``.
        trainloader: Iterable of ``(inputs, targets)`` batches; must support
            ``len()``.
        testloader: Loader passed to ``test`` to measure accuracy.
        config: Experiment settings. Reads ``seed``, ``dataset``, ``method``,
            ``dim_hidden``, ``grow_ratio``, ``granularity``, ``log``,
            ``onlys``, ``ckpt``, ``device``, ``verbose``, ``load_round``,
            ``n_rounds``, ``n_epochs`` and ``save``. ``config.resume`` is
            overwritten with ``False``.

    Raises:
        NotImplementedError: If resuming is requested (currently unreachable
            because ``config.resume`` is forced to ``False``).

    Side effects:
        When training, writes ``checkpoint/<config.save>/<exp_name>.npy|.pt``
        every 20 epochs and at the last epoch of a round, and
        ``checkpoint/<config.save>/round_<round>_<exp_name>.npy|.pt`` at the
        end of every round.
    """
    print("[INFO] into run function")
    check_path('./img')

    torch.manual_seed(config.seed)
    torch.cuda.manual_seed_all(config.seed)
    np.random.seed(config.seed)

    exp_name = "exp_%s_%s_initdim%d_seed%d_grow%.3f_gra%d_alpha3_new" % (
        config.dataset,
        config.method,
        config.dim_hidden,
        config.seed,
        config.grow_ratio,
        config.granularity)
    log = config.log

    # Fixed constants of the random-split search.
    n_candidates = 8      # number of candidates tried
    search_epochs = 17    # epochs each candidate trains
    search_lr = 0.05      # initial learning rate of each candidate

    # NOTE(review): resuming is force-disabled here, so the resume branches
    # below are currently unreachable. Confirm this override is intentional.
    config.resume = False
    if config.resume:
        # No resume path is implemented; fail explicitly instead of running
        # on with `model` and `stats` undefined.
        raise NotImplementedError("resuming from a checkpoint is not supported")
    elif config.onlys:
        # Split-only mode: start from a previously saved model and stats.
        stats = np.load("%s.npy" % config.ckpt, allow_pickle=True)
        stats = stats.tolist()
        print(stats)
        model = Classifier(config).to(config.device)
        ckpt = torch.load('%s.pt' % config.ckpt)
        model.load_state_dict(ckpt)
        print("[INFO] model done")
    else:
        print("[INFO] load/new model on device %s" % str(config.device))
        model = Classifier(config).to(config.device)
        print("[INFO] model done")
        stats = {
            'train_loss': [],
            'test_accuracy': [],
            'compression_rate': [],
            'widths': {},
            'cfg': None,
        }

        # One width entry per splittable layer; 'fireflyn' tracks two values.
        for idx, layer in enumerate(model.net):
            if isinstance(layer, sp.SpModule) and layer.can_split:
                stats['widths'][idx] = [0, 0] if config.method == 'fireflyn' else 0

    # Needed by the training loop whether or not verbose logging is enabled.
    n_batches = len(trainloader)
    device = config.device

    if config.verbose:
        params = model.get_num_params()
        cfg = model.get_cfg()
        log.info('[INFO] Initial model #params: %10d' % (params))
        log.info('[INFO] Initial model configuration: [{}]'.format(', '.join(map(str, cfg))))
        log.info("[INFO] Split method: {}".format(str(config.method)))
        log.info("=" * 80)
        test_acc = test(testloader, model)
        log.info("[INFO] The loaded model acc = %10.4f" % test_acc)
        log.info("[INFO] model:")
        log.info(model)
        log.info("="*80)

    load_round = config.load_round
    for round_idx in range(load_round, config.n_rounds):
        start_time = time.time()
        if not config.onlys:
            # Epochs (1-based) after which the learning rate is multiplied
            # by 0.1.
            first_decay_epoch = config.n_epochs // 2 - 1
            second_decay_epoch = config.n_epochs // 4 * 3 - 1
            # A round restored from a checkpoint is not trained again.
            skip_training = config.resume and round_idx <= load_round
            if not skip_training:
                for epoch in range(1, config.n_epochs + 1):
                    loss = 0.
                    for inputs, targets in trainloader:
                        loss += model.update(inputs.to(device),
                                             targets.to(device))
                    loss /= n_batches
                    test_acc = test(testloader, model)
                    stats['train_loss'].append(loss)
                    stats['test_accuracy'].append(test_acc)

                    is_last_epoch = epoch == config.n_epochs

                    # adjust learning rate
                    if epoch == first_decay_epoch:
                        model.decay_lr(0.1)
                    if epoch == second_decay_epoch:
                        model.decay_lr(0.1)

                    if epoch % 20 == 0 or is_last_epoch:
                        print("[INFO] Round %d Epoch %03d | Training loss is %10.4f | Test accuracy is %10.4f" % (
                            round_idx, epoch, loss, test_acc))
                        np.save("checkpoint/%s/%s.npy" % (config.save, exp_name), stats)
                        torch.save(model.state_dict(), "checkpoint/%s/%s.pt" % (config.save, exp_name))

                    if is_last_epoch:
                        log.info("[INFO] Round %d:" % round_idx)
                        log.info("[INFO] Training takes %10.4f sec | Training loss: %10.4f | Test accuracy: %10.4f" % (
                            (time.time() - start_time), loss, test_acc))

            np.save("checkpoint/%s/round_%d_%s.npy" % (config.save, round_idx, exp_name), stats)
            torch.save(model.state_dict(), "checkpoint/%s/round_%d_%s.pt" % (config.save, round_idx, exp_name))

        if config.method != 'none':
            # Grow the network use NASH
            if config.method == 'random':
                search_start = time.time()
                best_model = None
                best_acc = None
                for _ in range(n_candidates):
                    candidate = copy.deepcopy(model)
                    candidate.set_lr(search_lr)
                    candidate.create_optimizer()
                    candidate.split(config.method, trainset)
                    candidate_acc = None
                    for e in range(search_epochs):
                        print(e)
                        for x, y in trainloader:
                            candidate.update(x.to(device), y.to(device))
                        adjust_learning_rate(candidate.opt, e, search_epochs,
                                             search_lr)
                        # Only the accuracy after the final search epoch is
                        # used for selection.
                        candidate_acc = test(testloader, candidate)
                    # Remember the running best so the most accurate candidate
                    # wins; the first candidate is always accepted so a
                    # winner exists even if every accuracy is 0.
                    if best_model is None or candidate_acc > best_acc:
                        best_acc = candidate_acc
                        best_model = candidate
                model = best_model
                # The whole search counts as the splitting time reported below.
                splittime = time.time() - search_start
                print('Search Time', splittime)
            else:
                _, splittime = model.split(config.method, trainset)

            params = model.get_num_params()
            cfg = model.get_cfg()
            log.info('[INFO] Current #params: %10d' % (params))
            log.info('[INFO] Current configuration: [{}]'.format(', '.join(map(str, cfg))))

            test_acc = test(testloader, model)
            log.info(
                "[INFO] Splitting takes %10.4f sec | Test accuracy after splitting: %10.4f" % (splittime, test_acc))
            # Restart the next round from the initial learning rate.
            model.set_lr(0.1)
            model.create_optimizer()