Example #1
def split_paras(paras):
    """Split per-layer parameter dicts into architecture parameters and
    quantization parameters, keyed by the module-level ARCH_SPACE and
    QUAN_SPACE search-space dicts."""
    arch_paras = []
    quan_paras = []
    for para in paras:
        arch_para = {}
        quan_para = {}
        for name in ARCH_SPACE:
            if name in para:
                arch_para[name] = para[name]
        # Skip-connection anchor points travel with the architecture paras.
        if 'anchor_point' in para:
            arch_para['anchor_point'] = para['anchor_point']
        for name in QUAN_SPACE:
            if name in para:
                quan_para[name] = para[name]
        if arch_para:
            arch_paras.append(arch_para)
        if quan_para:
            quan_paras.append(quan_para)
    # Normalize empty results to None after the loop; the original ran these
    # checks inside it, which could hand None to .append() on later layers.
    if not arch_paras:
        arch_paras = None
    if not quan_paras:
        quan_paras = None
    return arch_paras, quan_paras
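A quick usage sketch of split_paras. The two spaces below are illustrative stand-ins (the real module-level ARCH_SPACE and QUAN_SPACE in NAQS carry more keys and candidate values than shown here):

# Illustrative stand-ins, not the project's actual search spaces.
ARCH_SPACE = {'filter_height': [1, 3, 5, 7], 'num_filters': [24, 36, 48, 64]}
QUAN_SPACE = {'act_num_int_bits': [0, 1, 2], 'act_num_frac_bits': [1, 2, 4]}

paras = [{'filter_height': 3, 'num_filters': 64,
          'act_num_int_bits': 1, 'act_num_frac_bits': 4},
         {'filter_height': 5, 'num_filters': 48,
          'act_num_int_bits': 2, 'act_num_frac_bits': 2}]
arch_paras, quan_paras = split_paras(paras)
# arch_paras == [{'filter_height': 3, 'num_filters': 64},
#                {'filter_height': 5, 'num_filters': 48}]
# quan_paras == [{'act_num_int_bits': 1, 'act_num_frac_bits': 4},
#                {'act_num_int_bits': 2, 'act_num_frac_bits': 2}]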
Example #2
File: main.py Project: ND-SCL/NAQS
def quantization_search(device, dir='experiment'):
    dir = os.path.join(dir,
                       f"rLut={args.rLUT}, rThroughput={args.rThroughput}")
    if not os.path.exists(dir):
        os.makedirs(dir)
    filepath = os.path.join(dir, f"quantization ({args.episodes} episodes)")
    logger = get_logger(filepath)
    csvfile = open(filepath + '.csv', mode='w+', newline='')
    writer = csv.writer(csvfile)
    logger.info(f"INFORMATION")
    logger.info(f"mode: \t\t\t\t\t {'quantization'}")
    logger.info(f"dataset: \t\t\t\t {args.dataset}")
    logger.info(f"number of child network layers: \t {args.layers}")
    logger.info(f"include stride: \t\t\t {not args.no_stride}")
    logger.info(f"include pooling: \t\t\t {not args.no_pooling}")
    logger.info(f"skip connection: \t\t\t {args.skip}")
    logger.info(f"required # LUTs: \t\t\t {args.rLUT}")
    logger.info(f"required throughput: \t\t\t {args.rThroughput}")
    logger.info(f"Assumed frequency: \t\t\t {CLOCK_FREQUENCY}")
    logger.info(f"training epochs: \t\t\t {args.epochs}")
    logger.info(f"data augmentation: \t\t\t {args.augment}")
    logger.info(f"batch size: \t\t\t\t {args.batch_size}")
    logger.info(f"controller learning rate: \t\t {args.learning_rate}")
    logger.info(f"architecture episodes: \t\t\t {args.episodes}")
    logger.info(f"using multi gpus: \t\t\t {args.multi_gpu}")
    logger.info(f"architecture space: ")
    # for name, value in ARCH_SPACE.items():
    #     logger.info(name + f": \t\t\t\t {value}")
    logger.info(f"quantization space: ")
    for name, value in QUAN_SPACE.items():
        logger.info(name + f": \t\t\t {value}")
    agent = Agent(QUAN_SPACE,
                  args.layers,
                  lr=args.learning_rate,
                  device=torch.device('cpu'),
                  skip=False)
    train_data, val_data = data.get_data(args.dataset,
                                         device,
                                         shuffle=True,
                                         batch_size=args.batch_size,
                                         augment=args.augment)
    input_shape, num_classes = data.get_info(args.dataset)
    writer.writerow(["ID"] +
                    ["Layer {}".format(i)
                     for i in range(args.layers)] + ["Accuracy"] + [
                         "Partition (Tn, Tm)", "Partition (#LUTs)",
                         "Partition (#cycles)", "Total LUT", "Total Throughput"
                     ] + ["Time"])
    child_id, total_time = 0, 0
    logger.info('=' * 50 + "Start exploring quantization space" + '=' * 50)
    best_samples = BestSamples(5)
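    # Fixed child architecture: the network below is trained once and
    # only its quantization parameters are searched in the episode loop.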
    arch_paras = [{
        'filter_height': 3,
        'filter_width': 3,
        'stride_height': 1,
        'stride_width': 1,
        'num_filters': 64,
        'pool_size': 1
    }, {
        'filter_height': 7,
        'filter_width': 5,
        'stride_height': 1,
        'stride_width': 1,
        'num_filters': 48,
        'pool_size': 1
    }, {
        'filter_height': 5,
        'filter_width': 5,
        'stride_height': 2,
        'stride_width': 1,
        'num_filters': 48,
        'pool_size': 1
    }, {
        'filter_height': 3,
        'filter_width': 5,
        'stride_height': 1,
        'stride_width': 1,
        'num_filters': 64,
        'pool_size': 1
    }, {
        'filter_height': 5,
        'filter_width': 7,
        'stride_height': 1,
        'stride_width': 1,
        'num_filters': 36,
        'pool_size': 1
    }, {
        'filter_height': 3,
        'filter_width': 1,
        'stride_height': 1,
        'stride_width': 2,
        'num_filters': 64,
        'pool_size': 2
    }]
    model, optimizer = child.get_model(input_shape,
                                       arch_paras,
                                       num_classes,
                                       device,
                                       multi_gpu=args.multi_gpu,
                                       do_bn=False)
    _, val_acc = backend.fit(model,
                             optimizer,
                             train_data=train_data,
                             val_data=val_data,
                             epochs=args.epochs,
                             verbosity=args.verbosity)
    print(val_acc)
    for e in range(args.episodes):
        logger.info('-' * 130)
        child_id += 1
        start = time.time()
        quan_rollout, quan_paras = agent.rollout()
        logger.info("Sample Quantization ID: {}, Sampled actions: {}".format(
            child_id, quan_rollout))
        fpga_model = FPGAModel(rLUT=args.rLUT,
                               rThroughput=args.rThroughput,
                               arch_paras=arch_paras,
                               quan_paras=quan_paras)
        if fpga_model.validate():
            _, reward = backend.fit(model,
                                    optimizer,
                                    val_data=val_data,
                                    quan_paras=quan_paras,
                                    epochs=1,
                                    verbosity=args.verbosity)
        else:
            reward = 0
        agent.store_rollout(quan_rollout, reward)
        end = time.time()
        ep_time = end - start
        total_time += ep_time
        best_samples.register(child_id, quan_rollout, reward)
        writer.writerow([child_id] +
                        [str(quan_paras[i]) for i in range(args.layers)] +
                        [reward] + list(fpga_model.get_info()) + [ep_time])
        logger.info(f"Reward: {reward}, " + f"Elasped time: {ep_time}, " +
                    f"Average time: {total_time/(e+1)}")
        logger.info(f"Best Reward: {best_samples.reward_list[0]}, " +
                    f"ID: {best_samples.id_list[0]}, " +
                    f"Rollout: {best_samples.rollout_list[0]}")
    logger.info('=' * 50 + "Quantization space exploration finished" +
                '=' * 50)
    logger.info(f"Total elapsed time: {total_time}")
    logger.info(f"Best samples: {best_samples}")
    csvfile.close()
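BestSamples is imported from elsewhere in the project and not shown in this listing. From its usage above (register(id, rollout, reward), plus the parallel id_list/rollout_list/reward_list that the logging reads best-first), a minimal stand-in could look like the sketch below. This is an assumption for readability, not the NAQS implementation:

import bisect

class BestSamples:
    """Hypothetical stand-in: keep the top-k samples sorted by reward,
    exposing the parallel lists the logging code above indexes."""
    def __init__(self, k):
        self.k = k
        self.id_list, self.rollout_list, self.reward_list = [], [], []

    def register(self, sample_id, rollout, reward):
        # Negate rewards so bisect (ascending) yields a descending order.
        pos = bisect.bisect_left([-r for r in self.reward_list], -reward)
        self.id_list.insert(pos, sample_id)
        self.rollout_list.insert(pos, rollout)
        self.reward_list.insert(pos, reward)
        del self.id_list[self.k:], self.rollout_list[self.k:]
        del self.reward_list[self.k:]

    def __repr__(self):
        return str(list(zip(self.id_list, self.rollout_list,
                            self.reward_list)))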
Example #3
File: main.py Project: ND-SCL/NAQS
def nested_search(device, dir='experiment'):
    dir = os.path.join(dir,
                       f"rLut={args.rLUT}, rThroughput={args.rThroughput}")
    if not os.path.exists(dir):
        os.makedirs(dir)
    filepath = os.path.join(dir, f"nested ({args.episodes} episodes)")
    logger = get_logger(filepath)
    csvfile = open(filepath + '.csv', mode='w+', newline='')
    writer = csv.writer(csvfile)
    logger.info(f"INFORMATION")
    logger.info(f"mode: \t\t\t\t\t {'nested'}")
    logger.info(f"dataset: \t\t\t\t {args.dataset}")
    logger.info(f"number of child network layers: \t {args.layers}")
    logger.info(f"include stride: \t\t\t {not args.no_stride}")
    logger.info(f"include pooling: \t\t\t {not args.no_pooling}")
    logger.info(f"skip connection: \t\t\t {args.skip}")
    logger.info(f"required # LUTs: \t\t\t {args.rLUT}")
    logger.info(f"required throughput: \t\t\t {args.rThroughput}")
    logger.info(f"Assumed frequency: \t\t\t {CLOCK_FREQUENCY}")
    logger.info(f"training epochs: \t\t\t {args.epochs}")
    logger.info(f"data augmentation: \t\t\t {args.augment}")
    logger.info(f"batch size: \t\t\t\t {args.batch_size}")
    logger.info(f"controller learning rate: \t\t {args.learning_rate}")
    logger.info(f"architecture episodes: \t\t\t {args.episodes1}")
    logger.info(f"quantization episodes: \t\t\t {args.episodes2}")
    logger.info(f"using multi gpus: \t\t\t {args.multi_gpu}")
    logger.info(f"architecture space: ")
    for name, value in ARCH_SPACE.items():
        logger.info(name + f": \t\t\t\t {value}")
    logger.info(f"quantization space: ")
    for name, value in QUAN_SPACE.items():
        logger.info(name + f": \t\t\t {value}")
    train_data, val_data = data.get_data(args.dataset,
                                         device,
                                         shuffle=True,
                                         batch_size=args.batch_size,
                                         augment=args.augment)
    input_shape, num_classes = data.get_info(args.dataset)
    writer.writerow(["ID"] +
                    ["Layer {}".format(i)
                     for i in range(args.layers)] + ["Accuracy"] + [
                         "Partition (Tn, Tm)", "Partition (#LUTs)",
                         "Partition (#cycles)", "Total LUT", "Total Throughput"
                     ] + ["Time"])
    arch_agent = Agent(ARCH_SPACE,
                       args.layers,
                       lr=args.learning_rate,
                       device=torch.device('cpu'),
                       skip=args.skip)
    arch_id, total_time = 0, 0
    logger.info('=' * 50 + "Start exploring architecture space" + '=' * 50)
    best_arch = BestSamples(5)
    for e1 in range(args.episodes1):
        logger.info('-' * 130)
        arch_id += 1
        start = time.time()
        arch_rollout, arch_paras = arch_agent.rollout()
        logger.info("Sample Architecture ID: {}, Sampled arch: {}".format(
            arch_id, arch_rollout))
        model, optimizer = child.get_model(input_shape,
                                           arch_paras,
                                           num_classes,
                                           device,
                                           multi_gpu=args.multi_gpu,
                                           do_bn=False)
        backend.fit(model,
                    optimizer,
                    train_data,
                    val_data,
                    epochs=args.epochs,
                    verbosity=args.verbosity)
        quan_id = 0
        best_quan_reward = -1
        logger.info('=' * 50 + "Start exploring quantization space" + '=' * 50)
        quan_agent = Agent(QUAN_SPACE,
                           args.layers,
                           lr=args.learning_rate,
                           device=torch.device('cpu'),
                           skip=False)
        for e2 in range(args.episodes2):
            quan_id += 1
            quan_rollout, quan_paras = quan_agent.rollout()
            fpga_model = FPGAModel(rLUT=args.rLUT,
                                   rThroughput=args.rThroughput,
                                   arch_paras=arch_paras,
                                   quan_paras=quan_paras)
            if fpga_model.validate():
                _, quan_reward = backend.fit(model,
                                             optimizer,
                                             val_data=val_data,
                                             quan_paras=quan_paras,
                                             epochs=1,
                                             verbosity=args.verbosity)
            else:
                quan_reward = 0
            logger.info(
                "Sample Quantization ID: {}, Sampled Quantization: {}, reward: {}"
                .format(quan_id, quan_rollout, quan_reward))
            quan_agent.store_rollout(quan_rollout, quan_reward)
            if quan_reward > best_quan_reward:
                best_quan_reward = quan_reward
                best_quan_rollout, best_quan_paras = quan_rollout, quan_paras
        logger.info('=' * 50 + "Quantization space exploration finished" +
                    '=' * 50)
        arch_reward = best_quan_reward
        arch_agent.store_rollout(arch_rollout, arch_reward)
        end = time.time()
        ep_time = end - start
        total_time += ep_time
        best_arch.register(
            arch_id,
            utility.combine_rollout(arch_rollout, best_quan_rollout,
                                    args.layers), arch_reward)
        writer.writerow([arch_id] + [
            str(arch_paras[i]) + '\n' + str(best_quan_paras[i])
            for i in range(args.layers)
        ] + [arch_reward] + list(fpga_model.get_info()) + [ep_time])
        logger.info(f"Reward: {arch_reward}, " + f"Elasped time: {ep_time}, " +
                    f"Average time: {total_time/(e1+1)}")
        logger.info(f"Best Reward: {best_arch.reward_list[0]}, " +
                    f"ID: {best_arch.id_list[0]}, " +
                    f"Rollout: {best_arch.rollout_list[0]}")
    logger.info('=' * 50 +
                "Architecture & quantization space exploration finished" +
                '=' * 50)
    logger.info(f"Total elapsed time: {total_time}")
    logger.info(f"Best samples: {best_arch}")
    csvfile.close()
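A note on cost before the next function: nested_search trains one child network per outer episode and then scores episodes2 quantizations of it with one-epoch validation passes, so total work scales with the product of the two episode counts. A back-of-the-envelope with hypothetical counts:

# Hypothetical episode counts, only to make the cost structure concrete.
episodes1, episodes2, train_epochs = 50, 20, 5
full_trainings = episodes1                    # one child network per outer episode
training_epochs = episodes1 * train_epochs    # 50 * 5 = 250 training epochs
quantized_evals = episodes1 * episodes2       # 50 * 20 = 1000 one-epoch evaluations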
def sync_search(device, dir='experiment'):
    dir = os.path.join(
        dir,
        utility.cleanText(f"rLut-{args.rLUT}_rThroughput-{args.rThroughput}"))
    if not os.path.exists(dir):
        os.makedirs(dir)
    filepath = os.path.join(
        dir, utility.cleanText(f"joint_{args.episodes}-episodes"))
    logger = utility.get_logger(filepath)
    csvfile = open(filepath + '.csv', mode='w+', newline='')
    writer = csv.writer(csvfile)
    tb_writer = SummaryWriter(filepath)

    logger.info(f"INFORMATION")
    logger.info(f"mode: \t\t\t\t\t {'joint'}")
    logger.info(f"dataset: \t\t\t\t {args.dataset}")
    logger.info(f"number of child network layers: \t {args.layers}")
    logger.info(f"seed: \t\t\t\t {args.seed}")
    logger.info(f"gpu: \t\t\t\t {args.gpu}")
    logger.info(f"include batchnorm: \t\t\t {args.batchnorm}")
    logger.info(f"include stride: \t\t\t {not args.no_stride}")
    logger.info(f"include pooling: \t\t\t {not args.no_pooling}")
    logger.info(f"skip connection: \t\t\t {args.skip}")
    logger.info(f"required # LUTs: \t\t\t {args.rLUT}")
    logger.info(f"required throughput: \t\t\t {args.rThroughput}")
    logger.info(f"Assumed frequency: \t\t\t {CLOCK_FREQUENCY}")
    logger.info(f"training epochs: \t\t\t {args.epochs}")
    logger.info(f"data augmentation: \t\t\t {args.augment}")
    logger.info(f"batch size: \t\t\t\t {args.batch_size}")
    logger.info(f"controller learning rate: \t\t {args.learning_rate}")
    logger.info(f"controller learning rate: \t\t {args.learning_rate}")
    logger.info(f"architecture episodes: \t\t\t {args.episodes}")
    logger.info(f"using multi gpus: \t\t\t {args.multi_gpu}")
    logger.info(f"architecture space: ")
    for name, value in ARCH_SPACE.items():
        logger.info(name + f": \t\t\t\t {value}")
    logger.info(f"quantization space: ")
    for name, value in QUAN_SPACE.items():
        logger.info(name + f": \t\t\t {value}")

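    # Joint search: one controller samples from the merged
    # architecture + quantization space; split_paras() later separates
    # each rollout back into the two per-layer parameter sets.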
    agent = Agent({
        **ARCH_SPACE,
        **QUAN_SPACE
    },
                  args.layers,
                  lr=args.learning_rate,
                  device=torch.device('cpu'),
                  skip=args.skip)

    train_data, val_data = data.get_data(args.dataset,
                                         device,
                                         shuffle=True,
                                         batch_size=args.batch_size,
                                         augment=args.augment)

    input_shape, num_classes = data.get_info(args.dataset)
    ## (3,32,32) -> (1,3,32,32) add batch dimension
    sample_input = utility.get_sample_input(device, input_shape)

    writer.writerow(["ID"] +
                    ["Layer {}".format(i)
                     for i in range(args.layers)] + ["Accuracy"] + [
                         "Partition (Tn, Tm)", "Partition (#LUTs)",
                         "Partition (#cycles)", "Total LUT", "Total Throughput"
                     ] + ["Time"])

    arch_id, total_time = 0, 0
    best_reward = float('-inf')

    logger.info('=' * 50 +
                "Start exploring architecture & quantization space" + '=' * 50)
    best_samples = BestSamples(5)

    for e in range(args.episodes):
        logger.info('-' * 130)
        arch_id += 1
        start = time.time()
        rollout, paras = agent.rollout()
        logger.info("Sample Architecture ID: {}, Sampled actions: {}".format(
            arch_id, rollout))
        arch_paras, quan_paras = utility.split_paras(paras)

        fpga_model = FPGAModel(rLUT=args.rLUT,
                               rThroughput=args.rThroughput,
                               arch_paras=arch_paras,
                               quan_paras=quan_paras)

        feasible = fpga_model.validate()
        if feasible:

            model, optimizer = child.get_model(input_shape,
                                               arch_paras,
                                               num_classes,
                                               device,
                                               multi_gpu=args.multi_gpu,
                                               do_bn=args.batchnorm)

            if args.verbosity > 1:
                print(model)
                torchsummary.summary(model, input_shape)

            if args.adapt:
                num_w = utility.get_net_param(model)
                macs = utility.get_net_macs(model, sample_input)
                tb_writer.add_scalar('num_param', num_w, arch_id)
                tb_writer.add_scalar('macs', macs, arch_id)
                if args.verbosity > 1:
                    print(f"# of param: {num_w}, macs: {macs}")

            _, val_acc = backend.fit(model,
                                     optimizer,
                                     train_data,
                                     val_data,
                                     quan_paras=quan_paras,
                                     epochs=args.epochs,
                                     verbosity=args.verbosity)
        else:
            val_acc = 0

        if args.adapt:
            ## TODO: how to make arch_reward function with macs and latency?
            arch_reward = val_acc
        else:
            arch_reward = val_acc

        agent.store_rollout(rollout, arch_reward)
        end = time.time()
        ep_time = end - start
        total_time += ep_time
        best_samples.register(arch_id, rollout, arch_reward)

        tb_writer.add_scalar('val_acc', val_acc, arch_id)
        tb_writer.add_scalar('arch_reward', arch_reward, arch_id)

        if arch_reward > best_reward:
            best_reward = arch_reward
            tb_writer.add_scalar('best_reward', best_reward, arch_id)
            if feasible:
                # `model` is only defined for feasible rollouts; guard so an
                # early infeasible episode cannot raise a NameError here.
                tb_writer.add_graph(model.eval(), (sample_input, ))

        writer.writerow([arch_id] +
                        [str(paras[i])
                         for i in range(args.layers)] + [arch_reward] +
                        list(fpga_model.get_info()) + [ep_time])
        logger.info(f"Reward: {arch_reward}, " + f"Elasped time: {ep_time}, " +
                    f"Average time: {total_time/(e+1)}")
        logger.info(f"Best Reward: {best_samples.reward_list[0]}, " +
                    f"ID: {best_samples.id_list[0]}, " +
                    f"Rollout: {best_samples.rollout_list[0]}")
    logger.info('=' * 50 +
                "Architecture & quantization space exploration finished" +
                '=' * 50)
    logger.info(f"Total elapsed time: {total_time}")
    logger.info(f"Best samples: {best_samples}")
    tb_writer.close()
    csvfile.close()
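The args.adapt branch above leaves its TODO unresolved: both arms currently use plain val_acc. One possible shape for a MACs-aware reward, offered purely as a sketch (alpha and macs_budget are invented parameters, not values from the NAQS code):

def adaptive_reward(val_acc, macs, macs_budget, alpha=0.2):
    """Hypothetical reward shaping for the TODO above: keep validation
    accuracy as the base signal and subtract a penalty proportional to
    the share of a MACs budget the sampled network consumes."""
    penalty = alpha * min(macs / macs_budget, 1.0)
    return max(val_acc - penalty, 0.0)

# e.g. adaptive_reward(0.91, macs=3.2e8, macs_budget=4e8) -> roughly 0.75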