Exemple #1
0
    def __init__(self, resource_conf, down_rate):
        """Initialise the Hyperband search state.

        Args:
            resource_conf: maximum budget for a single configuration (R),
                i.e. the maximum iterations per configuration.
            down_rate: configuration downsampling rate (eta).
        """
        # maximum budget for a single configuration
        self.R = resource_conf
        # configuration downsampling rate
        self.eta = down_rate

        # controls how many runs: floor(log_eta(R)).
        # A floating-point log can land just below an exact integer
        # (e.g. log(243, 3) == 4.999...), so the floored value is verified
        # against integer powers of eta and corrected by one step if needed.
        s_max = floor(log(self.R, self.eta))
        if self.eta > 1:
            if self.eta ** (s_max + 1) <= self.R:
                s_max += 1
            elif self.eta ** s_max > self.R:
                s_max -= 1
        self.s_max = s_max

        # maximum budget for all configurations
        self.B = (self.s_max + 1) * self.R
        # list of results
        self.results = []

        # parameters for results
        self.counter = 0
        # -np.inf instead of np.NINF: the alias was removed in NumPy 2.0
        self.best_acc = -np.inf
        self.best_counter = -1

        # parameters for workload
        self.hp_model_arch = cfg_para.hyperband_model_type_list
        self.hp_batch_size = cfg_para.hyperband_batch_size_list
        self.hp_opt = cfg_para.hyperband_optimizer_list
        self.hp_learn_rate = cfg_para.hyperband_learn_rate_list
        self.hp_activation = cfg_para.hyperband_activation_list
        self.hp_random_seed = cfg_para.hyperband_random_seed

        # training dataset and its image geometry / class count
        self.hp_dataset = cfg_para.hyperband_train_dataset
        (self.img_width,
         self.img_height,
         self.num_channel,
         self.num_class) = load_dataset_para(self.hp_dataset)
Exemple #2
0
def evaluate_pack_model(tf_sess, feature_ph, label_ph, pack_model):
    """Evaluate every model of a pack on the hyperband evaluation dataset.

    Args:
        tf_sess: session used to run the evaluation ops.
        feature_ph: placeholder fed with the evaluation features.
        label_ph: placeholder fed with the evaluation labels.
        pack_model: iterable of evaluation ops, one per packed model.

    Returns:
        A list with one accuracy value per op in ``pack_model``, in the
        same order.
    """
    print("start to evaluate")
    hyperband_dataset = cfg_para.hyperband_train_dataset
    img_width, img_height, _, _ = load_dataset_para(hyperband_dataset)
    feature_input, label_input = load_eval_dataset(hyperband_dataset)

    acc_pack = []

    if hyperband_dataset == 'imagenet':
        # For imagenet, feature_input is a directory of image files that is
        # read and evaluated batch by batch.
        imagenet_batch_size_eval = 50
        num_batch_eval = label_input.shape[0] // imagenet_batch_size_eval
        test_image_list = sorted(os.listdir(feature_input))
        for eval_op in pack_model:
            # Reset the accumulator for every model; otherwise each model's
            # average would also include the sums of all previous models.
            acc_sum = 0
            for n in range(num_batch_eval):
                batch_offset = n * imagenet_batch_size_eval
                batch_end = batch_offset + imagenet_batch_size_eval
                eval_batch_list = test_image_list[batch_offset:batch_end]
                eval_feature_batch = load_imagenet_raw(
                    feature_input, eval_batch_list, img_height, img_width)
                eval_label_batch = label_input[batch_offset:batch_end]
                acc_batch = tf_sess.run(eval_op,
                                        feed_dict={
                                            feature_ph: eval_feature_batch,
                                            label_ph: eval_label_batch
                                        })
                acc_sum += acc_batch
            acc_avg = acc_sum / num_batch_eval
            acc_pack.append(acc_avg)
    else:
        # Other datasets are fed to each eval op in a single run.
        for eval_op in pack_model:
            acc_avg = tf_sess.run(eval_op,
                                  feed_dict={
                                      feature_ph: feature_input,
                                      label_ph: label_input
                                  })
            acc_pack.append(acc_avg)

    return acc_pack
Exemple #3
0
def train_pack():
    """Train all configured models together in one TensorFlow session.

    Every model is built on the same feature/label placeholders, and all
    train ops are executed in a single ``sess.run`` per step, using batches
    of the largest configured batch size. Prints the overall training time
    and the average duration of the timed steps.
    """
    print('start training pack')

    rand_seed_pack = cfg_para.multi_rand_seed

    model_type_list = cfg_para.multi_model_type
    optimizer_list = cfg_para.multi_opt
    num_layer_list = cfg_para.multi_num_layer
    activation_list = cfg_para.multi_activation
    batch_size_list = cfg_para.multi_batch_size
    learning_rate_list = cfg_para.multi_learning_rate

    # Padding is only requested when the models use different batch sizes.
    is_batch_padding = len(set(batch_size_list)) != 1

    num_epoch = cfg_para.multi_num_epoch
    train_dataset = cfg_para.multi_train_dataset
    # NOTE(review): this reads the `single_` timeline flag although every
    # other setting here is `multi_` -- confirm this is intended.
    use_tf_timeline = cfg_para.single_use_tb_timeline

    max_batch_size = max(batch_size_list)

    #################################################
    # load dataset
    #################################################

    img_width, img_height, num_channel, num_class = load_dataset_para(
        train_dataset)
    train_feature_input, train_label_input = load_train_dataset(train_dataset)

    #########################
    # build packed model
    #########################

    features = tf.placeholder(tf.float32,
                              [None, img_width, img_height, num_channel])
    labels = tf.placeholder(tf.int64, [None, num_class])

    # One distinct identifier per model, drawn without replacement.
    model_name_abbr = np.random.choice(rand_seed_pack,
                                       len(model_type_list),
                                       replace=False).tolist()
    train_op_pack = []

    for midx, mt in enumerate(model_type_list):
        dm = ModelImporter(mt,
                           str(model_name_abbr.pop()),
                           num_layer_list[midx],
                           img_height,
                           img_width,
                           num_channel,
                           num_class,
                           batch_size_list[midx],
                           optimizer_list[midx],
                           learning_rate_list[midx],
                           activation_list[midx],
                           batch_padding=is_batch_padding)

        model_entity = dm.get_model_entity()
        model_logit = model_entity.build(features, is_training=True)
        train_op = model_entity.train(model_logit, labels)
        train_op_pack.append(train_op)

    #########################
    # train packed model
    #########################

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.allow_soft_placement = True

    step_time = 0
    step_count = 0

    if train_dataset == 'imagenet':
        # For imagenet, train_feature_input is a directory of image files.
        image_list = sorted(os.listdir(train_feature_input))

    overall_time_start = timer()

    with tf.Session(config=config) as sess:
        sess.run(tf.global_variables_initializer())
        num_batch = train_label_input.shape[0] // max_batch_size

        for e in range(num_epoch):
            for i in range(num_batch):
                print('epoch %d / %d, step %d / %d' %
                      (e + 1, num_epoch, i + 1, num_batch))

                # The first step of every epoch is excluded from timing.
                if i != 0:
                    start_time = timer()

                batch_offset = i * max_batch_size
                batch_end = (i + 1) * max_batch_size
                if train_dataset == 'imagenet':
                    batch_list = image_list[batch_offset:batch_end]
                    train_feature_batch = load_imagenet_raw(
                        train_feature_input, batch_list, img_height, img_width)
                else:
                    train_feature_batch = train_feature_input[
                        batch_offset:batch_end]

                train_label_batch = train_label_input[batch_offset:batch_end]

                step_feed = {
                    features: train_feature_batch,
                    labels: train_label_batch
                }

                if use_tf_timeline:
                    profile_path = cfg_path.profile_path
                    run_options = tf.RunOptions(
                        trace_level=tf.RunOptions.FULL_TRACE)
                    run_metadata = tf.RunMetadata()

                    sess.run(train_op_pack,
                             feed_dict=step_feed,
                             options=run_options,
                             run_metadata=run_metadata)

                    trace = timeline.Timeline(
                        step_stats=run_metadata.step_stats)
                    trace_path = (profile_path + '/' +
                                  '-'.join(map(str, set(model_type_list))) +
                                  '-' + str(len(model_type_list)) +
                                  '-'.join(map(str, set(batch_size_list))) +
                                  '-' + str(i) + '.json')
                    # Context manager so the per-step trace file is flushed
                    # and closed instead of leaking one handle per step.
                    with open(trace_path, 'w') as trace_file:
                        trace_file.write(
                            trace.generate_chrome_trace_format(
                                show_dataflow=True, show_memory=True))
                else:
                    sess.run(train_op_pack, feed_dict=step_feed)

                if i != 0:
                    end_time = timer()
                    dur_time = end_time - start_time
                    print("step time:", dur_time)
                    step_time += dur_time
                    step_count += 1

    overall_time_end = timer()
    overall_time = overall_time_end - overall_time_start
    # No step is timed when there is at most one batch per epoch; report NaN
    # instead of failing with ZeroDivisionError after training finished.
    if step_count:
        avg_step_ms = step_time / step_count * 1000
    else:
        avg_step_ms = float('nan')
    print(
        f'overall training time (s):{overall_time}, average step time (ms):{avg_step_ms}'
    )
Exemple #4
0
def train_sequential():
    """Build all configured models and train them one after another.

    Each model gets its own feature/label placeholder pair, stored in the
    module globals under ``features<idx>`` / ``labels<idx>``. Every train op
    is then run by ``train_model`` in a separate process; each process is
    joined before the next one starts, so training is strictly sequential.
    Prints the total wall-clock training time.
    """
    print('start training sequential')

    rand_seed = cfg_para.multi_rand_seed

    model_type_list = cfg_para.multi_model_type
    optimizer_list = cfg_para.multi_opt
    num_layer_list = cfg_para.multi_num_layer
    activation_list = cfg_para.multi_activation
    batch_size_list = cfg_para.multi_batch_size
    learning_rate_list = cfg_para.multi_learning_rate

    train_dataset = cfg_para.multi_train_dataset

    ##########################################
    # load dataset parameters
    ##########################################

    img_width, img_height, num_channel, num_class = load_dataset_para(
        train_dataset)

    ##########################################
    # build models
    ##########################################

    # Placeholders live in the module globals so that they can be looked up
    # by name ('features<idx>' / 'labels<idx>') inside train_model.
    names = globals()
    for idx, _ in enumerate(model_type_list):
        names['features' + str(idx)] = tf.placeholder(
            tf.float32, [None, img_width, img_height, num_channel])
        names['labels' + str(idx)] = tf.placeholder(tf.int64,
                                                    [None, num_class])

    train_op_list = []
    # One distinct identifier per model, drawn without replacement.
    model_name_abbr = np.random.choice(rand_seed,
                                       len(model_type_list),
                                       replace=False).tolist()
    for midx, mvalue in enumerate(model_type_list):
        # Height is passed before width, matching the other ModelImporter
        # call sites (train_pack, train_single, train_model).
        # NOTE(review): confirm against the ModelImporter signature.
        dm = ModelImporter(mvalue,
                           str(model_name_abbr.pop()),
                           num_layer_list[midx],
                           img_height,
                           img_width,
                           num_channel,
                           num_class,
                           batch_size_list[midx],
                           optimizer_list[midx],
                           learning_rate_list[midx],
                           activation_list[midx],
                           batch_padding=False)

        model_entity = dm.get_model_entity()
        model_logit = model_entity.build(names['features' + str(midx)],
                                         is_training=True)
        train_op = model_entity.train(model_logit, names['labels' + str(midx)])
        train_op_list.append(train_op)

    #########################
    # train models
    #########################

    start_time = timer()
    for tidx, tm in enumerate(train_op_list):
        p = Process(target=train_model,
                    args=(tm, batch_size_list[tidx], model_type_list[tidx],
                          tidx, names))
        p.start()
        # Wait for this model to finish before launching the next one.
        p.join()
    end_time = timer()
    dur_time = end_time - start_time
    print(f'total training time(s): {dur_time}')
Exemple #5
0
def train_model(train_step_arg, batch_size_arg, model_type_arg, tidx_arg,
                global_args):
    """Run one prebuilt train op over the configured dataset and epochs.

    Args:
        train_step_arg: the train op to execute each step.
        batch_size_arg: number of samples per training batch.
        model_type_arg: model type, used only in the trace file name.
        tidx_arg: index selecting this model's placeholders.
        global_args: mapping that holds the placeholders under the keys
            ``'features' + str(tidx_arg)`` and ``'labels' + str(tidx_arg)``.
    """
    train_dataset = cfg_para.multi_train_dataset
    num_epoch = cfg_para.multi_num_epoch
    use_tf_timeline = cfg_para.multi_use_tb_timeline
    use_cpu = cfg_para.multi_use_cpu

    train_device = '/cpu:0' if use_cpu else '/gpu:0'

    # Only the image geometry is needed here (for loading raw imagenet data).
    img_width, img_height, _, _ = load_dataset_para(train_dataset)
    train_feature_input, train_label_input = load_train_dataset(train_dataset)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.allow_soft_placement = True

    if train_dataset == 'imagenet':
        # For imagenet, train_feature_input is a directory of image files.
        image_list = sorted(os.listdir(train_feature_input))

    feature_ph = global_args['features' + str(tidx_arg)]
    label_ph = global_args['labels' + str(tidx_arg)]

    with tf.device(train_device):
        with tf.Session(config=config) as sess:
            sess.run(tf.global_variables_initializer())
            num_batch = train_label_input.shape[0] // batch_size_arg

            for e in range(num_epoch):
                for i in range(num_batch):
                    print('epoch %d / %d, step %d / %d' %
                          (e + 1, num_epoch, i + 1, num_batch))

                    batch_offset = i * batch_size_arg
                    batch_end = (i + 1) * batch_size_arg
                    if train_dataset == 'imagenet':
                        batch_list = image_list[batch_offset:batch_end]
                        feature_batch = load_imagenet_raw(
                            train_feature_input, batch_list, img_height,
                            img_width)
                    else:
                        feature_batch = train_feature_input[
                            batch_offset:batch_end]

                    label_batch = train_label_input[batch_offset:batch_end]

                    step_feed = {
                        feature_ph: feature_batch,
                        label_ph: label_batch
                    }

                    if use_tf_timeline:
                        profile_path = cfg_path.profile_path
                        run_options = tf.RunOptions(
                            trace_level=tf.RunOptions.FULL_TRACE)
                        run_metadata = tf.RunMetadata()
                        sess.run(train_step_arg,
                                 feed_dict=step_feed,
                                 options=run_options,
                                 run_metadata=run_metadata)
                        trace = timeline.Timeline(
                            step_stats=run_metadata.step_stats)
                        trace_path = (profile_path + '/' +
                                      str(model_type_arg) + '-' +
                                      str(batch_size_arg) + '-' + str(i) +
                                      '.json')
                        # Context manager so the per-step trace file is
                        # flushed and closed instead of leaking a handle.
                        with open(trace_path, 'w') as trace_file:
                            trace_file.write(
                                trace.generate_chrome_trace_format(
                                    show_dataflow=True, show_memory=True))
                    else:
                        sess.run(train_step_arg, feed_dict=step_feed)
Exemple #6
0
def train_single():
    """Build, train and evaluate the single model described by ``cfg_para``.

    Trains for the configured number of epochs, then runs the evaluation op
    once on the evaluation dataset. Prints the evaluation accuracy, the
    overall elapsed time and the average duration of the timed steps.
    """
    print('start training single')
    rand_seed = cfg_para.single_rand_seed
    num_epoch = cfg_para.single_num_epoch

    model_type = cfg_para.single_model_type
    num_layer = cfg_para.single_num_layer
    learning_rate = cfg_para.single_learning_rate
    activation = cfg_para.single_activation
    batch_size = cfg_para.single_batch_size
    optimizer = cfg_para.single_opt

    train_dataset = cfg_para.single_train_dataset
    use_tf_timeline = cfg_para.single_use_tb_timeline
    use_cpu = cfg_para.single_use_cpu

    train_device = '/cpu:0' if use_cpu else '/gpu:0'

    ##########################################
    # load dataset
    ##########################################

    img_width, img_height, num_channel, num_class = load_dataset_para(
        train_dataset)
    train_feature_input, train_label_input = load_train_dataset(train_dataset)
    eval_feature_input, eval_label_input = load_eval_dataset(train_dataset)

    ##########################################
    # build model
    ##########################################

    feature_ph = tf.placeholder(tf.float32,
                                [None, img_width, img_height, num_channel])
    label_ph = tf.placeholder(tf.int64, [None, num_class])

    model_name_abbr = np.random.choice(rand_seed, 1, replace=False).tolist()

    dm = ModelImporter(model_type,
                       str(model_name_abbr.pop()),
                       num_layer,
                       img_height,
                       img_width,
                       num_channel,
                       num_class,
                       batch_size,
                       optimizer,
                       learning_rate,
                       activation,
                       batch_padding=False)

    model_entity = dm.get_model_entity()
    model_logit = model_entity.build(feature_ph, is_training=True)
    train_op = model_entity.train(model_logit, label_ph)
    eval_op = model_entity.evaluate(model_logit, label_ph)

    ##########################################
    # train model
    ##########################################

    step_time = 0
    step_count = 0

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.allow_soft_placement = True

    if train_dataset == 'imagenet':
        # For imagenet, train_feature_input is a directory of image files.
        image_list = sorted(os.listdir(train_feature_input))

    overall_time_start = timer()
    with tf.device(train_device):
        with tf.Session(config=config) as sess:
            sess.run(tf.global_variables_initializer())
            num_batch = train_label_input.shape[0] // batch_size

            for e in range(num_epoch):
                for i in range(num_batch):
                    print('epoch %d / %d, step %d / %d' %
                          (e + 1, num_epoch, i + 1, num_batch))

                    # The first step of every epoch is excluded from timing.
                    if i != 0:
                        start_time = timer()

                    batch_offset = i * batch_size
                    batch_end = (i + 1) * batch_size
                    if train_dataset == 'imagenet':
                        batch_list = image_list[batch_offset:batch_end]
                        train_feature_batch = load_imagenet_raw(
                            train_feature_input, batch_list, img_height,
                            img_width)
                    else:
                        train_feature_batch = train_feature_input[
                            batch_offset:batch_end]

                    train_label_batch = train_label_input[
                        batch_offset:batch_end]

                    step_feed = {
                        feature_ph: train_feature_batch,
                        label_ph: train_label_batch
                    }

                    if use_tf_timeline:
                        profile_path = cfg_path.profile_path
                        run_options = tf.RunOptions(
                            trace_level=tf.RunOptions.FULL_TRACE)
                        run_metadata = tf.RunMetadata()
                        sess.run(train_op,
                                 feed_dict=step_feed,
                                 options=run_options,
                                 run_metadata=run_metadata)
                        trace = timeline.Timeline(
                            step_stats=run_metadata.step_stats)
                        trace_path = (profile_path + '/' + str(model_type) +
                                      '-' + str(batch_size) + '-' + str(i) +
                                      '.json')
                        # Context manager so the per-step trace file is
                        # flushed and closed instead of leaking a handle.
                        with open(trace_path, 'w') as trace_file:
                            trace_file.write(
                                trace.generate_chrome_trace_format(
                                    show_dataflow=True, show_memory=True))
                    else:
                        sess.run(train_op, feed_dict=step_feed)

                    if i != 0:
                        end_time = timer()
                        dur_time = end_time - start_time
                        print("step time:", dur_time)
                        step_time += dur_time
                        step_count += 1

            # NOTE(review): the evaluation features are fed directly; for
            # 'imagenet' the feature input appears to be a directory rather
            # than an array -- confirm this path works for that dataset.
            acc_avg = sess.run(eval_op,
                               feed_dict={
                                   feature_ph: eval_feature_input,
                                   label_ph: eval_label_input
                               })

    print('evaluation accuracy:{}'.format(acc_avg))

    overall_time_end = timer()
    overall_time = overall_time_end - overall_time_start

    # No step is timed when there is at most one batch per epoch; report NaN
    # instead of failing with ZeroDivisionError after training finished.
    if step_count:
        avg_step_ms = step_time / step_count * 1000
    else:
        avg_step_ms = float('nan')

    print(
        f'overall training time (s):{overall_time}, average step time (ms):{avg_step_ms}'
    )
Exemple #7
0
def train_model(job_id):
    """Build and train the model selected by ``job_id`` from the multi config.

    Args:
        job_id: index into the ``cfg_para.multi_*`` lists selecting the
            model's hyperparameters; also used as the model identifier.

    Returns:
        A string reporting the average duration (ms) of the timed steps for
        this model; the average is NaN when no step was timed.
    """
    model_type = cfg_para.multi_model_type[job_id]
    num_layer = cfg_para.multi_num_layer[job_id]
    activation = cfg_para.multi_activation[job_id]
    batch_size = cfg_para.multi_batch_size[job_id]
    learning_rate = cfg_para.multi_learning_rate[job_id]
    optimizer = cfg_para.multi_opt[job_id]

    num_epoch = cfg_para.multi_num_epoch
    train_dataset = cfg_para.multi_train_dataset
    use_tf_timeline = cfg_para.multi_use_tb_timeline
    use_cpu = cfg_para.multi_use_cpu

    train_device = '/cpu:0' if use_cpu else '/gpu:0'

    model_name = '{0}-{1}-{2}-{3}-{4}-{5}-{6}-{7}'.format(
        job_id, model_type, num_layer, batch_size, learning_rate, optimizer,
        num_epoch, train_dataset)

    ##########################################
    # load dataset
    ##########################################

    img_width, img_height, num_channel, num_class = load_dataset_para(
        train_dataset)
    train_feature_input, train_label_input = load_train_dataset(train_dataset)

    ##########################################
    # build model
    ##########################################

    features = tf.placeholder(tf.float32,
                              [None, img_width, img_height, num_channel])
    labels = tf.placeholder(tf.int64, [None, num_class])

    dm = ModelImporter(model_type,
                       str(job_id),
                       num_layer,
                       img_height,
                       img_width,
                       num_channel,
                       num_class,
                       batch_size,
                       optimizer,
                       learning_rate,
                       activation,
                       batch_padding=False)

    model_entity = dm.get_model_entity()
    model_logit = model_entity.build(features, is_training=True)
    train_op = model_entity.train(model_logit, labels)

    ##########################################
    # train model
    ##########################################

    step_time = 0
    step_count = 0

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.allow_soft_placement = True

    if train_dataset == 'imagenet':
        # For imagenet, train_feature_input is a directory of image files.
        image_list = sorted(os.listdir(train_feature_input))

    with tf.device(train_device):
        with tf.Session(config=config) as sess:
            sess.run(tf.global_variables_initializer())
            num_batch = train_label_input.shape[0] // batch_size

            for e in range(num_epoch):
                for i in range(num_batch):
                    print('epoch %d / %d, step %d / %d' %
                          (e + 1, num_epoch, i + 1, num_batch))

                    # The first step of every epoch is excluded from timing.
                    if i != 0:
                        start_time = timer()

                    batch_offset = i * batch_size
                    batch_end = (i + 1) * batch_size
                    if train_dataset == 'imagenet':
                        batch_list = image_list[batch_offset:batch_end]
                        train_feature_batch = load_imagenet_raw(
                            train_feature_input, batch_list, img_height,
                            img_width)
                    else:
                        train_feature_batch = train_feature_input[
                            batch_offset:batch_end]

                    train_label_batch = train_label_input[
                        batch_offset:batch_end]

                    step_feed = {
                        features: train_feature_batch,
                        labels: train_label_batch
                    }

                    if use_tf_timeline:
                        profile_path = cfg_path.profile_path
                        run_options = tf.RunOptions(
                            trace_level=tf.RunOptions.FULL_TRACE)
                        run_metadata = tf.RunMetadata()
                        sess.run(train_op,
                                 feed_dict=step_feed,
                                 options=run_options,
                                 run_metadata=run_metadata)

                        trace = timeline.Timeline(
                            step_stats=run_metadata.step_stats)
                        trace_path = (profile_path + '/' + str(model_type) +
                                      '-' + str(batch_size) + '-' + str(i) +
                                      '.json')
                        # Context manager so the per-step trace file is
                        # flushed and closed instead of leaking a handle.
                        with open(trace_path, 'w') as trace_file:
                            trace_file.write(
                                trace.generate_chrome_trace_format(
                                    show_dataflow=True, show_memory=True))
                    else:
                        sess.run(train_op, feed_dict=step_feed)

                    if i != 0:
                        end_time = timer()
                        dur_time = end_time - start_time
                        print("step time:", dur_time)
                        step_time += dur_time
                        step_count += 1

    # No step is timed when there is at most one batch per epoch; report NaN
    # instead of failing with ZeroDivisionError after training finished.
    if step_count:
        avg_step_ms = step_time / step_count * 1000
    else:
        avg_step_ms = float('nan')
    step_time_result = f'average step time (ms) of {model_name}: {avg_step_ms}'
    return step_time_result