Code example #1
File: doSingle.py    Project: thaarres/hls4ml_cnns
def toHLS(p,r,m,doQK=False,intbits_a=0,odir='cnn_projects'):
  if doQK:
    model = tf.keras.models.load_model('models/{}/model_best.h5'.format(m),custom_objects={'PruneLowMagnitude': pruning_wrapper.PruneLowMagnitude,'QDense': QDense, 'QConv2D': QConv2D, 'Clip': Clip, 'QActivation': QActivation,'QBatchNormalization':QBatchNormalization})
    model = strip_pruning(model)
    hls_model = getQKeras(model=model,model_name=m,precision=p,reuse=r,intbits_a=intbits_a,odir=odir)
  
  else:
    model = tf.keras.models.load_model('models/{}/model_best.h5'.format(m),custom_objects={'PruneLowMagnitude': pruning_wrapper.PruneLowMagnitude,'QDense': QDense, 'QConv2D': QConv2D, 'Clip': Clip, 'QActivation': QActivation})
    model  = strip_pruning(model)  
    hls_model = getBaseline(model=model,model_name=m,precision=p,reuse=r,intbits_a=intbits_a,odir=odir)
  (x_train, y_train), (x_test, y_test) = getNumpyData('svhn_cropped',oneHot=True)
  wp,ap = numerical(model=model, hls_model=hls_model, X=x_test[:1000])
  ap.axes[0].set_title("")
  add_logo(ap.axes[0], ap, 0.3, position='upper left')
  labels = [item.get_text().replace('batch_normalization','Batch norm.').replace('max_pooling2d','Max Pooling').replace('_',' ').capitalize() for item in ap.axes[0].get_yticklabels()]
  ap.axes[0].set_yticklabels(labels)
  ap.axes[0].set_xlabel('Output')
  ap.axes[0].set_xlim([10.455191523E-13,64])
  ap.savefig('plots/Profile_{}_activations.pdf'.format(model.name))
  del ap
  wp.axes[0].set_title("")
  # Map raw layer names to readable tick labels; the order of replacements matters.
  replacements = [
      ('batch_normalization', 'Batch norm.'), ('max_pooling2d', 'Max Pooling'), ('_', ' '),
      ('0 0', '0, w'), ('0 1', '0, b'), ('1 0', '1, w'), ('1 1', '1, b'),
      ('2 0', '2, w'), ('2 1', '2, b'), ('3 0', '3, w'), ('3 1', '3, b'),
      ('4 0', '4, w'), ('4 1', '4, b'), ('5 0', '5, w'), ('5 1', '5, b'),
      ('output dense 0', 'output, w'), ('output dense 1', 'output, b'),
      ('norm. 0', 'norm., w'), ('norm. 1', 'norm., b'), (', b,', ' 1,'),
      ('output 0', 'output, w'), ('output 1', 'output, b')]
  labels = []
  for item in wp.axes[0].get_yticklabels():
    label = item.get_text()
    for old, new in replacements:
      label = label.replace(old, new)
    labels.append(label.capitalize())
  # labels = [item.replace('Dense','Fused dense + b n.').replace('Output Fused dense + b n.','Output dense') for item in labels]
  wp.axes[0].set_yticklabels(labels)
  wp.axes[0].set_xlim([0.0000009,64])
  wp.axes[0].set_xlabel('Weight')
  add_logo(wp.axes[0], wp, 0.3, position='upper left')
  wp.savefig('plots/Profile_{}_weights.pdf'.format(model.name))
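Note: a driver script typically sweeps this function over precision and reuse factor. A minimal invocation sketch; the precision/reuse values and the model name below are hypothetical, not taken from the project:

for precision in [2, 4, 8, 16]:
    toHLS(p=precision, r=1, m='full', doQK=True, intbits_a=6)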
Code example #2
File: doTimingScans.py    Project: thaarres/hls4ml_cnns
def getReports(mname,p,x_test, y_test):
  
  data_ = {}
  
  indir = '/eos/home-t/thaarres/hls4ml_cnns/synthesized_cnns_v4/{}_{}bit_reuse1/'.format(mname,p)
  report_vsynth = Path('{}/vivado_synth.rpt'.format(indir))
  report_csynth = Path('{}/myproject_prj/solution1/syn/report/myproject_csynth.rpt'.format(indir))
  
  if report_vsynth.is_file() and report_csynth.is_file():
    print('Found valid vsynth and csynth reports! Fetching numbers')
    
  
    if 'full' in mname:
      data_['w']= int(p)
      if int(p)>9:
        model_  = tf.keras.models.load_model('models/{}.h5'.format(mname),custom_objects={'PruneLowMagnitude': pruning_wrapper.PruneLowMagnitude,'QDense': QDense, 'QConv2D': QConv2D, 'Clip': Clip, 'QActivation': QActivation})
        model_ = strip_pruning(model_)
        data_['accuracy_keras'], data_['accuracy_hls4ml'] = getBaseline(model_,mname,p,x_test, y_test)
      else:
        data_['accuracy_keras'] = 0.01
        data_['accuracy_hls4ml'] = 0.01
          
    else:
      data_['w'] = int(p) + 1  # p arrives as a string
      print('models/'+mname+'_{}bit_0/model_best.h5'.format(p))
      model_ = tf.keras.models.load_model('models/'+mname+'_{}bit_0/model_best.h5'.format(p),custom_objects={'PruneLowMagnitude': pruning_wrapper.PruneLowMagnitude,'QDense': QDense, 'QConv2D': QConv2D, 'Clip': Clip, 'QActivation': QActivation})
      model_ = strip_pruning(model_)
      data_['accuracy_keras'], data_['accuracy_hls4ml'] = getQKeras(model_,mname,p,x_test, y_test)
    
    
    print('Accuracy: Keras={} hls4ml={}'.format(data_['accuracy_keras'], data_['accuracy_hls4ml']))
    
    
    # Get the resources from the logic synthesis report 
    with report_vsynth.open() as report:
      lines = np.array(report.readlines())
      data_['lut']     = int(lines[np.array(['CLB LUTs*' in line for line in lines])][0].split('|')[2])
      data_['ff']      = int(lines[np.array(['CLB Registers' in line for line in lines])][0].split('|')[2])
      data_['bram']    = float(lines[np.array(['Block RAM Tile' in line for line in lines])][0].split('|')[2])
      data_['dsp']     = int(lines[np.array(['DSPs' in line for line in lines])][0].split('|')[2])
      data_['lut_rel'] = float(lines[np.array(['CLB LUTs*' in line for line in lines])][0].split('|')[5])
      data_['ff_rel']  = float(lines[np.array(['CLB Registers' in line for line in lines])][0].split('|')[5])
      data_['bram_rel']= float(lines[np.array(['Block RAM Tile' in line for line in lines])][0].split('|')[5])
      data_['dsp_rel'] = float(lines[np.array(['DSPs' in line for line in lines])][0].split('|')[5])
    
    with report_csynth.open() as report:
      lines = np.array(report.readlines())
      lat_line = lines[np.argwhere(np.array(['Latency (clock cycles)' in line for line in lines])).flatten()[0] + 6]
      data_['latency_clks'] = int(lat_line.split('|')[2])
      data_['latency_ns']   = float(lat_line.split('|')[2])*5.0
      data_['latency_ii']   = int(lat_line.split('|')[4])
    
    return data_
  
  else:
    # print('No synth reports found! Returning empty dict')
    return data_
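Note: getReports returns a plain dict (empty when the reports are missing), so a bit-width scan collects naturally into a pandas DataFrame. A minimal sketch with a hypothetical precision range:

import pandas as pd

rows = [getReports('full', p, x_test, y_test) for p in range(2, 17)]
df = pd.DataFrame([r for r in rows if r])  # drop empty dicts from missing reports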
Code example #3
def train_pruned_model(model, dataset, vocab):
    # 'model' is a constructor (class or factory), not a model instance
    pruned_model = model(vocab_size=len(vocab),
                         embedding_dim=FLAGS.embedding_dim,
                         rnn_units=FLAGS.RNN_units,
                         batch_size=FLAGS.batch_size)

    logdir = tempfile.mkdtemp()
    callbacks = [
        sparsity.UpdatePruningStep(),
        sparsity.PruningSummaries(log_dir=logdir, profile_batch=0)
    ]

    pruned_model.compile(optimizer='adam', loss=loss)
    pruned_model.fit(dataset, epochs=FLAGS.num_epochs, callbacks=callbacks)

    # Save the pruned model for size comparison later
    _, checkpoint_file = tempfile.mkstemp(str(FLAGS.final_sparsity) +
                                          '_pruned.h5',
                                          dir='models/')
    print('Saving pruned model to: ', checkpoint_file)
    tf.keras.models.save_model(pruned_model,
                               checkpoint_file,
                               include_optimizer=False)

    # Strip the pruning wrappers from the pruned model; they are only needed during training
    final_pruned_model = sparsity.strip_pruning(pruned_model)

    _, pruned_keras_file = tempfile.mkstemp('_final_pruned.h5', dir='models/')
    print('Saving pruned model to: ', pruned_keras_file)
    tf.keras.models.save_model(final_pruned_model,
                               pruned_keras_file,
                               include_optimizer=False)

    return pruned_model, final_pruned_model
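Note: the two .h5 files saved above only show a size benefit once compressed, because pruned weights are stored as explicit zeros. A minimal sketch of the usual gzip comparison, using the paths printed by train_pruned_model (the helper name is ours, not from the original script):

import gzip
import os
import shutil

def gzipped_size(path):
    # Compress a saved model file; sparse weights compress far better after stripping.
    gz_path = path + '.gz'
    with open(path, 'rb') as f_in, gzip.open(gz_path, 'wb') as f_out:
        shutil.copyfileobj(f_in, f_out)
    return os.path.getsize(gz_path)

print('with pruning wrappers:', gzipped_size(checkpoint_file), 'bytes')
print('stripped:', gzipped_size(pruned_keras_file), 'bytes')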
Code example #4
def go(batch_size, epochs, dataset):

    num_classes = 10

    x_train, y_train, x_test, y_test, input_shape = get_data(dataset, num_classes)

    print('x_train shape:', x_train.shape)
    print(x_train.shape[0], 'train samples')
    print(x_test.shape[0], 'test samples')

    num_train_samples = x_train.shape[0]
    end_step = np.ceil(1.0 * num_train_samples / batch_size).astype(np.int32) * epochs
    print('End step: ' + str(end_step))

    pruning_params = {
        'pruning_schedule': sparsity.PolynomialDecay(initial_sparsity=0.50,
                                                     final_sparsity=0.90,
                                                     begin_step=2000,
                                                     end_step=end_step,
                                                     frequency=100)
    }

    model = train(get_model(input_shape, num_classes, pruning_params=pruning_params),
                  x_train, y_train, batch_size, epochs, x_test, y_test,
                  pruning=True)

    score = model.evaluate(x_test, y_test, verbose=0)
    print('Test loss:', score[0])
    print('Test accuracy:', score[1])

    keras_file = "mnist_optimized.h5"
    print('Saving model to: ', keras_file)
    # Save after stripping the pruning wrappers
    tf.keras.models.save_model(sparsity.strip_pruning(model), keras_file, include_optimizer=False)
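Note: PolynomialDecay ramps the sparsity target from initial_sparsity to final_sparsity between begin_step and end_step. A small illustration of that ramp as we understand it (the library's default exponent is 3; treat this as a sketch, not the library source):

def sparsity_at(step, s0=0.50, sf=0.90, begin=2000, end=10000, power=3):
    # Before begin_step no pruning has been applied yet.
    if step < begin:
        return 0.0
    frac = min(1.0, (step - begin) / float(end - begin))
    return sf + (s0 - sf) * (1.0 - frac) ** power

for step in (2000, 4000, 6000, 8000, 10000):
    print(step, round(sparsity_at(step), 3))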
Code example #5
    def _generate_model(self):
        '''to generate the bounding boxes'''
        weights_path = os.path.expanduser(self.weights_path)
        assert weights_path.endswith('.h5'), 'Keras model or weights must be a .h5 file.'

        # Load model, or construct model and load weights.
        num_anchors = len(self.anchors)
        num_classes = len(self.class_names)
        #YOLOv3 model has 9 anchors and 3 feature layers but
        #Tiny YOLOv3 model has 6 anchors and 2 feature layers,
        #so we can calculate feature layers number to get model type
        num_feature_layers = num_anchors//3

        try:
            if num_anchors == 5:
                # YOLOv2 use 5 anchors
                yolo_model, _ = get_yolo2_model(self.model_type, num_anchors, num_classes, input_shape=self.model_image_size + (3,), model_pruning=self.pruning_model)
            else:
                yolo_model, _ = get_yolo3_model(self.model_type, num_feature_layers, num_anchors, num_classes, input_shape=self.model_image_size + (3,), model_pruning=self.pruning_model)
            yolo_model.load_weights(weights_path) # make sure model, anchors and classes match
            if self.pruning_model:
                yolo_model = sparsity.strip_pruning(yolo_model)
            yolo_model.summary()
        except Exception as e:
            print(repr(e))
            assert yolo_model.layers[-1].output_shape[-1] == \
                num_anchors/len(yolo_model.output) * (num_classes + 5), \
                'Mismatch between model and given anchor and class sizes'
        print('{} model, anchors, and classes loaded.'.format(weights_path))
        if self.gpu_num>=2:
            yolo_model = multi_gpu_model(yolo_model, gpus=self.gpu_num)

        return yolo_model
Code example #6
def prune_and_initilize(trained_model,
                        pm,
                        initial_weights,
                        layers_to_prune=None):
    sparsity_level = 1 - pm
    sparsity_sched = ConstantSparsity(
        sparsity_level,
        0,  # Do the sparsity calculation in the first step
        end_step=0,  # Do it only once
        frequency=10000000)

    model = clone_model(trained_model)
    model.set_weights(trained_model.get_weights())

    if is_pruned(model):
        model = strip_pruning(model)

    if layers_to_prune is None:
        layers_to_prune = get_default_layers(model)


    trained_pruned_model = apply_wrapper_to_layer(model,
                                                  layers_to_prune,
                                                  prune_low_magnitude,
                                                  sparsity_sched,
                                                  clone=False)
    # Calculates mask
    initialize_pruned_model(trained_pruned_model)

    model.load_weights(initial_weights)
    pruned_model_layers = []
    for i, layer in enumerate(trained_pruned_model.layers):
        if isinstance(layer, pruning_wrapper.PruneLowMagnitude):
            # Apply the mask learned from the trained weights to the initial weights
            l_weights = model.layers[i].get_weights()
            l_weights[0] = l_weights[0] * layer.pruning_vars[0][1].numpy()
            model.layers[i].set_weights(l_weights)
            pruned_model_layers.append(
                prune_low_magnitude(model.layers[i], sparsity_sched))
        else:
            pruned_model_layers.append(model.layers[i])
    untrained_pruned_model = Sequential(pruned_model_layers)
    untrained_pruned_model.compile(optimizer=optimizers.SGD(lr=0),
                                   loss='sparse_categorical_crossentropy',
                                   metrics=['accuracy'])
    return untrained_pruned_model
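Note: this is a lottery-ticket style rewind: the pruning mask is computed from the trained weights and then applied to the initial weights before retraining. A hypothetical call; the pm value and weights file are placeholders:

untrained = prune_and_initilize(trained_model,
                                pm=0.2,  # keep 20% of weights (sparsity = 1 - pm)
                                initial_weights='initial_weights.h5')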
Code example #7
def fitModel(model, train_data, val_data, test_data, stepsPerEpoch,
             evalStepsPerEpoch):
    """Runs Keras fit and saves model.
    Arguments:
      STRATEGY: Mirrored strategy
      models: list of models to train
      train_data: training data
      val_data: validation data  
    Returns:
      None
    """

    if not os.path.exists(FLAGS.outdir + '/%s/' % model.name):
        os.makedirs(FLAGS.outdir + '/%s/' % model.name)

    callbacks = getCallbacks(FLAGS.outdir + '/%s/' % model.name)
    if FLAGS.prune:
        callbacks.append(pruning_callbacks.UpdatePruningStep())

    start = time.time()
    LOSS = tf.keras.losses.CategoricalCrossentropy()
    OPTIMIZER = Adam(learning_rate=FLAGS.lr,
                     beta_1=FLAGS.beta_1,
                     beta_2=FLAGS.beta_2,
                     epsilon=FLAGS.epsilon,
                     amsgrad=True)
    model.compile(loss=LOSS, optimizer=OPTIMIZER, metrics=["accuracy"])
    model.summary()

    history = model.fit(train_data,
                        epochs=FLAGS.epochs,
                        validation_data=val_data,
                        callbacks=callbacks,
                        verbose=1)
    model.load_weights(FLAGS.outdir + '/%s/weights_best.h5' % model.name)
    pd.DataFrame.from_dict(history.history).to_csv(
        FLAGS.outdir + '/%s/history_dict.csv' % model.name, index=False)
    test_score = model.evaluate(test_data)
    print("Done training model {}".format(model.name))
    print('\n Test loss:', test_score[0])
    print('\n Test accuracy:', test_score[1])
    np.savez(FLAGS.outdir + '/%s/scores' % model.name, test_score)

    if FLAGS.prune:
        model_stripped = strip_pruning(model)
        model_stripped.save(FLAGS.outdir + '/%s/%s.h5' %
                            (model.name, model.name))
    else:
        model.save(FLAGS.outdir + '/%s/%s.h5' % (model.name, model.name))
    end = time.time()
    print('\n It took {} minutes to train!\n'.format((end - start) / 60.))
Code example #8
def toHLS(p, r, m, doQK=False, intbits_a=0, odir='cnn_projects'):
    if doQK:
        model = tf.keras.models.load_model(
            'models/{}_{}bit_0/model_best.h5'.format(m, p),
            custom_objects={
                'PruneLowMagnitude': pruning_wrapper.PruneLowMagnitude,
                'QDense': QDense,
                'QConv2D': QConv2D,
                'Clip': Clip,
                'QActivation': QActivation
            })
        model = strip_pruning(model)
        hls_model = getQKeras(model=model,
                              model_name=m,
                              precision=p,
                              reuse=r,
                              intbits_a=intbits_a,
                              odir=odir)

    else:
        model = tf.keras.models.load_model(
            'models/{}_0/model_best.h5'.format(m),
            custom_objects={
                'PruneLowMagnitude': pruning_wrapper.PruneLowMagnitude,
                'QDense': QDense,
                'QConv2D': QConv2D,
                'Clip': Clip,
                'QActivation': QActivation
            })
        model = strip_pruning(model)
        hls_model = getBaseline(model=model,
                                model_name=m,
                                precision=p,
                                reuse=r,
                                intbits_a=intbits_a,
                                odir=odir)
Code example #9
    def update_eval_model(self, train_model):
        # create a temp weights file to save training result
        tmp_weights_path = os.path.join(
            tempfile.gettempdir(),
            str(random.randint(10, 1000000)) + '.h5')
        train_model.save_weights(tmp_weights_path)

        # load the temp weights to eval model
        self.eval_model.load_weights(tmp_weights_path)
        os.remove(tmp_weights_path)

        if self.model_pruning:
            eval_model = sparsity.strip_pruning(self.eval_model)
        else:
            eval_model = self.eval_model

        return eval_model
Code example #10
    def eval(self):
        if self._mode is None:
            raise ValueError("Please set the 'mode' parameter")
        eval_examples = self._processor.get_dev_examples(args.data_dir)
        eval_file = os.path.join(args.output_dir, "eval.tf_record")
        label_list = self._processor.get_labels()
        self.file_based_convert_examples_to_features(
            eval_examples, label_list, args.max_seq_len, self._tokenizer, eval_file)

        # tf.logging.info("***** Running evaluation *****")
        # tf.logging.info("  Num examples = %d", len(eval_examples))
        # tf.logging.info("  Batch size = %d", self.batch_size)
        num_eval_steps = len(eval_examples) // args.batch_size  # steps must be an integer
        eval_input_fn = self.file_based_input_fn_builder(
            input_file=eval_file,
            seq_length=args.max_seq_len,
            is_training=False,
            batch_size=args.batch_size,
            drop_remainder=False)

        estimator, model = create_estimator(num_eval_steps)
        var_list = model.optimizer.variables()
        result = estimator.evaluate(input_fn=eval_input_fn, steps=None, hooks=[])

        output_eval_file = os.path.join(args.output_dir, "eval_results.txt")
        with tf.io.gfile.GFile(output_eval_file, "w") as writer:
            # tf.logging.info("***** Eval results *****")
            for key in sorted(result.keys()):
                # tf.logging.info("  %s = %s", key, str(result[key]))
                writer.write("%s = %s\n" % (key, str(result[key])))
        feature_columns = [tf.feature_column.numeric_column(x) for x in ['input_ids', 'input_mask', 'segment_ids']]
        serving_input_fn = tf.estimator.export.build_parsing_serving_input_receiver_fn(
            tf.feature_column.make_parse_example_spec(feature_columns))
        if args.prune_enabled:
            model = sparsity.strip_pruning(model)
        estimator.export_saved_model(
            export_dir_base=args.output_dir,
            serving_input_receiver_fn=serving_input_fn,
            experimental_mode=tf.estimator.ModeKeys.PREDICT)
        model.reset_metrics()
        model.save(args.keras_model_path)
Code example #11
def make_pruning(model, train_dataset, validation_dataset, n_step, v_step):
    end_step = np.ceil(1.0 * n_step / config.batch_size).astype(np.int32) * config.p_epochs
    pruning_params = {
          'pruning_schedule': sparsity.PolynomialDecay(initial_sparsity=config.initial_sparsity,
                                                       final_sparsity=config.final_sparsity,
                                                       begin_step=config.p_begin_step,
                                                       end_step=end_step,
                                                       frequency=config.p_frequency)
    }
    p_model = sparsity.prune_low_magnitude(model, **pruning_params)
    model_setup(p_model)
    callbacks = callbacks_init()
    p_model.fit(train_dataset,
                epochs=config.p_epochs,
                verbose=1,
                callbacks=callbacks,
                validation_data=validation_dataset,
                steps_per_epoch=n_step,
                validation_steps=v_step)
    p_model = sparsity.strip_pruning(p_model)
    return p_model
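Note: a hypothetical end-to-end use of make_pruning; the dataset objects and step counts come from the surrounding config-driven script:

p_model = make_pruning(model, train_dataset, validation_dataset,
                       n_step=1000, v_step=100)
p_model.save('pruned_stripped.h5')  # a stripped model saves/loads like any Keras model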
Code example #12
def main(args):
    annotation_file = args.annotation_file
    log_dir = os.path.join('logs', '000')
    classes_path = args.classes_path
    class_names = get_classes(classes_path)
    num_classes = len(class_names)

    anchors = get_anchors(args.anchors_path)
    num_anchors = len(anchors)

    # get freeze level according to CLI option
    if args.weights_path:
        freeze_level = 0
    else:
        freeze_level = 1

    if args.freeze_level is not None:
        freeze_level = args.freeze_level

    # callbacks for training process
    logging = TensorBoard(log_dir=log_dir,
                          histogram_freq=0,
                          write_graph=False,
                          write_grads=False,
                          write_images=False,
                          update_freq='batch')
    checkpoint = ModelCheckpoint(os.path.join(
        log_dir, 'ep{epoch:03d}-loss{loss:.3f}-val_loss{val_loss:.3f}.h5'),
                                 monitor='val_loss',
                                 mode='min',
                                 verbose=1,
                                 save_weights_only=False,
                                 save_best_only=True,
                                 period=1)
    reduce_lr = ReduceLROnPlateau(monitor='val_loss',
                                  factor=0.5,
                                  mode='min',
                                  patience=10,
                                  verbose=1,
                                  cooldown=0,
                                  min_lr=1e-10)
    early_stopping = EarlyStopping(monitor='val_loss',
                                   min_delta=0,
                                   patience=50,
                                   verbose=1,
                                   mode='min')
    terminate_on_nan = TerminateOnNaN()

    callbacks = [
        logging, checkpoint, reduce_lr, early_stopping, terminate_on_nan
    ]

    # get train&val dataset
    dataset = get_dataset(annotation_file)
    if args.val_annotation_file:
        val_dataset = get_dataset(args.val_annotation_file)
        num_train = len(dataset)
        num_val = len(val_dataset)
        dataset.extend(val_dataset)
    else:
        val_split = args.val_split
        num_val = int(len(dataset) * val_split)
        num_train = len(dataset) - num_val

    # assign multiscale interval
    if args.multiscale:
        rescale_interval = args.rescale_interval
    else:
        rescale_interval = -1  # -1 disables multiscale rescaling

    # model input shape check
    input_shape = args.model_image_size
    assert (input_shape[0] % 32 == 0 and input_shape[1] % 32
            == 0), 'model_image_size should be multiples of 32'

    # get different model type & train&val data generator
    if args.model_type.startswith(
            'scaled_yolo4_') or args.model_type.startswith('yolo5_'):
        # Scaled-YOLOv4 & YOLOv5 entrance: use the yolo5 submodule, but still the yolo3 data generator for now
        # TODO: create a new yolo5 data generator to apply YOLOv5 anchor assignment
        get_train_model = get_yolo5_train_model
        data_generator = yolo5_data_generator_wrapper

        # tf.keras.Sequence style data generator
        #train_data_generator = Yolo5DataGenerator(dataset[:num_train], args.batch_size, input_shape, anchors, num_classes, args.enhance_augment, rescale_interval, args.multi_anchor_assign)
        #val_data_generator = Yolo5DataGenerator(dataset[num_train:], args.batch_size, input_shape, anchors, num_classes, multi_anchor_assign=args.multi_anchor_assign)

        tiny_version = False
    elif args.model_type.startswith('yolo3_') or args.model_type.startswith(
            'yolo4_'):
        #if num_anchors == 9:
        # YOLOv3 & v4 entrance, use 9 anchors
        get_train_model = get_yolo3_train_model
        data_generator = yolo3_data_generator_wrapper

        # tf.keras.Sequence style data generator
        #train_data_generator = Yolo3DataGenerator(dataset[:num_train], args.batch_size, input_shape, anchors, num_classes, args.enhance_augment, rescale_interval, args.multi_anchor_assign)
        #val_data_generator = Yolo3DataGenerator(dataset[num_train:], args.batch_size, input_shape, anchors, num_classes, multi_anchor_assign=args.multi_anchor_assign)

        tiny_version = False
    elif args.model_type.startswith(
            'tiny_yolo3_') or args.model_type.startswith('tiny_yolo4_'):
        #elif num_anchors == 6:
        # Tiny YOLOv3 & v4 entrance, use 6 anchors
        get_train_model = get_yolo3_train_model
        data_generator = yolo3_data_generator_wrapper

        # tf.keras.Sequence style data generator
        #train_data_generator = Yolo3DataGenerator(dataset[:num_train], args.batch_size, input_shape, anchors, num_classes, args.enhance_augment, rescale_interval, args.multi_anchor_assign)
        #val_data_generator = Yolo3DataGenerator(dataset[num_train:], args.batch_size, input_shape, anchors, num_classes, multi_anchor_assign=args.multi_anchor_assign)

        tiny_version = True
    elif args.model_type.startswith('yolo2_') or args.model_type.startswith(
            'tiny_yolo2_'):
        #elif num_anchors == 5:
        # YOLOv2 & Tiny YOLOv2 use 5 anchors
        get_train_model = get_yolo2_train_model
        data_generator = yolo2_data_generator_wrapper

        # tf.keras.Sequence style data generator
        #train_data_generator = Yolo2DataGenerator(dataset[:num_train], args.batch_size, input_shape, anchors, num_classes, args.enhance_augment, rescale_interval)
        #val_data_generator = Yolo2DataGenerator(dataset[num_train:], args.batch_size, input_shape, anchors, num_classes)

        tiny_version = False
    else:
        raise ValueError('Unsupported model type')

    # prepare online evaluation callback
    if args.eval_online:
        eval_callback = EvalCallBack(
            args.model_type,
            dataset[num_train:],
            anchors,
            class_names,
            args.model_image_size,
            args.model_pruning,
            log_dir,
            eval_epoch_interval=args.eval_epoch_interval,
            save_eval_checkpoint=args.save_eval_checkpoint,
            elim_grid_sense=args.elim_grid_sense)
        callbacks.append(eval_callback)

    # prepare train/val data shuffle callback
    if args.data_shuffle:
        shuffle_callback = DatasetShuffleCallBack(dataset)
        callbacks.append(shuffle_callback)

    # prepare model pruning config
    pruning_end_step = np.ceil(1.0 * num_train / args.batch_size).astype(
        np.int32) * args.total_epoch
    if args.model_pruning:
        pruning_callbacks = [
            sparsity.UpdatePruningStep(),
            sparsity.PruningSummaries(log_dir=log_dir, profile_batch=0)
        ]
        callbacks = callbacks + pruning_callbacks

    # prepare optimizer
    optimizer = get_optimizer(args.optimizer,
                              args.learning_rate,
                              average_type=None,
                              decay_type=None)

    # support multi-gpu training
    if args.gpu_num >= 2:
        # devices_list=["/gpu:0", "/gpu:1"]
        devices_list = ["/gpu:{}".format(n) for n in range(args.gpu_num)]
        strategy = tf.distribute.MirroredStrategy(devices=devices_list)
        print('Number of devices: {}'.format(strategy.num_replicas_in_sync))
        with strategy.scope():
            # get multi-gpu train model
            model = get_train_model(args.model_type,
                                    anchors,
                                    num_classes,
                                    weights_path=args.weights_path,
                                    freeze_level=freeze_level,
                                    optimizer=optimizer,
                                    label_smoothing=args.label_smoothing,
                                    elim_grid_sense=args.elim_grid_sense,
                                    model_pruning=args.model_pruning,
                                    pruning_end_step=pruning_end_step)

    else:
        # get normal train model
        model = get_train_model(args.model_type,
                                anchors,
                                num_classes,
                                weights_path=args.weights_path,
                                freeze_level=freeze_level,
                                optimizer=optimizer,
                                label_smoothing=args.label_smoothing,
                                elim_grid_sense=args.elim_grid_sense,
                                model_pruning=args.model_pruning,
                                pruning_end_step=pruning_end_step)

    model.summary()

    # Transfer-train for some epochs with frozen layers first if needed, to get a stable loss.
    initial_epoch = args.init_epoch
    epochs = initial_epoch + args.transfer_epoch
    print("Transfer training stage")
    print(
        'Train on {} samples, val on {} samples, with batch size {}, input_shape {}.'
        .format(num_train, num_val, args.batch_size, input_shape))
    #model.fit_generator(train_data_generator,
    model.fit_generator(
        data_generator(dataset[:num_train],
                       args.batch_size,
                       input_shape,
                       anchors,
                       num_classes,
                       args.enhance_augment,
                       rescale_interval,
                       multi_anchor_assign=args.multi_anchor_assign),
        steps_per_epoch=max(1, num_train // args.batch_size),
        #validation_data=val_data_generator,
        validation_data=data_generator(
            dataset[num_train:],
            args.batch_size,
            input_shape,
            anchors,
            num_classes,
            multi_anchor_assign=args.multi_anchor_assign),
        validation_steps=max(1, num_val // args.batch_size),
        epochs=epochs,
        initial_epoch=initial_epoch,
        #verbose=1,
        workers=1,
        use_multiprocessing=False,
        max_queue_size=10,
        callbacks=callbacks)

    # Wait 2 seconds for next stage
    time.sleep(2)

    if args.decay_type or args.average_type:
        # rebuild the optimizer to apply learning rate decay or a weights averager,
        # but only after all layers are unfrozen
        if args.decay_type:
            callbacks.remove(reduce_lr)

        if args.average_type == 'ema' or args.average_type == 'swa':
            # the weights averager needs tensorflow-addons, which requires TF 2.x
            # and has version-compatibility constraints
            import tensorflow_addons as tfa
            callbacks.remove(checkpoint)
            avg_checkpoint = tfa.callbacks.AverageModelCheckpoint(
                filepath=os.path.join(
                    log_dir,
                    'ep{epoch:03d}-loss{loss:.3f}-val_loss{val_loss:.3f}.h5'),
                update_weights=True,
                monitor='val_loss',
                mode='min',
                verbose=1,
                save_weights_only=False,
                save_best_only=True,
                period=1)
            callbacks.append(avg_checkpoint)

        steps_per_epoch = max(1, num_train // args.batch_size)
        decay_steps = steps_per_epoch * (args.total_epoch - args.init_epoch -
                                         args.transfer_epoch)
        optimizer = get_optimizer(args.optimizer,
                                  args.learning_rate,
                                  average_type=args.average_type,
                                  decay_type=args.decay_type,
                                  decay_steps=decay_steps)

    # Unfreeze the whole network for further tuning
    # NOTE: more GPU memory is required after unfreezing the body
    print("Unfreeze and continue training, to fine-tune.")
    if args.gpu_num >= 2:
        with strategy.scope():
            for i in range(len(model.layers)):
                model.layers[i].trainable = True
            model.compile(optimizer=optimizer,
                          loss={
                              'yolo_loss': lambda y_true, y_pred: y_pred
                          })  # recompile to apply the change

    else:
        for i in range(len(model.layers)):
            model.layers[i].trainable = True
        model.compile(optimizer=optimizer,
                      loss={
                          'yolo_loss': lambda y_true, y_pred: y_pred
                      })  # recompile to apply the change

    print(
        'Train on {} samples, val on {} samples, with batch size {}, input_shape {}.'
        .format(num_train, num_val, args.batch_size, input_shape))
    #model.fit_generator(train_data_generator,
    model.fit_generator(
        data_generator(dataset[:num_train],
                       args.batch_size,
                       input_shape,
                       anchors,
                       num_classes,
                       args.enhance_augment,
                       rescale_interval,
                       multi_anchor_assign=args.multi_anchor_assign),
        steps_per_epoch=max(1, num_train // args.batch_size),
        #validation_data=val_data_generator,
        validation_data=data_generator(
            dataset[num_train:],
            args.batch_size,
            input_shape,
            anchors,
            num_classes,
            multi_anchor_assign=args.multi_anchor_assign),
        validation_steps=max(1, num_val // args.batch_size),
        epochs=args.total_epoch,
        initial_epoch=epochs,
        #verbose=1,
        workers=1,
        use_multiprocessing=False,
        max_queue_size=10,
        callbacks=callbacks)

    # Finally store model
    if args.model_pruning:
        model = sparsity.strip_pruning(model)
    model.save(os.path.join(log_dir, 'trained_final.h5'))
Code example #13
    def _generate_model(self):
        '''to generate the bounding boxes'''
        weights_path = None
        #assert weights_path.endswith('.h5'), 'Keras model or weights must be a .h5 file.'

        # Load model, or construct model and load weights.
        num_anchors = len(self.anchors)
        num_classes = len(self.class_names)
        #YOLOv3 model has 9 anchors and 3 feature layers but
        #Tiny YOLOv3 model has 6 anchors and 2 feature layers,
        #so we can calculate feature layers number to get model type
        num_feature_layers = num_anchors // 3

        try:
            if self.model_type.startswith(
                    'scaled_yolo4_') or self.model_type.startswith('yolo5_'):
                # Scaled-YOLOv4 & YOLOv5 entrance
                yolo_model, _ = get_yolo5_model(
                    self.model_type,
                    num_feature_layers,
                    num_anchors,
                    num_classes,
                    input_shape=self.model_image_size + (3, ),
                    model_pruning=self.pruning_model)
            elif self.model_type.startswith('yolo3_') or self.model_type.startswith('yolo4_') or \
                 self.model_type.startswith('tiny_yolo3_') or self.model_type.startswith('tiny_yolo4_'):
                # YOLOv3 & v4 entrance
                yolo_model, _ = get_yolo3_model(
                    self.model_type,
                    num_feature_layers,
                    num_anchors,
                    num_classes,
                    input_shape=self.model_image_size + (3, ),
                    model_pruning=self.pruning_model)
            elif self.model_type.startswith(
                    'yolo2_') or self.model_type.startswith('tiny_yolo2_'):
                # YOLOv2 entrance
                yolo_model, _ = get_yolo2_model(
                    self.model_type,
                    num_anchors,
                    num_classes,
                    input_shape=self.model_image_size + (3, ),
                    model_pruning=self.pruning_model)
            else:
                raise ValueError('Unsupported model type')

            yolo_model.load_weights(
                weights_path)  # make sure model, anchors and classes match
            if self.pruning_model:
                yolo_model = sparsity.strip_pruning(yolo_model)
            yolo_model.summary()
        except Exception as e:
            print(repr(e))
            assert yolo_model.layers[-1].output_shape[-1] == \
                num_anchors/len(yolo_model.output) * (num_classes + 5), \
                'Mismatch between model and given anchor and class sizes'
        print('{} model, anchors, and classes loaded.'.format(weights_path))
        # Removed the following for compatibility with tf2.4.
        '''
        if self.gpu_num>=2:
            yolo_model = multi_gpu_model(yolo_model, gpus=self.gpu_num)
        '''
        return yolo_model
Code example #14
File: QKeras_Model.py    Project: DrWatt/MuonTriggerML
def Q_baseline_model(size, epochs, optimizer, X_training, y_training,
                     X_validation, y_validation, output_name):
    '''
    NN Model constructor with loss and accuracy plots.

    Parameters
    ----------
    size : int
        Batch size used in the training process.
    epochs : int
        Number of epochs the model will be trained.
    optimizer : keras.optimizer
        Optimizer function.
    X_training : Numpy array
        Training data set.
    y_training : Numpy array
        True labels for the training set.
    X_validation : Numpy array
        Validation data set.
    y_validation : Numpy array
        True labels for the validation set.
    output_name : str
        Name used for saved plots.

    Returns
    -------
    model : qkeras.sequential
        QKeras model.
    w : numpy array
        Array of final weights used in the model for later inference.

    '''
    pruning = False
    # create model
    name = "RMSE validation"
    name2 = "RMSE training"
    history = History()
    model = Sequential()
    model.add(
        QDense(60,
               input_shape=(27, ),
               kernel_quantizer=quantized_bits(16, 1),
               bias_quantizer=quantized_bits(16, 1),
               kernel_initializer='random_normal'))
    model.add(QActivation(activation=quantized_relu(16, 1), name='relu1'))
    model.add(
        QDense(50,
               kernel_quantizer=quantized_bits(16, 1),
               bias_quantizer=quantized_bits(16, 1)))
    model.add(QActivation(activation=quantized_relu(16, 1), name='relu2'))
    # model.add(Dropout(rate=0.2))
    model.add(
        QDense(30,
               kernel_quantizer=quantized_bits(16, 1),
               bias_quantizer=quantized_bits(16, 1)))
    model.add(QActivation(activation=quantized_relu(16, 1), name='relu3'))
    model.add(
        QDense(40,
               kernel_quantizer=quantized_bits(16, 1),
               bias_quantizer=quantized_bits(16, 1)))
    model.add(QActivation(activation=quantized_relu(16, 1), name='relu4'))
    model.add(
        QDense(15,
               kernel_quantizer=quantized_bits(16, 1),
               bias_quantizer=quantized_bits(16, 1)))
    model.add(QActivation(activation=quantized_relu(16, 1), name='relu5'))

    # model.add(QDense(80,  input_shape=(27,),kernel_quantizer=quantized_bits(16,1),bias_quantizer=quantized_bits(16,1), kernel_initializer='random_normal'))
    # model.add(QActivation(activation=quantized_relu(16,1), name='relu1'))
    # model.add(QDense(50,kernel_quantizer=quantized_bits(16,1),bias_quantizer=quantized_bits(16,1)))
    # model.add(QActivation(activation=quantized_relu(16,1), name='relu2'))
    # model.add(QDense(35,kernel_quantizer=quantized_bits(16,1),bias_quantizer=quantized_bits(16,1)))
    # model.add(QActivation(activation=quantized_relu(16,1), name='relu3'))
    # # # model.add(Dropout(rate=0.2))
    model.add(QDense(1, kernel_quantizer=quantized_bits(16, 1)))
    model.add(QActivation(activation=quantized_relu(16, 1), name='relu6'))
    #model.add(Activation("sigmoid"))
    # model.add(QActivation(activation=quantized_tanh(16,1),name='tanh'))
    if pruning:
        print("////////////////////////Training Model with pruning")
        pruning_params = {
            "pruning_schedule":
            pruning_schedule.ConstantSparsity(0.75,
                                              begin_step=2000,
                                              frequency=100)
        }
        model = prune.prune_low_magnitude(model, **pruning_params)
        model.compile(loss='mean_squared_error', optimizer=optimizer)
        model.fit(X_training,
                  y_training,
                  batch_size=size,
                  epochs=epochs,
                  verbose=1,
                  validation_data=(X_validation, y_validation),
                  callbacks=[history,
                             pruning_callbacks.UpdatePruningStep()])

        model = strip_pruning(model)
        w = model.layers[0].weights[0].numpy()
        h, b = np.histogram(w, bins=100)
        plt.figure(figsize=(7, 7))
        plt.bar(b[:-1], h, width=b[1] - b[0])
        plt.semilogy()
        plt.savefig("Zeros' distribution", format='png')
        print('% of zeros = {}'.format(np.sum(w == 0) / np.size(w)))
    else:
        print("////////////////////////Training Model WITHOUT pruning")
        model.compile(loss='mean_squared_error', optimizer=optimizer)
        model.fit(X_training,
                  y_training,
                  batch_size=size,
                  epochs=epochs,
                  verbose=1,
                  validation_data=(X_validation, y_validation),
                  callbacks=[history])
    # Compile model
    # model.compile(loss='mean_squared_error', optimizer=optimizer)
    # model.fit(X_training, y_training,
    #       batch_size=size,
    #       epochs=epochs,
    #       verbose=1,
    #       validation_data=(X_validation, y_validation),callbacks=[history])

    w = []
    for layer in model.layers:
        print(layer)
        w.append(layer.get_weights())

    #print(w)
    train_predictions = model.predict(X_training)
    predictions = model.predict(X_validation)
    lin_mse = mean_squared_error(y_validation, predictions)
    lin_rmse = np.sqrt(lin_mse)
    lin_mse2 = mean_squared_error(y_training, train_predictions)
    lin_rmse2 = np.sqrt(lin_mse2)
    msg = "%s: %f" % (name, lin_rmse)
    msg2 = "%s: %f" % (name2, lin_rmse2)
    print(msg)
    print(msg2)
    fig, ax = plt.subplots()
    # xy=np.vstack([y_validation, predictions])
    #z=gaussian_kde(xy)
    ax.scatter(y_validation, predictions, edgecolors=(0, 0, 0))
    ax.set_title('Regression model predictions (validation set)')
    ax.set_xlabel('Measured $p_T$ (GeV/c)')
    ax.set_ylabel('Predicted $p_T$ (GeV/c)')
    ax.plot([y_validation.min(), y_validation.max()],
            [y_validation.min(), y_validation.max()], 'k--', lw=4)
    plt.rc('font', size=20)
    plt.rc('axes', titlesize=18)
    plt.rc('axes', labelsize=18)
    plt.rc('xtick', labelsize=18)
    plt.rc('ytick', labelsize=18)
    plt.rc('legend', fontsize=18)
    plt.rc('figure', titlesize=18)
    plt.tight_layout()
    plt.savefig(outrootname + '/' + '1' + output_name, format='png', dpi=800)
    fig2, ax2 = plt.subplots()
    ax2.plot(history.history['loss'], label='loss')
    ax2.plot(history.history['val_loss'], label='val_loss')
    ax2.set_title('Training and Validation loss per epoch')
    ax2.set_xlabel('# Epoch')
    ax2.set_ylabel('loss')
    plt.legend()
    plt.tight_layout()
    plt.savefig(outrootname + '/' + '2' + output_name, format='png', dpi=800)
    #plt.show()
    del ax, ax2

    return model, w
Code example #15
# The snippet begins mid-call; the first line below is a plausible reconstruction
# (new_pruned_model, x_train, y_train and start_Time come from the truncated context).
history = new_pruned_model.fit(x_train, y_train,
                               epochs=epochs,
                               validation_split=0.2,
                               shuffle=True,
                               callbacks=callbacks,
                               verbose=1)
training_Time = time.time()

scores = new_pruned_model.evaluate(x_test, y_test, verbose=2)
end_Time = time.time()

print('total training time:', (training_Time - start_Time) / 60, "min")
print('Test time:', end_Time - training_Time)
print('Test loss:', scores[0])
print('Test accuracy:', scores[1])

final_model = tfmot.strip_pruning(new_pruned_model)
final_model.summary()
final_model.compile(loss='categorical_crossentropy',
                    optimizer=opt,
                    metrics=['accuracy'])
final_scores = final_model.evaluate(x_test, y_test, verbose=2)
print('Test loss:', final_scores[0])
print('Test accuracy:', final_scores[1])

# File names (need to change / for \\ on laptop)
model_File = os.path.join(os.getcwd(), 'Models/')
fTime = time.strftime("%d-%b-%H%M", time.localtime())
#file_Name = os.path.join(model_File, fTime+'-GES843-Pruning-Perforated/')
file_Name = model_File
model_Name = 'Pruned.h5'
Code example #16
File: train.py    Project: Chriisbrown/FakeTrackID
                                       experiment.get_parameter("pruning_lr_factor_3")],
                            outputDir=yamlparameters["TrainDir"])

    callbacks.callbacks.append(pruning_callbacks.UpdatePruningStep())

    with experiment.train():
    
        keras_model.fit(X_train,y_train,
                        batch_size=yamlparameters["Training_batch_size"],
                        epochs=yamlparameters["Training_epochs"],
                        callbacks=callbacks.callbacks,
                        verbose=1,
                        validation_split=yamlparameters["Training_validation_split"],
                        shuffle=True)
 
    keras_model = strip_pruning(keras_model)
    keras_model.compile(optimizer=adam, loss='binary_crossentropy', metrics=['binary_accuracy'])
    keras_model.save(yamlparameters["TrainDir"]+"/Best_model.h5")

    with experiment.test():
        y_predict = keras_model.predict(X_test,verbose=0)
        loss,binary_accuracy = keras_model.evaluate(X_test, y_test,verbose=0)
        auc = roc_auc_score(y_test,y_predict)
        print("AUC:",auc)
        print("ACC:",binary_accuracy)

        metrics = {
            'loss': loss,
            'accuracy': binary_accuracy,
            'ROC AUC': auc  # the computed score, not the roc_auc_score function
        }
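Note: the metrics dict is presumably pushed to Comet right after this; a minimal sketch assuming comet_ml's Experiment.log_metrics API (not shown in the snippet):

        experiment.log_metrics(metrics)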
Code example #17
                     best_filepaths,
                     output_filepath=submission_dir +
                     'Ooi_NTU_task1b_3.output.csv',
                     delimiter='\t',
                     newline='\n')
print('Test set predictions saved in ' + submission_dir)

## GET METRICS
n_nz_params = 0
model_size = 0
for best_filepath in best_filepaths:
    with sparsity.prune_scope():  # Need to use this to prevent loading errors
        keras_model = keras.models.load_model(
            best_filepath, compile=False
        )  # Don't compile model to save time because we're not training it here.
    keras_model = sparsity.strip_pruning(keras_model)
    param_dict = get_keras_model_size(keras_model, verbose=False)
    n_nz_params += param_dict['parameters']['non_zero']['count']
    model_size += param_dict['parameters']['non_zero']['bytes'] / 1024

output_meta = make_predictions(eval_features, best_filepaths, save=False)
best_micro_acc, best_macro_acc = accs(
    np.array([row[-3:] for row in output_meta[1:]]), eval_labels)

## PRINT FINAL METRICS
print()
print('============================')
print(' FINAL ANALYSIS FOR MODEL 3 ')
print('============================')
print()
print('Model files used: ')
Code example #18
def _main(args):
    global lr_base, total_epochs
    lr_base = args.learning_rate
    total_epochs = args.total_epoch

    annotation_file = args.annotation_file
    log_dir = 'logs/000/'
    classes_path = args.classes_path
    class_names = get_classes(classes_path)
    num_classes = len(class_names)
    if args.tiny_version:
        anchors_path = 'configs/tiny_yolo_anchors.txt'
    else:
        anchors_path = 'configs/yolo_anchors.txt'
    anchors = get_anchors(anchors_path)
    print("\nanchors = ", anchors)
    print("\nnum_classes = ", num_classes)

    # get freeze level according to CLI option
    if args.weights_path:
        freeze_level = 0
    else:
        freeze_level = 1

    if args.freeze_level is not None:
        freeze_level = args.freeze_level
        print("\n\nFREEZE LEVEL  = ", freeze_level)

    # callbacks for training process
    logging = TensorBoard(log_dir=log_dir,
                          histogram_freq=0,
                          write_graph=False,
                          write_grads=False,
                          write_images=False,
                          update_freq='batch')
    checkpoint = ModelCheckpoint(
        log_dir + 'ep{epoch:03d}-loss{loss:.3f}-val_loss{val_loss:.3f}.h5',
        monitor='val_loss',
        verbose=1,
        save_weights_only=False,
        save_best_only=True,
        period=5)
    reduce_lr = ReduceLROnPlateau(monitor='val_loss',
                                  factor=0.5,
                                  patience=5,
                                  verbose=1,
                                  cooldown=0,
                                  min_lr=1e-10)
    lr_scheduler = LearningRateScheduler(learning_rate_scheduler)
    early_stopping = EarlyStopping(monitor='val_loss',
                                   min_delta=0,
                                   patience=30,
                                   verbose=1)
    terminate_on_nan = TerminateOnNaN()

    callbacks = [
        logging, checkpoint, reduce_lr, early_stopping, terminate_on_nan
    ]

    # get train&val dataset
    dataset = get_dataset(annotation_file)
    if args.val_annotation_file:
        val_dataset = get_dataset(args.val_annotation_file)
        num_train = len(dataset)
        num_val = len(val_dataset)
        dataset.extend(val_dataset)
    else:
        val_split = args.val_split
        num_val = int(len(dataset) * val_split)
        num_train = len(dataset) - num_val

    # prepare model pruning config
    pruning_end_step = np.ceil(1.0 * num_train / args.batch_size).astype(
        np.int32) * args.total_epoch
    if args.model_pruning:
        pruning_callbacks = [
            sparsity.UpdatePruningStep(),
            sparsity.PruningSummaries(log_dir=log_dir, profile_batch=0)
        ]
        callbacks = callbacks + pruning_callbacks

    # prepare optimizer
    optimizer = get_optimizer(args.optimizer, args.learning_rate)

    # get train model
    model = get_yolo3_train_model(args.model_type,
                                  anchors,
                                  num_classes,
                                  weights_path=args.weights_path,
                                  freeze_level=freeze_level,
                                  optimizer=optimizer,
                                  label_smoothing=args.label_smoothing,
                                  model_pruning=args.model_pruning,
                                  pruning_end_step=pruning_end_step)
    # support multi-gpu training
    if args.gpu_num >= 2:
        model = multi_gpu_model(model, gpus=args.gpu_num)
    model.summary()

    # Train some initial epochs with frozen layers first if needed, to get a stable loss.
    input_shape = args.model_image_size
    assert (input_shape[0] % 32 == 0
            and input_shape[1] % 32 == 0), 'Multiples of 32 required'
    batch_size = args.batch_size
    initial_epoch = 0
    epochs = args.init_epoch
    print("Initial training stage")
    print(
        'Train on {} samples, val on {} samples, with batch size {}, input_shape {}.'
        .format(num_train, num_val, batch_size, input_shape))
    model.fit_generator(data_generator_wrapper(dataset[:num_train], batch_size,
                                               input_shape, anchors,
                                               num_classes),
                        steps_per_epoch=max(1, num_train // batch_size),
                        validation_data=data_generator_wrapper(
                            dataset[num_train:], batch_size, input_shape,
                            anchors, num_classes),
                        validation_steps=max(1, num_val // batch_size),
                        epochs=epochs,
                        initial_epoch=initial_epoch,
                        callbacks=callbacks)

    # Apply cosine learning rate decay only after
    # all layers are unfrozen
    if args.cosine_decay_learning_rate:
        callbacks.remove(reduce_lr)
        callbacks.append(lr_scheduler)

    # Unfreeze the whole network for further training
    # NOTE: more GPU memory is required after unfreezing the body
    print("Unfreeze and continue training, to fine-tune.")
    for i in range(len(model.layers)):
        model.layers[i].trainable = True
    model.compile(optimizer=optimizer,
                  loss={
                      'yolo_loss': lambda y_true, y_pred: y_pred
                  })  # recompile to apply the change

    if args.multiscale:
        # prepare multiscale config
        input_shape_list = get_multiscale_list(args.model_type,
                                               args.tiny_version)
        interval = args.rescale_interval

        # Do multi-scale training on different input shape
        # change every "rescale_interval" epochs
        for epoch_step in range(epochs + interval, args.total_epoch, interval):
            # shuffle train/val dataset for cross-validation
            if args.data_shuffle:
                np.random.shuffle(dataset)

            initial_epoch = epochs
            epochs = epoch_step
            # rescale input only from the 2nd round on, so the freshly unfrozen model stays stable
            if initial_epoch != args.init_epoch:
                input_shape = input_shape_list[random.randint(
                    0,
                    len(input_shape_list) - 1)]
            print(
                'Train on {} samples, val on {} samples, with batch size {}, input_shape {}.'
                .format(num_train, num_val, batch_size, input_shape))
            model.fit_generator(
                data_generator_wrapper(dataset[:num_train], batch_size,
                                       input_shape, anchors, num_classes),
                steps_per_epoch=max(1, num_train // batch_size),
                validation_data=data_generator_wrapper(dataset[num_train:],
                                                       batch_size, input_shape,
                                                       anchors, num_classes),
                validation_steps=max(1, num_val // batch_size),
                epochs=epochs,
                initial_epoch=initial_epoch,
                callbacks=callbacks)
    else:
        # Do single-scale training
        print(
            'Train on {} samples, val on {} samples, with batch size {}, input_shape {}.'
            .format(num_train, num_val, batch_size, input_shape))
        model.fit_generator(data_generator_wrapper(dataset[:num_train],
                                                   batch_size, input_shape,
                                                   anchors, num_classes),
                            steps_per_epoch=max(1, num_train // batch_size),
                            validation_data=data_generator_wrapper(
                                dataset[num_train:], batch_size, input_shape,
                                anchors, num_classes),
                            validation_steps=max(1, num_val // batch_size),
                            epochs=args.total_epoch,
                            initial_epoch=epochs,
                            callbacks=callbacks)

    # Finally store model
    if args.model_pruning:
        model = sparsity.strip_pruning(model)
    model.save(log_dir + 'trained_final.h5')
Code example #19
def layer_pruned_model():
    #Build a pruned model layer by layer
    epochs = 12
    (x_train, y_train), (x_test, y_test) = prepare_data()
    num_train_samples = x_train.shape[0]
    end_step = np.ceil(1.0 * num_train_samples / batch_size).astype(
        np.int32) * epochs
    print('End step: ' + str(end_step))
    pruning_params = {
        'pruning_schedule':
        sparsity.PolynomialDecay(initial_sparsity=0.50,
                                 final_sparsity=0.90,
                                 begin_step=2000,
                                 end_step=end_step,
                                 frequency=100)
    }

    #build the model
    l = tf.keras.layers
    pruned_model = tf.keras.Sequential([
        sparsity.prune_low_magnitude(l.Conv2D(32,
                                              5,
                                              padding='same',
                                              activation='relu'),
                                     input_shape=input_shape,
                                     **pruning_params),
        l.MaxPooling2D((2, 2), (2, 2), padding='same'),
        l.BatchNormalization(),
        sparsity.prune_low_magnitude(
            l.Conv2D(64, 5, padding='same', activation='relu'),
            **pruning_params),
        l.MaxPooling2D((2, 2), (2, 2), padding='same'),
        l.Flatten(),
        sparsity.prune_low_magnitude(l.Dense(1024, activation='relu'),
                                     **pruning_params),
        l.Dropout(0.4),
        sparsity.prune_low_magnitude(
            l.Dense(num_classes, activation='softmax'), **pruning_params)
    ])

    pruned_model.summary()

    logdir = tempfile.mkdtemp()
    print('Writing training logs to ' + logdir)
    # %tensorboard --logdir={logdir}

    # train the model
    pruned_model.compile(loss=tf.keras.losses.categorical_crossentropy,
                         optimizer='adam',
                         metrics=['accuracy'])
    callbacks = [
        sparsity.UpdatePruningStep(),
        sparsity.PruningSummaries(log_dir=logdir, profile_batch=0)
    ]

    pruned_model.fit(x_train,
                     y_train,
                     batch_size=batch_size,
                     epochs=epochs,  # matches the horizon used to compute end_step
                     verbose=1,
                     callbacks=callbacks,
                     validation_data=(x_test, y_test))
    score = pruned_model.evaluate(x_test, y_test, verbose=0)
    print('Test loss:', score[0])
    print('Test accuracy:', score[1])

    # Save and restore
    checkpoint_file = './pruned_checkpoint_file.h5'
    # _, checkpoint_file = tempfile.mkstemp('.h5')
    print('Saving pruned model to: ', checkpoint_file)
    # save_model() sets include_optimizer to True by default. Spelling it out
    # here to highlight.
    tf.keras.models.save_model(pruned_model,
                               checkpoint_file,
                               include_optimizer=True)

    with sparsity.prune_scope():
        restored_model = tf.keras.models.load_model(checkpoint_file)
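    # prune_scope() registers the pruning wrapper classes as Keras custom
    # objects; it is needed here because the checkpoint was saved before
    # strip_pruning and therefore still contains PruneLowMagnitude layers.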

    restored_model.fit(x_train,
                       y_train,
                       batch_size=batch_size,
                       epochs=2,
                       verbose=1,
                       callbacks=callbacks,
                       validation_data=(x_test, y_test))

    start_test = time.time()
    score = restored_model.evaluate(x_test, y_test, verbose=0)
    end_test = time.time()
    print('Test latency:', end_test - start_test)
    print('Test loss:', score[0])
    print('Test accuracy:', score[1])
    final_model = sparsity.strip_pruning(pruned_model)
    final_model.summary()
    layer_pruned_file = './layer_pruned_file.h5'
    # _, layer_pruned_file = tempfile.mkstemp('.h5')
    print('Saving pruned model to: ', layer_pruned_file)
    tf.keras.models.save_model(final_model,
                               layer_pruned_file,
                               include_optimizer=False)
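
Because strip_pruning ran before this save and include_optimizer=False was passed, the file holds plain Keras layers. A minimal reload sketch, assuming the filename above: no prune_scope() or custom objects should be needed, but the model must be compiled again before evaluating.

# Reload the stripped model and re-check the test score.
reloaded = tf.keras.models.load_model(layer_pruned_file)
reloaded.compile(loss=tf.keras.losses.categorical_crossentropy,
                 optimizer='adam',
                 metrics=['accuracy'])
print(reloaded.evaluate(x_test, y_test, verbose=0))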
Code example #20
def prune_Conv1D(final_sparsity,
                 initial_sparsity=0.0,
                 begin_step=0,
                 frequency=100,
                 version=""):
    # Set up some params
    nb_epoch = 50  # number of epochs to train on
    batch_size = 1024  # training batch size
    num_train_samples = X_train.shape[0]
    end_step = np.ceil(1.0 * num_train_samples / batch_size).astype(
        np.int32) * nb_epoch
    print("End step: ", end_step)

    pruning_params = {
        'pruning_schedule':
        sparsity.PolynomialDecay(initial_sparsity=initial_sparsity,
                                 final_sparsity=final_sparsity,
                                 begin_step=begin_step,
                                 end_step=end_step,
                                 frequency=frequency)
    }

    l = tf.keras.layers
    dr = 0.5  # dropout rate (fraction)
    pruned_model = tf.keras.Sequential([
        sparsity.prune_low_magnitude(
            l.Conv1D(128,
                     3,
                     padding='valid',
                     activation="relu",
                     name="conv1",
                     kernel_initializer='glorot_uniform',
                     input_shape=in_shape), **pruning_params),
        sparsity.prune_low_magnitude(
            l.Conv1D(128,
                     3,
                     padding='valid',
                     activation="relu",
                     name="conv2",
                     kernel_initializer='glorot_uniform'), **pruning_params),
        l.MaxPool1D(2),
        sparsity.prune_low_magnitude(
            l.Conv1D(64,
                     3,
                     padding='valid',
                     activation="relu",
                     name="conv3",
                     kernel_initializer='glorot_uniform'), **pruning_params),
        sparsity.prune_low_magnitude(
            l.Conv1D(64,
                     3,
                     padding='valid',
                     activation="relu",
                     name="conv4",
                     kernel_initializer='glorot_uniform'), **pruning_params),
        l.Dropout(dr),
        sparsity.prune_low_magnitude(
            l.Conv1D(32,
                     3,
                     padding='valid',
                     activation="relu",
                     name="conv5",
                     kernel_initializer='glorot_uniform'), **pruning_params),
        sparsity.prune_low_magnitude(
            l.Conv1D(32,
                     3,
                     padding='valid',
                     activation="relu",
                     name="conv6",
                     kernel_initializer='glorot_uniform'), **pruning_params),
        l.Dropout(dr),
        l.MaxPool1D(2),
        l.Flatten(),
        sparsity.prune_low_magnitude(
            l.Dense(128,
                    activation='relu',
                    kernel_initializer='he_normal',
                    name="dense1"), **pruning_params),
        sparsity.prune_low_magnitude(
            l.Dense(len(classes),
                    kernel_initializer='he_normal',
                    name="dense2"), **pruning_params),
        l.Activation('softmax')
    ])

    pruned_model.compile(loss='categorical_crossentropy',
                         optimizer='adam',
                         metrics=["accuracy"])

    pruned_model.summary()

    callbacks = [
        sparsity.UpdatePruningStep(),
        sparsity.PruningSummaries(log_dir=logdir, profile_batch=0)
    ]

    history = pruned_model.fit(X_train,
                               Y_train,
                               batch_size=batch_size,
                               epochs=nb_epoch,
                               verbose=1,
                               validation_data=(X_val, Y_val),
                               callbacks=callbacks)

    score = pruned_model.evaluate(X_test, Y_test, verbose=0)

    print("Test loss: ", score)

    # Save the model
    pruned_model = sparsity.strip_pruning(pruned_model)
    pruned_model.summary()

    # Save the model architecture
    print_model_to_json(
        pruned_model,
        './model/Conv1D-{}.json'.format(str(final_sparsity) + version))

    # Save the weights
    pruned_model.save_weights(
        './model/Conv1D-{}.h5'.format(str(final_sparsity) + version))
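
A hypothetical call site, assuming the module-level X_train/Y_train, X_val/Y_val, X_test/Y_test, classes, in_shape and logdir that the function relies on are already defined:

# Sweep a few target sparsities; each call writes ./model/Conv1D-<sparsity><version>.{json,h5}
for s in [0.5, 0.75, 0.9]:
    prune_Conv1D(final_sparsity=s, begin_step=2000, version="_v1")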
Code example #21
File: plot.py Project: thaarres/hls4ml_cnns
 del (x_train, y_train)
 for model_name in models:
     model_baseline = tf.keras.models.load_model(
         'models/{}.h5'.format(model_name),
         custom_objects={
             'PruneLowMagnitude': pruning_wrapper.PruneLowMagnitude,
             'QDense': QDense,
             'QConv2D': QConv2D,
             'Clip': Clip,
             'QActivation': QActivation
         })
     # print(model_baseline.get_config())
     # sys.exit()
     score = model_baseline.evaluate(x_test, y_test)
     print('Keras Accuracy {} = {}'.format(model_name, score[1]))
     model_baseline_stripped = strip_pruning(model_baseline)
     intbits_a = 0
     intbits_w = 0
     if model_name.find('full') != -1:
         a = hls4ml.model.profiling.activations_keras(
             model_baseline_stripped, x_test[:100], fmt='summary')
         intbits_a = int(
             np.ceil(
                 max(
                     np.log2(
                         np.array(
                             list(map(lambda x: x['whishi'], a))))))
             + 1)
         w = hls4ml.model.profiling.weights_keras(
             model_baseline_stripped, fmt='summary')
         intbits_w = int(
             np.ceil(
                 max(
                     np.log2(
                         np.array(
                             list(map(lambda x: x['whishi'], w))))))
             + 1)
Code example #22
def main(args):
    annotation_file = args.annotation_file
    classes_path = args.classes_path
    class_names = get_classes(classes_path)
    num_classes = len(class_names)

    anchors = get_anchors(args.anchors_path)
    num_anchors = len(anchors)

    log_dir_path = args.log_directory
    try:
        log_dir = os.path.join('logs', log_dir_path)
    except TypeError:
        date_now = datetime.now()
        log_dir_folder_name = f'{date_now.strftime("%Y_%m_%d_%H%M%S")}_{args.model_type}_TransferEp_{args.transfer_epoch}_TotalEP_{args.total_epoch}'

        log_dir = os.path.realpath(os.path.join(
            'logs',
            log_dir_folder_name
        ))

    # get freeze level according to CLI option
    if args.weights_path:
        freeze_level = 0
    else:
        freeze_level = 1

    if args.freeze_level is not None:
        freeze_level = args.freeze_level

    # How many percentage of layers to unfreeze in fine tuning
    unfreeze_level = args.unfreeze_level

    # callbacks for training process
    logging = TensorBoard(log_dir=log_dir, histogram_freq=0, write_graph=False, write_grads=False, write_images=False,
                          update_freq='batch')
    checkpoint = ModelCheckpoint(
        filepath=log_dir + os.sep + 'ep{epoch:03d}-loss{loss:.3f}-val_loss{val_loss:.3f}.h5',
        monitor='val_loss',
        mode='min',
        verbose=1,
        save_weights_only=False,
        save_best_only=True,
        period=1
    )
    reduce_lr = ReduceLROnPlateau(
        monitor='val_loss',
        factor=0.5, mode='min',
        patience=10,
        verbose=1,
        cooldown=0,
        min_lr=1e-10
    )
    early_stopping = EarlyStopping(monitor='val_loss', min_delta=0, patience=50, verbose=1, mode='min')
    terminate_on_nan = TerminateOnNaN()

    callbacks = [logging, checkpoint, reduce_lr, early_stopping, terminate_on_nan]

    # get train&val dataset
    dataset = get_dataset(annotation_file)
    if args.val_annotation_file:
        val_dataset = get_dataset(args.val_annotation_file)
        num_train = len(dataset)
        num_val = len(val_dataset)
        dataset.extend(val_dataset)
    else:
        val_split = args.val_split
        num_val = int(len(dataset) * val_split)
        num_train = len(dataset) - num_val

    # assign multiscale interval
    if args.multiscale:
        rescale_interval = args.rescale_interval
    else:
        rescale_interval = -1  # Doesn't rescale

    # model input shape check
    input_shape = args.model_image_size
    assert (input_shape[0] % 32 == 0 and input_shape[1] % 32 == 0), 'model_image_size should be multiples of 32'

    # get different model type & train&val data generator
    if num_anchors == 9:
        # YOLOv3 use 9 anchors
        get_train_model = get_yolo3_train_model
        data_generator = yolo3_data_generator_wrapper

        # tf.keras.Sequence style data generator
        # train_data_generator = Yolo3DataGenerator(dataset[:num_train], args.batch_size, input_shape, anchors, num_classes, args.enhance_augment, rescale_interval, args.multi_anchor_assign)
        # val_data_generator = Yolo3DataGenerator(dataset[num_train:], args.batch_size, input_shape, anchors, num_classes, multi_anchor_assign=args.multi_anchor_assign)

        tiny_version = False
    elif num_anchors == 6:
        # Tiny YOLOv3 use 6 anchors
        get_train_model = get_yolo3_train_model
        data_generator = yolo3_data_generator_wrapper

        # tf.keras.Sequence style data generator
        # train_data_generator = Yolo3DataGenerator(dataset[:num_train], args.batch_size, input_shape, anchors, num_classes, args.enhance_augment, rescale_interval, args.multi_anchor_assign)
        # val_data_generator = Yolo3DataGenerator(dataset[num_train:], args.batch_size, input_shape, anchors, num_classes, multi_anchor_assign=args.multi_anchor_assign)

        tiny_version = True
    elif num_anchors == 5:
        # YOLOv2 use 5 anchors
        get_train_model = get_yolo2_train_model
        data_generator = yolo2_data_generator_wrapper

        # tf.keras.Sequence style data generator
        # train_data_generator = Yolo2DataGenerator(dataset[:num_train], args.batch_size, input_shape, anchors, num_classes, args.enhance_augment, rescale_interval)
        # val_data_generator = Yolo2DataGenerator(dataset[num_train:], args.batch_size, input_shape, anchors, num_classes)

        tiny_version = False
    else:
        raise ValueError('Unsupported anchors number')

    # prepare online evaluation callback
    if args.eval_online:
        eval_callback = EvalCallBack(
            model_type=args.model_type,
            annotation_lines=dataset[num_train:],
            anchors=anchors,
            class_names=class_names,
            model_image_size=args.model_image_size,
            model_pruning=args.model_pruning,
            log_dir=log_dir,
            eval_epoch_interval=args.eval_epoch_interval,
            save_eval_checkpoint=args.save_eval_checkpoint,
            elim_grid_sense=args.elim_grid_sense
        )
        callbacks.append(eval_callback)

    # prepare train/val data shuffle callback
    if args.data_shuffle:
        shuffle_callback = DatasetShuffleCallBack(dataset)
        callbacks.append(shuffle_callback)

    # prepare model pruning config
    pruning_end_step = np.ceil(1.0 * num_train / args.batch_size).astype(np.int32) * args.total_epoch
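    # Note that the end step is expressed in optimizer steps, not epochs:
    # the sparsity schedule advances once per batch, so the ramp ends at
    # steps_per_epoch * total_epoch.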
    if args.model_pruning:
        pruning_callbacks = [sparsity.UpdatePruningStep(), sparsity.PruningSummaries(log_dir=log_dir, profile_batch=0)]
        callbacks = callbacks + pruning_callbacks

    # prepare optimizer
    optimizer = get_optimizer(args.optimizer, args.learning_rate, decay_type=None)

    # support multi-gpu training
    if args.gpu_num >= 2:
        # devices_list=["/gpu:0", "/gpu:1"]
        devices_list = ["/gpu:{}".format(n) for n in range(args.gpu_num)]
        strategy = tf.distribute.MirroredStrategy(devices=devices_list)
        print('Number of devices: {}'.format(strategy.num_replicas_in_sync))
        with strategy.scope():
            # get multi-gpu train model
            model = get_train_model(
                model_type=args.model_type,
                anchors=anchors,
                num_classes=num_classes,
                weights_path=args.weights_path,
                freeze_level=freeze_level,
                optimizer=optimizer,
                label_smoothing=args.label_smoothing,
                elim_grid_sense=args.elim_grid_sense,
                model_pruning=args.model_pruning,
                pruning_end_step=pruning_end_step
            )

    else:
        # get normal train model
        model = get_train_model(
            model_type=args.model_type,
            anchors=anchors,
            num_classes=num_classes,
            weights_path=args.weights_path,
            freeze_level=freeze_level,
            optimizer=optimizer,
            label_smoothing=args.label_smoothing,
            elim_grid_sense=args.elim_grid_sense,
            model_pruning=args.model_pruning,
            pruning_end_step=pruning_end_step
        )

    if args.show_history:
        model.summary()

    layers_count = len(model.layers)
    print(f'Total layers: {layers_count}')

    # Transfer training some epochs with frozen layers first if needed, to get a stable loss.
    initial_epoch = args.init_epoch
    epochs = initial_epoch + args.transfer_epoch
    print("Transfer training stage")
    print('Train on {} samples, val on {} samples, with batch size {}, input_shape {}.'.format(num_train, num_val,
                                                                                               args.batch_size,
                                                                                               input_shape))
    # model.fit_generator(train_data_generator,
    """
    Transfer training stage: train with the frozen layers
    """
    model.fit(
        data_generator(
            annotation_lines=dataset[:num_train],
            batch_size=args.batch_size,
            input_shape=input_shape,
            anchors=anchors,
            num_classes=num_classes,
            enhance_augment=args.enhance_augment,
            rescale_interval=rescale_interval,
            multi_anchor_assign=args.multi_anchor_assign
        ),
        steps_per_epoch=max(1, num_train // args.batch_size),
        # validation_data=val_data_generator,
        validation_data=data_generator(
            annotation_lines=dataset[num_train:],
            batch_size=args.batch_size,
            input_shape=input_shape,
            anchors=anchors,
            num_classes=num_classes,
            multi_anchor_assign=args.multi_anchor_assign
        ),
        validation_steps=max(1, num_val // args.batch_size),
        epochs=epochs,
        initial_epoch=initial_epoch,
        # verbose=1,
        workers=1,
        use_multiprocessing=False,
        max_queue_size=10,
        callbacks=callbacks
    )

    # Wait 2 seconds for next stage
    time.sleep(2)

    if args.decay_type:
        # rebuild optimizer to apply learning rate decay, only after
        # unfreeze all layers
        callbacks.remove(reduce_lr)
        steps_per_epoch = max(1, num_train // args.batch_size)
        decay_steps = steps_per_epoch * (args.total_epoch - args.init_epoch - args.transfer_epoch)
        optimizer = get_optimizer(args.optimizer, args.learning_rate, decay_type=args.decay_type,
                                  decay_steps=decay_steps)

    # Unfreeze the whole network for further tuning
    # NOTE: more GPU memory is required after unfreezing the body
    fine_tune_layers = int(layers_count * unfreeze_level)
    print(f"Unfreeze {unfreeze_level * 100}% of layers and continue training, to fine-tune.")
    print(f"Unfroze {fine_tune_layers} layers of {layers_count}")

    if args.gpu_num >= 2:
        with strategy.scope():
            for i in range(layers_count - fine_tune_layers, layers_count):
                model.layers[i].trainable = True
            model.compile(optimizer=optimizer,
                          loss={'yolo_loss': lambda y_true, y_pred: y_pred})  # recompile to apply the change

    else:
        for i in range(layers_count - fine_tune_layers, layers_count):
            model.layers[i].trainable = True
        model.compile(optimizer=optimizer,
                      loss={'yolo_loss': lambda y_true, y_pred: y_pred})  # recompile to apply the change

    print('Train on {} samples, val on {} samples, with batch size {}, input_shape {}.'.format(num_train, num_val,
                                                                                               args.batch_size,
                                                                                               input_shape))
    """
    Fine-tuning stage: more memory will be used, and the learning rate (LR) will be decayed
    """
    # model.fit_generator(train_data_generator,
    model.fit(
        # The YOLO data augmentation generator tool
        data_generator(
            annotation_lines=dataset[:num_train],
            batch_size=args.batch_size,
            input_shape=input_shape,
            anchors=anchors,
            num_classes=num_classes,
            enhance_augment=args.enhance_augment,
            rescale_interval=rescale_interval,
            multi_anchor_assign=args.multi_anchor_assign
        ),
        steps_per_epoch=max(1, num_train // args.batch_size),
        # validation_data=val_data_generator,
        # Validation generator
        validation_data=data_generator(
            annotation_lines=dataset[num_train:],
            batch_size=args.batch_size,
            input_shape=input_shape,
            anchors=anchors,
            num_classes=num_classes,
            multi_anchor_assign=args.multi_anchor_assign
        ),
        validation_steps=max(1, num_val // args.batch_size),
        epochs=args.total_epoch,
        initial_epoch=epochs,
        # verbose=1,
        workers=1,
        use_multiprocessing=False,
        max_queue_size=10,
        callbacks=callbacks
    )

    # Finally store model
    if args.model_pruning:
        model = sparsity.strip_pruning(model)
    model.save(os.path.join(log_dir, 'trained_final.h5'))
Code example #23
 def get_model(self) -> tf.keras.Model:
     return sparsity.strip_pruning(self._model)
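
A sketch of the intended call site; the trainer object is an assumption, only the get_model() accessor above comes from the source:

trained = trainer.get_model()      # hypothetical trainer exposing the accessor above
trained.save('model_stripped.h5')  # wrapper-free model, safe to export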
Code example #24
    print(cfg)
    hls_model = hls4ml.converters.keras_to_hls(cfg)

    #(img_train, label_train), (img_test, label_test) = tfds.load("svhn_cropped", split=['train', 'test'], batch_size=-1, as_supervised=True,)
    #del (img_train, label_train)

    #wp,ap = numerical(keras_model=m, hls_model=hls_model, X=img_test[:1000])
    #wp.savefig('%s_profile_weights.pdf'%model_name)
    #ap.savefig('%s_profile_activations.pdf'%model_name)
    hls_model.build(csim=False, synth=True, vsynth=True)


indir_name = str(sys.argv[1])
path = "/data/thaarres/hls4ml_docker/hls4ml_cnns/" + indir_name
print("Starting hls project")
files = [f for f in listdir(path) if isfile(join(path, f))]
for f in files:
    model_name = f
    model = tf.keras.models.load_model(path + f,
                                       custom_objects={
                                           'PruneLowMagnitude':
                                           pruning_wrapper.PruneLowMagnitude,
                                           'QDense': QDense,
                                           'QConv2D': QConv2D,
                                           'Clip': Clip,
                                           'QActivation': QActivation
                                       })
    model.summary()
    model_stripped = strip_pruning(model)
    toHLS(model_stripped)
Code example #25
File: benchmarkModels.py Project: thesps/hls4ml_cnns
  print("Accuracy: Keras={} hls4ml={}".format(data['accuracy_keras'],data['accuracy_hls4ml']))
  hls4ml.utils.plot_model(hls_model, show_shapes=True, show_precision=True, to_file='plot_model_{}.png'.format(precision))

  wp,ap = numerical(keras_model=model, hls_model=hls_model, X=x_test[:1000])

  wp.savefig('%s_profile_weights_LayerTypePrecision.pdf'%cfg['OutputDir'])
  ap.savefig('%s_profile_activations_LayerTypePrecision.pdf'%cfg['OutputDir'])
  #hls_model.build(csim=False, synth=True, vsynth=True) 

if __name__ == '__main__':
    model_name = str(sys.argv[1])
    model = tf.keras.models.load_model("models/"+model_name,custom_objects={'PruneLowMagnitude': pruning_wrapper.PruneLowMagnitude,'QDense': QDense, 'QConv2D': QConv2D, 'Clip': Clip, 'QActivation': QActivation})
    model.summary()
    model  = strip_pruning(model)
    (x_train, y_train), (x_test, y_test) = getNumpyData('svhn_cropped',oneHot=False)
    a = hls4ml.model.profiling.activations_keras(model, x_test[:1000], fmt='summary')
    intbits_a = int(np.ceil(max(np.log2(np.array(list(map(lambda x : x['whishi'], a)))))) + 1)
    w = hls4ml.model.profiling.weights_keras(model, fmt='summary')
    intbits_w = int(np.ceil(max(np.log2(np.array(list(map(lambda x : x['whishi'], w)))))) + 1)
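    # 'whishi' is the upper-whisker statistic from the hls4ml profiling
    # summary; ceil(log2(max)) + 1 sizes the integer part of the fixed-point
    # type so the largest observed value fits, the +1 presumably covering
    # the sign bit.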
    print("Starting hls project, using {} int bits for weights+bias and {} int bits for outputs".format(intbits_a,intbits_w))
    precision = [16,14,12,10,8,6,4,3,2,1]
    precision = [16]  # overrides the full sweep above; only 16-bit is synthesized
    data = {'w':[], 'dsp':[], 'lut':[], 'ff':[], 'bram':[], 'latency_clks':[], 'latency_ns':[], 'latency_ii':[]}
    #Parallel(n_jobs=10, backend='multiprocessing')(delayed(toHLS)(i) for i in precision)
    #precision = np.flip(precision)
    for p in precision:
      toHLS(model,p)
    # for p in precision:
    #     datai = readReports(model_name.replace(".h5", "") + "_bw%i" % (p), p)
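
The commented-out joblib line above hints at a parallel sweep. A sketch consistent with how toHLS(model, p) is called in the loop; the joblib import and job count are assumptions, and the Keras model has to survive pickling for the multiprocessing backend:

from joblib import Parallel, delayed

# One synthesis job per precision instead of the serial loop.
Parallel(n_jobs=4, backend='multiprocessing')(
    delayed(toHLS)(model, p) for p in precision)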
Code example #26
#new_pruned_model = sparsity.prune_low_magnitude(loaded_model, **new_pruning_params)
#new_pruned_model.summary()

#new_pruned_model.compile(
#    loss=tf.keras.losses.categorical_crossentropy,
#    optimizer='adam',
#    metrics=['accuracy'])

# Add a pruning step callback to peg the pruning step to the optimizer's
# step. Also add a callback to add pruning summaries to tensorboard
#callbacks = [
#    sparsity.UpdatePruningStep(),
#    sparsity.PruningSummaries(log_dir=logdir, profile_batch=0)
#]

# new_pruned_model.fit(train_imgs_scaled, train_labels_enc,
#           batch_size=batch_size,
#           epochs=epochs,
#           verbose=1,
#           callbacks=callbacks,
#           validation_data=(val_imgs_scaled, val_labels_enc))
#
# score = new_pruned_model.evaluate(val_imgs_scaled, val_labels_enc, verbose=0)
# print('Test loss:', score[0])
# print('Test accuracy:', score[1])

final_model = sparsity.strip_pruning(loaded_model)
final_model.summary()
final_model.save('vggPruned.h5')
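
Pruning only pays off on disk under compression, since the stripped .h5 still stores explicit zeros. A minimal size check in the spirit of the zip pattern used in the next example; the zip filename is an assumption:

import os
import zipfile

with zipfile.ZipFile('vggPruned.zip', 'w', compression=zipfile.ZIP_DEFLATED) as z:
    z.write('vggPruned.h5')
print('Compressed size: %.2f Mb' % (os.path.getsize('vggPruned.zip') / float(2**20)))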
Code example #27
callbacks = [
    sparsity.UpdatePruningStep(),
    sparsity.PruningSummaries(log_dir='./', profile_batch=0)
]

print('[INFO] Start pruning process...')

pruned_model.fit(train_generator,
                 steps_per_epoch=train_generator.__len__(),
                 callbacks=callbacks,
                 epochs=epochs,
                 validation_data=validation_generator,
                 validation_steps=validation_generator.__len__())

pruned_model_path = './models/pruned_MobileNetv2.h5'
# convert pruned model to original
final_model = sparsity.strip_pruning(pruned_model)
tf.keras.models.save_model(final_model,
                           pruned_model_path,
                           include_optimizer=False)

# Zip file
pruned_zip_path = './models/pruned_MobileNetv2.zip'
with zipfile.ZipFile(pruned_zip_path, 'w',
                     compression=zipfile.ZIP_DEFLATED) as f:
    f.write(pruned_model_path)

# Print file size
print("Size of the model before compression: %.2f Mb" %
      (os.path.getsize(model_path) / float(2**20)))

print("Size of the model after compression: %.2f Mb" %
Code example #28
File: train.py Project: yellowjs0304/homework_2
def main(args):
    # Path to the data annotation file
    annotation_file = args.annotation_file

    # Path where result logs and weights will be saved
    log_dir = os.path.join('logs', '000')

    # Path to the class names file
    classes_path = args.classes_path
    class_names = get_classes(classes_path)
    num_classes = len(class_names)

    # Load the anchors
    anchors = get_anchors(args.anchors_path)
    num_anchors = len(anchors)

    # get freeze level according to CLI option
    if args.weights_path:
        freeze_level = 0
    else:
        freeze_level = 1

    if args.freeze_level is not None:
        freeze_level = args.freeze_level

    # callbacks for training process
    logging = TensorBoard(log_dir=log_dir,
                          histogram_freq=0,
                          write_graph=False,
                          write_grads=False,
                          write_images=False,
                          update_freq='batch')
    checkpoint = ModelCheckpoint(os.path.join(
        log_dir, 'ep{epoch:03d}-loss{loss:.3f}-val_loss{val_loss:.3f}.h5'),
                                 monitor='val_loss',
                                 mode='min',
                                 verbose=1,
                                 save_weights_only=False,
                                 save_best_only=True,
                                 period=1)
    reduce_lr = ReduceLROnPlateau(monitor='val_loss',
                                  factor=0.5,
                                  mode='min',
                                  patience=10,
                                  verbose=1,
                                  cooldown=0,
                                  min_lr=1e-10)
    early_stopping = EarlyStopping(monitor='val_loss',
                                   min_delta=0,
                                   patience=50,
                                   verbose=1,
                                   mode='min')
    terminate_on_nan = TerminateOnNaN()

    callbacks = [
        logging, checkpoint, reduce_lr, early_stopping, terminate_on_nan
    ]

    # Load the dataset
    dataset = get_dataset(annotation_file)
    if args.val_annotation_file:
        val_dataset = get_dataset(args.val_annotation_file)
        num_train = len(dataset)
        num_val = len(val_dataset)
        dataset.extend(val_dataset)
    else:
        val_split = args.val_split
        num_val = int(len(dataset) * val_split)
        num_train = len(dataset) - num_val

    # assign multiscale interval
    if args.multiscale:
        rescale_interval = args.rescale_interval
    else:
        rescale_interval = -1  #Doesn't rescale

    # model input shape check
    input_shape = args.model_image_size
    assert (input_shape[0] % 32 == 0 and input_shape[1] % 32
            == 0), 'model_image_size should be multiples of 32'

    # Create the data generator and model according to the model type
    if num_anchors == 9:
        # YOLOv3 use 9 anchors
        get_train_model = get_yolo3_train_model
        data_generator = yolo3_data_generator_wrapper

        tiny_version = False
    elif num_anchors == 6:
        # Tiny YOLOv3 use 6 anchors
        get_train_model = get_yolo3_train_model
        data_generator = yolo3_data_generator_wrapper

        tiny_version = True
    elif num_anchors == 5:
        # YOLOv2 use 5 anchors
        get_train_model = get_yolo2_train_model
        data_generator = yolo2_data_generator_wrapper

        tiny_version = False
    else:
        raise ValueError('Unsupported anchors number')

    # prepare online evaluation callback
    if args.eval_online:
        eval_callback = EvalCallBack(
            args.model_type,
            dataset[num_train:],
            anchors,
            class_names,
            args.model_image_size,
            args.model_pruning,
            log_dir,
            eval_epoch_interval=args.eval_epoch_interval,
            save_eval_checkpoint=args.save_eval_checkpoint,
            elim_grid_sense=args.elim_grid_sense)
        callbacks.append(eval_callback)

    # prepare train/val data shuffle callback
    if args.data_shuffle:
        shuffle_callback = DatasetShuffleCallBack(dataset)
        callbacks.append(shuffle_callback)

    # prepare model pruning config
    pruning_end_step = np.ceil(1.0 * num_train / args.batch_size).astype(
        np.int32) * args.total_epoch
    if args.model_pruning:
        pruning_callbacks = [
            sparsity.UpdatePruningStep(),
            sparsity.PruningSummaries(log_dir=log_dir, profile_batch=0)
        ]
        callbacks = callbacks + pruning_callbacks

    # prepare optimizer
    optimizer = get_optimizer(args.optimizer,
                              args.learning_rate,
                              decay_type=None)

    # support multi-gpu training
    if args.gpu_num >= 2:
        # devices_list=["/gpu:0", "/gpu:1"]
        devices_list = ["/gpu:{}".format(n) for n in range(args.gpu_num)]
        strategy = tf.distribute.MirroredStrategy(devices=devices_list)
        print('Number of devices: {}'.format(strategy.num_replicas_in_sync))
        with strategy.scope():
            # get multi-gpu train model
            model = get_train_model(args.model_type,
                                    anchors,
                                    num_classes,
                                    weights_path=args.weights_path,
                                    freeze_level=freeze_level,
                                    optimizer=optimizer,
                                    label_smoothing=args.label_smoothing,
                                    elim_grid_sense=args.elim_grid_sense,
                                    model_pruning=args.model_pruning,
                                    pruning_end_step=pruning_end_step)

    else:
        # get normal train model
        model = get_train_model(args.model_type,
                                anchors,
                                num_classes,
                                weights_path=args.weights_path,
                                freeze_level=freeze_level,
                                optimizer=optimizer,
                                label_smoothing=args.label_smoothing,
                                elim_grid_sense=args.elim_grid_sense,
                                model_pruning=args.model_pruning,
                                pruning_end_step=pruning_end_step)

    model.summary()

    # Transfer training some epochs with frozen layers first if needed, to get a stable loss.
    initial_epoch = args.init_epoch
    epochs = initial_epoch + args.transfer_epoch
    print("Transfer training stage")
    print(
        'Train on {} samples, val on {} samples, with batch size {}, input_shape {}.'
        .format(num_train, num_val, args.batch_size, input_shape))

    # For better performance, run transfer learning for the first few epochs (init_epoch to transfer_epoch)
    model.fit_generator(
        data_generator(dataset[:num_train],
                       args.batch_size,
                       input_shape,
                       anchors,
                       num_classes,
                       args.enhance_augment,
                       rescale_interval,
                       multi_anchor_assign=args.multi_anchor_assign),
        steps_per_epoch=max(1, num_train // args.batch_size),
        #validation_data=val_data_generator,
        validation_data=data_generator(
            dataset[num_train:],
            args.batch_size,
            input_shape,
            anchors,
            num_classes,
            multi_anchor_assign=args.multi_anchor_assign),
        validation_steps=max(1, num_val // args.batch_size),
        epochs=epochs,
        initial_epoch=initial_epoch,
        #verbose=1,
        workers=1,
        use_multiprocessing=False,
        max_queue_size=10,
        callbacks=callbacks)

    # Wait 2 seconds for next stage
    time.sleep(2)

    if args.decay_type:
        # rebuild optimizer to apply learning rate decay, only after
        # unfreeze all layers
        callbacks.remove(reduce_lr)
        steps_per_epoch = max(1, num_train // args.batch_size)
        decay_steps = steps_per_epoch * (args.total_epoch - args.init_epoch -
                                         args.transfer_epoch)
        optimizer = get_optimizer(args.optimizer,
                                  args.learning_rate,
                                  decay_type=args.decay_type,
                                  decay_steps=decay_steps)

    # Unfreeze the whole network for further tuning
    # NOTE: more GPU memory is required after unfreezing the body
    print("Unfreeze and continue training, to fine-tune.")
    if args.gpu_num >= 2:
        with strategy.scope():
            for i in range(len(model.layers)):
                model.layers[i].trainable = True
            model.compile(optimizer=optimizer,
                          loss={
                              'yolo_loss': lambda y_true, y_pred: y_pred
                          })  # recompile to apply the change

    else:
        for i in range(len(model.layers)):
            model.layers[i].trainable = True
        model.compile(optimizer=optimizer,
                      loss={
                          'yolo_loss': lambda y_true, y_pred: y_pred
                      })  # recompile to apply the change

    print(
        'Train on {} samples, val on {} samples, with batch size {}, input_shape {}.'
        .format(num_train, num_val, args.batch_size, input_shape))

    # After transfer learning, train the remaining epochs (transfer_epoch to total_epoch).
    # If this stage is unnecessary or training takes too long, you can set total_epoch
    # equal to transfer_epoch and skip the training below. Adjust to your machine's specs.
    model.fit_generator(
        data_generator(dataset[:num_train],
                       args.batch_size,
                       input_shape,
                       anchors,
                       num_classes,
                       args.enhance_augment,
                       rescale_interval,
                       multi_anchor_assign=args.multi_anchor_assign),
        steps_per_epoch=max(1, num_train // args.batch_size),
        #validation_data=val_data_generator,
        validation_data=data_generator(
            dataset[num_train:],
            args.batch_size,
            input_shape,
            anchors,
            num_classes,
            multi_anchor_assign=args.multi_anchor_assign),
        validation_steps=max(1, num_val // args.batch_size),
        epochs=args.total_epoch,
        initial_epoch=epochs,
        #verbose=1,
        workers=1,
        use_multiprocessing=False,
        max_queue_size=10,
        callbacks=callbacks)

    # Finally store model
    if args.model_pruning:
        model = sparsity.strip_pruning(model)
    model.save(os.path.join(log_dir, 'trained_final.h5'))
Code example #29
File: train.py Project: grifon-239/diploma
def main(args):
    annotation_file = args.annotation_file
    log_dir = os.path.join('logs', '000')
    classes_path = args.classes_path
    class_names = get_classes(classes_path)
    num_classes = len(class_names)

    print('classes_path =', classes_path)
    print('class_names = ', class_names)
    print('num_classes = ', num_classes)

    anchors = get_anchors(args.anchors_path)
    num_anchors = len(anchors)

    # get freeze level according to CLI option
    if args.weights_path:
        freeze_level = 0
    else:
        freeze_level = 1

    if args.freeze_level is not None:
        freeze_level = args.freeze_level

    # callbacks for training process
    logging = TensorBoard(log_dir=log_dir,
                          histogram_freq=0,
                          write_graph=False,
                          write_grads=False,
                          write_images=False,
                          update_freq='batch')
    checkpoint = ModelCheckpoint(os.path.join(
        log_dir, 'ep{epoch:03d}-loss{loss:.3f}-val_loss{val_loss:.3f}.h5'),
                                 monitor='val_loss',
                                 verbose=1,
                                 save_weights_only=False,
                                 save_best_only=True,
                                 period=1)
    reduce_lr = ReduceLROnPlateau(monitor='val_loss',
                                  factor=0.5,
                                  patience=10,
                                  verbose=1,
                                  cooldown=0,
                                  min_lr=1e-10)
    early_stopping = EarlyStopping(monitor='val_loss',
                                   min_delta=0,
                                   patience=50,
                                   verbose=1)
    terminate_on_nan = TerminateOnNaN()

    callbacks = [
        logging, checkpoint, reduce_lr, early_stopping, terminate_on_nan
    ]

    # get train&val dataset
    dataset = get_dataset(annotation_file)
    if args.val_annotation_file:
        val_dataset = get_dataset(args.val_annotation_file)
        num_train = len(dataset)
        print('num_train = ', num_train)
        num_val = len(val_dataset)
        dataset.extend(val_dataset)
    else:
        val_split = args.val_split
        num_val = int(len(dataset) * val_split)
        num_train = len(dataset) - num_val

    # assign multiscale interval
    if args.multiscale:
        rescale_interval = args.rescale_interval
    else:
        rescale_interval = -1  #Doesn't rescale

    # model input shape check
    input_shape = args.model_image_size
    assert (input_shape[0] % 32 == 0
            and input_shape[1] % 32 == 0), 'Multiples of 32 required'

    # get different model type & train&val data generator
    if num_anchors == 9:
        # YOLOv3 use 9 anchors
        get_train_model = get_yolo3_train_model
        data_generator = yolo3_data_generator_wrapper

        # tf.keras.Sequence style data generator
        #train_data_generator = Yolo3DataGenerator(dataset[:num_train], args.batch_size, input_shape, anchors, num_classes, args.enhance_augment, rescale_interval)
        #val_data_generator = Yolo3DataGenerator(dataset[num_train:], args.batch_size, input_shape, anchors, num_classes)

        tiny_version = False
    elif num_anchors == 6:
        # Tiny YOLOv3 use 6 anchors
        get_train_model = get_yolo3_train_model
        data_generator = yolo3_data_generator_wrapper

        # tf.keras.Sequence style data generator
        #train_data_generator = Yolo3DataGenerator(dataset[:num_train], args.batch_size, input_shape, anchors, num_classes, args.enhance_augment, rescale_interval)
        #val_data_generator = Yolo3DataGenerator(dataset[num_train:], args.batch_size, input_shape, anchors, num_classes)

        tiny_version = True
    elif num_anchors == 5:
        # YOLOv2 use 5 anchors
        get_train_model = get_yolo2_train_model
        data_generator = yolo2_data_generator_wrapper

        # tf.keras.Sequence style data generator
        #train_data_generator = Yolo2DataGenerator(dataset[:num_train], args.batch_size, input_shape, anchors, num_classes, args.enhance_augment, rescale_interval)
        #val_data_generator = Yolo2DataGenerator(dataset[num_train:], args.batch_size, input_shape, anchors, num_classes)

        tiny_version = False
    else:
        raise ValueError('Unsupported anchors number')

    # prepare online evaluation callback
    if args.eval_online:
        eval_callback = EvalCallBack(
            args.model_type,
            dataset[num_train:],
            anchors,
            class_names,
            args.model_image_size,
            args.model_pruning,
            log_dir,
            eval_epoch_interval=args.eval_epoch_interval,
            save_eval_checkpoint=args.save_eval_checkpoint)
        callbacks.append(eval_callback)

    # prepare train/val data shuffle callback
    if args.data_shuffle:
        shuffle_callback = DatasetShuffleCallBack(dataset)
        callbacks.append(shuffle_callback)

    # prepare model pruning config
    pruning_end_step = np.ceil(1.0 * num_train / args.batch_size).astype(
        np.int32) * args.total_epoch
    if args.model_pruning:
        pruning_callbacks = [
            sparsity.UpdatePruningStep(),
            sparsity.PruningSummaries(log_dir=log_dir, profile_batch=0)
        ]
        callbacks = callbacks + pruning_callbacks

    # prepare optimizer
    optimizer = get_optimizer(args.optimizer,
                              args.learning_rate,
                              decay_type=None)

    # get train model
    model = get_train_model(args.model_type,
                            anchors,
                            num_classes,
                            weights_path=args.weights_path,
                            freeze_level=freeze_level,
                            optimizer=optimizer,
                            label_smoothing=args.label_smoothing,
                            model_pruning=args.model_pruning,
                            pruning_end_step=pruning_end_step)
    # support multi-gpu training
    template_model = None
    if args.gpu_num >= 2:
        # keep the template model for saving result
        template_model = model
        model = multi_gpu_model(model, gpus=args.gpu_num)
        # recompile multi gpu model
        model.compile(optimizer=optimizer,
                      loss={
                          'yolo_loss': lambda y_true, y_pred: y_pred
                      })
    model.summary()

    # Transfer training some epochs with frozen layers first if needed, to get a stable loss.
    initial_epoch = args.init_epoch
    epochs = initial_epoch + args.transfer_epoch
    print("Transfer training stage")
    print(
        'Train on {} samples, val on {} samples, with batch size {}, input_shape {}.'
        .format(num_train, num_val, args.batch_size, input_shape))
    #model.fit_generator(train_data_generator,
    model.fit_generator(
        data_generator(dataset[:num_train], args.batch_size, input_shape,
                       anchors, num_classes, args.enhance_augment),
        steps_per_epoch=max(1, num_train // args.batch_size),
        #validation_data=val_data_generator,
        validation_data=data_generator(dataset[num_train:], args.batch_size,
                                       input_shape, anchors, num_classes),
        validation_steps=max(1, num_val // args.batch_size),
        epochs=epochs,
        initial_epoch=initial_epoch,
        #verbose=1,
        workers=1,
        use_multiprocessing=False,
        max_queue_size=10,
        callbacks=callbacks)

    # Wait 2 seconds for next stage
    time.sleep(2)

    if args.decay_type:
        # rebuild optimizer to apply learning rate decay, only after
        # unfreeze all layers
        callbacks.remove(reduce_lr)
        steps_per_epoch = max(1, num_train // args.batch_size)
        decay_steps = steps_per_epoch * (args.total_epoch - args.init_epoch -
                                         args.transfer_epoch)
        optimizer = get_optimizer(args.optimizer,
                                  args.learning_rate,
                                  decay_type=args.decay_type,
                                  decay_steps=decay_steps)

    # Unfreeze the whole network for further tuning
    # NOTE: more GPU memory is required after unfreezing the body
    print("Unfreeze and continue training, to fine-tune.")
    for i in range(len(model.layers)):
        model.layers[i].trainable = True
    model.compile(optimizer=optimizer,
                  loss={
                      'yolo_loss': lambda y_true, y_pred: y_pred
                  })  # recompile to apply the change

    print(
        'Train on {} samples, val on {} samples, with batch size {}, input_shape {}.'
        .format(num_train, num_val, args.batch_size, input_shape))
    #model.fit_generator(train_data_generator,
    model.fit_generator(
        data_generator(dataset[:num_train], args.batch_size, input_shape,
                       anchors, num_classes, args.enhance_augment,
                       rescale_interval),
        steps_per_epoch=max(1, num_train // args.batch_size),
        #validation_data=val_data_generator,
        validation_data=data_generator(dataset[num_train:], args.batch_size,
                                       input_shape, anchors, num_classes),
        validation_steps=max(1, num_val // args.batch_size),
        epochs=args.total_epoch,
        initial_epoch=epochs,
        #verbose=1,
        workers=1,
        use_multiprocessing=False,
        max_queue_size=10,
        callbacks=callbacks)

    # Finally store model
    if args.model_pruning:
        if template_model is not None:
            template_model = sparsity.strip_pruning(template_model)
        else:
            model = sparsity.strip_pruning(model)

    if template_model is not None:
        template_model.save(os.path.join(log_dir, 'trained_final.h5'))
    else:
        model.save(os.path.join(log_dir, 'trained_final.h5'))
Code example #30
def train(cfg):
    
    epochs = cfg['epochs']
    save_dir = cfg['save_dir']
    if not os.path.exists(save_dir):
        os.mkdir(save_dir)

    shape = (int(cfg['height']), int(cfg['width']), 3)

    n_class = int(cfg['class_number'])
    batch_size = int(cfg['batch_size'])


    if cfg['model'] == 'mymodel':
        from model.my_model import MyModel
        model = MyModel(shape, n_class).build()

    if cfg['model'] == 'v2':
        from model.mobilenet_v2 import MyModel
        model = MyModel(shape, n_class).buildRaw()


    train_generator, validation_generator, count1, count2 = generate(batch_size, shape[:2], cfg['train_dir'], cfg['eval_dir'])
    print(count1, count2)


    earlystop = EarlyStopping(monitor='val_acc', patience=4, verbose=0, mode='auto')
    checkpoint = ModelCheckpoint(filepath=os.path.join("save", 'prune_e_{epoch:02d}_{val_loss:.3f}_{val_acc:.3f}.h5'),
                 monitor='val_acc', save_best_only=False, save_weights_only=False)
    reduce_lr = ReduceLROnPlateau(monitor='val_acc', factor=0.1, patience=2, verbose=1, min_lr=1e-7)



    model_path = r'./save/v2'

    

    # x_train, y_train = train_generator.next()
    # num_train_samples = batch_size
    # x_test, y_test = validation_generator.next()

    
    loaded_model = tf.keras.models.load_model(os.path.join(model_path,'e_06_0.20_1.00.h5'))
    score = loaded_model.evaluate_generator(validation_generator, count2//batch_size)
    print('original Test loss:', score[0])
    print('original Test accuracy:', score[1])


    
    end_step = np.ceil(1.0 * count1 / batch_size).astype(np.int32) * epochs
    print(end_step)
    new_pruning_params = {
        'pruning_schedule':
        sparsity.PolynomialDecay(initial_sparsity=0.50,
                                 final_sparsity=0.90,
                                 begin_step=0,
                                 end_step=end_step,
                                 frequency=100)
    }
    new_pruned_model = sparsity.prune_low_magnitude(loaded_model, **new_pruning_params)

    #new_pruned_model.summary()
    opt = Adam(lr=1e-4)
    new_pruned_model.compile(loss=tf.keras.losses.categorical_crossentropy,     
                            optimizer=opt,     
                            metrics=['acc'])
    # Now we start training and pruning the model.

    # Add a pruning step callback to peg the pruning step to the optimizer's
    # step. Also add a callback to write pruning summaries to TensorBoard.
    logdir = "./save/log"
    callbacks = [earlystop,checkpoint,reduce_lr,
                sparsity.UpdatePruningStep(),    
                sparsity.PruningSummaries(log_dir=logdir, profile_batch=0)]
    # new_pruned_model.fit(x_train, y_train,          
    #                 batch_size=batch_size,          
    #                 epochs=epochs,          
    #                 verbose=1,          
    #                 callbacks=callbacks,          
    #                 validation_data=(x_test, y_test))

    new_pruned_model.fit_generator(train_generator, 
            validation_data=validation_generator, 
            steps_per_epoch=100,  # capped at 100 steps; a full pass would be count1 // batch_size
            validation_steps=count2 // batch_size,
            epochs=epochs,
            callbacks=callbacks)

    score = new_pruned_model.evaluate_generator(validation_generator, count2//batch_size)
    print('Test loss:', score[0])
    print('Test accuracy:', score[1])


    final_model = sparsity.strip_pruning(new_pruned_model)

    new_pruned_keras_file = "save/pruned_model.h5"
    tf.keras.models.save_model(final_model, new_pruned_keras_file, include_optimizer=False)
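
As a follow-up, a reload sketch using the names defined above. Since strip_pruning ran before saving, prune_scope() is not needed (it is only required for checkpoints that still contain the pruning wrappers), but the model must be recompiled because it was saved with include_optimizer=False:

restored = tf.keras.models.load_model(new_pruned_keras_file)
restored.compile(loss=tf.keras.losses.categorical_crossentropy,
                 optimizer=Adam(lr=1e-4),
                 metrics=['acc'])
score = restored.evaluate_generator(validation_generator, count2 // batch_size)
print('Reloaded test accuracy:', score[1])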