def toHLS(p, r, m, doQK=False, intbits_a=0, odir='cnn_projects'):
    if doQK:
        model = tf.keras.models.load_model(
            'models/{}/model_best.h5'.format(m),
            custom_objects={
                'PruneLowMagnitude': pruning_wrapper.PruneLowMagnitude,
                'QDense': QDense,
                'QConv2D': QConv2D,
                'Clip': Clip,
                'QActivation': QActivation,
                'QBatchNormalization': QBatchNormalization
            })
        model = strip_pruning(model)
        hls_model = getQKeras(model=model, model_name=m, precision=p, reuse=r,
                              intbits_a=intbits_a, odir=odir)
    else:
        model = tf.keras.models.load_model(
            'models/{}/model_best.h5'.format(m),
            custom_objects={
                'PruneLowMagnitude': pruning_wrapper.PruneLowMagnitude,
                'QDense': QDense,
                'QConv2D': QConv2D,
                'Clip': Clip,
                'QActivation': QActivation
            })
        model = strip_pruning(model)
        hls_model = getBaseline(model=model, model_name=m, precision=p, reuse=r,
                                intbits_a=intbits_a, odir=odir)

    (x_train, y_train), (x_test, y_test) = getNumpyData('svhn_cropped', oneHot=True)
    wp, ap = numerical(model=model, hls_model=hls_model, X=x_test[:1000])

    # Activation profile
    ap.axes[0].set_title("")
    add_logo(ap.axes[0], ap, 0.3, position='upper left')
    labels = [item.get_text()
                  .replace('batch_normalization', 'Batch norm.')
                  .replace('max_pooling2d', 'Max Pooling')
                  .replace('_', ' ')
                  .capitalize()
              for item in ap.axes[0].get_yticklabels()]
    ap.axes[0].set_yticklabels(labels)
    ap.axes[0].set_xlabel('Output')
    ap.axes[0].set_xlim([10.455191523E-13, 64])
    ap.savefig('plots/Profile_{}_activations.pdf'.format(model.name))
    del ap

    # Weight profile
    wp.axes[0].set_title("")
    labels = [item.get_text()
                  .replace('batch_normalization', 'Batch norm.')
                  .replace('max_pooling2d', 'Max Pooling')
                  .replace('_', ' ')
                  .replace('0 0', '0, w').replace('0 1', '0, b')
                  .replace('1 0', '1, w').replace('1 1', '1, b')
                  .replace('2 0', '2, w').replace('2 1', '2, b')
                  .replace('3 0', '3, w').replace('3 1', '3, b')
                  .replace('4 0', '4, w').replace('4 1', '4, b')
                  .replace('5 0', '5, w').replace('5 1', '5, b')
                  .replace('output dense 0', 'output, w').replace('output dense 1', 'output, b')
                  .replace('norm. 0', 'norm., w').replace('norm. 1', 'norm., b')
                  .replace(', b,', ' 1,')
                  .replace('output 0', 'output, w').replace('output 1', 'output, b')
                  .capitalize()
              for item in wp.axes[0].get_yticklabels()]
    # labels = [item.replace('Dense','Fused dense + b n.').replace('Output Fused dense + b n.','Output dense') for item in labels]
    wp.axes[0].set_yticklabels(labels)
    wp.axes[0].set_xlim([0.0000009, 64])
    wp.axes[0].set_xlabel('Weight')
    add_logo(wp.axes[0], wp, 0.3, position='upper left')
    wp.savefig('plots/Profile_{}_weights.pdf'.format(model.name))
def getReports(mname, p, x_test, y_test):
    data_ = {}
    indir = '/eos/home-t/thaarres/hls4ml_cnns/synthesized_cnns_v4/{}_{}bit_reuse1/'.format(mname, p)
    report_vsynth = Path('{}/vivado_synth.rpt'.format(indir))
    report_csynth = Path('{}/myproject_prj/solution1/syn/report/myproject_csynth.rpt'.format(indir))
    if report_vsynth.is_file() and report_csynth.is_file():
        print('Found valid vsynth and csynth reports! Fetching numbers')
        if mname.find('full') != -1:
            data_['w'] = int(p)
            if int(p) > 9:
                model_ = tf.keras.models.load_model(
                    'models/{}.h5'.format(mname),
                    custom_objects={
                        'PruneLowMagnitude': pruning_wrapper.PruneLowMagnitude,
                        'QDense': QDense,
                        'QConv2D': QConv2D,
                        'Clip': Clip,
                        'QActivation': QActivation
                    })
                model_ = strip_pruning(model_)
                data_['accuracy_keras'], data_['accuracy_hls4ml'] = getBaseline(model_, mname, p, x_test, y_test)
            else:
                data_['accuracy_keras'] = 0.01
                data_['accuracy_hls4ml'] = 0.01
        else:
            data_['w'] = int(p) + 1
            print('models/' + mname + '_{}bit_0/model_best.h5'.format(p))
            model_ = tf.keras.models.load_model(
                'models/' + mname + '_{}bit_0/model_best.h5'.format(p),
                custom_objects={
                    'PruneLowMagnitude': pruning_wrapper.PruneLowMagnitude,
                    'QDense': QDense,
                    'QConv2D': QConv2D,
                    'Clip': Clip,
                    'QActivation': QActivation
                })
            model_ = strip_pruning(model_)
            data_['accuracy_keras'], data_['accuracy_hls4ml'] = getQKeras(model_, mname, p, x_test, y_test)
        print('Accuracy: Keras={} hls4ml={}'.format(data_['accuracy_keras'], data_['accuracy_hls4ml']))

        # Get the resources from the logic synthesis report
        with report_vsynth.open() as report:
            lines = np.array(report.readlines())
            data_['lut'] = int(lines[np.array(['CLB LUTs*' in line for line in lines])][0].split('|')[2])
            data_['ff'] = int(lines[np.array(['CLB Registers' in line for line in lines])][0].split('|')[2])
            data_['bram'] = float(lines[np.array(['Block RAM Tile' in line for line in lines])][0].split('|')[2])
            data_['dsp'] = int(lines[np.array(['DSPs' in line for line in lines])][0].split('|')[2])
            data_['lut_rel'] = float(lines[np.array(['CLB LUTs*' in line for line in lines])][0].split('|')[5])
            data_['ff_rel'] = float(lines[np.array(['CLB Registers' in line for line in lines])][0].split('|')[5])
            data_['bram_rel'] = float(lines[np.array(['Block RAM Tile' in line for line in lines])][0].split('|')[5])
            data_['dsp_rel'] = float(lines[np.array(['DSPs' in line for line in lines])][0].split('|')[5])

        # Get latency and initiation interval from the C synthesis report
        with report_csynth.open() as report:
            lines = np.array(report.readlines())
            lat_line = lines[np.argwhere(np.array(['Latency (clock cycles)' in line for line in lines])).flatten()[0] + 6]
            data_['latency_clks'] = int(lat_line.split('|')[2])
            data_['latency_ns'] = float(lat_line.split('|')[2]) * 5.0
            data_['latency_ii'] = int(lat_line.split('|')[4])
        return data_
    else:
        # print('No synth reports found! Returning empty data frame')
        return data_
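# Usage sketch (illustrative, not from the original source): sweep getReports()
# over several bit widths and collect the results in a pandas DataFrame. The
# model name 'pruned_cnn' and the precision list below are assumptions.
import pandas as pd

def collect_reports(mname, precisions, x_test, y_test):
    rows = []
    for p in precisions:
        data = getReports(mname, p, x_test, y_test)
        if data:  # getReports returns an empty dict when no reports were found
            rows.append(data)
    return pd.DataFrame(rows)

# df = collect_reports('pruned_cnn', [16, 14, 12, 10, 8, 6, 4, 3, 2], x_test, y_test)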
def train_pruned_model(model, dataset, vocab):
    pruned_model = model(vocab_size=len(vocab),
                         embedding_dim=FLAGS.embedding_dim,
                         rnn_units=FLAGS.RNN_units,
                         batch_size=FLAGS.batch_size)
    logdir = tempfile.mkdtemp()
    callbacks = [
        sparsity.UpdatePruningStep(),
        sparsity.PruningSummaries(log_dir=logdir, profile_batch=0)
    ]
    pruned_model.compile(optimizer='adam', loss=loss)
    pruned_model.fit(dataset, epochs=FLAGS.num_epochs, callbacks=callbacks)

    # Save the pruned model for size comparison later
    _, checkpoint_file = tempfile.mkstemp(str(FLAGS.final_sparsity) + '_pruned.h5', dir='models/')
    print('Saving pruned model to: ', checkpoint_file)
    tf.keras.models.save_model(pruned_model, checkpoint_file, include_optimizer=False)

    # Strip the pruning wrappers from the pruned model, as they are only needed for training
    final_pruned_model = sparsity.strip_pruning(pruned_model)
    _, pruned_keras_file = tempfile.mkstemp('_final_pruned.h5', dir='models/')
    print('Saving stripped pruned model to: ', pruned_keras_file)
    tf.keras.models.save_model(final_pruned_model, pruned_keras_file, include_optimizer=False)

    return pruned_model, final_pruned_model
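# Reload sketch (assumption: 'sparsity' is tensorflow_model_optimization.sparsity.keras,
# as elsewhere in this file). The stripped model loads like any Keras model, while
# the still-wrapped checkpoint needs prune_scope() to resolve the PruneLowMagnitude
# wrappers at deserialization time.
import tensorflow as tf
from tensorflow_model_optimization.sparsity import keras as sparsity

def reload_models(checkpoint_file, pruned_keras_file):
    # Stripped model: plain Keras load.
    final = tf.keras.models.load_model(pruned_keras_file)
    # Wrapped model: needs the pruning custom objects in scope.
    with sparsity.prune_scope():
        wrapped = tf.keras.models.load_model(checkpoint_file)
    return final, wrapped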
def go(batch_size, epochs, dataset):
    num_classes = 10
    x_train, y_train, x_test, y_test, input_shape = get_data(dataset, num_classes)
    print('x_train shape:', x_train.shape)
    print(x_train.shape[0], 'train samples')
    print(x_test.shape[0], 'test samples')

    num_train_samples = x_train.shape[0]
    end_step = np.ceil(1.0 * num_train_samples / batch_size).astype(np.int32) * epochs
    print('End step: ' + str(end_step))
    pruning_params = {
        'pruning_schedule': sparsity.PolynomialDecay(initial_sparsity=0.50,
                                                     final_sparsity=0.90,
                                                     begin_step=2000,
                                                     end_step=end_step,
                                                     frequency=100)
    }

    model = train(get_model(input_shape, num_classes, pruning_params=pruning_params),
                  x_train, y_train, batch_size, epochs, x_test, y_test, pruning=True)

    score = model.evaluate(x_test, y_test, verbose=0)
    print('Test loss:', score[0])
    print('Test accuracy:', score[1])

    keras_file = "mnist_optimized.h5"
    print('Saving model to: ', keras_file)
    # Save with the pruning apparatus removed
    tf.keras.models.save_model(sparsity.strip_pruning(model), keras_file, include_optimizer=False)
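# Worked example (illustrative) of how end_step above is derived: with the
# 60000-sample MNIST training set, batch size 128 and 10 epochs, the
# PolynomialDecay schedule ramps from 50% to 90% sparsity between step 2000
# and step 4690.
import numpy as np

num_train_samples, batch_size, epochs = 60000, 128, 10
steps_per_epoch = np.ceil(1.0 * num_train_samples / batch_size).astype(np.int32)  # 469
end_step = steps_per_epoch * epochs  # 4690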
def _generate_model(self):
    '''to generate the bounding boxes'''
    weights_path = os.path.expanduser(self.weights_path)
    assert weights_path.endswith('.h5'), 'Keras model or weights must be a .h5 file.'

    # Load model, or construct model and load weights.
    num_anchors = len(self.anchors)
    num_classes = len(self.class_names)
    # YOLOv3 has 9 anchors and 3 feature layers, while Tiny YOLOv3 has 6 anchors
    # and 2 feature layers, so the number of feature layers tells us the model type
    num_feature_layers = num_anchors // 3

    try:
        if num_anchors == 5:
            # YOLOv2 uses 5 anchors
            yolo_model, _ = get_yolo2_model(self.model_type, num_anchors, num_classes,
                                            input_shape=self.model_image_size + (3,),
                                            model_pruning=self.pruning_model)
        else:
            yolo_model, _ = get_yolo3_model(self.model_type, num_feature_layers,
                                            num_anchors, num_classes,
                                            input_shape=self.model_image_size + (3,),
                                            model_pruning=self.pruning_model)
        yolo_model.load_weights(weights_path)  # make sure model, anchors and classes match
        if self.pruning_model:
            yolo_model = sparsity.strip_pruning(yolo_model)
        yolo_model.summary()
    except Exception as e:
        print(repr(e))

    assert yolo_model.layers[-1].output_shape[-1] == \
        num_anchors / len(yolo_model.output) * (num_classes + 5), \
        'Mismatch between model and given anchor and class sizes'

    print('{} model, anchors, and classes loaded.'.format(weights_path))

    if self.gpu_num >= 2:
        yolo_model = multi_gpu_model(yolo_model, gpus=self.gpu_num)
    return yolo_model
def prune_and_initilize(trained_model, pm, initial_weights, layers_to_prune=None):
    sparcity = 1 - pm
    sprasity_sched = ConstantSparsity(
        sparcity,
        0,  # Do the sparsity calculation in the first step
        end_step=0,  # Do it only once
        frequency=10000000)

    model = clone_model(trained_model)
    model.set_weights(trained_model.get_weights())
    if is_pruned(model):
        model = strip_pruning(model)

    if layers_to_prune is None:
        layers_to_prune = get_default_layers(model)

    # prunned_model_layers = []
    # for layer in model.layers:
    #     if layer.name in layers_to_prune:
    #         prunned_model_layers.append(prune_low_magnitude(layer, sprasity_sched))
    #     else:
    #         prunned_model_layers.append(layer)
    # trained_pruned_model = Sequential(prunned_model_layers)
    trained_pruned_model = apply_wrapper_to_layer(model,
                                                  layers_to_prune,
                                                  prune_low_magnitude,
                                                  sprasity_sched,
                                                  clone=False)

    # Calculates the pruning mask
    initialize_pruned_model(trained_pruned_model)

    model.load_weights(initial_weights)

    prunned_model_layers = []
    for i, layer in enumerate(trained_pruned_model.layers):
        if isinstance(layer, pruning_wrapper.PruneLowMagnitude):
            # Apply the mask from the trained model to the initial weights
            l_weights = model.layers[i].get_weights()
            l_weights[0] = l_weights[0] * layer.pruning_vars[0][1].numpy()
            model.layers[i].set_weights(l_weights)
            prunned_model_layers.append(prune_low_magnitude(model.layers[i], sprasity_sched))
        else:
            prunned_model_layers.append(model.layers[i])

    untrained_prunned_model = Sequential(prunned_model_layers)
    untrained_prunned_model.compile(optimizer=optimizers.SGD(lr=0),
                                    loss='sparse_categorical_crossentropy',
                                    metrics=['accuracy'])
    return untrained_prunned_model
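# Usage sketch (illustrative): lottery-ticket style rewinding with the function
# above. pm=0.5 (keep half the weights), the weights file name, the data arrays
# and the epoch count are all assumptions. Because the returned model still
# carries PruneLowMagnitude wrappers, UpdatePruningStep is required during fit().
import tensorflow_model_optimization as tfmot

def retrain_ticket(trained_model, initial_weights, x_train, y_train, pm=0.5, epochs=10):
    # Rewind to the initial weights, masked by the trained model's pruning mask.
    ticket = prune_and_initilize(trained_model, pm, initial_weights)
    ticket.fit(x_train, y_train, epochs=epochs,
               callbacks=[tfmot.sparsity.keras.UpdatePruningStep()])
    return ticket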
def fitModel(model, train_data, val_data, test_data, stepsPerEpoch, evalStepsPerEpoch):
    """Runs Keras fit, evaluates on the test set and saves the model.

    Arguments:
        model: model to train
        train_data: training data
        val_data: validation data
        test_data: test data
        stepsPerEpoch: training steps per epoch
        evalStepsPerEpoch: evaluation steps per epoch

    Returns:
        None
    """
    if not os.path.exists(FLAGS.outdir + '/%s/' % model.name):
        os.system('mkdir ' + FLAGS.outdir + '/%s/' % model.name)
    callbacks = getCallbacks(FLAGS.outdir + '/%s/' % model.name)
    if FLAGS.prune:
        callbacks.append(pruning_callbacks.UpdatePruningStep())

    start = time.time()
    LOSS = tf.keras.losses.CategoricalCrossentropy()
    OPTIMIZER = Adam(learning_rate=FLAGS.lr,
                     beta_1=FLAGS.beta_1,
                     beta_2=FLAGS.beta_2,
                     epsilon=FLAGS.epsilon,
                     amsgrad=True)
    model.compile(loss=LOSS, optimizer=OPTIMIZER, metrics=["accuracy"])
    model.summary()
    history = model.fit(train_data,
                        epochs=FLAGS.epochs,
                        validation_data=val_data,
                        callbacks=callbacks,
                        verbose=1)
    model.load_weights(FLAGS.outdir + '/%s/weights_best.h5' % model.name)
    history_dict = history.history
    pd.DataFrame.from_dict(history_dict).to_csv(
        FLAGS.outdir + '/%s/history_dict.csv' % model.name, index=False)

    test_score = model.evaluate(test_data)
    print("Done training model {}".format(model.name))
    print('\n Test loss:', test_score[0])
    print('\n Test accuracy:', test_score[1])
    np.savez(FLAGS.outdir + '/%s/scores' % model.name, test_score)

    if FLAGS.prune:
        model_stripped = strip_pruning(model)
        model_stripped.save(FLAGS.outdir + '/%s/%s.h5' % (model.name, model.name))
    else:
        model.save(FLAGS.outdir + '/%s/%s.h5' % (model.name, model.name))
    end = time.time()
    print('\n It took {} minutes to train!\n'.format((end - start) / 60.))
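# Quick check (illustrative, not part of the original training flow): measure
# the fraction of exactly-zero weights in the stripped model to confirm that
# pruning actually took effect before shipping the .h5 file.
import numpy as np

def weight_sparsity(model):
    flat = np.concatenate([w.flatten() for w in model.get_weights()])
    return float(np.sum(flat == 0)) / flat.size

# print('Sparsity of stripped model:', weight_sparsity(model_stripped))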
def toHLS(p, r, m, doQK=False, intbits_a=0, odir='cnn_projects'):
    if doQK:
        model = tf.keras.models.load_model(
            'models/{}_{}bit_0/model_best.h5'.format(m, p),
            custom_objects={
                'PruneLowMagnitude': pruning_wrapper.PruneLowMagnitude,
                'QDense': QDense,
                'QConv2D': QConv2D,
                'Clip': Clip,
                'QActivation': QActivation
            })
        model = strip_pruning(model)
        hls_model = getQKeras(model=model, model_name=m, precision=p, reuse=r,
                              intbits_a=intbits_a, odir=odir)
    else:
        model = tf.keras.models.load_model(
            'models/{}_0/model_best.h5'.format(m),
            custom_objects={
                'PruneLowMagnitude': pruning_wrapper.PruneLowMagnitude,
                'QDense': QDense,
                'QConv2D': QConv2D,
                'Clip': Clip,
                'QActivation': QActivation
            })
        model = strip_pruning(model)
        hls_model = getBaseline(model=model, model_name=m, precision=p, reuse=r,
                                intbits_a=intbits_a, odir=odir)
def update_eval_model(self, train_model):
    # Create a temp weights file to save the training result
    tmp_weights_path = os.path.join(tempfile.gettempdir(),
                                    str(random.randint(10, 1000000)) + '.h5')
    train_model.save_weights(tmp_weights_path)

    # Load the temp weights into the eval model
    self.eval_model.load_weights(tmp_weights_path)
    os.remove(tmp_weights_path)

    if self.model_pruning:
        eval_model = sparsity.strip_pruning(self.eval_model)
    else:
        eval_model = self.eval_model

    return eval_model
def eval(self):
    if self._mode is None:
        raise ValueError("Please set the 'mode' parameter")

    eval_examples = self._processor.get_dev_examples(args.data_dir)
    eval_file = os.path.join(args.output_dir, "eval.tf_record")
    label_list = self._processor.get_labels()
    self.file_based_convert_examples_to_features(eval_examples, label_list,
                                                 args.max_seq_len, self._tokenizer, eval_file)

    # tf.logging.info("***** Running evaluation *****")
    # tf.logging.info("  Num examples = %d", len(eval_examples))
    # tf.logging.info("  Batch size = %d", self.batch_size)

    num_eval_steps = len(eval_examples) // args.batch_size
    eval_input_fn = self.file_based_input_fn_builder(input_file=eval_file,
                                                     seq_length=args.max_seq_len,
                                                     is_training=False,
                                                     batch_size=args.batch_size,
                                                     drop_remainder=False)
    estimator, model = create_estimator(num_eval_steps)
    var_list = model.optimizer.variables()
    result = estimator.evaluate(input_fn=eval_input_fn, steps=None, hooks=[])

    output_eval_file = os.path.join(args.output_dir, "eval_results.txt")
    with tf.io.gfile.GFile(output_eval_file, "w") as writer:
        # tf.logging.info("***** Eval results *****")
        for key in sorted(result.keys()):
            # tf.logging.info("  %s = %s", key, str(result[key]))
            writer.write("%s = %s\n" % (key, str(result[key])))

    feature_columns = [tf.feature_column.numeric_column(x)
                       for x in ['input_ids', 'input_mask', 'segment_ids']]
    serving_input_fn = tf.estimator.export.build_parsing_serving_input_receiver_fn(
        tf.feature_column.make_parse_example_spec(feature_columns))

    if args.prune_enabled:
        model = sparsity.strip_pruning(model)

    estimator.export_saved_model(export_dir_base=args.output_dir,
                                 serving_input_receiver_fn=serving_input_fn,
                                 experimental_mode=tf.estimator.ModeKeys.PREDICT)
    model.reset_metrics()
    model.save(args.keras_model_path)
def make_pruning(model, train_dataset, validation_dataset, n_step, v_step):
    end_step = np.ceil(1.0 * n_step / config.batch_size).astype(np.int32) * config.p_epochs
    pruning_params = {
        'pruning_schedule': sparsity.PolynomialDecay(initial_sparsity=config.initial_sparsity,
                                                     final_sparsity=config.final_sparsity,
                                                     begin_step=config.p_begin_step,
                                                     end_step=end_step,
                                                     frequency=config.p_frequency)
    }
    p_model = sparsity.prune_low_magnitude(model, **pruning_params)
    model_setup(p_model)
    callbacks = callbacks_init()
    p_model.fit(train_dataset,
                epochs=config.p_epochs,
                verbose=1,
                callbacks=callbacks,
                validation_data=validation_dataset,
                steps_per_epoch=n_step,
                validation_steps=v_step)
    p_model = sparsity.strip_pruning(p_model)
    return p_model
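# Minimal sketch (assumption) of what callbacks_init() must provide for the
# fit() call above to succeed: fitting a prune_low_magnitude-wrapped model
# raises an error unless UpdatePruningStep is among the callbacks.
from tensorflow_model_optimization.sparsity import keras as sparsity

def callbacks_init():
    # UpdatePruningStep advances the pruning schedule on every batch;
    # PruningSummaries logs sparsity to TensorBoard. The log directory is illustrative.
    return [sparsity.UpdatePruningStep(),
            sparsity.PruningSummaries(log_dir='logs', profile_batch=0)]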
def main(args):
    annotation_file = args.annotation_file
    log_dir = os.path.join('logs', '000')
    classes_path = args.classes_path
    class_names = get_classes(classes_path)
    num_classes = len(class_names)

    anchors = get_anchors(args.anchors_path)
    num_anchors = len(anchors)

    # get freeze level according to CLI option
    if args.weights_path:
        freeze_level = 0
    else:
        freeze_level = 1
    if args.freeze_level is not None:
        freeze_level = args.freeze_level

    # callbacks for training process
    logging = TensorBoard(log_dir=log_dir, histogram_freq=0, write_graph=False,
                          write_grads=False, write_images=False, update_freq='batch')
    checkpoint = ModelCheckpoint(os.path.join(log_dir, 'ep{epoch:03d}-loss{loss:.3f}-val_loss{val_loss:.3f}.h5'),
                                 monitor='val_loss',
                                 mode='min',
                                 verbose=1,
                                 save_weights_only=False,
                                 save_best_only=True,
                                 period=1)
    reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, mode='min',
                                  patience=10, verbose=1, cooldown=0, min_lr=1e-10)
    early_stopping = EarlyStopping(monitor='val_loss', min_delta=0, patience=50, verbose=1, mode='min')
    terminate_on_nan = TerminateOnNaN()
    callbacks = [logging, checkpoint, reduce_lr, early_stopping, terminate_on_nan]

    # get train & val dataset
    dataset = get_dataset(annotation_file)
    if args.val_annotation_file:
        val_dataset = get_dataset(args.val_annotation_file)
        num_train = len(dataset)
        num_val = len(val_dataset)
        dataset.extend(val_dataset)
    else:
        val_split = args.val_split
        num_val = int(len(dataset) * val_split)
        num_train = len(dataset) - num_val

    # assign multiscale interval
    if args.multiscale:
        rescale_interval = args.rescale_interval
    else:
        rescale_interval = -1  # Doesn't rescale

    # model input shape check
    input_shape = args.model_image_size
    assert (input_shape[0] % 32 == 0 and input_shape[1] % 32 == 0), 'model_image_size should be multiples of 32'

    # get different model type & train & val data generator
    if args.model_type.startswith('scaled_yolo4_') or args.model_type.startswith('yolo5_'):
        # Scaled-YOLOv4 & YOLOv5 entrance, use yolo5 submodule but still the yolo3 data generator
        # TODO: create new yolo5 data generator to apply YOLOv5 anchor assignment
        get_train_model = get_yolo5_train_model
        data_generator = yolo5_data_generator_wrapper

        # tf.keras.Sequence style data generator
        #train_data_generator = Yolo5DataGenerator(dataset[:num_train], args.batch_size, input_shape, anchors, num_classes, args.enhance_augment, rescale_interval, args.multi_anchor_assign)
        #val_data_generator = Yolo5DataGenerator(dataset[num_train:], args.batch_size, input_shape, anchors, num_classes, multi_anchor_assign=args.multi_anchor_assign)

        tiny_version = False
    elif args.model_type.startswith('yolo3_') or args.model_type.startswith('yolo4_'):
        #if num_anchors == 9:
        # YOLOv3 & v4 entrance, use 9 anchors
        get_train_model = get_yolo3_train_model
        data_generator = yolo3_data_generator_wrapper

        # tf.keras.Sequence style data generator
        #train_data_generator = Yolo3DataGenerator(dataset[:num_train], args.batch_size, input_shape, anchors, num_classes, args.enhance_augment, rescale_interval, args.multi_anchor_assign)
        #val_data_generator = Yolo3DataGenerator(dataset[num_train:], args.batch_size, input_shape, anchors, num_classes, multi_anchor_assign=args.multi_anchor_assign)

        tiny_version = False
    elif args.model_type.startswith('tiny_yolo3_') or args.model_type.startswith('tiny_yolo4_'):
        #elif num_anchors == 6:
        # Tiny YOLOv3 & v4 entrance, use 6 anchors
        get_train_model = get_yolo3_train_model
        data_generator = yolo3_data_generator_wrapper

        # tf.keras.Sequence style data generator
        #train_data_generator = Yolo3DataGenerator(dataset[:num_train], args.batch_size, input_shape, anchors, num_classes, args.enhance_augment, rescale_interval, args.multi_anchor_assign)
        #val_data_generator = Yolo3DataGenerator(dataset[num_train:], args.batch_size, input_shape, anchors, num_classes, multi_anchor_assign=args.multi_anchor_assign)

        tiny_version = True
    elif args.model_type.startswith('yolo2_') or args.model_type.startswith('tiny_yolo2_'):
        #elif num_anchors == 5:
        # YOLOv2 & Tiny YOLOv2 use 5 anchors
        get_train_model = get_yolo2_train_model
        data_generator = yolo2_data_generator_wrapper

        # tf.keras.Sequence style data generator
        #train_data_generator = Yolo2DataGenerator(dataset[:num_train], args.batch_size, input_shape, anchors, num_classes, args.enhance_augment, rescale_interval)
        #val_data_generator = Yolo2DataGenerator(dataset[num_train:], args.batch_size, input_shape, anchors, num_classes)

        tiny_version = False
    else:
        raise ValueError('Unsupported model type')

    # prepare online evaluation callback
    if args.eval_online:
        eval_callback = EvalCallBack(args.model_type,
                                     dataset[num_train:],
                                     anchors,
                                     class_names,
                                     args.model_image_size,
                                     args.model_pruning,
                                     log_dir,
                                     eval_epoch_interval=args.eval_epoch_interval,
                                     save_eval_checkpoint=args.save_eval_checkpoint,
                                     elim_grid_sense=args.elim_grid_sense)
        callbacks.append(eval_callback)

    # prepare train/val data shuffle callback
    if args.data_shuffle:
        shuffle_callback = DatasetShuffleCallBack(dataset)
        callbacks.append(shuffle_callback)

    # prepare model pruning config
    pruning_end_step = np.ceil(1.0 * num_train / args.batch_size).astype(np.int32) * args.total_epoch
    if args.model_pruning:
        pruning_callbacks = [sparsity.UpdatePruningStep(),
                             sparsity.PruningSummaries(log_dir=log_dir, profile_batch=0)]
        callbacks = callbacks + pruning_callbacks

    # prepare optimizer
    optimizer = get_optimizer(args.optimizer, args.learning_rate, average_type=None, decay_type=None)

    # support multi-gpu training
    if args.gpu_num >= 2:
        # devices_list=["/gpu:0", "/gpu:1"]
        devices_list = ["/gpu:{}".format(n) for n in range(args.gpu_num)]
        strategy = tf.distribute.MirroredStrategy(devices=devices_list)
        print('Number of devices: {}'.format(strategy.num_replicas_in_sync))
        with strategy.scope():
            # get multi-gpu train model
            model = get_train_model(args.model_type, anchors, num_classes,
                                    weights_path=args.weights_path,
                                    freeze_level=freeze_level,
                                    optimizer=optimizer,
                                    label_smoothing=args.label_smoothing,
                                    elim_grid_sense=args.elim_grid_sense,
                                    model_pruning=args.model_pruning,
                                    pruning_end_step=pruning_end_step)
    else:
        # get normal train model
        model = get_train_model(args.model_type, anchors, num_classes,
                                weights_path=args.weights_path,
                                freeze_level=freeze_level,
                                optimizer=optimizer,
                                label_smoothing=args.label_smoothing,
                                elim_grid_sense=args.elim_grid_sense,
                                model_pruning=args.model_pruning,
                                pruning_end_step=pruning_end_step)

    model.summary()

    # Transfer training some epochs with frozen layers first if needed, to get a stable loss.
    initial_epoch = args.init_epoch
    epochs = initial_epoch + args.transfer_epoch
    print("Transfer training stage")
    print('Train on {} samples, val on {} samples, with batch size {}, input_shape {}.'
          .format(num_train, num_val, args.batch_size, input_shape))
    #model.fit_generator(train_data_generator,
    model.fit_generator(data_generator(dataset[:num_train], args.batch_size, input_shape,
                                       anchors, num_classes, args.enhance_augment,
                                       rescale_interval,
                                       multi_anchor_assign=args.multi_anchor_assign),
                        steps_per_epoch=max(1, num_train // args.batch_size),
                        #validation_data=val_data_generator,
                        validation_data=data_generator(dataset[num_train:], args.batch_size,
                                                       input_shape, anchors, num_classes,
                                                       multi_anchor_assign=args.multi_anchor_assign),
                        validation_steps=max(1, num_val // args.batch_size),
                        epochs=epochs,
                        initial_epoch=initial_epoch,
                        #verbose=1,
                        workers=1,
                        use_multiprocessing=False,
                        max_queue_size=10,
                        callbacks=callbacks)

    # Wait 2 seconds for next stage
    time.sleep(2)

    if args.decay_type or args.average_type:
        # rebuild optimizer to apply learning rate decay or a weights averager,
        # only after all layers are unfrozen
        if args.decay_type:
            callbacks.remove(reduce_lr)

        if args.average_type == 'ema' or args.average_type == 'swa':
            # the weights averager needs tensorflow-addons,
            # which requires TF 2.x and has version compatibility constraints
            import tensorflow_addons as tfa
            callbacks.remove(checkpoint)
            avg_checkpoint = tfa.callbacks.AverageModelCheckpoint(
                filepath=os.path.join(log_dir, 'ep{epoch:03d}-loss{loss:.3f}-val_loss{val_loss:.3f}.h5'),
                update_weights=True,
                monitor='val_loss',
                mode='min',
                verbose=1,
                save_weights_only=False,
                save_best_only=True,
                period=1)
            callbacks.append(avg_checkpoint)

        steps_per_epoch = max(1, num_train // args.batch_size)
        decay_steps = steps_per_epoch * (args.total_epoch - args.init_epoch - args.transfer_epoch)
        optimizer = get_optimizer(args.optimizer, args.learning_rate,
                                  average_type=args.average_type,
                                  decay_type=args.decay_type,
                                  decay_steps=decay_steps)

    # Unfreeze the whole network for further tuning
    # NOTE: more GPU memory is required after unfreezing the body
    print("Unfreeze and continue training, to fine-tune.")
    if args.gpu_num >= 2:
        with strategy.scope():
            for i in range(len(model.layers)):
                model.layers[i].trainable = True
            model.compile(optimizer=optimizer,
                          loss={'yolo_loss': lambda y_true, y_pred: y_pred})  # recompile to apply the change
    else:
        for i in range(len(model.layers)):
            model.layers[i].trainable = True
        model.compile(optimizer=optimizer,
                      loss={'yolo_loss': lambda y_true, y_pred: y_pred})  # recompile to apply the change

    print('Train on {} samples, val on {} samples, with batch size {}, input_shape {}.'
          .format(num_train, num_val, args.batch_size, input_shape))
    #model.fit_generator(train_data_generator,
    model.fit_generator(data_generator(dataset[:num_train], args.batch_size, input_shape,
                                       anchors, num_classes, args.enhance_augment,
                                       rescale_interval,
                                       multi_anchor_assign=args.multi_anchor_assign),
                        steps_per_epoch=max(1, num_train // args.batch_size),
                        #validation_data=val_data_generator,
                        validation_data=data_generator(dataset[num_train:], args.batch_size,
                                                       input_shape, anchors, num_classes,
                                                       multi_anchor_assign=args.multi_anchor_assign),
                        validation_steps=max(1, num_val // args.batch_size),
                        epochs=args.total_epoch,
                        initial_epoch=epochs,
                        #verbose=1,
                        workers=1,
                        use_multiprocessing=False,
                        max_queue_size=10,
                        callbacks=callbacks)

    # Finally store the model
    if args.model_pruning:
        model = sparsity.strip_pruning(model)
    model.save(os.path.join(log_dir, 'trained_final.h5'))
def _generate_model(self):
    '''to generate the bounding boxes'''
    weights_path = None
    #assert weights_path.endswith('.h5'), 'Keras model or weights must be a .h5 file.'

    # Load model, or construct model and load weights.
    num_anchors = len(self.anchors)
    num_classes = len(self.class_names)
    # YOLOv3 has 9 anchors and 3 feature layers, while Tiny YOLOv3 has 6 anchors
    # and 2 feature layers, so the number of feature layers tells us the model type
    num_feature_layers = num_anchors // 3

    try:
        if self.model_type.startswith('scaled_yolo4_') or self.model_type.startswith('yolo5_'):
            # Scaled-YOLOv4 & YOLOv5 entrance
            yolo_model, _ = get_yolo5_model(self.model_type, num_feature_layers,
                                            num_anchors, num_classes,
                                            input_shape=self.model_image_size + (3,),
                                            model_pruning=self.pruning_model)
        elif self.model_type.startswith('yolo3_') or self.model_type.startswith('yolo4_') or \
             self.model_type.startswith('tiny_yolo3_') or self.model_type.startswith('tiny_yolo4_'):
            # YOLOv3 & v4 entrance
            yolo_model, _ = get_yolo3_model(self.model_type, num_feature_layers,
                                            num_anchors, num_classes,
                                            input_shape=self.model_image_size + (3,),
                                            model_pruning=self.pruning_model)
        elif self.model_type.startswith('yolo2_') or self.model_type.startswith('tiny_yolo2_'):
            # YOLOv2 entrance
            yolo_model, _ = get_yolo2_model(self.model_type, num_anchors, num_classes,
                                            input_shape=self.model_image_size + (3,),
                                            model_pruning=self.pruning_model)
        else:
            raise ValueError('Unsupported model type')

        yolo_model.load_weights(weights_path)  # make sure model, anchors and classes match
        if self.pruning_model:
            yolo_model = sparsity.strip_pruning(yolo_model)
        yolo_model.summary()
    except Exception as e:
        print(repr(e))

    assert yolo_model.layers[-1].output_shape[-1] == \
        num_anchors / len(yolo_model.output) * (num_classes + 5), \
        'Mismatch between model and given anchor and class sizes'

    print('{} model, anchors, and classes loaded.'.format(weights_path))

    # Removed for compatibility with TF 2.4:
    '''
    if self.gpu_num>=2:
        yolo_model = multi_gpu_model(yolo_model, gpus=self.gpu_num)
    '''
    return yolo_model
def Q_baseline_model(size, epochs, optimizer, X_training, y_training,
                     X_validation, y_validation, output_name):
    '''
    NN Model constructor with loss and accuracy plots.

    Parameters
    ----------
    size : int
        Batch size used in the training process.
    epochs : int
        Number of epochs the model will be trained.
    optimizer : keras.optimizer
        Optimizer function.
    X_training : Numpy array
        Training data set.
    y_training : Numpy array
        True labels for the training set.
    X_validation : Numpy array
        Validation data set.
    y_validation : Numpy array
        True labels for the validation set.
    output_name : str
        Name used for saved plots.

    Returns
    -------
    model : qkeras.sequential
        QKeras model.
    w : numpy array
        Array of final weights used in the model for later inference.
    '''
    pruning = False

    # create model
    name = "RMSE validation"
    name2 = "RMSE training"
    history = History()

    model = Sequential()
    model.add(QDense(60, input_shape=(27,),
                     kernel_quantizer=quantized_bits(16, 1),
                     bias_quantizer=quantized_bits(16, 1),
                     kernel_initializer='random_normal'))
    model.add(QActivation(activation=quantized_relu(16, 1), name='relu1'))
    model.add(QDense(50, kernel_quantizer=quantized_bits(16, 1), bias_quantizer=quantized_bits(16, 1)))
    model.add(QActivation(activation=quantized_relu(16, 1), name='relu2'))
    # model.add(Dropout(rate=0.2))
    model.add(QDense(30, kernel_quantizer=quantized_bits(16, 1), bias_quantizer=quantized_bits(16, 1)))
    model.add(QActivation(activation=quantized_relu(16, 1), name='relu3'))
    model.add(QDense(40, kernel_quantizer=quantized_bits(16, 1), bias_quantizer=quantized_bits(16, 1)))
    model.add(QActivation(activation=quantized_relu(16, 1), name='relu4'))
    model.add(QDense(15, kernel_quantizer=quantized_bits(16, 1), bias_quantizer=quantized_bits(16, 1)))
    model.add(QActivation(activation=quantized_relu(16, 1), name='relu5'))

    # Alternative architecture:
    # model.add(QDense(80, input_shape=(27,), kernel_quantizer=quantized_bits(16,1), bias_quantizer=quantized_bits(16,1), kernel_initializer='random_normal'))
    # model.add(QActivation(activation=quantized_relu(16,1), name='relu1'))
    # model.add(QDense(50, kernel_quantizer=quantized_bits(16,1), bias_quantizer=quantized_bits(16,1)))
    # model.add(QActivation(activation=quantized_relu(16,1), name='relu2'))
    # model.add(QDense(35, kernel_quantizer=quantized_bits(16,1), bias_quantizer=quantized_bits(16,1)))
    # model.add(QActivation(activation=quantized_relu(16,1), name='relu3'))
    # model.add(Dropout(rate=0.2))

    model.add(QDense(1, kernel_quantizer=quantized_bits(16, 1)))
    model.add(QActivation(activation=quantized_relu(16, 1), name='relu6'))
    #model.add(Activation("sigmoid"))
    # model.add(QActivation(activation=quantized_tanh(16,1), name='tanh'))

    if pruning:
        print("////////////////////////Training Model with pruning")
        pruning_params = {
            "pruning_schedule": pruning_schedule.ConstantSparsity(0.75, begin_step=2000, frequency=100)
        }
        model = prune.prune_low_magnitude(model, **pruning_params)
        model.compile(loss='mean_squared_error', optimizer=optimizer)
        model.fit(X_training, y_training,
                  batch_size=size,
                  epochs=epochs,
                  verbose=1,
                  validation_data=(X_validation, y_validation),
                  callbacks=[history, pruning_callbacks.UpdatePruningStep()])
        model = strip_pruning(model)

        w = model.layers[0].weights[0].numpy()
        h, b = np.histogram(w, bins=100)
        plt.figure(figsize=(7, 7))
        plt.bar(b[:-1], h, width=b[1] - b[0])
        plt.semilogy()
        plt.savefig("Zeros' distribution", format='png')
        print('% of zeros = {}'.format(np.sum(w == 0) / np.size(w)))
    else:
        print("////////////////////////Training Model WITHOUT pruning")
        model.compile(loss='mean_squared_error', optimizer=optimizer)
        model.fit(X_training, y_training,
                  batch_size=size,
                  epochs=epochs,
                  verbose=1,
                  validation_data=(X_validation, y_validation),
                  callbacks=[history])

    # Compile model
    # model.compile(loss='mean_squared_error', optimizer=optimizer)
    # model.fit(X_training, y_training,
    #           batch_size=size,
    #           epochs=epochs,
    #           verbose=1,
    #           validation_data=(X_validation, y_validation), callbacks=[history])

    w = []
    for layer in model.layers:
        print(layer)
        w.append(layer.get_weights())
    #print(w)

    train_predictions = model.predict(X_training)
    predictions = model.predict(X_validation)
    lin_mse = mean_squared_error(y_validation, predictions)
    lin_rmse = np.sqrt(lin_mse)
    lin_mse2 = mean_squared_error(y_training, train_predictions)
    lin_rmse2 = np.sqrt(lin_mse2)
    msg = "%s: %f" % (name, lin_rmse)
    msg2 = "%s: %f" % (name2, lin_rmse2)
    print(msg)
    print(msg2)

    fig, ax = plt.subplots()
    # xy = np.vstack([y_validation, predictions])
    # z = gaussian_kde(xy)
    ax.scatter(y_validation, predictions, edgecolors=(0, 0, 0))
    ax.set_title('Regression model predictions (validation set)')
    ax.set_xlabel('Measured $p_T$ (GeV/c)')
    ax.set_ylabel('Predicted $p_T$ (GeV/c)')
    ax.plot([Y.min(), Y.max()], [Y.min(), Y.max()], 'k--', lw=4)
    plt.rc('font', size=20)
    plt.rc('axes', titlesize=18)
    plt.rc('axes', labelsize=18)
    plt.rc('xtick', labelsize=18)
    plt.rc('ytick', labelsize=18)
    plt.rc('legend', fontsize=18)
    plt.rc('figure', titlesize=18)
    plt.tight_layout()
    plt.savefig(outrootname + '/' + '1' + output_name, format='png', dpi=800)

    fig2, ax2 = plt.subplots()
    ax2.plot(history.history['loss'], label='loss')
    ax2.plot(history.history['val_loss'], label='val_loss')
    ax2.set_title('Training and Validation loss per epoch')
    ax2.set_xlabel('# Epoch')
    ax2.set_ylabel('loss')
    plt.legend()
    plt.tight_layout()
    plt.savefig(outrootname + '/' + '2' + output_name, format='png', dpi=800)
    #plt.show()
    del ax, ax2

    return model, w
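# Usage sketch (illustrative): the optimizer settings, batch size and array
# names below are assumptions, not values from the original script.
# from tensorflow.keras.optimizers import Adam
# model, w = Q_baseline_model(size=256, epochs=50, optimizer=Adam(1e-3),
#                             X_training=X_train, y_training=y_train,
#                             X_validation=X_val, y_validation=y_val,
#                             output_name='qmodel.png')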
                     epochs=epochs,
                     validation_split=0.2,
                     shuffle=True,
                     callbacks=callbacks,
                     verbose=1)
training_Time = time.time()
scores = new_pruned_model.evaluate(x_test, y_test, verbose=2)
end_Time = time.time()
print('total training time:', (training_Time - start_Time) / 60, "min")
print('Test time:', end_Time - training_Time)
print('Test loss:', scores[0])
print('Test accuracy:', scores[1])

final_model = tfmot.strip_pruning(new_pruned_model)
final_model.summary()
final_model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])
final_scores = final_model.evaluate(x_test, y_test, verbose=2)
print('Test loss:', final_scores[0])
print('Test accuracy:', final_scores[1])

# File Names (need to change / for \\ on laptop)
#model_File = os.path.join(os.getcwd(), 'Models/')
fTime = time.strftime("%d-%b-%H%M", time.localtime())
#file_Name = os.path.join(model_File, fTime+'-GES843-Pruning-Perforated/')
file_Name = model_File
model_Name = 'Pruned.h5'
experiment.get_parameter("pruning_lr_factor_3")], outputDir=yamlparameters["TrainDir"]) callbacks.callbacks.append(pruning_callbacks.UpdatePruningStep()) with experiment.train(): keras_model.fit(X_train,y_train, batch_size=yamlparameters["Training_batch_size"], epochs=yamlparameters["Training_epochs"], callbacks=callbacks.callbacks, verbose=1, validation_split=yamlparameters["Training_validation_split"], shuffle=True) keras_model = strip_pruning(keras_model) keras_model.compile(optimizer=adam, loss='binary_crossentropy', metrics=['binary_accuracy']) keras_model.save(yamlparameters["TrainDir"]+"/Best_model.h5") with experiment.test(): y_predict = keras_model.predict(X_test,verbose=0) loss,binary_accuracy = keras_model.evaluate(X_test, y_test,verbose=0) auc = roc_auc_score(y_test,y_predict) print("AUC:",auc) print("ACC:",binary_accuracy) metrics = { 'loss':loss, 'accuracy':binary_accuracy, 'ROC AUC':roc_auc_score }
    best_filepaths,
    output_filepath=submission_dir + 'Ooi_NTU_task1b_3.output.csv',
    delimiter='\t',
    newline='\n')
print('Test set predictions saved in ' + submission_dir)

## GET METRICS
n_nz_params = 0
model_size = 0
for best_filepath in best_filepaths:
    with sparsity.prune_scope():  # Need to use this to prevent loading errors
        keras_model = keras.models.load_model(
            best_filepath, compile=False
        )  # Don't compile the model to save time, because we're not training it here.
        keras_model = sparsity.strip_pruning(keras_model)
    param_dict = get_keras_model_size(keras_model, verbose=False)
    n_nz_params += param_dict['parameters']['non_zero']['count']
    model_size += param_dict['parameters']['non_zero']['bytes'] / 1024

output_meta = make_predictions(eval_features, best_filepaths, save=False)
best_micro_acc, best_macro_acc = accs(np.array([row[-3:] for row in output_meta[1:]]), eval_labels)

## PRINT FINAL METRICS
print()
print('============================')
print(' FINAL ANALYSIS FOR MODEL 3 ')
print('============================')
print()
print('Model files used: ')
def _main(args):
    global lr_base, total_epochs
    lr_base = args.learning_rate
    total_epochs = args.total_epoch

    annotation_file = args.annotation_file
    log_dir = 'logs/000/'
    classes_path = args.classes_path
    class_names = get_classes(classes_path)
    num_classes = len(class_names)
    if args.tiny_version:
        anchors_path = 'configs/tiny_yolo_anchors.txt'
    else:
        anchors_path = 'configs/yolo_anchors.txt'
    anchors = get_anchors(anchors_path)

    print("\nanchors = ", anchors)
    print("\nnum_classes = ", num_classes)

    # get freeze level according to CLI option
    if args.weights_path:
        freeze_level = 0
    else:
        freeze_level = 1
    if args.freeze_level is not None:
        freeze_level = args.freeze_level
    print("\n\nFREEZE LEVEL = ", freeze_level)

    # callbacks for training process
    logging = TensorBoard(log_dir=log_dir, histogram_freq=0, write_graph=False,
                          write_grads=False, write_images=False, update_freq='batch')
    checkpoint = ModelCheckpoint(log_dir + 'ep{epoch:03d}-loss{loss:.3f}-val_loss{val_loss:.3f}.h5',
                                 monitor='val_loss',
                                 verbose=1,
                                 save_weights_only=False,
                                 save_best_only=True,
                                 period=5)
    reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5,
                                  verbose=1, cooldown=0, min_lr=1e-10)
    lr_scheduler = LearningRateScheduler(learning_rate_scheduler)
    early_stopping = EarlyStopping(monitor='val_loss', min_delta=0, patience=30, verbose=1)
    terminate_on_nan = TerminateOnNaN()
    callbacks = [logging, checkpoint, reduce_lr, early_stopping, terminate_on_nan]

    # get train & val dataset
    dataset = get_dataset(annotation_file)
    if args.val_annotation_file:
        val_dataset = get_dataset(args.val_annotation_file)
        num_train = len(dataset)
        num_val = len(val_dataset)
        dataset.extend(val_dataset)
    else:
        val_split = args.val_split
        num_val = int(len(dataset) * val_split)
        num_train = len(dataset) - num_val

    # prepare model pruning config
    pruning_end_step = np.ceil(1.0 * num_train / args.batch_size).astype(np.int32) * args.total_epoch
    if args.model_pruning:
        pruning_callbacks = [sparsity.UpdatePruningStep(),
                             sparsity.PruningSummaries(log_dir=log_dir, profile_batch=0)]
        callbacks = callbacks + pruning_callbacks

    # prepare optimizer
    optimizer = get_optimizer(args.optimizer, args.learning_rate)

    # get train model
    model = get_yolo3_train_model(args.model_type, anchors, num_classes,
                                  weights_path=args.weights_path,
                                  freeze_level=freeze_level,
                                  optimizer=optimizer,
                                  label_smoothing=args.label_smoothing,
                                  model_pruning=args.model_pruning,
                                  pruning_end_step=pruning_end_step)

    # support multi-gpu training
    if args.gpu_num >= 2:
        model = multi_gpu_model(model, gpus=args.gpu_num)

    model.summary()

    # Train some initial epochs with frozen layers first if needed, to get a stable loss.
    input_shape = args.model_image_size
    assert (input_shape[0] % 32 == 0 and input_shape[1] % 32 == 0), 'Multiples of 32 required'
    batch_size = args.batch_size
    initial_epoch = 0
    epochs = args.init_epoch
    print("Initial training stage")
    print('Train on {} samples, val on {} samples, with batch size {}, input_shape {}.'
          .format(num_train, num_val, batch_size, input_shape))
    model.fit_generator(data_generator_wrapper(dataset[:num_train], batch_size, input_shape,
                                               anchors, num_classes),
                        steps_per_epoch=max(1, num_train // batch_size),
                        validation_data=data_generator_wrapper(dataset[num_train:], batch_size,
                                                               input_shape, anchors, num_classes),
                        validation_steps=max(1, num_val // batch_size),
                        epochs=epochs,
                        initial_epoch=initial_epoch,
                        callbacks=callbacks)

    # Apply Cosine learning rate decay only after
    # unfreezing all layers
    if args.cosine_decay_learning_rate:
        callbacks.remove(reduce_lr)
        callbacks.append(lr_scheduler)

    # Unfreeze the whole network for further training
    # NOTE: more GPU memory is required after unfreezing the body
    print("Unfreeze and continue training, to fine-tune.")
    for i in range(len(model.layers)):
        model.layers[i].trainable = True
    model.compile(optimizer=optimizer,
                  loss={'yolo_loss': lambda y_true, y_pred: y_pred})  # recompile to apply the change

    if args.multiscale:
        # prepare multiscale config
        input_shape_list = get_multiscale_list(args.model_type, args.tiny_version)
        interval = args.rescale_interval

        # Do multi-scale training on different input shapes,
        # changing every "rescale_interval" epochs
        for epoch_step in range(epochs + interval, args.total_epoch, interval):
            # shuffle train/val dataset for cross-validation
            if args.data_shuffle:
                np.random.shuffle(dataset)

            initial_epoch = epochs
            epochs = epoch_step
            # rescale input only from the 2nd round, to make sure the unfreeze is stable
            if initial_epoch != args.init_epoch:
                input_shape = input_shape_list[random.randint(0, len(input_shape_list) - 1)]
            print('Train on {} samples, val on {} samples, with batch size {}, input_shape {}.'
                  .format(num_train, num_val, batch_size, input_shape))
            model.fit_generator(data_generator_wrapper(dataset[:num_train], batch_size,
                                                       input_shape, anchors, num_classes),
                                steps_per_epoch=max(1, num_train // batch_size),
                                validation_data=data_generator_wrapper(dataset[num_train:],
                                                                       batch_size, input_shape,
                                                                       anchors, num_classes),
                                validation_steps=max(1, num_val // batch_size),
                                epochs=epochs,
                                initial_epoch=initial_epoch,
                                callbacks=callbacks)
    else:
        # Do single-scale training
        print('Train on {} samples, val on {} samples, with batch size {}, input_shape {}.'
              .format(num_train, num_val, batch_size, input_shape))
        model.fit_generator(data_generator_wrapper(dataset[:num_train], batch_size, input_shape,
                                                   anchors, num_classes),
                            steps_per_epoch=max(1, num_train // batch_size),
                            validation_data=data_generator_wrapper(dataset[num_train:], batch_size,
                                                                   input_shape, anchors, num_classes),
                            validation_steps=max(1, num_val // batch_size),
                            epochs=args.total_epoch,
                            initial_epoch=epochs,
                            callbacks=callbacks)

    # Finally store the model
    if args.model_pruning:
        model = sparsity.strip_pruning(model)
    model.save(log_dir + 'trained_final.h5')
def layer_pruned_model():
    # Build a pruned model layer by layer
    epochs = 12
    (x_train, y_train), (x_test, y_test) = prepare_data()

    num_train_samples = x_train.shape[0]
    end_step = np.ceil(1.0 * num_train_samples / batch_size).astype(np.int32) * epochs
    print('End step: ' + str(end_step))
    pruning_params = {
        'pruning_schedule': sparsity.PolynomialDecay(initial_sparsity=0.50,
                                                     final_sparsity=0.90,
                                                     begin_step=2000,
                                                     end_step=end_step,
                                                     frequency=100)
    }

    # build the model
    l = tf.keras.layers
    pruned_model = tf.keras.Sequential([
        sparsity.prune_low_magnitude(l.Conv2D(32, 5, padding='same', activation='relu'),
                                     input_shape=input_shape,
                                     **pruning_params),
        l.MaxPooling2D((2, 2), (2, 2), padding='same'),
        l.BatchNormalization(),
        sparsity.prune_low_magnitude(l.Conv2D(64, 5, padding='same', activation='relu'),
                                     **pruning_params),
        l.MaxPooling2D((2, 2), (2, 2), padding='same'),
        l.Flatten(),
        sparsity.prune_low_magnitude(l.Dense(1024, activation='relu'), **pruning_params),
        l.Dropout(0.4),
        sparsity.prune_low_magnitude(l.Dense(num_classes, activation='softmax'), **pruning_params)
    ])
    pruned_model.summary()

    logdir = tempfile.mkdtemp()
    print('Writing training logs to ' + logdir)
    # %tensorboard --logdir={logdir}

    # train the model
    pruned_model.compile(loss=tf.keras.losses.categorical_crossentropy,
                         optimizer='adam',
                         metrics=['accuracy'])
    callbacks = [
        sparsity.UpdatePruningStep(),
        sparsity.PruningSummaries(log_dir=logdir, profile_batch=0)
    ]
    pruned_model.fit(x_train, y_train,
                     batch_size=batch_size,
                     epochs=10,
                     verbose=1,
                     callbacks=callbacks,
                     validation_data=(x_test, y_test))

    score = pruned_model.evaluate(x_test, y_test, verbose=0)
    print('Test loss:', score[0])
    print('Test accuracy:', score[1])

    # Save and restore
    checkpoint_file = './pruned_checkpoint_file.h5'
    # _, checkpoint_file = tempfile.mkstemp('.h5')
    print('Saving pruned model to: ', checkpoint_file)
    # save_model() sets include_optimizer to True by default. Spelling it out here
    # to highlight.
    tf.keras.models.save_model(pruned_model, checkpoint_file, include_optimizer=True)

    with sparsity.prune_scope():
        restored_model = tf.keras.models.load_model(checkpoint_file)
    restored_model.fit(x_train, y_train,
                       batch_size=batch_size,
                       epochs=2,
                       verbose=1,
                       callbacks=callbacks,
                       validation_data=(x_test, y_test))

    start_test = time.time()
    score = restored_model.evaluate(x_test, y_test, verbose=0)
    end_test = time.time()
    print('Test latency:', end_test - start_test)
    print('Test loss:', score[0])
    print('Test accuracy:', score[1])

    final_model = sparsity.strip_pruning(pruned_model)
    final_model.summary()

    layer_pruned_file = './layer_pruned_file.h5'
    # _, layer_pruned_file = tempfile.mkstemp('.h5')
    print('Saving pruned model to: ', layer_pruned_file)
    tf.keras.models.save_model(final_model, layer_pruned_file, include_optimizer=False)
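# Size-comparison sketch (illustrative): pruning only pays off after
# compression, since the zeroed weights are still stored densely in the .h5
# file. Comparing gzipped sizes of an unpruned and a stripped-pruned file
# makes the gain visible.
import os
import tempfile
import zipfile

def gzipped_size(h5_path):
    # Compress the model file and report the compressed size in bytes.
    _, zip_path = tempfile.mkstemp('.zip')
    with zipfile.ZipFile(zip_path, 'w', compression=zipfile.ZIP_DEFLATED) as f:
        f.write(h5_path)
    return os.path.getsize(zip_path)

# print('Compressed pruned model: %d bytes' % gzipped_size('./layer_pruned_file.h5'))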
def prune_Conv1D(final_sparsity, initial_sparsity=0.0, begin_step=0, frequency=100, version=""):
    # Set up some params
    nb_epoch = 50      # number of epochs to train on
    batch_size = 1024  # training batch size

    num_train_samples = X_train.shape[0]
    end_step = np.ceil(1.0 * num_train_samples / batch_size).astype(np.int32) * nb_epoch
    print("End step: ", end_step)
    pruning_params = {
        'pruning_schedule': sparsity.PolynomialDecay(initial_sparsity=initial_sparsity,
                                                     final_sparsity=final_sparsity,
                                                     begin_step=begin_step,
                                                     end_step=end_step,
                                                     frequency=frequency)  # was a hard-coded 100, ignoring the argument
    }

    l = tf.keras.layers
    dr = 0.5  # dropout rate (%)
    pruned_model = tf.keras.Sequential([
        sparsity.prune_low_magnitude(l.Conv1D(128, 3, padding='valid', activation="relu",
                                              name="conv1", kernel_initializer='glorot_uniform',
                                              input_shape=in_shape),
                                     **pruning_params),
        sparsity.prune_low_magnitude(l.Conv1D(128, 3, padding='valid', activation="relu",
                                              name="conv2", kernel_initializer='glorot_uniform'),
                                     **pruning_params),
        l.MaxPool1D(2),
        sparsity.prune_low_magnitude(l.Conv1D(64, 3, padding='valid', activation="relu",
                                              name="conv3", kernel_initializer='glorot_uniform'),
                                     **pruning_params),
        sparsity.prune_low_magnitude(l.Conv1D(64, 3, padding='valid', activation="relu",
                                              name="conv4", kernel_initializer='glorot_uniform'),
                                     **pruning_params),
        l.Dropout(dr),
        sparsity.prune_low_magnitude(l.Conv1D(32, 3, padding='valid', activation="relu",
                                              name="conv5", kernel_initializer='glorot_uniform'),
                                     **pruning_params),
        sparsity.prune_low_magnitude(l.Conv1D(32, 3, padding='valid', activation="relu",
                                              name="conv6", kernel_initializer='glorot_uniform'),
                                     **pruning_params),
        l.Dropout(dr),
        l.MaxPool1D(2),
        l.Flatten(),
        sparsity.prune_low_magnitude(l.Dense(128, activation='relu',
                                             kernel_initializer='he_normal', name="dense1"),
                                     **pruning_params),
        sparsity.prune_low_magnitude(l.Dense(len(classes), kernel_initializer='he_normal',
                                             name="dense2"),
                                     **pruning_params),
        l.Activation('softmax')
    ])
    pruned_model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=["accuracy"])
    pruned_model.summary()

    callbacks = [
        sparsity.UpdatePruningStep(),
        sparsity.PruningSummaries(log_dir=logdir, profile_batch=0)
    ]
    history = pruned_model.fit(X_train, Y_train,
                               batch_size=batch_size,
                               epochs=nb_epoch,
                               verbose=1,
                               validation_data=(X_val, Y_val),
                               callbacks=callbacks)

    score = pruned_model.evaluate(X_test, Y_test, verbose=0)
    print("Test loss: ", score)

    # Save the model
    pruned_model = sparsity.strip_pruning(pruned_model)
    pruned_model.summary()
    # Save the model architecture
    print_model_to_json(pruned_model, './model/Conv1D-{}.json'.format(str(final_sparsity) + version))
    # Save the weights
    pruned_model.save_weights('./model/Conv1D-{}.h5'.format(str(final_sparsity) + version))
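# Usage sketch (illustrative): sweep a few target sparsities with the function
# above; the sparsity values and version tag are assumptions.
# for s in (0.5, 0.75, 0.9):
#     prune_Conv1D(final_sparsity=s, version="_run1")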
del (x_train, y_train)

for model_name in models:
    model_baseline = tf.keras.models.load_model(
        'models/{}.h5'.format(model_name),
        custom_objects={
            'PruneLowMagnitude': pruning_wrapper.PruneLowMagnitude,
            'QDense': QDense,
            'QConv2D': QConv2D,
            'Clip': Clip,
            'QActivation': QActivation
        })
    # print(model_baseline.get_config())
    # sys.exit()
    score = model_baseline.evaluate(x_test, y_test)
    print('Keras Accuracy {} = {}'.format(model_name, score[1]))
    model_baseline_stripped = strip_pruning(model_baseline)

    intbits_a = 0
    intbits_w = 0
    if model_name.find('full') != -1:
        a = hls4ml.model.profiling.activations_keras(model_baseline_stripped,
                                                     x_test[:100], fmt='summary')
        intbits_a = int(np.ceil(max(np.log2(np.array(list(map(lambda x: x['whishi'], a)))))) + 1)
        w = hls4ml.model.profiling.weights_keras(model_baseline_stripped, fmt='summary')
        intbits_w = int(
def main(args):
    annotation_file = args.annotation_file
    classes_path = args.classes_path
    class_names = get_classes(classes_path)
    num_classes = len(class_names)
    anchors = get_anchors(args.anchors_path)
    num_anchors = len(anchors)

    log_dir_path = args.log_directory
    try:
        log_dir = os.path.join('logs', log_dir_path)
    except TypeError:
        date_now = datetime.now()
        log_dir_folder_name = f'{date_now.strftime("%Y_%m_%d_%H%M%S")}_{args.model_type}_TransferEp_{args.transfer_epoch}_TotalEP_{args.total_epoch}'
        log_dir = os.path.realpath(os.path.join('logs', log_dir_folder_name))

    # get freeze level according to CLI option
    if args.weights_path:
        freeze_level = 0
    else:
        freeze_level = 1
    if args.freeze_level is not None:
        freeze_level = args.freeze_level

    # What fraction of layers to unfreeze in fine-tuning
    unfreeze_level = args.unfreeze_level

    # callbacks for training process
    logging = TensorBoard(log_dir=log_dir, histogram_freq=0, write_graph=False,
                          write_grads=False, write_images=False, update_freq='batch')
    checkpoint = ModelCheckpoint(
        filepath=log_dir + os.sep + 'ep{epoch:03d}-loss{loss:.3f}-val_loss{val_loss:.3f}.h5',
        monitor='val_loss',
        mode='min',
        verbose=1,
        save_weights_only=False,
        save_best_only=True,
        period=1)
    reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, mode='min',
                                  patience=10, verbose=1, cooldown=0, min_lr=1e-10)
    early_stopping = EarlyStopping(monitor='val_loss', min_delta=0, patience=50, verbose=1, mode='min')
    terminate_on_nan = TerminateOnNaN()
    callbacks = [logging, checkpoint, reduce_lr, early_stopping, terminate_on_nan]

    # get train & val dataset
    dataset = get_dataset(annotation_file)
    if args.val_annotation_file:
        val_dataset = get_dataset(args.val_annotation_file)
        num_train = len(dataset)
        num_val = len(val_dataset)
        dataset.extend(val_dataset)
    else:
        val_split = args.val_split
        num_val = int(len(dataset) * val_split)
        num_train = len(dataset) - num_val

    # assign multiscale interval
    if args.multiscale:
        rescale_interval = args.rescale_interval
    else:
        rescale_interval = -1  # Doesn't rescale

    # model input shape check
    input_shape = args.model_image_size
    assert (input_shape[0] % 32 == 0 and input_shape[1] % 32 == 0), 'model_image_size should be multiples of 32'

    # get different model type & train & val data generator
    if num_anchors == 9:
        # YOLOv3 uses 9 anchors
        get_train_model = get_yolo3_train_model
        data_generator = yolo3_data_generator_wrapper

        # tf.keras.Sequence style data generator
        # train_data_generator = Yolo3DataGenerator(dataset[:num_train], args.batch_size, input_shape, anchors, num_classes, args.enhance_augment, rescale_interval, args.multi_anchor_assign)
        # val_data_generator = Yolo3DataGenerator(dataset[num_train:], args.batch_size, input_shape, anchors, num_classes, multi_anchor_assign=args.multi_anchor_assign)

        tiny_version = False
    elif num_anchors == 6:
        # Tiny YOLOv3 uses 6 anchors
        get_train_model = get_yolo3_train_model
        data_generator = yolo3_data_generator_wrapper

        # tf.keras.Sequence style data generator
        # train_data_generator = Yolo3DataGenerator(dataset[:num_train], args.batch_size, input_shape, anchors, num_classes, args.enhance_augment, rescale_interval, args.multi_anchor_assign)
        # val_data_generator = Yolo3DataGenerator(dataset[num_train:], args.batch_size, input_shape, anchors, num_classes, multi_anchor_assign=args.multi_anchor_assign)

        tiny_version = True
    elif num_anchors == 5:
        # YOLOv2 uses 5 anchors
        get_train_model = get_yolo2_train_model
        data_generator = yolo2_data_generator_wrapper

        # tf.keras.Sequence style data generator
        # train_data_generator = Yolo2DataGenerator(dataset[:num_train], args.batch_size, input_shape, anchors, num_classes, args.enhance_augment, rescale_interval)
        # val_data_generator = Yolo2DataGenerator(dataset[num_train:], args.batch_size, input_shape, anchors, num_classes)

        tiny_version = False
    else:
        raise ValueError('Unsupported anchors number')

    # prepare online evaluation callback
    if args.eval_online:
        eval_callback = EvalCallBack(
            model_type=args.model_type,
            annotation_lines=dataset[num_train:],
            anchors=anchors,
            class_names=class_names,
            model_image_size=args.model_image_size,
            model_pruning=args.model_pruning,
            log_dir=log_dir,
            eval_epoch_interval=args.eval_epoch_interval,
            save_eval_checkpoint=args.save_eval_checkpoint,
            elim_grid_sense=args.elim_grid_sense)
        callbacks.append(eval_callback)

    # prepare train/val data shuffle callback
    if args.data_shuffle:
        shuffle_callback = DatasetShuffleCallBack(dataset)
        callbacks.append(shuffle_callback)

    # prepare model pruning config
    pruning_end_step = np.ceil(1.0 * num_train / args.batch_size).astype(np.int32) * args.total_epoch
    if args.model_pruning:
        pruning_callbacks = [sparsity.UpdatePruningStep(),
                             sparsity.PruningSummaries(log_dir=log_dir, profile_batch=0)]
        callbacks = callbacks + pruning_callbacks

    # prepare optimizer
    optimizer = get_optimizer(args.optimizer, args.learning_rate, decay_type=None)

    # support multi-gpu training
    if args.gpu_num >= 2:
        # devices_list=["/gpu:0", "/gpu:1"]
        devices_list = ["/gpu:{}".format(n) for n in range(args.gpu_num)]
        strategy = tf.distribute.MirroredStrategy(devices=devices_list)
        print('Number of devices: {}'.format(strategy.num_replicas_in_sync))
        with strategy.scope():
            # get multi-gpu train model
            model = get_train_model(
                model_type=args.model_type,
                anchors=anchors,
                num_classes=num_classes,
                weights_path=args.weights_path,
                freeze_level=freeze_level,
                optimizer=optimizer,
                label_smoothing=args.label_smoothing,
                elim_grid_sense=args.elim_grid_sense,
                model_pruning=args.model_pruning,
                pruning_end_step=pruning_end_step)
    else:
        # get normal train model
        model = get_train_model(
            model_type=args.model_type,
            anchors=anchors,
            num_classes=num_classes,
            weights_path=args.weights_path,
            freeze_level=freeze_level,
            optimizer=optimizer,
            label_smoothing=args.label_smoothing,
            elim_grid_sense=args.elim_grid_sense,
            model_pruning=args.model_pruning,
            pruning_end_step=pruning_end_step)

    if args.show_history:
        model.summary()

    layers_count = len(model.layers)
    print(f'Total layers: {layers_count}')

    # Transfer training some epochs with frozen layers first if needed, to get a stable loss.
    initial_epoch = args.init_epoch
    epochs = initial_epoch + args.transfer_epoch
    print("Transfer training stage")
    print('Train on {} samples, val on {} samples, with batch size {}, input_shape {}.'.format(
        num_train, num_val, args.batch_size, input_shape))

    # model.fit_generator(train_data_generator,
    # Transfer training step: train with frozen layers
    model.fit(
        data_generator(
            annotation_lines=dataset[:num_train],
            batch_size=args.batch_size,
            input_shape=input_shape,
            anchors=anchors,
            num_classes=num_classes,
            enhance_augment=args.enhance_augment,
            rescale_interval=rescale_interval,
            multi_anchor_assign=args.multi_anchor_assign),
        steps_per_epoch=max(1, num_train // args.batch_size),
        # validation_data=val_data_generator,
        validation_data=data_generator(
            annotation_lines=dataset[num_train:],
            batch_size=args.batch_size,
            input_shape=input_shape,
            anchors=anchors,
            num_classes=num_classes,
            multi_anchor_assign=args.multi_anchor_assign),
        validation_steps=max(1, num_val // args.batch_size),
        epochs=epochs,
        initial_epoch=initial_epoch,
        # verbose=1,
        workers=1,
        use_multiprocessing=False,
        max_queue_size=10,
        callbacks=callbacks)

    # Wait 2 seconds for next stage
    time.sleep(2)

    if args.decay_type:
        # rebuild optimizer to apply learning rate decay, only after
        # unfreezing all layers
        callbacks.remove(reduce_lr)
        steps_per_epoch = max(1, num_train // args.batch_size)
        decay_steps = steps_per_epoch * (args.total_epoch - args.init_epoch - args.transfer_epoch)
        optimizer = get_optimizer(args.optimizer, args.learning_rate,
                                  decay_type=args.decay_type, decay_steps=decay_steps)

    # Unfreeze (part of) the network for further tuning
    # NOTE: more GPU memory is required after unfreezing the body
    fine_tune_layers = int(layers_count * unfreeze_level)
    print(f"Unfreeze {unfreeze_level * 100}% of layers and continue training, to fine-tune.")
    print(f"Unfroze {fine_tune_layers} layers of {layers_count}")
    if args.gpu_num >= 2:
        with strategy.scope():
            for i in range(layers_count - fine_tune_layers, layers_count):
                model.layers[i].trainable = True
            model.compile(optimizer=optimizer,
                          loss={'yolo_loss': lambda y_true, y_pred: y_pred})  # recompile to apply the change
    else:
        for i in range(layers_count - fine_tune_layers, layers_count):
            model.layers[i].trainable = True
        model.compile(optimizer=optimizer,
                      loss={'yolo_loss': lambda y_true, y_pred: y_pred})  # recompile to apply the change

    print('Train on {} samples, val on {} samples, with batch size {}, input_shape {}.'.format(
        num_train, num_val, args.batch_size, input_shape))

    # Fine-tuning step: more memory will be used and the learning rate will be decayed
    # model.fit_generator(train_data_generator,
    model.fit(
        # The YOLO data augmentation generator tool
        data_generator(
            annotation_lines=dataset[:num_train],
            batch_size=args.batch_size,
            input_shape=input_shape,
            anchors=anchors,
            num_classes=num_classes,
            enhance_augment=args.enhance_augment,
            rescale_interval=rescale_interval,
            multi_anchor_assign=args.multi_anchor_assign),
        steps_per_epoch=max(1, num_train // args.batch_size),
        # validation_data=val_data_generator,
        # Validation generator
        validation_data=data_generator(
            annotation_lines=dataset[num_train:],
            batch_size=args.batch_size,
            input_shape=input_shape,
            anchors=anchors,
            num_classes=num_classes,
            multi_anchor_assign=args.multi_anchor_assign),
        validation_steps=max(1, num_val // args.batch_size),
        epochs=args.total_epoch,
        initial_epoch=epochs,
        # verbose=1,
        workers=1,
        use_multiprocessing=False,
        max_queue_size=10,
        callbacks=callbacks)

    # Finally store the model
    if args.model_pruning:
        model = sparsity.strip_pruning(model)
    model.save(os.path.join(log_dir, 'trained_final.h5'))
def get_model(self) -> tf.keras.Model: return sparsity.strip_pruning(self._model)
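# Added sketch (not part of the original scripts): strip_pruning() removes the
# PruneLowMagnitude wrappers but keeps the zeroed weights, so a get_model() accessor
# like the one above hands back a plain, deployable Keras model. A minimal,
# hypothetical end-to-end lifecycle around that pattern might look like this:
import tensorflow as tf
import tensorflow_model_optimization as tfmot

base = tf.keras.Sequential([tf.keras.layers.Dense(10, input_shape=(16,))])
pruned = tfmot.sparsity.keras.prune_low_magnitude(base)   # wrap layers for pruning
pruned.compile(optimizer='adam', loss='mse')
# ... fit with tfmot.sparsity.keras.UpdatePruningStep() in the callbacks ...
exported = tfmot.sparsity.keras.strip_pruning(pruned)      # wrappers removed, zeros kept
exported.save('stripped_model.h5')                         # standard Keras .h5 for deployment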
print(cfg) hls_model = hls4ml.converters.keras_to_hls(cfg) #(img_train, label_train), (img_test, label_test) = tfds.load("svhn_cropped", split=['train', 'test'], batch_size=-1, as_supervised=True,) #del (img_train, label_train) #wp,ap = numerical(keras_model=m, hls_model=hls_model, X=img_test[:1000]) #wp.savefig('%s_profile_weights.pdf'%model_name) #ap.savefig('%s_profile_activations.pdf'%model_name) hls_model.build(csim=False, synth=True, vsynth=True) indir_name = str(sys.argv[1]) path = "/data/thaarres/hls4ml_docker/hls4ml_cnns/" + indir_name print("Starting hls project") files = [f for f in listdir(path) if isfile(join(path, f))] for f in files: model_name = f model = tf.keras.models.load_model(path + f, custom_objects={ 'PruneLowMagnitude': pruning_wrapper.PruneLowMagnitude, 'QDense': QDense, 'QConv2D': QConv2D, 'Clip': Clip, 'QActivation': QActivation }) model.summary() model_stripped = strip_pruning(model) toHLS(model_stripped)
print("Accuracy: Keras={} hls4ml={}".format(data['accuracy_keras'],data['accuracy_hls4ml'])) hls4ml.utils.plot_model(hls_model, show_shapes=True, show_precision=True, to_file='plot_model_{}.png'.format(precision)) hls4ml.utils.plot_model(hls_model, show_shapes=True, show_precision=True, to_file='plot_model_{}.png'.format(precision)) wp,ap = numerical(keras_model=model, hls_model=hls_model, X=x_test[:1000]) wp.savefig('%s_profile_weights_LayerTypePrecision.pdf'%cfg['OutputDir']) ap.savefig('%s_profile_activations_LayerTypePrecision.pdf'%cfg['OutputDir']) #hls_model.build(csim=False, synth=True, vsynth=True) if __name__ == '__main__': model_name = str(sys.argv[1]) model = tf.keras.models.load_model("models/"+model_name,custom_objects={'PruneLowMagnitude': pruning_wrapper.PruneLowMagnitude,'QDense': QDense, 'QConv2D': QConv2D, 'Clip': Clip, 'QActivation': QActivation}) model.summary() model = strip_pruning(model) (x_train, y_train), (x_test, y_test) = getNumpyData('svhn_cropped',oneHot=False) a = hls4ml.model.profiling.activations_keras(model, x_test[:1000], fmt='summary') intbits_a = int(np.ceil(max(np.log2(np.array(list(map(lambda x : x['whishi'], a)))))) + 1) w = hls4ml.model.profiling.weights_keras(model, fmt='summary') intbits_w = int(np.ceil(max(np.log2(np.array(list(map(lambda x : x['whishi'], w)))))) + 1) print("Starting hls project, using {} int bits for weights+bias and {} int bits for outputs".format(intbits_a,intbits_w)) precision = [16,14,12,10,8,6,4,3,2,1] precision = [16] data = {'w':[], 'dsp':[], 'lut':[], 'ff':[], 'bram':[], 'latency_clks':[], 'latency_ns':[], 'latency_ii':[]} #Parallel(n_jobs=10, backend='multiprocessing')(delayed(toHLS)(i) for i in precision) #precision = np.flip(precision) for p in precision: toHLS(model,p) #for p in precision: # datai = readReports(model_name.replace(".h5","")+"_bw%i"%(p),p)
#new_pruned_model = sparsity.prune_low_magnitude(loaded_model, **new_pruning_params) #new_pruned_model.summary() #new_pruned_model.compile( # loss=tf.keras.losses.categorical_crossentropy, # optimizer='adam', # metrics=['accuracy']) # Add a pruning step callback to peg the pruning step to the optimizer's # step. Also add a callback to add pruning summaries to tensorboard #callbacks = [ # sparsity.UpdatePruningStep(), # sparsity.PruningSummaries(log_dir=logdir, profile_batch=0) #] # new_pruned_model.fit(train_imgs_scaled, train_labels_enc, # batch_size=batch_size, # epochs=epochs, # verbose=1, # callbacks=callbacks, # validation_data=(val_imgs_scaled, val_labels_enc)) # # score = new_pruned_model.evaluate(val_imgs_scaled, val_labels_enc, verbose=0) # print('Test loss:', score[0]) # print('Test accuracy:', score[1]) final_model = sparsity.strip_pruning(loaded_model) final_model.summary() final_model.save('vggPruned.h5')
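# Added sketch (hypothetical helper, not in the original script): after strip_pruning()
# the sparsity survives only as literal zeros in the weight tensors, so it can be
# verified by counting them layer by layer before shipping the .h5 file.
import numpy as np

def report_sparsity(model):
    for layer in model.layers:
        for weights in layer.get_weights():
            if weights.size:
                print('{}: {:.1%} zeros'.format(layer.name, np.mean(weights == 0)))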
sparsity.UpdatePruningStep(), sparsity.PruningSummaries(log_dir='./', profile_batch=0) ] print('[INFO] Start pruning process...') pruned_model.fit(train_generator, steps_per_epoch=train_generator.__len__(), callbacks=callbacks, epochs=epochs, validation_data=validation_generator, validation_steps=validation_generator.__len__()) pruned_model_path = './models/pruned_MobileNetv2.h5' # strip pruning wrappers to recover a standard Keras model final_model = sparsity.strip_pruning(pruned_model) tf.keras.models.save_model(final_model, pruned_model_path, include_optimizer=False) # Zip file pruned_zip_path = './models/pruned_MobileNetv2.zip' with zipfile.ZipFile(pruned_zip_path, 'w', compression=zipfile.ZIP_DEFLATED) as f: f.write(pruned_model_path) # Print file size print("Size of the model before compression: %.2f Mb" % (os.path.getsize(model_path) / float(2**20))) print("Size of the model after compression: %.2f Mb" % (os.path.getsize(pruned_zip_path) / float(2**20)))
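# Added sketch: the size printout above compares an uncompressed .h5 against a zipped
# one, which mixes the effect of pruning with the effect of DEFLATE. A hypothetical
# helper for a like-for-like comparison zips both files with the same settings;
# model_path and pruned_model_path are the variables already used above.
import os, zipfile

def zipped_size_mb(path):
    zip_path = path + '.zip'
    with zipfile.ZipFile(zip_path, 'w', compression=zipfile.ZIP_DEFLATED) as zf:
        zf.write(path)
    return os.path.getsize(zip_path) / float(2**20)

# print("Original (zipped): %.2f Mb" % zipped_size_mb(model_path))
# print("Pruned (zipped):   %.2f Mb" % zipped_size_mb(pruned_model_path))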
def main(args): # path to the dataset annotation file annotation_file = args.annotation_file # directory where the resulting logs and weights are stored log_dir = os.path.join('logs', '000') # path to the class names file classes_path = args.classes_path class_names = get_classes(classes_path) num_classes = len(class_names) # load the anchors anchors = get_anchors(args.anchors_path) num_anchors = len(anchors) # get freeze level according to CLI option if args.weights_path: freeze_level = 0 else: freeze_level = 1 if args.freeze_level is not None: freeze_level = args.freeze_level # callbacks for training process logging = TensorBoard(log_dir=log_dir, histogram_freq=0, write_graph=False, write_grads=False, write_images=False, update_freq='batch') checkpoint = ModelCheckpoint(os.path.join( log_dir, 'ep{epoch:03d}-loss{loss:.3f}-val_loss{val_loss:.3f}.h5'), monitor='val_loss', mode='min', verbose=1, save_weights_only=False, save_best_only=True, period=1) reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, mode='min', patience=10, verbose=1, cooldown=0, min_lr=1e-10) early_stopping = EarlyStopping(monitor='val_loss', min_delta=0, patience=50, verbose=1, mode='min') terminate_on_nan = TerminateOnNaN() callbacks = [ logging, checkpoint, reduce_lr, early_stopping, terminate_on_nan ] # load the dataset dataset = get_dataset(annotation_file) if args.val_annotation_file: val_dataset = get_dataset(args.val_annotation_file) num_train = len(dataset) num_val = len(val_dataset) dataset.extend(val_dataset) else: val_split = args.val_split num_val = int(len(dataset) * val_split) num_train = len(dataset) - num_val # assign multiscale interval if args.multiscale: rescale_interval = args.rescale_interval else: rescale_interval = -1 #Doesn't rescale # model input shape check input_shape = args.model_image_size assert (input_shape[0] % 32 == 0 and input_shape[1] % 32 == 0), 'model_image_size should be multiples of 32' # pick the data generator and model builder according to the model type if num_anchors == 9: # YOLOv3 use 9 anchors get_train_model = get_yolo3_train_model data_generator = yolo3_data_generator_wrapper tiny_version = False elif num_anchors == 6: # Tiny YOLOv3 use 6 anchors get_train_model = get_yolo3_train_model data_generator = yolo3_data_generator_wrapper tiny_version = True elif num_anchors == 5: # YOLOv2 use 5 anchors get_train_model = get_yolo2_train_model data_generator = yolo2_data_generator_wrapper tiny_version = False else: raise ValueError('Unsupported anchors number') # prepare online evaluation callback if args.eval_online: eval_callback = EvalCallBack( args.model_type, dataset[num_train:], anchors, class_names, args.model_image_size, args.model_pruning, log_dir, eval_epoch_interval=args.eval_epoch_interval, save_eval_checkpoint=args.save_eval_checkpoint, elim_grid_sense=args.elim_grid_sense) callbacks.append(eval_callback) # prepare train/val data shuffle callback if args.data_shuffle: shuffle_callback = DatasetShuffleCallBack(dataset) callbacks.append(shuffle_callback) # prepare model pruning config pruning_end_step = np.ceil(1.0 * num_train / args.batch_size).astype( np.int32) * args.total_epoch if args.model_pruning: pruning_callbacks = [ sparsity.UpdatePruningStep(), sparsity.PruningSummaries(log_dir=log_dir, profile_batch=0) ] callbacks = callbacks + pruning_callbacks # prepare optimizer optimizer = get_optimizer(args.optimizer, args.learning_rate, decay_type=None) # support multi-gpu training if args.gpu_num >= 2: # devices_list=["/gpu:0", "/gpu:1"] devices_list = ["/gpu:{}".format(n) for n in range(args.gpu_num)] strategy = tf.distribute.MirroredStrategy(devices=devices_list)
print('Number of devices: {}'.format(strategy.num_replicas_in_sync)) with strategy.scope(): # get multi-gpu train model model = get_train_model(args.model_type, anchors, num_classes, weights_path=args.weights_path, freeze_level=freeze_level, optimizer=optimizer, label_smoothing=args.label_smoothing, elim_grid_sense=args.elim_grid_sense, model_pruning=args.model_pruning, pruning_end_step=pruning_end_step) else: # get normal train model model = get_train_model(args.model_type, anchors, num_classes, weights_path=args.weights_path, freeze_level=freeze_level, optimizer=optimizer, label_smoothing=args.label_smoothing, elim_grid_sense=args.elim_grid_sense, model_pruning=args.model_pruning, pruning_end_step=pruning_end_step) model.summary() # Transfer training some epochs with frozen layers first if needed, to get a stable loss. initial_epoch = args.init_epoch epochs = initial_epoch + args.transfer_epoch print("Transfer training stage") print( 'Train on {} samples, val on {} samples, with batch size {}, input_shape {}.' .format(num_train, num_val, args.batch_size, input_shape)) # For better results, run a transfer-learning stage over the first few epochs with frozen layers (Initial Epoch ~ Transfer Epoch) model.fit_generator( data_generator(dataset[:num_train], args.batch_size, input_shape, anchors, num_classes, args.enhance_augment, rescale_interval, multi_anchor_assign=args.multi_anchor_assign), steps_per_epoch=max(1, num_train // args.batch_size), #validation_data=val_data_generator, validation_data=data_generator( dataset[num_train:], args.batch_size, input_shape, anchors, num_classes, multi_anchor_assign=args.multi_anchor_assign), validation_steps=max(1, num_val // args.batch_size), epochs=epochs, initial_epoch=initial_epoch, #verbose=1, workers=1, use_multiprocessing=False, max_queue_size=10, callbacks=callbacks) # Wait 2 seconds for next stage time.sleep(2) if args.decay_type: # rebuild optimizer to apply learning rate decay, only after # unfreeze all layers callbacks.remove(reduce_lr) steps_per_epoch = max(1, num_train // args.batch_size) decay_steps = steps_per_epoch * (args.total_epoch - args.init_epoch - args.transfer_epoch) optimizer = get_optimizer(args.optimizer, args.learning_rate, decay_type=args.decay_type, decay_steps=decay_steps) # Unfreeze the whole network for further tuning # NOTE: more GPU memory is required after unfreezing the body print("Unfreeze and continue training, to fine-tune.") if args.gpu_num >= 2: with strategy.scope(): for i in range(len(model.layers)): model.layers[i].trainable = True model.compile(optimizer=optimizer, loss={ 'yolo_loss': lambda y_true, y_pred: y_pred }) # recompile to apply the change else: for i in range(len(model.layers)): model.layers[i].trainable = True model.compile(optimizer=optimizer, loss={ 'yolo_loss': lambda y_true, y_pred: y_pred }) # recompile to apply the change print( 'Train on {} samples, val on {} samples, with batch size {}, input_shape {}.'
.format(num_train, num_val, args.batch_size, input_shape)) # After the transfer-learning stage, train for the remaining epochs (Transfer Epoch ~ Total Epoch) # If this stage is not needed, or training takes too long, set Total Epoch equal to Transfer Epoch and skip the fit below # Adjust to the capabilities of your own machine model.fit_generator( data_generator(dataset[:num_train], args.batch_size, input_shape, anchors, num_classes, args.enhance_augment, rescale_interval, multi_anchor_assign=args.multi_anchor_assign), steps_per_epoch=max(1, num_train // args.batch_size), #validation_data=val_data_generator, validation_data=data_generator( dataset[num_train:], args.batch_size, input_shape, anchors, num_classes, multi_anchor_assign=args.multi_anchor_assign), validation_steps=max(1, num_val // args.batch_size), epochs=args.total_epoch, initial_epoch=epochs, #verbose=1, workers=1, use_multiprocessing=False, max_queue_size=10, callbacks=callbacks) # Finally store model if args.model_pruning: model = sparsity.strip_pruning(model) model.save(os.path.join(log_dir, 'trained_final.h5'))
def main(args): annotation_file = args.annotation_file log_dir = os.path.join('logs', '000') classes_path = args.classes_path class_names = get_classes(classes_path) num_classes = len(class_names) print('classes_path =', classes_path) print('class_names = ', class_names) print('num_classes = ', num_classes) anchors = get_anchors(args.anchors_path) num_anchors = len(anchors) # get freeze level according to CLI option if args.weights_path: freeze_level = 0 else: freeze_level = 1 if args.freeze_level is not None: freeze_level = args.freeze_level # callbacks for training process logging = TensorBoard(log_dir=log_dir, histogram_freq=0, write_graph=False, write_grads=False, write_images=False, update_freq='batch') checkpoint = ModelCheckpoint(os.path.join( log_dir, 'ep{epoch:03d}-loss{loss:.3f}-val_loss{val_loss:.3f}.h5'), monitor='val_loss', verbose=1, save_weights_only=False, save_best_only=True, period=1) reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=10, verbose=1, cooldown=0, min_lr=1e-10) early_stopping = EarlyStopping(monitor='val_loss', min_delta=0, patience=50, verbose=1) terminate_on_nan = TerminateOnNaN() callbacks = [ logging, checkpoint, reduce_lr, early_stopping, terminate_on_nan ] # get train&val dataset dataset = get_dataset(annotation_file) if args.val_annotation_file: val_dataset = get_dataset(args.val_annotation_file) num_train = len(dataset) print('num_train = ', num_train) num_val = len(val_dataset) dataset.extend(val_dataset) else: val_split = args.val_split num_val = int(len(dataset) * val_split) num_train = len(dataset) - num_val # assign multiscale interval if args.multiscale: rescale_interval = args.rescale_interval else: rescale_interval = -1 #Doesn't rescale # model input shape check input_shape = args.model_image_size assert (input_shape[0] % 32 == 0 and input_shape[1] % 32 == 0), 'Multiples of 32 required' # get different model type & train&val data generator if num_anchors == 9: # YOLOv3 use 9 anchors get_train_model = get_yolo3_train_model data_generator = yolo3_data_generator_wrapper # tf.keras.Sequence style data generator #train_data_generator = Yolo3DataGenerator(dataset[:num_train], args.batch_size, input_shape, anchors, num_classes, args.enhance_augment, rescale_interval) #val_data_generator = Yolo3DataGenerator(dataset[num_train:], args.batch_size, input_shape, anchors, num_classes) tiny_version = False elif num_anchors == 6: # Tiny YOLOv3 use 6 anchors get_train_model = get_yolo3_train_model data_generator = yolo3_data_generator_wrapper # tf.keras.Sequence style data generator #train_data_generator = Yolo3DataGenerator(dataset[:num_train], args.batch_size, input_shape, anchors, num_classes, args.enhance_augment, rescale_interval) #val_data_generator = Yolo3DataGenerator(dataset[num_train:], args.batch_size, input_shape, anchors, num_classes) tiny_version = True elif num_anchors == 5: # YOLOv2 use 5 anchors get_train_model = get_yolo2_train_model data_generator = yolo2_data_generator_wrapper # tf.keras.Sequence style data generator #train_data_generator = Yolo2DataGenerator(dataset[:num_train], args.batch_size, input_shape, anchors, num_classes, args.enhance_augment, rescale_interval) #val_data_generator = Yolo2DataGenerator(dataset[num_train:], args.batch_size, input_shape, anchors, num_classes) tiny_version = False else: raise ValueError('Unsupported anchors number') # prepare online evaluation callback if args.eval_online: eval_callback = EvalCallBack( args.model_type, dataset[num_train:], anchors, class_names, 
args.model_image_size, args.model_pruning, log_dir, eval_epoch_interval=args.eval_epoch_interval, save_eval_checkpoint=args.save_eval_checkpoint) callbacks.append(eval_callback) # prepare train/val data shuffle callback if args.data_shuffle: shuffle_callback = DatasetShuffleCallBack(dataset) callbacks.append(shuffle_callback) # prepare model pruning config pruning_end_step = np.ceil(1.0 * num_train / args.batch_size).astype( np.int32) * args.total_epoch if args.model_pruning: pruning_callbacks = [ sparsity.UpdatePruningStep(), sparsity.PruningSummaries(log_dir=log_dir, profile_batch=0) ] callbacks = callbacks + pruning_callbacks # prepare optimizer optimizer = get_optimizer(args.optimizer, args.learning_rate, decay_type=None) # get train model model = get_train_model(args.model_type, anchors, num_classes, weights_path=args.weights_path, freeze_level=freeze_level, optimizer=optimizer, label_smoothing=args.label_smoothing, model_pruning=args.model_pruning, pruning_end_step=pruning_end_step) # support multi-gpu training template_model = None if args.gpu_num >= 2: # keep the template model for saving result template_model = model model = multi_gpu_model(model, gpus=args.gpu_num) # recompile multi gpu model model.compile(optimizer=optimizer, loss={ 'yolo_loss': lambda y_true, y_pred: y_pred }) model.summary() # Transfer training some epochs with frozen layers first if needed, to get a stable loss. initial_epoch = args.init_epoch ##################################################################################################### epochs = initial_epoch + args.transfer_epoch print("Transfer training stage") print( 'Train on {} samples, val on {} samples, with batch size {}, input_shape {}.' .format(num_train, num_val, args.batch_size, input_shape)) #model.fit_generator(train_data_generator, model.fit_generator( data_generator(dataset[:num_train], args.batch_size, input_shape, anchors, num_classes, args.enhance_augment), steps_per_epoch=max(1, num_train // args.batch_size), #validation_data=val_data_generator, validation_data=data_generator(dataset[num_train:], args.batch_size, input_shape, anchors, num_classes), validation_steps=max(1, num_val // args.batch_size), epochs=epochs, initial_epoch=initial_epoch, #verbose=1, workers=1, use_multiprocessing=False, max_queue_size=10, callbacks=callbacks) # Wait 2 seconds for next stage time.sleep(2) if args.decay_type: # rebuild optimizer to apply learning rate decay, only after # unfreeze all layers callbacks.remove(reduce_lr) steps_per_epoch = max(1, num_train // args.batch_size) decay_steps = steps_per_epoch * (args.total_epoch - args.init_epoch - args.transfer_epoch) optimizer = get_optimizer(args.optimizer, args.learning_rate, decay_type=args.decay_type, decay_steps=decay_steps) # Unfreeze the whole network for further tuning # NOTE: more GPU memory is required after unfreezing the body print("Unfreeze and continue training, to fine-tune.") for i in range(len(model.layers)): model.layers[i].trainable = True model.compile(optimizer=optimizer, loss={ 'yolo_loss': lambda y_true, y_pred: y_pred }) # recompile to apply the change print( 'Train on {} samples, val on {} samples, with batch size {}, input_shape {}.' 
.format(num_train, num_val, args.batch_size, input_shape)) #model.fit_generator(train_data_generator, model.fit_generator( data_generator(dataset[:num_train], args.batch_size, input_shape, anchors, num_classes, args.enhance_augment, rescale_interval), steps_per_epoch=max(1, num_train // args.batch_size), #validation_data=val_data_generator, validation_data=data_generator(dataset[num_train:], args.batch_size, input_shape, anchors, num_classes), validation_steps=max(1, num_val // args.batch_size), epochs=args.total_epoch, initial_epoch=epochs, #verbose=1, workers=1, use_multiprocessing=False, max_queue_size=10, callbacks=callbacks) # Finally store model if args.model_pruning: if template_model is not None: template_model = sparsity.strip_pruning(template_model) else: model = sparsity.strip_pruning(model) if template_model is not None: template_model.save(os.path.join(log_dir, 'trained_final.h5')) else: model.save(os.path.join(log_dir, 'trained_final.h5'))
def train(cfg): epochs = cfg['epochs'] save_dir = cfg['save_dir'] if not os.path.exists(save_dir): os.mkdir(save_dir) shape = (int(cfg['height']), int(cfg['width']), 3) n_class = int(cfg['class_number']) batch_size = int(cfg['batch_size']) if cfg['model'] == 'mymodel': from model.my_model import MyModel model = MyModel(shape, n_class).build() if cfg['model'] == 'v2': from model.mobilenet_v2 import MyModel model = MyModel(shape, n_class).buildRaw() train_generator, validation_generator, count1, count2 = generate(batch_size, shape[:2], cfg['train_dir'], cfg['eval_dir']) print(count1, count2) earlystop = EarlyStopping(monitor='val_acc', patience=4, verbose=0, mode='auto') checkpoint = ModelCheckpoint(filepath=os.path.join("save", 'prune_e_{epoch:02d}_{val_loss:.3f}_{val_acc:.3f}.h5'), monitor='val_acc', save_best_only=False, save_weights_only=False) reduce_lr = ReduceLROnPlateau(monitor='val_acc', factor=0.1, patience=2, verbose=1, min_lr=1e-7) model_path = r'./save/v2' # x_train, y_train = train_generator.next() # num_train_samples = batch_size # x_test, y_test = validation_generator.next() loaded_model = tf.keras.models.load_model(os.path.join(model_path,'e_06_0.20_1.00.h5')) score = loaded_model.evaluate_generator(validation_generator, count2//batch_size) print('original Test loss:', score[0]) print('original Test accuracy:', score[1]) end_step = np.ceil(1.0 * count1 / batch_size).astype(np.int32) * epochs print(end_step) new_pruning_params = {'pruning_schedule': sparsity.PolynomialDecay(initial_sparsity=0.50, final_sparsity=0.90, begin_step=0, end_step=end_step, frequency=100)} new_pruned_model = sparsity.prune_low_magnitude(loaded_model, **new_pruning_params) #new_pruned_model.summary() opt = Adam(lr=float(0.0001)) new_pruned_model.compile(loss=tf.keras.losses.categorical_crossentropy, optimizer=opt, metrics=['acc']) # Now we start training and pruning the model. #Add a pruning step callback to peg the pruning step to the optimizer's #step. Also add a callback to add pruning summaries to tensorboard logdir = "./save/log" callbacks = [earlystop,checkpoint,reduce_lr, sparsity.UpdatePruningStep(), sparsity.PruningSummaries(log_dir=logdir, profile_batch=0)] # new_pruned_model.fit(x_train, y_train, # batch_size=batch_size, # epochs=epochs, # verbose=1, # callbacks=callbacks, # validation_data=(x_test, y_test)) new_pruned_model.fit_generator(train_generator, validation_data=validation_generator, steps_per_epoch=100,#count1 // batch_size, validation_steps=count2 // batch_size, epochs=epochs, callbacks=callbacks) score = new_pruned_model.evaluate_generator(validation_generator, count2//batch_size) print('Test loss:', score[0]) print('Test accuracy:', score[1]) final_model = sparsity.strip_pruning(new_pruned_model) new_pruned_keras_file = "save/pruned_model.h5" tf.keras.models.save_model(final_model, new_pruned_keras_file, include_optimizer=False)
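# Added sketch: the PolynomialDecay schedule used above ramps the target sparsity from
# initial_sparsity (0.50) to final_sparsity (0.90) between begin_step and end_step,
# following the gradual (cubic by default) schedule of Zhu & Gupta. The function below
# only illustrates that ramp for intuition; it is not the tfmot implementation.
def target_sparsity(step, initial=0.50, final=0.90, begin_step=0, end_step=1000, power=3):
    progress = min(1.0, max(0.0, (step - begin_step) / float(end_step - begin_step)))
    return final + (initial - final) * (1.0 - progress) ** power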