import paddle.fluid as fluid

# create_feeds, create_dataloader, create_model, create_fetchs,
# create_optimizer, mixed_precision_optimizer, dist_optimizer, AverageMeter
# and ExponentialMovingAverage are helpers defined in the surrounding module.


def build(config, main_prog, startup_prog, is_train=True, is_distributed=True):
    """
    Build a program using a model and an optimizer
        1. create feeds
        2. create a dataloader
        3. create a model
        4. create fetchs
        5. create an optimizer

    Args:
        config(dict): config
        main_prog(): main program
        startup_prog(): startup program
        is_train(bool): train or valid
        is_distributed(bool): whether to use distributed training method

    Returns:
        dataloader(): a bridge between the model and the data
        fetchs(dict): dict of model outputs (including loss and measures)
    """
    with fluid.program_guard(main_prog, startup_prog):
        with fluid.unique_name.guard():
            use_mix = config.get('use_mix') and is_train
            use_distillation = config.get('use_distillation')
            feeds = create_feeds(config.image_shape, use_mix=use_mix)
            dataloader = create_dataloader(feeds.values())
            out = create_model(config.ARCHITECTURE, feeds['image'],
                               config.classes_num, is_train)
            fetchs = create_fetchs(out,
                                   feeds,
                                   config.ARCHITECTURE,
                                   config.topk,
                                   config.classes_num,
                                   epsilon=config.get('ls_epsilon'),
                                   use_mix=use_mix,
                                   use_distillation=use_distillation)
            if is_train:
                optimizer = create_optimizer(config)
                lr = optimizer._global_learning_rate()
                fetchs['lr'] = (lr, AverageMeter('lr', 'f', need_avg=False))

                optimizer = mixed_precision_optimizer(config, optimizer)
                if is_distributed:
                    optimizer = dist_optimizer(config, optimizer)
                optimizer.minimize(fetchs['loss'][0])
                if config.get('use_ema'):
                    global_steps = fluid.layers.learning_rate_scheduler._decay_step_counter()
                    ema = ExponentialMovingAverage(
                        config.get('ema_decay'), thres_steps=global_steps)
                    ema.update()
                    return dataloader, fetchs, ema

    return dataloader, fetchs
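# For reference: the `ema` object returned above is Paddle fluid's
# ExponentialMovingAverage, whose apply() method is a context manager that
# temporarily swaps the averaged weights in for evaluation and restores the
# live weights on exit. A minimal, hedged usage sketch -- the executor setup
# and the run_evaluation helper are illustrative assumptions, not part of
# the snippet above:

place = fluid.CUDAPlace(0) if fluid.is_compiled_with_cuda() else fluid.CPUPlace()
exe = fluid.Executor(place)
exe.run(startup_prog)
# ... training iterations run main_prog, which also executes ema.update() ...
with ema.apply(exe):      # parameters are replaced by their moving averages
    run_evaluation(exe)   # hypothetical evaluation helper
# on exiting the context, the original parameter values are restored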
def __init__(self, config):
    super(BiDAF, self).__init__()
    with self.init_scope():
        self.word_emb = L.EmbedID(config.word_vocab_size,
                                  config.word_emb_dim,
                                  initialW=config.word_emb,
                                  ignore_label=-1)
        self.char_emb = L.EmbedID(config.char_vocab_size,
                                  config.char_emb_dim,
                                  ignore_label=-1)
        self.char_conv = CharacterConvolution(config)
        self.highway_network = HighwayNetwork(config)
        self.word_enc_dim = config.word_emb_dim + config.char_out_dim
        self.dropout_rate = config.dropout_rate
        self.context_bilstm = BiLSTM(self.word_enc_dim, config.hidden_size,
                                     self.dropout_rate, config)  # in=200
        self.attention_layer = AttentionFlow(config)
        self.modeling_bilstm_g0 = BiLSTM(self.word_enc_dim * 4,
                                         config.hidden_size,
                                         self.dropout_rate, config)  # in=800
        self.modeling_bilstm_g1 = BiLSTM(config.hidden_size * 2,
                                         config.hidden_size,
                                         self.dropout_rate, config)  # in=200
        self.modeling_bilstm_g2 = BiLSTM(self.word_enc_dim * 7,
                                         config.hidden_size,
                                         self.dropout_rate, config)  # in=1400
        self.y_logits_layer = L.Linear(None, 1)
        self.y2_logits_layer = L.Linear(None, 1)
    # plain attributes (only links need registration inside init_scope)
    self.char_out_dim = config.char_out_dim
    self.skip_word_in_result = config.skip_word_in_result
    self.no_ema = config.no_ema
    if not self.no_ema:
        self.ema = ExponentialMovingAverage(config.decay_rate)
        self.ema_init = True
    self.multi_gpu = len(config.gpu) > 1
def _create_ema_updater(self):
    self.ema_updater = ExponentialMovingAverage(
        self.ema_model.parameters(),
        self.cfg.ema_decay,
        use_num_updates=self.cfg.use_num_updates,
        rampup_steps=self.cfg.rampup_steps,
        rampup_decay=self.cfg.rampup_decay)
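# A minimal sketch of the updater interface assumed by the call above, in
# the style of torch_ema's ExponentialMovingAverage; the rampup_steps /
# rampup_decay handling is an assumption modelled on the call site:
import torch


class ExponentialMovingAverage:
    def __init__(self, parameters, decay, use_num_updates=True,
                 rampup_steps=0, rampup_decay=None):
        self.parameters = list(parameters)
        # shadow copies hold the averaged weights
        self.shadow = [p.detach().clone() for p in self.parameters]
        self.decay = decay
        self.use_num_updates = use_num_updates
        self.rampup_steps = rampup_steps
        self.rampup_decay = rampup_decay if rampup_decay is not None else decay
        self.num_updates = 0

    @torch.no_grad()
    def update(self):
        self.num_updates += 1
        if self.num_updates <= self.rampup_steps:
            decay = self.rampup_decay  # weaker averaging while ramping up
        elif self.use_num_updates:
            # torch_ema-style warmup: effective decay grows with step count
            decay = min(self.decay,
                        (1 + self.num_updates) / (10 + self.num_updates))
        else:
            decay = self.decay
        for s, p in zip(self.shadow, self.parameters):
            s.mul_(decay).add_(p, alpha=1.0 - decay)

    @torch.no_grad()
    def copy_to(self, parameters=None):
        # overwrite live parameters with the averaged values
        for s, p in zip(self.shadow, parameters or self.parameters):
            p.copy_(s)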
# Excerpted from inside a cross-validation loop over `i` and `test_cohort`
# (hence the bare `continue`). CyclicLR, ExponentialMovingAverage (used here
# as a checkpointing callback), data, get_model and cox_regression_loss are
# project-local or third-party helpers.
import os

import numpy as np
from keras import backend as K
from keras.callbacks import CSVLogger, LambdaCallback, ReduceLROnPlateau
from sklearn.model_selection import train_test_split

train_indices, val_indices = train_test_split(
    np.arange(len(meta_parameters_dictionary['train_labels'])), test_size=0.1)
if early:
    early_test_indices = []
    for j in range(len(meta_parameters_dictionary['test_labels'])):
        if (meta_parameters_dictionary['test_labels_stage'][j, 0] == 1
                or meta_parameters_dictionary['test_labels_stage'][j, 1] == 1):
            early_test_indices.append(j)
test_indices = range(len(meta_parameters_dictionary['test_labels']))
meta_parameters_dictionary['train_indices'] = train_indices
meta_parameters_dictionary['val_indices'] = val_indices
meta_parameters_dictionary['test_indices'] = np.array(test_indices)

training_generator = data(meta_parameters_dictionary, batch_size, True, False)
val_generator = data(meta_parameters_dictionary, batch_size, False, True)
test_generator = data(meta_parameters_dictionary, batch_size, False, False)

csv_logger = CSVLogger(
    os.path.join(LOGDIR, 'training_{}.log'.format(test_cohort)))
lrate = CyclicLR(base_lr=0.001, max_lr=0.01, step_size=100, mode='triangular2')
checkpointer = ExponentialMovingAverage(
    filepath=checkpoint_dir + 'cyclic_{}_{}.h5'.format(test_cohort, i),
    save_best_only=True, save_weights_only=True,
    custom_objects={'cox_regression_loss': cox_regression_loss}, verbose=1)
# print the current learning rate at the start of each epoch
lr_monitor = LambdaCallback(
    on_epoch_begin=lambda epoch, logs: print(K.eval(model.optimizer.lr)))
lr_callback = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=2,
                                min_lr=0.00001)

model = get_model(cube_size, clinical_features_size, kernel_size=(3, 3, 3))
history = model.fit_generator(
    training_generator, verbose=2, epochs=steps,
    callbacks=[lr_callback, lr_monitor, lrate, csv_logger, checkpointer],
    validation_data=val_generator, workers=8, use_multiprocessing=True,
    shuffle=True)
print(i)
try:
    model.load_weights(checkpoint_dir + 'cyclic_{}_{}.h5'.format(test_cohort, i))
except OSError:
    print('Could not find checkpoint: '
          + checkpoint_dir + 'cyclic_{}_{}.h5'.format(test_cohort, i))
    continue
# tensorboard_callback = TensorBoard(log_dir=LOGDIR, histogram_freq=0, write_graph=True)
# early_stopping_monitor = EarlyStopping(monitor='val_loss', patience=10)
# weights = model.get_weights()
# start_time = time.time()
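# The ExponentialMovingAverage above is evidently a custom Keras checkpoint
# callback. A minimal sketch of what such a callback could look like,
# assuming it averages weights per batch and writes the averaged copy when
# val_loss improves -- the stand-in name, decay default, and save logic are
# all assumptions, not the original implementation:
from keras.callbacks import Callback


class EMACheckpoint(Callback):  # hypothetical stand-in name
    def __init__(self, filepath, decay=0.999, save_best_only=True,
                 save_weights_only=True, custom_objects=None, verbose=0):
        # save_weights_only / custom_objects accepted for interface parity;
        # this sketch always saves weights only
        super(EMACheckpoint, self).__init__()
        self.filepath = filepath
        self.decay = decay
        self.save_best_only = save_best_only
        self.verbose = verbose
        self.best = np.inf
        self.ema_weights = None  # shadow copy of the model weights

    def on_batch_end(self, batch, logs=None):
        weights = self.model.get_weights()
        if self.ema_weights is None:
            self.ema_weights = [w.copy() for w in weights]
        else:
            self.ema_weights = [self.decay * e + (1.0 - self.decay) * w
                                for e, w in zip(self.ema_weights, weights)]

    def on_epoch_end(self, epoch, logs=None):
        loss = (logs or {}).get('val_loss', np.inf)
        if not self.save_best_only or loss < self.best:
            self.best = min(loss, self.best)
            live = self.model.get_weights()
            self.model.set_weights(self.ema_weights)  # swap EMA weights in
            self.model.save_weights(self.filepath)
            self.model.set_weights(live)              # restore live weights
            if self.verbose:
                print('Saved EMA weights to', self.filepath)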
def setup(self):
    self.ema = ExponentialMovingAverage()
class TestEMADefaultAlpha():
    def setup(self):
        self.ema = ExponentialMovingAverage()

    def test_no_input(self):
        assert self.ema.get() is None

    def test_one_zero(self):
        self.ema.put(0)
        assert self.ema.get() == 0

    def test_one_one(self):
        self.ema.put(1)
        assert self.ema.get() == 1

    def test_three_values(self):
        self.ema.put(1)
        self.ema.put(2)
        self.ema.put(3)
        assert self.ema.get() == 2.25

    def test_three_values_list(self):
        self.ema.put([1, 2, 3])
        assert self.ema.get() == 2.25

    def test_ten_values_list(self):
        self.ema.put([-1, -1, -1, -1, -1, -1, -1, -1, -1, 1])
        assert self.ema.get() == 0.0

    def test_get_halfway(self):
        self.ema.put([-1, -1, -1, -1, -1])
        assert self.ema.get() == -1.0
        self.ema.put([-1, -1, -1, -1, 1])
        assert self.ema.get() == 0.0
from pytest import approx


class TestEMALowAlpha():
    def setup(self):
        self.ema = ExponentialMovingAverage(alpha=0.2)

    def test_no_input(self):
        assert self.ema.get() is None

    def test_one_zero(self):
        self.ema.put(0)
        assert self.ema.get() == 0

    def test_one_one(self):
        self.ema.put(1)
        assert self.ema.get() == 1

    def test_three_values(self):
        self.ema.put(1)
        self.ema.put(2)
        self.ema.put(3)
        assert self.ema.get() == approx(1.56)

    def test_ten_values(self):
        self.ema.put([-1, -1, -1, -1, -1, -1, -1, -1, -1, 1])
        assert self.ema.get() == approx(-0.6)
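# The two test classes above pin down the expected behaviour exactly: get()
# returns None before any input, put() accepts a scalar or a list, the first
# sample seeds the average, and the default alpha must be 0.5 (the sequence
# 1, 2, 3 yields 1 -> 1.5 -> 2.25). A minimal implementation consistent with
# these tests -- the class body is an inferred sketch, not the original
# source:


class ExponentialMovingAverage:
    """Running EMA: value = alpha * x + (1 - alpha) * value."""

    def __init__(self, alpha=0.5):
        self.alpha = alpha
        self.value = None

    def put(self, x):
        samples = x if isinstance(x, (list, tuple)) else [x]
        for v in samples:
            if self.value is None:
                self.value = float(v)  # first sample seeds the average
            else:
                self.value = self.alpha * v + (1 - self.alpha) * self.value

    def get(self):
        return self.value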