def define_models_GAN(rand_dim, data_dim, base_n_count, type=None):
    """Build the generator, discriminator, and combined models for a GAN.

    Args:
        rand_dim: Dimension of the generator's random-noise input.
        data_dim: Dimension of a (real or generated) data sample.
        base_n_count: Base layer width passed to the sub-networks.
        type: If 'Wasserstein', the critic network is used as the
            discriminator; otherwise the standard discriminator network.

    Returns:
        Tuple of (generator_model, discriminator_model, combined_model).
    """
    noise_in = layers.Input(shape=(rand_dim, ))
    fake_sample = generator_network(noise_in, data_dim, base_n_count)

    sample_in = layers.Input(shape=(data_dim, ))
    if type == 'Wasserstein':
        disc_out = critic_network(sample_in, data_dim, base_n_count)
    else:
        disc_out = discriminator_network(sample_in, data_dim, base_n_count)

    generator_model = models.Model(
        inputs=[noise_in], outputs=[fake_sample], name='generator')
    discriminator_model = models.Model(
        inputs=[sample_in], outputs=[disc_out], name='discriminator')

    # The combined model chains generator -> discriminator for generator
    # training; the discriminator is typically frozen by the caller.
    combined_model = models.Model(
        inputs=[noise_in],
        outputs=[discriminator_model(generator_model(noise_in))],
        name='combined')
    return generator_model, discriminator_model, combined_model
def rnn_model(params, training_dr_lstm=True, training_dr_ll=True):
    """RNN model for text.

    Pipeline: Embedding -> LSTM -> Dropout -> last layer (deterministic
    Dense or variational DenseReparameterization) -> Softmax.

    Args:
        params: dict of hyperparameters; reads 'fix_len', 'vocab_size',
            'emb_size', 'hidden_lstm_size', 'dropout_rate_lstm',
            'dropout_rate', 'random_seed', 'variational', 'n_class_in',
            'n_train', and 'reg_weight'.
        training_dr_lstm: if True, LSTM dropout stays active at inference
            time (Monte Carlo dropout).
        training_dr_ll: if True, last-layer dropout stays active at
            inference time.

    Returns:
        A Keras Model mapping token-id sequences to class probabilities.
    """
    # BUG FIX: '(x)' is just x, not a 1-tuple — Input(shape=...) expects an
    # iterable of dimensions, so the trailing comma is required.
    input_shape = (params['fix_len'],)
    seq_input = layers.Input(shape=input_shape)
    # vocab+1 because of padding
    seq_emb = layers.Embedding(
        params['vocab_size'] + 1,
        params['emb_size'],
        input_length=params['fix_len'])(seq_input)
    lstm_out = layers.LSTM(
        params['hidden_lstm_size'],
        dropout=params['dropout_rate_lstm'])(seq_emb, training=training_dr_lstm)
    out = layers.Dropout(
        rate=params['dropout_rate'],
        seed=params['random_seed'])(lstm_out, training=training_dr_ll)
    if params['variational']:
        # scale kl loss by number of training examples.
        # larger training dataset depends less on prior
        def scaled_kl_fn(p, q, _):
            return tfp.distributions.kl_divergence(q, p) / params['n_train']

        logits = tfpl.DenseReparameterization(
            params['n_class_in'],
            activation=None,
            kernel_divergence_fn=scaled_kl_fn,
            bias_posterior_fn=tfpl.util.default_mean_field_normal_fn(),
            name='last_layer')(out)
    else:
        logits = layers.Dense(
            params['n_class_in'],
            activation=None,
            kernel_regularizer=regularizers.l2(params['reg_weight']),
            bias_regularizer=regularizers.l2(params['reg_weight']),
            name='last_layer')(out)
    probs = layers.Softmax(axis=1)(logits)
    return models.Model(seq_input, probs, name='rnn')
def get_model(cfg, encoder_inputs, encoder_outputs):
    """Assemble the full seq2seq training model.

    The decoder consumes shifted target tokens (teacher forcing) and is
    initialized with the encoder's final state.

    Args:
        cfg: config object with num_input_tokens, num_output_tokens,
            and latent_dim.
        encoder_inputs: the encoder's Input tensor.
        encoder_outputs: the encoder's final hidden state tensor.

    Returns:
        A Keras Model taking [encoder_inputs, decoder_inputs] and producing
        per-step softmax distributions over output tokens.
    """
    decoder_inputs = layers.Input(shape=(None, ), name='Decoder-Input')
    # for teacher forcing
    embedded = layers.Embedding(
        cfg.num_input_tokens,
        cfg.latent_dim,
        name='Decoder-Embedding',
        mask_zero=False)(decoder_inputs)
    normed = layers.BatchNormalization(name='Decoder-Batchnorm-1')(embedded)
    gru_seq, _ = layers.GRU(
        cfg.latent_dim,
        return_state=True,
        return_sequences=True,
        name='Decoder-GRU')(normed, initial_state=encoder_outputs)
    normed_seq = layers.BatchNormalization(name='Decoder-Batchnorm-2')(gru_seq)
    decoder_outputs = layers.Dense(
        cfg.num_output_tokens,
        activation='softmax',
        name='Final-Output-Dense')(normed_seq)
    return models.Model([encoder_inputs, decoder_inputs], decoder_outputs)
def encoder_model(architecture='inception_v3',
                  pre_trained_dataset='imagenet',
                  downsample_factor=8):
    """Returns encoder model.

    Defines the encoder model to learn the representations for image
    dataset. In this example, we are considering the InceptionV3 model
    trained on ImageNet dataset, followed by simple average pooling-based
    downsampling.

    Args:
      architecture: Base architecture of encoder model (e.g. 'inception_v3')
      pre_trained_dataset: The dataset used to pre-train the encoder model
      downsample_factor: Downsample factor for the outputs

    Raises:
      NameError: Returns name errors if architecture is not 'inception_v3'
    """
    # NOTE(review): depends on a module-level `input_shape` — confirm it is
    # defined before this function is called.
    tf_input = layers.Input(shape=(input_shape[0], input_shape[1], 3))
    if architecture != 'inception_v3':
        raise NameError('Invalid architecture')
    backbone = applications.inception_v3.InceptionV3(
        input_tensor=tf_input,
        weights=pre_trained_dataset,
        include_top=False)
    pool_size = (downsample_factor, downsample_factor)
    pooled = layers.AveragePooling2D(
        pool_size, strides=pool_size)(backbone.output)
    return models.Model(backbone.input, pooled)
def define_models_CGAN(rand_dim, data_dim, label_dim, base_n_count, type=None):
    """Build generator, discriminator, and combined models for a conditional GAN.

    Like define_models_GAN, but every network also consumes a label vector:
    the generator is conditioned on labels, and the discriminator sees a
    sample concatenated with its label.

    Args:
        rand_dim: Dimension of the generator's random-noise input.
        data_dim: Dimension of a data sample (without the label).
        label_dim: Dimension of the conditioning label vector.
        base_n_count: Base layer width passed to the sub-networks.
        type: If 'Wasserstein', the critic network is used as the
            discriminator; otherwise the standard discriminator network.

    Returns:
        Tuple of (generator_model, discriminator_model, combined_model).
    """
    noise_in = layers.Input(shape=(rand_dim, ))
    label_in = layers.Input(shape=(label_dim, ))  # conditioning labels
    fake_sample = generator_network_w_label(
        noise_in, label_in, data_dim, label_dim, base_n_count)

    # The discriminator input is sample + label concatenated.
    sample_in = layers.Input(shape=(data_dim + label_dim, ))
    if type == 'Wasserstein':
        disc_out = critic_network(sample_in, data_dim + label_dim,
                                  base_n_count)
    else:
        disc_out = discriminator_network(sample_in, data_dim + label_dim,
                                         base_n_count)

    generator_model = models.Model(
        inputs=[noise_in, label_in], outputs=[fake_sample], name='generator')
    discriminator_model = models.Model(
        inputs=[sample_in], outputs=[disc_out], name='discriminator')
    combined_model = models.Model(
        inputs=[noise_in, label_in],
        outputs=[discriminator_model(generator_model([noise_in, label_in]))],
        name='combined')
    return generator_model, discriminator_model, combined_model
def get_decoder_model(model):
    """Rebuild an inference-time decoder from a trained seq2seq model.

    Reuses the trained decoder layers by name, but replaces the encoder
    connection with an explicit hidden-state Input so decoding can be run
    one step at a time.

    Args:
        model: trained Keras model containing the named decoder layers.

    Returns:
        A Keras Model mapping [decoder_inputs, hidden_state_input] to
        [token distribution, next GRU state].
    """
    embedding = model.get_layer('Decoder-Embedding')
    latent_dim = embedding.output_shape[-1]
    decoder_inputs = model.get_layer('Decoder-Input').input

    dec_bn = model.get_layer('Decoder-Batchnorm-1')(embedding(decoder_inputs))
    # Explicit state input replaces the encoder output used at train time.
    state_in = layers.Input(shape=(latent_dim, ), name='hidden_state_input')
    gru_out, gru_state_out = model.get_layer('Decoder-GRU')([dec_bn, state_in])
    dense_out = model.get_layer('Final-Output-Dense')(
        model.get_layer('Decoder-Batchnorm-2')(gru_out))

    return models.Model([decoder_inputs, state_in],
                        [dense_out, gru_state_out])
def get_encoder_model(cfg):
    """Build the encoder: token ids -> final GRU hidden state.

    Args:
        cfg: config object with len_input_seq, num_input_tokens,
            and latent_dim.

    Returns:
        Tuple (encoder_model, encoder_inputs, encoder_outputs), where
        encoder_outputs is the encoder model applied to encoder_inputs.
    """
    encoder_inputs = layers.Input(
        shape=(cfg.len_input_seq, ), name='Encoder-Input')
    embedded = layers.Embedding(
        cfg.num_input_tokens,
        cfg.latent_dim,
        name='Encoder-Embedding',
        mask_zero=False)(encoder_inputs)
    normed = layers.BatchNormalization(name='Encoder-Batchnorm-1')(embedded)
    # Only the final hidden state is kept; the sequence output is discarded.
    _, state_h = layers.GRU(
        cfg.latent_dim, return_state=True, name='Encoder-Last-GRU')(normed)
    encoder_model = models.Model(
        inputs=encoder_inputs, outputs=state_h, name='Encoder-Model')
    return encoder_model, encoder_inputs, encoder_model(encoder_inputs)
def ResNet50(method, num_classes, num_updates, dropout_rate):
    """Instantiates the ResNet50 architecture.

    Args:
      method: `str`, method for accounting for uncertainty. Must be one of
        ['vanilla', 'll_dropout', 'll_svi', 'dropout', 'svi', 'dropout_nofirst']
      num_classes: `int` number of classes for image classification.
      num_updates: integer, total steps in an epoch (for weighting the loss)
      dropout_rate: Dropout rate for ll_dropout, dropout methods.

    Returns:
      A Keras model instance.
    pylint: disable=invalid-name
    """
    # Determine proper input shape
    if backend.image_data_format() == 'channels_first':
        input_shape = (3, 224, 224)
        bn_axis = 1  # channels axis for BatchNormalization
    else:
        input_shape = (224, 224, 3)
        bn_axis = 3
    # Dropout rate must be positive exactly when a dropout method is chosen.
    if (method in ['dropout', 'll_dropout', 'dropout_nofirst'
                  ]) != (dropout_rate > 0.):
        raise ValueError(
            'Dropout rate should be nonzero iff a dropout method is used.'
            'Method is {}, dropout is {}.'.format(method, dropout_rate))
    use_variational_layers = method == 'svi'
    # Dropout inside hidden layers only applies to the full-network
    # dropout methods; 'll_dropout' and 'll_svi' only change the last layer.
    hidden_layer_dropout = dropout_rate if method in [
        'dropout', 'dropout_nofirst'
    ] else 0.
    img_input = layers.Input(shape=input_shape)
    x = layers.ZeroPadding2D(padding=(3, 3), name='conv1_pad')(img_input)
    # Dropout on the raw input, skipped for 'dropout_nofirst'.
    # NOTE(review): for 'll_dropout' this branch is entered with
    # hidden_layer_dropout == 0, so the Dropout layer is a no-op — confirm
    # that is intended.
    if (dropout_rate > 0.) and (method != 'dropout_nofirst'):
        x = layers.Dropout(hidden_layer_dropout)(x, training=True)
    x = layers.Conv2D(64, (7, 7),
                      use_bias=False,
                      strides=(2, 2),
                      padding='valid',
                      kernel_initializer='he_normal',
                      kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
                      name='conv1')(x)
    x = layers.BatchNormalization(axis=bn_axis,
                                  momentum=BATCH_NORM_DECAY,
                                  epsilon=BATCH_NORM_EPSILON,
                                  name='bn_conv1')(x)
    x = layers.Activation('relu')(x)
    x = layers.ZeroPadding2D(padding=(1, 1), name='pool1_pad')(x)
    x = layers.MaxPooling2D((3, 3), strides=(2, 2))(x)
    # Pre-bind the shared block arguments so the stage wiring below stays
    # readable.
    conv_block = functools.partial(
        conv_block_base,
        num_updates=num_updates,
        dropout_rate=hidden_layer_dropout,
        use_variational_layers=use_variational_layers)
    identity_block = functools.partial(
        identity_block_base,
        num_updates=num_updates,
        dropout_rate=hidden_layer_dropout,
        use_variational_layers=use_variational_layers)
    # Standard ResNet50 stage layout: 3 + 4 + 6 + 3 residual blocks.
    x = conv_block(x, 3, [64, 64, 256], stage=2, block='a', strides=(1, 1))
    x = identity_block(x, 3, [64, 64, 256], stage=2, block='b')
    x = identity_block(x, 3, [64, 64, 256], stage=2, block='c')
    x = conv_block(x, 3, [128, 128, 512], stage=3, block='a')
    x = identity_block(x, 3, [128, 128, 512], stage=3, block='b')
    x = identity_block(x, 3, [128, 128, 512], stage=3, block='c')
    x = identity_block(x, 3, [128, 128, 512], stage=3, block='d')
    x = conv_block(x, 3, [256, 256, 1024], stage=4, block='a')
    x = identity_block(x, 3, [256, 256, 1024], stage=4, block='b')
    x = identity_block(x, 3, [256, 256, 1024], stage=4, block='c')
    x = identity_block(x, 3, [256, 256, 1024], stage=4, block='d')
    x = identity_block(x, 3, [256, 256, 1024], stage=4, block='e')
    x = identity_block(x, 3, [256, 256, 1024], stage=4, block='f')
    x = conv_block(x, 3, [512, 512, 2048], stage=5, block='a')
    x = identity_block(x, 3, [512, 512, 2048], stage=5, block='b')
    x = identity_block(x, 3, [512, 512, 2048], stage=5, block='c')
    x = layers.GlobalAveragePooling2D(name='avg_pool')(x)
    # training=True keeps dropout active at inference (MC dropout).
    if dropout_rate > 0.:
        x = layers.Dropout(dropout_rate)(x, training=True)
    if method in ['ll_svi', 'svi']:
        # Variational last layer; KL term weighted per update step.
        x = tfpl.dense_variational_v2.DenseVariational(
            units=num_classes,
            make_posterior_fn=posterior_mean_field,
            make_prior_fn=functools.partial(prior_trainable,
                                            num_updates=num_updates),
            use_bias=True,
            kl_weight=1. / num_updates,
            kl_use_exact=True,
            name='fc1000')(x)
    else:
        x = layers.Dense(num_classes,
                         kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
                         bias_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
                         name='fc1000')(x)
    # Create model.
    return models.Model(img_input, x, name='resnet50')
def main(_):
    """Evaluate saved text classifiers on in-dist., skewed, and OOD data.

    Loads one or more checkpoints (an ensemble when FLAGS.is_ensemble),
    averages their predictions, optionally applies temperature scaling,
    reports accuracy and OOD-detection AUC, and pickles the predictions.
    """
    # Temperature-scaling path uses the TF2-style calibration library.
    if FLAGS.is_tempscale:
        tf.enable_v2_behavior()
    params = {
        'num_epochs': FLAGS.num_epochs,
        'fix_len': FLAGS.fix_len,
        'batch_size': FLAGS.batch_size,
        'n_class': FLAGS.n_class,
        'emb_size': FLAGS.emb_size,
        'vocab_size': FLAGS.vocab_size,
        'hidden_lstm_size': FLAGS.hidden_lstm_size,
        'dropout_rate': FLAGS.dropout_rate,
        'dropout_rate_lstm': FLAGS.dropout_rate_lstm,
        'learning_rate': FLAGS.learning_rate,
        'reg_weight': FLAGS.reg_weight,
        'tr_out_dir': FLAGS.tr_out_dir,
        'data_pkl_file': FLAGS.data_pkl_file,
        'master': FLAGS.master,
        'clip_norm': FLAGS.clip_norm,
        'random_seed': FLAGS.random_seed,
        'variational': FLAGS.variational,
        'n_class_in': None,  # filled in from the checkpoint's params.json
        'n_train': None,  # filled in from the checkpoint's params.json
    }
    # load in-dist. and skewed in-dist. datasets
    data = classifier.load_np_dataset(params['data_pkl_file'])
    # load OOD dataset
    n_ood = 5600
    test_lm1b_x_pad, _ = load_ood_dataset(n_ood, params['fix_len'], data.vocab,
                                          params['vocab_size'])
    # list of ckpt dir
    model_dir = os.path.join(FLAGS.model_dir, FLAGS.method)
    ckpt_dirs = tf.io.gfile.listdir(model_dir)
    # how many replicates for ensemble
    if FLAGS.is_ensemble:
        assert len(ckpt_dirs) > 1
        n_ensemble = len(ckpt_dirs)
        # NOTE(review): unreachable given the assert above — confirm whether
        # the assert or this fatal check is the intended guard.
        if n_ensemble == 0:
            logging.fatal('no model ckpt')
    else:
        n_ensemble = 1
    pred = {}  # dict for final prediction score
    # dict for saving pred from different models
    pred_accum = {'in': [], 'skew': [], 'ood': []}
    for i in range(n_ensemble):
        ckpt_dir = os.path.join(model_dir, ckpt_dirs[i], 'model')
        if not tf.io.gfile.isdir(ckpt_dir):
            continue
        print('ckpt_dir={}'.format(ckpt_dir))
        # load params
        with tf.gfile.GFile(os.path.join(ckpt_dir, 'params.json'),
                            mode='rb') as f:
            params_json = yaml.safe_load(f)
        params.update(params_json)
        params['master'] = ''
        print('params after load={}'.format(params))
        tf.reset_default_graph()
        # create model; keep dropout active at predict time only when the
        # checkpoint was trained with nonzero dropout (MC-dropout sampling).
        model = classifier.rnn_model(
            params,
            training_dr_lstm=params['dropout_rate_lstm'] != 0.0,
            training_dr_ll=params['dropout_rate'] != 0.0)
        # load model
        model.load_weights(ckpt_dir + '/model.ckpt')
        # predict
        if FLAGS.method in ['ll-svi', 'dropout', 'll-dropout']:
            # need to run multiple times and get mean prediction
            assert FLAGS.n_pred_sample > 1
        else:
            # NOTE(review): mutates the flag in place for deterministic
            # methods so the sampling loop below runs once.
            FLAGS.n_pred_sample = 1
        pred_k = {'in': [], 'skew': [], 'ood': []}
        for _ in range(FLAGS.n_pred_sample):
            pred_tr_in = model.predict(data.in_sample_examples)
            acc_tr_in = np.mean(
                data.in_sample_labels == np.argmax(pred_tr_in, axis=1))
            pred_test_in = model.predict(data.test_in_sample_examples)
            acc_test_in = np.mean(
                data.test_in_sample_labels == np.argmax(pred_test_in, axis=1))
            print('in-dist. acc_tr={}, acc_test={}'.format(
                acc_tr_in, acc_test_in))
            pred_test_skew = model.predict(data.test_oos_examples)
            pred_test_ood = model.predict(test_lm1b_x_pad)
            if FLAGS.is_tempscale:
                # temperature scaling
                # logits for temp scaling: tap the pre-softmax layer output
                last_layer_model = models.Model(
                    inputs=model.input,
                    outputs=model.get_layer('last_layer').output)
                logits = last_layer_model.predict(data.dev_in_sample_examples)
                # Temperature is fit on the dev split, then applied to all
                # three test splits.
                opt_temp = calibration_lib.find_scaling_temperature(
                    data.dev_in_sample_labels, logits, temp_range=(1e-5, 1e5))
                pred_test_in = calibration_lib.apply_temperature_scaling(
                    opt_temp, pred_test_in)
                pred_test_skew = calibration_lib.apply_temperature_scaling(
                    opt_temp, pred_test_skew)
                pred_test_ood = calibration_lib.apply_temperature_scaling(
                    opt_temp, pred_test_ood)
            # save in a list
            pred_k['in'].append(pred_test_in)
            pred_k['skew'].append(pred_test_skew)
            pred_k['ood'].append(pred_test_ood)
        # Mean over the MC prediction samples for this checkpoint.
        pred_k_in_mean = np.mean(np.stack(pred_k['in']), axis=0)
        pred_k_skew_mean = np.mean(np.stack(pred_k['skew']), axis=0)
        pred_k_ood_mean = np.mean(np.stack(pred_k['ood']), axis=0)
        pred_accum['in'].append(pred_k_in_mean)
        pred_accum['skew'].append(pred_k_skew_mean)
        pred_accum['ood'].append(pred_k_ood_mean)
    # if ensemble, then take the mean
    pred['in'] = np.mean(np.stack(pred_accum['in']), axis=0)
    pred['skew'] = np.mean(np.stack(pred_accum['skew']), axis=0)
    pred['ood'] = np.mean(np.stack(pred_accum['ood']), axis=0)
    # prediction accuracy for in-dist.
    pred['in_true_labels'] = data.test_in_sample_labels
    acc = np.mean(data.test_in_sample_labels == np.argmax(pred['in'], axis=1))
    print('== (optionally ensemble) acc={} =='.format(acc))
    # OOD detection: max predicted probability as the confidence score,
    # in-dist. examples treated as the positive-label-0 class for AUC.
    print('== eval in and skew using max(Py|x) ==')
    neg = list(np.max(pred['in'], axis=1))
    pos = list(np.max(pred['skew'], axis=1))
    print('auc={}'.format(compute_auc(neg, pos, pos_label=0)))
    print('== eval in and ood using max(Py|x) ==')
    neg = list(np.max(pred['in'], axis=1))
    pos = list(np.max(pred['ood'], axis=1))
    print('auc={}'.format(compute_auc(neg, pos, pos_label=0)))
    # save the predictions
    pred_file_name = 'pred_nensemb{}_npred{}_tempscale{}.pkl'.format(
        len(pred_accum['in']), FLAGS.n_pred_sample, FLAGS.is_tempscale)
    with tf.gfile.Open(os.path.join(model_dir, pred_file_name), 'wb') as f:
        pickle.dump(pred, f, protocol=2)
def __init__(self,
             obs_size,
             action_size,
             actor_model=None,
             critic_model=None,
             use_target_network=False,
             learning_rate=1e-3,
             reward_discount=0.99,
             tau=0.001):
    """Build an actor-critic agent's networks and TF1 training graph.

    Args:
        obs_size: observation shape; unpacked with * into placeholder
            shapes, so it is expected to be a tuple — TODO confirm.
        action_size: dimensionality of the (continuous) action vector.
        actor_model: optional pre-built actor; a default MLP with tanh
            output is created when None.
        critic_model: optional pre-built critic; a default MLP over
            concatenated (state, action) is created when None.
        use_target_network: if True, clone target copies of both networks.
        learning_rate: Adam learning rate for the critic (actor uses
            learning_rate / 10).
        reward_discount: discount factor stored as self.rd.
        tau: soft-update coefficient stored as self.tau.
    """
    self.obs_size = obs_size
    self.action_size = action_size
    self.use_target_network = use_target_network
    self.lr = learning_rate
    self.rd = reward_discount
    self.tau = tau

    # Create models if not provided
    if actor_model is None:
        # Default actor: 3 hidden ReLU layers, tanh-squashed actions.
        actor_model = models.Sequential()
        actor_model.add(
            layers.Dense(16, input_shape=obs_size, activation='relu'))
        actor_model.add(layers.Dense(16, activation='relu'))
        actor_model.add(layers.Dense(16, activation='relu'))
        actor_model.add(
            layers.Dense(action_size, name='action', activation='tanh'))
        actor_model.summary()
    self.actor_model = actor_model
    if critic_model is None:
        # Default critic: Q(s, a) from concatenated state and action.
        state_input = layers.Input(shape=obs_size)
        action_input = layers.Input(shape=action_size)
        all_input = layers.Concatenate()([state_input, action_input])
        h1 = layers.Dense(32, activation='relu')(all_input)
        h2 = layers.Dense(32, activation='relu')(h1)
        h3 = layers.Dense(32, activation='relu')(h2)
        output = layers.Dense(1, name='q-value')(h3)
        critic_model = models.Model(inputs=[state_input, action_input],
                                    outputs=output)
        critic_model.summary()
    self.critic_model = critic_model
    if use_target_network:
        # Frozen-weight copies for stable TD targets (updated elsewhere,
        # presumably via tau-weighted soft updates — confirm).
        self.target_network_critic = tf.keras.models.clone_model(
            self.critic_model)
        self.target_network_actor = tf.keras.models.clone_model(
            self.actor_model)

    # TF1-style placeholders for one training batch.
    self.state_ph = tf.placeholder(tf.float32, shape=(None, *obs_size))
    self.actions_ph = tf.placeholder(tf.float32, shape=(None, action_size))
    # NOTE(review): shape=(None) is just None (unconstrained shape), not
    # (None,) — confirm whether a rank-1 shape was intended here and below.
    self.rewards_ph = tf.placeholder(tf.float32, shape=(None))
    self.next_states_ph = tf.placeholder(tf.float32, shape=(None, *obs_size))
    self.is_done_ph = tf.placeholder(tf.float32, shape=(None))

    self.loss = self.q_loss(self.state_ph, self.actions_ph, self.rewards_ph,
                            self.next_states_ph, self.is_done_ph)
    # Actor objective: maximize expected reward, so minimize its negation.
    aer = self.action_expected_reward(self.state_ph)
    self.train_critic_step = tf.train.AdamOptimizer(
        learning_rate).minimize(
            self.loss,
            var_list=self.critic_model.trainable_variables)
    self.train_actor_step = tf.train.AdamOptimizer(
        learning_rate / 10).minimize(
            -aer, var_list=self.actor_model.trainable_variables)
    # NOTE(review): `sess` is a global TF session — confirm it is created
    # before this constructor runs.
    sess.run(tf.global_variables_initializer())