def define_graph(config):
  network_tpl = tf.make_template('network', network, config=config)
  inputs = tf.placeholder(tf.float32, [None, config.num_inputs])
  targets = tf.placeholder(tf.float32, [None, 1])
  num_visible = tf.placeholder(tf.int32, [])
  batch_size = tf.to_float(tf.shape(inputs)[0])
  data_mean, data_noise, data_uncertainty = network_tpl(inputs)
  ood_inputs = inputs + tf.random_normal(
      tf.shape(inputs), 0.0, config.noise_std)
  ood_mean, ood_noise, ood_uncertainty = network_tpl(ood_inputs)
  losses = [
      -tfd.Normal(data_mean, data_noise).log_prob(targets),
      -tfd.Bernoulli(data_uncertainty).log_prob(0),
      -tfd.Bernoulli(ood_uncertainty).log_prob(1),
  ]
  if config.center_at_target:
    losses.append(-tfd.Normal(ood_mean, ood_noise).log_prob(targets))
  loss = sum(tf.reduce_sum(loss) for loss in losses) / batch_size
  optimizer = tf.train.AdamOptimizer(config.learning_rate)
  gradients, variables = zip(*optimizer.compute_gradients(
      loss, colocate_gradients_with_ops=True))
  if config.clip_gradient:
    gradients, _ = tf.clip_by_global_norm(gradients, config.clip_gradient)
  optimize = optimizer.apply_gradients(zip(gradients, variables))
  data_uncertainty = tf.sigmoid(data_uncertainty)
  if not config.center_at_target:
    data_mean = (1 - data_uncertainty) * data_mean + data_uncertainty * 0
  data_noise = (1 - data_uncertainty) * data_noise + data_uncertainty * 0.1
  return tools.AttrDict(locals())

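# A minimal usage sketch (not part of the original code): how the AttrDict
# returned by define_graph might be driven from a TF1 session. The helper
# name `train_epoch` and the batching loop are assumptions for illustration.
def train_epoch(sess, graph, inputs, targets, batch_size=10):
  # Visit the training set once in shuffled mini-batches, taking one
  # optimizer step per batch; num_visible is the size of the visible set.
  indices = np.random.permutation(len(inputs))
  for start in range(0, len(indices), batch_size):
    batch = indices[start:start + batch_size]
    sess.run(graph.optimize, {
        graph.inputs: inputs[batch],
        graph.targets: targets[batch],
        graph.num_visible: len(inputs),
    })
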
def define_graph(config):
  network_tpl = tf.make_template('network', network, config=config)
  inputs = tf.placeholder(tf.float32, [None, config.num_inputs])
  targets = tf.placeholder(tf.float32, [None, 1])
  num_visible = tf.placeholder(tf.int32, [])
  batch_size = tf.shape(inputs)[0]
  # Predictive distribution and distribution over means from the network.
  data_dist, mean_dist = network_tpl(inputs)
  assert len(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
  divergence = sum([
      tf.reduce_sum(tensor)
      for tensor in tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
  ])
  num_batches = tf.to_float(num_visible) / tf.to_float(batch_size)
  losses = [
      config.divergence_scale * divergence / num_batches,
      -data_dist.log_prob(targets),
  ]
  loss = sum(tf.reduce_sum(loss) for loss in losses) / tf.to_float(batch_size)
  optimizer = tf.train.AdamOptimizer(config.learning_rate)
  gradients, variables = zip(
      *optimizer.compute_gradients(loss, colocate_gradients_with_ops=True))
  if config.clip_gradient:
    gradients, _ = tf.clip_by_global_norm(gradients, config.clip_gradient)
  optimize = optimizer.apply_gradients(zip(gradients, variables))
  data_mean = mean_dist.mean()
  data_noise = data_dist.stddev()
  data_uncertainty = mean_dist.stddev()
  return tools.AttrDict(locals())

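# Scaling check (illustrative, not from the original code): the collected
# regularization losses hold the KL(q || p) terms of the Bayesian layers.
# Dividing by num_batches = num_visible / batch_size spreads the full-dataset
# KL across the batches of one epoch, so it is counted exactly once per epoch.
num_visible_, batch_size_, kl = 1000, 10, 42.0
num_batches_ = num_visible_ / batch_size_
per_batch_kl = kl / num_batches_
print(per_batch_kl * num_batches_)  # 42.0: full KL, counted once per epoch.
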
def generate_vargrad_dataset(length=1000, noise_slope=0.2):
  random = np.random.RandomState(0)
  inputs = np.linspace(-1, 1, length)
  noise_std = np.maximum(0, (inputs + 1) * noise_slope)
  targets = 0.5 * np.sin(25 * inputs) + random.normal(0, noise_std)
  targets += 0.5 * inputs
  domain = np.linspace(-1.2, 1.2, 1000)
  train_split = np.repeat([False, True, False, True, False], 200)
  test_split = ~train_split
  domain, inputs, targets = domain[:, None], inputs[:, None], targets[:, None]
  test_inputs, test_targets = inputs[test_split], targets[test_split]
  train_inputs, train_targets = inputs[train_split], targets[train_split]
  train = tools.AttrDict(inputs=train_inputs, targets=train_targets)
  test = tools.AttrDict(inputs=test_inputs, targets=test_targets)
  return tools.AttrDict(
      domain=domain, train=train, test=test, target_scale=1)

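# Quick sanity check (illustrative, not from the original code): the splits
# above interleave train and test regions, leaving gaps in the input domain
# where epistemic uncertainty should grow.
data = generate_vargrad_dataset()
print(data.train.inputs.shape)  # (400, 1)
print(data.test.inputs.shape)   # (600, 1)
print(data.domain.shape)        # (1000, 1)
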
def load_numpy_dataset(directory, train_amount=None, test_amount=None):
  directory = os.path.expanduser(directory)
  random = np.random.RandomState(0)
  filepath = directory + '-train-inputs.npy'
  with tf.gfile.Open(filepath, 'rb') as file_:
    train_inputs = np.load(file_).astype(np.float32)
  filepath = directory + '-train-targets.npy'
  with tf.gfile.Open(filepath, 'rb') as file_:
    train_targets = np.load(file_).astype(np.float32)
  filepath = directory + '-test-inputs.npy'
  with tf.gfile.Open(filepath, 'rb') as file_:
    test_inputs = np.load(file_).astype(np.float32)
  filepath = directory + '-test-targets.npy'
  with tf.gfile.Open(filepath, 'rb') as file_:
    test_targets = np.load(file_).astype(np.float32)
  if train_amount:
    train_indices = random.permutation(len(train_inputs))[:train_amount]
    train_inputs = train_inputs[train_indices]
    train_targets = train_targets[train_indices]
  if test_amount:
    test_indices = random.permutation(len(test_inputs))[:test_amount]
    test_inputs = test_inputs[test_indices]
    test_targets = test_targets[test_indices]
  domain = test_inputs[::10]  # Subsample inputs for visualization.
  mean = train_inputs.mean(0)[None]
  std = train_inputs.std(0)[None] + 1e-6
  train_inputs = (train_inputs - mean) / std
  test_inputs = (test_inputs - mean) / std
  domain = (domain - mean) / std
  mean = train_targets.mean(0)[None]
  std = train_targets.std(0)[None] + 1e-6
  train_targets = (train_targets - mean) / std
  test_targets = (test_targets - mean) / std
  train = tools.AttrDict(inputs=train_inputs, targets=train_targets)
  test = tools.AttrDict(inputs=test_inputs, targets=test_targets)
  return tools.AttrDict(
      domain=domain, train=train, test=test, target_scale=std)

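# Hypothetical usage (the file prefix is an assumption): the loader expects
# four files named <prefix>-train-inputs.npy, <prefix>-train-targets.npy,
# <prefix>-test-inputs.npy, and <prefix>-test-targets.npy.
data = load_numpy_dataset('~/data/regression', train_amount=400)
print(data.train.inputs.shape, data.test.targets.shape)
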
def default_schedule(model):
  config = tools.AttrDict()
  config.num_epochs = 2000
  config.num_initial = 10
  config.num_select = 10
  config.select_after_epochs = range(50, 2000, 50)
  config.eval_after_epochs = range(0, 2000, 50)
  config.log_after_epochs = range(0, 2000, 500)
  config.visualize_after_epochs = range(50, 2000, 500)
  config.batch_size = 10
  config.temperature = 0.5
  config.filetype = 'png'
  if model == 'det':
    config.has_uncertainty = False
  return config

def passive_schedule(model):
  config = tools.AttrDict()
  config.num_epochs = 100
  # For fully passive training, raise num_initial and num_select to cover the
  # whole dataset (bounded by available memory).
  config.num_initial = 1000
  config.num_select = 100
  # An empty range skips the active-learning selection step entirely.
  config.select_after_epochs = range(0)
  config.eval_after_epochs = range(0, 2000, 50)
  config.log_after_epochs = range(config.num_epochs)
  config.visualize_after_epochs = range(config.num_epochs)
  config.batch_size = 10
  config.temperature = 0.5
  config.filetype = 'png'
  if model == 'det':
    config.has_uncertainty = False
  return config

def define_graph(config):
  network_tpl = tf.make_template('network', network, config=config)
  inputs = tf.placeholder(tf.float32, [None, config.num_inputs])
  targets = tf.placeholder(tf.float32, [None, 1])
  num_visible = tf.placeholder(tf.int32, [])
  batch_size = tf.shape(inputs)[0]
  data_dist, mean_dist = network_tpl(inputs)
  ood_inputs = inputs + tf.random_normal(
      tf.shape(inputs), 0.0, config.noise_std)
  ood_data_dist, ood_mean_dist = network_tpl(ood_inputs)
  assert len(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
  divergence = sum([
      tf.reduce_sum(tensor)
      for tensor in tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
  ])
  num_batches = tf.to_float(num_visible) / tf.to_float(batch_size)
  if config.center_at_target:
    ood_mean_prior = tfd.Normal(targets, 1.0)
  else:
    ood_mean_prior = tfd.Normal(0.0, 1.0)
  losses = [
      config.divergence_scale * divergence / num_batches,
      -data_dist.log_prob(targets),
      config.ncp_scale * tfd.kl_divergence(ood_mean_prior, ood_mean_dist),
  ]
  if config.ood_std_prior:
    # Center both distributions at the (stopped-gradient) predicted mean so
    # that this KL only regularizes the output stddev at OOD inputs.
    sg = tf.stop_gradient
    ood_std_dist = tfd.Normal(sg(ood_mean_dist.mean()), ood_data_dist.stddev())
    ood_std_prior = tfd.Normal(sg(ood_mean_dist.mean()), config.ood_std_prior)
    divergence = tfd.kl_divergence(ood_std_prior, ood_std_dist)
    losses.append(config.ncp_scale * divergence)
  loss = sum(tf.reduce_sum(loss) for loss in losses) / tf.to_float(batch_size)
  optimizer = tf.train.AdamOptimizer(config.learning_rate)
  gradients, variables = zip(
      *optimizer.compute_gradients(loss, colocate_gradients_with_ops=True))
  if config.clip_gradient:
    gradients, _ = tf.clip_by_global_norm(gradients, config.clip_gradient)
  optimize = optimizer.apply_gradients(zip(gradients, variables))
  data_mean = mean_dist.mean()
  data_noise = data_dist.stddev()
  data_uncertainty = mean_dist.stddev()
  return tools.AttrDict(locals())

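# For intuition (standalone sketch, not from the original code): the NCP term
# above is KL(prior || model) between Gaussians, which grows sharply when the
# model's mean distribution becomes narrower than the wide prior at OOD inputs.
def gaussian_kl(m1, s1, m2, s2):
  # Closed-form KL(N(m1, s1) || N(m2, s2)).
  return np.log(s2 / s1) + (s1 ** 2 + (m1 - m2) ** 2) / (2 * s2 ** 2) - 0.5

print(gaussian_kl(0.0, 1.0, 0.3, 0.2))  # ~11.5: overconfident OOD, big loss.
print(gaussian_kl(0.0, 1.0, 0.3, 1.0))  # ~0.05: wide OOD output, small loss.
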
def default_config(model):
  config = tools.AttrDict()
  config.num_inputs = 1
  config.layer_sizes = [200, 200]  # Alternative: [50, 50]
  if model == 'bbb':
    config.divergence_scale = 0.1
  if model == 'bbb_ncp':
    config.noise_std = 0.5
    config.ncp_scale = 0.1
    config.divergence_scale = 0
    config.ood_std_prior = 0.1
    config.center_at_target = True
  if model == 'det_mix_ncp':
    config.noise_std = 0.5
    config.center_at_target = True
  config.learning_rate = 3e-4
  config.weight_std = 0.1
  config.clip_gradient = 100.0
  return config

def default_config(model):
  config = tools.AttrDict()
  # The reference setup uses 8 input features, but it is unclear which ones.
  # We use our own feature set instead, which is currently 19-dimensional
  # after one-hot encoding.
  config.num_inputs = 19
  config.layer_sizes = [50, 50]
  if model == 'bbb':
    config.divergence_scale = 1.0
  if model == 'bbb_ncp':
    config.noise_std = 0.1
    config.ncp_scale = 0.1
    config.divergence_scale = 0
    config.ood_std_prior = None
    config.center_at_target = True
  if model == 'det_mix_ncp':
    config.noise_std = 0.1
    config.center_at_target = True
  config.learning_rate = 1e-3
  config.weight_std = 0.1
  config.clip_gradient = 100.0
  return config

def default_config(model):
  config = tools.AttrDict()
  config.num_inputs = 784
  config.layer_sizes = []
  config.bayesian_layer_sizes = [200]
  if model in ('bbb', 'bbb_mnist'):
    config.divergence_scale = 1.0
  if model == 'bbb_ncp':
    config.noise_std = 0.1
    config.ncp_scale = 0.1
    config.divergence_scale = 1
    config.ood_std_prior = None
    config.center_at_target = True
  if model == 'det_mix_ncp':
    config.noise_std = 0.1
    config.center_at_target = True
  config.learning_rate = 1e-3
  config.weight_std = 0.1
  config.clip_gradient = 100.0
  return config

def define_graph(config):
  network_tpl = tf.make_template('network', network, config=config)
  inputs = tf.placeholder(tf.float32, [None, config.num_inputs])
  targets = tf.placeholder(tf.float32, [None, 1])
  num_visible = tf.placeholder(tf.int32, [])
  batch_size = tf.shape(inputs)[0]
  data_dist = network_tpl(inputs)
  losses = [
      -data_dist.log_prob(targets),
  ]
  loss = sum(tf.reduce_sum(loss) for loss in losses) / tf.to_float(batch_size)
  optimizer = tf.train.AdamOptimizer(config.learning_rate)
  gradients, variables = zip(*optimizer.compute_gradients(
      loss, colocate_gradients_with_ops=True))
  if config.clip_gradient:
    gradients, _ = tf.clip_by_global_norm(gradients, config.clip_gradient)
  optimize = optimizer.apply_gradients(zip(gradients, variables))
  data_mean = data_dist.mean()
  data_noise = data_dist.stddev()
  # The deterministic model has no epistemic uncertainty; reuse the
  # predictive stddev so the interface matches the Bayesian models.
  data_uncertainty = data_dist.stddev()
  return tools.AttrDict(locals())