def __init__(self, value, transform=None, fixed=False, name=None,
             learning_rate=None, summ=False):
    """Set up a parameter backed by either a constant or an optimisable variable.

    Args:
        value: Initial value of the parameter.
        transform: Object providing ``backward``; ``transforms.Identity()``
            is used when omitted.
        fixed: When truthy the parameter is stored as a ``tf.constant``,
            otherwise as a ``Variable`` holding ``transform.backward(value)``.
        name: Label kept in ``self.name`` (falls back to ``"param"``). The raw
            ``name`` argument, possibly ``None``, is what is forwarded to
            TensorFlow.
        learning_rate: Optional rate handed to ``set_learning_rate``; only
            applied when ``fixed`` is exactly ``False``.
        summ: When truthy, ``variable_summaries`` is called on the stored
            tensor.
    """
    self.value = value
    self.fixed = fixed
    self.name = "param" if name is None else name
    self.transform = transforms.Identity() if transform is None else transform

    if not self.fixed:
        # Optimisation happens on the back-transformed value.
        backward_value = self.transform.backward(self.value)
        self.tf_opt_var = Variable(backward_value, name=name, dtype=float_type)
    else:
        self.tf_opt_var = tf.constant(self.value, name=name, dtype=float_type)

    has_custom_rate = learning_rate is not None and fixed is False
    if has_custom_rate:
        self.tf_opt_var.set_learning_rate(learning_rate)

    if summ:
        self.variable_summaries(self.tf_opt_var)
def __init__(self, classifier: Model):
    """Store the classifier and create the running statistics.

    For each of the two kinds ``"real"`` and ``"gen"`` a non-trainable
    mean vector, covariance matrix and example counter are created, all
    float64 and all starting at zero.

    Args:
        classifier: The pre-trained classifier model; the feature count is
            read from ``classifier.feature_extract.output_shape[-1]``.
    """
    self.classifier = classifier

    # Zero-valued templates, kept so the running metrics can be reset later.
    feature_dim = classifier.feature_extract.output_shape[-1]
    self._init_mean = tf.zeros([feature_dim], dtype=tf.float64)
    self._init_cov = tf.zeros([feature_dim, feature_dim], dtype=tf.float64)
    self._init_num = tf.constant(0, dtype=tf.float64)

    kinds = ("real", "gen")

    def _running(template):
        # One independent, non-trainable variable per kind.
        return {kind: Variable(template, trainable=False) for kind in kinds}

    # Running mean / covariance of the Gaussian plus the total example count.
    self._mean: Dict[str, Variable] = _running(self._init_mean)
    self._cov: Dict[str, Variable] = _running(self._init_cov)
    self._total_num: Dict[str, Variable] = _running(self._init_num)
def train_step(self, paragraph_tokens, ref_question, global_step: tf.Variable):
    """Compute and log the mean token loss of one batch, token by token.

    The paragraph is fed as the first context; afterwards each reference
    token of the question becomes the next context. Only the loss is
    computed and written as a summary here; no optimizer step is visible
    in this method.

    Args:
        paragraph_tokens: Token tensor indexed as ``[batch, position]``.
        ref_question: Reference question tokens indexed as
            ``[batch, position]``.
        global_step: Step counter; incremented by one on every call.

    Returns:
        The mean of the per-position token losses.
    """
    losses = []
    # One prediction list per batch element.
    preds = [[] for _ in range(ref_question.shape[0])]
    context = paragraph_tokens
    past = None
    # Type id 0 marks the paragraph tokens; question tokens use 1 below.
    types = tf.constant(0, dtype=tf.int32, shape=paragraph_tokens.shape)
    batch_dim = paragraph_tokens.shape[0]
    for i in tf.range(ref_question.shape[1]):
        ref_tokens = ref_question[:, i]
        # Skip positions where every sequence in the batch is padding.
        if tf.reduce_any(
                tf.not_equal(ref_tokens, self.embedder.padding_token)):
            predictions, past, token_loss = self.token_pred_and_loss(
                context, past, ref_tokens, types)
            # The next step is conditioned on the reference token, not on
            # the prediction.
            context = tf.expand_dims(ref_question[:, i], axis=1)
            types = tf.constant(1, dtype=tf.int32, shape=(batch_dim, 1))
            for ind, ref in enumerate(ref_tokens):
                # Keep predictions only for non-padding reference positions.
                if ref != self.embedder.padding_token:
                    preds[ind].append(predictions[ind])
            losses.append(token_loss)
    if self.print_predictions:
        for i, pred in enumerate(preds):
            ref = self.embedder.tokenizer.decode(ref_question[i])
            pred = self.embedder.tokenizer.decode(tf.stack(pred))
            paragraph = self.embedder.tokenizer.decode(paragraph_tokens[i])
            tf.print(paragraph, "\n", ref, "\n", pred, "\n")
    global_step.assign(global_step + 1)
    with self.train_summary_writer.as_default():
        total_loss = tf.reduce_mean(losses)
        tf.summary.scalar('loss', total_loss, step=global_step)
    return total_loss
def test_array_parameters_evaluate(self, qubit_device_2_wires, tol):
    """Check that packing arguments into arrays matches positional passing."""
    a, b, c = tf.constant(0.5), tf.constant(0.54), tf.constant(0.3)

    def ansatz(x, y, z):
        # Shared circuit body used by all three QNodes below.
        initial_state = np.array([1, 0, 1, 1]) / np.sqrt(3)
        qml.QubitStateVector(initial_state, wires=[0, 1])
        qml.Rot(x, y, z, wires=0)
        qml.CNOT(wires=[0, 1])
        return qml.expval(qml.PauliZ(0)), qml.expval(qml.PauliY(1))

    @qml.qnode(qubit_device_2_wires, interface='tf')
    def circuit1(x, y, z):
        return ansatz(x, y, z)

    @qml.qnode(qubit_device_2_wires, interface='tf')
    def circuit2(x, array):
        return ansatz(x, array[0], array[1])

    @qml.qnode(qubit_device_2_wires, interface='tf')
    def circuit3(array):
        return ansatz(*array)

    reference = circuit1(a, b, c)
    partly_packed = circuit2(a, Variable([b, c]))
    fully_packed = circuit3(Variable([a, b, c]))

    for candidate in (partly_packed, fully_packed):
        assert np.allclose(reference.numpy(), candidate.numpy(), atol=tol, rtol=0)
def test_qnode_array_parameters_2_vector_return(self, qubit_device_2_wires, tol):
    """Check that a QNode taking array arguments interoperates with TensorFlow.

    Test case for a circuit that returns a 2-vector. The aim is to exercise
    the interplay of the different return types, not to validate the
    numerical results themselves.
    """

    @qml.qnode(qubit_device_2_wires, interface='tf')
    def circuit(dummy1, array, dummy2):
        qml.RY(0.5 * array[0, 1], wires=0)
        qml.RY(-0.5 * array[1, 1], wires=0)
        qml.RY(array[1, 0], wires=1)
        # The pair of expectation values forms the 2-vector.
        return qml.expval(qml.PauliX(0)), qml.expval(qml.PauliX(1))

    grad_target = (
        np.array(1.),
        np.array([[0.5, 0.43879, 0], [0, -0.43879, 0]]),
        np.array(-0.4),
    )
    cost_target = 1.03257

    x_var = Variable(0.46)
    array_var = Variable([[2., 3., 0.3], [7., 4., 2.1]])
    y_var = Variable(-0.13)

    def cost(x, array, y):
        outputs = tf.cast(
            circuit(tf.constant(0.111), array, tf.constant(4.5)), tf.float32)
        first = outputs[0]  # reduce to a scalar
        return first + 0.5 * array[0, 0] + x - 0.4 * y

    with tf.GradientTape() as tape:
        cost_res = cost(x_var, array_var, y_var)

    # Only the gradients of the two scalar arguments are compared.
    scalar_grads = tape.gradient(cost_res, [x_var, y_var])
    grad_res = np.array([g.numpy() for g in scalar_grads])
    expected_grads = np.fromiter(grad_target[::2], dtype=np.float32)

    assert np.allclose(cost_res.numpy(), cost_target, atol=tol, rtol=0)
    assert np.allclose(grad_res, expected_grads, atol=tol, rtol=0)
def test_qnode_evaluation_agrees(self, qubit_device_2_wires, tol):
    """Check that the autograd and tf interfaces evaluate to the same value."""

    def gates(phi, theta):
        # Identical gate sequence for both interfaces.
        qml.RX(phi[0], wires=0)
        qml.RY(phi[1], wires=1)
        qml.CNOT(wires=[0, 1])
        qml.PhaseShift(theta[0], wires=0)
        return qml.expval(qml.PauliZ(0))

    @qml.qnode(qubit_device_2_wires, interface='autograd')
    def circuit(phi, theta):
        return gates(phi, theta)

    @qml.qnode(qubit_device_2_wires, interface='tf')
    def circuit_tf(phi, theta):
        return gates(phi, theta)

    phi, theta = [0.5, 0.1], [0.2]
    phi_t, theta_t = Variable(phi), Variable(theta)

    from_autograd = circuit(phi, theta)
    from_tf = circuit_tf(phi_t, theta_t)

    assert np.allclose(from_autograd, from_tf.numpy(), atol=tol, rtol=0)
def test_qnode_gradient_agrees(self, qubit_device_2_wires, tol):
    """Check that the autograd and tf interfaces produce the same gradients."""

    def gates(phi, theta):
        # Identical gate sequence for both interfaces.
        qml.RX(phi[0], wires=0)
        qml.RY(phi[1], wires=1)
        qml.CNOT(wires=[0, 1])
        qml.PhaseShift(theta[0], wires=0)
        return qml.expval(qml.PauliZ(0))

    @qml.qnode(qubit_device_2_wires, interface='autograd')
    def circuit(phi, theta):
        return gates(phi, theta)

    @qml.qnode(qubit_device_2_wires, interface='tf')
    def circuit_tf(phi, theta):
        return gates(phi, theta)

    phi, theta = [0.5, 0.1], [0.2]
    phi_t, theta_t = Variable(phi), Variable(theta)

    # Reference gradients with respect to both arguments.
    autograd_grad = qml.grad(circuit, [0, 1])(phi, theta)

    with tf.GradientTape() as g:
        g.watch([phi_t, theta_t])
        y = circuit_tf(phi_t, theta_t)
    tf_grad = g.gradient(y, [phi_t, theta_t])

    for position in (0, 1):
        assert np.allclose(autograd_grad[position], tf_grad[position],
                           atol=tol, rtol=0)
def sample_visible_given_hidden(self,
                                hidden_batch: np.array,
                                bias_visible_tf: tf.Variable,
                                weights_tf: tf.Variable,
                                binary: bool = True) -> np.array:
    """Sample visible units from the hidden units.

    The TensorFlow variables are converted to NumPy once (the bias is
    additionally transposed) and the sampling itself is delegated to
    ``self._sample_x_given_y``.

    Args:
        hidden_batch (np.array): Batch of hidden data in shape
            (batch_size, no_hidden)
        bias_visible_tf (tf.Variable): Tensorflow variable for visible bias
        weights_tf (tf.Variable): Tensorflow variable for weights
        binary (bool, optional): True to binarize, False for raw
            probabilities. Defaults to True.

    Returns:
        np.array: Samples in shape (batch_size, no_visible)
    """
    bias_np = np.transpose(bias_visible_tf.numpy())
    weights_np = weights_tf.numpy()

    def activation_for(i_batch):
        # Per-batch-item activation with the converted parameters bound in.
        return self.activation_visible(hidden_batch=hidden_batch,
                                       bias_visible=bias_np,
                                       weights=weights_np,
                                       i_batch=i_batch)

    # The visible count is the second dimension of the bias variable.
    visible_count = bias_visible_tf.shape[1]
    return self._sample_x_given_y(y_batch=hidden_batch,
                                  no_x=visible_count,
                                  ef=activation_for,
                                  binary=binary)
def test_grad_tf(self, qnodes, skip_if_no_tf_support, parallel, interface):
    """Check the gradient of a QNodeCollection under the tf interface.

    The gradient of the summed collection output must equal the gradient
    obtained by evaluating and summing the two QNodes individually.
    """
    if parallel and qml.tape_mode_active():
        pytest.skip(
            "There appears to be a race condition when constructing TF tapes in parallel"
        )

    qnode1, qnode2 = qnodes

    # Gradient through the collection.
    collection_params = Variable([0.5643, -0.45])
    qc = qml.QNodeCollection([qnode1, qnode2])

    with tf.GradientTape() as collection_tape:
        collection_tape.watch(collection_params)
        if not parallel:
            cost = sum(qc(collection_params, parallel=parallel))
        else:
            # Parallel evaluation is expected to emit a UserWarning.
            with pytest.warns(UserWarning):
                cost = sum(qc(collection_params, parallel=parallel))

    res = collection_tape.gradient(cost, collection_params).numpy()

    # Gradient through the individual QNodes, using a fresh variable.
    individual_params = Variable([0.5643, -0.45])

    with tf.GradientTape() as individual_tape:
        individual_tape.watch(individual_params)
        cost = sum(qnode1(individual_params) + qnode2(individual_params))

    expected = individual_tape.gradient(cost, individual_params).numpy()

    assert np.all(res == expected)
def defining_variables():
    """Create a 1-D variable, print it, then print its NumPy conversion."""
    vector_var = Variable([1, 2, 3, 4])
    print('\n A1: ', vector_var)

    # ``numpy()`` yields the array view of the variable's value.
    vector_np = vector_var.numpy()
    print('\n B1: ', vector_np)
def train_epoch(model,
                optimizer,
                dataset,
                epoch_index: int,
                batch_index: tf.Variable,
                log_freq: int = 250,
                writer=None):
    """Train ``model`` for one pass over ``dataset`` and log the losses.

    Two metric sets are maintained: one covering the whole epoch and one
    covering the most recent ``log_freq`` batches (an "era"). Era metrics
    are written, then reset, every ``log_freq`` batches; epoch metrics are
    written once at the end. Nothing is written when ``writer`` is falsy.

    Args:
        model: Detection model exposing ``trainable_variables``,
            ``preprocess``, ``provide_groundtruth_direct``, ``predict`` and
            ``loss``.
        optimizer: Optimizer whose ``apply_gradients`` is called per batch.
        dataset: Iterable of batch dicts containing ``"image"`` and the
            ground-truth keys listed below.
        epoch_index: Step used for the epoch-level summaries.
        batch_index: Global batch counter; incremented once per batch and
            used as the step for the era-level summaries.
        log_freq: Number of batches between era-level log entries.
        writer: Optional summary writer passed to ``write_scalars``.
    """
    loss_names = ["Localization", "Confidence", "WeightedTotal"]
    groundtruth_keys = [
        "cls_targets", "cls_weights", "reg_targets", "reg_weights", "matched"
    ]

    to_fine_tune = list(model.trainable_variables)
    epoch_metrics = make_metric_dict(list(loss_names))
    era_metrics = make_metric_dict(list(loss_names))
    for met in era_metrics.values():
        met.reset_states()

    epoch_samples = 0
    era_samples = 0
    _log("Started new training epoch")
    batch_start = batch_index.numpy()

    for batch in dataset:
        batch_index.assign_add(1)
        batch_size = len(batch["image"])
        epoch_samples += batch_size
        era_samples += batch_size

        images, shapes = model.preprocess(batch["image"])
        model.provide_groundtruth_direct(
            **{k: batch[k] for k in groundtruth_keys})

        with tf.GradientTape() as tape:
            prediction_dict = model.predict(images, shapes)
            loss_dict = model.loss(prediction_dict)
        gradients = tape.gradient(loss_dict["WeightedTotal"], to_fine_tune)
        optimizer.apply_gradients(zip(gradients, to_fine_tune))

        update_metric_dict(epoch_metrics, loss_dict)
        update_metric_dict(era_metrics, loss_dict)

        # Plain Python int so the log line shows a number, not a tensor repr.
        batches_done = int(batch_index.numpy() - batch_start)
        if batches_done % log_freq == 0:
            _log(f"Completed {batches_done} batches")
            if writer:
                l_dict = metric2scalar_dict(
                    era_metrics,
                    prefix=f"Loss/Train/Last_{log_freq}_Batches",
                    # Bind the current count so the later reset cannot
                    # affect a deferred call.
                    v_func=lambda v, n=era_samples: v / n,
                    reset_states=True)
                write_scalars(writer, l_dict, step=batch_index)
                # The era metrics were just reset, so the sample count they
                # are normalised by must restart as well; otherwise every
                # era after the first is divided by the cumulative count.
                era_samples = 0

    if writer:
        # NOTE(review): an empty dataset leaves epoch_samples at 0 here -
        # confirm how metric2scalar_dict should behave in that case.
        l_dict = metric2scalar_dict(epoch_metrics,
                                    prefix="Loss/Train/Epoch",
                                    v_func=lambda v: v / epoch_samples,
                                    reset_states=True)
        write_scalars(writer, l_dict, step=epoch_index)
def make_layer(self, inputs, in_size, out_size, activate=None):
    '''Add a neural-network layer.

    Computes ``inputs @ weights + bias`` with freshly created variables and
    applies ``activate`` to the result when one is given.
    '''
    kernel = Variable(random_normal([in_size, out_size]))
    # The bias starts slightly positive (0.1) rather than at zero.
    offset = Variable(zeros([1, out_size]) + 0.1)
    pre_activation = matmul(inputs, kernel) + offset

    if activate is not None:
        return activate(pre_activation)
    return pre_activation
def unsaple_2d(image: tf.Variable, size: int):
    """Upsample ``image`` by an integer factor using nearest neighbour.

    If the input image has spatial size (10, 10) the output has spatial
    size (10*size, 10*size); a non-square (H, W) input yields
    (H*size, W*size).

    Args:
        image: 4-D tensor laid out as [batch, height, width, channels],
            which is the layout ``tf.image.resize_nearest_neighbor``
            expects. The spatial dimensions must be statically known.
        size: Multiplicative scale factor applied to height and width.

    Returns:
        The resized tensor.
    """
    static_shape = image.get_shape()
    # Axis 1 is height and axis 2 is width; the resize op takes
    # (new_height, new_width), so the two must not be swapped.
    height = int(static_shape[1] * size)
    width = int(static_shape[2] * size)
    return tf.image.resize_nearest_neighbor(image, (height, width))
def logisticlayer(X, n_hidden, n_classes, name,
                  with_relu=True, with_dropout=False, keep_prob=0.9):
    """Build a dense layer ``X @ W + b`` with optional dropout and ReLU.

    Args:
        X: Input tensor whose last dimension is ``n_hidden``.
        n_hidden: Number of input features.
        n_classes: Number of output units.
        name: Name scope under which the layer is created.
        with_relu: Apply ``tf.nn.relu`` to the output when true.
        with_dropout: Apply ``tf.nn.dropout`` (before the ReLU) when true.
        keep_prob: Keep probability forwarded to the dropout op.

    Returns:
        The output tensor of the layer.
    """
    with name_scope(name):
        weights = Variable(
            initial_value=tf.random_normal((n_hidden, n_classes), stddev=0.01))
        bias = Variable(initial_value=tf.random_normal((n_classes,)))

        out = tf.matmul(X, weights) + bias
        if with_dropout:
            out = tf.nn.dropout(out, keep_prob=keep_prob)
        if with_relu:
            out = tf.nn.relu(out)
        return out
def add_layer(input, in_size, out_size, activation_function=None):
    """Build a dense layer computing ``Weights @ input + Biases``.

    Args:
        input: Input tensor with ``in_size`` rows (features along axis 0,
            as required by the ``matmul(Weights, input)`` product).
        in_size: Number of input features.
        out_size: Number of output units.
        activation_function: Optional callable. It is applied to the
            transposed pre-activation and the result is transposed back.

    Returns:
        The layer output with ``out_size`` rows.
    """
    weights = Variable(random_normal([out_size, in_size]))
    # Biases start slightly positive (0.1) rather than at zero.
    biases = Variable(zeros([out_size, 1]) + 0.1)
    wx_plus_b = matmul(weights, input) + biases

    # Identity comparison is the correct test for "no activation given".
    if activation_function is None:
        return wx_plus_b

    activated = activation_function(transpose(wx_plus_b))
    return transpose(activated)
def _st(model: tf.keras.Model,
        gen_img: tf.Variable,
        content_path: str,
        style_path: str,
        content_layers: List[str],
        style_layers: List[str],
        lpi: Callable,
        opt: tf.train.AdamOptimizer,
        content_weight=1e3,
        style_weight=1e-2,
        num_iterations=100) -> None:
    """
    Style transfer from a style image to a source image with a given pre-trained network

    :param model: The model to use for the style transfer
    :param gen_img: The generated image to modify INPLACE
    :param content_path: The path to the source image to paint the style
    :param style_path: The path to the image to use the style
    :param content_layers: The list of content layers to use
    :param style_layers: The list of style layers to use
    :param lpi: The function to use to load and process image
    :param opt: The Adam optimizer to use
    :param content_weight: The weight for the content loss
    :param style_weight: The weight for the style loss
    :param num_iterations: The number of iteration to paint
    :return: Nothing; the result is left in ``gen_img``
    """
    # Feature representations taken from the configured intermediate layers.
    style_features, content_features = compute_feature_representations(
        model, lpi, content_path, style_path, len(style_layers))
    gram_style_features = list(map(gram_matrix, style_features))

    cfg = dict(
        model=model,
        loss_weights=(style_weight, content_weight),
        gen_img=gen_img,
        gram_style_features=gram_style_features,
        content_features=content_features,
        num_style_layers=len(style_layers),
        num_content_layers=len(content_layers),
    )

    # Per-channel offsets; the valid pixel range is [0, 255] shifted by them.
    channel_means = np.array([103.939, 116.779, 123.68])
    lower_bound = -channel_means
    upper_bound = 255 - channel_means

    for i in range(num_iterations):
        grads, (loss, style_score, content_score) = compute_grads(cfg)
        opt.apply_gradients([(grads, gen_img)])
        # Keep the image inside the valid range after every update.
        gen_img.assign(tf.clip_by_value(gen_img, lower_bound, upper_bound))
        _logger.info(
            f"Iteration n°{i} | loss : {loss} | style_score : {style_score} | content_score : {content_score}"
        )
class Param:
    '''
    Copied and modified from GPflow(https://github.com/GPflow/)

    Wraps a value together with a transform. A fixed parameter is stored
    as a ``tf.constant``; otherwise it is stored as a ``Variable`` holding
    the back-transformed value.
    '''

    def __init__(self, value, transform=None, fixed=False, name=None,
                 learning_rate=None, summ=False):
        '''Create the parameter and its underlying TensorFlow tensor.

        ``learning_rate`` is applied only when ``fixed`` is exactly
        ``False``; ``summ`` requests a histogram summary of the tensor.
        '''
        self.value = value
        self.fixed = fixed
        self.name = "param" if name is None else name
        self.transform = (transforms.Identity()
                          if transform is None else transform)

        if not self.fixed:
            backward_value = self.transform.backward(self.value)
            self.tf_opt_var = Variable(backward_value, name=name,
                                       dtype=float_type)
        else:
            self.tf_opt_var = tf.constant(self.value, name=name,
                                          dtype=float_type)

        if learning_rate is not None and fixed is False:
            self.tf_opt_var.set_learning_rate(learning_rate)

        if summ:
            self.variable_summaries(self.tf_opt_var)

    def get_optv(self):
        '''Return the tensor that is stored (and optimised when not fixed).'''
        return self.tf_opt_var

    def get_tfv(self):
        '''Return the parameter value, forward-transformed unless fixed.'''
        if not self.fixed:
            return self.transform.tf_forward(self.tf_opt_var)
        return self.tf_opt_var

    def variable_summaries(self, var):
        '''Record a histogram summary of ``var`` under this parameter's name.'''
        tf.summary.histogram(self.name, var)

    @property
    def shape(self):
        '''Shape of the initial value.'''
        return self.value.shape
def reader(context: Tuple[tf.Variable, tf.Variable],
           emb0: tf.Variable,
           n_slots: None,
           weights=None,
           step_size=1.0,
           scale_prediction=0.0,
           start_from_zeros=False,
           loss_grad=loss_quadratic_grad,
           emb_update=multilinear_grad):
    """
    Read a series of data and update the embeddings accordingly

    Args:
        context (Tuple[tf.Variable, tf.Variable]): contextual information as
            an ``(inputs, outputs)`` pair, or ``None`` for an empty context
        emb0 (tf.Variable): initial embeddings, either shared
            ``(n_ent, rank)`` or per-sample ``(n_data, n_ent, rank)``
        n_slots (int): number of slots to update; ``None`` means all
            ``n_ent`` entities
        weights: weights give to every observation in the inputs.
            Size: (batch_size, n_obs). ``None`` applies no weighting.
        step_size: initial value of the trainable step-size variable
        scale_prediction: factor applied to the predictions before the loss
            gradient is evaluated
        start_from_zeros: if true, return only the scaled update instead of
            subtracting it from the initial slot embeddings
        loss_grad: gradient of the loss
        emb_update: update of the embeddings (could be the gradient of the
            score with respect to the embeddings)

    Returns:
        The variable representing updated embeddings, of size
        (n_data, n_slots, rank); ``emb0`` unchanged when ``context`` is None
    """
    if context is None:  # empty contexts are not read
        return emb0

    context_inputs, context_ouputs = context
    # context_inputs has shape (n_data, n_obs, order)
    n_data, n_obs, order = [d.value for d in context_inputs.get_shape()]
    step_size = tf.Variable(step_size, name='step_size', trainable=True)

    per_sample_embeddings = len(emb0.get_shape()) > 2
    if per_sample_embeddings:  # different set of embeddings for every data
        _, n_ent, rank = [d.value for d in emb0.get_shape()]
        # Offset the indices of sample i by i * n_ent so they address the
        # flattened (n_data * n_ent, rank) embedding matrix.
        shift_indices = tf.constant(
            n_ent * np.reshape(
                np.outer(range(n_data), np.ones(n_obs * order)),
                (n_data, n_obs, order)),
            dtype='int64')
        emb0_rsh = tf.reshape(emb0, (-1, rank))
        grad_score, preds = emb_update(
            emb0_rsh, context_inputs + shift_indices, score=True)
    else:
        n_ent, rank = [d.value for d in emb0.get_shape()[:2]]
        grad_score, preds = emb_update(emb0, context_inputs, score=True)

    # Resolve the default in both branches; previously the shared-embedding
    # branch left it as None and failed in np.eye below.
    if n_slots is None:
        n_slots = n_ent

    loss_gradient = loss_grad(preds * scale_prediction, context_ouputs)
    if weights is not None:
        # Multiplying by the default None would raise a TypeError.
        loss_gradient = loss_gradient * weights
    update_strength = tf.tile(
        tf.reshape(loss_gradient, (n_data, n_obs, 1, 1)), (1, 1, 2, rank))
    # NOTE(review): the hard-coded 2 looks like an assumption that
    # order == 2 - confirm before using higher-order tuples.
    grad_loss = tf.reshape(grad_score, (n_data, n_obs, 2, rank)) * update_strength

    # Indices >= n_slots are clamped to the extra all-zero last row, so they
    # contribute nothing to the update.
    one_hot = tf.Variable(np.eye(n_slots + 1, n_slots, dtype=np.float32),
                          trainable=False)  # last column removed
    indic_mat = tf.gather(one_hot, tf.minimum(context_inputs, n_slots))
    # shape: (n_data, n_obs, order, n_slots)
    total_grad_loss = tf.reduce_sum(
        tf.batch_matmul(indic_mat, grad_loss, adj_x=True), 1)

    if start_from_zeros:
        # size of the output: (n_data, n_slots, rank)
        return total_grad_loss * step_size

    if per_sample_embeddings:
        initial_slot_embs = emb0[:, :n_slots, :]
    else:
        # Replicate the shared slot embeddings for every sample.
        initial_slot_embs = tf.reshape(
            tf.tile(emb0[:n_slots, :], (n_data, 1)), (n_data, n_slots, rank))
    # size of the output: (n_data, n_slots, rank)
    return initial_slot_embs - total_grad_loss * step_size
def _do(self, input_tensor_variable: tf.Variable,
        params_tensor_variable: tf.Variable) -> (tf.Tensor, Callable):
    """
    Forward pass with both input and parameter variables

    This in-between function is necessary in order to have the custom gradient work in Tensorflow. That is the reason
    for returning the grad() function as well.

    Parameters
    ----------
    input_tensor_variable
        the tf.Variable which holds the values of the input
    params_tensor_variable
        the tf.Variable which holds the values of the parameters

    Returns
    -------
    result
        The result of the forwarding
    grad
        Function mapping the upstream gradient to the pair
        (gradient w.r.t. inputs, gradient w.r.t. parameters)

    Raises
    ------
    TequilaMLException
        If the shape of either argument does not match the expected length
    """
    if params_tensor_variable.shape != self._params_len:
        # Report the parameter count here, not the input count.
        raise TequilaMLException(
            'Received parameters of len {} when Objective takes {} parameters.'.format(
                len(params_tensor_variable.numpy()), self._params_len))
    params_tensor_variable = tf.stack(params_tensor_variable)

    if input_tensor_variable.shape != self._input_len:
        raise TequilaMLException(
            'Received input of len {} when Objective takes {} inputs.'.format(
                len(input_tensor_variable.numpy()), self._input_len))
    input_tensor_variable = tf.stack(input_tensor_variable)

    def grad(upstream):
        input_gradient_values, parameter_gradient_values = self.get_grads_values()

        # Convert to tensor
        in_Tensor = tf.convert_to_tensor(input_gradient_values,
                                         dtype=self._cast_type)
        par_Tensor = tf.convert_to_tensor(parameter_gradient_values,
                                          dtype=self._cast_type)

        # Multiply with the upstream (cast once, used for both products)
        upstream_cast = tf.dtypes.cast(upstream, self._cast_type)
        in_Upstream = upstream_cast * in_Tensor
        par_Upstream = upstream_cast * par_Tensor

        # Transpose and sum
        return (tf.reduce_sum(tf.transpose(in_Upstream), axis=0),
                tf.reduce_sum(tf.transpose(par_Upstream), axis=0))

    return self.realForward(inputs=input_tensor_variable,
                            params=params_tensor_variable), grad
class NPLM(Model):
    """Model combining a frozen parametric net with nuisance parameters.

    The output has two columns: ``BSM + Lratio`` and ``Laux``, where
    ``Lratio`` is computed from the frozen ``Delta`` network and the
    nuisance parameters, ``Laux`` is the auxiliary term built from
    ``nu``, ``nuR``, ``nu0`` and ``sigma``, and ``BSM`` is the output of an
    optional trainable network (zero when ``train_BSM`` is false).
    """

    def __init__(self, input_shape, NU, NUR, NU0, SIGMA,
                 architecture=[1, 10, 1], weight_clipping=1.,
                 ParNet_weights=None, train_nu=True, train_BSM=True,
                 name=None, **kwargs):
        """Build the model.

        Args:
            input_shape: Shape passed to ``build`` and to the BSM network.
            NU: Initial value of the nuisance parameters (trainable when
                ``train_nu`` is true).
            NUR: Reference nuisance values (not trainable).
            NU0: Central nuisance values (not trainable).
            SIGMA: Nuisance uncertainties (not trainable).
            architecture: Layer sizes of the BSM network. The default list
                is only forwarded to ``DNN``, never mutated here.
            weight_clipping: Weight clipping of the BSM network.
            ParNet_weights: Path of the parametric-net weights. The path
                itself encodes the configuration: underscore-separated
                layer sizes between ``layers`` and ``_act``, the activation
                name after ``act``, and the clipping value between
                ``wclip`` and the next ``/``.
            train_nu: Whether ``nu`` is trainable.
            train_BSM: Whether to create and use the BSM network.
            name: Optional model name.

        Raises:
            ValueError: If ``ParNet_weights`` is not given.
        """
        super().__init__(name=name, **kwargs)
        if ParNet_weights is None:
            # The configuration is parsed out of the path, so it is required.
            raise ValueError(
                "ParNet_weights must be the path of the parametric-net weights")

        architecturePar = ParNet_weights.split('layers', 1)[1]
        architecturePar = architecturePar.split('_act', 1)[0]
        architecturePar = architecturePar.split('_')
        layersPar = [int(layer) for layer in architecturePar]
        inputsizePar = layersPar[0]
        input_shapePar = (None, inputsizePar)

        activationPar = ParNet_weights.split('act', 1)[1]
        activationPar = activationPar.split('_', 1)[0]

        wcPar = ParNet_weights.split('wclip', 1)[1]
        wcPar = float(wcPar.split('/', 1)[0])

        # NOTE(review): the original passed ``activationPar`` positionally
        # after keyword arguments, which is a SyntaxError. The keyword name
        # ``activation`` is an assumption - confirm it against
        # ParametricNet's signature, and confirm whether ``architecture``
        # should receive ``layersPar`` rather than the nested string list.
        self.Delta = ParametricNet(input_shapePar,
                                   architecture=[architecturePar],
                                   weight_clipping=[wcPar],
                                   activation=activationPar)
        self.Delta.load_weights(ParNet_weights)
        # don't want to train Delta
        for module in self.Delta.layers:
            for layer in module.layers:
                layer.trainable = False

        self.nu = Variable(initial_value=NU, dtype="float32",
                           trainable=train_nu, name='nu')
        self.nuR = Variable(initial_value=NUR, dtype="float32",
                            trainable=False, name='nuR')
        self.nu0 = Variable(initial_value=NU0, dtype="float32",
                            trainable=False, name='nu0')
        self.sig = Variable(initial_value=SIGMA, dtype="float32",
                            trainable=False, name='sigma')

        if train_BSM:
            self.BSMfinder = DNN(input_shape, architecture, weight_clipping)
        self.train_BSM = train_BSM
        self.build(input_shape)

    def call(self, x):
        """Return the two-column output ``[BSM + Lratio, Laux]`` for ``x``."""
        nu = tf.squeeze(self.nu)
        nuR = tf.squeeze(self.nuR)
        nu0 = tf.squeeze(self.nu0)
        sigma = tf.squeeze(self.sig)

        # Auxiliary term, broadcast to one value per input row.
        Laux = tf.reduce_sum(-0.5*((nu-nu0)**2 - (nuR-nu0)**2)/sigma**2)
        Laux = Laux*tf.ones_like(x[:, 0:1])

        delta = self.Delta.call(x)
        Lratio = tf.math.log((1+delta[:, 0:1]*nu[0]/sigma[0])**2
                             + (delta[:, 1:2]*nu[0]/sigma[0])**2)  # scale
        Lratio += tf.math.log((tf.ones_like(delta[:, 1:2])+nu[1])
                              / (tf.ones_like(delta[:, 1:2])+nuR[1]))  # norm

        BSM = tf.zeros_like(Laux)
        if self.train_BSM:
            BSM = self.BSMfinder(x)

        output = tf.keras.layers.Concatenate(axis=1)([BSM+Lratio, Laux])
        self.add_metric(tf.reduce_mean(Laux), aggregation='mean', name='Laux')
        self.add_metric(nu[0], aggregation='mean', name='scale_barrel')
        self.add_metric(nu[1], aggregation='mean', name='efficiency_barrel')
        return output
def test_qnode_evaluation_agrees(self, qnodes, tol):
    """Check that both QNodes from the fixture evaluate to the same value."""
    circuit, circuit_tf = qnodes

    phi, theta = [0.5, 0.1], [0.2]
    phi_t, theta_t = Variable(phi), Variable(theta)

    from_autograd = circuit(phi, theta)
    from_tf = circuit_tf(phi_t, theta_t)

    assert np.allclose(from_autograd, from_tf.numpy(), atol=tol, rtol=0)
class BSMfinderUpgrade(Model):
    """Model bundling binning, linear-exp layers, nuisances and a BSM net."""

    def __init__(self, input_shape, edgebinlist, mean_ref, A1matrix,
                 A0matrix, NUmatrix, NURmatrix, NU0matrix, SIGMAmatrix,
                 architecture, weight_clipping, name=None, **kwargs):
        """Create the sub-layers and nuisance variables, then build the model.

        Args:
            input_shape: Shape passed to every sub-layer and to ``build``.
            edgebinlist: Bin edges forwarded to ``BinStepLayer``.
            mean_ref: Reference mean forwarded to ``BinStepLayer``.
            A1matrix: Matrix forwarded to the ``eiR`` linear-exp layer.
            A0matrix: Matrix forwarded to both linear-exp layers.
            NUmatrix: Initial value of the trainable ``nu`` variable.
            NURmatrix: Value of the non-trainable ``nuR`` variable.
            NU0matrix: Value of the non-trainable ``nu0`` variable.
            SIGMAmatrix: Value of the non-trainable ``sigma`` variable.
            architecture: Architecture forwarded to ``BSMfinder``.
            weight_clipping: Weight clipping forwarded to ``BSMfinder``.
            name: Optional model name.
        """
        super().__init__(name=name, **kwargs)
        self.oi = BinStepLayer(input_shape, edgebinlist, mean_ref)
        # NOTE(review): ``ei`` receives A0matrix twice while ``eiR``
        # receives (A0matrix, A1matrix) - confirm this is intentional.
        self.ei = LinearExpLayer(input_shape, A0matrix, A0matrix)
        self.eiR = LinearExpLayer(input_shape, A0matrix, A1matrix)
        self.nu = Variable(initial_value=NUmatrix, dtype="float32",
                           trainable=True, name='nu')
        self.nuR = Variable(initial_value=NURmatrix, dtype="float32",
                            trainable=False, name='nuR')
        self.nu0 = Variable(initial_value=NU0matrix, dtype="float32",
                            trainable=False, name='nu0')
        self.sig = Variable(initial_value=SIGMAmatrix, dtype="float32",
                            trainable=False, name='sigma')
        self.f = BSMfinder(input_shape, architecture, weight_clipping)
        self.build(input_shape)
def test_composition_qnodes_gradient(self, qnodes, x, y):
    """Check that gradients of (composed) QNodes are reproducible.

    Each gradient is computed twice under a fresh tape and the two results
    must be equal.
    """
    f, g = qnodes
    xt = Variable(x)
    yt = Variable(y)  # created as in the original test; not used further

    def direct_gradient():
        # d f(xt) / d xt
        with tf.GradientTape() as tape:
            tape.watch([xt])
            out = f(xt)
        return tape.gradient(out, xt)

    def composed_gradient(fn):
        # d fn(fn(xt)) / d fn(xt)
        with tf.GradientTape() as tape:
            tape.watch([xt])
            inner = fn(xt)
            outer = fn(inner)
        return tape.gradient(outer, inner)

    # compose function with xt as input
    grad1 = direct_gradient()
    grad2 = direct_gradient()
    assert tf.equal(grad1, grad2)

    # compose function with a as input
    grad1 = composed_gradient(f)
    grad2 = composed_gradient(f)
    assert tf.equal(grad1, grad2)

    # compose function with b as input
    grad1 = composed_gradient(g)
    grad2 = composed_gradient(g)
    assert tf.equal(grad1, grad2)
def replay_train_prioritized(self):
    """Run one prioritized-replay update step.

    Does nothing until the replay memory holds at least ``train_start``
    entries. Otherwise it decays epsilon linearly, samples a minibatch,
    computes per-sample TD targets and errors, writes the errors back as
    new priorities and calls the optimizer on the importance-weighted
    squared error.
    """
    if not self.memory.tree.n_entries >= self.train_start:
        return

    if self.epsilon > self.epsilon_min:
        # Linear (not multiplicative) epsilon decay.
        self.epsilon -= self.epsilon_decay

    minibatch, idxs, is_weights = self.memory.sample(self.batch_size)
    # Transpose so that row k holds field k of every sampled transition.
    minibatch = np.array(minibatch).transpose()

    state = np.vstack(minibatch[0])
    action = minibatch[1]
    reward = list(minibatch[2])
    next_state = np.vstack(minibatch[3])
    done = minibatch[4].astype(int)

    target_next = self.target_network.predict(next_state)
    pred_online = self.action_network.predict(state)

    # One-hot mask selecting the Q-value of the action actually taken;
    # works for any action_size instead of being hard-coded to two actions.
    taken = action.astype(int).reshape(-1)
    one_hot_action = np.zeros((self.batch_size, self.action_size))
    one_hot_action[np.arange(self.batch_size), taken] = 1

    pred_online = tf.math.reduce_sum(pred_online * one_hot_action, axis=1)

    # The bootstrap value must be the best next-state Q-value of EACH
    # sample (axis=1); a global max would share one value across the batch.
    best_next_q = np.amax(target_next, axis=1)
    target = reward + (1 - done) * self.gamma * best_next_q

    errors = tf.convert_to_tensor(abs(pred_online - target))
    pred_online = Variable(pred_online)
    target = Variable(target)

    # update priority
    for i, idx in enumerate(idxs[:self.batch_size]):
        self.memory.update(idx, errors[i])

    def loss():
        return tf.reduce_mean(
            tf.convert_to_tensor(is_weights)
            * tf.losses.mean_squared_error(target, pred_online))

    # NOTE(review): ``pred_online`` is a fresh Variable detached from
    # ``self.action_network``, so this step does not appear to update the
    # network weights - confirm the intended training target.
    self.optimizer.minimize(loss, [pred_online])
class Param:
    '''
    Inheriting from GPFlow

    Wraps a value together with a transform. With ``fixed=True`` the
    parameter is stored as a ``tf.constant`` (a tensor); otherwise it is
    stored as a ``Variable`` holding the back-transformed value.
    '''

    def __init__(self, value, transform=None, fixed=False, name=None,
                 learning_rate=None, summ=False):
        '''Create the parameter and its underlying TensorFlow tensor.

        Args:
            value: Initial value of the parameter.
            transform: Object providing ``backward`` / ``tf_forward``;
                ``transforms.Identity()`` when omitted.
            fixed: Store the parameter as a constant instead of a variable.
            name: Label kept in ``self.name`` (falls back to ``"param"``).
            learning_rate: Optional rate handed to ``set_learning_rate``;
                ignored for fixed parameters.
            summ: Attach TensorBoard summaries to the stored tensor.
        '''
        self.value = value
        self.fixed = fixed
        self.name = "param" if name is None else name
        self.transform = (transforms.Identity()
                          if transform is None else transform)

        if self.fixed:
            self.tf_opt_var = tf.constant(self.value, name=name,
                                          dtype=float_type)
        else:
            self.tf_opt_var = Variable(self.transform.backward(self.value),
                                       name=name, dtype=float_type)

        # A fixed parameter is a constant tensor with no learning rate to
        # set; calling set_learning_rate on it would raise AttributeError.
        if learning_rate is not None and not self.fixed:
            self.tf_opt_var.set_learning_rate(learning_rate)

        if summ:
            self.variable_summaries(self.tf_opt_var)

    def get_optv(self):
        '''Return the tensor that is stored (and optimised when not fixed).'''
        return self.tf_opt_var

    def get_tfv(self):
        '''Return the parameter value, forward-transformed unless fixed.'''
        if self.fixed:
            return self.tf_opt_var
        return self.transform.tf_forward(self.tf_opt_var)

    def variable_summaries(self, var):
        """Attach a mean scalar and a histogram summary to ``var`` (for TensorBoard visualization)."""
        mean = tf.reduce_mean(var)
        tf.summary.scalar(self.name, mean)
        tf.summary.histogram(self.name, var)

    @property
    def shape(self):
        '''Shape of the initial value.'''
        return self.value.shape
def answerer(embeddings, tuples: tf.Variable, scoring=multilinear):
    """
    Evaluate the score of tuples with embeddings that are specific to every data sample

    Args:
        embeddings (tf.Variable): embedding tensor with shape (n_data, n_slots, rank)
        tuples: question tensor with int64 entries and shape (n_data, n_tuples, order)
        scoring: operator that is used to compute the scores

    Returns:
        scores (tf.Tensor): scores tensor with shape (n_data, n_tuples)
    """
    n_data, n_slots, rank = [d.value for d in embeddings.get_shape()]
    n_data, n_tuples, order = [d.value for d in tuples.get_shape()]

    # Sample i's indices are offset by i * n_slots so they address the
    # flattened (n_data * n_slots, rank) embedding matrix. The offsets must
    # have the shape of ``tuples`` - (n_data, n_tuples, order) - so the
    # last dimension is ``order``, not ``n_slots``.
    sample_ids = np.outer(range(n_data), np.ones(n_tuples * order))
    shift_indices = tf.constant(
        np.reshape(sample_ids * n_slots, (n_data, n_tuples, order)),
        dtype='int64')
    questions_shifted = tuples + shift_indices

    preds = scoring(
        tf.reshape(embeddings, (n_data * n_slots, rank)),
        tf.reshape(questions_shifted, (n_data * n_tuples, order)))
    return tf.reshape(preds, (n_data, n_tuples))
def test_gradient(self, tol):
    """Check that the ExpvalCost gradient matches the analytic expression."""
    dev = qml.device("default.qubit", wires=1)

    def ansatz(params, **kwargs):
        qml.RX(params[0], wires=0)
        qml.RY(params[1], wires=0)

    coeffs = [0.2, 0.5]
    observables = [qml.PauliX(0), qml.PauliY(0)]
    H = qml.vqe.Hamiltonian(coeffs, observables)

    a, b = 0.54, 0.123
    params = Variable([a, b], dtype=tf.float64)
    cost = qml.ExpvalCost(ansatz, H, dev, interface="tf")

    with tf.GradientTape() as tape:
        loss = cost(params)
    res = np.array(tape.gradient(loss, params))

    # Analytic partial derivatives with respect to a and b.
    c_x, c_y = coeffs
    d_a = -c_x * np.sin(a) * np.sin(b) - c_y * np.cos(a)
    d_b = c_x * np.cos(a) * np.cos(b)
    expected = [d_a, d_b]

    assert np.allclose(res, expected, atol=tol, rtol=0)
def dyke_dqn_agent(env: TFPyEnvironment,
                   layers: Optional[List[Layer]] = None) -> DqnAgent:
    """
    Prepares a deep Q-network (DQN) agent for use in the dyke maintenance environment.

    :param env: The dyke environment on which to base the DQN agent.
    :param layers: Optional. A list of layers to supply to the DQN agent's network.
        When omitted, a fully connected network with sizes (100, 50) is used.
    :return: The agent.
    """
    if layers is None:
        layers = fully_connected_dyke_dqn_agent_network(sizes=(100, 50))

    # The Q-values layer has one unit per action in the bounded action spec.
    action_bounds = from_spec(env.action_spec())
    action_count = int(action_bounds.maximum - action_bounds.minimum + 1)
    q_head = Dense(units=action_count,
                   activation=None,
                   kernel_initializer=RandomUniform(minval=-3e-3, maxval=3e-3),
                   bias_initializer=Constant(-2e-1))
    q_net = Sequential(layers=layers + [q_head])

    # Instantiate and return the agent.
    adam = Adam(learning_rate=1e-3)
    step_counter = Variable(initial_value=0)
    return DqnAgent(time_step_spec=env.time_step_spec(),
                    action_spec=env.action_spec(),
                    q_network=q_net,
                    optimizer=adam,
                    epsilon_greedy=0.1,
                    td_errors_loss_fn=element_wise_squared_loss,
                    train_step_counter=step_counter)
def weight_pruning(w: tf.Variable, k: float) -> tf.Variable:
    """Performs pruning on a weight matrix w in the following way:

    - The absolute value of all elements in the weight matrix are computed.
    - The indices of the smallest k% elements based on their absolute values
      are selected.
    - All elements with the matching indices are set to 0.

    Args:
        w: The weight matrix.
        k: The fraction of values (weights) that should be pruned from the
            matrix.

    Returns:
        The result of assigning the weight pruned matrix back to ``w``.
    """
    # Number of individual weights to zero out.
    total_weights = tf.size(w, out_type=tf.float32)
    num_pruned = tf.cast(tf.round(total_weights * tf.constant(k)),
                         dtype=tf.int32)

    flat_w = tf.reshape(w, [-1])
    # top_k on the negated magnitudes yields the smallest magnitudes.
    _, smallest = tf.nn.top_k(tf.negative(tf.abs(flat_w)), num_pruned,
                              sorted=True, name=None)

    keep_all = tf.Variable(tf.ones_like(flat_w, dtype=tf.float32),
                           name="mask", trainable=False)
    scatter_positions = tf.reshape(smallest, [-1, 1])
    zero_updates = tf.zeros([num_pruned], tf.float32)
    mask = tf.scatter_nd_update(keep_all, scatter_positions, zero_updates)

    pruned = tf.reshape(flat_w * mask, tf.shape(w))
    return w.assign(pruned)
def unit_pruning(w: tf.Variable, k: float) -> tf.Variable:
    """Performs pruning on a weight matrix w in the following way:

    - The euclidean norm of each column is computed.
    - The indices of smallest k% columns based on their euclidean norms are
      selected.
    - All elements in the columns that have the matching indices are set to 0.

    Args:
        w: The weight matrix.
        k: The fraction of columns (units) that should be pruned from the
            matrix.

    Returns:
        The result of assigning the unit pruned matrix back to ``w``.
    """
    # Number of whole columns to zero out.
    column_count = tf.cast(tf.shape(w)[1], tf.float32)
    num_pruned = tf.cast(tf.round(column_count * tf.constant(k)),
                         dtype=tf.int32)

    column_norms = tf.norm(w, axis=0)
    # Row indices 0..rows-1 repeated once per pruned column.
    row_indices = tf.tile(tf.range(tf.shape(w)[0]), [num_pruned])

    # top_k on the negated norms yields the columns with the smallest norm.
    _, weakest = tf.nn.top_k(tf.negative(column_norms), num_pruned,
                             sorted=True, name=None)
    # Repeat every selected column index once per row.
    repeated = tf.tile(tf.reshape(weakest, [-1, 1]), [1, tf.shape(w)[0]])
    col_indices = tf.reshape(repeated, [-1])

    positions = tf.stack([row_indices, col_indices], axis=1)
    zero_updates = tf.zeros(tf.shape(w)[0] * num_pruned, tf.float32)
    return w.assign(tf.scatter_nd_update(w, positions, zero_updates))
def tf_show(var: tf.Variable, name=None, summarize=1000):
    """
    Useful function to print the value of the current variable during evaluation

    Args:
        var: variable to show
        name: name to display; defaults to ``var.name``
        summarize: number of values to display

    Returns:
        the same variable but wrapped with a Print module
    """
    if not name:
        name = var.name
    static_dims = tuple(dim.value for dim in var.get_shape())
    # The printed message is the name immediately followed by the shape.
    label = name + str(static_dims)
    return tf.Print(var, [var], message=label, summarize=summarize)
def check_shape(var1_tf: tf.Variable, var2_np: np.ndarray):
    """Log a red warning when the two shapes differ.

    Args:
        var1_tf: TensorFlow variable whose static shape is read.
        var2_np: NumPy array to compare against.

    Nothing is raised here; the function only logs and returns ``None``.
    """
    tf_shape = var1_tf.get_shape().as_list()
    np_shape = list(var2_np.shape)
    if tf_shape == np_shape:
        return
    log("Shapes do not match! Exception will follow.", color="red")