def l2_normalize(vector):
    # L2 norm of the vector, clamped at machine epsilon to avoid division by zero
    norm = dy.sqrt(dy.bmax(
        dy.sum_elems(dy.square(vector)),
        np.finfo(float).eps * dy.ones((1))[0],
    ))
    return dy.cdiv(vector, norm)
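# Usage sketch for l2_normalize (assumes `import dynet as dy` and
# `import numpy as np`, as in the snippet above; the 3-vector is made up):
import dynet as dy
import numpy as np

dy.renew_cg()
v = dy.inputVector([3.0, 4.0, 0.0])
unit = l2_normalize(v)
print(unit.npvalue())  # ~[0.6, 0.8, 0.0]; its L2 norm is ~1.0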
def __call__(self, s1, s2):
    b_nli = dy.parameter(self.b_nli)
    W_nli_1 = dy.parameter(self.W_nli_1)
    W_nli_2 = dy.parameter(self.W_nli_2)
    W_nli_u = dy.parameter(self.W_nli_u)
    W_nli_v = dy.parameter(self.W_nli_v)
    # squared difference and elementwise product of the two sentence encodings
    u = dy.square(s1 - s2)
    v = dy.cmult(s1, s2)
    relu = dy.rectify(dy.affine_transform(
        [b_nli, W_nli_1, s1, W_nli_2, s2, W_nli_u, u, W_nli_v, v]))
    b_s = dy.parameter(self.b_s)
    w_s = dy.parameter(self.w_s)
    return dy.affine_transform([b_s, w_s, relu])
def calc_reinforce_loss(words, tags, delta):
    dy.renew_cg()
    # Transduce all batch elements with an LSTM
    word_reps = LSTM.transduce([LOOKUP[x] for x in words])
    # Softmax scores
    W = dy.parameter(W_sm)
    b = dy.parameter(b_sm)
    scores = [dy.affine_transform([b, W, x]) for x in word_reps]
    # cross-entropy losses against the gold tags
    losses = [dy.pickneglogsoftmax(score, tag) for score, tag in zip(scores, tags)]
    # per-word probability distributions over tags, used for sampling
    probs = [dy.softmax(score).npvalue() for score in scores]
    # sample a tag for each word from its predicted distribution
    samples = [np.random.choice(len(x), p=x) for x in probs]
    # accuracy of the samples serves as the reward
    correct = [sample == tag for sample, tag in zip(samples, tags)]
    r_i = float(sum(correct)) / len(correct)
    r = dy.constant((1), r_i)
    # Reward baseline for each word
    W_bl = dy.parameter(W_bl_p)
    b_bl = dy.parameter(b_bl_p)
    r_b = [dy.affine_transform([b_bl, W_bl, dy.nobackprop(x)]) for x in word_reps]
    # nobackprop breaks the computation graph here: the baseline is trained
    # separately and is not backpropagated through in the overall score
    rewards_over_baseline = [(r - dy.nobackprop(x)) for x in r_b]
    # the scores for training the baseline
    baseline_scores = [dy.square(r - x) for x in r_b]
    # REINFORCE: scale the negative log-likelihood of each *sampled* tag
    # by (reward - baseline)
    sample_losses = [dy.pickneglogsoftmax(score, sample)
                     for score, sample in zip(scores, samples)]
    reinforce_scores = [r_s * loss
                        for r_s, loss in zip(rewards_over_baseline, sample_losses)]
    # MIXER: the first len(sent)-delta losses come from cross-entropy,
    # the last delta from REINFORCE
    if len(losses) > delta:
        mixer_scores = losses[:len(losses) - delta] + reinforce_scores[-delta:]
    else:
        mixer_scores = reinforce_scores
    return dy.esum(mixer_scores), dy.esum(baseline_scores)
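# Toy check of the MIXER split above (a sketch; the lists are made up):
# with a 5-word sentence and delta=2, the first 3 positions train with
# cross-entropy and the last 2 with REINFORCE.
xent = ['x0', 'x1', 'x2', 'x3', 'x4']
reinf = ['r0', 'r1', 'r2', 'r3', 'r4']
delta = 2
mixed = xent[:len(xent) - delta] + reinf[-delta:]
assert mixed == ['x0', 'x1', 'x2', 'r3', 'r4']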
def loss(self, features, y):
    b1 = dy.parameter(self.b1)
    W1 = dy.parameter(self.W1)
    b2 = dy.parameter(self.b2)
    W2 = dy.parameter(self.W2)
    x = dy.inputVector(features)
    prediction = dy.tanh(dy.affine_transform(
        [b2, W2, dy.tanh(dy.affine_transform([b1, W1, x]))]))
    loss = dy.square(prediction - y)
    return prediction, loss
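# Usage sketch for the two-layer regressor above (hypothetical names:
# `regressor` is an instance holding W1/b1/W2/b2, `trainer` a
# dy.SimpleSGDTrainer on the same model; feature values and target are
# made up; assumes a scalar output):
dy.renew_cg()
prediction, loss = regressor.loss([0.2, -0.5, 1.0], 0.3)
loss_value = loss.value()  # run the forward pass
loss.backward()            # accumulate gradients
trainer.update()           # apply a gradient step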
def _perform_calc_loss(
        self,
        model: 'model_base.ConditionedModel',
        src: Union[sent.Sentence, 'batchers.Batch'],
        trg: Union[sent.Sentence, 'batchers.Batch']) -> losses.FactoredLossExpr:
    assert hasattr(model, "attender") and hasattr(model.attender, "attention_vecs"), \
        "Must be called after MLELoss with models that have an attender."
    masked_attn = model.attender.attention_vecs
    if trg.mask is not None:
        trg_mask = 1 - (trg.mask.np_arr.transpose())
        masked_attn = [dy.cmult(attn, dy.inputTensor(mask, batched=True))
                       for attn, mask in zip(masked_attn, trg_mask)]
    # penalize the total attention each source word receives for deviating from 1
    loss = dy.sum_elems(dy.square(1 - dy.esum(masked_attn)))
    units = [t.len_unpadded() for t in trg]
    return losses.FactoredLossExpr({"global_fertility": losses.LossExpr(loss, units)})
def global_fertility(self, a):
    return dy.sum_elems(dy.square(1 - dy.esum(a)))
def mse_loss(predictions, target):
    diff = predictions - target
    square = dy.square(diff)
    mean = dy.mean_elems(square)
    return mean
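# Usage sketch for mse_loss (assumes `import dynet as dy`; values made up):
dy.renew_cg()
pred = dy.inputVector([1.0, 2.0, 3.0])
gold = dy.inputVector([1.0, 2.5, 2.0])
print(mse_loss(pred, gold).value())  # (0^2 + 0.5^2 + 1^2) / 3 ~= 0.4167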
def global_fertility(self, a: Sequence[dy.Expression]) -> dy.Expression:
    return dy.sum_elems(dy.square(1 - dy.esum(a)))
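# Toy illustration of the global fertility penalty (a sketch; assumes
# `import dynet as dy`): each attention vector distributes mass over 3
# source words; if every source word receives total mass 1 across target
# steps, the penalty is 0.
dy.renew_cg()
a = [dy.inputVector([0.7, 0.2, 0.1]), dy.inputVector([0.3, 0.8, 0.9])]
penalty = dy.sum_elems(dy.square(1 - dy.esum(a)))
print(penalty.value())  # 0.0, since dy.esum(a) is [1, 1, 1]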
def predict(self, features, task_name, train=False):
    """
    Steps through the computation graph and obtains predictions for the
    provided input features.
    :param features: a list of word embeddings for every word in the sequence
    :param task_name: the name of the task that should be predicted
    :param train: if the model is training; apply noise in this case
    :return output: the output predictions
            penalty: the summed subspace penalty (0 if no constraint)
    """
    if train:  # noise is added only at training time
        features = [dynet.noise(fe, self.noise_sigma) for fe in features]

    # only if we use cross-stitch units do we have a layer for each task;
    # otherwise we just have one layer for all tasks
    num_layers = self.h_layers
    num_task_layers = len(self.predictors['inner'][0])
    inputs = [features] * len(self.task_names)
    inputs_rev = [features] * len(self.task_names)
    target_task_id = self.task_names.index(task_name) if self.cross_stitch else 0

    # collect the forward and backward sequences for each task at every
    # layer for the layer connection units
    layer_forward_sequences = []
    layer_backward_sequences = []
    penalty = dynet.const_parameter(self.subspace_penalty)
    for i in range(0, num_layers):
        forward_sequences = []
        backward_sequences = []
        for j in range(num_task_layers):
            predictor = self.predictors['inner'][i][j]
            forward_sequence, backward_sequence = predictor.predict_sequence(
                inputs[j], inputs_rev[j])
            if i > 0 and self.activation:
                # activation between LSTM layers
                forward_sequence = [self.activation(s) for s in forward_sequence]
                backward_sequence = [self.activation(s) for s in backward_sequence]
            forward_sequences.append(forward_sequence)
            backward_sequences.append(backward_sequence)

            if self.num_subspaces == 2 and self.constraint_weight != 0:
                # get_parameter_expressions returns a list per layer,
                # i.e. here a list with one item
                lstm_parameters = predictor.builder.get_parameter_expressions()[0]
                # the LSTM parameters consist of these weights:
                # Wix,Wih,Wic,bi,Wox,Woh,Woc,bo,Wcx,Wch,bc
                for param_idx in range(len(lstm_parameters)):
                    if param_idx in self.constrain_matrices:
                        W = lstm_parameters[param_idx]
                        W_shape = np.array(W.value()).shape
                        if len(W_shape) < 2:
                            W_shape = [W_shape[0], 1]
                        # split the matrix into its two subspaces
                        W_subspaces = dynet.reshape(
                            W, (self.num_subspaces,
                                W_shape[0] // self.num_subspaces,
                                W_shape[1]))
                        subspace_1, subspace_2 = W_subspaces[0], W_subspaces[1]
                        # calculate the matrix product of the two subspaces
                        matrix_product = dynet.transpose(subspace_1) * subspace_2
                        # take the squared Frobenius norm by squaring
                        # every element and then summing
                        squared_frobenius_norm = dynet.sum_elems(
                            dynet.square(matrix_product))
                        penalty += squared_frobenius_norm

        if self.cross_stitch:
            # takes a list of input lists and produces a list of outputs
            # where the index indicates the task
            forward_sequences = self.predictors['cross_stitch'][i].stitch(
                forward_sequences)
            backward_sequences = self.predictors['cross_stitch'][i].stitch(
                backward_sequences)
        inputs = forward_sequences
        inputs_rev = backward_sequences
        layer_forward_sequences.append(forward_sequences)
        layer_backward_sequences.append(backward_sequences)

        if i == num_layers - 1:
            output_predictor = self.predictors['output_layers_dict'][task_name]
            # get the final forward/backward states of the target task at every layer
            task_forward_sequences = [
                layer_seq_list[target_task_id][-1]
                for layer_seq_list in layer_forward_sequences]
            task_backward_sequences = [
                layer_seq_list[target_task_id][0]
                for layer_seq_list in layer_backward_sequences]
            if num_layers > 1:
                forward_input = self.predictors['layer_stitch'][
                    target_task_id].stitch(task_forward_sequences)
                backward_input = self.predictors['layer_stitch'][
                    target_task_id].stitch(task_backward_sequences)
            else:
                forward_input = task_forward_sequences[0]
                backward_input = task_backward_sequences[0]
            concat_layer = dynet.concatenate([forward_input, backward_input])
            if train and self.noise_sigma > 0.0:
                concat_layer = dynet.noise(concat_layer, self.noise_sigma)

            output = []
            if 'sentiment' in task_name:  # multi-label output
                for k in range(len(output_predictor)):
                    output.append(output_predictor[k](concat_layer))
            else:
                output.append(output_predictor(concat_layer))
            return output, penalty
    raise Exception('Error: This place should not be reached.')
def predict(self, features, task_name, train=False):
    """
    Steps through the computation graph and obtains predictions for the
    provided input features.
    :param features: a list of concatenated word and character-based
                     embeddings for every word in the sequence
    :param task_name: the name of the task that should be predicted
    :param train: if the model is training; apply noise in this case
    :return output: the output predictions
            penalty: the summed subspace penalty (0 if no constraint)
    """
    if train:  # noise is added only at training time
        features = [dynet.noise(fe, self.noise_sigma) for fe in features]

    output_expected_at_layer = self.predictors['task_expected_at'][task_name]
    output_expected_at_layer -= 1  # subtract 1 as layers are 0-indexed

    # only if we use cross-stitch units do we have a layer for each task;
    # otherwise we just have one layer for all tasks
    num_layers = self.h_layers
    num_task_layers = len(self.predictors['inner'][0])
    inputs = [features] * num_task_layers
    inputs_rev = [features] * num_task_layers
    # similarly, with cross-stitching, we have multiple output layers
    target_task_id = self.task_names.index(task_name) if self.cross_stitch else 0

    # collect the forward and backward sequences for each task at every
    # layer for the layer connection units
    layer_forward_sequences = []
    layer_backward_sequences = []
    penalty = dynet.parameter(self.subspace_penalty, update=False)
    for i in range(0, num_layers):
        forward_sequences = []
        backward_sequences = []
        for j in range(num_task_layers):
            predictor = self.predictors['inner'][i][j]
            forward_sequence, backward_sequence = predictor.predict_sequence(
                inputs[j], inputs_rev[j])
            if i > 0 and self.activation:
                # activation between LSTM layers
                forward_sequence = [self.activation(s) for s in forward_sequence]
                backward_sequence = [self.activation(s) for s in backward_sequence]
            forward_sequences.append(forward_sequence)
            backward_sequences.append(backward_sequence)

            if self.num_subspaces == 2 and self.constraint_weight != 0:
                # get_parameter_expressions returns a list per layer,
                # i.e. here a list with one item
                lstm_parameters = predictor.builder.get_parameter_expressions()[0]
                # the LSTM parameters consist of these weights:
                # Wix,Wih,Wic,bi,Wox,Woh,Woc,bo,Wcx,Wch,bc
                for param_idx in range(len(lstm_parameters)):
                    if param_idx in self.constrain_matrices:
                        W = lstm_parameters[param_idx]
                        W_shape = np.array(W.value()).shape
                        # split the matrix into its two subspaces
                        W_subspaces = dynet.reshape(
                            W, (self.num_subspaces,
                                W_shape[0] // self.num_subspaces,
                                W_shape[1]))
                        subspace_1, subspace_2 = W_subspaces[0], W_subspaces[1]
                        # calculate the matrix product of the two subspaces
                        matrix_product = dynet.transpose(subspace_1) * subspace_2
                        # take the squared Frobenius norm by squaring
                        # every element and then summing
                        squared_frobenius_norm = dynet.sum_elems(
                            dynet.square(matrix_product))
                        penalty += squared_frobenius_norm

        if self.cross_stitch:
            # takes a list of input lists and produces a list of outputs
            # where the index indicates the task
            forward_sequences = self.predictors['cross_stitch'][i].stitch(
                forward_sequences)
            backward_sequences = self.predictors['cross_stitch'][i].stitch(
                backward_sequences)
        inputs = forward_sequences
        inputs_rev = backward_sequences
        layer_forward_sequences.append(forward_sequences)
        layer_backward_sequences.append(backward_sequences)

        if i == output_expected_at_layer:
            output_predictor = self.predictors['output_layers_dict'][task_name]
            # get the forward/backward states of the target task at all layers
            task_forward_sequences = [
                layer_seq_list[target_task_id]
                for layer_seq_list in layer_forward_sequences]
            task_backward_sequences = [
                layer_seq_list[target_task_id]
                for layer_seq_list in layer_backward_sequences]
            if self.layer_connect == STITCH:
                # stitch the forward and backward sequences together
                forward_inputs = self.predictors['layer_stitch'][
                    target_task_id].stitch(task_forward_sequences)
                backward_inputs = self.predictors['layer_stitch'][
                    target_task_id].stitch(task_backward_sequences)
            elif self.layer_connect == SKIP:
                # use skip connections: sum the states across layers
                forward_inputs = [dynet.esum(list(layer_states))
                                  for layer_states in zip(*task_forward_sequences)]
                backward_inputs = [dynet.esum(list(layer_states))
                                   for layer_states in zip(*task_backward_sequences)]
            else:
                # otherwise just use the sequences from the last layer
                forward_inputs = forward_sequences[target_task_id]
                backward_inputs = backward_sequences[target_task_id]

            if self.layer_connect == CONCAT:
                layer_concatenated = []
                # concatenate forward and backward states of each layer
                for fwd_seqs, bwd_seqs in zip(task_forward_sequences,
                                              task_backward_sequences):
                    layer_concatenated.append(
                        [dynet.concatenate([f, b])
                         for f, b in zip(fwd_seqs, reversed(bwd_seqs))])
                # concatenate the states of all the task layers
                concat_layer = [dynet.concatenate(list(layer_states))
                                for layer_states in zip(*layer_concatenated)]
            else:
                concat_layer = [dynet.concatenate([f, b])
                                for f, b in zip(forward_inputs,
                                                reversed(backward_inputs))]
            if train and self.noise_sigma > 0.0:
                concat_layer = [dynet.noise(fe, self.noise_sigma)
                                for fe in concat_layer]
            output = output_predictor.predict_sequence(concat_layer)
            return output, penalty
    raise Exception('Error: This place should not be reached.')
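# Standalone sketch of the subspace-orthogonality penalty used in both
# predict() variants above (assumes `import dynet as dy` and
# `import numpy as np`; the 4x3 matrix is made up): splitting a weight
# matrix into two row-blocks and penalizing the squared Frobenius norm of
# their product pushes the two subspaces toward orthogonality.
dy.renew_cg()
W = dy.inputTensor(np.random.rand(4, 3))
W_sub = dy.reshape(W, (2, 2, 3))             # (num_subspaces, rows//2, cols)
product = dy.transpose(W_sub[0]) * W_sub[1]  # (2x3)^T . (2x3) -> 3x3
penalty = dy.sum_elems(dy.square(product))   # squared Frobenius norm
print(penalty.value())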
def l2_normalize(x):
    # L2 norm of x, clamped at machine epsilon to avoid division by zero
    norm = dynet.sqrt(dynet.bmax(dynet.sum_elems(dynet.square(x)),
                                 np.finfo(float).eps * dynet.ones((1))[0]))
    return dynet.cdiv(x, norm)
def l2_normalize(x):
    # epsilon vector matching x's shape, to guard the divisions below
    epsilon = np.finfo(float).eps * dy.ones(x.dim()[0])
    norm = dy.sqrt(dy.sum_elems(dy.square(x)))
    # sign(x), computed as x / max(|x|, eps)
    sign = dy.cdiv(x, dy.bmax(dy.abs(x), epsilon))
    return dy.cdiv(dy.cmult(sign, dy.bmax(dy.abs(x), epsilon)),
                   dy.bmax(norm, epsilon[0]))
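# Cross-check of l2_normalize against numpy (a sketch; assumes
# `import dynet as dy` and `import numpy as np`; values made up):
dy.renew_cg()
v = np.array([3.0, -4.0, 0.0])
out = l2_normalize(dy.inputVector(v)).npvalue()
print(out, v / np.linalg.norm(v))  # both ~[0.6, -0.8, 0.0]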
def __call__(self, x: dy.Expression, att_mask: np.ndarray,
             batch_mask: np.ndarray, p: numbers.Real):
    """
    x: expression of dimensions (input_dim, time) x batch
    att_mask: numpy array of dimensions (time, time); pre-transposed
    batch_mask: numpy array of dimensions (batch, time)
    p: dropout probability
    """
    sent_len = x.dim()[0][1]
    batch_size = x[0].dim()[1]

    if self.downsample_factor > 1:
        if sent_len % self.downsample_factor != 0:
            raise ValueError(
                "For 'reshape' downsampling, sequence lengths must be "
                "multiples of the downsampling factor. "
                "Configure batcher accordingly.")
        if batch_mask is not None:
            batch_mask = batch_mask[:, ::self.downsample_factor]
        sent_len_out = sent_len // self.downsample_factor
        sent_len = sent_len_out
        out_mask = x.mask
        if self.downsample_factor > 1 and out_mask is not None:
            out_mask = out_mask.lin_subsampled(
                reduce_factor=self.downsample_factor)
        x = ExpressionSequence(
            expr_tensor=dy.reshape(
                x.as_tensor(),
                (x.dim()[0][0] * self.downsample_factor,
                 x.dim()[0][1] // self.downsample_factor),
                batch_size=batch_size),
            mask=out_mask)
        residual = SAAMTimeDistributed()(x)
    else:
        residual = SAAMTimeDistributed()(x)
        sent_len_out = sent_len
    if self.model_dim != self.input_dim * self.downsample_factor:
        residual = self.res_shortcut.transform(residual)

    # Concatenate all the words together for doing vectorized affine transform
    if self.kq_pos_encoding_type is None:
        kvq_lin = self.linear_kvq.transform(SAAMTimeDistributed()(x))
        key_up = self.shape_projection(
            dy.pick_range(kvq_lin, 0, self.head_count * self.dim_per_head),
            batch_size)
        value_up = self.shape_projection(
            dy.pick_range(kvq_lin, self.head_count * self.dim_per_head,
                          2 * self.head_count * self.dim_per_head), batch_size)
        query_up = self.shape_projection(
            dy.pick_range(kvq_lin, 2 * self.head_count * self.dim_per_head,
                          3 * self.head_count * self.dim_per_head), batch_size)
    else:
        assert self.kq_pos_encoding_type == "embedding"
        encoding = self.kq_positional_embedder.embed_sent(sent_len).as_tensor()
        kq_lin = self.linear_kq.transform(
            SAAMTimeDistributed()(ExpressionSequence(
                expr_tensor=dy.concatenate([x.as_tensor(), encoding]))))
        key_up = self.shape_projection(
            dy.pick_range(kq_lin, 0, self.head_count * self.dim_per_head),
            batch_size)
        query_up = self.shape_projection(
            dy.pick_range(kq_lin, self.head_count * self.dim_per_head,
                          2 * self.head_count * self.dim_per_head), batch_size)
        v_lin = self.linear_v.transform(SAAMTimeDistributed()(x))
        value_up = self.shape_projection(v_lin, batch_size)

    if self.cross_pos_encoding_type:
        assert self.cross_pos_encoding_type == "embedding"
        emb1 = dy.pick_range(dy.parameter(self.cross_pos_emb_p1), 0, sent_len)
        emb2 = dy.pick_range(dy.parameter(self.cross_pos_emb_p2), 0, sent_len)
        key_up = dy.reshape(key_up,
                            (sent_len, self.dim_per_head, self.head_count),
                            batch_size=batch_size)
        key_up = dy.concatenate_cols(
            [dy.cmult(key_up, emb1), dy.cmult(key_up, emb2)])
        key_up = dy.reshape(key_up, (sent_len, self.dim_per_head * 2),
                            batch_size=self.head_count * batch_size)
        query_up = dy.reshape(query_up,
                              (sent_len, self.dim_per_head, self.head_count),
                              batch_size=batch_size)
        query_up = dy.concatenate_cols(
            [dy.cmult(query_up, emb2), dy.cmult(query_up, -emb1)])
        query_up = dy.reshape(query_up, (sent_len, self.dim_per_head * 2),
                              batch_size=self.head_count * batch_size)

    # scale before the matrix multiplication to save memory
    scaled = query_up * dy.transpose(key_up / math.sqrt(self.dim_per_head))

    # Apply masks
    if not self.ignore_masks:
        if att_mask is not None:
            att_mask_inp = att_mask * -100.0
            if self.downsample_factor > 1:
                att_mask_inp = att_mask_inp[::self.downsample_factor,
                                            ::self.downsample_factor]
            scaled += dy.inputTensor(att_mask_inp)
        if batch_mask is not None:
            # reshape (batch, time) -> (time, head_count*batch), then *-100
            inp = np.resize(
                np.broadcast_to(batch_mask.T[:, np.newaxis, :],
                                (sent_len, self.head_count, batch_size)),
                (1, sent_len, self.head_count * batch_size)) * -100
            mask_expr = dy.inputTensor(inp, batched=True)
            scaled += mask_expr
        if self.diag_gauss_mask:
            diag_growing = np.zeros((sent_len, sent_len, self.head_count))
            for i in range(sent_len):
                for j in range(sent_len):
                    diag_growing[i, j, :] = -(i - j) ** 2 / 2.0
            e_diag_gauss_mask = dy.inputTensor(diag_growing)
            e_sigma = dy.parameter(self.diag_gauss_mask_sigma)
            if self.square_mask_std:
                e_sigma = dy.square(e_sigma)
            e_sigma_sq_inv = dy.cdiv(
                dy.ones(e_sigma.dim()[0], batch_size=batch_size),
                dy.square(e_sigma))
            e_diag_gauss_mask_final = dy.cmult(e_diag_gauss_mask, e_sigma_sq_inv)
            scaled += dy.reshape(e_diag_gauss_mask_final, (sent_len, sent_len),
                                 batch_size=batch_size * self.head_count)

    # Compute the attention softmax
    attn = dy.softmax(scaled, d=1)
    if LOG_ATTENTION:
        yaml_logger.info({"key": "selfatt_mat_ax0",
                          "value": np.average(attn.value(), axis=0).dumps(),
                          "desc": self.desc})
        yaml_logger.info({"key": "selfatt_mat_ax1",
                          "value": np.average(attn.value(), axis=1).dumps(),
                          "desc": self.desc})
        yaml_logger.info({"key": "selfatt_mat_ax0_ent",
                          "value": entropy(attn.value()).dumps(),
                          "desc": self.desc})
        yaml_logger.info({"key": "selfatt_mat_ax1_ent",
                          "value": entropy(attn.value().transpose()).dumps(),
                          "desc": self.desc})

    self.select_att_head = 0  # hard-coded: zero out all heads except head 0
    if self.select_att_head is not None:
        attn = dy.reshape(attn, (sent_len, sent_len, self.head_count),
                          batch_size=batch_size)
        sel_mask = np.zeros((1, 1, self.head_count))
        sel_mask[0, 0, self.select_att_head] = 1.0
        attn = dy.cmult(attn, dy.inputTensor(sel_mask))
        attn = dy.reshape(attn, (sent_len, sent_len),
                          batch_size=self.head_count * batch_size)

    # Apply dropout to the attention weights
    if p > 0.0:
        drop_attn = dy.dropout(attn, p)
    else:
        drop_attn = attn

    # Compute the attention-weighted values
    attn_prod = drop_attn * value_up

    # Reshape attn_prod back to the input query dimensions
    out = dy.reshape(attn_prod,
                     (sent_len_out, self.dim_per_head * self.head_count),
                     batch_size=batch_size)
    out = dy.transpose(out)
    out = dy.reshape(out, (self.model_dim,),
                     batch_size=batch_size * sent_len_out)

    if self.plot_attention:
        from sklearn.metrics.pairwise import cosine_similarity
        assert batch_size == 1
        mats = []
        for i in range(attn.dim()[1]):
            mats.append(dy.pick_batch_elem(attn, i).npvalue())
            self.plot_att_mat(
                mats[-1],
                "{}.sent_{}.head_{}.png".format(
                    self.plot_attention, self.plot_attention_counter, i),
                300)
        avg_mat = np.average(mats, axis=0)
        self.plot_att_mat(
            avg_mat,
            "{}.sent_{}.head_avg.png".format(
                self.plot_attention, self.plot_attention_counter),
            300)
        cosim_before = cosine_similarity(x.as_tensor().npvalue().T)
        self.plot_att_mat(
            cosim_before,
            "{}.sent_{}.cosim_before.png".format(
                self.plot_attention, self.plot_attention_counter),
            600)
        cosim_after = cosine_similarity(out.npvalue().T)
        self.plot_att_mat(
            cosim_after,
            "{}.sent_{}.cosim_after.png".format(
                self.plot_attention, self.plot_attention_counter),
            600)
        self.plot_attention_counter += 1

    # Add dropout, the residual connection, and layer normalization
    if p > 0.0:
        res = dy.dropout(out, p) + residual
    else:
        res = out + residual
    ret = self.layer_norm.transform(res)
    return ret
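# Minimal single-head scaled dot-product attention in plain DyNet, isolating
# the core of the block above (a sketch; assumes `import dynet as dy`,
# `import numpy as np`, and `import math`; dimensions and inputs made up):
dy.renew_cg()
sent_len, dim = 5, 8
Q = dy.inputTensor(np.random.rand(sent_len, dim))
K = dy.inputTensor(np.random.rand(sent_len, dim))
V = dy.inputTensor(np.random.rand(sent_len, dim))
scaled = Q * dy.transpose(K / math.sqrt(dim))  # (time, time) attention scores
attn = dy.softmax(scaled, d=1)                 # each row sums to 1
out = attn * V                                 # (time, dim) weighted values
print(out.dim())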
newSeason.append(newSeason[0])
newLevels.append(1 * dy.cdiv(y[0], newSeason[0]))
# perform smoothing
for i in range(1, len(df)):
    newLevels.append(levelSm * dy.cdiv(y[i], newSeason[i])
                     + (1 - levelSm) * newLevels[i - 1])
    newSeason.append(seasonSm * dy.cdiv(y[i], newLevels[i])
                     + (1 - seasonSm) * newSeason[i])
s = dy.concatenate(newSeason)
l = dy.concatenate(newLevels)

# penalize sudden level changes by penalizing the 2nd derivative of the
# log-level (should be scale-independent; as written it is dependent)
l_log_diff = dy.log(dy.cdiv(l[1:], l[0:l.dim()[0][0] - 1]))
l_penalty = l_log_diff[1:] - l_log_diff[0:l_log_diff.dim()[0][0] - 1]
level_loss = dy.mean_elems(dy.square(l_penalty)) * 10
print(level_loss.value())

preds = []
outputs = []
# take y and remove seasonality and level
for i in range(n, len(df) - h):
    inputs = y[i - n:i]  # the last n periods
    curr_season = s[i - n:i]
    inputs = dy.cdiv(inputs, l[i])
    inputs = dy.cdiv(inputs, curr_season)
    inputs = dy.log(inputs)
    reseasonalize = s[i + 1]  # season of the next step
    preds.append(dy.exp(fcstr(inputs)) * l[i] * reseasonalize)
    outputs.append(y[i + 1])  # target: one step ahead
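# Plain-numpy sketch of the multiplicative (Holt-Winters style) smoothing
# recursion above, to make the update rules explicit (smoothing coefficients
# and the toy series are made up; no DyNet involved):
import numpy as np

y = np.array([10.0, 12.0, 9.0, 11.0, 13.0])
level_sm, season_sm = 0.5, 0.3
seasons = [1.0, 1.0]           # initial seasonality estimates
levels = [y[0] / seasons[0]]
for i in range(1, len(y)):
    levels.append(level_sm * y[i] / seasons[i] + (1 - level_sm) * levels[i - 1])
    seasons.append(season_sm * y[i] / levels[i] + (1 - season_sm) * seasons[i])
print(levels, seasons)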
def global_fertility(self, a: Sequence[tt.Tensor]) -> tt.Tensor:
    return dy.sum_elems(dy.square(1 - dy.esum(a)))