def forward(self, user_id, seq, item_id):
    item_embs = np.expand_dims(self.Q(seq), 1)
    user_emb = self.P(user_id)  # (4096, 10)
    out, out_h, out_v, out_hs = None, None, None, []
    # Vertical convolution
    if self.d_prime:
        out_v = self.conv_v(item_embs)
        out_v = out_v.reshape(out_v.shape[0], self.fc1_dim_v)  # (4096, 4*10)
    # Horizontal convolution, sliding over the time axis
    if self.d:
        for conv, maxp in zip(self.conv_h, self.max_pool):
            conv_out = np.squeeze(npx.relu(conv(item_embs)), axis=3)
            t = maxp(conv_out)
            pool_out = np.squeeze(t, axis=2)
            out_hs.append(pool_out)
        out_h = np.concatenate(out_hs, axis=1)  # (4096, 16*3)
    out = np.concatenate([out_v, out_h], axis=1)  # (4096, 4*10+16*3)
    z = self.fc(self.dropout(out))  # (4096, 10)
    # Concatenate with the user embedding
    x = np.concatenate([z, user_emb], axis=1)  # (4096, 20)
    # Score against the target item embedding
    q_prime_i = np.squeeze(self.Q_prime(item_id))  # (4096, 20)
    b = np.squeeze(self.b(item_id))
    res = (x * q_prime_i).sum(1) + b  # (4096,)
    return res
def incremental_decode(self, step_hidden_states, step_position_embeddings,
                       past_key_value, mem_states, step_mem_attn_mask):
    # 1. self-attention
    out = self.self_attn_layer_norm(step_hidden_states)
    step_self_query, step_self_key, step_self_value = (
        self.transpose_for_scores(self.self_attn_q(out)),
        self.transpose_for_scores(self.self_attn_k(out)),
        self.transpose_for_scores(self.self_attn_v(out)))
    self_key, self_value = (
        np.concatenate([past_key_value[0], step_self_key],
                       axis=self._time_axis),
        np.concatenate([past_key_value[1], step_self_value],
                       axis=self._time_axis))
    out, _ = self.self_attn(step_self_query, self_key, self_value, None,
                            step_position_embeddings)
    out = self.dropout(self.self_attn_proj(out))
    step_hidden_states = step_hidden_states + out
    # 2. cross-attention
    out = self.cross_attn_layer_norm(step_hidden_states)
    step_cross_query, cross_key, cross_value = (
        self.transpose_for_scores(self.cross_attn_q(out)),
        self.transpose_for_scores(self.cross_attn_k(mem_states)),
        self.transpose_for_scores(self.cross_attn_v(mem_states)))
    out, _ = self.cross_attn(step_cross_query, cross_key, cross_value,
                             step_mem_attn_mask)
    out = self.dropout(self.cross_attn_proj(out))
    step_hidden_states = step_hidden_states + out
    # 3. feed forward
    step_hidden_states = self.ffn(step_hidden_states)
    return step_hidden_states, (self_key, self_value)
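# A minimal standalone sketch of the key/value-cache update performed in
# incremental_decode above. The (batch, num_heads, time, head_dim) layout and
# a time axis of 2 are assumptions for illustration; the real layer reads the
# axis from self._time_axis.
from mxnet import np, npx
npx.set_np()

batch, num_heads, head_dim, time_axis = 2, 4, 8, 2
past_key = np.zeros((batch, num_heads, 5, head_dim))  # 5 steps decoded so far
step_key = np.ones((batch, num_heads, 1, head_dim))   # projection of one new step
key = np.concatenate([past_key, step_key], axis=time_axis)
print(key.shape)  # (2, 4, 6, 8): the cache grows by one step per call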
def forward(self, positions):
    """
    Parameters
    ----------
    positions : NDArray
        Shape (..., )

    Returns
    -------
    ret :
        Shape (..., units)
    """
    emb = np.expand_dims(positions.astype(self._dtype),
                         axis=-1) * self.base_mult.data()
    sin_emb = np.sin(emb)
    cos_emb = np.cos(emb)
    if self._units % 2 == 0:
        return np.concatenate([sin_emb, cos_emb], axis=-1)
    else:
        # For an odd number of units, pad with a single zero column
        return np.concatenate([
            sin_emb, cos_emb,
            np.expand_dims(np.zeros_like(positions).astype(self._dtype),
                           axis=-1)
        ], axis=-1)
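# A runnable sketch of the odd-units branch above. The inverse frequencies in
# base_mult are hypothetical stand-ins for the parameter the layer stores.
from mxnet import np, npx
npx.set_np()

units = 5  # odd, so one zero column is appended
positions = np.arange(4, dtype='float32')
base_mult = 1.0 / np.power(10000, 2 * np.arange(units // 2, dtype='float32') / units)
emb = np.expand_dims(positions, axis=-1) * base_mult
out = np.concatenate([np.sin(emb), np.cos(emb),
                      np.expand_dims(np.zeros_like(positions), axis=-1)],
                     axis=-1)
print(out.shape)  # (4, 5)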
def forward(self,
            inputs: np.ndarray,
            previous_states: Optional[np.ndarray] = None,
            input_lengths: Optional[np.ndarray] = None,
            bias: Optional[np.ndarray] = None,
            *args) -> Tuple[np.ndarray, np.ndarray]:  # mypy: ignore
    """
    Computes multi-head attention on a set of inputs, serving as queries,
    keys, and values. If sequence lengths are provided, they will be used to
    mask the attention scores. A bias mask may also be used to mask the
    attention scores. May also use a cache of previously computed inputs.

    :param inputs: Input data. Shape: (max_length, batch, input_depth).
    :param previous_states: Optional ndarray with previously computed keys
           and values, fused on the depth axis.
           Shape: (prev_max_length, batch, 2 * depth_att).
    :param input_lengths: Optional lengths of inputs to mask attention
           scores. Shape: (batch, 1).
    :param bias: Optional 3d bias tensor to mask attention scores.
    :return: Tuple of the attention output, shape
             (max_length, batch, output_depth), and the updated key/value
             states for the next step.
    """
    proj = self.ff_in(inputs)
    queries, kv_1, kv_2 = np.split(proj, 3, axis=2)
    states = np.concatenate((kv_1, kv_2), axis=2)
    if previous_states is not None:
        states = np.concatenate((previous_states, states), axis=0)
    return self._attend(queries, states, lengths=input_lengths, bias=bias), states
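# The fused input projection above, in isolation: a single matmul produces
# queries, keys, and values; np.split carves them apart on the depth axis and
# the keys/values stay fused in one 'states' array for caching. Shapes below
# are illustrative only.
from mxnet import np, npx
npx.set_np()

max_length, batch, depth = 5, 2, 12
proj = np.random.normal(size=(max_length, batch, 3 * depth))
queries, kv_1, kv_2 = np.split(proj, 3, axis=2)
states = np.concatenate((kv_1, kv_2), axis=2)
print(queries.shape, states.shape)  # (5, 2, 12) (5, 2, 24)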
def k_fold_cross_valid(k, epochs, verbose_epoch, X_train, y_train,
                       learning_rate, weight_decay, batch_size):
    """Conducts k-fold cross validation for the model."""
    assert k > 1
    fold_size = X_train.shape[0] // k
    train_loss_sum = 0.0
    test_loss_sum = 0.0
    for test_idx in range(k):
        # The test_idx-th fold is held out for validation
        X_val_test = X_train[test_idx * fold_size:(test_idx + 1) * fold_size, :]
        y_val_test = y_train[test_idx * fold_size:(test_idx + 1) * fold_size]
        val_train_defined = False
        for i in range(k):
            if i != test_idx:
                X_cur_fold = X_train[i * fold_size:(i + 1) * fold_size, :]
                y_cur_fold = y_train[i * fold_size:(i + 1) * fold_size]
                if not val_train_defined:
                    X_val_train = X_cur_fold
                    y_val_train = y_cur_fold
                    val_train_defined = True
                else:
                    X_val_train = np.concatenate([X_val_train, X_cur_fold], axis=0)
                    y_val_train = np.concatenate([y_val_train, y_cur_fold], axis=0)
        net = get_net()
        train_loss = train(net, X_val_train, y_val_train, epochs,
                           verbose_epoch, learning_rate, weight_decay,
                           batch_size)
        train_loss_sum += train_loss
        test_loss = get_rmse_log(net, X_val_test, y_val_test)
        print("Test loss: %f" % test_loss)
        test_loss_sum += test_loss
    return train_loss_sum / k, test_loss_sum / k
def get_logits(self, hidden):
    """Get all the logits.

    Parameters
    ----------
    hidden
        The hidden representation.
        Shape (..., in_units)

    Returns
    -------
    logits
        Shape (..., |V|)
    """
    if self._cutoffs is None:
        if self._in_units != self._embed_size:
            hidden = self.inter_proj_l[0](hidden)
        logits = self.out_proj_l[0](hidden)
        return logits
    else:
        all_logits = []
        if self._div_val == 1.0:
            if self._in_units == self._embed_size:
                all_scores = self.out_proj_l[0](hidden)
                tail_cluster_scores = self.tail_cluster_score_proj(hidden)
            else:
                inter_hidden = self.inter_proj_l[0](hidden)
                all_scores = self.out_proj_l[0](inter_hidden)
                tail_cluster_scores = self.tail_cluster_score_proj(inter_hidden)
            all_scores_l = np.split(all_scores, self._cutoffs, axis=-1)
            head_scores = all_scores_l[0]
        else:
            inter_hidden = self.inter_proj_l[0](hidden)
            head_scores = self.out_proj_l[0](inter_hidden)
            tail_cluster_scores = self.tail_cluster_score_proj(inter_hidden)
        head_tail_cluster_logits = npx.log_softmax(
            np.concatenate([head_scores, tail_cluster_scores], axis=-1),
            axis=-1)
        head_logits, tail_cluster_logits = np.split(
            head_tail_cluster_logits, [self._cutoffs[0]], axis=-1)
        tail_cluster_logits = np.split(tail_cluster_logits,
                                       self._num_tail_clusters, axis=-1)
        all_logits.append(head_logits)
        for i in range(1, len(self._cutoffs) + 1):
            if self._div_val == 1.0:
                ele_scores = all_scores_l[i]
            else:
                ele_scores = self.out_proj_l[i](self.inter_proj_l[i](hidden))
            ele_logits = npx.log_softmax(ele_scores, axis=-1)
            ele_logits = tail_cluster_logits[-i] + ele_logits
            all_logits.append(ele_logits)
        return np.concatenate(all_logits, axis=-1)
def forward(self, x, layer_states):
    """
    Parameters
    ----------
    x
        - layout = 'NT'
            Shape (batch_size, seq_length, C_in)
        - layout = 'TN'
            Shape (seq_length, batch_size, C_in)
    layer_states
        - layout = 'NT'
            Shape (2, batch_size, prev_len, C_in)
        - layout = 'TN'
            Shape (2, prev_len, batch_size, C_in)
    """
    x = self.ln(x)
    if self._layout == 'NT':
        batch_axis, time_axis = 0, 1
        prev_len = npx.shape_array(layer_states)[2]
    else:
        batch_axis, time_axis = 1, 0
        prev_len = npx.shape_array(layer_states)[1]

    query, key, value = np.split(self.qkv(x), 3, axis=-1)
    if layer_states is not None:
        prev_key, prev_value = layer_states[0], layer_states[1]
        key = np.concatenate([prev_key, key], axis=time_axis)
        value = np.concatenate([prev_value, value], axis=time_axis)
    new_states = np.stack([key, value], axis=0)

    # Generate the causal mask
    query_pos = npx.arange_like(query, axis=time_axis)
    if prev_len is not None:
        query_pos = query_pos + prev_len
    key_pos = npx.arange_like(key, axis=time_axis)
    # (query_len, key_len)
    mask = (npx.reshape(key_pos, (1, -1)) <=
            npx.reshape(query_pos, (-1, 1))).astype(self._dtype)
    # Broadcast to (batch_size, query_len, key_len)
    mask = npx.broadcast_like(np.expand_dims(mask, axis=0), query,
                              lhs_axes=0, rhs_axes=batch_axis)
    query = npx.reshape(query, (-2, -2, self._num_heads, -1))
    key = npx.reshape(key, (-2, -2, self._num_heads, -1))
    value = npx.reshape(value, (-2, -2, self._num_heads, -1))
    out, [_, attn_weight] = self.attention_cell(query, key, value, mask)
    out = self.out_proj(out)
    out = self.hidden_dropout(out)
    return out, new_states
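# A standalone sketch of the causal-mask construction above: a query at step t
# may attend to every cached key plus all keys up to and including t.
# np.arange stands in for npx.arange_like here.
from mxnet import np, npx
npx.set_np()

prev_len, query_len = 3, 2
query_pos = np.arange(query_len) + prev_len  # positions 3, 4
key_pos = np.arange(prev_len + query_len)    # positions 0..4
mask = (key_pos.reshape(1, -1) <= query_pos.reshape(-1, 1)).astype('float32')
print(mask)
# [[1. 1. 1. 1. 0.]
#  [1. 1. 1. 1. 1.]]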
def forward(self, user_id, item_id):
    p_mf = self.P(user_id)
    q_mf = self.Q(item_id)
    gmf = p_mf * q_mf
    p_mlp = self.U(user_id)
    q_mlp = self.V(item_id)
    mlp = self.mlp(np.concatenate([p_mlp, q_mlp], axis=1))  # (1024, 20)
    con_res = np.concatenate([gmf, mlp], axis=1)
    return np.sum(con_res, axis=-1)  # (1024,)
def forward(self, user_id, item_id):
    p_mf = self.P(user_id)  # p_mf: (batch_size, num_hidden)
    q_mf = self.Q(item_id)
    x = p_mf * q_mf
    p_mlp = self.U(user_id)
    q_mlp = self.V(item_id)
    mlp = self.mlp(np.concatenate([p_mlp, q_mlp], axis=1))
    con_res = np.concatenate([x, mlp], axis=1)
    return self.prediction_layer(con_res)
def get_k_fold_data(k, i, X, y):
    assert k > 1
    fold_size = X.shape[0] // k
    X_train, y_train = None, None
    for j in range(k):
        idx = slice(j * fold_size, (j + 1) * fold_size)
        X_part, y_part = X[idx, :], y[idx]
        if j == i:
            X_valid, y_valid = X_part, y_part
        elif X_train is None:
            X_train, y_train = X_part, y_part
        else:
            X_train = np.concatenate([X_train, X_part], 0)
            y_train = np.concatenate([y_train, y_part], 0)
    return X_train, y_train, X_valid, y_valid
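# Quick check of the fold bookkeeping in get_k_fold_data with synthetic data:
# fold i is held out, the remaining k-1 folds are concatenated for training.
from mxnet import np, npx
npx.set_np()

X = np.arange(20).reshape(10, 2)
y = np.arange(10)
X_train, y_train, X_valid, y_valid = get_k_fold_data(5, 2, X, y)
print(X_train.shape, X_valid.shape)  # (8, 2) (2, 2)
print(X_valid)  # rows 4 and 5, i.e. the third fold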
def forward(self, V_A, V_B):
    # Sum up both sets of comparison vectors
    V_A = V_A.sum(axis=1)
    V_B = V_B.sum(axis=1)
    # Feed the concatenation of both summarization results into an MLP
    Y_hat = self.h(np.concatenate([V_A, V_B], axis=1))
    return Y_hat
def get_answerable_logits(self, contextual_embedding, p_mask):
    """Get the answerable logits.

    Parameters
    ----------
    contextual_embedding
        Shape (batch_size, sequence_length, C)
    p_mask
        Shape (batch_size, sequence_length)
        Mask the sequence.
        0 --> the element is masked,
        1 --> the element is not masked

    Returns
    -------
    answerable_logits
        Shape (batch_size, 2)
    """
    # Shape (batch_size, sequence_length)
    start_scores = np.squeeze(self.start_scores(contextual_embedding), -1)
    start_score_weights = masked_softmax(start_scores, p_mask, axis=-1)
    start_agg_feature = npx.batch_dot(
        np.expand_dims(start_score_weights, axis=1), contextual_embedding)
    start_agg_feature = np.squeeze(start_agg_feature, 1)
    cls_feature = contextual_embedding[:, 0, :]
    answerable_scores = self.answerable_scores(
        np.concatenate([start_agg_feature, cls_feature], axis=-1))
    answerable_logits = npx.log_softmax(answerable_scores, axis=-1)
    return answerable_logits
def train_s2s(model: gluon.nn.Block, data_iter, lr, num_epochs, tgt_vocab,
              device):
    trainer = gluon.Trainer(model.collect_params(), 'adam',
                            {'learning_rate': lr})
    loss = MaskedSoftmaxCELoss()
    animator = d2l.Animator(xlabel='epoch', ylabel='loss',
                            xlim=[10, num_epochs])
    for epoch in range(num_epochs):
        timer = d2l.Timer()
        metric = d2l.Accumulator(2)  # Sum of training loss, no. of tokens
        for batch in data_iter:
            X, X_valid_len, Y, Y_valid_len = [
                x.as_in_ctx(device) for x in batch]
            bos_id = tgt_vocab['<bos>']
            bos = np.array([bos_id] * Y.shape[0], ctx=device)
            bos = bos.reshape(-1, 1)
            # Teacher forcing: feed <bos> plus Y without its last token
            dec_input = np.concatenate([bos, Y[:, :-1]], axis=1)
            with autograd.record():
                Y_hat, _ = model(X, dec_input)
                l = loss(Y_hat, Y, Y_valid_len)
            l.backward()
            d2l.grad_clipping(model, 1)
            num_tokens = Y_valid_len.sum()
            trainer.step(num_tokens)
            metric.add(l.sum(), num_tokens)
        if (epoch + 1) % 10 == 0:
            animator.add(epoch + 1, (metric[0] / metric[1],))
    print(f'loss {metric[0] / metric[1]:.3f}, '
          f'{metric[1] / timer.stop():.1f} tokens/sec on {str(device)}')
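# A minimal sketch of the teacher-forcing shift used in train_s2s, with a
# hypothetical <bos> id of 1: the decoder sees <bos> plus the target shifted
# right by one token.
from mxnet import np, npx
npx.set_np()

Y = np.array([[5, 6, 7, 8], [9, 10, 11, 12]])  # (batch_size, num_steps)
bos = np.full((Y.shape[0], 1), 1)
dec_input = np.concatenate([bos, Y[:, :-1]], axis=1)
print(dec_input)
# [[ 1.  5.  6.  7.]
#  [ 1.  9. 10. 11.]]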
def get_end_logits(self, contextual_embedding, start_positions, p_mask):
    """
    Parameters
    ----------
    contextual_embedding
        Shape (batch_size, sequence_length, C)
    start_positions
        Shape (batch_size, N)
        We process multiple candidates simultaneously
    p_mask
        Shape (batch_size, sequence_length)

    Returns
    -------
    end_logits
        Shape (batch_size, N, sequence_length)
    """
    # Select the features at the start_positions
    # start_features will have shape (batch_size, N, C)
    start_features = select_vectors_by_position(contextual_embedding,
                                                start_positions)
    # Concatenate the start features and the contextual embedding
    contextual_embedding = np.expand_dims(contextual_embedding, axis=1)  # (B, 1, T, C)
    start_features = np.expand_dims(start_features, axis=2)  # (B, N, 1, C)
    concat_features = np.concatenate(
        [npx.broadcast_like(start_features, contextual_embedding, 2, 2),
         npx.broadcast_like(contextual_embedding, start_features, 1, 1)],
        axis=-1)  # (B, N, T, 2C)
    end_scores = self.end_scores(concat_features)
    end_scores = np.squeeze(end_scores, -1)
    end_logits = masked_logsoftmax(end_scores,
                                   mask=np.expand_dims(p_mask, axis=1),
                                   axis=-1)
    return end_logits
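# npx.broadcast_like in isolation, matching the (B, N, 1, C) -> (B, N, T, C)
# expansion of start_features above (shapes are illustrative).
from mxnet import np, npx
npx.set_np()

B, N, T, C = 2, 3, 7, 4
start_features = np.zeros((B, N, 1, C))
contextual = np.zeros((B, 1, T, C))
expanded = npx.broadcast_like(start_features, contextual,
                              lhs_axes=2, rhs_axes=2)
print(expanded.shape)  # (2, 3, 7, 4)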
def _get_vocab_slice_ids(restrict_lexicon: Optional[lexicon.TopKLexicon],
                         source_words: np.ndarray,
                         raw_constraint_list: List[Optional[constrained.RawConstraintList]],
                         eos_id: int,
                         beam_size: int) -> Tuple[np.ndarray, int,
                                                  List[Optional[constrained.RawConstraintList]]]:
    vocab_slice_ids = np.array(
        restrict_lexicon.get_trg_ids(
            source_words.astype("int32", copy=False).asnumpy()),
        dtype='int32')
    ctx = source_words.ctx
    if any(raw_constraint_list):
        # Add the constraint IDs to the list of permissible IDs, and then
        # project them into the reduced space
        constraint_ids = np.array([word_id for sent in raw_constraint_list
                                   for phr in sent for word_id in phr])
        vocab_slice_ids = onp.lib.arraysetops.union1d(vocab_slice_ids, constraint_ids)  # type: ignore
        full_to_reduced = dict((val, i) for i, val in enumerate(vocab_slice_ids))
        raw_constraint_list = [[[full_to_reduced[x] for x in phr] for phr in sent]
                               for sent in raw_constraint_list]
    # Pad to a multiple of 8.
    vocab_slice_ids = np.pad(vocab_slice_ids,
                             (0, 7 - ((len(vocab_slice_ids) - 1) % 8)),
                             mode='constant', constant_values=eos_id)

    vocab_slice_ids_shape = vocab_slice_ids.shape[0]
    if vocab_slice_ids_shape < beam_size + 1:
        # This fixes an edge case for toy models, where the number of vocab
        # ids from the lexicon is smaller than the beam size.
        logger.warning("Padding vocab_slice_ids (%d) with EOS to have at "
                       "least %d+1 elements to expand",
                       vocab_slice_ids_shape, beam_size)
        n = beam_size - vocab_slice_ids_shape + 1
        vocab_slice_ids = np.concatenate(
            (vocab_slice_ids,
             np.full((n,), fill_value=eos_id, ctx=ctx, dtype='int32')),
            axis=0)

    logger.debug(f'decoder softmax size: {vocab_slice_ids_shape}')
    return vocab_slice_ids, vocab_slice_ids_shape, raw_constraint_list
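# The rounding-up arithmetic used above, checked in isolation: lengths are
# padded with eos_id up to the next multiple of 8 (a hypothetical eos_id of 3).
from mxnet import np, npx
npx.set_np()

eos_id = 3
for n in (5, 8, 9):
    ids = np.zeros((n,), dtype='int32')
    padded = np.pad(ids, (0, 7 - ((len(ids) - 1) % 8)),
                    mode='constant', constant_values=eos_id)
    print(n, '->', padded.shape[0])  # 5 -> 8, 8 -> 8, 9 -> 16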
def forward(self, best_hyp_indices, best_word_indices, finished,
            scores_accumulated, lengths, reference_lengths, factors=None):
    # Reorder fixed-size beam data according to best_hyp_indices (ascending)
    finished = np.take(finished, best_hyp_indices, axis=0)
    lengths = np.take(lengths, best_hyp_indices, axis=0)
    reference_lengths = np.take(reference_lengths, best_hyp_indices, axis=0)

    # Normalize hypotheses that JUST finished
    all_finished = np.expand_dims(
        np.logical_or(best_word_indices == self.pad_id,
                      best_word_indices == self.eos_id), axis=1)
    newly_finished = np.logical_xor(all_finished, finished)
    scores_accumulated = np.where(
        newly_finished,
        self._scorer(scores_accumulated, npx.cast(lengths, self.dtype),
                     reference_lengths),
        scores_accumulated)

    # Recompute finished. Hypotheses are finished if they are extended with
    # <pad> or <eos>
    finished = np.logical_or(best_word_indices == self.pad_id,
                             best_word_indices == self.eos_id)
    finished = npx.cast(np.expand_dims(finished, axis=1), 'int32')

    # Concatenate sorted secondary target factors to best_word_indices.
    # Shape: (batch*beam, num_factors)
    best_word_indices = np.expand_dims(best_word_indices, axis=1)
    if factors is not None:
        secondary_factors = np.take(factors, best_hyp_indices, axis=0)
        best_word_indices = np.concatenate(
            (best_word_indices, secondary_factors), axis=1)

    return best_word_indices, finished, scores_accumulated, lengths, reference_lengths
def forward(self, target_dists, finished, inactive, scores_accumulated,
            lengths, max_lengths, unk_dist, pad_dist, eos_dist):
    # Make sure to avoid generating <unk> if unk_dist is specified
    if unk_dist is not None:
        target_dists = target_dists + unk_dist
    # Broadcast hypothesis score to each prediction.
    # scores_accumulated. Shape: (batch*beam, 1)
    # target_dists. Shape: (batch*beam, vocab_size)
    scores = target_dists + scores_accumulated

    # Special treatment for finished and inactive rows. Inactive rows are inf
    # everywhere; finished rows are inf everywhere except column zero
    # (pad_id), which holds the accumulated model score. Items that are
    # finished (but not inactive) get their previous accumulated score for
    # the <pad> symbol, infinity otherwise.
    # pad_dist. Shape: (batch*beam, vocab_size - 1) before concatenation
    pad_dist = np.concatenate((scores_accumulated, pad_dist), axis=1)
    scores = np.where(np.logical_or(finished, inactive), pad_dist, scores)

    # Update lengths of all items, except those that were already finished.
    # This updates the lengths for inactive items, too, but that doesn't
    # matter since they are ignored anyway.
    lengths = lengths + (1 - finished)

    # Items that are at their maximum length and not finished now are forced
    # to produce the <eos> symbol. That is, we keep scores for hypotheses
    # below max length or finished, and 'force-eos' the rest.
    below_max_length = lengths < max_lengths
    scores = np.where(np.logical_or(below_max_length, finished), scores,
                      eos_dist + scores)

    return scores, lengths
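# A toy illustration of the finished-row handling above: a finished row keeps
# only its accumulated score in column 0 (<pad>) and inf elsewhere, while
# active rows keep their expanded scores. All values are made up.
from mxnet import np, npx
npx.set_np()

scores_accumulated = np.array([[1.5], [2.0]])
pad_dist = np.full((2, 3), float('inf'))  # vocab_size - 1 columns
finished = np.array([[True], [False]])    # row 0 just finished
scores = np.zeros((2, 4))                 # pretend expanded scores
pad_dist = np.concatenate((scores_accumulated, pad_dist), axis=1)
print(np.where(finished, pad_dist, scores))
# [[1.5 inf inf inf]
#  [0.  0.  0.  0. ]]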
def decode_step(self,
                step_input: np.ndarray,
                states: List[np.ndarray],
                vocab_slice_ids: Optional[np.ndarray] = None):
    outputs = []  # type: List[np.ndarray]
    new_states = []  # type: List[np.ndarray]
    factor_outputs = []  # type: List[List[np.ndarray]]
    state_index = 0
    for model, model_state_structure in zip(self._models,
                                            self.state_structure()):
        model_states = states[state_index:state_index + len(model_state_structure)]
        state_index += len(model_state_structure)
        logits, model_states, target_factor_outputs = model.decode_step(
            step_input, model_states, vocab_slice_ids)
        probs = npx.softmax(logits, axis=-1,
                            temperature=self._softmax_temperature)
        outputs.append(probs)
        target_factor_probs = [npx.softmax(tfo, axis=-1)
                               for tfo in target_factor_outputs]
        factor_outputs.append(target_factor_probs)
        new_states += model_states
    scores = self._interpolation(outputs)

    target_factors = None  # type: Optional[np.ndarray]
    if factor_outputs:
        # Target factors are greedily 'decoded'. The interpolation returns
        # negated (log) probabilities, so argmin selects the most probable
        # factor.
        factor_predictions = [
            npx.cast(np.expand_dims(
                np.argmin(self._interpolation(fs), axis=-1), axis=1),
                dtype='int32')
            for fs in zip(*factor_outputs)]
        if factor_predictions:
            target_factors = factor_predictions[0] \
                if len(factor_predictions) == 1 \
                else np.concatenate(factor_predictions, axis=1)
    return scores, new_states, target_factors
def forward(self, x):
    p1 = self.p1_1(x)
    p2 = self.p2_2(self.p2_1(x))
    p3 = self.p3_2(self.p3_1(x))  # fixed: second conv is p3_2, not p3_1 again
    p4 = self.p4_2(self.p4_1(x))
    # Concatenate the four parallel paths on the channel dimension
    return np.concatenate((p1, p2, p3, p4), axis=1)
def rnn(inputs, hidden_states, params):
    '''
    inputs shape: (num_steps [seq_len], batch_size, vocab_size)
    return: outputs, (H,)
    '''
    W_xh, W_hh, b_h, W_ho, b_o = params
    H, = hidden_states
    outputs = []
    hidden_states = []
    # X shape: (batch_size, vocab_size)
    print(f"rnn loops {inputs.shape[0]} times along seq_length axis---------\n")
    i = 1
    for X in inputs:  # Loop along num_steps (the sequence-length axis)
        print(f"loops {i} times \n")
        i += 1
        H = mxnp.dot(X, W_xh) + mxnp.dot(H, W_hh) + b_h
        H = mxnp.tanh(H)
        hidden_states.append(H)
        print(f"---rnn input(X,H) and weights' shape---------\n"
              f"   ---X.shape={X.shape},W_xh.shape={W_xh.shape}\n"
              f"   ---H.shape={H.shape},W_hh.shape={W_hh.shape},b_h.shape={b_h.shape}\n"
              f"   ---W_ho.shape={W_ho.shape},b_o.shape={b_o.shape}\n")
        Y = mxnp.dot(H, W_ho) + b_o
        outputs.append(Y)
        print(f"---rnn output's shape---------\n"
              f"   ---Y.shape={Y.shape},H.shape={H.shape}\n")
    Ys = mxnp.concatenate(outputs, axis=0)
    print(f"Final Ys.shape={Ys.shape}")
    return Ys, (H,), hidden_states, outputs
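# Driving the scratch rnn above with small random parameters, assuming
# one-hot-style inputs with vocab_size 8 and a hidden size of 16.
import mxnet.numpy as mxnp

num_steps, batch_size, vocab_size, num_hiddens = 3, 2, 8, 16
shapes = [(vocab_size, num_hiddens), (num_hiddens, num_hiddens),
          (num_hiddens,), (num_hiddens, vocab_size), (vocab_size,)]
params = [mxnp.random.normal(scale=0.01, size=s) for s in shapes]
H0 = mxnp.zeros((batch_size, num_hiddens))
inputs = mxnp.random.normal(size=(num_steps, batch_size, vocab_size))
Ys, (H,), hidden_states, outputs = rnn(inputs, (H0,), params)
print(Ys.shape, H.shape)  # (6, 8) (2, 16): step outputs stacked along axis 0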
def offset_inverse(anchors, offset_preds):
    c_anc = d2l.box_corner_to_center(anchors)
    # fixed: scale by the anchors' center-format widths/heights (c_anc[:, 2:]),
    # not the corner coordinates (anchors[:, 2:])
    c_pred_bb_xy = (offset_preds[:, :2] * c_anc[:, 2:]) / 10 + c_anc[:, :2]
    c_pred_bb_wh = np.exp(offset_preds[:, 2:] / 5) * c_anc[:, 2:]
    c_pred_bb = np.concatenate((c_pred_bb_xy, c_pred_bb_wh), axis=1)
    predicted_bb = d2l.box_center_to_corner(c_pred_bb)
    return predicted_bb
def forward(self, _layer_lo, _layer_hi):
    up = self.up(_layer_lo)
    up = FFx.relu(up)
    x = FF.concatenate([up, _layer_hi], axis=1)
    x = self.conv_normed(x)
    return x
def forward(self, x, y):
    x = self.upconv(x)
    # x = self.upBN(x)
    x = np.concatenate([x, y], axis=1)
    x = self.conv1(x)
    x = self.conv2(x)
    x = self.conv3(x)
    return x
def multibox_prior(data, sizes, ratios):
    # data: (batch, channels, height, width)
    in_height, in_width = data.shape[-2:]
    device, num_sizes, num_ratios = data.ctx, len(sizes), len(ratios)
    boxes_per_pixel = num_sizes + num_ratios - 1
    size_tensor = np.array(sizes, ctx=device)
    ratio_tensor = np.array(ratios, ctx=device)

    # Offsets are required to move the anchor to the center of a pixel. Since
    # a pixel has height=1 and width=1, we choose to offset our centers by 0.5
    offset_w, offset_h = 0.5, 0.5
    steps_h = 1.0 / in_height  # Scaled steps in y axis
    steps_w = 1.0 / in_width  # Scaled steps in x axis

    # Generate all center points for the anchor boxes
    center_h = (np.arange(in_height, ctx=device) + offset_h) * steps_h
    center_w = (np.arange(in_width, ctx=device) + offset_w) * steps_w
    shift_x, shift_y = np.meshgrid(center_w, center_h)
    shift_x, shift_y = shift_x.reshape(-1), shift_y.reshape(-1)

    # Generate boxes_per_pixel number of heights and widths which are later
    # used to create anchor box corner coordinates (xmin, ymin, xmax, ymax):
    # concat (various sizes, first ratio) and (first size, various ratios)
    w = np.concatenate((size_tensor * np.sqrt(ratio_tensor[0]),
                        size_tensor[0] * np.sqrt(ratio_tensor[1:]))) \
        * in_height / in_width  # Handle rectangular inputs
    h = np.concatenate((size_tensor / np.sqrt(ratio_tensor[0]),
                        sizes[0] / np.sqrt(ratio_tensor[1:])))
    # Divide by 2 to get half height and half width
    anchor_manipulations = np.tile(
        np.stack((-w, -h, w, h)).T, (in_height * in_width, 1)) / 2

    # Each center point will have boxes_per_pixel number of anchor boxes, so
    # generate a grid of all anchor box centers with boxes_per_pixel repeats
    out_grid = np.stack([shift_x, shift_y, shift_x, shift_y],
                        axis=1).repeat(boxes_per_pixel, axis=0)
    output = out_grid + anchor_manipulations
    return np.expand_dims(output, axis=0)
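# Shape check for multibox_prior: with 3 sizes and 3 ratios there are
# 3 + 3 - 1 = 5 anchors per pixel, so a 4 x 6 feature map yields
# (1, 4*6*5, 4) boxes.
from mxnet import np, npx
npx.set_np()

X = np.random.uniform(size=(1, 3, 4, 6))  # (batch, channels, h, w)
Y = multibox_prior(X, sizes=[0.75, 0.5, 0.25], ratios=[1, 2, 0.5])
print(Y.shape)  # (1, 120, 4)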
def forward(self, X, state):
    enc_outputs, hidden_state, enc_valid_len = state
    X = self.embedding(X).swapaxes(0, 1)
    outputs = []
    for x in X:
        # query shape: (batch_size, 1, num_hiddens)
        query = np.expand_dims(hidden_state[0][-1], axis=1)
        # context has the same shape as query
        context = self.attention_cell(query, enc_outputs, enc_outputs,
                                      enc_valid_len)
        # Concatenate on the feature dimension
        x = np.concatenate((context, np.expand_dims(x, axis=1)), axis=-1)
        # Reshape x to (1, batch_size, embed_size + num_hiddens)
        out, hidden_state = self.rnn(x.swapaxes(0, 1), hidden_state)
        outputs.append(out)
    outputs = self.dense(np.concatenate(outputs, axis=0))
    return outputs.swapaxes(0, 1), [enc_outputs, hidden_state, enc_valid_len]
def n_dim_array_operations():
    x = np.array([1, 2, 4, 8])
    y = np.array([2, 2, 2, 2])
    print(x + y, x - y, x * y, x / y, x**y)  # The ** operator is exponentiation
    print("e^x of {} = {}".format(x, np.exp(x)))
    print("sin(x) of {} = {}".format(x, np.sin(x)))

    x = np.arange(12).reshape(3, 4)
    y = np.array([[2, 1, 4, 3], [1, 2, 3, 4], [4, 3, 2, 1]])
    axis0 = np.concatenate([x, y], axis=0)
    print("concat axis 0 : {}, shape {}".format(axis0, axis0.shape))
    axis1 = np.concatenate([x, y], axis=1)
    print("concat axis 1 : {}, shape {}".format(axis1, axis1.shape))

    equal = x == y
    greater = x > y
    print("equal x == y: {} == {} = {}".format(x, y, equal))
    print("greater x > y: {} > {} = {}".format(x, y, greater))
def batch_check(x1, x2, axes, shapes):
    for a, s in zip(axes, shapes):
        x1.attach_grad()
        with mx.autograd.record():
            y = np.concatenate((x1, x2), axis=a)
        assert y.shape == s
        y.backward()
        assert x1.grad.shape == (2, INT_OVERFLOW)
        assert x1.grad[0][0] == 1
def offset_boxes(anchors, assigned_bb, eps=1e-6):
    c_anc = d2l.box_corner_to_center(anchors)
    c_assigned_bb = d2l.box_corner_to_center(assigned_bb)
    # standard deviation = 0.1
    offset_xy = 10 * (c_assigned_bb[:, :2] - c_anc[:, :2]) / c_anc[:, 2:]
    # standard deviation = 0.2
    offset_wh = 5 * np.log(eps + c_assigned_bb[:, 2:] / c_anc[:, 2:])
    offset = np.concatenate([offset_xy, offset_wh], axis=1)
    return offset
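# Round-trip check tying offset_boxes to offset_inverse above, assuming the
# d2l box utilities are importable: encoding ground-truth boxes against
# anchors and decoding again should recover them up to the eps term.
from mxnet import np, npx
npx.set_np()
from d2l import mxnet as d2l

anchors = np.array([[0.1, 0.1, 0.4, 0.4], [0.3, 0.3, 0.9, 0.9]])
assigned_bb = np.array([[0.15, 0.2, 0.45, 0.5], [0.25, 0.2, 0.8, 0.95]])
offsets = offset_boxes(anchors, assigned_bb)
recovered = offset_inverse(anchors, offsets)
print(np.abs(recovered - assigned_bb).max())  # ~0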
def forward(self, inputs):
    # Concatenate the output of two embedding layers with shape of
    # (batch size, no. of words, word vector dimension) by word vector
    embeddings = np.concatenate((
        self.embedding(inputs), self.constant_embedding(inputs)), axis=2)
    # According to the input format required by Conv1D, the word vector
    # dimension, that is, the channel dimension of the one-dimensional
    # convolutional layer, is transformed into the previous dimension
    embeddings = embeddings.transpose(0, 2, 1)
    # For each one-dimensional convolutional layer, after max-over-time
    # pooling, an ndarray with the shape of (batch size, channel size, 1)
    # can be obtained. Use the flatten function to remove the last
    # dimension and then concatenate on the channel dimension
    encoding = np.concatenate([
        np.squeeze(self.pool(conv(embeddings)), axis=-1)
        for conv in self.convs], axis=1)
    # After applying the dropout method, use a fully connected layer to
    # obtain the output
    outputs = self.decoder(self.dropout(encoding))
    return outputs
def forward(self, input1, input2):
    out = mx.contrib.nd.BilinearResize2D(input1.as_nd_ndarray(),
                                         scale_height=2., scale_width=2.)
    out = out.as_np_ndarray()
    out = self.conv1(out)
    out = FFx.relu(out)
    out2 = self.conv2(FF.concatenate([out, input2], axis=1))
    out2 = FFx.relu(out2)
    return out2