def forward(self, cur_input, state, encoder_outputs):
    # When the RNN has multiple hidden layers, take the hidden state of the
    # layer closest to the output layer.
    single_layer_state = [state[0][-1].expand_dims(0)]
    encoder_outputs = encoder_outputs.reshape(
        (self.max_seq_len, -1, self.encoder_num_hiddens))
    hidden_broadcast = nd.broadcast_axis(single_layer_state[0], axis=0,
                                         size=self.max_seq_len)
    encoder_outputs_and_hiddens = nd.concat(encoder_outputs,
                                            hidden_broadcast, dim=2)
    energy = self.attention(encoder_outputs_and_hiddens)
    batch_attention = nd.softmax(energy, axis=0).transpose((1, 2, 0))
    batch_encoder_outputs = encoder_outputs.swapaxes(0, 1)
    decoder_context = nd.batch_dot(batch_attention, batch_encoder_outputs)
    # change here
    input_and_context = nd.concat(
        nd.expand_dims(self.embedding(cur_input), axis=1),
        decoder_context, dim=2)
    concat_input = self.rnn_concat_input(input_and_context).reshape((1, -1, 0))
    concat_input = self.dropout(concat_input)
    state = [nd.broadcast_axis(single_layer_state[0], axis=0,
                               size=self.num_layers)]
    output, state = self.rnn(concat_input, state)
    output = self.dropout(output)
    output = self.out(output).reshape((-3, -1))
    return output, state
def _forward_alg(self, feats):
    '''Forward algorithm for computing the CRF partition function.

    feats: a list of length equal to the sentence length; each element is an
    nd.array holding the feature vectors of one word for the whole batch,
    with shape (batch_size, tagset_size).
    '''
    # Initialize the forward variables.
    batch_size = feats[0].shape[0]
    alphas = [[-10000.] * self.tagset_size]
    alphas[0][self.tag2idx[START_TAG]] = 0.
    alphas = nd.array(alphas, ctx=self.ctx)
    alphas = nd.broadcast_axis(alphas, axis=0, size=batch_size)
    for feat in feats:
        alphas_t = []
        for next_tag in range(self.tagset_size):
            emit_score = feat[:, next_tag].reshape((batch_size, -1))
            # Each entry of trans_score is the score of transitioning
            # from tag i to next_tag.
            trans_score = nd.broadcast_axis(
                self.transitions.data()[next_tag].reshape((1, -1)),
                axis=0, size=batch_size)
            next_tag_var = alphas + emit_score + trans_score
            # log_sum_exp(next_tag_var) has shape (batch_size, 1).
            alphas_t.append(log_sum_exp(next_tag_var))
        alphas = nd.concat(*alphas_t, dim=1)
    terminal_var = alphas + self.transitions.data()[self.tag2idx[STOP_TAG]]
    alpha = log_sum_exp(terminal_var)
    alpha = alpha.reshape((-1,))
    assert alpha.shape == (batch_size,)
    return alpha
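# _forward_alg above relies on a log_sum_exp helper. Below is a minimal sketch
# matching the shape contract it assumes ((batch_size, tagset_size) in,
# (batch_size, 1) out); this is an assumption about the surrounding code, not
# necessarily the original project's implementation.
from mxnet import nd

def log_sum_exp(vec):
    # Subtract the per-row maximum before exponentiating, for numerical stability.
    max_score = nd.max(vec, axis=1, keepdims=True)
    return max_score + nd.log(nd.sum(nd.exp(vec - max_score), axis=1,
                                     keepdims=True))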
def forward(self, x_num, x_cat):
    # preprocess
    store_embed = self.store_embedding(x_cat[:, :, 0])
    embed_concat = nd.concat(
        store_embed,
        # x_cat[:, :, 1:2],
        self.nYear_embedding(x_cat[:, :, 2]),
        self.nMonth_embedding(x_cat[:, :, 3]),
        self.mDay_embedding(x_cat[:, :, 4]),
        self.wday_embedding(x_cat[:, :, 5]),
        self.nHour_embedding(x_cat[:, :, 6]),
        dim=2)
    input_store = nd.broadcast_axis(store_embed[:, 0:1, :], axis=1, size=168)
    output = nd.concat(input_store,
                       x_num.reshape((x_num.shape[0], x_num.shape[1], 1)),
                       dim=2)
    output = nd.transpose(output, axes=(0, 2, 1))
    # skip_connections = []
    for sub_TCN in self.TCN:
        output = self.residue_forward(output, sub_TCN)
    output = nd.transpose(output, axes=(0, 2, 1))
    output = nd.reshape(output, (output.shape[0], 1, -1))
    # print(output.shape)
    output = nd.broadcast_axis(output, axis=1, size=24)
    # post_concat = nd.concat(output, embed_concat, dim=2)
    output = self.net(self.post_res(output, embed_concat))
    return output
def _viterbi_decode(self, feats):
    '''Viterbi decoding for the CRF: find the best tag path given the features.

    feats: a list of length equal to the sentence length; each element is an
    nd.array holding the feature vectors of one word for the whole batch,
    with shape (batch_size, tagset_size).
    '''
    backpointers = []
    batch_size = feats[0].shape[0]
    vvars = nd.full((1, self.tagset_size), -10000., ctx=self.ctx)
    vvars[0, self.tag2idx[START_TAG]] = 0
    # vvars shape: (batch_size, tagset_size)
    vvars = nd.broadcast_axis(vvars, axis=0, size=batch_size)
    for feat in feats:
        bptrs_t = []
        viterbivars_t = []
        for next_tag in range(self.tagset_size):
            next_tag_var = vvars + nd.broadcast_axis(
                self.transitions.data()[next_tag].reshape((1, -1)),
                axis=0, size=batch_size)
            # best_tag_id shape: (batch_size, 1)
            best_tag_id = nd.argmax(next_tag_var, axis=1, keepdims=True)
            bptrs_t.append(best_tag_id)
            # Each element of viterbivars_t has shape (batch_size, 1).
            viterbivars_t.append(
                nd.pick(next_tag_var, best_tag_id, axis=1, keepdims=True))
        vvars = (nd.concat(*viterbivars_t, dim=1) + feat)
        # bptrs_t shape: (batch_size, tagset_size)
        bptrs_t = nd.concat(*bptrs_t, dim=1)
        backpointers.append(bptrs_t)
    # Transition to STOP_TAG.
    terminal_var = vvars + self.transitions.data()[self.tag2idx[STOP_TAG]]
    best_tag_id = nd.argmax(terminal_var, axis=1)
    # path_score shape: (batch_size,)
    path_score = nd.pick(terminal_var, best_tag_id, axis=1)
    # Follow the backpointers to decode the best path.
    best_path = [best_tag_id]
    for bptrs_t in reversed(backpointers):
        best_tag_id = nd.pick(bptrs_t, best_tag_id, axis=1)
        best_path.append(best_tag_id)
    # Pop off the start tag.
    # start shape: (batch_size,)
    start = best_path.pop()
    # Sanity check: the popped tag must be START_TAG.
    for i in range(batch_size):
        assert start[i].asscalar() == self.tag2idx[START_TAG]
    best_path.reverse()
    # Build the matrix of best paths.
    new_best_path = []
    for best_tag_id in best_path:
        best_tag_id = best_tag_id.reshape((-1, 1))
        new_best_path.append(best_tag_id)
    best_path_matrix = nd.concat(*new_best_path, dim=1)
    return path_score, best_path_matrix
def forward(self, cur_input, state, encoder_outputs):
    # When the RNN has multiple layers, take the hidden state of the layer
    # closest to the output.
    # state shape: [(1, batch_size, decoder_hidden_dim)]
    single_layer_state = [state[0][-1].expand_dims(0)]
    # encoder_outputs shape: (max_seq_len, batch_size * encoder_hidden_dim)
    encoder_outputs = encoder_outputs.reshape(
        (self.max_seq_len, -1, self.encoder_hidden_dim))
    # single_layer_state shape: [(1, batch_size, decoder_hidden_dim)]
    # hidden_broadcast shape: (max_seq_len, batch_size, decoder_hidden_dim)
    hidden_broadcast = nd.broadcast_axis(single_layer_state[0], axis=0,
                                         size=self.max_seq_len)
    # encoder_outputs_and_hiddens shape:
    # (max_seq_len, batch_size, encoder_hidden_dim + decoder_hidden_dim)
    encoder_outputs_and_hiddens = nd.concat(encoder_outputs,
                                            hidden_broadcast, dim=2)
    # energy shape: (max_seq_len, batch_size, 1)
    energy = self.attention(encoder_outputs_and_hiddens)
    # batch_attention shape: (batch_size, 1, max_seq_len)
    batch_attention = nd.softmax(energy, axis=0).transpose((1, 2, 0))
    # batch_encoder_outputs shape: (batch_size, max_seq_len, encoder_hidden_dim)
    batch_encoder_outputs = encoder_outputs.swapaxes(0, 1)
    # decoder_context shape: (batch_size, 1, encoder_hidden_dim)
    decoder_context = nd.batch_dot(batch_attention, batch_encoder_outputs)
    # cur_input shape: (batch_size,)
    # input_and_context shape: (batch_size, 1, decoder_hidden_dim + encoder_hidden_dim)
    input_and_context = nd.concat(
        nd.expand_dims(self.embedding(cur_input), axis=1),
        decoder_context, dim=2)
    # concat_input shape: (1, batch_size, decoder_hidden_dim)
    concat_input = self.rnn_concat_input(input_and_context).reshape((1, -1, 0))
    concat_input = self.dropout(concat_input)
    # When the RNN has multiple layers, initialize every layer's hidden state
    # from the single-layer hidden state.
    state = [
        nd.broadcast_axis(single_layer_state[0], axis=0, size=self.num_layers)
    ]
    # XXX note: state is a list containing a single nd.NDArray.
    output, state = self.rnn(concat_input, state)
    output = self.dropout(output)
    output = self.out(output)
    output = nd.reshape(output, (-3, -1))
    # output shape: (batch_size * 1, output_dim)
    return output, state
def simple_broadcast(self, *args):
    """
    Broadcast a sequence of 1 dimensional arrays.

    Example::

        >>> simple_broadcast(
                astensor([1]), astensor([2, 2]), astensor([3, 3, 3]))
        [[1. 1. 1.]
         [2. 2. 2.]
         [3. 3. 3.]]

    Args:
        args (Array of Tensors): Sequence of arrays

    Returns:
        MXNet NDArray: The sequence broadcast together.
    """
    max_dim = max(map(len, args))
    broadcast = []
    for arg in args:
        if len(arg) < max_dim:
            broadcast.append(
                nd.broadcast_axis(arg[0], axis=len(arg.shape) - 1,
                                  size=max_dim))
        else:
            broadcast.append(arg)
    return nd.stack(*broadcast)
def forward(self, xNum, xCat):
    # embed the auxiliary variables
    embedConcat = nd.concat(
        self.stationEmbedding(xCat[:, :, 0]),
        self.nYearEmbedding(xCat[:, :, 1]),
        self.nMonthEmbedding(xCat[:, :, 2]),
        self.mDayEmbedding(xCat[:, :, 3]),
        self.wdayEmbedding(xCat[:, :, 4]),
        self.nHourEmbedding(xCat[:, :, 5]),
        dim=2)
    # The training and testing parts of the embeddings
    embedTrain = embedConcat[:, 0:168, :]
    embedTest = embedConcat[:, 168:, :]
    # The input series for encoding
    xNum = xNum.reshape((xNum.shape[0], xNum.shape[1], 1))
    inputSeries = nd.concat(xNum, embedTrain, dim=2)
    inputSeries = nd.transpose(inputSeries, axes=(0, 2, 1))
    for subTCN in self.encoder:
        inputSeries = subTCN(inputSeries)
    # The output
    output = inputSeries
    output = nd.transpose(output, axes=(0, 2, 1))
    output = nd.reshape(output, (output.shape[0], 1, -1))
    output = nd.broadcast_axis(output, axis=1, size=self.outputSize)
    # the decoder
    output = self.outputLayer(self.decoder(output, embedTest))
    # output = nd.sum_axis(output, axis=2)
    mu = nd.sum_axis(self.mu(output), axis=2)
    sigma = nd.sum_axis(self.sigma(output), axis=2)
    return mu, sigma
def forward(self, xNum, xCat):
    # embed the auxiliary variables
    embedConcat = nd.concat(self.stationEmbedding(xCat[:, :, 0]),
                            self.nYearEmbedding(xCat[:, :, 1]),
                            self.nMonthEmbedding(xCat[:, :, 2]),
                            self.mDayEmbedding(xCat[:, :, 3]),
                            self.wdayEmbedding(xCat[:, :, 4]),
                            self.nHourEmbedding(xCat[:, :, 5]),
                            dim=2)
    # The training and testing parts of the embeddings
    # (only consider the id for the input series)
    embedTrain = embedConcat[:, 0:self.inputSize, :]
    embedTest = embedConcat[:, self.inputSize:, :]
    # The input series for encoding
    xNum = xNum.reshape((xNum.shape[0], xNum.shape[1], 1))
    # inputSeries = nd.concat(xNum, embedTrain, dim=2)
    inputSeries = xNum
    inputSeries = nd.transpose(inputSeries, axes=(0, 2, 1))
    for subTCN in self.encoder:
        inputSeries = subTCN(inputSeries)
    # The output
    output = inputSeries
    output = nd.transpose(output, axes=(0, 2, 1))
    output = nd.reshape(output, (output.shape[0], 1, -1))
    output = nd.broadcast_axis(output, axis=1, size=self.outputSize)
    # the decoder
    output = self.outputLayer(self.decoder(output, embedTest))
    # output = nd.sum_axis(output, axis=2)
    # The quantile outputs
    outputQ10 = nd.sum_axis(self.Q10(output), axis=2)
    outputQ50 = nd.sum_axis(self.Q50(output), axis=2)
    outputQ90 = nd.sum_axis(self.Q90(output), axis=2)
    return outputQ10, outputQ50, outputQ90
def update_alphas(data, alphas):
    """Calculate the batch update of alpha for each time step.

    Args:
        data (NDArray): shape (seq_len, batch_size, self.tagset_size)
        alphas (NDArray): shape (batch_size, self.tagset_size)
    """
    # alphas_t shape: (self.tagset_size, batch_size, self.tagset_size)
    alphas_t = nd.broadcast_axis(nd.expand_dims(alphas, axis=0), axis=0,
                                 size=self.tagset_size)
    # emit_score shape: (self.tagset_size, batch_size, 1)
    emit_score = nd.transpose(nd.expand_dims(data, axis=0), axes=(2, 1, 0))
    # trans_score shape: (self.tagset_size, 1, self.tagset_size)
    trans_score = nd.expand_dims(self.transitions.data(), axis=1)
    # next_tag_var shape: (self.tagset_size, batch_size, self.tagset_size)
    next_tag_var = alphas_t + emit_score + trans_score
    # alphas shape: (self.tagset_size, batch_size)
    alphas = log_sum_exp(next_tag_var)
    # alphas shape: (batch_size, self.tagset_size)
    alphas = nd.transpose(alphas, axes=(1, 0))
    return data, alphas
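# update_alphas returns a (outputs, states) pair, which matches the body
# signature expected by nd.contrib.foreach. One plausible way to drive it over
# a whole sequence (an assumption about the surrounding code, not shown here):
#
#     outputs, alphas = nd.contrib.foreach(update_alphas, feats, init_alphas)
#
# where feats has shape (seq_len, batch_size, tagset_size) and init_alphas has
# shape (batch_size, tagset_size).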
def forward(self, x_num, x_cat):
    # preprocess
    embed_concat = nd.concat(self.id_embedding(x_cat[:, :, 0]),
                             self.nYear_embedding(x_cat[:, :, 1]),
                             self.nMonth_embedding(x_cat[:, :, 2]),
                             self.mDay_embedding(x_cat[:, :, 3]),
                             self.wday_embedding(x_cat[:, :, 4]),
                             self.nHour_embedding(x_cat[:, :, 5]),
                             dim=2)
    embed_train = embed_concat[:, 0:168, :]
    embed_test = embed_concat[:, 168:, :]
    x_num = x_num.reshape(x_num.shape[0], x_num.shape[1], -1)
    conv_x = nd.concat(x_num, embed_train, dim=2)
    conv_x = nd.transpose(conv_x, axes=(0, 2, 1))
    output = conv_x
    # skip_connections = []
    for sub_TCN in self.TCN:
        output = self.residue_forward(output, sub_TCN)
        # skip_connections.append(skip)
    # print(skip_connections)
    # output1 = sum([s[:, :, -1] for s in skip_connections])
    output = output[:, :, -1:]
    output = nd.transpose(output, axes=(0, 2, 1))
    output = nd.broadcast_axis(output, axis=1, size=24)
    post_concat = nd.concat(output, embed_test, dim=2)
    output = self.net(self.post_res(post_concat))
    output = output.reshape(output.shape[0], -1)
    return output
def attention_forward(model, enc_states, dec_state):
    # Broadcast the decoder hidden state to the same shape as the encoder
    # hidden states, then concatenate them.
    dec_states = nd.broadcast_axis(
        dec_state.expand_dims(0), axis=0, size=enc_states.shape[0])
    enc_and_dec_states = nd.concat(enc_states, dec_states, dim=2)
    e = model(enc_and_dec_states)  # shape: (num_steps, batch_size, 1)
    alpha = nd.softmax(e, axis=0)  # softmax over the time-step dimension
    return (alpha * enc_states).sum(axis=0)  # return the context variable
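# A minimal shape check for attention_forward above. The two-layer Dense
# scorer below is an assumption about the surrounding code: any block that
# maps (num_steps, batch_size, 2 * num_hiddens) to (num_steps, batch_size, 1)
# would work.
from mxnet import nd
from mxnet.gluon import nn

def attention_model(attention_size):
    model = nn.Sequential()
    model.add(nn.Dense(attention_size, activation='tanh', use_bias=False,
                       flatten=False),
              nn.Dense(1, use_bias=False, flatten=False))
    return model

seq_len, batch_size, num_hiddens = 10, 4, 8
model = attention_model(attention_size=10)
model.initialize()
enc_states = nd.zeros((seq_len, batch_size, num_hiddens))
dec_state = nd.zeros((batch_size, num_hiddens))
print(attention_forward(model, enc_states, dec_state).shape)  # (4, 8)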
def make_values_L(range_min, range_max, L, batch_size):
    # Integer division so np.linspace receives an integer sample count.
    logs_L = np.linspace(0, np.log(range_max * 1.0 / range_min), num=L // 2)
    values_L = nd.array(1.0 / range_min * np.exp(-logs_L))
    values_L = nd.expand_dims(nd.expand_dims(values_L, axis=0), axis=2)
    return nd.broadcast_axis(values_L, axis=0, size=batch_size)
def begin_state(self, *args, **kwargs):
    # Usage: decoder.begin_state(batch_size=4, func=nd.zeros, vid_feat=features)
    video_feat = kwargs['vid_feat']
    init_state = self.vid_init_state(video_feat)
    # Reshape to LNC layout with a single layer: (1, batch_size, num_hidden).
    init_state = init_state.reshape(1, *(init_state.shape))
    kwargs.pop('vid_feat')
    states = self.rnn.begin_state(*args, **kwargs)
    states[0] = nd.broadcast_axis(init_state, size=self.num_layers, axis=0)
    return states
def attention_forward(model, enc_states, dec_state):
    # Broadcast the decoder hidden state to the shape of the encoder hidden
    # states before concatenating them.
    dec_states = nd.broadcast_axis(dec_state.expand_dims(0), axis=0,
                                   size=enc_states.shape[0])
    enc_and_dec_states = nd.concat(enc_states, dec_states, dim=2)
    e = model(enc_and_dec_states)  # shape: (num_steps, batch_size, 1).
    alpha = nd.softmax(e, axis=0)  # softmax over the time-step dimension.
    return (alpha * enc_states).sum(axis=0)  # return the context variable.
def attention_forward(model, enc_states, dec_state):
    # Broadcast the decoder hidden state to the shape of the encoder hidden
    # states, then concatenate the two.
    dec_states = nd.broadcast_axis(dec_state.expand_dims(0), axis=0,
                                   size=enc_states.shape[0])
    enc_and_dec_states = nd.concat(enc_states, dec_states, dim=2)
    e = model(enc_and_dec_states)  # shape is (times, batch_size, 1)
    alpha = nd.softmax(e, axis=0)  # softmax on the time dimension
    return (alpha * enc_states).sum(axis=0)  # return the context variables
def postprocess(self, x, embed_test):
    output = nd.relu(x)
    output = self.conv_post_1(output)
    output = nd.relu(output)
    output = self.conv_post_2(output)
    output = nd.broadcast_axis(output, axis=1, size=24)
    embed_result = nd.concat(output, embed_test, dim=2)
    output = self.outputLayer(self.net(embed_result))
    output = output.reshape(output.shape[0], -1)
    return output
def forward(self, x_num, x_cat):
    # preprocess
    store_embed = self.store_embedding(x_cat[:, :, 0])
    embed_concat = nd.concat(
        store_embed,
        # x_cat[:, :, 1:2],
        self.nYear_embedding(x_cat[:, :, 2]),
        self.nMonth_embedding(x_cat[:, :, 3]),
        self.mDay_embedding(x_cat[:, :, 4]),
        self.wday_embedding(x_cat[:, :, 5]),
        self.nHour_embedding(x_cat[:, :, 6]),
        dim=2)
    input_store = nd.broadcast_axis(store_embed[:, 0:1, :], axis=1, size=168)
    # The store id (an independent feature) is added as an extra channel to
    # the input time series: add an extra dimension to the series (x_num) and
    # concatenate it with the store id.
    output = nd.concat(input_store,
                       x_num.reshape((x_num.shape[0], x_num.shape[1], 1)),
                       dim=2)
    # reshape to (m, channels, width)
    output = nd.transpose(output, axes=(0, 2, 1))
    # skip_connections = []
    for sub_TCN in self.TCN:  # iterate over the ResidualTCN blocks
        # residue_forward is a method defined further down in this class
        output = self.residue_forward(output, sub_TCN)
    output = nd.transpose(output, axes=(0, 2, 1))
    output = nd.reshape(output, (output.shape[0], 1, -1))
    # print(output.shape)
    output = nd.broadcast_axis(output, axis=1, size=24)
    # post_concat = nd.concat(output, embed_concat, dim=2)
    output = self.net(self.post_res(output, embed_concat))
    return output
def rbf_kernels(self, x: NDArray, y: NDArray):
    """Computes exp(-c ||x - y||^2).

    ||x - y||^2 = x . x + y . y - 2 x . y, so compute each term separately.
    x are the original features, y are the features used for similarity.
    """
    cross_products = nd.dot(x, y)
    x_products = nd.sum(sqr(x), axis=1, keepdims=True)
    x_products = nd.broadcast_axis(x_products, axis=1, size=y.shape[1])
    y_products = nd.sum(sqr(y), axis=0, keepdims=True)
    y_products = nd.broadcast_axis(y_products, axis=0, size=x.shape[0])
    sqr_difs = x_products + y_products - 2 * cross_products
    print(nd.mean(x_products), nd.mean(y_products), nd.mean(cross_products))
    print(nd.mean(sqr_difs))
    res = nd.exp(-0.5 * sqr_difs)
    print(res.shape)
    return res
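# A standalone check of the expansion used in rbf_kernels above, assuming
# `sqr` is an elementwise square helper (t * t) and that y stores the
# reference features column-wise, i.e. with shape (num_features, num_refs),
# so that nd.dot(x, y) is defined.
import numpy as np
from mxnet import nd

sqr = lambda t: t * t
x = nd.random.normal(shape=(5, 3))   # 5 samples, 3 features
y = nd.random.normal(shape=(3, 4))   # 4 reference points, stored column-wise
sqr_difs = (nd.sum(sqr(x), axis=1, keepdims=True)
            + nd.sum(sqr(y), axis=0, keepdims=True)
            - 2 * nd.dot(x, y))
# Compare against the directly computed squared distances.
ref = np.array([[((x[i].asnumpy() - y[:, j].asnumpy()) ** 2).sum()
                 for j in range(4)] for i in range(5)])
print(np.allclose(sqr_difs.asnumpy(), ref, atol=1e-5))  # True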
def positional(x):
    batch_size, length, model_dim = x.shape
    # (length, 1)
    pos = nd.arange(length).expand_dims(1)
    # (1, model_dim/2), 10000^(2i/model_dim)
    div = nd.power(10000, nd.arange(model_dim / 2) * 2 / model_dim)
    out = nd.zeros((length, model_dim))
    out[:, 0::2] = nd.sin(pos / div)
    out[:, 1::2] = nd.cos(pos / div)
    return nd.broadcast_axis(out.expand_dims(0), axis=0, size=batch_size)
def make_dynamic_dec(T, values_L):
    values_T = nd.array(np.linspace(1, T, num=T), ctx=values_L.context)
    values_T = nd.expand_dims(nd.expand_dims(values_T, axis=0), axis=2)
    values_T = nd.broadcast_axis(values_T, axis=0, size=values_L.shape[0])
    values_TL = nd.batch_dot(values_T, values_L, transpose_b=True)
    values_sin = nd.sin(values_TL)
    values_cos = nd.cos(values_TL)
    return nd.concat(values_sin, values_cos, dim=2)
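# A quick shape check chaining make_values_L and make_dynamic_dec above
# (the parameter values are illustrative only and assume both functions are
# in scope).
values_L = make_values_L(range_min=1, range_max=100, L=8, batch_size=2)
print(values_L.shape)  # (2, 4, 1): (batch_size, L // 2, 1)
enc = make_dynamic_dec(T=16, values_L=values_L)
print(enc.shape)       # (2, 16, 8): (batch_size, T, L)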
def forward(self, x_num, x_cat):
    # preprocess
    store_embed = self.store_embedding(x_cat[:, :, 0])
    embed_concat = nd.concat(store_embed,
                             self.nYear_embedding(x_cat[:, :, 2]),
                             self.nMonth_embedding(x_cat[:, :, 3]),
                             self.mDay_embedding(x_cat[:, :, 4]),
                             self.wday_embedding(x_cat[:, :, 5]),
                             self.nHour_embedding(x_cat[:, :, 6]),
                             self.holiday_embedding(x_cat[:, :, 7]),
                             dim=2)
    input_store = nd.broadcast_axis(store_embed[:, 0:1, :], axis=1, size=168)
    output = nd.concat(input_store,
                       x_num.reshape((x_num.shape[0], x_num.shape[1], 1)),
                       dim=2)
    output = nd.transpose(output, axes=(0, 2, 1))
    # skip_connections = []
    for sub_TCN in self.TCN:
        output = self.residue_forward(output, sub_TCN)
        # skip_connections.append(output)
    # output = sum([s[:, :, -1] for s in skip_connections])
    output = nd.transpose(output, axes=(0, 2, 1))
    output = nd.reshape(output, (output.shape[0], 1, -1))
    # print(output.shape)
    output = nd.broadcast_axis(output, axis=1, size=self.output_ax)
    # post_concat = nd.concat(output, embed_concat, dim=2)
    output = self.net(self.post_res(output, embed_concat))
    output_Q10 = self.Q10(output)
    output_Q10 = output_Q10.reshape(output_Q10.shape[0], -1)
    output_Q50 = self.Q50(output)
    output_Q50 = output_Q50.reshape(output_Q50.shape[0], -1)
    output_Q90 = self.Q90(output)
    output_Q90 = output_Q90.reshape(output_Q90.shape[0], -1)
    return output_Q10, output_Q50, output_Q90
def forward(self, query, key, value, mask):
    # Project and transpose from (batch_size, num_items, units) to
    # (batch_size * num_heads, num_items, p), where units = p * num_heads.
    query, key, value = [
        transpose_qkv(X, self.num_heads)
        for X in (self.W_q(query), self.W_k(key), self.W_v(value))
    ]
    if mask is not None:
        # Replicate the mask for each of the num_heads heads.
        mask = nd.broadcast_axis(
            nd.expand_dims(mask, axis=1), axis=1,
            size=self.num_heads).reshape(shape=(-1, 0, 0), reverse=True)
    output = self.attention(query, key, value, mask)
    # Transpose from (batch_size * num_heads, num_items, p) back to
    # (batch_size, num_items, units).
    return transpose_output(output, self.num_heads)
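# The block above assumes transpose_qkv and transpose_output helpers. Below is
# one common way to define them for the (batch_size, num_items, units) layout;
# this is a sketch, not necessarily this project's own code.
from mxnet import nd

def transpose_qkv(X, num_heads):
    # (batch_size, num_items, units) -> (batch_size, num_items, num_heads, p)
    X = X.reshape(shape=(0, 0, num_heads, -1))
    # -> (batch_size, num_heads, num_items, p)
    X = nd.transpose(X, axes=(0, 2, 1, 3))
    # Merge the first two axes: -> (batch_size * num_heads, num_items, p)
    return X.reshape(shape=(-1, 0, 0), reverse=True)

def transpose_output(X, num_heads):
    # Invert transpose_qkv:
    # (batch_size * num_heads, num_items, p) -> (batch_size, num_items, units)
    X = X.reshape(shape=(-1, num_heads, 0, 0), reverse=True)
    X = nd.transpose(X, axes=(0, 2, 1, 3))
    return X.reshape(shape=(0, 0, -1))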
def forward(self, cur_input, state, encoder_outputs):
    # When the RNN has multiple hidden layers, take the hidden state of the
    # layer closest to the output layer.
    single_layer_state = [state[0][-1].expand_dims(0)]
    # encoder_outputs shape: (max_seq_len, -1, encoder_num_hiddens)
    encoder_outputs = encoder_outputs.reshape(
        (self.max_seq_len, -1, self.encoder_num_hiddens))
    # Instead of attention, use only the last encoder output as the context:
    # take the final time step and add a length-1 sequence axis so it can be
    # concatenated with the embedded decoder input.
    last_outputs = encoder_outputs[-1, :, :]             # (batch_size, encoder_num_hiddens)
    last_outputs = nd.expand_dims(last_outputs, axis=1)  # (batch_size, 1, encoder_num_hiddens)
    # hidden_broadcast = nd.broadcast_axis(single_layer_state[0], axis=0,
    #                                      size=self.max_seq_len)
    # encoder_outputs_and_hiddens = nd.concat(encoder_outputs,
    #                                         hidden_broadcast, dim=2)
    input_and_context = nd.concat(
        nd.expand_dims(self.embedding(cur_input), axis=1), last_outputs, dim=2)
    concat_input = self.rnn_concat_input(input_and_context).reshape((1, -1, 0))
    concat_input = self.dropout(concat_input)
    state = [
        nd.broadcast_axis(single_layer_state[0], axis=0, size=self.num_layers)
    ]
    output, state = self.rnn(concat_input, state)
    output = self.dropout(output)
    output = self.out(output)
    output = output.reshape((-3, -1))
    return output, state
def attention_forward(attention, enc_states, dec_state):
    """
    enc_states: (max_length, batch_size, num_hiddens)
    dec_state: (batch_size, num_hiddens)
    """
    dec_state = dec_state.expand_dims(0)
    dec_states = nd.broadcast_axis(dec_state, axis=0,
                                   size=enc_states.shape[0])
    enc_and_dec_states = nd.concat(enc_states, dec_states, dim=2)
    """
    enc_and_dec_states: (max_length, batch_size, 2 * num_hiddens)
    attention(enc_and_dec_states): (max_length, batch_size, 1)
    alpha_prob: (max_length, batch_size, 1)
    """
    alpha_prob = nd.softmax(attention(enc_and_dec_states), axis=0)
    return (alpha_prob * enc_states).sum(axis=0)
def forward(self, x_num, x_cat):
    # preprocess
    embed_concat = nd.concat(self.id_embedding(x_cat[:, :, 0]),
                             self.nYear_embedding(x_cat[:, :, 1]),
                             self.nMonth_embedding(x_cat[:, :, 2]),
                             dim=2)
    output = self.preprocess(x_num)
    for sub_TCN in self.TCN:
        output = self.residue_forward(output, sub_TCN)
    # output = nd.transpose(output, axes=(0, 2, 1))
    # print(output.shape)
    output = nd.broadcast_axis(output, axis=1, size=12)
    post_concat = nd.concat(output, embed_concat, dim=2)
    output = self.net(self.post_res(post_concat))
    output_mu = self.mu(output)
    output_mu = output_mu.reshape(output_mu.shape[0], -1)
    output_sigma = self.sigma(output)
    output_sigma = output_sigma.reshape(output_sigma.shape[0], -1)
    return output_mu, output_sigma
def forward(self, x_num, x_cat):
    # preprocess
    embed_concat = nd.concat(
        self.store_embedding(x_cat[:, :, 0]),
        # x_cat[:, :, 1:2],
        self.nYear_embedding(x_cat[:, :, 2]),
        self.nMonth_embedding(x_cat[:, :, 3]),
        self.mDay_embedding(x_cat[:, :, 4]),
        self.wday_embedding(x_cat[:, :, 5]),
        self.nHour_embedding(x_cat[:, :, 6]),
        dim=2)
    output = self.preprocess(x_num)
    for sub_TCN in self.TCN:
        output = self.residue_forward(output, sub_TCN)
    output = nd.transpose(output, axes=(0, 2, 1))
    output = nd.reshape(output, (output.shape[0], 1, -1))
    # print(output.shape)
    output = nd.broadcast_axis(output, axis=1, size=24)
    # post_concat = nd.concat(output, embed_concat, dim=2)
    output = self.net(self.post_res(output, embed_concat))
    return output
def update_decode(data, states):
    feat = data
    vvars_iner = states
    # vvars_t shape: (self.tagset_size, batch_size, self.tagset_size)
    vvars_t = nd.broadcast_axis(nd.expand_dims(vvars_iner, axis=0), axis=0,
                                size=self.tagset_size)
    # trans shape: (self.tagset_size, 1, self.tagset_size)
    trans = nd.expand_dims(self.transitions.data(), axis=1)
    next_tag_var = vvars_t + trans
    # best_tag_id shape: (self.tagset_size, batch_size)
    best_tag_id = nd.argmax(next_tag_var, axis=-1)
    # bptrs_t and viterbivars_t shapes: (batch_size, tagset_size)
    viterbivars_t = nd.transpose(nd.pick(next_tag_var, best_tag_id, axis=-1),
                                 axes=(1, 0))
    bptrs_t = nd.transpose(best_tag_id, axes=(1, 0))
    vvars_iner = viterbivars_t + feat
    return bptrs_t, vvars_iner
def forward(self, x_num, x_cat):
    # preprocess
    embed_concat = nd.concat(self.store_embedding(x_cat[:, :, 0]),
                             x_cat[:, :, 1:2],
                             self.nYear_embedding(x_cat[:, :, 2]),
                             self.nMonth_embedding(x_cat[:, :, 3]),
                             self.mDay_embedding(x_cat[:, :, 4]),
                             self.wday_embedding(x_cat[:, :, 5]),
                             self.nHour_embedding(x_cat[:, :, 6]),
                             dim=2)
    output = self.preprocess(x_num)
    conv_result = self.pool1(self.conv2(self.conv1(output)))
    # conv_result = conv_result.reshape((conv_result.shape[0], conv_result.shape[2]))
    # output = nd.transpose(output, axes=(0, 2, 1))
    # output = nd.reshape(output, (output.shape[0], 1, -1))
    # print(output.shape)
    output = nd.broadcast_axis(conv_result, axis=1, size=24)
    post_concat = nd.concat(output, embed_concat, dim=2)
    # output = self.net(self.post_res(post_concat))
    output = self.net(self.post_res(output, embed_concat))
    return output
def simple_broadcast(self, *args):
    """
    Broadcast a sequence of 1 dimensional arrays.

    Example:

        >>> import pyhf
        >>> pyhf.set_backend(pyhf.tensor.mxnet_backend())
        >>> pyhf.tensorlib.simple_broadcast(
        ...     pyhf.tensorlib.astensor([1]),
        ...     pyhf.tensorlib.astensor([2, 3, 4]),
        ...     pyhf.tensorlib.astensor([5, 6, 7]))
        <BLANKLINE>
        [[1. 1. 1.]
         [2. 3. 4.]
         [5. 6. 7.]]
        <NDArray 3x3 @cpu(0)>

    Args:
        args (Array of Tensors): Sequence of arrays

    Returns:
        MXNet NDArray: The sequence broadcast together.
    """
    args = [self.astensor(arg) for arg in args]
    max_dim = max(map(len, args))
    try:
        assert not [arg for arg in args if 1 < len(arg) < max_dim]
    except AssertionError as error:
        log.error(
            'ERROR: The arguments must be of compatible size: 1 or %i',
            max_dim)
        raise error
    broadcast = [
        arg if len(arg) > 1
        else nd.broadcast_axis(arg[0], axis=len(arg.shape) - 1, size=max_dim)
        for arg in args
    ]
    return nd.stack(*broadcast)
def attention_forward(attention, cur_features, cur_state):
    """
    cur_features: (batch_size, num_features)
    cur_state: (batch_size, num_hidden)
    """
    cur_features = cur_features.T  # (num_features, batch_size)
    cur_state = cur_state.expand_dims(0)  # (1, batch_size, num_hidden)
    cur_states = nd.broadcast_axis(cur_state, axis=0,
                                   size=cur_features.shape[0])
    cur_features = cur_features.expand_dims(2)
    features_and_cur_states = nd.concat(cur_features, cur_states, dim=2)
    """
    features_and_cur_states: (num_features, batch_size, num_hiddens + 1)
    attention(features_and_cur_states): (num_features, batch_size, 1)
    alpha_prob: (num_features, batch_size, 1)
    """
    alpha_prob = nd.softmax(attention(features_and_cur_states), axis=0)
    return (alpha_prob * cur_states).sum(axis=0)
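# A minimal shape check for the feature attention above. The single Dense
# scorer is an assumption: any block mapping (num_features, batch_size,
# num_hidden + 1) to (num_features, batch_size, 1) would do.
from mxnet import nd
from mxnet.gluon import nn

attention = nn.Dense(1, flatten=False)
attention.initialize()
cur_features = nd.zeros((4, 6))  # (batch_size, num_features)
cur_state = nd.zeros((4, 8))     # (batch_size, num_hidden)
print(attention_forward(attention, cur_features, cur_state).shape)  # (4, 8)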