def forward(self, x):
    """Route input capsules to output capsules by iterative agreement.

    The shape notes below are carried over from the original inline
    comments (1152 input capsules, 8-dim inputs, 16-dim outputs, 10 output
    capsules) -- TODO confirm against the layer configuration.

    Args:
        x: input capsules, noted as (batchsize, 8, 1152, 1, 1).

    Returns:
        The squashed output capsules reshaped to ``(-1, self.lvo, self.no)``.
    """
    # Routing logits, (1, 1, ni, 1, no).
    # NOTE(review): this softmax runs over axis 1, whose length is 1, so the
    # starting weights are all ones, whereas the loop normalises over axis 2.
    # Confirm the axis mismatch is intended.
    logits = nd.softmax(
        nd.zeros(shape=(1, 1, self.ni, 1, self.no), ctx=x.context), axis=1)

    # Prediction vectors, noted as (batchsize, 1, 1152, 16, 10).
    u = nd.sum(x * self.W.data(), axis=1, keepdims=True)

    # Initial output capsules, noted as (batchsize, 1, 1, 16, 10).
    v = Squash(nd.sum(u * logits, axis=2, keepdims=True), axis=3)

    for _ in range(self.nr):
        # Agreement between predictions and current outputs updates the logits.
        logits = logits + nd.sum(u * v, axis=3, keepdims=True)
        coupling = nd.softmax(logits, axis=2)
        v = Squash(nd.sum(u * coupling, axis=2, keepdims=True), axis=3)

    return nd.reshape(v, shape=(-1, self.lvo, self.no))
def forward(self, x):
    """Extract point features, pooled either by routing or by ``self.mp1``.

    When ``self.routing`` is not None it is used as the number of routing
    iterations that replace the ``self.mp1`` pooling step.

    Args:
        x: input batch; presumably (batch, channels, num_points) -- TODO confirm.

    Returns:
        ``(features, trans)`` where ``trans`` is the output of ``self.stn``.
        If ``self.global_feat`` is falsy, the pooled feature is repeated
        along axis 2 and concatenated with the per-point features.
    """
    use_routing = self.routing is not None
    if use_routing:
        logits = nd.softmax(
            nd.zeros(shape=(1, 1, self.num_points), ctx=x.context), axis=2)

    # Apply the transform predicted by the STN to the input.
    trans = self.stn(x)
    x = nd.transpose(nd.batch_dot(nd.transpose(x, (0, 2, 1)), trans), (0, 2, 1))

    x = nd.relu(self.bn1(self.conv1(x)))
    pointfeat = x
    x = nd.relu(self.bn2(self.conv2(x)))
    x = self.bn3(self.conv3(x))

    if use_routing:
        pooled = nd.sum(x * logits, axis=2, keepdims=True)
        for _ in range(self.routing):
            logits = logits + nd.sum(x * pooled, axis=1, keepdims=True)
            coupling = nd.softmax(logits, axis=2)
            pooled = nd.sum(x * coupling, axis=2, keepdims=True)
        x = pooled
    else:
        x = self.mp1(x)

    if self.global_feat:
        return x, trans
    x = x.repeat(self.num_points, axis=2)
    return nd.concat(x, pointfeat, dim=1), trans
def forward(self, c, q):
    """Compute the two attention outputs ``A`` and ``B`` from ``c`` and ``q``.

    The original body referenced ``Q`` and ``S_2_bar``, neither of which is
    defined in this function; they are taken to be the parameter ``q`` and
    the local ``S_2bar``.

    NOTE(review): the products below are element-wise (``*``); if matrix
    products were intended they need ``nd.dot``/``nd.batch_dot`` -- confirm.

    Args:
        c: first input tensor (presumably the context encoding).
        q: second input tensor (presumably the query encoding).

    Returns:
        Tuple ``(A, B)``.
    """
    x = nd.concat(c, q, c * q)
    S = self.dense(x)
    S_bar = nd.softmax(S, axis=1)
    S_2bar = nd.softmax(S, axis=2)
    A = S_bar * q.T
    B = S_bar * S_2bar.T * c.T
    return A, B
def forward(self, input_data):
    """Combine a GRU sentence encoding with attention-pooled entity neighbours.

    Each row of ``input_data`` is a flat vector laid out as::

        [freq_e1, freq_e2 | words | e1 mask | e1 edges | e2 mask | e2 edges]

    The segment sizes come from the module-level constants
    FIXED_WORD_LENGTH, DIMENSION, MASK_LENGTH, ENTITY_EDGE_VEC_LENGTH,
    ENTITY_DEGREE and WORD_DIMENSION.

    Args:
        input_data: 2-D batch of flattened examples, one row per example.

    Returns:
        Output of ``self.output`` applied to the concatenation of the GRU
        branch and the entity-neighbour branch.
    """
    # Two leading columns, one per entity; the extra axis lets them broadcast
    # against the (m, degree, 1) attention scores below.
    freq = input_data[:, 0:2].expand_dims(1)
    input_data = input_data[:, 2:]
    batch = input_data.shape[0]

    e1_vec_start = FIXED_WORD_LENGTH * DIMENSION
    e2_vec_start = e1_vec_start + MASK_LENGTH + ENTITY_EDGE_VEC_LENGTH

    # Word features: (m, FIXED_WORD_LENGTH, DIMENSION).
    x = input_data[:, :e1_vec_start].reshape(
        (batch, FIXED_WORD_LENGTH, DIMENSION))

    def _entity_segment(start):
        # Slice one entity's neighbour mask and edge vectors out of the flat row.
        mask = input_data[:, start:start + MASK_LENGTH]
        edge_start = start + MASK_LENGTH
        edge = input_data[:, edge_start:edge_start + ENTITY_EDGE_VEC_LENGTH].reshape(
            (batch, ENTITY_DEGREE, WORD_DIMENSION * 2))
        return mask, edge

    def _pool_neighbours(edge, mask, weight):
        # Attention-pool the first WORD_DIMENSION features of every edge.
        neigh = edge[:, :, :WORD_DIMENSION]
        gate = self.center_att(nd.tanh(edge)) * weight  # (m, degree, 1)
        # NOTE(review): masked slots get a logit of 0, not -inf, so they still
        # receive softmax weight -- confirm this is intended.
        gate = gate * mask.expand_dims(2)
        gate = nd.softmax(gate, axis=1)
        pooled = nd.batch_dot(nd.transpose(gate, axes=(0, 2, 1)), neigh)  # (m, 1, W)
        # Was a hard-coded 100; the slice above fixes this size to WORD_DIMENSION.
        return pooled.reshape((pooled.shape[0], WORD_DIMENSION))

    e1neimask, e1edge = _entity_segment(e1_vec_start)
    e2neimask, e2edge = _entity_segment(e2_vec_start)

    # Sentence branch: the GRU is fed with the first two axes swapped.
    h = self.gru(nd.transpose(x, axes=(1, 0, 2)))
    ht = nd.transpose(h, axes=(1, 0, 2))
    y1 = self.gru_out(ht.expand_dims(1))

    # Entity branch: one pooled neighbour vector per entity.
    e1n = _pool_neighbours(e1edge, e1neimask, freq[:, :, :1])
    e2n = _pool_neighbours(e2edge, e2neimask, freq[:, :, 1:])
    center_y = self.center_out(nd.concat(e1n, e2n, dim=1))

    y4 = nd.concat(y1, center_y, dim=1)
    return self.output(y4)
def classify(net, image):
    """Classify ``image`` with ``net`` and return ``(label, confidence)``.

    The image is moved to ``gpu(0)``, so one GPU is required. An empty
    ``image`` short-circuits to ``("None Action", 0.0)``.

    Args:
        net: network called on the transformed image.
        image: array-like image; ``len(image)`` decides whether it is empty.

    Returns:
        Tuple of the predicted entry of ``class_list`` and its softmax
        probability as a Python scalar.
    """
    # one gpu is necessary
    if not len(image):
        return "None Action", 0.0

    transformed_img = data.transforms.presets.imagenet.transform_eval(
        mx.nd.array(image).as_in_context(gpu(0)))
    pred = net(transformed_img)
    ind = nd.argmax(pred, axis=1).astype('int')
    action = class_list[ind.asscalar()]
    # Compute the probability once and reuse it for both print and return.
    confidence = nd.softmax(pred)[0][ind].asscalar()
    print(action, confidence)
    return action, confidence
def forward(self, inputs, begin_state=None):  # pylint: disable=arguments-differ
    """Run the forward pass used by the AWD language model and cache model.

    Parameters
    -----------
    inputs : NDArray
        input tensor with shape `(sequence_length, batch_size)`
        when `layout` is "TNC".
    begin_state : list
        initial recurrent states, one per layer, each with shape
        `(1, batch_size, num_hidden)`. Created via ``self.begin_state``
        when falsy.

    Returns
    --------
    out: NDArray
        log of the expert-mixture probabilities, reshaped to
        `(-1, batch_size, vocab_size)`.
    out_states: list
        output recurrent states, one per layer.
    encoded_raw: list
        raw output of every encoder layer.
    encoded_dropped: list
        encoder outputs after dropout.
    """
    hidden = self.embedding(inputs)
    if not begin_state:
        begin_state = self.begin_state(batch_size=inputs.shape[1])

    out_states, encoded_raw, encoded_dropped = [], [], []
    final_layer = len(self.encoder) - 1
    for depth, (layer, init_state) in enumerate(zip(self.encoder, begin_state)):
        hidden, layer_state = layer(hidden, init_state)
        encoded_raw.append(hidden)
        out_states.append(layer_state)
        # Dropout between layers only; the last layer is handled below.
        if self._drop_h and depth != final_layer:
            hidden = nd.Dropout(hidden, p=self._drop_h, axes=(0,))
            encoded_dropped.append(hidden)
    if self._dropout:
        hidden = nd.Dropout(hidden, p=self._dropout, axes=(0,))
    encoded_dropped.append(hidden)

    latent = nd.Dropout(self.latent(hidden), p=self._drop_l, axes=(0,))
    logit = self.decoder(latent.reshape(-1, self._embed_size))

    # Mixing weights over the experts.
    prior = nd.softmax(self.prior(hidden).reshape(-1, self._num_experts))

    # Per-expert word distributions, weighted by the prior and summed.
    expert_prob = nd.softmax(logit.reshape(-1, self._vocab_size))
    expert_prob = expert_prob.reshape(-1, self._num_experts, self._vocab_size)
    weights = prior.expand_dims(2).broadcast_to(expert_prob.shape)
    prob = (expert_prob * weights).sum(axis=1)

    out = nd.log(nd.add(prob, 1e-8)).reshape(-1, inputs.shape[1],
                                             self._vocab_size)
    return out, out_states, encoded_raw, encoded_dropped
def test_accur(target, it, *input):
    """Return accuracy and mean correct-class confidence for one batch.

    ``theta`` and ``phi`` are blended with a weight ``lamb`` that decays with
    the iteration count ``it`` and is clamped from below by ``lambda_min``.

    Args:
        target: ground-truth labels; ``target.size`` is used as batch size.
        it: current iteration, used to anneal ``lamb``.
        *input: exactly two arrays ``(theta, phi)``.

    Returns:
        Tuple ``(accuracy, confidence)``, each an NDArray: the fraction of
        top-1 hits, and the summed top-1 probability of the hits divided by
        the batch size.
    """
    lambda_min = 5.0
    lambda_max = 1500.0
    theta, phi = input
    batch_size = target.size
    lamb = max(lambda_min, lambda_max / (1 + 0.1 * it))
    # because indexing is not differentiable in mxnet, we must do this
    output = theta - theta / (1 + lamb) + phi / (1 + lamb)
    nd.softmax(output, out=output)  # in-place softmax
    v, idx = nd.topk(output, ret_typ='both')
    real = (idx == target.reshape(-1, 1).astype(idx.dtype))
    return nd.sum(real) / batch_size, nd.sum(real * v) / batch_size
def forward(self, cur_input, state, encoder_outputs):
    """Run one attention-decoder step.

    Args:
        cur_input: current decoder input token(s), fed to ``self.embedding``.
        state: recurrent state list; only ``state[0]`` is read.
        encoder_outputs: encoder outputs, reshaped here to
            ``(max_seq_len, -1, encoder_num_hiddens)``.

    Returns:
        Tuple ``(output, state)``: the output of ``self.out`` reshaped with
        ``(-3, -1)``, and the new recurrent state.
    """
    # When the RNN has several hidden layers, use only the hidden state of
    # the layer closest to the output.
    single_layer_state = [state[0][-1].expand_dims(0)]
    # encoder_outputs has shape (max_seq_len, -1, encoder_num_hiddens)
    encoder_outputs = encoder_outputs.reshape(
        (self.max_seq_len, -1, self.encoder_num_hiddens))
    hidden_broadcast = nd.broadcast_axis(single_layer_state[0], axis=0,
                                         size=self.max_seq_len)
    encoder_outputs_and_hiddens = nd.concat(encoder_outputs, hidden_broadcast,
                                            dim=2)
    energy = self.attention(encoder_outputs_and_hiddens)
    # Normalise over axis 0, then move that axis last for batch_dot.
    # (The original ran this softmax twice and discarded the first result.)
    batch_attention = nd.softmax(energy, axis=0).transpose((1, 2, 0))
    batch_encoder_outputs = encoder_outputs.swapaxes(0, 1)
    decoder_context = nd.batch_dot(batch_attention, batch_encoder_outputs)
    input_and_context = nd.concat(
        nd.expand_dims(self.embedding(cur_input), axis=1),
        decoder_context, dim=2)
    concat_input = self.rnn_concat_input(input_and_context).reshape(
        (1, -1, 0))
    concat_input = self.dropout(concat_input)
    # Give every RNN layer the same initial state.
    state = [
        nd.broadcast_axis(single_layer_state[0], axis=0,
                          size=self.num_layers)
    ]
    output, state = self.rnn(concat_input, state)
    output = self.dropout(output)
    output = self.out(output)
    output = output.reshape((-3, -1))
    return output, state
def _get_co_attention(as_, bs_, r, lamb=k_lambda):
    """Compute co-attention between two sequences.

    as_, bs_: (batch_size, seq_len, embed_size)
    r: (batch_size, seq_len, seq_len, 5)

    Returns ``(ac, bc, alpha, beta)``: the attended summaries for each side
    followed by the two attention matrices.
    """
    # Pairwise scores plus a weighted term from r: (batch_size, seq_len, seq_len)
    similarity = nd.batch_dot(as_, bs_, transpose_b=True)
    e = similarity + lamb * F(r, ctx)

    # alpha_ij = exp(e_ij) / SUM_k(exp(e_ik))
    alpha = nd.softmax(e, axis=2)
    # beta_ij = exp(e_ij) / SUM_k(exp(e_kj)); transposed because the softmax
    # runs over axis 1
    beta = nd.transpose(nd.softmax(e, axis=1), axes=[0, 2, 1])

    ac = nd.batch_dot(alpha, bs_)
    bc = nd.batch_dot(beta, as_)
    return ac, bc, alpha, beta
def forward(self, inputs, begin_state=None):
    """Implement forward computation.

    Parameters
    ----------
    inputs : NDArray
        The input batch; ``inputs.shape[1]`` is used as the batch size.
    begin_state : list
        The initial hidden states. Created via ``self.begin_state`` when
        falsy.

    Returns
    -------
    out: NDArray
        Log of the expert-mixture probabilities, reshaped to
        ``(-1, batch_size, vocab_size)``.
    out_states: list
        The list of output states of the model's encoder.
    encoded_raw: list
        The raw output of every encoder layer.
    encoded_dropped: list
        The encoder outputs after dropout.
    """
    encoded = self.embedding(inputs)
    if not begin_state:
        begin_state = self.begin_state(batch_size=inputs.shape[1])
    out_states = []
    encoded_raw = []
    encoded_dropped = []
    for i, (e, s) in enumerate(zip(self.encoder, begin_state)):
        encoded, state = e(encoded, s)
        encoded_raw.append(encoded)
        out_states.append(state)
        # Dropout between layers only; the last layer is handled below.
        if self._drop_h and i != len(self.encoder) - 1:
            encoded = nd.Dropout(encoded, p=self._drop_h, axes=(0, ))
            encoded_dropped.append(encoded)
    if self._dropout:
        encoded = nd.Dropout(encoded, p=self._dropout, axes=(0, ))
    encoded_dropped.append(encoded)

    latent = nd.Dropout(self.latent(encoded), p=self._drop_l, axes=(0, ))
    logit = self.decoder(latent.reshape(-1, self._embed_size))

    # Mixing weights over the experts.
    prior_logit = self.prior(encoded).reshape(-1, self._num_experts)
    prior = nd.softmax(prior_logit)

    # Per-expert word distributions, weighted by the prior and summed.
    prob = nd.softmax(logit.reshape(-1, self._vocab_size))
    prob = prob.reshape(-1, self._num_experts, self._vocab_size)
    prob = (prob * prior.expand_dims(2).broadcast_to(prob.shape)).sum(axis=1)

    out = nd.log(nd.add(prob, 1e-8)).reshape(-1, inputs.shape[1],
                                             self._vocab_size)
    return out, out_states, encoded_raw, encoded_dropped
def msg_reduce(self, node):
    """Reduce a node's mailbox to a new state by attention-weighted summing.

    Reads ``state`` and ``alpha`` from ``node.mailbox``, normalises ``alpha``
    with a softmax over axis 1, and returns the ReLU of the weighted sum of
    ``state`` over that axis under the key ``'new_state'``.
    """
    mailbox = node.mailbox
    weights = nd.softmax(mailbox['alpha'], axis=1)
    pooled = nd.sum(weights * mailbox['state'], axis=1)
    return {'new_state': nd.relu(pooled)}
def route(self, x):
    """Route prediction vectors to output capsules by iterative agreement.

    Gradients flow only through the final iteration; earlier iterations use
    a ``stop_gradient`` copy of ``x`` to refine the routing logits.

    Args:
        x: prediction tensor with at least 6 axes (``x.shape[4]`` and
            ``x.shape[5]`` are read); axis 1 is summed over as the
            input-capsule axis.

    Returns:
        The squashed output ``v`` of the last routing iteration.

    NOTE(review): with ``self.route_num == 0`` the loop never runs and ``v``
    is unbound -- callers presumably always use ``route_num >= 1``.
    """
    b_mat = nd.zeros((x.shape[0], self.num_cap_in, self.num_cap, 1,
                      x.shape[4], x.shape[5]), ctx=x.context)
    u = x
    u_no_gradient = nd.stop_gradient(x)
    last_iter = self.route_num - 1
    for i in range(self.route_num):
        c_mat = nd.softmax(b_mat, axis=2)
        if i == last_iter:
            # Only the final pass contributes gradients.
            s = nd.sum(u * c_mat, axis=1)
        else:
            s = nd.sum(u_no_gradient * c_mat, axis=1)
        v = squash(s, 2)
        if i != last_iter:
            # Agreement between detached predictions and current outputs.
            v1 = nd.expand_dims(v, axis=1)
            update_term = nd.sum(u_no_gradient * v1, axis=3, keepdims=True)
            b_mat = b_mat + update_term
    return v
def calculation(self, input_str, char_indices, indices_char, input_digits = 9, lchars = 14, ctx = mx.cpu()):
    """Greedily decode the model's answer string for ``input_str``.

    ``input_str`` is wrapped in the start/end markers ``'S'``/``'E'``,
    one-hot encoded, run through the encoder, and decoded one character at
    a time until a space or ``'E'`` is produced or ``self.out_seq_len``
    steps have been taken.

    Args:
        input_str: raw input expression.
        char_indices: mapping from character to vocabulary index.
        indices_char: mapping from vocabulary index to character.
        input_digits: length of the one-hot input sequence.
        lchars: vocabulary size used for the one-hot input.
        ctx: context on which the input arrays are created.

    Returns:
        The decoded string with ``'E'`` markers and surrounding whitespace
        stripped; the empty string if ``self.out_seq_len`` is 0.
    """
    input_str = 'S' + input_str + 'E'
    X = nd.zeros((1, input_digits, lchars), ctx = ctx)
    for t, char in enumerate(input_str):
        X[0, t, char_indices[char]] = 1
    Y_init = nd.zeros((1, lchars), ctx = ctx)
    Y_init[0, char_indices['S']] = 1

    begin_state = self.encoder.begin_state(batch_size = 1, ctx = ctx)
    _, (h, c) = self.encoder(X, begin_state)
    # The decoder is seeded from index 1 of the encoder's hidden/cell states.
    next_h = h[1]
    next_c = c[1]
    deout = Y_init

    # Start empty so the result is defined even when no step runs.
    ret_seq = ''
    for _ in range(self.out_seq_len):
        deout, (next_h, next_c) = self.decoder(deout, [next_h, next_c])
        deout = nd.expand_dims(deout, axis = 1)
        deout = self.batchnorm(deout)
        deout = deout[:, 0, :]
        deout_sm = self.dense(deout)
        # Feed the one-hot of the most probable character to the next step.
        deout = nd.one_hot(nd.argmax(nd.softmax(deout_sm, axis = 1), axis = 1), depth = self.vocab_size)
        best = nd.argmax(deout_sm, axis = 1).asnumpy()[0].astype('int')
        ret_seq += indices_char[best]
        if ret_seq[-1] == ' ' or ret_seq[-1] == 'E':
            break
    return ret_seq.strip('E').strip()
def dot_attention(query, key, value, mask, dropout=0.0):
    """Scaled dot-product attention over multi-head inputs.

    Args:
        query: (batch_size, h, length_q, model_dim/h)
        key: (batch_size, h, length_k, model_dim/h)
        value: (batch_size, h, length_k, model_dim/h)
        mask: array broadcastable to (batch_size*h, length_q, length_k);
            non-zero entries are kept, zero entries (PAD and future words)
            are replaced by ``LARGE_NEGATIVE_VALUE`` before the softmax.
            Assumed to live on the same context as ``query``.
        dropout: dropout probability applied to the attention weights.

    Returns:
        Attention output reshaped back to the original shape of ``query``.
    """
    query_shape = query.shape
    # Merge the batch and head axes so batch_dot can be used.
    query = query.reshape(-3, -2)
    key = key.reshape(-3, -2)
    value = value.reshape(-3, -2)

    # matmul, t: (batch_size*h, length_q, length_k)
    t = nd.batch_dot(query, key.swapaxes(1, 2)) / math.sqrt(query.shape[-1])

    # Build the helpers on t's own context; the default context would fail
    # as soon as the inputs live on a GPU.
    m = nd.full(t.shape, LARGE_NEGATIVE_VALUE, ctx=t.context)
    mask = nd.ones(t.shape, ctx=t.context) * mask
    t = nd.where(mask, t, m)

    t = nd.softmax(t, axis=-1)
    if dropout > 0.0:
        # nd.Dropout is the operator name used elsewhere in this file.
        t = nd.Dropout(t, p=dropout)

    # (batch_size, h, length_q, model_dim/h)
    return nd.batch_dot(t, value).reshape(query_shape)
def Route(self, x):
    """Route inputs to output capsules by iterative agreement.

    ``x`` is expanded and repeated along two new axes (``self.num_cap`` and
    ``self.units``), multiplied by the weights ``self.w_ij`` and summed over
    axis 1 to form the votes. Only the final routing iteration uses the
    gradient-carrying votes; earlier ones use a ``stop_gradient`` copy.

    Returns:
        The squashed output ``v`` of the last routing iteration.
    """
    logits = nd.zeros((x.shape[0], 1, self.num_cap, self.num_locations),
                      ctx=x.context)

    expanded = nd.repeat(nd.expand_dims(x, 2), repeats=self.num_cap, axis=2)
    expanded = nd.repeat(nd.expand_dims(expanded, axis=2),
                         repeats=self.units, axis=2)
    weights = nd.expand_dims(self.w_ij.data(), axis=0)
    votes = nd.sum(weights * expanded, axis=1)
    votes_detached = nd.stop_gradient(votes)

    last_round = self.route_num - 1
    for step in range(self.route_num):
        coupling = nd.softmax(logits, axis=2)
        source = votes if step == last_round else votes_detached
        v = squash(nd.sum(source * coupling, axis=-1), 1)
        v1 = nd.expand_dims(v, axis=-1)
        if step != last_round:
            logits = logits + nd.sum(votes_detached * v1, axis=1,
                                     keepdims=True)
    return v
def mxnet_cifar10(im):
    """Classify the image file ``im`` with a pre-trained CIFAR-10 ResNet-110.

    Args:
        im: path passed to ``image.imread``.

    Returns:
        Two-element list of strings: the predicted class name and its
        softmax probability rounded to two decimals.
    """
    class_names = ['airplane', 'automobile', 'bird', 'cat', 'deer',
                   'dog', 'frog', 'horse', 'ship', 'truck']

    # Resize/crop to 32x32, convert to tensor, then normalise per channel.
    preprocess = transforms.Compose([
        transforms.Resize(32),
        transforms.CenterCrop(32),
        transforms.ToTensor(),
        transforms.Normalize([0.4914, 0.4822, 0.4465],
                             [0.2023, 0.1994, 0.2010])
    ])
    img = preprocess(image.imread(im))

    # load pre-trained model
    net = get_model('cifar_resnet110_v1', classes=10, pretrained=True)

    # predict class
    pred = net(img.expand_dims(axis=0))
    ind = nd.argmax(pred, axis=1).astype('int')
    label = str(class_names[ind.asscalar()])
    confidence = str(round(nd.softmax(pred)[0][ind].asscalar(), 2))
    return [label, confidence]
def predict_all(X, net, ctx, dfeat, batch_size=64, cnn_flag=False):
    '''
    Run ``net`` over every example in ``X`` and collect its predictions.

    :param X: an ndarray containing the data. The first axis is over examples
    :param net: trained model
    :param ctx: context each batch is copied to before the forward pass
    :param dfeat: the dimensionality of the vectorized feature
    :param batch_size: batch size used in the iterator. default is 64.
    :param cnn_flag: if True, batches are fed as-is; otherwise each batch is
        reshaped to ``(-1, dfeat)``
    :return: Two ndarrays, in this order: the hard predictions (argmax over
        axis 1) and the soft predictions (softmax over axis 1).
    '''
    data_iterator = mx.io.NDArrayIter(X, None, batch_size, shuffle=False)
    ypred_soft = []
    ypred = []
    for batch in data_iterator:
        data = batch.data[0].as_in_context(ctx)
        if not cnn_flag:
            data = data.reshape((-1, dfeat))
        output = net(data)
        ypred_soft.append(nd.softmax(output, axis=1))
        ypred.append(nd.argmax(output, axis=1))
    ypred_soft_all = nd.concatenate(ypred_soft, axis=0)
    ypred_all = nd.concatenate(ypred, axis=0)
    # iterator automatically pads the last minibatch, so the length of the
    # vectors might be different; trim back to the number of examples.
    ypred_all = ypred_all[:X.shape[0]]
    ypred_soft_all = ypred_soft_all[:X.shape[0], ]
    return ypred_all, ypred_soft_all
def forward(self, x, _mask=None):
    """Attention-pool ``x`` along ``self.axis``.

    Scores come from ``self.linear2(self.linear1(x))``, are normalised with
    a softmax over ``self.axis``, and weight ``x`` before it is summed over
    that axis. ``_mask`` is accepted but not used.
    """
    scores = self.linear2(self.linear1(x))
    weights = nd.softmax(scores, axis=self.axis)
    return (weights * x).sum(axis=self.axis)
def targetClassify(model_name, input_pic, target_class):
    """Print the probability that an image belongs to ``target_class``.

    Loads the pre-trained model ``model_name``, finds ``target_class`` among
    ``net.classes`` (case-insensitively), and prints the softmax probability
    of that class for ``images/<input_pic>``.

    Args:
        model_name: name passed to ``get_model`` (pretrained weights).
        input_pic: file name inside the ``images/`` directory.
        target_class: class name to report on.

    Returns:
        None. Prints an error and returns early when the class is unknown.
    """
    # Load specified model; assume pretrained.
    net = get_model(model_name, pretrained=True)
    classes = net.classes

    # Compare case-insensitively on both sides; the original lower-cased only
    # the model's class names, so a mixed-case target could never match.
    wanted = target_class.lower()
    classInd = next(
        (i for i, name in enumerate(classes) if name.lower() == wanted), -1)

    # Exit if target class not found
    if classInd == -1:
        print("ERROR: Target class not found in this model : %s" % target_class)
        return

    # Load images, assume all data is in "images/" directory
    img = image.imread("images/" + input_pic)

    # Transform and predict
    img = transform_eval(img)
    pred = net(img)

    # use softmax and print probability
    print("Probability of class [%s] for [%s]: %.3f" %
          (classes[classInd], input_pic, nd.softmax(pred)[0][classInd].asscalar()))
def predictor_0(neural_network, features_npy, use_softmax=False):
    """Run ``neural_network`` on a NumPy feature array and return NumPy output.

    Args:
        neural_network: callable applied to the converted NDArray.
        features_npy: features convertible by ``nd.array``.
        use_softmax: when True, apply a softmax to the network output.

    Returns:
        The (optionally soft-maxed) outputs as a NumPy array.
    """
    scores = neural_network(nd.array(features_npy))
    if use_softmax:
        scores = nd.softmax(scores)
    return scores.asnumpy()
def forward(self, x, x_mask=None):
    """Directional self-attention followed by a fusion gate.

    Args:
        x: input of shape (bs, sl, vec).
        x_mask: accepted for interface compatibility; currently unused
            (only the directional mask is applied).

    Returns:
        Gated mix of the projected input and the attention result,
        presumably of shape (bs, sl, vec) -- TODO confirm layer widths.
    """
    bs, sl, vec = tuple(x.shape)
    direct_mask = get_direct_mask(bs, sl, self.direction)
    mask = direct_mask.astype('float32')

    x_map = self.linear1(x)  # bs, sl, vec
    # x_map_tile is needed for the weighted sum below, but in the visible
    # source its definitions were only present as comments.
    x_map_tile = nd.tile(x_map.expand_dims(1), (1, sl, 1, 1))  # bs, sl, sl, vec
    x_map_drop = self.dropout(x_map)

    # Pairwise logits: dependent broadcasts over axis 1, head over axis 2.
    dependent = self.linear2(x_map_drop)
    dependent_etd = dependent.expand_dims(1)
    head = self.linear3(x_map_drop)
    head_etd = head.expand_dims(2)
    loggits = scaled_tanh(dependent_etd + head_etd + self.f_bias, 5.0)

    loggits_masked = exp_mask_for_tensor(loggits, mask)
    attn_score = nd.softmax(loggits_masked, 2)
    attn_score = mask_for_tensor(attn_score, mask)
    attn_result = (attn_score * x_map_tile).nansum(2)

    fusion_gate = nd.sigmoid(
        self.linear4(x_map) + self.linear5(attn_result) + self.o_bias)
    output = fusion_gate * x_map + (1 - fusion_gate) * attn_result
    return output
def forward(self, x):
    """Causal self-attention; returns ``x`` concatenated with the read-out.

    Each position may attend only to itself and earlier positions.

    The original mask logic had two defects, both fixed here:
    * the two consecutive ``np.place`` calls first zeroed the upper triangle
      and then set every zero to one, leaving an all-ones mask (no masking);
    * the mask was multiplied into the logits, which makes a masked logit 0
      rather than excluding it from the softmax.
    Masked positions now get a large negative value added before the softmax.

    Args:
        x: input batch; axis 1 is treated as the sequence axis and the
            result is concatenated to ``x`` along axis 2.

    Returns:
        ``nd.concat(x, read, dim=2)`` where ``read`` is the attention output.
    """
    with x.context:
        keys = self.key_layer(x)
        queries = self.query_layer(x)
        values = self.value_layer(x)
        logits = nd.linalg_gemm2(queries, keys.swapaxes(2, 1))
        if self.show_shape:
            print("keys shape:{}".format(keys.shape))
            print("queries shape:{}".format(queries.shape))
            print("logits shape:{}".format(logits.shape))

        # Strict upper triangle marks the future positions. The diagonal is
        # kept, so every row has at least one unmasked entry.
        future = np.triu(
            np.ones((logits.shape[1], logits.shape[2]), dtype='float32'), 1)
        penalty = nd.array(future * -1e9).expand_dims(0).astype(logits.dtype)
        logits = nd.broadcast_add(logits, penalty)

        probs = nd.softmax(logits / self.sqrt_k, axis=2)
        if self.show_shape:
            print("probs shape:{}".format(probs.shape))
            print("values shape:{}".format(values.shape))
        read = nd.linalg_gemm2(probs, values)
        concat_data = nd.concat(x, read, dim=2)
        return concat_data
def dev(ch_bert, model, ch_vocab, dev_dataiter, logger, ctx):
    """Evaluate ``model`` on the dev set and return the F score.

    Args:
        ch_bert: unused here; kept for interface compatibility.
        model: callable taking ``(content, token_types, valid_len)``.
        ch_vocab: unused here; kept for interface compatibility.
        dev_dataiter: iterable of
            ``(content, token_types, valid_len, label, example_id)`` batches.
        logger: logger used to report the score.
        ctx: context the batch arrays are copied to.

    Returns:
        The F score, or 0.0 when precision or recall is undefined (the
        original raised ZeroDivisionError in that case).

    NOTE(review): ``TP_s`` counts every correct prediction, including
    correctly predicted label-0 examples, so it is really "number correct"
    rather than true positives -- confirm this metric is what is wanted.
    """
    TP_s = 0
    FP_s = 0
    FN_s = 0
    for content, token_types, valid_len, label, _ in tqdm(dev_dataiter):
        content = content.as_in_context(ctx)
        token_types = token_types.as_in_context(ctx)
        valid_len = valid_len.as_in_context(ctx)
        label = label.as_in_context(ctx)

        output = model(content, token_types, valid_len)
        predict = nd.argmax(nd.softmax(output, axis=-1), axis=-1)

        wrong = nd.not_equal(predict, label)
        TP_s += int(nd.sum(nd.equal(predict, label)).asscalar())
        # Errors on label-0 examples count as false positives,
        # errors on label-1 examples as false negatives.
        FP_s += int(nd.sum(wrong * nd.equal(label, 0)).asscalar())
        FN_s += int(nd.sum(wrong * nd.equal(label, 1)).asscalar())

    # Guard every ratio against an empty denominator.
    P_s = TP_s / (TP_s + FP_s) if (TP_s + FP_s) else 0.0
    R_s = TP_s / (TP_s + FN_s) if (TP_s + FN_s) else 0.0
    F = (2 * P_s * R_s) / (P_s + R_s) if (P_s + R_s) else 0.0
    logger.info("F:{}".format(F))
    return F
def _predict_tabular_data(self, new_data, process=True, predict_proba=True): # TODO ensure API lines up with tabular.Model class.
    """ Specific TabularNN method to produce predictions on new (unprocessed) data.
        Returns 1D numpy array unless predict_proba=True and task is multi-class classification (not binary).
        Args:
            new_data (pd.Dataframe or TabularNNDataset): new data to make predictions on.
                If you want to make prediction for just a single row of new_data, pass in: new_data.iloc[[row_index]]
            process (bool): should new data be processed (if False, new_data must be TabularNNDataset)
            predict_proba (bool): should we output class-probabilities (not used for regression)
        Raises:
            ValueError: if new_data is not a TabularNNDataset after the optional processing step.
    """
    if process:
        new_data = self.process_test_data(new_data, batch_size=self.batch_size, num_dataloading_workers=self.num_dataloading_workers_inference, labels=None)
    if not isinstance(new_data, TabularNNDataset):
        raise ValueError("new_data must be of type TabularNNDataset if process=False")

    # Regression and hard-label prediction produce a single column.
    single_column = self.problem_type == REGRESSION or not predict_proba
    if single_column:
        preds = nd.zeros((new_data.num_examples, 1))
    else:
        preds = nd.zeros((new_data.num_examples, self.num_net_outputs))

    i = 0  # write offset into preds
    for data_batch in new_data.dataloader:
        data_batch = new_data.format_batch_data(data_batch, self.ctx)
        preds_batch = self.model(data_batch)
        batch_size = len(preds_batch)
        if self.problem_type != REGRESSION:
            if not predict_proba: # need to take argmax
                preds_batch = nd.argmax(preds_batch, axis=1, keepdims=True)
            else: # need to take softmax
                preds_batch = nd.softmax(preds_batch, axis=1)
        preds[i:(i+batch_size)] = preds_batch
        i = i+batch_size

    if single_column:
        return preds.asnumpy().flatten() # return 1D numpy array
    elif self.problem_type == BINARY and predict_proba:
        return preds[:,1].asnumpy() # for binary problems, only return P(Y==+1)
    return preds.asnumpy() # return 2D numpy array
def act(self, stochastic, input_):
    """Choose an action for ``input_`` and return it with the value estimate.

    Args:
        stochastic: when truthy, sample the action from the softmax of the
            logits; otherwise take the argmax over the last axis as int32.
        input_: observation passed to ``self.forward``.

    Returns:
        Tuple ``(action, value)``.
    """
    value, logits = self.forward(input_)
    if not stochastic:
        return nd.argmax(logits, axis=-1).astype('int32'), value
    return nd.sample_multinomial(nd.softmax(logits)), value
def __call__(self, output, label):
    """Return the one-hot encoding of the least likely non-true class.

    The probability of the true class is overwritten with 1 so the argmin
    over classes can never select it.

    Args:
        output: network scores; a softmax is applied here.
        label: true class indices.

    Returns:
        float32 one-hot array with ``self.num_class`` columns.
    """
    probs = nd.softmax(output).asnumpy()
    true_idx = label.asnumpy().astype('int').reshape((-1, ))
    probs[range(true_idx.shape[0]), true_idx] = 1
    least_likely = nd.array(probs).argmin(axis=1)
    return least_likely.one_hot(self.num_class).astype('float32')
def forward(self, query, values, head=False):
    """
    Compute the attention weights and the attention output vector.

    :param query: the query, i.e. the decoder input at the current step
    :param values: the values, i.e. the encoder vector of every time step
    :param head: when exactly ``True`` the weighted values are summed over
        axis 2, otherwise over axis 1
    :return: (attention output vector, attention weights)
    """
    # Add a time axis to the query so it broadcasts against the values.
    query_with_time = nd.expand_dims(query, 1)
    energy = nd.tanh(self.W1(values) + self.W2(query_with_time))
    attention_weights = nd.softmax(self.V(energy), axis=1)

    weighted = attention_weights * values
    reduce_axis = 2 if head is True else 1
    context_vector = nd.expand_dims(nd.sum(weighted, axis=reduce_axis), axis=0)
    return context_vector, attention_weights
def forward(self, am, bm, alpha_r, beta_r):
    """Compose both sequences, pool them three ways, and classify.

    Each side is pooled over axis 1 with max, mean and a learned weighted
    sum; the six pooled vectors are concatenated and fed to
    ``self.final_mlp``.

    Args:
        am, bm: inputs to the two composition layers.
        alpha_r, beta_r: inputs to the weight-pooling dense layers.

    Returns:
        Output of ``self.final_mlp``.
    """
    # (batch_size, seq_len, hidden*2), e.g. (32, 45, 600)
    av = self.inference_composition_a(am)
    bv = self.inference_composition_b(bm)

    weight_a = nd.softmax(self.weight_pooling_dense_a(alpha_r))
    weight_b = nd.softmax(self.weight_pooling_dense_b(beta_r))

    pooled = [
        nd.max(av, axis=1),
        nd.mean(av, axis=1),
        nd.sum(weight_a * av, axis=1),
        nd.max(bv, axis=1),
        nd.mean(bv, axis=1),
        nd.sum(weight_b * bv, axis=1),
    ]
    return self.final_mlp(nd.concat(*pooled))
def beam_search_translate(encoder, decoder, input_seq, max_length, ctx,
                          beam_size, in_vocab, out_vocab):
    """Translate ``input_seq`` by expanding the top first-step candidates.

    The first decoder step picks the ``beam_size`` most likely tokens; each
    is completed by ``predict_rest``. Scores are then converted to
    length-normalised log probabilities.

    Args:
        encoder, decoder: the trained seq2seq blocks.
        input_seq: source sentence, tokens separated by single spaces.
        max_length: padded source length.
        ctx: context on which input arrays are created.
        beam_size: number of first-step candidates to expand.
        in_vocab, out_vocab: source and target vocabularies.

    Returns:
        List of candidates as returned by ``predict_rest``, with element 1
        of each replaced by ``log(score) / len(tokens) ** 0.75``.
    """
    in_tokens = input_seq.lower().split(' ')
    in_tokens += [EOS] + [PAD] * (max_length - len(in_tokens) - 1)
    enc_input = nd.array([in_vocab.to_indices(in_tokens)], ctx=ctx)
    enc_state = encoder.begin_state(batch_size=1, ctx=ctx)
    enc_output, enc_state = encoder(enc_input, enc_state)
    dec_input = nd.array([out_vocab.token_to_idx[BOS]], ctx=ctx)
    dec_state = decoder.begin_state(enc_state)

    # the first character prediction
    dec_output, dec_state = decoder(dec_input, dec_state, enc_output)
    topk = nd.topk(dec_output, k=beam_size,
                   ret_typ='indices').asnumpy().astype('int32')
    # The distribution is the same for every candidate: compute it once.
    first_step_probs = nd.softmax(dec_output[0])

    output_tokens = []
    for idx in topk[0]:
        score = first_step_probs[idx].asscalar()
        sample_output = predict_rest(encoder, decoder, input_seq, max_length,
                                     idx, dec_state, enc_output, score,
                                     in_vocab, out_vocab, ctx)
        output_tokens.append(sample_output)

    # Length-normalise in place: log-probability / len ** 0.75.
    for candidate in output_tokens:
        candidate[1] = math.log(candidate[1]) / (len(candidate[0]) ** 0.75)
    return output_tokens
def forward(self, a):
    """Self-attend over a batch of sequences.

    ``a`` of shape (B, L, H) is projected by ``self.f`` to
    (B, L, hidden_size); pairwise dot products between positions are
    soft-maxed (default axis) and used to re-weight the projected sequence.
    """
    B, L, H = a.shape
    flat = a.reshape(B * L, H)
    tilde_a = self.f(flat).reshape(B, L, self.hidden_size)
    e = nd.linalg.gemm2(A=tilde_a, B=tilde_a.transpose([0, 2, 1]))
    return nd.linalg.gemm2(nd.softmax(e), tilde_a)
def get_inception_score(images, splits=10):
    """
    Compute the inception score of a set of images.

    The predictions are divided into 'splits' parts, an inception score is
    computed for each part, and the mean and std of those scores are returned.

    :param images: Images (num x c x w x h) to score; must have 3 channels.
    :param splits: number of parts the predictions are divided into.
    :return: mean and std of the per-split inception scores
    """
    assert (images.shape[1] == 3)

    # load inception model
    if inception_model is None:
        _init_inception()

    # resize images to adapt inception model (inceptionV3)
    if images.shape[2] != 299:
        images = resize(images, 299, 299)

    bs = 4
    n_batches = int(math.ceil(float(images.shape[0]) / float(bs)))

    # class probabilities for every image, batch by batch
    preds = []
    for batch_no in range(n_batches):
        sys.stdout.write(".")
        sys.stdout.flush()
        lo = batch_no * bs
        hi = min((batch_no + 1) * bs, len(images))
        inps = images[lo:hi]  # bs x 3 x 299 x 299
        preds.append(nd.softmax(inception_model(inps)).asnumpy())  # bs x 1000
    preds = np.concatenate(preds, 0)

    # inception score of each split: exp of the mean KL(p(y|x) || p(y))
    total = preds.shape[0]
    scores = []
    for k in range(splits):
        part = preds[(k * total // splits):((k + 1) * total // splits), :]
        marginal = np.expand_dims(np.mean(part, 0), 0)
        kl = part * (np.log(part) - np.log(marginal))
        scores.append(np.exp(np.mean(np.sum(kl, 1))))
    return np.mean(scores), np.std(scores)
def train(ctx):
    """Run the full training loop on the given context(s).

    Relies on module-level state (``opt``, ``net``, ``optimizer``,
    ``train_data``, ``val_data``, ``train_metric``, ``logger`` and others).
    Handles optional mixup, label smoothing and teacher distillation, logs
    progress, validates after every epoch, and saves checkpoints.

    Args:
        ctx: a single ``mx.Context`` or a list of contexts.
    """
    if isinstance(ctx, mx.Context):
        ctx = [ctx]
    # Compare strings by value: ``is ''`` tests identity, which is
    # implementation-dependent and a SyntaxWarning on Python 3.8+.
    if opt.resume_params == '':
        net.initialize(mx.init.MSRAPrelu(), ctx=ctx)

    if opt.no_wd:
        # Disable weight decay on batch-norm and bias parameters.
        for _, v in net.collect_params('.*beta|.*gamma|.*bias').items():
            v.wd_mult = 0.0

    trainer = gluon.Trainer(net.collect_params(), optimizer, optimizer_params)
    if opt.resume_states != '':
        trainer.load_states(opt.resume_states)

    # Smoothed or mixed labels are dense distributions, not class indices.
    if opt.label_smoothing or opt.mixup:
        sparse_label_loss = False
    else:
        sparse_label_loss = True
    if distillation:
        L = gcv.loss.DistillationSoftmaxCrossEntropyLoss(temperature=opt.temperature,
                                                         hard_weight=opt.hard_weight,
                                                         sparse_label=sparse_label_loss)
    else:
        L = gluon.loss.SoftmaxCrossEntropyLoss(sparse_label=sparse_label_loss)

    best_val_score = 1

    for epoch in range(opt.resume_epoch, opt.num_epochs):
        tic = time.time()
        if opt.use_rec:
            train_data.reset()
        train_metric.reset()
        btic = time.time()

        for i, batch in enumerate(train_data):
            data, label = batch_fn(batch, ctx)

            if opt.mixup:
                lam = np.random.beta(opt.mixup_alpha, opt.mixup_alpha)
                # Mixup is switched off for the last mixup_off_epoch epochs.
                if epoch >= opt.num_epochs - opt.mixup_off_epoch:
                    lam = 1
                data = [lam*X + (1-lam)*X[::-1] for X in data]

                if opt.label_smoothing:
                    eta = 0.1
                else:
                    eta = 0.0
                label = mixup_transform(label, classes, lam, eta)

            elif opt.label_smoothing:
                # Keep the hard labels for the training metric.
                hard_label = label
                label = smooth(label, classes)

            if distillation:
                teacher_prob = [nd.softmax(teacher(X.astype(opt.dtype, copy=False)) / opt.temperature) \
                                for X in data]

            with ag.record():
                outputs = [net(X.astype(opt.dtype, copy=False)) for X in data]
                if distillation:
                    loss = [L(yhat.astype('float32', copy=False),
                              y.astype('float32', copy=False),
                              p.astype('float32', copy=False)) for yhat, y, p in zip(outputs, label, teacher_prob)]
                else:
                    loss = [L(yhat, y.astype(opt.dtype, copy=False)) for yhat, y in zip(outputs, label)]
            for l in loss:
                l.backward()
            trainer.step(batch_size)

            if opt.mixup:
                output_softmax = [nd.SoftmaxActivation(out.astype('float32', copy=False)) \
                                  for out in outputs]
                train_metric.update(label, output_softmax)
            else:
                if opt.label_smoothing:
                    train_metric.update(hard_label, outputs)
                else:
                    train_metric.update(label, outputs)

            if opt.log_interval and not (i+1)%opt.log_interval:
                train_metric_name, train_metric_score = train_metric.get()
                logger.info('Epoch[%d] Batch [%d]\tSpeed: %f samples/sec\t%s=%f\tlr=%f'%(
                            epoch, i, batch_size*opt.log_interval/(time.time()-btic),
                            train_metric_name, train_metric_score, trainer.learning_rate))
                btic = time.time()

        train_metric_name, train_metric_score = train_metric.get()
        throughput = int(batch_size * i /(time.time() - tic))

        err_top1_val, err_top5_val = test(ctx, val_data)

        logger.info('[Epoch %d] training: %s=%f'%(epoch, train_metric_name, train_metric_score))
        logger.info('[Epoch %d] speed: %d samples/sec\ttime cost: %f'%(epoch, throughput, time.time()-tic))
        logger.info('[Epoch %d] validation: err-top1=%f err-top5=%f'%(epoch, err_top1_val, err_top5_val))

        if err_top1_val < best_val_score:
            best_val_score = err_top1_val
            net.save_parameters('%s/%.4f-imagenet-%s-%d-best.params'%(save_dir, best_val_score, model_name, epoch))
            trainer.save_states('%s/%.4f-imagenet-%s-%d-best.states'%(save_dir, best_val_score, model_name, epoch))

        if save_frequency and save_dir and (epoch + 1) % save_frequency == 0:
            net.save_parameters('%s/imagenet-%s-%d.params'%(save_dir, model_name, epoch))
            trainer.save_states('%s/imagenet-%s-%d.states'%(save_dir, model_name, epoch))

    if save_frequency and save_dir:
        net.save_parameters('%s/imagenet-%s-%d.params'%(save_dir, model_name, opt.num_epochs-1))
        trainer.save_states('%s/imagenet-%s-%d.states'%(save_dir, model_name, opt.num_epochs-1))
'dog', 'frog', 'horse', 'ship', 'truck'] context = [mx.cpu()] # Load Model model_name = opt.model pretrained = True if opt.saved_params == '' else False kwargs = {'classes': classes, 'pretrained': pretrained} net = get_model(model_name, **kwargs) if not pretrained: net.load_parameters(opt.saved_params, ctx = context) # Load Images img = image.imread(opt.input_pic) # Transform transform_fn = transforms.Compose([ transforms.Resize(32), transforms.CenterCrop(32), transforms.ToTensor(), transforms.Normalize([0.4914, 0.4822, 0.4465], [0.2023, 0.1994, 0.2010]) ]) img = transform_fn(img) pred = net(img.expand_dims(0)) ind = nd.argmax(pred, axis=1).astype('int') print('The input picture is classified to be [%s], with probability %.3f.'% (class_names[ind.asscalar()], nd.softmax(pred)[0][ind].asscalar()))
def forward(self, inputs, target, next_word_history, cache_history, begin_state=None): # pylint: disable=arguments-differ
    """Defines the forward computation for cache cell.

    The body calls ``nd.*`` functions and uses ``len``/``enumerate`` on its
    arguments, so imperative :py:class:`NDArray` inputs are expected.

    Parameters
    ----------
    inputs: NDArray
        The input data
    target: NDArray
        The label
    next_word_history: NDArray or None
        The next word in memory; ``None`` when the cache is still empty
    cache_history: NDArray or None
        The hidden state in cache history; ``None`` when the cache is still
        empty
    begin_state: optional
        Forwarded unchanged to the wrapped language model's ``forward``.

    Returns
    --------
    out: NDArray or None
        The linear interpolation of the cache language model with the regular
        word-level language model, evaluated at the target word of every
        step. ``None`` if the softmax output has no rows.
    next_word_history: NDArray
        The next words to be kept in the memory for look up
        (size is equal to the window size)
    cache_history: NDArray
        The hidden states to be kept in the memory for look up
        (size is equal to the window size)
    hidden:
        The second value returned by the wrapped language model's ``forward``
        (presumably its final recurrent state -- confirm against the LM).
    """
    # Call ``forward`` as defined on the parent of the wrapped model's class
    # (not the model's own override); it is unpacked into four values here.
    output, hidden, encoder_hs, _ = \
        super(self.lm_model.__class__, self.lm_model).\
        forward(inputs, begin_state)
    # Reshape codes: -3 merges two consecutive axes, -2 keeps the rest.
    encoder_h = encoder_hs[-1].reshape(-3, -2)
    output = output.reshape(-1, self._vocab_size)

    # Offset of the current batch inside the concatenated history.
    start_idx = len(next_word_history) if next_word_history is not None else 0

    # One-hot rows for the targets of this batch, built once and then either
    # used as the fresh history or appended to the existing one.
    target_one_hot = nd.concat(
        *[nd.one_hot(t[0], self._vocab_size, on_value=1, off_value=0)
          for t in target],
        dim=0)
    if next_word_history is None:
        next_word_history = target_one_hot
    else:
        next_word_history = nd.concat(next_word_history, target_one_hot, dim=0)

    if cache_history is None:
        cache_history = encoder_h
    else:
        cache_history = nd.concat(cache_history, encoder_h, dim=0)

    softmax_output = nd.softmax(output)
    # Per-step results are collected and concatenated once at the end, rather
    # than re-copying the growing result with one concat per step.
    picked = []
    for idx, vocab_L in enumerate(softmax_output):
        joint_p = vocab_L
        position = start_idx + idx
        # The cache only contributes once more than ``_window`` steps of
        # history precede the current position.
        if position > self._window:
            window_start = position - self._window
            valid_next_word = next_word_history[window_start:position]
            valid_cache_history = cache_history[window_start:position]
            logits = nd.dot(valid_cache_history, encoder_h[idx])
            cache_attn = nd.softmax(self._theta * logits).reshape(-1, 1)
            cache_dist = (cache_attn.broadcast_to(valid_next_word.shape)
                          * valid_next_word).sum(axis=0)
            joint_p = self._lambdas * cache_dist + (1 - self._lambdas) * vocab_L
        picked.append(joint_p[target[idx]])

    if not picked:
        out = None
    elif len(picked) == 1:
        # Matches the original single-step result, which was never concatenated.
        out = picked[0]
    else:
        out = nd.concat(*picked, dim=0)

    # Keep only the most recent ``_window`` entries for the next call.
    next_word_history = next_word_history[-self._window:]
    cache_history = cache_history[-self._window:]
    return out, next_word_history, cache_history, hidden
################################### predict ################################### h5f5 = h5py.File('features/test_resnet152_v1.h5', 'r') # train_resnet152_v1 train_inceptionv31 h5f6 = h5py.File('features/test_inceptionv3.h5', 'r') # train_resnet152_v1 train_inceptionv31 features3 = h5f5['features'] features4 = h5f6['features'] train_imgs = gluon.data.vision.ImageFolderDataset( './data/train_valid_test/Images') ids = sorted(os.listdir('./data/train_valid_test/test/unknown')) #print(ids) #exit() test_count = 10357 outputs = [] for i in range(test_count): features = np.concatenate([features3[i:i+1], features4[i:i+1]], axis=-1) predict = net(nd.array(features).as_in_context(ctx)) output = nd.softmax(predict) #print(output) #exit() outputs.extend(output.asnumpy()) with open('submission.csv', 'w') as f: f.write('id,' + ','.join(train_imgs.synsets) + '\n') for i, output in zip(ids, outputs): f.write(i.split('.')[0] + ',' + ','.join([str(num) for num in output]) + '\n') ''' #################################### train #################################### #zong shuju 20580 h5f = h5py.File('features/train_resnet152_v1.h5', 'r') # train_resnet152_v1 train_inceptionv31 h5f2 = h5py.File('features/train_inceptionv31.h5', 'r') # train_resnet152_v1 train_inceptionv31 h5f3 = h5py.File('features/labels1.h5', 'r')
def forward(self, word_inputs, tag_inputs, arc_targets=None, rel_targets=None):
    """Run the parser: compute accuracies and loss, and/or decode arcs and relations.

    Parameters
    ----------
    word_inputs : mxnet.ndarray.NDArray
        word indices of seq_len x batch_size
        NOTE(review): the body applies ``np.greater``/``np.where`` to this
        argument and wraps it in ``nd.array``, so callers may actually pass
        numpy arrays -- confirm.
    tag_inputs : mxnet.ndarray.NDArray
        tag indices of seq_len x batch_size
    arc_targets : mxnet.ndarray.NDArray
        gold arc indices of seq_len x batch_size
    rel_targets : mxnet.ndarray.NDArray
        gold rel indices of seq_len x batch_size
        NOTE(review): read whenever training or ``arc_targets`` is given;
        presumably always supplied together with ``arc_targets`` -- confirm.

    Returns
    -------
    tuple
        (arc_accuracy, rel_accuracy, overall_accuracy, loss) when training, else if given gold target
        then return arc_accuracy, rel_accuracy, overall_accuracy, outputs, otherwise return outputs,
        where outputs is a list of (arcs, rels).
    """
    # Training mode is taken from the autograd state, not from the arguments.
    is_train = autograd.is_training()

    def flatten_numpy(ndarray):
        """Flatten an array to 1-D using column-major (Fortran) order

        Parameters
        ----------
        ndarray : numpy.ndarray
            input tensor

        Returns
        -------
        numpy.ndarray
            The 1-D result of ``np.reshape(ndarray, (-1,), 'F')``
        """
        return np.reshape(ndarray, (-1,), 'F')

    batch_size = word_inputs.shape[1]
    seq_len = word_inputs.shape[0]
    # 1.0 where the word index is greater than the ROOT index, 0.0 elsewhere.
    mask = np.greater(word_inputs, self._vocab.ROOT).astype(np.float32)
    # NOTE(review): if no index exceeds ROOT this is 0 and the divisions by
    # num_tokens below would divide by zero.
    num_tokens = int(np.sum(mask))  # non padding, non root token number

    if is_train or arc_targets is not None:
        # Flattened mask in the same column-major order as the flattened targets.
        mask_1D = flatten_numpy(mask)
        mask_1D_tensor = nd.array(mask_1D)

    # Indices not below ``words_in_train`` are replaced by UNK for the trainable
    # embedding; the pretrained embedding (if any) still sees the raw indices.
    unked_words = np.where(word_inputs < self._vocab.words_in_train, word_inputs, self._vocab.UNK)
    word_embs = self.word_embs(nd.array(unked_words, dtype='int'))
    if self.pret_word_embs:
        word_embs = word_embs + self.pret_word_embs(nd.array(word_inputs))
    tag_embs = self.tag_embs(nd.array(tag_inputs))

    # Concatenate word and tag embeddings along axis 2.
    emb_inputs = nd.concat(word_embs, tag_embs, dim=2)  # seq_len x batch_size

    # Input dropout for the BiLSTM is only enabled while training.
    top_recur = biLSTM(self.f_lstm, self.b_lstm, emb_inputs, batch_size,
                       dropout_x=self.dropout_lstm_input if is_train else 0)
    top_recur = nd.Dropout(data=top_recur, axes=[0], p=self.dropout_mlp)

    # Two parallel projections of the recurrent output: one for the dependent
    # role and one for the head role.
    W_dep, b_dep = self.mlp_dep_W.data(), self.mlp_dep_b.data()
    W_head, b_head = self.mlp_head_W.data(), self.mlp_head_b.data()
    dep, head = leaky_relu(nd.dot(top_recur, W_dep.T) + b_dep), leaky_relu(nd.dot(top_recur, W_head.T) + b_head)
    dep, head = nd.Dropout(data=dep, axes=[0], p=self.dropout_mlp), nd.Dropout(data=head, axes=[0],
                                                                               p=self.dropout_mlp)
    # Move the last axis to the front so the arc/rel parts can be split by slicing axis 0.
    dep, head = nd.transpose(dep, axes=[2, 0, 1]), nd.transpose(head, axes=[2, 0, 1])

    # The first ``mlp_arc_size`` rows feed the arc scorer, the remainder the relation scorer.
    dep_arc, dep_rel = dep[:self.mlp_arc_size], dep[self.mlp_arc_size:]
    head_arc, head_rel = head[:self.mlp_arc_size], head[self.mlp_arc_size:]

    W_arc = self.arc_W.data()
    arc_logits = bilinear(dep_arc, W_arc, head_arc, self.mlp_arc_size, seq_len, batch_size, num_outputs=1,
                          bias_x=True, bias_y=False)
    # (#head x #dep) x batch_size

    flat_arc_logits = reshape_fortran(arc_logits, (seq_len, seq_len * batch_size))
    # (#head ) x (#dep x batch_size)

    arc_preds = arc_logits.argmax(0)
    # seq_len x batch_size

    if is_train or arc_targets is not None:
        # Arc accuracy and loss, both restricted to masked-in tokens.
        correct = np.equal(arc_preds.asnumpy(), arc_targets)
        arc_correct = correct.astype(np.float32) * mask
        arc_accuracy = np.sum(arc_correct) / num_tokens
        targets_1D = flatten_numpy(arc_targets)
        losses = self.softmax_loss(flat_arc_logits, nd.array(targets_1D))
        arc_loss = nd.sum(losses * mask_1D_tensor) / num_tokens

    if not is_train:
        # Head probabilities are only needed for decoding.
        arc_probs = np.transpose(
            np.reshape(nd.softmax(flat_arc_logits, axis=0).asnumpy(), (seq_len, seq_len, batch_size), 'F'))
    # #batch_size x #dep x #head

    W_rel = self.rel_W.data()
    rel_logits = bilinear(dep_rel, W_rel, head_rel, self.mlp_rel_size, seq_len, batch_size,
                          num_outputs=self._vocab.rel_size, bias_x=True, bias_y=True)
    # (#head x rel_size x #dep) x batch_size

    flat_rel_logits = reshape_fortran(rel_logits, (seq_len, self._vocab.rel_size, seq_len * batch_size))
    # (#head x rel_size) x (#dep x batch_size)

    # Select the relation logits of one head per dependent: the gold head
    # (targets_1D still holds the flattened arc targets here) while training,
    # otherwise the predicted head -- even when gold targets were supplied.
    _target_vec = nd.array(targets_1D if is_train else flatten_numpy(arc_preds.asnumpy())).reshape(
        seq_len * batch_size, 1)
    # Repeat the chosen head index across all relation classes for nd.pick.
    _target_mat = _target_vec * nd.ones((1, self._vocab.rel_size))

    partial_rel_logits = nd.pick(flat_rel_logits, _target_mat.T, axis=0)
    # (rel_size) x (#dep x batch_size)

    if is_train or arc_targets is not None:
        rel_preds = partial_rel_logits.argmax(0)
        # From here on targets_1D holds the relation targets, not the arc targets.
        targets_1D = flatten_numpy(rel_targets)
        rel_correct = np.equal(rel_preds.asnumpy(), targets_1D).astype(np.float32) * mask_1D
        rel_accuracy = np.sum(rel_correct) / num_tokens
        losses = self.softmax_loss(partial_rel_logits, nd.array(targets_1D))
        rel_loss = nd.sum(losses * mask_1D_tensor) / num_tokens

    if not is_train:
        rel_probs = np.transpose(np.reshape(nd.softmax(flat_rel_logits.transpose([1, 0, 2]), axis=0).asnumpy(),
                                            (self._vocab.rel_size, seq_len, seq_len, batch_size), 'F'))
    # batch_size x #dep x #head x #nclasses

    if is_train or arc_targets is not None:
        loss = arc_loss + rel_loss
        # A token counts as fully correct only if both its arc and relation are correct.
        correct = rel_correct * flatten_numpy(arc_correct)
        overall_accuracy = np.sum(correct) / num_tokens

    if is_train:
        # Training stops here; no decoding is performed.
        return arc_accuracy, rel_accuracy, overall_accuracy, loss

    outputs = []

    for msk, arc_prob, rel_prob in zip(np.transpose(mask), arc_probs, rel_probs):
        # parse sentences one by one
        # Position 0 is switched on in the mask so it counts towards sent_len.
        # NOTE(review): np.transpose(mask) is presumably a view, so this likely
        # writes through to ``mask``; mask is not read again after this loop.
        msk[0] = 1.
        sent_len = int(np.sum(msk))
        arc_pred = arc_argmax(arc_prob, sent_len, msk)
        # Keep, for each position, the relation distribution of its predicted head.
        rel_prob = rel_prob[np.arange(len(arc_pred)), arc_pred]
        rel_pred = rel_argmax(rel_prob, sent_len)
        # Position 0 and everything from sent_len onwards are dropped from the output.
        outputs.append((arc_pred[1:sent_len], rel_pred[1:sent_len]))

    if arc_targets is not None:
        return arc_accuracy, rel_accuracy, overall_accuracy, outputs
    return outputs