def balance_class(self, prediction, target_idx, k=1):
    """Build a 0/1 mask keeping foreground positions plus sampled background.

    Class index 0 is treated as background; every other index is foreground.
    Background positions are sampled twice, once from the target and once
    from the prediction, with a budget of ``k`` times the number of
    foreground positions in the respective tensor.

    Args:
        prediction: Class scores; the arg-max over the last axis is taken.
        target_idx: Target class indices; a trailing axis is appended
            with ``expand_dims(-1)`` before comparison.
        k: Multiplier applied to the foreground count to obtain the
            background sampling budget.

    Returns:
        The sum of the four selections clipped to ``[0, 1]``.
    """
    predict_idx = prediction.argmax(-1)
    bg_predict = predict_idx == 0  # correct
    fg_predict = predict_idx != 0  # error type or correction char idx
    target_idx = target_idx.expand_dims(-1)
    bg_target = target_idx == 0
    fg_target = target_idx != 0

    # Only the foreground counts are needed; each count forces a sync to host.
    num_fg_predict = int(fg_predict.sum().asnumpy())
    num_fg_target = int(fg_target.sum().asnumpy())

    def _sample_background(bg_flags, num_fg):
        # Noise in [0, 0.1) breaks ties randomly while keeping every
        # background flag (1.x) ranked above every foreground flag (0.x).
        noisy = bg_flags.reshape(-1) + nd.random.uniform_like(bg_flags, 0.0, 0.1).reshape(-1)
        budget = min(num_fg * k, int(np.prod(bg_flags.shape)))
        # NOTE(review): when there is no foreground the budget is 0; confirm
        # how nd.topk treats k=0 in the MXNet version in use.
        return nd.topk(noisy, k=budget, ret_typ='mask').reshape_like(bg_flags)

    # Target is sampled before prediction to keep the random draw order.
    topk_target = _sample_background(bg_target, num_fg_target)
    topk_predict = _sample_background(bg_predict, num_fg_predict)

    # Drop the trailing axis so all terms can be summed.
    if len(topk_predict.shape) == 3:
        topk_predict = topk_predict[:, :, 0]
    if len(topk_target.shape) == 3:
        topk_target = topk_target[:, :, 0]
    if len(fg_target.shape) == 3:
        fg_target = fg_target[:, :, 0]

    mask = (topk_target + fg_target + fg_predict + topk_predict).clip(0, 1)
    return mask
def topk(hm, k=100):
    """Pick the ``k`` best heat-map peaks per image across all classes.

    The heat map is passed through ``nms``, flattened per class, reduced to
    the top ``k`` per class, and then to the top ``k`` overall.

    Args:
        hm: Heat map unpacked as ``(batch, classes, height, width)``.
        k: Number of peaks to keep, per class and then overall.

    Returns:
        Tuple ``(topk_cat_x_idx, topk_cat_y_idx, cls_id)``. The first two
        have shape ``(batch, k)``. Note that the "x" value is
        ``floor(flat_index / width)`` and the "y" value is
        ``flat_index % width``; the naming is kept for callers.
    """
    ctx = hm.context
    batch_size, _, _, width = hm.shape
    hm = nms(hm)
    hm = nd.reshape(hm, (0, 0, -1))

    # Stage 1: best k positions inside every class plane.
    topk_scores, topk_idx = nd.topk(hm, k=k, ret_typ='both')
    topk_x_idx = nd.reshape(nd.floor(topk_idx / width), (0, -1))
    # A flattened (height, width) index decomposes with `width` on both
    # sides; the former `% height` was only right for square maps.
    topk_y_idx = nd.reshape(topk_idx % width, (0, -1))
    topk_scores = nd.reshape(topk_scores, (0, -1))

    # Stage 2: best k among the classes * k candidates of each image.
    _, topk_cat_idx = nd.topk(topk_scores, k=k, ret_typ='both')
    cls_id = nd.floor(topk_cat_idx / k)

    # Pair every selected candidate with its batch index for gather_nd.
    batch_idx = nd.repeat(nd.arange(batch_size), repeats=k).reshape((1, -1))
    batch_idx = batch_idx.as_in_context(ctx)
    topk_cat_idx = nd.reshape(topk_cat_idx, (1, -1))
    topk_cat_idices = nd.concat(batch_idx, topk_cat_idx, dim=0)

    topk_cat_x_idx = nd.reshape(nd.gather_nd(topk_x_idx, topk_cat_idices), (batch_size, k))
    topk_cat_y_idx = nd.reshape(nd.gather_nd(topk_y_idx, topk_cat_idices), (batch_size, k))
    return topk_cat_x_idx, topk_cat_y_idx, cls_id
def _topk(scores, K=40):
    """Select the ``K`` highest-scoring positions per image over all classes.

    Args:
        scores: Score map unpacked as ``(batch, cat, height, width)``.
        K: Number of candidates kept per class and then overall.

    Returns:
        ``(topk_score, topk_inds, topk_clses, topk_ys, topk_xs)``; the index
        and coordinate outputs are reshaped to ``(batch, K)``.
    """
    batch, cat, height, width = scores.shape

    # Per-class top-K; ret_typ='both' returns both values and indices.
    flat_scores = nd.reshape(scores, (batch, cat, -1))
    [class_scores, class_inds] = nd.topk(flat_scores, ret_typ='both', k=K)
    class_inds = class_inds % (height * width)
    class_ys = (class_inds / width).astype('int32').astype('float32')
    class_xs = (class_inds % width).astype('int32').astype('float32')

    # Top-K over the cat * K per-class candidates.
    merged_scores = nd.reshape(class_scores, (batch, -1))
    [topk_score, topk_ind] = nd.topk(merged_scores, ret_typ='both', k=K)
    topk_clses = (topk_ind / K).astype('int32')

    def _select(per_class_values):
        # Keep only the entries chosen by the second top-K.
        picked = _gather_feat(nd.reshape(per_class_values, (batch, -1, 1)), topk_ind)
        return nd.reshape(picked, (batch, K))

    topk_inds = _select(class_inds)
    topk_ys = _select(class_ys)
    topk_xs = _select(class_xs)
    return topk_score, topk_inds, topk_clses, topk_ys, topk_xs
def check_topk():
    """Check ``nd.topk`` on a vector of ``LARGE_X`` elements.

    The assertions assume ``create_vector`` yields values equal to their own
    positions (presumably 0 .. LARGE_X - 1; verify against the helper).
    """
    vec = create_vector(size=LARGE_X)

    # Default return type: indices of the ten largest entries, descending.
    top_indices = nd.topk(vec, k=10, axis=0, dtype=np.int64)
    for rank in range(10):
        assert top_indices[rank] == (LARGE_X - rank - 1)

    # With ret_typ="both" the two outputs must coincide for such a vector.
    first, second = mx.nd.topk(vec, k=3, axis=0, dtype=np.int64,
                               ret_typ="both", is_ascend=False)
    assert np.all(first == second)

    largest = nd.topk(vec, k=1, axis=0, dtype=np.int64, ret_typ="value")
    assert largest == (LARGE_X - 1)
def test_topk():
    """Check ``nd.topk`` on large 2-D tensors along both axes.

    The expected values assume what ``create_2d_tensor`` produces
    (presumably each row filled with its row number; verify against the helper).
    """
    tall = create_2d_tensor(rows=LARGE_X, columns=SMALL_Y)

    # Along axis 0 the last row index must appear once in every column.
    top_rows = nd.topk(tall, k=10, axis=0, dtype=np.int64)
    assert np.sum(top_rows.asnumpy() == (LARGE_X - 1)) == SMALL_Y

    first, second = mx.nd.topk(tall, k=3, axis=0, dtype=np.int64,
                               ret_typ="both", is_ascend=False)
    assert np.all(first == second)

    wide = create_2d_tensor(rows=SMALL_Y, columns=LARGE_X)
    row_max = nd.topk(wide, k=1, axis=-1, dtype=np.int64, ret_typ="value")
    assert row_max.sum() == np.sum(np.arange(0, SMALL_Y))
def _predict(self, x, ctx_id=0, with_proba=False):
    """Return the top classes (at most five) for the given input.

    Args:
        x: A ``pd.DataFrame`` with an ``image`` column, a list/tuple of
            images, or an ``mx.nd.NDArray`` batch of shape ``(n, 3, h, w)``.
            The value is first passed through ``self._predict_preprocess``.
        ctx_id: Index into ``self.ctx`` used for the NDArray path and
            forwarded to ``self._predict_proba``.
        with_proba: When true, delegate entirely to ``self._predict_proba``.

    Returns:
        A ``pd.DataFrame`` with ``class``, ``score`` and ``id`` columns
        (plus ``image`` for list/tuple/DataFrame input). For NDArray input
        only the first sample of the batch is reported.

    Raises:
        ValueError: If the preprocessed input has an unsupported type.
    """
    if with_proba:
        return self._predict_proba(x, ctx_id=ctx_id)

    x = self._predict_preprocess(x)

    if isinstance(x, pd.DataFrame):
        assert 'image' in x.columns, "Expect column `image` for input images"
        frame = self._predict(tuple(x['image']))
        return frame.reset_index(drop=True)

    if isinstance(x, (list, tuple)):
        batch_size = self._cfg.valid.batch_size
        self.net.hybridize()
        top_k = min(5, self.num_class)
        dataset = ImageListDataset(x, self._predict_preprocess)
        loader = mx.gluon.data.DataLoader(dataset, batch_size=batch_size, last_batch='keep')

        rows = []
        image_pos = 0  # position of the current sample within `x`
        for batch in loader:
            shards = mx.gluon.utils.split_and_load(batch, ctx_list=self.ctx, even_split=False)
            outputs = [self.net(shard) for shard in shards]
            for output in outputs:
                for sample in range(output.shape[0]):
                    logits = output[sample]
                    ranked = nd.topk(logits, k=top_k).astype('int').asnumpy().flatten()
                    probs = mx.nd.softmax(logits).asnumpy().flatten()
                    for rank in range(top_k):
                        class_id = ranked[rank]
                        rows.append({
                            'class': self.classes[class_id],
                            'score': probs[class_id],
                            'id': class_id,
                            'image': x[image_pos]
                        })
                    image_pos += 1
        return pd.DataFrame(rows)

    if not isinstance(x, mx.nd.NDArray):
        raise ValueError('Input is not supported: {}'.format(type(x)))

    assert len(x.shape) == 4 and x.shape[1] == 3, \
        "Expect input to be (n, 3, h, w), given {}".format(x.shape)
    x = x.as_in_context(self.ctx[ctx_id])
    pred = self.net(x)
    top_k = min(5, self.num_class)
    ranked = nd.topk(pred, k=top_k)[0].astype('int').asnumpy().flatten()
    probs = mx.nd.softmax(pred)[0].asnumpy().flatten()

    records = []
    for rank in range(top_k):
        class_id = ranked[rank]
        records.append({
            'class': self.classes[class_id],
            'score': probs[class_id],
            'id': class_id
        })
    return pd.DataFrame(records)
def test_topk():
    """Check ``nd.topk`` on a vector of ``LARGE_X`` elements.

    The assertions assume ``create_vector`` yields values equal to their own
    positions (presumably 0 .. LARGE_X - 1; verify against the helper).
    """
    vec = create_vector(size=LARGE_X)

    # The last position must show up exactly once among the top ten indices.
    top_indices = nd.topk(vec, k=10, axis=0, dtype=np.int64)
    assert np.sum(top_indices.asnumpy() == (LARGE_X - 1)) == 1

    first, second = mx.nd.topk(vec, k=3, axis=0, dtype=np.int64,
                               ret_typ="both", is_ascend=False)
    assert np.all(first == second)

    largest = nd.topk(vec, k=1, axis=0, dtype=np.int64, ret_typ="value")
    assert largest.sum() == (LARGE_X - 1)
def get_similar_tokens(query_token, k, embed):
    """Print the ``k`` tokens whose embeddings are closest to ``query_token``.

    Similarity is the cosine between rows of the embedding matrix.

    Args:
        query_token: Token looked up in the module-level ``token_to_idx``.
        k: Number of neighbours to print.
        embed: Embedding layer; its ``weight.data()`` is the matrix searched.
    """
    W = embed.weight.data()
    # Index the row; the matrix is an array, not a callable.
    x = W[token_to_idx[query_token]]
    # 1e-9 keeps the denominator non-zero for all-zero rows.
    cos = nd.dot(W, x) / (nd.sum(W * W, axis=1) * nd.sum(x * x) + 1e-9).sqrt()
    # The keyword is `ret_typ`. k + 1 results are requested because the
    # first hit is skipped below (presumably the query token itself).
    topk = nd.topk(cos, k=k + 1, ret_typ='indices').asnumpy().astype('int32')
    for i in topk[1:]:
        print('cosine sim=%.3f: %s ' % (cos[i].asscalar(), (idx_to_token[i])))
def _predict(self, x):
    """Return the top classes (at most five) for one image or a table of images.

    Args:
        x: An image path (``str``), an ``mx.nd.NDArray`` image, or a
            ``pd.DataFrame`` with an ``image`` column whose entries are
            predicted one by one.

    Returns:
        A ``pd.DataFrame`` with ``class``, ``score`` and ``id`` columns;
        for DataFrame input an ``image`` column is added and the per-image
        results are concatenated with a fresh index.

    Raises:
        ValueError: If ``x`` has an unsupported type.
    """
    resize = int(math.ceil(self.input_size / self._cfg.train.crop_ratio))

    if isinstance(x, pd.DataFrame):
        assert 'image' in x.columns, "Expect column `image` for input images"

        def _predict_merge(x):
            # Predict a single entry and tag the result with its source.
            y = self._predict(x)
            y['image'] = x
            return y

        per_image = [_predict_merge(entry) for entry in x['image']]
        return pd.concat(per_image).reset_index(drop=True)

    if isinstance(x, str):
        x = mx.image.imread(x)
    elif not isinstance(x, mx.nd.NDArray):
        raise ValueError('Input is not supported: {}'.format(type(x)))
    x = transform_eval(x, resize_short=resize, crop_size=self.input_size)

    x = x.as_in_context(self.ctx[0])
    pred = self.net(x)
    top_k = min(5, self.num_class)
    ranked = nd.topk(pred, k=top_k)[0].astype('int').asnumpy().flatten()
    probs = mx.nd.softmax(pred)[0].asnumpy().flatten()

    records = []
    for rank in range(top_k):
        class_id = ranked[rank]
        records.append({
            'class': self.classes[class_id],
            'score': probs[class_id],
            'id': class_id
        })
    return pd.DataFrame(records)
def knn(W, x, k):
    """k-nearest-neighbour search by cosine similarity.

    Args:
        W: Matrix whose rows are the candidate vectors.
        x: Query vector; it is flattened before the dot product.
        k: Number of neighbours to return.

    Returns:
        ``(indices, scores)``: the int32 indices of the ``k`` most similar
        rows and the matching cosine values as Python scalars.
    """
    query = x.reshape((-1, ))
    # The added 1e-9 is for numerical stability.
    row_norms = (nd.sum(W * W, axis=1) + 1e-9).sqrt()
    query_norm = nd.sum(x * x).sqrt()
    cos = nd.dot(W, query) / (row_norms * query_norm)
    nearest = nd.topk(cos, k=k, ret_typ='indices').asnumpy().astype('int32')
    scores = [cos[i].asscalar() for i in nearest]
    return nearest, scores
def get_top_k_by_analogy(self, word1, word2, word3, k=1):
    '''
    Return the top ``k`` analogy candidates for three words.

    The query vector is ``vec(word2) - vec(word1) + vec(word3)``; it is
    compared by dot product against the row-normalised vocabulary vectors.

    Analogy refers to:
        king->queen ; man->woman
        good->better ; bad->worse
        do->did ; go->went

    Eg.
        emb = Embedder(dimensions=50)
        print(emb.get_top_k_by_analogy('good', 'best', 'bad'))
        Returns... ['worst']

    A list is returned because several analogies can be requested via ``k``.
    '''
    vecs = self.__emb_mapper[word1, word2, word3]
    query = (vecs[1] - vecs[0] + vecs[2]).reshape((-1, 1))
    normed_vocab = self.__norm_vecs_by_row(self.__emb_mapper.idx_to_vec)
    scores = nd.dot(normed_vocab, query).reshape((len(self.__embedder), ))
    best = nd.topk(scores, k=k, ret_typ='indices')
    token_ids = [int(entry.asscalar()) for entry in best]
    return self.__embedder.to_tokens(token_ids)
def sample_neighbours(self, data, query_network):
    """Fetch stored values and labels selected by distance to the query.

    Args:
        data: Sequence of input arrays; ``data[0].shape[0]`` is taken as
            the batch size and all items are passed to ``query_network``.
        query_network: Callable producing the query keys, which are moved
            to the CPU context.

    Returns:
        ``[values, labels]`` where each is a list with one indexed array
        per entry of ``self.value_memory`` / ``self.label_memory``.
    """
    stored = self.key_memory.shape[0]
    batch = data[0].shape[0]
    query = query_network(*data).as_in_context(mx.cpu())

    # Pair every query with every stored key, then take Euclidean distances.
    repeated_query = nd.repeat(query, repeats=stored, axis=0)
    tiled_keys = nd.tile(self.key_memory, reps=(batch, 1))
    squared = nd.square(nd.subtract(repeated_query, tiled_keys))
    distances = nd.sqrt(nd.sum(squared, exclude=1, axis=0))
    dist = nd.reshape(distances, shape=(batch, stored))

    # NOTE(review): nd.topk sorts descending by default, so this picks the
    # k largest distances; confirm that is intended for "neighbours".
    sample_ind = nd.topk(dist, k=self.k, axis=1, ret_typ="indices")

    label_count = len(self.label_memory)
    sample_labels = [self.label_memory[i][sample_ind] for i in range(label_count)]
    sample_values = [self.value_memory[j][sample_ind]
                     for j in range(len(self.value_memory))]
    return [sample_values, sample_labels]
def beam_search_translate(encoder, decoder, input_seq, max_length, ctx,
                          beam_size, in_vocab, out_vocab):
    """Translate ``input_seq`` by expanding the ``beam_size`` best first tokens.

    The first decoder step is run once; each of its ``beam_size`` most
    likely tokens is completed by ``predict_rest``.

    Args:
        encoder: Encoder providing ``begin_state`` and a forward call.
        decoder: Decoder providing ``begin_state`` and a forward call.
        input_seq: Source sentence; lower-cased and split on single spaces.
        max_length: Length the source tokens are padded to.
        ctx: Device context for the created arrays.
        beam_size: Number of first-token candidates to expand.
        in_vocab: Source vocabulary (``to_indices``).
        out_vocab: Target vocabulary (``token_to_idx``).

    Returns:
        The list of ``predict_rest`` results; element ``[1]`` of each is
        replaced in place by ``log(score) / len(element[0]) ** 0.75``.
    """
    in_tokens = input_seq.lower().split(' ')
    in_tokens += [EOS] + [PAD] * (max_length - len(in_tokens) - 1)
    enc_input = nd.array([in_vocab.to_indices(in_tokens)], ctx=ctx)
    enc_state = encoder.begin_state(batch_size=1, ctx=ctx)
    enc_output, enc_state = encoder(enc_input, enc_state)
    dec_input = nd.array([out_vocab.token_to_idx[BOS]], ctx=ctx)
    dec_state = decoder.begin_state(enc_state)
    output_tokens = []

    # the first character prediction
    dec_output, dec_state = decoder(dec_input, dec_state, enc_output)
    topk = nd.topk(dec_output, k=beam_size, ret_typ='indices').asnumpy().astype('int32')

    # The distribution does not change between candidates: compute it once.
    first_step_probs = nd.softmax(dec_output[0])
    for idx in topk[0]:
        score = first_step_probs[idx].asscalar()
        sample_output = predict_rest(encoder, decoder, input_seq, max_length,
                                     idx, dec_state, enc_output, score,
                                     in_vocab, out_vocab, ctx)
        output_tokens.append(sample_output)

    # Length-normalise the log score of every candidate.
    for candidate in output_tokens:
        candidate[1] = math.log(candidate[1]) / (len(candidate[0]) ** 0.75)
    return output_tokens
def get_similar_tokens(query_token, k, embed):
    """Print the ``k`` tokens whose embeddings are closest to ``query_token``.

    Args:
        query_token: Token looked up in the module-level ``token_to_idx``.
        k: Number of neighbours to print.
        embed: Embedding layer; its ``weight.data()`` is the matrix searched.
    """
    W = embed.weight.data()
    x = W[token_to_idx[query_token]]
    # Add 1e-9 for numerical stability
    squared_norms = nd.sum(W * W, axis=1) * nd.sum(x * x) + 1e-9
    cos = nd.dot(W, x) / squared_norms.sqrt()
    ranked = nd.topk(cos, k=k + 1, ret_typ='indices').asnumpy().astype('int32')
    # remove input word
    neighbours = ranked[1:]
    for token_id in neighbours:
        similarity = cos[token_id].asscalar()
        print('cosine sim=%.3f: %s' % (similarity, (idx_to_token[token_id])))
def predict(images, model_name, model_config):
    """
    Run the named model on the given image and return its best classes.

    The limits ``max_number_of_predictions`` and ``minimum_confidence`` are
    read from ``/models/<model_name>/config.json``.

    Arguments:
        images {NDArray Image} -- image to do prediction on
        model_name {str} -- name of the model to be used for prediction
        model_config -- object exposing ``loaded_models`` and ``models_config``

    Returns:
        OrderedDict -- class name -> probability, highest probability first
    """
    net = model_config.loaded_models[model_name]

    # apply default data preprocessing
    batch = transform_eval(images)
    if model_config.models_config[model_name] == 'gpu':
        batch = batch.copyto(mx.gpu(0))

    # run forward pass to obtain the predicted score for each class
    pred = net(batch)
    # map predicted values to probability by softmax
    prob = nd.softmax(pred)[0].asnumpy()

    config_path = os.path.join("/models", str(model_name), 'config.json')
    with open(config_path) as config_file:
        data = json.load(config_file)
    max_number_of_predictions = data['max_number_of_predictions']
    minimum_confidence = data['minimum_confidence']

    # Clamp the requested count to [1, number of classes].
    class_count = len(net.classes)
    if max_number_of_predictions > class_count:
        max_number_of_predictions = class_count
    if max_number_of_predictions < 1:
        max_number_of_predictions = 1

    # find the class indices with the highest score
    ind = nd.topk(pred, k=max_number_of_predictions)[0].astype('int').asnumpy().tolist()

    # If even the best class is below the threshold, fall back to the
    # probability of the last selected class as the threshold.
    best, last = ind[0], ind[-1]
    if minimum_confidence > float(prob[best]):
        minimum_confidence = float(prob[last])

    outputs = {}
    for rank in range(max_number_of_predictions):
        class_id = ind[rank]
        confidence = float(prob[class_id])
        if confidence > minimum_confidence:
            outputs[net.classes[class_id]] = confidence

    ranked_items = sorted(outputs.items(), key=lambda kv: kv[1], reverse=True)
    return OrderedDict(ranked_items)
def beam_search(outputs, ctx, targets, max_seq_len, beam_width):
    """Advance a beam search by one step.

    Args:
        outputs: Scores; softmax and top-k are taken over the last axis.
        ctx: Device context of the returned index array.
        targets: Falsy on the first step; otherwise a dict mapping
            ``"beam_<n>"`` to ``{"idx": [...], "score": ...}`` as returned
            by a previous call.
        max_seq_len: Length every returned index list is cut/padded to.
        beam_width: Number of candidates taken by the top-k.

    Returns:
        ``(indices, targets)``: an NDArray of the zero-padded beam index
        lists and the updated beam dictionary.
    """
    # ret_typ='both': predicts[0] holds the values, predicts[1] the indices.
    predicts = nd.topk(nd.softmax(outputs, axis=-1), axis=-1, k=beam_width, ret_typ='both')
    if not targets:
        # First step: seed one beam per top-k candidate of the first row.
        targets = {}
        beam_result_idxs = []
        beam_result_score = []
        count = 0
        for score, idx in zip(predicts[0][0], predicts[1][0]):
            # Every beam starts with index 2 (presumably the start token —
            # TODO confirm) followed by the candidate.
            idx = [2] + [int(idx.asscalar())]
            beam_result_idxs.append(idx)
            beam_result_score.append(score)
            targets.update(
                {"beam_{}".format(count): {"idx": idx, "score": score}})
            count += 1
        result = []
        for idx in beam_result_idxs:
            # Truncate to max_seq_len and right-pad with zeros.
            idx = idx[:max_seq_len] + \
                [0] * (max_seq_len - len(idx))
            result.append(idx)
        return nd.array(result, ctx=ctx), targets
    else:
        beam_idxs = []
        beam_score = []
        # Rows of the prediction are paired with the existing beams in
        # dictionary order.
        for scores, idxs, target in zip(predicts[0], predicts[1], targets.values()):
            last_score = target["score"]
            last_idxs = target["idx"]
            max_score = 0
            max_score_idx = []
            # Keep only the best continuation of this beam; scores are
            # combined by addition.
            for score, idx in zip(scores, idxs):
                if last_score + score > max_score:
                    max_score = last_score + score
                    idx = int(idx.asscalar())
                    max_score_idx = last_idxs[:] + [idx]
            beam_idxs.append(max_score_idx)
            beam_score.append(max_score)
        # Re-rank the beams by their accumulated score, best first.
        beam_score, beam_idxs = (list(t) for t in zip(*sorted(zip(beam_score, beam_idxs), reverse=True)))
        targets = {}
        count = 0
        for idx, score in zip(beam_idxs, beam_score):
            targets.update(
                {"beam_{}".format(count): {"idx": idx, "score": score}})
            count += 1
        result = []
        for idx in beam_idxs:
            # Truncate to max_seq_len and right-pad with zeros.
            idx = idx[:max_seq_len] + \
                [0] * (max_seq_len - len(idx))
            result.append(idx)
        return nd.array(result, ctx=ctx), targets
def pick_top_n(preds, top_n=5):
    """Sample one label from the ``top_n`` highest-scoring predictions.

    The top scores are renormalised per row to sum to one and used as the
    sampling distribution.

    Args:
        preds: Scores; the top-k is taken along axis 1.
        top_n: Number of candidates to sample from.

    Returns:
        A NumPy array holding the single sampled label.
    """
    # NOTE(review): values and labels are flattened across rows before
    # sampling, so this presumably expects a single row — confirm.
    values_and_labels = nd.topk(preds, axis=1, k=top_n, ret_typ='both')
    candidate_probs = values_and_labels[0]
    candidate_labels = values_and_labels[1].asnumpy()

    candidate_probs /= nd.sum(candidate_probs, axis=1, keepdims=True)

    flat_probs = candidate_probs.asnumpy().reshape((-1, ))
    flat_labels = candidate_labels.reshape((-1, ))
    return np.random.choice(flat_labels, size=1, p=flat_probs)
def get_knn(word, k=2000):
    """Return the ``k`` vocabulary tokens most similar to ``word``.

    Similarity is the dot product between the word's embedding and the
    row-normalised vocabulary embeddings of the module-level ``vocab``.

    Args:
        word: Query token.
        k: Number of neighbours to return.

    Returns:
        The tokens for the top ``k + 1`` hits with the first hit dropped.
    """
    query = vocab.embedding[word].reshape((-1, 1))
    normed_vocab = norm_vecs_by_row(vocab.embedding.idx_to_vec)
    similarities = nd.dot(normed_vocab, query).reshape((len(vocab), ))
    ranked = nd.topk(similarities, k=k + 1, ret_typ='indices')
    token_ids = [int(entry.asscalar()) for entry in ranked]
    # Skip the best hit (presumably the input token itself).
    return vocab.to_tokens(token_ids[1:])
def eval(img):
    """Classify one image with the module-level ``net``.

    Args:
        img: Input image, passed through ``transform_eval``.

    Returns:
        ``(label, prob)``: the index of the best class and its softmax
        probability.
    """
    # Transform
    batch = transform_eval(img)
    scores = net(batch)
    best = nd.topk(scores, k=1)[0].astype('int')
    label = best[0].asscalar()
    probabilities = nd.softmax(scores)[0]
    return label, probabilities[label].asscalar()
def draw_line(self, s, e):
    """Rasterise a line between two points and return its coordinates.

    Args:
        s: Start point; ``s[0]`` is used as the line index and ``s[1]`` as
            the column index.
        e: End point, same layout as ``s``.

    Returns:
        ``(res_h, res_w)``: lists of line and column coordinates of the
        points on the segment. When both points coincide, ``s`` and ``e``
        are returned unchanged instead.
    """
    # NOTE: `dir` shadows the builtin. It ends up as -1 when exactly one of
    # the two coordinates had to be swapped, +1 otherwise.
    dir = 1
    lu_l, rd_l, dir = (s[0], e[0], dir) if s[0] <= e[0] else (e[0], s[0], -dir)
    lu_c, rd_c, dir = (s[1], e[1], dir) if s[1] <= e[1] else (e[1], s[1], -dir)
    # Size of the bounding box spanned by the two points.
    h, w = rd_l - lu_l + 1, rd_c - lu_c + 1
    mimic_mat = nd.zeros((h, w))
    if h == 1 and w == 1:
        return s, e
    if 1.0 * h / w <= 1:
        # Box at least as wide as tall: pick one line index per column.
        mimic_w = nd.tile(nd.arange(1, w), (h, 1))
        if dir == 1:
            mimic_h = nd.tile(nd.arange(
                0, h), (w - 1, 1)).T  # notice the direction & vertical
        else:
            mimic_h = nd.tile(nd.arange(h - 1, -1, -1), (w - 1, 1)).T
        # Column 0 is not covered by mimic_w, so its line index is fixed.
        start = nd.array([[h - 1]]) if dir == -1 else nd.array([[0]])
        # Deviation of each cell's ratio from the box ratio h / w.
        out = nd.abs(mimic_h / mimic_w - 1.0 * h / w)
        # topk on the negated deviation selects the smallest deviation.
        loc_h = nd.topk(-out, axis=0)
        loc_h = nd.concat(start, loc_h, dim=1)
        a = nd.arange(w)
        mimic_mat[loc_h, a] = 1
        # NOTE(review): the uint8 cast limits coordinates to 0..255 — confirm.
        res_h = (loc_h + lu_l).asnumpy().astype(np.uint8).tolist()[0]
        res_w = list(range(lu_c, lu_c + w))
        return res_h, res_w
    else:
        # Box taller than wide: pick one column index per line.
        mimic_h = nd.tile(nd.arange(1, h), (w, 1)).T
        if dir == 1:
            mimic_w = nd.tile(nd.arange(0, w), (h - 1, 1))
        else:
            mimic_w = nd.tile(nd.arange(w - 1, -1, -1), (h - 1, 1))
        # Line 0 is not covered by mimic_h, so its column index is fixed.
        start = nd.array([[w - 1]]) if dir == -1 else nd.array([[0]])
        # NOTE(review): mimic_w contains 0 here, so this divides by zero for
        # one column — confirm the resulting values are what is wanted.
        out = nd.abs(mimic_h / mimic_w - 1.0 * h / w)
        loc_h = nd.topk(-out, axis=1).T
        loc_h = nd.concat(start, loc_h, dim=1)
        a = nd.arange(h)
        mimic_mat[a, loc_h] = 1
        res_h = list(range(lu_l, lu_l + h))
        # NOTE(review): the uint8 cast limits coordinates to 0..255 — confirm.
        res_w = (loc_h + lu_c).asnumpy().astype(np.uint8).tolist()[0]
        return res_h, res_w
def predict(self, query_token, k, embed):
    """Print the ``k`` tokens whose embeddings are closest to ``query_token``.

    Args:
        query_token: Token looked up in ``self.token_to_idx``.
        k: Number of neighbours to print.
        embed: Embedding layer; its ``weight.data()`` is the matrix searched.
    """
    vectors = embed.weight.data()
    query = vectors[self.token_to_idx[query_token]]
    # 1e-9 keeps the denominator non-zero.
    squared_norms = nd.sum(vectors * vectors, axis=1) * nd.sum(query * query) + 1e-9
    cos = nd.dot(vectors, query) / squared_norms.sqrt()
    ranked = nd.topk(cos, k=k + 1, ret_typ='indices').asnumpy().astype('int32')
    # The first hit is skipped (presumably the query token itself).
    neighbours = ranked[1:]
    for token_id in neighbours:
        similarity = cos[token_id].asscalar()
        print('cosine sim=%.3f: %s' % (similarity, (self.idx_to_token[token_id])))
def _topk_channel(scores, K=40):
    """Select the ``K`` highest-scoring positions separately for each channel.

    Args:
        scores: Score map unpacked as ``(batch, cat, height, width)``.
        K: Number of positions kept per channel.

    Returns:
        ``(topk_scores, topk_inds, topk_ys, topk_xs)`` where the ys/xs are
        the indices split by ``width`` and stored as float32.
    """
    batch, cat, height, width = scores.shape
    per_channel = scores.reshape((batch, cat, -1))
    [channel_scores, flat_inds] = nd.topk(per_channel, ret_typ="both", k=K)

    flat_inds = flat_inds % (height * width)
    rows = (flat_inds / width).astype('int32').astype('float32')
    cols = (flat_inds % width).astype('int32').astype('float32')
    return channel_scores, flat_inds, rows, cols
def curvature_based_sample(nn_pts, k):
    """Build gather indices for the ``k`` highest-curvature points per batch.

    Args:
        nn_pts: Point data; ``nn_pts.shape[0]`` is the batch size and the
            array is passed to ``compute_curvature``.
        k: Number of points to select.

    Returns:
        Indices formed by concatenating, along axis 2, the batch index and
        the selected point index of every pick.
    """
    curvature = compute_curvature(nn_pts)
    picked = nd.topk(curvature, axis=-1, k=k, ret_typ='indices')

    batch_size = nn_pts.shape[0]
    batch_column = nd.reshape(nd.arange(batch_size), (-1, 1, 1))
    batch_ids = nd.tile(batch_column, (1, k, 1))
    point_ids = nd.expand_dims(picked, axis=2)
    return nd.concat(batch_ids, point_ids, dim=2)
def knn(W, x, k):
    """
    Search for the nearest neighbours (synonyms) using cosine similarity.

    :param W: matrix whose rows are the candidate vectors
    :param x: query vector; flattened before the dot product
    :param k: number of neighbours to return
    :return: ``(indices, scores)``, the int32 indices of the ``k`` most
        similar rows and the matching cosine values as Python scalars
    """
    # 1e-9 keeps the row norm non-zero: an all-zero row would otherwise
    # give 0 / 0 = NaN and corrupt the ranking.
    row_norms = (nd.sum(W * W, axis=1) + 1e-9).sqrt()
    cos = nd.dot(W, x.reshape((-1, ))) / (row_norms * nd.sum(x * x).sqrt())
    top_k = nd.topk(cos, k=k, ret_typ="indices").asnumpy().astype("int32")
    return top_k, [cos[i].asscalar() for i in top_k]
def get_attribute(self, image):
    """Face attribute predictor.

    Parameters
    ----------
    image: NDArray.
        The NDArray data format for MXNet to process, such as (H, W, C).

    Returns
    -------
    type: tuple
        Results of Face Attribute Predict:
        (gender, int(age), (expression_1, expression_2)).
        When the best expression probability is below 0.45 both
        expression slots hold the fallback label ``attribute_map2[2][7]``.
    """
    img = transform_eval(image,
                         resize_short=self._image_size,
                         crop_size=self._image_size)
    img = img.as_in_context(self.ctx[0])
    tic = time.time()
    pred = self.net(img)
    toc = time.time() - tic
    print('Attribute inference time: %fms' % (toc * 1000))

    topK = 1
    topK_age = 6
    topK_exp = 2
    ind_1 = nd.topk(pred[0], k=topK)[0].astype('int')
    ind_2 = nd.topk(pred[1], k=topK_age)[0].astype('int')
    ind_3 = nd.topk(pred[2], k=topK_exp)[0].astype('int')

    # The distributions are loop-invariant: compute each softmax once.
    age_probs = nd.softmax(pred[1])[0]
    expression_probs = nd.softmax(pred[2])[0]

    # Age is the sum of the truncated (probability * mapped value) terms
    # over the topK_age best age classes.
    age = 0
    for i in range(topK_age):
        age += int(age_probs[ind_2[i]].asscalar() *
                   self.attribute_map2[1][ind_2[i].asscalar()])

    gender = self.attribute_map2[0][ind_1[0].asscalar()]

    if expression_probs[ind_3[0]].asscalar() < 0.45:
        # Low confidence: use the fallback label. This path used to leave
        # expression_1 / expression_2 unassigned, so the return failed;
        # the pair shape of the result is kept for callers.
        # NOTE(review): confirm the fallback should fill both slots.
        expression = self.attribute_map2[2][7]
        expression_1 = expression
        expression_2 = expression
    else:
        expression_1 = self.attribute_map2[2][ind_3[0].asscalar()]
        expression_2 = self.attribute_map2[2][ind_3[1].asscalar()]

    return (gender, age, (expression_1, expression_2))
def explore_seeds(self, x, proportion=0.6):
    """Threshold ``x`` per sample at a randomly chosen rank of its top values.

    Args:
        x: Input unpacked as ``(b, _, h, w)``.
        proportion: Fraction of ``h * w`` giving the number ``k`` of top
            values considered.

    Returns:
        ``(x > threshold) * 5.0`` with one threshold per sample.
    """
    # NOTE(review): get_self_handle() is used as a fraction of k below;
    # its meaning is not visible here — confirm.
    proportion_self = get_self_handle()
    batch, _, height, width = x.shape
    k = int(height * width * proportion)
    ranked_values = nd.topk(x.detach().flatten(), k=k, ret_typ='value')

    # One random rank per sample, drawn from the window
    # [k * (proportion_self - 0.2), k * proportion_self).
    lowest_rank = int(k * (proportion_self - 0.2))
    highest_rank = int(k * proportion_self)
    ranks = [random.choice(range(lowest_rank, highest_rank)) for _ in range(batch)]
    rank_array = nd.array(ranks).as_in_context(x.context)

    threshold = ranked_values.pick(rank_array).reshape((-1, 1, 1, 1))
    return (x > threshold) * 5.0
def test_accur(target, it, *input):
    """Compute accuracy and mean confidence of the blended output.

    The two inputs ``theta`` and ``phi`` are mixed with a weight ``lamb``
    that decays with ``it`` from 1500 towards a floor of 5.

    Args:
        target: Ground-truth labels; ``target.size`` is the batch size.
        it: Iteration counter driving the decay of ``lamb``.
        *input: Exactly two arrays, ``theta`` and ``phi``.

    Returns:
        ``(accuracy, confidence)``: the fraction of samples whose top-1
        class equals the target, and the sum of the top-1 probabilities of
        those samples divided by the batch size.
    """
    lambda_min = 5.0
    lambda_max = 1500.0
    theta, phi = input
    batch_size = target.size
    lamb = max(lambda_min, lambda_max / (1 + 0.1 * it))
    mix = 1 + lamb

    # because indexing is not differentiable in mxnet, we must do this
    output = theta - theta / mix + phi / mix
    nd.softmax(output, out=output)  # written back into `output`

    top_prob, top_class = nd.topk(output, ret_typ='both')
    hits = (top_class == target.reshape(-1, 1).astype(top_class.dtype))
    accuracy = nd.sum(hits) / batch_size
    confidence = nd.sum(hits * top_prob) / batch_size
    return accuracy, confidence
def _gather_topk_beams(list, score_or_log_probs, batch_size, beam_size, cache=None):
    """Gather the top beams from a nested structure.

    The scores are converted to an NDArray on the module-level ``ctx``; the
    indices of the ``beam_size`` best scores are handed to ``_gather_beams``.

    Args:
        list: Nested structure to gather from (the name shadows the builtin
            but is part of the signature).
        score_or_log_probs: Scores used to rank the beams.
        batch_size: Forwarded to ``_gather_beams``.
        beam_size: Number of beams to keep.
        cache: Optional cache forwarded to ``_gather_beams``.

    Returns:
        Whatever ``_gather_beams`` returns for the selected beams.
    """
    scores = nd.array(score_or_log_probs, ctx=ctx)
    best_beams = nd.topk(scores, k=beam_size)
    return _gather_beams(list, best_beams, batch_size, beam_size, cache=cache)
def forward(self, x):
    """Route ``x`` from the root and adjust the weights to sum to one.

    Weights are masked by presence (``router_mat`` summed over axis 2).
    The mass missing from one is then added at the position ``depth``
    computed below (presumably the deepest present layer — confirm).

    Args:
        x: Input handed to ``self._contextify``.

    Returns:
        ``(router, weight, weight_adj, router_mat)``.
    """
    # The root is the first key of the structure.
    root, _ = next(iter(self._structure.items()))
    router, router_mat, weight, embedd = self._contextify(x)(root)

    presence = nd.sum(router_mat, axis=2)
    weight_adj = presence * weight

    # top-1 index on the reversed presence, mapped back to the original
    # layer order.
    reversed_top = nd.topk(nd.reverse(presence, axis=1))
    depth = (len(self._weightlayer) - reversed_top - 1)[:, 0]

    # Give the leftover mass (1 - sum) to the entry at `depth`.
    remainder = 1 - nd.sum(weight_adj, axis=1)
    remainder += nd.choose_element_0index(weight_adj, depth)
    weight_adj = nd.fill_element_0index(weight_adj, remainder, depth)
    return (router, weight, weight_adj, router_mat)
def get_action(self, net):
    """Classify the buffered frames with ``net`` and return the class name.

    Args:
        net: Video model exposing ``classes`` and a forward call.

    Returns:
        The name of the best class, or ``None`` while fewer than
        ``self.SAMPLE_DURATION`` frames are buffered.
    """
    if len(self.frames) < self.SAMPLE_DURATION:
        return None

    transform_fn = video.VideoGroupValTransform(size=224,
                                                mean=[0.485, 0.456, 0.406],
                                                std=[0.229, 0.224, 0.225])
    clips = transform_fn(self.frames)
    print("INFO: action input shape:")
    print([clip.shape for clip in clips])

    # Stack the frames, group them into 32-frame clips of 3x224x224 and
    # swap the frame and channel axes.
    stacked = np.stack(clips, axis=0)
    grouped = stacked.reshape((-1, ) + (32, 3, 224, 224))
    model_input = np.transpose(grouped, (0, 2, 1, 3, 4))

    pred = net(nd.array(model_input))
    best = nd.topk(pred, k=1)[0].astype('int')
    return net.classes[best[0].asscalar()]