def hybrid_forward(self, F, x, *args, **kwargs):
    # inputs: query frame x^q, query camera v^q,
    # context frames x^{1..M}, context cameras v^{1..M}
    x_q = x
    v_q = args[0]
    x_context = args[1]
    v_context = args[2]

    # reshape camera information so we can concat it to image data
    v_q = F.broadcast_to(
        F.expand_dims(F.expand_dims(v_q, axis=-1), axis=-1),
        (0, 0, *self._downsample_output_shape[1:]))

    with x_q.context:
        u = F.zeros((self._batch_size, *self._upsample_output_shape))  # canvas (reconstruction)
        h_enc = F.zeros((self._batch_size, *self._rnn_hidden_shape))
        c_enc = F.zeros((self._batch_size, *self._rnn_hidden_shape))
        h_dec = F.zeros((self._batch_size, *self._rnn_hidden_shape))
        c_dec = F.zeros((self._batch_size, *self._rnn_hidden_shape))

    qs = []  # latent (approximate posterior) distributions for each step
    ps = []  # prior distributions of latents for each step

    r = self._representation_nn(x_context, v_context)
    for i in range(self._num_steps):
        encoded_x_q_and_u = self._downsample_nn(F.concat(x_q, u, dim=1))

        # Eq. S20
        _, (h_enc, c_enc) = self._inf_rnn(
            F.concat(encoded_x_q_and_u, v_q, r, h_dec, dim=1), [h_enc, c_enc])

        # Eq. S21
        q = self._q_layer(h_enc)
        # convert NxCxHxW to NxF
        q = F.reshape(q, (self._batch_size, -1))
        qs.append(q)
        z = self._latent_layer(q)

        # Eq. S11
        p = self._p_layer(h_dec)
        p = F.reshape(p, (self._batch_size, -1))
        ps.append(p)

        gen_z = F.reshape(z, (self._batch_size, self._num_latent_maps,
                              *self._downsample_output_shape[1:]))
        _, (h_dec, c_dec) = self._gen_rnn(F.concat(gen_z, v_q, r, dim=1),
                                          [h_dec, c_dec])
        u = u + self._upsample_nn(h_dec)

    out = self._out_layer(u)
    return out, nd.stack(*qs, axis=0), nd.stack(*ps, axis=0)  # qs and ps: steps x batch x latent
def hybrid_forward(self, F, x, *args, **kwargs):
    with x.context:
        r = F.zeros((self._batch_size, *self._input_shape))  # reconstruction
        h_enc = F.zeros((self._batch_size, *self._rnn_hidden_shape))
        c_enc = F.zeros((self._batch_size, *self._rnn_hidden_shape))
        h_dec = F.zeros((self._batch_size, *self._rnn_hidden_shape))
        c_dec = F.zeros((self._batch_size, *self._rnn_hidden_shape))

    qs = []  # latent (approximate posterior) distributions for each step
    ps = []  # prior distributions of latents for each step

    encoded_x = self._enc_nn(x)
    for i in range(self._num_steps):
        encoded_r = self._enc_nn(F.sigmoid(r))
        err = encoded_x - encoded_r
        _, (h_enc, c_enc) = self._enc_rnn(
            F.concat(encoded_x, err, h_dec, c_dec, dim=1), [h_enc, c_enc])

        q = self._q_layer(h_enc)
        # convert NxCxHxW to NxF
        q = F.reshape(q, (self._batch_size, -1))
        qs.append(q)
        z = self._latent_layer(q)

        p = self._p_layer(h_dec)
        p = F.reshape(p, (self._batch_size, -1))
        ps.append(p)

        dec_z = F.reshape(z, (self._batch_size, self._num_latent_maps,
                              *self._encoder_output_shape[1:]))
        _, (h_dec, c_dec) = self._dec_rnn(F.concat(dec_z, encoded_r, dim=1),
                                          [h_dec, c_dec])
        r = r + self._dec_nn(h_dec)

    # don't pass reconstruction through sigmoid. loss function takes care of that.
    return r, nd.stack(*qs, axis=0), nd.stack(*ps, axis=0)  # qs and ps: steps x batch x latent
def fn(rel_id, num_chunks, head, tail, gpu_id, trace=False):
    # pos node, project to its relation
    projection = self.projection_emb(rel_id, gpu_id, trace)
    projection = projection.reshape(-1, self.entity_dim, self.relation_dim)

    head = head.reshape(-1, 1, self.entity_dim)
    head = nd.batch_dot(head, projection).squeeze()
    head = head.reshape(num_chunks, -1, self.relation_dim)

    projection = projection.reshape(num_chunks, -1, self.entity_dim,
                                    self.relation_dim)
    tail = tail.reshape(num_chunks, -1, 1, self.entity_dim)
    num_rels = projection.shape[1]
    num_nnodes = tail.shape[1]

    tails = []
    for i in range(num_chunks):
        tail_negs = []
        for j in range(num_nnodes):
            tail_neg = tail[i][j]
            tail_neg = tail_neg.reshape(1, 1, self.entity_dim)
            tail_neg = nd.broadcast_axis(tail_neg, axis=0, size=num_rels)
            tail_neg = nd.batch_dot(tail_neg, projection[i])
            tail_neg = tail_neg.squeeze(axis=1)
            tail_negs.append(tail_neg)
        tail_negs = nd.stack(*tail_negs, axis=1)
        tails.append(tail_negs)
    tail = nd.stack(*tails)
    return head, tail
def generate(self, x: nd.NDArray = None, include_intermediate: bool = False,
             return_attn_params: bool = False) -> \
        Union[nd.NDArray, Tuple[nd.NDArray, nd.NDArray]]:
    """
    Generate a batch of samples from the model. See Section 2.3 in the paper.

    If x is None, this method generates unconditional samples from the model (as explained in Section 2.3 of
    the paper).

    If x is provided, this method reconstructs the input to generate the sample. This is not really a true
    sample from the model, because the model looks at the image it is trying to generate. However, this is
    useful for seeing how the model generates a particular image. (I believe this is how the figures in the
    paper are generated.)

    :param x: Input to generate images from.
    :param include_intermediate: If True, samples from all timesteps (not only the last timestep) are returned.
    :param return_attn_params: If True, returns attention params along with generated samples.
    :return: n x input_dim array of generated samples. If include_intermediate is True, then
        steps x n x input_dim.
    """
    canvases = []
    attn_params = []

    canvas = nd.zeros((self._batch_size, self._input_dim), ctx=self._ctx)
    h_dec = nd.broadcast_to(self._dec_rnn_h_init.data(), (self._batch_size, 0))
    c_dec = nd.broadcast_to(self._dec_rnn_c_init.data(), (self._batch_size, 0))

    if x is not None:
        h_enc = nd.broadcast_to(self._enc_rnn_h_init.data(), (self._batch_size, 0))
        c_enc = nd.broadcast_to(self._enc_rnn_c_init.data(), (self._batch_size, 0))

    for i in range(self._num_steps):
        canvases.append(nd.sigmoid(canvas))

        if x is not None:
            err = x - nd.sigmoid(canvas)
            r, _ = self._read_layer(x, err, h_dec, c_dec)
            _, (h_enc, c_enc) = self._enc_rnn(nd.concat(r, h_dec, c_dec, dim=1),
                                              [h_enc, c_enc])
            q = self._enc_dense(h_enc)
            z = self._latent_layer(q)
        else:
            z = nd.random.normal(shape=(self._batch_size, self._latent_dim), ctx=self._ctx)

        _, (h_dec, c_dec) = self._dec_rnn(z, [h_dec, c_dec])
        w, attn_param = self._write_layer(h_dec, c_dec)
        attn_params.append(attn_param)
        canvas = canvas + w

    canvases.append(nd.sigmoid(canvas))

    if include_intermediate:
        samples = nd.stack(*canvases, axis=0)
    else:
        samples = canvases[-1]

    if return_attn_params:
        return samples, nd.stack(*attn_params, axis=0)
    else:
        return samples
def backward(self, req, out_grad, in_data, out_data, in_grad, aux):
    data = in_data[0]
    rois = in_data[1]
    BS, C, H, W = data.shape
    N = rois.shape[0]
    dout = out_grad[0]
    ddata = nd.zeros_like(data)

    rois = rois.asnumpy()
    for i in range(N):
        roi = rois[i]
        batch_id = roi[0].astype(np.int64)
        x1, y1, x2, y2 = roi[1:] * self.spatial_scale
        x1, y1, x2, y2 = np.floor(x1), np.floor(y1), np.ceil(x2), np.ceil(y2)
        x1, y1, x2, y2 = np.clip(x1, 0, W), np.clip(y1, 0, H), \
            np.clip(x2, 0, W), np.clip(y2, 0, H)
        x1, y1, x2, y2 = x1.astype(np.int64), y1.astype(np.int64), \
            x2.astype(np.int64), y2.astype(np.int64)
        if x1 >= x2 or y1 >= y2:
            continue
        h = y2 - y1
        w = x2 - x1

        # (C, h, w)
        roi_data = data[batch_id, :, y1:y2, x1:x2]
        # (h*w, C)
        roi_data = roi_data.reshape((C, -1)).transpose((1, 0))
        # (h*w, C, 1)
        roi_data = roi_data.reshape((0, 0, 1))
        # (h*w, C, C)
        out_product = nd.batch_dot(roi_data, roi_data.transpose((0, 2, 1)))

        # (C, C)
        if self.type == "max":
            reduce_product = nd.max(out_product, axis=0)
            max_mask = out_product == reduce_product
            # max_index = nd.argmax(out_product, axis=0)
            # max_index = max_index.reshape((C * C))
            # d_max = nd.eye(h*w)[max_index].transpose((1, 0)).reshape((h*w, C, C))
            dout_product = nd.stack(*[dout[i] for _ in range(h * w)]) * max_mask
        elif self.type == "mean":
            dout_product = nd.stack(*[dout[i] for _ in range(h * w)]) / (h * w)
        else:
            raise NotImplementedError()

        droi_data = []
        for j in range(C):
            droi_data.append(
                nd.sum(dout_product[:, j, :] * roi_data[:, :, 0], axis=1) +
                nd.sum(dout_product[:, :, j] * roi_data[:, :, 0], axis=1))
        droi_data = nd.stack(*droi_data, axis=1)  # (h*w, C)
        droi_data = droi_data.transpose((1, 0)).reshape((C, h, w))
        ddata[batch_id, :, y1:y2, x1:x2] = droi_data

    self.assign(in_grad[0], req[0], ddata)
    self.assign(in_grad[1], req[1], nd.zeros_like(in_data[1]))
def sample(self, batch_size=1, with_details=False, with_entropy=False):
    """
    Returns
    -------
    configs : list of dict
        list of configurations
    """
    inputs = self.static_inputs[batch_size]
    hidden = self.static_init_hidden[batch_size]

    actions = []
    entropies = []
    log_probs = []
    for idx in range(len(self.num_tokens)):
        logits, hidden = self.forward(inputs, hidden, idx, is_embed=(idx == 0))

        probs = F.softmax(logits, axis=-1)
        log_prob = F.log_softmax(logits, axis=-1)
        entropy = -(log_prob * probs).sum(1, keepdims=False) if with_entropy else None

        action = mx.random.multinomial(probs, 1)
        ind = mx.nd.stack(mx.nd.arange(probs.shape[0], ctx=action.context),
                          action.astype('float32'))
        selected_log_prob = F.gather_nd(log_prob, ind)

        actions.append(action[:, 0])
        entropies.append(entropy)
        log_probs.append(selected_log_prob)

        inputs = action[:, 0] + sum(self.num_tokens[:idx])
        # detach() is not in-place; rebind the result so the next step's
        # inputs don't track gradients
        inputs = inputs.detach()

    configs = []
    for idx in range(batch_size):
        config = {}
        for i, action in enumerate(actions):
            choice = action[idx].asscalar()
            k, space = self.spaces[i]
            config[k] = int(choice)
        configs.append(config)

    if with_details:
        entropies = F.stack(*entropies, axis=1) if with_entropy else entropies
        return configs, F.stack(*log_probs, axis=1), entropies
    else:
        return configs
def forward(self, x):  # NCHW
    h, w = x.shape[2], x.shape[3]
    res = []
    for i in range(h):
        res.append(nd.stack(
            *self.hcell.unroll(w, x[:, :, i, :], layout='NCT')[0],
            axis=2))  # NCW
    for i in range(w):
        res.append(nd.stack(
            *self.vcell.unroll(h, x[:, :, :, i], layout='NCT')[0],
            axis=2))  # NCH
    res = nd.relu(nd.stack(*res[:h], axis=2) + nd.stack(*res[h:], axis=3))
    return nd.concat(x, res, dim=1)
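# --- Hedged usage sketch (not from the original source) ---
# The row/column cells (self.hcell / self.vcell) are not shown above. A
# hypothetical stand-in using gluon's GRUCell illustrates the unroll/stack
# pattern: unrolling over width with layout 'NCT' yields one NxC output per
# column, and stacking those on axis 2 rebuilds an NCW tensor.
from mxnet import nd
from mxnet.gluon import rnn

hcell = rnn.GRUCell(hidden_size=8)  # hypothetical horizontal cell
hcell.initialize()
x = nd.random.normal(shape=(2, 4, 5, 6))              # NCHW
row = x[:, :, 0, :]                                   # NCT, with T = W = 6
outs, _ = hcell.unroll(6, row, layout='NCT', merge_outputs=False)
print(len(outs), outs[0].shape)                       # 6 steps, each (2, 8)
print(nd.stack(*outs, axis=2).shape)                  # (2, 8, 6): NCW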
def sample(self, batch_size=1, with_details=False, with_entropy=False):
    # self-attention
    x = self.embedding(batch_size).reshape(-3, 0)  # .squeeze()  # b x action x h
    kshape = (batch_size, self.num_total_tokens, self.hidden_size)
    vshape = (batch_size, self.num_total_tokens, 1)

    querry = self.querry(x).reshape(*kshape)  # b x actions x h
    key = self.key(x).reshape(*kshape)        # b x actions x h
    value = self.value(x).reshape(*vshape)    # b x actions x 1
    atten = mx.nd.linalg_gemm2(querry, key, transpose_b=True).softmax(axis=1)
    alphas = mx.nd.linalg_gemm2(atten, value).squeeze(axis=-1)

    actions = []
    entropies = []
    log_probs = []
    for idx in range(len(self.num_tokens)):
        i0 = sum(self.num_tokens[:idx])
        i1 = sum(self.num_tokens[:idx + 1])
        logits = alphas[:, i0:i1]

        probs = F.softmax(logits, axis=-1)
        log_prob = F.log_softmax(logits, axis=-1)
        entropy = -(log_prob * probs).sum(1, keepdims=False) if with_entropy else None

        action = mx.random.multinomial(probs, 1)
        ind = mx.nd.stack(mx.nd.arange(probs.shape[0], ctx=action.context),
                          action.astype('float32'))
        selected_log_prob = F.gather_nd(log_prob, ind)

        actions.append(action[:, 0])
        entropies.append(entropy)
        log_probs.append(selected_log_prob)

    configs = []
    for idx in range(batch_size):
        config = {}
        for i, action in enumerate(actions):
            choice = action[idx].asscalar()
            k, space = self.spaces[i]
            config[k] = int(choice)
        configs.append(config)

    if with_details:
        entropies = F.stack(*entropies, axis=1) if with_entropy else entropies
        return configs, F.stack(*log_probs, axis=1), entropies
    else:
        return configs
def edge_func(self, edges):
    head_r, head_i, head_j, head_k = nd.split(edges.src['emb'], num_outputs=4, axis=-1)
    rel_r, rel_i, rel_j, rel_k = nd.split(edges.data['emb'], num_outputs=4, axis=-1)
    tail_r, tail_i, tail_j, tail_k = nd.split(edges.dst['emb'], num_outputs=4, axis=-1)

    rel_norm = nd.stack(rel_r, rel_i, rel_j, rel_k, axis=0).norm(ord=2, axis=0)
    x_r = (head_r * rel_r - head_i * rel_i - head_j * rel_j - head_k * rel_k) / (rel_norm + 1e-15)
    x_i = (head_r * rel_i + head_i * rel_r + head_j * rel_k - head_k * rel_j) / (rel_norm + 1e-15)
    x_j = (head_r * rel_j - head_i * rel_k + head_j * rel_r + head_k * rel_i) / (rel_norm + 1e-15)
    x_k = (head_r * rel_k + head_i * rel_j - head_j * rel_i + head_k * rel_r) / (rel_norm + 1e-15)

    score = x_r * tail_r + x_i * tail_i + x_j * tail_j + x_k * tail_k
    return {'score': nd.sum(score, -1)}
def infer(self, head_emb, rel_emb, tail_emb):
    hidden_dim = head_emb.shape[1]
    head_r, head_i, head_j, head_k = nd.split(
        head_emb.reshape(head_emb.shape[0], 1, 1, hidden_dim),
        num_outputs=4, axis=-1)
    rel_r, rel_i, rel_j, rel_k = nd.split(
        rel_emb.reshape(1, rel_emb.shape[0], 1, hidden_dim),
        num_outputs=4, axis=-1)
    tail_r, tail_i, tail_j, tail_k = nd.split(
        tail_emb.reshape(1, 1, tail_emb.shape[0], hidden_dim),
        num_outputs=4, axis=-1)

    rel_norm = nd.stack(rel_r, rel_i, rel_j, rel_k, axis=0).norm(ord=2, axis=0)
    x_r = (head_r * rel_r - head_i * rel_i - head_j * rel_j - head_k * rel_k) / (rel_norm + 1e-15)
    x_i = (head_r * rel_i + head_i * rel_r + head_j * rel_k - head_k * rel_j) / (rel_norm + 1e-15)
    x_j = (head_r * rel_j - head_i * rel_k + head_j * rel_r + head_k * rel_i) / (rel_norm + 1e-15)
    x_k = (head_r * rel_k + head_i * rel_j - head_j * rel_i + head_k * rel_r) / (rel_norm + 1e-15)

    score = x_r * tail_r + x_i * tail_i + x_j * tail_j + x_k * tail_k
    return nd.sum(score, -1)
def hybrid_forward(self, F, x, *args, **kwargs):
    with x.context:
        canvas = F.zeros((self._batch_size, self._input_dim), ctx=self._ctx)
        # broadcast learned hidden states to batch size.
        h_enc = F.broadcast_to(self._enc_rnn_h_init.data(), (self._batch_size, 0))
        c_enc = F.broadcast_to(self._enc_rnn_c_init.data(), (self._batch_size, 0))
        h_dec = F.broadcast_to(self._dec_rnn_h_init.data(), (self._batch_size, 0))
        c_dec = F.broadcast_to(self._dec_rnn_c_init.data(), (self._batch_size, 0))

    qs = []  # latent distributions for each step

    for i in range(self._num_steps):
        err = x - F.sigmoid(canvas)
        r, _ = self._read_layer(x, err, h_dec, c_dec)
        _, (h_enc, c_enc) = self._enc_rnn(F.concat(r, h_dec, c_dec, dim=1),
                                          [h_enc, c_enc])

        q = self._enc_dense(h_enc)
        qs.append(q)
        z = self._latent_layer(q)

        _, (h_dec, c_dec) = self._dec_rnn(z, [h_dec, c_dec])
        w, _ = self._write_layer(h_dec, c_dec)
        canvas = canvas + w

    # don't pass canvas through sigmoid. loss function takes care of that.
    return canvas, nd.stack(*qs, axis=0)  # qs: steps x batch x latent
def seg_attr(x, l):
    batchsize = x.shape[0]
    y = []
    for bz in range(batchsize):
        gender = l[bz][2] + l[bz][13]
        hair = l[bz][2]
        sunglass = l[bz][4]
        hat = l[bz][1]
        tshirt_longsleeve_formal = (l[bz][5] + l[bz][6] + l[bz][7] +
                                    l[bz][14] + l[bz][15])
        shorts_jeans_longpants = l[bz][9] + l[bz][10] + l[bz][16] + l[bz][17]
        skirt = l[bz][12]
        facemask = l[bz][13]
        logo_plaid = tshirt_longsleeve_formal
        # print(x[bz][0].shape, gender.shape)
        y.append(x[bz][0] * gender)
        y.append(x[bz][1] * hair)
        y.append(x[bz][2] * sunglass)
        y.append(x[bz][3] * hat)
        y.append(x[bz][4] * tshirt_longsleeve_formal)
        y.append(x[bz][5] * tshirt_longsleeve_formal)
        y.append(x[bz][6] * tshirt_longsleeve_formal)
        y.append(x[bz][7] * shorts_jeans_longpants)
        y.append(x[bz][8] * shorts_jeans_longpants)
        y.append(x[bz][9] * shorts_jeans_longpants)
        y.append(x[bz][10] * skirt)
        y.append(x[bz][11] * facemask)
        y.append(x[bz][12] * logo_plaid)
        y.append(x[bz][13] * logo_plaid)
    stk = nd.stack(*y)
    return stk
def fn(heads, relations, tails, num_chunks, chunk_size, neg_sample_size):
    hidden_dim = heads.shape[1]
    emb_r, emb_i, emb_j, emb_k = nd.split(heads, num_outputs=4, axis=-1)
    rel_r, rel_i, rel_j, rel_k = nd.split(relations, num_outputs=4, axis=-1)

    rel_norm = nd.stack(rel_r, rel_i, rel_j, rel_k, axis=0).norm(ord=2, axis=0)
    x_r = (emb_r * rel_r - emb_i * rel_i - emb_j * rel_j - emb_k * rel_k) / (rel_norm + 1e-15)
    x_i = (emb_r * rel_i + emb_i * rel_r + emb_j * rel_k - emb_k * rel_j) / (rel_norm + 1e-15)
    x_j = (emb_r * rel_j - emb_i * rel_k + emb_j * rel_r + emb_k * rel_i) / (rel_norm + 1e-15)
    x_k = (emb_r * rel_k + emb_i * rel_j - emb_j * rel_i + emb_k * rel_r) / (rel_norm + 1e-15)

    emb_quaternion = nd.concat(x_r, x_i, x_j, x_k, dim=-1)
    tmp = emb_quaternion.reshape(num_chunks, chunk_size, hidden_dim)
    tails = tails.reshape(num_chunks, neg_sample_size, hidden_dim)
    tails = nd.transpose(tails, axes=(0, 2, 1))
    # score the rotated heads against the transposed negative tails; the
    # original passed `heads` here, which has the wrong shape for gemm2 and
    # would leave the transposed `tails` unused
    return nd.linalg_gemm2(tmp, tails)
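# --- Hedged usage sketch (not from the original source; values are
# illustrative) ---
# A shape check for the quaternion (QuatE-style) scorer above: hidden_dim must
# be divisible by 4, `heads` holds num_chunks * chunk_size embeddings, and
# `tails` holds num_chunks * neg_sample_size negatives.
import mxnet.ndarray as nd

heads = nd.random.normal(shape=(8, 16))      # 2 chunks x 4 positives
relations = nd.random.normal(shape=(8, 16))
tails = nd.random.normal(shape=(8, 16))      # 2 chunks x 4 negatives
scores = fn(heads, relations, tails, num_chunks=2, chunk_size=4, neg_sample_size=4)
print(scores.shape)  # (2, 4, 4): chunk x positive x negative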
def _score_sentence(self, feats, tags, lens_):
    start = nd.array([self.tag_dictionary.get_idx_for_item(START_TAG)], dtype='int32')
    start = start.expand_dims(0).tile((tags.shape[0], 1))

    stop = nd.array([self.tag_dictionary.get_idx_for_item(STOP_TAG)], dtype='int32')
    stop = stop.expand_dims(0).tile((tags.shape[0], 1))

    pad_start_tags = nd.concat(*[start, tags], dim=1)
    pad_stop_tags = nd.concat(*[tags, stop], dim=1)

    for i in range(len(lens_)):
        pad_stop_tags[i, lens_[i]:] = self.tag_dictionary.get_idx_for_item(STOP_TAG)

    score = []
    for i in range(feats.shape[0]):
        r = nd.array(list(range(lens_[i])), dtype='int32')
        score.append(
            nd.sum(self.transitions.data()[pad_stop_tags[i, :lens_[i] + 1],
                                           pad_start_tags[i, :lens_[i] + 1]]) +
            nd.sum(feats[i, r, tags[i, :lens_[i]]]))

    return nd.stack(*score).squeeze()
# assumes: from mxnet.ndarray import NDArray, reshape, stack
def forward(self, signals) -> NDArray:
    outputs = []
    for signal, RFmapper in zip(signals, self._RFmappers):
        for i in range(signal.shape[1]):
            outputs.append(RFmapper(signal[:, i, :]))
    outputs = stack(*outputs, axis=1)
    outputs = reshape(outputs,
                      (outputs.shape[0], outputs.shape[1], *self.output_shape))
    return outputs
def test_on_LFW(model, ctx=mx.gpu()):
    with open('/home1/LFW/pairs.txt', 'rt') as f:
        pairs_lines = f.readlines()[1:]
    sims = []
    model.feature = True
    normalize = transforms.Normalize(mean=0.5, std=0.25)
    transform = transforms.Compose([
        # transforms.Resize((96, 112)),
        transforms.ToTensor(),
        normalize,
        # mTransform,
    ])

    for i in range(6000):
        p = pairs_lines[i].replace('\n', '').split('\t')
        if 3 == len(p):
            sameflag = 1
            name1 = p[0] + '/' + p[0] + '_' + '{:04}.jpg'.format(int(p[1]))
            name2 = p[0] + '/' + p[0] + '_' + '{:04}.jpg'.format(int(p[2]))
        if 4 == len(p):
            sameflag = 0
            name1 = p[0] + '/' + p[0] + '_' + '{:04}.jpg'.format(int(p[1]))
            name2 = p[2] + '/' + p[2] + '_' + '{:04}.jpg'.format(int(p[3]))

        img1 = nd.array(Image.open('/home1/LFW/aligned_lfw-112X96/' + name1))
        img2 = nd.array(Image.open('/home1/LFW/aligned_lfw-112X96/' + name2))
        img1 = transform(img1)
        img2 = transform(img2)

        img = nd.stack(img1, img2)
        img = img.as_in_context(ctx)
        output = model(img)
        f1, f2 = output[0], output[1]
        cosdistance = nd.sum(f1 * f2) / (f1.norm() * f2.norm() + 1e-5)
        sims.append('{}\t{}\t{}\t{}\n'.format(name1, name2, cosdistance.asscalar(), sameflag))
        sw.add_scalar(tag='score', value=cosdistance.asscalar(), global_step=i)
        global_param.iter = i

    accuracy = []
    thd = []
    folds = KFold(n=6000, n_folds=10, shuffle=False)
    thresholds = np.arange(0, 1.0, 0.005)
    # wrap map() in list() so numpy gets a sequence (Python 3)
    predicts = np.array(list(map(lambda line: line.strip('\n').split(), sims)))
    for idx, (train, test) in enumerate(folds):
        best_thresh = find_best_threshold(thresholds, predicts[train])
        accuracy.append(eval_acc(best_thresh, predicts[test]))
        thd.append(best_thresh)
    # single 1080Ti: about 100 s
    print('LFWACC={:.4f} std={:.4f} thd={:.4f}'.format(
        np.mean(accuracy), np.std(accuracy), np.mean(thd)))
    return np.mean(accuracy)
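# The helpers find_best_threshold / eval_acc used above are not shown. A
# minimal sketch consistent with the call sites (each row of `predicts` is
# [name1, name2, cos_distance, sameflag]); an assumption, not the original
# implementation.
def eval_acc(threshold, predicts):
    correct = 0
    for name1, name2, score, flag in predicts:
        same = 1 if float(score) > threshold else 0
        if same == int(flag):
            correct += 1
    return correct / float(len(predicts))

def find_best_threshold(thresholds, predicts):
    best_threshold = best_acc = 0
    for threshold in thresholds:
        accuracy = eval_acc(threshold, predicts)
        if accuracy >= best_acc:
            best_acc = accuracy
            best_threshold = threshold
    return best_threshold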
def transform(data, label):
    # data: sample x height x width x channel
    # label: sample
    data = data.astype('float32')
    if augs is not None:
        # apply to each sample one-by-one and then stack
        data = nd.stack(*[apply_aug_list(d, augs) for d in data])
    data = nd.transpose(data, (0, 3, 1, 2))
    return data, label.astype('float32')
def generate(self, x: nd.NDArray = None, include_intermediate: bool = False, **kwargs) -> \
        Union[nd.NDArray, Tuple[nd.NDArray, nd.NDArray]]:
    """
    Generate a batch of samples from the model. See Section 2.3 in the paper.

    If x is None, this method generates unconditional samples from the model (as explained in Section 2.3 of
    the paper).

    If x is provided, this method reconstructs the input to generate the sample. This is not really a true
    sample from the model, because the model looks at the image it is trying to generate. However, this is
    useful for seeing how the model generates a particular image.

    :param x: Input to generate images from.
    :param include_intermediate: If True, samples from all timesteps (not only the last timestep) are returned.
    :return: n x *image_shape array of generated samples. If include_intermediate is True, then
        steps x n x *image_shape.
    """
    r = nd.zeros((self._batch_size, *self._input_shape), ctx=self._ctx)  # reconstruction
    h_dec = nd.zeros((self._batch_size, *self._rnn_hidden_shape), ctx=self._ctx)
    c_dec = nd.zeros((self._batch_size, *self._rnn_hidden_shape), ctx=self._ctx)

    if x is not None:
        h_enc = nd.zeros((self._batch_size, *self._rnn_hidden_shape), ctx=self._ctx)
        c_enc = nd.zeros((self._batch_size, *self._rnn_hidden_shape), ctx=self._ctx)
        encoded_x = self._enc_nn(x)

    rs = []  # sample(s) over time

    for i in range(self._num_steps):
        rs.append(nd.sigmoid(r))
        encoded_r = self._enc_nn(rs[-1])
        if x is not None:
            err = encoded_x - encoded_r
            _, (h_enc, c_enc) = self._enc_rnn(nd.concat(encoded_x, err, h_dec, c_dec, dim=1),
                                              [h_enc, c_enc])

            q = self._q_layer(h_enc)
            # convert NxCxHxW to NxF
            q = nd.reshape(q, (self._batch_size, -1))
            z = self._latent_layer(q)
        else:
            # sample from prior
            p = self._p_layer(h_dec)
            p = nd.reshape(p, (self._batch_size, -1))
            z = self._latent_layer(p)

        dec_z = nd.reshape(z, (self._batch_size, self._num_latent_maps,
                               *self._encoder_output_shape[1:]))
        _, (h_dec, c_dec) = self._dec_rnn(nd.concat(dec_z, encoded_r, dim=1),
                                          [h_dec, c_dec])
        r = r + self._dec_nn(h_dec)

    rs.append(nd.sigmoid(r))

    if include_intermediate:
        samples = nd.stack(*rs, axis=0)
    else:
        samples = rs[-1]

    return samples
def generate(self, v_q: nd.NDArray, x_context: nd.NDArray, v_context: nd.NDArray,
             include_intermediate: bool = False, **kwargs) -> \
        Union[nd.NDArray, Tuple[nd.NDArray, nd.NDArray]]:
    """
    Generate a batch of samples from the model. See Algorithm S3 in the paper.

    :param v_q: Query view camera info.
    :param x_context: Context frames.
    :param v_context: Context camera info.
    :param include_intermediate: If True, samples from all timesteps (not only the last timestep) are returned.
    :return: n x *image_shape array of generated samples. If include_intermediate is True, then
        steps x n x *image_shape.
    """
    u = nd.zeros((self._batch_size, *self._upsample_output_shape), ctx=self._ctx)  # canvas (reconstruction)
    h_dec = nd.zeros((self._batch_size, *self._rnn_hidden_shape), ctx=self._ctx)
    c_dec = nd.zeros((self._batch_size, *self._rnn_hidden_shape), ctx=self._ctx)

    # reshape camera information so we can concat it to image data
    v_q = nd.broadcast_to(
        nd.expand_dims(nd.expand_dims(v_q, axis=-1), axis=-1),
        (0, 0, *self._downsample_output_shape[1:]))

    outs = []  # sample(s) over time

    r = self._representation_nn(x_context, v_context)
    for i in range(self._num_steps):
        outs.append(self._out_layer(u))

        # Eq. S11
        p = self._p_layer(h_dec)
        p = nd.reshape(p, (self._batch_size, -1))
        z = self._latent_layer(p)

        gen_z = nd.reshape(z, (self._batch_size, self._num_latent_maps,
                               *self._downsample_output_shape[1:]))
        _, (h_dec, c_dec) = self._gen_rnn(nd.concat(gen_z, v_q, r, dim=1),
                                          [h_dec, c_dec])
        u = u + self._upsample_nn(h_dec)

    outs.append(self._out_layer(u))

    if include_intermediate:
        samples = nd.stack(*outs, axis=0)
    else:
        samples = outs[-1]

    return nd.clip(samples, a_min=0.0, a_max=1.0)
# assumes: from mxnet.ndarray import NDArray, stack
#          from mxnet.ndarray.random import randint, uniform
def __call__(self, z_prime: NDArray) -> NDArray:
    z = []
    for i in range(z_prime.shape[0]):
        if len(self._data) < self._capacity:
            z.append(z_prime[i])
            self._data.append(z_prime[i])
        elif uniform().asscalar() < 0.5:
            z.append(self._data.pop(randint(0, self._capacity).asscalar()))
            self._data.append(z_prime[i])
        else:
            z.append(z_prime[i])
    return stack(*z, axis=0)
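# --- Hedged usage sketch (not from the original source) ---
# The class that owns __call__ above is not shown; _HistoryBuffer is a
# hypothetical stand-in carrying the _capacity/_data fields the method uses.
# The buffer keeps up to `capacity` past latents and, with probability 0.5,
# swaps a stored latent into the returned batch (history replay).
from mxnet.ndarray import NDArray, stack
from mxnet.ndarray.random import randint, uniform
import mxnet.ndarray as nd

class _HistoryBuffer:
    def __init__(self, capacity: int):
        self._capacity = capacity
        self._data = []  # stored latents, at most `capacity` of them

_HistoryBuffer.__call__ = __call__  # attach the method defined above

buf = _HistoryBuffer(capacity=4)
z = buf(nd.random.normal(shape=(2, 8)))
print(z.shape)  # (2, 8): same batch shape, possibly mixing replayed latents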
def sample(self, batch_size=1, with_details=False, with_entropy=False):
    actions = []
    entropies = []
    log_probs = []
    for idx in range(len(self.num_tokens)):
        logits = self.decoders[idx](batch_size)

        probs = F.softmax(logits, axis=-1)
        log_prob = F.log_softmax(logits, axis=-1)
        entropy = -(log_prob * probs).sum(1, keepdims=False) if with_entropy else None

        action = mx.random.multinomial(probs, 1)
        ind = mx.nd.stack(mx.nd.arange(probs.shape[0], ctx=action.context),
                          action.astype('float32'))
        selected_log_prob = F.gather_nd(log_prob, ind)

        actions.append(action[:, 0])
        entropies.append(entropy)
        log_probs.append(selected_log_prob)

    configs = []
    for idx in range(batch_size):
        config = {}
        for i, action in enumerate(actions):
            choice = action[idx].asscalar()
            k, space = self.spaces[i]
            config[k] = int(choice)
        configs.append(config)

    if with_details:
        entropies = F.stack(*entropies, axis=1) if with_entropy else entropies
        return configs, F.stack(*log_probs, axis=1), entropies
    else:
        return configs
def to_matrix_and_back(ctx, graph, transformation: Callable[[NDArray], NDArray]):
    nd_features = nd.stack(*[v.features for v in graph.vertices]) \
        .reshape(graph.n, graph.num_features) \
        .as_in_context(ctx)
    transformed = transformation(nd_features)
    for v in graph.vertices:
        v.features = nd.array(transformed[v.id, :]).reshape(-1, 1)
    features_shape = graph.vertices[0].features.shape
    graph.num_features = features_shape[0]
    print(features_shape)
def __prepare_real_in_real_out(self, batch):
    real_a = batch.data[0]
    real_a = real_a.transpose((0, 2, 3, 1))
    real_a = nd.array(np.squeeze(real_a.asnumpy(), axis=0), ctx=self.ctx)
    lab = rgb_to_lab(real_a, ctx=self.ctx)
    lightness_chan, a_chan, b_chan = preprocess_lab(lab)

    real_in = nd.expand_dims(lightness_chan, axis=3)
    real_in = real_in.transpose((3, 2, 0, 1))

    real_out = nd.stack(a_chan, b_chan, axis=2)
    real_out = nd.transpose(real_out, axes=(3, 2, 0, 1))
    return real_in, real_out
def _forward_alg(self, feats, lens_):
    batch_size = feats.shape[0]
    tagset_size = feats.shape[2]
    length = feats.shape[1]

    init_alphas = nd.full((self.tagset_size,), -10000.)
    init_alphas[self.tag_dictionary.get_idx_for_item(START_TAG)] = 0.

    forward_var_list = [init_alphas.tile((feats.shape[0], 1))]
    transitions = self.transitions.data().expand_dims(0).tile(
        (feats.shape[0], 1, 1))

    for i in range(feats.shape[1]):
        emit_score = feats[:, i, :]
        tag_var = \
            emit_score.expand_dims(2).tile((1, 1, transitions.shape[2])) + \
            transitions + \
            forward_var_list[i].expand_dims(2).tile(
                (1, 1, transitions.shape[2])).transpose([0, 2, 1])
        max_tag_var = nd.max(tag_var, axis=2)
        new_tag_var = tag_var - max_tag_var.expand_dims(2).tile(
            (1, 1, transitions.shape[2]))
        agg_ = nd.log(nd.sum(nd.exp(new_tag_var), axis=2))
        # max_tag_var + agg_ already has shape (batch, tagset); nd.full expects
        # a scalar val, so append the array directly
        forward_var_list.append(max_tag_var + agg_)
        # cloned = forward_var.clone()
        # forward_var[:, i + 1, :] = max_tag_var + agg_
        # forward_var = cloned

    forward_var = nd.stack(*forward_var_list)[
        lens_, nd.array(list(range(feats.shape[0])), dtype='int32'), :]

    terminal_var = forward_var + \
        self.transitions.data()[
            self.tag_dictionary.get_idx_for_item(STOP_TAG)].expand_dims(0).tile(
            (forward_var.shape[0], 1))
    alpha = log_sum_exp_batch(terminal_var)
    return alpha
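# log_sum_exp_batch is called above but not shown; a minimal numerically
# stable sketch consistent with its call site (input: batch x tagset,
# output: batch). An assumption, not the original helper.
import mxnet.ndarray as nd

def log_sum_exp_batch(vecs):
    # subtract the per-row max before exponentiating for numerical stability
    max_score = nd.max(vecs, axis=1)
    return max_score + nd.log(nd.sum(nd.exp(vecs - max_score.expand_dims(1)), axis=1))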
def transform_predict(data, data_shape=(3, 331, 331), resize=331):
    ctx = data.context
    im = resize_longer(data.asnumpy(), resize, data_shape)
    im = nd.array(im, ctx=ctx)
    im = im.astype('float32') / 255
    auglist = [
        # image.RandomGrayAug(1.0),
        image.ColorNormalizeAug(mean=(0.417, 0.402, 0.366),
                                std=(0.081, 0.079, 0.080)),
    ]
    for aug in auglist:
        im = aug(im)
    im = nd.transpose(im, (2, 0, 1))
    return nd.stack(im)  # add a leading batch dimension
def bilinear_roi_pooling(data, rois, spatial_scale, type="max"):
    """
    :param data: (BS, C, H, W)
    :param rois: (N, 5)
    :param spatial_scale: float
    :param type: "max" or "mean"
    :return: (N, C, C)
    """
    assert isinstance(spatial_scale, float)
    BS, C, H, W = data.shape
    N = rois.shape[0]
    out_data = []

    rois = rois.asnumpy()
    for i in range(N):
        roi = rois[i]
        batch_id = roi[0].astype(np.int64)
        x1, y1, x2, y2 = roi[1:] * spatial_scale
        x1, y1, x2, y2 = np.floor(x1), np.floor(y1), np.ceil(x2), np.ceil(y2)
        x1, y1, x2, y2 = np.clip(x1, 0, W), np.clip(y1, 0, H), \
            np.clip(x2, 0, W), np.clip(y2, 0, H)
        x1, y1, x2, y2 = x1.astype(np.int64), y1.astype(np.int64), \
            x2.astype(np.int64), y2.astype(np.int64)
        if x1 >= x2 or y1 >= y2:
            out_data.append(nd.zeros((C, C), ctx=data.context, dtype=data.dtype))
            continue

        # (C, h, w)
        roi_data = data[batch_id, :, y1:y2, x1:x2]
        # (h*w, C)
        roi_data = roi_data.reshape((C, -1)).transpose((1, 0))
        # (h*w, C, 1)
        roi_data = roi_data.reshape((0, 0, 1))
        # (h*w, C, C)
        out_product = nd.batch_dot(roi_data, roi_data.transpose((0, 2, 1)))

        # (C, C)
        if type == "max":
            reduce_product = nd.max(out_product, axis=0)
        elif type == "mean":
            reduce_product = nd.mean(out_product, axis=0)
        else:
            raise NotImplementedError()
        out_data.append(reduce_product)

    out_data = nd.stack(*out_data)
    return out_data
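# --- Hedged usage sketch (illustrative values, not from the original source) ---
# bilinear_roi_pooling consumes a feature map plus ROIs given as
# (batch_id, x1, y1, x2, y2) in image coordinates and returns one C x C
# outer-product ("bilinear") feature matrix per ROI.
import mxnet.ndarray as nd

data = nd.random.normal(shape=(1, 8, 16, 16))      # (BS, C, H, W)
rois = nd.array([[0, 0, 0, 32, 32],
                 [0, 8, 8, 48, 48]])               # (N, 5)
pooled = bilinear_roi_pooling(data, rois, spatial_scale=0.25, type="max")
print(pooled.shape)                                # (2, 8, 8)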
def unsorted_1d_segment_sum(input, seg_id, n_segs, dim):
    # TODO: support other dimensions
    assert dim == 0, 'MXNet only supports segment sum on first dimension'

    # Use SPMV to simulate segment sum
    ctx = input.context
    n_inputs = input.shape[0]
    input_shape_suffix = input.shape[1:]
    input = input.reshape(n_inputs, -1)
    n_range = nd.arange(n_inputs, dtype='int64').as_in_context(input.context)
    w_nnz = nd.ones(n_inputs).as_in_context(input.context)
    w_nid = nd.stack(seg_id, n_range, axis=0)
    w = nd.sparse.csr_matrix((w_nnz, (seg_id, n_range)), (n_segs, n_inputs))
    w = w.as_in_context(input.context)
    y = nd.dot(w, input)
    y = nd.reshape(y, (n_segs,) + input_shape_suffix)
    return y
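# --- Hedged usage sketch (illustrative values) ---
# Rows of `input` are summed into `n_segs` buckets selected by `seg_id`.
import mxnet.ndarray as nd

x = nd.array([[1, 1], [2, 2], [3, 3]])
seg = nd.array([0, 1, 0], dtype='int64')
print(unsorted_1d_segment_sum(x, seg, n_segs=2, dim=0))
# [[4. 4.]
#  [2. 2.]]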
def ten_crop(img, size):
    H, W = size
    iH, iW = img.shape[1:3]

    if iH < H or iW < W:
        raise ValueError('image size is smaller than crop size')

    img_flip = img[:, :, ::-1]
    crops = nd.stack(
        img[:, (iH - H) // 2:(iH + H) // 2, (iW - W) // 2:(iW + W) // 2],
        img[:, 0:H, 0:W],
        img[:, iH - H:iH, 0:W],
        img[:, 0:H, iW - W:iW],
        img[:, iH - H:iH, iW - W:iW],

        img_flip[:, (iH - H) // 2:(iH + H) // 2, (iW - W) // 2:(iW + W) // 2],
        img_flip[:, 0:H, 0:W],
        img_flip[:, iH - H:iH, 0:W],
        img_flip[:, 0:H, iW - W:iW],
        img_flip[:, iH - H:iH, iW - W:iW],
    )
    return crops
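# --- Hedged usage sketch (illustrative values) ---
# ten_crop takes a CHW image and returns the center + four corner crops of the
# image and of its horizontal flip, stacked into (10, C, H, W).
import mxnet.ndarray as nd

img = nd.random.uniform(shape=(3, 256, 256))
crops = ten_crop(img, size=(224, 224))
print(crops.shape)  # (10, 3, 224, 224)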
def broad_multiply(model_output, semantic_region, ctx):
    """model_output: 1024 x 7 x 7; semantic_region: 20 x 7 x 7"""
    batchsz, a, _, _ = semantic_region.shape
    ll = []
    for ii in range(0, batchsz):
        for i in range(0, a):
            tmp = semantic_region[ii][i][:][:]
            # multiplying by the scalar sum of the region is equivalent to the
            # commented-out elementwise accumulation below
            res_tmp = model_output[ii] * (tmp.sum())
            # res = nd.zeros((1024, 7, 7), ctx)
            # b, c = tmp.shape
            # for j in range(0, b):
            #     for k in range(0, c):
            #         res = res + model_output[ii] * (tmp[j][k])
            # print("res==tmp: ", res == res_tmp)
            ll.append(res_tmp)
    stk = nd.stack(*ll)
    return stk
def train_process(sample):
    data, label = sample
    data = data.as_in_context(cfg.ctx)
    label = label.as_in_context(cfg.ctx)
    with autograd.record():
        with autograd.record(train_mode=cfg.base_train):
            x = Net.base_forward(data)
        if cfg.withpcb:
            with autograd.record(train_mode=cfg.rpp_train):
                x = Net.split_forward(x)
        with autograd.record(train_mode=cfg.tail_train):
            ID, Fea = Net.tail_forward(x)
        # ID, Fea = Net(data)
        if isinstance(ID, list):
            losses = [softmax_cross_entropy(id_, label) for id_ in ID]
            loss = ndarray.stack(*losses, axis=0).mean(axis=0)
        else:
            loss = softmax_cross_entropy(ID, label)
    loss.backward()
    for trainer in trainers:
        trainer.step(data.shape[0])
    return loss, ID