def test_fwd_P2B(test_case):
    """ compare eager fwd and lazy fwd """
    rank = flow.env.get_rank()
    local_x = flow.tensor(x, dtype=flow.float32, device=flow.device(f"cuda:{rank}"))
    local_y = flow.tensor(y, dtype=flow.float32, device=flow.device(f"cuda:{rank}"))

    # Eager reference: duplicate the local tensors along dim 1, then matmul + relu.
    z = flow._C.matmul(
        flow.cat([local_x, local_x], dim=1),
        flow.cat([local_y, local_y], dim=1),
        transpose_b=True,
    )
    z = flow._C.relu(z)

    # Run the same computation lazily: make the tensors global (split along dim 1
    # across two ranks) and execute through a graph.
    placement = flow.placement("cuda", ranks=[0, 1])
    sbp = flow.sbp.split(1)
    c_x = local_x.to_global(placement=placement, sbp=sbp)
    c_y = local_y.to_global(placement=placement, sbp=sbp)

    m = MyModule1(c_y)
    g = MyGraph(m)
    g_z = g(c_x)

    test_case.assertTrue(np.allclose(z.numpy(), g_z.to_local().numpy()))
def forward(self, inputs, targets):
    """
    Args:
        inputs (flow.Tensor): feature matrix with shape (batch_size, feat_dim).
        targets (flow.LongTensor): ground truth labels with shape (batch_size,).
    """
    n = inputs.size(0)

    # Compute pairwise distance, replace by the official when merged:
    # dist[i][j] = ||x_i||^2 + ||x_j||^2 - 2 * <x_i, x_j>
    dist = flow.pow(inputs, 2).sum(dim=1).expand(n, n)
    dist = dist + flow.transpose(dist, dim0=1, dim1=0)
    temp1 = -2 * flow.matmul(inputs, flow.transpose(inputs, dim0=1, dim1=0))
    dist = flow.add(dist, temp1)
    dist = flow.sqrt(flow.clamp(dist, min=1e-12))

    # For each anchor, find the hardest positive and negative.
    mask = targets.expand(n, n).eq(
        flow.transpose(targets.expand(n, n), dim0=1, dim1=0))
    dist_ap, dist_an = [], []
    y1 = flow.zeros((1, n), dtype=flow.float32).to("cuda")
    # Large sentinel (overflows to +inf in float32) so positives never win the min.
    y2 = flow.Tensor(np.exp(100 * np.ones((1, n)))).to("cuda")
    for i in range(n):
        temp_dist = flow.slice(dist, [(i, i + 1, 1)])
        temp_mask = flow.slice(mask, [(i, i + 1, 1)])
        temp_mask_rev = flow.slice(1 - mask, [(i, i + 1, 1)])
        dist_ap.append(temp_mask.where(temp_dist, y1).max().unsqueeze(0))
        dist_an.append(
            temp_mask_rev.where(temp_dist, y2).min().unsqueeze(0))
    dist_ap = flow.cat(dist_ap)
    dist_an = flow.cat(dist_an)

    # Compute ranking hinge loss.
    y = flow.ones_like(dist_an)
    return self.ranking_loss(dist_an, dist_ap, y)
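# Illustrative check (not from the source): verify the expand/transpose
# pairwise-distance identity used above,
#   ||x_i - x_j||^2 = ||x_i||^2 + ||x_j||^2 - 2 <x_i, x_j>,
# against a naive double loop in NumPy.
import numpy as np

feats = np.random.randn(4, 8).astype(np.float32)
sq = np.power(feats, 2).sum(axis=1, keepdims=True)   # ||x_i||^2, shape (4, 1)
dist = np.sqrt(np.clip(sq + sq.T - 2.0 * feats @ feats.T, 1e-12, None))
naive = np.array([[np.linalg.norm(a - b) for b in feats] for a in feats])
assert np.allclose(dist, naive, atol=1e-5)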
def forward(self, x, c):
    c = c.view(c.size(0), c.size(1), 1, 1)
    c1 = c.repeat(1, 1, x.size(2), x.size(3))
    x = flow.cat([x, c1], dim=1)
    x = self.d1(x)
    c2 = c.repeat(1, 1, x.size(2), x.size(3))
    x = flow.cat([x, c2], dim=1)
    x = self.d2(x)
    c3 = c.repeat(1, 1, x.size(2), x.size(3))
    x = flow.cat([x, c3], dim=1)
    x = self.d3(x)
    c4 = c.repeat(1, 1, x.size(2), x.size(3))
    x = flow.cat([x, c4], dim=1)
    x = self.d4(x)
    c5 = c.repeat(1, 1, x.size(2), x.size(3))
    x = flow.cat([x, c5], dim=1)
    x = self.conv(x)
    x = self.pool(x)
    x = flow.squeeze(x)
    x = flow.tanh(x)
    return x
def test(opt):
    model = DeepQNetwork()
    pretrain_models = flow.load("{}".format(opt.saved_path))
    model.load_state_dict(pretrain_models)
    model.eval()
    model.to("cuda")
    game_state = GameState()
    image, reward, terminal = game_state.frame_step(0)
    image = pre_processing(
        image[:game_state.SCREENWIDTH, :int(game_state.BASEY)],
        opt.image_size,
        opt.image_size,
    )
    image = flow.Tensor(image)
    image = image.to("cuda")
    state = flow.cat(tuple(image for _ in range(4))).unsqueeze(0)

    while True:
        prediction = model(state)[0]
        action = flow.argmax(prediction).numpy()[0]
        next_image, reward, terminal = game_state.frame_step(action)
        next_image = pre_processing(
            next_image[:game_state.SCREENWIDTH, :int(game_state.BASEY)],
            opt.image_size,
            opt.image_size,
        )
        next_image = flow.Tensor(next_image)
        next_image = next_image.to("cuda")
        next_state = flow.cat((state[0, 1:, :, :], next_image)).unsqueeze(0)
        state = next_state
def forward(self, hidden_states, layer_past=None, use_cache=False):
    hidden_states = self.c_attn(hidden_states)
    query, key, value = flow.chunk(hidden_states, chunks=3, dim=2)

    query = self._split_heads(query, self.num_heads, self.head_dim)
    key = self._split_heads(key, self.num_heads, self.head_dim)
    value = self._split_heads(value, self.num_heads, self.head_dim)

    if layer_past is not None:
        past_key, past_value = layer_past
        key = flow.cat((past_key, key), dim=-2)
        value = flow.cat((past_value, value), dim=-2)

    if use_cache is True:
        present = (key, value)
    else:
        present = None

    attn_output, attn_weights = self._attn(query, key, value)

    attn_output = self._merge_heads(attn_output, self.num_heads, self.head_dim)
    attn_output = self.c_proj(attn_output)
    attn_output = self.resid_dropout(attn_output)

    outputs = (attn_output, present, attn_weights)
    return outputs
def inference(
    self,
    x,
    xmask,
    memory,
    memory_mask=None,
    pos=None,
    cache={"slf": None, "src": None},
):
    # NOTE: a shared mutable default dict persists across calls; callers should
    # pass a fresh cache per decoded sequence.
    if self.normalize_before:
        x = self.norm1(x)
    residual = x
    if self.relative_positional:
        slf_attn_out, slf_attn_weight, slf_cache = self.slf_attn.inference(
            x, xmask, pos, cache=cache["slf"]
        )
    else:
        slf_attn_out, slf_attn_weight, slf_cache = self.slf_attn.inference(
            x, xmask, cache=cache["slf"]
        )
    if self.concat_after:
        x = residual + self.concat_linear1(flow.cat([x, slf_attn_out], dim=-1))
    else:
        x = residual + self.dropout1(slf_attn_out)
    if not self.normalize_before:
        x = self.norm1(x)

    if self.normalize_before:
        x = self.norm2(x)
    residual = x
    src_attn_out, src_attn_weight, src_cache = self.src_attn.inference(
        x, memory, memory_mask, cache["src"]
    )
    if self.concat_after:
        x = residual + self.concat_linear2(flow.cat([x, src_attn_out], dim=-1))
    else:
        x = residual + self.dropout2(src_attn_out)
    if not self.normalize_before:
        x = self.norm2(x)

    if self.normalize_before:
        x = self.norm3(x)
    residual = x
    x = residual + self.dropout3(self.feed_forward(x))
    if not self.normalize_before:
        x = self.norm3(x)

    return (
        x,
        {"slf_attn_weight": slf_attn_weight, "src_attn_weight": src_attn_weight},
        {"slf": slf_cache, "src": src_cache},
    )
def forward(self, x: Tensor) -> Tensor:
    if self.stride == 1:
        cnt_at_dim1 = int(x.shape[1] / 2)
        x1 = x[:, 0:cnt_at_dim1, ::]
        x2 = x[:, cnt_at_dim1:, ::]
        out = flow.cat((x1, self.branch2(x2)), dim=1)
    else:
        out = flow.cat((self.branch1(x), self.branch2(x)), dim=1)
    out = channel_shuffle(out, 2)
    return out
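# channel_shuffle is called above but not defined in this snippet. A minimal
# sketch of the usual ShuffleNet-style implementation (an assumption, not the
# source's code): regroup channels with a reshape / transpose / reshape.
def channel_shuffle_sketch(x, groups):
    batch, channels, height, width = x.shape
    channels_per_group = channels // groups
    # (N, C, H, W) -> (N, groups, C/groups, H, W)
    x = flow.reshape(x, (batch, groups, channels_per_group, height, width))
    # swap the group and per-group channel axes to interleave the branches
    x = flow.transpose(x, dim0=1, dim1=2)
    # flatten back to (N, C, H, W)
    return flow.reshape(x, (batch, channels, height, width))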
def forward(self, tgt, tgt_mask, memory, memory_mask, pos):
    """Compute decoded features

    :param flow.Tensor tgt: decoded previous target features (batch, max_time_out, size)
    :param flow.Tensor tgt_mask: mask for tgt (batch, max_time_out)
    :param flow.Tensor memory: encoded source features (batch, max_time_in, size)
    :param flow.Tensor memory_mask: mask for memory (batch, max_time_in)
    :param flow.Tensor pos: relative positional encoding (used when relative_positional is set)
    """
    if self.normalize_before:
        tgt = self.norm1(tgt)
    residual = tgt
    if self.relative_positional:
        slf_attn_out, slf_attn_weights = self.slf_attn(tgt, tgt_mask, pos)
    else:
        slf_attn_out, slf_attn_weights = self.slf_attn(tgt, tgt_mask)
    if self.concat_after:
        x = residual + self.concat_linear1(flow.cat([tgt, slf_attn_out], dim=-1))
    else:
        x = residual + self.dropout1(slf_attn_out)
    if not self.normalize_before:
        x = self.norm1(x)

    if self.normalize_before:
        x = self.norm2(x)
    residual = x
    src_attn_out, src_attn_weights = self.src_attn(x, memory, memory_mask)
    if self.concat_after:
        x = residual + self.concat_linear2(flow.cat([x, src_attn_out], dim=-1))
    else:
        x = residual + self.dropout2(src_attn_out)
    if not self.normalize_before:
        x = self.norm2(x)

    if self.normalize_before:
        x = self.norm3(x)
    residual = x
    x = residual + self.dropout3(self.feed_forward(x))
    if not self.normalize_before:
        x = self.norm3(x)

    return (
        x,
        {
            "slf_attn_weights": slf_attn_weights,
            "src_attn_weights": src_attn_weights,
        },
    )
def forward(self, x: flow.Tensor) -> flow.Tensor:
    x = self.conv1(x)
    x = [self.branch3x3_conv(x), self.branch3x3_pool(x)]
    x = flow.cat(x, 1)
    x = [self.branch7x7a(x), self.branch7x7b(x)]
    x = flow.cat(x, 1)
    x = [self.branchpoola(x), self.branchpoolb(x)]
    x = flow.cat(x, 1)
    return x
def _test_concat_with_three_tensor_backward(test_case, device):
    input1 = flow.tensor(
        np.random.randn(2, 6, 5, 3),
        dtype=flow.float32,
        device=flow.device(device),
        requires_grad=True,
    )
    input2 = flow.tensor(
        np.random.randn(2, 6, 5, 3),
        dtype=flow.float32,
        device=flow.device(device),
        requires_grad=True,
    )
    input3 = flow.tensor(
        np.random.randn(2, 6, 5, 3),
        dtype=flow.float32,
        device=flow.device(device),
        requires_grad=True,
    )
    of_out = flow.cat([input1, input2, input3], dim=1)
    of_out = of_out.sum()
    of_out.backward()
    test_case.assertTrue(
        np.allclose(input1.grad.numpy(), np.ones((2, 6, 5, 3)), 0.0001, 0.0001)
    )
    test_case.assertTrue(
        np.allclose(input2.grad.numpy(), np.ones((2, 6, 5, 3)), 0.0001, 0.0001)
    )
    test_case.assertTrue(
        np.allclose(input3.grad.numpy(), np.ones((2, 6, 5, 3)), 0.0001, 0.0001)
    )
def train_discriminator(self, images, label1, label0):
    z = self.generate_noise()
    g_out = self.generator(z)

    # Run real and generated images through the discriminator as one batch,
    # then split the logits back apart.
    cat = flow.cat((images, g_out), dim=0)
    result = self.discriminator(cat)
    d_logits = result[:images.shape[0]]
    g_logits = result[images.shape[0]:]

    d_loss_real = self.of_cross_entropy(d_logits, label1)
    d_loss_fake = self.of_cross_entropy(g_logits, label0)
    d_loss = d_loss_fake + d_loss_real
    d_loss.backward()
    self.optimizerD.step()
    self.optimizerD.zero_grad()

    return (
        to_numpy(d_loss),
        to_numpy(d_loss_fake),
        to_numpy(d_loss_real),
        to_numpy(d_logits),
        to_numpy(g_logits),
    )
def inference(self, x, mask, pos=None, cache=None):
    if self.normalize_before:
        x = self.norm1(x)
    residual = x
    if self.relative_positional:
        slf_attn_out, slf_attn_weights, new_cache = self.slf_attn.inference(
            x, mask, cache, pos)
    else:
        slf_attn_out, slf_attn_weights, new_cache = self.slf_attn.inference(
            x, mask, cache)
    if self.concat_after:
        x = residual + self.concat_linear(
            flow.cat([x, slf_attn_out], dim=-1))
    else:
        x = residual + slf_attn_out
    if not self.normalize_before:
        x = self.norm1(x)

    if self.normalize_before:
        x = self.norm2(x)
    residual = x
    x = residual + self.feed_forward(x)
    if not self.normalize_before:
        x = self.norm2(x)
    return x, new_cache, {"slf_attn_weights": slf_attn_weights}
def get_summarized_data(self):
    dim = self.dim()
    if dim == 0:
        return self
    if dim == 1:
        if self.size(0) > 2 * PRINT_OPTS.edgeitems:
            return flow.cat(
                (
                    slice_wrapper(self, [0, PRINT_OPTS.edgeitems, 1]),
                    slice_wrapper(
                        self, [self.size(0) - PRINT_OPTS.edgeitems, self.size(0), 1]
                    ),
                )
            )
        else:
            return self
    if self.size(0) > 2 * PRINT_OPTS.edgeitems:
        start = [
            slice_wrapper(self, [i, i + 1, 1])
            for i in range(0, PRINT_OPTS.edgeitems)
        ]
        end = [
            slice_wrapper(self, [i, i + 1, 1])
            for i in range(self.shape[0] - PRINT_OPTS.edgeitems, self.shape[0])
        ]
        return flow.stack([get_summarized_data(x) for x in (start + end)])
    else:
        return flow.stack(
            [
                get_summarized_data(slice_wrapper(self, [i, i + 1, 1]))
                for i in range(len(self))
            ]
        )
def conv_bank(x, module_list, act):
    outs = []
    for layer in module_list:
        out = act(pad_layer(x, layer))
        outs.append(out)
    out = flow.cat(outs + [x], dim=1)
    return out
def concat_cond(x, cond):
    # x = [batch_size, x_channels, length]
    # cond = [batch_size, c_channels]
    cond = cond.unsqueeze(dim=2)
    cond = cond.expand(*cond.size()[:-1], x.size(-1))
    out = flow.cat([x, cond], dim=1)
    return out
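# Minimal usage sketch for concat_cond (shapes are illustrative assumptions):
# the per-sample condition vector is broadcast along the time axis and stacked
# onto the feature channels.
x_demo = flow.randn(2, 8, 100)    # [batch_size, x_channels, length]
cond_demo = flow.randn(2, 4)      # [batch_size, c_channels]
out_demo = concat_cond(x_demo, cond_demo)
assert out_demo.shape == (2, 12, 100)  # channels = x_channels + c_channels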
def preprocess(self, padded_input):
    """
    Generate decoder input and output label from padded_input.
    Add <sos> to the decoder input, and add <eos> to the decoder output label.
    """
    ys = [y[y != IGNORE_ID] for y in padded_input]
    # prepare input and output word sequences with sos/eos IDs
    eos = ys[0].new_ones([1]).fill_(self.eos_id)
    sos = ys[0].new_ones([1]).fill_(self.sos_id)
    ys_in = [flow.cat([sos, y], dim=0) for y in ys]
    ys_out = [flow.cat([y, eos], dim=0) for y in ys]
    ys_in_pad = pad_list(ys_in, self.eos_id)
    ys_out_pad = pad_list(ys_out, IGNORE_ID)
    assert ys_in_pad.size() == ys_out_pad.size()
    return ys_in_pad, ys_out_pad
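# Worked example (values assumed for illustration) with sos_id=1, eos_id=2,
# IGNORE_ID=-1, and two variable-length label sequences:
#   ys     = [[4, 5, 6], [7, 8]]
#   ys_in  = [[1, 4, 5, 6], [1, 7, 8, 2]]   # <sos> prepended, padded with eos_id
#   ys_out = [[4, 5, 6, 2], [7, 8, 2, -1]]  # <eos> appended, padded with IGNORE_ID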
def sample_sequence(
    model,
    length,
    context,
    num_samples=1,
    temperature=1,
    top_k=1,
    top_p=0.0,
    device="cuda",
):
    context = flow.tensor(context, dtype=flow.long, device=device)
    context = context.unsqueeze(0).repeat(num_samples, 1)
    generated = context
    past_key_values = None
    with flow.no_grad():
        for _ in trange(length):
            outputs = model(generated, past_key_values=past_key_values, use_cache=True)
            logits, past_key_values = outputs[:2]
            next_token_logits = logits[:, -1, :] / temperature
            filtered_logits = top_k_top_p_filtering(
                next_token_logits, top_k=top_k, top_p=top_p
            )
            probs = filtered_logits.softmax(-1)
            next_token = probs.argmax(-1)
            # next_token = flow.multinomial(flow.softmax(filtered_logits, dim=-1), num_samples=1)
            generated = flow.cat((generated, next_token.unsqueeze(0)), dim=1)
    return generated
def forward(self, x, init_states=None):
    """Assumes x is of shape (batch, sequence, feature)"""
    bs, seq_sz, _ = x.size()
    hidden_seq = []
    if init_states is None:
        h_t, c_t = (
            flow.zeros((bs, self.hidden_size)).to(x.device),
            flow.zeros((bs, self.hidden_size)).to(x.device),
        )
    else:
        h_t, c_t = init_states
    HS = self.hidden_size
    for t in range(seq_sz):
        x_t = x[:, t, :].reshape(x.shape[0], x.shape[2])
        gates = flow.matmul(x_t, self.W) + flow.matmul(h_t, self.U) + self.bias
        i_t, f_t, g_t, o_t = (
            flow.sigmoid(gates[:, :HS]),
            flow.sigmoid(gates[:, HS : HS * 2]),
            flow.tanh(gates[:, HS * 2 : HS * 3]),
            flow.sigmoid(gates[:, HS * 3 :]),
        )
        c_t = f_t * c_t + i_t * g_t
        h_t = o_t * flow.tanh(c_t)
        hidden_seq.append(h_t.unsqueeze(1))
    hidden_seq = flow.cat(hidden_seq, dim=1)
    return hidden_seq, (h_t, c_t)
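# Hedged usage sketch: the class name `CustomLSTM` and its constructor are
# assumptions for illustration, not taken from the source. Shapes follow the
# forward above: input (batch, sequence, feature), output (batch, sequence, hidden).
batch, seq_len, feat, hidden = 3, 5, 10, 16
lstm = CustomLSTM(feat, hidden)            # hypothetical constructor
x_demo = flow.randn(batch, seq_len, feat)
seq_out, (h_n, c_n) = lstm(x_demo)
assert seq_out.shape == (batch, seq_len, hidden)
assert h_n.shape == (batch, hidden) and c_n.shape == (batch, hidden)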
def forward(self, x, mask, pos=None):
    if self.normalize_before:
        x = self.norm1(x)
    residual = x
    if self.relative_positional:
        slf_attn_out, slf_attn_weights = self.slf_attn(x, mask, pos)
    else:
        slf_attn_out, slf_attn_weights = self.slf_attn(x, mask)
    if self.concat_after:
        x = residual + self.concat_linear(
            flow.cat([x, slf_attn_out], dim=-1))
    else:
        x = residual + self.dropout1(slf_attn_out)
    if not self.normalize_before:
        x = self.norm1(x)

    if self.normalize_before:
        x = self.norm2(x)
    residual = x
    x = residual + self.dropout2(self.feed_forward(x))
    if not self.normalize_before:
        x = self.norm2(x)
    return x, {"slf_attn_weights": slf_attn_weights}
def forward(self, x, init_states=None):
    seq_sz, bs, _ = x.size()
    hidden_seq = []
    if init_states is None:
        h_t, c_t = (
            flow.zeros((bs, self.hidden_size)).to("cuda"),
            flow.zeros((bs, self.hidden_size)).to("cuda"),
        )
    else:
        h_t, c_t = init_states
    HS = self.hidden_size
    for t in range(seq_sz):
        x_t = x[t, :, :]
        x_t = x_t.reshape(x.shape[1], x.shape[2])
        gates = flow.matmul(x_t, self.W) + flow.matmul(h_t, self.U) + self.bias
        i_t, f_t, g_t, o_t = (
            flow.sigmoid(gates[:, :HS]),
            flow.sigmoid(gates[:, HS:HS * 2]),
            flow.tanh(gates[:, HS * 2:HS * 3]),
            flow.sigmoid(gates[:, HS * 3:]),
        )
        c_t = f_t * c_t + i_t * g_t
        h_t = o_t * flow.tanh(c_t)
        hidden_seq.append(h_t.unsqueeze(0))
    hidden_seq = flow.cat(hidden_seq, dim=0)
    return hidden_seq, (h_t, c_t)
def forward(self, inputs: flow.Tensor) -> flow.Tensor:
    net = self.conv2d_1a_3x3(inputs)
    net = self.conv2d_2a_3x3(net)
    net = self.conv2d_2b_3x3(net)
    net = self.MaxPool_3a_3x3(net)
    net = self.conv2d_3b_1x1(net)
    net = self.conv2d_4a_3x3(net)
    net = self.MaxPool_5a_3x3(net)  # stem

    net = self.Mixed_5b(net)
    net = self.block35(net)
    netB1 = self.conv_ls1(net)
    netB1 = self.MaxPool_3x3_ls1(netB1)
    net = self.Mixed_6a(net)
    net = self.block17(net)
    netB2 = self.conv_ls2(net)
    net = self.Mixed_7a(net)
    net = self.block8(net)
    netB3 = [netB1, netB2, net]
    netAll = flow.cat(netB3, 1)
    netAll = self.conv_ls3(netAll)
    net = self.Conv2d_7b_1x1(netAll)
    net = self.AvgPool_1a_8x8(net)
    net = flow.reshape(net, [net.shape[0], -1])
    hidden = self.dense(net)
    hidden = self.relu(hidden)
    return hidden
def test_concat_with_axis_one(test_case):
    input1 = flow.tensor(np.random.randn(2, 6, 5, 3), dtype=flow.float32)
    input2 = flow.tensor(np.random.randn(2, 6, 5, 3), dtype=flow.float32)
    of_out = flow.cat([input1, input2], dim=1)
    np_out = np.concatenate((input1.numpy(), input2.numpy()), axis=1)
    test_case.assertTrue(np.array_equal(of_out.numpy(), np_out))
def forward(self, input: flow.Tensor) -> flow.Tensor:
    x = input
    x = [self.Branch_0(x), self.Branch_1(x), self.Branch_2(x), self.Branch_3(x)]
    output = flow.cat(x, 1)
    return output
def forward(self, x, init_states=None):
    """Assumes x is of shape (sequence, batch, feature)"""
    seq_sz, bs, _ = x.size()
    hidden_seq = []
    if init_states is None:
        h_t, c_t = (
            flow.zeros((bs, self.hidden_size)).to("cuda"),
            flow.zeros((bs, self.hidden_size)).to("cuda"),
        )
    else:
        h_t, c_t = init_states
    HS = self.hidden_size
    for t in range(seq_sz):
        x_t = x[t, :, :].reshape(x.shape[1], x.shape[2])
        # batch the computations into a single matrix multiplication
        # NOTE(Xu Zhiqiu): flow does not support view now, use reshape instead
        gates = flow.matmul(x_t, self.W) + flow.matmul(h_t, self.U) + self.bias
        i_t, f_t, g_t, o_t = (
            flow.sigmoid(gates[:, :HS]),
            flow.sigmoid(gates[:, HS:HS * 2]),
            flow.tanh(gates[:, HS * 2:HS * 3]),
            flow.sigmoid(gates[:, HS * 3:]),
        )
        c_t = f_t * c_t + i_t * g_t
        h_t = o_t * flow.tanh(c_t)
        hidden_seq.append(h_t.unsqueeze(0))
    hidden_seq = flow.cat(hidden_seq, dim=0)
    return hidden_seq, (h_t, c_t)
def nllloss_1d(self, input, target):
    # Build (row, class) index pairs so gather_nd picks input[i, target[i]]
    # for every row i.
    n = input.shape[0]
    idx = flow.unsqueeze(flow.arange(0, n, 1), dim=1)
    target = flow.unsqueeze(target, dim=1)
    t = flow.cat([idx, target], dim=1)
    res = self._gather_nd_op(input, t)[0]
    return res
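# A NumPy sketch (illustrative, not from the source) of the gather_nd indexing
# used by nllloss_1d: each (row, class) pair selects input[i, target[i]].
import numpy as np

inp = np.arange(12, dtype=np.float32).reshape(3, 4)  # (n, num_classes)
target = np.array([2, 0, 1])
t = np.stack([np.arange(3), target], axis=1)         # [[0, 2], [1, 0], [2, 1]]
picked = inp[t[:, 0], t[:, 1]]                       # inp[0,2], inp[1,0], inp[2,1]
assert np.allclose(picked, [2.0, 4.0, 9.0])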
def forward(self, x, hidden=None):
    batch_size, seq_len, _ = x.size()
    H_S = self.hidden_size
    hidden_seq = []
    if hidden is None:
        h_t = flow.zeros((batch_size, self.hidden_size))
    else:
        h_t = hidden
    for t in range(seq_len):
        x_t = x[:, t, :]
        gates_1 = flow.matmul(x_t, self.inp_W) + self.inp_b
        gates_2 = flow.matmul(h_t, self.hid_W) + self.hid_b
        r_gate = flow.sigmoid(gates_1[:, :H_S] + gates_2[:, :H_S])
        z_gate = flow.sigmoid(gates_1[:, H_S:H_S * 2] + gates_2[:, H_S:H_S * 2])
        h_t_ = flow.tanh(
            gates_1[:, H_S * 2:H_S * 3] + r_gate * gates_2[:, H_S * 2:H_S * 3])
        h_t = (1 - z_gate) * h_t_ + z_gate * h_t
        hidden_seq.append(h_t.unsqueeze(1))
    hidden_seq = flow.cat(hidden_seq, dim=1)
    return hidden_seq, h_t
def forward(self, x):
    if x.device == flow.device("cpu") and self.groups > 1:
        # Grouped convolution on CPU: split the input channels and the filters
        # into `groups` chunks, convolve each pair, and concatenate the results.
        # NOTE: the conv parameters below (padding_before, channel_pos,
        # kernel_size_list, strides, dilations, name) are assumed to be stored
        # on `self`; the snippet does not show where they come from.
        in_channel_axis = 1
        filter_out_axis = 0
        in_split_list = ConvUtil.split(
            x, axis=in_channel_axis, split_num=self.groups
        )
        filter_split_list = ConvUtil.split(
            self.weight, axis=filter_out_axis, split_num=self.groups
        )
        out_list = []
        for i in range(len(in_split_list)):
            out_list.append(
                self._op(
                    in_split_list[i],
                    filter_split_list[i],
                    self.padding_before,
                    self.channel_pos,
                    self.kernel_size_list,
                    self.strides,
                    self.dilations,
                    groups=1,
                    name=self.name + str(i),
                )[0]
            )
        res = flow.cat(out_list, dim=in_channel_axis)
    else:
        res = self._op(x, self.weight)[0]
    if self._bias_add_op is not None:
        res = self._bias_add_op(res, self.bias)[0]
    return res
def forward(self, x):
    x1, x2 = self.split(x)
    out = F.relu(self.bn1(self.conv1(x2)))
    out = self.bn2(self.conv2(out))
    out = F.relu(self.bn3(self.conv3(out)))
    out = oneflow.cat([x1, out], 1)
    out = self.shuffle(out)
    return out
def sinc(band, t_right):
    # Right half of a sinc kernel; the left half is its mirror image, and the
    # center sample (t = 0, where sinc = 1) is inserted explicitly to avoid
    # dividing by zero.
    y_right = flow.sin(
        2 * math.pi * band * t_right) / (2 * math.pi * band * t_right)
    y_left = flip(y_right, 0)
    y = flow.cat([y_left, flow.ones(1).to("cuda"), y_right])
    return y
def train_discriminator(self, input, target, label0, label1):
    g_out = self.netG(input)

    # Fake; stop backprop to the generator by detaching fake_B
    fake_AB = flow.cat([input, g_out.detach()], 1)
    pred_fake = self.netD(fake_AB)
    d_fake_loss = self.criterionGAN(pred_fake, label0)

    # Real
    real_AB = flow.cat([input, target], 1)
    pred_real = self.netD(real_AB)
    d_real_loss = self.criterionGAN(pred_real, label1)

    # combine loss and calculate gradients
    d_loss = (d_fake_loss + d_real_loss) * 0.5
    d_loss.backward()
    self.optimizerD.step()
    self.optimizerD.zero_grad()

    return to_numpy(d_fake_loss), to_numpy(d_real_loss), to_numpy(d_loss)