def clean_branch(branch, parent, starting_ref):
    if (git_hash(parent) != git_hash(starting_ref)
            and git_hash(branch) != git_hash(starting_ref)):
        print('Rebasing:', branch)

        # Try a plain rebase first.
        if rebase(parent, starting_ref, branch, abort=True).success:
            return

        # Maybe squashing will help?
        print("Failed! Attempting to squash", branch, "...", end=' ')
        squash_branch = branch + "_squash_attempt"
        run_git('checkout', '-b', squash_branch)
        squash()

        squash_ret = rebase(parent, starting_ref, squash_branch, abort=True)
        run_git('checkout', branch)
        run_git('branch', '-D', squash_branch)

        if squash_ret.success:
            print('Success!')
            squash()
            final_rebase = rebase(parent, starting_ref, branch)
            assert final_rebase.success == squash_ret.success

        if not squash_ret.success:
            print(squash_ret.message)
            print('Failure :(')
            print('Your working copy is in mid-rebase. Please completely resolve and')
            print('run `git reup` again.')
            sys.exit(1)
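# The helpers used above (`git_hash`, `rebase`, `run_git`, `squash`) are defined
# elsewhere in the tool. Below is a hypothetical sketch of what `rebase` and
# `squash` might look like, assuming `rebase` returns an object with
# `.success`/`.message` and `squash` collapses the current branch into a single
# commit; this is an illustration, not the repo's actual implementation.
import collections
import subprocess

RebaseRet = collections.namedtuple('RebaseRet', 'success message')

def run_git(*args):
    return subprocess.check_output(('git',) + args, text=True).strip()

def git_hash(ref):
    return run_git('rev-parse', ref)

def rebase(parent, start, branch, abort=False):
    try:
        run_git('rebase', '--onto', parent, start, branch)
        return RebaseRet(True, '')
    except subprocess.CalledProcessError as e:
        if abort:
            run_git('rebase', '--abort')
        return RebaseRet(False, str(e))

def squash():
    # Collapse everything since the merge-base with the upstream into one commit.
    base = run_git('merge-base', 'HEAD', '@{upstream}')
    run_git('reset', '--soft', base)
    run_git('commit', '-m', 'squashed commit')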
def forward(self, x):
    # x: [batch_size, in_capsules=1152, in_capsule_size=8]
    batch_size = x.size(0)

    x = torch.stack([x] * self.out_capsules, dim=2)
    # x: [batch_size, in_capsules=1152, out_capsules=10, in_capsule_size=8]

    W = torch.cat([self.W.unsqueeze(0)] * batch_size, dim=0)
    # W: [batch_size, in_capsules=1152, out_capsules=10, out_capsule_size=16, in_capsule_size=8]

    # Transform inputs by weight matrix `W`.
    u_hat = torch.matmul(W, x.unsqueeze(4))  # matrix multiplication
    # u_hat: [batch_size, in_capsules=1152, out_capsules=10, out_capsule_size=16, 1]

    u_hat_detached = u_hat.detach()
    # u_hat_detached: [batch_size, in_capsules=1152, out_capsules=10, out_capsule_size=16, 1]
    # In forward pass, `u_hat_detached` = `u_hat`, and
    # in backward, no gradient can flow from `u_hat_detached` back to `u_hat`.

    # Initialize routing logits to zero.
    b_ij = Variable(torch.zeros(self.in_capsules, self.out_capsules, 1))
    if self.gpu >= 0:
        b_ij = b_ij.cuda(self.gpu)
    # b_ij: [in_capsules=1152, out_capsules=10, 1]

    # Iterative routing.
    for iteration in range(self.routing_iters):
        # Convert routing logits to softmax.
        c_ij = F.softmax(b_ij.unsqueeze(0), dim=2)
        c_ij = torch.cat([c_ij] * batch_size, dim=0).unsqueeze(4)
        # c_ij: [batch_size, in_capsules=1152, out_capsules=10, 1, 1]

        if iteration == self.routing_iters - 1:
            # Apply routing `c_ij` to weighted inputs `u_hat`.
            s_j = (c_ij * u_hat).sum(dim=1, keepdim=True)  # element-wise product
            # s_j: [batch_size, 1, out_capsules=10, out_capsule_size=16, 1]

            v_j = squash(s_j, dim=3)
            # v_j: [batch_size, 1, out_capsules=10, out_capsule_size=16, 1]
        else:
            # Apply routing `c_ij` to weighted inputs `u_hat`.
            s_j = (c_ij * u_hat_detached).sum(dim=1, keepdim=True)  # element-wise product
            # s_j: [batch_size, 1, out_capsules=10, out_capsule_size=16, 1]

            v_j = squash(s_j, dim=3)
            # v_j: [batch_size, 1, out_capsules=10, out_capsule_size=16, 1]

            # Compute inner products of 2 16D-vectors, `u_hat` and `v_j`.
            u_vj1 = torch.matmul(u_hat_detached.transpose(3, 4), v_j).squeeze(4).mean(
                dim=0, keepdim=False)
            # u_vj1: [in_capsules=1152, out_capsules=10, 1]

            # Update b_ij (routing).
            b_ij = b_ij + u_vj1

    return v_j.squeeze(4).squeeze(1)  # [batch_size, out_capsules=10, out_capsule_size=16]
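# The routing code above calls a `squash` helper that is not shown here. A
# minimal sketch, assuming it is the squashing nonlinearity from Sabour et al.
# (2017), v = (|s|^2 / (1 + |s|^2)) * (s / |s|), applied along `dim`:
import torch

def squash(s, dim=-1, eps=1e-8):
    squared_norm = (s ** 2).sum(dim=dim, keepdim=True)
    scale = squared_norm / (1.0 + squared_norm)
    return scale * s / torch.sqrt(squared_norm + eps)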
def forward(self, x):
    # x: [batch_size, 256, 20, 20]
    batch_size = x.size(0)

    u = []
    for i in range(self.capsule_units):
        u_i = self.conv_units[i](x)
        # u_i: [batch_size, capsule_size=8, 6, 6]

        u_i = u_i.view(batch_size, self.capsule_size, -1, 1)
        # u_i: [batch_size, capsule_size=8, 36, 1]

        u.append(u_i)
    # u: [batch_size, capsule_size=8, 36, 1] x capsule_units=32

    u = torch.cat(u, dim=3)
    # u: [batch_size, capsule_size=8, 36, capsule_units=32]

    u = u.view(batch_size, self.capsule_size, -1)
    # u: [batch_size, capsule_size=8, 1152=36*32]

    u = u.transpose(1, 2)
    # u: [batch_size, 1152, capsule_size=8]

    u_squashed = squash(u, dim=2)
    # u_squashed: [batch_size, 1152, capsule_size=8]

    return u_squashed
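# A standalone shape check for the reshapes above (illustrative only, not part
# of the module): 32 conv units, each an 8-channel 6x6 map, flatten to 1152
# primary capsules of size 8.
import torch

u = torch.randn(4, 8, 36, 32)         # [batch, capsule_size, 6*6, capsule_units]
u = u.view(4, 8, -1).transpose(1, 2)  # [batch, 1152, capsule_size]
assert u.shape == (4, 1152, 8)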
def spell(self, num):
    """Return the spelling of the given integer.

    Arguments:
    num -- number to spell

    Return value:
    A string with num's spelling.
    """
    if num == 0:
        return self.NUMBERS[0]

    # *** Pass 1. Apply rules to decompose the number ***
    tokens = self._parse_num(num)

    # Renumber orders
    order = 0
    for i in range(len(tokens) - 1, -1, -1):
        if isorder(tokens[i]):
            order += 1
            tokens[i] = order
    tokens = squash(isorder, tokens)
    logging.debug("Number decomposition:\n %s\n", tokens)

    # *** Pass 2. Apply list transformations ***
    processed_tokens = apply_passes(tokens, self.PASSES, self.META)
    for index, token in enumerate(processed_tokens):
        if isnum(token):
            processed_tokens[index] = self.NUMBERS[int(token)]
        elif isorder(token):
            processed_tokens[index] = self.ORDERS[token]
    result = ''.join(processed_tokens).rstrip()
    logging.debug("Final components:\n %s\n", processed_tokens)

    # Finally, squash any sequence of whitespace into a single space
    return re.sub(r'\s+', ' ', result)
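# A quick standalone check of the final whitespace-squashing step above
# (the input string is made up for illustration):
import re
assert re.sub(r'\s+', ' ', 'twenty  one \n thousand') == 'twenty one thousand'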
def forward(self, x):
    # x: [batch_size, in_capsules=1152, in_capsule_size=8]
    # Build the capsules (convolution).
    si = x.size(0)
    x = torch.reshape(x, (si, 20, 20))
    x = x.unsqueeze(1)
    x = self.conv(x)
    x = x.view(si, x.size(1), -1)
    # x = torch.reshape(x, (x.size(0), x.size(1), self.in_capsules, self.in_capsule_size))

    x = torch.stack([x] * self.out_capsules, dim=2)
    # x: [batch_size, in_capsules=1152, out_capsules=10, in_capsule_size=8]

    W = torch.cat([self.W.unsqueeze(0)] * si, dim=0)  # .to(device)
    # W: [batch_size, in_capsules=1152, out_capsules=10, out_capsule_size=16, in_capsule_size=8]

    # Transform inputs by weight matrix `W`.
    u_hat = torch.matmul(W, x.unsqueeze(4))  # matrix multiplication
    # u_hat: [batch_size, in_capsules=1152, out_capsules=10, out_capsule_size=16, 1]

    u_hat_detached = u_hat.detach()
    # u_hat_detached: [batch_size, in_capsules=1152, out_capsules=10, out_capsule_size=16, 1]
    # In forward pass, `u_hat_detached` = `u_hat`, and
    # in backward, no gradient can flow from `u_hat_detached` back to `u_hat`.

    # Initialize routing logits to zero.
    b_ij = Variable(torch.zeros(self.in_capsules, self.out_capsules, 1)).to(device)
    # b_ij: [in_capsules=1152, out_capsules=10, 1]

    # Routing.
    for iteration in range(self.routing_iters):
        # Convert routing logits to (log-)softmax.
        c_ij = b_ij.unsqueeze(0)
        c_ij = c_ij.log_softmax(dim=2)
        c_ij = torch.cat([c_ij] * si, dim=0).unsqueeze(4)
        # c_ij: [batch_size, in_capsules=1152, out_capsules=10, 1, 1]

        # Run for `routing_iters` (3) iterations.
        if iteration == self.routing_iters - 1:
            # Apply routing `c_ij` to weighted inputs `u_hat`.
            s_j = (c_ij * u_hat).sum(dim=1, keepdim=True)  # element-wise product
            # s_j: [batch_size, 1, out_capsules=10, out_capsule_size=16, 1]

            v_j = s_j.clone()
            # v_j: [batch_size, 1, out_capsules=10, out_capsule_size=16, 1]
        else:
            # Apply routing `c_ij` to weighted inputs `u_hat`.
            s_j = (c_ij * u_hat_detached).sum(dim=1, keepdim=True)
            # s_j: [batch_size, 1, out_capsules=10, out_capsule_size=16, 1]

            v_j = squash(s_j, dim=3)
            # v_j: [batch_size, 1, out_capsules=10, out_capsule_size=16, 1]

            # Compute inner products of 2 16D-vectors, `u_hat` and `v_j`.
            u_vj1 = torch.matmul(u_hat_detached.transpose(3, 4), v_j).squeeze(4).mean(
                dim=0, keepdim=False)
            # u_vj1: [in_capsules=1152, out_capsules=10, 1]

            # Update b_ij (routing).
            b_ij = b_ij + u_vj1

    # Return the length (norm) of each output capsule vector.
    v_j = torch.sqrt((v_j ** 2).sum(dim=3))
    return v_j.squeeze(3).squeeze(1)  # [batch_size, out_capsules=10]
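# A standalone check of the final norm computation above (illustrative only):
# the layer returns the length of each output capsule vector, which is what a
# margin loss or an argmax over classes would consume.
import torch

v_j = torch.randn(2, 1, 10, 16, 1)           # [batch, 1, out_caps, out_size, 1]
lengths = torch.sqrt((v_j ** 2).sum(dim=3))  # [batch, 1, out_caps, 1]
lengths = lengths.squeeze(3).squeeze(1)      # [batch, out_caps]
assert lengths.shape == (2, 10)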