def attention(self, state_list):
    """Attend over the prepared memory and return one new state per GRU layer.

    Requires set_up_attention() to have run first (it builds
    self.to_alpha and self.stacked_attention_memory).
    """
    assert self.config.p.attention
    assert self.attention_has_been_set_up

    # The attention query: either the full concatenated stack of layer
    # states, or just the first layer's state.
    if self.config.p.full_state_attention:
        query = nn.ConcatNode(state_list, self.g)
    else:
        query = state_list[0]

    # Softmax over memory positions, then blend the memory by those weights.
    weights = nn.SoftmaxNode(nn.DotNode(query, self.to_alpha, self.g), self.g)
    blended = nn.DotNode(weights, self.stacked_attention_memory, self.g)

    # Split the blended vector back into one chunk per GRU layer.
    return nn.SplitNode(blended, self.config.p.gru_depth, self.g)
def get_score(self, statement, hyps, f):
    """Score a statement against its hypotheses.

    Parses the statement, runs the (optionally bidirectional) GRU block,
    then feeds the concatenated final states through a dropout/ReLU MLP
    and a final affine layer. Returns the output graph node.
    """
    (in_string, in_parents, in_left, in_right, in_params,
     depths, parent_arity, leaf_position, arity) = \
        self.parse_statement_and_hyps(statement, hyps, f)

    # Per-token structural features for the GRU block.
    structure = list(zip(depths, parent_arity, leaf_position, arity))

    to_middle = self.gru_block(
        self.v.forward_start, in_string, in_params,
        hs_backward=self.v.backward_start,
        parents=in_parents,
        left_siblings=in_left,
        right_siblings=in_right,
        bidirectional=self.config.p.bidirectional,
        structure_data=structure,
        feed_to_attention=False)

    # MLP head: concat -> dropout -> first ReLU layer -> dropout -> hidden
    # ReLU layers (each followed by dropout) -> final affine output.
    hidden = nn.DropoutNode(nn.ConcatNode(to_middle, self.g),
                            self.dropout, self.g)
    hidden = nn.RELUDotAdd(hidden, self.v.main_first_W,
                           self.v.main_first_b, self.g)
    hidden = nn.DropoutNode(hidden, self.dropout, self.g)

    for layer in range(self.config.p.out_layers):
        hidden = nn.RELUDotAdd(hidden, self.v.main_Ws[layer],
                               self.v.main_bs[layer], self.g)
        hidden = nn.DropoutNode(hidden, self.dropout, self.g)

    hidden = nn.DotNode(hidden, self.v.last_W, self.g)
    return nn.AddNode([hidden, self.v.last_b], self.g)
def set_up_attention(self):
    """Build the attention memory nodes used later by attention().

    Stacks the accumulated self.attention_memory (a list of layers, each a
    list of per-position state nodes) into self.stacked_attention_memory,
    and builds self.to_alpha, the (transposed, optionally matrix-projected)
    key tensor that queries are dotted against.
    """
    self.attention_has_been_set_up = True
    if not self.config.p.attention:
        return
    # One concat node per memory position, joining all layers' states at
    # that position.
    prestack = [
        nn.ConcatNode([layer[i] for layer in self.attention_memory], self.g)
        for i in range(len(self.attention_memory[0]))
    ]
    self.stacked_attention_memory = nn.StackNode(prestack, self.g)
    if self.config.p.full_state_attention:
        # NOTE(review): this rebuilds exactly the same concat/stack as
        # stacked_attention_memory above; presumably duplicated so the
        # two graph paths stay independent — confirm before deduplicating.
        prestack = [
            nn.ConcatNode([layer[i] for layer in self.attention_memory],
                          self.g)
            for i in range(len(self.attention_memory[0]))
        ]
        self.to_alpha = nn.StackNode(prestack, self.g)
    else:
        # Keys come from the first layer's states only.
        prestack = self.attention_memory[0]
        self.to_alpha = nn.StackNode(prestack, self.g)
    # Transpose so to_alpha can be right-multiplied by a query vector.
    self.to_alpha = nn.TransposeInPlaceNode(self.to_alpha, self.g)
    # to_alpha is (length, rish)
    if self.config.p.matrix_attention:
        # Bilinear attention: project the keys through attention_B.
        self.to_alpha = nn.DotNode(self.v.attention_B, self.to_alpha, self.g)
def __init__(self, variables, config, proof_step, train=False):
    '''
    this is the model.  As a single pass, it processes the inputs,
    and computes the losses, and runs a training step if train.
    '''
    DefaultModel.__init__(self, config, variables, train=train)

    if not self.train:
        # Deterministic per-proof-step seed so evaluation is reproducible.
        seed = (proof_step.context.number
                + proof_step.prop.number
                + proof_step.tree.size())
        np.random.seed(seed)

    # Embed the goal statement and project it with W.
    main = self.main_get_vector(proof_step.tree,
                                proof_step.context.hyps,
                                proof_step.context.f)
    main = nn.DotNode(main, self.v.W, self.g)

    # get a list [right prop, wrong prop 0, ..., wrong_prop n]
    props = self.get_props(proof_step)
    prop_vectors = [self.prop_get_vector(p.tree, p.hyps, p.f) for p in props]

    # Score the goal vector against every candidate prop at once.
    stacked = nn.TransposeInPlaceNode(nn.StackNode(prop_vectors, self.g),
                                      self.g)
    logits = nn.DotNode(main, stacked, self.g)

    # The correct prop is always at index 0.
    cross_entropy = nn.SoftmaxCrossEntropyLoss(0, logits, self.g)
    self.loss = nn.AddNode([self.loss, cross_entropy], self.g)

    accuracy = 1 * (np.argmax(logits.value) == 0)
    self.outputs = [cross_entropy.value, accuracy, 1.0 / len(props)]
    self.output_counts = [1, 1, 1]

    # perform the backpropagation if we are training
    if train:
        self.g.backprop(self.loss)