def set_up_attention(self):
    """Build the graph nodes that `attention` reads from the recorded memory.

    Always marks the model as set up.  When attention is disabled in the
    config nothing else happens.  Otherwise `self.attention_memory`
    (indexed as ``[layer][position]``) is turned into:

    * ``self.stacked_attention_memory`` -- for every position, the
      concatenation of that position's nodes across all layers, stacked.
    * ``self.to_alpha`` -- the transposed stack of either the same
      per-position concatenations (``full_state_attention``) or only the
      first layer's nodes, optionally left-multiplied by
      ``self.v.attention_B`` when ``matrix_attention`` is enabled.
    """
    self.attention_has_been_set_up = True
    if not self.config.p.attention:
        return

    def _concat_layers_per_position():
        # One ConcatNode per position, joining that position across layers.
        layers = self.attention_memory
        return [
            nn.ConcatNode([layer[pos] for layer in layers], self.g)
            for pos in range(len(layers[0]))
        ]

    self.stacked_attention_memory = nn.StackNode(
        _concat_layers_per_position(), self.g)

    if self.config.p.full_state_attention:
        # Fresh concat nodes are built on purpose rather than sharing the
        # ones above, so the graph is the same as it has always been.
        keys = _concat_layers_per_position()
    else:
        keys = self.attention_memory[0]
    stacked_keys = nn.StackNode(keys, self.g)

    # NOTE(review): the original comment described the result of the
    # transpose as "(length, rish)"; the exact shape cannot be confirmed
    # from this method alone.
    self.to_alpha = nn.TransposeInPlaceNode(stacked_keys, self.g)

    if self.config.p.matrix_attention:
        self.to_alpha = nn.DotNode(self.v.attention_B, self.to_alpha, self.g)
def forward_vertical_slice(self, hs, parent, left, input_token, params,
                           structure_data, takes_attention=True):
    """Run one token through every GRU layer and return the new states.

    Args:
        hs: per-layer hidden states fed into the cells.
        parent: per-layer parent inputs, used when augmentation is on.
        left: per-layer left-sibling inputs, used when augmentation is on.
        input_token: token passed to `self.encode`.
        params: per-layer cell parameters.
        structure_data: forwarded to `self.encode`.
        takes_attention: request attention input; only honoured when the
            config also enables attention.

    Returns:
        ``(out_hs, x)`` where ``out_hs`` lists the output of each layer
        and ``x`` is the last layer's output (or the encoded token when
        ``gru_depth`` is 0).
    """
    use_attention = takes_attention and self.config.p.attention

    # The attention read happens before the token is encoded, as before.
    if use_attention:
        attention_in = self.attention(hs)

    x = self.encode(input_token, structure_data=structure_data)

    out_hs = []
    for depth in range(self.config.p.gru_depth):
        x = nn.DropoutNode(x, self.dropout, self.g)

        # Collect whatever gets merged with the token input at this layer.
        pieces = [x]
        if self.config.p.augmentation:
            pieces.extend([parent[depth], left[depth]])
        if use_attention:
            pieces.append(attention_in[depth])

        # With nothing extra to merge the input is used as-is.
        if len(pieces) > 1:
            merged_x = nn.ConcatNode(pieces, self.g)
        else:
            merged_x = x

        x = nn.GRUbCell(hs[depth], merged_x, params[depth], self.g,
                        dropout=self.dropout)
        out_hs.append(x)

    return out_hs, x
def __init__(self, variables, config, proof_step, train=False):
    '''
    Build the model for a single proof step in one pass: score the step,
    add the cross-entropy loss, record the outputs, and backpropagate
    when `train` is set.

    `proof_step` is a triple ``(tree, hyps, correct_output)``.
    '''
    tree, hyps, correct_output = proof_step

    DefaultModel.__init__(self, config, variables, train=train)

    # Outside of training the random seed is derived from the example
    # itself, so repeated evaluations of the same step are reproducible.
    if not self.train:
        seed = tree.size() + 100 * len(hyps) + 10000 * correct_output
        np.random.seed(seed)

    correct_score = self.get_score(tree, hyps, None)
    # The "wrong" class is pinned to a constant score of zero.
    wrong_score = nn.ConstantNode(np.array([0.0]), self.g)

    # Multiplying by one turns a boolean label into a number.
    correct_output = 1 * correct_output

    logits = nn.ConcatNode([wrong_score, correct_score], self.g)
    cross_entropy = nn.SoftmaxCrossEntropyLoss(
        correct_output, logits, self.g)
    self.loss = nn.AddNode([self.loss, cross_entropy], self.g)

    predicted = np.argmax(logits.value)
    accuracy = 1 * (predicted == correct_output)

    self.outputs = [cross_entropy.value, accuracy, 1 - correct_output]
    self.output_counts = [1, 1, 1]

    # Backpropagation only runs when training was requested.
    if train:
        self.g.backprop(self.loss)
def get_score(self, statement, hyps, f):
    """Return the score node for `statement` given `hyps`.

    The statement and hypotheses are parsed, run through `gru_block`
    starting from the model's own start states, and the resulting states
    are pushed through the output layers stored on ``self.v``.
    """
    parsed = self.parse_statement_and_hyps(statement, hyps, f)
    (in_string, in_parents, in_left, in_right, in_params,
     depths, parent_arity, leaf_position, arity) = parsed

    # One tuple of structural features per token.
    per_token_structure = list(
        zip(depths, parent_arity, leaf_position, arity))

    to_middle = self.gru_block(
        self.v.forward_start,
        in_string,
        in_params,
        hs_backward=self.v.backward_start,
        parents=in_parents,
        left_siblings=in_left,
        right_siblings=in_right,
        bidirectional=self.config.p.bidirectional,
        structure_data=per_token_structure,
        feed_to_attention=False,
    )

    # Dropout is applied before the first dense layer and after every
    # dense layer except the final linear one.
    h = nn.ConcatNode(to_middle, self.g)
    h = nn.DropoutNode(h, self.dropout, self.g)
    h = nn.RELUDotAdd(h, self.v.main_first_W, self.v.main_first_b, self.g)
    h = nn.DropoutNode(h, self.dropout, self.g)

    for layer in range(self.config.p.out_layers):
        weight = self.v.main_Ws[layer]
        bias = self.v.main_bs[layer]
        h = nn.RELUDotAdd(h, weight, bias, self.g)
        h = nn.DropoutNode(h, self.dropout, self.g)

    # Final linear projection: dot with last_W, then add last_b.
    projected = nn.DotNode(h, self.v.last_W, self.g)
    return nn.AddNode([projected, self.v.last_b], self.g)
def encode(self, token, structure_data=None):
    """Return the input node for a single token.

    The token is looked up in ``self.config.encode``, its row is selected
    from ``self.v.L`` and passed through dropout.  When the config
    enables ``structure_data``, the scaled `structure_data` values are
    appended as a constant node.
    """
    token_index = self.config.encode[token]
    embedded = nn.SingleIndexNode(token_index, self.v.L, self.g)
    embedded = nn.DropoutNode(embedded, self.dropout, self.g)

    if not self.config.p.structure_data:
        return embedded

    scaled = self.config.p.structure_data_scaling * np.array(structure_data)
    structure_node = nn.ConstantNode(scaled, self.g)
    return nn.ConcatNode([embedded, structure_node], self.g)
def attention(self, state_list):
    """Read from the attention memory using the given per-layer states.

    Requires attention to be enabled in the config and
    `set_up_attention` to have run (both are asserted).  The query is
    either the concatenation of all states (``full_state_attention``) or
    just the first one.

    Returns:
        The result of splitting the attended memory into ``gru_depth``
        pieces with ``nn.SplitNode``.
    """
    assert self.config.p.attention
    assert self.attention_has_been_set_up

    if self.config.p.full_state_attention:
        query = nn.ConcatNode(state_list, self.g)
    else:
        query = state_list[0]

    # Score the query against the prepared keys, normalise with softmax,
    # then use the weights to combine the stacked memory.
    scores = nn.DotNode(query, self.to_alpha, self.g)
    weights = nn.SoftmaxNode(scores, self.g)
    attended = nn.DotNode(weights, self.stacked_attention_memory, self.g)

    return nn.SplitNode(attended, self.config.p.gru_depth, self.g)
def get_vector(self, statement, hyps, f, statement_gru, hyps_gru,
               forward_start, backward_start, first_W, first_b, Ws, bs):
    """Return the hidden vector node for `statement` given `hyps`.

    Works like the scoring path but with caller-supplied start states and
    dense-layer parameters, and without the final linear projection.

    Args:
        statement, hyps, f, statement_gru, hyps_gru: forwarded to
            `self.parse_statement_and_hyps`.
        forward_start: initial forward hidden states for `gru_block`.
        backward_start: initial backward hidden states for `gru_block`.
        first_W, first_b: parameters of the first dense layer.
        Ws, bs: per-layer parameters of the following ``out_layers``
            dense layers.

    Returns:
        The node produced after the last dense layer and its dropout.
    """
    (in_string, in_parents, in_left, in_right, in_params, depths,
     parent_arity, leaf_position, arity) = self.parse_statement_and_hyps(
        statement, hyps, f, statement_gru, hyps_gru)

    # Materialise the per-token structure tuples.  In Python 3 a bare
    # zip() is a one-shot iterator that cannot be indexed or measured
    # with len(); a list matches what get_score passes to gru_block.
    structure_data = list(zip(depths, parent_arity, leaf_position, arity))

    to_middle = self.gru_block(forward_start, in_string, in_params,
                               hs_backward=backward_start,
                               parents=in_parents,
                               left_siblings=in_left,
                               right_siblings=in_right,
                               bidirectional=self.config.p.bidirectional,
                               structure_data=structure_data,
                               feed_to_attention=False)

    h = nn.ConcatNode(to_middle, self.g)
    h = nn.DropoutNode(h, self.dropout, self.g)
    h = nn.RELUDotAdd(h, first_W, first_b, self.g)
    h = nn.DropoutNode(h, self.dropout, self.g)

    for i in range(self.config.p.out_layers):
        h = nn.RELUDotAdd(h, Ws[i], bs[i], self.g)
        h = nn.DropoutNode(h, self.dropout, self.g)

    return h
def gru_block(self, hs, input_tokens, params, hs_backward=None, parents=None,
              left_siblings=None, right_siblings=None, bidirectional=True,
              feed_to_attention=False, structure_data=None):
    """Run a (possibly bidirectional) stacked GRU over `input_tokens`.

    Layers are processed one after another; the outputs of one layer are
    the inputs of the next.  With augmentation enabled, each forward cell
    also receives the same-layer states of its parent and left sibling,
    and each backward cell that of its right sibling; an index of -1
    selects the corresponding "missing" parameter from ``params[pos].aug``.

    Args:
        hs: initial forward hidden state per layer.
        input_tokens: tokens passed to `self.encode_string`.
        params: per-position parameter objects (``forward``, ``backward``
            and ``aug`` are indexed by layer).
        hs_backward: initial backward hidden state per layer; read only
            when `bidirectional`.
        parents, left_siblings, right_siblings: per-position indices used
            when augmentation is enabled.
        bidirectional: also run a right-to-left pass on every layer.
        feed_to_attention: store the per-layer outputs on
            ``self.attention_memory`` (only if the config enables
            attention).
        structure_data: forwarded to `self.encode_string`.

    Returns:
        One node per layer: the final forward state, concatenated with
        the final backward state when `bidirectional`.
    """
    feed_to_attention = self.config.p.attention and feed_to_attention

    # Augmentation needs the tree structure to be supplied.
    if self.config.p.augmentation:
        assert left_siblings is not None
        assert parents is not None
        if bidirectional:
            assert right_siblings is not None

    layer_inputs = self.encode_string(
        input_tokens, structure_datas=structure_data)
    n_tokens = len(input_tokens)

    # memory[layer][position] holds the output node of each layer.
    memory = []
    final_forward = []
    final_backward = [] if bidirectional else None

    for layer in range(self.config.p.gru_depth):
        # ---- left-to-right pass ----
        forward_states = [None] * n_tokens
        state = hs[layer]
        for pos in range(n_tokens):
            pos_params = params[pos]
            cell_in = layer_inputs[pos]
            cell_in = nn.DropoutNode(cell_in, self.dropout, self.g)

            if self.config.p.augmentation:
                parent = parents[pos]
                if parent == -1:
                    parent_in = pos_params.aug[layer].no_parent
                else:
                    parent_in = forward_states[parent]

                left_sibling = left_siblings[pos]
                if left_sibling == -1:
                    left_in = pos_params.aug[layer].no_left_sibling
                else:
                    left_in = forward_states[left_sibling]

                cell_in = nn.ConcatNode(
                    [cell_in, parent_in, left_in], self.g)

            state = nn.GRUbCell(state, cell_in, pos_params.forward[layer],
                                self.g, dropout=self.dropout)
            forward_states[pos] = state
        final_forward.append(state)

        if not bidirectional:
            layer_inputs = forward_states
        else:
            # ---- right-to-left pass ----
            backward_states = [None] * n_tokens
            state = hs_backward[layer]
            for pos in reversed(range(n_tokens)):
                pos_params = params[pos]
                cell_in = layer_inputs[pos]
                cell_in = nn.DropoutNode(cell_in, self.dropout, self.g)

                if self.config.p.augmentation:
                    right_sibling = right_siblings[pos]
                    if right_sibling == -1:
                        right_in = pos_params.aug[layer].no_right_sibling
                    else:
                        right_in = backward_states[right_sibling]
                    cell_in = nn.ConcatNode([cell_in, right_in], self.g)

                state = nn.GRUbCell(state, cell_in,
                                    pos_params.backward[layer],
                                    self.g, dropout=self.dropout)
                backward_states[pos] = state
            final_backward.append(state)

            # Join both directions position by position; this becomes the
            # input of the next layer.
            layer_inputs = [
                nn.ConcatNode(pair, self.g)
                for pair in zip(forward_states, backward_states)
            ]

        memory.append(layer_inputs)

    if feed_to_attention:
        self.attention_memory = memory

    # The final states are all that callers need from this block.
    if bidirectional:
        return [
            nn.ConcatNode(pair, self.g)
            for pair in zip(final_forward, final_backward)
        ]
    return final_forward