Code example #1
    def set_up_attention(self):
        self.attention_has_been_set_up = True
        if not self.config.p.attention: return

        #print 'attention', len(self.attention_memory),len(self.attention_memory[0])

        prestack = [
            nn.ConcatNode([layer[i]
                           for layer in self.attention_memory], self.g)
            for i in range(len(self.attention_memory[0]))
        ]
        #print prestack
        self.stacked_attention_memory = nn.StackNode(prestack, self.g)
        #print 'stacked_memory.shape()', self.stacked_attention_memory.shape()

        if self.config.p.full_state_attention:
            prestack = [
                nn.ConcatNode([layer[i]
                               for layer in self.attention_memory], self.g)
                for i in range(len(self.attention_memory[0]))
            ]
            self.to_alpha = nn.StackNode(prestack, self.g)
        else:
            prestack = self.attention_memory[0]
            self.to_alpha = nn.StackNode(prestack, self.g)
            #print len(self.attention_memory),len(self.attention_memory[0]), self.attention_memory[0][0].value.shape
            #print 'to_alpha shape',self.to_alpha.value.shape

        # transpose
        self.to_alpha = nn.TransposeInPlaceNode(self.to_alpha, self.g)

        # to_alpha is (length, rish)
        if self.config.p.matrix_attention:
            self.to_alpha = nn.DotNode(self.v.attention_B, self.to_alpha,
                                       self.g)
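
Roughly, set_up_attention turns the remembered hidden states into two matrices: stacked_attention_memory, which is read from, and to_alpha, which is used to score positions. A plain-NumPy sketch of the shapes with made-up sizes (an illustration, not project code; it assumes StackNode stacks rows and TransposeInPlaceNode behaves like .T):

import numpy as np

depth, length, r = 2, 5, 8                      # hypothetical sizes
memory = [[np.random.randn(r) for _ in range(length)] for _ in range(depth)]

# ConcatNode over layers at each position, then StackNode over positions
stacked_attention_memory = np.stack(
    [np.concatenate([layer[i] for layer in memory]) for i in range(length)])
print(stacked_attention_memory.shape)           # (5, 16) == (length, depth * r)

# with full_state_attention disabled, only the first layer feeds to_alpha
to_alpha = np.stack(memory[0]).T
print(to_alpha.shape)                           # (8, 5) == (r, length)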
Code example #2
    def forward_vertical_slice(self,
                               hs,
                               parent,
                               left,
                               input_token,
                               params,
                               structure_data,
                               takes_attention=True):
        takes_attention = takes_attention and self.config.p.attention

        # first construct the actual inputs, merging several components together per layer
        if takes_attention: attention_in = self.attention(hs)

        x = self.encode(input_token, structure_data=structure_data)
        out_hs = []
        for i in range(self.config.p.gru_depth):
            x = nn.DropoutNode(x, self.dropout, self.g)
            if self.config.p.augmentation and takes_attention:
                merged_x = nn.ConcatNode(
                    [x, parent[i], left[i], attention_in[i]], self.g)
            elif self.config.p.augmentation and not takes_attention:
                merged_x = nn.ConcatNode([x, parent[i], left[i]], self.g)
            elif not self.config.p.augmentation and takes_attention:
                merged_x = nn.ConcatNode([x, attention_in[i]], self.g)
            elif not self.config.p.augmentation and not takes_attention:
                merged_x = x

            x = nn.GRUbCell(hs[i],
                            merged_x,
                            params[i],
                            self.g,
                            dropout=self.dropout)
            out_hs.append(x)

        return out_hs, x
Code example #3
    def __init__(self, variables, config, proof_step, train=False):
        '''This is the model. In a single pass it processes the
        inputs, computes the loss, and runs a training step if
        train is set.
        '''

        # the proof step is a (tree, hyps, correct_output) triple
        (tree, hyps, correct_output) = proof_step

        DefaultModel.__init__(self, config, variables, train=train)

        # fix the random seed
        if not self.train:
            np.random.seed(tree.size() + 100 * len(hyps) + 10000 * correct_output)

        correct_score = self.get_score(tree, hyps, None)

        wrong_score = nn.ConstantNode(np.array([0.0]), self.g)
        correct_output = 1 * correct_output  # ensure the label is an int (0 or 1), not a bool

        logits = nn.ConcatNode([wrong_score, correct_score], self.g)
        cross_entropy = nn.SoftmaxCrossEntropyLoss(correct_output, logits, self.g)
        self.loss = nn.AddNode([self.loss, cross_entropy], self.g)

        accuracy = 1 * (np.argmax(logits.value) == correct_output)
        self.outputs = [cross_entropy.value, accuracy, 1-correct_output]
        self.output_counts = [1, 1, 1]

        # perform the backpropagation if we are training
        if train:
            self.g.backprop(self.loss)
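
The loss above reduces to a two-way softmax between a fixed "wrong" score of 0 and the learned correct_score. A self-contained NumPy sketch of the cross-entropy and accuracy computed by this constructor (the score value is made up):

import numpy as np

correct_score = 1.7          # stand-in for get_score's scalar output
correct_output = 1           # 1 if this proof step is correct, else 0

logits = np.array([0.0, correct_score])              # [wrong_score, correct_score]
probs = np.exp(logits - logits.max())
probs /= probs.sum()
cross_entropy = -np.log(probs[correct_output])       # SoftmaxCrossEntropyLoss
accuracy = 1 * (np.argmax(logits) == correct_output)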
Code example #4
    def get_score(self, statement, hyps, f):
        in_string, in_parents, in_left, in_right, in_params, depths, \
                parent_arity, leaf_position, arity = self.parse_statement_and_hyps(
                statement, hyps, f)

        #print in_string
        to_middle = self.gru_block(self.v.forward_start,
                                   in_string,
                                   in_params,
                                   hs_backward=self.v.backward_start,
                                   parents=in_parents,
                                   left_siblings=in_left,
                                   right_siblings=in_right,
                                   bidirectional=self.config.p.bidirectional,
                                   structure_data=list(
                                       zip(depths, parent_arity, leaf_position,
                                           arity)),
                                   feed_to_attention=False)

        h = nn.ConcatNode(to_middle, self.g)
        h = nn.DropoutNode(h, self.dropout, self.g)
        h = nn.RELUDotAdd(h, self.v.main_first_W, self.v.main_first_b, self.g)
        h = nn.DropoutNode(h, self.dropout, self.g)
        for i in range(self.config.p.out_layers):
            h = nn.RELUDotAdd(h, self.v.main_Ws[i], self.v.main_bs[i], self.g)
            h = nn.DropoutNode(h, self.dropout, self.g)
        h = nn.DotNode(h, self.v.last_W, self.g)
        h = nn.AddNode([h, self.v.last_b], self.g)

        return h
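
The tail of get_score is a small ReLU MLP followed by an affine map to a single score. A rough NumPy equivalent with made-up sizes, assuming row-vector conventions for DotNode and RELUDotAdd and ignoring dropout (a sketch, not project code):

import numpy as np

d, out_layers = 16, 2
h = np.random.randn(d)
W_first, b_first = np.random.randn(d, d), np.random.randn(d)
Ws = [np.random.randn(d, d) for _ in range(out_layers)]
bs = [np.random.randn(d) for _ in range(out_layers)]
W_last, b_last = np.random.randn(d, 1), np.random.randn(1)

h = np.maximum(0.0, h @ W_first + b_first)       # RELUDotAdd
for W, b in zip(Ws, bs):
    h = np.maximum(0.0, h @ W + b)
score = h @ W_last + b_last                      # DotNode then AddNode, shape (1,)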
Code example #5
    def encode(self, token, structure_data=None):
        index = self.config.encode[token]
        out = nn.SingleIndexNode(index, self.v.L, self.g)
        out = nn.DropoutNode(out, self.dropout, self.g)
        if self.config.p.structure_data:
            structure_data_node = nn.ConstantNode(
                self.config.p.structure_data_scaling *
                np.array(structure_data), self.g)
            out = nn.ConcatNode([out, structure_data_node], self.g)
        return out
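
encode looks up one embedding row and optionally appends the scaled structure features. The same computation in plain NumPy, assuming SingleIndexNode selects a single row of the embedding matrix (sizes are made up):

import numpy as np

vocab_size, r = 100, 8
L = np.random.randn(vocab_size, r)               # stands in for self.v.L
index = 3                                        # self.config.encode[token]
structure_data = (2, 3, 1, 2)                    # (depth, parent_arity, leaf_position, arity)
scaling = 0.1                                    # structure_data_scaling

out = L[index]
out = np.concatenate([out, scaling * np.array(structure_data)])
print(out.shape)                                 # (12,) == (r + 4,)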
Code example #6
    def attention(self, state_list):
        assert self.config.p.attention
        assert self.attention_has_been_set_up

        if self.config.p.full_state_attention:
            state = nn.ConcatNode(state_list, self.g)
        else:
            state = state_list[0]

        alpha = nn.DotNode(state, self.to_alpha, self.g)
        #print 'alpha shape', alpha.shape(), self.stacked_attention_memory
        alpha = nn.SoftmaxNode(alpha, self.g)
        newstates = nn.DotNode(alpha, self.stacked_attention_memory, self.g)
        return nn.SplitNode(newstates, self.config.p.gru_depth, self.g)
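
attention is a single dot-product attention step over the stacked memory: score every remembered position against the current state, softmax the scores, take a weighted average of the memory rows, and split the result back into one context vector per GRU layer. A self-contained NumPy sketch with made-up sizes:

import numpy as np

depth, length, r = 2, 5, 8
state = np.random.randn(r)                               # query (first layer's state)
to_alpha = np.random.randn(r, length)                    # built by set_up_attention
stacked_attention_memory = np.random.randn(length, depth * r)

scores = state @ to_alpha                                # DotNode: one score per position
alpha = np.exp(scores - scores.max())
alpha /= alpha.sum()                                     # SoftmaxNode
newstates = alpha @ stacked_attention_memory             # weighted sum over positions
per_layer = np.split(newstates, depth)                   # SplitNode: one context per layer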
Code example #7
File: pred_model.py  Project: solversa/holophrasm
    def get_vector(self, statement, hyps, f, statement_gru, hyps_gru,
            forward_start, backward_start, first_W, first_b, Ws, bs):
        in_string, in_parents, in_left, in_right, in_params, depths, \
                parent_arity, leaf_position, arity = self.parse_statement_and_hyps(
                statement, hyps, f, statement_gru, hyps_gru)

        #print in_string
        to_middle = self.gru_block(forward_start, in_string, in_params,
                hs_backward=backward_start, parents=in_parents,
                left_siblings=in_left, right_siblings=in_right,
                bidirectional=self.config.p.bidirectional,
                structure_data=list(zip(depths, parent_arity, leaf_position, arity)),
                feed_to_attention=False)

        h = nn.ConcatNode(to_middle, self.g)
        h = nn.DropoutNode(h, self.dropout, self.g)
        h = nn.RELUDotAdd(h, first_W, first_b, self.g)
        h = nn.DropoutNode(h, self.dropout, self.g)
        for i in range(self.config.p.out_layers):
            h = nn.RELUDotAdd(h, Ws[i], bs[i], self.g)
            h = nn.DropoutNode(h, self.dropout, self.g)

        return h
Code example #8
    def gru_block(self,
                  hs,
                  input_tokens,
                  params,
                  hs_backward=None,
                  parents=None,
                  left_siblings=None,
                  right_siblings=None,
                  bidirectional=True,
                  feed_to_attention=False,
                  structure_data=None):

        # verify the parameters
        feed_to_attention = self.config.p.attention and feed_to_attention
        if self.config.p.augmentation:
            assert left_siblings is not None
            assert parents is not None
            if bidirectional:
                assert right_siblings is not None

        # this runs the forward and backward passes of a GRU block
        xs = self.encode_string(input_tokens, structure_datas=structure_data)
        length = len(input_tokens)

        # memory is a depth * length list; each entry concatenates both directions when bidirectional
        memory = []
        h_out_forward = []
        h_out_backward = [] if bidirectional else None

        # we proceed layer by layer
        for i in range(self.config.p.gru_depth):
            this_layer_foward = [None] * length

            #forward pass
            h = hs[i]
            for pos in range(length):
                this_params = params[pos]

                this_x = xs[pos]
                this_x = nn.DropoutNode(this_x, self.dropout, self.g)
                if self.config.p.augmentation:
                    # no attention, forward pass
                    parent = parents[pos]
                    parent_x = this_params.aug[
                        i].no_parent if parent == -1 else this_layer_foward[
                            parent]
                    left_sibling = left_siblings[pos]
                    left_sibling_x = this_params.aug[
                        i].no_left_sibling if left_sibling == -1 else this_layer_foward[
                            left_sibling]
                    this_x = nn.ConcatNode([this_x, parent_x, left_sibling_x],
                                           self.g)

                h = nn.GRUbCell(h,
                                this_x,
                                this_params.forward[i],
                                self.g,
                                dropout=self.dropout)
                this_layer_foward[pos] = h
            h_out_forward.append(h)

            # backward pass
            if bidirectional:
                this_layer_backward = [None] * length

                # backward pass
                h = hs_backward[i]
                for pos in range(length - 1, -1, -1):
                    this_params = params[pos]

                    this_x = xs[pos]
                    this_x = nn.DropoutNode(this_x, self.dropout, self.g)
                    if self.config.p.augmentation:
                        # no attention, backward pass
                        right_sibling = right_siblings[pos]
                        right_sibling_x = this_params.aug[
                            i].no_right_sibling if right_sibling == -1 else this_layer_backward[
                                right_sibling]
                        this_x = nn.ConcatNode([this_x, right_sibling_x],
                                               self.g)

                    h = nn.GRUbCell(h,
                                    this_x,
                                    this_params.backward[i],
                                    self.g,
                                    dropout=self.dropout)
                    this_layer_backward[pos] = h

                h_out_backward.append(h)
                # the next layer's inputs concatenate the forward and backward states
                xs = [
                    nn.ConcatNode(x, self.g)
                    for x in zip(this_layer_foward, this_layer_backward)
                ]
            else:
                xs = this_layer_foward

            memory.append(xs)

        if feed_to_attention:
            self.attention_memory = memory

        # h_out is the forward output or the concatenation of the forward and backward outputs
        h_out = [
            nn.ConcatNode(x, self.g)
            for x in zip(h_out_forward, h_out_backward)
        ] if bidirectional else h_out_forward

        return h_out  # this is really all we need
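
Between layers, the bidirectional block hands the next layer a per-position concatenation of the forward and backward states, so layers above the first see inputs of twice the per-direction size. A small NumPy illustration of that handoff (made-up sizes, not project code):

import numpy as np

length, r = 4, 8
forward_states = [np.random.randn(r) for _ in range(length)]
backward_states = [np.random.randn(r) for _ in range(length)]

# the ConcatNode over zipped forward/backward states at the end of each layer
xs = [np.concatenate(pair) for pair in zip(forward_states, backward_states)]
print(xs[0].shape)                                       # (16,) == (2 * r,)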