Example #1
    def __init__(self, config, variables, train=False):
        self.config = config
        self.v = variables
        self.g = nn.ComputationalGraph(nodes=self.v.vs)
        self.lm = config.lm
        self.attention_has_been_set_up = False
        self.dropout = self.config.p.dropout if train else None
        self.train = train

        # add an L2 penalty over the regularizable variables
        # if a regularization coefficient is configured
        if self.config.p.regularization is not None:
            reg_losses = [
                nn.L2Node(self.config.p.regularization, var, self.g)
                for var in self.v.rvs
            ]
            self.loss = nn.AddNode(reg_losses, self.g)
        else:
            self.loss = nn.ConstantNode(0.0, graph=self.g)

        # self.attention_memory should be a list of the
        # intermediate states of the GRU block:
        # self.attention_memory[i][j] is the state for the
        # ith input symbol at the jth layer

        if self.config.p.attention:
            self.attention_memory = []
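
The regularization branch sums one nn.L2Node per regularizable variable into the initial loss. As a hedged illustration, here is a minimal numpy sketch of what such a node plausibly contributes; the function name and the gradient wiring are assumptions, since the custom nn library is not shown here:

import numpy as np

def l2_penalty(coefficient, value):
    # loss term lambda * sum(v^2) and its gradient 2 * lambda * v;
    # the real nn.L2Node would also register itself in the graph
    loss = coefficient * np.sum(value ** 2)
    grad = 2.0 * coefficient * value
    return loss, grad

# accumulate over all regularized variables, mirroring the
# AddNode over reg_losses above
weights = [np.ones((3, 3)), np.ones(3)]
total_penalty = sum(l2_penalty(1e-4, w)[0] for w in weights)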
Example #2
    def __init__(self, variables, config, proof_step, train=False):
        ''' This is the model.  In a single pass it processes the
        inputs, computes the losses, and runs a training step if
        train is set.
        '''

        # the proof step is a (tree, hyps, correct_output) triple
        (tree, hyps, correct_output) = proof_step

        DefaultModel.__init__(self, config, variables, train=train)

        # fix the random seed during evaluation so that each
        # proof step is processed deterministically
        if not self.train:
            np.random.seed(tree.size() + 100 * len(hyps) + 10000 * correct_output)

        correct_score = self.get_score(tree, hyps, None)

        # the "wrong" class keeps a constant score of 0, so the two-way
        # softmax below reduces to a logistic classifier on correct_score
        wrong_score = nn.ConstantNode(np.array([0.0]), self.g)
        correct_output = int(correct_output)  # cast the bool label to a class index

        logits = nn.ConcatNode([wrong_score, correct_score], self.g)
        cross_entropy = nn.SoftmaxCrossEntropyLoss(correct_output, logits, self.g)
        self.loss = nn.AddNode([self.loss, cross_entropy], self.g)

        accuracy = 1 * (np.argmax(logits.value) == correct_output)
        self.outputs = [cross_entropy.value, accuracy, 1-correct_output]
        self.output_counts = [1, 1, 1]

        # perform the backpropagation if we are training
        if train:
            self.g.backprop(self.loss)
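
Because wrong_score is pinned to a constant 0, the two-way softmax cross-entropy above is mathematically equivalent to a binary logistic loss on correct_score alone. A small self-contained numpy check of that identity (the helper names are illustrative, not part of the nn library):

import numpy as np

def two_way_softmax_ce(score, label):
    # cross-entropy over logits [0, score], as in the model above
    logits = np.array([0.0, float(score)])
    logits -= logits.max()                       # stabilize
    probs = np.exp(logits) / np.exp(logits).sum()
    return -np.log(probs[label])

def logistic_loss(score, label):
    # equivalent binary logistic loss on the single score
    p = 1.0 / (1.0 + np.exp(-score))             # sigmoid(score)
    return -np.log(p if label == 1 else 1.0 - p)

assert np.isclose(two_way_softmax_ce(1.7, 1), logistic_loss(1.7, 1))
assert np.isclose(two_way_softmax_ce(-0.3, 0), logistic_loss(-0.3, 0))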
Example #3
    def get_score(self, statement, hyps, f):
        in_string, in_parents, in_left, in_right, in_params, depths, \
                parent_arity, leaf_position, arity = self.parse_statement_and_hyps(
                statement, hyps, f)

        to_middle = self.gru_block(self.v.forward_start,
                                   in_string,
                                   in_params,
                                   hs_backward=self.v.backward_start,
                                   parents=in_parents,
                                   left_siblings=in_left,
                                   right_siblings=in_right,
                                   bidirectional=self.config.p.bidirectional,
                                   structure_data=list(
                                       zip(depths, parent_arity, leaf_position,
                                           arity)),
                                   feed_to_attention=False)

        h = nn.ConcatNode(to_middle, self.g)
        h = nn.DropoutNode(h, self.dropout, self.g)
        h = nn.RELUDotAdd(h, self.v.main_first_W, self.v.main_first_b, self.g)
        h = nn.DropoutNode(h, self.dropout, self.g)
        for i in range(self.config.p.out_layers):
            h = nn.RELUDotAdd(h, self.v.main_Ws[i], self.v.main_bs[i], self.g)
            h = nn.DropoutNode(h, self.dropout, self.g)
        h = nn.DotNode(h, self.v.last_W, self.g)
        h = nn.AddNode([h, self.v.last_b], self.g)

        return h
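
The tail of get_score is an ordinary MLP head: concatenate, apply dropout, project through a ReLU layer, run out_layers further ReLU layers, and finish with an affine score. The numpy sketch below mirrors that shape of computation; it assumes inverted dropout and treats p=None (the evaluation setting from Example #1, where self.dropout is None unless training) as a no-op, which is an assumption about nn.DropoutNode:

import numpy as np

def relu(x):
    return np.maximum(x, 0.0)

def dropout(x, p, rng):
    # inverted dropout; p=None (evaluation) is a no-op
    if p is None:
        return x
    mask = rng.random(x.shape) >= p
    return x * mask / (1.0 - p)

def mlp_score(h, first_W, first_b, Ws, bs, last_W, last_b, p=None,
              rng=np.random.default_rng(0)):
    # mirrors get_score's tail: ReLU(W x + b) stack, then affine
    h = dropout(h, p, rng)
    h = relu(h @ first_W + first_b)
    h = dropout(h, p, rng)
    for W, b in zip(Ws, bs):
        h = relu(h @ W + b)
        h = dropout(h, p, rng)
    return h @ last_W + last_b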
Example #4
    def __init__(self, variables, config, proof_step, train=False):
        ''' This is the model.  In a single pass it processes the
        inputs, computes the losses, and runs a training step if
        train is set.
        '''
        DefaultModel.__init__(self, config, variables, train=train)

        # fix the random seed during evaluation so that each
        # proof step is processed deterministically
        if not self.train:
            np.random.seed(proof_step.context.number +
                           proof_step.prop.number + proof_step.tree.size())

        main = self.main_get_vector(proof_step.tree, proof_step.context.hyps,
                                    proof_step.context.f)

        main = nn.DotNode(main, self.v.W, self.g)

        # get a list [right prop, wrong prop 0, ..., wrong prop n]
        props = self.get_props(proof_step)

        out_vectors = [
            self.prop_get_vector(prop.tree, prop.hyps, prop.f)
            for prop in props
        ]
        stacked = nn.StackNode(out_vectors, self.g)
        stacked = nn.TransposeInPlaceNode(stacked, self.g)

        logits = nn.DotNode(main, stacked, self.g)
        # the correct prop is first in the list, so the target class is 0
        cross_entropy = nn.SoftmaxCrossEntropyLoss(0, logits, self.g)
        self.loss = nn.AddNode([self.loss, cross_entropy], self.g)

        accuracy = 1 * (np.argmax(logits.value) == 0)
        self.outputs = [cross_entropy.value, accuracy, 1.0 / len(props)]
        self.output_counts = [1, 1, 1]

        # perform the backpropagation if we are training
        if train:
            self.g.backprop(self.loss)
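
The scoring step in Example #4 ranks the correct proposition against the sampled wrong ones: the transformed main vector is dotted with every stacked candidate vector, and the softmax cross-entropy target is class 0 because the correct prop comes first. A numpy sketch of that algebra (the vector sizes are made up for illustration):

import numpy as np

def rank_props_loss(main, W, prop_vectors):
    # the correct prop is assumed first, so the target class is 0
    query = main @ W                             # DotNode(main, W)
    stacked = np.stack(prop_vectors).T           # StackNode + Transpose
    logits = query @ stacked                     # one score per candidate
    logits -= logits.max()                       # stable softmax
    probs = np.exp(logits) / np.exp(logits).sum()
    loss = -np.log(probs[0])                     # cross-entropy at label 0
    accuracy = 1 * (np.argmax(logits) == 0)
    return loss, accuracy

main = np.random.randn(8)
W = np.random.randn(8, 16)
props = [np.random.randn(16) for _ in range(5)]  # 1 right + 4 wrong
loss, acc = rank_props_loss(main, W, props)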
Example #5
    def __init__(self,
                 variables,
                 config,
                 proof_step,
                 train=False,
                 target_index=None):
        ''' This is the model.  In a single pass it processes the
        inputs, computes the losses, and runs a training step if
        train is set.
        '''
        DefaultModel.__init__(self, config, variables, train=train)
        # fix the random seed during evaluation so that each
        # proof step is processed deterministically
        if not self.train:
            np.random.seed(proof_step.context.number +
                           proof_step.prop.number + proof_step.tree.size())

        self.parse_and_augment_proof_step(proof_step,
                                          target_index=target_index)

        # merge the two input structures so that the bidirectional
        # pass can run over them as a single sequence
        in_string, in_parents, in_left, in_right, in_params, depths, \
                parent_arity, leaf_position, arity = merge_graph_structures(
            [self.known_graph_structure, self.to_prove_graph_structure],
            [self.v.known_gru_block, self.v.to_prove_gru_block])

        # do the left side gru blocks
        to_middle = self.gru_block(self.v.forward_start,
                                   in_string,
                                   in_params,
                                   hs_backward=self.v.backward_start,
                                   parents=in_parents,
                                   left_siblings=in_left,
                                   right_siblings=in_right,
                                   bidirectional=self.config.p.bidirectional,
                                   structure_data=list(
                                       zip(depths, parent_arity, leaf_position,
                                           arity)),
                                   feed_to_attention=self.config.p.attention)

        # set up the attentional model
        if self.config.p.attention:
            self.set_up_attention()

        # process the middle
        from_middle = [
            nn.RELUDotAdd(x, W, b, self.g)
            for x, W, b in zip(to_middle, self.v.middle_W, self.v.middle_b)
        ]

        # process the right side
        out_string = self.out_graph_structure.string
        out_parents = self.out_graph_structure.parents
        out_left = self.out_graph_structure.left_sibling
        arity = self.out_graph_structure.arity
        leaf_position = self.out_graph_structure.leaf_position
        parent_arity = self.out_graph_structure.parent_arity
        depths = self.out_graph_structure.depth
        structure_data = list(zip(depths, parent_arity, leaf_position, arity))

        out_length = len(out_string)
        out_xs = []
        all_hs = []
        hs = from_middle
        for i in range(out_length):
            # look up the parent and left-sibling hidden states for the
            # structural augmentation, falling back to the learned
            # no-parent / no-left-sibling vectors
            if self.config.p.augmentation:
                parent = out_parents[i]
                if parent == -1:
                    parent_hs = [x.no_parent for x in self.v.out_gru_block.aug]
                else:
                    parent_hs = all_hs[parent]
                left = out_left[i]
                if left == -1:
                    left_hs = [x.no_left_sibling for x in self.v.out_gru_block.aug]
                else:
                    left_hs = all_hs[left]
            else:
                parent_hs = None
                left_hs = None

            hs, x = self.forward_vertical_slice(
                hs,
                parent_hs,
                left_hs,
                out_string[i],
                self.v.out_gru_block.forward,
                structure_data[i],
                takes_attention=self.config.p.attention)
            all_hs.append(hs)
            out_xs.append(x)

        # calculate logits and score: the targets are the input tokens
        # shifted left by one, with END_OF_SECTION closing the sequence
        self.correct_string = out_string[1:] + ['END_OF_SECTION']
        self.all_correct = True
        self.num_correct = 0
        self.all_logits = [self.x_to_predictions(x) for x in out_xs]
        self.prediction = []
        all_costs = [
            self.score(logits, c_token)
            for logits, c_token in zip(self.all_logits, self.correct_string)
        ]
        # total cross-entropy over the output tokens
        perplexity = nn.AddNode(all_costs, self.g)

        self.loss = nn.AddNode([perplexity, self.loss], self.g)

        # and train
        if train:
            self.g.backprop(self.loss)

        # put the outputs in the standard training format
        self.outputs = [
            perplexity.value, self.num_correct, 1 * self.all_correct
        ]
        self.output_counts = [out_length, out_length, 1]
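
The output loop is teacher forcing: step i consumes out_string[i], and its logits are scored against out_string[i+1], with 'END_OF_SECTION' as the final target; the per-token cross-entropies are summed into the quantity logged as perplexity. A minimal numpy sketch of the target construction and scoring (the vocab mapping and precomputed logits are placeholders for what x_to_predictions produces):

import numpy as np

def sequence_loss(out_string, all_logits, vocab):
    # targets are the inputs shifted left by one token, closed
    # with END_OF_SECTION, exactly as in correct_string above
    targets = out_string[1:] + ['END_OF_SECTION']
    total, num_correct = 0.0, 0
    for logits, target in zip(all_logits, targets):
        shifted = logits - logits.max()          # stable softmax
        probs = np.exp(shifted) / np.exp(shifted).sum()
        t = vocab[target]
        total += -np.log(probs[t])               # per-token cross-entropy
        num_correct += int(np.argmax(logits) == t)
    return total, num_correct

vocab = {'a': 0, 'b': 1, 'END_OF_SECTION': 2}
logits = [np.random.randn(3) for _ in range(2)]
loss, n_correct = sequence_loss(['a', 'b'], logits, vocab)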