def __init__(self, config, variables, train=False):
    """Shared model setup.

    Stores the config and variables, builds the computational graph,
    and seeds ``self.loss`` — with L2 regularization terms when a
    regularization coefficient is configured, otherwise with zero.
    """
    self.config = config
    self.v = variables
    self.g = nn.ComputationalGraph(nodes=self.v.vs)
    self.lm = config.lm
    self.attention_has_been_set_up = False
    # dropout is only active during training
    self.dropout = self.config.p.dropout if train else None
    self.train = train

    # Start the loss from the regularization penalty when one is
    # configured. NOTE(review): the check is `is not None`, so a
    # coefficient of 0.0 still builds (no-op) L2 nodes — presumably
    # intentional; confirm if 0.0 should mean "disabled".
    reg = self.config.p.regularization
    if reg is None:
        self.loss = nn.ConstantNode(0.0, graph=self.g)
    else:
        penalties = [nn.L2Node(reg, var, self.g) for var in self.v.rvs]
        self.loss = nn.AddNode(penalties, self.g)

    # self.attention_memory[i][j] holds the intermediate GRU-block
    # state for the ith input symbol at the jth layer.
    if self.config.p.attention:
        self.attention_memory = []
def __init__(self, variables, config, proof_step, train=False):
    '''
    Single-pass model: processes one proof step, computes the
    losses, and runs a training step if train.
    '''
    # a proof step is a (tree, hypotheses, correct_output) triple
    tree, hyps, is_correct = proof_step

    DefaultModel.__init__(self, config, variables, train=train)

    # Outside of training, derive a deterministic seed from the
    # example itself so evaluation is reproducible.
    if not self.train:
        np.random.seed(tree.size() + 100 * len(hyps) + 10000 * is_correct)

    # Score the candidate against a fixed zero baseline.
    score = self.get_score(tree, hyps, None)
    baseline = nn.ConstantNode(np.array([0.0]), self.g)

    label = 1 * is_correct  # coerce bool -> {0, 1}
    logits = nn.ConcatNode([baseline, score], self.g)
    cross_entropy = nn.SoftmaxCrossEntropyLoss(label, logits, self.g)
    self.loss = nn.AddNode([self.loss, cross_entropy], self.g)

    accuracy = 1 * (np.argmax(logits.value) == label)
    self.outputs = [cross_entropy.value, accuracy, 1 - label]
    self.output_counts = [1, 1, 1]

    # perform the backpropagation if we are training
    if train:
        self.g.backprop(self.loss)
def encode(self, token, structure_data=None):
    """Embed a token (with dropout applied) and, when the config
    enables it, concatenate scaled structure data onto the embedding.
    """
    embedding = nn.SingleIndexNode(
            self.config.encode[token], self.v.L, self.g)
    embedding = nn.DropoutNode(embedding, self.dropout, self.g)

    if not self.config.p.structure_data:
        return embedding

    # append the structure features, scaled by the configured factor
    scaled = nn.ConstantNode(
            self.config.p.structure_data_scaling * np.array(structure_data),
            self.g)
    return nn.ConcatNode([embedding, scaled], self.g)