def __init__(self, config, variables, train=False):
    self.config = config
    self.v = variables
    self.g = nn.ComputationalGraph(nodes=self.v.vs)
    self.lm = config.lm
    self.attention_has_been_set_up = False
    self.dropout = self.config.p.dropout if train else None
    self.train = train

    # add in regularization if a regularization
    # coefficient has been set
    if self.config.p.regularization is not None:
        reg_losses = [
            nn.L2Node(self.config.p.regularization, var, self.g)
            for var in self.v.rvs
        ]
        self.loss = nn.AddNode(reg_losses, self.g)
    else:
        self.loss = nn.ConstantNode(0.0, graph=self.g)

    # self.attention_memory should be a list of the
    # intermediate states for the GRU block:
    # self.attention_memory[i][j] is the ith input symbol
    # at the jth layer
    if self.config.p.attention:
        self.attention_memory = []
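# Illustrative sketch, not part of the model: the constructor above seeds
# self.loss with one nn.L2Node per regularized variable in self.v.rvs.
# Assuming the usual convention of a penalty of the form c * ||w||^2 per
# variable (the exact scaling lives inside nn.L2Node), a plain-NumPy
# version of that term would be the hypothetical helper below.
def _sketch_l2_regularization(coefficient, weight_arrays):
    import numpy as np
    # sum of c * ||w||^2 over every regularized weight array
    return sum(coefficient * np.sum(np.square(w)) for w in weight_arrays)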
def __init__(self, variables, config, proof_step, train=False):
    '''
    This is the model.  In a single pass it processes the inputs,
    computes the losses, and runs a training step if train is True.
    '''
    # the proof step is a triple
    (tree, hyps, correct_output) = proof_step

    DefaultModel.__init__(self, config, variables, train=train)

    # fix the random seed
    if not self.train:
        np.random.seed(tree.size() + 100 * len(hyps)
                       + 10000 * correct_output)

    correct_score = self.get_score(tree, hyps, None)
    wrong_score = nn.ConstantNode(np.array([0.0]), self.g)

    # cast the boolean label to an integer class index
    correct_output = 1 * correct_output
    logits = nn.ConcatNode([wrong_score, correct_score], self.g)
    cross_entropy = nn.SoftmaxCrossEntropyLoss(correct_output, logits, self.g)
    self.loss = nn.AddNode([self.loss, cross_entropy], self.g)

    accuracy = 1 * (np.argmax(logits.value) == correct_output)
    self.outputs = [cross_entropy.value, accuracy, 1 - correct_output]
    self.output_counts = [1, 1, 1]

    # perform the backpropagation if we are training
    if train:
        self.g.backprop(self.loss)
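# Illustrative sketch, not part of the model: the loss above is a two-way
# softmax cross-entropy over the logits [wrong_score, correct_score], where
# class 1 means the output was correct.  A plain-NumPy equivalent
# (hypothetical helper, independent of the nn library):
def _sketch_pair_cross_entropy(correct_score, correct_output):
    import numpy as np
    logits = np.array([0.0, float(correct_score)])
    # numerically stabilized log-softmax
    shifted = logits - np.max(logits)
    log_probs = shifted - np.log(np.sum(np.exp(shifted)))
    # negative log-likelihood of the true class (0 = wrong, 1 = correct)
    return -log_probs[int(correct_output)]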
def get_score(self, statement, hyps, f):
    in_string, in_parents, in_left, in_right, in_params, depths, \
        parent_arity, leaf_position, arity = self.parse_statement_and_hyps(
            statement, hyps, f)

    to_middle = self.gru_block(self.v.forward_start, in_string, in_params,
                               hs_backward=self.v.backward_start,
                               parents=in_parents,
                               left_siblings=in_left,
                               right_siblings=in_right,
                               bidirectional=self.config.p.bidirectional,
                               structure_data=list(
                                   zip(depths, parent_arity, leaf_position, arity)),
                               feed_to_attention=False)

    h = nn.ConcatNode(to_middle, self.g)
    h = nn.DropoutNode(h, self.dropout, self.g)
    h = nn.RELUDotAdd(h, self.v.main_first_W, self.v.main_first_b, self.g)
    h = nn.DropoutNode(h, self.dropout, self.g)
    for i in range(self.config.p.out_layers):
        h = nn.RELUDotAdd(h, self.v.main_Ws[i], self.v.main_bs[i], self.g)
        h = nn.DropoutNode(h, self.dropout, self.g)
    h = nn.DotNode(h, self.v.last_W, self.g)
    h = nn.AddNode([h, self.v.last_b], self.g)

    return h
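# Illustrative sketch, not part of the model: after the GRU encoder,
# get_score is a standard ReLU MLP head ending in a linear layer.  Ignoring
# dropout, and up to the orientation of the dot products inside RELUDotAdd,
# a plain-NumPy version with hypothetical weight matrices would be:
def _sketch_score_head(h, first_W, first_b, Ws, bs, last_W, last_b):
    import numpy as np
    h = np.maximum(0.0, h.dot(first_W) + first_b)    # first RELUDotAdd layer
    for W, b in zip(Ws, bs):                         # config.p.out_layers blocks
        h = np.maximum(0.0, h.dot(W) + b)
    return h.dot(last_W) + last_b                    # final linear score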
def __init__(self, variables, config, proof_step, train=False):
    '''
    This is the model.  In a single pass it processes the inputs,
    computes the losses, and runs a training step if train is True.
    '''
    DefaultModel.__init__(self, config, variables, train=train)

    # fix the random seed
    if not self.train:
        np.random.seed(proof_step.context.number
                       + proof_step.prop.number
                       + proof_step.tree.size())

    main = self.main_get_vector(proof_step.tree, proof_step.context.hyps,
                                proof_step.context.f)
    main = nn.DotNode(main, self.v.W, self.g)

    # get a list [right prop, wrong prop 0, ..., wrong prop n]
    props = self.get_props(proof_step)

    out_vectors = [
        self.prop_get_vector(prop.tree, prop.hyps, prop.f)
        for prop in props
    ]
    stacked = nn.StackNode(out_vectors, self.g)
    stacked = nn.TransposeInPlaceNode(stacked, self.g)
    logits = nn.DotNode(main, stacked, self.g)

    # the correct proposition is always at index 0
    cross_entropy = nn.SoftmaxCrossEntropyLoss(0, logits, self.g)
    self.loss = nn.AddNode([self.loss, cross_entropy], self.g)

    accuracy = 1 * (np.argmax(logits.value) == 0)
    self.outputs = [cross_entropy.value, accuracy, 1.0 / len(props)]
    self.output_counts = [1, 1, 1]

    # perform the backpropagation if we are training
    if train:
        self.g.backprop(self.loss)
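# Illustrative sketch, not part of the model: the relevance loss above is a
# softmax cross-entropy over the dot products of the (projected) statement
# vector with each candidate proposition vector, with the correct proposition
# at index 0.  A plain-NumPy version (hypothetical helper):
def _sketch_relevance_loss(main_vector, prop_vectors):
    import numpy as np
    logits = np.array([np.dot(main_vector, p) for p in prop_vectors])
    shifted = logits - np.max(logits)
    log_probs = shifted - np.log(np.sum(np.exp(shifted)))
    # loss for the correct prop (index 0) and a 0/1 accuracy indicator
    return -log_probs[0], int(np.argmax(logits) == 0)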
def __init__(self, variables, config, proof_step, train=False, target_index=None):
    '''
    This is the model.  In a single pass it processes the inputs,
    computes the losses, and runs a training step if train is True.
    '''
    DefaultModel.__init__(self, config, variables, train=train)

    # fix the random seed
    if not self.train:
        np.random.seed(proof_step.context.number
                       + proof_step.prop.number
                       + proof_step.tree.size())

    self.parse_and_augment_proof_step(proof_step, target_index=target_index)

    # merge the inputs together so that the bidirectional pass
    # can run over them as a single sequence
    in_string, in_parents, in_left, in_right, in_params, depths, \
        parent_arity, leaf_position, arity = merge_graph_structures(
            [self.known_graph_structure, self.to_prove_graph_structure],
            [self.v.known_gru_block, self.v.to_prove_gru_block])

    # do the left side (encoder) GRU blocks
    to_middle = self.gru_block(self.v.forward_start, in_string, in_params,
                               hs_backward=self.v.backward_start,
                               parents=in_parents,
                               left_siblings=in_left,
                               right_siblings=in_right,
                               bidirectional=self.config.p.bidirectional,
                               structure_data=list(
                                   zip(depths, parent_arity, leaf_position, arity)),
                               feed_to_attention=self.config.p.attention)

    # set up the attentional model
    if self.config.p.attention:
        self.set_up_attention()

    # process the middle
    from_middle = [
        nn.RELUDotAdd(x, W, b, self.g)
        for x, W, b in zip(to_middle, self.v.middle_W, self.v.middle_b)
    ]

    # process the right side (decoder)
    out_string = self.out_graph_structure.string
    out_parents = self.out_graph_structure.parents
    out_left = self.out_graph_structure.left_sibling
    arity = self.out_graph_structure.arity
    leaf_position = self.out_graph_structure.leaf_position
    parent_arity = self.out_graph_structure.parent_arity
    depths = self.out_graph_structure.depth
    structure_data = list(zip(depths, parent_arity, leaf_position, arity))
    out_length = len(out_string)

    out_xs = []
    all_hs = []
    hs = from_middle
    for i in range(out_length):
        # look up the augmentation (parent and left-sibling) states
        if self.config.p.augmentation:
            parent = out_parents[i]
            parent_hs = ([x.no_parent for x in self.v.out_gru_block.aug]
                         if parent == -1 else all_hs[parent])
            left = out_left[i]
            left_hs = ([x.no_left_sibling for x in self.v.out_gru_block.aug]
                       if left == -1 else all_hs[left])
        else:
            parent_hs = None
            left_hs = None
        hs, x = self.forward_vertical_slice(
            hs, parent_hs, left_hs, out_string[i],
            self.v.out_gru_block.forward, structure_data[i],
            takes_attention=self.config.p.attention)
        all_hs.append(hs)
        out_xs.append(x)

    # calculate logits and score: the target at each position is the
    # next output token, with an end-of-section token appended
    self.correct_string = out_string[1:] + ['END_OF_SECTION']
    self.all_correct = True
    self.num_correct = 0
    self.all_logits = [self.x_to_predictions(x) for x in out_xs]
    self.prediction = []
    all_costs = [
        self.score(logits, c_token)
        for logits, c_token in zip(self.all_logits, self.correct_string)
    ]
    perplexity = nn.AddNode(all_costs, self.g)
    self.loss = nn.AddNode([perplexity, self.loss], self.g)

    # perform the backpropagation if we are training
    if train:
        self.g.backprop(self.loss)

    # put the outputs in the standard training format
    self.outputs = [perplexity.value, self.num_correct, 1 * self.all_correct]
    self.output_counts = [out_length, out_length, 1]
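# Illustrative sketch, not part of the model: the decoder loss above is the
# sum over output positions of a softmax cross-entropy between the predicted
# token logits and the next token of the target string ('perplexity' here
# names the summed negative log-likelihood, not its exponentiated form).
# A plain-NumPy version (hypothetical helper):
def _sketch_sequence_loss(all_logits, target_indices):
    import numpy as np
    total = 0.0
    for logits, target in zip(all_logits, target_indices):
        shifted = logits - np.max(logits)
        log_probs = shifted - np.log(np.sum(np.exp(shifted)))
        total += -log_probs[target]
    return total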