import torch
import torch.nn as nn
import torch.autograd as ag
import torch.cuda as cuda
from collections import deque

# Project-local names (utils.to_scalar, utils.argmax, Actions, END_OF_INPUT_TOK,
# ParserState) are assumed to be imported from the surrounding repo.


def train(data, model, optimizer, verbose=True):
    criterion = nn.NLLLoss()
    if model.use_cuda:
        criterion.cuda()

    correct_actions = 0
    total_actions = 0
    tot_loss = 0.
    instance_count = 0

    for sentence, actions in data:
        # Skip sentences that are too short to parse meaningfully
        if len(sentence) <= 2:
            continue

        optimizer.zero_grad()
        model.refresh()

        outputs, _, actions_done = model(sentence, actions)

        # Accumulate the NLL loss over every action decision in the sentence
        if model.use_cuda:
            loss = ag.Variable(cuda.FloatTensor([0]))
            action_idxs = [ag.Variable(cuda.LongTensor([a])) for a in actions_done]
        else:
            loss = ag.Variable(torch.FloatTensor([0]))
            action_idxs = [ag.Variable(torch.LongTensor([a])) for a in actions_done]

        for output, act in zip(outputs, action_idxs):
            loss += criterion(output.view(-1, 3), act)

        tot_loss += utils.to_scalar(loss.data)
        instance_count += 1

        # Count how many of the network's action choices match the gold actions
        for gold, output in zip(actions_done, outputs):
            pred_act = utils.argmax(output.data)
            if pred_act == gold:
                correct_actions += 1
        total_actions += len(outputs)

        loss.backward()
        optimizer.step()

    acc = float(correct_actions) / total_actions
    avg_loss = tot_loss / instance_count
    if verbose:
        print("Number of instances: {} Number of network actions: {}".format(
            instance_count, total_actions))
        print("Acc: {} Loss: {}".format(acc, avg_loss))
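# Illustration (not part of the original code): how one term of the loss above is
# computed. The action chooser emits a length-3 vector of log probabilities ordered
# (SHIFT, ARC_L, ARC_R); NLLLoss takes the negative log probability assigned to the
# gold action index. The numbers below are made up for the example.
def _loss_for_one_action_example():
    criterion = nn.NLLLoss()
    log_probs = ag.Variable(torch.FloatTensor([[-0.2, -1.9, -2.5]]))  # shape (1, 3)
    gold = ag.Variable(torch.LongTensor([0]))  # gold action is SHIFT (index 0)
    return criterion(log_probs, gold)  # = 0.2, i.e. -log P(SHIFT)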
def evaluate(data, model, verbose=False):
    correct_actions = 0
    total_actions = 0
    tot_loss = 0.
    instance_count = 0

    criterion = nn.NLLLoss()
    if model.use_cuda:
        criterion.cuda()

    for sentence, actions in data:
        if len(sentence) <= 1:
            continue

        outputs, _, actions_done = model(sentence, actions)

        if model.use_cuda:
            loss = ag.Variable(cuda.FloatTensor([0]))
            action_idxs = [ag.Variable(cuda.LongTensor([a])) for a in actions_done]
        else:
            loss = ag.Variable(torch.FloatTensor([0]))
            action_idxs = [ag.Variable(torch.LongTensor([a])) for a in actions_done]

        for output, act in zip(outputs, action_idxs):
            loss += criterion(output.view(-1, 3), act)

        tot_loss += utils.to_scalar(loss.data)
        instance_count += 1

        for gold, output in zip(actions_done, outputs):
            pred_act = utils.argmax(output.data)
            if pred_act == gold:
                correct_actions += 1
        total_actions += len(outputs)

    acc = float(correct_actions) / total_actions
    avg_loss = tot_loss / instance_count
    if verbose:
        print("Number of instances: {} Number of network actions: {}".format(
            instance_count, total_actions))
        print("Acc: {} Loss: {}".format(acc, avg_loss))
    return acc, avg_loss
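# Usage sketch (not part of the original module): one plausible way to drive
# train() and evaluate() over several epochs. The arguments `training_data`,
# `dev_data`, and `parser`, and the SGD hyperparameters, are placeholders.
def run_training(training_data, dev_data, parser, n_epochs=5, lr=0.01):
    import torch.optim as optim
    optimizer = optim.SGD(parser.parameters(), lr=lr)
    best_acc = 0.0
    for epoch in range(n_epochs):
        print("Epoch {}".format(epoch + 1))
        train(training_data, parser, optimizer, verbose=True)
        dev_acc, _ = evaluate(dev_data, parser, verbose=True)
        best_acc = max(best_acc, dev_acc)
    return best_acc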
def forward(self, sentence, actions=None):
    """
    Does the core parsing logic and returns:
        1. The log probabilities from every choice made
        2. The dependency graph
        3. The actions that were done, as a list

    The boilerplate at the beginning initializes a valid ParserState object;
    actions are then taken on that state by calling shift(), arc_right(), or
    arc_left(), and features are extracted from it with the feature extractor.
    If gold actions are supplied, they are followed; otherwise only valid
    actions are taken (via _validate_action).

    Symbolic constants for the actions are defined in constants.py, e.g.,
    Actions.SHIFT is 0 and Actions.ARC_L is 1, so the 0th element of the
    action chooser's output is the log probability of SHIFT, the 1st is the
    log probability of ARC_L, etc.
    """
    self.refresh()  # clear up hidden states from last run, if need be

    padded_sent = sentence + [END_OF_INPUT_TOK]

    # Initialize the parser state
    sentence_embs = self.word_embedding(padded_sent)
    parser_state = ParserState(
        padded_sent,
        sentence_embs,
        self.combiner,
        null_stack_tok_embed=self.null_stack_tok_embed,
        root_tok_embed=self.root_tok_embed)

    outputs = []       # Holds the output of each action decision
    actions_done = []  # Holds all actions we have done
    dep_graph = set()  # The dependency graph, built up as we go

    # Make the gold action queue if we have it
    if actions is not None:
        action_queue = deque()
        action_queue.extend([Actions.action_to_ix[a] for a in actions])
        have_gold_actions = True
    else:
        have_gold_actions = False

    act_opt = [Actions.SHIFT, Actions.ARC_L, Actions.ARC_R]

    while not parser_state.done_parsing():
        feats = self.feature_extractor.get_features(parser_state)
        log_probs = self.action_chooser(feats)

        if have_gold_actions:
            # Follow the gold action
            act = action_queue.popleft()
        else:
            # Greedy decoding: take the highest-scoring action that is valid
            prevalid_act = utils.argmax(log_probs.view(1, -1))
            act = parser_state._validate_action(prevalid_act)

        if act == Actions.SHIFT:
            parser_state.shift()
        elif act == Actions.ARC_L:
            dep_graph.add(parser_state.arc_left())
        else:
            dep_graph.add(parser_state.arc_right())

        actions_done.append(act_opt[act])
        outputs.append(log_probs.view(1, -1))

    return outputs, dep_graph, actions_done
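# Usage sketch (placeholder names, not original code): at prediction time,
# forward() is called without gold actions, so the parser follows its own
# greedy, validated action choices and the returned dep_graph is the
# predicted parse.
def predict_parse(parser, sentence):
    _, dep_graph, actions_done = parser(sentence)  # actions=None -> greedy decoding
    return dep_graph, actions_done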
def make_resolver(feats, emb_dict, scoring_model):
    # Returns a function that maps a list of markables to, for each markable i,
    # the index of its highest-scoring antecedent candidate under scoring_model.
    return lambda markables: [
        utils.argmax(scoring_model.score_instance(
            emb_dict[markables[0].entity], markables, i, feats))
        for i in range(len(markables))
    ]
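# Usage sketch (not in the original source): apply a resolver built by
# make_resolver() to every document in a corpus. `corpus` is assumed to be an
# iterable of markable lists; the value returned for markable i is the index of
# its highest-scoring antecedent candidate.
def resolve_corpus(corpus, feats, emb_dict, scoring_model):
    resolver = make_resolver(feats, emb_dict, scoring_model)
    return [resolver(markables) for markables in corpus]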