Example #1
import torch
import torch.nn as nn
import torch.autograd as ag
import torch.cuda as cuda

# `utils` is assumed to be the project's helper module providing to_scalar()
# and argmax(); adjust the import to match your package layout.
import utils


def train(data, model, optimizer, verbose=True):
    criterion = nn.NLLLoss()

    if model.use_cuda:
        criterion.cuda()

    correct_actions = 0
    total_actions = 0
    tot_loss = 0.
    instance_count = 0

    for sentence, actions in data:

        # Skip sentences that are too short to yield a meaningful parse
        if len(sentence) <= 2:
            continue

        optimizer.zero_grad()
        model.refresh()

        # The parser returns per-decision log probabilities, the dependency
        # graph, and the sequence of actions it actually performed
        outputs, _, actions_done = model(sentence, actions)

        # Accumulate the loss in a Variable, on the GPU if the model uses CUDA
        if model.use_cuda:
            loss = ag.Variable(cuda.FloatTensor([0]))
            action_idxs = [
                ag.Variable(cuda.LongTensor([a])) for a in actions_done
            ]
        else:
            loss = ag.Variable(torch.FloatTensor([0]))
            action_idxs = [
                ag.Variable(torch.LongTensor([a])) for a in actions_done
            ]

        # Each output row holds log probabilities over the three actions;
        # sum the negative log likelihood of the gold action at every step
        for output, act in zip(outputs, action_idxs):
            loss += criterion(output.view(-1, 3), act)

        tot_loss += utils.to_scalar(loss.data)
        instance_count += 1

        # Count how many of the parser's decisions match the gold actions
        for gold, output in zip(actions_done, outputs):
            pred_act = utils.argmax(output.data)
            if pred_act == gold:
                correct_actions += 1
        total_actions += len(outputs)

        loss.backward()
        optimizer.step()

    acc = float(correct_actions) / total_actions
    loss = float(tot_loss) / instance_count
    if verbose:
        print(
            "Number of instances: {}    Number of network actions: {}".format(
                instance_count, total_actions))
        print("Acc: {}  Loss: {}".format(acc, loss))
Example #2
def evaluate(data, model, verbose=False):

    correct_actions = 0
    total_actions = 0
    tot_loss = 0.
    instance_count = 0
    criterion = nn.NLLLoss()

    if model.use_cuda:
        criterion.cuda()

    for sentence, actions in data:

        # Only evaluate sentences long enough to be parsed
        if len(sentence) > 1:
            outputs, _, actions_done = model(sentence, actions)

            if model.use_cuda:
                loss = ag.Variable(cuda.FloatTensor([0]))
                action_idxs = [
                    ag.Variable(cuda.LongTensor([a])) for a in actions_done
                ]
            else:
                loss = ag.Variable(torch.FloatTensor([0]))
                action_idxs = [
                    ag.Variable(torch.LongTensor([a])) for a in actions_done
                ]

            for output, act in zip(outputs, action_idxs):
                loss += criterion(output.view((-1, 3)), act)

            tot_loss += utils.to_scalar(loss.data)
            instance_count += 1

            for gold, output in zip(actions_done, outputs):
                pred_act = utils.argmax(output.data)
                if pred_act == gold:
                    correct_actions += 1

            total_actions += len(outputs)

    acc = float(correct_actions) / total_actions
    loss = float(tot_loss) / instance_count
    if verbose:
        print(
            "Number of instances: {}    Number of network actions: {}".format(
                instance_count, total_actions))
        print("Acc: {}  Loss: {}".format(acc, loss))
    return acc, loss
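
For context, a minimal usage sketch of these two functions, assuming hypothetical `model`, `training_data`, and `dev_data` objects with the interfaces used above:

# Usage sketch (hypothetical model / data / hyperparameters): run a few
# epochs of train() and track dev-set performance with evaluate().
import torch.optim as optim

optimizer = optim.SGD(model.parameters(), lr=0.01)
for epoch in range(5):
    train(training_data, model, optimizer, verbose=True)
    dev_acc, dev_loss = evaluate(dev_data, model, verbose=True)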
Example #3
    def forward(self, sentence, actions=None):
        """
        Does the core parsing logic.
        Make sure to return everything that needs to be returned
            1. The log probabilities from every choice made
            2. The dependency graph
            3. The actions you did, as a list

        The boiler plate at the beginning initializes a valid
        ParserState object, and now you may do actions on that state by calling
        shift(), arc_right(), arc_left(), or get features from it in your
        feature extractor.

        If you are supplied gold actions, you should do those.
        Make sure that you only do valid actions if you are not supplied gold actions (use _validate_action).

        Also, note that symbolic constants have been defined for the different Actions in constants.py
        E.g Actions.SHIFT is 0, Actions.ARC_L is 1, so that the 0th element of
        the output of your action chooser is the log probability of shift, the 1st is the log probability
        of ARC_L, etc.
        """
        self.refresh()  # clear up hidden states from last run, if need be

        padded_sent = sentence + [END_OF_INPUT_TOK]

        # Initialize the parser state
        sentence_embs = self.word_embedding(padded_sent)
        parser_state = ParserState(
            padded_sent,
            sentence_embs,
            self.combiner,
            null_stack_tok_embed=self.null_stack_tok_embed,
            root_tok_embed=self.root_tok_embed)

        outputs = []  # Holds the output of each action decision
        actions_done = []  # Holds all actions we have done
        dep_graph = set()  # Build this up as you go

        # Make the gold action queue if we have it
        if actions is not None:
            action_queue = deque()
            action_queue.extend([Actions.action_to_ix[a] for a in actions])
            have_gold_actions = True
        else:
            have_gold_actions = False
        act_opt = [Actions.SHIFT, Actions.ARC_L, Actions.ARC_R]
        while not parser_state.done_parsing():
            feats = self.feature_extractor.get_features(parser_state)
            log_probs = self.action_chooser(feats)
            if have_gold_actions:
                # Follow the gold action sequence
                act = action_queue.popleft()
            else:
                # Otherwise take the highest-scoring action, restricted to
                # actions that are valid in the current state
                prevalid_act = utils.argmax(log_probs.view(1, -1))
                act = parser_state._validate_action(prevalid_act)

            # Apply the chosen action; arcs are added to the dependency graph
            if act == Actions.SHIFT:
                parser_state.shift()
            elif act == Actions.ARC_L:
                dep_graph.add(parser_state.arc_left())
            else:
                dep_graph.add(parser_state.arc_right())

            actions_done.append(act_opt[act])
            outputs.append(log_probs.view(1, -1))
        return outputs, dep_graph, actions_done
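
As a quick illustration of the index convention described in the docstring, a minimal sketch (hypothetical `model` and `sentence`) that decodes the predicted action index for each parser step from the returned log-probability rows:

# Illustration only (hypothetical model / sentence). Column 0 of each row is
# the log probability of SHIFT, column 1 of ARC_L, column 2 of ARC_R.
outputs, dep_graph, actions_done = model(sentence)
predicted = [utils.argmax(out.data) for out in outputs]  # indices into act_opt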
def make_resolver(feats, emb_dict, scoring_model):
    # For each markable index i, take the argmax of the scores produced by
    # scoring_model.score_instance for that index.
    return lambda markables: [
        utils.argmax(scoring_model.score_instance(
            emb_dict[markables[0].entity], markables, i, feats))
        for i in range(len(markables))]