Example #1
0
 def __init__(self, encoder, decoder, root_id, config, ac_size):
     """Store the model parts and pre-allocate working buffers and state pools.

     Args:
         encoder: module whose first trainable parameter decides whether
             CUDA is used (and on which device).
         decoder: decoder module, stored as-is.
         root_id: stored as ``self.root``.
         config: object providing ``train_batch_size``, ``test_batch_size``
             and ``lstm_hiddens``.
         ac_size: width of the ``cut`` buffer (second dimension).

     Raises:
         StopIteration: if the encoder has no parameter with
             ``requires_grad`` set.
     """
     self.config = config
     self.encoder = encoder
     self.decoder = decoder
     self.root = root_id

     # The first trainable encoder parameter tells us where the model lives.
     trainable = next(p for p in encoder.parameters() if p.requires_grad)
     self.use_cuda = trainable.is_cuda

     # Zero-filled scratch tensors sized by the training batch size.
     train_bs = self.config.train_batch_size
     self.bucket = Variable(
         torch.zeros(train_bs, 1, self.config.lstm_hiddens * 2)
     ).type(torch.FloatTensor)
     self.cut = Variable(torch.zeros(train_bs, ac_size)).type(torch.FloatTensor)
     self.index = Variable(torch.zeros(train_bs * 4)).type(torch.LongTensor)

     self.device = trainable.get_device() if self.use_cuda else None
     if self.use_cuda:
         self.bucket = self.bucket.cuda(self.device)
         self.index = self.index.cuda(self.device)
         self.cut = self.cut.cuda(self.device)

     self.gold_pred_pairs = []
     self.training = True

     # One pool of 1024 reusable State objects per batch slot; the pool
     # count covers whichever of the two batch sizes is larger.
     pool_count = max(self.config.train_batch_size, self.config.test_batch_size)
     self.batch_states = [
         [State() for _ in range(1024)] for _ in range(pool_count)
     ]
     self.step = [0 for _ in range(pool_count)]
Example #2
0
def get_gold_actions(data, vocab):
    """Replay the gold action sequence of every sentence and collect it.

    For each sentence the first pooled state is reset and readied, then each
    state is asked for its gold action and index features until a state
    reports ``is_end()``. The final state's result is checked against the
    sentence with ``evalDepTree``.

    Args:
        data: iterable of sentences (each must support ``len``).
        vocab: vocabulary object passed through to the ``State`` methods.

    Returns:
        ``(all_feats, all_actions)``: per-sentence lists of deep-copied
        feature indices and of gold actions, in step order.

    Raises:
        AssertionError: if the replayed result does not match the sentence
            exactly, or the number of actions is not ``(len(sentence) - 1) * 2``.
    """
    # Fixed pool of states reused for every sentence; step i writes into
    # pool[i + 1].
    pool = [State() for _ in range(1024)]
    all_feats, all_actions = [], []

    for sentence in data:
        first = pool[0]
        first.clear()
        first.ready(sentence, vocab)

        inst_feats, actions = [], []
        step = 0
        current = first
        while not current.is_end():
            gold_action = current.get_gold_action(vocab)
            gold_feats = current.prepare_index(first._word_size)
            # Copy: the feature object may be reused by the state later on.
            inst_feats.append(deepcopy(gold_feats))
            actions.append(gold_action)

            successor = pool[step + 1]
            current.move(successor, gold_action)
            step += 1
            current = successor

        all_feats.append(inst_feats)
        all_actions.append(actions)

        # Sanity check: following the gold actions must rebuild the tree.
        result = current.get_result(vocab)
        arc_total, arc_correct, rel_total, rel_correct = evalDepTree(sentence, result)
        assert arc_total == arc_correct and rel_total == rel_correct
        assert len(actions) == 2 * (len(sentence) - 1)

    return all_feats, all_actions
Example #3
0
 def __init__(self, wordEnc, EDUEnc, dec, config):
     """Store the model parts and pre-allocate one state pool per batch slot.

     Args:
         wordEnc: word-level encoder; its first trainable parameter decides
             ``self.use_cuda``.
         EDUEnc: second encoder module, stored as-is.
         dec: decoder module, stored as-is.
         config: object providing ``test_batch_size``.

     Raises:
         StopIteration: if ``wordEnc`` has no parameter with
             ``requires_grad`` set.
     """
     self.config = config
     self.wordEnc = wordEnc
     self.EDUEnc = EDUEnc
     self.dec = dec

     first_trainable = next(p for p in wordEnc.parameters() if p.requires_grad)
     self.use_cuda = first_trainable.is_cuda

     # 1024 reusable State objects and a step counter for every batch slot.
     slots = range(config.test_batch_size)
     self.batch_states = [[State() for _ in range(1024)] for _ in slots]
     self.step = [0 for _ in slots]
Example #4
0
 def move(self, batch_states, pred_actions, vocab):
     """Advance every unfinished instance by one action.

     Args:
         batch_states: list of per-instance state histories; the last
             element of each history is the current state.
         pred_actions: one action per unfinished instance, in batch order.
         vocab: unused here; kept for interface compatibility.

     Raises:
         AssertionError: if the number of actions differs from the number
             of unfinished instances.
     """
     pending = sum(1 for history in batch_states if not history[-1].is_end())
     assert len(pred_actions) == pending

     cursor = 0  # position in pred_actions; finished instances consume none
     for inst_no, history in enumerate(batch_states):
         latest = history[-1]
         if latest.is_end():
             continue
         successor = State()
         latest.move(successor, pred_actions[cursor])
         history.append(successor)
         cursor += 1
         self.step[inst_no] += 1
Example #5
0
def get_gold_candid(data, vocab):
    """Collect the candidate actions seen at every step of the gold replay.

    Each sentence is replayed by following the gold action of every state
    until a state reports ``is_end()``; the candidate actions offered by
    each visited state are recorded.

    Args:
        data: iterable of sentences.
        vocab: vocabulary object passed through to the ``State`` methods.

    Returns:
        A list with one entry per sentence, each a list of the per-step
        results of ``get_candidate_actions``.
    """
    # Fixed pool of states reused for every sentence.
    pool = [State() for _ in range(1024)]
    all_candid = []

    for sentence in data:
        current = pool[0]
        current.clear()
        current.ready(sentence, vocab)

        inst_candid = []
        step = 0
        while not current.is_end():
            # Same call order as before: gold action first, then candidates.
            gold_action = current.get_gold_action(vocab)
            inst_candid.append(current.get_candidate_actions(vocab))

            successor = pool[step + 1]
            current.move(successor, gold_action)
            step += 1
            current = successor

        all_candid.append(inst_candid)

    return all_candid
Example #6
0
def get_gold_actions(data, vocab):
    """Replay each document's gold actions and collect per-step features.

    First, the ``label`` of every reduce action is set from its
    ``label_str`` via ``vocab.rel2id``. Then each document is replayed from
    a fresh start state, recording a deep copy of the index features
    prepared before every action, and the final result is evaluated against
    the document.

    Args:
        data: re-iterable collection of documents; each must provide
            ``gold_actions`` and ``evaluate``. It is traversed twice.
        vocab: vocabulary object providing ``rel2id``; also passed to
            ``State.get_result``.

    Returns:
        ``(all_feats, all_actions)``: per-document feature lists and the
        documents' own ``gold_actions`` lists (not copies).

    Raises:
        AssertionError: if a document needs more steps than it has gold
            actions, if the feature and action counts differ, or if any of
            the four metrics is not identical after evaluation.
    """
    # Pass 1: resolve relation label ids on the gold reduce actions in place.
    for doc in data:
        for action in doc.gold_actions:
            if action.is_reduce():
                action.label = vocab.rel2id(action.label_str)

    all_actions = []
    all_feats = []
    # Fixed pool of states reused for every document; step i writes into
    # states[i + 1].
    states = [State() for _ in range(1024)]

    # Metrics are shared across documents, so the identity check below is
    # cumulative.
    S = Metric()
    N = Metric()
    R = Metric()
    F = Metric()

    # Pass 2: replay the gold actions and record the features at each step.
    for doc in data:
        start = states[0]
        start.clear()
        start.ready(doc)

        step = 0
        inst_feats = []
        action_num = len(doc.gold_actions)
        while not states[step].is_end():
            assert step < action_num
            gold_action = doc.gold_actions[step]
            gold_feats = states[step].prepare_index()
            # Copy: the feature object may be reused by the state later on.
            inst_feats.append(deepcopy(gold_feats))
            next_state = states[step + 1]
            states[step].move(next_state, gold_action)
            step += 1

        all_feats.append(inst_feats)
        all_actions.append(doc.gold_actions)
        assert len(inst_feats) == len(doc.gold_actions)

        # Sanity check: the gold replay must reproduce the document exactly.
        result = states[step].get_result(vocab)
        doc.evaluate(result, S, N, R, F)
        assert (S.bIdentical() and N.bIdentical()
                and R.bIdentical() and F.bIdentical())

    return all_feats, all_actions
Example #7
0
def get_gold_actions(data, vocab):
    """Replay the gold action sequence of every sentence and return it.

    For each sentence the first pooled state is reset and readied, then
    each state is asked for its gold action until a state reports
    ``is_end()``. The final state's result is checked against the sentence
    with ``evalDepTree``.

    Args:
        data: iterable of sentences (each must support ``len``).
        vocab: vocabulary object passed through to the ``State`` methods.

    Returns:
        A list with one list of gold actions per sentence, in step order.

    Raises:
        AssertionError: if the replayed result does not match the sentence
            exactly, or the number of actions is not ``(len(sentence) - 1) * 2``.
    """
    # Fixed pool of states reused for every sentence.
    pool = [State() for _ in range(1024)]
    all_actions = []

    for sentence in data:
        current = pool[0]
        current.clear()
        current.ready(sentence, vocab)

        actions = []
        step = 0
        while not current.is_end():
            gold_action = current.get_gold_action(vocab)
            actions.append(gold_action)

            successor = pool[step + 1]
            current.move(successor, gold_action)
            step += 1
            current = successor

        all_actions.append(actions)

        # Sanity check: following the gold actions must rebuild the tree.
        result = current.get_result(vocab)
        arc_total, arc_correct, rel_total, rel_correct = evalDepTree(sentence, result)
        assert arc_total == arc_correct and rel_total == rel_correct
        assert len(actions) == 2 * (len(sentence) - 1)

    return all_actions
Example #8
0
    def decode(self, batch_data, bacth_gold_actions, vocab):
        """Run the transition loop over a batch until every instance ends.

        In training mode the states follow the gold actions and each
        (gold, predicted) pair is recorded in ``self.gold_pred_actions``;
        otherwise the states follow the predicted actions.

        Args:
            batch_data: per-instance inputs used to ready the start states.
            bacth_gold_actions: gold actions for the batch, passed to
                ``self.get_gold_actions`` (read only in training mode).
            vocab: vocabulary object passed through to the helpers.

        Side effects:
            Resets ``self.step`` and ``self.gold_pred_actions``; sets
            ``self.batch_states`` and ``self.decoder_outputs`` (the
            per-step padded scores concatenated along dimension 1).
        """
        per_step_scores = []
        self.step.clear()
        self.gold_pred_actions.clear()

        # The batch size comes from the encoder outputs already stored on self.
        batch_size = self.encoder_outputs.size()[0]

        # Each instance gets a history list that starts with its readied state.
        batch_states = []
        for inst_no in range(batch_size):
            initial = State()
            initial.ready(batch_data[inst_no], vocab)
            self.step.append(0)
            batch_states.append([initial])

        while not self.all_states_are_finished(batch_states):
            self.prepare_atom_feat(batch_states, vocab)
            if self.training:
                gold_actions = self.get_gold_actions(batch_states, bacth_gold_actions)

            hidden_states = self.batch_hidden_state(batch_states)
            all_candidates = self.get_candidates(batch_states, vocab)
            action_scores = self.decoder.forward(hidden_states, all_candidates)

            pred_ac_ids = self.get_predicted_ac_id(action_scores)
            pred_actions = self.get_predict_actions(pred_ac_ids, vocab)
            # Padded before moving, while the set of unfinished states is
            # still the one the scores were computed for.
            padded_scores = self.padding_action_scores(batch_states, action_scores)

            if self.training:
                self.move(batch_states, gold_actions, vocab)
                self.gold_pred_actions.append((gold_actions, pred_actions))
            else:
                self.move(batch_states, pred_actions, vocab)

            per_step_scores.append(padded_scores.unsqueeze(1))

        self.batch_states = batch_states
        self.decoder_outputs = torch.cat(per_step_scores, 1)
    try:
        states_count[states.index(state_names)] += 1
    except ValueError:
        states.append(state_names)
        states_count.append(1)

    if len(new_communities) > 1:
        # match communities
        confusion = create_confusion_matrix(new_communities[-2],
                                            new_communities[-1])
        create_continuation_matrix(confusion, sigma, jaccard_null_model)
        #   [clean_old.set.set_ground_truth(event='reset') for clean_old in new_communities[-2]]
        match_communities(new_communities[-2], new_communities[-1],
                          dead_communities, confusion)

        state_transition = State(list(confusion[:-1, :-1].sum(axis=1)),
                                 list(confusion[:-1, :-1].sum(axis=0)), vi)
        state = np.reshape(confusion[:-1, :-1], state_transition.edges_len)
        state_transition.set_state(state)
        score = state_transition.similarity

        # find rebirths
        # for dead in dead_communities:
        candidate_communities = [
            communities for communities in new_communities[-1]
            if communities.community_events[-1][1] != "P"
        ]
        if candidate_communities:
            dead_confusion = create_confusion_matrix(dead_communities,
                                                     candidate_communities)
            save_jaccard_index_matrix = Community.jaccard_index.copy()
            save_continuation_matrix = Community.continuation.copy()