コード例 #1
0
    def calculate(self, w, seed):
        """Return the pointwise mutual information (PMI) of ``w`` and ``seed``.

        Probabilities are estimated at sentence level: a sentence counts for
        a word when ``word in sent`` is true, and every count is divided by
        ``self.train_size`` (presumably the number of sentences in
        ``self.train_X`` -- confirm against the constructor)::

            PMI(w, seed) = log2( P(w, seed) / (P(w) * P(seed)) )

        Args:
            w: Word whose association with the seed word is measured.
            seed: Seed word.

        Returns:
            The PMI value (base-2 logarithm) as a float.

        Raises:
            ZeroDivisionError: If ``w`` or ``seed`` occurs in no sentence
                (or ``self.train_size`` is zero).
            ValueError: If both words occur but never in the same sentence
                (``log2`` of zero).
        """
        # Gather the marginal and joint sentence counts in a single pass.
        w_count = 0
        seed_count = 0
        w_seed_count = 0
        for sent in self.train_X:
            has_w = w in sent
            has_seed = seed in sent
            if has_w:
                w_count += 1
            if has_seed:
                seed_count += 1
            if has_w and has_seed:
                w_seed_count += 1

        p_w = w_count / self.train_size
        p_seed = seed_count / self.train_size
        # The joint probability is normalised once, like the marginals.
        # Dividing by train_size twice would shift every PMI by
        # -log2(train_size).
        p_w_seed = w_seed_count / self.train_size

        return math.log2(p_w_seed / (p_w * p_seed))


if __name__ == '__main__':
    # Build the PMI helper on the SST data loaded with fine_grained=False.
    # NOTE(review): the meaning of the 0.5 argument is defined by PMI's
    # constructor, which is not visible here.
    data = SST(fine_grained=False)
    pmi = PMI(data, 0.5)

    # Score "movie" against the positive seed first, then the negative seed.
    pmi_1, pmi_2 = (pmi.calculate("movie", seed) for seed in ("good", "bad"))

    # Difference between the association with "good" and with "bad".
    lex_w = pmi_1 - pmi_2
コード例 #2
0
    def test(self):
        """Print the predicted sentiment label for every test sentence.

        For each sentence, the model is applied to that sentence's feature
        vector for every class, the per-class scores are turned into
        probabilities with a softmax, and the most probable class is looked
        up in ``self.class_dict`` and printed next to the sentence.

        Raises:
            ValueError: If ``self.class_dict`` yields a label outside 0-4.
        """
        label_names = {
            0: "Very negative",
            1: "Negative",
            2: "Neutral",
            3: "Positive",
            4: "Very positive",
        }

        self.model.eval()
        sents = self.data.test["sents"]
        # X is indexed as X[class][sentence]; X[0].shape[0] gives the number
        # of sentences to score.
        X = self._get_feature_vec(sents)

        # Predictions are only printed, so gradient tracking is not needed.
        with torch.no_grad():
            for i in range(X[0].shape[0]):
                # One score (w.v + b) per class for sentence i.
                outputs = [
                    self.model(torch.FloatTensor(X[j][i])).squeeze()
                    for j in range(self.class_size)
                ]
                probabilities = torch.softmax(torch.stack(outputs), dim=0)
                # Index of the highest probability (first one on ties).
                max_prob_idx = int(torch.argmax(probabilities))
                c = self.class_dict[max_prob_idx]
                if c not in label_names:
                    raise ValueError(f"unexpected class label: {c!r}")

                print(' '.join(sents[i]), ": ", label_names[c])

if __name__ == '__main__':
    # Load SST with fine_grained=True; the classifier's test() method maps
    # predictions to five labels, from "Very negative" to "Very positive".
    data = SST(fine_grained=True)
    # NOTE(review): the meaning of the 0.2 argument is defined by the
    # Multiclass_log_linear constructor, which is not visible here.
    classifier = Multiclass_log_linear(data,0.2)
    # Fit the model, then print a predicted label for each test sentence.
    classifier.train()
    classifier.test()