class PMITest(unittest.TestCase):
    """Unit tests for the PMI (pointwise mutual information) helper."""

    def setUp(self):
        # Three items: 'f1' is identical everywhere, 'f2' varies per item.
        self.labels = ["x1x2x3", "x1x2x3", "x4x5x6"]
        self.features_list = [
            {'f1': "alpha", "f2": "beta"},
            {'f1': "alpha", "f2": "gamma"},
            {'f1': "alpha", "f2": "beta"},
        ]
        self.pmi = PMI(self.labels, self.features_list)

    def test_pmi(self):
        # 'alpha' co-occurs with everything, so its PMI is exactly zero.
        self.assertEqual(self.pmi.pmi("x1x2x3", "alpha", "f1"), 0)
        # 'beta' also occurs with the other label -> negative association.
        self.assertLess(self.pmi.pmi("x1x2x3", "beta", "f2"), 0)
        # 'gamma' occurs only with this label -> positive association.
        self.assertGreater(self.pmi.pmi("x1x2x3", "gamma", "f2"), 0)

    def test_pmi_vector(self):
        # NOTE(review): stub — no assertions yet; kept as in the original.
        return 0
Exemple #2
0
def classify_translate_cerelation(pTrains, pTests):
    """Classify documents using translated cause-effect-relation features.

    Each input is an iterable of ``(label, p)`` pairs; ``p`` is passed to
    ``getTranlateFeaturesCERelation`` together with a shared Chinese-English
    dictionary and a PMI scorer to produce the feature words.

    :param pTrains: iterable of (label, item) training pairs
    :param pTests: iterable of (label, item) test pairs
    :return: whatever ``me_classify`` returns (maximum-entropy classification)
    """
    # Renamed from `dict` — the original shadowed the builtin.
    ce_dict = CEDict()
    pmi = PMI()

    trains = [CDocument(label, getTranlateFeaturesCERelation(p, ce_dict, pmi))
              for label, p in pTrains]
    tests = [CDocument(label, getTranlateFeaturesCERelation(p, ce_dict, pmi))
             for label, p in pTests]

    return me_classify(trains, tests)
Exemple #3
0
def blp_translate_pmi(pTrains, pTests):
    """Run bipartite label propagation (BLP) over translated PMI features.

    Builds a `CDocument` per (label, item) pair using
    ``getTranlateFeaturesPMI``, then propagates labels from the training
    documents to the test documents.

    :param pTrains: iterable of (label, item) training pairs
    :param pTests: iterable of (label, item) test pairs
    :return: None — ``LP_Classify`` is called for its side effects, matching
        the original code.
    """
    # Renamed from `dict` — the original shadowed the builtin.
    # The original also created an unused `syn = Synonym()`; removed.
    ce_dict = CEDict()
    pmi = PMI()

    trains = [CDocument(label, getTranlateFeaturesPMI(p, ce_dict, pmi))
              for label, p in pTrains]
    tests = [CDocument(label, getTranlateFeaturesPMI(p, ce_dict, pmi))
             for label, p in pTests]

    # The graph is built over both partitions; classification labels `tests`.
    blp = BLP(trains + tests)
    blp.LP_Classify(trains, tests)
Exemple #4
0
def blp_sense_sentiment(pTrains, pTests):
    """Run bipartite label propagation over sense-and-sentiment features.

    Features come from ``getFeaturesSenseAndSentiment``, which combines a
    Chinese-English dictionary, a PMI scorer, and both Chinese and English
    sentiment lexicons.

    :param pTrains: iterable of (label, item) training pairs
    :param pTests: iterable of (label, item) test pairs
    :return: None — ``LP_Classify`` is called for its side effects, matching
        the original code.
    """
    # Renamed from `dict` — the original shadowed the builtin.
    ce_dict = CEDict()
    pmi = PMI()

    cn_lexicon = CnSentimentLexicon()
    en_lexicon = EnSentimentLexicon()

    trains = [
        CDocument(label,
                  getFeaturesSenseAndSentiment(p, ce_dict, pmi,
                                               cn_lexicon, en_lexicon))
        for label, p in pTrains
    ]
    tests = [
        CDocument(label,
                  getFeaturesSenseAndSentiment(p, ce_dict, pmi,
                                               cn_lexicon, en_lexicon))
        for label, p in pTests
    ]

    blp = BLP(trains + tests)
    blp.LP_Classify(trains, tests)
Exemple #5
0
def blp_translate_cerelation(pTrains, pTests):
    """Run bipartite label propagation over translated CE-relation features.

    Same feature pipeline as ``classify_translate_cerelation`` but classifies
    via label propagation (BLP) instead of maximum entropy.

    :param pTrains: iterable of (label, item) training pairs
    :param pTests: iterable of (label, item) test pairs
    :return: None — ``LP_Classify`` is called for its side effects, matching
        the original code.
    """
    # Renamed from `dict` — the original shadowed the builtin.
    # Dead commented-out SMOOTH-feature code removed.
    ce_dict = CEDict()
    pmi = PMI()

    trains = [CDocument(label, getTranlateFeaturesCERelation(p, ce_dict, pmi))
              for label, p in pTrains]
    tests = [CDocument(label, getTranlateFeaturesCERelation(p, ce_dict, pmi))
             for label, p in pTests]

    blp = BLP(trains + tests)
    blp.LP_Classify(trains, tests)
 def setUp(self):
     """Build a small labelled fixture and the PMI instance under test."""
     labels = ["x1x2x3", "x1x2x3", "x4x5x6"]
     features = [
         {'f1': "alpha", "f2": "beta"},
         {'f1': "alpha", "f2": "gamma"},
         {'f1': "alpha", "f2": "beta"},
     ]
     self.labels = labels
     self.features_list = features
     self.pmi = PMI(labels, features)
Exemple #7
0
import argparse
import logging
import sys

# :: Logging level ::
loggingLevel = logging.INFO
logger = logging.getLogger()
logger.setLevel(loggingLevel)

ch = logging.StreamHandler(sys.stdout)
ch.setLevel(loggingLevel)
formatter = logging.Formatter('%(message)s')
ch.setFormatter(formatter)
logger.addHandler(ch)


def _str2bool(value):
    """Parse a command-line boolean.

    The original used ``type=bool``, which treats ANY non-empty string as
    True (``bool('False') is True``), so ``--BIO False`` silently enabled
    BIO tagging.  ``--BIO True`` still works as before.
    """
    if isinstance(value, bool):
        return value
    return value.strip().lower() in ('1', 'true', 'yes', 'y')


parser = argparse.ArgumentParser()

parser.add_argument('--dataset',
                    help='Location of word dataset files',
                    type=str,
                    # was required='true' — a truthy *string*, not the boolean
                    required=True)
parser.add_argument('--BIO',
                    help='State if its bio_tag',
                    type=_str2bool,
                    default=False)

args = parser.parse_args()
# Build the graph
pmi = PMI(args.dataset)
connected = pmi.build_graph()
# Performs classification
classifier = Classifier(args.dataset, args.BIO)
classifier.classify()
        # Build the graph: contextual features per sentence, a PMI vector
        # per ngram, then construct (or load a pickled) ngram graph.
        contextualfeatures_list = [
            sent2contextualfeature(s) for s in all_sents
        ]
        ngrams_list = []
        ngrams_list.append(all_label_ngram_list)
        ngrams_list.append(all_unlabel_ngram_list)
        # One-level flatten: labelled ngram groups first, then unlabelled.
        # NOTE(review): all_ngrams is not referenced below in this excerpt.
        all_ngrams = [ngram for ngrams in ngrams_list for ngram in ngrams]
        # Two-level flatten down to individual ngrams — implies each element
        # of ngrams_list is itself a list of ngram lists (TODO confirm).
        flat_ngrams = list(
            chain.from_iterable(list(chain.from_iterable(ngrams_list))))
        # Frequency of every ngram; not referenced below in this excerpt.
        all_ngram_counter = Counter(flat_ngrams)
        # Flatten the per-sentence feature lists into one feature sequence.
        all_features = [
            contextualfeature for contextualfeatures in contextualfeatures_list
            for contextualfeature in contextualfeatures
        ]
        pmi = PMI(flat_ngrams, all_features)
        all_features_dict = contextualfeatureslist2dict(
            contextualfeatures_list)
        # One PMI feature vector per ngram, stacked into a 2-D array
        # (rows align with flat_ngrams).
        pmi_vectors = np.array([
            pmi.pmi_vector(ngram, all_features_dict) for ngram in flat_ngrams
        ])
        if (args.load):
            # Reuse a previously pickled graph instead of rebuilding it.
            with open('graph.dat', 'rb') as fp:
                graph = pickle.load(fp)
        else:
            graph = Graph(flat_ngrams, pmi_vectors,
                          len(all_unlabel_ngram_list))
            if (args.save):
                with open('graph.dat', 'wb') as fp:
                    pickle.dump(graph, fp)