class PMITest(unittest.TestCase):
    """Unit tests for the PMI (pointwise mutual information) class."""

    def setUp(self):
        # Small fixture: three labelled items whose feature values overlap
        # in different ways, so PMI scores span zero/negative/positive.
        self.labels = ["x1x2x3", "x1x2x3", "x4x5x6"]
        self.features_list = [
            {'f1': "alpha", "f2": "beta"},
            {'f1': "alpha", "f2": "gamma"},
            {'f1': "alpha", "f2": "beta"},
        ]
        self.pmi = PMI(self.labels, self.features_list)

    def test_pmi(self):
        # "alpha" appears as f1 for every item, so its association with
        # any single label is expected to be exactly zero.
        self.assertEqual(self.pmi.pmi("x1x2x3", "alpha", "f1"), 0)
        self.assertLess(self.pmi.pmi("x1x2x3", "beta", "f2"), 0)
        self.assertGreater(self.pmi.pmi("x1x2x3", "gamma", "f2"), 0)

    def test_pmi_vector(self):
        # Placeholder: unittest ignores return values, so this test
        # currently asserts nothing.
        return 0
def classify_translate_cerelation(pTrains, pTests):
    """Translate docs via CE-relation features, then run MaxEnt classification.

    Args:
        pTrains: iterable of (label, document) training pairs.
        pTests: iterable of (label, document) test pairs.

    Returns:
        The result of me_classify on the built CDocument lists
        (presumably predictions/accuracy — TODO confirm against me_classify).
    """
    # Renamed from `dict` — the original shadowed the builtin.
    ce_dict = CEDict()
    pmi = PMI()
    trains = [
        CDocument(label, getTranlateFeaturesCERelation(p, ce_dict, pmi))
        for label, p in pTrains
    ]
    tests = [
        CDocument(label, getTranlateFeaturesCERelation(p, ce_dict, pmi))
        for label, p in pTests
    ]
    return me_classify(trains, tests)
def blp_translate_pmi(pTrains, pTests):
    """Translate docs via PMI features and classify with BLP label propagation.

    Args:
        pTrains: iterable of (label, document) training pairs.
        pTests: iterable of (label, document) test pairs.

    Returns:
        None — LP_Classify is called for its side effects
        (TODO confirm: the original discarded its return value too).
    """
    # Renamed from `dict` — the original shadowed the builtin.
    # NOTE(review): the original also constructed an unused Synonym();
    # dropped here as a dead local.
    ce_dict = CEDict()
    pmi = PMI()
    trains = [
        CDocument(label, getTranlateFeaturesPMI(p, ce_dict, pmi))
        for label, p in pTrains
    ]
    tests = [
        CDocument(label, getTranlateFeaturesPMI(p, ce_dict, pmi))
        for label, p in pTests
    ]
    # The graph is built over the union of train and test documents.
    blp = BLP(trains + tests)
    blp.LP_Classify(trains, tests)
def blp_sense_sentiment(pTrains, pTests):
    """Extract sense+sentiment features and classify with BLP label propagation.

    Args:
        pTrains: iterable of (label, document) training pairs.
        pTests: iterable of (label, document) test pairs.

    Returns:
        None — LP_Classify is called for its side effects
        (TODO confirm: the original discarded its return value too).
    """
    # Renamed from `dict` — the original shadowed the builtin.
    ce_dict = CEDict()
    pmi = PMI()
    cn_lexicon = CnSentimentLexicon()
    en_lexicon = EnSentimentLexicon()
    trains = [
        CDocument(label,
                  getFeaturesSenseAndSentiment(p, ce_dict, pmi,
                                               cn_lexicon, en_lexicon))
        for label, p in pTrains
    ]
    tests = [
        CDocument(label,
                  getFeaturesSenseAndSentiment(p, ce_dict, pmi,
                                               cn_lexicon, en_lexicon))
        for label, p in pTests
    ]
    # The graph is built over the union of train and test documents.
    blp = BLP(trains + tests)
    blp.LP_Classify(trains, tests)
def blp_translate_cerelation(pTrains, pTests):
    """Translate docs via CE-relation features and classify with BLP.

    Args:
        pTrains: iterable of (label, document) training pairs.
        pTests: iterable of (label, document) test pairs.

    Returns:
        None — LP_Classify is called for its side effects
        (TODO confirm: the original discarded its return value too).
    """
    # Renamed from `dict` — the original shadowed the builtin.
    # Dead commented-out SMOOTH-feature code removed.
    ce_dict = CEDict()
    pmi = PMI()
    trains = [
        CDocument(label, getTranlateFeaturesCERelation(p, ce_dict, pmi))
        for label, p in pTrains
    ]
    tests = [
        CDocument(label, getTranlateFeaturesCERelation(p, ce_dict, pmi))
        for label, p in pTests
    ]
    # The graph is built over the union of train and test documents.
    blp = BLP(trains + tests)
    blp.LP_Classify(trains, tests)
def setUp(self):
    """Build the shared fixture: three labels, three feature dicts, one PMI."""
    self.labels = ["x1x2x3", "x1x2x3", "x4x5x6"]
    self.features_list = [
        {'f1': "alpha", "f2": "beta"},
        {'f1': "alpha", "f2": "gamma"},
        {'f1': "alpha", "f2": "beta"},
    ]
    self.pmi = PMI(self.labels, self.features_list)
import logging

# :: Logging level ::
# Route all log records at INFO and above to stdout, message text only.
loggingLevel = logging.INFO
logger = logging.getLogger()
logger.setLevel(loggingLevel)
ch = logging.StreamHandler(sys.stdout)
ch.setLevel(loggingLevel)
formatter = logging.Formatter('%(message)s')
ch.setFormatter(formatter)
logger.addHandler(ch)

parser = argparse.ArgumentParser()
# Fixed: the original passed required='true' (a truthy *string*, not the bool).
parser.add_argument('--dataset', help='Location of word dataset files',
                    type=str, required=True)
# Fixed: type=bool is an argparse trap — bool('False') is True, so the flag
# could never be turned off from the command line. Parse the string instead;
# `--BIO False` now yields False, while truthy spellings still yield True.
parser.add_argument('--BIO', help='State if its bio_tag',
                    type=lambda s: s.lower() in ('true', '1', 'yes'),
                    default=False)
args = parser.parse_args()

# Build the graph
pmi = PMI(args.dataset)
connected = pmi.build_graph()

# Performs classification
classifier = Classifier(args.dataset, args.BIO)
classifier.classify()
# グラフの構築 contextualfeatures_list = [ sent2contextualfeature(s) for s in all_sents ] ngrams_list = [] ngrams_list.append(all_label_ngram_list) ngrams_list.append(all_unlabel_ngram_list) all_ngrams = [ngram for ngrams in ngrams_list for ngram in ngrams] flat_ngrams = list( chain.from_iterable(list(chain.from_iterable(ngrams_list)))) all_ngram_counter = Counter(flat_ngrams) all_features = [ contextualfeature for contextualfeatures in contextualfeatures_list for contextualfeature in contextualfeatures ] pmi = PMI(flat_ngrams, all_features) all_features_dict = contextualfeatureslist2dict( contextualfeatures_list) pmi_vectors = np.array([ pmi.pmi_vector(ngram, all_features_dict) for ngram in flat_ngrams ]) if (args.load): with open('graph.dat', 'rb') as fp: graph = pickle.load(fp) else: graph = Graph(flat_ngrams, pmi_vectors, len(all_unlabel_ngram_list)) if (args.save): with open('graph.dat', 'wb') as fp: pickle.dump(graph, fp)