def classify_translate_cerelation(pTrains, pTests):
    """Build CE-relation translation features and run ``me_classify`` on them.

    ``pTrains`` and ``pTests`` are iterables of ``(label, post)`` pairs.
    Every post is passed through ``getTranlateFeaturesCERelation`` together
    with one shared ``CEDict`` and one shared ``PMI`` instance, and the
    result is wrapped in a ``CDocument`` carrying the original label.

    Returns whatever ``me_classify(trains, tests)`` returns.
    """
    ce_dict = CEDict()
    pmi_model = PMI()

    # Training documents are built first, then test documents.
    train_docs = [
        CDocument(label, getTranlateFeaturesCERelation(post, ce_dict, pmi_model))
        for label, post in pTrains
    ]
    test_docs = [
        CDocument(label, getTranlateFeaturesCERelation(post, ce_dict, pmi_model))
        for label, post in pTests
    ]
    return me_classify(train_docs, test_docs)
def blp_translate_simple(pTrains, pTests):
    """Run ``BLP.LP_Classify`` on plain translation features.

    ``pTrains`` and ``pTests`` are iterables of ``(label, post)`` pairs.
    Each post is converted with ``getTranlateFeatures`` (sharing a single
    ``CEDict``) and wrapped in a ``CDocument``.  The ``BLP`` object is
    constructed from the concatenation of all train and test documents.

    The value returned by ``LP_Classify`` is not propagated; this function
    returns ``None``.
    """
    ce_dict = CEDict()

    def as_documents(pairs):
        # One CDocument per (label, post) pair, in input order.
        return [
            CDocument(label, getTranlateFeatures(post, ce_dict))
            for label, post in pairs
        ]

    train_docs = as_documents(pTrains)
    test_docs = as_documents(pTests)

    propagator = BLP(train_docs + test_docs)
    propagator.LP_Classify(train_docs, test_docs)
def classify_translate_simple(pTrains, pTests):
    """Build plain translation features and run ``me_classify`` on them.

    ``pTrains`` and ``pTests`` are iterables of ``(label, post)`` pairs.
    Each post is converted with ``getTranlateFeatures`` (sharing a single
    ``CEDict``) and wrapped in a ``CDocument`` carrying the original label.

    Returns whatever ``me_classify(trains, tests)`` returns.
    """
    # NOTE: a Synonym() instance used to be constructed here but nothing ever
    # read it, so the construction was dropped.  The local is also no longer
    # called ``dict``, which shadowed the builtin.
    ce_dict = CEDict()

    train_docs = [
        CDocument(label, getTranlateFeatures(post, ce_dict))
        for label, post in pTrains
    ]
    test_docs = [
        CDocument(label, getTranlateFeatures(post, ce_dict))
        for label, post in pTests
    ]
    return me_classify(train_docs, test_docs)
def blp_translate_pmi(pTrains, pTests):
    """Run ``BLP.LP_Classify`` on PMI-based translation features.

    ``pTrains`` and ``pTests`` are iterables of ``(label, post)`` pairs.
    Each post is converted with ``getTranlateFeaturesPMI`` using one shared
    ``CEDict`` and one shared ``PMI`` instance, then wrapped in a
    ``CDocument``.  The ``BLP`` object is constructed from the concatenation
    of all train and test documents.

    The value returned by ``LP_Classify`` is not propagated; this function
    returns ``None``.
    """
    # NOTE: an unused Synonym() instance was previously created here; it was
    # never read, so it is no longer constructed.  The dictionary local is
    # also no longer named ``dict`` (which shadowed the builtin).
    ce_dict = CEDict()
    pmi_model = PMI()

    def as_documents(pairs):
        # One CDocument per (label, post) pair, in input order.
        return [
            CDocument(label, getTranlateFeaturesPMI(post, ce_dict, pmi_model))
            for label, post in pairs
        ]

    train_docs = as_documents(pTrains)
    test_docs = as_documents(pTests)

    propagator = BLP(train_docs + test_docs)
    propagator.LP_Classify(train_docs, test_docs)
def blp_translate_lm(pTrains, pTests):
    """Run ``BLP.LP_Classify`` on language-model-based translation features.

    ``pTrains`` and ``pTests`` are iterables of ``(label, post)`` pairs.
    Each post is converted with ``getTranslateFeaturesByLM`` using one shared
    ``CEDict`` and one shared ``LanguageModel`` instance, then wrapped in a
    ``CDocument``.  The ``BLP`` object is constructed from the concatenation
    of all train and test documents.

    The value returned by ``LP_Classify`` is not propagated; this function
    returns ``None``.
    """
    # NOTE: an unused Synonym() instance was previously created here; it was
    # never read, so it is no longer constructed.  The dictionary local is
    # also no longer named ``dict`` (which shadowed the builtin).
    ce_dict = CEDict()
    language_model = LanguageModel()

    train_docs = [
        CDocument(label, getTranslateFeaturesByLM(post, ce_dict, language_model))
        for label, post in pTrains
    ]
    test_docs = [
        CDocument(label, getTranslateFeaturesByLM(post, ce_dict, language_model))
        for label, post in pTests
    ]

    propagator = BLP(train_docs + test_docs)
    propagator.LP_Classify(train_docs, test_docs)
def blp_sense_sentiment(pTrains, pTests):
    """Run ``BLP.LP_Classify`` on sense-and-sentiment features.

    ``pTrains`` and ``pTests`` are iterables of ``(label, post)`` pairs.
    Each post is converted with ``getFeaturesSenseAndSentiment``, which is
    given a shared ``CEDict``, ``PMI``, ``CnSentimentLexicon`` and
    ``EnSentimentLexicon``; the result is wrapped in a ``CDocument``.  The
    ``BLP`` object is constructed from the concatenation of all train and
    test documents.

    The value returned by ``LP_Classify`` is not propagated; this function
    returns ``None``.
    """
    # Resources are created once, in this order, and reused for every post.
    ce_dict = CEDict()
    pmi_model = PMI()
    chinese_lexicon = CnSentimentLexicon()
    english_lexicon = EnSentimentLexicon()

    def as_documents(pairs):
        # One CDocument per (label, post) pair, in input order.
        documents = []
        for label, post in pairs:
            features = getFeaturesSenseAndSentiment(
                post, ce_dict, pmi_model, chinese_lexicon, english_lexicon
            )
            documents.append(CDocument(label, features))
        return documents

    train_docs = as_documents(pTrains)
    test_docs = as_documents(pTests)

    propagator = BLP(train_docs + test_docs)
    propagator.LP_Classify(train_docs, test_docs)
def blp_translate_cerelation(pTrains, pTests):
    """Run ``BLP.LP_Classify`` on CE-relation translation features.

    ``pTrains`` and ``pTests`` are iterables of ``(label, post)`` pairs.
    Each post is converted with ``getTranlateFeaturesCERelation`` using one
    shared ``CEDict`` and one shared ``PMI`` instance, then wrapped in a
    ``CDocument``.  The ``BLP`` object is constructed from the concatenation
    of all train and test documents.

    The value returned by ``LP_Classify`` is not propagated; this function
    returns ``None``.
    """
    ce_dict = CEDict()
    pmi_model = PMI()

    train_docs = [
        CDocument(label, getTranlateFeaturesCERelation(post, ce_dict, pmi_model))
        for label, post in pTrains
    ]
    test_docs = [
        CDocument(label, getTranlateFeaturesCERelation(post, ce_dict, pmi_model))
        for label, post in pTests
    ]

    # A step that set words['SMOOTH'] = 1 on every document existed here only
    # as commented-out code; it is not executed.
    propagator = BLP(train_docs + test_docs)
    propagator.LP_Classify(train_docs, test_docs)
#! /usr/bin/env python #coding=utf-8 import numpy as ny from cedict import CEDict import os dict=CEDict() # init Chinese-English Dict class CDocument: def __init__(self,label,words): self.label=label self.words=words class Post: def __init__(self,content,happiness,sadness,anger,fear,surprise): self.content=content self.happiness=happiness.lower() self.sadness=sadness.lower() self.anger=anger.lower() self.fear=fear.lower() self.surprise=surprise.lower() self.words={} self.en={} self.cn={} for w in content.split(): w=w.lower() self.words[w]=1 if isASCII(w): if isRealEnglishWord(w): self.en[w]=1
from cedict import CEDict
from datesolve import *
from event import *
# Module-level CEDict instance created at import time.
ceDict = CEDict()
# Lowercase three-letter month abbreviations.
MONTHS = {'jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul', 'aug', 'sep', 'oct', 'nov', 'dec'}
# A document with its words, polarity, id, text and event name; ``label`` is
# derived from ``polarity``.
class CDocument:
    def __init__(self, words, polarity, id, text, eventName):
        self.words = words
        self.polarity = polarity
        # label is 1 when polarity compares equal to True, otherwise 0.
        if polarity == True:
            self.label = 1
        else:
            self.label = 0
        self.id = id
        self.text = text
        self.eventName = eventName
    def __repr__(self):
        # NOTE(review): the "text:" slot is filled with self.words, not
        # self.text - confirm whether that is intended.
        return "Data id:% s text:% s event:% s label:% s" %(self.id, self.words, self.eventName, self.label)
# Reads the file at ``path`` line by line; each line is stripped and, when
# non-empty, lowercased.
# NOTE(review): the file object from open() is never explicitly closed in the
# code shown here, and the loop body shown ends after the lowercasing step.
def readTweets(path):
    tweets = []
    monthDict = getMonthDict()
    for line in open(path, 'r').readlines():
        line = line.strip()
        if len(line) > 0:
            line = line.lower()