예제 #1
0
def classify_translate_cerelation(pTrains,pTests):
    """Featurize labelled items with CE-relation features and classify them.

    Every element of ``pTrains`` and ``pTests`` is unpacked as a
    ``(label, p)`` pair.  ``p`` is converted to features by
    ``getTranlateFeaturesCERelation`` (sharing one ``CEDict`` and one
    ``PMI`` instance) and wrapped in a ``CDocument`` together with its label.

    Returns:
        Whatever ``me_classify(trains, tests)`` returns.
    """
    ce_dict = CEDict()
    pmi_model = PMI()

    def to_document(label, post):
        # One document per (label, post) pair, built with the shared resources.
        features = getTranlateFeaturesCERelation(post, ce_dict, pmi_model)
        return CDocument(label, features)

    train_docs = [to_document(label, post) for label, post in pTrains]
    test_docs = [to_document(label, post) for label, post in pTests]

    return me_classify(train_docs, test_docs)
예제 #2
0
def blp_translate_simple(pTrains,pTests):
    """Featurize labelled items with ``getTranlateFeatures`` and run BLP.

    Every element of ``pTrains`` and ``pTests`` is unpacked as a
    ``(label, p)`` pair and turned into a ``CDocument`` whose words come
    from ``getTranlateFeatures(p, <CEDict instance>)``.

    A ``BLP`` object is built from the train documents followed by the test
    documents, then ``LP_Classify(trains, tests)`` is invoked on it.  The
    result of that call is not returned; this function returns ``None``.
    """
    ce_dict = CEDict()

    train_docs = [
        CDocument(label, getTranlateFeatures(post, ce_dict))
        for label, post in pTrains
    ]
    test_docs = [
        CDocument(label, getTranlateFeatures(post, ce_dict))
        for label, post in pTests
    ]

    propagator = BLP(train_docs + test_docs)
    propagator.LP_Classify(train_docs, test_docs)
예제 #3
0
def classify_translate_simple(pTrains,pTests):
    """Featurize labelled items with ``getTranlateFeatures`` and classify them.

    Every element of ``pTrains`` and ``pTests`` is unpacked as a
    ``(label, p)`` pair and turned into a ``CDocument`` whose words come
    from ``getTranlateFeatures(p, <CEDict instance>)``.

    Only the ``CEDict`` instance is needed by the feature extractor, so no
    other resource objects are constructed here.

    Returns:
        Whatever ``me_classify(trains, tests)`` returns.
    """
    # Named ce_dict so the builtin `dict` stays usable inside this function.
    ce_dict = CEDict()

    trains = [
        CDocument(label, getTranlateFeatures(p, ce_dict))
        for label, p in pTrains
    ]
    tests = [
        CDocument(label, getTranlateFeatures(p, ce_dict))
        for label, p in pTests
    ]

    return me_classify(trains, tests)
예제 #4
0
def blp_translate_pmi(pTrains,pTests):
    """Featurize labelled items with ``getTranlateFeaturesPMI`` and run BLP.

    Every element of ``pTrains`` and ``pTests`` is unpacked as a
    ``(label, p)`` pair and turned into a ``CDocument`` whose words come
    from ``getTranlateFeaturesPMI(p, <CEDict instance>, <PMI instance>)``.

    Only the ``CEDict`` and ``PMI`` instances are used by the feature
    extractor, so no other resource objects are constructed here.

    A ``BLP`` object is built from the train documents followed by the test
    documents, then ``LP_Classify(trains, tests)`` is invoked on it.  The
    result of that call is not returned; this function returns ``None``.
    """
    # Named ce_dict so the builtin `dict` stays usable inside this function.
    ce_dict = CEDict()
    pmi = PMI()

    trains = [
        CDocument(label, getTranlateFeaturesPMI(p, ce_dict, pmi))
        for label, p in pTrains
    ]
    tests = [
        CDocument(label, getTranlateFeaturesPMI(p, ce_dict, pmi))
        for label, p in pTests
    ]

    blp = BLP(trains + tests)
    blp.LP_Classify(trains, tests)
예제 #5
0
def blp_translate_lm(pTrains,pTests):
    """Featurize labelled items with ``getTranslateFeaturesByLM`` and run BLP.

    Every element of ``pTrains`` and ``pTests`` is unpacked as a
    ``(label, p)`` pair and turned into a ``CDocument`` whose words come
    from ``getTranslateFeaturesByLM(p, <CEDict instance>, <LanguageModel instance>)``.

    Only the ``CEDict`` and ``LanguageModel`` instances are used by the
    feature extractor, so no other resource objects are constructed here.

    A ``BLP`` object is built from the train documents followed by the test
    documents, then ``LP_Classify(trains, tests)`` is invoked on it.  The
    result of that call is not returned; this function returns ``None``.
    """
    # Named ce_dict so the builtin `dict` stays usable inside this function.
    ce_dict = CEDict()
    lm = LanguageModel()

    trains = [
        CDocument(label, getTranslateFeaturesByLM(p, ce_dict, lm))
        for label, p in pTrains
    ]
    tests = [
        CDocument(label, getTranslateFeaturesByLM(p, ce_dict, lm))
        for label, p in pTests
    ]

    blp = BLP(trains + tests)
    blp.LP_Classify(trains, tests)
예제 #6
0
def blp_sense_sentiment(pTrains,pTests):
    """Featurize labelled items with sense and sentiment features and run BLP.

    Every element of ``pTrains`` and ``pTests`` is unpacked as a
    ``(label, p)`` pair.  ``p`` is passed to ``getFeaturesSenseAndSentiment``
    together with one shared instance each of ``CEDict``, ``PMI``,
    ``CnSentimentLexicon`` and ``EnSentimentLexicon``; the result becomes the
    words of a ``CDocument`` carrying the label.

    A ``BLP`` object is built from the train documents followed by the test
    documents, then ``LP_Classify(trains, tests)`` is invoked on it.  The
    result of that call is not returned; this function returns ``None``.
    """
    # Resources are created once, in this order, and reused for every item.
    ce_dict = CEDict()
    pmi_model = PMI()
    chinese_lexicon = CnSentimentLexicon()
    english_lexicon = EnSentimentLexicon()

    def to_document(label, post):
        features = getFeaturesSenseAndSentiment(
            post, ce_dict, pmi_model, chinese_lexicon, english_lexicon
        )
        return CDocument(label, features)

    train_docs = [to_document(label, post) for label, post in pTrains]
    test_docs = [to_document(label, post) for label, post in pTests]

    propagator = BLP(train_docs + test_docs)
    propagator.LP_Classify(train_docs, test_docs)
예제 #7
0
def blp_translate_cerelation(pTrains,pTests):
    """Featurize labelled items with CE-relation features and run BLP.

    Every element of ``pTrains`` and ``pTests`` is unpacked as a
    ``(label, p)`` pair.  ``p`` is converted to features by
    ``getTranlateFeaturesCERelation`` (sharing one ``CEDict`` and one
    ``PMI`` instance) and wrapped in a ``CDocument`` together with its label.

    A ``BLP`` object is built from the train documents followed by the test
    documents, then ``LP_Classify(trains, tests)`` is invoked on it.  The
    result of that call is not returned; this function returns ``None``.
    """
    ce_dict = CEDict()
    pmi_model = PMI()

    train_docs = [
        CDocument(label, getTranlateFeaturesCERelation(post, ce_dict, pmi_model))
        for label, post in pTrains
    ]
    test_docs = [
        CDocument(label, getTranlateFeaturesCERelation(post, ce_dict, pmi_model))
        for label, post in pTests
    ]

    # NOTE: setting words['SMOOTH'] = 1 on every document is intentionally
    # not done here; that step is disabled.
    propagator = BLP(train_docs + test_docs)
    propagator.LP_Classify(train_docs, test_docs)
예제 #8
0
#! /usr/bin/env python
#coding=utf-8
import numpy as ny
from cedict import CEDict
import os

# Module-level Chinese-English dictionary (CEDict) instance, created once at
# import time.
# NOTE(review): this rebinds the name `dict`, so the builtin `dict` type is
# not reachable by that name in this module.
dict=CEDict()

class CDocument:
    """Plain container holding a document's label and its words."""
    def __init__(self,label,words):
        # Both arguments are stored as given, without copying.
        self.label, self.words = label, words

class Post:
    """A post's text, its five emotion annotations and its token tables.

    The five emotion arguments are stored lower-cased.  ``content`` is split
    on whitespace and every lower-cased token becomes a key of ``words``
    (value ``1``).  Tokens for which both ``isASCII`` and
    ``isRealEnglishWord`` are true also become keys of ``en``.  ``cn`` is
    created empty and is not filled in by this constructor.
    """
    def __init__(self,content,happiness,sadness,anger,fear,surprise):
        self.content = content

        # Emotion annotations are normalised to lower case.
        self.happiness = happiness.lower()
        self.sadness = sadness.lower()
        self.anger = anger.lower()
        self.fear = fear.lower()
        self.surprise = surprise.lower()

        # Dictionaries are used as sets: key = token, value = 1.
        self.words = {}
        self.en = {}
        self.cn = {}

        for token in (raw.lower() for raw in content.split()):
            self.words[token] = 1
            if not isASCII(token):
                continue
            if isRealEnglishWord(token):
                self.en[token] = 1
예제 #9
0
from cedict import CEDict
from datesolve import *
from event import *

# Shared CEDict instance, constructed once when this module is imported.
ceDict = CEDict()
# Set of lowercase three-letter abbreviations for the twelve months.
MONTHS = {'jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul', 'aug', 'sep', 'oct', 'nov', 'dec'}


class CDocument:
    """One data item: its words, polarity, derived label, id, text and event.

    ``label`` is derived from ``polarity``: it is ``1`` when
    ``polarity == True`` and ``0`` for every other value.
    """

    def __init__(self, words, polarity, id, text, eventName):
        self.words = words
        self.polarity = polarity
        # Equality with True (not general truthiness) decides the label.
        self.label = 1 if polarity == True else 0

        self.id = id
        self.text = text
        self.eventName = eventName

    def __repr__(self):
        # NOTE(review): the "text:" slot is filled with self.words rather
        # than self.text -- confirm this is intended.
        template = "Data id:% s text:% s event:% s label:% s"
        return template % (self.id, self.words, self.eventName, self.label)


def readTweets(path):
    tweets = []
    monthDict = getMonthDict()
    for line in open(path, 'r').readlines():
        line = line.strip()
        if len(line) > 0:
            line = line.lower()