def __init__(self):
        """Load the polarity corpora, collect basic statistics and train.

        Reads ``rt-polarity.pos`` and ``rt-polarity.neg`` from the directory
        returned by ``GetDataLocation().get_path()``, stores their lines and
        whitespace-separated token counts, builds the labelled training set
        (label 1 for lines of the ``.pos`` file, -1 for lines of the ``.neg``
        file), creates empty 1- to 5-gram tables for both classes and then
        calls ``self.train()``.
        """
        data = GetDataLocation()
        self.data_path = data.get_path()

        # Read each corpus exactly once and close the handle deterministically;
        # both the line list and the token count are derived from the same text.
        with open(os.path.join(self.data_path, "rt-polarity.pos"), "r") as pos_file:
            pos_text = pos_file.read()
        with open(os.path.join(self.data_path, "rt-polarity.neg"), "r") as neg_file:
            neg_text = neg_file.read()

        self.pos_lines = pos_text.splitlines()
        self.neg_lines = neg_text.splitlines()

        # Total number of whitespace-separated tokens in each corpus.
        self.pos_word_count = len(pos_text.split())
        self.neg_word_count = len(neg_text.split())

        self.trainset = [(x, 1) for x in self.pos_lines] + [(x, -1) for x in self.neg_lines]

        # One table per n-gram order and per class.  The original assigned
        # pos_4gram twice and never created pos_5gram, leaving the positive
        # side without the 5-gram table that the negative side has.
        self.pos_1gram = {}
        self.pos_2gram = {}
        self.pos_3gram = {}
        self.pos_4gram = {}
        self.pos_5gram = {}

        self.neg_1gram = {}
        self.neg_2gram = {}
        self.neg_3gram = {}
        self.neg_4gram = {}
        self.neg_5gram = {}
        self.train()
Esempio n. 2
0
    def __init__(self, n):
        """Load the training resources and train the model.

        Reads the positive/negative polarity corpora, the stop-word list and
        the positive/negative smiley lists from the directory returned by
        ``GetDataLocation().get_path()``, builds the labelled training set,
        creates the empty count tables and finally calls ``self.train(n)``.

        n -- stored as ``self.n`` and passed to ``self.train``.
        """
        data = GetDataLocation()
        self.data_path = data.get_path()

        self.symbols = ['!', '@', '#', '$', '%', '^', '&', '*', '(', ')', '-', '_', '+', '=', ',', '.',
            '<', '>', '?', '/']

        def read_text(name):
            # Return the full contents of a data file and close the handle
            # right away (the original left every opened file unclosed).
            with open(self.data_path + '/' + name, 'r') as handle:
                return handle.read()

        # Training corpora, one sentence per line.
        self.poslinesTrain = read_text('rt-polarity.pos').splitlines()
        self.neglinesTrain = read_text('rt-polarity.neg').splitlines()
        self.stop_list = read_text('stopwordslist_1.file').splitlines()

        # Whitespace-separated emoticon lists used for checking emotions.
        self.pos_smiley = read_text('positive_smile.pos').split()
        self.neg_smiley = read_text('negative_smile.neg').split()

        # Attach labels to the text to form a list of (sentence, label)
        # tuples.  Labels are 1 for positive, -1 for negative.
        self.trainset = [(x, 1) for x in self.poslinesTrain] + [(x, -1) for x in self.neglinesTrain]

        self.poswords = {}  # word counts, unigram
        self.negwords = {}
        self.poswords_bigram = {}  # bigram
        self.negwords_bigram = {}
        self.poswords_ngram = {}  # n-gram
        self.negwords_ngram = {}
        self.n = n
        self.train(self.n)
    def __init__(self):
        """Build the word model from the ``spellcheck`` data file.

        The file is located in the directory returned by
        ``GetDataLocation().get_path()``.  Its text is passed through
        ``self.words`` and ``self.train``; the result is stored in
        ``self.NWORDS``.  Also initialises the alphabet and the empty
        ``pos`` / ``neg`` lists.
        """
        data = GetDataLocation()
        path = data.get_path()
        file_name = os.path.join(path, "spellcheck")

        # open() replaces the Python 2-only file() builtin, and the with
        # block closes the handle instead of leaking it.
        with open(file_name) as corpus:
            text = corpus.read()

        self.NWORDS = self.train(self.words(text))
        self.alphabet = 'abcdefghijklmnopqrstuvwxyz'
        self.pos = []
        self.neg = []
Esempio n. 4
0
 def __init__(self):
     """Populate the default split percentages and data-file locations.

     Every path is the directory returned by
     ``GetDataLocation().get_path()`` followed by a fixed suffix.
     """
     # Both values are percentages in the 0 to 100 range.
     # NOTE(review): the defaults add up to more than 100 -- confirm that
     # the training and testing portions are meant to overlap.
     self.training_percent = 100
     self.testing_percent = 20

     root = GetDataLocation().get_path()

     # Training / testing input files and the stored model.
     self.train_file = root + "/svm.train"
     self.test_file = root + "/svm.test"
     self.model_file = root + "/model/svm.modeluni"
     self.dict_file = root + "/svm.dictuni"

     # Per-class files and the stop-word file.
     self.pos_file = root + "/svm.pos"
     self.neg_file = root + "/svm.neg"
     self.stop_words_file = root + "/stop"
     
 def __init__(self):
     """Load the stop-word list, one entry per line.

     Reads the ``stopwords`` file from the directory returned by
     ``GetDataLocation().get_path()`` into ``self.stop_word_list``.
     """
     data = GetDataLocation()
     path = data.get_path()
     file_name = os.path.join(path, "stopwords")

     # The with block closes the handle as soon as the file has been read;
     # the original left it open.
     with open(file_name) as stop_file:
         self.stop_word_list = stop_file.read().splitlines()
Esempio n. 6
0
                print "Classification Done!"
                file = open("result", 'a')
                file.write(param + ";" + str(p_acc) + '\n')
                file.close()
            except:
                file = open("result", 'a')
                file.write(param + ";ERROR\n")
                file.close()
                continue





# Build the run configuration, point it at the data directory and start the
# analysis.
settings = Settings()

data = GetDataLocation()
data_path = data.get_path()

# Split sizes, both expressed as percentages in the 0 to 100 range.
settings.training_percent = 80
settings.testing_percent = 20

# Every remaining setting is a file located under data_path.
for _attr, _suffix in (
    ("train_file", "/svm.train"),
    ("test_file", "/svm.test"),
    ("model_file", "/model/svm.modeluni"),
    ("dict_file", "/svm.dictuni"),
    ("pos_file", "/svm.pos"),
    ("neg_file", "/svm.neg"),
    ("stop_words_file", "/stop"),
):
    setattr(settings, _attr, data_path + _suffix)

run = Analysis(settings)
# Alternative entry point, currently disabled:
#run = Run(settings, False)