Esempio n. 1
0
 def classify(self, source, kaggle=None):
     """Tag every entry of every review in ``source`` with its best-scoring sentiment.

     For each entry, every sentiment starts from the log of its value in
     ``self.sentiment_probabilities``. For each word of ``entry[1]`` that
     is present in ``self.word_probabilities``, the log of that word's
     per-sentiment value is added. The sentiment with the highest total
     becomes the entry's tag. Words missing from
     ``self.word_probabilities`` are skipped.

     Args:
         source: forwarded to ``parseReviews.getReviewList`` together
             with ``defaultToZero=False``.
         kaggle: optional output path. When it is not ``None`` the tags
             are also written to that file as CSV under the header
             ``Id,answer``, with a 0-based id that keeps counting across
             all reviews.

     Returns:
         A list holding one list of tags per review, in input order.

     Raises:
         ValueError: from ``math.log`` when a stored value is not
             positive, or from ``max`` when there are no sentiments.
         KeyError: when a word lists a sentiment that has no entry in
             ``self.sentiment_probabilities``.
     """
     data = parseReviews.getReviewList(source, defaultToZero=False)
     sentiment_probs = self.sentiment_probabilities
     word_probs = self.word_probabilities

     predictions = []
     for review in data:
         tags = []
         for entry in review[1]:
             # Every sentiment starts from the log of its own probability.
             scores = {s: math.log(sentiment_probs[s]) for s in sentiment_probs}
             for word in entry[1]:
                 if word in word_probs:
                     per_sentiment = word_probs[word]
                     for sentiment in per_sentiment:
                         scores[sentiment] += math.log(per_sentiment[sentiment])
             # max() over the keys returns the first key with the highest
             # score in dict iteration order, matching a max over items.
             tags.append(max(scores, key=scores.get))
         # Parenthesised so the line parses on Python 2 and 3 alike; the
         # printed text is the same.
         print(tags)
         predictions.append(tags)

     if kaggle is not None:
         with open(kaggle, 'w') as f:
             f.write('Id,answer\n')
             all_tags = (tag for seq in predictions for tag in seq)
             for row_id, tag in enumerate(all_tags):
                 f.write(str(row_id) + ',' + str(tag) + '\n')
     return predictions
Esempio n. 2
0
def get_train_data(filename, destination):
    """Build a lookup from stripped entry text to its sentiment.

    Reviews come from ``parseReviews.getReviewList(filename)``. For every
    entry of every review, ``entry[1].strip()`` becomes the key and
    ``ToSentiment(entry[0])`` the value. A text that occurs more than once
    keeps the value of its last occurrence.

    ``destination`` is accepted but never used by this function.

    Returns:
        The dict described above.
    """
    # The handle itself is never read; the parser is given the path.
    with open(filename) as f:
        text_to_sentiment = {}
        for review in parseReviews.getReviewList(filename):
            entries = review[1]
            for entry in entries:
                sentiment = ToSentiment(entry[0])
                text_to_sentiment[entry[1].strip()] = sentiment
        return text_to_sentiment
Esempio n. 3
0
def get_test_data(filename, destination):
    """Classify every entry text of ``filename`` and write the labels as CSV.

    The stripped text of each entry (``entry[1].strip()``) of every review
    returned by ``parseReviews.getReviewList(filename)`` is collected.
    Each text is then run through ``gen_bow``, ``classifier.classify`` and
    ``parseReviews.strToSentiment``. The result is written to
    ``destination`` as one ``<id>,<label>`` row under the header
    ``Id,answer``, with a 0-based running id.

    Args:
        filename: path of the review file to read.
        destination: path of the CSV file to (over)write.

    Returns:
        The list of stripped entry texts, in file order.
    """
    # Parse before opening the output, so a missing or unreadable input
    # does not leave a freshly truncated destination file behind.
    # The input handle is never read (the parser is given the path); the
    # open is kept so a missing input still raises at this point.
    with open(filename):
        samples = [
            entry[1].strip()
            for review in parseReviews.getReviewList(filename)
            for entry in review[1]
        ]

    # The with-block closes the file; no explicit close() is needed.
    with open(destination, 'w') as out:
        out.write("Id,answer" + "\n")
        for row_id, sample in enumerate(samples):
            label = parseReviews.strToSentiment(classifier.classify(gen_bow(sample)))
            out.write(str(row_id) + "," + str(label) + "\n")
    return samples
Esempio n. 4
0
def get_train_data(filename, destination):
    """Dump the label of every entry in ``filename`` to a CSV and return a lookup.

    Reviews come from ``parseReviews.getReviewList(filename)``. For every
    entry of every review, ``entry[0]`` is written to ``destination`` as
    an ``<id>,<label>`` row under the header ``Id,Answer``, and stored in
    the returned dict under the key ``entry[1].strip()``. A text that
    occurs more than once keeps its last label.

    The id is a 0-based counter that runs across all reviews. It used to
    be reset to 0 at the start of each review, which produced duplicate
    ids in the output file.

    Args:
        filename: path of the review file to read.
        destination: path of the CSV file to (over)write.

    Returns:
        Dict mapping stripped entry text to ``entry[0]``.
    """
    # Parse before opening the output, so a missing or unreadable input
    # does not leave a freshly truncated destination file behind.
    # The input handle is never read (the parser is given the path); the
    # open is kept so a missing input still raises at this point.
    with open(filename):
        # Parenthesised prints parse on Python 2 and 3 with the same output.
        print('inside script')
        reviews = parseReviews.getReviewList(filename)

    result = {}
    # The with-block closes the file; no explicit close() is needed.
    with open(destination, 'w') as out:
        out.write('Id,Answer' + '\n')
        print('writing to file')
        entries = (entry for review in reviews for entry in review[1])
        for row_id, entry in enumerate(entries):
            result[entry[1].strip()] = entry[0]
            out.write(str(row_id) + "," + str(entry[0]) + "\n")
    return result
Esempio n. 5
0
 def parse_training(self, source):
     """Pass every entry of every review in ``source`` to ``self.update_counts``.

     Reviews are obtained from ``parseReviews.getReviewList`` with
     ``defaultToZero=False``. The entries of a review are taken from
     ``review[1]`` and handed over one at a time, in order. Nothing is
     returned.
     """
     reviews = parseReviews.getReviewList(source, defaultToZero=False)
     # Lazily flatten review -> entries so counts are updated in the same
     # order as a nested loop would produce.
     entries = (entry for review in reviews for entry in review[1])
     for entry in entries:
         self.update_counts(entry)
Esempio n. 6
0
def parseTraining(filename):
    """Return, as a list, the keys of the map built from ``filename``.

    The reviews returned by ``parseReviews.getReviewList(filename)`` are
    passed to ``parseReviews.wordSentimentMapBasic``; the keys of the
    resulting mapping are returned as a list.
    """
    # The handle itself is never read; the parser is given the path.
    with open(filename) as f:
        reviews = parseReviews.getReviewList(filename)
        sentiment_map = parseReviews.wordSentimentMapBasic(reviews)
        keys = list(sentiment_map.keys())
    return keys