def classify(self, source, kaggle=None):
    """Tag every entry of every review in *source* with its best-scoring sentiment.

    For each entry, every sentiment starts at the log of its value in
    ``self.sentiment_probabilities``.  For each word of the entry that is a
    key of ``self.word_probabilities``, the log of that word's probability
    is added to the matching sentiment's score; words that are not keys are
    skipped.  The sentiment with the highest score becomes the entry's tag.

    Args:
        source: Passed unchanged to ``parseReviews.getReviewList``.
        kaggle: Optional output path.  When not ``None``, the predictions
            are also written there as CSV: an ``Id,answer`` header followed
            by one row per tag, numbered from 0 across all reviews.

    Returns:
        A list holding one list of predicted tags per review.
    """
    data = parseReviews.getReviewList(source, defaultToZero=False)
    predictions = []
    for review in data:
        tags = []
        for entry in review[1]:
            # Seed each sentiment's score with its log prior.
            scores = {}
            for sentiment, prior in self.sentiment_probabilities.items():
                scores[sentiment] = math.log(prior)
            for word in entry[1]:
                if word in self.word_probabilities:
                    for sentiment, prob in self.word_probabilities[word].items():
                        scores[sentiment] += math.log(prob)
            # max() keeps the first maximal key, same tie rule as before.
            tags.append(max(scores, key=scores.get))
        # Parenthesised single-argument print behaves the same on
        # Python 2 (statement) and Python 3 (function).
        print(tags)
        predictions.append(tags)

    if kaggle is not None:
        with open(kaggle, 'w') as f:
            f.write('Id,answer\n')
            flat_tags = (tag for seq in predictions for tag in seq)
            for row_id, tag in enumerate(flat_tags):
                f.write(str(row_id) + ',' + str(tag) + '\n')
    return predictions
def get_train_data(filename, destination):
    """Map each stripped entry text found in *filename* to its sentiment.

    Args:
        filename: Path handed to ``parseReviews.getReviewList``.
        destination: Unused; kept so existing callers keep working.

    Returns:
        A dict whose keys are ``entry[1].strip()`` and whose values are
        ``ToSentiment(entry[0])`` for every entry of every review.  When
        the same text occurs more than once, the last occurrence wins.
    """
    # The parser is given the path itself, so no file handle is opened here.
    result = {}
    for review in parseReviews.getReviewList(filename):
        for entry in review[1]:
            result[entry[1].strip()] = ToSentiment(entry[0])
    return result
def get_test_data(filename, destination):
    """Classify every entry in *filename* and write the results to *destination*.

    The output is CSV: an ``Id,answer`` header followed by one row per
    entry, numbered from 0 in the order the entries were read.  Each answer
    is ``parseReviews.strToSentiment(classifier.classify(gen_bow(text)))``.

    Args:
        filename: Path handed to ``parseReviews.getReviewList``.
        destination: Path of the CSV file to (over)write.

    Returns:
        The list of stripped entry texts, in the order they were read.
    """
    samples = []
    for review in parseReviews.getReviewList(filename):
        for entry in review[1]:
            samples.append(entry[1].strip())

    # Build every row before touching the output file, so a failure while
    # parsing or classifying does not leave a truncated file behind.
    rows = ["Id,answer\n"]
    for row_id, sample in enumerate(samples):
        label = parseReviews.strToSentiment(classifier.classify(gen_bow(sample)))
        rows.append(str(row_id) + "," + str(label) + "\n")

    # The with-block closes the file; no explicit close() is needed.
    with open(destination, 'w') as out:
        out.writelines(rows)
    return samples
def get_train_data(filename, destination):
    """Write the labels of every entry in *filename* to *destination* as CSV.

    The output has an ``Id,Answer`` header followed by one row per entry.
    Ids run consecutively from 0 across all reviews, so every row has a
    unique Id.  (Previously the counter restarted at 0 for each review,
    which produced duplicate Ids.)

    Args:
        filename: Path handed to ``parseReviews.getReviewList``.
        destination: Path of the CSV file to (over)write.

    Returns:
        A dict mapping ``entry[1].strip()`` to ``entry[0]`` for every entry
        of every review.  When the same text occurs more than once, the
        last occurrence wins.
    """
    # Parenthesised single-argument print behaves the same on Python 2 and 3.
    print('inside script')
    reviews = parseReviews.getReviewList(filename)
    result = {}
    # The with-block closes the file; no explicit close() is needed.
    with open(destination, 'w') as out:
        out.write('Id,Answer' + '\n')
        print('writing to file')
        row_id = 0
        for review in reviews:
            for entry in review[1]:
                result[entry[1].strip()] = entry[0]
                out.write(str(row_id) + "," + str(entry[0]) + "\n")
                row_id += 1
    return result
def parse_training(self, source):
    """Feed every entry of every review in *source* to ``self.update_counts``.

    Args:
        source: Passed unchanged to ``parseReviews.getReviewList`` (called
            with ``defaultToZero=False``).
    """
    reviews = parseReviews.getReviewList(source, defaultToZero=False)
    # Walk the entries review by review, in their original order.
    all_entries = (item for rev in reviews for item in rev[1])
    for item in all_entries:
        self.update_counts(item)
def parseTraining(filename):
    """Return the keys of the basic word-sentiment map built from *filename*.

    Args:
        filename: Path handed to ``parseReviews.getReviewList``.

    Returns:
        A list of the keys of the mapping produced by
        ``parseReviews.wordSentimentMapBasic`` for the parsed reviews.
    """
    # The parser is given the path itself, so no file handle is opened here.
    reviews = parseReviews.getReviewList(filename)
    word_map = parseReviews.wordSentimentMapBasic(reviews)
    return list(word_map.keys())