Esempio n. 1
 def processReviewXls(self, sheet, row):
     '''Build a Review object from one row of an xls sheet.

     Column layout read from the row:
         0     -> review.reviewId (raw cell value)
         1     -> review.content (raw cell value; attribute name is an
                  assumption, see the NOTE in the body)
         2..8  -> Food, Drinks, Ambiance, Service, Location, Deals, Price,
                  each passed through self.XlsCheckValue

     Args:
         sheet: object exposing cell_value(row, col).
         row: index of the row to read.

     Returns:
         The populated Review.
     '''
     review = Review()
     review.reviewId = sheet.cell_value(row, 0)
     # NOTE(review): the assignment target for column 1 was missing in the
     # original (the line began with a bare "="), which made the method
     # unparseable. "content" is an assumption -- confirm the real attribute
     # name against the Review class.
     review.content = sheet.cell_value(row, 1)

     # Rating attributes in sheet order; the first one lives in column 2.
     rating_attrs = (
         "Food",
         "Drinks",
         "Ambiance",
         "Service",
         "Location",
         "Deals",
         "Price",
     )
     for col, attr in enumerate(rating_attrs, start=2):
         setattr(review, attr, self.XlsCheckValue(sheet.cell_value(row, col)))

     # NOTE(review): the original loop also visited columns 9 and 10 (it
     # iterated over 11 columns) but no branch handled them, so they are
     # left unread here as well. Map them if the sheet carries data there.
     return review
Esempio n. 2
    def stemmingStopWRemoval(self, review, vocab):
        ''' Does Following things:
        1. Tokenize review into sentences, and then into words
        2. Remove stopwords, punctuation and stem each word
        3. Add words into vocab 
        4. Make Sentence objects and corresponding Review object

        review is indexed with the keys "Ratings", "ReviewID" and "Content";
        stopwords are looked up in self.stopWords.

        NOTE(review): steps 3 and 4 are only partly present in the code as it
        stands -- vocab is never touched, nothing is ever appended to
        wordList, the Sentence object is not attached to the review, and the
        final `if` has no body. Lines appear to be missing; confirm against
        the complete source before relying on this method.
        '''
        reviewObj = Review()
        #copying ratings into reviewObj
        for ratingType, rating in review["Ratings"].items():
            reviewObj.ratings[ratingType] = rating
        reviewObj.reviewId = review["ReviewID"]

        stemmer = PorterStemmer()
        reviewContent = review["Content"]
        #TODO: Append title too!
        sentencesInReview = nltk.sent_tokenize(reviewContent)
        puncs = set(string.punctuation)  #punctuation marks
        for sentence in sentencesInReview:
            wordList = []
            words = nltk.word_tokenize(sentence)
            for word in words:
                # Skip tokens made up solely of digits and/or punctuation.
                if not all(c.isdigit() or c in puncs for c in word):
                    word = word.lower()
                    if word not in self.stopWords:
                        # word is already lower-cased above, so the second
                        # lower() call is redundant.
                        # NOTE(review): the stemmed word is not stored
                        # anywhere (neither in wordList nor in vocab).
                        word = stemmer.stem(word.lower())
            # NOTE(review): wordList is never appended to in this code, so
            # this branch cannot run as written; sentenceObj is also never
            # attached to reviewObj.
            if wordList:
                sentenceObj = Sentence(wordList)
        # NOTE(review): the body of this `if` is missing -- the method ends
        # here, which is a syntax error as written.
        if reviewObj.sentences:
Esempio n. 3
 def processReviewXls(self, sheet, row):
     '''Build a Review object from one row of an xls sheet.

     Column layout read from the row:
         0     -> review.reviewId (raw cell value)
         1     -> review.content (raw cell value; attribute name is an
                  assumption, see the NOTE in the body)
         2..8  -> Food, Drinks, Ambiance, Service, Location, Deals, Price,
                  each passed through self.XlsCheckValue

     Args:
         sheet: object exposing cell_value(row, col).
         row: index of the row to read.

     Returns:
         The populated Review.
     '''
     review = Review()
     review.reviewId = sheet.cell_value(row, 0)
     # NOTE(review): the assignment target for column 1 was missing in the
     # original (the line began with a bare "="), which made the method
     # unparseable. "content" is an assumption -- confirm the real attribute
     # name against the Review class.
     review.content = sheet.cell_value(row, 1)

     # Rating attributes in sheet order; the first one lives in column 2.
     rating_attrs = (
         "Food",
         "Drinks",
         "Ambiance",
         "Service",
         "Location",
         "Deals",
         "Price",
     )
     for col, attr in enumerate(rating_attrs, start=2):
         setattr(review, attr, self.XlsCheckValue(sheet.cell_value(row, col)))

     # NOTE(review): the original loop also visited columns 9 and 10 (it
     # iterated over 11 columns) but no branch handled them, so they are
     # left unread here as well. Map them if the sheet carries data there.
     return review