def loadReviewAndProcess(survey_id, provider):
    """Split the stored reviews of a survey into processed sentences.

    Args:
        survey_id: A single survey id, or a list of ids (queried with
            ``survey_id__in``).
        provider: Provider value used to filter the ``Reviews`` query.

    Returns:
        A ``(qualified_sentences, m_sentences)`` tuple: the raw text of every
        sentence with more than two tokens, and the matching ``Sentence``
        objects built from ``addSentences``.

    Side effects:
        Prints the review count, a progress marker every 100 reviews and a
        final message, and calls ``expandVocabulary`` once per review.
    """
    if isinstance(survey_id, list):
        reviews = Reviews.objects(survey_id__in=survey_id, provider=provider)
    else:
        reviews = Reviews.objects(survey_id=survey_id, provider=provider)
    print(survey_id, "-COUNT-", reviews.count())

    m_sentences = []
    qualified_sentences = []

    # Reviews are numbered from 1 in query order; that number is what gets
    # stored on each Sentence.
    for review_number, review in enumerate(reviews, start=1):
        review_sentences = []
        for sentence in sent_tokenize(review.review):
            tokens = word_tokenize(sentence)
            # Discard sentences that are too short.
            if tokens is None or len(tokens) <= 2:
                continue
            tagged = pos_tag(tokens)
            lemmas = getLemma(tokens)
            stn = addSentences(tokens, tagged, lemmas, stops)
            review_sentences.append(
                Sentence(review_number, review.rating, stn)
            )
            qualified_sentences.append(sentence)

        m_sentences.extend(review_sentences)
        # Grow the indexed vocabulary with this review's sentences.
        expandVocabulary(review_sentences)

        if review_number % 100 == 0:
            print("#", review_number)

    print("")
    print("Finished Loading reviews")
    return qualified_sentences, m_sentences
def loadReviewAndProcess(survey_id, provider):
    """Load reviews for one or more surveys and turn them into sentences.

    Args:
        survey_id: Either one survey id or a list of survey ids; a list is
            matched with the ``survey_id__in`` query operator.
        provider: Provider value the ``Reviews`` query is filtered by.

    Returns:
        ``(qualified_sentences, m_sentences)`` where the first list holds the
        text of each sentence that has more than two tokens and the second
        holds the ``Sentence`` object created for it.

    Side effects:
        Prints the number of matching reviews, a ``#`` progress line after
        every 100th review and a closing message; ``expandVocabulary`` is
        called with each review's sentences.
    """
    # Pick the query keyword once instead of duplicating the whole call.
    id_filter = "survey_id__in" if isinstance(survey_id, list) else "survey_id"
    reviews = Reviews.objects(**{id_filter: survey_id, "provider": provider})
    print(survey_id, "-COUNT-", reviews.count())

    all_sentences = []
    kept_texts = []
    processed = 0

    for review in reviews:
        current = []
        for text in sent_tokenize(review.review):
            words = word_tokenize(text)
            # Only sentences with more than two tokens are kept.
            if (words is not None) and (len(words) > 2):
                parsed = addSentences(
                    words, pos_tag(words), getLemma(words), stops
                )
                # The review id stored on a Sentence is 1-based.
                current.append(Sentence(processed + 1, review.rating, parsed))
                kept_texts.append(text)

        all_sentences += current
        # Extend the indexed vocabulary with what this review contributed.
        expandVocabulary(current)

        processed += 1
        if not processed % 100:
            print("#", processed)

    print("")
    print("Finished Loading reviews")
    return kept_texts, all_sentences
def run(self):
    """Derive per-aspect ratings for each review and save them as ``Aspect``.

    Reads ``self.sid``, ``self.p`` and ``self.scopy`` and the module-level
    ``verbose`` flag. Sentence-level rows come from ``SentR`` (positions 1, 2
    and 5 of ``row.line`` are used as review id, aspect code and polarity),
    overall ratings come from ``Reviews``. For every review id from 1 up to
    and including the highest id found in the sentence rows, one ``Aspect``
    document is saved; a review without sentence rows gets its overall rating
    for every aspect.

    Review ids are treated as 1-based positions in the ``Reviews`` query
    result, so review ``k`` uses ``overall_ratings[k - 1]``.
    NOTE(review): this matches ``loadReviewAndProcess`` in this file, which
    numbers reviews with ``rid + 1``; confirm the ``SentR`` ids are those
    same numbers.

    Raises:
        ValueError: if a review id in the sentence rows is not an integer.
        IndexError: if a sentence row is too short, or there are fewer
            reviews than the highest review id.
    """
    # --- sentence-level rows -------------------------------------------
    try:
        sentence_rows = SentR.objects(survey_id=self.sid, provider=self.p)
    except Exception as e:
        # Fall back to the first id when self.sid cannot be used directly.
        sentence_rows = SentR.objects(survey_id=self.sid[0], provider=self.p)
        print("aspect_rating", e)
    if verbose:
        print("spamreader000", sentence_rows.count(), self.sid)

    # Group rows by review id once instead of rescanning every row for
    # every review id.
    rows_by_review = {}
    try:
        for row in sentence_rows:
            line = row.line
            review_id, aspect, polarity = line[1], line[2], line[5]
            rows_by_review.setdefault(review_id, []).append(
                [review_id, aspect, polarity]
            )
    except Exception as e:
        if verbose:
            print("aspect_rating1", e)
        raise

    # --- overall ratings -----------------------------------------------
    try:
        reviews = Reviews.objects(survey_id=self.sid, provider=self.p)
        if verbose:
            print("spamreader1", reviews.count(), self.sid)
        # Indexing forces the query to run so a bad or empty query is
        # detected here and the fallback below is used.
        reviews[0]
    except Exception as e:
        reviews = Reviews.objects(survey_id__in=self.scopy, provider=self.p)
        if verbose:
            print("aspect_rating4", e)
    if verbose:
        print("spamreader2", reviews.count(), self.scopy)

    overall_ratings = [float(review.rating) for review in reviews]

    # --- per-review aspect ratings -------------------------------------
    aspect_fields = (
        "ambience",
        "value_for_money",
        "room_service",
        "cleanliness",
        "amenities",
    )
    # Aspect codes are stored as strings in the rows; resolve them once.
    aspect_codes = {name: str(ASPECTS.index(name)) for name in aspect_fields}

    # default=0 makes the loop a no-op when there are no sentence rows
    # (previously max() raised ValueError on the empty sequence).
    last_review_id = max(
        (int(review_id) for review_id in rows_by_review), default=0
    )

    # Inclusive upper bound: the highest review id must be processed too.
    for review_id in range(1, last_review_id + 1):
        review_rows = rows_by_review.get(str(review_id), [])
        # Review ids start at 1, the ratings list at 0.
        overall = overall_ratings[review_id - 1]

        ratings = {}
        for name in aspect_fields:
            if review_rows:
                code = aspect_codes[name]
                aspect_rows = [row for row in review_rows if row[1] == code]
                ratings[name] = str(
                    aspect_rating(review_rows, aspect_rows, overall)
                )
            else:
                # No sentence rows: every aspect inherits the overall rating.
                ratings[name] = str(overall)

        Aspect(
            sector="food",
            provider=self.p,
            survey_id=self.sid,
            overall=str(overall),
            **ratings
        ).save()

    print("Aspect Rating Done")
def run(self):
    """Derive per-aspect ratings for each review and save them as ``Aspect``.

    Reads ``self.sid``, ``self.p`` and ``self.scopy`` and the module-level
    ``verbose`` flag. Sentence-level rows come from ``SentR`` (positions 1, 2
    and 5 of ``row.line`` are used as review id, aspect code and polarity),
    overall ratings come from ``Reviews``. For every review id from 1 up to
    and including the highest id found in the sentence rows, one ``Aspect``
    document is saved; a review without sentence rows gets its overall rating
    for every aspect.

    Review ids are treated as 1-based positions in the ``Reviews`` query
    result, so review ``k`` uses ``overall_ratings[k - 1]``.
    NOTE(review): this matches ``loadReviewAndProcess`` in this file, which
    numbers reviews with ``rid + 1``; confirm the ``SentR`` ids are those
    same numbers.

    Raises:
        ValueError: if a review id in the sentence rows is not an integer.
        IndexError: if a sentence row is too short, or there are fewer
            reviews than the highest review id.
    """
    # --- sentence-level rows -------------------------------------------
    try:
        sentence_rows = SentR.objects(survey_id=self.sid, provider=self.p)
    except Exception as e:
        # Fall back to the first id when self.sid cannot be used directly.
        sentence_rows = SentR.objects(survey_id=self.sid[0], provider=self.p)
        print("aspect_rating", e)
    if verbose:
        print("spamreader000", sentence_rows.count(), self.sid)

    # Group rows by review id once instead of rescanning every row for
    # every review id.
    rows_by_review = {}
    try:
        for row in sentence_rows:
            line = row.line
            review_id, aspect, polarity = line[1], line[2], line[5]
            rows_by_review.setdefault(review_id, []).append(
                [review_id, aspect, polarity]
            )
    except Exception as e:
        if verbose:
            print("aspect_rating1", e)
        raise

    # --- overall ratings -----------------------------------------------
    try:
        reviews = Reviews.objects(survey_id=self.sid, provider=self.p)
        if verbose:
            print("spamreader1", reviews.count(), self.sid)
        # Indexing forces the query to run so a bad or empty query is
        # detected here and the fallback below is used.
        reviews[0]
    except Exception as e:
        reviews = Reviews.objects(survey_id__in=self.scopy, provider=self.p)
        if verbose:
            print("aspect_rating4", e)
    if verbose:
        print("spamreader2", reviews.count(), self.scopy)

    overall_ratings = [float(review.rating) for review in reviews]

    # --- per-review aspect ratings -------------------------------------
    aspect_fields = (
        "ambience",
        "value_for_money",
        "room_service",
        "cleanliness",
        "amenities",
    )
    # Aspect codes are stored as strings in the rows; resolve them once.
    aspect_codes = {name: str(ASPECTS.index(name)) for name in aspect_fields}

    # default=0 makes the loop a no-op when there are no sentence rows
    # (previously max() raised ValueError on the empty sequence).
    last_review_id = max(
        (int(review_id) for review_id in rows_by_review), default=0
    )

    # Inclusive upper bound: the highest review id must be processed too.
    for review_id in range(1, last_review_id + 1):
        review_rows = rows_by_review.get(str(review_id), [])
        # Review ids start at 1, the ratings list at 0.
        overall = overall_ratings[review_id - 1]

        ratings = {}
        for name in aspect_fields:
            if review_rows:
                code = aspect_codes[name]
                aspect_rows = [row for row in review_rows if row[1] == code]
                ratings[name] = str(
                    aspect_rating(review_rows, aspect_rows, overall)
                )
            else:
                # No sentence rows: every aspect inherits the overall rating.
                ratings[name] = str(overall)

        Aspect(
            sector="food",
            provider=self.p,
            survey_id=self.sid,
            overall=str(overall),
            **ratings
        ).save()

    print("Aspect Rating Done")