Example #1
0
def loadReviewAndProcess(survey_id, provider):
    """Tokenize stored reviews into sentences and grow the vocabulary.

    Queries ``Reviews`` for ``provider`` and ``survey_id``. When
    ``survey_id`` is a list it is matched with ``survey_id__in``, otherwise
    with a plain equality filter. Each review's text is split into
    sentences and every sentence with more than two tokens is kept.

    Prints the number of matching reviews up front and a progress marker
    after every 100 reviews.

    Returns:
        A ``(qualified_sentences, m_sentences)`` tuple: the raw text of the
        kept sentences, and the ``Sentence`` objects built for them. Each
        ``Sentence`` receives the 1-based position of its review in the
        query result, the review's rating and the ``addSentences`` output.
    """
    id_filter = "survey_id__in" if isinstance(survey_id, list) else "survey_id"
    reviews = Reviews.objects(**{id_filter: survey_id, "provider": provider})
    print(survey_id, "-COUNT-", reviews.count())

    kept_texts = []
    kept_objects = []

    for review_number, review in enumerate(reviews, start=1):
        review_objects = []

        for text in sent_tokenize(review.review):
            words = word_tokenize(text)
            # Sentences of two tokens or fewer are discarded.
            if words is None or len(words) <= 2:
                continue
            tagged = pos_tag(words)
            lemmas = getLemma(words)
            processed = addSentences(words, tagged, lemmas, stops)
            review_objects.append(
                Sentence(review_number, review.rating, processed))
            kept_texts.append(text)

        kept_objects.extend(review_objects)
        # The indexed vocabulary is extended one review at a time.
        expandVocabulary(review_objects)

        if review_number % 100 == 0:
            print('#', review_number)

    print('')
    print('Finished Loading reviews')
    return kept_texts, kept_objects
def loadReviewAndProcess(survey_id, provider):
    """Load reviews from the database and turn them into sentence objects.

    ``survey_id`` may be a single id or a list of ids; a list is matched
    with the ``survey_id__in`` filter. Every review is sentence-tokenized,
    and each sentence that yields more than two word tokens is kept and
    wrapped in a ``Sentence`` carrying the review's 1-based position in the
    query result, the review's rating and the ``addSentences`` output.
    The vocabulary is expanded once per review with that review's
    ``Sentence`` objects.

    The review count and a ``#`` progress line every 100 reviews are
    printed to stdout.

    Returns:
        ``(qualified_sentences, m_sentences)`` -- the kept sentence strings
        and the corresponding ``Sentence`` objects, in processing order.
    """
    if isinstance(survey_id, list):
        matching = Reviews.objects(survey_id__in=survey_id, provider=provider)
    else:
        matching = Reviews.objects(survey_id=survey_id, provider=provider)
    total = matching.count()
    print(survey_id, "-COUNT-", total)

    sentence_objects = []
    sentence_texts = []
    processed_reviews = 0

    for review in matching:
        position = processed_reviews + 1
        current = []

        for raw_sentence in sent_tokenize(review.review):
            tokens = word_tokenize(raw_sentence)
            long_enough = tokens is not None and len(tokens) > 2
            if long_enough:
                annotated = addSentences(
                    tokens, pos_tag(tokens), getLemma(tokens), stops
                )
                current.append(Sentence(position, review.rating, annotated))
                sentence_texts.append(raw_sentence)

        sentence_objects += current
        # Grow the indexed vocabulary with this review's sentences.
        expandVocabulary(current)

        processed_reviews = position
        if processed_reviews % 100 == 0:
            print("#", processed_reviews)

    print("")
    print("Finished Loading reviews")
    return sentence_texts, sentence_objects
Example #3
0
    def run(self):
        """Compute per-aspect ratings for every review and save them.

        Reads sentence-level results from ``SentR`` (``line[1]`` is used as
        the review ID, ``line[2]`` as the aspect code and ``line[5]`` as the
        polarity) and the overall ratings from ``Reviews``. For each review
        ID from 1 up to and including the highest ID found in the sentence
        results, one ``Aspect`` document is saved.

        A review with sentence rows gets each aspect rated through
        ``aspect_rating``; a review with none gets the overall rating for
        every aspect. With no sentence rows at all, nothing is saved.

        Raises:
            Exception: any error raised while iterating the sentence
                results, reading ratings, or saving is propagated.
            IndexError: if a review ID exceeds the number of ``Reviews``
                documents returned.
        """
        # Fall back to the first survey id if the query with self.sid fails.
        try:
            sentence_results = SentR.objects(survey_id=self.sid,
                                             provider=self.p)
        except Exception as e:
            sentence_results = SentR.objects(survey_id=self.sid[0],
                                             provider=self.p)
            print("aspect_rating", e)
        if verbose:
            print("spamreader000", sentence_results.count(), self.sid)

        # Each entry is [review_ID, aspect, polarity].
        data = []
        try:
            for row in sentence_results:
                line = row.line
                data.append([line[1], line[2], line[5]])
        except Exception as e:
            if verbose:
                print("aspect_rating1", e)
            raise

        try:
            reviews = Reviews.objects(survey_id=self.sid, provider=self.p)
            if verbose:
                print("spamreader1", reviews.count(), self.sid)
            # Probe the first element: an empty result raises and triggers
            # the fallback query over all survey ids in self.scopy.
            reviews[0]
        except Exception as e:
            reviews = Reviews.objects(survey_id__in=self.scopy,
                                      provider=self.p)
            if verbose:
                print("aspect_rating4", e)
                print("spamreader2", reviews.count(), self.scopy)

        overall_ratings = [float(row.rating) for row in reviews]

        # default=0 makes an empty result set a no-op instead of a ValueError.
        last_review_ID = max((int(row[0]) for row in data), default=0)

        # Group the sentence rows by review ID once instead of rescanning
        # the whole list for every review.
        rows_by_review = {}
        for row in data:
            rows_by_review.setdefault(row[0], []).append(row)

        aspect_fields = ('ambience', 'value_for_money', 'room_service',
                         'cleanliness', 'amenities')
        # Aspect codes are compared as strings, matching the stored rows.
        aspect_codes = {name: str(ASPECTS.index(name))
                        for name in aspect_fields}

        # The upper bound is inclusive so the last review is rated as well.
        for review_ID in range(1, last_review_ID + 1):
            review_rows = rows_by_review.get(str(review_ID), [])
            # NOTE(review): assumes review IDs are 1-based positions in the
            # Reviews query order, hence the -1 -- confirm against the code
            # that writes SentR.
            overall = overall_ratings[review_ID - 1]

            ratings = {}
            for name in aspect_fields:
                if review_rows:
                    aspect_rows = [row for row in review_rows
                                   if row[1] == aspect_codes[name]]
                    ratings[name] = str(
                        aspect_rating(review_rows, aspect_rows, overall))
                else:
                    # No sentences for this review: use the overall rating.
                    ratings[name] = str(overall)

            Aspect(sector="food",
                   provider=self.p,
                   survey_id=self.sid,
                   overall=str(overall),
                   **ratings).save()
            print("Aspect Rating Done")
Example #4
0
	def run(self):
		"""Compute per-aspect ratings for every review and save them.

		Reads sentence-level results from ``SentR`` (``line[1]`` is used as
		the review ID, ``line[2]`` as the aspect code and ``line[5]`` as the
		polarity) and the overall ratings from ``Reviews``. For each review
		ID from 1 up to and including the highest ID found in the sentence
		results, one ``Aspect`` document is saved.

		A review with sentence rows gets each aspect rated through
		``aspect_rating``; a review with none gets the overall rating for
		every aspect. With no sentence rows at all, nothing is saved.

		Raises:
			Exception: any error raised while iterating the sentence
				results, reading ratings, or saving is propagated.
			IndexError: if a review ID exceeds the number of ``Reviews``
				documents returned.
		"""
		# Fall back to the first survey id if the query with self.sid fails.
		try:
			sentence_results = SentR.objects(survey_id=self.sid, provider=self.p)
		except Exception as e:
			sentence_results = SentR.objects(survey_id=self.sid[0], provider=self.p)
			print("aspect_rating", e)
		if verbose:
			print("spamreader000", sentence_results.count(), self.sid)

		# Each entry is [review_ID, aspect, polarity].
		data = []
		try:
			for row in sentence_results:
				line = row.line
				data.append([line[1], line[2], line[5]])
		except Exception as e:
			if verbose:
				print("aspect_rating1", e)
			raise

		try:
			reviews = Reviews.objects(survey_id=self.sid, provider=self.p)
			if verbose:
				print("spamreader1", reviews.count(), self.sid)
			# Probe the first element: an empty result raises and triggers
			# the fallback query over all survey ids in self.scopy.
			reviews[0]
		except Exception as e:
			reviews = Reviews.objects(survey_id__in=self.scopy, provider=self.p)
			if verbose:
				print("aspect_rating4", e)
				print("spamreader2", reviews.count(), self.scopy)

		overall_ratings = [float(row.rating) for row in reviews]

		# default=0 makes an empty result set a no-op instead of a ValueError.
		last_review_ID = max((int(row[0]) for row in data), default=0)

		# Group the sentence rows by review ID once instead of rescanning
		# the whole list for every review.
		rows_by_review = {}
		for row in data:
			rows_by_review.setdefault(row[0], []).append(row)

		aspect_fields = ('ambience', 'value_for_money', 'room_service', 'cleanliness', 'amenities')
		# Aspect codes are compared as strings, matching the stored rows.
		aspect_codes = {name: str(ASPECTS.index(name)) for name in aspect_fields}

		# The upper bound is inclusive so the last review is rated as well.
		for review_ID in range(1, last_review_ID + 1):
			review_rows = rows_by_review.get(str(review_ID), [])
			# NOTE(review): assumes review IDs are 1-based positions in the
			# Reviews query order, hence the -1 -- confirm against the code
			# that writes SentR.
			overall = overall_ratings[review_ID - 1]

			ratings = {}
			for name in aspect_fields:
				if review_rows:
					aspect_rows = [row for row in review_rows if row[1] == aspect_codes[name]]
					ratings[name] = str(aspect_rating(review_rows, aspect_rows, overall))
				else:
					# No sentences for this review: use the overall rating.
					ratings[name] = str(overall)

			Aspect(sector="food", provider=self.p, survey_id=self.sid, overall=str(overall), **ratings).save()
			print("Aspect Rating Done")