def __init__(self, path, percent, ngrams=0):
    """Set up train/test folder paths and the four Review accumulators.

    :param path: project root (Windows-style paths are assumed below)
    :param percent: train/test split fraction, stored for later use
    :param ngrams: n-gram order forwarded to every Review instance
    """
    self.data_folder = path + '\\data\\alle\\train\\'
    self.test_folder = path + '\\data\\alle\\test\\'
    self.percent = percent
    # One shared stop-word list feeds all four Review objects.
    stopwords = helpers.file_as_list(path + '\\data\\stop_words.txt')
    self.positive, self.negative, self.test_pos, self.test_neg = (
        Review(ngrams, stopwords) for _ in range(4)
    )
def test_review():
    """Round-trip a Review through to_dict()/from_dict() and print the result."""
    txt = "I don't love her any more"
    r = Review('abc', txt)
    d = r.to_dict()
    cpyr = Review.from_dict(d)
    # Fixed: the original used a Python 2 `print` statement, which is a
    # SyntaxError on Python 3; the call form works on both.
    print(cpyr.to_dict())
def review_edit_page(review_id):
    """Render and handle the edit form for an existing review (author only).

    404 if the review does not exist, 401 if the current user is not its author.
    """
    searchform = SearchForm()
    db = current_app.config["db"]
    review = db.get_review(review_id)
    if review is None:
        abort(404)
    if current_user.id != review.author:
        abort(401)
    form = ReviewForm()
    if form.validate_on_submit():
        book_id = review.book
        # Rebuild the review with the submitted score/comment, keeping
        # author, book and id unchanged.
        updated = Review(score=form.data["score"],
                         comment=form.data["comment"],
                         author=review.author,
                         book=book_id,
                         id=review.id)
        db.update_review(updated)
        flash("Review updated successfully.")
        return redirect(url_for("book_page", book_id=book_id))
    # GET (or invalid POST): pre-populate the form with current values.
    form.score.data = str(review.score)
    form.comment.data = review.comment
    return render_template("review_edit.html", form=form, searchform=searchform)
def create_review(self, item, rate):
    """Create a Review of *item* with *rate*, register it on this customer, and return it."""
    review = Review(self, item, rate)
    self.reviews.append(review)
    logger.info(
        f"New review by {review}, is created by the customer: {self.id}"
    )
    return review
def get_data_from_file(self):
    """Build bag-of-words training data from a random sample of review files.

    Returns (x_train_dataset, y_train_dataset, reviews, input_size), where the
    tensors come from self.make_bag_of_words.

    NOTE(review): random.sample(self.files, 10000) raises ValueError when
    self.files has fewer than 10000 entries — confirm upstream guarantees.
    NOTE(review): ngrams_list is assigned but never used.
    """
    ngram_count = {}
    ngrams_list = []
    reviews = []
    x_train_dataset = []
    y_train_dataset = []
    # Frequency cutoff handed to make_bag_of_words — presumably the minimum
    # n-gram count kept in the vocabulary; confirm against that method.
    threshold = 128
    train = random.sample(self.files, 10000)
    #train = self.files
    for path in train:
        # path is (directory, filename); only keep files from the train split.
        if ('train' not in path[0]):
            continue
        with open(path[0] + '/' + path[1]) as reader:
            review = reader.read()
        review = Review(review, path)
        # self.n > 1 switches to bigram features; otherwise raw tokens.
        if (self.n > 1):
            ngrams = review.generate_ngrams(n=2)
        else:
            ngrams = review.review
        reviews.append(review)
        # Accumulate global n-gram frequencies across all sampled reviews.
        for elem in ngrams:
            if (elem not in ngram_count):
                ngram_count[elem] = 0
            ngram_count[elem] += 1
    x_train_dataset, y_train_dataset, input_size = self.make_bag_of_words(
        ngram_count, reviews, threshold)
    return x_train_dataset, y_train_dataset, reviews, input_size
def post(self, product_id, member_id):
    """Create a review for (product, member) from the JSON request body and
    write it back to the client as JSON."""
    review_props = review_properties(self.request.body)
    new_review = Review(product_id=product_id,
                        member_id=member_id,
                        time=review_props["time"],
                        rating=review_props["rating"],
                        summary=review_props["summary"],
                        text=review_props["text"])
    new_review.put()
    json_response_dict = new_review.to_dict()
    # JSON has no native datetime type; the convention is an ISO 8601 string
    # with a trailing "Z" for UTC (e.g. "2016-10-04T18:55:11Z"). isoformat()
    # leaves the "Z" off because Python does not distinguish offsetless
    # datetimes from UTC ones, so append it explicitly.
    json_response_dict["time"] = json_response_dict["time"].isoformat() + "Z"
    json_response_string = json.dumps(json_response_dict)
    self.response.headers["Content-Type"] = "application/json"
    self.response.write(json_response_string)
def get_book_reviews(book_link):
    """Given a url representing a book, return the reviews for that book.

    The reviews are stored in a dictionary. Each review has an id as the
    key, and a dictionary as the value. The value stores the date and the
    contents of the review, as strings.
    """
    page = requests.get(book_link)
    soup = BeautifulSoup(page.content, 'html.parser')
    reviews = {}
    dates = soup.find_all("a", {"class": "reviewDate createdAt right"}, href=True)
    contents = soup.find_all("div", {"class": "reviewText stacked"})
    analyser = SentimentIntensityAnalyzer()
    # zip() pairs each content with its date and stops at the shorter list, so
    # a missing date tag no longer raises IndexError (the original indexed
    # dates[id] blindly). Renamed the loop variable: `id` shadowed the builtin.
    for idx, (date_tag, content_tag) in enumerate(zip(dates, contents)):
        text = content_tag.get_text().strip()
        score = analyser.polarity_scores(text)
        review = Review(date=date_tag.get_text().strip(),
                        content=text,
                        score=score)
        reviews[str(idx)] = review.create_dictionary()
        print(text)
    return reviews
def add_review(self, item, text, rating):
    """Attach a new Review of *item* to this user's review list and return it."""
    review = Review(self, item, text, rating)
    self.reviews.append(review)
    logger.info(
        f"{self.first_name} {self.last_name}'s review for '{item.title}' was added."
    )
    return review
def crawl(url):
    """Scrape a movie score page: return (movie title, list of Review objects).

    Each Review carries comment text, date, star score, and the good/bad
    sympathy counts.
    """
    data = urllib.request.urlopen(url).read()
    review_list = []
    soup = bs(data, 'html.parser')
    # Removed a dead `soup.select('h3>a')[0].text` expression: its result was
    # discarded, and it could raise IndexError on pages without that element.
    title = soup.find('h3', class_='h_movie').find('a').text
    div = soup.find('div', class_='score_result')
    for review in div.select('ul > li'):
        star = review.find('div', class_='star_score').text.strip()
        reply = review.find('div', class_='score_reple')
        comment = reply.find('p').text
        date = reply.select('dt>em')[1].text.strip()
        sympathy = review.find('div', class_='btn_area').select('strong')
        good = sympathy[0].text
        bad = sympathy[1].text
        review_list.append(Review(comment, date, star, good, bad))
    return title, review_list
def book_page(book_id):
    """Show a book's page; on a valid review POST, store the review and redirect back."""
    searchform = SearchForm()
    db = current_app.config["db"]
    book, author_id = db.get_book(book_id)
    reviews, users = db.get_reviews(book_id)
    if book is None:
        abort(404)
    form = ReviewForm()
    if form.validate_on_submit():
        # Build the new review from the submitted form and the logged-in user.
        review = Review(author=db.get_user_id(current_user.username),
                        book=book_id,
                        score=form.data["score"],
                        comment=form.data["comment"])
        review.id = db.add_review(review)
        return redirect(url_for("book_page", book_id=book_id))
    return render_template("book.html",
                           book=book,
                           author_id=author_id,
                           form=form,
                           reviews=reviews,
                           users=users,
                           searchform=searchform)
def read_data(path):
    """Read one review per line from *path* and return a list of Review objects.

    Each Review gets the stripped line as text and an empty string as label.
    """
    # `with` guarantees the file handle is closed (the original leaked it),
    # and the comprehension replaces the index-based loop.
    with open(path, 'r') as f:
        return [Review(line.strip(), "") for line in f]
def readReviews(reviewIDs, folder, ironic):
    """Returns a dictionary containing reviews to the given IDs."""
    reviews = {}
    for reviewID in reviewIDs:
        # File layout: <folder><id>.txt
        filename = "{0}{1}.txt".format(folder, reviewID)
        reviews[reviewID] = Review(filename=filename, ironic=ironic)
    return reviews
def getReviews(self):
    """Page through the recipe's reviews endpoint and return parsed Review objects.

    Fetches 50 reviews per page, sorted by MostHelpful, until an empty page
    (or a fetch failure) signals the end.
    """
    reviews_array = []
    page = 1
    hdr = {'User-Agent': 'super happy flair bot by /u/spladug'}

    def page_url(n):
        # All pages share the same query; only the page number varies
        # (the original built this URL twice, slightly duplicated).
        return (self.REVIEWS_ENDPOINT + self.id + '&pagenumber=' + str(n) +
                '&pagesize=50&recipeType=Recipe&sortBy=MostHelpful')

    req = urllib2.Request(page_url(page), headers=hdr)
    data = urllib2.urlopen(req).read()
    lxml_data = lxml.html.fromstring(data)
    reviews = lxml_data.xpath(self.REVIEWS_TAG)
    while len(reviews) != 0:
        for review in reviews:
            r = Review()
            r.parseReview(review, self.id)
            reviews_array.append(r)
        page += 1
        try:
            # Small random delay to avoid hammering the server.
            time.sleep(random.random() + 1)
            data = urllib2.urlopen(page_url(page)).read()
            lxml_data = lxml.html.fromstring(data)
            reviews = lxml_data.xpath(self.REVIEWS_TAG)
        except Exception:
            # Narrowed from a bare `except:`, which also swallowed
            # KeyboardInterrupt/SystemExit. If the next page cannot be
            # fetched, assume there are no more reviews.
            reviews = []
    return reviews_array
def callback(ch, method, properties, body):
    """
    This function will be triggered when an element enter the pastebin_parser
    queue. Review object will be create in order to parse the content of a
    text file stored at "tmp/pastebin/<filename>".
    """
    body = body.decode('utf-8')
    msg = f"Data {body} received from queue {lc.PARSER_QUEUE}"
    if gc.DEBUG:
        log_info('pastebin', msg)
    path = f"{lc.TEMP_STORAGE}/{body}"
    try:
        with open(path, 'r') as f:
            data = f.read()
    except Exception as e:
        msg = f"callback() failed, for file {body}: {e}"
        log_info('pastebin', msg)
        return
    # First line is metadata, the rest is the payload. str.partition fixes a
    # bug in the original find()/slice pair: with no newline present, find()
    # returned -1, silently dropping the last character of metadata and
    # leaving the payload equal to the whole file. partition() yields the
    # whole text as metadata and an empty payload in that case.
    metadata, _, data = data.partition('\n')
    review = Review(metadata, data)
    review.search(path, body)
    ch.basic_ack(delivery_tag=method.delivery_tag)
def test_user():
    """Smoke-test User/Movie/Review interactions by printing their state."""
    user1 = User('Martin', 'pw12345')
    user4 = User('Martin', 'pw12345')
    user2 = User('Ian', 'pw67890')
    user3 = User('Daniel', 'pw87465')

    movie = Movie("Moana", 2009)
    review = Review(movie, "It was so average", 10)
    print(movie)
    movie.runtime_minutes = 107

    movie3 = Movie("", 4545)
    print(movie3)

    movie2 = Movie("Inception", 2010)
    movie2.runtime_minutes = 133

    # Watch `movie` twice on purpose, plus one review on another user.
    user1.watch_movie(movie)
    user1.watch_movie(movie2)
    user1.watch_movie(movie)
    user2.add_review(review)

    for user in (user1, user4, user2, user3):
        print(user)
    print(user1 == user4)
    print(user1 > user2)
    print(user1.time_spent_watching_movies_minutes)
    print(user2.reviews)
    print(user1.watched_movies)
def review(user_msg):
    """
    This function analyzes the users review of a cafe and returns a response
    about the sentiment of the review.
    :param user_msg: string
    :return: str
    """
    rvw = Review(user_msg)
    predicted_sentiment = rvw.predict_sentiment()
    # Persist the review before answering.
    rvw.store_review()
    # 1 means positive sentiment; anything else is treated as negative.
    if predicted_sentiment == 1:
        return "Based on your review, it seems that you had a good time. Great! For another recommendation, please write: recommend me"
    return "Based on your review, it seems that you did not have a good time. I'm sorry! For another recommendation, please write: recommend me"
async def handle_dm(self, message):
    """Dispatch a DM either into the reporting flow (regular users) or the
    review flow (moderators listed in globals.CURRENT_REVIEWERS_DB).

    NOTE(review): globals.BAD_REPORT_TRACKER entries are initialized to 1
    here but never incremented in this function — presumably incremented
    elsewhere; verify before changing the threshold logic.
    """
    # Translate unicode
    message.content = uni2ascii(message.content)

    # Handle a help message
    if message.content == Report.HELP_KEYWORD:
        reply = "Use the `report` command to begin the reporting process.\n"
        reply += "Use the `cancel` command to cancel the report process.\n"
        await message.channel.send(reply)
        return

    author_id = message.author.id
    responses = []

    # Let the report class handle this message; forward all the messages it returns to us
    if author_id not in globals.CURRENT_REVIEWERS_DB:
        # Only respond to messages if they're part of a reporting flow
        if author_id not in self.reports and not message.content.startswith(Report.START_KEYWORD):
            return

        # If we don't currently have an active report for this user, add one
        if author_id not in self.reports:
            self.reports[author_id] = Report(self)

        responses, report = await self.reports[author_id].handle_message(message)
        for r in responses:
            await message.channel.send(r)

        # If the report is complete or cancelled, remove it from our map
        if self.reports[author_id].report_complete():
            # First completed report from this user: start their bad-report count at 1.
            if report is not None and report.reporting_user not in globals.BAD_REPORT_TRACKER:
                globals.BAD_REPORT_TRACKER[report.reporting_user] = 1
            self.reports.pop(author_id)
            # Only file a ticket if the reporter is still under the bad-report threshold.
            if report is not None and globals.BAD_REPORT_TRACKER[report.reporting_user] <= globals.BAD_REPORT_THRESHOLD:
                globals.TICKET_NUM += 1
                globals.REPORTS_DATABASE[globals.TICKET_NUM] = report
                await self.handle_report(globals.TICKET_NUM)
    else:
        # This is a review from a moderator
        # If we don't currently have an active report for this user, add one
        # ("s" is presumably the keyword that starts a review session — confirm).
        if author_id not in self.reviews and message.content != "s":
            return
        if author_id not in self.reviews:
            self.reviews[author_id] = Review(self)

        case_id = globals.CURRENT_REVIEWERS_DB[author_id]
        report = globals.REPORTS_DATABASE[(int)(case_id)]
        responses = await self.reviews[author_id].review_report(message, report, case_id, author_id)
        for r in responses:
            await message.channel.send(r)

        # If the review is complete or cancelled, remove it from our map
        if self.reviews[author_id].review_complete():
            self.reviews.pop(author_id)
            del globals.CURRENT_REVIEWERS_DB[author_id]
            # Set number of reviewers in globals file
            if (len(globals.REVIEWS_DATABASE[case_id]) >= globals.NUM_REVIEWERS):
                await self.handle_review(case_id)
def test_to_string(self):
    """__str__ must reflect the most recently assigned positivity value."""
    r = Review("Testing review.")
    # Overwrite positivity many times; only the final value matters for the check.
    for _ in range(1000):
        r.set_positivity(random.random())
    expected = ("<--- Review Text ---> \n" + r.get_text() +
                "\n <--- Positivity ---> \n" + str(r.positivity) + "\n")
    actual = str(r)
    self.assertEqual(actual, expected)
def init_data(textfile, lablesfile):
    """Pair each line of *textfile* with the same-numbered line of *lablesfile*.

    Returns a list of Review(text, label) objects with both strings stripped.
    zip() stops at the shorter file, so a labels file with fewer lines no
    longer raises IndexError, and `with` closes both handles (the original
    leaked them).
    """
    with open(textfile, 'r') as f1, open(lablesfile, 'r') as f2:
        return [Review(text.strip(), label.strip())
                for text, label in zip(f1, f2)]
def LogisticTest(self, words, parser):
    """Vectorize each word as a one-word Review and print the model's predictions."""
    x_test = [parser.bag_of_one_word(Review(word), self.n) for word in words]
    print(self.nb.predict(x_test))
    print(self.nb.predict_proba(x_test))
def load(self, directory, label):
    """Walk *directory* recursively and build one Review per line of every file."""
    reviews = []
    for root, _subfolders, files in os.walk(directory):
        for name in files:
            full_path = os.path.join(root, name)
            with open(full_path, 'r') as fin:
                # Each line becomes its own labelled Review, tagged with its source path.
                for line in fin:
                    reviews.append(Review(line, label, full_path))
    return reviews
def insert_review(self, rating, product_id, customer_email):
    """Persist a new Review row.

    :return: [True, review] on success, [False, error-or-None] on failure.
    """
    try:
        review = Review(rating, product_id, customer_email)
        self.session.add(review)
        self.session.commit()
        # Fixed: the original tested the *builtin* `id` function
        # (`if id is not None or id != 0`), which is never None, so the
        # guard was always true. Check the committed row's primary key.
        if review.id is not None and review.id != 0:
            return [True, review]
        return [False, None]
    except SQLAlchemyError as e:
        # Roll back so the session remains usable after a failed commit.
        self.session.rollback()
        return [False, e]
def scrape_review(review):
    """Extract every field from one scraped review element.

    :param review: a BeautifulSoup element wrapping a single review
    :return: a Review bundling date, summary, overall rating, the five
             sub-ratings (None when the review has none), author job title,
             main text, pro and con — all cleaned via Scrape.clean.
    """
    date = review.find("time", "date").text
    summary = review.find("h2", "summary").text.strip('"')  # abstract
    summary = Scrape.clean(
        summary
    )  # call function clean to remove bad characters and line breaks in the text
    rating = review.find("span", "rating").find("span")["title"]  # overall rating
    # subratings — default all five to None; not every review has them
    workLifeBalance = None
    cultureAndValues = None
    careerOpportunities = None
    compensationAndBenefits = None
    seniorManagement = None
    # I wrapped this part in a try because not all reviews have sub ratings
    try:
        subRating = review.find("div", "subRatings").find_all("li")
        workLifeBalance = subRating[0].find("span")["title"]
        cultureAndValues = subRating[1].find("span")["title"]
        careerOpportunities = subRating[2].find("span")["title"]
        compensationAndBenefits = subRating[3].find("span")["title"]
        seniorManagement = subRating[4].find("span")["title"]
    except Exception:
        subRating = None
    authorJobTitle = review.find("span", "authorJobTitle").text
    authorJobTitle = Scrape.clean(
        authorJobTitle
    )  # call function clean to remove bad characters and line breaks in the text
    mainText = review.find("p", "mainText").text
    mainText = Scrape.clean(
        mainText
    )  # call function clean to remove bad characters and line breaks in the text
    info = review.find_all("div", "v2__EIReviewDetailsV2__fullWidth"
                           )  # both con and pro stored in info
    pro = info[0].find("p", "v2__EIReviewDetailsV2__bodyColor").text
    pro = Scrape.clean(
        pro
    )  # call function clean to remove bad characters and line breaks in the text
    con = info[1].find("p", "v2__EIReviewDetailsV2__bodyColor").text
    con = Scrape.clean(
        con
    )  # call function clean to remove bad characters and line breaks in the text
    # returns all the information for the review, storing it in a Review object to make it easier to access later
    return Review(date, summary, rating, workLifeBalance, cultureAndValues,
                  careerOpportunities, compensationAndBenefits,
                  seniorManagement, authorJobTitle, mainText, pro, con)
def main():
    """Load data.json and insert each review record into ReviewTable."""
    with open("data.json", "r") as f:
        data = json.load(f)
    print("There are total {} data found.Please be wait.".format(len(data)))
    for review_data in data:
        ReviewTable.insert(Review(review_data))
        print("data added")
def get_data_from_csv(path):
    """Load reviews from a CSV whose columns are, in order:
    score, movie_id, sentiment, summary, text.

    :return: list of Review objects, one per CSV row.
    """
    df = pd.read_csv(path)
    data = []
    for _, row in df.iterrows():
        # Use positional .iloc: plain row[0] on the labeled Series that
        # iterrows() yields is a *label* lookup, and the integer-positional
        # fallback is deprecated/removed in modern pandas.
        data.append(
            Review(init_score=row.iloc[0],
                   movie_id=row.iloc[1],
                   sentiment=row.iloc[2],
                   summary=str(row.iloc[3]),
                   text=row.iloc[4]))
    return data
def read_review_data(filename):
    """Load reviews from a JSON file whose top level holds a 'reviews' array."""
    with open(filename) as json_file:
        raw_reviews = json.load(json_file)['reviews']

    def build(obj):
        # Review takes no constructor arguments; fields are assigned afterwards.
        review = Review()
        review.rating = obj['rating']
        review.date = obj['date']
        review.text = obj['review_text']
        return review

    return [build(obj) for obj in raw_reviews]
def readReviews(filename):
    """Read ml-100k rating rows from *filename* under the ml-100k/ folder.

    Keeps only rows where both the user id (field 0) and item id (field 1)
    are <= 100; each kept row becomes Review(user, item, rating).
    """
    review_list = []
    # `with` closes the file (the original leaked the handle), and iterating
    # the handle line-by-line avoids materializing the whole file first.
    with open("ml-100k/" + filename, "r") as f:
        for line in f:
            fields = line.split()
            if int(fields[0]) <= 100 and int(fields[1]) <= 100:
                review_list.append(Review(fields[0], fields[1], fields[2]))
    return review_list
def post(self):
    """Store a review for the electric vehicle named in the request,
    then redirect back to the vehicle list (only when a user is logged in)."""
    review = Review(electric_vehicle=ElectricVehicle.get_by_id(
        int(self.request.get('electric_vehicle_id'))).key)
    user = users.get_current_user()
    if user:
        review.content = self.request.get('content')
        # Fixed: the original called int(self.request.get('score', None)),
        # which raises TypeError whenever 'score' is missing from the
        # request. Fall back to 0 so int() always succeeds.
        review.score = int(self.request.get('score') or 0)
        review.put()
        self.add_message('Review submitted successfully.', 'success')
        logging.info(review)
        self.redirect('/electric_vehicles/')
        return
def __init__(self, review_df):
    """Build a Teacher from *review_df*, whose rows must all share one GH id."""
    unique_ids = review_df['GH'].unique()
    assert len(unique_ids) == 1, "Must pass data for a single teacher to Teacher"
    # Business-level metadata comes from the first row.
    self.teacher_id = str(review_df.GH.iloc[0])  # string
    self.subject_name = str(review_df.ZWMC.iloc[0])  # string
    # One Review per row, each back-referencing this teacher.
    self.review = [Review(dict(row), teacher=self)
                   for _, row in review_df.iterrows()]
def save_data():
    """Persist a Review built from the request's JSON body.

    :return: HTTP 200 on success, HTTP 500 with an error message on failure.
    """
    session = None
    try:
        session = Session()
        data = request.get_json()
        print(data)
        review = Review(**data)
        session.add(review)
        session.commit()
        return Response(status=200)
    except Exception as e:
        print(e)
        if session is not None:
            # Roll back so the failed transaction doesn't poison the session.
            session.rollback()
        return Response(status=500, response="Unable to successfully add the review")
    finally:
        # Fixed: the original never closed the session, leaking the
        # underlying DB connection on every request.
        if session is not None:
            session.close()