Ejemplo n.º 1
0
    def __extractReview(self, stringList, prod_id,
                        cursor):
        """Extract the review data of one product as a bulk-insert string.

        When ``stringList`` has more than one element its first element is
        skipped.  Every remaining line that contains the ``cutomer:`` marker
        becomes a ``Review`` whose ``getValuesString()`` output is collected,
        and the stripped customer id is added to ``self.customerSet``.

        Args:
            stringList: Lines of the review section of a product.
            prod_id: Product identifier handed through to ``Review``.
            cursor: Not used by this method; kept so callers keep working.

        Returns:
            The collected value strings joined with ``","`` (``""`` when no
            line contains a customer).
        """

        def between(text, start, width, stop):
            """Return the text after a marker, up to the next marker.

            ``start``/``stop`` are ``str.find`` results.  A missing start
            marker yields the ``-1`` placeholder; a missing stop marker means
            "until the end of the line" instead of silently dropping the last
            character via a ``-1`` slice bound.
            """
            if start == -1:
                return -1
            return text[start + width:stop if stop != -1 else len(text)]

        reviewList = []

        if len(stringList) > 1:
            stringList = stringList[1:]

        for string in stringList:
            # NOTE(review): "cutomer:" is presumably the spelling used by the
            # input data -- confirm before "correcting" the marker.
            customer_at = string.find("cutomer:")
            if customer_at == -1:
                # Only lines carrying a customer marker produce a Review.
                continue

            rating_at = string.find("rating:")
            votes_at = string.find("votes:")
            helpful_at = string.find("helpful:")

            # Every field is derived from the current line only, so a value
            # can no longer leak in from a previously parsed line.
            date = string[4:customer_at - 1]
            costumer = between(string, customer_at, 9, rating_at).strip()
            rating = between(string, rating_at, 8, votes_at)
            votes = between(string, votes_at, 6, helpful_at)
            helpful = between(string, helpful_at, 8, -1)

            self.customerSet.add(costumer)

            r = Review(date, votes, rating, helpful, prod_id, costumer)
            reviewList.append(r.getValuesString())

        # The joined string is what the caller uses for the bulk insert.
        return ",".join(reviewList)
Ejemplo n.º 2
0
def createReview(rec):
    """Attach a review built from ``rec`` to a room, written by a random renter.

    The room whose ``listingid`` equals ``rec['listing_id']`` is used when one
    exists; otherwise a random room is picked.  The author is a random user
    matching ``isHost=0, isAdmin=0``.  Nothing is committed here.

    Args:
        rec: Mapping providing ``listing_id``, ``scores``, ``reviewer_name``
            and ``comments``.

    Raises:
        ValueError: From ``random.randrange`` when there are no renters, or
            when the fallback is needed and there are no rooms at all.
    """
    room = Room.query.filter_by(listingid=rec['listing_id']).first()
    renters = User.query.filter_by(isHost=0, isAdmin=0).all()

    if not room:
        # No room for this listing: fall back to any room.
        rooms = Room.query.filter_by().all()
        room = rooms[random.randrange(len(rooms))]

    # Pick the renter once, for both paths.  Previously the index was only
    # assigned on the "room found" path, so the fallback path always failed.
    renter = renters[random.randrange(len(renters))]

    room.reviews.append(
        Review(int(rec['scores']),
               'Review from user {}'.format(rec['reviewer_name']),
               rec['comments'], renter.id))
Ejemplo n.º 3
0
def get_random_data():
    """Fill the database with 100 randomly generated rooms.

    For each room, ten passes each append one image, one review, one
    ``Status.rented`` reservation starting now, and one open-ended
    ``Status.not_available`` reservation starting 20-40 days after the rented
    one ends.  Each room is then added to ``db.session`` and committed.
    """
    rooms_number = 100

    for i in range(rooms_number):
        room = Room(random.choice(list(RoomTypes)), random.randint(1, 5),
                    random.randint(1, 5), random.randint(1, 10),
                    bool(random.getrandbits(1)), "desc",
                    bool(random.getrandbits(1)), bool(random.getrandbits(1)),
                    bool(random.getrandbits(1)), bool(random.getrandbits(1)),
                    bool(random.getrandbits(1)), bool(random.getrandbits(1)),
                    bool(random.getrandbits(1)), bool(random.getrandbits(1)),
                    bool(random.getrandbits(1)), bool(random.getrandbits(1)),
                    37.9754983 + random.uniform(-1, 1),
                    23.7356671 + random.uniform(-1, 1), "address", "info",
                    random.randint(1, 5), random.randint(23, 300),
                    random.uniform(10, 70), random_sentence(3),
                    random.randint(6, 20), random.randint(1, 3), 7)

        for y in range(10):
            # Image ids are unique across rooms: room i uses ids 10*i .. 10*i+9.
            room.images.append(
                Image('https://picsum.photos/id/' + str(i * 10 + y) +
                      '/400/400'))
            # NOTE(review): the trailing 4 is presumably the id of an existing
            # user -- verify against the Review constructor.
            room.reviews.append(
                Review(random.uniform(1, 5), random_sentence(3),
                       random_sentence(10), 4))

            # NOTE(review): reservations are created on every image pass and
            # all start "now", so each room gets ten overlapping pairs.  This
            # mirrors the existing behavior but looks unintended -- TODO
            # confirm whether a chain of consecutive reservations was meant.
            start_date = datetime.now()
            # One draw is enough; a former inner loop merely redrew this value
            # ten times and kept the last one.
            next_date = start_date + timedelta(days=random.randint(2, 30))
            room.reservations.append(
                Reservation(start_date, next_date, Status.rented))

            start_date = next_date + timedelta(days=random.randint(20, 40))
            # An end date of None marks the block as open-ended.
            room.reservations.append(
                Reservation(start_date, None, Status.not_available))

        # Persist once per room rather than once per image pass.
        db.session.add(room)
        db.session.commit()
Ejemplo n.º 4
0
def put_new_review(entity_id):
    """Create the current user's review for the given entity.

    Aborts with 404 when the entity does not exist and with 400 when the
    current user already has a review for it.  The review takes ``rating``
    (required) and ``text`` (optional) from ``g.data``; when text is present
    its language is detected and stored as the review's locale.

    Args:
        entity_id: Identifier of the entity being reviewed.

    Returns:
        The string ``'ok'`` once the review has been committed.

    Raises:
        KeyError: When ``g.data`` has no ``'rating'`` entry.
    """
    session = get_session()
    try:
        if session.query(Entity).get(entity_id) is None:
            abort(404, 'Entity not found')
            # abort is expected to raise; the return is kept as a safeguard.
            return

        current_user_id = get_jwt_identity()

        existing = session.query(Review).filter(
            Review.user_id == current_user_id,
            Review.entity_id == entity_id
        ).first()

        if existing is not None:
            abort(400, "User with id = %s already has a review for entity with id = %s" % (current_user_id, entity_id))
            return

        content = g.data
        text = None
        language = None
        if 'text' in content:
            text = content['text']
            language = translation_init.translate.detect(text)

        review = Review(
            user_id=current_user_id,
            entity_id=entity_id,
            rating=content['rating'],
            time=datetime.now(),
            text=text,
            locale=language
        )

        session.add(review)
        session.commit()
    finally:
        # Close on every exit path, including aborts and unexpected errors,
        # so a failing request cannot leak the session.
        session.close()

    return 'ok'
Ejemplo n.º 5
0
def new_review(room_id):
    """Add a review from the JSON request body to the given room.

    The body is expected to look like
    ``{"review": {"user_public_id", "rating", "title", "description"}}``.

    Args:
        room_id: Identifier of the room being reviewed.

    Returns:
        ``jsonify({'message': 'SUCCESS'})`` after the review is committed, or
        ``jsonify({'message': 'ERROR'})`` when the room or the user cannot be
        found or the payload is incomplete.
    """
    data = request.get_json()

    room = Room.query.filter_by(id=room_id).first()
    if room is None:
        return jsonify({'message': 'ERROR'})

    # Reject a missing/malformed payload the same way as an unknown room
    # instead of failing with KeyError/TypeError.
    review = data.get('review') if isinstance(data, dict) else None
    required = ('user_public_id', 'rating', 'title', 'description')
    if not isinstance(review, dict) or any(key not in review
                                           for key in required):
        return jsonify({'message': 'ERROR'})

    user = User.query.filter_by(public_id=review['user_public_id']).first()
    if user is None:
        # Unknown reviewer: previously this crashed on ``user.id``.
        return jsonify({'message': 'ERROR'})

    room.reviews.append(
        Review(review['rating'], review['title'], review['description'],
               user.id))
    db.session.commit()

    return jsonify({'message': 'SUCCESS'})
def scrape_listing_detail(driver, listing_url, city, country):
    """Scrape one listing page into an ``Accommodation``.

    Every section of the page is scraped on a best-effort basis: when a
    section cannot be read a message is printed and a default is used, so a
    changed page layout degrades the result instead of aborting the scrape.
    Only ordinary errors are absorbed; ``KeyboardInterrupt`` and
    ``SystemExit`` propagate so a running scrape can still be stopped.

    Args:
        driver: Selenium driver to use; when ``None`` a new one is obtained
            from ``prepare_driver()``.
        listing_url: URL of the listing page.
        city: City name, appended to the address and used for geocoding.
        country: Country name, appended to the address and used for geocoding.

    Returns:
        An ``Accommodation`` built from the scraped values.
    """
    if driver is None:
        driver = prepare_driver()
    driver.get(listing_url)
    # Fixed pause after navigation -- presumably to let the page render.
    time.sleep(3)

    acc_type = "Hostel"
    try:
        name = driver.find_element_by_xpath(
            "//*[@id='pagebody']/div[1]/div[1]/div[2]/div[8]/section[1]/div/div[2]/div/div/h1"
        ).text.strip()
    except Exception:
        print("Name is None.")
        name = ""
    try:
        address = driver.find_element_by_xpath(
            "//*[@id='pagebody']/div[1]/div[1]/div[2]/div[8]/section[1]/div/div[2]/div/div/div/span/a[1]"
        ).text.strip()
        address = address + ", " + city + ", " + country
    except Exception:
        print("Address is None.")
        address = ""

    # rating_score ends up as a float, as None when the element's text is not
    # a number, or as 0 when the element itself cannot be found.
    try:
        rating_score = driver.find_element_by_xpath(
            "//*[@id='pagebody']/div[1]/div[1]/div[2]/div[8]/section[4]/div/section[1]/div/div[1]/div[1]"
        ).text.strip()
        try:
            rating_score = float(rating_score)
        except ValueError:
            rating_score = None
    except Exception:
        print("Rating Score is None.")
        rating_score = 0
    try:
        rating_text = driver.find_element_by_xpath(
            "//*[@id='pagebody']/div[1]/div[1]/div[2]/div[8]/section[4]/div/section[1]/div/div[1]/div[2]/p"
        ).text.strip()
    except Exception:
        print("Rating Text is None.")
        rating_text = ""
    try:
        total_number_of_ratings = driver.find_element_by_xpath(
            "//*[@id='pagebody']/div[1]/div[1]/div[2]/div[8]/section[4]/div/section[1]/div/div[1]/div[2]/a/span"
        ).text.strip().replace('Total Reviews', '').strip()
        try:
            total_number_of_ratings = int(total_number_of_ratings)
        except ValueError:
            total_number_of_ratings = 0
    except Exception:
        print("Total ratings is None.")
        total_number_of_ratings = 0

    try:
        description = driver.find_element_by_xpath(
            "//*[@id='pagebody']/div[1]/div[1]/div[2]/div[8]/section[4]/div/section[2]/div/div/div"
        ).text
    except Exception:
        print("Description is None.")
        description = ""

    try:
        image_urls_class = driver.find_element_by_name(
            "ms-gallery").find_element_by_class_name(
                "row").find_element_by_class_name(
                    "small-12").find_element_by_class_name("gallery")
        image_urls = []
        for image in image_urls_class.find_elements_by_class_name(
                "gallery-item"):
            img = image.find_element_by_tag_name("img").get_attribute("src")
            image_urls.append(img)
        print(len(image_urls), "Image urls")
    except Exception:
        print("Image urls is None.")
        image_urls = []

    try:
        reviews = []
        reviews_class = driver.find_element_by_name("ms-latest-reviews")
        reviews_ul = reviews_class.find_element_by_tag_name("ul")
        reviews_li = reviews_ul.find_elements_by_tag_name("li")

        for review in reviews_li:
            r = review.find_element_by_class_name(
                "property-review").find_element_by_class_name("review-info")
            r_country = r.find_element_by_class_name(
                'details-bottom').text.strip()
            r_text = r.find_element_by_class_name(
                "notes").find_element_by_class_name(
                    "truncate-container").find_element_by_class_name(
                        "text").text.strip()
            rev = Review(text=r_text, review_country=r_country)
            # Stored as plain attribute dicts rather than Review objects.
            reviews.append(rev.__dict__)
    except Exception:
        print("Reviews is None.")
        reviews = []

    try:
        reviews_breakdown_list = []
        reviews_class = driver.find_element_by_name("ms-reviews-and-ratings")
        reviews_breakdown_ul = reviews_class.find_element_by_tag_name("ul")
        reviews_breakdown_li = reviews_breakdown_ul.find_elements_by_tag_name(
            "li")
        for breakdown in reviews_breakdown_li:
            b_text = breakdown.find_element_by_class_name(
                'rating-label').text.strip()
            b_value = breakdown.find_element_by_class_name(
                'rating-label').find_element_by_class_name(
                    'pull-right').text.strip()
            try:
                b_value = int(b_value)
            except ValueError:
                b_value = None
            rev_b = ReviewBreakdown(type=b_text, value=b_value)
            reviews_breakdown_list.append(rev_b.__dict__)
    except Exception:
        # Unlike the other sections, entries collected before the failure are
        # kept here (the list is not reset).
        # NOTE(review): confirm whether that difference is intentional.
        print("Reviews Breakdown missing")

    try:
        facilities = []
        facilities_class = driver.find_element_by_name(
            "ms-facilities").find_element_by_class_name(
                "row").find_element_by_class_name(
                    "small-12").find_element_by_class_name("pb-3")
        groups = facilities_class.find_elements_by_class_name("facility-group")

        for group in groups:
            facilities_ul = group.find_element_by_tag_name("ul")
            facilities_li = facilities_ul.find_elements_by_tag_name("li")
            for f in facilities_li:
                facilities.append(str(f.text.strip()))
    except Exception:
        print("Facilities is None.")
        facilities = []

    # Coordinates come from geocoding "city, country", not the street address.
    # NOTE(review): the GeoNames key is hard-coded here -- consider moving it
    # to configuration.
    g = geocoder.geonames(city + ", " + country, key='developer005')
    if g.ok is True:
        latitude = g.lat
        longitude = g.lng
    else:
        latitude = None
        longitude = None

    acc = Accommodation(name=str(name),
                        country=str(country),
                        city=str(city),
                        address=str(address),
                        type=str(acc_type),
                        description=str(description),
                        image_urls=image_urls,
                        latitude=latitude,
                        longitude=longitude,
                        rating_text=str(rating_text),
                        rating_score=rating_score,
                        total_number_of_ratings=total_number_of_ratings,
                        facilities=facilities,
                        scrape_url=str(listing_url),
                        reviews=reviews,
                        review_breakdown=reviews_breakdown_list)

    return acc
Ejemplo n.º 7
0
	def _parseItem(self, item):
		"""Build a Review from one review element of the parsed XML.

		Reads text, author (``u"Anonymous"`` when the author node is absent),
		rating (``0`` when it cannot be determined), the review identifier from
		the ``userReviewId`` query parameter of the "report concern" URL, the
		title, and the version and date found in the text that follows the
		version/date node.

		When no date was determined the review gets ``self.lastDate``;
		otherwise ``self.lastDate`` is updated to the review's date.

		Args:
			item: Element in which the review's child nodes are looked up.

		Returns:
			The populated Review.
		"""
		review = Review()
		
		textNode = item.find("{http://www.apple.com/itms/}TextView/{http://www.apple.com/itms/}SetFontStyle")
		if textNode is not None:
			review.text = textNode.text
		
		authorNode = item.find("{http://www.apple.com/itms/}HBoxView/{http://www.apple.com/itms/}TextView/{http://www.apple.com/itms/}SetFontStyle/{http://www.apple.com/itms/}GotoURL/{http://www.apple.com/itms/}b")
		if authorNode is not None:
			review.author = authorNode.text.strip()
		else:
			review.author = u"Anonymous"
		
		ratingNode = item.find("{http://www.apple.com/itms/}HBoxView/{http://www.apple.com/itms/}HBoxView/{http://www.apple.com/itms/}HBoxView")
		try:
			review.rating = int(ratingNode.attrib['alt'].strip(' stars'))
		except (AttributeError, KeyError, ValueError):
			# No rating node at all, no 'alt' attribute, or a value that is
			# not a number: all are treated as "no rating".
			review.rating = 0
		
		reportConcernNode = item.find("{http://www.apple.com/itms/}HBoxView/{http://www.apple.com/itms/}HBoxView/{http://www.apple.com/itms/}HBoxView/{http://www.apple.com/itms/}VBoxView/{http://www.apple.com/itms/}GotoURL")
		
		if reportConcernNode is not None and reportConcernNode.attrib is not None and "url" in reportConcernNode.attrib:
			parseResult = urlparse.urlparse(reportConcernNode.attrib["url"])
			queryResult = urlparse.parse_qs(parseResult.query)
			if queryResult is not None and "userReviewId" in queryResult:
				review.identifier = queryResult["userReviewId"][0]
		
		titleNode = item.find("{http://www.apple.com/itms/}HBoxView/{http://www.apple.com/itms/}TextView/{http://www.apple.com/itms/}SetFontStyle/{http://www.apple.com/itms/}b")
		if titleNode is not None:
			review.title = titleNode.text
		
		versionAndDateNode = item.find("{http://www.apple.com/itms/}HBoxView/{http://www.apple.com/itms/}TextView/{http://www.apple.com/itms/}SetFontStyle/{http://www.apple.com/itms/}GotoURL")
		if versionAndDateNode is not None:
			# The version and date live in the text after the node; that text
			# can be absent, in which case nothing can be matched.
			tailText = versionAndDateNode.tail or ""
			
			versionMatch = re.search("Version ([^\n^\ ]+)", tailText)
			if versionMatch:
				review.version = versionMatch.group(1)
			
			# Three date layouts are recognised: "day.month.year",
			# "monthname day, year" and "day monthname year".
			dateMatch = re.search("(((?P<day1>\d{1,2})\.(?P<month1>\d{1,2})\.)|((?P<month2>\w+) (?P<day2>\d{1,2})[ ,]+)|((?P<day3>\d{1,2})[ \-](?P<month3>.+?)\.?[ \-]))(?P<year>\d{4})", tailText)
			
			dateObject = None
			if dateMatch:
				groups = dateMatch.groupdict()
				
				if groups["day1"] is not None:
					month, day = groups["month1"], groups["day1"]
				else:
					if groups["day2"] is not None:
						monthName, day = groups["month2"], groups["day2"]
					else:
						monthName, day = groups["month3"], groups["day3"]
					# Month names are resolved through the module's `months`
					# lookup; an unknown name leaves the date undetermined.
					monthName = monthName.lower()
					month = months[monthName] if monthName in months else None
				
				if month is not None:
					dateObject = datetime(int(groups["year"]), int(month), int(day), 0, 0, 0)
				
				# Assigned even when it is None, as long as the layout matched.
				review.date = dateObject
			
			if dateObject is None:
				# Print the part that could not be interpreted as a date.
				unparsed = re.search(".*?Version\s.*?[\s\-]+?([^\s\-].+)", tailText)
				if unparsed:
					print(unparsed.group(1))
		
		if review.date is None:
			review.date = self.lastDate
		else:
			self.lastDate = review.date
			
		return review
Ejemplo n.º 8
0
	def _getReviews(self, conditions = None, limit = 0):
		"""Load stored reviews as Review objects.

		Queries ``self.reviewsCollection`` with the given conditions and limit,
		sorted by ``date`` and then ``order``, both descending.

		Args:
			conditions: Query passed to ``find`` as ``spec``.
			limit: Limit passed to ``find``.

		Returns:
			A list with one Review per document, in cursor order.
		"""
		sortOrder = [(u"date", pymongo.DESCENDING), (u"order", pymongo.DESCENDING)]
		cursor = self.reviewsCollection.find(spec = conditions, limit = limit).sort(sortOrder)
		
		# Document keys that are copied onto the attribute of the same name.
		copiedFields = ("identifier", "author", "appId", "title", "text", "version", "rating", "date", "appStoreId")
		
		def toReview(rawReview):
			review = Review()
			# The document id is the only field stored under a different name.
			review.rowId = rawReview["_id"]
			for fieldName in copiedFields:
				setattr(review, fieldName, rawReview[fieldName])
			return review
		
		return [toReview(rawReview) for rawReview in cursor]
Ejemplo n.º 9
0
	def convert_numbers_r(self, review):
		"""Return a new Review with the content run through ``convert_numbers``; the polarity is carried over unchanged."""
		return Review(self.convert_numbers(review.content), review.polarity)
Ejemplo n.º 10
0
	def fold_cases_r(self, review):
		"""Return a new Review with the content run through ``fold_cases``; the polarity is carried over unchanged."""
		return Review(self.fold_cases(review.content), review.polarity)
Ejemplo n.º 11
0
	def remove_punctuations_r(self, review):
		"""Return a new Review with the content run through ``remove_punctuations``; the polarity is carried over unchanged."""
		return Review(self.remove_punctuations(review.content), review.polarity)