def _getReviews(self, conditions = None, limit = 0):
    """Load stored reviews and return them as a list of Review objects.

    conditions and limit are handed unchanged to
    self.reviewsCollection.find() as its spec and limit arguments
    (presumably limit = 0 means "no limit" in pymongo -- confirm).
    The cursor is sorted by "date" and then by "order", both descending.

    Raises KeyError if a stored document lacks one of the expected keys.
    """
    # (Review attribute, key in the stored document), in assignment order.
    fieldMap = (
        ("rowId", "_id"),
        ("identifier", "identifier"),
        ("author", "author"),
        ("appId", "appId"),
        ("title", "title"),
        ("text", "text"),
        ("version", "version"),
        ("rating", "rating"),
        ("date", "date"),
        ("appStoreId", "appStoreId"),
    )

    found = self.reviewsCollection.find(spec = conditions, limit = limit)
    ordered = found.sort([
        (u"date", pymongo.DESCENDING),
        (u"order", pymongo.DESCENDING),
    ])

    result = []
    for document in ordered:
        entry = Review()
        for attributeName, documentKey in fieldMap:
            setattr(entry, attributeName, document[documentKey])
        result.append(entry)
    return result
def _parseItem(self, item):
    """Build a Review from one review element of the store's XML listing.

    item is an ElementTree-style element; only find(), .text, .tail and
    .attrib are used on it and on the nodes it returns.

    Parsing is best-effort: a node that cannot be found leaves the
    corresponding Review attribute untouched, except that the author
    falls back to u"Anonymous" and the rating to 0.

    If no date could be parsed the review takes self.lastDate; otherwise
    self.lastDate is updated to the parsed date.  Text that looked like
    it should contain a date but could not be converted is printed.

    Returns the populated Review.
    """
    namespace = "{http://www.apple.com/itms/}"

    def path(*tags):
        # Qualify every step of an element path with the namespace.
        return "/".join(namespace + tag for tag in tags)

    def buildDate(year, month, day):
        # Midnight datetime, or None for an impossible date such as 31.02.
        try:
            return datetime(int(year), int(month), int(day), 0, 0, 0)
        except ValueError:
            return None

    # Three alternative day/month layouts followed by a four digit year.
    datePattern = (
        r"(((?P<day1>\d{1,2})\.(?P<month1>\d{1,2})\.)"        # 24.12.
        # [\s,] rather than a literal space: the separator between day
        # and year may be any whitespace, including a line break.
        r"|((?P<month2>\w+) (?P<day2>\d{1,2})[\s,]+)"          # Dec 24,
        r"|((?P<day3>\d{1,2})[ \-](?P<month3>.+?)\.?[ \-]))"   # 24 Dec. / 24-Dec-
        r"(?P<year>\d{4})"
    )

    review = Review()

    textNode = item.find(path("TextView", "SetFontStyle"))
    if textNode is not None:
        review.text = textNode.text

    authorNode = item.find(path("HBoxView", "TextView", "SetFontStyle", "GotoURL", "b"))
    # A node without text is treated like a missing author node.
    if authorNode is not None and authorNode.text is not None:
        review.author = authorNode.text.strip()
    else:
        review.author = u"Anonymous"

    ratingNode = item.find(path("HBoxView", "HBoxView", "HBoxView"))
    rating = 0
    if ratingNode is not None:
        try:
            # strip() removes the characters of " stars" from both ends,
            # leaving the digits of e.g. "4 stars" or "1 star".
            rating = int(ratingNode.attrib["alt"].strip(" stars"))
        except (KeyError, ValueError):
            # No alt text, or alt text that is not a star count.
            rating = 0
    review.rating = rating

    reportConcernNode = item.find(path("HBoxView", "HBoxView", "HBoxView", "VBoxView", "GotoURL"))
    attributes = reportConcernNode.attrib if reportConcernNode is not None else None
    if attributes is not None and "url" in attributes:
        # The review id is carried in the query string of the linked URL.
        query = urlparse.parse_qs(urlparse.urlparse(attributes["url"]).query)
        reviewIds = query.get("userReviewId")
        if reviewIds:
            review.identifier = reviewIds[0]

    titleNode = item.find(path("HBoxView", "TextView", "SetFontStyle", "b"))
    if titleNode is not None:
        review.title = titleNode.text

    versionAndDateNode = item.find(path("HBoxView", "TextView", "SetFontStyle", "GotoURL"))
    # Version and date live in the text following the link; tail is None
    # when nothing follows it, in which case there is nothing to parse.
    tail = versionAndDateNode.tail if versionAndDateNode is not None else None
    if tail:
        # The class also excludes a literal "^", as the pattern always did.
        versionMatch = re.search(r"Version ([^\n^ ]+)", tail)
        if versionMatch:
            review.version = versionMatch.group(1)

        dateObject = None
        dateMatch = re.search(datePattern, tail)
        if dateMatch:
            groups = dateMatch.groupdict()
            if groups["day1"] is not None:
                dateObject = buildDate(groups["year"], groups["month1"], groups["day1"])
            else:
                if groups["day2"] is not None:
                    day, monthName = groups["day2"], groups["month2"]
                else:
                    day, monthName = groups["day3"], groups["month3"]
                # months is defined elsewhere in this file; presumably it
                # maps lower-case month names to month numbers -- confirm.
                monthKey = monthName.lower()
                if monthKey in months:
                    dateObject = buildDate(groups["year"], months[monthKey], day)
            review.date = dateObject

        if dateObject is None:
            # Report whatever follows the version so unsupported date
            # formats can be spotted.  print(x) with a single argument
            # behaves the same as the Python 2 print statement.
            leftover = re.search(r".*?Version\s.*?[\s\-]+?([^\s\-].+)", tail)
            if leftover:
                print(leftover.group(1))

    if review.date is None:
        review.date = self.lastDate
    else:
        self.lastDate = review.date
    return review