def setUp(self):
    """Create the resolve container and load the fixtures listed in ``_cases``.

    For every fixture, element 0 is imported into a fresh ``Entity`` and
    element 1 is stored next to it untouched, so ``self.cases`` ends up as
    a list of ``(entity, fixture[1])`` tuples.
    """
    self.container = FullResolveContainer()
    self.cases = []

    for fixture in _cases:
        source_entity = Entity()
        source_entity.importData(fixture[0])

        pair = (source_entity, fixture[1])
        self.cases.append(pair)
class AEnrichHttpTest(AStampedAPIHttpTestCase):
    """Test case that prepares an API handle and one sample film entity."""

    def setUp(self):
        """Create ``self.api`` and populate ``self.media1`` with a film record."""
        self.api = MongoStampedAPI()

        # Nested pieces of the record are named first so the top-level
        # literal below stays readable.
        fandango_source = {"fid": "140038"}
        media_details = {
            "track_length": "6420",
            "genre": "Action/Adventure",
            "original_release_date": "August 26, 2011",
            "mpaa_rating": "PG-13",
        }
        video_details = {
            "director": "Olivier Megaton",
            "cast": "Zoe Saldana, Jordi Mollà, Lennie James, Amandla Stenberg, Michael Vartan",
        }

        raw = {
            "category": "film",
            "subtitle": "film",
            "subcategory": "movie",
            "title": "Colombiana",
            "image": "http://images.fandango.com/r85.9.2/ImageRenderer/375/375/images/no_image_375x375.jpg/140038/images/masterrepository/tms/105255/105255_aa.jpg",
            "titlel": "colombiana",
            "sources": {"fandango": fandango_source},
            "details": {"media": media_details, "video": video_details},
            "desc": "A young woman, after witnessing her parents' murder as a child in Bogota, grows up to be a stone-cold assassin. \n\n Release Date:8/26/2011",
        }

        film = Entity()
        self.media1 = film
        film.importData(raw)

    def tearDown(self):
        """Nothing to clean up."""
def _parse_feed(self, pool, url):
    """Parse one feed and queue a book ``Entity`` for every new entry.

    The feed at ``url`` is fetched with ``feedparser.parse``. Each entry is
    turned into an ``Entity`` (subcategory ``"book"``) carrying its ASIN,
    title, optional popularity, Amazon link, optional image and optional
    author, and is then put on ``self._output``.

    Entries are skipped when the identifier extracted by ``self.id_re`` is
    not exactly 10 characters long or is already in ``self.seen``. Any
    ordinary error raised while handling a single entry is reported through
    ``utils.printException`` and processing continues with the next entry.

    Args:
        pool: Not used by this method.
        url: Feed location handed to ``feedparser.parse``.
    """
    utils.log("[%s] parsing feed %s" % (self, url))
    data = feedparser.parse(url)

    for entry in data.entries:
        try:
            # If the id does not match, ``match`` returns None and the
            # attribute access raises; the handler below reports it.
            asin = self.id_re.match(entry.id).groups()[0]

            # Only 10-character identifiers are accepted as ASINs.
            if len(asin) != 10:
                continue

            # Drop duplicates up front so no HTML parsing is wasted on them.
            if asin in self.seen:
                continue

            entity = Entity()
            entity.subcategory = "book"
            entity.amazon = {}
            entity.asin = asin

            title_match = self.title_re.match(entry.title)
            if title_match:
                # Group 0 is stored as the popularity, group 1 as the title.
                groups = title_match.groups()
                entity.title = groups[1]
                entity.popularity = groups[0]
            else:
                entity.title = entry.title

            entity.amazon_link = entry.link

            soup = BeautifulSoup(entry.summary)

            img = soup.find("img")
            if img:
                entity.image = img.get("src")

            author = soup.find("span", {"class": "riRssContributor"})
            if author:
                author_link = author.find("a")
                if author_link:
                    entity.author = author_link.getText()
                else:
                    # No link: try each author pattern in turn and fall
                    # back to the raw stripped text when none matches.
                    author_text = author.getText().strip()
                    for pattern in (self.author_re0, self.author_re1):
                        found = pattern.match(author_text)
                        if found is not None:
                            entity.author = found.groups()[0]
                            break
                    else:
                        entity.author = author_text

            self.seen.add(asin)
            self._output.put(entity)
        except Exception:
            # Best effort per entry: report and keep going. Deliberately
            # not a bare ``except`` so KeyboardInterrupt and SystemExit
            # still propagate.
            utils.printException()