예제 #1
0
 def setUp(self):
     """Prepare the container and the list of (entity, expected) pairs.

     For every entry of the module-level ``_cases`` sequence, the first
     element is imported into a fresh ``Entity`` and stored together with
     the entry's second element in ``self.cases``.
     """
     self.container = FullResolveContainer()
     self.cases = []
     for raw_case in _cases:
         entity = Entity()
         entity.importData(raw_case[0])
         pair = (entity, raw_case[1])
         self.cases.append(pair)
예제 #2
0
class AEnrichHttpTest(AStampedAPIHttpTestCase):
    """Test case whose fixture is a single film entity built from raw data."""

    def setUp(self):
        """Create the API object and import a raw film record into ``media1``."""
        self.api = MongoStampedAPI()
        self.media1 = Entity()

        # Nested pieces of the record, named so the top-level dict stays flat.
        fandango_source = {"fid": "140038"}
        media_details = {
            "track_length": "6420",
            "genre": "Action/Adventure",
            "original_release_date": "August 26, 2011",
            "mpaa_rating": "PG-13",
        }
        video_details = {
            "director": "Olivier Megaton",
            "cast": "Zoe Saldana, Jordi Mollà, Lennie James, Amandla Stenberg, Michael Vartan",
        }

        # Key order matches the original literal.
        film_record = {
            "category": "film",
            "subtitle": "film",
            "subcategory": "movie",
            "title": "Colombiana",
            "image": "http://images.fandango.com/r85.9.2/ImageRenderer/375/375/images/no_image_375x375.jpg/140038/images/masterrepository/tms/105255/105255_aa.jpg",
            "titlel": "colombiana",
            "sources": {"fandango": fandango_source},
            "details": {"media": media_details, "video": video_details},
            "desc": "A young woman, after witnessing her parents' murder as a child in Bogota, grows up to be a stone-cold assassin. \n\n  Release Date:8/26/2011",
        }
        self.media1.importData(film_record)

    def tearDown(self):
        """Nothing is cleaned up after a test."""
예제 #3
0
    def _parse_feed(self, pool, url):
        """Parse one feed and queue an Entity for every new book entry.

        Each entry's id is matched against ``self.id_re`` to obtain its
        ASIN.  Entries whose ASIN is not 10 characters long, or whose ASIN
        is already in ``self.seen``, are skipped.  For the rest, title,
        popularity, link, image and author are filled in from the entry and
        the entity is put on ``self._output``.

        Parsing is best effort: an ordinary error while handling one entry
        is reported through ``utils.printException()`` and the loop moves on
        to the next entry.

        Args:
            pool: not used by this method; kept so the signature is unchanged.
            url: feed location, passed straight to ``feedparser.parse``.
        """
        utils.log("[%s] parsing feed %s" % (self, url))
        data = feedparser.parse(url)

        for entry in data.entries:
            try:
                # A non-matching id makes ``match`` return None; the resulting
                # AttributeError is reported by the handler below.
                asin = self.id_re.match(entry.id).groups()[0]

                # Only the length is checked (10 characters), so identifiers
                # containing letters are accepted as well as all-digit ones.
                if len(asin) != 10:
                    continue

                # Drop duplicates before doing any of the HTML parsing below.
                if asin in self.seen:
                    continue

                entity = Entity()
                entity.subcategory = "book"
                entity.amazon = {}
                entity.asin = asin

                title_match = self.title_re.match(entry.title)
                if title_match:
                    title_groups = title_match.groups()
                    entity.title = title_groups[1]
                    entity.popularity = title_groups[0]
                else:
                    entity.title = entry.title

                entity.amazon_link = entry.link

                soup = BeautifulSoup(entry.summary)
                img = soup.find("img")
                if img:
                    entity.image = img.get("src")

                author = soup.find("span", {"class": "riRssContributor"})
                if author:
                    author_link = author.find("a")

                    if author_link:
                        entity.author = author_link.getText()
                    else:
                        author_text = author.getText().strip()

                        # Try the two author patterns in order and fall back
                        # to the raw text when neither matches.
                        author_match = self.author_re0.match(author_text)
                        if author_match is None:
                            author_match = self.author_re1.match(author_text)

                        if author_match is None:
                            entity.author = author_text
                        else:
                            entity.author = author_match.groups()[0]

                # Mark as seen only once the entity was built successfully.
                self.seen.add(asin)
                self._output.put(entity)
            except Exception:
                # Exception (not a bare except) so KeyboardInterrupt,
                # SystemExit and similar signals are not swallowed.
                utils.printException()