예제 #1
0
    def __parsePage(self, page):
        """Copy schema.org microdata found in ``page`` onto this object.

        Only the first item returned by ``microdata.extract(page)`` is
        inspected.  If it carries a ``TVSeries`` name, the TV series
        properties are read; otherwise, if it carries a ``Movie`` name, the
        movie properties are read.  A property is assigned only when it is
        present in the item, so attributes for absent properties are left
        untouched.  The first child item exposing an aggregate rating value,
        if any, is stored in ``self.rating``.

        :param page: content handed to ``microdata.extract``; presumably the
            HTML of an IMDB title page (confirm against the caller).
        :raises ValueError: if no microdata is extracted from ``page``.
        """
        items = microdata.extract(page)
        if not items:
            raise ValueError("Unable to find any microdata in IMDB result")
        item = items[0]

        if "http://schema.org/TVSeries/name" in item:
            self.title = item["http://schema.org/TVSeries/name"]
            # (microdata key, attribute name), listed in assignment order.
            for key, attr in (
                ("http://schema.org/TVSeries/actors", "cast"),
                ("http://schema.org/TVSeries/datePublished", "releasedate"),
                ("http://schema.org/TVSeries/description", "shortdescription"),
                ("http://schema.org/TVSeries/duration", "runtime"),
            ):
                if key in item:
                    setattr(self, attr, item[key])
            # Unlike every other property, a TV series genre of None is
            # skipped rather than stored.
            if "http://schema.org/TVSeries/genre" in item:
                series_genre = item["http://schema.org/TVSeries/genre"]
                if series_genre is not None:
                    self.genre = series_genre
        elif "http://schema.org/Movie/name" in item:
            self.title = item["http://schema.org/Movie/name"]
            # (microdata key, attribute name), listed in assignment order.
            for key, attr in (
                ("http://schema.org/Movie/actors", "cast"),
                ("http://schema.org/Movie/datePublished", "releasedate"),
                ("http://schema.org/Movie/description", "shortdescription"),
                ("http://schema.org/Movie/director", "director"),
                ("http://schema.org/Movie/duration", "runtime"),
                ("http://schema.org/Movie/genre", "genre"),
            ):
                if key in item:
                    setattr(self, attr, item[key])

        value_key = "http://schema.org/AggregateRating/ratingValue"
        children = item["children"] if "children" in item else []
        rated = [child for child in children if value_key in child.keys()]
        if not rated:
            # No child exposes a rating value: self.rating is left untouched.
            return

        rating = rated[0]
        value = rating[value_key]
        best = None
        if "http://schema.org/AggregateRating/bestRating" in rating:
            best = rating["http://schema.org/AggregateRating/bestRating"]
        # Show "value / best" only when a truthy best rating is available.
        if best:
            self.rating = "%s / %s" % (value, best)
        else:
            self.rating = value
예제 #2
0
    def __parsePage(self, page):
        """Copy schema.org ``Movie`` microdata found in ``page`` onto this object.

        Only the first item returned by ``microdata.extract(page)`` is
        inspected.  Each known ``Movie`` property is assigned to the matching
        attribute when present; actors, director and genre are split on
        commas into lists.  The first child item exposing an aggregate rating
        value, if any, is stored in ``self.rating``.

        :param page: content handed to ``microdata.extract``; presumably the
            HTML of an IMDB title page (confirm against the caller).
        :raises ValueError: if no microdata is extracted from ``page``.
        """
        extracted = microdata.extract(page)
        if not extracted:
            raise ValueError("Unable to find any microdata in IMDB result")
        movie = extracted[0]

        # (microdata key, attribute name, split on commas?), listed in the
        # order the attributes are assigned.
        properties = (
            ('http://schema.org/Movie/actors', 'cast', True),
            ('http://schema.org/Movie/datePublished', 'releasedate', False),
            ('http://schema.org/Movie/description', 'shortdescription', False),
            ('http://schema.org/Movie/director', 'director', True),
            ('http://schema.org/Movie/duration', 'runtime', False),
            ('http://schema.org/Movie/genre', 'genre', True),
            ('http://schema.org/Movie/name', 'title', False),
        )
        for key, attr, comma_separated in properties:
            if key not in movie:
                continue
            raw = movie[key]
            if comma_separated:
                setattr(self, attr, re.split(r',', raw))
            else:
                setattr(self, attr, raw)

        value_key = 'http://schema.org/AggregateRating/ratingValue'
        children = movie['children'] if 'children' in movie else []
        rated = [child for child in children if value_key in child.keys()]
        if not rated:
            # No child exposes a rating value: self.rating is left untouched.
            return

        rating = rated[0]
        value = rating[value_key]
        best = None
        if 'http://schema.org/AggregateRating/bestRating' in rating:
            best = rating['http://schema.org/AggregateRating/bestRating']
        # Show "value / best" only when a truthy best rating is available.
        if best:
            self.rating = '%s / %s' % (value, best)
        else:
            self.rating = value
예제 #3
0
	def __parsePage(self, page):
		"""Copy schema.org ``Movie`` microdata found in ``page`` onto this object.

		Only the first item returned by ``microdata.extract(page)`` is
		inspected.  Each known ``Movie`` property is assigned to the matching
		attribute when present; actors, director and genre are split on
		commas into lists.  The first child item exposing an aggregate rating
		value, if any, is stored in ``self.rating``.

		:param page: content handed to ``microdata.extract``; presumably the
			HTML of an IMDB title page (confirm against the caller).
		:raises ValueError: if no microdata is extracted from ``page``.
		"""
		found = microdata.extract(page)
		if not found:
			raise ValueError("Unable to find any microdata in IMDB result")
		entry = found[0]

		def split_commas(text):
			# Turn a comma-separated property value into a list.
			return re.split(r',', text)

		# (microdata key, attribute name, converter or None), listed in the
		# order the attributes are assigned.
		fields = (
			('http://schema.org/Movie/actors', 'cast', split_commas),
			('http://schema.org/Movie/datePublished', 'releasedate', None),
			('http://schema.org/Movie/description', 'shortdescription', None),
			('http://schema.org/Movie/director', 'director', split_commas),
			('http://schema.org/Movie/duration', 'runtime', None),
			('http://schema.org/Movie/genre', 'genre', split_commas),
			('http://schema.org/Movie/name', 'title', None),
		)
		for key, attr, convert in fields:
			if key in entry:
				raw = entry[key]
				setattr(self, attr, raw if convert is None else convert(raw))

		value_key = 'http://schema.org/AggregateRating/ratingValue'
		rated = []
		if 'children' in entry:
			for child in entry['children']:
				if value_key in child.keys():
					rated.append(child)
		if not rated:
			# No child exposes a rating value: self.rating is left untouched.
			return

		rating = rated[0]
		value = rating[value_key]
		best = None
		if 'http://schema.org/AggregateRating/bestRating' in rating:
			best = rating['http://schema.org/AggregateRating/bestRating']
		# Show "value / best" only when a truthy best rating is available.
		if best:
			self.rating = '%s / %s' % (value, best)
		else:
			self.rating = value