Beispiel #1
0
    def to_metadata(self, log, entry):  # {{{
        """Build a ``Metadata`` object from one search-result ``entry``.

        :param log: logger; kept for interface compatibility, not used here.
        :param entry: node exposing ``xpath()`` (presumably an lxml element
            for a single search hit -- confirm against the caller).
        :return: ``Metadata`` carrying the title, the normalized authors, the
            rating from ``self.get_rating``, the ``ozon`` identifier (only
            when one is found) and ``ozon_cover_url`` (``None`` when the
            entry has no image source).
        """
        title = unicode(entry.xpath(u'normalize-space(.//span[@itemprop="name"][1]/text())'))
        author = unicode(entry.xpath(u'normalize-space(.//a[contains(@href, "person")][1]/text())'))

        # The author text may hold several comma-separated names.
        norm_authors = [_normalizeAuthorNameWithInitials(name.strip()) for name in author.split(u',')]
        mi = Metadata(title, norm_authors)

        # The id is the path segment that follows "id/" in the detail link.
        ozon_id = entry.xpath(u'substring-before(substring-after(normalize-space(.//a[starts-with(@href, "/context/detail/id/")][1]/@href), "id/"), "/")')
        if ozon_id:
            mi.identifiers = {'ozon':ozon_id}

        mi.ozon_cover_url = None
        cover = entry.xpath(u'normalize-space(.//img[1]/@src)')
        if cover:
            mi.ozon_cover_url = _translateToBigCoverUrl(cover)

        # No publication year is read from this markup, so pubdate is left
        # at whatever Metadata initialises it to.
        mi.rating = self.get_rating(entry)

        return mi
Beispiel #2
0
    def to_metadata(self, log, entry):  # {{{
        """Build a ``Metadata`` object from the child elements of ``entry``.

        Values are read from direct children of ``entry`` selected by local
        name: ``Name``, ``Author``, ``ID``, ``Annotation``, ``Picture``,
        ``Year`` and ``ClientRatingValue``.

        :param log: logger; passed to ``toPubdate`` and used to report a
            rating that cannot be parsed.
        :param entry: node exposing ``xpath()`` (presumably an lxml element
            -- confirm against the caller).
        :return: the populated ``Metadata``. ``pubdate`` and ``rating`` are
            only assigned when the corresponding value is present (and, for
            the rating, parses as a float).
        """
        xp_template = 'normalize-space(./*[local-name() = "{0}"]/text())'

        title = entry.xpath(xp_template.format('Name'))
        author = entry.xpath(xp_template.format('Author'))
        # The author text may hold several comma-separated names.
        norm_authors = [_normalizeAuthorNameWithInitials(name.strip())
                        for name in unicode(author).split(u',')]
        mi = Metadata(title, norm_authors)

        ozon_id = entry.xpath(xp_template.format('ID'))
        mi.identifiers = {'ozon': ozon_id}

        mi.comments = entry.xpath(xp_template.format('Annotation'))

        mi.ozon_cover_url = None
        cover = entry.xpath(xp_template.format('Picture'))
        if cover:
            mi.ozon_cover_url = _translateToBigCoverUrl(cover)

        pub_year = entry.xpath(xp_template.format('Year'))
        if pub_year:
            mi.pubdate = toPubdate(log, pub_year)

        rating = entry.xpath(xp_template.format('ClientRatingValue'))
        if rating:
            # Original author's note: 'rating' is a floating point number
            # between 0 and 10; OZON rates N of 5, calibre of 10, but there
            # may be a bug in identify -- hence no rescaling here.
            try:
                rating_value = float(rating)
            except ValueError:
                # Best effort: an unparseable rating is skipped, not fatal.
                log.debug(u'Ignoring unparseable rating: %r' % rating)
            else:
                mi.rating = rating_value
        return mi
Beispiel #3
0
    def get_metadata_from_detail(self, log, entry, title, authors, identifiers):  # {{{
        """Build a ``Metadata`` object from a detail-page node.

        The incoming ``title``, ``authors`` and ``identifiers`` arguments are
        not consulted: every value is read from ``entry``.

        :param log: logger; receives a debug message when no rating is found.
        :param entry: node exposing ``xpath()`` (presumably an lxml element
            -- confirm against the caller).
        :return: ``Metadata`` with title, normalized authors, rating, the
            ``ozon`` identifier (when found) and ``ozon_cover_url``.
        """
        book_title = unicode(entry.xpath(u'normalize-space(.//h1[@itemprop="name"][1]/text())'))
        author_text = unicode(entry.xpath(u'normalize-space(.//a[contains(@href, "person")][1]/text())'))

        # Split on commas, trim each piece, then normalize it.
        author_names = [
            _normalizeAuthorNameWithInitials(piece.strip())
            for piece in author_text.split(u',')
        ]
        mi = Metadata(book_title, author_names)

        # The id is the path segment that follows "id/" in the detail link.
        ozon_id = entry.xpath(u'substring-before(substring-after(normalize-space(.//a[starts-with(@href, "/context/detail/id/")][1]/@href), "id/"), "/")')
        if ozon_id:
            mi.identifiers = {'ozon':ozon_id}

        # Cover URL stays None unless the node contains an image source.
        mi.ozon_cover_url = None
        cover_src = entry.xpath(u'normalize-space(.//img[1]/@src)')
        if cover_src:
            mi.ozon_cover_url = _translateToBigCoverUrl(cover_src)

        mi.rating = self.get_rating(entry)
        if not mi.rating:
            log.debug('No rating (from_detail) found. ozon_id:%s'%ozon_id)

        return mi
Beispiel #4
0
    def to_metadata(self, log, entry):  # {{{
        """Build a ``Metadata`` object from the child elements of ``entry``.

        Values are read from direct children of ``entry`` selected by local
        name: ``Name``, ``Author``, ``ID``, ``Annotation``, ``Picture``,
        ``Year`` and ``ClientRatingValue``.

        :param log: logger; passed to ``toPubdate`` and used to report a
            rating that cannot be parsed.
        :param entry: node exposing ``xpath()`` (presumably an lxml element
            -- confirm against the caller).
        :return: the populated ``Metadata``. ``pubdate`` and ``rating`` are
            only assigned when the corresponding value is present (and, for
            the rating, parses as a float).
        """
        xp_template = 'normalize-space(./*[local-name() = "{0}"]/text())'

        title = entry.xpath(xp_template.format('Name'))
        author = entry.xpath(xp_template.format('Author'))
        # The author text may hold several comma-separated names.
        norm_authors = [_normalizeAuthorNameWithInitials(name.strip()) for name in unicode(author).split(u',')]
        mi = Metadata(title, norm_authors)

        ozon_id = entry.xpath(xp_template.format('ID'))
        mi.identifiers = {'ozon':ozon_id}

        mi.comments = entry.xpath(xp_template.format('Annotation'))

        mi.ozon_cover_url = None
        cover = entry.xpath(xp_template.format('Picture'))
        if cover:
            mi.ozon_cover_url = _translateToBigCoverUrl(cover)

        pub_year = entry.xpath(xp_template.format('Year'))
        if pub_year:
            mi.pubdate = toPubdate(log, pub_year)

        rating = entry.xpath(xp_template.format('ClientRatingValue'))
        if rating:
            # Original author's note: 'rating' is a floating point number
            # between 0 and 10; OZON rates N of 5, calibre of 10, but there
            # may be a bug in identify -- hence no rescaling here.
            try:
                rating_value = float(rating)
            except ValueError:
                # Best effort: an unparseable rating is skipped, not fatal.
                log.debug(u'Ignoring unparseable rating: %r' % rating)
            else:
                mi.rating = rating_value
        return mi
Beispiel #5
0
    def to_metadata(self, log, entry):  # {{{
        """Build a ``Metadata`` object from one search-result ``entry``.

        :param log: logger; kept for interface compatibility, not used here.
        :param entry: node exposing ``xpath()`` (presumably an lxml element
            for a single search hit -- confirm against the caller).
        :return: ``Metadata`` carrying the title, the normalized authors, the
            rating from ``self.get_rating``, the ``ozon`` identifier (only
            when one is found) and ``ozon_cover_url`` (``None`` when the
            entry has no image source).
        """
        title = unicode(entry.xpath(u'normalize-space(.//span[@itemprop="name"][1]/text())'))
        author = unicode(entry.xpath(u'normalize-space(.//a[contains(@href, "person")][1]/text())'))

        # The author text may hold several comma-separated names.
        norm_authors = [_normalizeAuthorNameWithInitials(name.strip()) for name in author.split(u',')]
        mi = Metadata(title, norm_authors)

        # The id is the path segment that follows "id/" in the detail link.
        ozon_id = entry.xpath(u'substring-before(substring-after(normalize-space(.//a[starts-with(@href, "/context/detail/id/")][1]/@href), "id/"), "/")')
        if ozon_id:
            mi.identifiers = {'ozon':ozon_id}

        mi.ozon_cover_url = None
        cover = entry.xpath(u'normalize-space(.//img[1]/@src)')
        if cover:
            mi.ozon_cover_url = _translateToBigCoverUrl(cover)

        # No publication year is read from this markup, so pubdate is left
        # at whatever Metadata initialises it to.
        mi.rating = self.get_rating(entry)

        return mi
Beispiel #6
0
    def get_metadata_from_detail(self, log, entry, title, authors, identifiers):  # {{{
        """Build a ``Metadata`` object from a detail-page node.

        The incoming ``title``, ``authors`` and ``identifiers`` arguments are
        not consulted: every value is read from ``entry``.

        :param log: logger; receives a debug message when no rating is found.
        :param entry: node exposing ``xpath()`` (presumably an lxml element
            -- confirm against the caller).
        :return: ``Metadata`` with title, normalized authors, rating, the
            ``ozon`` identifier (when found) and ``ozon_cover_url``.
        """
        book_title = unicode(entry.xpath(u'normalize-space(.//h1[@itemprop="name"][1]/text())'))
        author_text = unicode(entry.xpath(u'normalize-space(.//a[contains(@href, "person")][1]/text())'))

        # Split on commas, trim each piece, then normalize it.
        author_names = [
            _normalizeAuthorNameWithInitials(piece.strip())
            for piece in author_text.split(u',')
        ]
        mi = Metadata(book_title, author_names)

        # The id comes from the document's canonical <link>; note the
        # expression starts with "//", so it searches from the document root
        # rather than from ``entry``.
        ozon_id = entry.xpath(u'substring-before(substring-after(normalize-space(//link[@rel="canonical"][contains(@href, "/context/detail/id/")][1]/@href), "id/"), "/")')
        if ozon_id:
            mi.identifiers = {'ozon':ozon_id}

        # Cover URL stays None unless the node contains an image source.
        mi.ozon_cover_url = None
        cover_src = entry.xpath(u'normalize-space(.//img[1]/@src)')
        if cover_src:
            mi.ozon_cover_url = _translateToBigCoverUrl(cover_src)

        mi.rating = self.get_rating(entry)
        if not mi.rating:
            log.debug('No rating (from_detail) found. ozon_id:%s'%ozon_id)

        return mi
Beispiel #7
0
    def to_metadata(self, log, entry):  # {{{
        """Build a ``Metadata`` object from one search-result tile.

        :param log: logger; receives the cover debug message and is passed
            on to ``toPubdate`` and ``self.get_rating``.
        :param entry: node exposing ``xpath()`` and ``get()`` (presumably an
            lxml element for one tile -- confirm against the caller).
        :return: ``Metadata`` with title, normalized authors and rating, plus
            the ``ozon`` identifier, ``ozon_cover_url`` and ``pubdate`` when
            the corresponding data is present in the tile.
        """
        title = unicode(
            entry.xpath(
                u'normalize-space(.//div[@itemprop="name"][1]/text())'))
        author = unicode(
            entry.xpath(
                u'normalize-space(.//div[contains(@class, "mPerson")])'))

        # The author text may hold several comma-separated names.
        norm_authors = [_normalizeAuthorNameWithInitials(name.strip())
                        for name in author.split(u',')]
        mi = Metadata(title, norm_authors)

        # The id is the second-to-last "/"-separated part of data-href.
        # A missing or too-short attribute yields no identifier instead of
        # raising AttributeError / IndexError.
        href_parts = (entry.get('data-href') or '').split('/')
        ozon_id = href_parts[-2] if len(href_parts) >= 2 else None
        if ozon_id:
            mi.identifiers = {'ozon': ozon_id}

        mi.ozon_cover_url = None
        cover = entry.xpath(u'normalize-space(.//img[1]/@src)')
        log.debug(u'cover: -----> %s' % cover)
        if cover:
            mi.ozon_cover_url = _translateToBigCoverUrl(cover)

        pub_year_block = entry.xpath(
            u'.//div[@class="bOneTileProperty"]/text()')
        if pub_year_block:
            # First run of four digits in the first property text node.
            year_match = re.search(r'\d{4}', pub_year_block[0])
            if year_match:
                mi.pubdate = toPubdate(log, year_match.group())

        mi.rating = self.get_rating(log, entry)

        return mi
Beispiel #8
0
    def to_metadata(self, log, entry):  # {{{
        """Build a ``Metadata`` object from one search-result tile.

        :param log: logger; receives the cover debug message and is passed
            on to ``toPubdate`` and ``self.get_rating``.
        :param entry: node exposing ``xpath()`` and ``get()`` (presumably an
            lxml element for one tile -- confirm against the caller).
        :return: ``Metadata`` with title, normalized authors and rating, plus
            the ``ozon`` identifier, ``ozon_cover_url`` and ``pubdate`` when
            the corresponding data is present in the tile.
        """
        title = unicode(entry.xpath(u'normalize-space(.//div[@itemprop="name"][1]/text())'))
        author = unicode(entry.xpath(u'normalize-space(.//div[contains(@class, "mPerson")])'))

        # The author text may hold several comma-separated names.
        norm_authors = [_normalizeAuthorNameWithInitials(name.strip()) for name in author.split(u',')]
        mi = Metadata(title, norm_authors)

        # The id is the second-to-last "/"-separated part of data-href.
        # A missing or too-short attribute yields no identifier instead of
        # raising AttributeError / IndexError.
        href_parts = (entry.get('data-href') or '').split('/')
        ozon_id = href_parts[-2] if len(href_parts) >= 2 else None
        if ozon_id:
            mi.identifiers = {'ozon': ozon_id}

        mi.ozon_cover_url = None
        cover = entry.xpath(u'normalize-space(.//img[1]/@src)')
        log.debug(u'cover: -----> %s' % cover)
        if cover:
            mi.ozon_cover_url = _translateToBigCoverUrl(cover)

        pub_year_block = entry.xpath(u'.//div[@class="bOneTileProperty"]/text()')
        if pub_year_block:
            # First run of four digits in the first property text node.
            year_match = re.search(r'\d{4}', pub_year_block[0])
            if year_match:
                mi.pubdate = toPubdate(log, year_match.group())

        mi.rating = self.get_rating(log, entry)

        return mi