Code example #1
File: opensearch_store.py  Project: JimmXinu/calibre
def open_search(url, query, max_results=10, timeout=60):
    description = Description(url)
    url_template = description.get_best_template()
    if not url_template:
        return
    oquery = Query(url_template)

    # set up initial values
    oquery.searchTerms = query
    oquery.count = max_results
    url = oquery.url()

    counter = max_results
    br = browser()
    with closing(br.open(url, timeout=timeout)) as f:
        doc = etree.fromstring(f.read())
        for data in doc.xpath('//*[local-name() = "entry"]'):
            if counter <= 0:
                break

            counter -= 1

            s = SearchResult()

            s.detail_item = ''.join(data.xpath('./*[local-name() = "id"]/text()')).strip()

            for link in data.xpath('./*[local-name() = "link"]'):
                rel = link.get('rel')
                href = link.get('href')
                type = link.get('type')

                if rel and href and type:
                    if 'http://opds-spec.org/thumbnail' in rel:
                        s.cover_url = href
                    elif 'http://opds-spec.org/image/thumbnail' in rel:
                        s.cover_url = href
                    elif 'http://opds-spec.org/acquisition/buy' in rel:
                        s.detail_item = href
                    elif 'http://opds-spec.org/acquisition/sample' in rel:
                        pass
                    elif 'http://opds-spec.org/acquisition' in rel:
                        if type:
                            ext = guess_extension(type)
                            if ext:
                                ext = ext[1:].upper().strip()
                                s.downloads[ext] = href
            s.formats = ', '.join(s.downloads.keys()).strip()

            s.title = ' '.join(data.xpath('./*[local-name() = "title"]//text()')).strip()
            s.author = ', '.join(data.xpath('./*[local-name() = "author"]//*[local-name() = "name"]//text()')).strip()

            price_e = data.xpath('.//*[local-name() = "price"][1]')
            if price_e:
                price_e = price_e[0]
                currency_code = price_e.get('currencycode', '')
                price = ''.join(price_e.xpath('.//text()')).strip()
                s.price = currency_code + ' ' + price
                s.price = s.price.strip()

            yield s
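A minimal driver for this generator might look like the sketch below; the OpenSearch description URL is a made-up placeholder, and the helpers the function relies on (Description, Query, SearchResult, browser, guess_extension, etree, closing) are assumed to be imported as in the listing above.

# Sketch only: 'http://example.com/opensearch.xml' is a hypothetical feed URL.
for result in open_search('http://example.com/opensearch.xml', 'dickens', max_results=5):
    print('%s by %s: %s [%s]' % (result.title, result.author, result.price, result.formats))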
Code example #2
File: xinxii_plugin.py  Project: 089git/calibre
    def search(self, query, max_results=10, timeout=60):
        '''
        XinXii's open search url is:
        http://www.xinxii.com/catalog-search/query/?keywords={searchTerms}&pw={startPage?}&doc_lang={docLang}&ff={docFormat},{docFormat},{docFormat}

        This URL template requires docLang and docFormat. However, the search
        actually sent to XinXii does not need them, so they can be ignored. We
        cannot push this into the standard OpenSearchOPDSStore search because
        of the required attributes.

        XinXii doesn't return all of the info supported by the OpenSearchOPDSStore
        search function, so this one is modified to drop the parts that rely on
        information XinXii doesn't provide.
        '''

        url = 'http://www.xinxii.com/catalog-search/query/?keywords=' + urllib.quote_plus(query)

        counter = max_results
        br = browser()
        with closing(br.open(url, timeout=timeout)) as f:
            doc = etree.fromstring(f.read())
            for data in doc.xpath('//*[local-name() = "entry"]'):
                if counter <= 0:
                    break

                counter -= 1

                s = SearchResult()

                s.detail_item = ''.join(data.xpath('./*[local-name() = "id"]/text()')).strip()

                for link in data.xpath('./*[local-name() = "link"]'):
                    rel = link.get('rel')
                    href = link.get('href')
                    type = link.get('type')

                    if rel and href and type:
                        if rel in ('http://opds-spec.org/thumbnail', 'http://opds-spec.org/image/thumbnail'):
                            s.cover_url = href
                        if rel == 'alternate':
                            s.detail_item = href

                s.formats = 'EPUB, PDF'

                s.title = ' '.join(data.xpath('./*[local-name() = "title"]//text()')).strip()
                s.author = ', '.join(data.xpath('./*[local-name() = "author"]//*[local-name() = "name"]//text()')).strip()

                price_e = data.xpath('.//*[local-name() = "price"][1]')
                if price_e:
                    price_e = price_e[0]
                    currency_code = price_e.get('currencycode', '')
                    price = ''.join(price_e.xpath('.//text()')).strip()
                    s.price = currency_code + ' ' + price
                    s.price = s.price.strip()


                yield s
Code example #3
File: eknigi_plugin.py  Project: MarioJC/calibre
    def search(self, query, max_results=10, timeout=60):
        # check for cyrillic symbols before performing search
        uquery = unicode(query.strip(), 'utf-8')
        reObj = re.search(u'^[а-яА-Я\\d\\s]{2,}$', uquery)
        if not reObj:
            return

        base_url = 'http://e-knigi.net'
        url = base_url + '/virtuemart?page=shop.browse&search_category=0&search_limiter=anywhere&keyword=' + urllib2.quote(query)

        br = browser()

        counter = max_results
        with closing(br.open(url, timeout=timeout)) as f:
            doc = html.fromstring(f.read())

            # if the store finds only one product, it opens the detail view directly
            for data in doc.xpath('//div[@class="prod_details"]'):
                s = SearchResult()
                s.cover_url = ''.join(data.xpath('.//div[@class="vm_main_info clearfix"]/div[@class="lf"]/a/img/@src')).strip()
                s.title = ''.join(data.xpath('.//div[@class="vm_main_info clearfix"]/div[@class="lf"]/a/img/@alt')).strip()
                s.author = ''.join(data.xpath('.//div[@class="td_bg clearfix"]/div[@class="gk_product_tab"]/div/table/tr[3]/td[2]/text()')).strip()
                s.price = ''.join(data.xpath('.//span[@class="productPrice"]/text()')).strip()
                s.detail_item = url
                s.drm = SearchResult.DRM_UNLOCKED

                yield s
                return

            # search in store results
            for data in doc.xpath('//div[@class="browseProductContainer"]'):
                if counter <= 0:
                    break
                id = ''.join(data.xpath('.//a[1]/@href')).strip()
                if not id:
                    continue

                title = ''.join(data.xpath('.//a[@class="gk_vm_product_image"]/img/@title')).strip()
                author = ''.join(data.xpath('.//div[@style="float:left;width:90%"]/b/text()')).strip().replace('Автор: ', '')

                if title.lower().find(query.lower()) == -1 and author.lower().find(query.lower()) == -1:
                    continue

                counter -= 1

                s = SearchResult()
                s.cover_url = ''.join(data.xpath('.//a[@class="gk_vm_product_image"]/img/@src')).strip()
                s.title = title
                s.author = author
                s.price = ''.join(data.xpath('.//span[@class="productPrice"]/text()')).strip()
                s.detail_item = base_url + id
                s.drm = SearchResult.DRM_UNLOCKED

                yield s
Code example #4
    def search(self, query, max_results=10, timeout=60):
        url = 'http://www.google.com/search?tbm=bks&q=' + urllib.quote_plus(query)
        
        br = browser()
        
        counter = max_results
        with closing(br.open(url, timeout=timeout)) as f:
            doc = html.fromstring(f.read())
            for data in doc.xpath('//ol[@id="rso"]/li'):
                if counter <= 0:
                    break

                id = ''.join(data.xpath('.//h3/a/@href'))
                if not id:
                    continue

                title = ''.join(data.xpath('.//h3/a//text()'))
                authors = data.xpath('.//div[@class="f"]//a//text()')
                while authors and authors[-1].strip().lower() in ('preview', 'read', 'more editions'):
                    authors = authors[:-1]
                if not authors:
                    continue
                author = ', '.join(authors)

                counter -= 1
                
                s = SearchResult()
                s.title = title.strip()
                s.author = author.strip()
                s.detail_item = id.strip()
                s.drm = SearchResult.DRM_UNKNOWN
                
                yield s
Code example #5
    def search(self, query, max_results=10, timeout=60):
        url = 'http://www.bewrite.net/mm5/merchant.mvc?Search_Code=B&Screen=SRCH&Search=' + urllib2.quote(query)

        br = browser()

        counter = max_results
        with closing(br.open(url, timeout=timeout)) as f:
            doc = html.fromstring(f.read())
            for data in doc.xpath('//div[@id="content"]//table/tr[position() > 1]'):
                if counter <= 0:
                    break

                id = ''.join(data.xpath('.//a/@href'))
                if not id:
                    continue

                heading = ''.join(data.xpath('./td[2]//text()'))
                title, q, author = heading.partition('by ')
                cover_url = ''
                price = ''

                counter -= 1

                s = SearchResult()
                s.cover_url = cover_url.strip()
                s.title = title.strip()
                s.author = author.strip()
                s.price = price.strip()
                s.detail_item = id.strip()
                s.drm = SearchResult.DRM_UNLOCKED

                yield s
Code example #6
    def search(self, query, max_results=10, timeout=60):
        url = 'http://ebooks.eharlequin.com/BANGSearch.dll?Type=FullText&FullTextField=All&FullTextCriteria=' + urllib2.quote(query)
        
        br = browser()
        
        counter = max_results
        with closing(br.open(url, timeout=timeout)) as f:
            doc = html.fromstring(f.read())
            for data in doc.xpath('//table[not(.//@class="sidelink")]/tr[.//ul[@id="details"]]'):
                if counter <= 0:
                    break

                id = ''.join(data.xpath('.//ul[@id="details"]/li[@id="title-results"]/a/@href'))
                if not id:
                    continue

                title = ''.join(data.xpath('.//ul[@id="details"]/li[@id="title-results"]/a/text()'))
                author = ''.join(data.xpath('.//ul[@id="details"]/li[@id="author"][1]//a/text()'))
                price = ''.join(data.xpath('.//div[@class="ourprice"]/font/text()'))
                cover_url = ''.join(data.xpath('.//a[@href="%s"]/img/@src' % id))

                counter -= 1

                s = SearchResult()
                s.cover_url = cover_url
                s.title = title.strip()
                s.author = author.strip()
                s.price = price.strip()
                s.detail_item = 'http://ebooks.eharlequin.com/' + id.strip()
                s.formats = 'EPUB'
                
                yield s
Code example #7
File: ebookpoint_plugin.py  Project: JimmXinu/calibre
    def search(self, query, max_results=25, timeout=60):
        url = 'http://ebookpoint.pl/search?qa=&szukaj=' + quote_plus(
            query.decode('utf-8').encode('iso-8859-2')) + '&serwisyall=0&wprzyg=0&wsprzed=1&wyczerp=0&formaty=em-p'

        br = browser()

        counter = max_results
        with closing(br.open(url, timeout=timeout)) as f:
            doc = html.fromstring(f.read())
            for data in doc.xpath('//ul[@class="list"]/li'):
                if counter <= 0:
                    break

                id = ''.join(data.xpath('./a/@href'))
                if not id:
                    continue

                formats = ', '.join(data.xpath('.//ul[@class="book-type book-type-points"]//span[@class="popup"]/span/text()'))
                cover_url = ''.join(data.xpath('.//p[@class="cover"]/img/@data-src'))
                title = ''.join(data.xpath('.//div[@class="book-info"]/h3/a/text()'))
                author = ''.join(data.xpath('.//p[@class="author"]//text()'))
                price = ''.join(data.xpath('.//p[@class="price price-incart"]/a/ins/text()|.//p[@class="price price-add"]/a/text()'))

                counter -= 1

                s = SearchResult()
                s.cover_url = cover_url
                s.title = title.strip()
                s.author = author.strip()
                s.price = re.sub(r'\.', ',', price)
                s.detail_item = id.strip()
                s.drm = SearchResult.DRM_UNLOCKED
                s.formats = formats.upper()

                yield s
Code example #8
    def search(self, query, max_results=20, timeout=60):

        br = browser()

        counter = max_results
        page = 1
        while counter:
            with closing(
                br.open(
                    "http://www.publio.pl/e-booki,strona" + str(page) + ".html?q=" + urllib.quote(query),
                    timeout=timeout,
                )
            ) as f:
                doc = html.fromstring(f.read())
                for data in doc.xpath('//div[@class="item"]'):
                    if counter <= 0:
                        break

                    id = "".join(data.xpath('.//div[@class="img"]/a/@href'))
                    if not id:
                        continue

                    cover_url = "".join(data.xpath('.//div[@class="img"]/a/img/@data-original'))
                    title = "".join(data.xpath('.//div[@class="img"]/a/@title'))
                    title2 = "".join(data.xpath('.//div[@class="desc"]/h5//text()'))
                    if title2:
                        title = title + ". " + title2
                    if (
                        "".join(
                            data.xpath('./div[@class="desc"]/div[@class="detailShortList"]/div[last()]/span/text()')
                        ).strip()
                        == "Seria:"
                    ):
                        series = "".join(
                            data.xpath('./div[@class="desc"]/div[@class="detailShortList"]/div[last()]/a/@title')
                        )
                        title = title + " (seria " + series + ")"
                    author = ", ".join(
                        data.xpath('./div[@class="desc"]/div[@class="detailShortList"]/div[@class="row"][1]/a/@title')
                    )
                    price = "".join(data.xpath('.//div[@class="priceBox tk-museo-slab"]/ins/text()'))
                    if not price:
                        price = "".join(data.xpath('.//div[@class="priceBox tk-museo-slab"]/text()')).strip()
                    formats = ", ".join(data.xpath('.//div[@class="formats"]/a/img/@alt'))

                    counter -= 1

                    s = SearchResult()
                    s.cover_url = "http://www.publio.pl" + cover_url
                    s.title = title.strip()
                    s.author = author
                    s.price = price
                    s.detail_item = "http://www.publio.pl" + id.strip()
                    s.drm = SearchResult.DRM_LOCKED if "DRM" in formats else SearchResult.DRM_UNLOCKED
                    s.formats = formats.replace(" DRM", "").strip()

                    yield s
                if not doc.xpath('boolean(//a[@class="next"])'):
                    break
                page += 1
Code example #9
File: zixo_plugin.py  Project: Eksmo/calibre
    def search(self, query, max_results=10, timeout=60):
        url = 'http://zixo.pl/wyszukiwarka/?search=' + urllib.quote(query.encode('utf-8')) + '&product_type=0'

        br = browser()

        counter = max_results
        with closing(br.open(url, timeout=timeout)) as f:
            doc = html.fromstring(f.read())
            for data in doc.xpath('//div[@class="productInline"]'):
                if counter <= 0:
                    break

                id = ''.join(data.xpath('.//a[@class="productThumb"]/@href'))
                if not id:
                    continue

                cover_url = ''.join(data.xpath('.//a[@class="productThumb"]/img/@src'))
                title = ''.join(data.xpath('.//a[@class="title"]/text()'))
                author = ','.join(data.xpath('.//div[@class="productDescription"]/span[1]/a/text()'))
                price = ''.join(data.xpath('.//div[@class="priceList"]/span/text()'))
                price = re.sub(r'\.', ',', price)

                counter -= 1

                s = SearchResult()
                s.cover_url = cover_url
                s.title = title.strip()
                s.author = author.strip()
                s.price = price
                s.detail_item = 'http://zixo.pl' + id.strip()
                s.drm = SearchResult.DRM_LOCKED

                yield s
Code example #10
File: ebook_nl_plugin.py  Project: MarioJC/calibre
    def search(self, query, max_results=10, timeout=60):
        url = ('http://www.ebook.nl/store/advanced_search_result.php?keywords=' + urllib2.quote(query))
        br = browser()

        counter = max_results
        with closing(br.open(url, timeout=timeout)) as f:
            doc = html.fromstring(f.read())
            for data in doc.xpath('//div[@id="books"]/div[@itemtype="http://schema.org/Book"]'):
                if counter <= 0:
                    break

                id = ''.join(data.xpath('./meta[@itemprop="url"]/@content')).strip()
                if not id:
                    continue
                cover_url = 'http://www.ebook.nl/store/' + ''.join(data.xpath('.//img[@itemprop="image"]/@src'))
                title = ''.join(data.xpath('./span[@itemprop="name"]/a/text()')).strip()
                author = ''.join(data.xpath('./span[@itemprop="author"]/a/text()')).strip()
                if author == '&nbsp':
                    author = ''
                price = ''.join(data.xpath('.//span[@itemprop="price"]//text()'))
                counter -= 1

                s = SearchResult()
                s.cover_url = cover_url
                s.title = title.strip()
                s.author = author.strip()
                s.price = price
                s.drm = SearchResult.DRM_UNKNOWN
                s.detail_item = id

                yield s
Code example #11
File: ozon_ru_plugin.py  Project: BobPyron/calibre
    def search(self, query, max_results=15, timeout=60):
        search_url = self.shop_url + '/webservice/webservice.asmx/SearchWebService?'\
                    'searchText=%s&searchContext=ebook' % urllib2.quote(query)
        search_urls = [ search_url ]

        # add this as the first try if it looks like an ozon ID
        if re.match(r"^\d{6,9}$", query):
            ozon_detail = self.shop_url + '/webservices/OzonWebSvc.asmx/ItemDetail?ID=%s' % query
            search_urls.insert(0, ozon_detail)

        xp_template = 'normalize-space(./*[local-name() = "{0}"]/text())'
        counter = max_results
        br = browser()

        for url in search_urls:
            with closing(br.open(url, timeout=timeout)) as f:
                raw = xml_to_unicode(f.read(), strip_encoding_pats=True, assume_utf8=True)[0]
                doc = etree.fromstring(raw)
                for data in doc.xpath('//*[local-name()="SearchItems" or local-name()="ItemDetail"]'):
                    if counter <= 0:
                        break
                    counter -= 1

                    s = SearchResult()
                    s.detail_item = data.xpath(xp_template.format('ID'))
                    s.title = data.xpath(xp_template.format('Name'))
                    s.author = data.xpath(xp_template.format('Author'))
                    s.price = data.xpath(xp_template.format('Price'))
                    s.cover_url = data.xpath(xp_template.format('Picture'))
                    s.price = format_price_in_RUR(s.price)
                    yield s
Code example #12
    def search(self, query, max_results=10, timeout=60):
        url = 'http://www.ebookshoppe.com/search.php?search_query=' + quote(query)
        br = browser()
        br.addheaders = [("Referer", "http://www.ebookshoppe.com/")]

        counter = max_results
        with closing(br.open(url, timeout=timeout)) as f:
            doc = html.fromstring(f.read())
            for data in doc.xpath('//ul[@class="ProductList"]/li'):
                if counter <= 0:
                    break

                id = ''.join(data.xpath('./div[@class="ProductDetails"]/'
                                        'strong/a/@href')).strip()
                if not id:
                    continue
                cover_url = ''.join(data.xpath('./div[@class="ProductImage"]/a/img/@src'))
                title = ''.join(data.xpath('./div[@class="ProductDetails"]/strong/a/text()'))
                price = ''.join(data.xpath('./div[@class="ProductPriceRating"]/em/text()'))
                counter -= 1

                s = SearchResult()
                s.cover_url = cover_url
                s.title = title.strip()
                s.price = price
                s.drm = SearchResult.DRM_UNLOCKED
                s.detail_item = id

                self.get_author_and_formats(s, timeout)
                if not s.author:
                    continue

                yield s
Code example #13
    def search(self, query, max_results=10, timeout=60):
        url = 'http://www.bubok.pt/resellers/calibre_search/' + urllib.quote_plus(query)

        br = browser()

        counter = max_results
        with closing(br.open(url, timeout=timeout)) as f:
            doc = html.fromstring(f.read())
            for data in doc.xpath('//div[contains(@class, "libro")]'):
                if counter <= 0:
                    break

                id = ''.join(data.xpath('.//div[@class="url"]/text()'))

                title = ''.join(data.xpath('.//div[@class="titulo"]/text()'))

                author = ''.join(data.xpath('.//div[@class="autor"]/text()'))

                price = ''.join(data.xpath('.//div[@class="precio"]/text()'))

                formats = ''.join(data.xpath('.//div[@class="formatos"]/text()'))

                cover = ''.join(data.xpath('.//div[@class="portada"]/text()'))

                counter -= 1

                s = SearchResult()
                s.title = title.strip()
                s.author = author.strip()
                s.detail_item = id.strip()
                s.price = price.strip()
                s.drm = SearchResult.DRM_UNLOCKED
                s.formats = formats.strip()
                s.cover_url = cover.strip()
                yield s
Code example #14
    def search(self, query, max_results=10, timeout=60):
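        # Build the query string by hand: non-ASCII characters become backslash
        # escapes, which are then turned into percent-encoded bytes; any literal
        # '%' is escaped first and spaces become '+'.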
        url = self.search_url + query.encode('ascii', 'backslashreplace').replace('%', '%25').replace('\\x', '%').replace(' ', '+')
        br = browser()

        counter = max_results
        with closing(br.open(url, timeout=timeout)) as f:
            doc = html.fromstring(f.read())#.decode('latin-1', 'replace'))

            data_xpath = '//div[contains(@class, "prod")]'
            format_xpath = './/ul[contains(@class, "rsltL")]//span[contains(@class, "lrg") and not(contains(@class, "bld"))]/text()'
            asin_xpath = '@name'
            cover_xpath = './/img[@class="productImage"]/@src'
            title_xpath = './/h3[@class="newaps"]/a//text()'
            author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]/text()'
            price_xpath = './/ul[contains(@class, "rsltL")]//span[contains(@class, "lrg") and contains(@class, "bld")]/text()'

            for data in doc.xpath(data_xpath):
                if counter <= 0:
                    break

                # Even though we are searching digital-text only, Amazon will still
                # put in results for non-Kindle books (author pages). So we need
                # to explicitly check whether the item is a Kindle book and ignore
                # it if it isn't.
                format_ = ''.join(data.xpath(format_xpath))
                if 'kindle' not in format_.lower():
                    continue

                # We must have an ASIN, otherwise we can't easily reference the
                # book later.
                asin = data.xpath(asin_xpath)
                if asin:
                    asin = asin[0]
                else:
                    continue

                cover_url = ''.join(data.xpath(cover_xpath))

                title = ''.join(data.xpath(title_xpath))
                author = ''.join(data.xpath(author_xpath))
                try:
                    if self.author_article:
                        author = author.split(self.author_article, 1)[1].split(" (")[0]
                except:
                    pass

                price = ''.join(data.xpath(price_xpath))

                counter -= 1

                s = SearchResult()
                s.cover_url = cover_url.strip()
                s.title = title.strip()
                s.author = author.strip()
                s.price = price.strip()
                s.detail_item = asin.strip()
                s.drm = SearchResult.DRM_UNKNOWN
                s.formats = 'Kindle'

                yield s
Code example #15
File: bn_plugin.py  Project: Eksmo/calibre
    def search(self, query, max_results=10, timeout=60):
        url = 'http://www.barnesandnoble.com/s/%s?keyword=%s&store=ebook' % (query.replace(' ', '-'), urllib.quote_plus(query))

        br = browser()

        counter = max_results
        with closing(br.open(url, timeout=timeout)) as f:
            doc = html.fromstring(f.read())
            for data in doc.xpath('//ul[contains(@class, "result-set")]/li[contains(@class, "result")]'):
                if counter <= 0:
                    break

                id = ''.join(data.xpath('.//div[contains(@class, "image-bounding-box")]/a/@href'))
                if not id:
                    continue

                cover_url = ''.join(data.xpath('.//img[contains(@class, "product-image")]/@src'))

                title = ''.join(data.xpath('.//a[@class="title"]//text()'))
                author = ', '.join(data.xpath('.//a[@class="contributor"]//text()'))
                price = ''.join(data.xpath('.//div[@class="price-format"]//span[contains(@class, "price")]/text()'))

                counter -= 1

                s = SearchResult()
                s.cover_url = cover_url
                s.title = title.strip()
                s.author = author.strip()
                s.price = price.strip()
                s.detail_item = id.strip()
                s.drm = SearchResult.DRM_UNKNOWN
                s.formats = 'Nook'

                yield s
Code example #16
File: foyles_uk_plugin.py  Project: 089git/calibre
    def search(self, query, max_results=10, timeout=60):
        url = 'http://ebooks.foyles.co.uk/catalog/search/?query=' + urllib2.quote(query)

        br = browser()

        counter = max_results
        with closing(br.open(url, timeout=timeout)) as f:
            doc = html.fromstring(f.read())
            for data in doc.xpath('//div[@class="doc-item"]'):
                if counter <= 0:
                    break
                id_ = ''.join(data.xpath('.//p[@class="doc-cover"]/a/@href')).strip()
                if not id_:
                    continue
                id_ = 'http://ebooks.foyles.co.uk' + id_

                cover_url = ''.join(data.xpath('.//p[@class="doc-cover"]/a/img/@src'))
                title = ''.join(data.xpath('.//span[@class="title"]/a/text()'))
                author = ', '.join(data.xpath('.//span[@class="author"]/span[@class="author"]/text()'))
                price = ''.join(data.xpath('.//span[@itemprop="price"]/text()')).strip()
                format_ = ''.join(data.xpath('.//p[@class="doc-meta-format"]/span[last()]/text()'))

                counter -= 1

                s = SearchResult()
                s.cover_url = cover_url
                s.title = title.strip()
                s.author = author.strip()
                s.price = price
                s.detail_item = id_
                s.drm = SearchResult.DRM_LOCKED
                s.formats = format_

                yield s
Code example #17
    def search(self, query, max_results=10, timeout=60):
        print( "search!")
        q = query.decode('utf-8')

        url = "https://views.scraperwiki.com/run/haodooscraperview/?" + urlencode(
                {
                    "q": q
                } )
        print(url)

        br = browser()
        with closing(br.open(url, timeout=timeout)) as f:
            json_doc = f.read()
            if len(json_doc) > 0:
                result = json.loads(json_doc)
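                # Each volume entry is expected to provide a 'title' and 'url', plus
                # an optional 'type' list whose items carry a format name and link.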
                for volume in result:
                    s = SearchResult()
                    s.title = volume['title']
                    s.detail_item = volume['url']
                    s.price = '$0.00'
                    s.drm = SearchResult.DRM_UNLOCKED

                    if volume.get('type'):
                        for t in volume['type']:
                            s.downloads[t['type']] = t['link']
                        s.formats = ', '.join(s.downloads.keys())
                    yield s
            else:
                print( "scrape nothing." )
Code example #18
File: bookoteka_plugin.py  Project: Eksmo/calibre
    def search(self, query, max_results=10, timeout=60):
        url = 'http://bookoteka.pl/list?search=' + urllib.quote_plus(query) + '&cat=1&hp=1&type=1'

        br = browser()

        counter = max_results
        with closing(br.open(url, timeout=timeout)) as f:
            doc = html.fromstring(f.read())
            for data in doc.xpath('//li[@class="EBOOK"]'):
                if counter <= 0:
                    break

                id = ''.join(data.xpath('.//a[@class="item_link"]/@href'))
                if not id:
                    continue

                cover_url = ''.join(data.xpath('.//a[@class="item_link"]/img/@src'))
                title = ''.join(data.xpath('.//div[@class="shelf_title"]/a/text()'))
                author = ''.join(data.xpath('.//div[@class="shelf_authors"][1]/text()'))
                price = ''.join(data.xpath('.//span[@class="EBOOK"]/text()'))
                price = price.replace('.', ',')
                formats = ', '.join(data.xpath('.//a[@class="fancybox protected"]/text()'))

                counter -= 1

                s = SearchResult()
                s.cover_url = 'http://bookoteka.pl' + cover_url
                s.title = title.strip()
                s.author = author.strip()
                s.price = price
                s.detail_item = 'http://bookoteka.pl' + id.strip()
                s.drm = SearchResult.DRM_UNLOCKED
                s.formats = formats.strip()

                yield s
Code example #19
    def search(self, query, max_results=10, timeout=60):
        base_url = 'https://www.millsandboon.co.uk'
        url = base_url + '/search.aspx?format=ebook&searchText=' + urllib2.quote(query)
        br = browser()

        counter = max_results
        with closing(br.open(url, timeout=timeout)) as f:
            doc = html.fromstring(f.read())
            for data in doc.xpath('//article[contains(@class, "group")]'):
                if counter <= 0:
                    break
                id_ = ''.join(data.xpath('.//div[@class="img-wrapper"]/a/@href')).strip()
                if not id_:
                    continue

                cover_url = ''.join(data.xpath('.//div[@class="img-wrapper"]/a/img/@src'))
                title = ''.join(data.xpath('.//div[@class="img-wrapper"]/a/img/@alt')).strip()
                author = ''.join(data.xpath('.//a[@class="author"]/text()'))
                price = ''.join(data.xpath('.//div[@class="type-wrapper"]/ul/li[child::span[text()="eBook"]]/a/text()'))
                format_ = ''.join(data.xpath('.//p[@class="doc-meta-format"]/span[last()]/text()'))
                drm = SearchResult.DRM_LOCKED

                counter -= 1

                s = SearchResult()
                s.cover_url = cover_url
                s.title = title.strip()
                s.author = author.strip()
                s.price = price
                s.detail_item = id_
                s.drm = drm
                s.formats = format_

                yield s
Code example #20
File: kobo_plugin.py  Project: Aliminator666/calibre
    def search(self, query, max_results=10, timeout=60):
        url = 'http://www.kobobooks.com/search/search.html?q=' + urllib.quote_plus(query)

        br = browser()

        counter = max_results
        with closing(br.open(url, timeout=timeout)) as f:
            doc = html.fromstring(f.read())
            for data in doc.xpath('//ul[contains(@class, "flowview-items")]/li'):
                if counter <= 0:
                    break

                id = ''.join(data.xpath('./a[contains(@class, "block-link")]/@href'))
                if not id:
                    continue
                id = id[1:]

                price = ''.join(data.xpath('.//a[contains(@class, "primary-button")]//text()'))

                cover_url = ''.join(data.xpath('.//img[1]/@src'))
                cover_url = 'http:%s' % cover_url

                title = ''.join(data.xpath('.//p[contains(@class, "flowview-item-title")]//text()'))

                counter -= 1

                s = SearchResult()
                s.cover_url = cover_url
                s.title = title.strip()
                s.price = price.strip()
                s.detail_item = 'http://store.kobobooks.com/' + id.strip()
                s.formats = 'EPUB'
                s.drm = SearchResult.DRM_UNKNOWN

                yield s
Code example #21
    def search(self, query, max_results=10, timeout=60):
        url = "http://www.legimi.com/pl/ebooki/?szukaj=" + urllib.quote_plus(query)

        br = browser()

        counter = max_results
        with closing(br.open(url, timeout=timeout)) as f:
            doc = html.fromstring(f.read())
            for data in doc.xpath('//div[@id="listBooks"]/div'):
                if counter <= 0:
                    break

                id = "".join(data.xpath('.//a[@class="plainLink"]/@href'))
                if not id:
                    continue

                cover_url = "".join(data.xpath(".//img[1]/@src"))
                title = "".join(data.xpath('.//span[@class="bookListTitle ellipsis"]/text()'))
                author = "".join(data.xpath('.//span[@class="bookListAuthor ellipsis"]/text()'))
                price = "".join(data.xpath('.//div[@class="bookListPrice"]/span/text()'))

                counter -= 1

                s = SearchResult()
                s.cover_url = "http://www.legimi.com/" + cover_url
                s.title = title.strip()
                s.author = author.strip()
                s.price = price
                s.detail_item = "http://www.legimi.com/" + id.strip()

                yield s
Code example #22
    def search(self, query, max_results=10, timeout=60):
        url = 'https://www.beam-shop.de/search?saltFieldLimitation=all&sSearch=' + urllib2.quote(query)
        br = browser()

        counter = max_results
        with closing(br.open(url, timeout=timeout)) as f:
            doc = html.fromstring(f.read())
            for data in doc.xpath('//div[contains(@class, "product--box")]'):
                if counter <= 0:
                    break

                id_ = ''.join(data.xpath('./div/div[contains(@class, "product--info")]/a/@href')).strip()
                if not id_:
                    continue
                cover_url = ''.join(data.xpath('./div/div[contains(@class, "product--info")]/a//img/@srcset'))
                if cover_url:
                    cover_url = cover_url.split(',')[0].strip()
                author = data.xpath('.//a[@class="product--author"]/text()')[0].strip()
                title = data.xpath('.//a[@class="product--title"]/text()')[0].strip()
                price = data.xpath('.//div[@class="product--price"]/span/text()')[0].strip()
                counter -= 1

                s = SearchResult()
                s.cover_url = cover_url
                s.title = title.strip()
                s.author = author.strip()
                s.price = price
                s.drm = SearchResult.DRM_UNLOCKED
                s.detail_item = id_
#                 s.formats = None
                yield s
Code example #23
File: gutenberg_plugin.py  Project: Eksmo/calibre
    def search(self, query, max_results=10, timeout=60):
        url = 'http://m.gutenberg.org/ebooks/search.mobile/?default_prefix=all&sort_order=title&query=' + urllib.quote_plus(query)

        br = browser()

        counter = max_results
        with closing(br.open(url, timeout=timeout)) as f:
            doc = html.fromstring(f.read())
            for data in doc.xpath('//ol[@class="results"]/li[@class="booklink"]'):
                if counter <= 0:
                    break

                id = ''.join(data.xpath('./a/@href'))
                id = id.split('.mobile')[0]

                title = ''.join(data.xpath('.//span[@class="title"]/text()'))
                author = ''.join(data.xpath('.//span[@class="subtitle"]/text()'))

                counter -= 1

                s = SearchResult()
                s.cover_url = ''

                s.detail_item = id.strip()
                s.title = title.strip()
                s.author = author.strip()
                s.price = '$0.00'
                s.drm = SearchResult.DRM_UNLOCKED

                yield s
Code example #24
    def search(self, query, max_results=20, timeout=60):
        url = 'http://www.escapemagazine.pl/wyszukiwarka?query=' + urllib.quote_plus(query)

        br = browser()

        counter = max_results
        with closing(br.open(url, timeout=timeout)) as f:
            doc = html.fromstring(f.read())
            for data in doc.xpath('//div[@class="item item_short"]'):
                if counter <= 0:
                    break

                id = ''.join(data.xpath('.//h2[@class="title"]/a[1]/@href'))
                if not id:
                    continue

                title = ''.join(data.xpath('.//h2[@class="title"]/a[1]/text()'))
                author = ''.join(data.xpath('.//div[@class="author"]/text()'))
                price = ''.join(data.xpath('.//span[@class="price_now"]/strong/text()')) + ' zł'
                cover_url = ''.join(data.xpath('.//img[@class="cover"]/@src'))

                counter -= 1

                s = SearchResult()
                s.cover_url = cover_url
                s.title = title.strip()
                s.author = author.strip()
                s.price = price
                s.detail_item = 'http://www.escapemagazine.pl' + id.strip()
                s.drm = SearchResult.DRM_UNLOCKED
                s.formats = 'PDF'

                yield s
Code example #25
    def search(self, query, max_results=15, timeout=60):
        search_url = (
            self.shop_url + "/webservice/webservice.asmx/SearchWebService?"
            "searchText=%s&searchContext=ebook" % urllib2.quote(query)
        )
        search_urls = [search_url]

        xp_template = 'normalize-space(./*[local-name() = "{0}"]/text())'
        counter = max_results
        br = browser()

        for url in search_urls:
            with closing(br.open(url, timeout=timeout)) as f:
                raw = xml_to_unicode(f.read(), strip_encoding_pats=True, assume_utf8=True)[0]
                doc = etree.fromstring(raw)
                for data in doc.xpath('//*[local-name()="SearchItems" or local-name()="ItemDetail"]'):
                    if counter <= 0:
                        break
                    counter -= 1

                    s = SearchResult()
                    s.detail_item = data.xpath(xp_template.format("ID"))
                    s.title = data.xpath(xp_template.format("Name"))
                    s.author = data.xpath(xp_template.format("Author"))
                    s.price = data.xpath(xp_template.format("Price"))
                    s.cover_url = data.xpath(xp_template.format("Picture"))
                    s.price = format_price_in_RUR(s.price)
                    yield s
Code example #26
File: whsmith_uk_plugin.py  Project: Mymei2/calibre
    def search(self, query, max_results=10, timeout=60):
        url = ('http://www.whsmith.co.uk/search?keywordCategoryId=wc_dept_ebooks&results=60'
               '&page=1&keywords=' + urllib2.quote(query))

        br = browser()

        counter = max_results
        with closing(br.open(url, timeout=timeout)) as f:
            doc = html.fromstring(f.read())
            for data in doc.xpath('//li[@class="product"]'):
                if counter <= 0:
                    break
                id_ = ''.join(data.xpath('./a[@class="product_image_wrap"]/@href'))
                if not id_:
                    continue
                id_ = 'http://www.whsmith.co.uk' + id_
                cover_url = ''.join(data.xpath('.//img[@class="product_image"]/@src'))
                title = ''.join(data.xpath('.//h4[@class="product_title"]/text()'))
                author = ', '.join(data.xpath('.//span[@class="product_second"]/text()'))
                price = ''.join(data.xpath('.//span[@class="price"]/text()'))
                counter -= 1

                s = SearchResult()
                s.cover_url = cover_url
                s.title = title.strip()
                s.author = author.strip()
                s.price = price
                s.drm = SearchResult.DRM_LOCKED
                s.detail_item = id_
                s.formats = 'ePub'

                yield s
Code example #27
File: woblink_plugin.py  Project: Eksmo/calibre
    def search(self, query, max_results=10, timeout=60):
        url = 'http://woblink.com/publication?query=' + urllib.quote_plus(query.encode('utf-8'))
        if max_results > 10:
            if max_results > 20:
                url += '&limit=30'
            else:
                url += '&limit=20'

        br = browser()

        counter = max_results
        with closing(br.open(url, timeout=timeout)) as f:
            doc = html.fromstring(f.read())
            for data in doc.xpath('//div[@class="book-item"]'):
                if counter <= 0:
                    break

                id = ''.join(data.xpath('.//td[@class="w10 va-t"]/a[1]/@href'))
                if not id:
                    continue

                cover_url = ''.join(data.xpath('.//td[@class="w10 va-t"]/a[1]/img/@src'))
                title = ''.join(data.xpath('.//h2[@class="title"]/a[1]/text()'))
                author = ', '.join(data.xpath('.//p[@class="author"]/a/text()'))
                price = ''.join(data.xpath('.//div[@class="prices"]/span[1]/span/text()'))
                price = re.sub(r'\.', ',', price)
                formats = [form[8:-4].split('_')[0] for form in data.xpath('.//p[3]/img/@src')]

                s = SearchResult()
                s.cover_url = 'http://woblink.com' + cover_url
                s.title = title.strip()
                s.author = author.strip()
                s.price = price + ' zł'
                s.detail_item = id.strip()
                
                # MOBI should be sent first,
                if 'MOBI' in formats:
                    t = copy.copy(s)
                    t.title += ' MOBI'
                    t.drm = SearchResult.DRM_UNLOCKED
                    t.formats = 'MOBI'
                    formats.remove('MOBI')
                    
                    counter -= 1
                    yield t
                    
                # and the remaining formats (if any) next
                if formats:
                    if 'epub' in formats:
                        formats.remove('epub')
                        formats.append('WOBLINK')
                        if 'E Ink' in data.xpath('.//div[@class="prices"]/img/@title'):
                            formats.insert(0, 'EPUB')
                    
                    s.drm = SearchResult.DRM_LOCKED
                    s.formats = ', '.join(formats).upper()
                    
                    counter -= 1
                    yield s
Code example #28
File: gutenberg_plugin.py  Project: j-howell/calibre
def search(query, max_results=10, timeout=60, write_raw_to=None):
    url = 'http://m.gutenberg.org/ebooks/search.opds/?query=' + quote_plus(query)

    counter = max_results
    br = browser(user_agent='calibre/'+__version__)
    with closing(br.open(url, timeout=timeout)) as f:
        raw = f.read()
        if write_raw_to is not None:
            with open(write_raw_to, 'wb') as f:
                f.write(raw)
        doc = etree.fromstring(raw)
        for data in doc.xpath('//*[local-name() = "entry"]'):
            if counter <= 0:
                break

            counter -= 1

            s = SearchResult()

            # We could use the <link rel="alternate" type="text/html" ...> tag from the
            # detail OPDS page, but this is easier.
            id = fix_url(''.join(data.xpath('./*[local-name() = "id"]/text()')).strip())
            s.detail_item = url_slash_cleaner('%s/ebooks/%s' % (web_url, re.sub(r'[^\d]', '', id)))
            s.title = ' '.join(data.xpath('./*[local-name() = "title"]//text()')).strip()
            s.author = ', '.join(data.xpath('./*[local-name() = "content"]//text()')).strip()
            if not s.title or not s.author:
                continue

            # Get the formats and direct download links.
            with closing(br.open(id, timeout=timeout/4)) as nf:
                ndoc = etree.fromstring(nf.read())
                for link in ndoc.xpath('//*[local-name() = "link" and @rel = "http://opds-spec.org/acquisition"]'):
                    type = link.get('type')
                    href = link.get('href')
                    if type:
                        ext = mimetypes.guess_extension(type)
                        if ext:
                            ext = ext[1:].upper().strip()
                            s.downloads[ext] = fix_url(href)

            s.formats = ', '.join(s.downloads.keys())
            if not s.formats:
                continue

            for link in data.xpath('./*[local-name() = "link"]'):
                rel = link.get('rel')
                href = link.get('href')
                type = link.get('type')

                if rel and href and type:
                    href = fix_url(href)
                    if rel in ('http://opds-spec.org/thumbnail', 'http://opds-spec.org/image/thumbnail'):
                        if href.startswith('data:image/png;base64,'):
                            cdata = href.replace('data:image/png;base64,', '')
                            if not isinstance(cdata, bytes):
                                cdata = cdata.encode('ascii')
                            s.cover_data = base64.b64decode(cdata)

            yield s
Code example #29
File: amazon_uk_plugin.py  Project: foolsh/calibre
    def search(self, query, max_results=10, timeout=60):
        s = SearchResult()
        s.title = 'Amazon required that this<br>store be permanently closed.'
        s.author = ''
        s.price = ''
        s.detail_item = ''
        s.drm = SearchResult.DRM_UNKNOWN
        yield s
Code example #30
File: cache_update_thread.py  Project: AEliu/calibre
    def run(self):
        url = 'http://www.mobileread.com/forums/ebooks.php?do=getlist&type=html'

        self.update_details.emit(_('Checking last download date.'))
        last_download = self.config.get('last_download', None)
        # Don't update the book list if our cache is less than one week old.
        if last_download and (time.time() - last_download) < 604800:
            return

        self.update_details.emit(_('Downloading book list from MobileRead.'))
        # Download the book list HTML file from MobileRead.
        br = browser()
        raw_data = None
        try:
            with closing(br.open(url, timeout=self.timeout)) as f:
                raw_data = f.read()
        except:
            return

        if not raw_data or not self._run:
            return

        self.update_details.emit(_('Processing books.'))
        # Turn books listed in the HTML file into SearchResult objects.
        books = []
        try:
            data = html.fromstring(raw_data)
            raw_books = data.xpath('//ul/li')
            self.total_changed.emit(len(raw_books))

            for i, book_data in enumerate(raw_books):
                self.update_details.emit(
                        _('%(num)s of %(tot)s books processed.') % dict(
                            num=i, tot=len(raw_books)))
                book = SearchResult()
                book.detail_item = ''.join(book_data.xpath('.//a/@href'))
                book.formats = ''.join(book_data.xpath('.//i/text()'))
                book.formats = book.formats.strip()

                text = ''.join(book_data.xpath('.//a/text()'))
                if ':' in text:
                    book.author, q, text = text.partition(':')
                book.author = book.author.strip()
                book.title = text.strip()
                books.append(book)

                if not self._run:
                    books = []
                    break
                else:
                    self.update_progress.emit(i)
        except:
            pass

        # Save the book list and its creation time.
        if books:
            self.config['book_list'] = self.seralize_books(books)
            self.config['last_download'] = time.time()
Code example #31
def search_google(query, max_results=10, timeout=60, write_html_to=None):
    url = 'https://www.google.com/search?tbm=bks&q=' + quote_plus(query)

    br = browser()

    counter = max_results
    with closing(br.open(url, timeout=timeout)) as f:
        raw = f.read()
        doc = parse_html(raw)
        if write_html_to is not None:
            praw = html.tostring(doc, encoding='utf-8')
            open(write_html_to, 'wb').write(praw)
        for data in doc.xpath('//div[@id="rso"]/div'):
            if counter <= 0:
                break
            h3 = data.xpath('descendant::h3')
            if not h3:
                continue
            h3 = h3[0]
            a = h3.getparent()
            id = a.get('href')
            if not id:
                continue

            title = ''.join(data.xpath('.//h3//text()')).strip()
            authors = data.xpath(
                'descendant::a[@class="fl" and @href]//text()')
            while authors and authors[-1].strip().lower() in ('preview',
                                                              'read',
                                                              'more editions'):
                authors = authors[:-1]
            if not authors:
                continue
            author = ' & '.join(authors)

            counter -= 1

            s = SearchResult()
            s.title = title.strip()
            s.author = author.strip()
            s.detail_item = id.strip()
            s.drm = SearchResult.DRM_UNKNOWN

            yield s
Code example #32
    def search(self, query, max_results=25, timeout=60):
        url = 'http://ebookpoint.pl/search?qa=&szukaj=' + quote_plus(
            query.decode('utf-8').encode('iso-8859-2')
        ) + '&serwisyall=0&wprzyg=0&wsprzed=1&wyczerp=0&formaty=em-p'

        br = browser()

        counter = max_results
        with closing(br.open(url, timeout=timeout)) as f:
            doc = html.fromstring(f.read())
            for data in doc.xpath('//ul[@class="list"]/li'):
                if counter <= 0:
                    break

                id = ''.join(data.xpath('./a/@href'))
                if not id:
                    continue

                formats = ', '.join(
                    data.xpath(
                        './/ul[@class="book-type book-type-points"]//span[@class="popup"]/span/text()'
                    ))
                cover_url = ''.join(
                    data.xpath('.//p[@class="cover"]/img/@data-src'))
                title = ''.join(
                    data.xpath('.//div[@class="book-info"]/h3/a/text()'))
                author = ''.join(data.xpath('.//p[@class="author"]//text()'))
                price = ''.join(
                    data.xpath(
                        './/p[@class="price price-incart"]/a/ins/text()|.//p[@class="price price-add"]/a/text()'
                    ))

                counter -= 1

                s = SearchResult()
                s.cover_url = cover_url
                s.title = title.strip()
                s.author = author.strip()
                s.price = re.sub(r'\.', ',', price)
                s.detail_item = id.strip()
                s.drm = SearchResult.DRM_UNLOCKED
                s.formats = formats.upper()

                yield s
Code example #33
File: kobo_plugin.py  Project: pombreda/calibre-1
    def search(self, query, max_results=10, timeout=60):
        url = 'http://www.kobobooks.com/search/search.html?q=' + urllib.quote_plus(
            query)

        br = browser()

        counter = max_results
        with closing(br.open(url, timeout=timeout)) as f:
            doc = html.fromstring(f.read())
            for data in doc.xpath(
                    '//ul[contains(@class, "flowview-items")]/li'):
                if counter <= 0:
                    break

                id = ''.join(
                    data.xpath('./a[contains(@class, "block-link")]/@href'))
                if not id:
                    continue
                id = id[1:]

                price = ''.join(
                    data.xpath(
                        './/a[contains(@class, "primary-button")]//text()'))

                cover_url = ''.join(data.xpath('.//img[1]/@src'))
                cover_url = 'http:%s' % cover_url

                title = ''.join(
                    data.xpath(
                        './/p[contains(@class, "flowview-item-title")]//text()'
                    ))

                counter -= 1

                s = SearchResult()
                s.cover_url = cover_url
                s.title = title.strip()
                s.price = price.strip()
                s.detail_item = 'http://store.kobobooks.com/' + id.strip()
                s.formats = 'EPUB'
                s.drm = SearchResult.DRM_UNKNOWN

                yield s
Code example #34
    def search(self, query, max_results=10, timeout=60):
        url = u'http://uk.nook.com/s/%s?s%%5Bdref%%5D=1&s%%5Bkeyword%%5D=%s' % (query.replace(' ', '-'), urllib.quote(query))

        br = browser()

        counter = max_results
        with closing(br.open(url, timeout=timeout)) as f:
            raw = f.read()
            doc = html.fromstring(raw)
            for data in doc.xpath('//ul[contains(@class, "product_list")]/li'):
                if counter <= 0:
                    break

                id_ = ''.join(data.xpath('.//span[contains(@class, "image")]/a/@href'))
                if not id_:
                    continue
                if id_.startswith('/gb'):
                    id_ = id_[3:]
                id_ = 'http://uk.nook.com' + id_.strip()

                cover_url = ''.join(data.xpath('.//span[contains(@class, "image")]//img/@data-src'))

                title = ''.join(data.xpath('.//div[contains(@class, "title")]//text()')).strip()
                if not title:
                    continue

                author = ', '.join(data.xpath('.//div[contains(@class, "contributor")]//a/text()')).strip()
                price = ''.join(data.xpath('.//div[contains(@class, "action")]//a//text()')).strip()
                price = re.sub(r'[^\d.,£]', '', price)

                counter -= 1

                s = SearchResult()
                s.cover_url = cover_url
                s.title = title.strip()
                s.author = author.strip()
                s.price = price.strip()
                s.detail_item = id_
                s.drm = SearchResult.DRM_UNKNOWN
                s.formats = 'Nook'

                yield s
Code example #35
    def search(self, query, max_results=10, timeout=60):
        url = 'http://www.ebooks.com/SearchApp/SearchResults.net?term=' + quote_plus(query)

        br = browser()

        counter = max_results
        with closing(br.open(url, timeout=timeout)) as f:
            doc = html.fromstring(f.read())
            for data in doc.xpath('//div[@id="results"]//li'):
                if counter <= 0:
                    break

                id = ''.join(data.xpath('.//a[1]/@href'))
                mo = re.search(r'\d+', id)
                if not mo:
                    continue
                id = mo.group()

                cover_url = ''.join(data.xpath('.//div[contains(@class, "img")]//img/@src'))

                title = ''.join(data.xpath(
                    'descendant::span[@class="book-title"]/a/text()')).strip()
                author = ', '.join(data.xpath(
                    'descendant::span[@class="author"]/a/text()')).strip()
                if not title or not author:
                    continue

                price = ''.join(data.xpath(
                    './/span[starts-with(text(), "US$") or'
                    ' starts-with(text(), "€") or starts-with(text(), "CA$") or'
                    ' starts-with(text(), "AU$") or starts-with(text(), "£")]/text()')).strip()

                counter -= 1

                s = SearchResult()
                s.cover_url = cover_url
                s.title = title.strip()
                s.author = author.strip()
                s.price = price.strip()
                s.detail_item = '?url=http://www.ebooks.com/cj.asp?IID=' + id.strip() + '&cjsku=' + id.strip()

                yield s
Code example #36
    def search(self, query, max_results=10, timeout=60):
        url = 'http://www.barnesandnoble.com/s/%s?keyword=%s&store=ebook&view=list' % (query.decode('utf-8').replace(' ', '-'), quote_plus(query))

        br = browser()

        counter = max_results
        with closing(br.open(url, timeout=timeout)) as f:
            raw = f.read()
            doc = html.fromstring(raw)
            for data in doc.xpath('//ol[contains(@class, "result-set")]/li[contains(@class, "result")]'):
                if counter <= 0:
                    break

                id = ''.join(data.xpath('.//div[contains(@class, "image-block")]/a/@href'))
                if not id:
                    continue

                cover_url = ''
                cover_id = ''.join(data.xpath('.//img[contains(@class, "product-image")]/@id'))
                m = re.search(r"%s'.*?srcUrl: '(?P<iurl>.*?)'.*?}" % cover_id, raw)
                if m:
                    cover_url = m.group('iurl')

                title = ''.join(data.xpath('descendant::p[@class="title"]//span[@class="name"]//text()')).strip()
                if not title:
                    continue

                author = ', '.join(data.xpath('.//ul[contains(@class, "contributors")]//a[contains(@class, "subtle")]//text()')).strip()
                price = ''.join(data.xpath('.//a[contains(@class, "bn-price")]//text()'))

                counter -= 1

                s = SearchResult()
                s.cover_url = cover_url
                s.title = title.strip()
                s.author = author.strip()
                s.price = price.strip()
                s.detail_item = id.strip()
                s.drm = SearchResult.DRM_UNKNOWN
                s.formats = 'Nook'

                yield s
Code example #37
    def search(self, query, max_results=10, timeout=60):
        url = 'https://drmfree.calibre-ebook.com/search/?q=' + urllib.quote_plus(
            query)

        br = browser()

        counter = max_results
        with closing(br.open(url, timeout=timeout)) as f:
            doc = html.fromstring(f.read())
            for data in doc.xpath('//ul[@id="object_list"]//li'):
                if counter <= 0:
                    break

                id = ''.join(data.xpath('.//div[@class="links"]/a[1]/@href'))
                id = id.strip()
                if not id:
                    continue

                cover_url = ''.join(
                    data.xpath('.//div[@class="cover"]/img/@src'))

                price = ''.join(data.xpath('.//div[@class="price"]/text()'))
                a, b, price = price.partition('Price:')
                price = price.strip()
                if not price:
                    continue

                title = ''.join(data.xpath('.//div/strong/text()'))
                author = ''.join(data.xpath('.//div[@class="author"]//text()'))
                author = author.partition('by')[-1]

                counter -= 1

                s = SearchResult()
                s.cover_url = cover_url
                s.title = title.strip()
                s.author = author.strip()
                s.price = price.strip()
                s.detail_item = id.strip()
                s.drm = SearchResult.DRM_UNLOCKED

                yield s
Code example #38
    def search(self, query, max_results=25, timeout=60):
        url = 'http://ebookpoint.pl/search.scgi?szukaj=' + urllib.quote_plus(
            query.decode('utf-8').encode(
                'iso-8859-2')) + '&serwisyall=0&x=0&y=0'

        br = browser()

        counter = max_results
        with closing(br.open(url, timeout=timeout)) as f:
            doc = html.fromstring(f.read())
            for data in doc.xpath('//div[@class="book-list"]/ul[2]/li'):
                if counter <= 0:
                    break

                id = ''.join(data.xpath('.//a[@class="cover"]/@href'))
                if not id:
                    continue

                formats = ', '.join(
                    data.xpath('.//div[@class="ikony"]/span/text()'))
                if formats in ['MP3', '']:
                    continue
                cover_url = ''.join(
                    data.xpath('.//a[@class="cover"]/img/@src'))
                title = ''.join(data.xpath('.//h3/a/@title'))
                title = re.sub('eBook.', '', title)
                author = ''.join(data.xpath('.//p[@class="author"]//text()'))
                price = ''.join(data.xpath('.//p[@class="price"]/ins/text()'))

                counter -= 1

                s = SearchResult()
                s.cover_url = 'http://ebookpoint.pl' + re.sub(
                    '72x9', '65x8', cover_url)
                s.title = title.strip()
                s.author = author.strip()
                s.price = re.sub(r'\.', ',', price)
                s.detail_item = id.strip()
                s.drm = SearchResult.DRM_UNLOCKED
                s.formats = formats.upper()

                yield s
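
The ebookpoint.pl snippet above is Python 2 code: query.decode('utf-8').encode('iso-8859-2') converts the UTF-8 byte string into ISO-8859-2 (Latin-2) bytes before percent-encoding, because the site expects Latin-2 query parameters. A Python 3 sketch of just that URL construction follows (illustrative only, not taken from the plugin; ebookpoint_search_url is a hypothetical helper name):

# Python 3 sketch of the Latin-2 query encoding used above (illustrative):
from urllib.parse import quote_plus

def ebookpoint_search_url(query):
    # percent-encode the ISO-8859-2 byte form of the query, as the site expects
    return ('http://ebookpoint.pl/search.scgi?szukaj='
            + quote_plus(query.encode('iso-8859-2'))
            + '&serwisyall=0&x=0&y=0')
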
Code example #39
    def search(self, query, max_results=10, timeout=60):
        url = 'https://www.empik.com/ebooki/ebooki,3501,s?sort=scoreDesc&resultsPP={}&q={}'.format(
            max_results, quote(query))

        br = browser()

        counter = max_results
        with closing(br.open(url, timeout=timeout)) as f:
            doc = html.fromstring(f.read())
            for data in doc.xpath(
                    '//div[@class="search-content js-search-content"]/div'):
                if counter <= 0:
                    break

                id = ''.join(data.xpath('.//div[@class="name"]/a/@href'))
                if not id:
                    continue

                cover_url = ''.join(
                    data.xpath('.//a/img[@class="lazy"]/@lazy-img'))
                author = ', '.join(
                    data.xpath('.//a[@class="smartAuthor"]/text()'))
                title = ''.join(data.xpath('.//div[@class="name"]/a/@title'))
                price = ''.join(
                    data.xpath('.//div[@class="price ta-price-tile "]/text()'))

                # with closing(br.open('https://empik.com' + id.strip(), timeout=timeout/4)) as nf:
                #    idata = html.fromstring(nf.read())
                #    crawled = idata.xpath('.//a[(@class="chosen hrefstyle") or (@class="connectionsLink hrefstyle")]/text()')
                #    formats = ','.join([re.sub('ebook, ','', x.strip()) for x in crawled if 'ebook' in x])

                counter -= 1

                s = SearchResult()
                s.cover_url = cover_url
                s.title = title.split('  - ')[0]
                s.author = author.strip()
                s.price = price.strip()
                s.detail_item = 'https://empik.com' + id.strip()
                # s.formats = formats.upper().strip()

                yield s
Code example #40
    def search(self, query, max_results=10, timeout=60):
        url = 'http://wolnelektury.pl/szukaj?q=' + quote_plus(
            query.encode('utf-8'))

        br = browser()

        counter = max_results
        with closing(br.open(url, timeout=timeout)) as f:
            doc = html.fromstring(f.read())
            for data in doc.xpath('//li[@class="Book-item"]'):
                if counter <= 0:
                    break

                id = ''.join(data.xpath('.//div[@class="title"]/a/@href'))
                if not id:
                    continue

                cover_url = ''.join(
                    data.xpath('.//div[@class="cover-area"]//img/@src'))
                title = ''.join(
                    data.xpath('.//div[@class="title"]/a[1]/text()'))
                author = ', '.join(
                    data.xpath('.//div[@class="author"]/a/text()'))
                price = '0,00 zł'

                counter -= 1

                s = SearchResult()
                for link in data.xpath(
                        './/div[@class="book-box-formats"]/span/a'):
                    ext = ''.join(link.xpath('./text()'))
                    href = 'http://wolnelektury.pl' + link.get('href')
                    s.downloads[ext] = href
                s.cover_url = 'http://wolnelektury.pl' + cover_url.strip()
                s.title = title.strip()
                s.author = author
                s.price = price
                s.detail_item = 'http://wolnelektury.pl' + id
                s.formats = ', '.join(s.downloads.keys())
                s.drm = SearchResult.DRM_UNLOCKED

                yield s
Code example #41
    def search(self, query, max_results=10, timeout=60):
        try:
            results = lg.lookup(lg.search(query))
            print('Reached LibGen Mirrors.')
        except Exception as e:
            print(e)
            print('pylibgen crashed. In most cases this is caused by unreachable LibGen Mirrors, try again in a few minutes.')
            return

        self.num_results = len(results)

        for r in results:
            s = SearchResult()
            s.title = r['title']
            s.author = r['author']
            s.price = '$0.00'
            s.drm = SearchResult.DRM_UNLOCKED
            s.formats = r['extension']
            s.detail_item = r['md5']
            yield s
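
The LibGen snippet above relies on a module-level lg object that is not shown; it appears to be a pylibgen client. A sketch of the assumed setup follows (hypothetical reconstruction: pylibgen's API has changed between releases, so treat the class name and call pattern as assumptions rather than a verified interface):

# Assumed setup for the snippet above (hypothetical; the plugin creates `lg`
# elsewhere, and pylibgen's exact API depends on the installed version):
from pylibgen import Library

lg = Library()  # lg.search(query) -> ids, lg.lookup(ids) -> per-book metadata
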
Code example #42
    def search(self, query, max_results=10, timeout=60):
        url = 'http://weightlessbooks.com/?s=' + urllib.quote_plus(query)

        br = browser()

        counter = max_results
        with closing(br.open(url, timeout=timeout)) as f:
            doc = html.fromstring(f.read())
            for data in doc.xpath('//li[@class="product"]'):
                if counter <= 0:
                    break

                id = ''.join(data.xpath('.//div[@class="cover"]/a/@href'))
                if not id:
                    continue

                cover_url = ''.join(data.xpath('.//div[@class="cover"]/a/img/@src'))

                price = ''.join(data.xpath('.//div[@class="buy_buttons"]/b[1]/text()'))
                if not price:
                    continue

                formats = ', '.join(data.xpath('.//select[@class="eStore_variation"]//option//text()'))
                formats = formats.upper()

                title = ''.join(data.xpath('.//h3/a/text()'))
                author = ''.join(data.xpath('.//h3//text()'))
                author = author.replace(title, '')

                counter -= 1

                s = SearchResult()
                s.cover_url = cover_url
                s.title = title.strip()
                s.author = author.strip()
                s.price = price.strip()
                s.detail_item = id.strip()
                s.drm = SearchResult.DRM_UNLOCKED
                s.formats = formats

                yield s
Code example #43
    def search(self, query, max_results=10, timeout=60):

        br = browser()
        page=1

        counter = max_results
        while counter:
            with closing(br.open('https://cdp.pl/ksiazki/e-book.html?q=' + urllib.parse.quote_plus(query) + '&p=' + str(page), timeout=timeout)) as f:
                doc = html.fromstring(f.read())
                for data in doc.xpath('//ul[@class="products"]/li'):
                    if counter <= 0:
                        break

                    id = ''.join(data.xpath('.//a[@class="product-image"]/@href'))
                    if not id:
                        continue

                    cover_url = ''.join(data.xpath('.//a[@class="product-image"]/img/@data-src'))
                    title = ''.join(data.xpath('.//h3[1]/a/@title'))
                    price = ''.join(data.xpath('.//span[@class="custom_price"]/text()'))+','+''.join(data.xpath('.//span[@class="custom_price"]/sup/text()'))
                    author = ''.join(data.xpath('.//div[@class="authors"]/@title'))
                    formats = ''
                    with closing(br.open(id.strip(), timeout=timeout/4)) as nf:
                        idata = html.fromstring(nf.read())
                        formats = idata.xpath('//div[@class="second-part-holder"]//div[@class="product-attributes-container"]/ul/li/span/text()')[-1]

                    counter -= 1

                    s = SearchResult()
                    s.cover_url = cover_url
                    s.title = title.replace(' (ebook)','').strip()
                    s.author = author
                    s.price = price + ' zł'
                    s.detail_item = id.strip()
                    s.drm = SearchResult.DRM_UNLOCKED
                    s.formats = formats.upper().strip()

                    yield s
                if not doc.xpath('//a[@class="next-page"]'):
                    break
            page+=1
Code example #44
    def search(self, query, max_results=10, timeout=60):
        url = 'https://www.beam-shop.de/search?saltFieldLimitation=all&sSearch=' + quote(
            query)
        br = browser()

        counter = max_results
        with closing(br.open(url, timeout=timeout)) as f:
            doc = html.fromstring(f.read())
            for data in doc.xpath('//div[contains(@class, "product--box")]'):
                if counter <= 0:
                    break

                id_ = ''.join(
                    data.xpath(
                        './div/div[contains(@class, "product--info")]/a/@href')
                ).strip()
                if not id_:
                    continue
                cover_url = ''.join(
                    data.xpath(
                        './div/div[contains(@class, "product--info")]/a//img/@srcset'
                    ))
                if cover_url:
                    cover_url = cover_url.split(',')[0].strip()
                author = data.xpath(
                    './/a[@class="product--author"]/text()')[0].strip()
                title = data.xpath(
                    './/a[@class="product--title"]/text()')[0].strip()
                price = data.xpath(
                    './/div[@class="product--price"]/span/text()')[0].strip()
                counter -= 1

                s = SearchResult()
                s.cover_url = cover_url
                s.title = title.strip()
                s.author = author.strip()
                s.price = price
                s.drm = SearchResult.DRM_UNLOCKED
                s.detail_item = id_
                #                 s.formats = None
                yield s
Code example #45
    def search(self, query, max_results=10, timeout=60):
        url = 'http://www.rw2010.pl/go.live.php/?launch_macro=catalogue-search-rd'
        values={
            'fkeyword': query,
            'file_type':''
            }

        br = browser()

        counter = max_results
        with closing(br.open(url, data=urlencode(values), timeout=timeout)) as f:
            doc = html.fromstring(f.read())
            for data in doc.xpath('//div[@class="ProductDetail"]'):
                if counter <= 0:
                    break

                id = ''.join(data.xpath('.//div[@class="img"]/a/@href'))
                if not id:
                    continue

                with closing(br.open(id.strip(), timeout=timeout/4)) as nf:
                    idata = html.fromstring(nf.read())
                    cover_url = ''.join(idata.xpath('//div[@class="boxa"]//div[@class="img"]/img/@src'))
                    author = ''.join(idata.xpath('//div[@class="boxb"]//h3[text()="Autor: "]/span/text()'))
                    title = ''.join(idata.xpath('//div[@class="boxb"]/h2[1]/text()'))
                    title = re.sub(r'\(#.+\)', '', title)
                    formats = ''.join(idata.xpath('//div[@class="boxb"]//h3[text()="Format pliku: "]/span/text()'))
                    price = ''.join(idata.xpath('//div[@class="price-box"]/span/text()')) + ',00 zł'

                counter -= 1

                s = SearchResult()
                s.cover_url = 'http://www.rw2010.pl/' + cover_url
                s.title = title.strip()
                s.author = author.strip()
                s.price = price
                s.detail_item = re.sub(r'%3D', '=', id)
                s.drm = SearchResult.DRM_UNLOCKED
                s.formats = formats[0:-2].upper()

                yield s
Code example #46
File: litres_plugin.py  Project: onyx-Sean/calibre
    def create_search_result(self, data):
        xp_template = 'normalize-space(@{0})'

        sRes = SearchResult()
        sRes.drm = SearchResult.DRM_UNLOCKED
        sRes.detail_item = data.xpath(xp_template.format('hub_id'))
        sRes.title = data.xpath('string(.//title-info/book-title/text()|.//publish-info/book-name/text())')
        # aut = concat('.//title-info/author/first-name', ' ')
        authors = data.xpath('.//title-info/author/first-name/text()|'
                             './/title-info/author/middle-name/text()|'
                             './/title-info/author/last-name/text()')
        sRes.author = u' '.join(map(type(u''), authors))
        sRes.price = data.xpath(xp_template.format('price'))
        # cover vs cover_preview
        sRes.cover_url = data.xpath(xp_template.format('cover_preview'))
        sRes.price = format_price_in_RUR(sRes.price)

        types = data.xpath('//fb2-book//files/file/@type')
        fmt_set = _parse_ebook_formats(' '.join(types))
        sRes.formats = ', '.join(fmt_set)
        return sRes
Code example #47
File: ebook_nl_plugin.py  Project: zyxw121/webcal
    def search(self, query, max_results=10, timeout=60):
        url = (
            'http://www.ebook.nl/store/advanced_search_result.php?keywords=' +
            quote(query))
        br = browser()

        counter = max_results
        with closing(br.open(url, timeout=timeout)) as f:
            doc = html.fromstring(f.read())
            for data in doc.xpath(
                    '//div[@id="books"]/div[@itemtype="http://schema.org/Book"]'
            ):
                if counter <= 0:
                    break

                id = ''.join(
                    data.xpath('./meta[@itemprop="url"]/@content')).strip()
                if not id:
                    continue
                cover_url = 'http://www.ebook.nl/store/' + ''.join(
                    data.xpath('.//img[@itemprop="image"]/@src'))
                title = ''.join(
                    data.xpath('./span[@itemprop="name"]/a/text()')).strip()
                author = ''.join(
                    data.xpath('./span[@itemprop="author"]/a/text()')).strip()
                if author == '&nbsp':
                    author = ''
                price = ''.join(
                    data.xpath('.//span[@itemprop="price"]//text()'))
                counter -= 1

                s = SearchResult()
                s.cover_url = cover_url
                s.title = title.strip()
                s.author = author.strip()
                s.price = price
                s.drm = SearchResult.DRM_UNKNOWN
                s.detail_item = id

                yield s
Code example #48
    def search(self, query, max_results=10, timeout=60):
        url = 'https://www.legimi.pl/ebooki/?szukaj=' + urllib.quote_plus(
            query)

        br = browser()

        counter = max_results
        with closing(br.open(url, timeout=timeout)) as f:
            doc = html.fromstring(f.read())
            for data in doc.xpath('//div[@id="listBooks"]/div'):
                if counter <= 0:
                    break

                id = ''.join(data.xpath('.//a[1]/@href'))
                if not id:
                    continue

                cover_url = ''.join(
                    data.xpath(
                        './/span[@class="listImage imageDarkLoader"]/img/@src')
                )
                title = ''.join(
                    data.xpath(
                        './/span[@class="bookListTitle ellipsis"]/text()'))
                author = ''.join(
                    data.xpath(
                        './/span[@class="bookListAuthor ellipsis"]/text()'))
                price = ''.join(
                    data.xpath('.//div[@class="bookListPrice"]/span/text()'))

                counter -= 1

                s = SearchResult()
                s.cover_url = cover_url
                s.title = title.strip()
                s.author = author.strip()
                s.price = price
                s.detail_item = 'https://www.legimi.pl/' + id.strip()

                yield s
Code example #49
    def search(self, query, max_results=10, timeout=60):
        url = 'http://www.ebookshoppe.com/search.php?search_query=' + quote(
            query)
        br = browser()
        br.addheaders = [("Referer", "http://www.ebookshoppe.com/")]

        counter = max_results
        with closing(br.open(url, timeout=timeout)) as f:
            doc = html.fromstring(f.read())
            for data in doc.xpath('//ul[@class="ProductList"]/li'):
                if counter <= 0:
                    break

                id = ''.join(
                    data.xpath('./div[@class="ProductDetails"]/'
                               'strong/a/@href')).strip()
                if not id:
                    continue
                cover_url = ''.join(
                    data.xpath('./div[@class="ProductImage"]/a/img/@src'))
                title = ''.join(
                    data.xpath(
                        './div[@class="ProductDetails"]/strong/a/text()'))
                price = ''.join(
                    data.xpath('./div[@class="ProductPriceRating"]/em/text()'))
                counter -= 1

                s = SearchResult()
                s.cover_url = cover_url
                s.title = title.strip()
                s.price = price
                s.drm = SearchResult.DRM_UNLOCKED
                s.detail_item = id

                self.get_author_and_formats(s, timeout)
                if not s.author:
                    continue

                yield s
Code example #50
    def _do_search(self, url, max_results, timeout):
        br = browser()
        with closing(br.open(url, timeout=timeout)) as f:
            page = f.read().decode('utf-8')
            doc = html.fromstring(page)

            for data in doc.xpath('//ul[contains(@class,"book_list")]/li'):
                if max_results <= 0:
                    break

                s = SearchResult()
                s.detail_item = ''.join(
                    data.xpath('.//a[@class="th"]/@href')).strip()
                # skip entries without a detail link
                if not s.detail_item:
                    continue

                s.cover_url = ''.join(
                    data.xpath(
                        './/a[@class="th"]/img/@data-original')).strip()
                s.title = ''.join(
                    data.xpath(
                        './/div[@class="item-title"]/a/text()')).strip()
                s.author = ', '.join(
                    data.xpath('.//div[@class="item-author"]/a/text()')).strip(
                        ', ')

                price_list = data.xpath('.//div[@class="item-price"]')
                for price_item in price_list:
                    if price_item.text.startswith('е-книга:'):
                        s.price = ''.join(price_item.xpath('.//span/text()'))
                        break

                s.price = '0.00 лв.' if not s.price and not price_list else s.price
                if not s.price:
                    # no e-book available
                    continue

                max_results -= 1
                yield s
Code example #51
def search(query, max_results=15, timeout=60):
    url = 'http://www.ozon.ru/?context=search&text=%s&store=1,0&group=div_book' % quote_plus(query)

    counter = max_results
    br = browser()

    with closing(br.open(url, timeout=timeout)) as f:
        raw = xml_to_unicode(f.read(), strip_encoding_pats=True, assume_utf8=True)[0]
        root = parse_html(raw)
        for tile in root.xpath('//*[@class="bShelfTile inline"]'):
            if counter <= 0:
                break
            counter -= 1

            s = SearchResult(store_name='OZON.ru')
            # shop_url is defined elsewhere in the original plugin module (the OZON.ru base URL)
            s.detail_item = shop_url + tile.xpath('descendant::a[@class="eShelfTile_Link"]/@href')[0]
            s.title = tile.xpath('descendant::span[@class="eShelfTile_ItemNameText"]/@title')[0]
            s.author = tile.xpath('descendant::span[@class="eShelfTile_ItemPerson"]/@title')[0]
            s.price = ''.join(tile.xpath('descendant::div[contains(@class, "eShelfTile_Price")]/text()'))
            s.cover_url = 'http:' + tile.xpath('descendant::img/@data-original')[0]
            s.price = format_price_in_RUR(s.price)
            yield s
Code example #52
    def search(self, query, max_results=10, timeout=60):
        url = 'http://zixo.pl/wyszukiwarka/?search=' + urllib.quote(
            query.encode('utf-8')) + '&product_type=0'

        br = browser()

        counter = max_results
        with closing(br.open(url, timeout=timeout)) as f:
            doc = html.fromstring(f.read())
            for data in doc.xpath('//div[@class="productInline"]'):
                if counter <= 0:
                    break

                id = ''.join(data.xpath('.//a[@class="productThumb"]/@href'))
                if not id:
                    continue

                cover_url = ''.join(
                    data.xpath('.//a[@class="productThumb"]/img/@src'))
                title = ''.join(data.xpath('.//a[@class="title"]/text()'))
                author = ','.join(
                    data.xpath(
                        './/div[@class="productDescription"]/span[1]/a/text()')
                )
                price = ''.join(
                    data.xpath('.//div[@class="priceList"]/span/text()'))
                price = re.sub(r'\.', ',', price)

                counter -= 1

                s = SearchResult()
                s.cover_url = cover_url
                s.title = title.strip()
                s.author = author.strip()
                s.price = price
                s.detail_item = 'http://zixo.pl' + id.strip()
                s.drm = SearchResult.DRM_LOCKED

                yield s
Code example #53
    def search(self, query, max_results=10, timeout=60):

        br = browser()
        page=1

        counter = max_results
        while counter:
            with closing(br.open('http://www.koobe.pl/s,p,' + str(page) + ',szukaj/fraza:' + urllib.quote(query), timeout=timeout)) as f:
                doc = html.fromstring(f.read().decode('utf-8'))
                for data in doc.xpath('//div[@class="seach_result"]/div[@class="result"]'):
                    if counter <= 0:
                        break

                    id = ''.join(data.xpath('.//div[@class="cover"]/a/@href'))
                    if not id:
                        continue

                    cover_url = ''.join(data.xpath('.//div[@class="cover"]/a/img/@src'))
                    price = ''.join(data.xpath('.//span[@class="current_price"]/text()'))
                    title = ''.join(data.xpath('.//h2[@class="title"]/a/text()'))
                    author = ', '.join(data.xpath('.//h3[@class="book_author"]/a/text()'))
                    formats = ', '.join(data.xpath('.//div[@class="formats"]/div/div/@title'))

                    counter -= 1

                    s = SearchResult()
                    s.cover_url = 'http://koobe.pl/' + cover_url
                    s.title = title.strip()
                    s.author = author.strip()
                    s.price = price
                    s.detail_item = 'http://koobe.pl' + id[1:]
                    s.formats = formats.upper()
                    s.drm = SearchResult.DRM_UNLOCKED

                    yield s
                if not doc.xpath('//div[@class="site_bottom"]//a[@class="right"]'):
                    break
            page+=1
Code example #54
File: allegro_plugin.py  Project: sss/calibre
    def search(self, query, max_results=10, timeout=60):

        br = browser()
        page=1

        counter = max_results
        while counter:
            with closing(br.open('http://ebooki.allegro.pl/szukaj?fraza=' + urllib.quote(query) + '&strona=' + str(page), timeout=timeout)) as f:
                doc = html.fromstring(f.read().decode('utf-8'))
                for data in doc.xpath('//div[@class="listing-list"]/div[@class="listing-list-item"]'):
                    if counter <= 0:
                        break

                    id = ''.join(data.xpath('.//div[@class="listing-cover-wrapper"]/a/@href'))
                    if not id:
                        continue

                    cover_url = ''.join(data.xpath('.//div[@class="listing-cover-wrapper"]/a/img/@src'))
                    title = ''.join(data.xpath('.//div[@class="listing-info"]/div[1]/a/text()'))
                    author = ', '.join(data.xpath('.//div[@class="listing-info"]/div[2]/a/text()'))
                    price = ''.join(data.xpath('.//div[@class="book-price"]/text()'))
                    formats = ', '.join(data.xpath('.//div[@class="listing-buy-formats"]//div[@class="devices-wrapper"]/span[@class="device-label"]/span/text()'))

                    counter -= 1

                    s = SearchResult()
                    s.cover_url = 'http://ebooki.allegro.pl/' + cover_url
                    s.title = title.strip()
                    s.author = author.strip()
                    s.price = price
                    s.detail_item = 'http://ebooki.allegro.pl/' + id[1:]
                    s.formats = formats.upper()
                    s.drm = SearchResult.DRM_UNLOCKED

                    yield s
                if not doc.xpath('//a[@class="paging-arrow right-paging-arrow"]'):
                    break
            page+=1
Code example #55
    def search(self, query, max_results=20, timeout=60):

        br = browser()

        counter = max_results
        page = 1
        while counter:
            with closing(br.open('http://www.publio.pl/e-booki,strona' + str(page) + '.html?q=' + quote(query), timeout=timeout)) as f:  # noqa
                doc = html.fromstring(f.read())
                for data in doc.xpath('//div[@class="products-list"]//div[@class="product-tile"]'):
                    if counter <= 0:
                        break

                    id = ''.join(data.xpath('.//a[@class="product-tile-cover"]/@href'))
                    if not id:
                        continue

                    cover_url = ''.join(data.xpath('.//img[@class="product-tile-cover-photo"]/@src'))
                    title = ''.join(data.xpath('.//span[@class="product-tile-title-long"]/text()'))
                    author = ', '.join(data.xpath('.//span[@class="product-tile-author"]/a/text()'))
                    price = ''.join(data.xpath('.//div[@class="product-tile-price-wrapper "]/a/ins/text()'))
                    formats = ''.join(data.xpath('.//a[@class="product-tile-cover"]/img/@alt')).split(' - ebook ')[1]

                    counter -= 1

                    s = SearchResult()
                    s.cover_url = 'http://www.publio.pl' + cover_url
                    s.title = title.strip()
                    s.author = author
                    s.price = price
                    s.detail_item = 'http://www.publio.pl' + id.strip()
                    s.formats = formats.upper().strip()

                    yield s
                if not doc.xpath('boolean(//a[@class="next"])'):
                    break
                page+=1
Code example #56
    def search(self, query, max_results=10, timeout=60):

        br = browser()
        page=1

        counter = max_results
        while counter:
            with closing(br.open('https://www.swiatebookow.pl/ebooki/?q=' + quote(query) + '&page=' + str(page), timeout=timeout)) as f:
                doc = html.fromstring(f.read().decode('utf-8'))
                for data in doc.xpath('//div[@class="category-item-container"]//div[@class="book-large"]'):
                    if counter <= 0:
                        break

                    id = ''.join(data.xpath('./a/@href'))
                    if not id:
                        continue

                    cover_url = ''.join(data.xpath('.//div[@class="cover-xs"]/img/@src'))
                    price = ''.join(data.xpath('.//span[@class="item-price"]/text()')+data.xpath('.//span[@class="sub-price"]/text()'))
                    title = ''.join(data.xpath('.//h3/text()'))
                    author = ', '.join(data.xpath('.//div[@class="details"]/p/a/text()'))

                    counter -= 1

                    s = SearchResult()
                    s.cover_url = 'https://www.swiatebookow.pl' + cover_url
                    s.title = title.strip()
                    s.author = author.strip()
                    s.price = price
                    s.detail_item = 'https://www.swiatebookow.pl' + id
                    # s.formats = formats.upper()
                    s.drm = SearchResult.DRM_UNLOCKED

                    yield s
                if not doc.xpath('//div[@class="paging_bootstrap pagination"]//a[@class="next"]'):
                    break
            page+=1
Code example #57
    def search(self, query, max_results=10, timeout=60):
        url = 'https://www.legimi.pl/ebooki/?sort=score&searchphrase=' + quote_plus(
            query)

        br = browser()

        counter = max_results
        with closing(br.open(url, timeout=timeout)) as f:
            doc = html.fromstring(f.read())
            for data in doc.xpath(
                    '//div[@class="book-search row auto-clear"]/div'):
                if counter <= 0:
                    break

                id = ''.join(data.xpath('.//div[@class="panel-body"]/a/@href'))
                if not id:
                    continue

                cover_url = ''.join(
                    data.xpath('.//div[@class="img-content"]/img/@data-src'))
                title = ''.join(
                    data.xpath(
                        './/a[@class="book-title clampBookTitle"]/text()'))
                author = ' '.join(
                    data.xpath(
                        './/div[@class="authors-container clampBookAuthors"]/a/text()'
                    ))
                counter -= 1

                s = SearchResult()
                s.cover_url = cover_url
                s.title = title.strip()
                s.author = author.strip()
                s.detail_item = 'https://www.legimi.pl' + id.strip()
                s.drm = SearchResult.DRM_UNLOCKED

                yield s
Code example #58
File: whsmith_uk_plugin.py  Project: zyxw121/webcal
    def search(self, query, max_results=10, timeout=60):
        url = (
            'https://www.whsmith.co.uk/search?keywordCategoryId=wc_dept_ebooks&results=60'
            '&page=1&keywords=' + quote(query))

        br = browser()

        counter = max_results
        with closing(br.open(url, timeout=timeout)) as f:
            doc = html.fromstring(f.read())
            for data in doc.xpath('//li[@class="product"]'):
                if counter <= 0:
                    break
                id_ = ''.join(
                    data.xpath('./a[@class="product_image_wrap"]/@href'))
                if not id_:
                    continue
                id_ = 'https://www.whsmith.co.uk' + id_
                cover_url = ''.join(
                    data.xpath('.//img[@class="product_image"]/@src'))
                title = ''.join(
                    data.xpath('.//h4[@class="product_title"]/text()'))
                author = ', '.join(
                    data.xpath('.//span[@class="product_second"]/text()'))
                price = ''.join(data.xpath('.//span[@class="price"]/text()'))
                counter -= 1

                s = SearchResult()
                s.cover_url = cover_url
                s.title = title.strip()
                s.author = author.strip()
                s.price = price
                s.drm = SearchResult.DRM_LOCKED
                s.detail_item = id_
                s.formats = 'ePub'

                yield s
Code example #59
    def search(self, query, max_results=20, timeout=60):
        url = 'http://www.escapemagazine.pl/wyszukiwarka?query=' + urllib.quote_plus(
            query)

        br = browser()

        counter = max_results
        with closing(br.open(url, timeout=timeout)) as f:
            doc = html.fromstring(f.read())
            for data in doc.xpath('//div[@class="item item_short"]'):
                if counter <= 0:
                    break

                id = ''.join(data.xpath('.//h2[@class="title"]/a[1]/@href'))
                if not id:
                    continue

                title = ''.join(
                    data.xpath('.//h2[@class="title"]/a[1]/text()'))
                author = ''.join(data.xpath('.//div[@class="author"]/text()'))
                price = ''.join(
                    data.xpath(
                        './/span[@class="price_now"]/strong/text()')) + ' zł'
                cover_url = ''.join(data.xpath('.//img[@class="cover"]/@src'))

                counter -= 1

                s = SearchResult()
                s.cover_url = cover_url
                s.title = title.strip()
                s.author = author.strip()
                s.price = price
                s.detail_item = 'http://www.escapemagazine.pl' + id.strip()
                s.drm = SearchResult.DRM_UNLOCKED
                s.formats = 'PDF'

                yield s
Code example #60
    def search(self, query, max_results=10, timeout=60):
        url = 'http://www.bubok.pt/resellers/calibre_search/' + urllib.quote_plus(
            query)

        br = browser()

        counter = max_results
        with closing(br.open(url, timeout=timeout)) as f:
            doc = html.fromstring(f.read())
            for data in doc.xpath('//div[contains(@class, "libro")]'):
                if counter <= 0:
                    break

                id = ''.join(data.xpath('.//div[@class="url"]/text()'))

                title = ''.join(data.xpath('.//div[@class="titulo"]/text()'))

                author = ''.join(data.xpath('.//div[@class="autor"]/text()'))

                price = ''.join(data.xpath('.//div[@class="precio"]/text()'))

                formats = ''.join(
                    data.xpath('.//div[@class="formatos"]/text()'))

                cover = ''.join(data.xpath('.//div[@class="portada"]/text()'))

                counter -= 1

                s = SearchResult()
                s.title = title.strip()
                s.author = author.strip()
                s.detail_item = id.strip()
                s.price = price.strip()
                s.drm = SearchResult.DRM_UNLOCKED
                s.formats = formats.strip()
                s.cover_url = cover.strip()
                yield s
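
All of the search() implementations above are generators that lazily yield SearchResult objects; inside calibre they are driven by the store search dialog. A rough consumption sketch for manual testing (store stands for an already-constructed instance of one of the plugin classes above; how calibre constructs it is outside the scope of this listing):

# Rough consumption sketch (assumes `store` is an instance of one of the
# plugin classes shown above):
for result in store.search('lem solaris', max_results=5, timeout=30):
    print(result.title, '|', result.author, '|', result.price, '|', result.detail_item)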