예제 #1
0
def pchome_lightnovel():
    """Build an Atom feed of PChome's light-novel new arrivals.

    Fetches the JSONP product-list endpoint, strips the JSONP wrapper,
    and emits one entry per book.
    """
    url = "https://ecapi.pchome.com.tw/cdn/ecshop/prodapi/v2/newarrival/DJAZ/prod&offset=1&limit=20&fields=Id,Nick,Pic,Price,Discount,isSpec,Name,isCarrier,isSnapUp,isBigCart&_callback=jsonp_prodlist?_callback=jsonp_prodlist"
    title = "PChome 輕小說"

    feed = feedgen.feed.FeedGenerator()
    feed.author({"name": "Feed Generator"})
    feed.id(url)
    feed.link(href=url, rel="alternate")
    feed.title(title)

    r = requests.get(url, headers={"User-agent": user_agent}, timeout=5)
    # Keep only the JSON array inside the JSONP callback wrapper.
    payload = re.match(r"^[^\[]*(\[.*\])[^\[]*$", r.text).group(1)

    for item in json.loads(payload):
        escaped_nick = html.escape(item["Nick"])
        content = '{}<br/><img alt="{}" src="https://a.ecimg.tw{}"/>'.format(
            escaped_nick,
            escaped_nick,
            html.escape(item["Pic"]["B"]),
        )
        book_url = "https://24h.pchome.com.tw/books/prod/{}".format(
            urllib.parse.quote_plus(item["Id"]))

        entry = feed.add_entry()
        entry.content(content, type="xhtml")
        entry.id(book_url)
        entry.title(item["Nick"])
        entry.link(href=book_url)

    bottle.response.set_header("Cache-Control", "max-age=300,public")
    bottle.response.set_header("Content-Type", "application/atom+xml")

    return feed.atom_str()
예제 #2
0
def plurktop(lang):
    """Build an Atom feed of Plurk's daily top replurks for *lang*."""
    url = "https://www.plurk.com/Stats/topReplurks?period=day&lang={}&limit=50".format(
        urllib.parse.quote_plus(lang))
    title = "Plurk Top ({})".format(lang)

    feed = feedgen.feed.FeedGenerator()
    feed.author({"name": "Feed Generator"})
    feed.id(url)
    feed.link(href=url, rel="alternate")
    feed.title(title)

    r = requests.get(url, headers={"User-agent": user_agent}, timeout=5)

    # Each "stats" element is a pair; only the second member (the plurk
    # record) is used here.
    for _, stat in json.loads(r.text)["stats"]:
        entry_url = "https://www.plurk.com/p/" + base36.dumps(stat["id"])

        # Remove fixed pixel sizes so embedded images scale in feed readers.
        content = stat["content"]
        for size_attr in (r' height="\d+(px)?"', r' width="\d+(px)?"'):
            content = re.sub(size_attr, " ", content)

        entry = feed.add_entry()
        entry.author({"name": stat["owner"]["full_name"]})
        entry.content(content, type="CDATA")
        entry.id(entry_url)
        entry.link(href=entry_url)
        entry.published(stat["posted"])
        entry.title(stat["content_raw"])

    bottle.response.set_header("Cache-Control", "max-age=300,public")
    bottle.response.set_header("Content-Type", "application/atom+xml")

    return feed.atom_str()
예제 #3
0
def bookwalker_lightnovel():
    """Build an Atom feed of BOOKWALKER Taiwan's light-novel listing."""
    url = "https://www.bookwalker.com.tw/more/fiction/1/3"
    title = "BOOKWALKER 輕小說"

    feed = feedgen.feed.FeedGenerator()
    feed.author({"name": "Feed Generator"})
    feed.id(url)
    feed.link(href=url, rel="alternate")
    feed.title(title)

    r = requests.get(url, headers={"User-agent": user_agent}, timeout=5)
    doc = lxml.html.fromstring(r.text)

    for anchor in doc.cssselect(".bwbookitem a"):
        # Covers are lazy-loaded: copy data-src into src so the image
        # renders inside feed readers.
        cover = anchor.cssselect("img")[0]
        cover.set("src", cover.get("data-src"))

        book_url = anchor.get("href")

        entry = feed.add_entry()
        entry.content(lxml.etree.tostring(anchor, encoding="unicode"),
                      type="xhtml")
        entry.id(book_url)
        entry.title(anchor.get("title"))
        entry.link(href=book_url)

    bottle.response.set_header("Cache-Control", "max-age=300,public")
    bottle.response.set_header("Content-Type", "application/atom+xml")

    return feed.atom_str()
예제 #4
0
파일: plurk.py 프로젝트: hasname/feedgen
    def get(self, *args, **kwargs):
        """Return Plurk search results for ``kwargs['keyword']`` as Atom."""
        keyword = kwargs['keyword']

        url = 'https://www.plurk.com/Search/search2'
        title = 'Plurk Search - {}'.format(keyword)

        feed = feedgen.feed.FeedGenerator()
        feed.author({'name': 'Feed Generator'})
        feed.id(url)
        feed.link(href=url, rel='alternate')
        feed.title(title)

        s = services.RequestsService().process()
        body = json.loads(s.post(url, data={'query': keyword}).text)

        for plurk in body['plurks']:
            # Plurk permalinks encode the numeric id in base36.
            permalink = 'https://www.plurk.com/p/' + base36.dumps(plurk['id'])

            entry = feed.add_entry()
            entry.content(self.str_clean(plurk['content']), type='CDATA')
            entry.id(permalink)
            entry.link(href=permalink)
            entry.published(dateutil.parser.parse(plurk['posted']))
            entry.title(self.str_clean(plurk['content_raw']))

        res = HttpResponse(feed.atom_str(),
                           content_type='application/atom+xml; charset=utf-8')
        res['Cache-Control'] = 'max-age=300,public'

        return res
예제 #5
0
    def get(self, *args, **kwargs):
        """Return BOOKWALKER Taiwan's light-novel listing as an Atom feed."""
        url = 'https://www.bookwalker.com.tw/more/fiction/1/3'
        title = 'BOOKWALKER 輕小說'

        feed = feedgen.feed.FeedGenerator()
        feed.author({'name': 'Feed Generator'})
        feed.id(url)
        feed.link(href=url, rel='alternate')
        feed.title(title)

        s = services.RequestsService().process()
        doc = lxml.html.fromstring(s.get(url).text)

        for anchor in doc.cssselect('.bwbookitem a'):
            # Covers are lazy-loaded: copy data-src into src so they render.
            cover = anchor.cssselect('img')[0]
            cover.set('src', cover.get('data-src'))

            book_url = anchor.get('href')

            entry = feed.add_entry()
            entry.content(lxml.etree.tostring(anchor, encoding='unicode'),
                          type='xhtml')
            entry.id(book_url)
            entry.title(anchor.get('title'))
            entry.link(href=book_url)

        res = HttpResponse(feed.atom_str(),
                           content_type='application/atom+xml; charset=utf-8')
        res['Cache-Control'] = 'max-age=300,public'

        return res
예제 #6
0
파일: youtube.py 프로젝트: hasname/feedgen
    def get(self, *args, **kwargs):
        """Return YouTube search results (filtered by upload date) as Atom.

        Scrapes the ytInitialData JSON blob embedded in the search results
        page.  Items without the videoRenderer keys (ads, shelves) are
        skipped.
        """
        keyword = kwargs['keyword']

        url = 'https://www.youtube.com/results?search_query={}&sp=CAI%253D'.format(urllib.parse.quote_plus(keyword))
        title = 'YouTube Search - {}'.format(keyword)

        feed = feedgen.feed.FeedGenerator()
        feed.author({'name': 'Feed Generator'})
        feed.id(url)
        feed.link(href=url, rel='alternate')
        feed.title(title)

        s = services.RequestsService().process()
        r = s.get(url)

        m = re.search(r"var ytInitialData = (.*?);?</script>", r.text, re.MULTILINE)
        if m is None:
            # Page layout changed or the request was blocked: emit an empty
            # feed instead of crashing with AttributeError on m.group().
            items = []
        else:
            j = json.loads(m.group(1))
            items = j['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'][0]['itemSectionRenderer']['contents']

        for item in items:
            try:
                # author
                author = item['videoRenderer']['longBylineText']['runs'][0]['text']

                video_id = item['videoRenderer']['videoId']

                # link
                link = 'https://www.youtube.com/watch?v=' + urllib.parse.quote(video_id)

                # img
                img = 'https://i.ytimg.com/vi/' + video_id + '/hqdefault.jpg'

                # title
                title = item['videoRenderer']['title']['runs'][0]['text']

                # content
                content = '<img alt="{}" src="{}"/>'.format(
                    html.escape(title), html.escape(img)
                )

                entry = feed.add_entry()
                entry.author({'name': author})
                entry.content(content, type='xhtml')
                entry.id(link)
                entry.title(title)
                entry.link(href=link)

            except (IndexError, KeyError):
                # Merged the two identical pass-branches; non-video items
                # lack these keys and are simply skipped.
                pass

        res = HttpResponse(feed.atom_str(), content_type='application/atom+xml; charset=utf-8')
        res['Cache-Control'] = 'max-age=300,public'

        return res
예제 #7
0
def shopee(keyword):
    """Build an Atom feed of the newest Shopee listings matching *keyword*.

    Runs one search request, then fans out one item-detail request per
    result on a thread pool so the detail fetches overlap in time.
    """
    url = "https://shopee.tw/api/v2/search_items/?by=ctime&keyword={}&limit=50&newest=0&order=desc&page_type=search".format(
        urllib.parse.quote_plus(keyword))

    title = "蝦皮搜尋 - {}".format(keyword)

    feed = feedgen.feed.FeedGenerator()
    feed.author({"name": "Feed Generator"})
    feed.id(url)
    feed.link(href=url, rel="alternate")
    feed.title(title)

    r = requests.get(url, headers={"User-agent": user_agent}, timeout=5)
    body = json.loads(r.text)

    # Issue the per-item detail requests concurrently (max 10 in flight);
    # futures are appended in search order and consumed in that same order
    # below, so the feed preserves the search ordering.
    session = FuturesSession(executor=ThreadPoolExecutor(max_workers=10))
    futures = []

    for item in body["items"]:
        itemid = item["itemid"]
        name = item["name"]
        shopid = item["shopid"]

        itemapi_url = "https://shopee.tw/api/v2/item/get?itemid=%d&shopid=%d" % (
            itemid,
            shopid,
        )
        futures.append(
            session.get(itemapi_url,
                        headers={"User-agent": user_agent},
                        timeout=5))

    for f in futures:
        # .result() blocks until that particular request completes.
        r = f.result()
        item = json.loads(r.text)["item"]

        itemid = item["itemid"]
        name = item["name"]
        shopid = item["shopid"]

        prod_url = "https://shopee.tw/product/%d/%d" % (shopid, itemid)
        img_url = "https://cf.shopee.tw/file/%s" % (item["image"])

        content = '{}<br/><img alt="{}" src="{}"/>'.format(
            html.escape(name), html.escape(name), html.escape(img_url))

        entry = feed.add_entry()
        entry.content(content, type="xhtml")
        entry.id(prod_url)
        entry.link(href=prod_url)
        entry.title(name)

    bottle.response.set_header("Cache-Control", "max-age=300,public")
    bottle.response.set_header("Content-Type", "application/atom+xml")

    return feed.atom_str()
예제 #8
0
def youtube(keyword):
    """Build an Atom feed of YouTube search results sorted by upload date.

    Scrapes the classic (pre-Polymer) search results markup; lockups
    missing the expected anchors are skipped.
    """
    # NOTE: this is a str.format template, not a %-format string, so the
    # percent sign must not be doubled — "%%253D" would put a literal "%%"
    # into the URL.  (The doubled form was copied from the %-format variant
    # of this function, where "%%" collapses to "%".)
    url = "https://www.youtube.com/results?sp=CAI%253D&search_query={}".format(
        urllib.parse.quote_plus(keyword))

    title = "YouTube Search - {}".format(keyword)

    feed = feedgen.feed.FeedGenerator()
    feed.author({"name": "Feed Generator"})
    feed.id(url)
    feed.link(href=url, rel="alternate")
    feed.title(title)

    r = requests.get(url)
    body = lxml.html.fromstring(r.text)

    for item in body.cssselect("ol.item-section div.yt-lockup-video"):
        try:
            a = item.cssselect("a[title].spf-link")[0]

            # author
            author = item.cssselect(
                ".yt-lockup-byline a.spf-link.yt-uix-sessionlink"
            )[0].text_content()

            # link: site-relative hrefs need the host prefixed
            link = a.get("href")
            if "/" == link[0]:
                link = "https://www.youtube.com" + link

            # img: derive the thumbnail from the watch URL's v= parameter
            link_tuple = urllib.parse.urlparse(link)
            d = urllib.parse.parse_qs(link_tuple[4])
            img = "https://i.ytimg.com/vi/" + d["v"][0] + "/hqdefault.jpg"

            # title
            title = a.get("title")

            # content
            content = '<img alt="{}" src="{}"/>'.format(
                html.escape(title), html.escape(img))

            entry = feed.add_entry()
            entry.author({"name": author})
            entry.content(content, type="xhtml")
            entry.id(link)
            entry.title(title)
            entry.link(href=link)

        except IndexError:
            pass

    bottle.response.set_header("Cache-Control", "max-age=300,public")
    bottle.response.set_header("Content-Type", "application/atom+xml")

    return feed.atom_str()
예제 #9
0
파일: pchome.py 프로젝트: hasname/feedgen
    def get(self, *args, **kwargs):
        """Return PChome search results for ``kwargs['keyword']`` as Atom.

        Any fetch/parse failure yields an empty feed rather than an error.
        """
        keyword = kwargs['keyword']

        url = 'https://ecshweb.pchome.com.tw/search/v3.3/all/results?q={}&page=1&sort=new/dc'.format(
            urllib.parse.quote_plus(keyword))

        title = 'PChome 搜尋 - {}'.format(keyword)

        feed = feedgen.feed.FeedGenerator()
        feed.author({'name': 'Feed Generator'})
        feed.id(url)
        feed.link(href=url, rel='alternate')
        feed.title(title)

        try:
            s = services.RequestsService().process()

            r = s.get(url)
            body = json.loads(r.text)
        except Exception:
            # Narrowed from a bare ``except:`` so SystemExit/KeyboardInterrupt
            # still propagate; request or JSON errors produce an empty feed.
            body = {'prods': []}

        for item in body['prods']:
            # Product name & description
            item_author = self.str_clean(item['author'])
            item_desc = self.str_clean(item['describe'])
            item_name = self.str_clean(item['name'])
            item_origin_price = item['originPrice']
            item_price = item['price']
            item_title = '(${}/${}) {}'.format(item_origin_price, item_price,
                                               item_name)

            # URL: "D" category ids link to the 24h store, others to the mall.
            if item['cateId'][0] == 'D':
                item_url = 'https://24h.pchome.com.tw/prod/' + item['Id']
            else:
                item_url = 'https://mall.pchome.com.tw/prod/' + item['Id']
            img_url = 'https://cs-a.ecimg.tw%s' % (item['picB'])

            content = '{}<br/><img alt="{}" src="{}"/>'.format(
                html.escape(item_desc), html.escape(item_name),
                html.escape(img_url))

            entry = feed.add_entry()
            entry.author({'name': item_author})
            entry.content(content, type='xhtml')
            entry.id(item_url)
            entry.link(href=item_url)
            entry.title(item_title)

        res = HttpResponse(feed.atom_str(),
                           content_type='application/atom+xml; charset=utf-8')
        res['Cache-Control'] = 'max-age=300,public'

        return res
예제 #10
0
파일: shopee.py 프로젝트: hasname/feedgen
    def get(self, *args, **kwargs):
        """Return the newest Shopee listings for ``kwargs['keyword']`` as Atom.

        Requests go through a proxy; upstream failure responds 503.
        """
        keyword = kwargs['keyword']

        url = 'https://shopee.tw/api/v2/search_items/?by=ctime&keyword={}&limit=50&newest=0&order=desc&page_type=search&version=2'.format(
            urllib.parse.quote_plus(keyword))
        # Referer mirrors the human-facing search page for this keyword.
        referer = 'https://shopee.tw/search?keyword={}'.format(
            urllib.parse.quote_plus(keyword))

        title = '蝦皮搜尋 - {}'.format(keyword)

        feed = feedgen.feed.FeedGenerator()
        feed.author({'name': 'Feed Generator'})
        feed.id(url)
        feed.link(href=url, rel='alternate')
        feed.title(title)

        try:
            proxy = services.ProxyService().process()
            s = services.RequestsService().process()

            s.proxies = {'http': proxy, 'https': proxy}
            r = s.get(url, headers={'Referer': referer})
            body = json.loads(r.text)
            items = body['items']
        except Exception:
            # Narrowed from a bare ``except:`` so SystemExit/KeyboardInterrupt
            # still propagate.
            return HttpResponse('Service Unavailable', status=503)

        # Only iterate when the API actually returned a list.
        if not isinstance(items, list):
            items = []

        for item in items:
            itemid = item['itemid']
            name = item['name']
            shopid = item['shopid']

            prod_url = 'https://shopee.tw/product/%d/%d' % (shopid, itemid)
            img_url = 'https://cf.shopee.tw/file/%s' % (item['image'])

            content = '{}<br/><img alt="{}" src="{}"/>'.format(
                html.escape(name), html.escape(name), html.escape(img_url))

            entry = feed.add_entry()
            entry.content(content, type='xhtml')
            entry.id(prod_url)
            entry.link(href=prod_url)
            entry.title(name)

        res = HttpResponse(feed.atom_str(),
                           content_type='application/atom+xml; charset=utf-8')
        res['Cache-Control'] = 'max-age=300,public'

        return res
예제 #11
0
    def get(self, *args, **kwargs):
        """Return CakeResume job-search results for ``kwargs['keyword']`` as Atom.

        Scrapes the Redux state blob embedded in the search page; any
        fetch/parse failure yields an empty feed.
        """
        keyword = kwargs['keyword']

        url = 'https://www.cakeresume.com/jobs?q={}'.format(
            urllib.parse.quote_plus(keyword))

        title = 'CakeResume 搜尋 - {}'.format(keyword)

        feed = feedgen.feed.FeedGenerator()
        feed.author({'name': 'Feed Generator'})
        feed.id(url)
        feed.link(href=url, rel='alternate')
        feed.title(title)

        try:
            s = services.RequestsService().process()

            r = s.get(url)
            state = re.search(
                r'<script>window\.__APP_INITIAL_REDUX_STATE__ = (.*?)</script>',
                r.text, re.MULTILINE).group(1)
            # The blob is JavaScript, not strict JSON: patch the one
            # ``undefined`` value so json.loads accepts it.
            state = state.replace('"jwt":undefined', '"jwt":false')
            items = json.loads(state)['jobSearch']['jobResultsState'][
                'content']['_rawResults'][0]['hits']
        except Exception:
            # Narrowed from a bare ``except:`` so SystemExit/KeyboardInterrupt
            # still propagate.
            items = []

        for item in items:
            item_author = item['page']['name']
            item_content = '<p>{}</p><p>{}</p>'.format(
                html.escape(item.get('requirements_plain_text', '')),
                html.escape(item.get('description_plain_text', '')))
            item_title = item['title']
            item_url = 'https://www.cakeresume.com/companies/{}/jobs/{}'.format(
                item['page']['path'], item['path'])
            # content_updated_at is divided by 1000: milliseconds -> seconds.
            item_updated_at = datetime.datetime.fromtimestamp(
                item['content_updated_at'] / 1000, tz=datetime.timezone.utc)

            entry = feed.add_entry()
            entry.author({'name': item_author})
            entry.content(item_content, type='xhtml')
            entry.id(item_url)
            entry.link(href=item_url)
            entry.title(item_title)
            entry.updated(item_updated_at)

        res = HttpResponse(feed.atom_str(),
                           content_type='application/atom+xml; charset=utf-8')
        res['Cache-Control'] = 'max-age=300,public'

        return res
예제 #12
0
    def get(self, *args, **kwargs):
        """Return 518.com.tw job-search results for ``kwargs['keyword']`` as Atom.

        Fetch failures fall back to an empty document (empty feed); listing
        rows missing the expected cells are skipped.
        """
        keyword = kwargs['keyword']

        url = 'https://www.518.com.tw/job-index-P-1.html?i=1&am=1&ad={}&orderType=1&orderField=8'.format(urllib.parse.quote_plus(keyword))

        title = '518 搜尋 - {}'.format(keyword)

        feed = feedgen.feed.FeedGenerator()
        feed.author({'name': 'Feed Generator'})
        feed.id(url)
        feed.link(href=url, rel='alternate')
        feed.title(title)

        try:
            s = services.RequestsService().process()

            r = s.get(url)
            body = lxml.html.fromstring(r.text)
        except Exception:
            # Narrowed from a bare ``except:`` so SystemExit/KeyboardInterrupt
            # still propagate.
            body = lxml.html.fromstring('<html></html>')

        for item in body.cssselect('#listContent > ul'):
            try:
                a = item.cssselect('li.title a')[0]
                job_title = a.getchildren()[0].text_content()

                # Strip the query string from the job link.
                job_url = a.get('href')
                job_url = re.sub(r'\?.*', '', job_url)

                job_company = item.cssselect('li.company')[0].text_content()

                job_desc = item.cssselect('li.sumbox')[0].text_content()
                content = '<h3>{}</h3><p>{}</p>'.format(
                    html.escape(job_company), html.escape(job_desc)
                )

                entry = feed.add_entry()
                entry.content(content, type='xhtml')
                entry.id(job_url)
                entry.link(href=job_url)
                entry.title(job_title)

            except IndexError:
                pass

        res = HttpResponse(feed.atom_str(), content_type='application/atom+xml; charset=utf-8')
        res['Cache-Control'] = 'max-age=300,public'

        return res
예제 #13
0
파일: job104.py 프로젝트: hasname/feedgen
    def get(self, *args, **kwargs):
        """Return 104.com.tw job-search results for ``kwargs['keyword']`` as Atom.

        Fetch failures fall back to an empty document (empty feed);
        malformed listing rows are skipped.
        """
        keyword = kwargs['keyword']

        url = 'https://www.104.com.tw/jobs/search/?ro=0&kwop=7&keyword={}&order=11&asc=0&page=1&mode=s'.format(
            keyword)

        title = '104 搜尋 - {}'.format(keyword)

        feed = feedgen.feed.FeedGenerator()
        feed.author({'name': 'Feed Generator'})
        feed.id(url)
        feed.link(href=url, rel='alternate')
        feed.title(title)

        try:
            s = services.RequestsService().process()

            r = s.get(url)
            body = lxml.html.fromstring(r.text)
        except Exception:
            # Narrowed from a bare ``except:``; also fixed the fallback
            # markup, which used to read '</html></html>'.
            body = lxml.html.fromstring('<html></html>')

        for item in body.cssselect('article.job-list-item'):
            try:
                job_company = item.get('data-cust-name')
                job_desc = item.cssselect(
                    'p.job-list-item__info')[0].text_content()
                job_title = item.get('data-job-name')
                # Links are scheme-relative and carry a jobsource suffix.
                job_url = item.cssselect('a.js-job-link')[0].get('href')
                job_url = re.sub(r'^//', 'https://', job_url)
                job_url = re.sub(r'&jobsource=\w*$', '', job_url)

                content = '<h3>{}</h3><pre>{}</pre>'.format(
                    html.escape(job_company), html.escape(job_desc))

                entry = feed.add_entry()
                entry.content(content, type='xhtml')
                entry.id(job_url)
                entry.link(href=job_url)
                entry.title(job_title)
            except Exception:
                # Narrowed from a bare ``except:``; rows missing expected
                # elements or attributes are skipped.
                pass

        res = HttpResponse(feed.atom_str(),
                           content_type='application/atom+xml; charset=utf-8')
        res['Cache-Control'] = 'max-age=300,public'

        return res
예제 #14
0
def magic(keyword):
    """Print an Atom feed of YouTube search results for *keyword* to stdout."""
    url = 'https://www.youtube.com/results?sp=CAI%%253D&search_query=%s' % (
        urllib.parse.quote_plus(keyword))

    r = requests.get(url)

    title = 'YouTube Search - %s' % (keyword)

    feed = feedgen.feed.FeedGenerator()
    feed.author({'name': 'YouTube Search Feed Generator'})
    feed.id(url)
    feed.link(href=url, rel='alternate')
    feed.title(title)

    for item in lxml.html.fromstring(r.text).cssselect(
            'ol.item-section div.yt-lockup-video'):
        try:
            anchor = item.cssselect('a[title].spf-link')[0]

            # Site-relative hrefs need the host prefixed.
            link = anchor.get('href')
            if link[0] == '/':
                link = 'https://www.youtube.com' + link

            # Thumbnail is derived from the watch URL's "v" query parameter.
            query = urllib.parse.urlparse(link)[4]
            video_id = urllib.parse.parse_qs(query)['v'][0]
            img = 'https://i.ytimg.com/vi/' + video_id + '/hqdefault.jpg'

            title = anchor.get('title')

            content = '%s<br/><img alt="%s" src="%s"/>' % (
                html.escape(title), html.escape(title), html.escape(img))

            entry = feed.add_entry()
            entry.content(content, type='xhtml')
            entry.id(link)
            entry.title(title)
            entry.link(href=link)

        except IndexError:
            pass

    print(str(feed.atom_str(), 'utf-8'))
예제 #15
0
    def get(self, *args, **kwargs):
        """Return Dcard's popular-forum front page as an Atom feed."""
        url = 'https://www.dcard.tw/f'

        title = 'Dcard 首頁'

        feed = feedgen.feed.FeedGenerator()
        feed.author({'name': 'Feed Generator'})
        feed.id(url)
        feed.link(href=url, rel='alternate')
        feed.title(title)

        # NOTE(review): the SOCKS5 proxy obtained here was never installed on
        # the session (no ``s.proxies = ...`` — compare the Shopee view).
        # The unused binding is dropped but the call is kept in case
        # .process() has side effects; confirm whether the proxy was meant
        # to be used.
        services.ProxySocks5Service().process()
        s = services.RequestsService().process()

        # Two-step API: GetHead yields a page key, GetPage yields the items.
        r = s.get(
            'https://www.dcard.tw/service/api/v2/popularForums/GetHead?listKey=popularForums'
        )
        if r.status_code == 200:
            head = r.json()['head']
            r = s.get(
                'https://www.dcard.tw/service/api/v2/popularForums/GetPage?pageKey={}'
                .format(head))
            items = r.json()['items']
        else:
            items = []

        for item in items:
            item_title = item['posts'][0]['title']
            item_url = 'https://www.dcard.tw/f/{}/p/{}'.format(
                item['alias'], item['posts'][0]['id'])
            item_desc = item['posts'][0]['excerpt']

            item_content = '<p>{}</p>'.format(html.escape(item_desc))

            entry = feed.add_entry()
            entry.content(item_content, type='xhtml')
            entry.id(item_url)
            entry.title(item_title)
            entry.link(href=item_url)

        res = HttpResponse(feed.atom_str(),
                           content_type='application/atom+xml; charset=utf-8')
        res['Cache-Control'] = 'max-age=300,public'

        return res
예제 #16
0
    def get(self, *args, **kwargs):
        """Return 1111.com.tw job-search results for ``kwargs['keyword']`` as Atom."""
        keyword = kwargs['keyword']

        url = 'https://www.1111.com.tw/search/job?flag=13&ks={}&fs=1&si=1&ts=4&col=da&sort=desc'.format(
            urllib.parse.quote_plus(keyword))
        title = '1111 搜尋 - {}'.format(keyword)

        feed = feedgen.feed.FeedGenerator()
        feed.author({'name': 'Feed Generator'})
        feed.id(url)
        feed.link(href=url, rel='alternate')
        feed.title(title)

        s = services.RequestsService().process()

        r = s.get(url)
        r.encoding = 'utf-8'
        doc = lxml.html.fromstring(r.text)

        for job in doc.cssselect('li.jbInfo'):
            link = job.cssselect('a.mobileItemClick')[0]
            job_title = link.get('title')
            job_url = link.get('href')
            # Site-relative job links need the host prefixed.
            if job_url.startswith('/job/'):
                job_url = 'https://www.1111.com.tw' + job_url

            company = job.cssselect('a.d-block.organ')[0].get('title')
            desc = job.cssselect('.jbInfoTxt')[0].text_content()
            content = '<h3>{}</h3><p>{}</p>'.format(html.escape(company),
                                                    html.escape(desc))

            entry = feed.add_entry()
            entry.content(content, type='xhtml')
            entry.id(job_url)
            entry.link(href=job_url)
            entry.title(job_title)

        res = HttpResponse(feed.atom_str(),
                           content_type='application/atom+xml; charset=utf-8')
        res['Cache-Control'] = 'max-age=300,public'

        return res
예제 #17
0
def dcardtop():
    """Build an Atom feed of Dcard's front-page post list."""
    url = "https://www.dcard.tw/f"
    title = "Dcard Top"

    feed = feedgen.feed.FeedGenerator()
    feed.author({"name": "Feed Generator"})
    feed.id(url)
    feed.link(href=url, rel="alternate")
    feed.title(title)

    r = requests.get(url, headers={"User-agent": user_agent}, timeout=5)
    doc = lxml.html.fromstring(r.text)

    # CSS-module class names carry hashed suffixes, so match on prefixes.
    for post in doc.cssselect('div[class^="PostList_entry_"]'):
        try:
            post_author = post.cssselect(
                'div[class^="PostAuthorHeader_meta_"]')[0].text_content()
            post_excerpt = post.cssselect(
                'div[class^="PostEntry_excerpt_"]')[0].text_content()
            post_title = post.cssselect(
                'h3[class^="PostEntry_title_"]')[0].text_content()
            post_url = post.cssselect('a[class^="PostEntry_root_"]')[0].get(
                'href')
        except IndexError:
            # Skip entries missing any of the expected fragments.
            continue

        if post_url.startswith("/"):
            post_url = "https://www.dcard.tw" + post_url

        entry = feed.add_entry()
        entry.author({"name": post_author})
        entry.content(html.escape(post_excerpt), type="xhtml")
        entry.id(post_url)
        entry.link(href=post_url)
        entry.title(post_title)

    bottle.response.set_header("Cache-Control", "max-age=300,public")
    bottle.response.set_header("Content-Type", "application/atom+xml")

    return feed.atom_str()
예제 #18
0
def pchome(keyword):
    """Build an Atom feed of PChome search results for *keyword*."""
    url = "https://ecshweb.pchome.com.tw/search/v3.3/all/results?q={}&page=1&sort=new/dc".format(
        urllib.parse.quote_plus(keyword))
    title = "PChome 搜尋 - {}".format(keyword)

    feed = feedgen.feed.FeedGenerator()
    feed.author({"name": "Feed Generator"})
    feed.id(url)
    feed.link(href=url, rel="alternate")
    feed.title(title)

    r = requests.get(url, headers={"User-agent": user_agent}, timeout=5)

    for prod in json.loads(r.text)["prods"]:
        # "D" category ids link to the 24h store, all others to the mall.
        store = "24h" if prod["cateId"][0] == "D" else "mall"
        prod_url = "https://{}.pchome.com.tw/prod/{}".format(store, prod["Id"])
        img_url = "https://a.ecimg.tw%s" % (prod["picB"])

        content = '{}<br/><img alt="{}" src="{}"/>'.format(
            html.escape(prod["describe"]), html.escape(prod["name"]),
            html.escape(img_url))

        entry = feed.add_entry()
        entry.author({"name": prod["author"]})
        entry.content(content, type="xhtml")
        entry.id(prod_url)
        entry.link(href=prod_url)
        entry.title(prod["name"])

    bottle.response.set_header("Cache-Control", "max-age=300,public")
    bottle.response.set_header("Content-Type", "application/atom+xml")

    return feed.atom_str()
예제 #19
0
def job518(keyword):
    """Build an Atom feed of 518.com.tw job-search results for *keyword*.

    Listing rows missing any expected cell are skipped.
    """
    url = "https://www.518.com.tw/job-index-P-1.html?i=1&am=1&ad={}&orderType=1&orderField=8".format(
        urllib.parse.quote_plus(keyword))

    title = "518 搜尋 - {}".format(keyword)

    feed = feedgen.feed.FeedGenerator()
    feed.author({"name": "Feed Generator"})
    feed.id(url)
    feed.link(href=url, rel="alternate")
    feed.title(title)

    r = requests.get(url, headers={"User-agent": user_agent}, timeout=5)
    body = lxml.html.fromstring(r.text)

    for item in body.cssselect("#listContent > ul"):
        try:
            a = item.cssselect("li.title a")[0]
            job_title = a.getchildren()[0].text_content()

            # Strip the query string from the job link.  Raw string: "\?"
            # inside a plain literal is an invalid escape sequence and
            # warns on modern Python.
            job_url = a.get("href")
            job_url = re.sub(r"\?.*", "", job_url)

            job_company = item.cssselect("li.company")[0].text_content()

            job_desc = item.cssselect("li.sumbox")[0].text_content()
            content = "<h3>{}</h3><p>{}</p>".format(html.escape(job_company),
                                                    html.escape(job_desc))

            entry = feed.add_entry()
            entry.content(content, type="xhtml")
            entry.id(job_url)
            entry.link(href=job_url)
            entry.title(job_title)

        except IndexError:
            pass

    bottle.response.set_header("Cache-Control", "max-age=300,public")
    bottle.response.set_header("Content-Type", "application/atom+xml")

    return feed.atom_str()
예제 #20
0
파일: pchome.py 프로젝트: hasname/feedgen
    def get(self, *args, **kwargs):
        """Return PChome's light-novel new arrivals as an Atom feed.

        The endpoint answers JSONP; the wrapper is stripped with a regex
        before JSON decoding.  Any fetch/parse failure yields an empty feed.
        """
        url = 'https://ecapi.pchome.com.tw/cdn/ecshop/prodapi/v2/newarrival/DJAZ/prod&offset=1&limit=20&fields=Id,Nick,Pic,Price,Discount,isSpec,Name,isCarrier,isSnapUp,isBigCart&_callback=jsonp_prodlist?_callback=jsonp_prodlist'

        title = 'PChome 輕小說'

        feed = feedgen.feed.FeedGenerator()
        feed.author({'name': 'Feed Generator'})
        feed.id(url)
        feed.link(href=url, rel='alternate')
        feed.title(title)

        try:
            s = services.RequestsService().process()

            r = s.get(url)
            # Keep only the JSON array inside the JSONP callback wrapper.
            body = re.match(r'^[^\[]*(\[.*\])[^\[]*$', r.text).group(1)
            items = json.loads(body)
        except Exception:
            # Narrowed from a bare ``except:`` so SystemExit/KeyboardInterrupt
            # still propagate.
            items = []

        for item in items:
            content = '{}<br/><img alt="{}" src="https://cs-a.ecimg.tw{}"/>'.format(
                html.escape(item['Nick']),
                html.escape(item['Nick']),
                html.escape(item['Pic']['B']),
            )
            book_title = item['Nick']
            book_url = 'https://24h.pchome.com.tw/books/prod/{}'.format(
                urllib.parse.quote_plus(item['Id']))

            entry = feed.add_entry()
            entry.content(content, type='xhtml')
            entry.id(book_url)
            entry.title(book_title)
            entry.link(href=book_url)

        res = HttpResponse(feed.atom_str(),
                           content_type='application/atom+xml; charset=utf-8')
        res['Cache-Control'] = 'max-age=300,public'

        return res
예제 #21
0
def job1111(keyword):
    """Build an Atom feed of 1111 job-bank search results for *keyword*.

    Scrapes the search-result page and emits one entry per job listing,
    with the company name and job summary in the entry body.
    """
    url = "https://www.1111.com.tw/job-bank/job-index.asp?flag=13&ks={}&fs=1&si=1&ts=4&col=da&sort=desc".format(
        urllib.parse.quote_plus(keyword))

    title = "1111 搜尋 - {}".format(keyword)

    feed = feedgen.feed.FeedGenerator()
    feed.author({"name": "Feed Generator"})
    feed.id(url)
    feed.link(href=url, rel="alternate")
    feed.title(title)

    resp = requests.get(url, headers={"User-agent": user_agent}, timeout=5)
    resp.encoding = "utf-8"
    doc = lxml.html.fromstring(resp.text)

    for row in doc.cssselect("li.digest"):
        link = row.cssselect("a.mobiFullLInk")[0]
        href = link.get("href")
        # Listing links are protocol-relative; pin them to https.
        if href.startswith("//"):
            href = "https:" + href

        company = row.cssselect(".jbInfoin h4 a")[0].get("title")
        summary = row.cssselect(".jbInfoTxt")[0].text_content()
        body = "<h3>{}</h3><p>{}</p>".format(html.escape(company),
                                            html.escape(summary))

        entry = feed.add_entry()
        entry.content(body, type="xhtml")
        entry.id(href)
        entry.link(href=href)
        entry.title(link.get("title"))

    bottle.response.set_header("Cache-Control", "max-age=300,public")
    bottle.response.set_header("Content-Type", "application/atom+xml")

    return feed.atom_str()
예제 #22
0
파일: plurk.py 프로젝트: hasname/feedgen
    def get(self, *args, **kwargs):
        """Serve the day's top-10 replurks for ``kwargs['lang']`` as Atom."""
        lang = kwargs['lang']

        url = 'https://www.plurk.com/Stats/topReplurks?period=day&lang={}&limit=10'.format(urllib.parse.quote_plus(lang))

        title = 'Plurk Top ({})'.format(lang)

        feed = feedgen.feed.FeedGenerator()
        feed.author({'name': 'Feed Generator'})
        feed.id(url)
        feed.link(href=url, rel='alternate')
        feed.title(title)

        session = services.RequestsService().process()
        payload = json.loads(session.get(url).text)

        # Each stats element is a (rank, plurk) pair; the rank is unused.
        for _, plurk in payload['stats']:
            plurk_url = 'https://www.plurk.com/p/' + base36.dumps(plurk['id'])

            # Strip hard-coded image dimensions so readers can scale images.
            body = self.str_clean(plurk['content'])
            body = re.sub(r' height="\d+(px)?"', ' ', body)
            body = re.sub(r' width="\d+(px)?"', ' ', body)

            entry = feed.add_entry()
            entry.author({'name': self.str_clean(plurk['owner']['full_name'])})
            entry.content(body, type='CDATA')
            entry.id(plurk_url)
            entry.link(href=plurk_url)
            entry.published(plurk['posted'])
            entry.title(self.str_clean(plurk['content_raw']))

        res = HttpResponse(feed.atom_str(), content_type='application/atom+xml; charset=utf-8')
        res['Cache-Control'] = 'max-age=300,public'

        return res
예제 #23
0
def job104(keyword):
    """Build an Atom feed of 104 job-search results for *keyword*.

    Scrapes the search-result page and emits one entry per job article,
    with company name and summary in the entry body.
    """
    # Quote the keyword so spaces/CJK punctuation survive in the query
    # string (consistent with job1111, which already quotes).
    url = "https://www.104.com.tw/jobs/search/?ro=0&kwop=7&keyword={}&order=11&asc=0&page=1&mode=s".format(
        urllib.parse.quote_plus(keyword))

    title = "104 搜尋 - {}".format(keyword)

    feed = feedgen.feed.FeedGenerator()
    feed.author({"name": "Feed Generator"})
    feed.id(url)
    feed.link(href=url, rel="alternate")
    feed.title(title)

    r = requests.get(url, headers={"User-agent": user_agent}, timeout=5)
    body = lxml.html.fromstring(r.text)

    for item in body.cssselect("article.job-list-item"):
        job_company = item.get("data-cust-name")
        job_desc = item.cssselect("p.job-list-item__info")[0].text_content()
        job_title = item.get("data-job-name")
        job_url = item.cssselect("a.js-job-link")[0].get("href")
        # Normalize protocol-relative links and drop the tracking suffix.
        job_url = re.sub(r"^//", "https://", job_url)
        job_url = re.sub(r"&jobsource=\w*$", "", job_url)

        content = "<h3>{}</h3><pre>{}</pre>".format(html.escape(job_company),
                                                    html.escape(job_desc))

        entry = feed.add_entry()
        entry.content(content, type="xhtml")
        entry.id(job_url)
        entry.link(href=job_url)
        entry.title(job_title)

    bottle.response.set_header("Cache-Control", "max-age=300,public")
    bottle.response.set_header("Content-Type", "application/atom+xml")

    return feed.atom_str()
예제 #24
0
 def default_feed(self) -> feedgen.feed.FeedGenerator:
     """Return a feed pre-populated with this channel's title, repo link,
     and description."""
     fg = feedgen.feed.FeedGenerator()
     fg.title(self.channel)
     fg.link(href=self.repo_url, rel="self")
     fg.description(f'{self.channel} on {config.INSTANCE["host"]}')
     return fg
예제 #25
0
파일: yrss.py 프로젝트: Scheirle/yrss
def generatefeed(user):
    """Return an Atom feed (as text) of *user*'s 20 most recent YouTube uploads.

    Results are cached on disk under ``.cache/<user>`` for ``CACHE_TIME``
    seconds.  Aborts the Flask request with 400 on an invalid username or
    a YouTube API failure.
    """
    # Validate that it's a valid user id
    # https://support.google.com/a/answer/33386?hl=en
    if not re.match(r"^[A-Za-z0-9_'.-]{5,20}$", user):
        flask.abort(400, 'Invalid username format')

    # Try the cache first, unless it's old
    cache_file = os.path.join('.cache', user)
    if os.path.exists(cache_file):
        creation_time = os.path.getmtime(cache_file)
        if time.time() - creation_time < CACHE_TIME:
            with open(cache_file) as fin:
                return fin.read()

    # Use the channel to get the 'uploads' playlist id
    response = requests.get(
        'https://www.googleapis.com/youtube/v3/channels',
        params = {
            'part': 'contentDetails',
            'forUsername': user,
            'key': API_KEY,
        },
        timeout = 10,  # don't hang the request handler on a slow API
    )
    if response.status_code != 200:
        flask.abort(400, 'YouTube API error')
    channels = response.json()  # parse the body once, not per access
    if not channels['items']:
        flask.abort(400, 'User not found')

    playlistId = channels['items'][0]['contentDetails']['relatedPlaylists']['uploads']

    # Get the most recent 20 videos on the 'uploads' playlist
    response = requests.get(
        'https://www.googleapis.com/youtube/v3/playlistItems',
        params = {
            'part': 'snippet',
            'maxResults': 20,
            'playlistId': playlistId,
            'key': API_KEY
        },
        timeout = 10,
    )
    # Previously unchecked: a failure here caused a KeyError (HTTP 500).
    if response.status_code != 200:
        flask.abort(400, 'YouTube API error')

    # Generate a list of results that can be used as feed items
    feed = feedgen.feed.FeedGenerator()
    feed.title(user + ' (YRSS)')
    feed.author({'name': user + ' (YRSS)'})
    feed.link(href = 'https://www.youtube.com/user/' + user)
    feed.id('https://www.youtube.com/user/' + user)

    for item in response.json()['items']:
        snippet = item['snippet']
        title = snippet['title']
        video_id = snippet['resourceId']['videoId']
        published = snippet['publishedAt']
        thumbnail = snippet['thumbnails']['high']['url']
        video_url = 'https://www.youtube.com/watch?v=' + video_id

        entry = feed.add_entry()
        entry.title(title)
        entry.link(href = video_url)
        entry.published(dateutil.parser.parse(published))
        entry.updated(dateutil.parser.parse(published))
        entry.id(video_url)

        entry.content('''
<a href="{url}"><img src="{img}" /></a><br />
<a href="{url}">{title}</a>
'''.format(
            url = video_url,
            img = thumbnail,
            title = title,
        ), None, 'html')

    # Cache to disk (first run: the cache directory may not exist yet)
    feed_txt = feed.atom_str()
    os.makedirs('.cache', exist_ok=True)
    with open(cache_file, 'w') as fout:
        fout.write(feed_txt)

    return feed_txt
예제 #26
0
    def get(self, *args, **kwargs):
        """Serve the Dcard board named by ``kwargs['board']`` as an Atom feed.

        Fetches the board page through a SOCKS5 proxy, scrapes the article
        list, and emits one entry per post (with thumbnail when available).
        Returns 503 if the page cannot be fetched or parsed.
        """
        board = kwargs['board']
        url = 'https://www.dcard.tw/f/{}'.format(
            urllib.parse.quote_plus(board))

        title = 'Dcard 看板 - {}'.format(board)

        feed = feedgen.feed.FeedGenerator()
        feed.author({'name': 'Feed Generator'})
        feed.id(url)
        feed.link(href=url, rel='alternate')
        feed.title(title)

        try:
            proxy = services.ProxySocks5Service().process()
            s = services.RequestsService().process()

            s.proxies = {'http': proxy, 'https': proxy}
            r = s.get(url)
            body = lxml.html.fromstring(r.text)
        except Exception:
            # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
            # still propagate; any fetch/parse error yields a 503.
            return HttpResponse('Service Unavailable', status=503)

        items = body.cssselect('div[data-index]')
        for item in items:
            # Skip placeholder rows that carry no article.
            if not item.cssselect('article'):
                continue

            item_title = item.cssselect('article > h2')[0].text_content()
            item_url = item.cssselect('article > h2 > a')[0].get('href')
            item_desc = item.cssselect('article > h2 + div')[0].text_content()
            try:
                item_img = item.cssselect('article > img')[0]
            except IndexError:
                item_img_src = None
            else:
                item_img_src = item_img.get('src')
                # Rewrite the "b" (small) imgur variant to the full-size one.
                g = re.match(r'^(https://imgur\.dcard\.tw/\w+)b(\.jpg)$',
                             item_img_src)
                if g:
                    item_img_src = g.group(1) + g.group(2)

            if item_url.startswith('/f/'):
                item_url = 'https://www.dcard.tw' + item_url

            if item_img_src is None:
                item_content = '{}'.format(html.escape(item_desc))
            else:
                item_content = '<img alt="{}" src="{}"/><br/>{}'.format(
                    html.escape(item_title), html.escape(item_img_src),
                    html.escape(item_desc))

            entry = feed.add_entry()
            entry.content(item_content, type='xhtml')
            entry.id(item_url)
            entry.title(item_title)
            entry.link(href=item_url)

        res = HttpResponse(feed.atom_str(),
                           content_type='application/atom+xml; charset=utf-8')
        res['Cache-Control'] = 'max-age=300,public'

        return res
예제 #27
0
    def get(self, *args, **kwargs):
        """Serve 591 rental-search results as an Atom feed.

        The region and keyword come from the URL kwargs; any extra query
        string is forwarded to 591 as additional filters.  Each entry's
        title is prefixed with the computed price-per-ping.  A failed fetch
        degrades to an empty feed (parsing an empty document).
        """
        region = kwargs['region']
        keyword = kwargs['keyword']

        # Support query string to filter results.
        qs = self.request.META.get('QUERY_STRING', '')
        if qs != '':
            qs = '&' + qs

        url = 'https://rent.591.com.tw/?kind=0&order=posttime&orderType=desc&region={}&keywords={}{}'.format(
            region, keyword, qs)

        if qs == '':
            title = '591 出租搜尋 - {}'.format(keyword)
        else:
            title = '591 出租搜尋 - {} ({})'.format(keyword, qs)

        feed = feedgen.feed.FeedGenerator()
        feed.author({'name': 'Feed Generator'})
        feed.id(url)
        feed.link(href=url, rel='alternate')
        feed.title(title)

        try:
            s = services.RequestsService().process()

            r = s.get(url)
            text = r.text
        except Exception:
            # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
            # still propagate; a fetch failure falls back to an empty page.
            text = '<html></html>'

        body = lxml.html.fromstring(text)

        items = body.cssselect('#content > ul')

        for item in items:
            item_metainfo = item.cssselect(
                '.infoContent .lightBox')[0].text_content()
            item_area = re.search(r'([\.0-9]+坪)', item_metainfo).group(1)
            item_desc = item.text_content()
            item_img = item.cssselect('.imageBox img')[0].get('data-original')
            item_price = item.cssselect('.price')[0].text_content()
            item_title = item.cssselect('.infoContent')[0].text_content()
            item_url = item.cssselect('a')[0].get('href')
            item_url = re.sub(r'^//', 'https://', item_url)

            # Derive NT$/ping from the price (strip thousands separators and
            # any trailing unit text) and the area in ping.
            item_price_num = item_price.replace(',', '')
            item_price_num = float(re.sub(r' .*', '', item_price_num))
            item_area_num = float(re.sub(r'坪.*', '', item_area))
            item_unitprice = int(item_price_num / item_area_num)

            content = '<img alt="{}" src="{}"/><br/>{}<br/>{}'.format(
                html.escape(item_title), html.escape(item_img),
                html.escape(item_title), html.escape(item_desc))

            entry = feed.add_entry()
            entry.content(content, type='xhtml')
            entry.id(item_url)
            entry.link(href=item_url)
            entry.title('${}/坪 - {} - {}'.format(item_unitprice, item_area,
                                                 item_title))

        res = HttpResponse(feed.atom_str(),
                           content_type='application/atom+xml; charset=utf-8')
        res['Cache-Control'] = 'max-age=300,public'

        return res
예제 #28
0
    def get(self, *args, **kwargs):
        """Serve Momoshop search results for ``kwargs['keyword']`` as Atom.

        Visits the site once to establish cookies, then posts the search
        payload to the AJAX endpoint.  Returns 503 on any fetch/parse
        failure, and an empty feed when the search result code is not 200.
        """
        keyword = kwargs['keyword']

        url = 'https://www.momoshop.com.tw/search/searchShop.jsp?keyword={}&searchType=4&cateLevel=0&cateCode=&curPage=1&_isFuzzy=0&showType=chessboardType'.format(urllib.parse.quote_plus(keyword))

        title = 'Momoshop 搜尋 - {}'.format(keyword)

        feed = feedgen.feed.FeedGenerator()
        feed.author({'name': 'Feed Generator'})
        feed.id(url)
        feed.link(href=url, rel='alternate')
        feed.title(title)

        try:
            s = services.RequestsService().process()

            # Environment cookie.
            r = s.get('https://www.momoshop.com.tw/')

            # Get the actual content.
            now = int(time.time())
            data = {
                'flag': 2018,
                'data': {
                    'specialGoodsType': '',
                    'searchValue': keyword,
                    'cateCode': '',
                    'cateLevel': '-1',
                    'cp': 'N',
                    'NAM': 'N',
                    'first': 'N',
                    'freeze': 'N',
                    'superstore': 'N',
                    'tvshop': 'N',
                    'china': 'N',
                    'tomorrow': 'N',
                    'stockYN': 'N',
                    'prefere': 'N',
                    'threeHours': 'N',
                    'showType': 'chessboardType',
                    'curPage': '1',
                    'priceS': '0',
                    'priceE': '9999999',
                    'searchType': '4',
                    'reduceKeyword': '',
                    'isFuzzy': '0',
                    'rtnCateDatainfo':  {
                        'cateCode': '',
                        'cateLv': '-1',
                        'keyword': keyword,
                        'curPage': '1',
                        'historyDoPush': False,
                        'timestamp': now,
                    },
                }
            }

            url = 'https://www.momoshop.com.tw/ajax/ajaxTool.jsp?n=2018'
            r = s.post(url, data={'data': json.dumps(data)}, headers={'Referer': 'https://www.momoshop.com.tw/'})
            body = json.loads(r.text)
        except Exception:
            # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
            # still propagate; any fetch/parse error yields a 503.
            return HttpResponse('Service Unavailable', status=503)

        # If it's not 200 then return an empty feed.
        if body['rtnData']['searchResult']['resultCode'] != '200':
            res = HttpResponse(feed.atom_str(), content_type='application/atom+xml; charset=utf-8')
            res['Cache-Control'] = 'max-age=300,public'
            return res

        for item in body['rtnData']['searchResult']['rtnSearchData']['goodsInfoList']:
            # Product name & description
            item_img = item['imgUrl']
            item_name = item['goodsName']
            item_title = '({}) {}'.format(item['goodsPrice'], item_name)
            item_url = 'https://www.momoshop.com.tw/goods/GoodsDetail.jsp?i_code={}'.format(item['goodsCode'])

            # Use larger size.
            item_img = item_img.replace('_L.', '_B.')

            content = '<img alt="{}" src="{}"/>'.format(html.escape(item_name), html.escape(item_img))

            entry = feed.add_entry()
            entry.content(content, type='xhtml')
            entry.id(item_url)
            entry.link(href=item_url)
            entry.title(item_title)

        res = HttpResponse(feed.atom_str(), content_type='application/atom+xml; charset=utf-8')
        res['Cache-Control'] = 'max-age=300,public'

        return res
예제 #29
0
 def _init_feed() -> feedgen.feed.FeedGenerator:
     """Return a feed pre-populated from the module configuration
     (title, repo link, description)."""
     fg = feedgen.feed.FeedGenerator()
     fg.title(config.FEED_TITLE)
     fg.link(href=config.REPO_URL, rel='self')
     fg.description(config.FEED_DESCRIPTION)
     return fg