def get(self, *args, **kwargs):
    """Render the BOOKWALKER light-novel listing page as an Atom feed."""
    url = 'https://www.bookwalker.com.tw/more/fiction/1/3'
    title = 'BOOKWALKER 輕小說'

    feed = feedgen.feed.FeedGenerator()
    feed.author({'name': 'Feed Generator'})
    feed.id(url)
    feed.link(href=url, rel='alternate')
    feed.title(title)

    session = services.RequestsService().process()
    page = session.get(url)
    doc = lxml.html.fromstring(page.text)

    for anchor in doc.cssselect('.bwbookitem a'):
        # Covers are lazy-loaded: copy data-src into src so feed readers
        # actually display the image.
        cover = anchor.cssselect('img')[0]
        cover.set('src', cover.get('data-src'))

        entry = feed.add_entry()
        entry.content(lxml.etree.tostring(anchor, encoding='unicode'),
                      type='xhtml')
        entry.id(anchor.get('href'))
        entry.title(anchor.get('title'))
        entry.link(href=anchor.get('href'))

    res = HttpResponse(feed.atom_str(),
                       content_type='application/atom+xml; charset=utf-8')
    res['Cache-Control'] = 'max-age=300,public'
    return res
def bookwalker_lightnovel():
    """Serve the BOOKWALKER light-novel new-arrival list as an Atom feed."""
    page_url = "https://www.bookwalker.com.tw/more/fiction/1/3"

    feed = feedgen.feed.FeedGenerator()
    feed.author({"name": "Feed Generator"})
    feed.id(page_url)
    feed.link(href=page_url, rel="alternate")
    feed.title("BOOKWALKER 輕小說")

    resp = requests.get(page_url, headers={"User-agent": user_agent},
                        timeout=5)
    doc = lxml.html.fromstring(resp.text)
    for anchor in doc.cssselect(".bwbookitem a"):
        # Covers are lazy-loaded: promote data-src to src so readers
        # render them.
        cover = anchor.cssselect("img")[0]
        cover.set("src", cover.get("data-src"))

        entry = feed.add_entry()
        entry.content(lxml.etree.tostring(anchor, encoding="unicode"),
                      type="xhtml")
        entry.id(anchor.get("href"))
        entry.title(anchor.get("title"))
        entry.link(href=anchor.get("href"))

    bottle.response.set_header("Cache-Control", "max-age=300,public")
    bottle.response.set_header("Content-Type", "application/atom+xml")
    return feed.atom_str()
def get(self, *args, **kwargs):
    """Render Plurk search results for ``kwargs['keyword']`` as Atom."""
    keyword = kwargs['keyword']
    url = 'https://www.plurk.com/Search/search2'

    feed = feedgen.feed.FeedGenerator()
    feed.author({'name': 'Feed Generator'})
    feed.id(url)
    feed.link(href=url, rel='alternate')
    feed.title('Plurk Search - {}'.format(keyword))

    session = services.RequestsService().process()
    response = session.post(url, data={'query': keyword})
    for plurk in json.loads(response.text)['plurks']:
        # Plurk permalinks encode the numeric id in base36.
        permalink = 'https://www.plurk.com/p/' + base36.dumps(plurk['id'])
        entry = feed.add_entry()
        entry.content(self.str_clean(plurk['content']), type='CDATA')
        entry.id(permalink)
        entry.link(href=permalink)
        entry.published(dateutil.parser.parse(plurk['posted']))
        entry.title(self.str_clean(plurk['content_raw']))

    res = HttpResponse(feed.atom_str(),
                       content_type='application/atom+xml; charset=utf-8')
    res['Cache-Control'] = 'max-age=300,public'
    return res
def pchome_lightnovel():
    """Serve PChome's light-novel new arrivals as an Atom feed."""
    api_url = (
        "https://ecapi.pchome.com.tw/cdn/ecshop/prodapi/v2/newarrival/DJAZ/"
        "prod&offset=1&limit=20&fields=Id,Nick,Pic,Price,Discount,isSpec,"
        "Name,isCarrier,isSnapUp,isBigCart&_callback=jsonp_prodlist"
        "?_callback=jsonp_prodlist"
    )

    feed = feedgen.feed.FeedGenerator()
    feed.author({"name": "Feed Generator"})
    feed.id(api_url)
    feed.link(href=api_url, rel="alternate")
    feed.title("PChome 輕小說")

    resp = requests.get(api_url, headers={"User-agent": user_agent},
                        timeout=5)
    # The endpoint answers in JSONP; strip the callback wrapper and keep
    # only the bracketed JSON array.
    payload = re.match(r"^[^\[]*(\[.*\])[^\[]*$", resp.text).group(1)
    for book in json.loads(payload):
        nick = book["Nick"]
        link = "https://24h.pchome.com.tw/books/prod/{}".format(
            urllib.parse.quote_plus(book["Id"]))
        markup = '{}<br/><img alt="{}" src="https://a.ecimg.tw{}"/>'.format(
            html.escape(nick), html.escape(nick),
            html.escape(book["Pic"]["B"]))

        entry = feed.add_entry()
        entry.content(markup, type="xhtml")
        entry.id(link)
        entry.title(nick)
        entry.link(href=link)

    bottle.response.set_header("Cache-Control", "max-age=300,public")
    bottle.response.set_header("Content-Type", "application/atom+xml")
    return feed.atom_str()
def plurktop(lang):
    """Serve the day's top replurks for *lang* as an Atom feed.

    Fix: the per-row loop bound an unused variable ``x``; it is now the
    conventional ``_`` placeholder.
    """
    url = ("https://www.plurk.com/Stats/topReplurks?period=day"
           "&lang={}&limit=50").format(urllib.parse.quote_plus(lang))
    title = "Plurk Top ({})".format(lang)
    feed = feedgen.feed.FeedGenerator()
    feed.author({"name": "Feed Generator"})
    feed.id(url)
    feed.link(href=url, rel="alternate")
    feed.title(title)
    r = requests.get(url, headers={"User-agent": user_agent}, timeout=5)
    body = json.loads(r.text)
    # Each stats row is a pair; only the second element (the plurk dict)
    # is used here.
    for _, stat in body["stats"]:
        url = "https://www.plurk.com/p/" + base36.dumps(stat["id"])
        content = stat["content"]
        # Drop hard-coded image dimensions so feed readers can scale.
        content = re.sub(r' height="\d+(px)?"', " ", content)
        content = re.sub(r' width="\d+(px)?"', " ", content)
        entry = feed.add_entry()
        entry.author({"name": stat["owner"]["full_name"]})
        entry.content(content, type="CDATA")
        entry.id(url)
        entry.link(href=url)
        entry.published(stat["posted"])
        entry.title(stat["content_raw"])
    bottle.response.set_header("Cache-Control", "max-age=300,public")
    bottle.response.set_header("Content-Type", "application/atom+xml")
    return feed.atom_str()
def get(self, *args, **kwargs):
    """Render YouTube search results (newest first) as an Atom feed."""
    keyword = kwargs['keyword']
    url = ('https://www.youtube.com/results?search_query={}&sp=CAI%253D'
           .format(urllib.parse.quote_plus(keyword)))

    feed = feedgen.feed.FeedGenerator()
    feed.author({'name': 'Feed Generator'})
    feed.id(url)
    feed.link(href=url, rel='alternate')
    feed.title('YouTube Search - {}'.format(keyword))

    session = services.RequestsService().process()
    page = session.get(url)
    # The result list lives in the inlined ytInitialData JSON blob.
    blob = re.search(r"var ytInitialData = (.*?);?</script>", page.text,
                     re.MULTILINE).group(1)
    data = json.loads(blob)
    sections = data['contents']['twoColumnSearchResultsRenderer'][
        'primaryContents']['sectionListRenderer']['contents'][0][
        'itemSectionRenderer']['contents']

    for item in sections:
        try:
            video = item['videoRenderer']
            author = video['longBylineText']['runs'][0]['text']
            link = ('https://www.youtube.com/watch?v=' +
                    urllib.parse.quote(video['videoId']))
            img = ('https://i.ytimg.com/vi/' + video['videoId'] +
                   '/hqdefault.jpg')
            title = video['title']['runs'][0]['text']
            content = '<img alt="{}" src="{}"/>'.format(
                html.escape(title), html.escape(img))

            entry = feed.add_entry()
            entry.author({'name': author})
            entry.content(content, type='xhtml')
            entry.id(link)
            entry.title(title)
            entry.link(href=link)
        except (IndexError, KeyError):
            # Non-video renderers in the section lack these keys; skip.
            pass

    res = HttpResponse(feed.atom_str(),
                       content_type='application/atom+xml; charset=utf-8')
    res['Cache-Control'] = 'max-age=300,public'
    return res
def shopee(keyword):
    """Serve the newest Shopee listings matching *keyword* as Atom."""
    search_url = ("https://shopee.tw/api/v2/search_items/?by=ctime"
                  "&keyword={}&limit=50&newest=0&order=desc"
                  "&page_type=search").format(
                      urllib.parse.quote_plus(keyword))

    feed = feedgen.feed.FeedGenerator()
    feed.author({"name": "Feed Generator"})
    feed.id(search_url)
    feed.link(href=search_url, rel="alternate")
    feed.title("蝦皮搜尋 - {}".format(keyword))

    resp = requests.get(search_url, headers={"User-agent": user_agent},
                        timeout=5)
    search_body = json.loads(resp.text)

    # Fetch per-item detail pages concurrently; the search response alone
    # does not carry the fields rendered below.
    session = FuturesSession(executor=ThreadPoolExecutor(max_workers=10))
    futures = []
    for hit in search_body["items"]:
        detail_url = "https://shopee.tw/api/v2/item/get?itemid=%d&shopid=%d" % (
            hit["itemid"], hit["shopid"])
        futures.append(session.get(detail_url,
                                   headers={"User-agent": user_agent},
                                   timeout=5))

    for future in futures:
        detail = json.loads(future.result().text)["item"]
        name = detail["name"]
        prod_url = "https://shopee.tw/product/%d/%d" % (detail["shopid"],
                                                        detail["itemid"])
        img_url = "https://cf.shopee.tw/file/%s" % (detail["image"])
        markup = '{}<br/><img alt="{}" src="{}"/>'.format(
            html.escape(name), html.escape(name), html.escape(img_url))

        entry = feed.add_entry()
        entry.content(markup, type="xhtml")
        entry.id(prod_url)
        entry.link(href=prod_url)
        entry.title(name)

    bottle.response.set_header("Cache-Control", "max-age=300,public")
    bottle.response.set_header("Content-Type", "application/atom+xml")
    return feed.atom_str()
def youtube(keyword):
    """Serve YouTube search results (newest first) as an Atom feed.

    Fixes: the request carried no User-agent header and no timeout (a
    stalled connection hung the handler forever), unlike every sibling
    handler in this module; and the URL kept a doubled percent sign
    ("CAI%%253D") left over from %-formatting even though it is built
    with ``str.format``, which does not treat '%' specially.
    """
    url = "https://www.youtube.com/results?sp=CAI%253D&search_query={}".format(
        urllib.parse.quote_plus(keyword))
    title = "YouTube Search - {}".format(keyword)
    feed = feedgen.feed.FeedGenerator()
    feed.author({"name": "Feed Generator"})
    feed.id(url)
    feed.link(href=url, rel="alternate")
    feed.title(title)
    r = requests.get(url, headers={"User-agent": user_agent}, timeout=5)
    body = lxml.html.fromstring(r.text)
    for item in body.cssselect("ol.item-section div.yt-lockup-video"):
        try:
            a = item.cssselect("a[title].spf-link")[0]
            # author
            author = item.cssselect(
                ".yt-lockup-byline a.spf-link.yt-uix-sessionlink"
            )[0].text_content()
            # link: relative hrefs are rooted at youtube.com
            link = a.get("href")
            if "/" == link[0]:
                link = "https://www.youtube.com" + link
            # img: derive the thumbnail from the v= query parameter
            link_tuple = urllib.parse.urlparse(link)
            d = urllib.parse.parse_qs(link_tuple[4])
            img = "https://i.ytimg.com/vi/" + d["v"][0] + "/hqdefault.jpg"
            # title
            title = a.get("title")
            # content
            content = '<img alt="{}" src="{}"/>'.format(
                html.escape(title), html.escape(img))
            entry = feed.add_entry()
            entry.author({"name": author})
            entry.content(content, type="xhtml")
            entry.id(link)
            entry.title(title)
            entry.link(href=link)
        except IndexError:
            pass
    bottle.response.set_header("Cache-Control", "max-age=300,public")
    bottle.response.set_header("Content-Type", "application/atom+xml")
    return feed.atom_str()
def get(self, *args, **kwargs):
    """Render PChome search results for ``kwargs['keyword']`` as Atom.

    On fetch/parse failure the handler degrades to an empty feed
    rather than returning a 500.
    """
    keyword = kwargs['keyword']
    url = ('https://ecshweb.pchome.com.tw/search/v3.3/all/results'
           '?q={}&page=1&sort=new/dc').format(
        urllib.parse.quote_plus(keyword))
    title = 'PChome 搜尋 - {}'.format(keyword)
    feed = feedgen.feed.FeedGenerator()
    feed.author({'name': 'Feed Generator'})
    feed.id(url)
    feed.link(href=url, rel='alternate')
    feed.title(title)
    try:
        s = services.RequestsService().process()
        r = s.get(url)
        body = json.loads(r.text)
    except Exception:
        # Narrowed from a bare ``except`` that also swallowed
        # SystemExit/KeyboardInterrupt.
        body = {'prods': []}
    for item in body['prods']:
        # Product name & description
        item_author = self.str_clean(item['author'])
        item_desc = self.str_clean(item['describe'])
        item_name = self.str_clean(item['name'])
        item_origin_price = item['originPrice']
        item_price = item['price']
        item_title = '(${}/${}) {}'.format(item_origin_price, item_price,
                                           item_name)
        # 'D'-prefixed categories link to the 24h store, others to mall.
        if item['cateId'][0] == 'D':
            item_url = 'https://24h.pchome.com.tw/prod/' + item['Id']
        else:
            item_url = 'https://mall.pchome.com.tw/prod/' + item['Id']
        img_url = 'https://cs-a.ecimg.tw%s' % (item['picB'])
        content = '{}<br/><img alt="{}" src="{}"/>'.format(
            html.escape(item_desc), html.escape(item_name),
            html.escape(img_url))
        entry = feed.add_entry()
        entry.author({'name': item_author})
        entry.content(content, type='xhtml')
        entry.id(item_url)
        entry.link(href=item_url)
        entry.title(item_title)
    res = HttpResponse(feed.atom_str(),
                       content_type='application/atom+xml; charset=utf-8')
    res['Cache-Control'] = 'max-age=300,public'
    return res
def get(self, *args, **kwargs):
    """Render the newest Shopee listings for ``kwargs['keyword']`` as Atom.

    Returns HTTP 503 when the proxied upstream fetch or parse fails.
    """
    keyword = kwargs['keyword']
    url = ('https://shopee.tw/api/v2/search_items/?by=ctime&keyword={}'
           '&limit=50&newest=0&order=desc&page_type=search&version=2').format(
        urllib.parse.quote_plus(keyword))
    referer = 'https://shopee.tw/search?keyword={}'.format(
        urllib.parse.quote_plus(keyword))
    title = '蝦皮搜尋 - {}'.format(keyword)
    feed = feedgen.feed.FeedGenerator()
    feed.author({'name': 'Feed Generator'})
    feed.id(url)
    feed.link(href=url, rel='alternate')
    feed.title(title)
    try:
        proxy = services.ProxyService().process()
        s = services.RequestsService().process()
        s.proxies = {'http': proxy, 'https': proxy}
        r = s.get(url, headers={'Referer': referer})
        body = json.loads(r.text)
        items = body['items']
    except Exception:
        # Narrowed from a bare ``except`` that also caught
        # SystemExit/KeyboardInterrupt.
        return HttpResponse('Service Unavailable', status=503)
    # The API sometimes returns a non-list 'items'; treat it as empty.
    if not isinstance(items, list):
        items = []
    for item in items:
        itemid = item['itemid']
        name = item['name']
        shopid = item['shopid']
        prod_url = 'https://shopee.tw/product/%d/%d' % (shopid, itemid)
        img_url = 'https://cf.shopee.tw/file/%s' % (item['image'])
        content = '{}<br/><img alt="{}" src="{}"/>'.format(
            html.escape(name), html.escape(name), html.escape(img_url))
        entry = feed.add_entry()
        entry.content(content, type='xhtml')
        entry.id(prod_url)
        entry.link(href=prod_url)
        entry.title(name)
    res = HttpResponse(feed.atom_str(),
                       content_type='application/atom+xml; charset=utf-8')
    res['Cache-Control'] = 'max-age=300,public'
    return res
def get(self, *args, **kwargs):
    """Render CakeResume job-search results for ``kwargs['keyword']``.

    Failures while fetching or parsing degrade to an empty feed.
    """
    keyword = kwargs['keyword']
    url = 'https://www.cakeresume.com/jobs?q={}'.format(
        urllib.parse.quote_plus(keyword))
    title = 'CakeResume 搜尋 - {}'.format(keyword)
    feed = feedgen.feed.FeedGenerator()
    feed.author({'name': 'Feed Generator'})
    feed.id(url)
    feed.link(href=url, rel='alternate')
    feed.title(title)
    try:
        s = services.RequestsService().process()
        r = s.get(url)
        # Job data is embedded in the page's serialized Redux state.
        state = re.search(
            r'<script>window\.__APP_INITIAL_REDUX_STATE__ = (.*?)</script>',
            r.text, re.MULTILINE).group(1)
        # The blob is JS, not strict JSON: patch the one undefined value.
        state = state.replace('"jwt":undefined', '"jwt":false')
        items = json.loads(state)['jobSearch']['jobResultsState'][
            'content']['_rawResults'][0]['hits']
    except Exception:
        # Narrowed from a bare ``except`` that also swallowed
        # SystemExit/KeyboardInterrupt.
        items = []
    for item in items:
        item_author = item['page']['name']
        item_content = '<p>{}</p><p>{}</p>'.format(
            html.escape(item.get('requirements_plain_text', '')),
            html.escape(item.get('description_plain_text', '')))
        item_title = item['title']
        item_url = 'https://www.cakeresume.com/companies/{}/jobs/{}'.format(
            item['page']['path'], item['path'])
        # content_updated_at is epoch milliseconds.
        item_updated_at = datetime.datetime.fromtimestamp(
            item['content_updated_at'] / 1000, tz=datetime.timezone.utc)
        entry = feed.add_entry()
        entry.author({'name': item_author})
        entry.content(item_content, type='xhtml')
        entry.id(item_url)
        entry.link(href=item_url)
        entry.title(item_title)
        entry.updated(item_updated_at)
    res = HttpResponse(feed.atom_str(),
                       content_type='application/atom+xml; charset=utf-8')
    res['Cache-Control'] = 'max-age=300,public'
    return res
def get(self, *args, **kwargs):
    """Render 518 job-search results for ``kwargs['keyword']`` as Atom.

    Fetch failures degrade to an empty document (empty feed).
    """
    keyword = kwargs['keyword']
    url = ('https://www.518.com.tw/job-index-P-1.html?i=1&am=1&ad={}'
           '&orderType=1&orderField=8').format(
        urllib.parse.quote_plus(keyword))
    title = '518 搜尋 - {}'.format(keyword)
    feed = feedgen.feed.FeedGenerator()
    feed.author({'name': 'Feed Generator'})
    feed.id(url)
    feed.link(href=url, rel='alternate')
    feed.title(title)
    try:
        s = services.RequestsService().process()
        r = s.get(url)
        body = lxml.html.fromstring(r.text)
    except Exception:
        # Narrowed from a bare ``except``; fall back to an empty page.
        body = lxml.html.fromstring('<html></html>')
    for item in body.cssselect('#listContent > ul'):
        try:
            a = item.cssselect('li.title a')[0]
            job_title = a.getchildren()[0].text_content()
            job_url = a.get('href')
            # Strip the query string from the job link (raw-string regex).
            job_url = re.sub(r'\?.*', '', job_url)
            job_company = item.cssselect('li.company')[0].text_content()
            job_desc = item.cssselect('li.sumbox')[0].text_content()
            content = '<h3>{}</h3><p>{}</p>'.format(
                html.escape(job_company), html.escape(job_desc))
            entry = feed.add_entry()
            entry.content(content, type='xhtml')
            entry.id(job_url)
            entry.link(href=job_url)
            entry.title(job_title)
        except IndexError:
            # Rows missing the expected sub-elements are skipped.
            pass
    res = HttpResponse(feed.atom_str(),
                       content_type='application/atom+xml; charset=utf-8')
    res['Cache-Control'] = 'max-age=300,public'
    return res
def get(self, *args, **kwargs):
    """Render 104 job-search results for ``kwargs['keyword']`` as Atom.

    Fixes: both bare ``except`` clauses narrowed to ``except Exception``,
    and the fallback document was the malformed '</html></html>' (leading
    close tag) — now a well-formed empty document.
    """
    keyword = kwargs['keyword']
    url = ('https://www.104.com.tw/jobs/search/?ro=0&kwop=7&keyword={}'
           '&order=11&asc=0&page=1&mode=s').format(keyword)
    title = '104 搜尋 - {}'.format(keyword)
    feed = feedgen.feed.FeedGenerator()
    feed.author({'name': 'Feed Generator'})
    feed.id(url)
    feed.link(href=url, rel='alternate')
    feed.title(title)
    try:
        s = services.RequestsService().process()
        r = s.get(url)
        body = lxml.html.fromstring(r.text)
    except Exception:
        body = lxml.html.fromstring('<html></html>')
    for item in body.cssselect('article.job-list-item'):
        try:
            job_company = item.get('data-cust-name')
            job_desc = item.cssselect(
                'p.job-list-item__info')[0].text_content()
            job_title = item.get('data-job-name')
            job_url = item.cssselect('a.js-job-link')[0].get('href')
            # Normalize scheme-relative links, drop the tracking param.
            job_url = re.sub(r'^//', 'https://', job_url)
            job_url = re.sub(r'&jobsource=\w*$', '', job_url)
            content = '<h3>{}</h3><pre>{}</pre>'.format(
                html.escape(job_company), html.escape(job_desc))
            entry = feed.add_entry()
            entry.content(content, type='xhtml')
            entry.id(job_url)
            entry.link(href=job_url)
            entry.title(job_title)
        except Exception:
            # Skip malformed result cards.
            pass
    res = HttpResponse(feed.atom_str(),
                       content_type='application/atom+xml; charset=utf-8')
    res['Cache-Control'] = 'max-age=300,public'
    return res
def magic(keyword):
    """Print an Atom feed of YouTube search results for *keyword*.

    Fix: ``requests.get`` had no timeout, so a stalled connection could
    hang the script indefinitely; a 5-second timeout is now applied.
    """
    url = 'https://www.youtube.com/results?sp=CAI%%253D&search_query=%s' % (
        urllib.parse.quote_plus(keyword))
    r = requests.get(url, timeout=5)
    title = 'YouTube Search - %s' % (keyword)
    feed = feedgen.feed.FeedGenerator()
    feed.author({'name': 'YouTube Search Feed Generator'})
    feed.id(url)
    feed.link(href=url, rel='alternate')
    feed.title(title)
    body = lxml.html.fromstring(r.text)
    for item in body.cssselect('ol.item-section div.yt-lockup-video'):
        try:
            a = item.cssselect('a[title].spf-link')[0]
            # link: relative hrefs are rooted at youtube.com
            link = a.get('href')
            if '/' == link[0]:
                link = 'https://www.youtube.com' + link
            # img: derive the thumbnail from the v= query parameter
            link_tuple = urllib.parse.urlparse(link)
            d = urllib.parse.parse_qs(link_tuple[4])
            img = 'https://i.ytimg.com/vi/' + d['v'][0] + '/hqdefault.jpg'
            # title
            title = a.get('title')
            # content
            content = '%s<br/><img alt="%s" src="%s"/>' % (
                html.escape(title), html.escape(title), html.escape(img))
            entry = feed.add_entry()
            entry.content(content, type='xhtml')
            entry.id(link)
            entry.title(title)
            entry.link(href=link)
        except IndexError:
            pass
    print(str(feed.atom_str(), 'utf-8'))
def get(self, *args, **kwargs):
    """Render the Dcard popular-forums listing as an Atom feed.

    Fetches a page key via the GetHead API, then the actual items via
    GetPage; on a non-200 head response the feed is served empty.
    """
    url = 'https://www.dcard.tw/f'
    title = 'Dcard 首頁'
    feed = feedgen.feed.FeedGenerator()
    feed.author({'name': 'Feed Generator'})
    feed.id(url)
    feed.link(href=url, rel='alternate')
    feed.title(title)
    # NOTE(review): `proxy` is obtained but never attached to the session
    # (no `s.proxies = ...` as the sibling handlers do) — either dead code
    # or a missing assignment; confirm whether these requests should be
    # routed through the SOCKS5 proxy.
    proxy = services.ProxySocks5Service().process()
    s = services.RequestsService().process()
    r = s.get(
        'https://www.dcard.tw/service/api/v2/popularForums/GetHead?listKey=popularForums'
    )
    if r.status_code == 200:
        # GetHead yields the page key consumed by GetPage.
        head = r.json()['head']
        r = s.get(
            'https://www.dcard.tw/service/api/v2/popularForums/GetPage?pageKey={}'
            .format(head))
        items = r.json()['items']
    else:
        items = []
    for item in items:
        # Each forum item carries posts; only the first post is surfaced.
        item_title = item['posts'][0]['title']
        item_url = 'https://www.dcard.tw/f/{}/p/{}'.format(
            item['alias'], item['posts'][0]['id'])
        item_desc = item['posts'][0]['excerpt']
        item_content = '<p>{}</p>'.format(html.escape(item_desc))
        entry = feed.add_entry()
        entry.content(item_content, type='xhtml')
        entry.id(item_url)
        entry.title(item_title)
        entry.link(href=item_url)
    res = HttpResponse(feed.atom_str(),
                       content_type='application/atom+xml; charset=utf-8')
    res['Cache-Control'] = 'max-age=300,public'
    return res
def get(self, *args, **kwargs):
    """Render 1111 job-search results for ``kwargs['keyword']`` as Atom."""
    keyword = kwargs['keyword']
    url = ('https://www.1111.com.tw/search/job?flag=13&ks={}&fs=1&si=1'
           '&ts=4&col=da&sort=desc').format(urllib.parse.quote_plus(keyword))

    feed = feedgen.feed.FeedGenerator()
    feed.author({'name': 'Feed Generator'})
    feed.id(url)
    feed.link(href=url, rel='alternate')
    feed.title('1111 搜尋 - {}'.format(keyword))

    session = services.RequestsService().process()
    page = session.get(url)
    page.encoding = 'utf-8'  # decode the body as UTF-8
    doc = lxml.html.fromstring(page.text)

    for card in doc.cssselect('li.jbInfo'):
        link = card.cssselect('a.mobileItemClick')[0]
        job_title = link.get('title')
        job_url = link.get('href')
        # Site-relative job links need the host prefix.
        if job_url.startswith('/job/'):
            job_url = 'https://www.1111.com.tw' + job_url
        company = card.cssselect('a.d-block.organ')[0].get('title')
        summary = card.cssselect('.jbInfoTxt')[0].text_content()

        entry = feed.add_entry()
        entry.content('<h3>{}</h3><p>{}</p>'.format(html.escape(company),
                                                    html.escape(summary)),
                      type='xhtml')
        entry.id(job_url)
        entry.link(href=job_url)
        entry.title(job_title)

    res = HttpResponse(feed.atom_str(),
                       content_type='application/atom+xml; charset=utf-8')
    res['Cache-Control'] = 'max-age=300,public'
    return res
def dcardtop():
    """Serve the Dcard front-page post list as an Atom feed."""
    page_url = "https://www.dcard.tw/f"

    feed = feedgen.feed.FeedGenerator()
    feed.author({"name": "Feed Generator"})
    feed.id(page_url)
    feed.link(href=page_url, rel="alternate")
    feed.title("Dcard Top")

    resp = requests.get(page_url, headers={"User-agent": user_agent},
                        timeout=5)
    doc = lxml.html.fromstring(resp.text)
    # Class names carry hashed suffixes, so match on the stable prefix.
    for post in doc.cssselect('div[class^="PostList_entry_"]'):
        try:
            author = post.cssselect(
                'div[class^="PostAuthorHeader_meta_"]')[0].text_content()
            excerpt = post.cssselect(
                'div[class^="PostEntry_excerpt_"]')[0].text_content()
            heading = post.cssselect(
                'h3[class^="PostEntry_title_"]')[0].text_content()
            link = post.cssselect('a[class^="PostEntry_root_"]')[0].get(
                'href')
            if link.startswith("/"):
                link = "https://www.dcard.tw" + link

            entry = feed.add_entry()
            entry.author({"name": author})
            entry.content(html.escape(excerpt), type="xhtml")
            entry.id(link)
            entry.link(href=link)
            entry.title(heading)
        except IndexError:
            # Skip entries missing any expected sub-element.
            pass

    bottle.response.set_header("Cache-Control", "max-age=300,public")
    bottle.response.set_header("Content-Type", "application/atom+xml")
    return feed.atom_str()
def pchome(keyword):
    """Serve PChome search results (newest first) as an Atom feed."""
    url = ("https://ecshweb.pchome.com.tw/search/v3.3/all/results"
           "?q={}&page=1&sort=new/dc").format(
               urllib.parse.quote_plus(keyword))

    feed = feedgen.feed.FeedGenerator()
    feed.author({"name": "Feed Generator"})
    feed.id(url)
    feed.link(href=url, rel="alternate")
    feed.title("PChome 搜尋 - {}".format(keyword))

    resp = requests.get(url, headers={"User-agent": user_agent}, timeout=5)
    for prod in json.loads(resp.text)["prods"]:
        # 'D'-prefixed categories link to the 24h store, others to mall.
        if prod["cateId"][0] == "D":
            prod_url = "https://24h.pchome.com.tw/prod/" + prod["Id"]
        else:
            prod_url = "https://mall.pchome.com.tw/prod/" + prod["Id"]
        img_url = "https://a.ecimg.tw%s" % (prod["picB"])
        markup = '{}<br/><img alt="{}" src="{}"/>'.format(
            html.escape(prod["describe"]), html.escape(prod["name"]),
            html.escape(img_url))

        entry = feed.add_entry()
        entry.author({"name": prod["author"]})
        entry.content(markup, type="xhtml")
        entry.id(prod_url)
        entry.link(href=prod_url)
        entry.title(prod["name"])

    bottle.response.set_header("Cache-Control", "max-age=300,public")
    bottle.response.set_header("Content-Type", "application/atom+xml")
    return feed.atom_str()
def job518(keyword):
    """Serve 518 job-search results for *keyword* as an Atom feed.

    Fix: the query-string-stripping pattern used a backslash escape in a
    non-raw string literal ("\\?"), which Python flags as an invalid
    escape (DeprecationWarning, an error in future versions); it is now
    a raw string.
    """
    url = ("https://www.518.com.tw/job-index-P-1.html?i=1&am=1&ad={}"
           "&orderType=1&orderField=8").format(
        urllib.parse.quote_plus(keyword))
    title = "518 搜尋 - {}".format(keyword)
    feed = feedgen.feed.FeedGenerator()
    feed.author({"name": "Feed Generator"})
    feed.id(url)
    feed.link(href=url, rel="alternate")
    feed.title(title)
    r = requests.get(url, headers={"User-agent": user_agent}, timeout=5)
    body = lxml.html.fromstring(r.text)
    for item in body.cssselect("#listContent > ul"):
        try:
            a = item.cssselect("li.title a")[0]
            job_title = a.getchildren()[0].text_content()
            job_url = a.get("href")
            # Strip the query string from the job link.
            job_url = re.sub(r"\?.*", "", job_url)
            job_company = item.cssselect("li.company")[0].text_content()
            job_desc = item.cssselect("li.sumbox")[0].text_content()
            content = "<h3>{}</h3><p>{}</p>".format(html.escape(job_company),
                                                    html.escape(job_desc))
            entry = feed.add_entry()
            entry.content(content, type="xhtml")
            entry.id(job_url)
            entry.link(href=job_url)
            entry.title(job_title)
        except IndexError:
            # Rows missing the expected sub-elements are skipped.
            pass
    bottle.response.set_header("Cache-Control", "max-age=300,public")
    bottle.response.set_header("Content-Type", "application/atom+xml")
    return feed.atom_str()
def get(self, *args, **kwargs):
    """Render PChome's light-novel new arrivals as an Atom feed.

    Fetch/parse failures degrade to an empty feed.
    """
    url = ('https://ecapi.pchome.com.tw/cdn/ecshop/prodapi/v2/newarrival/'
           'DJAZ/prod&offset=1&limit=20&fields=Id,Nick,Pic,Price,Discount,'
           'isSpec,Name,isCarrier,isSnapUp,isBigCart'
           '&_callback=jsonp_prodlist?_callback=jsonp_prodlist')
    title = 'PChome 輕小說'
    feed = feedgen.feed.FeedGenerator()
    feed.author({'name': 'Feed Generator'})
    feed.id(url)
    feed.link(href=url, rel='alternate')
    feed.title(title)
    try:
        s = services.RequestsService().process()
        r = s.get(url)
        # Strip the JSONP callback wrapper, keeping the JSON array.
        body = re.match(r'^[^\[]*(\[.*\])[^\[]*$', r.text).group(1)
        items = json.loads(body)
    except Exception:
        # Narrowed from a bare ``except`` that also swallowed
        # SystemExit/KeyboardInterrupt.
        items = []
    for item in items:
        content = '{}<br/><img alt="{}" src="https://cs-a.ecimg.tw{}"/>'.format(
            html.escape(item['Nick']),
            html.escape(item['Nick']),
            html.escape(item['Pic']['B']),
        )
        book_title = item['Nick']
        book_url = 'https://24h.pchome.com.tw/books/prod/{}'.format(
            urllib.parse.quote_plus(item['Id']))
        entry = feed.add_entry()
        entry.content(content, type='xhtml')
        entry.id(book_url)
        entry.title(book_title)
        entry.link(href=book_url)
    res = HttpResponse(feed.atom_str(),
                       content_type='application/atom+xml; charset=utf-8')
    res['Cache-Control'] = 'max-age=300,public'
    return res
def job1111(keyword):
    """Serve 1111 job-search results for *keyword* as an Atom feed."""
    url = ("https://www.1111.com.tw/job-bank/job-index.asp?flag=13&ks={}"
           "&fs=1&si=1&ts=4&col=da&sort=desc").format(
               urllib.parse.quote_plus(keyword))

    feed = feedgen.feed.FeedGenerator()
    feed.author({"name": "Feed Generator"})
    feed.id(url)
    feed.link(href=url, rel="alternate")
    feed.title("1111 搜尋 - {}".format(keyword))

    resp = requests.get(url, headers={"User-agent": user_agent}, timeout=5)
    resp.encoding = "utf-8"  # decode the body as UTF-8
    doc = lxml.html.fromstring(resp.text)

    for card in doc.cssselect("li.digest"):
        link = card.cssselect("a.mobiFullLInk")[0]
        job_url = link.get("href")
        # Scheme-relative hrefs need an explicit https prefix.
        if job_url.startswith("//"):
            job_url = "https:" + job_url
        company = card.cssselect(".jbInfoin h4 a")[0].get("title")
        summary = card.cssselect(".jbInfoTxt")[0].text_content()

        entry = feed.add_entry()
        entry.content("<h3>{}</h3><p>{}</p>".format(html.escape(company),
                                                    html.escape(summary)),
                      type="xhtml")
        entry.id(job_url)
        entry.link(href=job_url)
        entry.title(link.get("title"))

    bottle.response.set_header("Cache-Control", "max-age=300,public")
    bottle.response.set_header("Content-Type", "application/atom+xml")
    return feed.atom_str()
def get(self, *args, **kwargs):
    """Render the daily top replurks for ``kwargs['lang']`` as Atom."""
    lang = kwargs['lang']
    url = ('https://www.plurk.com/Stats/topReplurks?period=day'
           '&lang={}&limit=10'.format(urllib.parse.quote_plus(lang)))

    feed = feedgen.feed.FeedGenerator()
    feed.author({'name': 'Feed Generator'})
    feed.id(url)
    feed.link(href=url, rel='alternate')
    feed.title('Plurk Top ({})'.format(lang))

    session = services.RequestsService().process()
    stats = json.loads(session.get(url).text)['stats']
    for _, stat in stats:
        # Plurk permalinks encode the numeric id in base36.
        permalink = 'https://www.plurk.com/p/' + base36.dumps(stat['id'])
        body_html = self.str_clean(stat['content'])
        # Drop hard-coded image dimensions so readers can scale images.
        body_html = re.sub(r' height="\d+(px)?"', ' ', body_html)
        body_html = re.sub(r' width="\d+(px)?"', ' ', body_html)

        entry = feed.add_entry()
        entry.author({'name': self.str_clean(stat['owner']['full_name'])})
        entry.content(body_html, type='CDATA')
        entry.id(permalink)
        entry.link(href=permalink)
        entry.published(stat['posted'])
        entry.title(self.str_clean(stat['content_raw']))

    res = HttpResponse(feed.atom_str(),
                       content_type='application/atom+xml; charset=utf-8')
    res['Cache-Control'] = 'max-age=300,public'
    return res
def job104(keyword):
    """Serve 104 job-search results for *keyword* as an Atom feed."""
    url = ("https://www.104.com.tw/jobs/search/?ro=0&kwop=7&keyword={}"
           "&order=11&asc=0&page=1&mode=s").format(keyword)

    feed = feedgen.feed.FeedGenerator()
    feed.author({"name": "Feed Generator"})
    feed.id(url)
    feed.link(href=url, rel="alternate")
    feed.title("104 搜尋 - {}".format(keyword))

    resp = requests.get(url, headers={"User-agent": user_agent}, timeout=5)
    doc = lxml.html.fromstring(resp.text)
    for card in doc.cssselect("article.job-list-item"):
        company = card.get("data-cust-name")
        summary = card.cssselect("p.job-list-item__info")[0].text_content()
        job_url = card.cssselect("a.js-job-link")[0].get("href")
        # Normalize scheme-relative links and drop the tracking parameter.
        job_url = re.sub(r"^//", "https://", job_url)
        job_url = re.sub(r"&jobsource=\w*$", "", job_url)

        entry = feed.add_entry()
        entry.content("<h3>{}</h3><pre>{}</pre>".format(
            html.escape(company), html.escape(summary)), type="xhtml")
        entry.id(job_url)
        entry.link(href=job_url)
        entry.title(card.get("data-job-name"))

    bottle.response.set_header("Cache-Control", "max-age=300,public")
    bottle.response.set_header("Content-Type", "application/atom+xml")
    return feed.atom_str()
def get(self, *args, **kwargs):
    """Render the post list of a Dcard board as an Atom feed.

    Returns HTTP 503 when the proxied upstream fetch fails.
    """
    board = kwargs['board']
    url = 'https://www.dcard.tw/f/{}'.format(
        urllib.parse.quote_plus(board))
    title = 'Dcard 看板 - {}'.format(board)
    feed = feedgen.feed.FeedGenerator()
    feed.author({'name': 'Feed Generator'})
    feed.id(url)
    feed.link(href=url, rel='alternate')
    feed.title(title)
    try:
        proxy = services.ProxySocks5Service().process()
        s = services.RequestsService().process()
        s.proxies = {'http': proxy, 'https': proxy}
        r = s.get(url)
        body = lxml.html.fromstring(r.text)
    except Exception:
        # Narrowed from a bare ``except`` that also caught
        # SystemExit/KeyboardInterrupt.
        return HttpResponse('Service Unavailable', status=503)
    items = body.cssselect('div[data-index]')
    for item in items:
        # Skip rows that carry data-index but no <article> content.
        if not item.cssselect('article'):
            continue
        item_title = item.cssselect('article > h2')[0].text_content()
        item_url = item.cssselect('article > h2 > a')[0].get('href')
        item_desc = item.cssselect('article > h2 + div')[0].text_content()
        try:
            item_img = item.cssselect('article > img')[0]
        except IndexError:
            item_img_src = None
        else:
            item_img_src = item_img.get('src')
            # Drop imgur's 'b' (thumbnail) suffix to link the full image.
            g = re.match(r'^(https://imgur\.dcard\.tw/\w+)b(\.jpg)$',
                         item_img_src)
            if g:
                item_img_src = g.group(1) + g.group(2)
        if item_url.startswith('/f/'):
            item_url = 'https://www.dcard.tw' + item_url
        if item_img_src is None:
            item_content = '{}'.format(html.escape(item_desc))
        else:
            item_content = '<img alt="{}" src="{}"/><br/>{}'.format(
                html.escape(item_title), html.escape(item_img_src),
                html.escape(item_desc))
        entry = feed.add_entry()
        entry.content(item_content, type='xhtml')
        entry.id(item_url)
        entry.title(item_title)
        entry.link(href=item_url)
    res = HttpResponse(feed.atom_str(),
                       content_type='application/atom+xml; charset=utf-8')
    res['Cache-Control'] = 'max-age=300,public'
    return res
def generatefeed(user):
    """Build (and disk-cache) an Atom feed of *user*'s YouTube uploads.

    Fix: ``feed.atom_str()`` returns ``bytes``; it was written to a
    text-mode cache file (raising TypeError) and returned as bytes while
    the cache-hit path returned ``str``. The feed is now decoded once so
    both paths return text and the cache write succeeds.
    """
    # Validate that it's a valid user id
    # https://support.google.com/a/answer/33386?hl=en
    if not re.match(r"^[A-Za-z0-9_'.-]{5,20}$", user):
        flask.abort(400, 'Invalid username format')

    # Try the cache first, unless it's old
    cache_file = os.path.join('.cache', user)
    if os.path.exists(cache_file):
        creation_time = os.path.getmtime(cache_file)
        if time.time() - creation_time < CACHE_TIME:
            with open(cache_file) as fin:
                return fin.read()

    # Use the channel to get the 'uploads' playlist id
    response = requests.get(
        'https://www.googleapis.com/youtube/v3/channels',
        params = {
            'part': 'contentDetails',
            'forUsername': user,
            'key': API_KEY,
        }
    )
    if response.status_code != 200:
        flask.abort(400, 'YouTube API error')
    if not response.json()['items']:
        flask.abort(400, 'User not found')
    playlistId = response.json()['items'][0]['contentDetails'][
        'relatedPlaylists']['uploads']

    # Get the most recent 20 videos on the 'uploads' playlist
    response = requests.get(
        'https://www.googleapis.com/youtube/v3/playlistItems',
        params = {
            'part': 'snippet',
            'maxResults': 20,
            'playlistId': playlistId,
            'key': API_KEY
        }
    )

    # Generate a list of results that can be used as feed items
    feed = feedgen.feed.FeedGenerator()
    feed.title(user + ' (YRSS)')
    feed.author({'name': user + ' (YRSS)'})
    feed.link(href = 'https://www.youtube.com/user/' + user)
    feed.id('https://www.youtube.com/user/' + user)

    for item in response.json()['items']:
        title = item['snippet']['title']
        video_id = item['snippet']['resourceId']['videoId']
        published = item['snippet']['publishedAt']
        thumbnail = item['snippet']['thumbnails']['high']['url']
        video_url = 'https://www.youtube.com/watch?v=' + video_id

        item = feed.add_entry()
        item.title(title)
        item.link(href = video_url)
        item.published(dateutil.parser.parse(published))
        item.updated(dateutil.parser.parse(published))
        item.id(video_url)
        item.content('''
            <a href="{url}"><img src="{img}" /></a><br />
            <a href="{url}">{title}</a>
            '''.format(
                url = video_url,
                img = thumbnail,
                title = title,
            ), None, 'html')

    # Cache to disk — decode once so the text-mode write and both return
    # paths agree on str.
    feed_txt = str(feed.atom_str(), 'utf-8')
    with open(cache_file, 'w') as fout:
        fout.write(feed_txt)

    return feed_txt
def get(self, *args, **kwargs):
    """Render Momoshop search results for ``kwargs['keyword']`` as Atom.

    Returns HTTP 503 when fetching/parsing fails, and an empty feed when
    the search API reports a non-200 result code.
    """
    keyword = kwargs['keyword']
    url = ('https://www.momoshop.com.tw/search/searchShop.jsp?keyword={}'
           '&searchType=4&cateLevel=0&cateCode=&curPage=1&_isFuzzy=0'
           '&showType=chessboardType').format(
        urllib.parse.quote_plus(keyword))
    title = 'Momoshop 搜尋 - {}'.format(keyword)
    feed = feedgen.feed.FeedGenerator()
    feed.author({'name': 'Feed Generator'})
    feed.id(url)
    feed.link(href=url, rel='alternate')
    feed.title(title)
    try:
        s = services.RequestsService().process()
        # Environment cookie.
        r = s.get('https://www.momoshop.com.tw/')
        # Get the actual content.
        now = int(time.time())
        data = {
            'flag': 2018,
            'data': {
                'specialGoodsType': '',
                'searchValue': keyword,
                'cateCode': '',
                'cateLevel': '-1',
                'cp': 'N',
                'NAM': 'N',
                'first': 'N',
                'freeze': 'N',
                'superstore': 'N',
                'tvshop': 'N',
                'china': 'N',
                'tomorrow': 'N',
                'stockYN': 'N',
                'prefere': 'N',
                'threeHours': 'N',
                'showType': 'chessboardType',
                'curPage': '1',
                'priceS': '0',
                'priceE': '9999999',
                'searchType': '4',
                'reduceKeyword': '',
                'isFuzzy': '0',
                'rtnCateDatainfo': {
                    'cateCode': '',
                    'cateLv': '-1',
                    'keyword': keyword,
                    'curPage': '1',
                    'historyDoPush': False,
                    'timestamp': now,
                },
            }
        }
        url = 'https://www.momoshop.com.tw/ajax/ajaxTool.jsp?n=2018'
        r = s.post(url, data={'data': json.dumps(data)},
                   headers={'Referer': 'https://www.momoshop.com.tw/'})
        body = json.loads(r.text)
    except Exception:
        # Narrowed from a bare ``except`` that also caught
        # SystemExit/KeyboardInterrupt.
        return HttpResponse('Service Unavailable', status=503)
    # If it's not 200 then return an empty feed.
    if body['rtnData']['searchResult']['resultCode'] != '200':
        res = HttpResponse(feed.atom_str(),
                           content_type='application/atom+xml; charset=utf-8')
        res['Cache-Control'] = 'max-age=300,public'
        return res
    for item in body['rtnData']['searchResult']['rtnSearchData'][
            'goodsInfoList']:
        # Product name & description
        item_img = item['imgUrl']
        item_name = item['goodsName']
        item_title = '({}) {}'.format(item['goodsPrice'], item_name)
        item_url = ('https://www.momoshop.com.tw/goods/GoodsDetail.jsp'
                    '?i_code={}').format(item['goodsCode'])
        # Use larger size.
        item_img = item_img.replace('_L.', '_B.')
        content = '<img alt="{}" src="{}"/>'.format(html.escape(item_name),
                                                    html.escape(item_img))
        entry = feed.add_entry()
        entry.content(content, type='xhtml')
        entry.id(item_url)
        entry.link(href=item_url)
        entry.title(item_title)
    res = HttpResponse(feed.atom_str(),
                       content_type='application/atom+xml; charset=utf-8')
    res['Cache-Control'] = 'max-age=300,public'
    return res
def get(self, *args, **kwargs):
    """Render 591 rental-search results as an Atom feed.

    Fixes: the upstream URL contained '®ion=' — the '&reg' of '&region='
    had been mangled into the '®' character, so the region filter was
    never sent correctly; the query string is restored to '&region='.
    The bare ``except`` is also narrowed to ``except Exception``.
    """
    region = kwargs['region']
    keyword = kwargs['keyword']
    # Support query string to filter results.
    qs = self.request.META.get('QUERY_STRING', '')
    if qs != '':
        qs = '&' + qs
    url = ('https://rent.591.com.tw/?kind=0&order=posttime&orderType=desc'
           '&region={}&keywords={}{}').format(region, keyword, qs)
    if qs == '':
        title = '591 出租搜尋 - {}'.format(keyword)
    else:
        title = '591 出租搜尋 - {} ({})'.format(keyword, qs)
    feed = feedgen.feed.FeedGenerator()
    feed.author({'name': 'Feed Generator'})
    feed.id(url)
    feed.link(href=url, rel='alternate')
    feed.title(title)
    try:
        s = services.RequestsService().process()
        r = s.get(url)
        text = r.text
    except Exception:
        # Fall back to an empty document (empty feed) on fetch failure.
        text = '<html></html>'
    body = lxml.html.fromstring(text)
    items = body.cssselect('#content > ul')
    for item in items:
        item_metainfo = item.cssselect(
            '.infoContent .lightBox')[0].text_content()
        # 坪 (ping) is the area unit used in the listing text.
        item_area = re.search(r'([\.0-9]+坪)', item_metainfo).group(1)
        item_desc = item.text_content()
        item_img = item.cssselect('.imageBox img')[0].get('data-original')
        item_price = item.cssselect('.price')[0].text_content()
        item_title = item.cssselect('.infoContent')[0].text_content()
        item_url = item.cssselect('a')[0].get('href')
        item_url = re.sub(r'^//', 'https://', item_url)
        # Price per ping = price (commas and trailing text stripped) / area.
        item_price_num = item_price.replace(',', '')
        item_price_num = float(re.sub(r' .*', '', item_price_num))
        item_area_num = float(re.sub(r'坪.*', '', item_area))
        item_unitprice = int(item_price_num / item_area_num)
        content = '<img alt="{}" src="{}"/><br/>{}<br/>{}'.format(
            html.escape(item_title), html.escape(item_img),
            html.escape(item_title), html.escape(item_desc))
        entry = feed.add_entry()
        entry.content(content, type='xhtml')
        entry.id(item_url)
        entry.link(href=item_url)
        entry.title('${}/坪 - {} - {}'.format(item_unitprice, item_area,
                                             item_title))
    res = HttpResponse(feed.atom_str(),
                       content_type='application/atom+xml; charset=utf-8')
    res['Cache-Control'] = 'max-age=300,public'
    return res