def story(self, ref):
    """Build the story dict for the publicly visible post whose id matches ``ref``.

    The post list is fetched for ``self.BASE_DOMAIN``; only posts whose
    ``audience`` is ``"everyone"`` are considered, and the first one whose
    stringified ``id`` equals the un-prefixed ``ref`` is used.

    Returns:
        A dict with the keys ``author``, ``author_link``, ``date``, ``score``,
        ``title``, ``link``, ``url``, ``comments``, ``num_comments`` and, when
        an icon could be extracted from the page, ``icon``.  ``False`` is
        returned when the post list is empty/unavailable or no post matches.
    """
    ref = self.strip_ref_prefix(ref)
    stories = json(lambda x: api_stories(x, self.BASE_DOMAIN),
                   headers={'Referer': self.BASE_DOMAIN})
    if not stories:
        return False

    # First public post with a matching id; None when there is no such post.
    r = next(
        (i for i in stories
         if i.get("audience") == "everyone" and str(i.get('id')) == ref),
        None,
    )
    if not r:
        return False

    s = {}
    s['author'] = ''
    s['author_link'] = ''
    s['date'] = unix(r.get('post_date'))
    # 'reactions' may be absent or null; the score is then None instead of
    # raising AttributeError.
    s['score'] = (r.get('reactions') or {}).get('❤')
    s['title'] = r.get('title', '')
    s['link'] = r.get('canonical_url', '')
    s['url'] = r.get('canonical_url', '')

    comments = json(lambda x: api_comments(x, self.BASE_DOMAIN),
                    r.get('id'),
                    headers={'Referer': self.BASE_DOMAIN})
    # Tolerate both a failed fetch and a payload without a 'comments' list.
    raw_comments = (comments.get('comments') or []) if comments else []
    parsed_comments = [comment(i) for i in raw_comments]
    # Drop comments the parser rejected (falsy results).
    s['comments'] = [c for c in parsed_comments if c]
    s['num_comments'] = r.get('comment_count', 0)

    # 'publishedBylines' may be absent or null; treat that as "no authors".
    bylines = [self._bylines(byline)
               for byline in (r.get('publishedBylines') or [])]
    authors = [a for a in bylines if a]
    if authors:
        s['author'] = authors[0].get('name')
        s['author_link'] = authors[0].get('link')

    # The fetch helper is handed a callable that yields the story link.
    markup = xml(lambda x: s['link'])
    if markup:
        icons = get_icons(markup, url=s['link'])
        if icons:
            s['icon'] = icons[0]

    return s
def get_json_comments(url, markup=None):
    """Fetch and parse the Gigya comment stream for a stuff.co.nz article.

    Args:
        url: Article URL. It must match
            ``https://www.stuff.co.nz/<section path>/<digits>/<slug>``;
            the ``<section path>/<digits>`` part becomes the stream id.
        markup: Optional already-downloaded page HTML. When falsy, the page
            is fetched through the ``xml`` helper.

    Returns:
        A list with one ``_parse_json_comment`` result per comment in the
        response. An empty list is returned when the URL does not match, the
        page or the comment payload cannot be obtained, or no Gigya API key
        is found in the page's script tags.
    """
    regex = r"https:\/\/www\.stuff\.co\.nz\/(.*\/\d+)/[^\/]+"
    matched = re.match(regex, url)
    if not matched:
        return []
    path = matched.group(1)

    if not markup:
        markup = xml(lambda x: url)
    if not markup:
        # The page could not be fetched; there is nothing to parse.
        return []

    soup = BeautifulSoup(markup, features='html.parser')
    gigya_prefix = "https://cdns.gigya.com/JS/gigya.js?apiKey="
    script = next(
        (tag for tag in soup.find_all('script', src=True)
         if tag['src'].startswith(gigya_prefix)),
        None,
    )
    if script is None:
        return []

    # The API key is carried in the query string of the Gigya loader script.
    _, query = script['src'].split('?', maxsplit=1)
    api_key = None
    for pair in query.split('&'):
        # partition() tolerates parameters with no '=' or with '=' inside
        # the value, which a plain split('=') + 2-tuple unpack does not.
        name, _, value = pair.partition('=')
        if name.lower() == 'apikey' and value:
            api_key = value
            break
    if not api_key:
        return []

    comments_url = f"https://comments.us1.gigya.com/comments.getComments?threaded=true&format=json&categoryID=Stuff&streamID=stuff/{path}&APIKey={api_key}"
    data = json(lambda x: comments_url)
    if not data:
        # Failed or empty response from the comments endpoint.
        return []

    comments = data.get('comments') or []
    return [_parse_json_comment(c) for c in comments]
def story(self, ref):
    """Build the story dict for the publicly visible top post matching ``ref``.

    The list is fetched from ``SUBSTACK_API_TOP_POSTS``; only posts whose
    ``audience`` is ``"everyone"`` are considered, and the first one whose
    stringified ``id`` equals the un-prefixed ``ref`` is used.

    Returns:
        A dict with the keys ``author``, ``author_link``, ``date``, ``score``,
        ``title``, ``link``, ``url``, ``comments`` and ``num_comments``, or
        ``False`` when the list is empty/unavailable or no post matches.
    """
    ref = self.strip_ref_prefix(ref)
    stories = json(SUBSTACK_API_TOP_POSTS,
                   headers={'Referer': SUBSTACK_REFERER})
    if not stories:
        return False

    # First public post with a matching id; None when there is no such post.
    r = next(
        (i for i in stories
         if i.get("audience") == "everyone" and str(i.get('id')) == ref),
        None,
    )
    if not r:
        return False

    # NOTE(review): assumes every post carries a 'pub' mapping; a post
    # without one still raises here, as before - confirm against the API.
    pub = r.get('pub')
    base_url = pub.get('base_url')

    s = {}
    s['author'] = pub.get('author_name')
    s['author_link'] = author_link(pub.get('author_id'), base_url)
    s['date'] = unix(r.get('post_date'))
    s['score'] = r.get('score')
    s['title'] = r.get('title', '')
    s['link'] = r.get('canonical_url', '')
    s['url'] = r.get('canonical_url', '')

    # Comments are requested from the post's own publication.
    comments = json(lambda x: api_comments(x, base_url),
                    r.get('id'),
                    headers={'Referer': base_url})
    # Tolerate both a failed fetch and a payload without a 'comments' list.
    raw_comments = (comments.get('comments') or []) if comments else []
    parsed_comments = [comment(i) for i in raw_comments]
    # Drop comments the parser rejected (falsy results).
    s['comments'] = [c for c in parsed_comments if c]
    s['num_comments'] = r.get('comment_count', 0)

    return s
def feed(self):
    """List prefixed refs of recent public posts, newest first.

    Posts are fetched for ``self.BASE_DOMAIN``.  A post is kept when its
    ``audience`` is ``"everyone"`` and its ``post_date`` (converted with
    ``unix``) is later than now minus ``settings.MAX_STORY_AGE``.

    Returns:
        A list of ``self.ref_prefix(str(id))`` values ordered by descending
        post date; an empty list when nothing could be fetched.
    """
    # The cutoff is taken before the fetch, matching the original ordering.
    cutoff = datetime.now().timestamp() - settings.MAX_STORY_AGE

    posts = json(
        lambda x: api_stories(x, self.BASE_DOMAIN),
        headers={'Referer': self.BASE_DOMAIN},
    )
    if not posts:
        return []

    def published_at(post):
        return unix(post.get('post_date'))

    public_posts = [post for post in posts
                    if post.get("audience") == "everyone"]
    recent_posts = [post for post in public_posts
                    if published_at(post) > cutoff]
    newest_first = sorted(recent_posts, key=published_at, reverse=True)

    return [self.ref_prefix(str(post.get("id"))) for post in newest_first]
def feed(self):
    """List prefixed refs of recent public top posts, newest first.

    Posts are fetched from ``SUBSTACK_API_TOP_POSTS``.  A post is kept when
    its ``audience`` is ``"everyone"`` and its ``post_date`` (converted with
    ``unix``) is later than now minus ``settings.MAX_STORY_AGE``.

    Returns:
        A list of ``self.ref_prefix(base_url, id)`` values (both arguments
        stringified, ``base_url`` taken from the post's ``pub`` entry),
        ordered by descending post date; an empty list when nothing could be
        fetched.
    """
    # The cutoff is taken before the fetch, matching the original ordering.
    cutoff = datetime.now().timestamp() - settings.MAX_STORY_AGE

    posts = json(SUBSTACK_API_TOP_POSTS,
                 headers={'Referer': SUBSTACK_REFERER})
    if not posts:
        return []

    public_posts = [post for post in posts
                    if post.get("audience") == "everyone"]
    recent_posts = [post for post in public_posts
                    if unix(post.get('post_date')) > cutoff]
    recent_posts.sort(key=lambda post: unix(post.get('post_date')),
                      reverse=True)

    refs = []
    for post in recent_posts:
        publication_url = str(post.get("pub").get("base_url"))
        post_id = str(post.get("id"))
        refs.append(self.ref_prefix(publication_url, post_id))
    return refs