def main():
    """Fetch the serienjunkies.de news RSS feed and post new articles to Telegram."""
    site = Site()
    site.name = "serienjunkies"
    site.alias = "serienjunkies"
    site.short = "sj"
    site.base_url = "https://serienjunkies.de"
    site.channel_id = -1001422493025
    raw_data = feedparser.parse("https://www.serienjunkies.de/rss/news.xml")
    for x in raw_data["entries"]:
        # Canonicalize the link by cutting everything after ".html"; fall back to
        # the raw feed link when the pattern does not match.
        # FIX: raw string avoids the invalid "\." escape sequence (SyntaxWarning
        # on modern CPython, slated to become an error).
        link_match = re.findall(r"(.*\.html)", x["link"])
        link = link_match[0] if link_match else x["link"]
        if site.check_article_exists(link):
            continue
        img, tags = get_img_and_tags(x["link"])
        site.add_article(text=x["summary"], title=x["title"], link=link, tags=tags, img=img)
    site.post()
def main():
    """Forward Süddeutsche "Topthemen" feed entries to the Telegram channel.

    Fix: unlike every sibling scraper in this file, this one never called
    ``check_article_exists``, so already-posted articles were re-queued on
    every run; the duplicate guard is now in place.
    """
    site = Site()
    site.name = "sueddeutsche"
    site.alias = "Süddeutsche"
    site.short = "sz"
    site.base_url = "https://sueddeutsche.de/"
    site.channel_id = -1001356683060
    site.instant_id = -1001431408008
    site.join_instant = "https://t.me/joinchat/AAAAAFVRjYiqwgQUBJvWqQ"
    raw_data = feedparser.parse("https://rss.sueddeutsche.de/rss/Topthemen")
    for x in raw_data["entries"]:
        # Skip entries that were already posted (consistent with the other feeds).
        if site.check_article_exists(x["link"]):
            continue
        img, tags = get_img_and_tags(x["link"])
        site.add_article(
            text=x["summary"], title=x["title"], link=x["link"], img=img, tags=tags
        )
    site.post()
def main():
    """Mirror the tagesschau.de Atom feed into the Telegram channel."""
    site = Site()
    site.name = "tagesthemen_de"
    site.alias = "tagesschau.de"
    site.short = "tages"
    site.base_url = "https://www.tagesschau.de/xml/rss2"
    site.channel_id = -1001151817211
    site.instant_id = -1001450868426
    site.join_instant = "http://t.me/joinchat/AAAAAFZ6fsojLN6G1q2rmA"
    # Promotional links that appear in the feed but are not articles.
    skipped_links = (
        "https://novi.funk.net",
        "http://blog.ard-hauptstadtstudio.de",
    )
    feed = feedparser.parse("https://www.tagesschau.de/xml/atom/")
    for entry in feed["entries"]:
        url = entry["link"]
        if url in skipped_links or site.check_article_exists(url):
            continue
        img, tags = get_img_and_tags(url)
        site.add_article(
            text=entry["summary"], title=entry["title"], link=url, tags=tags, img=img
        )
    site.post()
def main():
    """Scrape BrohltalInfo24 article listings and post them (layout variant 3)."""
    site = Site()
    site.name = "brohltal"
    site.alias = "BrohltalInfo24"
    site.base_url = "http://www.brohltal-info24.de/"
    site.channel_id = -1001131410143
    site.short = "brohl"
    for record in get_data(site.base_url):
        # Record layout: image path, title, raw body, relative link, date.
        img_path, title, body, href, date = record[0], record[1], record[2], record[3], record[4]
        text, tags = create_hashtag(body)
        site.add_article(
            img=site.base_url + img_path,
            title=title,
            text=text,
            tags=tags,
            date=date,
            link=site.base_url + href,
        )
    site.post(variant=3)
def main():
    """Fetch the Focus Online breaking-news RSS feed and post new articles."""
    site = Site()
    site.name = "focus_de"
    site.alias = "Focus Online"
    site.short = "focus"
    site.base_url = "https://www.focus.de/"
    site.channel_id = -1001358620859
    site.instant_id = -1001479434546
    site.join_instant = "https://t.me/joinchat/AAAAAFguYTI39BV7VxK9qQ"
    for x in feedparser.parse(
        "https://rss.focus.de/fol/XML/rss_folnews_eilmeldungen.xml"
    )["entries"]:
        if site.check_article_exists(x["link"]):
            continue
        link = x["link"]
        # BUG FIX: re.split("\n", text, re.MULTILINE) passed the flag constant
        # (re.MULTILINE == 8) in the positional *maxsplit* slot, silently capping
        # the split at 8 pieces. A plain newline split is what was intended:
        # keep the last line, then drop everything after the first "<br />".
        lines = x["summary"].split("\n")
        text = lines[-1].split("<br />")[0]
        img = get_img(link)
        tags = [y["term"] for y in x["tags"]]
        site.add_article(title=x["title"], text=text, link=link, img=img, tags=tags)
    site.post()
def main():
    """Forward new entries from the Zeit Online newsfeed to Telegram."""
    site = Site()
    site.name = "zeit_de"
    site.alias = "Zeit Online"
    site.short = "zeit"
    site.base_url = "https://zeit-online.de/"
    site.channel_id = -1001497423999
    site.instant_id = -1001355451098
    site.join_instant = "http://t.me/joinchat/AAAAAFDKitrF93gyfjCUeg"
    feed = feedparser.parse("https://newsfeed.zeit.de/index")
    for entry in feed["entries"]:
        url = entry["link"]
        if site.check_article_exists(url):
            continue
        img, tags = get_img_and_tags(url)
        site.add_article(
            text=entry["summary"], title=entry["title"], link=url, img=img, tags=tags
        )
    site.post()
def main():
    """Forward the latest Welt.de feed entries to Telegram."""
    site = Site()
    site.name = "welt_de"
    site.alias = "Welt.de"
    site.short = "welt"
    site.base_url = "https://welt.de/"
    site.channel_id = -1001247567899
    site.instant_id = -1001184127734
    site.join_instant = "http://t.me/joinchat/AAAAAEaUWvabMwWbupISFw"
    feed = feedparser.parse("https://www.welt.de/feeds/latest.rss")
    for entry in feed["entries"]:
        url = entry["link"]
        if site.check_article_exists(url):
            continue
        tags = [tag["term"] for tag in entry["tags"]]
        # The second <link> element, when present and typed image/jpeg, is the
        # teaser image; otherwise post without one.
        links = entry["links"]
        img = ""
        if len(links) > 1 and links[1]["type"] == "image/jpeg":
            img = links[1]["href"]
        text = entry["summary"] if "summary" in entry else ""
        site.add_article(text=text, title=entry["title"], link=url, tags=tags, img=img)
    site.post()
def main():
    """Forward new Spiegel Online headlines to Telegram."""
    site = Site()
    site.name = "spiegel_online_de"
    site.alias = "spiegel-online"
    site.short = "spiegel"
    site.base_url = "https://spiegel-online.de/"
    site.channel_id = -1001442405890
    site.instant_id = -1001182811341
    site.join_instant = "http://t.me/joinchat/AAAAAEaARM11yh3ZtMJGXQ"
    feed = feedparser.parse("http://www.spiegel.de/schlagzeilen/index.rss")
    for entry in feed["entries"]:
        url = entry["link"]
        if site.check_article_exists(url):
            continue
        img, tags = get_img_and_tags(url)
        # Some entries carry no summary; fall back to an empty text.
        if "summary" in entry:
            text = entry["summary"]
        else:
            text = ""
        site.add_article(text=text, title=entry["title"], link=url, tags=tags, img=img)
    site.post()
def main():
    """Forward new Pro-Linux.de feed entries to Telegram."""
    site = Site()
    site.name = "pro_linux_de"
    site.alias = "Pro-Linux.de"
    site.short = "proli"
    site.base_url = "https://www.pro-linux.de/"
    site.channel_id = -1001232676629
    site.instant_id = -1001499098018
    site.join_instant = "https://t.me/joinchat/AAAAAFlaa6IhIGAqmFuZFQ"
    feed = feedparser.parse("https://www.pro-linux.de/NB3/rss/2/4/atom_aktuell.xml")
    for entry in feed["entries"]:
        url = entry["link"]
        if site.check_article_exists(url):
            continue
        img, tags = get_img_and_tags(url)
        site.add_article(
            text=entry["summary"], title=entry["title"], link=url, tags=tags, img=img
        )
    site.post()
def main():
    """Run the RSS-to-Telegram pipeline for every configured Heise channel."""
    for conf in HEISE_SITES:
        site = Site()
        site.name = conf["channel_link"]
        site.alias = conf["name"]
        site.short = conf["short"]
        site.channel_id = conf["channel_id"]
        site.instant_id = conf["instant_id"]
        site.join_instant = conf["instant_link"]
        for entry in feedparser.parse(conf["rss"])["entries"]:
            url = entry["link"]
            if site.check_article_exists(url):
                continue
            img, tags = get_img_and_tags(url)
            # Some entries carry no summary; fall back to an empty text.
            text = entry["summary"] if "summary" in entry else ""
            site.add_article(title=entry["title"], link=url, text=text, tags=tags, img=img)
        site.post()
def main():
    """Post new taz.de articles, stripping the feed's trailing teaser marker."""
    site = Site()
    site.name = "tageszeitung"
    site.alias = "taz"
    site.short = "taz"
    site.base_url = "https://taz.de/"
    site.channel_id = -1001152190997
    feed = feedparser.parse("https://www.taz.de/!p4608;rss/")
    for entry in feed["entries"]:
        url = entry["link"]
        if site.check_article_exists(url):
            continue
        img, tags = get_img_and_tags(url)
        # The taz feed appends a "mehr..." read-more marker to summaries.
        if "summary" in entry:
            text = entry["summary"].replace("mehr...", "")
        else:
            text = ""
        site.add_article(text=text, title=entry["title"], link=url, img=img, tags=tags)
    site.post()
def main():
    """Fetch the Tarnkappe.info feed and post new articles with their og:image.

    Fix: the og:image lookup previously indexed ``findall(...)[0]`` directly
    and raised IndexError whenever an article page carried no og:image meta
    tag; it now falls back to posting without an image.
    """
    site = Site()
    site.name = "tarnkappe"
    site.short = "tk"
    site.alias = "Tarnkappe.info"
    site.base_url = "https://tarnkappe.info/"
    site.channel_id = -1001096556431
    site.instant_id = -1001201795584
    site.instant_hash = "ea1f50995623f3"
    site.join_instant = "http://t.me/joinchat/AAAAAEeh8gAMwazWu2hoTA"
    raw_data = feedparser.parse(site.base_url + "feed/")
    for x in raw_data["entries"]:
        if site.check_article_exists(x["link"]):
            continue
        sourcecode = requests.get(x["link"]).text
        match = re.search('<meta property="og:image" content="([^"]*)"', sourcecode)
        img = match.group(1) if match else None
        tags = [y["term"] for y in x["tags"]]
        site.add_article(
            link=x["link"], title=x["title"], text=x["summary"], img=img, tags=tags
        )
    site.post()
def main():
    """Run the RSS-to-Telegram pipeline for every configured BILD channel."""
    for s in BILD_SITES:
        site = Site()
        site.name = s["channel_link"]
        site.alias = s["name"]
        site.short = s["short"]
        site.channel_id = s["channel_id"]
        site.instant_id = s["instant_id"]
        site.join_instant = s["instant_link"]
        for x in feedparser.parse(s["rss"])["entries"]:
            if site.check_article_exists(x["link"]):
                continue
            text = ""
            if "summary" in x:
                # BUG FIX: re.split() was called with re.MULTILINE (== 8) in the
                # positional *maxsplit* slot, capping the split at 8 pieces; a
                # plain newline split is what was intended. Keep the last line,
                # then drop everything after the first "<br />".
                text = x["summary"].split("\n")[-1].split("<br />")[0]
            # Request the 1200px variant of the thumbnail when one is provided.
            img = (
                x["media_thumbnail"][0]["url"].replace(",w=120,", ",w=1200,")
                if "media_thumbnail" in x
                else None
            )
            tags = [y["term"] for y in x["tags"]]
            site.add_article(
                title=x["title"], text=text, link=x["link"], img=img, tags=tags
            )
        site.post()
def main():
    """Post new Golem.de articles with image and tags scraped from each page.

    Fix: the image lookup previously indexed ``findall(...)[0]`` directly and
    raised IndexError whenever the fetched page lacked the combined
    twitter:image/og:image meta tag; it now falls back to no image.
    """
    site = Site()
    site.name = "golem_de"
    site.alias = "Golem.de"
    site.short = "golem"
    site.base_url = "https://golem.de/"
    site.channel_id = -1001138540100
    site.instant_id = -1001304871255
    site.join_instant = "http://t.me/joinchat/AAAAAE3GwVcigPpRU96awA"
    for x in feedparser.parse("https://rss.golem.de/rss.php?feed=RSS2.0")["entries"]:
        if site.check_article_exists(x["link"]):
            continue
        # The summary ends with a "(<a ...>" suffix; keep only the text before it.
        text = x["summary"].split("(<a")[0]
        article_code = requests.get(x["link"]).text
        match = re.search(
            '"twitter:image" property="og:image" content="([^"]*)"', article_code
        )
        img = match.group(1) if match else None
        # Tags are the anchor texts inside the (HTML-unescaped) summary markup.
        tags = re.findall('a href="[^"]*">([^<]*)<', html.unescape(x["summary"]))
        site.add_article(text=text, title=x["title"], img=img, tags=tags, link=x["link"])
    site.post()
def main():
    """Forward new Frankfurter Rundschau feed entries to Telegram."""
    site = Site()
    site.name = "rundschau"
    site.alias = "Frankfurter Rundschau"
    site.short = "fr"
    site.base_url = "https://fr.de/"
    site.channel_id = -1001389663056
    feed = feedparser.parse("https://www.fr.de/rssfeed.rdf")
    for entry in feed["entries"]:
        url = entry["link"]
        if site.check_article_exists(url):
            continue
        img, tags = get_img_and_tags(url)
        # Some entries carry no summary; fall back to an empty text.
        if "summary" in entry:
            text = entry["summary"]
        else:
            text = ""
        site.add_article(text=text, title=entry["title"], link=url, tags=tags, img=img)
    site.post()
def main():
    """Look for recent Olbrück Rundschau e-paper issues and post them (variant 4)."""
    site = Site()
    site.name = "olbrueck"
    site.alias = "Olbrück Rundschau"
    site.base_url = "https://archiv.wittich.de/epapers/pdf/3"
    site.short = "olbrück"
    site.channel_id = -1001199332289
    # Probe week offsets 5 down to -4 around the current issue.
    for offset in range(5, -5, -1):
        issue = get_newspaper(site, week=offset)
        if issue:
            title, jpg, url = tuple(issue)
            site.add_article(title=title, img=jpg, link=url)
    site.post(4)