def get_keywords(number=100, offset=0):
    """Paged list of keywords, ordered by relevance."""
    config = bot.Config(bot.CONFIGFILE)
    database = bot.initialize_database(config)
    # route parameters may arrive as strings, so cast before slicing
    number, offset = int(number), int(offset)
    keywords = database["keyword_clicks"].items()
    relevance_of_keyword = lambda x: __relevance_of_keyword(database, x)
    keywords = sorted(keywords, key=relevance_of_keyword, reverse=True)[
        offset * number : (offset + 1) * number
    ]
    content = render_template(
        "keywords.html",
        style=url_for("static", filename="default.css"),
        number=number,
        offset=offset,
        keywords=keywords,
    )
    return content
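# `get_keywords` relies on the private helper `__relevance_of_keyword`, which
# is defined elsewhere in this module. A minimal sketch of such a scorer,
# assuming `keyword_clicks` items arrive as `(keyword, clicks)` pairs — the
# name and scoring logic below are hypothetical, not the actual implementation:
def _example_relevance_of_keyword(database, item):
    keyword, clicks = item
    # hypothetical scoring: raw clicks, dampened by keyword length so that
    # long, rarely clicked keywords do not dominate the list
    return clicks / (1 + len(keyword))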
def mark_as_read(hashed):
    config = bot.Config(bot.CONFIGFILE)
    database = bot.initialize_database(config)
    hashed = int(hashed)
    try:
        link = DEHASHED[hashed]
        if link:
            article = database["articles"][link]
            article.update(read=True)
            database["articles"][link] = article
    except KeyError:
        pass
    return "OK"
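# The view functions in this module render Flask templates, so they are
# presumably registered as routes elsewhere. The anchor tags generated in
# `gallery` below point at URLs of the form /read/<hashed>/because/of/<keyword>,
# which suggests wiring along these lines (the exact rules are inferred from
# the generated links, not taken from this section; `app` is the module's
# assumed Flask application):
#
# app.add_url_rule("/keywords/<number>/<offset>", view_func=get_keywords)
# app.add_url_rule("/mark/<hashed>/as/read", view_func=mark_as_read)
# app.add_url_rule("/read/<hashed>/because/of/<keyword>", view_func=read_article)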
def like_keyword(keyword):
    config = bot.Config(bot.CONFIGFILE)
    database = bot.initialize_database(config)
    database["keyword_clicks"].inc(keyword)
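# `keyword_clicks` exposes an `inc` method, so it behaves like a persistent
# counter rather than a plain dict. A minimal sketch of such a wrapper,
# assuming dict-like storage underneath (hypothetical class, not the actual
# backend used by `bot.initialize_database`):
class _ExampleClickCounter(dict):
    def inc(self, key, step=1):
        # increment the click count, creating the entry on first use
        self[key] = self.get(key, 0) + step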
def gallery(offset=0, number=12, since=259200, keyword=None):
    """Arrangement of unread articles."""
    offset = int(offset)
    number = int(number)
    # articles released within the last `since` seconds count as young
    back_then = time.time() - int(since)
    unread_young = lambda x: not x["read"] and x["release"] >= back_then
    relevance_of_article = lambda x: __relevance_of_article(database, x)
    config = bot.Config(bot.CONFIGFILE)
    database = bot.initialize_database(config)
    # look for young, unread articles
    articles = []
    for article in database["articles"].values():
        if not unread_young(article):
            continue
        articles.append(article)
    # sort by relevance and cut off slice
    articles = sorted(articles, key=relevance_of_article, reverse=True)[
        offset * number : (offset * number + number)
    ]
    for article in articles:
        link = article["link"]
        # generate and remember hash values
        HASHED[link] = hash(link)
        DEHASHED[HASHED[link]] = link
        # mark articles as read
        # article.update(read=True)
        # update article in the database
        database["articles"][link] = article
        # split headline into links (str() replaces the Python 2 unicode())
        split_headline = str(escape(article["title"].lower())).split(" ")
        sorted_kwords = sorted(article["keywords"], key=len, reverse=True)
        if not sorted_kwords:
            continue
        linked_headline = []
        for word in split_headline:
            kwords = [kw for kw in sorted_kwords if kw.lower() in word.lower()]
            if not kwords:
                # keep words without a keyword match as they are
                linked_headline.append(word)
                continue
            linked_headline.append(
                re_sub(
                    r"(%s)" % kwords[0],
                    r"""<a href="/read/%s/because/of/\1" target="_blank">\1</a>""" % HASHED[link],
                    word,
                    flags=IGNORECASE,
                )
            )
        article["linked_headline"] = " ".join(linked_headline)
    # prepare data sets for gallery
    scores = {a["link"]: relevance_of_article(a) for a in articles}
    scores["all"] = sum(relevance_of_article(x) for x in articles)
    content = render_template(
        "gallery.html",
        style=url_for("static", filename="default.css"),
        articles=articles,
        new_offset=offset + 1,
        hashed=HASHED,
        scores=scores,
    )
    return content
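# The headline linking in `gallery` rewrites each matched keyword into an
# anchor tag via `re_sub`. A standalone illustration of that substitution,
# with made-up values; it also shows how `re_sub` and `IGNORECASE` are
# presumably imported at module level:
from re import IGNORECASE
from re import sub as re_sub

_word = "Quantum"
_kw = "quantum"
_linked = re_sub(
    r"(%s)" % _kw,
    r"""<a href="/read/%s/because/of/\1" target="_blank">\1</a>""" % 12345,
    _word,
    flags=IGNORECASE,
)
# _linked == '<a href="/read/12345/because/of/Quantum" target="_blank">Quantum</a>'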
def read_article(hashed=None, keyword=None):
    hashed = int(hashed)
    if keyword:
        like_keyword(keyword)
    articles = list()
    more_articles = list()
    config = bot.Config(bot.CONFIGFILE)
    database = bot.initialize_database(config)
    if hashed:
        link = None
        try:
            link = DEHASHED[hashed]
        except KeyError:
            # fall back to a linear scan if the hash is not cached
            for article in database["articles"]:
                if hashed == hash(article):
                    link = article
                    break
        if link:
            article = database["articles"][link]
            article.update(read=True)
            database["articles"][link] = article
            article = dict(article)
            article["source"] = __get_source_domain(link)
            article["date"] = time.ctime(article["release"])
            original_content = markdown.markdown(escape(article["content"]))
            spaned_content = []
            for paragraph in [p for p in RE_PARAGRAPHS.findall(original_content) if p]:
                sentences = [s for s in RE_SENTENCES.findall(paragraph) if s]
                if not sentences:
                    continue
                elif len(sentences) == 1:
                    spaned_content.append("<p><span>%s</span></p>" % sentences[0])
                else:
                    # wrap the first sentence, the middle, and the last
                    # sentence in separate spans
                    spaned_content.append(
                        "<p>%s</p>"
                        % (
                            "<span>%s</span>" * 3
                            % (sentences[0], "".join(sentences[1:-1]), sentences[-1])
                        )
                    )
            article["spaned_content"] = " ".join(spaned_content)
            if keyword:
                # highlight the keyword that led the reader here
                article["spaned_content"] = re_sub(
                    r"(%s)" % keyword,
                    r"<strong>\1</strong>",
                    article["spaned_content"],
                    flags=IGNORECASE,
                )
            articles.append(article)
    unread_with_keyword = lambda x: not x["read"] and keyword in x["keywords"]
    relevance_of_article = lambda x: __relevance_of_article(database, x)
    more_articles = sorted(
        [x for x in database["articles"].values() if unread_with_keyword(x)],
        key=relevance_of_article,
    )
    HASHED.update({x["link"]: hash(x["link"]) for x in more_articles})
    return render_template(
        "read.html",
        style=url_for("static", filename="default.css"),
        articles=articles,
        more_articles=more_articles,
        hashed=HASHED,
        keyword=keyword,
    )
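# `read_article` depends on two module-level patterns, RE_PARAGRAPHS and
# RE_SENTENCES, that are not shown in this section. Plausible definitions,
# assuming paragraphs come from markdown's <p> output and sentences end at
# common terminal punctuation — these are assumptions, not the module's
# actual patterns:
import re

RE_PARAGRAPHS_EXAMPLE = re.compile(r"<p>(.*?)</p>", re.DOTALL)
RE_SENTENCES_EXAMPLE = re.compile(r"[^.!?]*[.!?]\s*")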
    slack_token = f.read().strip()

print("Starting Bot")
# print(slack_token)
slack_client = WebClient(slack_token)
credentials = service_account.Credentials.from_service_account_file(
    args.bigquery_auth_path)
bq_client = bigquery.Client(credentials=credentials)

# Initialize the folder structures
rundata_dir = os.path.join(args.data_dir, "rundata")
for path in [args.data_dir, rundata_dir]:
    if not os.path.exists(path):
        os.makedirs(path)

# initialize the database
db_path = os.path.join(args.data_dir, "runs.sqlite3")
conn = initialize_database(db_path, purge=args.purge)

target_companies = args.companies.split(",")
bot = AnalyticsCSUpdater(bq_client,
                         slack_client,
                         sql_conn=conn,
                         target_companies=target_companies,
                         target_channel=args.target_channel)

# Execution of the bot
# First define the start and end dates
start_date = args.start_date
if start_date:
    start_date = parse(start_date).date()
logger.debug(f"raw start_date: {start_date}")
start_date, end_date = get_output_date_range(conn,