Example #1
def get_keywords(number=100, offset=0):
    """Render the requested page of keywords, ordered by relevance."""
    config = bot.Config(bot.CONFIGFILE)
    database = bot.initialize_database(config)
    keywords = database["keyword_clicks"].items()
    # rank keywords by relevance, most relevant first
    relevance_of_keyword = lambda x: __relevance_of_keyword(database, x)
    keywords = sorted(keywords,
                      key=relevance_of_keyword,
                      reverse=True)[offset*number:(offset+1)*number]
    content = render_template("keywords.html",
                              style=url_for("static", filename="default.css"),
                              number=number,
                              offset=offset,
                              keywords=keywords)
    return content
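All of these view functions lean on module-level scaffolding that the snippets themselves do not show: the Flask helpers render_template and url_for, the project-specific bot module with Config and initialize_database, the in-memory HASHED/DEHASHED maps, and the private __relevance_of_* scoring helpers. The following is only a sketch of what that surrounding module might look like; the scoring logic in particular is an assumption, not the project's actual implementation:

from flask import Flask, render_template, url_for

import bot  # project module assumed to provide Config, CONFIGFILE and initialize_database

app = Flask(__name__)

# in-memory maps between article links and their hash values,
# shared by the gallery, read_article and mark_as_read views
HASHED = {}    # link -> hash(link)
DEHASHED = {}  # hash(link) -> link


def __relevance_of_keyword(database, keyword_item):
    # placeholder scoring: rank a (keyword, clicks) pair by its click count
    _keyword, clicks = keyword_item
    return clicks


def __relevance_of_article(database, article):
    # placeholder scoring: sum the click counts of the article's keywords
    clicks = dict(database["keyword_clicks"].items())
    return sum(clicks.get(kw, 0) for kw in article.get("keywords", []))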
Example #2
def mark_as_read(hashed):
    """Mark the article behind the given hash value as read."""
    config = bot.Config(bot.CONFIGFILE)
    database = bot.initialize_database(config)

    hashed = int(hashed)
    # resolve the hash back to the article link and flag the article as read
    try:
        link = DEHASHED[hashed]
        if link:
            article = database["articles"][link]
            article.update(read=True)
            database["articles"][link] = article
    except KeyError:
        pass
    return "OK"
Example #3
def gallery(offset=0, number=32, since=259200, keyword=None):
    offset, number, since = [int(x) for x in [offset, number, since]]

    config = bot.Config(bot.CONFIGFILE)
    database = bot.initialize_database(config)

    back_then = since
    unread_young = lambda x: not x["read"] and x["release"] >= back_then
    relevance_of_article = lambda x: __relevance_of_article(database, x)

    # collect young, unread articles; remember their hash values and mark them as read
    articles = list()
    for article in database["articles"].values():
        if not unread_young(article):
            continue
        articles.append(article)
        link = article["link"]
        HASHED[link] = hash(link)
        DEHASHED[HASHED[link]] = link
        article.update(read=True)
        database["articles"][link] = article

    articles = sorted(articles,
                      key=relevance_of_article,
                      reverse=True)[offset*number:(offset*number+number)]

    scores = {a["link"]: relevance_of_article(a) for a in articles}
    scores["all"] = sum([relevance_of_article(x) for x in articles])

    content = render_template("gallery.html",
                              style=url_for("static", filename="default.css"),
                              articles=articles,
                              new_offset=offset + 1,
                              hashed=HASHED,
                              scores=scores)
    return content
Example #4
def read_article(hashed=None, keyword=None):
    hashed = int(hashed) if hashed is not None else None
    if keyword:
        like_keyword(keyword)

    articles = list()
    more_articles = list()

    config = bot.Config(bot.CONFIGFILE)
    database = bot.initialize_database(config)
    if hashed:
        link = None
        try:
            link = DEHASHED[hashed]
        except KeyError:
            for article in database["articles"]:
                if hashed == hash(article):
                    link = article
                    break
        if link:
            database["articles"][link]["read"] = True
            articles.append(database["articles"][link])

    unread_with_keyword = lambda x: not x["read"] and keyword in x["keywords"]
    relevance_of_article = lambda x: __relevance_of_article(database, x)
    more_articles = sorted([x for x in database["articles"].values()
                            if unread_with_keyword(x)],
                           key=relevance_of_article)
    HASHED.update({x["link"]: hash(x["link"]) for x in more_articles})

    return render_template("read.html",
                           style=url_for("static", filename="default.css"),
                           articles=articles,
                           more_articles=more_articles,
                           hashed=HASHED,
                           keyword=keyword
                           )
Example #5
def like_keyword(keyword):
    config = bot.Config(bot.CONFIGFILE)
    database = bot.initialize_database(config)
    database["keyword_clicks"].inc(keyword)
Example #6
def gallery(offset=0, number=12, since=259200, keyword=None):
    """Arrangement of unread articles."""
    offset = int(offset)
    number = int(number)
    back_then = int(since)

    unread_young = lambda x: not x["read"] and x["release"] >= back_then
    relevance_of_article = lambda x: __relevance_of_article(database, x)
    articles = list()

    config = bot.Config(bot.CONFIGFILE)
    database = bot.initialize_database(config)

    # look for young, unread articles
    articles = []
    for article in database["articles"].values():
        if not unread_young(article):
            continue
        articles.append(article)

    # sort by relevance and cut off slice
    articles = sorted(articles, key=relevance_of_article, reverse=True)[offset * number : (offset * number + number)]

    for article in articles:
        link = article["link"]

        # generate and remember hash values
        HASHED[link] = hash(link)
        DEHASHED[HASHED[link]] = link

        # mark articles as read
        # article.update(read=True)

        # update article in the database
        database["articles"][link] = article

        # split headline into links
        split_headline = unicode(escape(article["title"].lower())).split(" ")
        sorted_kwords = sorted(article["keywords"], key=len, reverse=True)
        if not sorted_kwords:
            continue
        linked_headline = []
        for word in split_headline:
            kwords = [kw for kw in sorted_kwords if kw.lower() in word.lower()]
            if not kwords:
                continue
            linked_headline.append(
                re_sub(
                    r"(%s)" % kwords[0],
                    r"""<a href="/read/%s/because/of/\1" target="_blank">\1</a>""" % HASHED[link],
                    word,
                    flags=IGNORECASE,
                )
            )
        if not linked_headline:
            continue
        article["linked_headline"] = " ".join(linked_headline)

    # prepare data sets for gallery
    scores = {a["link"]: relevance_of_article(a) for a in articles}
    scores["all"] = sum([relevance_of_article(x) for x in articles])
    content = render_template(
        "gallery.html",
        style=url_for("static", filename="default.css"),
        articles=articles,
        new_offset=offset + 1,
        hashed=HASHED,
        scores=scores,
    )
    return content
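The headline-linking step above wraps each matched keyword in an anchor pointing at the /read/<hash>/because/of/<keyword> view. A worked example of that substitution in isolation (the hash value is a placeholder):

from re import IGNORECASE
from re import sub as re_sub

word = "Python3"
keyword = "python"
hashed_link = 1234567890  # placeholder hash value

linked = re_sub(
    r"(%s)" % keyword,
    r"""<a href="/read/%s/because/of/\1" target="_blank">\1</a>""" % hashed_link,
    word,
    flags=IGNORECASE,
)
# linked == '<a href="/read/1234567890/because/of/Python" target="_blank">Python</a>3'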
Example #7
def read_article(hashed=None, keyword=None):
    hashed = int(hashed) if hashed is not None else None
    if keyword:
        like_keyword(keyword)

    articles = list()
    more_articles = list()

    config = bot.Config(bot.CONFIGFILE)
    database = bot.initialize_database(config)
    if hashed:
        link = None
        try:
            link = DEHASHED[hashed]
        except KeyError:
            for article in database["articles"]:
                if hashed == hash(article):
                    link = article
                    break
        if link:
            article = database["articles"][link]
            article.update(read=True)
            database["articles"][link] = article

            article = dict(article)
            article["source"] = __get_source_domain(link)
            article["date"] = time.ctime(article["release"])

            original_content = markdown.markdown(escape(article["content"]))
            spaned_content = []
            for paragraph in [p for p in RE_PARAGRAPHS.findall(original_content) if p]:
                sentences = [s for s in RE_SENTENCES.findall(paragraph) if s]
                if not sentences:
                    continue
                elif len(sentences) == 1:
                    spaned_content.append("<p><span>%s</span></p>" % sentences[0])
                else:
                    spaned_content.append(
                        "<p>%s</p>" % ("<span>%s</span>" * 3 % (sentences[0], "".join(sentences[1:-2]), sentences[-1]))
                    )
            article["spaned_content"] = " ".join(spaned_content)
            if keyword:
                article["spaned_content"] = re_sub(
                    r"(%s)" % keyword, r"<strong>\1</strong>", article["spaned_content"], flags=IGNORECASE
                )
            articles.append(article)

    unread_with_keyword = lambda x: not x["read"] and keyword in x["keywords"]
    relevance_of_article = lambda x: __relevance_of_article(database, x)
    more_articles = sorted(
        [x for x in database["articles"].values() if unread_with_keyword(x)], key=relevance_of_article
    )
    HASHED.update({x["link"]: hash(x["link"]) for x in more_articles})

    return render_template(
        "read.html",
        style=url_for("static", filename="default.css"),
        articles=articles,
        more_articles=more_articles,
        hashed=HASHED,
        keyword=keyword,
    )
        slack_token = f.read().strip()
    print("Starting Bot")
    # print(slack_token)
    slack_client = WebClient(slack_token)
    credentials = service_account.Credentials.from_service_account_file(
        args.bigquery_auth_path)
    bq_client = bigquery.Client(credentials=credentials)

    # Initialize the folder structures
    rundata_dir = os.path.join(args.data_dir, "rundata")
    for path in [args.data_dir, rundata_dir]:
        if not os.path.exists(path):
            os.makedirs(path)
    # initialize the database
    db_path = os.path.join(args.data_dir, "runs.sqlite3")
    conn = initialize_database(db_path, purge=args.purge)

    target_companies = args.companies.split(",")
    bot = AnalyticsCSUpdater(bq_client,
                             slack_client,
                             sql_conn=conn,
                             target_companies=target_companies,
                             target_channel=args.target_channel)

    # Execution of the bot
    # First define the start and end dates
    start_date = args.start_date
    if start_date:
        start_date = parse(start_date).date()
    logger.debug(f"raw start_date: {start_date}")
    start_date, end_date = get_output_date_range(conn,