Esempio n. 1
0
def get_all_sites(category: str = None, domain: str = None):
    """
    Get all site urls
    :return:
    """
    conn = get_mysql_conn()
    cursor = conn.cursor()

    sql = """
    SELECT site_url, sitemap_url FROM scrapy_sites
    """

    if category:
        sql += " WHERE category='{}'".format(category)

    if domain:
        sql += " WHERE site_url LIKE '%{}%'".format(domain)

    cursor.execute(sql)

    conn.commit()
    rows = dictfecth(cursor)
    cursor.close()

    return rows
Esempio n. 2
0
def save_link_fb_shares(site_link: str, r: dict) -> None:
    conn = get_mysql_conn()
    cursor = conn.cursor()

    site_link_title = r.get('og_object', {}).get('title', '')
    fb_description = r.get('og_object', {}).get('description', '')
    fb_type = r.get('og_object', {}).get('type', '')
    fb_shares = r.get('engagement', {}).get('share_count', 0)
    fb_updated_time = r.get('og_object', {}).get('updated_time', '')
    fb_graph_object = ujson.dumps(r)

    # is_viral = if the link got a share it could be viral
    is_viral = "yes"
    if fb_shares < 1:
        is_viral = "no"

    sql = """
    INSERT INTO facebook_most_shared(site_link, site_link_title, fb_description, fb_type, fb_shares, fb_updated_time,
    fb_graph_object, is_viral) VALUES ('{}', '{}', '{}', '{}', {}, '{}', '{}', '{}')
    """.format(site_link, site_link_title.replace("'", r"\'"),
               fb_description.replace("'", r"\'"), fb_type, fb_shares,
               fb_updated_time, fb_graph_object.replace("'", r"\'"), is_viral)

    try:
        cursor.execute(sql)
        conn.commit()
    except Exception as e:
        print("\nERROR! ({})".format(str(e)))
        conn.rollback()

    cursor.close()
    return
Esempio n. 3
0
def update_link_fb_shares(site_link: str, r: dict):
    conn = get_mysql_conn()
    cursor = conn.cursor()

    fb_shares = r.get('engagement', {}).get('share_count', 0)
    fb_updated_time = r.get('og_object', {}).get('updated_time', '')
    fb_graph_object = ujson.dumps(r)

    # is_viral = if the link got a share it could be viral
    is_viral = "yes"
    if fb_shares < 1:
        is_viral = "no"

    sql = """
    UPDATE facebook_most_shared
    SET fb_shares = {},
    fb_updated_time = '{}',
    fb_graph_object = '{}',
    is_viral = '{}'
    WHERE site_link = '{}'
    """.format(fb_shares, fb_updated_time, fb_graph_object.replace("'", r"\'"),
               is_viral, site_link)

    try:
        cursor.execute(sql)
        conn.commit()
    except Exception as e:
        print("ERROR! ({})\n".format(str(e)))
        conn.rollback()

    cursor.close()
    return
Esempio n. 4
0
def get_fb_link_title(site_link):
    """
    Get FB link by the site_url

    :param site_link:
    :return:
    """
    conn = get_mysql_conn()
    cursor = conn.cursor()

    sql = """
    SELECT * FROM facebook_most_shared WHERE site_link = "{}"
    """.format(site_link)

    try:
        cursor.execute(sql)
        conn.commit()
        rows = dictfecth(cursor)
        cursor.close()
    except Exception as e:
        print("\nERROR! ({})".format(str(e)))
        conn.rollback()
        return

    return rows
Esempio n. 5
0
def save_ga_report(title, path, avg_top, top, start_date, users,
                   adsense_clicks, adsense_revenue) -> None:
    conn = get_mysql_conn()
    cursor = conn.cursor()

    sql = """
    INSERT INTO ga_reports(page_title, page_path, avg_time_on_page, time_on_page, start_date,
    users, adsense_clicks, adsense_revenue)
    VALUES (%s, %s, %s, %s, %s, %s, %s, %s)
    """

    print("Saving GA report for: {}...\n".format(title))

    try:
        # truncate to 5 decimals
        avg_top = format(float(avg_top), '.5f')
        top = format(float(top), '.5f')
        adsense_clicks = format(float(adsense_clicks), '.5f')
        adsense_revenue = format(float(adsense_revenue), '.5f')

        cursor.execute(sql, (title, path, avg_top, top, start_date, users,
                             adsense_clicks, adsense_revenue))
        conn.commit()
    except Exception as e:
        print("ERROR! ({})\n".format(str(e)))
        conn.rollback()

    cursor.close()
    return
Esempio n. 6
0
def update_ga_report(avg_top, top, path, users, adsense_clicks,
                     adsense_revenue) -> None:
    conn = get_mysql_conn()
    cursor = conn.cursor()

    # truncate to 5 decimals
    avg_top = format(float(avg_top), '.5f')
    top = format(float(top), '.5f')
    adsense_clicks = format(float(adsense_clicks), '.5f')
    adsense_revenue = format(float(adsense_revenue), '.5f')

    sql = """UPDATE ga_reports
    SET avg_time_on_page = {},
    time_on_page = {},
    users = {},
    adsense_clicks = {},
    adsense_revenue = {}
    WHERE page_path = "{}"
    """.format(avg_top, top, users, adsense_clicks, adsense_revenue, path)

    print("Updating GA report for: {}...\n".format(path))

    try:
        cursor.execute(sql)
        conn.commit()
    except Exception as e:
        print("ERROR! ({})\n".format(str(e)))
        conn.rollback()

    cursor.close()
    return
Esempio n. 7
0
def save_campaign_report(r: dict) -> None:
    conn = get_mysql_conn()
    cursor = conn.cursor()

    email_subject = r.get('subject_line', 'No subject')
    emails_sent = r.get('emails_sent', 0)
    unique_opens = r.get('opens', {}).get('unique_opens', 0)
    open_rate = round(r.get('opens', {}).get('open_rate', 0) * 100, 2)
    response_object = ujson.dumps(r)

    sql = """
    INSERT INTO mailchimp_reports(email_subject, emails_sent, unique_opens, open_rate, response_object)
    VALUES (%s, %s, %s, %s, %s)
    """

    print("Saving report for: {}...\n".format(email_subject))

    try:
        cursor.execute(sql, (email_subject, emails_sent, unique_opens,
                             open_rate, response_object))
        conn.commit()
    except Exception as e:
        print("ERROR! ({})\n".format(str(e)))
        conn.rollback()

    cursor.close()
    return
Esempio n. 8
0
def get_top_game_last_entry(game_id):
    """
    Get the last stats for a given game

    :return:
    """
    conn = get_mysql_conn()
    cursor = conn.cursor()

    sql = """
    SELECT current_position FROM twitch_top_games
    WHERE game_id = "{}"
    ORDER BY insert_time DESC
    LIMIT 0,1
    """.format(game_id)
    print(sql)

    cursor.execute(sql)

    conn.commit()
    rows = dictfecth(cursor)
    cursor.close()

    if rows:
        return rows[0]

    return False
Esempio n. 9
0
def insert_top_game_stats(r, current_position, last_position):
    conn = get_mysql_conn()
    cursor = conn.cursor()

    game_id = r.get('id', 0)
    game_name = r.get('name', "")
    box_art_url = r.get('box_art_url', "")
    # current_position = r.get('current_position', "")
    # last_position = r.get('last_position', "")
    delta_position = int(last_position) - int(current_position)

    sql = """
    INSERT INTO twitch_top_games(game_id, game_name, box_art_url, current_position, last_position, delta_position)
    VALUES (%s, %s, %s, %s, %s, %s)
    """

    try:
        cursor.execute(sql, (game_id, game_name, box_art_url, current_position,
                             last_position, delta_position))
        conn.commit()
    except Exception as e:
        print("ERROR! ({})\n".format(str(e)))
        conn.rollback()

    cursor.close()
    return
Esempio n. 10
0
def update_campaign_report(email_subject: str, r: dict) -> None:
    conn = get_mysql_conn()
    cursor = conn.cursor()

    emails_sent = r.get('emails_sent', 0)
    unique_opens = r.get('opens', {}).get('unique_opens', 0)
    open_rate = round(r.get('opens', {}).get('open_rate', 0) * 100, 2)
    response_object = ujson.dumps(r)

    sql = """UPDATE mailchimp_reports 
    SET unique_opens = {},
    open_rate = {},
    emails_sent = {},
    response_object = '{}' 
    WHERE email_subject = "{}"
    """.format(unique_opens, open_rate, emails_sent, response_object,
               email_subject)

    print("Updating report for: {}...\n".format(email_subject))

    try:
        cursor.execute(sql)
        conn.commit()
    except Exception as e:
        print("ERROR! ({})\n".format(str(e)))
        conn.rollback()

    cursor.close()
    return
Esempio n. 11
0
def save_tweet(t: dict, keyword: str) -> None:
    conn = get_mysql_conn()

    cursor = conn.cursor()

    retweet_count = int(t.get('retweet_count', 0))
    created_at = t['created_at']
    tweet_id = t['id_str']
    tweet = t['text']
    user_id = t['user']['id_str']
    user_name = t['user']['screen_name']
    blob = ujson.dumps(t)
    tweet_link = "https://twitter.com/{}/status/{}".format(user_name, tweet_id)
    # TODO: add a site_link field and extract the link from the tweet

    sql = """
    INSERT INTO twitter_most_retweeted(created_at, tweet_id, retweet_count, tweet, user_id, user_name,
    tweet_blob, tweet_link, keyword)
    VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)
    """

    print("\nSaving tweet: {}...".format(tweet_link))

    try:
        cursor.execute(sql, (created_at, tweet_id, retweet_count, tweet, user_id, user_name, blob, tweet_link, keyword))
        conn.commit()
    except Exception as e:
        print("\nERROR! ({})".format(str(e)))
        conn.rollback()

    cursor.close()
    return
Esempio n. 12
0
def get_allowed_domains():
    """
    Get all domains
    :return:
    """
    conn = get_mysql_conn()
    cursor = conn.cursor()

    sql = """
    SELECT site_url, sitemap_url FROM scrapy_sites
    """

    cursor.execute(sql)

    conn.commit()
    rows = dictfecth(cursor)

    # extract domains
    domains = list()
    for r in rows:
        d = get_domain(r['site_url'])
        domains.append(d)

    cursor.close()

    return domains
Esempio n. 13
0
def update_link_title_descrip(id, title, description):
    """
    Update a link's title and description from a social network
    """
    conn = get_mysql_conn()
    cursor = conn.cursor()

    # update shares
    sql = """
    UPDATE links_shares
    SET link_title = '{}', link_description = '{}'
    WHERE idlinks_shares = '{}'
    """.format(title.replace("'", r"\'"), description.replace("'", r"\'"), id)

    try:
        cursor.execute(sql)
        print("'{}' -> link updated.".format(title))
        conn.commit()
    except Exception as e:
        print("ERROR! ({})\n".format(str(e)))
        conn.rollback()
        return False

    cursor.close()
    return True
Esempio n. 14
0
def is_fb_link_viral(site_link: str) -> bool:
    conn = get_mysql_conn()
    cursor = conn.cursor()

    sql = """
    SELECT is_viral FROM facebook_most_shared WHERE site_link = "{}"
    """.format(site_link)

    try:
        cursor.execute(sql)
        conn.commit()
        rows = dictfecth(cursor)
        cursor.close()

        if rows:
            v = rows[0].get('is_viral', 'unknown')

            if v == 'unknown' or v == 'yes':
                print("{} is VIRAL!".format(site_link))
                return True
            else:
                print("{} is NOT VIRAL :(".format(site_link))
                return False
        else:
            print("{} is NEW, it COULD BE VIRAL 8)".format(site_link))
            return True

    except Exception as e:
        print("\nERROR! ({})".format(str(e)))
        conn.rollback()

    return False
Esempio n. 15
0
def save_link_retweets(query: str, t: dict) -> None:
    conn = get_mysql_conn()
    cursor = conn.cursor()

    retweet_count = int(t.get('retweet_count', 0))
    created_at = t['created_at']
    tweet_id = t['id_str']
    tweet = t['text']
    user_id = t['user']['id_str']
    user_name = t['user']['screen_name']
    blob = ujson.dumps(t).replace("'", r"\'")
    tweet_link = "https://twitter.com/{}/status/{}".format(user_name, tweet_id)

    sql = """
    INSERT INTO twitter_most_retweeted(created_at, tweet_id, retweet_count, tweet, user_id, user_name,
    tweet_blob, tweet_link, query)
    VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)
    """

    print("\nSaving tweet: {}...".format(tweet_link))

    try:
        cursor.execute(sql, (created_at, tweet_id, retweet_count, tweet,
                             user_id, user_name, blob, tweet_link, query))
        conn.commit()
    except Exception as e:
        print("\nERROR! ({})".format(str(e)))
        conn.rollback()

    cursor.close()
    return
Esempio n. 16
0
def update_link_retweets(query: str, t: dict):
    conn = get_mysql_conn()
    cursor = conn.cursor()

    retweet_count = int(t.get('retweet_count', 0))
    tweet_id = t['id_str']
    user_name = t['user']['screen_name']
    blob = ujson.dumps(t).replace("'", r"\'")
    tweet_link = "https://twitter.com/{}/status/{}".format(user_name, tweet_id)

    sql = """
    UPDATE twitter_most_retweeted
    SET retweet_count = {},
    tweet_blob = '{}'
    WHERE tweet_id = '{}'
    """.format(retweet_count, blob, tweet_id)

    try:
        cursor.execute(sql)
        conn.commit()
    except Exception as e:
        print("ERROR! ({})\n".format(str(e)))
        conn.rollback()

    cursor.close()
    return
Esempio n. 17
0
def update_member(email: str, item: dict) -> None:
    conn = get_mysql_conn()
    cursor = conn.cursor()

    rating = item.get('member_rating', 0)
    open_rate = item.get('stats', {}).get('avg_open_rate', 0)
    click_rate = item.get('stats', {}).get('avg_click_rate', 0)

    sql = """UPDATE mailchimp_members
    SET rating = {},
    open_rate = {},
    click_rate = {}
    WHERE email = "{}"
    """.format(rating, open_rate, click_rate, email)

    print("Updating member: {}...\n".format(email))

    try:
        cursor.execute(sql)
        conn.commit()
    except Exception as e:
        print("ERROR! ({})\n".format(str(e)))
        conn.rollback()

    cursor.close()
    return
Esempio n. 18
0
def save_member(item: dict) -> None:
    conn = get_mysql_conn()
    cursor = conn.cursor()

    city = item.get('merge_fields', {}).get('FCITY')
    role = item.get('merge_fields', {}).get('FROLE')
    country_code = item.get('location', {}).get('country_code')
    name = item.get('merge_fields', {}).get('FNAME')
    email = item.get('email_address')
    rating = item.get('member_rating', 0)
    open_rate = item.get('stats', {}).get('avg_open_rate', 0)
    click_rate = item.get('stats', {}).get('avg_click_rate', 0)

    sql = """
    INSERT INTO mailchimp_members(name, email, country_code, city, role, rating, open_rate, click_rate)
    VALUES (%s, %s, %s, %s, %s, %s, %s, %s)
    """

    print("Saving member: {}...\n".format(name))

    try:
        cursor.execute(sql, (name, email, country_code, city, role, rating,
                             open_rate, click_rate))
        conn.commit()
    except Exception as e:
        print("ERROR! ({})\n".format(str(e)))
        conn.rollback()

    cursor.close()
    return
Esempio n. 19
0
def update_link_stats(link: str, t: dict) -> None:
    conn = get_mysql_conn()
    cursor = conn.cursor()

    total = int(t.get('total', 0))
    facebook = int(t.get('shares', {}).get('facebook', 0))
    linkedin = int(t.get('shares', {}).get('linkedin', 0))
    twitter = int(t.get('shares', {}).get('twitter', 0))
    blob = ujson.dumps(t).replace("'", r"\'")

    sql = """
    UPDATE sharethis_stats
    SET total = {},
    facebook = {},
    linkedin = {},
    twitter = {},
    response_blob = '{}'
    WHERE site_link = '{}'
    """.format(total, facebook, linkedin, twitter, blob, link)
    print(sql)

    try:
        cursor.execute(sql)
        conn.commit()
    except Exception as e:
        print("ERROR! ({})\n".format(str(e)))
        conn.rollback()

    cursor.close()
    return
Esempio n. 20
0
def get_top_stream_last_entry(stream_id):
    """
    Get the last stats for a given stream

    :return:
    """
    conn = get_mysql_conn()
    cursor = conn.cursor()

    sql = """
    SELECT viewer_count FROM twitch_top_streams
    WHERE stream_id = "{}"
    ORDER BY insert_time DESC
    LIMIT 0,1
    """.format(stream_id)
    print(sql)

    cursor.execute(sql)

    conn.commit()
    rows = dictfecth(cursor)
    cursor.close()

    if rows:
        return rows[0]

    return False
Esempio n. 21
0
def get_fb_shares_by_domain(domain: str, threshold: int = 1, limit: int = 0):
    """
    Get links and shares for a given domain

    :param domain:
    :param threshold:
    :param limit:
    :return:
    """
    conn = get_mysql_conn()
    cursor = conn.cursor()

    sql = """
    SELECT site_link, site_link_title, fb_shares FROM facebook_most_shared
    WHERE site_link LIKE "%{}%" AND fb_shares > {} ORDER BY fb_shares
    """.format(domain, threshold)

    if limit > 0:
        sql += " LIMIT 0,{}".format(limit)

    cursor.execute(sql)

    conn.commit()
    rows = dictfecth(cursor)
    cursor.close()

    return rows
Esempio n. 22
0
def word_weight_upsert(word, weight):
    conn = get_mysql_conn()
    cursor = conn.cursor()

    # check if the word exists
    sql = """
    SELECT idprediction_blog_titles FROM prediction_blog_titles
    WHERE word = "{}";
    """.format(word)

    r = cursor.execute(sql)

    if r > 0:
        conn.commit()
        rows = dictfecth(cursor)

        # update shares
        id = rows[0]["idprediction_blog_titles"]
        sql = """
        UPDATE prediction_blog_titles
        SET weight = {}
        WHERE idprediction_blog_titles = '{}'
        """.format(weight, id)

        try:
            cursor.execute(sql)
            print("{} = {} weight updated.".format(word, weight))
            conn.commit()
        except Exception as e:
            print("ERROR! ({})\n".format(str(e)))
            conn.rollback()
            return False

        cursor.close()
        return True
    else:
        # insert new link with shares
        sql = """
        INSERT INTO prediction_blog_titles(word, weight)
        VALUES (%s, %s)
        """.format()

        try:
            cursor.execute(sql, (word, weight))
            print("{} = {} weight inserted.".format(word, weight))
            conn.commit()
        except Exception as e:
            print("\nERROR! ({})".format(str(e)))
            conn.rollback()
            return False

        cursor.close()
        return True
Esempio n. 23
0
def get_site_links_by_category(category: str):
    conn = get_mysql_conn()
    cursor = conn.cursor()

    sql = "SELECT A.site_link as site_link FROM scrapy_sites_links as A " \
          "INNER JOIN scrapy_sites as B ON A.site_url = B.site_url " \
          "WHERE B.category = '{}'".format(category)

    cursor.execute(sql)
    conn.commit()
    rows = dictfecth(cursor)
    cursor.close()

    return rows
Esempio n. 24
0
def get_top_tweets(retweet_threshold: int = 5, limit: int = 100):
    conn = get_mysql_conn()
    cursor = conn.cursor()

    sql = """
    SELECT * FROM twitter_most_retweeted
    WHERE retweet_count > {} ORDER BY retweet_count DESC LIMIT 0,{};
    """.format(retweet_threshold, limit)

    cursor.execute(sql)

    conn.commit()
    rows = dictfecth(cursor)
    cursor.close()

    return rows
Esempio n. 25
0
def get_open_rate_by_country():
    conn = get_mysql_conn()
    cursor = conn.cursor()

    sql = """
    select country_code as country, AVG(open_rate) as total from mailchimp_members
    WHERE country_code <> '' GROUP BY country_code;
    """

    cursor.execute(sql)

    conn.commit()
    rows = dictfecth(cursor)
    cursor.close()

    return rows
Esempio n. 26
0
def get_members_by_country():
    conn = get_mysql_conn()
    cursor = conn.cursor()

    sql = """
    SELECT country_code as country, COUNT(*) as total
    from mailchimp_members WHERE country_code <> '' GROUP BY country_code;
    """

    cursor.execute(sql)

    conn.commit()
    rows = dictfecth(cursor)
    cursor.close()

    return rows
Esempio n. 27
0
def get_top_open_rate(limit: int = 10):
    conn = get_mysql_conn()
    cursor = conn.cursor()

    sql = """
    select email_subject, unique_opens, open_rate, emails_sent
    from mailchimp_reports where emails_sent > 100 order by open_rate desc limit 0,{};
    """.format(limit)

    cursor.execute(sql)

    conn.commit()
    rows = dictfecth(cursor)
    cursor.close()

    return rows
Esempio n. 28
0
def get_word_weight(word):
    conn = get_mysql_conn()
    cursor = conn.cursor()

    sql = """
    select weight from prediction_blog_titles
    WHERE word = "{}";
    """.format(word)

    r = cursor.execute(sql)

    if r > 0:
        conn.commit()
        rows = dictfecth(cursor)

        return rows[0]["weight"]

    return 0
Esempio n. 29
0
def get_retweets_by_domain(domain: str, threshold: int = 10, limit: int = 0):
    conn = get_mysql_conn()
    cursor = conn.cursor()

    sql = """
    SELECT * FROM twitter_most_retweeted
    WHERE query LIKE "%{}%" AND retweet_count > {} ORDER BY retweet_count DESC
    """.format(domain, threshold)

    if limit > 0:
        sql += " LIMIT 0,{}".format(limit)

    cursor.execute(sql)

    conn.commit()
    rows = dictfecth(cursor)
    cursor.close()

    return rows
Esempio n. 30
0
def get_domains():
    """
    Get all domains

    :return:
    """
    conn = get_mysql_conn()
    cursor = conn.cursor()

    sql = """
    SELECT * FROM domain_stats;
    """

    cursor.execute(sql)
    conn.commit()
    rows = dictfecth(cursor)
    cursor.close()

    return rows