Esempio n. 1
0
def export_featureless_counts(interval="day"):
    """
    Write per-interval counts of "featureless" Tweets to
    featureless_counts.csv.

    A Tweet is counted when the mentions / URLs / reply_to_tweet test in
    the loop below holds. Complex queries on many-to-many relationships
    are very contrived with peewee, so for the sake of simplicity this
    function iterates over the Tweets of each interval and tests them one
    by one in Python instead of filtering in the database.

    The file starts with the header line "<interval>,featureless," and is
    followed by one "<timestamp>,<count>," line per interval.
    """
    # Every cell, including the last one of a line, is followed by a comma
    cell = "{0},"
    with open("featureless_counts.csv", "w") as out:
        # Header line
        out.write(cell.format(interval))
        out.write("featureless,")
        out.write("\n")
        # Iterator yielding ((start, stop), query) pairs, one per interval
        buckets = database.objects_by_interval(database.Tweet,
                                               interval=interval,
                                               start_date=None,
                                               stop_date=None)
        for (bucket_start, _bucket_stop), tweets in buckets:
            # Timestamps are reported in Mountain Standard Time, which is
            # the local timezone for the example data
            stamp = MST.normalize(bucket_start).strftime(
                "%Y-%m-%d %H:%M:%S %z")
            out.write(cell.format(stamp))
            # NOTE(review): is_null() may build a query expression rather
            # than return a boolean; confirm that these two checks really
            # test for "no mentions" / "no URLs".
            matching = sum(
                1 for tweet in tweets
                if tweet.mentions.is_null() and tweet.urls.is_null()
                and tweet.reply_to_tweet is None)
            out.write(cell.format(matching))
            out.write("\n")
Esempio n. 2
0
def export_user_counts(interval="day",
                       usernames=(
                           "JebBush", "RealBenCarson", "ChrisChristie",
                           "tedcruz", "CarlyFiorina", "GovMikeHuckabee",
                           "JohnKasich", "RandPaul", "marcorubio",
                           "realDonaldTrump"
                       )):
    """
    Write per-interval Tweet counts for the given Users to user_counts.csv.

    For every interval produced by database.objects_by_interval, the
    interval's Tweet query is joined to database.User and counted once per
    username, matching the username with ``==``.

    Args:
        interval: Interval name handed to database.objects_by_interval; it
            is also written as the first header cell.
        usernames: Iterable of usernames, one CSV column each. Any
            iterable is accepted (including a generator); it is copied to
            a list before use.

    The file starts with the header "<interval>,<name1>,...,<nameN>," and
    is followed by one "<timestamp>,<count1>,...,<countN>," line per
    interval.
    """
    # Materialize once: the names are needed for the header and again for
    # every interval, which a one-shot iterable could not provide.
    usernames = list(usernames)
    # Create output file
    with open("user_counts.csv", "w") as f:
        # Write header line
        f.write("{0},".format(interval))
        f.write(",".join(usernames))
        f.write(",\n")
        # Prepare iterator over intervals; each item is a
        # ((start, stop), query) pair
        intervals = database.objects_by_interval(database.Tweet,
                                                 interval=interval,
                                                 start_date=None,
                                                 stop_date=None)
        for (interval_start, _interval_stop), query in intervals:
            # Convert the timestamp to Mountain Standard Time which is
            # the local timezone for the example data
            timestamp = MST.normalize(interval_start).strftime(
                "%Y-%m-%d %H:%M:%S %z")
            # Match precise username
            counts = [
                query.join(database.User).where(
                    database.User.username == username).count()
                for username in usernames
            ]
            # Every cell, including the last one, is followed by a comma.
            # The row is written only after all counts succeeded, so a
            # failing query cannot leave a half-written line behind.
            row = [timestamp] + counts
            f.write("".join("{0},".format(value) for value in row))
            f.write("\n")
Esempio n. 3
0
def export_featureless_counts(interval="day"):
    """
    Export the number of "featureless" Tweets per interval as CSV.

    The result is written to featureless_counts.csv: a header line
    "<interval>,featureless," followed by one "<timestamp>,<count>," line
    for each interval. Because complex queries on many-to-many
    relationships are very contrived with peewee, the Tweets of every
    interval are fetched and checked individually in Python rather than
    being filtered by the database.
    """
    def lacks_features(tweet):
        # NOTE(review): is_null() may return a query expression instead of
        # a boolean; verify that the first two operands actually detect
        # Tweets without mentions and without URLs.
        return bool(tweet.mentions.is_null() and tweet.urls.is_null()
                    and tweet.reply_to_tweet is None)

    with open("featureless_counts.csv", "w") as csv_file:
        # Header: interval name, then the single data column
        csv_file.write("{0},".format(interval))
        csv_file.write("featureless,\n")
        # One ((start, stop), query) pair per interval
        per_interval = database.objects_by_interval(
            database.Tweet, interval=interval, start_date=None, stop_date=None)
        for (start, _stop), tweets in per_interval:
            # Mountain Standard Time is the local timezone for the
            # example data, so timestamps are reported in it
            local_time = MST.normalize(start).strftime("%Y-%m-%d %H:%M:%S %z")
            csv_file.write("{0},".format(local_time))
            total = 0
            for tweet in tweets:
                if not lacks_features(tweet):
                    continue
                total += 1
            csv_file.write("{0},\n".format(total))
Esempio n. 4
0
def export_keyword_counts(interval="day",
                          keywords=(
                              "Bush", "Carson", "Christie", "Cruz", "Fiorina",
                              "Huckabee", "Kasich", "Paul", "Rubio", "Trump"
                          )):
    """
    Write per-interval Tweet counts for the given keywords to
    keyword_counts.csv.

    For every interval produced by database.objects_by_interval, the
    interval's Tweet query is filtered once per keyword: a Tweet matches
    when its lower-cased text contains the lower-cased keyword.

    Args:
        interval: Interval name handed to database.objects_by_interval; it
            is also written as the first header cell.
        keywords: Iterable of keyword strings, one CSV column each. Any
            iterable is accepted (including a generator); it is copied to
            a list before use.

    The file starts with the header "<interval>,<kw1>,...,<kwN>," and is
    followed by one "<timestamp>,<count1>,...,<countN>," line per
    interval.
    """
    # Materialize once: the keywords are needed for the header and again
    # for every interval, which a one-shot iterable could not provide.
    keywords = list(keywords)
    # Lower-casing does not depend on the interval, so do it only once
    needles = [word.lower() for word in keywords]
    # Create output file
    with open("keyword_counts.csv", "w") as f:
        # Write header line (keywords keep their original spelling)
        f.write("{0},".format(interval))
        f.write(",".join(keywords))
        f.write(",\n")
        # Prepare iterator over intervals; each item is a
        # ((start, stop), query) pair
        intervals = database.objects_by_interval(database.Tweet,
                                                 interval=interval,
                                                 start_date=None,
                                                 stop_date=None)
        for (interval_start, _interval_stop), query in intervals:
            # Convert the timestamp to Mountain Standard Time which is
            # the local timezone for the example data
            timestamp = MST.normalize(interval_start).strftime(
                "%Y-%m-%d %H:%M:%S %z")
            # Match ignoring case
            counts = [
                query.where(
                    peewee.fn.Lower(database.Tweet.text).contains(
                        needle)).count()
                for needle in needles
            ]
            # Every cell, including the last one, is followed by a comma.
            # The row is written only after all counts succeeded, so a
            # failing query cannot leave a half-written line behind.
            row = [timestamp] + counts
            f.write("".join("{0},".format(value) for value in row))
            f.write("\n")
Esempio n. 5
0
def export_hashtag_counts(interval="day",
                          hashtags=(
                              "Bush", "Carson", "Christie", "Cruz", "Fiorina",
                              "Huckabee", "Kasich", "Paul", "Rubio", "Trump"
                          )):
    """
    Write per-interval Tweet counts for the given hashtags to
    hashtag_counts.csv.

    For every interval produced by database.objects_by_interval, the
    interval's Tweet query is joined through the Tweet.tags through-model
    to database.Hashtag and counted once per hashtag, comparing the
    lower-cased tag with the lower-cased hashtag.

    A bit slow. An easy speedup is to convert the list of hashtags to
    Hashtag database objects and query for them.

    Args:
        interval: Interval name handed to database.objects_by_interval; it
            is also written as the first header cell.
        hashtags: Iterable of hashtag strings, one CSV column each. Any
            iterable is accepted (including a generator); it is copied to
            a list before use.

    The file starts with the header "<interval>,<tag1>,...,<tagN>," and is
    followed by one "<timestamp>,<count1>,...,<countN>," line per
    interval.
    """
    # Materialize once: the hashtags are needed for the header and again
    # for every interval, which a one-shot iterable could not provide.
    hashtags = list(hashtags)
    # Lower-casing does not depend on the interval, so do it only once
    wanted = [tag.lower() for tag in hashtags]
    # Create output file
    with open("hashtag_counts.csv", "w") as f:
        # Write header line (hashtags keep their original spelling)
        f.write("{0},".format(interval))
        f.write(",".join(hashtags))
        f.write(",\n")
        # htm is an intermediary model for many-to-many-relationships
        # In this case Tweet -> htm -> Hashtag
        htm = database.Tweet.tags.get_through_model()
        # Prepare iterator over intervals; each item is a
        # ((start, stop), query) pair
        intervals = database.objects_by_interval(database.Tweet,
                                                 interval=interval,
                                                 start_date=None,
                                                 stop_date=None)
        for (interval_start, _interval_stop), query in intervals:
            # Convert the timestamp to Mountain Standard Time which is
            # the local timezone for the example data
            timestamp = MST.normalize(interval_start).strftime(
                "%Y-%m-%d %H:%M:%S %z")
            # Match ignoring case
            counts = [
                query.join(htm).join(database.Hashtag).where(
                    peewee.fn.Lower(database.Hashtag.tag) == tag).count()
                for tag in wanted
            ]
            # Every cell, including the last one, is followed by a comma.
            # The row is written only after all counts succeeded, so a
            # failing query cannot leave a half-written line behind.
            row = [timestamp] + counts
            f.write("".join("{0},".format(value) for value in row))
            f.write("\n")
Esempio n. 6
0
def export_user_counts(
        interval="day",
        usernames=("JebBush", "RealBenCarson", "ChrisChristie", "tedcruz",
                   "CarlyFiorina", "GovMikeHuckabee", "JohnKasich",
                   "RandPaul", "marcorubio", "realDonaldTrump")):
    """
    Create per-interval Tweet counts for given Users in user_counts.csv.

    Each interval's Tweet query (as produced by
    database.objects_by_interval) is joined to database.User and counted
    once per username, comparing the username with ``==``.

    Args:
        interval: Interval name forwarded to
            database.objects_by_interval and used as the first header
            cell.
        usernames: Iterable of usernames; each becomes one CSV column.
            One-shot iterables such as generators are supported because
            the names are copied to a list first.

    Output format: a header line "<interval>,<name1>,...,<nameN>," and
    then one "<timestamp>,<count1>,...,<countN>," line per interval.
    """
    # The names are used for the header and once more per interval, so a
    # generator argument must be materialized before the first use.
    usernames = list(usernames)
    # Create output file
    with open("user_counts.csv", "w") as f:
        # Write header line
        f.write("{0},".format(interval))
        f.write(",".join(usernames))
        f.write(",\n")
        # Prepare iterator over intervals; it yields
        # ((start, stop), query) pairs
        intervals = database.objects_by_interval(
            database.Tweet, interval=interval, start_date=None, stop_date=None)
        for (interval_start, _interval_stop), query in intervals:
            # Convert the timestamp to Mountain Standard Time which is
            # the local timezone for the example data
            timestamp = MST.normalize(interval_start).strftime(
                "%Y-%m-%d %H:%M:%S %z")
            cells = ["{0},".format(timestamp)]
            for username in usernames:
                # Match precise username
                ucount = query.join(database.User).where(
                    database.User.username == username).count()
                cells.append("{0},".format(ucount))
            # Emit the complete row at once so that a failing query does
            # not leave a partial line in the file
            f.write("".join(cells))
            f.write("\n")
Esempio n. 7
0
def export_keyword_counts(
        interval="day",
        keywords=("Bush", "Carson", "Christie", "Cruz", "Fiorina",
                  "Huckabee", "Kasich", "Paul", "Rubio", "Trump")):
    """
    Create per-interval Tweet counts for given keywords in
    keyword_counts.csv.

    Each interval's Tweet query (as produced by
    database.objects_by_interval) is filtered once per keyword; a Tweet
    matches when its lower-cased text contains the lower-cased keyword.

    Args:
        interval: Interval name forwarded to
            database.objects_by_interval and used as the first header
            cell.
        keywords: Iterable of keyword strings; each becomes one CSV
            column. One-shot iterables such as generators are supported
            because the keywords are copied to a list first.

    Output format: a header line "<interval>,<kw1>,...,<kwN>," and then
    one "<timestamp>,<count1>,...,<countN>," line per interval.
    """
    # The keywords are used for the header and once more per interval, so
    # a generator argument must be materialized before the first use.
    keywords = list(keywords)
    # Loop-invariant: lower-case each keyword a single time
    lowered = [word.lower() for word in keywords]
    # Create output file
    with open("keyword_counts.csv", "w") as f:
        # Write header line (original spelling of the keywords)
        f.write("{0},".format(interval))
        f.write(",".join(keywords))
        f.write(",\n")
        # Prepare iterator over intervals; it yields
        # ((start, stop), query) pairs
        intervals = database.objects_by_interval(
            database.Tweet, interval=interval, start_date=None, stop_date=None)
        for (interval_start, _interval_stop), query in intervals:
            # Convert the timestamp to Mountain Standard Time which is
            # the local timezone for the example data
            timestamp = MST.normalize(interval_start).strftime(
                "%Y-%m-%d %H:%M:%S %z")
            cells = ["{0},".format(timestamp)]
            for word in lowered:
                # Match ignoring case
                kwcount = query.where(
                    peewee.fn.Lower(database.Tweet.text).contains(word)).count()
                cells.append("{0},".format(kwcount))
            # Emit the complete row at once so that a failing query does
            # not leave a partial line in the file
            f.write("".join(cells))
            f.write("\n")
Esempio n. 8
0
def export_mention_counts(
        interval="day",
        usernames=("jebbush", "realbencarson", "chrischristie", "tedcruz",
                   "carlyfiorina", "govmikehuckabee", "johnkasich",
                   "randpaul", "marcorubio", "realdonaldtrump")):
    """
    Create per-interval counts for mentions of given Users in
    mention_counts.csv.

    Each interval's Tweet query (as produced by
    database.objects_by_interval) is joined through the Tweet.mentions
    through-model to database.User and counted once per username,
    comparing the lower-cased stored username with the lower-cased
    requested one.

    Args:
        interval: Interval name forwarded to
            database.objects_by_interval and used as the first header
            cell.
        usernames: Iterable of usernames; each becomes one CSV column.
            One-shot iterables such as generators are supported because
            the names are copied to a list first.

    Output format: a header line "<interval>,<name1>,...,<nameN>," and
    then one "<timestamp>,<count1>,...,<countN>," line per interval.
    """
    # The names are used for the header and once more per interval, so a
    # generator argument must be materialized before the first use.
    usernames = list(usernames)
    # Loop-invariant: lower-case each username a single time
    lowered = [user.lower() for user in usernames]
    # Create output file
    with open("mention_counts.csv", "w") as f:
        # Write header line (usernames exactly as they were passed in)
        f.write("{0},".format(interval))
        f.write(",".join(usernames))
        f.write(",\n")
        # mtm is the intermediary model of the Tweet.mentions
        # many-to-many-relationship: Tweet -> mtm -> User
        mtm = database.Tweet.mentions.get_through_model()
        # Prepare iterator over intervals; it yields
        # ((start, stop), query) pairs
        intervals = database.objects_by_interval(
            database.Tweet, interval=interval, start_date=None, stop_date=None)
        for (interval_start, _interval_stop), query in intervals:
            # Convert the timestamp to Mountain Standard Time which is
            # the local timezone for the example data
            timestamp = MST.normalize(interval_start).strftime(
                "%Y-%m-%d %H:%M:%S %z")
            cells = ["{0},".format(timestamp)]
            for user in lowered:
                # Match ignoring case
                count = query.join(mtm).join(database.User).where(
                    peewee.fn.Lower(database.User.username) == user).count()
                cells.append("{0},".format(count))
            # Emit the complete row at once so that a failing query does
            # not leave a partial line in the file
            f.write("".join(cells))
            f.write("\n")
Esempio n. 9
0
def export_hashtag_counts(
        interval="day",
        hashtags=("Bush", "Carson", "Christie", "Cruz", "Fiorina",
                  "Huckabee", "Kasich", "Paul", "Rubio", "Trump")):
    """
    Create per-interval Tweet counts for given hashtags in
    hashtag_counts.csv.

    Each interval's Tweet query (as produced by
    database.objects_by_interval) is joined through the Tweet.tags
    through-model to database.Hashtag and counted once per hashtag,
    comparing the lower-cased stored tag with the lower-cased requested
    one.

    A bit slow. An easy speedup is to convert the list of hashtags to
    Hashtag database objects and query for them.

    Args:
        interval: Interval name forwarded to
            database.objects_by_interval and used as the first header
            cell.
        hashtags: Iterable of hashtag strings; each becomes one CSV
            column. One-shot iterables such as generators are supported
            because the hashtags are copied to a list first.

    Output format: a header line "<interval>,<tag1>,...,<tagN>," and then
    one "<timestamp>,<count1>,...,<countN>," line per interval.
    """
    # The hashtags are used for the header and once more per interval, so
    # a generator argument must be materialized before the first use.
    hashtags = list(hashtags)
    # Loop-invariant: lower-case each hashtag a single time
    lowered = [tag.lower() for tag in hashtags]
    # Create output file
    with open("hashtag_counts.csv", "w") as f:
        # Write header line (original spelling of the hashtags)
        f.write("{0},".format(interval))
        f.write(",".join(hashtags))
        f.write(",\n")
        # htm is an intermediary model for many-to-many-relationships
        # In this case Tweet -> htm -> Hashtag
        htm = database.Tweet.tags.get_through_model()
        # Prepare iterator over intervals; it yields
        # ((start, stop), query) pairs
        intervals = database.objects_by_interval(
            database.Tweet, interval=interval, start_date=None, stop_date=None)
        for (interval_start, _interval_stop), query in intervals:
            # Convert the timestamp to Mountain Standard Time which is
            # the local timezone for the example data
            timestamp = MST.normalize(interval_start).strftime(
                "%Y-%m-%d %H:%M:%S %z")
            cells = ["{0},".format(timestamp)]
            for tag in lowered:
                # Match ignoring case
                count = query.join(htm).join(database.Hashtag).where(
                    peewee.fn.Lower(database.Hashtag.tag) == tag).count()
                cells.append("{0},".format(count))
            # Emit the complete row at once so that a failing query does
            # not leave a partial line in the file
            f.write("".join(cells))
            f.write("\n")
Esempio n. 10
0
def export_total_counts(interval="day"):
    """
    Write the number of Tweets per interval to total_counts.csv.

    The file starts with the header line "<interval>,total," and is
    followed by one "<timestamp>,<count>," line for every interval that
    database.objects_by_interval yields for the given interval name.
    """
    stamp_format = "%Y-%m-%d %H:%M:%S %z"
    with open("total_counts.csv", "w") as out:
        # Header line
        out.write("{0},total,\n".format(interval))
        # One ((start, stop), query) pair per interval
        buckets = database.objects_by_interval(
            database.Tweet, interval=interval, start_date=None, stop_date=None)
        for (bucket_start, _bucket_stop), tweets in buckets:
            # Report timestamps in Mountain Standard Time, the local
            # timezone for the example data
            stamp = MST.normalize(bucket_start).strftime(stamp_format)
            out.write("{0},".format(stamp))
            out.write("{0},\n".format(tweets.count()))
Esempio n. 11
0
def export_total_counts(interval="day"):
    """
    Export the total Tweet count of every interval as CSV.

    Output goes to total_counts.csv: first the header "<interval>,total,",
    then one "<timestamp>,<count>," line per interval obtained from
    database.objects_by_interval.
    """
    def as_cell(value):
        # Each cell, including the last one of a line, ends with a comma
        return "{0},".format(value)

    with open("total_counts.csv", "w") as csv_file:
        # Header: interval name and the single data column
        csv_file.write(as_cell(interval))
        csv_file.write(as_cell("total"))
        csv_file.write("\n")
        # Iterator of ((start, stop), query) pairs, one per interval
        per_interval = database.objects_by_interval(database.Tweet,
                                                    interval=interval,
                                                    start_date=None,
                                                    stop_date=None)
        for (start, _stop), tweets in per_interval:
            # Mountain Standard Time is the local timezone for the
            # example data, so timestamps are converted to it
            local_start = MST.normalize(start)
            csv_file.write(as_cell(local_start.strftime("%Y-%m-%d %H:%M:%S %z")))
            csv_file.write(as_cell(tweets.count()))
            csv_file.write("\n")
Esempio n. 12
0
def export_mention_counts(interval="day",
                          usernames=(
                              "jebbush", "realbencarson", "chrischristie",
                              "tedcruz", "carlyfiorina", "govmikehuckabee",
                              "johnkasich", "randpaul", "marcorubio",
                              "realdonaldtrump"
                          )):
    """
    Write per-interval counts for mentions of the given Users to
    mention_counts.csv.

    For every interval produced by database.objects_by_interval, the
    interval's Tweet query is joined through the Tweet.mentions
    through-model to database.User and counted once per username,
    comparing the lower-cased stored username with the lower-cased
    requested one.

    Args:
        interval: Interval name handed to database.objects_by_interval; it
            is also written as the first header cell.
        usernames: Iterable of usernames, one CSV column each. Any
            iterable is accepted (including a generator); it is copied to
            a list before use.

    The file starts with the header "<interval>,<name1>,...,<nameN>," and
    is followed by one "<timestamp>,<count1>,...,<countN>," line per
    interval.
    """
    # Materialize once: the names are needed for the header and again for
    # every interval, which a one-shot iterable could not provide.
    usernames = list(usernames)
    # Lower-casing does not depend on the interval, so do it only once
    wanted = [user.lower() for user in usernames]
    # Create output file
    with open("mention_counts.csv", "w") as f:
        # Write header line (usernames exactly as they were passed in)
        f.write("{0},".format(interval))
        f.write(",".join(usernames))
        f.write(",\n")
        # mtm is the intermediary model of the Tweet.mentions
        # many-to-many-relationship: Tweet -> mtm -> User
        mtm = database.Tweet.mentions.get_through_model()
        # Prepare iterator over intervals; each item is a
        # ((start, stop), query) pair
        intervals = database.objects_by_interval(database.Tweet,
                                                 interval=interval,
                                                 start_date=None,
                                                 stop_date=None)
        for (interval_start, _interval_stop), query in intervals:
            # Convert the timestamp to Mountain Standard Time which is
            # the local timezone for the example data
            timestamp = MST.normalize(interval_start).strftime(
                "%Y-%m-%d %H:%M:%S %z")
            # Match ignoring case
            counts = [
                query.join(mtm).join(database.User).where(
                    peewee.fn.Lower(database.User.username) == user).count()
                for user in wanted
            ]
            # Every cell, including the last one, is followed by a comma.
            # The row is written only after all counts succeeded, so a
            # failing query cannot leave a half-written line behind.
            row = [timestamp] + counts
            f.write("".join("{0},".format(value) for value in row))
            f.write("\n")