コード例 #1
0
"""
Script makes figure of retweet proportion in the last week from the Hillary Clinton 2016 collection.

@jonathanronen 2015/6
"""

import matplotlib.pyplot as plt
from datetime import datetime, timedelta
from smapp_toolkit.twitter import MongoTweetCollection

# Switch matplotlib to interactive mode before any figure is created.
plt.ion()

# NOTE(review): 'PASSWORD' looks like a placeholder -- confirm and supply the
# real credential before running.
col = MongoTweetCollection('smapp.politics.fas.nyu.edu', 27011,
                           'smapp_readOnly', 'PASSWORD',
                           'USElection2016_Hillary')

# The size must go through the ``figsize`` keyword: the first positional
# argument of plt.figure() is the figure identifier, so plt.figure(12, 12)
# does not request a 12x12 inch figure.
plt.figure(figsize=(12, 12))

# Plot only the tweets from the seven days leading up to now (UTC).
week_ago = datetime.utcnow() - timedelta(days=7)
col.since(week_ago).tweets_retweets_figure(show=False)
plt.title('Tweets and RT volume from Hillary 2016 collection')
plt.savefig('hillary_rts.png')
コード例 #2
0
"""
Script makes figure of the hourly volume of tweets with hashtags for one day in the Hungary collection.

@jonathanronen 2015/6
"""

import matplotlib.pyplot as plt
from datetime import datetime, timedelta
from smapp_toolkit.twitter import MongoTweetCollection

# Interactive mode on, so the figure is built without a blocking show().
plt.ion()

col = MongoTweetCollection(
    'smapp.politics.fas.nyu.edu',
    27011,
    'smapp_readOnly',
    'PASSWORD',
    'Sean_Hungary',
)
plt.figure(figsize=(12, 12))

# Limit the collection to the span between 2015-06-10 and 2015-06-11.
day_start = datetime(2015, 6, 10)
day_end = datetime(2015, 6, 11)
one_day = col.since(day_start).until(day_end)

# Tweets with hashtags grouped by hour; x ticks show the hour only.
one_day.tweets_with_hashtags_figure(
    group_by='hours',
    xtick_format='%H',
    show=False,
)
plt.title('Tweets with hashtags volume for a day in the Hungary collection')
plt.savefig('hungary_hts.png')
"""
This script counts all tweets from the Ukraine collection sent in the past 12 hours where the user's
specified 'location' field has one of the following words in it:
 - Kiev
 - Kyiv
 - Kiew
"""

from smapp_toolkit.twitter import MongoTweetCollection
from datetime import datetime, timedelta

# Address and credentials are placeholders to fill in before running.
collection = MongoTweetCollection(address='WRITE REAL DATABASE ADDRESS HERE',
                                  port=27011,
                                  username='******',
                                  password='******',
                                  dbname='Ukraine')

twelve_hours_ago = datetime.utcnow() - timedelta(hours=12)

# Count tweets whose user 'location' field contains one of the listed
# spellings, restricted to the last twelve hours.
matched = collection.user_location_containing('kiev', 'kiew', 'kyiv') \
                    .since(twelve_hours_ago) \
                    .count()

# print() with a single argument behaves the same on Python 2 and Python 3,
# whereas a bare print statement is a SyntaxError on Python 3.
print("Matched {} tweets.".format(matched))
コード例 #4
0
"""
Script makes figure of the hourly volume of tweets with hashtags for one day in the Hungary collection.

@jonathanronen 2015/6
"""

import matplotlib.pyplot as plt
from datetime import datetime, timedelta
from smapp_toolkit.twitter import MongoTweetCollection

# Enable matplotlib interactive mode for the rest of the script.
plt.ion()

col = MongoTweetCollection('smapp.politics.fas.nyu.edu', 27011, 'smapp_readOnly', 'PASSWORD', 'Sean_Hungary')
plt.figure(figsize=(12, 12))

# Options for the hashtag figure: hourly grouping, hour-only tick labels,
# and no immediate show() so the figure can be titled and saved below.
figure_options = dict(group_by='hours', xtick_format='%H', show=False)

# Query window: from 2015-06-10 until 2015-06-11.
window = col.since(datetime(2015, 6, 10)).until(datetime(2015, 6, 11))
window.tweets_with_hashtags_figure(**figure_options)

plt.title('Tweets with hashtags volume for a day in the Hungary collection')
plt.savefig('hungary_hts.png')
コード例 #5
0
"""
This script shows how you can use smapp-toolkit to plot tweets languages by time unit.
For the purpose of demonstration, we'll plot english and spanish tweets about Ebola by minute for one hour
on December 3 2014.

@jonathanronen
"""

from datetime import datetime, timedelta
from smapp_toolkit.twitter import MongoTweetCollection

# The plotted hour starts at 20:00 on December 3 2014.
start_time = datetime(2014, 12, 3, 20)

# Address and credentials are placeholders to fill in before running.
collection = MongoTweetCollection(address='WRITE REAL DATABASE ADDRESS HERE',
                                  port=27011,
                                  username='******',
                                  password='******',
                                  dbname='Ebola')

# Languages to plot and the color used for each, in matching order.
shown_languages = ['en', 'es', 'other']
shown_colors = ['red', 'royalblue', 'grey']

# Sixty one-minute steps from start_time cover exactly one hour.
one_minute = timedelta(minutes=1)
collection.languages_per_day_figure(start=start_time,
                                    step_size=one_minute,
                                    num_steps=60,
                                    languages=shown_languages,
                                    language_colors=shown_colors)
コード例 #6
0
"""
Script demonstrates the "geolocation_names_per_day_figure" and "user_locations_per_day_figure" functionality.

@jonathanronen 2015/6
"""

from datetime import datetime, timedelta
from smapp_toolkit.twitter import MongoTweetCollection
import matplotlib.pyplot as plt

# Interactive mode, so figures are created without blocking.
plt.ion()

col = MongoTweetCollection(
    'smapp.politics.fas.nyu.edu', 27011,
    'smapp_readOnly', 'PASSWORD',
    'Britain_Geo')

# Both figures use the same window: six 10-minute steps starting at
# 2015-01-12 17:34, with tick labels formatted as hour:minute.
window = dict(
    start=datetime(2015, 1, 12, 17, 34),
    step_size=timedelta(minutes=10),
    num_steps=6,
    xtick_format='%H:%M',
)

# First figure: geolocation names, n_names=5.
plt.figure(figsize=(10, 6))
col.geolocation_names_per_day_figure(n_names=5, **window)
plt.savefig('geolocation_names.png')

# Second figure: user-specified locations, n_names=8.
plt.figure(figsize=(10, 6))
col.user_locations_per_day_figure(n_names=8, **window)
plt.savefig('user_locations.png')
コード例 #7
0
You are invited to plot this for the "EricGarner" collection, which will show the enormous
volume of tweets using #ericgarner following that second no-indictment decision.

@jonathanronen
"""

from datetime import datetime, timedelta
from smapp_toolkit.twitter import MongoTweetCollection
from matplotlib import pyplot as plt

start_time = datetime(2014, 12, 3, 20)

collection = MongoTweetCollection(
    address='REAL SERVER',
    port=27011,
    username='******',
    password='******',
    dbname='IfTheyGunnedMeDown'
)

events = [
    (19,  'No indictment for Darren Wilson', 'bottom'), # nov 24
    (28, 'No indictment for Daniel Pantaleo', 'top'),  # dec 3
]

collection.tweets_per_day_with_annotations_figure(
    start=datetime(2014,11,5),
    num_steps=31,
    step_size=timedelta(days=1),
    alpha=.4,
    line_width=2.0,
コード例 #8
0
"""
This script shows how you can use smapp-toolkit to plot tweets languages by time unit.
For the purpose of demonstration, we'll plot english and spanish tweets about Ebola by minute for one hour
on December 3 2014.

@jonathanronen
"""

from datetime import datetime, timedelta
from smapp_toolkit.twitter import MongoTweetCollection

# Beginning of the hour to plot: December 3 2014, 20:00.
start_time = datetime(2014, 12, 3, 20)

collection = MongoTweetCollection(
    address='WRITE REAL DATABASE ADDRESS HERE',
    port=27011,
    username='******',
    password='******',
    dbname='Ebola',
)

# All figure settings gathered in one place: 60 steps of one minute each,
# with one color per plotted language (same order in both lists).
figure_settings = {
    'start': start_time,
    'step_size': timedelta(minutes=1),
    'num_steps': 60,
    'languages': ['en', 'es', 'other'],
    'language_colors': ['red', 'royalblue', 'grey'],
}
collection.languages_per_day_figure(**figure_settings)
"""
This example script finds all tweets sent by users who've set their location to ukraine or kiev,
and where the user's language is russian or ukrainian,
and saves them to a CSV file called ukraine_data.csv.


"""

from datetime import datetime
from smapp_toolkit.twitter import MongoTweetCollection

# Address and credentials are placeholders to fill in before running.
collection = MongoTweetCollection(
    address='ACTUAL DB ADDRESS',
    port=27011,
    username='******',
    password='******',
    dbname='Ukraine',
)

# Tweet fields written to the CSV file, in column order.
columns = [
    'id_str',
    'timestamp',
    'coordinates.coordinates.0',
    'coordinates.coordinates.1',
    'user.id_str',
    'user.lang',
    'lang',
    'text',
]

# Narrow the collection one filter at a time, then dump what is left.
one_day = collection.since(datetime(2013, 12, 1)).until(datetime(2013, 12, 2))
located = one_day.user_location_containing('ukraine', 'kiev', 'kyiv', 'kiew')
matching = located.user_lang_containing('uk', 'ru')
matching.dump_csv('ukraine_data.csv', columns=columns)
コード例 #10
0
"""
Script demonstrates the "geolocation_names_per_day_figure" and "user_locations_per_day_figure" functionality.

@jonathanronen 2015/6
"""

from datetime import datetime, timedelta
from smapp_toolkit.twitter import MongoTweetCollection
import matplotlib.pyplot as plt

# Turn on interactive mode before creating any figure.
plt.ion()

col = MongoTweetCollection('smapp.politics.fas.nyu.edu', 27011, 'smapp_readOnly', 'PASSWORD', 'Britain_Geo')

# One entry per output image: plotting method, number of names, file name.
figure_specs = (
    (col.geolocation_names_per_day_figure, 5, 'geolocation_names.png'),
    (col.user_locations_per_day_figure, 8, 'user_locations.png'),
)

for make_figure, name_count, out_file in figure_specs:
    plt.figure(figsize=(10, 6))
    # Six 10-minute steps starting at 2015-01-12 17:34, hour:minute ticks.
    make_figure(start=datetime(2015, 1, 12, 17, 34),
                step_size=timedelta(minutes=10),
                num_steps=6,
                n_names=name_count,
                xtick_format='%H:%M')
    plt.savefig(out_file)
コード例 #11
0
        args.database))

    TIMEZONE = pytz.timezone(args.timezone)
    print("Days will be split according to time zone {}".format(args.timezone))

    today = datetime.now().replace(hour=0,
                                   minute=0,
                                   second=0,
                                   microsecond=0,
                                   tzinfo=TIMEZONE)
    n_days_ago = today - timedelta(days=args.days)
    print("The period being considered is {} to {}".format(
        n_days_ago.strftime('%Y-%m-%d'), today.strftime('%Y-%m-%d')))

    print("Connecting to database")
    collection = MongoTweetCollection(args.server, args.port, args.user,
                                      args.password, args.database)

    ntweets = collection.since(n_days_ago).until(today).count()
    print("Considering {} tweets".format(ntweets))

    userids = set()
    counts = dict()
    for i in range(args.days):
        day_counts = defaultdict(lambda: 0)
        day_start = n_days_ago + i * timedelta(days=1)
        day_end = n_days_ago + (i + 1) * timedelta(days=1)
        print("Counting for {}".format(day_start.strftime('%Y-%m-%d')))
        for tweet in collection.since(day_start).until(day_end):
            day_counts[tweet['user']['id']] += 1
            userids.add(tweet['user']['id'])
        counts[day_start] = day_counts
コード例 #12
0
Demonstrate how to plot tweets per hour with annotation lines,
using the "data-then-plot" framework (the smapp_toolkit.plotting module)

@jonathanronen 2015/6
"""

import pytz
import getpass
import matplotlib.pyplot as plt
from datetime import datetime, timedelta
from smapp_toolkit.twitter import MongoTweetCollection
from smapp_toolkit.plotting import line_with_annotations

# Connect to database; the password is read interactively, not hard-coded.
print("Enter password for 'smapp_readOnly'")
col = MongoTweetCollection('smapp.politics.fas.nyu.edu', 27011, 'smapp_readOnly', getpass.getpass(), 'USElection2016_DTrumps')

# Set start time to a full day in the New York time zone, expressed as a
# naive UTC datetime.
# A pytz zone has to be attached with localize(): handing it to
# replace(tzinfo=...) applies the zone's local-mean-time offset (-4:56 for
# New York) instead of the offset in force on that date, which shifts the
# whole window by almost an hour.
new_york = pytz.timezone('America/New_York')
start_time = new_york.localize(datetime(2015, 6, 21)).astimezone(pytz.UTC).replace(tzinfo=None)
end_time = start_time + timedelta(days=1)

# Get the tweets-per-hour counts for that day from the database
data = col.since(start_time).until(end_time).group_by('hours').count()

# Define the events to annotate: (time, label, label position)
events = [
      (datetime(2015,6,21,10), 'Sunrise', 'top'),
      (datetime(2015,6,21,22), 'Sunset', 'bottom')
    ]

# Make plot
コード例 #13
0
"""
Script makes figure of retweet proportion in the last week from the Hillary Clinton 2016 collection.

@jonathanronen 2015/6
"""

import matplotlib.pyplot as plt
from datetime import datetime, timedelta
from smapp_toolkit.twitter import MongoTweetCollection

# Interactive mode on, so the figure is built without a blocking show().
plt.ion()

col = MongoTweetCollection('smapp.politics.fas.nyu.edu', 27011, 'smapp_readOnly', 'PASSWORD', 'USElection2016_Hillary')

# figsize has to be given by keyword as a (width, height) tuple; called as
# plt.figure(12, 12) the first 12 is taken as the figure number and the
# second is not a valid size.
plt.figure(figsize=(12, 12))

# Tweets and retweets from the last seven days, counted back from now (UTC).
since_time = datetime.utcnow() - timedelta(days=7)
col.since(since_time).tweets_retweets_figure(show=False)
plt.title('Tweets and RT volume from Hillary 2016 collection')
plt.savefig('hillary_rts.png')
コード例 #14
0
ファイル: barcharts.py プロジェクト: IWhisper/smapp-toolkit
one of total tweets per minute for 1 hour on november 1st,
the other of only tweets containing the word "death" in that same hour.

@jonathanronen
"""

import matplotlib.pyplot as plt
from datetime import datetime, timedelta
from smapp_toolkit.twitter import MongoTweetCollection

# Address and credentials are placeholders to fill in before running.
collection = MongoTweetCollection(
    address='REAL SERVER HERE',
    port=27011,
    username='******',
    password='******',
    dbname='Ebola'
)

# Start of the plotted hour. Assigned once: a previous
# datetime(2014, 12, 3, 20) value was overwritten here before any use.
start_time = datetime(2014, 11, 1)
plt.figure()

# First of two stacked subplots: all tweets, in 60 one-minute steps.
plt.subplot(211)
bins, counts = collection.tweets_over_time_figure(
    start_time,
    step_size=timedelta(minutes=1),
    num_steps=60,
    show=False)
plt.title('All tweets')
コード例 #15
0
ファイル: barcharts.py プロジェクト: kbenoit/smapp-toolkit
Script demonstrates plotting two histograms from the Ebola collection:
one of total tweets per minute for 1 hour on november 1st,
the other of only tweets containing the word "death" in that same hour.

@jonathanronen
"""

import matplotlib.pyplot as plt
from datetime import datetime, timedelta
from smapp_toolkit.twitter import MongoTweetCollection

# Address and credentials are placeholders to fill in before running.
collection = MongoTweetCollection(address='REAL SERVER HERE',
                                  port=27011,
                                  username='******',
                                  password='******',
                                  dbname='Ebola')

# Start of the plotted hour. Assigned once: the script used to set
# datetime(2014, 12, 3, 20) first and overwrite it before any use.
start_time = datetime(2014, 11, 1)
plt.figure()

# Top panel: every tweet, in 60 one-minute steps from start_time.
plt.subplot(211)
bins, counts = collection.tweets_over_time_figure(
    start_time, step_size=timedelta(minutes=1), num_steps=60, show=False)
plt.title('All tweets')

# Bottom panel: same window, restricted to tweets containing "death".
plt.subplot(212)
bins, counts = collection.containing('death').tweets_over_time_figure(
    start_time, step_size=timedelta(minutes=1), num_steps=60, show=False)
plt.title('Tweets containing "death"')
"""
This example script finds all tweets sent by users who've set their location to ukraine or kiev,
and where the user's language is russian or ukrainian,
and saves them to a CSV file called ukraine_data.csv.


"""

from datetime import datetime
from smapp_toolkit.twitter import MongoTweetCollection

collection = MongoTweetCollection(address='ACTUAL DB ADDRESS',
                                  port=27011,
                                  username='******',
                                  password='******',
                                  dbname='Ukraine')

columns = [
        'id_str',
        'timestamp',
        'coordinates.coordinates.0',
        'coordinates.coordinates.1',
        'user.id_str',
        'user.lang',
        'lang',
        'text'
        ]

collection.since(datetime(2013,12,1)) \
               .until(datetime(2013,12,2)) \
               .user_location_containing('ukraine', 'kiev', 'kyiv', 'kiew') \
コード例 #17
0
    parser.add_argument('--output-file', default='histogram.png', help='Output file [histogram.png]')

    args = parser.parse_args()
    print("Generating avg tweets/user/day histogram for {}".format(args.database))

    TIMEZONE = pytz.timezone(args.timezone)
    print("Days will be split according to time zone {}".format(args.timezone))

    today = datetime.now().replace(hour=0, minute=0, second=0, microsecond=0, tzinfo=TIMEZONE)
    n_days_ago = today - timedelta(days=args.days)
    print("The period being considered is {} to {}".format(
        n_days_ago.strftime('%Y-%m-%d'),
        today.strftime('%Y-%m-%d')))

    print("Connecting to database")
    collection = MongoTweetCollection(args.server, args.port, args.user, args.password, args.database)

    ntweets = collection.since(n_days_ago).until(today).count()
    print("Considering {} tweets".format(ntweets))

    userids = set()
    counts = dict()
    for i in range(args.days):
        day_counts = defaultdict(lambda: 0)
        day_start = n_days_ago + i*timedelta(days=1)
        day_end   = n_days_ago + (i+1)*timedelta(days=1)
        print("Counting for {}".format(day_start.strftime('%Y-%m-%d')))
        for tweet in collection.since(day_start).until(day_end):
            day_counts[tweet['user']['id']] += 1
            userids.add(tweet['user']['id'])
        counts[day_start] = day_counts