forked from hugovk/word-tools
/
word_usage.py
executable file
·102 lines (86 loc) · 2.93 KB
/
word_usage.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
#!/usr/bin/env python
# encoding=utf8
"""
Show the number of mentions per month for a word:
word_usage -w twizzle
word_usage -w bae -y 2014
word_usage -w twizzle -s favorite
word_usage -w twizzle -s favourite
"""
from __future__ import print_function
from most_frequent_words import commafy
import argparse
from word_charts import load_csv, filter_year, filter_search_term
from collections import OrderedDict
def filter_word(tweets, desired_word):
    """Keep only the tweets whose "word" field equals desired_word.

    The comparison is an exact equality test. As a side effect, a
    "Total <word>:" line with the number of matches (formatted by
    commafy) is printed.

    Returns:
        A new list holding the matching tweets, in their original order.
    """
    matches = [tweet for tweet in tweets if desired_word == tweet["word"]]
    print("Total " + desired_word + ":\t" + commafy(len(matches)))
    return matches
def print_per_month(tweets):
    """Count tweets per month, print the counts and return them.

    Each tweet is a mapping with a "created_at" string that is sliced by
    position: characters 4-6 are used as the month and the last four
    characters as the year. That fits a timestamp shaped like
    "Mon Sep 01 12:00:00 +0000 2014" (presumably what load_csv yields;
    verify against the CSV).

    Months are reported in the order they are first seen, so pass the
    tweets already sorted chronologically.

    Returns:
        A tuple (xs, ys) of two parallel lists: the "Mon YYYY" labels and
        the number of tweets counted for each label.
    """
    counts = OrderedDict()
    for tweet in tweets:
        created_at = tweet["created_at"]
        month_year = created_at[4:7] + " " + created_at[-4:]
        counts[month_year] = counts.get(month_year, 0) + 1

    for month_year, count in counts.items():
        print(month_year, count)

    # OrderedDict keeps first-seen order, so keys and values stay aligned.
    return list(counts.keys()), list(counts.values())
if __name__ == "__main__":
    # Command-line entry point: load the CSV, narrow the tweets down to one
    # word (optionally also by search term and year), print the per-month
    # counts and optionally plot them.
    parser = argparse.ArgumentParser(
        description="Show the number of mentions per month for a word.",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument(
        '-c', '--csv', default='M:/bin/data/all.csv',
        help='Input CSV file')
    parser.add_argument(
        '-w', '--word', default='bae',
        help='Word to search for')
    parser.add_argument(
        '-y', '--year', type=int, default=None,
        help="Only from this year")
    parser.add_argument(
        '-s', '--search_term',
        help="Only for this search term")
    parser.add_argument(
        '-p', '--plot', action='store_true',
        help="Create a bar graph of results (requires Plotly authentication: "
             "https://plot.ly/python/getting-started/)")
    args = parser.parse_args()

    tweets = load_csv(args.csv)
    print("Total tweets:\t" + commafy(len(tweets)))

    tweets = filter_word(tweets, args.word)

    if args.search_term:
        tweets = filter_search_term(tweets, args.search_term)

    if args.year:
        tweets = filter_year(tweets, args.year)

    # Sort by ID = sort chronologically.
    # The IDs are strings, and a plain string sort would put "1000" before
    # "999". Comparing by length first and then lexicographically orders
    # digit strings numerically without raising on unexpected values.
    tweets = sorted(tweets, key=lambda k: (len(k['id_str']), k['id_str']))

    xs, ys = print_per_month(tweets)

    if args.plot:
        print("Plotting...")
        # Imported lazily so plotly is only required when --plot is given.
        # NOTE: plotly >= 4 moved this module to the separate chart-studio
        # package (chart_studio.plotly); this import needs plotly < 4.
        import plotly.plotly as py  # pip install plotly + needs auth
        import plotly.graph_objs as go

        layout = go.Layout(
            title=args.word
        )
        trace0 = go.Scatter(
            x=xs,
            y=ys,
            mode='lines+markers',
            name=args.word
        )
        data = [trace0]
        fig = go.Figure(data=data, layout=layout)
        plot_url = py.plot(fig, filename='word_usage_' + args.word)
        print(plot_url)
# End of file