/
pyborg-reddit.py
88 lines (77 loc) · 2.79 KB
/
pyborg-reddit.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
#!/usr/bin/env python
#
# PyBorg reddit file input module
#
# Copyright (c) 2000, 2006, 2010 Tom Morton, Sebastien Dailly, Jrabbit
#
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#
import string
import sys
from urllib2 import urlopen
import json
import time
from pyborg import pyborg
class ModRedditIn(object):
    """
    Input module that feeds recent reddit comments to a PyBorg instance.

    All the work happens in the constructor: it polls the reddit comment
    listing, runs every comment body through ``pyborg.filter_message`` and
    hands the result to ``Borg.learn``.  Polling continues until the user
    interrupts it (Ctrl-C), at which point the constructor returns so the
    caller can save what was learned.
    """

    # Command list for this module
    commandlist = "Reddit Module Commands:\nNone"
    commanddict = {}

    # JSON listing of the newest reddit comments (up to 100 per request).
    url = 'http://www.reddit.com/comments.json?limit=100'

    # Seconds to wait between two polls of the listing.
    # NOTE(review): 30 matches the cache time reddit documents for listing
    # pages; polling faster only returns the same data. Confirm the value.
    poll_interval = 30

    def __init__(self, Borg):
        """
        Learn from reddit comments until interrupted.

        Borg -- the pyborg instance to teach; it must provide ``learn``,
                ``lines`` and ``settings.num_words``.

        A KeyboardInterrupt raised anywhere in the polling loop (download,
        learning or the pause between polls) ends the loop and is not
        re-raised.  Other exceptions, e.g. network errors from ``urlopen``,
        propagate to the caller.
        """
        try:
            while True:
                self._learn_once(Borg)
                time.sleep(self.poll_interval)
        except KeyboardInterrupt:
            # Return instead of carrying on, so that the caller gets the
            # chance to close the database cleanly.
            print("Premature termination :-(")

    def _fetch_listing(self):
        """Download the comment listing at ``self.url`` and return it parsed."""
        response = urlopen(self.url)
        try:
            raw = response.read()
        finally:
            # Release the connection even if the read fails.
            response.close()
        return json.loads(raw.decode('utf8'))

    @staticmethod
    def _extract_bodies(listing):
        """
        Map comment id -> comment text for one parsed listing.

        Entries whose 'body' is missing or empty are left out.
        """
        bodies = {}
        for child in listing['data']['children']:
            comment = child['data']
            if comment.get('body', None):
                bodies[comment['id']] = comment['body']
        return bodies

    def _learn_once(self, Borg):
        """Fetch one listing and teach Borg every comment body found in it."""
        bodies = self._extract_bodies(self._fetch_listing())
        print("I knew %r words (%r lines) before reading Reddit.com"
              % (Borg.settings.num_words, len(Borg.lines)))
        # Every comment is learned from its own text.  Looking the text up
        # through a variable left over from an earlier loop would feed the
        # same (last) comment to the bot once per entry.
        for body in bodies.values():
            buff = pyborg.filter_message(body, Borg)
            print(buff)
            Borg.learn(buff)
        print("I know %r words (%r lines) now."
              % (Borg.settings.num_words, len(Borg.lines)))

    def shutdown(self):
        """Do nothing; this module holds no resources to release."""
        pass

    def start(self):
        """Exit the interpreter via ``sys.exit()``."""
        sys.exit()

    def output(self, message, args):
        """Discard ``message``; this module only reads input."""
        pass
if __name__ == "__main__":
    # No command-line arguments are processed yet (subreddit selection is
    # not implemented), so the borg is started without any argument checks.
    my_pyborg = pyborg.pyborg()
    try:
        ModRedditIn(my_pyborg)
    finally:
        # Save even when the input module fails (e.g. on a network error),
        # so that whatever was learned up to that point is not lost.
        my_pyborg.save_all()
        del my_pyborg