forked from regosen/gallery_get
/
reddit_get.py
executable file
·146 lines (131 loc) · 5.44 KB
/
reddit_get.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
# REDDIT_GET is a tool for downloading all imgur albums and pictures
# that were submitted by a given reddit user.
#
# DEPENDENCIES
# This relies on gallery_get and the imgur_album plugin.
#
# See gallery_get for more info
#
# Rego Sen
# Nov 2, 2013
#
import gallery_get
import os, time, sys, traceback
import urllib, datetime, json
# Module-level copy of gallery_get's destination root, captured at import time.
# run_wrapped rebinds it (via `global`) and run_prompted shows it in its prompt.
DEST_ROOT = gallery_get.DEST_ROOT
# Select the plugin registered under "plugin_imgur_album" as gallery_get's
# active plugin (the header above notes this tool relies on that plugin).
gallery_get.PLUGIN = gallery_get.gallery_plugins.PLUGINS["plugin_imgur_album"]
def reddit_url(user):
    """Return the reddit JSON listing URL for the posts submitted by *user*.

    The user name is substituted verbatim into the URL, which carries a
    ``limit=1000`` query parameter.
    """
    listing_template = "http://www.reddit.com/user/%s/submitted/.json?limit=1000"
    return listing_template % user
def download_image(url, fileNameFull):
    """Queue a single image download unless the target file already exists.

    The target file name is ``fileNameFull`` made absolute and truncated to
    255 characters, followed by the extension found in ``url``. The folder
    that will contain the file is created when it is missing.

    Args:
        url: Link to the image; handed to gallery_get unchanged.
        fileNameFull: Destination path without the file extension.

    Returns:
        None. When the target file is already on disk a "Skipping" message
        is printed and no job is queued.
    """
    # Read the extension from the URL path only: a query string or fragment
    # (e.g. "abc.jpg?1") must not leak into the file name, where it would be
    # an invalid character on Windows and would defeat the exists-check below.
    urlPath = url.split("?", 1)[0].split("#", 1)[0]
    fileExtension = os.path.splitext(urlPath)[1]
    # full path must be 260 characters or lower
    fileName = os.path.abspath(fileNameFull)[:255] + fileExtension
    folder = os.path.dirname(fileName)
    if not os.path.exists(folder):
        os.makedirs(folder)
    elif os.path.exists(fileName):
        print("Skipping " + fileName)
        return
    gallery_get.add_job(path=url, dest=folder, subtitle=fileName)
def _load_reddit_json(user):
    """Return the parsed submissions listing for *user* (local file, else reddit)."""
    localpath = user + ".json"
    if os.path.exists(localpath):
        print("Getting JSON data from local file (%s)" % localpath)
        # Use a context manager so the handle is closed instead of leaked.
        with open(localpath, "r") as local_file:
            return json.loads(local_file.read())

    print("Requesting JSON data from reddit...")
    reddit_json = {}
    for _ in range(5):
        response = urllib.urlopen(reddit_url(user))
        try:
            reddit_json_str = response.read()
        finally:
            response.close()
        try:
            reddit_json = json.loads(reddit_json_str)
        except ValueError:
            # A reply that is not valid JSON counts as a failed attempt and
            # is retried, rather than aborting the whole retry loop.
            reddit_json = {}
        if "data" in reddit_json:
            break
        time.sleep(2)
    return reddit_json


def _imgur_direct_url(url):
    """Return the i.imgur.com image URL, with extension, for an imgur page URL."""
    # Create direct image URL with dummy extension (otherwise it will redirect)
    # Then get correct extension from header
    # (This is way faster than opening the redirect)
    img_base = url.replace("/imgur.com/", "/i.imgur.com/")
    ext = "jpg"
    response = urllib.urlopen("%s.%s" % (img_base, ext))
    try:
        content_type = response.headers.get("content-type") or ""
    finally:
        response.close()
    # Drop parameters such as ";charset=..." and only trust "image/<subtype>";
    # a missing or non-image header keeps the dummy "jpg" extension.
    media_type = content_type.split(";", 1)[0].strip().lower()
    if media_type.startswith("image/"):
        real_ext = media_type[len("image/"):]
        if real_ext and real_ext != "jpeg":  # jpeg -> jpg
            ext = real_ext
    return "%s.%s" % (img_base, ext)


def run_internal(user, dest):
    """Queue downloads for every supported link submitted by a reddit user.

    The listing is read from ``<user>.json`` in the current directory when
    that file exists, otherwise requested from reddit (up to 5 attempts,
    2 seconds apart). Each distinct link (compared case-insensitively) is
    dispatched by URL shape: i.imgur.com images, imgur albums, single imgur
    pages, vidble albums, and links ending in .jpg/.jpeg/.gif. Anything else
    is ignored. Every post gets its own "<date> - <title>" folder under
    ``dest/user``.

    Args:
        user: reddit user name.
        dest: root folder for the downloads.

    Returns:
        False as soon as a gallery_get album run reports failure (remaining
        posts are skipped and gallery_get.flush_jobs() is not called);
        otherwise None.
    """
    reddit_json = _load_reddit_json(user)
    if "data" not in reddit_json:
        print("ERROR getting json data after several retries! Does the user exist?")
        print("If so, try saving the contents of the following to [USERNAME].json and try again.")
        print(reddit_url(user))
        return

    visited_links = set()
    num_valid_posts = 0
    for post in reddit_json['data']['children']:
        url = post['data']['url']
        link_key = url.lower()
        if link_key in visited_links:
            print("Skipping already visited link: " + url)
            continue
        visited_links.add(link_key)

        cdate = post['data']['created']
        sdate = datetime.datetime.fromtimestamp(cdate).strftime("%Y-%m-%d")
        # Path separators in the title would otherwise create nested folders.
        title = post['data']['title'].replace('/', '_').replace('\\', '_').strip()
        if title:
            title = " - " + title
        folder = os.path.join(dest, user, gallery_get.safestr(sdate + title))

        # Branch order matters: the album test must precede the generic
        # "/imgur.com/" test, which would match album links as well.
        if "/i.imgur.com/" in url:
            download_image(url, folder)
        elif "/imgur.com/a/" in url:
            if not gallery_get.run_wrapped(url, folder, titleAsFolder=True, cacheDest=False, flushJobs=False):
                return False
        elif "/imgur.com/" in url:
            download_image(_imgur_direct_url(url), folder)
        elif "vidble.com/album" in url:
            if not gallery_get.run_wrapped(url, folder, titleAsFolder=True, cacheDest=False, flushJobs=False):
                return False
        elif url.endswith((".jpg", ".jpeg", ".gif")):
            download_image(url, folder)
        else:
            continue
        num_valid_posts += 1

    # Album runs above were started with flushJobs=False; flush once here.
    gallery_get.flush_jobs()
    if num_valid_posts == 0:
        print("\nApparently this user hasn't submitted any imgur links. Nothing to do.")
def run_wrapped(user, dest=""):
    """Resolve the destination folder and run the download, reporting errors.

    When ``dest`` is given it is passed to gallery_get.safeCacheDestination;
    otherwise, if gallery_get.DESTPATH_FILE exists, its stripped contents are
    used as the destination. The module-level DEST_ROOT is rebound to the
    result before run_internal is called.

    Any ordinary exception is reported on stdout (traceback, the parameters
    in use, and gallery_get.EXCEPTION_NOTICE) instead of propagating.

    Args:
        user: reddit user name.
        dest: destination root; empty means "use the cached destination".
    """
    global DEST_ROOT
    try:
        if dest:
            gallery_get.safeCacheDestination(dest)
        elif os.path.exists(gallery_get.DESTPATH_FILE):
            # Context manager closes the cache file instead of leaking it.
            with open(gallery_get.DESTPATH_FILE, "r") as dest_file:
                dest = dest_file.read().strip()
        DEST_ROOT = dest
        run_internal(user, dest)
    except Exception:
        # Deliberately not a bare "except:": Ctrl-C (KeyboardInterrupt) and
        # sys.exit() (SystemExit) must propagate, not be reported as a bug.
        print('\n' + '-'*60)
        traceback.print_exc(file=sys.stdout)
        print("Using params: [%s, %s]" % (user, dest))
        print('-'*60 + '\n')
        print(gallery_get.EXCEPTION_NOTICE)
def run_prompted():
    """Ask for a reddit user and a destination on stdin, then download.

    An empty user name prints "Nothing to do!" and exits the interpreter via
    sys.exit(). The destination prompt shows the current DEST_ROOT; the
    stripped answer is passed on to run_wrapped as typed.
    """
    user = raw_input("Input reddit user: ").strip()
    if user:
        dest_prompt = "Destination (%s): " % DEST_ROOT
        run_wrapped(user, raw_input(dest_prompt).strip())
        return
    print("Nothing to do!")
    sys.exit()
def run(user="", dest=""):
    """Download a user's submissions, prompting when no user is supplied.

    Args:
        user: reddit user name; when empty, run_prompted asks for it.
        dest: destination root forwarded to run_wrapped.
    """
    if user:
        run_wrapped(user, dest)
        return
    run_prompted()
# Treat this as a direct launch (rather than an import) when the script named
# in argv[0] has the same base name as this file.
cur_file = os.path.basename(str(__file__))
arg_file = sys.argv[0]
if arg_file and os.path.basename(arg_file) == cur_file:
    ### DIRECT LAUNCH (not import)
    if len(sys.argv) > 1:
        # use first parameter as reddit user, second (if exists) as dest;
        # any further arguments are ignored
        run_wrapped(*sys.argv[1:3])
    else:
        run_prompted()