/
Flickr.py
executable file
·349 lines (278 loc) · 12.6 KB
/
Flickr.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
#!/usr/bin/env python
import sys
import os
import urllib2
#import flickrapi
import json
import pprint
import time, calendar
from datetime import timedelta
import datetime
import math
from PIL import Image
import glob
import subprocess
import pymongo
import dpy
# Shared pretty-printer for ad-hoc debugging output (3-space indent).
pp = pprint.PrettyPrinter(indent=3)
# Module-level Flickr client placeholder; it is rebound to a Flickr() instance
# further down in this file, after the Flickr class has been defined.
flickr = None
def flickrJSON2MongoGeoJSON(flickrJSON):
    """Wrap a Flickr photo record in the document shape used for geo queries.

    The returned dict keeps the untouched Flickr record under ``'flickr'`` and
    adds ``'location'`` as ``[latitude, longitude]``, both converted with
    ``float``.
    """
    coordinates = [float(flickrJSON[axis]) for axis in ('latitude', 'longitude')]
    return {'flickr': flickrJSON, 'location': coordinates}
class GeoMine:  # TODO separate top level mine and this (sub mine)
    """One cell of a Flickr crawl: a bounding box plus an upload-time window.

    ``params`` is a dict with the keys ``'bbox'`` (a dict with ``'left'``,
    ``'right'``, ``'top'`` and ``'bottom'``), ``'min_upload_time'`` and
    ``'max_upload_time'``.  The query itself is run lazily through the
    module-level ``flickr`` object and cached in ``self.results``.
    """

    def __init__(self, params):
        self.bbox = params['bbox']
        # TODO make low/high constructor instead
        self.min_upload_time = params['min_upload_time']
        self.max_upload_time = params['max_upload_time']
        # Page size requested from Flickr; also the basis of the truncation test.
        self.limit = 250
        # None means "query not run yet".
        self.results = None

    def toJSON(self):
        """Return the constructor parameters serialised as a JSON string."""
        params = {
            'bbox': self.bbox,
            'min_upload_time': self.min_upload_time,
            'max_upload_time': self.max_upload_time,
        }
        return json.dumps(params)

    def llwidth(self):
        """Return the box width (``right - left``)."""
        return self.bbox['right'] - self.bbox['left']

    def llheight(self):
        """Return the box height (``top - bottom``)."""
        return self.bbox['top'] - self.bbox['bottom']

    def might_be_truncated(self):
        """Return True when the result count reaches 90% of ``self.limit``.

        Runs the query first if it has not been run yet and prints the box
        together with the number of results.
        """
        self.assure_query_ran()
        print(str(self.bbox) + ": " + str(len(self.results)))
        return len(self.results) >= int(0.9 * self.limit)

    def assure_query_ran(self):
        """Run the Flickr query once and cache the photos in ``self.results``."""
        if self.results is None:
            self.results = flickr.query_bbox_and_upload_time_segment(
                self.bbox, self.min_upload_time, self.max_upload_time, self.limit)

    def children(self):
        """Yield the sub-mines obtained by splitting this mine.

        While both box sides exceed ``MINIMUM_QUAD_SIZE`` the box is split into
        four quadrants that keep the same time window.  Otherwise the box is
        kept and the time window is split in two at its midpoint.
        """
        MINIMUM_QUAD_SIZE = 0.0001
        if self.llwidth() > MINIMUM_QUAD_SIZE and self.llheight() > MINIMUM_QUAD_SIZE:
            half_width = self.llwidth() / 2
            half_height = self.llheight() / 2
            for r in range(0, 2):
                for c in range(0, 2):
                    child_bbox = dict()
                    child_bbox['left'] = self.bbox['left'] + c * half_width
                    child_bbox['right'] = child_bbox['left'] + half_width
                    child_bbox['bottom'] = self.bbox['bottom'] + r * half_height
                    child_bbox['top'] = child_bbox['bottom'] + half_height
                    yield GeoMine({
                        'bbox': child_bbox,
                        'min_upload_time': self.min_upload_time,
                        'max_upload_time': self.max_upload_time,
                    })
        else:
            print("Subdivided time")
            # Halve each bound separately, exactly as before, so integer
            # timestamps keep producing the same midpoint.
            middle_upload_time = self.min_upload_time / 2 + self.max_upload_time / 2
            yield GeoMine({
                'bbox': self.bbox,
                'min_upload_time': self.min_upload_time,
                'max_upload_time': middle_upload_time,
            })
            yield GeoMine({
                'bbox': self.bbox,
                'min_upload_time': middle_upload_time,
                'max_upload_time': self.max_upload_time,
            })

    def store_photos_and_metadata(self):
        """Store every result photo, logging and skipping the ones that fail.

        A failing photo has its locator string appended to
        ``data/flickr_mine/error.log`` and is followed by a 4 second pause.
        Only ``Exception`` subclasses are treated as per-photo failures, so
        ``KeyboardInterrupt`` and ``SystemExit`` still stop the run.
        """
        self.assure_query_ran()
        for photo in self.results:
            try:
                photo.store_medium_and_metadata()
            except Exception:
                # wait a bit (maybe Flickr is rate limiting us), and skip this
                # photo (maybe problem with this photo)
                with open("data/flickr_mine/error.log", "a") as logfile:
                    logfile.write(photo.flickr_locator_string() + "\n")
                time.sleep(4)
class FlickrPhoto:
    """A Flickr photo, backed either by an API result or by a mirror path.

    ``xml`` is an object whose ``attrib`` mapping holds at least ``farm``,
    ``server``, ``id`` and ``secret`` (plus ``latitude``/``longitude`` for
    ``lat_lng``).  ``locator_path`` is a ``farm/server/id_secret`` path below
    ``data/flickr_mirror/`` and is used when no ``xml`` is available.
    ``locator_string`` and ``flickr`` are only needed by ``page_url``.
    """

    def __init__(self, xml=None, locator_string=None, flickr=None, locator_path=None):
        self.xml = xml
        self.locator_string = locator_string
        self.locator_path = locator_path
        self.flickr = flickr

    def _locator_parts(self):
        """Return ``(farm, server, id, secret)`` read from ``self.xml.attrib``."""
        attrib = self.xml.attrib
        return (attrib['farm'], attrib['server'], attrib['id'], attrib['secret'])

    def image_base_url(self):
        """Return the static image URL up to, but excluding, the size suffix."""
        return "http://farm%s.staticflickr.com/%s/%s_%s_" % self._locator_parts()

    def big_url(self):
        """Return the URL of the ``b`` sized JPEG."""
        return self.image_base_url() + "b.jpg"

    def medium_url(self):
        """Return the URL of the ``m`` sized JPEG."""
        return self.image_base_url() + "m.jpg"

    def page_url(self):
        """Return the photo page URL; asks ``self.flickr`` for the owner id.

        Requires ``locator_string`` (``farm_server_id_secret``) and ``flickr``
        to have been passed to the constructor.
        """
        (farm, server, photo_id, secret) = self.locator_string.split('_')
        user_id = self.flickr.get_photo_user_id(photo_id, secret)
        return "http://www.flickr.com/photos/" + user_id + "/" + photo_id

    def lat_lng(self):
        """Return ``(latitude, longitude)`` as floats."""
        return (float(self.xml.attrib['latitude']), float(self.xml.attrib['longitude']))

    def flickr_locator_string(self):
        """Return ``farm_server_id_secret``."""
        return "%s_%s_%s_%s" % self._locator_parts()

    def flickr_locator_path(self):
        """Return the explicit ``locator_path`` or ``farm/server/id_secret``."""
        if self.locator_path is not None:
            return self.locator_path
        return "%s/%s/%s_%s" % self._locator_parts()

    def image_path(self):
        """Return this photo's directory inside the local mirror."""
        return "data/flickr_mirror/" + self.flickr_locator_path()

    def saveToDirectory(self, path):
        """Download the big image to ``<path>/<locator string>.jpg``."""
        self.save_image_to_path(path + "/" + self.flickr_locator_string() + ".jpg")

    def save_image_to_path(self, path):
        """Download the big image and write it to ``path``.

        The bytes are fetched completely before the file is opened, so a
        failed download does not leave an empty file behind.  The file is
        opened in binary mode because the payload is JPEG data.
        """
        image_response = urllib2.urlopen(self.big_url())
        try:
            image_bytes = image_response.read()
        finally:
            image_response.close()
        with open(path, "wb") as image_file:
            image_file.write(image_bytes)

    def save_metadata_to_path(self, path):
        """Write ``self.xml.attrib`` to ``path`` as JSON."""
        with open(path, "w") as metadata_file:
            metadata_file.write(json.dumps(self.xml.attrib))

    def store_medium_and_metadata(self):
        """Mirror the image (``b.jpg``) and ``metadata.json`` for this photo.

        Whichever of the two files is missing gets written, so a photo whose
        earlier store attempt failed half way is completed on the next run
        instead of being reported as already stored.
        """
        image_dir_path = "data/flickr_mirror/" + self.flickr_locator_path()
        dpy.ensure_dir(image_dir_path)
        image_path = image_dir_path + "/b.jpg"
        metadata_path = image_dir_path + "/metadata.json"
        have_image = os.path.exists(image_path)
        have_metadata = os.path.exists(metadata_path)
        if have_image and have_metadata:
            print("Already stored " + image_dir_path)
            return
        if not have_image:
            self.save_image_to_path(image_path)
        if not have_metadata:
            self.save_metadata_to_path(metadata_path)
        print("Stored image/metadata " + image_dir_path)

    def store_in_geodb(self):
        """Insert this photo's metadata into the module-level geo collection.

        Without ``xml`` the metadata is read from the mirrored
        ``metadata.json``; with ``xml`` its ``attrib`` mapping is used, which
        is the same content ``save_metadata_to_path`` writes to disk.
        """
        if self.xml is None:
            with open(self.image_path() + "/metadata.json") as metadata_file:
                metadata = json.load(metadata_file)
        else:
            metadata = dict(self.xml.attrib)
        geo_mongo_metadata = dict()
        geo_mongo_metadata['flickr'] = metadata
        # Stored as [latitude, longitude].
        geo_mongo_metadata['location'] = [float(metadata['latitude']), float(metadata['longitude'])]
        flickr.geodb_photos.insert(geo_mongo_metadata)
class MirroredPhoto:
    """A photo document as stored in the geo database.

    ``dbjson`` must carry ``'_id'`` and a ``'flickr'`` sub-document with the
    keys ``farm``, ``server``, ``id``, ``secret`` and ``owner``.
    """

    def __init__(self, dbjson):
        self.dbjson = dbjson
        flickr_record = self.dbjson['flickr']
        self.farm = flickr_record['farm']
        self.server = flickr_record['server']
        self.photo_id = flickr_record['id']
        self.secret = flickr_record['secret']
        self.user_id = flickr_record['owner']
        self.dbid = self.dbjson['_id']

    def _locator_parts(self):
        """Return ``(farm, server, id, secret)`` read from the stored record."""
        flickr_record = self.dbjson['flickr']
        return (flickr_record['farm'], flickr_record['server'],
                flickr_record['id'], flickr_record['secret'])

    def ui_metadata(self):
        """Return the URLs the UI needs: mirrored image and Flickr page."""
        return {
            'image_url': self.mirrored_big_url(),
            'flickr_page_url': self.flickr_page_url(),
        }

    def flickr_locator_path(self):
        """Return ``farm/server/id_secret``."""
        return "%s/%s/%s_%s" % self._locator_parts()

    def flickr_locator_string(self):
        """Return ``farm_server_id_secret``."""
        return "%s_%s_%s_%s" % self._locator_parts()

    def flickr_medium_url(self):
        """Return the URL of the ``m`` sized JPEG on Flickr's static host."""
        return "http://farm%s.staticflickr.com/%s/%s_%s_m.jpg" % (
            self.farm, self.server, self.photo_id, self.secret)

    def flickr_page_url(self):
        """Return the photo page URL built from the owner id and photo id."""
        return "http://www.flickr.com/photos/" + self.user_id + "/" + self.photo_id

    def mirrored_big_url(self):
        """Return the relative URL of the mirrored ``b.jpg``."""
        return 'static/flickr/' + self.flickr_locator_path() + '/b.jpg'

    def jpg_path(self):
        """Return the local filesystem path of the mirrored ``b.jpg``."""
        return 'data/flickr_mirror/' + self.flickr_locator_path() + '/b.jpg'
#class MirrorImage:
#
# def __init__(self, image_path):
# self.path = image_path
#
# def store_in_geo_db():
class Flickr:
    """Facade over the Flickr API, the local photo mirror and the geo database.

    Constructing an instance authenticates against Flickr through
    ``flickrapi`` and may block on ``raw_input`` while the user authorises the
    program.  The MongoDB connection is opened lazily by ``get_geodb_photos``
    or explicitly by ``connect_geodb``.
    """

    # SECURITY NOTE(review): these credentials are committed in source.
    # Consider loading them from the environment or an untracked config file.
    api_key = '9db4bbb1d275baedb6e77c2aa7538c90'
    api_secret = '09be4700c52c3996'
    # Geo collection handle; None until connect_geodb() has run.
    geodb_photos = None

    def __init__(self):
        # Imported here rather than at module level, as in the original code.
        import flickrapi
        self.flickr = flickrapi.FlickrAPI(self.api_key, self.api_secret)
        # authenticate
        (token, frob) = self.flickr.get_token_part_one(perms='read')
        if not token:
            raw_input("Press ENTER after you authorized this program")
        self.flickr.get_token_part_two((token, frob))
        sys.stderr.write("Authed to Flickr\n")
        # FIXME holy crap so broken flickrapi keeps flask from printing debug stuff....

    def connect_geodb(self):
        """Connect to MongoDB on localhost:27017 and bind ``ltte.photos``."""
        # NOTE(review): pymongo.Connection is the legacy client class; confirm
        # it exists in the installed PyMongo version.
        self.geodb_connection = pymongo.Connection('localhost', 27017)
        self.geodb_photos = self.geodb_connection.ltte.photos

    def _bbox_string(self, bbox):
        """Format ``bbox`` as the ``left,bottom,right,top`` search argument."""
        return "%s,%s,%s,%s" % (bbox['left'], bbox['bottom'], bbox['right'], bbox['top'])

    def _collect_photos(self, photos_response, bbox):
        """Wrap each entry of ``photos_response[0]`` in a ``FlickrPhoto``.

        Photos whose coordinates fall outside ``bbox`` are reported on stdout
        but are still included in the returned list.
        """
        photos = []
        for photoxml in photos_response[0]:
            photo = FlickrPhoto(xml=photoxml)
            photos.append(photo)
            (lat, lng) = photo.lat_lng()
            if lat < bbox['bottom'] or lat > bbox['top'] or lng < bbox['left'] or lng > bbox['right']:
                print("WTSSSSSSSSSSSSSSSSSSSSSSSSSS " + str(photo.lat_lng()) + " not in " + str(bbox))
        return photos

    def query_bbox_and_upload_time_segment(self, bbox, min_upload_time, max_upload_time, limit):
        """Search photos in ``bbox`` uploaded within the given time window.

        Requests up to ``limit`` photos with all extras and returns them as a
        list of ``FlickrPhoto``.
        """
        all_extras = "description,license,date_upload,date_taken,owner_name,icon_server,original_format,last_update,geo,tags,machine_tags,o_dims,views,media,path_alias"
        photos_response = self.flickr.photos_search(
            bbox=self._bbox_string(bbox),
            min_upload_date=min_upload_time,
            max_upload_date=max_upload_time,
            per_page=limit,
            extras=all_extras,
            page=0)
        return self._collect_photos(photos_response, bbox)

    def getPhotos(self, bounds, limit):
        """Search photos in ``bounds`` using a fixed upload-time window."""
        # TODO search smartly through time limits (Flickr requires limiter for geo queries, like time limits...)
        photos_response = self.flickr.photos_search(
            bbox=self._bbox_string(bounds),
            min_upload_date='1238433133',
            max_upload_date='1298433133',
            per_page=limit,
            extras="geo",
            page=0)
        return self._collect_photos(photos_response, bounds)

    def get_photo_user_id(self, photo_id, secret):
        """Return the owner's ``nsid`` from a ``photos.getInfo`` response."""
        info_response = self.flickr.photos_getInfo(photo_id=photo_id, secret=secret)  # omg will this work
        return info_response.find('photo').find('owner').attrib['nsid']

    def mirror_image_count(self):
        """Count directory entries under the mirror whose name ends in ``json``.

        The count is also stored on ``self._mirror_image_count``.  File and
        directory names are both considered, matching the entries the former
        ``os.path.walk`` callback received.
        """
        self._mirror_image_count = 0
        for _dirname, dirnames, filenames in os.walk("data/flickr_mirror"):
            for name in dirnames + filenames:
                if name.endswith("json"):
                    self._mirror_image_count += 1
        return self._mirror_image_count

    def mirrored_images(self):
        """Yield a ``FlickrPhoto`` for every ``farm/server/photo`` mirror directory."""
        mirror_root = "data/flickr_mirror/"
        for farm in os.listdir(mirror_root):
            for server in os.listdir(mirror_root + farm):
                for photo in os.listdir(mirror_root + farm + "/" + server):
                    locator = farm + "/" + server + "/" + photo
                    yield FlickrPhoto(locator_path=locator)

    def foreach_local_photo(self, function):
        """Call ``function(photo)`` for every mirrored photo."""
        for photo in self.mirrored_images():
            function(photo)

    def get_geodb_photos(self):
        """Return the geo collection, connecting first if necessary."""
        if self.geodb_photos is None:
            self.connect_geodb()
        return self.geodb_photos

    def points_in_circle(self, center, radius):
        """Return a cursor over documents within ``radius`` of ``center``."""
        return self.get_geodb_photos().find(
            {"location": {"$within": {"$center": [[center[0], center[1]], radius]}}})

    def photos_in_bbox(self, bbox):
        """Return a cursor over the ``location`` of documents inside ``bbox``.

        Box corners are passed as ``[bottom, left]`` and ``[top, right]``, the
        same ``[latitude, longitude]`` order in which locations are stored.
        """
        left = float(bbox['left'])
        right = float(bbox['right'])
        top = float(bbox['top'])
        bottom = float(bbox['bottom'])
        return self.get_geodb_photos().find(
            {'location': {'$within': {'$box': [[bottom, left], [top, right]]}}},
            {'location': 1})

    def count_photos_in_bbox(self, bbox):
        """Return the number of documents inside ``bbox``."""
        return self.photos_in_bbox(bbox).count()
# Module-level client used by GeoMine, FlickrPhoto.store_in_geodb and
# geo_store_all. NOTE: Flickr.__init__ authenticates against Flickr, so this
# assignment performs that authentication whenever the module is imported.
flickr = Flickr()
def geo_store_all(args):
    """Insert every mirrored photo into the geo database and print a summary.

    ``args`` is accepted for the caller's benefit but is not used.  A photo
    whose insertion raises an ``Exception`` is counted as failed and skipped.
    Ctrl-C prints a message and exits the process.
    """
    flickr.connect_geodb()
    success = 0
    fail = 0
    for photo in flickr.mirrored_images():
        try:
            photo.store_in_geodb()
            success += 1
        except KeyboardInterrupt:
            print("User exited")
            # sys.exit() is always available; the exit() builtin is installed
            # by the site module only.
            sys.exit()
        except Exception:
            fail += 1
    print("geodb insertion: " + str(success) + " succeeded, " + str(fail) + " failed")