forked from zhwindy/follow_center
/
tumblr.py
115 lines (101 loc) · 3.11 KB
/
tumblr.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
#!/usr/bin/env python
# -*- coding: utf-8 -*-
'''
Created by bigzhu at 15/07/15 17:17:29. Fetches Tumblr posts for the users listed in user_info.
'''
import requests
import pg
import json
import time_bz
import time
import public_bz
import public_db
def check(user_name=None):
    '''
    Run main() for every user_info row that has a non-empty tumblr name.

    user_name: optional tumblr name; when truthy, only rows whose tumblr
    column equals it are selected.

    NOTE(review): user_name is interpolated directly into the SQL
    condition, so callers should only pass trusted values.
    '''
    conditions = '''
tumblr is not null and tumblr!=''
'''
    if user_name:
        conditions = conditions + " and tumblr='%s'" % user_name
    for row in pg.select('user_info', what='tumblr', where=conditions):
        print('check tumblr %s' % row.tumblr)
        main(row.tumblr)
def delTumblrUser(user_name):
    '''
    Set tumblr to null on every user_info row whose tumblr name equals
    user_name; both sides are compared through lower().

    NOTE(review): user_name is interpolated directly into the SQL text,
    so callers should only pass trusted values.
    '''
    statement = '''
update user_info set tumblr=null where lower(tumblr)=lower('%s')
'''
    pg.query(statement % user_name)
def saveUserCheckNew(blogs):
    '''
    Created by bigzhu at 15/09/05 11:57:11

    Store the blog's profile in tumblr_user, then save its posts.

    blogs: dict holding a 'blog' entry (which must contain 'name' and
    'updated') and a 'posts' entry; main() passes the 'response' part of
    the posts API result here.
    '''
    user = blogs['blog']
    user_name = user['name']
    where = "name='%s'" % user_name
    result = list(pg.select('tumblr_user', where=where))
    if result and result[0].updated == user['updated']:
        # Informational only: the early return that used to follow this
        # message was disabled by the author, so processing continues.
        print('%s  no update' % user_name)
    pg.insertOrUpdate(pg, 'tumblr_user', user, where)
    # These posts are the first page, fetched by main() at the API's default
    # offset of 0.  saveBlogs() continues from offset + 20, so passing 20
    # here made it jump straight to 40 and skip posts 20-39.
    saveBlogs(user_name, blogs['posts'], offset=0)
def saveBlogs(user_name, blogs, offset):
    '''
    Save a page of posts into table m and keep paging until a known post
    or an empty page is reached.

    user_name: blog name; stored as m_user_id and used to fetch more pages.
    blogs: list of post dicts for the current page (may be empty or None).
    offset: the offset at which blogs was fetched; the following page is
    requested at offset + 20.

    Returns None.  Paging stops when the page is empty, when a post is
    already stored, or when the API call yields no data.
    '''
    # Iterative paging: one recursive call per page could exhaust the
    # interpreter's recursion limit on blogs with a long backlog.
    while blogs:
        for blog in blogs:
            m = public_bz.storage()
            m.id_str = blog['id']
            m.m_type = 'tumblr'
            m.m_user_id = user_name
            m.created_at = time_bz.timestampToDateTime(blog['timestamp'])
            m.extended_entities = json.dumps(blog.get('photos'))
            m.content = None
            m.text = blog.get('caption')
            m.href = blog.get('short_url')
            m.type = blog.get('type')
            result = pg.insertIfNotExist(pg, 'm', m, "id_str='%s' and m_type='tumblr' " % m.id_str)
            if result is None:
                # The record already exists, so everything older is
                # presumably stored too; stop here.
                print('have same data')
                return
            print('new  %s %s offset: %s name: %s' % (m.id_str, m.type, offset, user_name))
        # Whole page was new: fetch the next one.
        offset += 20
        page = callGetMeidaApi(user_name, offset=offset)
        if page is None:
            # callGetMeidaApi returns None (after printing the reason) on a
            # connection error or non-200 status; subscripting it would
            # raise TypeError.
            return
        blogs = page['response']['posts']
def callGetMeidaApi(user_name, offset=0, limit=20):
    '''
    Request one page of posts for a blog from the Tumblr v2 API.

    user_name: blog name, placed in front of ".tumblr.com" in the URL.
    offset, limit: sent unchanged as query parameters.

    Returns the decoded JSON body when the status code is 200.  Returns
    None when requests raises ConnectionError (exception info is printed)
    or when the status code is anything else (the code is printed).
    '''
    # NOTE(review): the API key is hard-coded in source; consider moving it
    # to configuration.
    query = {'api_key': 'fuiKNFp9vQFvjLNvx4sUwti4Yb5yGutBN4Xh10LXZhhRKjWlV4',
             'offset': offset,
             'limit': limit,
             }
    endpoint = '''http://api.tumblr.com/v2/blog/%s.tumblr.com/posts''' % user_name
    try:
        # NOTE(review): no timeout is passed, so this call can block
        # indefinitely on a stalled connection.
        response = requests.get(endpoint, params=query)
    except requests.exceptions.ConnectionError:
        print(public_bz.getExpInfoAll())
        return None
    if response.status_code != 200:
        print(response.status_code)
        return None
    return response.json()
def main(user_name=None):
    '''
    Fetch the first page of posts for user_name and store it.

    When callGetMeidaApi returns None, public_db.delNoName('tumblr',
    user_name) is called instead; otherwise the 'response' part of the
    result is handed to saveUserCheckNew.

    NOTE(review): None is also returned for connection errors and any
    non-200 status, so delNoName runs for transient failures too - confirm
    that is intended.
    '''
    payload = callGetMeidaApi(user_name)
    if payload is None:
        public_db.delNoName('tumblr', user_name)
        return
    saveUserCheckNew(payload['response'])
if __name__ == '__main__':
    # One-off sync of a single blog before the polling loop starts.
    main('coverlibrary')
    # Poll forever: sync every user, call pg.refresh('messages'), then
    # sleep for five minutes.
    while 1:
        check()
        pg.refresh('messages')
        time.sleep(5 * 60)