# -*- coding: utf8 -*-
"""One-off setup script: create the `twitter` database and its tables."""
from MySQLServer import MySQLServer

if __name__ == '__main__':
    # start -- credentials live outside the source tree
    database = MySQLServer('./config/mysql_account.json')
    database.start()
    try:
        # create db
        database.create_db('twitter')
        # use db
        database.use_db('twitter')
        # create tables -- one JSON schema file per table
        database.create_table('./config/table_User.json')
        database.create_table('./config/table_TimeLine.json')
        database.create_table('./config/table_Image.json')
    finally:
        # Always release the connection, even if setup fails part-way
        # (the original skipped stop() on any exception above).
        database.stop()
def main(): """ 涉及的数据库 个人信息表 User twitter表 TimeLine 图片表 Image 选取研究对象 存储个人信息(以TEXT) 获取此人的所有twitter(包含转发等,以TEXT存储,设置twitter上限) 如果twitter中包含图片,那么下载图片 while: 获得部分followers 将获得的内容加入个人信息表 for one_follower in followers: 获得所有twitter (要不要设置max page) 获得所有的评论(还存在问题吧) if next_cursor == ? (已经没有follower了) break :return: """ # database database = MySQLServer('./config/mysql_account.json') database.start() database.use_db('twitter') # crawler crawler = TwitterCrawler() user_id = '813286' # 存储个人信息(以TEXT) try: user = crawler.get_user(user_id=user_id) except: traceback.print_exc() return None user = user.replace('\"', '\\\"') user = user.replace('\;', '\\\;') database.insert(table_name='User', values={ 'user_id': user_id, 'content': user }) # 获取此人的所有twitter(包含转发等,以TEXT存储,设置twitter上限) # 如果twitter中包含图片,那么下载图片 num = 1 limit = 20 while num <= limit: while True: twitters = None try: twitters = crawler.get_twitter_page(user_id=user_id, page=num) except: traceback.print_exc() sleep(65) continue # 将内容以TEXT存入数据库 for twitter in twitters: twitter_str = str(twitter) twitter_str = twitter_str.replace('\"', '\\\"').replace('\;', '\\\;') twitter_dict = twitter.AsDict() database.insert('TimeLine', values={ 'user_id': str(user_id), 'twitter_id': str(twitter_dict['id']), 'content': twitter_str }) if len(twitters) == 0: num = limit break print('get twitter page : ' + str(num)) num += 1 sleep(65) # 遍历好友 while True: # 获得部分followers # 将获得的内容加入个人信息表 print('start get follows') followers = crawler.get_followers(user_id=user_id) print('end get follows') for follower in followers: follower_str = str(follower).replace('\"', '\\\"').replace('\;', '\\\;') follower_dict = follower.AsDict() database.insert('User', values={ 'user_id': str(follower_dict['id']), 'content': follower_str }) # 获得所有twitter(要不要设置max page) # 获得所有的评论(还存在问题吧) _num = 1 _limit = 5 while _num <= _limit: while True: # _twitters = None try: print('start get twitter page') _twitters = crawler.get_twitter_page(user_id=follower_dict['id'], page=_num) 
print('end get twitter page') except TwitterError: traceback.print_exc() _num = _limit break except: traceback.print_exc() sleep(65) continue # 将内容以TEXT存入数据库 for twitter in _twitters: twitter_str = str(twitter) twitter_str = twitter_str.replace('\"', '\\\"').replace('\;', '\\\;') twitter_dict = twitter.AsDict() database.insert('TimeLine', values={ 'user_id': str(follower_dict['id']), 'twitter_id': str(twitter_dict['id']), 'content': twitter_str }) # print('twitter_id : ' + str(twitter_dict['id'])) print('twitter num : ' + str(len(_twitters))) if len(_twitters) == 0: _num = _limit break print(str(follower_dict['id']) + ' get twitter page : ' + str(_num)) _num += 1 print('start sleep') sleep(65) print('end sleep') if crawler.follower_is_none(): break