def get_data():
    """Crawl one profile: pop a URL from the frontier, scrape it, persist it,
    and enqueue the people that profile follows.

    Relies on module-level state defined elsewhere in this file:
    ``next_urls`` (frontier set), ``seen_urls`` (visited set), ``session``
    (HTTP session passed to ``Person``), ``db`` (storage with
    ``insert_data``), and the ``save_next_urls`` / ``save_seen_urls``
    checkpoint helpers. Always returns ``None``; failures are logged to
    stdout and the URL is skipped.
    """
    url = next_urls.pop().replace('\n', '')
    if url in seen_urls:
        return
    print(url)

    person = Person(url, session)
    try:
        person.get_all_info()
    except Exception:
        # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
        # still propagate; any scrape failure just skips this profile.
        print('Get user data error!')
        return

    # Build the 21-field record once and reuse it for both the log line and
    # the DB insert (the original spelled the tuple out twice verbatim).
    data = (person.id, person.name, person.url, person.gender,
            person.location, person.business, person.employment,
            person.position, person.education, person.education_extra,
            person.description, person.hash_id, person.follower_num,
            person.followee_num, person.asks, person.answers,
            person.posts, person.collections, person.logs,
            person.agrees, person.thanks)
    print(*data)

    try:
        db.insert_data(data)
    except pymysql.err.DataError as err:
        # Row rejected by MySQL (e.g. oversized field): log and skip without
        # marking the URL as seen or expanding the frontier.
        print(err)
        return

    next_urls.update(person.follow())
    seen_urls.add(url)
    print(len(next_urls))
    # After every 100 crawled profiles, rewrite next_urls.txt so progress
    # survives a crash. NOTE(review): indentation was lost in this file;
    # save_seen_urls(url) is kept inside this block per the original
    # statement order — confirm it shouldn't run on every call instead.
    if len(next_urls) % 100 == 0:
        save_next_urls()
        save_seen_urls(url)