Exemple #1
0
def get_data():
    """Crawl one user profile taken from ``next_urls`` and persist it.

    Steps, in order:

    1. Pop a URL from the module-level ``next_urls`` collection and remove
       any newline characters from it. If it is already in ``seen_urls``
       the function returns immediately.
    2. Build ``Person(url, session)`` and call ``get_all_info()`` on it.
       If that raises, an error message is printed and the function
       returns without storing anything.
    3. Print the collected fields and pass them, as one tuple, to
       ``db.insert_data``. A ``pymysql.err.DataError`` is printed and ends
       the call.
    4. On success, add ``person.follow()`` to ``next_urls``, record the URL
       in ``seen_urls``, print the number of pending URLs, and call
       ``save_next_urls()`` / ``save_seen_urls(url)``.

    Note that a URL whose fetch or insert fails has already been popped
    from ``next_urls`` and is not added to ``seen_urls``.

    Returns:
        None.

    Raises:
        Whatever ``next_urls.pop()`` raises when the collection is empty
        (``KeyError`` if it is a set — presumably it is, given the
        ``update`` call below; confirm against its definition).
    """
    url = next_urls.pop().replace('\n', '')
    if url in seen_urls:
        return
    print(url)
    person = Person(url, session)
    try:
        person.get_all_info()
    except Exception:
        # Deliberately broad: any failure while fetching a profile just skips
        # this user. `Exception` (rather than a bare `except`) keeps
        # KeyboardInterrupt and SystemExit able to stop the crawler.
        print('Get user data error!')
        return

    # Built once so the printed fields and the stored row can never diverge.
    data = (person.id, person.name, person.url, person.gender, person.location, person.business,
            person.employment, person.position, person.education, person.education_extra,
            person.description, person.hash_id,
            person.follower_num, person.followee_num, person.asks, person.answers, person.posts,
            person.collections, person.logs, person.agrees, person.thanks)
    print(*data)

    try:
        db.insert_data(data)
    except pymysql.err.DataError as E:
        print(E)
        return
    next_urls.update(person.follow())
    seen_urls.add(url)
    print(len(next_urls))
    # Original intent: refresh the next_urls.txt file after every hundred
    # people crawled, so progress is saved as the crawl runs.
    # NOTE(review): the condition tests the size of the pending queue, not
    # the number of people crawled — confirm this is what is wanted.
    if len(next_urls) % 100 == 0:
        save_next_urls()
    save_seen_urls(url)