# Walk every user, and for each one store the questions they asked that are
# not yet in pool_questions (a collection of known question URLs that is
# defined outside this section).
i = 0
# Resume threshold: users are skipped while the (already incremented) counter
# is still below this value.
# NOTE(review): presumably the number of users handled by an earlier run --
# confirm before changing it.
saved_num = 17406
for user in users:
    # The counter is printed before it is incremented, so the progress index
    # shown here is 0-based.
    print("%s %s" % (i, user))
    i += 1
    if i < saved_num:
        continue

    # Build the User wrapper once and reuse it for both the count check and
    # the listing, instead of constructing it twice per user.
    asker = User(user)
    if asker.asks_num() == 0:
        print("no questions")
        continue

    j = 0  # questions newly stored for this user
    try:
        for question in asker.get_asks():
            url = question.url
            if url in pool_questions:
                continue
            if page_exist(url):
                tmp = Question(url)
                try:
                    db.insert_question(
                        tmp.get_title(),
                        url,
                        tmp.get_followers_num(),
                        tmp.get_answers_num(),
                        tmp.get_detail(),
                    )
                    j += 1
                except Exception as e:
                    # A failure on one question is reported and the loop
                    # moves on to the next question.
                    print("%s %s" % (url, e))
            else:
                print(url + "404 not found")
            # Mark the URL as seen whatever the outcome, so it is not
            # examined again later in this run.
            pool_questions.add(url)
    except Exception as e:
        # Any other failure while listing or processing this user's
        # questions ends the work for this user only; the crawl continues.
        print("%s %s" % (user, e))
    print("saved %d new questions" % j)
# Walk every user, and for each one store the answers they wrote whose URL
# is not yet in pool_answers.
pool_answers = db.get_existing("answers")
i = 1
# Resume threshold: users are skipped while the (already incremented) counter
# is still below this value.
# NOTE(review): presumably the number of users handled by an earlier run --
# confirm before changing it.
saved_num = 1277
for user in users:
    # The counter starts at 1 and is printed before it is incremented, so the
    # progress index shown here is 1-based.
    print("%s %s" % (i, user))
    i += 1
    if i < saved_num:
        continue

    # Build the User wrapper once and reuse it for both the count check and
    # the listing, instead of constructing it twice per user.
    author = User(user)
    if author.answers_num() == 0:
        continue

    j = 0  # answers newly stored for this user
    try:
        for answer in author.get_answers():
            url = answer.answer_url
            if url in pool_answers:
                continue
            if page_exist(url):
                tmp = Answer(url)
                # Fetch the content once; the same value is used for the
                # None check and for the insert.
                content = tmp.get_content()
                if content is not None:
                    try:
                        db.insert_answer(
                            tmp.answer_url,
                            tmp.get_author(),
                            content,
                            tmp.get_upvote(),
                            tmp.get_question(),
                        )
                        j += 1
                    except Exception as e:
                        # A failure on one answer is reported and the loop
                        # moves on to the next answer.
                        print("%s %s" % (url, e))
                else:
                    # No content available; the message text means
                    # "collapsed".
                    print(url + "被折叠")
            else:
                print(url + "404 not found")
            # Mark the URL as seen whatever the outcome, so it is not
            # examined again later in this run.
            pool_answers.add(url)
    except Exception as e:
        # Same per-user guard as the question crawl: a failure while listing
        # or processing one user's answers is reported and the crawl
        # continues with the next user instead of aborting.
        print("%s %s" % (user, e))
    print("saved %d new answers" % j)