def extract_answerers_followers(user_id, user_id_list):
    """Return the follower ids stored for the answerer ``user_id``.

    The lookup is only performed when ``user_id`` is contained in
    ``user_id_list``.  The ``haoyouguanxi.answerer_follower`` collection is
    queried first; if it yields no follower ids, the
    ``haoyouguanxi.answerer_followers`` collection is queried as a fallback.
    In each collection only the first document whose ``'follower'`` field is
    not ``None`` is used.

    Args:
        user_id: value matched against the ``user_id`` field of the documents.
        user_id_list: container of user ids; membership gates the lookup.

    Returns:
        An empty list when ``user_id`` is not in ``user_id_list`` (no database
        connection is opened in that case).  Otherwise a list of the
        ``follower_id`` values found, followed by the marker string
        ``'user_exists'``.
    """
    if user_id not in user_id_list:
        return []

    mongo = Mongo_2()
    try:
        relations = mongo.db.haoyouguanxi
        follower_id = _first_follower_ids(relations.answerer_follower, user_id)
        if not follower_id:
            follower_id = _first_follower_ids(
                relations.answerer_followers, user_id)
    finally:
        # Close the client even if a query raised.
        mongo.client.close()

    follower_id.append('user_exists')
    return follower_id


def _first_follower_ids(collection, user_id):
    """Return the follower ids of the first matching document that has any."""
    for doc in collection.find({"user_id": user_id}):
        followers = doc.get('follower')
        if followers is not None:
            return [entry.get('follower_id') for entry in followers]
    return []
def __init__(self, fileNum):
    """Initialise every instance attribute, then call ``self.get_voters()``.

    A ``Mongo_2`` handle is created and stored on ``self.mongo``.  All other
    attributes start as ``None``, ``False`` or an empty list, except
    ``fileNum`` and the request header mapping.

    Args:
        fileNum: stored unchanged on ``self.fileNum``.
    """
    self.fileNum = fileNum
    self.file = None
    self.start = None
    self.end = None

    self.answer_id = None
    self.answerID_list = None
    self.answer_type = None
    self.voter_id_list = []

    self.type = None
    self.is_del = False
    self.state = False

    # The original also had a disabled "self.proxy = None" attribute.
    self.current_proxy = None
    self.mongo = Mongo_2()

    # 'Cookie' and 'x-udid' start as None here.
    # NOTE(review): the leading 'textml' in Accept looks like a damaged
    # 'text/html'; kept byte-for-byte -- confirm before changing.
    request_headers = {
        'Accept': 'textml,application/json,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Accept-Encoding': 'gzip, deflate, sdch',
        'Accept-Language': 'en-US,en;q=0.8,zh-CN;q=0.6,zh;q=0.4,zh-TW;q=0.2',
        'Host': 'www.zhihu.com',
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/43.0.2357.81 Safari/537.36',
        'Referer': 'http://www.zhihu.com/',
        'Cookie': None,
        'x-udid': None,
    }
    self.headers = request_headers

    # Constructing the object starts the work immediately.
    self.get_voters()
def extract_answerID():
    """Collect the distinct answer ids stored in two collections.

    Reads the ``answer_num`` and ``answers`` fields of every document in
    ``question_answers`` and in ``answer`` (the original comment labels the
    latter "old answers").  Documents whose ``answer_num`` equals 0 are
    skipped, and a document without an ``answers`` array contributes nothing
    instead of raising ``TypeError``.

    Returns:
        A list of the ``answer_id`` values with duplicates removed.  The
        order is unspecified because the values pass through a ``set``.
    """
    projection = {'answer_num': 1, 'answers': 1}
    unique_ids = set()

    mongo = Mongo_2()
    try:
        for collection in (mongo.db.question_answers, mongo.db.answer):
            for item in collection.find({}, projection):
                if item.get('answer_num') == 0:
                    continue
                # "or ()" tolerates a missing / None 'answers' field.
                for answer in item.get('answers') or ():
                    unique_ids.add(answer.get('answer_id'))
    finally:
        # Close the client even if a query raised.
        mongo.client.close()

    return list(unique_ids)
def extract_last_followers():
    """Return the ``user_id`` of every document in ``followers_last``.

    Returns:
        A list with one entry per document, in cursor order; an entry is
        ``None`` when the document has no ``user_id`` field.
    """
    mongo = Mongo_2()
    try:
        return [doc.get('user_id') for doc in mongo.db.followers_last.find({})]
    finally:
        # Close the client even if the query raised.
        mongo.client.close()
def extract_questionUrl():
    """Return the ``question_url`` of every document in ``question_url``.

    Returns:
        A list of the ``question_url`` values in cursor order.

    Raises:
        KeyError: if a document has no ``question_url`` field (the field is
            read by subscript, exactly as before).
    """
    mongo = Mongo_2()
    try:
        # find() here is the collection query (empty filter, projecting only
        # question_url), not the str.find substring search.
        items = mongo.db.question_url.find({}, {"question_url": 1})
        return [item['question_url'] for item in items]
    finally:
        # Close the client even if the query or a lookup raised.
        mongo.client.close()
def extract_question_followers(question_id):
    """Return the followers stored for ``question_id``.

    Every document in ``english_followers_new`` whose ``question_id`` matches
    is read, and the items of each non-``None`` ``followers`` field are
    concatenated.

    Args:
        question_id: value matched against the ``question_id`` field.

    Returns:
        A flat list of the items found; empty when nothing matches.
    """
    follower_id = []
    mongo = Mongo_2()
    try:
        cursor = mongo.db.english_followers_new.find(
            {"question_id": question_id})
        for doc in cursor:
            followers = doc.get('followers')
            if followers is not None:
                follower_id.extend(followers)
    finally:
        # Close the client even if the query raised.
        mongo.client.close()
    return follower_id
def __init__(self, fileNum):
    """Initialise the instance attributes, then call ``self.get_people_detail()``.

    A ``Mongo_2`` handle is created and stored on ``self.mongo``; the other
    attributes start as ``None`` or an empty list.

    Args:
        fileNum: stored unchanged on ``self.fileNum``.
    """
    self.mongo = Mongo_2()
    self.fileNum = fileNum

    self.userID_list = []
    self.id = None

    # Placeholders; nothing is assigned to these here.
    self.file = None
    self.start = None
    self.end = None
    self.driver = None

    # Constructing the object starts the work immediately.
    self.get_people_detail()
def extract_answer_voters(answer_id):
    """Return the voters stored for ``answer_id``.

    Every document in ``voters_old`` whose ``answer_id`` matches is read, and
    the items of each non-``None`` ``voters`` field are concatenated.

    Args:
        answer_id: value matched against the ``answer_id`` field.

    Returns:
        A flat list of the items found; empty when nothing matches.
    """
    voter_id = []
    mongo = Mongo_2()
    try:
        for doc in mongo.db.voters_old.find({"answer_id": answer_id}):
            voters = doc.get('voters')
            if voters is not None:
                voter_id.extend(voters)
    finally:
        # Close the client even if the query raised.
        mongo.client.close()
    return voter_id
def extract_answer_comments(answer_id):
    """Return the ``comment_num`` recorded for ``answer_id``, wrapped in a list.

    Only the first matching document in ``comment`` whose ``comment_num`` is
    not ``None`` is used.

    Args:
        answer_id: value matched against the ``answer_id`` field.

    Returns:
        A one-element list holding that ``comment_num``, or an empty list
        when no matching document carries the field.
    """
    comment_id = []
    mongo = Mongo_2()
    try:
        for doc in mongo.db.comment.find({"answer_id": answer_id}):
            comment_num = doc.get('comment_num')
            if comment_num is not None:
                comment_id.append(comment_num)
                break
    finally:
        # Close the client even if the query raised.
        mongo.client.close()
    return comment_id
def extract_question_answers(question_id):
    """Return the ``answer_num`` recorded for ``question_id``, wrapped in a list.

    Only the first matching document in ``answer`` whose ``answer_num`` is
    not ``None`` is used.

    Args:
        question_id: value matched against the ``question_id`` field.

    Returns:
        A one-element list holding that ``answer_num``, or an empty list
        when no matching document carries the field.
    """
    answer_id = []
    mongo = Mongo_2()
    try:
        for doc in mongo.db.answer.find({"question_id": question_id}):
            answer_num = doc.get('answer_num')
            if answer_num is not None:
                answer_id.append(answer_num)
                break
    finally:
        # Close the client even if the query raised.
        mongo.client.close()
    return answer_id
def extract_last_voters():
    """Return the ``user_id`` of every document in ``voters_last``.

    Returns:
        A list with one entry per document, in cursor order; an entry is
        ``None`` when the document has no ``user_id`` field.
    """
    mongo = Mongo_2()
    try:
        return [doc.get('user_id') for doc in mongo.db.voters_last.find({})]
    finally:
        # Close the client even if the query raised.
        mongo.client.close()


# Disabled manual check kept from the original file:
# if __name__ == "__main__":
#     print extract_answer_voters(255313072)
def extract_voters_following(user_id, user_id_list):
    """Return the ids that the voter ``user_id`` is following.

    The lookup is only performed when ``user_id`` is contained in
    ``user_id_list``.  Only the first document in
    ``haoyouguanxi.voter_following`` whose ``'following'`` field is not
    ``None`` is used.

    Args:
        user_id: value matched against the ``user_id`` field of the documents.
        user_id_list: container of user ids; membership gates the lookup.

    Returns:
        An empty list when ``user_id`` is not in ``user_id_list`` (no database
        connection is opened in that case).  Otherwise a list of the
        ``following_id`` values found, followed by the marker string
        ``'user_exists'``.
    """
    if user_id not in user_id_list:
        return []

    following_id = []
    mongo = Mongo_2()
    try:
        cursor = mongo.db.haoyouguanxi.voter_following.find(
            {"user_id": user_id})
        for doc in cursor:
            followees = doc.get('following')
            if followees is not None:
                following_id.extend(
                    entry.get('following_id') for entry in followees)
                break
    finally:
        # Close the client even if the query raised.
        mongo.client.close()

    following_id.append('user_exists')
    return following_id
def __init__(self, fileNum):
    """Initialise every instance attribute, then call ``self.get_answers()``.

    A ``Mongo_2`` handle is created and stored on ``self.mongo``.  Apart from
    ``fileNum``, ``url_domain`` and the request header mapping, every
    attribute starts as ``None``, ``False`` or an empty list.

    Args:
        fileNum: stored unchanged on ``self.fileNum``.
    """
    self.fileNum = fileNum
    self.file = None
    self.start = None
    self.end = None

    # The original carried disabled code that derived these from a
    # question_url value:
    #   id      = question_url.replace('/question/', '')
    #   log_url = 'https://www.zhihu.com' + question_url + '/log'
    #   url     = 'https://www.zhihu.com' + question_url   # '/answers/created'
    self.question_url = None
    self.id = None
    self.log_url = None
    self.url = None
    self.url_domain = 'https://www.zhihu.com'

    self.content = None
    self.type = None
    self.question_type = None
    self.questionUrl_list = None
    self.is_del = False
    self.state = False

    self.topic_list = []
    self.log = []
    self.answer_id_list = []

    # The original also had a disabled "self.proxy = None" attribute.
    self.current_proxy = None
    self.mongo = Mongo_2()

    # 'Cookie' and 'x-udid' start as None here.
    # NOTE(review): the leading 'textml' in Accept looks like a damaged
    # 'text/html'; kept byte-for-byte -- confirm before changing.
    request_headers = {
        'Accept': 'textml,application/json,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Accept-Encoding': 'gzip, deflate, sdch',
        'Accept-Language': 'en-US,en;q=0.8,zh-CN;q=0.6,zh;q=0.4,zh-TW;q=0.2',
        'Host': 'www.zhihu.com',
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.86 Safari/537.36',
        'Referer': 'http://www.zhihu.com/',
        'Cookie': None,
        'x-udid': None,
    }
    self.headers = request_headers

    # Constructing the object starts the work immediately.
    self.get_answers()
def __init__(self):
    """Create the ``Mongo_2`` handle and start with an empty ``user_list``."""
    self.mongo = Mongo_2()
    self.user_list = []
def __init__(self):
    """Create a ``Mongo_2`` handle and keep it on ``self.mongo``."""
    self.mongo = Mongo_2()
# self.mongo.db.question_followers_userDetail_2222.insert(detail) # self.mongo.db.voters_userDetail_2222.insert(detail) # self.mongo.db.commenters_userDetail.insert(detail) self.delLogger(logger) self.mongo.client.close()
# NOTE(review): the line above is the tail of a method whose beginning lies
# outside this view; it is kept byte-for-byte rather than reinterpreted.


# Remove the logger's handlers.
def delLogger(self, myLogger):
    """Close and detach every handler attached to ``myLogger``.

    Args:
        myLogger: an object exposing ``handlers`` (a list) and
            ``removeHandler``, such as a ``logging.Logger``.
    """
    # Iterate over a snapshot: removeHandler() deletes from
    # myLogger.handlers, and deleting from the list being iterated makes
    # the loop skip every other handler.
    for myHandler in list(myLogger.handlers):
        myHandler.close()
        myLogger.removeHandler(myHandler)


if __name__ == '__main__':
    # Load the user ids of all documents in the answerers collection.
    mongo = Mongo_2()
    try:
        userID_list = [item.get('user_id') for item in mongo.db.answerers.find()]
    finally:
        # Close the client even if the query raised.
        mongo.client.close()

    ud = UserDetail()
    #driver = webdriver.Chrome()
    # NOTE(review): driver is not passed to ud below; presumably it is read
    # as a module-level global elsewhere -- confirm before removing.
    driver = webdriver.PhantomJS(executable_path=r'D:\PhantomJS\phantomjs-2.1.1-windows\bin\phantomjs.exe')

    # Current local time reduced to its digits (YYYYmmddHHMMSS).
    dt = re.sub(r'[^0-9]', '', str(datetime.datetime.now().strftime('%Y-%m-%d-%H:%M:%S')))

    # NOTE(review): 2367 is hard-coded; this raises IndexError when fewer
    # ids were loaded and ignores any ids beyond that count.
    for i in xrange(0, 2367):
        ud.get_people_detail(userID_list[i], i, dt)
        #time.sleep(1)
def __init__(self):
    """Create the ``Mongo_2`` handle and start with two empty follower lists."""
    self.mongo = Mongo_2()
    # Two independent list objects, both empty at construction time.
    self.followers_list_old = []
    self.followers_list = []
def extract_voters_info():
    """Return a dict keyed by every ``user_id`` found in ``voters_info_1111``.

    Returns:
        A dict mapping each document's ``user_id`` to the empty string.
        Documents without the field contribute the key ``None``.
    """
    mongo = Mongo_2()
    try:
        cursor = mongo.db.voters_info_1111.find({}, {"user_id": 1})
        return {doc.get('user_id'): "" for doc in cursor}
    finally:
        # Close the client even if the query raised.
        mongo.client.close()