class FriendExist():
    """Scrape the logged-in account's friend list from m.facebook.com.

    The constructor logs in through ``Launcher.login_mobile()``, opens the
    mobile friends-center page and clicks the "load more" link if present.
    """

    _FRIENDS_URL = 'https://m.facebook.com/friends/center/friends'
    _LOAD_MORE_XPATH = '//div[@id="friends_center_main"]/div[2]/a'
    _ROW_XPATH = '//div[@id="friends_center_main"]/div[2]/div'
    _NAME_LINK_XPATH = './table/tbody/tr/td[2]/a'
    _UID_PATTERN = r'uid=(\d+)'

    def __init__(self, username, password):
        """Log in with the given credentials and load the friends page."""
        self.launcher = Launcher(username, password)
        self.driver = self.launcher.login_mobile()
        time.sleep(2)
        self.driver.get(self._FRIENDS_URL)
        time.sleep(3)
        # Expand the list ("load more"); a missing link is not an error.
        try:
            self.driver.find_element_by_xpath(self._LOAD_MORE_XPATH).click()
        except Exception:
            pass
        self.es = Es_fb()
        self.friends_list = []
        self.current_ts = int(time.time())
        self.update_time = self.current_ts

    def get_friend_exist(self):
        """Collect one dict per friend row and return ``self.friends_list``.

        Each dict has the keys ``uid``, ``nick_name`` and ``profile_url``.
        The browser is always shut down, even when a row cannot be parsed
        (the exception still propagates to the caller).
        """
        try:
            for row in self.driver.find_elements_by_xpath(self._ROW_XPATH):
                # The same anchor carries both the display name and the uid.
                link = row.find_element_by_xpath(self._NAME_LINK_XPATH)
                name = link.text
                href = link.get_attribute('href') or ''
                user_id = ''.join(re.findall(self._UID_PATTERN, href))
                self.friends_list.append({
                    'uid': user_id,
                    'nick_name': name,
                    'profile_url':
                        'https://m.facebook.com/profile.php?id=' + str(user_id),
                })
        finally:
            self.driver.quit()
        return self.friends_list

    def save(self, indexName, typeName, friends_exist_list):
        """Hand the collected records to ``Es_fb.executeES``."""
        self.es.executeES(indexName, typeName, friends_exist_list)
class Friend():
    """Scrape the friend list shown on the logged-in user's timeline.

    The constructor logs in, opens the profile page, switches to the
    friends tab and scrolls repeatedly so more friends get loaded.
    """

    _POPUP_XPATH = '//div[@class="_n8 _3qx uiLayer _3qw"]'
    _NAME_LINK_XPATH = './div/div/div[2]/div/div[2]/div/a'

    def __init__(self, username, password):
        """Log in and navigate to the fully scrolled friends tab."""
        self.launcher = Launcher(username, password)
        self.driver, self.display = self.launcher.login()
        time.sleep(2)
        self._dismiss_popup()
        self.driver.find_element_by_xpath('//a[@title="个人主页"]').click()
        time.sleep(3)
        self._dismiss_popup()
        self.driver.find_element_by_xpath(
            '//ul[@data-referrer="timeline_light_nav_top"]/li[3]/a').click()
        time.sleep(3)
        self._dismiss_popup()
        # Load more: the scroll offset doubles on every pass.
        length = 100
        for _ in range(0, 50):
            self.driver.execute_script(
                "var q=document.documentElement.scrollTop=" + str(length))
            time.sleep(1)
            length += length
        self.es = Es_fb()
        self.list = []
        self.current_ts = int(time.time())
        self.update_time = self.current_ts

    def _dismiss_popup(self):
        """Click away the notification overlay when it is present."""
        try:
            self.driver.find_element_by_xpath(self._POPUP_XPATH).click()
        except Exception:
            pass

    def get_friend(self):
        """Return ``self.list`` filled with one dict per friend card.

        A card that cannot be parsed is reported with ``print`` and skipped.
        Previously such a card was appended anyway, which raised NameError on
        the first card or silently duplicated the previous friend's data.
        The browser and the virtual display are always shut down.
        """
        try:
            for card in self.driver.find_elements_by_xpath(
                    '//div[@class="_5h60 _30f"]//ul//li'):
                try:
                    link = card.find_element_by_xpath(self._NAME_LINK_XPATH)
                    record = {
                        'uid': ''.join(re.findall(
                            r'id=(\d+)', link.get_attribute('data-hovercard'))),
                        'photo_url': card.find_element_by_xpath(
                            './div/a/img').get_attribute('src'),
                        'nick_name': link.text,
                        'friends': card.find_element_by_xpath(
                            './div/div/div[2]/div/div[2]/a').text,
                        'profile_url': link.get_attribute('href') + '&sk=about',
                        'update_time': self.update_time,
                    }
                except Exception as e:
                    print(e)
                    continue
                self.list.append(record)
        finally:
            # Kill the display even if quitting the browser fails.
            try:
                self.driver.quit()
            finally:
                self.display.popen.kill()
        return self.list

    def save(self, indexName, typeName, list):
        """Hand the collected records to ``Es_fb.executeES``."""
        self.es.executeES(indexName, typeName, list)
class Share():
    """Scrape the users who re-shared a post (desktop layout)."""

    _AUTHOR_LINK_XPATH = (
        './div/div[2]/div[1]/div[2]/div[1]/div/div/div[2]/div/div/div[2]'
        '/h5/span/span/span/a')
    _PHOTO_XPATH = './div/div[2]/div/div[2]/div/div/a/div/img'
    _CONTENT_XPATH = './div/div[2]/div/div[2]/div[2]//p'
    _TIME_XPATH = (
        './div/div[2]/div/div[2]/div/div/div/div[2]/div/div/div[2]/div'
        '/span[3]/span/a/abbr')

    def __init__(self, username, password):
        """Log in and fetch the share URLs from the launcher."""
        self.launcher = Launcher(username, password)
        self.driver = self.launcher.login()
        self.es = Es_fb()
        self.list = []
        self.share_list = self.launcher.get_share_list()

    def get_share(self):
        """Return ``self.list`` with one dict per re-share of the first URL.

        Only ``share_list[0]`` is visited. An empty ``share_list`` returns
        the (unchanged) list instead of raising IndexError.
        """
        if not self.share_list:
            return self.list
        self.driver.get(self.share_list[0])
        for group in self.driver.find_elements_by_xpath(
                '//div[@id="repost_view_permalink"]/div/div[1]/div'):
            for post in group.find_elements_by_xpath('./div'):
                # One anchor carries both the author name and the hovercard id.
                author = post.find_element_by_xpath(self._AUTHOR_LINK_XPATH)
                author_name = author.text
                # NOTE(review): 'uid' is the raw findall() list here, while
                # other scrapers in this file join it into a string; kept
                # as-is so the stored shape does not change.
                author_id = re.findall(
                    r'id=(\d+)', author.get_attribute('data-hovercard'))
                pic_url = post.find_element_by_xpath(
                    self._PHOTO_XPATH).get_attribute('src')
                try:
                    content = post.find_element_by_xpath(
                        self._CONTENT_XPATH).text
                except Exception:
                    content = 'None'
                timestamp = int(post.find_element_by_xpath(
                    self._TIME_XPATH).get_attribute('data-utime'))
                self.list.append({
                    'nick_name': author_name,
                    'uid': author_id,
                    'photo_url': pic_url,
                    'text': content,
                    'timestamp': timestamp,
                })
        return self.list

    def save(self, indexName, typeName, list):
        """Hand the collected records to ``Es_fb.executeES``."""
        self.es.executeES(indexName, typeName, list)
class Message():
    """Scrape private messages from the desktop messenger page."""

    def __init__(self, username, password):
        """Log in and prepare the result list."""
        self.launcher = Launcher(username, password)
        self.driver = self.launcher.login()
        self.es = Es_fb()
        self.list = []

    @staticmethod
    def _parse_date_label(label):
        """Turn a label containing 'YYYY年M月D日' into a local Unix timestamp.

        Raises IndexError/ValueError/TypeError when the label has no such
        date. The pattern is a unicode literal so that it also matches
        unicode text on Python 2.
        """
        parts = re.findall(u'(\\d+)年(\\d+)月(\\d+)日', label)[0]
        return int(time.mktime(time.strptime('-'.join(parts), "%Y-%m-%d")))

    def _conversation_timestamp(self):
        """Timestamp of the last <time> element on the page, else 'now'."""
        try:
            stamps = self.driver.find_elements_by_xpath(
                '//div[@aria-label="消息"]//time')
            return self._parse_date_label(stamps[-1].text)
        except Exception:
            return int(time.time())

    def get_list(self):
        """Return one dict (name, pic, message_url) per conversation row."""
        self.driver.get('https://www.facebook.com/messages/t/')
        sx_list = []
        for row in self.driver.find_elements_by_xpath(
                '//ul[@aria-label="对话列表"]/li'):
            sx_list.append({
                'name': row.find_element_by_xpath(
                    './div/a/div[2]/div[1]/span').text,
                'pic': row.find_element_by_xpath(
                    './div/a/div[1]/div/div/div//img').get_attribute('src'),
                'message_url': row.find_element_by_xpath(
                    './div/a').get_attribute('data-href'),
            })
        return sx_list

    def get_message(self):
        """Visit every conversation and append its messages to ``self.list``.

        The timestamp comes from a page-wide lookup, so it is computed once
        per conversation rather than once per message.
        """
        for sx in self.get_list():
            self.driver.get(sx['message_url'])
            time.sleep(1)
            messages = self.driver.find_elements_by_xpath(
                '//div[@class="_41ud"]')
            if not messages:
                continue
            timestamp = self._conversation_timestamp()
            for message in messages:
                try:
                    mes = message.find_element_by_xpath(
                        './div/div/div/span').text
                except Exception:
                    mes = 'None'
                self.list.append({'nick_name': sx['name'],
                                  'text': mes,
                                  'timestamp': timestamp})
        return self.list

    def save(self, indexName, typeName, list):
        """Hand the collected records to ``Es_fb.executeES``."""
        self.es.executeES(indexName, typeName, list)
class Message():
    """Scrape private messages (with direction and time) from desktop."""

    _POPUP_XPATH = '//div[@class="_n8 _3qx uiLayer _3qw"]'

    def __init__(self, username, password):
        """Log in and prepare the result list."""
        self.launcher = Launcher(username, password)
        self.driver = self.launcher.login()
        self.es = Es_fb()
        self.list = []
        self.update_time = int(time.time())

    def _dismiss_popup(self):
        """Click away the notification overlay when it is present."""
        try:
            self.driver.find_element_by_xpath(self._POPUP_XPATH).click()
        except Exception:
            pass

    @staticmethod
    def _parse_tooltip_time(tooltip):
        """Convert a tooltip like u'2018年3月5日 下午8:05' to a Unix timestamp.

        Returns 0 when the tooltip is missing or has no date/time in it.
        An explicit 下午 (PM) marker shifts hours below 12 by twelve and
        上午12 becomes hour 0; without a marker the clock is taken as is.
        """
        try:
            # Unicode literal so the CJK characters match on Python 2 too.
            year, month, day = re.findall(
                u'(\\d+)年(\\d+)月(\\d+)日', tooltip)[0]
            hour, minute = re.findall(r'(\d+):(\d+)', tooltip)[0]
            hour = int(hour)
            if u'下午' in tooltip and hour < 12:
                hour += 12
            elif u'上午' in tooltip and hour == 12:
                hour = 0
            stamp = '%s-%s-%s %d:%s:00' % (year, month, day, hour, minute)
            return int(time.mktime(
                time.strptime(stamp, '%Y-%m-%d %H:%M:%S')))
        except (IndexError, TypeError, ValueError, OverflowError):
            return 0

    def get_list(self):
        """Return one dict per conversation row that has a message URL."""
        self.driver.get('https://www.facebook.com/messages/t/')
        self._dismiss_popup()
        sx_list = []
        for row in self.driver.find_elements_by_xpath(
                '//ul[@aria-label="对话列表"]/li'):
            try:
                author_name = row.find_element_by_xpath(
                    './div/a/div[2]/div[1]/span').text
            except Exception:
                author_name = 'None'
            try:
                author_id = ''.join(re.findall(
                    r'row_header_id_user:(\d+)',
                    row.find_element_by_xpath('./div').get_attribute('id')))
            except Exception:
                author_id = 'None'
            try:
                pic_url = row.find_element_by_xpath(
                    './div/a/div[1]/div/div/div//img').get_attribute('src')
            except Exception:
                pic_url = 'None'
            try:
                message_url = row.find_element_by_xpath(
                    './div/a').get_attribute('data-href')
            except Exception:
                message_url = False
            if message_url:
                sx_list.append({'name': author_name,
                                'pic': pic_url,
                                'message_url': message_url,
                                'author_id': author_id})
        return sx_list

    def get_message(self):
        """Visit every conversation and append its messages to ``self.list``.

        ``private_type`` is 'receive' when the last participant id equals the
        conversation partner's id, 'make' otherwise, and 'unknown' when the
        participants attribute cannot be read. The browser window is closed
        in all cases.
        """
        try:
            for sx in self.get_list():
                self.driver.get(sx['message_url'])
                time.sleep(1)
                self._dismiss_popup()
                for message in self.driver.find_elements_by_xpath(
                        '//div[@class="_41ud"]'):
                    # One element carries both the tooltip and participants.
                    try:
                        bubble = message.find_element_by_xpath('./div/div')
                        tooltip = bubble.get_attribute('data-tooltip-content')
                        participants = bubble.get_attribute('participants')
                    except Exception:
                        tooltip = participants = None
                    messageTime = self._parse_tooltip_time(tooltip)
                    try:
                        sender_id = re.findall(
                            r'"fbid:(\d+)"', participants)[-1]
                        body = message.text
                        if sender_id == sx['author_id']:
                            private_type = 'receive'
                            text, root_text = body, 'None'
                        else:
                            private_type = 'make'
                            text, root_text = 'None', body
                    except Exception:
                        private_type = 'unknown'
                        text = 'None'
                        root_text = 'None'
                    self.list.append({'uid': sx['author_id'],
                                      'photo_url': sx['pic'],
                                      'nick_name': sx['name'],
                                      'timestamp': messageTime,
                                      'update_time': self.update_time,
                                      'text': text,
                                      'root_text': root_text,
                                      'private_type': private_type})
        finally:
            self.driver.close()
        return self.list

    def save(self, indexName, typeName, list):
        """Hand the collected records to ``Es_fb.executeES``."""
        self.es.executeES(indexName, typeName, list)
class Comment():
    """Scrape the comments under each post URL supplied by the launcher."""

    _POPUP_XPATH = '//div[@class="_n8 _3qx uiLayer _3qw"]'
    _ROOT_TEXT_XPATHS = (
        '//div[@role="feed"]/div[1]/div[1]/div[2]/div[1]/div[2]/div[2]',
        '//div[@role="feed"]/div[1]/div[1]/div[1]/div[1]/div[2]/div[2]',
    )
    _ROOT_LINK_XPATHS = (
        '//div[@role="feed"]/div[1]/div[1]/div[2]/div[1]/div[2]/div[1]/div/div/div[2]/div/div/div[2]/div/span[3]/span/a',
        '//div[@role="feed"]/div[1]/div[1]/div[1]/div[1]/div[2]/div[1]/div/div/div[2]/div/div/div[2]/div/span[3]/span/a',
    )
    _AUTHOR_XPATHS = (
        './div/div/div/div[2]/div/div/div/div/div/span/span[1]/a',
        './div/div/div/div[2]/div/div/div/span/span[1]/a',
    )
    _CONTENT_XPATHS = (
        './div/div/div/div[2]/div/div/div/div/div/span/span[2]/span/span/span/span',
        './div/div/div/div[2]/div/div/div/span/span[2]/span/span/span/span',
    )
    _TIME_XPATHS = (
        './div/div/div/div[2]/div/div/div[2]/span[4]/a/abbr',
        './div/div/div/div[2]/div/div/div[2]/span[5]/a/abbr',
    )

    def __init__(self, username, password):
        """Log in and fetch the list of post URLs to visit."""
        self.launcher = Launcher(username, password)
        self.driver = self.launcher.login()
        self.es = Es_fb()
        self.comment_list = self.launcher.get_comment_list()
        self.list = []
        self.update_time = int(time.time())

    @staticmethod
    def _first_of(scope, xpaths, extract, default):
        """Return ``extract(element)`` for the first XPath that works.

        Each XPath is tried in order under ``scope``; a lookup or extraction
        failure moves on to the next one. ``default`` is returned when none
        of them succeeds.
        """
        for xpath in xpaths:
            try:
                return extract(scope.find_element_by_xpath(xpath))
            except Exception:
                continue
        return default

    def get_comment(self):
        """Return ``self.list`` with one dict per comment on every post.

        Every field falls back to a placeholder ('None', or 0 for the
        timestamp) when neither known page layout matches, so a single
        unusual comment no longer aborts the whole run. The browser window
        is closed in all cases.
        """
        def text_of(el):
            return el.text

        def hovercard_id(el):
            return ''.join(re.findall(
                r'id=(\d+)', el.get_attribute('data-hovercard')))

        def story_id(el):
            return ''.join(re.findall(
                r'story_fbid=(\d+)', el.get_attribute('href')))

        def utime(el):
            return int(el.get_attribute('data-utime'))

        try:
            for url in self.comment_list:
                print(url)
                self.driver.get(url)
                time.sleep(1)
                # Dismiss the notification overlay when present.
                try:
                    self.driver.find_element_by_xpath(
                        self._POPUP_XPATH).click()
                except Exception:
                    pass
                root_text = self._first_of(
                    self.driver, self._ROOT_TEXT_XPATHS, text_of, 'None')
                root_mid = self._first_of(
                    self.driver, self._ROOT_LINK_XPATHS, story_id, 'None')
                for each in self.driver.find_elements_by_xpath(
                        '//div[@aria-label="评论"]'):
                    author_name = self._first_of(
                        each, self._AUTHOR_XPATHS, text_of, 'None')
                    author_id = self._first_of(
                        each, self._AUTHOR_XPATHS, hovercard_id, 'None')
                    try:
                        pic_url = each.find_element_by_xpath(
                            './div/div/div/div[1]/a/img').get_attribute('src')
                    except Exception:
                        pic_url = 'None'
                    content = self._first_of(
                        each, self._CONTENT_XPATHS, text_of, 'None')
                    ti = self._first_of(each, self._TIME_XPATHS, utime, 0)
                    self.list.append({'uid': author_id,
                                      'photo_url': pic_url,
                                      'nick_name': author_name,
                                      'mid': root_mid,
                                      'timestamp': ti,
                                      'text': content,
                                      'update_time': self.update_time,
                                      'root_text': root_text,
                                      'root_mid': root_mid})
        finally:
            self.driver.close()
        return self.list

    def save(self, indexName, typeName, list):
        """Hand the collected records to ``Es_fb.executeES``."""
        self.es.executeES(indexName, typeName, list)
class Like():
    """Scrape the users who liked each post (mobile layout)."""

    _NAME_LINK_XPATH = './table/tbody/tr/td/table/tbody/tr/td[3]/div/h3[1]/a'

    def __init__(self, username, password):
        """Obtain the post URLs and a logged-in driver from the launcher."""
        self.launcher = Launcher(username, password)
        self.like_urls_list, self.driver = self.launcher.get_like_list()
        self.es = Es_fb()
        self.like_list = []
        self.update_time = int(time.time())

    def date2timestamp(self, date):
        """Convert a Chinese date label to a local Unix timestamp.

        Handles relative labels ('N分钟…', 'N小时…') and calendar dates with
        or without a year; a missing year means the current year. Any
        上午/下午 clock suffix is dropped, so calendar dates resolve to
        midnight. Raises ValueError when the remaining text is not a date.
        """
        date = date.replace(u'月', '-').replace(u'日', '').replace(' ', '')
        for marker in (u'上午', u'下午'):
            if marker in date:
                date = date.split(marker)[0]
        now = int(time.time())
        if u'分钟' in date:
            return now - int(re.search(r'(\d+)', date).group(1)) * 60
        if u'小时' in date:
            return now - int(re.search(r'(\d+)', date).group(1)) * 60 * 60
        if u'年' in date:
            date = date.replace(u'年', '-')
        else:
            date = time.strftime('%Y') + '-' + date
        return int(time.mktime(time.strptime(date, '%Y-%m-%d')))

    def get_like(self):
        """Return ``self.like_list`` with one dict per (post, liker) pair.

        A post whose like-list link cannot be found still raises, but the
        browser is now always shut down first.
        """
        try:
            for url in self.like_urls_list:
                self.driver.get(url)
                time.sleep(1)
                try:
                    root_text = self.driver.find_element_by_xpath(
                        '//div[@id="m_story_permalink_view"]/div[1]/div/div[1]/div[2]'
                    ).text
                except Exception:
                    root_text = 'None'
                print(root_text)
                try:
                    timestamp = self.date2timestamp(
                        self.driver.find_element_by_xpath(
                            '//div[@id="m_story_permalink_view"]/div[1]/div/div[2]/div[1]'
                        ).text)
                except Exception:
                    timestamp = 0
                print(timestamp)
                try:
                    root_mid = ''.join(
                        re.search(r'fbid%3D(\d+)%', url).group(1))
                except Exception:
                    root_mid = 0
                print(root_mid)
                # Open the page that lists everyone who liked the post.
                likers_url = self.driver.find_element_by_xpath(
                    '//div[@id="m_story_permalink_view"]/div[2]/div/div[3]/a'
                ).get_attribute('href')
                self.driver.get(likers_url)
                time.sleep(5)
                for each in self.driver.find_elements_by_xpath(
                        '//div[@id="root"]/table/tbody/tr/td/div/ul/li'):
                    try:
                        author_name = each.find_element_by_xpath(
                            self._NAME_LINK_XPATH).text
                    except Exception:
                        author_name = 'None'
                    print(author_name)
                    try:
                        author_id = ''.join(re.findall(
                            r'id=(\d+)',
                            each.find_element_by_xpath(
                                self._NAME_LINK_XPATH).get_attribute('href')))
                    except Exception:
                        author_id = 0
                    try:
                        pic_url = each.find_element_by_xpath(
                            './table/tbody/tr/td/table/tbody/tr/td[1]/img'
                        ).get_attribute('src')
                    except Exception:
                        pic_url = 'None'
                    self.like_list.append({
                        'uid': author_id,
                        'photo_url': pic_url,
                        'nick_name': author_name,
                        'timestamp': timestamp,
                        'root_text': root_text,
                        'update_time': self.update_time,
                        'root_mid': root_mid,
                    })
        finally:
            self.driver.quit()
        return self.like_list

    def save(self, indexName, typeName, list):
        """Hand the collected records to ``Es_fb.executeES``."""
        self.es.executeES(indexName, typeName, list)
class Message():
    """Scrape private messages from the mobile site (m.facebook.com)."""

    _PHOTO_XPATHS = (
        '//div[@id="m-timeline-cover-section"]/div[1]/div[2]/div[1]/div/a/img',
        '//div[@id="m-timeline-cover-section"]/div[2]/div/div[1]/div[1]/a/img',
        '//div[@id="m-timeline-cover-section"]/div[2]/div/div[1]/a/img',
    )
    _THREAD_LINK_XPATH = './tbody/tr/td/div/h3[1]/a'

    def __init__(self, username, password):
        """Log in through the mobile site and prepare the result list."""
        self.launcher = Launcher(username, password)
        self.driver = self.launcher.login_mobile()
        self.es = Es_fb()
        self.list = []
        self.update_time = int(time.time())

    def date2timestamp(self, date):
        """Convert a Chinese date/time label to a local Unix timestamp.

        Understands '刚刚' (just now), 'N分钟…' / 'N小时…' relative labels,
        and calendar dates with or without a year and with an optional
        上午/下午 clock. A missing year means the current year. Raises
        ValueError (or IndexError for a malformed clock) on other text.
        """
        date = date.replace(u'月', '-').replace(u'日', '').replace(' ', '')
        now = int(time.time())
        # Unicode literal: a byte-string literal never equals the unicode
        # text returned by the driver on Python 2.
        if date == u'刚刚':
            return now
        # Fold the 12-hour clock into 24-hour form. int() replaces the
        # former eval(), which executed scraped text and failed on '08'.
        for marker, offset in ((u'上午', 0), (u'下午', 12)):
            if marker in date:
                pieces = date.split(marker)
                clock = pieces[1].split(':')
                hour = int(clock[0]) % 12 + offset
                date = pieces[0] + ' ' + str(hour) + ':' + clock[1]
                break
        if u'分钟' in date:
            return now - int(re.search(r'(\d+)', date).group(1)) * 60
        if u'小时' in date:
            return now - int(re.search(r'(\d+)', date).group(1)) * 60 * 60
        if u'年' in date:
            date = date.replace(u'年', '-')
        else:
            date = time.strftime('%Y') + '-' + date
        try:
            return int(time.mktime(time.strptime(date, '%Y-%m-%d')))
        except ValueError:
            return int(time.mktime(time.strptime(date, '%Y-%m-%d %H:%M')))

    def _profile_photo(self):
        """Return the avatar URL of the open profile page, else 'None'.

        Three known layouts are tried in order. The last one used to store
        the element itself instead of its ``src`` and raised when absent.
        """
        for xpath in self._PHOTO_XPATHS:
            try:
                return self.driver.find_element_by_xpath(
                    xpath).get_attribute('src')
            except Exception:
                continue
        return 'None'

    def get_list(self):
        """Return one dict per conversation row that has a thread link."""
        self.driver.get('https://m.facebook.com/messages/t/')
        sx_list = []
        for each in self.driver.find_elements_by_xpath(
                '//div[@id="root"]/div[1]/div[2]/div/table'):
            try:
                author_name = each.find_element_by_xpath(
                    './tbody/tr/td/div/h3[1]').text
            except Exception:
                author_name = 'None'
            print(author_name)
            try:
                author_id = ''.join(re.findall(
                    r'%3A(\d+)#',
                    each.find_element_by_xpath(
                        self._THREAD_LINK_XPATH).get_attribute('href')))
            except Exception:
                author_id = 'None'
            print(author_id)
            try:
                message_url = each.find_element_by_xpath(
                    self._THREAD_LINK_XPATH).get_attribute('href')
            except Exception:
                message_url = False
            print(message_url)
            if message_url:
                sx_list.append({'author_name': author_name,
                                'message_url': message_url,
                                'author_id': author_id})
        return sx_list

    def get_message(self):
        """Return ``self.list`` with one dict per message in every thread.

        First pass: visit each partner's profile to get the avatar URL.
        Second pass: open each thread and read its messages; reading a
        thread stops at the first entry without a date label. The browser
        is always shut down.
        """
        try:
            sx_list = self.get_list()
            for sx in sx_list:
                self.driver.get('https://m.facebook.com/profile.php?id='
                                + str(sx['author_id']))
                sx['photo_url'] = self._profile_photo()
            for sx in sx_list:
                self.driver.get(sx['message_url'])
                time.sleep(1)
                for message in self.driver.find_elements_by_xpath(
                        '//div[@id="messageGroup"]/div/div'):
                    try:
                        date = message.find_element_by_xpath(
                            './div[2]/abbr').text
                    except Exception:
                        break
                    print(date)
                    try:
                        messageTime = self.date2timestamp(date)
                    except Exception:
                        messageTime = 0
                    print(messageTime)
                    try:
                        # A sender link with an id= parameter marks the
                        # message as received.
                        if re.findall(
                                r'id=(\d+)&',
                                message.find_element_by_xpath(
                                    './div[1]/a').get_attribute('href')):
                            private_type = 'receive'
                            text = message.text
                            root_text = ''
                        else:
                            private_type = 'make'
                            text = ''
                            root_text = message.text
                    except Exception:
                        private_type = 'unknown'
                        text = message.text
                        root_text = ''
                    self.list.append({'uid': sx['author_id'],
                                      'photo_url': sx['photo_url'],
                                      'nick_name': sx['author_name'],
                                      'timestamp': messageTime,
                                      'update_time': self.update_time,
                                      'text': text,
                                      'root_text': root_text,
                                      'private_type': private_type})
        finally:
            self.driver.quit()
        return self.list

    def save(self, indexName, typeName, list):
        """Hand the collected records to ``Es_fb.executeES``."""
        self.es.executeES(indexName, typeName, list)
class Like():
    """Scrape the users who liked each post (desktop layout)."""

    _POPUP_XPATH = '//div[@class="_n8 _3qx uiLayer _3qw"]'
    _NAME_LINK_XPATH = './div/div/div/div[1]/div[2]/div/a'

    def __init__(self, username, password):
        """Log in and fetch the list of post URLs to visit."""
        self.launcher = Launcher(username, password)
        self.driver = self.launcher.login()
        self.like_list = self.launcher.get_like_list()
        self.es = Es_fb()
        self.list = []
        self.update_time = int(time.time())

    def _dismiss_popup(self):
        """Click away the notification overlay when it is present."""
        try:
            self.driver.find_element_by_xpath(self._POPUP_XPATH).click()
        except Exception:
            pass

    def _post_timestamp(self):
        """Read ``data-utime`` from either known <abbr> class; 0 if absent."""
        for xpath in ('//abbr[@class="_5ptz"]',
                      '//abbr[@class="_5ptz timestamp livetimestamp"]'):
            try:
                return int(self.driver.find_element_by_xpath(
                    xpath).get_attribute('data-utime'))
            except Exception:
                continue
        return 0

    def get_like(self):
        """Return ``self.list`` with one dict per (post, liker) pair.

        Post-level fields fall back to 'None' / 0 when not found. A post
        without a like-list link raises. The browser window is closed in
        all cases.
        """
        try:
            for url in self.like_list:
                self.driver.get(url)
                time.sleep(1)
                self._dismiss_popup()
                try:
                    text = self.driver.find_element_by_xpath(
                        '//div[@class="_5pbx userContent _22jv _3576"]').text
                except Exception:
                    text = 'None'
                timestamp = self._post_timestamp()
                try:
                    mid = ''.join(re.findall(
                        r'/(\d+)',
                        self.driver.find_element_by_xpath(
                            '//a[@class="_5pcq"]').get_attribute('href')))
                except Exception:
                    mid = 0
                # Open the page that lists everyone who liked the post.
                self.driver.get(self.driver.find_element_by_xpath(
                    '//a[@class="_2x4v"]').get_attribute('href'))
                time.sleep(5)
                self._dismiss_popup()
                for each in self.driver.find_elements_by_xpath(
                        '//li[@class="_5i_q"]'):
                    try:
                        author_name = each.find_element_by_xpath(
                            self._NAME_LINK_XPATH).text
                    except Exception:
                        author_name = 'None'
                    try:
                        author_id = ''.join(re.findall(
                            r'id=(\d+)',
                            each.find_element_by_xpath(
                                self._NAME_LINK_XPATH
                            ).get_attribute('data-hovercard')))
                    except Exception:
                        author_id = 'None'
                    try:
                        pic_url = each.find_element_by_xpath(
                            './div/a/div/img').get_attribute('src')
                    except Exception:
                        pic_url = 'None'
                    self.list.append({'uid': author_id,
                                      'photo_url': pic_url,
                                      'nick_name': author_name,
                                      'timestamp': timestamp,
                                      'text': text,
                                      'update_time': self.update_time,
                                      'root_text': text,
                                      'root_mid': mid})
        finally:
            self.driver.close()
        return self.list

    def save(self, indexName, typeName, list):
        """Hand the collected records to ``Es_fb.executeES``."""
        self.es.executeES(indexName, typeName, list)
class Friend():
    """Scrape the entries shown on the desktop friend-requests page."""

    _POPUP_XPATH = '//div[@class="_n8 _3qx uiLayer _3qw"]'
    _NAME_LINK_XPATH = './div/div[2]/div[1]/a'

    def __init__(self, username, password):
        """Log in, open the friend-requests page and scroll to load more."""
        self.launcher = Launcher(username, password)
        self.driver = self.launcher.login()
        time.sleep(2)
        self._dismiss_popup()
        # Open the friend-requests page.
        self.driver.get('https://www.facebook.com/friends/requests')
        time.sleep(3)
        self._dismiss_popup()
        # Load more: scroll down 400px further on each of 20 passes.
        length = 100
        for _ in range(0, 20):
            self.driver.execute_script(
                "var q=document.documentElement.scrollTop=" + str(length))
            time.sleep(1)
            length += 400
        self.es = Es_fb()
        self.list = []
        self.current_ts = int(time.time())
        self.update_time = self.current_ts

    def _dismiss_popup(self):
        """Click away the notification overlay when it is present."""
        try:
            self.driver.find_element_by_xpath(self._POPUP_XPATH).click()
        except Exception:
            pass

    def get_friend(self):
        """Return ``self.list`` with one dict per entry on the page.

        An entry that cannot be parsed is reported with ``print`` and
        skipped. ``friends`` is always None for this page. The browser is
        always shut down.
        """
        try:
            for each in self.driver.find_elements_by_xpath(
                    '//div[@id="globalContainer"]/div/div/div/div/div[3]/div'):
                try:
                    pic_url = each.find_element_by_xpath(
                        './a/div/img').get_attribute('src')
                    # One anchor carries the name, hovercard id and URL.
                    link = each.find_element_by_xpath(self._NAME_LINK_XPATH)
                    name = link.text
                    user_id = ''.join(re.findall(
                        r'id=(\d+)', link.get_attribute('data-hovercard')))
                    profile_url = link.get_attribute('href')
                    self.list.append({'uid': user_id,
                                      'photo_url': pic_url,
                                      'nick_name': name,
                                      'friends': None,
                                      'profile_url': profile_url,
                                      'update_time': self.update_time})
                except Exception as e:
                    print(e)
        finally:
            self.driver.quit()
        return self.list

    def save(self, indexName, typeName, list):
        """Hand the collected records to ``Es_fb.executeES``."""
        self.es.executeES(indexName, typeName, list)
class Mention():
    """Scrape the posts that mention the logged-in user (desktop layout)."""

    _AUTHOR_XPATHS = (
        './div/div/div[3]/div/div/div/div[2]/div[1]/div[2]/div[1]/div/div/div[2]/div/div/div[2]/h5/span/span/span/a',
        './div/div/div/div/div/div/div[2]/div[1]/div[2]/div[1]/div/div/div[2]/div/div/div[2]/h5/span/span/span/a',
    )
    _PHOTO_XPATHS = (
        './div/div/div[3]/div/div/div/div[2]/div/div[2]/div/div/a/div/img',
        './div/div/div/div/div/div/div[2]/div/div[2]/div/div/a/div/img',
    )
    _TIME_XPATHS = (
        './div/div/div[3]/div/div/div/div[2]/div/div[2]/div/div/div/div[2]/div/div/div[2]/div/span[3]/span/span/a/abbr',
        './div/div/div/div/div/div/div[2]/div/div[2]/div/div/div/div[2]/div/div/div[2]/div/span[3]/span/a/abbr',
    )
    _CONTENT_XPATH = './div/div/div/div/div/div/div[2]/div/div[2]/div[2]/p'

    def __init__(self, username, password):
        """Log in and fetch the list of mention URLs to visit."""
        self.launcher = Launcher(username, password)
        self.driver = self.launcher.login()
        self.mention_list = self.launcher.get_mention_list()
        self.es = Es_fb()
        self.list = []

    @staticmethod
    def _either(scope, xpaths, extract):
        """Apply ``extract`` to the primary XPath, else to the fallback.

        ``xpaths`` is a (primary, fallback) pair. A failure on the fallback
        propagates to the caller.
        """
        primary, fallback = xpaths
        try:
            return extract(scope.find_element_by_xpath(primary))
        except Exception:
            return extract(scope.find_element_by_xpath(fallback))

    def get_mention(self):
        """Return ``self.list`` with one dict per mention page.

        Author, photo and timestamp are required: when neither layout
        matches, the exception propagates. Missing text becomes 'None'.
        """
        def hovercard_id(el):
            return ''.join(re.findall(
                r'id=(\d+)', el.get_attribute('data-hovercard')))

        for url in self.mention_list:
            print(url)
            self.driver.get(url)
            for each in self.driver.find_elements_by_xpath(
                    '//div[@id="contentArea"]'):
                author_name = self._either(
                    each, self._AUTHOR_XPATHS, lambda el: el.text)
                author_id = self._either(
                    each, self._AUTHOR_XPATHS, hovercard_id)
                pic_url = self._either(
                    each, self._PHOTO_XPATHS,
                    lambda el: el.get_attribute('src'))
                ti = self._either(
                    each, self._TIME_XPATHS,
                    lambda el: int(el.get_attribute('data-utime')))
                try:
                    content = each.find_element_by_xpath(
                        self._CONTENT_XPATH).text
                except Exception:
                    content = 'None'
                self.list.append({
                    'nick_name': author_name,
                    'uid': author_id,
                    'photo_url': pic_url,
                    'timestamp': ti,
                    'text': content,
                })
        return self.list

    def save(self, indexName, typeName, list):
        """Hand the collected records to ``Es_fb.executeES``."""
        self.es.executeES(indexName, typeName, list)
class Mention():
    """Scrape the posts that mention the logged-in user (desktop layout).

    Every field falls back to the string 'None' when it cannot be read.
    """

    _POPUP_XPATH = '//div[@class="_n8 _3qx uiLayer _3qw"]'
    _AUTHOR_XPATHS = (
        './div/div/div[3]/div/div/div/div[2]/div[1]/div[2]/div[1]/div/div/div[2]/div/div/div[2]/h5/span/span/span/a',
        './div/div/div/div/div/div/div[2]/div[1]/div[2]/div[1]/div/div/div[2]/div/div/div[2]/h5/span/span/span/a',
    )
    _PHOTO_XPATHS = (
        './div/div/div[3]/div/div/div/div[2]/div/div[2]/div/div/a/div/img',
        './div/div/div/div/div/div/div[2]/div/div[2]/div/div/a/div/img',
    )
    _TIME_XPATHS = (
        './div/div/div[3]/div/div/div/div[2]/div/div[2]/div/div/div/div[2]/div/div/div[2]/div/span[3]/span/span/a/abbr',
        './div/div/div/div/div/div/div[2]/div/div[2]/div/div/div/div[2]/div/div/div[2]/div/span[3]/span/a/abbr',
    )
    _PERMALINK_XPATHS = (
        './div/div/div[3]/div/div/div/div[2]/div/div[2]/div/div/div/div[2]/div/div/div[2]/div/span[3]/span/span/a',
        './div/div/div/div/div/div/div[2]/div/div[2]/div/div/div/div[2]/div/div/div[2]/div/span[3]/span/a',
    )
    _CONTENT_XPATHS = (
        './div/div/div/div/div/div/div[2]/div/div[2]/div[2]/p',
    )

    def __init__(self, username, password):
        """Log in and fetch the list of mention URLs to visit."""
        self.launcher = Launcher(username, password)
        self.driver = self.launcher.login()
        self.mention_list = self.launcher.get_mention_list()
        self.es = Es_fb()
        self.list = []
        self.update_time = int(time.time())

    @staticmethod
    def _first_of(scope, xpaths, extract, default='None'):
        """Return ``extract(element)`` for the first XPath that works.

        Each XPath is tried in order under ``scope``; ``default`` is
        returned when none of them succeeds.
        """
        for xpath in xpaths:
            try:
                return extract(scope.find_element_by_xpath(xpath))
            except Exception:
                continue
        return default

    def get_mention(self):
        """Return ``self.list`` with one dict per mention page.

        The browser window is closed in all cases.
        """
        def text_of(el):
            return el.text

        def hovercard_id(el):
            return ''.join(re.findall(
                r'id=(\d+)', el.get_attribute('data-hovercard')))

        def src_of(el):
            return el.get_attribute('src')

        def utime(el):
            return int(el.get_attribute('data-utime'))

        def permalink_id(el):
            return ''.join(re.findall(r'/(\d+)', el.get_attribute('href')))

        try:
            for url in self.mention_list:
                self.driver.get(url)
                time.sleep(1)
                # Dismiss the notification overlay when present.
                try:
                    self.driver.find_element_by_xpath(
                        self._POPUP_XPATH).click()
                except Exception:
                    pass
                for each in self.driver.find_elements_by_xpath(
                        '//div[@id="contentArea"]'):
                    author_name = self._first_of(
                        each, self._AUTHOR_XPATHS, text_of)
                    author_id = self._first_of(
                        each, self._AUTHOR_XPATHS, hovercard_id)
                    pic_url = self._first_of(
                        each, self._PHOTO_XPATHS, src_of)
                    ti = self._first_of(each, self._TIME_XPATHS, utime)
                    content = self._first_of(
                        each, self._CONTENT_XPATHS, text_of)
                    mid = self._first_of(
                        each, self._PERMALINK_XPATHS, permalink_id)
                    self.list.append({
                        'uid': author_id,
                        'photo_url': pic_url,
                        'nick_name': author_name,
                        'mid': mid,
                        'timestamp': ti,
                        'text': content,
                        'update_time': self.update_time,
                    })
        finally:
            self.driver.close()
        return self.list

    def save(self, indexName, typeName, list):
        """Hand the collected records to ``Es_fb.executeES``."""
        self.es.executeES(indexName, typeName, list)
class Mention():
    """Scrape the posts that mention the logged-in user (mobile layout)."""

    _AUTHOR_XPATH = (
        '//div[@id="root"]/div[1]/div[1]/div/div[1]/div[1]/table/tbody/tr'
        '/td[2]/div/h3/strong/a')
    _PHOTO_XPATHS = (
        '//div[@id="m-timeline-cover-section"]/div[1]/div[2]/div[1]/div/a/img',
        '//div[@id="m-timeline-cover-section"]/div[2]/div/div[1]/div[1]/a/img',
        '//div[@id="m-timeline-cover-section"]/div[2]/div/div[1]/a/img',
    )

    def __init__(self, username, password):
        """Obtain the mention URLs and a logged-in driver from the launcher."""
        self.launcher = Launcher(username, password)
        self.mention_list, self.driver = self.launcher.get_mention_list()
        self.es = Es_fb()
        self.list = []
        self.update_time = int(time.time())

    def date2timestamp(self, date):
        """Convert a Chinese date/time label to a local Unix timestamp.

        Understands '刚刚' (just now), 'N分钟…' / 'N小时…' relative labels,
        and calendar dates with or without a year and with an optional
        上午/下午 clock. A missing year means the current year. Raises
        ValueError (or IndexError for a malformed clock) on other text.
        """
        date = date.replace(u'月', '-').replace(u'日', '').replace(' ', '')
        now = int(time.time())
        # Unicode literal: a byte-string literal never equals the unicode
        # text returned by the driver on Python 2.
        if date == u'刚刚':
            return now
        # Fold the 12-hour clock into 24-hour form. int() replaces the
        # former eval(), which executed scraped text and failed on '08'.
        for marker, offset in ((u'上午', 0), (u'下午', 12)):
            if marker in date:
                pieces = date.split(marker)
                clock = pieces[1].split(':')
                hour = int(clock[0]) % 12 + offset
                date = pieces[0] + ' ' + str(hour) + ':' + clock[1]
                break
        if u'分钟' in date:
            return now - int(re.search(r'(\d+)', date).group(1)) * 60
        if u'小时' in date:
            return now - int(re.search(r'(\d+)', date).group(1)) * 60 * 60
        if u'年' in date:
            date = date.replace(u'年', '-')
        else:
            date = time.strftime('%Y') + '-' + date
        try:
            return int(time.mktime(time.strptime(date, '%Y-%m-%d')))
        except ValueError:
            return int(time.mktime(time.strptime(date, '%Y-%m-%d %H:%M')))

    def _profile_photo(self):
        """Return the avatar URL of the open profile page, else ''.

        Three known layouts are tried in order. An unmatched page (for
        example when the uid could not be read) no longer aborts the run.
        """
        for xpath in self._PHOTO_XPATHS:
            try:
                return self.driver.find_element_by_xpath(
                    xpath).get_attribute('src')
            except Exception:
                continue
        return ''

    def get_mention(self):
        """Return ``self.list`` with one dict per mention URL.

        First pass: read author, time, text and ids from each mention page
        (missing fields become '' or 0). Second pass: visit each author's
        profile to add ``photo_url``. The browser is always shut down.
        """
        try:
            for url in self.mention_list:
                self.driver.get(url)
                time.sleep(1)
                try:
                    nick_name = self.driver.find_element_by_xpath(
                        self._AUTHOR_XPATH).text
                except Exception:
                    nick_name = ''
                print(nick_name)
                try:
                    uid = re.findall(
                        r'id=(\d+)',
                        self.driver.find_element_by_xpath(
                            self._AUTHOR_XPATH).get_attribute('href'))[0]
                except Exception:
                    uid = ''
                print(uid)
                try:
                    timestamp = self.date2timestamp(
                        self.driver.find_element_by_xpath(
                            '//div[@id="root"]/div[1]/div[1]/div/div[2]/div/abbr'
                        ).text)
                except Exception:
                    timestamp = 0
                print(timestamp)
                try:
                    text = self.driver.find_element_by_xpath(
                        '//div[@id="root"]/div[1]/div[1]/div/div[1]/div[2]'
                    ).text
                except Exception:
                    text = ''
                print(text)
                try:
                    mid = ''.join(re.findall(r'fbid%3D(\d+)', url))
                except Exception:
                    mid = ''
                print(mid)
                self.list.append({
                    'uid': uid,
                    'nick_name': nick_name,
                    'mid': mid,
                    'timestamp': timestamp,
                    'text': text,
                    'update_time': self.update_time,
                })
            for i in self.list:
                self.driver.get('https://m.facebook.com/profile.php?id='
                                + str(i['uid']))
                i['photo_url'] = self._profile_photo()
        finally:
            self.driver.quit()
        return self.list

    def save(self, indexName, typeName, mention_list):
        """Hand the collected records to ``Es_fb.executeES``."""
        self.es.executeES(indexName, typeName, mention_list)
class Share():
    """Scrape the posts listed by ``Launcher.get_share_list()``.

    The launcher call returns the list of post URLs and the browser
    driver.  :meth:`get_share` builds one record per URL and :meth:`save`
    forwards records to ``Es_fb.executeES``.
    """

    # Seconds to wait after navigation before reading the page.
    PAGE_LOAD_WAIT = 120

    # Overlay that is clicked, when present, to dismiss a notification popup.
    POPUP_XPATH = '//div[@class="_n8 _3qx uiLayer _3qw"]'
    AUTHOR_XPATH = (
        '//table[@role="presentation"]/tbody/tr/td[2]/div/h3/strong/a')
    CONTENT_XPATH = (
        '/html/body/div/div/div[2]/div/div[1]/div[1]/div/div[1]/div[2]')
    ROOT_TEXT_XPATH = (
        '/html/body/div/div/div[2]/div/div[1]/div[1]/div/div[1]'
        '/div[3]/div[2]/div/div/div[2]')

    def __init__(self, username, password):
        """Create the launcher, fetch the share URLs and the driver."""
        self.launcher = Launcher(username, password)
        self.es = Es_fb()
        self.list = []
        self.share_list, self.driver = self.launcher.get_share_list()
        self.update_time = int(time.time())

    @staticmethod
    def _attempt(extract, default):
        """Return ``extract()``, or ``default`` if it raises an Exception.

        ``Exception`` (not a bare ``except``) so that Ctrl-C and
        ``SystemExit`` still stop a running scrape.
        """
        try:
            return extract()
        except Exception:
            return default

    @staticmethod
    def _squash(text):
        """Remove every space, newline and tab from ``text``."""
        return text.replace(' ', '').replace('\n', '').replace('\t', '')

    def _scrape_share(self, url):
        """Load one post and return its record.

        String fields that cannot be read fall back to ``''``; so does
        ``timestamp``, which is otherwise an int.  Most fields are printed
        as they are extracted, and a screenshot is written to
        ``get_share000.png`` on every call.
        """
        self.driver.get(url)
        time.sleep(self.PAGE_LOAD_WAIT)
        find = self.driver.find_element_by_xpath

        # Dismiss the notification popup if there is one.
        self._attempt(lambda: find(self.POPUP_XPATH).click(), None)

        page = self.driver.page_source
        self.driver.save_screenshot('get_share000.png')

        author_name = self._attempt(lambda: find(self.AUTHOR_XPATH).text, '')
        print(author_name)
        author_id = self._attempt(
            lambda: re.search(r'id%3D(\d+)&', url).group(1), '')
        print(author_id)
        content = self._attempt(lambda: find(self.CONTENT_XPATH).text, '')
        timestamp = self._attempt(
            lambda: int(re.search(
                r'"publish_time":(\d+),', self._squash(page)).group(1)),
            '')
        print(timestamp)
        mid = self._attempt(
            lambda: re.search(r'fbid%3D(\d+)%', url).group(1), '')
        print(mid)
        root_mid = self._attempt(
            lambda: re.search(
                r'"original_content_id":"(\d+)"', page).group(1),
            '')
        print(root_mid)
        root_text = self._attempt(
            lambda: self._squash(find(self.ROOT_TEXT_XPATH).text), '')
        print(root_text)

        return {
            'uid': author_id,
            'nick_name': author_name,
            'mid': mid,
            'timestamp': timestamp,
            'text': content,
            'update_time': self.update_time,
            'root_text': root_text,
            'root_mid': root_mid,
        }

    def get_share(self):
        """Scrape every URL in ``self.share_list`` into ``self.list``.

        The driver is always quit, including when navigation raises.

        Returns:
            ``self.list``.
        """
        try:
            for url in self.share_list:
                self.list.append(self._scrape_share(url))
        finally:
            self.driver.quit()
        return self.list

    # ``list`` shadows the builtin but is kept: it is part of the signature.
    def save(self, indexName, typeName, list):
        """Pass ``list`` to ``Es_fb.executeES`` for the given index/type."""
        self.es.executeES(indexName, typeName, list)
class Like():
    """Collect the people who liked each post in ``Launcher.get_like_list()``.

    NOTE(review): the driver obtained from ``Launcher.login()`` is never
    quit by this class; confirm the caller is responsible for closing it.
    """

    # Seconds to wait for the list of likers after navigating to it.
    LIKERS_LOAD_WAIT = 10

    def __init__(self, username, password):
        """Log in through the launcher and fetch the list of post URLs."""
        self.launcher = Launcher(username, password)
        self.driver = self.launcher.login()
        self.like_list = self.launcher.get_like_list()
        self.es = Es_fb()
        self.list = []

    @staticmethod
    def _first(node, xpaths, extract):
        """Return ``extract(element)`` for the first xpath that works.

        Each xpath is looked up under ``node`` in order.  A failure of the
        lookup or of ``extract`` moves on to the next xpath; the failure of
        the last one is re-raised.  Only ``Exception`` is intercepted, so
        Ctrl-C still interrupts a scrape.
        """
        last = len(xpaths) - 1
        for position, xpath in enumerate(xpaths):
            try:
                return extract(node.find_element_by_xpath(xpath))
            except Exception:
                if position == last:
                    raise

    @staticmethod
    def _hovercard_id(element):
        """Join every ``id=<digits>`` match in the ``data-hovercard`` attribute."""
        return ''.join(
            re.findall(r'id=(\d+)', element.get_attribute('data-hovercard')))

    def _read_post(self):
        """Read the liked post's owner, text and time from the current page.

        ``root_content`` falls back to the string ``'None'``; the other
        fields raise if neither of their two known locations matches.
        """
        root_name = self._first(
            self.driver,
            ('//span[@class="fwb"]', '//span[@class="fwb fcg"]'),
            lambda span: span.text)
        owner_id = self._first(
            self.driver,
            ('//span[@class="fwb"]/a', '//span[@class="fwb fcg"]/a'),
            self._hovercard_id)
        try:
            root_content = self.driver.find_element_by_xpath(
                '//div[@class="_5pbx userContent _22jv _3576"]/p').text
        except Exception:
            root_content = 'None'
        timestamp = self._first(
            self.driver,
            ('//abbr[@class="_5ptz"]',
             '//abbr[@class="_5ptz timestamp livetimestamp"]'),
            lambda abbr: int(abbr.get_attribute('data-utime')))
        return {
            'root_name': root_name,
            'id': owner_id,
            'root_content': root_content,
            'timestamp': timestamp,
        }

    def _read_liker(self, entry):
        """Read one liker's name, id, picture and relationship label.

        ``facebook_type`` falls back to the string ``"None"`` when the
        relationship label is absent.
        """
        anchor = entry.find_element_by_xpath(
            './div/div/div/div[1]/div[2]/div/a')
        author_name = anchor.text
        author_id = self._hovercard_id(anchor)
        pic_url = entry.find_element_by_xpath(
            './div/a/div/img').get_attribute('src')
        try:
            relationship = entry.find_element_by_xpath(
                './div/div/div/div[2]/div[2]/span/div/a/span[2]/span').text
        except Exception:
            relationship = "None"
        return {
            'nick_name': author_name,
            'uid': author_id,
            'photo_url': pic_url,
            'facebook_type': relationship,
        }

    def get_like(self):
        """Append one record per (post, liker) pair to ``self.list``.

        For every URL the post details are read first, then the page
        behind the ``_2x4v`` link is opened and each ``li._5i_q`` entry on
        it becomes a record combining liker and post fields.

        Returns:
            ``self.list``.
        """
        for url in self.like_list:
            self.driver.get(url)
            post = self._read_post()

            likers_url = self.driver.find_element_by_xpath(
                '//a[@class="_2x4v"]').get_attribute('href')
            self.driver.get(likers_url)
            time.sleep(self.LIKERS_LOAD_WAIT)

            for entry in self.driver.find_elements_by_xpath(
                    '//li[@class="_5i_q"]'):
                record = self._read_liker(entry)
                record.update(post)
                self.list.append(record)
        return self.list

    # ``list`` shadows the builtin but is kept: it is part of the signature.
    def save(self, indexName, typeName, list):
        """Pass ``list`` to ``Es_fb.executeES`` for the given index/type."""
        self.es.executeES(indexName, typeName, list)
class Friend():
    """Collect the entries shown on the logged-in account's friends tab.

    ``__init__`` drives the browser to the tab and scrolls it to the
    bottom; :meth:`get_friend` reads the entries and :meth:`save` forwards
    records to ``Es_fb.executeES``.

    NOTE(review): the driver obtained from ``Launcher.login()`` is never
    quit by this class; confirm the caller is responsible for closing it.
    """

    # Scrolls down 100 px every 150 ms until the bottom of the document is
    # reached, then scrolls back to the top and appends "scroll-done" to
    # the page title so the Python side can tell that it has finished.
    SCROLL_SCRIPT = (
        ' (function () { var y = 0; var step = 100; window.scroll(0, 0);'
        ' function f() { if (y < document.body.scrollHeight) {'
        ' y += step; window.scroll(0, y); setTimeout(f, 150); }'
        ' else { window.scroll(0, 0); document.title += "scroll-done"; } }'
        ' setTimeout(f, 1500); })(); ')

    def __init__(self, username, password):
        """Log in, open the friends tab and scroll it to the end.

        Blocks until the page title contains ``scroll-done``; there is no
        timeout, so a page on which the script never finishes blocks
        forever.
        """
        self.launcher = Launcher(username, password)
        self.driver = self.launcher.login()
        time.sleep(2)
        # Link titled "个人主页" (personal profile page).
        self.driver.find_element_by_xpath('//a[@title="个人主页"]').click()
        time.sleep(3)
        # Third entry of the timeline navigation bar.
        self.driver.find_element_by_xpath(
            '//ul[@data-referrer="timeline_light_nav_top"]/li[3]/a').click()
        time.sleep(1)
        self.driver.execute_script(self.SCROLL_SCRIPT)
        time.sleep(3)
        while "scroll-done" not in self.driver.title:
            time.sleep(3)

        self.data_gt = self.driver.find_element_by_xpath(
            '//div[@id="contentArea"]/div[1]').get_attribute('data-gt')
        # ``data-gt`` is JSON; its ``profile_owner`` entry is stored as the
        # id of the account whose friends are being listed.
        self.root_uid = json.loads(self.data_gt)['profile_owner']
        self.es = Es_fb()
        self.list = []
        self.current_ts = int(time.time())
        self.update_time = self.current_ts

    def get_friend(self):
        """Append one record per readable friend entry to ``self.list``.

        An entry whose picture, name or id cannot be read is skipped.
        Previously such an entry was appended with the values left over
        from the preceding entry (or raised ``NameError`` when it was the
        first one).

        Returns:
            ``self.list``.
        """
        hovercard_id = re.compile(r'id=(\d+)')
        for entry in self.driver.find_elements_by_xpath(
                '//div[@class="_5h60 _30f"]//ul//li'):
            try:
                pic_url = entry.find_element_by_xpath(
                    './div/a/img').get_attribute('src')
                anchor = entry.find_element_by_xpath(
                    './div/div/div[2]/div/div[2]/div/a')
                name = anchor.text
                user_id = ''.join(
                    hovercard_id.findall(
                        anchor.get_attribute('data-hovercard')))
            except Exception:
                # Not a complete friend entry; nothing reliable to record.
                continue
            self.list.append({
                'root_uid': self.root_uid,
                'photo_url': pic_url,
                'nick_name': name,
                'uid': user_id,
                'update_time': self.update_time,
            })
        return self.list

    # ``list`` shadows the builtin but is kept: it is part of the signature.
    def save(self, indexName, typeName, list):
        """Pass ``list`` to ``Es_fb.executeES`` for the given index/type."""
        self.es.executeES(indexName, typeName, list)
class Comment():
    """Collect the comments under each post in ``Launcher.get_comment_list()``.

    NOTE(review): the driver obtained from ``Launcher.login()`` is never
    quit by this class; confirm the caller is responsible for closing it.
    """

    # Seconds to wait after navigation before reading the page.
    PAGE_WAIT = 1

    # Each field has two known locations inside a comment; they are tried
    # in the order given.
    AUTHOR_XPATHS = (
        './div/div/div/div[2]/div/div/div/div/div/span/span[1]/a',
        './div/div/div/div[2]/div/div/div/span/span[1]/a',
    )
    CONTENT_XPATHS = (
        './div/div/div/div[2]/div/div/div/div/div/span/span[2]/span/span/span/span',
        './div/div/div/div[2]/div/div/div/span/span[2]/span/span/span/span',
    )
    TIME_XPATHS = (
        './div/div/div/div[2]/div/div/div[2]/span[4]/a/abbr',
        './div/div/div/div[2]/div/div/div[2]/span[5]/a/abbr',
    )
    PHOTO_XPATH = './div/div/div/div[1]/a/img'

    def __init__(self, username, password):
        """Log in through the launcher and fetch the list of post URLs."""
        self.launcher = Launcher(username, password)
        self.driver = self.launcher.login()
        self.es = Es_fb()
        self.comment_list = self.launcher.get_comment_list()
        self.list = []

    @staticmethod
    def _first(node, xpaths, extract):
        """Return ``extract(element)`` for the first xpath that works.

        Each xpath is looked up under ``node`` in order.  A failure of the
        lookup or of ``extract`` moves on to the next xpath; the failure of
        the last one is re-raised.  Only ``Exception`` is intercepted, so
        Ctrl-C still interrupts a scrape.
        """
        last = len(xpaths) - 1
        for position, xpath in enumerate(xpaths):
            try:
                return extract(node.find_element_by_xpath(xpath))
            except Exception:
                if position == last:
                    raise

    def _read_comment(self, entry):
        """Build the record for one comment element.

        Raises whatever the driver raises when a field is found at none of
        its known locations.
        """
        hovercard_id = re.compile(r'id=(\d+)')
        author_name = self._first(
            entry, self.AUTHOR_XPATHS, lambda anchor: anchor.text)
        author_id = self._first(
            entry, self.AUTHOR_XPATHS,
            lambda anchor: ''.join(
                hovercard_id.findall(anchor.get_attribute('data-hovercard'))))
        pic_url = entry.find_element_by_xpath(
            self.PHOTO_XPATH).get_attribute('src')
        content = self._first(
            entry, self.CONTENT_XPATHS, lambda span: span.text)
        posted_at = self._first(
            entry, self.TIME_XPATHS,
            lambda abbr: int(abbr.get_attribute('data-utime')))
        return {
            'nick_name': author_name,
            'uid': author_id,
            'photo_url': pic_url,
            'text': content,
            'timestamp': posted_at,
        }

    def get_comment(self):
        """Append one record per comment to ``self.list``.

        Every URL in ``self.comment_list`` is printed and opened, and each
        element labelled "评论" (comment) on the page becomes a record.

        The post's own text and time are no longer looked up: they were
        never stored in the records, and a page lacking them aborted the
        whole run.

        Returns:
            ``self.list``.
        """
        for url in self.comment_list:
            print(url)
            self.driver.get(url)
            time.sleep(self.PAGE_WAIT)
            for entry in self.driver.find_elements_by_xpath(
                    '//div[@aria-label="评论"]'):
                self.list.append(self._read_comment(entry))
        return self.list

    # ``list`` shadows the builtin but is kept: it is part of the signature.
    def save(self, indexName, typeName, list):
        """Pass ``list`` to ``Es_fb.executeES`` for the given index/type."""
        self.es.executeES(indexName, typeName, list)
class Share():
    """Scrape the posts listed by ``Launcher.get_share_list()``.

    The launcher call returns the list of URLs, the browser driver and a
    display object whose ``popen`` process is killed once scraping ends.
    :meth:`get_share` builds the records and :meth:`save` forwards them to
    ``Es_fb.executeES``.
    """

    # Overlay that is clicked, when present, to dismiss a notification popup.
    POPUP_XPATH = '//div[@class="_n8 _3qx uiLayer _3qw"]'
    AUTHOR_XPATH = (
        './div[2]/div[1]/div[2]/div[1]/div/div/div[2]/div/div/div[2]'
        '/h5/span/span/span/a')
    PHOTO_XPATH = './div[2]/div/div[2]/div/div/a/div/img'
    CONTENT_XPATH = './div[2]/div/div[2]/div[2]'
    # The time element sits in the third or, failing that, second span.
    TIME_XPATHS = (
        './div[2]/div/div[2]/div/div/div/div[2]/div/div/div[2]'
        '/div/span[3]/span/a/abbr',
        './div[2]/div/div[2]/div/div/div/div[2]/div/div/div[2]'
        '/div/span[2]/span/a/abbr',
    )
    PERMALINK_XPATH = (
        './div[2]/div/div[2]/div/div/div/div[2]/div/div/div[2]'
        '/div/span[3]/span/a')
    ROOT_LINK_XPATH = (
        './div[2]/div/div[2]/div/div/div/div[2]/div/div/div[2]'
        '/h5/span/span/a')

    def __init__(self, username, password):
        """Create the launcher and fetch URLs, driver and display."""
        self.launcher = Launcher(username, password)
        self.es = Es_fb()
        self.list = []
        self.share_list, self.driver, self.display = \
            self.launcher.get_share_list()
        self.update_time = int(time.time())

    @staticmethod
    def _attempt(extract, default):
        """Return ``extract()``, or ``default`` if it raises an Exception.

        ``Exception`` (not a bare ``except``) so that Ctrl-C and
        ``SystemExit`` still stop a running scrape.
        """
        try:
            return extract()
        except Exception:
            return default

    def _read_post(self, post):
        """Build the record for one post element.

        Every field that cannot be read becomes the string ``'None'``.
        ``root_text`` carries the same value as ``text``.
        """
        find = post.find_element_by_xpath

        author_name = self._attempt(
            lambda: find(self.AUTHOR_XPATH).text, 'None')
        author_id = self._attempt(
            lambda: ''.join(re.findall(
                r'id=(\d+)',
                find(self.AUTHOR_XPATH).get_attribute('data-hovercard'))),
            'None')
        pic_url = self._attempt(
            lambda: find(self.PHOTO_XPATH).get_attribute('src'), 'None')
        content = self._attempt(
            lambda: find(self.CONTENT_XPATH).text, 'None')

        timestamp = 'None'
        for xpath in self.TIME_XPATHS:
            try:
                timestamp = int(find(xpath).get_attribute('data-utime'))
                break
            except Exception:
                continue

        mid = self._attempt(
            lambda: ''.join(re.findall(
                r'/(\d+)',
                find(self.PERMALINK_XPATH).get_attribute('href'))),
            'None')
        root_mid = self._attempt(
            lambda: ''.join(re.findall(
                r'story_fbid=(\d+)',
                find(self.ROOT_LINK_XPATH).get_attribute('href'))),
            'None')

        return {
            'uid': author_id,
            'photo_url': pic_url,
            'nick_name': author_name,
            'mid': mid,
            'timestamp': timestamp,
            'text': content,
            'update_time': self.update_time,
            'root_text': content,
            'root_mid': root_mid,
        }

    def get_share(self):
        """Scrape every URL in ``self.share_list`` into ``self.list``.

        On each page, every ``./div`` child of every
        ``//div[@role="feed"]/div`` group becomes one record.  The driver
        is quit and the display process killed in all cases; the display
        is killed even when quitting the driver raises.

        Returns:
            ``self.list``.
        """
        try:
            for url in self.share_list:
                self.driver.get(url)
                time.sleep(1)
                # Dismiss the notification popup if there is one.
                self._attempt(
                    lambda: self.driver.find_element_by_xpath(
                        self.POPUP_XPATH).click(),
                    None)
                for group in self.driver.find_elements_by_xpath(
                        '//div[@role="feed"]/div'):
                    for post in group.find_elements_by_xpath('./div'):
                        self.list.append(self._read_post(post))
        finally:
            try:
                self.driver.quit()
            finally:
                self.display.popen.kill()
        return self.list

    # ``list`` shadows the builtin but is kept: it is part of the signature.
    def save(self, indexName, typeName, list):
        """Pass ``list`` to ``Es_fb.executeES`` for the given index/type."""
        self.es.executeES(indexName, typeName, list)