class Like():
    """Collect the users who liked each post in the launcher's like list.

    The constructor logs in through ``Launcher`` and keeps the returned
    driver, the list of post URLs and an ``Es_fb`` client.  ``get_like``
    visits every URL and accumulates one dict per liker in ``self.list``;
    ``save`` hands a list of such dicts to ``Es_fb.executeES``.
    """

    # Compiled once; the raw string avoids the invalid "\d" escape warning.
    _HOVERCARD_ID_RE = re.compile(r'id=(\d+)')
    _AUTHOR_LINK_XPATH = './div/div/div/div[1]/div[2]/div/a'

    def __init__(self, username, password):
        """Log in with the given credentials and fetch the post URL list."""
        self.launcher = Launcher(username, password)
        self.driver = self.launcher.login()
        self.like_list = self.launcher.get_like_list()
        self.es = Es_fb()
        self.list = []

    def _hovercard_id(self, element):
        """Return every ``id=<digits>`` group of the element's
        ``data-hovercard`` attribute, concatenated into one string."""
        return ''.join(
            self._HOVERCARD_ID_RE.findall(
                element.get_attribute('data-hovercard')))

    def _read_with_fallback(self, primary_xpath, fallback_xpath, read):
        """Apply ``read`` to the element at ``primary_xpath``.

        If locating or reading the primary element fails, the same
        ``read`` is applied to ``fallback_xpath``; a failure there
        propagates to the caller, exactly as before.
        """
        try:
            return read(self.driver.find_element_by_xpath(primary_xpath))
        except Exception:
            # ``Exception`` (not a bare except) so Ctrl-C / SystemExit
            # still interrupt a long scraping run.
            return read(self.driver.find_element_by_xpath(fallback_xpath))

    def _liker_item(self, each, root_name, root_id, root_content, timestamp):
        """Build the result dict for one liker row element."""
        # One lookup serves both the display name and the numeric id.
        author_link = each.find_element_by_xpath(self._AUTHOR_LINK_XPATH)
        author_name = author_link.text
        author_id = self._hovercard_id(author_link)
        pic_url = each.find_element_by_xpath(
            './div/a/div/img').get_attribute('src')
        try:
            relationship = each.find_element_by_xpath(
                './div/div/div/div[2]/div[2]/span/div/a/span[2]/span').text
        except Exception:
            # The relationship label is optional on a row.
            relationship = "None"
        return {
            'nick_name': author_name,
            'uid': author_id,
            'photo_url': pic_url,
            'facebook_type': relationship,
            'root_name': root_name,
            'id': root_id,
            'root_content': root_content,
            'timestamp': timestamp,
        }

    def get_like(self):
        """Visit every URL in ``self.like_list`` and collect its likers.

        Returns:
            ``self.list``, extended with one dict per liker holding the
            keys ``nick_name``, ``uid``, ``photo_url``, ``facebook_type``,
            ``root_name``, ``id``, ``root_content`` and ``timestamp``.

        Raises:
            Whatever the driver raises when a required element (post
            author, timestamp, likers link, liker name or picture) cannot
            be found under either of its known locations.
        """
        for url in self.like_list:
            self.driver.get(url)

            root_name = self._read_with_fallback(
                '//span[@class="fwb"]',
                '//span[@class="fwb fcg"]',
                lambda element: element.text)
            root_id = self._read_with_fallback(
                '//span[@class="fwb"]/a',
                '//span[@class="fwb fcg"]/a',
                self._hovercard_id)
            try:
                root_content = self.driver.find_element_by_xpath(
                    '//div[@class="_5pbx userContent _22jv _3576"]/p').text
            except Exception:
                # Posts without a text paragraph are stored as 'None'.
                root_content = 'None'
            timestamp = self._read_with_fallback(
                '//abbr[@class="_5ptz"]',
                '//abbr[@class="_5ptz timestamp livetimestamp"]',
                lambda element: int(element.get_attribute('data-utime')))

            # Follow the link to the page that lists the likers.
            self.driver.get(
                self.driver.find_element_by_xpath(
                    '//a[@class="_2x4v"]').get_attribute('href'))
            time.sleep(10)

            for each in self.driver.find_elements_by_xpath(
                    '//li[@class="_5i_q"]'):
                self.list.append(
                    self._liker_item(each, root_name, root_id,
                                     root_content, timestamp))
        return self.list

    def save(self, indexName, typeName, list):
        """Pass ``list`` to ``Es_fb.executeES`` under the given index and type."""
        self.es.executeES(indexName, typeName, list)
# NOTE(review): this fragment has no enclosing definition in view and its
# original indentation was lost; the layout below is a best guess.  The loop
# reads a bare name `driver` and prints each liker's fields -- it looks like
# leftover debugging code; confirm before relying on it.
for each in driver.find_elements_by_xpath('//li[@class="_5i_q"]'):
    author_name = each.find_element_by_xpath(
        './div/div/div/div[1]/div[2]/div/a').text
    print(author_name)
    # Concatenate every "id=<digits>" group found in the hovercard attribute.
    author_id = ''.join(
        re.findall(
            re.compile('id=(\d+)'),
            each.find_element_by_xpath(
                './div/div/div/div[1]/div[2]/div/a').get_attribute(
                    'data-hovercard')))
    print(author_id)
    pic_url = each.find_element_by_xpath(
        './div/a/div/img').get_attribute('src')
    print(pic_url)
    relationship = each.find_element_by_xpath(
        './div/div/div/div[2]/div[2]/span/div/a/span[2]/span').text
    print(relationship)
    print('-----')
# NOTE(review): whether this pause belongs inside or after the loop cannot be
# told from the collapsed source -- confirm.
time.sleep(10)


def save(self, indexName, typeName, item):
    """Forward ``item`` to ``es.executeES`` under the given index and type.

    Uses the bare name ``es`` (assigned under the ``__main__`` guard below),
    not an attribute of ``self``.
    """
    es.executeES(indexName, typeName, item)


if __name__ == '__main__':
    # NOTE(review): account credentials are hard-coded here; presumably they
    # should come from configuration or the environment -- confirm and rotate.
    fb = Launcher('18538728360', 'zyxing,0513')
    es = es_twitter()
    like_list = fb.get_like_list()
    # NOTE(review): `Like()` is called without arguments, while the `Like`
    # classes visible in this file require (username, password) -- confirm
    # which revision this entry point was written against.
    like = Like()
    like.get_like()
class Like():
    """Collect the users who liked each post in the launcher's like list.

    The constructor obtains the post URLs and a driver from ``Launcher``.
    ``get_like`` visits every URL, gathers one dict per liker in
    ``self.like_list`` and quits the driver; ``save`` hands a list of such
    dicts to ``Es_fb.executeES``.
    """

    # Compiled once; raw strings avoid the invalid "\d" escape warning.
    _DIGITS_RE = re.compile(r'(\d+)')
    _FBID_RE = re.compile(r'fbid%3D(\d+)%')
    _USER_ID_RE = re.compile(r'id=(\d+)')

    _STORY_XPATH = '//div[@id="m_story_permalink_view"]'
    _LIKER_CELL_XPATH = './table/tbody/tr/td/table/tbody/tr'

    def __init__(self, username, password):
        """Create the launcher and fetch the post URLs and the driver."""
        self.launcher = Launcher(username, password)
        self.like_urls_list, self.driver = self.launcher.get_like_list()
        self.es = Es_fb()
        self.like_list = []
        self.update_time = int(time.time())

    @staticmethod
    def _attempt(read, default):
        """Return ``read()``, or ``default`` if it raises.

        Catches ``Exception`` rather than everything, so Ctrl-C and
        ``SystemExit`` still stop a long scraping run.
        """
        try:
            return read()
        except Exception:
            return default

    def date2timestamp(self, date):
        """Convert a Chinese-formatted date label to a Unix timestamp.

        Handled forms (spaces are ignored, anything from the AM/PM marker
        onwards is dropped):

        * ``N分钟...`` -> now minus N minutes
        * ``N小时...`` -> now minus N hours
        * ``Y年M月D日`` -> local midnight of that date
        * ``M月D日``    -> local midnight of that date in the current year

        Raises:
            AttributeError: a relative label contains no digits.
            ValueError: the remaining text is not a ``Y-M-D`` date.
        """
        date = date.replace(u'月', '-').replace(u'日', '').replace(' ', '')
        for marker in (u'上午', u'下午'):
            if marker in date:
                date = date.split(marker)[0]

        if u'分钟' in date:
            minutes = int(self._DIGITS_RE.search(date).group(1))
            return int(time.time()) - minutes * 60
        if u'小时' in date:
            hours = int(self._DIGITS_RE.search(date).group(1))
            return int(time.time()) - hours * 60 * 60

        if u'年' in date:
            date = date.replace(u'年', '-')
        else:
            # No year in the label: assume the current local year.
            date = time.strftime('%Y') + '-' + date
        return int(time.mktime(time.strptime(date, '%Y-%m-%d')))

    def _liker_item(self, each, timestamp, root_text, root_mid):
        """Build the result dict for one liker row; missing fields get defaults."""
        name_xpath = self._LIKER_CELL_XPATH + '/td[3]/div/h3[1]/a'
        author_name = self._attempt(
            lambda: each.find_element_by_xpath(name_xpath).text, 'None')
        print(author_name)
        author_id = self._attempt(
            lambda: ''.join(
                self._USER_ID_RE.findall(
                    each.find_element_by_xpath(name_xpath).get_attribute(
                        'href'))),
            0)
        pic_url = self._attempt(
            lambda: each.find_element_by_xpath(
                self._LIKER_CELL_XPATH + '/td[1]/img').get_attribute('src'),
            'None')
        return {
            'uid': author_id,
            'photo_url': pic_url,
            'nick_name': author_name,
            'timestamp': timestamp,
            'root_text': root_text,
            'update_time': self.update_time,
            'root_mid': root_mid,
        }

    def _collect_post_likes(self, url):
        """Open one post, read its metadata and append all of its likers."""
        driver = self.driver
        driver.get(url)
        time.sleep(1)

        root_text = self._attempt(
            lambda: driver.find_element_by_xpath(
                self._STORY_XPATH + '/div[1]/div/div[1]/div[2]').text,
            'None')
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(root_text)
        timestamp = self._attempt(
            lambda: self.date2timestamp(
                driver.find_element_by_xpath(
                    self._STORY_XPATH + '/div[1]/div/div[2]/div[1]').text),
            0)
        print(timestamp)
        root_mid = self._attempt(
            lambda: self._FBID_RE.search(url).group(1), 0)
        print(root_mid)

        # Open the page that lists the likers.
        driver.get(
            driver.find_element_by_xpath(
                self._STORY_XPATH + '/div[2]/div/div[3]/a').get_attribute(
                    'href'))
        time.sleep(5)

        for each in driver.find_elements_by_xpath(
                '//div[@id="root"]/table/tbody/tr/td/div/ul/li'):
            self.like_list.append(
                self._liker_item(each, timestamp, root_text, root_mid))

    def get_like(self):
        """Visit every URL in ``self.like_urls_list`` and collect its likers.

        The driver is quit when scraping finishes, and also when it fails,
        so an error (e.g. a missing likers link) no longer leaks the
        browser session.

        Returns:
            ``self.like_list``, extended with one dict per liker holding the
            keys ``uid``, ``photo_url``, ``nick_name``, ``timestamp``,
            ``root_text``, ``update_time`` and ``root_mid``.
        """
        try:
            for url in self.like_urls_list:
                self._collect_post_likes(url)
        finally:
            self.driver.quit()
        return self.like_list

    def save(self, indexName, typeName, list):
        """Pass ``list`` to ``Es_fb.executeES`` under the given index and type."""
        self.es.executeES(indexName, typeName, list)
class Like():
    """Collect the users who liked each post in the launcher's like list.

    The constructor logs in through ``Launcher`` and keeps the returned
    driver, the list of post URLs and an ``Es_fb`` client.  ``get_like``
    visits every URL, gathers one dict per liker in ``self.list`` and
    closes the driver; ``save`` hands a list of such dicts to
    ``Es_fb.executeES``.
    """

    # Compiled once; raw strings avoid the invalid "\d" escape warning.
    _HOVERCARD_ID_RE = re.compile(r'id=(\d+)')
    _PATH_NUMBER_RE = re.compile(r'/(\d+)')

    _POPUP_XPATH = '//div[@class="_n8 _3qx uiLayer _3qw"]'
    _TIMESTAMP_XPATHS = (
        '//abbr[@class="_5ptz"]',
        '//abbr[@class="_5ptz timestamp livetimestamp"]',
    )
    _AUTHOR_LINK_XPATH = './div/div/div/div[1]/div[2]/div/a'

    def __init__(self, username, password):
        """Log in with the given credentials and fetch the post URL list."""
        self.launcher = Launcher(username, password)
        self.driver = self.launcher.login()
        self.like_list = self.launcher.get_like_list()
        self.es = Es_fb()
        self.list = []
        self.update_time = int(time.time())

    @staticmethod
    def _attempt(read, default):
        """Return ``read()``, or ``default`` if it raises.

        Catches ``Exception`` rather than everything, so Ctrl-C and
        ``SystemExit`` still stop a long scraping run.
        """
        try:
            return read()
        except Exception:
            return default

    def _dismiss_popup(self):
        """Click the notification overlay away if one is present."""
        try:
            self.driver.find_element_by_xpath(self._POPUP_XPATH).click()
        except Exception:
            # Best effort: ignore a missing or unclickable overlay.
            pass

    def _post_timestamp(self):
        """Return the post's ``data-utime`` as an int, or 0 if unavailable."""
        for xpath in self._TIMESTAMP_XPATHS:
            try:
                return int(
                    self.driver.find_element_by_xpath(xpath).get_attribute(
                        'data-utime'))
            except Exception:
                continue
        return 0

    def _liker_item(self, each, timestamp, text, mid):
        """Build the result dict for one liker row; missing fields get 'None'."""
        author_name = self._attempt(
            lambda: each.find_element_by_xpath(self._AUTHOR_LINK_XPATH).text,
            'None')
        author_id = self._attempt(
            lambda: ''.join(
                self._HOVERCARD_ID_RE.findall(
                    each.find_element_by_xpath(
                        self._AUTHOR_LINK_XPATH).get_attribute(
                            'data-hovercard'))),
            'None')
        pic_url = self._attempt(
            lambda: each.find_element_by_xpath(
                './div/a/div/img').get_attribute('src'),
            'None')
        return {
            'uid': author_id,
            'photo_url': pic_url,
            'nick_name': author_name,
            'timestamp': timestamp,
            'text': text,
            'update_time': self.update_time,
            'root_text': text,
            'root_mid': mid,
        }

    def _collect_post_likes(self, url):
        """Open one post, read its metadata and append all of its likers."""
        driver = self.driver
        driver.get(url)
        time.sleep(1)
        # Dismiss the notification popup so the page can be read.
        self._dismiss_popup()

        text = self._attempt(
            lambda: driver.find_element_by_xpath(
                '//div[@class="_5pbx userContent _22jv _3576"]').text,
            'None')
        timestamp = self._post_timestamp()
        # Every "/<digits>" group of the permalink, concatenated.
        mid = self._attempt(
            lambda: ''.join(
                self._PATH_NUMBER_RE.findall(
                    driver.find_element_by_xpath(
                        '//a[@class="_5pcq"]').get_attribute('href'))),
            0)

        # Open the page that lists the likers.
        driver.get(
            driver.find_element_by_xpath(
                '//a[@class="_2x4v"]').get_attribute('href'))
        time.sleep(5)
        # The popup can reappear after navigation.
        self._dismiss_popup()

        for each in driver.find_elements_by_xpath('//li[@class="_5i_q"]'):
            self.list.append(self._liker_item(each, timestamp, text, mid))

    def get_like(self):
        """Visit every URL in ``self.like_list`` and collect its likers.

        The driver is closed when scraping finishes or fails.

        Returns:
            ``self.list``, extended with one dict per liker holding the keys
            ``uid``, ``photo_url``, ``nick_name``, ``timestamp``, ``text``,
            ``update_time``, ``root_text`` and ``root_mid``.

        Raises:
            Whatever the driver raises when a post's likers link cannot be
            found or opened (the driver is still closed first).
        """
        try:
            for url in self.like_list:
                self._collect_post_likes(url)
        finally:
            self.driver.close()
        return self.list

    def save(self, indexName, typeName, list):
        """Pass ``list`` to ``Es_fb.executeES`` under the given index and type."""
        self.es.executeES(indexName, typeName, list)